From a94016c1cfbbd5457cd666846973683ea390c705 Mon Sep 17 00:00:00 2001 From: Dima Pristupa Date: Mon, 16 Dec 2024 17:49:50 +0200 Subject: [PATCH 01/10] Private Datasets: GQL API: Ability to change dataset visibility (#814) * Changes before rebasing * from_catalog_n: add clippy warnings suppression * kamu-adapter-auth-oso: add TODOs * Migrations: re-index ReBAC properties * test_oso: update imports * KamuAuthOso: add TODOs * DatasetActionAuthorizer::check_action_allowed(): add a TODO * DatasetEntryServiceHarness: update for tests * RebacService::{get_account_properties(),get_dataset_properties()}: return idempotency * DatasetEntryRepository::get_dataset_entries(): implement for SQLite & Postgres * RebacRepository::properties_count(): implement for SQLite & Postgres * AccountRepository::get_accounts(): implement for SQLite & Postgres * OsoResourceServiceInMem: handle DatasetLifecycleMessage's * OsoResourceServiceInMem::initialize(): update types * Split OsoResourceHolder to OsoResourceServiceInMem & OsoResourceServiceInitializator * OsoResourceHolder: remove dependency to JOB_KAMU_DATASETS_DATASET_ENTRY_INDEXER * kamu-cli: register DatasetEntryIndexer even if not in workspace * Tests stabilization activities * RebacIndexer: add missed #[interface(dyn InitOnStartup)] * kamu-cli: kamu_auth_rebac_services::register_dependencies() * kamu-cli: kamu_adapter_auth_oso::register_dependencies() * OsoDatasetAuthorizer: integrate OsoResourceHolder * OsoResourceHolder: introduce * DatasetEntryIndexer::index_datasets(): increase log severity * RebacIndexer: introduce * kamu-adapter-auth-oso: update description * RebacServiceImpl: dataset_id_entity -> dataset_entity * test_multi_tenant_rebac_dataset_lifecycle_message_consumer: actualize tests * kamu-adapter-auth-oso: add anonymous() helper * kamu-adapter-auth-oso: use MockDatasetRepositoryWriter * kamu-adapter-auth-oso: actualize tests * DatasetActionAuthorizer, DatasetAction: add oso-related impls * OsoDatasetAuthorizer::get_allowed_actions(): return , InternalError> * GQL, Dataset::properties(): use kamu_auth_rebac::DatasetProperties * kamu-adapter-auth{,-rebac}: remove experimental crates * OsoDatasetAuthorizer: initial RebacService integration * #[allow(unused_variables)] -> #[expect(unused_variables)] * kamu-adapter-auth: extract * kamu-adapter-rebac: initial * kamu-adapter-oauth, AggregatingDatasetActionAuthorizer: initial * kamu-adapter-graphql, from_catalog_n!(): introduce * test_multi_tenant_rebac_dataset_lifecycle_message_consumer: stabilize tests * SmTP, AxumServerPushProtocolInstance::push_main_flow(): remove extra allocations * Tests, test_gql_datasets: use macros for tests * Tests, test_gql_datasets: expected first * Fixes after rebasing * Tests: update dataset_create_empty_*() * RunInDatabaseTransactionLayer: remove unused * GQL, Datasets: use pretty_assertions::assert_eq!() * GQL, DatasetPropertyName: remove outdated scalar * MultiTenantRebacDatasetLifecycleMessageConsumer::handle_dataset_lifecycle_created_message(): add "allows_anonymous_read" property as well * GQL, Dataset::properties(): return flags for simplicity * DatasetMut::set_visibility(): stabilize * Preparations - DependencyGraphServiceInMemory: remove extra .int_err() calls - Dataset::rebac_properties(): introduce - RebacService::get_dataset_properties(): use DatasetPropertyName instead of PropertyName - kamu-auth-rebac: extract value constants - DatasetMut::{set_publicly_available(),set_anonymous_available()}: ensure account owns dataset - DatasetMut: move to own directory - 
DatasetMut::{set_publicly_available(),set_anonymous_available()}: hide methods behind logging guards - DatasetMut::set_property(): extract method - DatasetMut::set_anonymous_available(): implement - DatasetMut::set_publicly_available(): implement - RevokeResultSuccess::message(): fix typo * Fixes after rebasing on 0.208.* * Tests, kamu-cli: auto-register e2e-user for the e2e mode * OSO: replace names with IDs in schema * Tests stabilization * sqlx: add cached queries * Build speed-ups: remove unused deps * test_pull_derivative_mt: correct running * CHANGELOG: add some entries * DatasetEntryRepository: simplify lifetimes * kamu-adapter-auth-oso-rebac: add "-rebac" suffix * Remove several TODOs * CHANGELOG.md: add several entries * OsoDatasetAuthorizer: revisit implementation * Review 1: GQL: remove Dataset.properties * Review 1: OsoDatasetAuthorizer::ctor(): fix param name * database-common, EntityStreamer: introduce * DatasetEntryServiceImpl: use EntityStreamer * RebacServiceImpl::get_dataset_properties_by_ids(): add * PaginationOpts::safe_limit(): add * Tests, EntityStreamer: add tests with input data * RebacService::get_dataset_properties_by_ids(): update interface * DatasetEntryServiceImpl: use EntityStreamer [2] * OsoResourceServiceInMem: rewrite to use streamed pages * OsoDatasetAuthorizer: use get_multiple_dataset_resources() * OsoResourceServiceInitializator: remove * query_handler_post(): add a comma in doc * DatasetActionAuthorizer: add TODOs * test_flow_event_store: fix typos * OsoDatasetAuthorizer::user_dataset_pair(): remove * RebacIndexer::index_dataset_entries(): iterate over a stream * EntityStreamer: remove extra int_err() & resort declarations * AccountRepository::get_accounts(): streamed version * RebacIndexer::index_accounts(): use iterate over a stream * Test fixes * RebacRepository::get_entity_properties_by_ids(): implementations * Remove extra as_did_str() call * RebacRepository::get_entity_properties_by_ids(): implementations[2] * AccountRepository::accounts_count(): implementations * PostgresAccountRepository::get_accounts(): implementation * sqlx: update cached queries * RebacRepository::get_entity_properties_by_ids(): implementations[3] * DatasetEntryServiceImpl: use tokio::sync::RwLock * PostgresDatasetEntryRepository: tweaks * EntityStreamer -> EntityPageStreamer * sqlite_generate_placeholders_list: extract & use * OsoResourceServiceInMem: add a TODO about state * Search::query(): use from_catalog_n!() * OsoResourceServiceInMem -> OsoResourceServiceImpl * KamuAuthOso: impl Deref to Arc * OsoResourceServiceImpl: concrete error types * kamu-adapter-auth-oso-rebac: remove extra dep * DatasetEntryRepository: use odf namespace * DatasetEntryServiceImpl: use odf namespace * DatasetEntryService::list_entries_owned_by(): do not clone owner_id * DatasetEntryRepository::get_dataset_entries(): update ORDER BY column * EntityListing -> EntityPageListing * Tweaks before merging --- CHANGELOG.md | 11 + Cargo.lock | 163 ++----- Cargo.toml | 6 +- ...1126095018_rebac_properties_reindexing.sql | 3 + ...1126095018_rebac_properties_reindexing.sql | 3 + resources/openapi-mt.json | 2 +- resources/openapi.json | 2 +- resources/schema.gql | 22 + src/adapter/auth-oso-rebac/Cargo.toml | 53 +++ .../src/dataset_resource.rs | 22 +- .../auth-oso-rebac/src/dependencies.rs | 22 + .../src/kamu_auth_oso.rs | 18 + src/adapter/auth-oso-rebac/src/lib.rs | 24 + .../src/oso_dataset_authorizer.rs | 240 ++++++++++ .../src/oso_resource_service_impl.rs | 270 ++++++++++++ .../src/schema.polar | 12 +- 
.../src/user_actor.rs | 21 +- .../{auth-oso => auth-oso-rebac}/tests/mod.rs | 0 .../tests/tests/mod.rs | 0 .../auth-oso-rebac/tests/tests/test_oso.rs | 207 +++++++++ .../tests/test_oso_dataset_authorizer.rs | 248 +++++++++++ src/adapter/auth-oso/Cargo.toml | 43 -- .../auth-oso/src/oso_dataset_authorizer.rs | 196 --------- src/adapter/auth-oso/tests/tests/test_oso.rs | 181 -------- .../tests/test_oso_dataset_authorizer.rs | 151 ------- src/adapter/flight-sql/Cargo.toml | 1 - src/adapter/flight-sql/src/lib.rs | 2 + src/adapter/flight-sql/src/service.rs | 2 +- src/adapter/flight-sql/src/session_factory.rs | 7 +- src/adapter/graphql/Cargo.toml | 11 +- src/adapter/graphql/src/guards.rs | 2 + src/adapter/graphql/src/lib.rs | 1 + src/adapter/graphql/src/mutations/auth_mut.rs | 2 +- .../{ => dataset_mut}/dataset_mut.rs | 91 +++- .../dataset_mut/dataset_mut_utils.rs | 36 ++ .../src/mutations/dataset_mut/mod.rs} | 11 +- .../graphql/src/mutations/datasets_mut.rs | 1 - .../graphql/src/queries/accounts/accounts.rs | 2 - .../graphql/src/queries/datasets/dataset.rs | 9 +- .../src/queries/datasets/dataset_metadata.rs | 4 + .../graphql/src/queries/datasets/datasets.rs | 4 - src/adapter/graphql/src/queries/search.rs | 13 +- src/adapter/graphql/src/scalars/account.rs | 2 +- .../graphql/src/scalars/dataset_id_name.rs | 2 +- src/adapter/graphql/src/utils.rs | 22 +- src/adapter/graphql/tests/tests/test_auth.rs | 2 +- .../graphql/tests/tests/test_gql_datasets.rs | 293 +++++++++---- src/adapter/graphql/tests/utils/auth_utils.rs | 7 +- src/adapter/http/Cargo.toml | 3 - src/adapter/http/src/data/query_handler.rs | 2 +- .../middleware/dataset_authorization_layer.rs | 12 +- .../axum_server_push_protocol.rs | 8 +- .../http/tests/tests/test_data_query.rs | 6 +- src/adapter/http/tests/tests/test_routing.rs | 2 +- src/adapter/oauth/Cargo.toml | 13 +- src/adapter/oauth/src/lib.rs | 3 +- src/adapter/odata/Cargo.toml | 5 +- src/app/cli/Cargo.toml | 4 +- src/app/cli/src/app.rs | 52 ++- src/app/cli/src/commands/list_command.rs | 2 +- src/app/cli/tests/tests/test_di_graph.rs | 2 + src/domain/accounts/domain/Cargo.toml | 1 - .../accounts/domain/src/entities/account.rs | 2 +- .../domain/src/repos/account_repository.rs | 29 ++ .../domain/src/services/account_service.rs | 40 ++ .../accounts/domain/src/services/mod.rs | 2 + src/domain/accounts/services/Cargo.toml | 3 +- .../services/src/account_service_impl.rs | 72 +++ src/domain/accounts/services/src/lib.rs | 2 + .../domain/src/entities/property.rs | 41 +- .../domain/src/repos/rebac_repository.rs | 16 + .../domain/src/services/rebac_service.rs | 62 ++- src/domain/auth-rebac/services/Cargo.toml | 5 + .../auth-rebac/services/src/dependencies.rs | 26 ++ .../auth-rebac/services/src/jobs/mod.rs | 14 + src/domain/auth-rebac/services/src/lib.rs | 6 + ...ebac_dataset_lifecycle_message_consumer.rs | 17 +- .../auth-rebac/services/src/rebac_indexer.rs | 124 ++++++ .../services/src/rebac_service_impl.rs | 158 +++++-- ...ebac_dataset_lifecycle_message_consumer.rs | 97 ++-- src/domain/core/Cargo.toml | 4 +- .../src/auth/dataset_action_authorizer.rs | 68 ++- .../src/services/dataset_ownership_service.rs | 1 + .../core/src/services/dataset_registry.rs | 1 + .../services/ingest/polling_ingest_service.rs | 9 +- .../services/ingest/push_ingest_service.rs | 7 +- .../src/repos/dataset_entry_repository.rs | 63 +-- .../src/services/dataset_entry_service.rs | 23 +- src/domain/datasets/services/Cargo.toml | 3 - .../services/src/dataset_entry_indexer.rs | 11 +- .../src/dataset_entry_service_impl.rs | 230 
+++++----- .../tests/tests/test_dataset_entry_service.rs | 2 +- src/domain/flow-system/domain/Cargo.toml | 4 +- src/domain/flow-system/services/Cargo.toml | 9 - src/domain/opendatafabric/Cargo.toml | 3 - .../tests/tests/test_dataset_id.rs | 2 +- src/domain/task-system/domain/Cargo.toml | 3 + src/domain/task-system/services/Cargo.toml | 2 - .../services/src/task_executor_impl.rs | 2 +- src/e2e/app/cli/common/src/e2e_harness.rs | 1 + .../common/src/kamu_api_server_client_ext.rs | 14 +- src/e2e/app/cli/inmem/Cargo.toml | 1 - .../tests/tests/commands/test_pull_command.rs | 2 +- src/e2e/app/cli/mysql/Cargo.toml | 1 - src/e2e/app/cli/postgres/Cargo.toml | 1 - .../tests/tests/commands/test_pull_command.rs | 2 +- src/e2e/app/cli/repo-tests/Cargo.toml | 2 - .../src/commands/test_delete_command.rs | 15 +- .../repo-tests/src/rest_api/test_accounts.rs | 10 +- .../cli/repo-tests/src/rest_api/test_auth.rs | 30 +- .../src/test_smart_transfer_protocol.rs | 36 +- src/e2e/app/cli/sqlite/Cargo.toml | 1 - .../tests/tests/commands/test_pull_command.rs | 2 +- src/infra/accounts/inmem/Cargo.toml | 3 +- .../src/repos/inmem_account_repository.rs | 26 ++ ...cb172b12f77a3fa7eccd32563238043409df1.json | 24 + ...c0635e35b499be6a025b71513d51b41ee1fb8.json | 114 +++++ src/infra/accounts/mysql/Cargo.toml | 3 +- .../src/repos/mysql_account_repository.rs | 59 ++- ...fc54b6e746d5ceaa2f44a88be3cefd845744a.json | 87 ++++ ...cb172b12f77a3fa7eccd32563238043409df1.json | 20 + src/infra/accounts/postgres/Cargo.toml | 3 +- .../src/repos/postgres_account_repository.rs | 59 ++- ...2103074dafdfc2250b9a12b63d3ef1be598c7.json | 74 ++++ ...cb172b12f77a3fa7eccd32563238043409df1.json | 20 + src/infra/accounts/sqlite/Cargo.toml | 3 +- .../src/repos/sqlite_account_repository.rs | 59 ++- .../inmem/src/repos/inmem_rebac_repository.rs | 37 ++ ...15f28850509bf940b37d53f6af91922a9c030.json | 20 + src/infra/auth-rebac/postgres/Cargo.toml | 1 - src/infra/auth-rebac/postgres/src/lib.rs | 2 + .../src/repos/postgres_rebac_repository.rs | 87 ++++ src/infra/auth-rebac/repo-tests/Cargo.toml | 1 - ...15f28850509bf940b37d53f6af91922a9c030.json | 20 + src/infra/auth-rebac/sqlite/Cargo.toml | 2 +- src/infra/auth-rebac/sqlite/src/lib.rs | 2 + .../src/repos/sqlite_rebac_repository.rs | 85 ++++ src/infra/core/Cargo.toml | 10 +- .../src/dependency_graph_service_inmem.rs | 2 - .../core/src/ingest/fetch_service/core.rs | 1 - .../testing/mock_dataset_action_authorizer.rs | 5 +- .../tests/test_dependency_graph_inmem.rs | 1 - src/infra/datasets/inmem/Cargo.toml | 5 - .../repos/inmem_dateset_entry_repository.rs | 61 +-- ...d811c12600aa6af7a06b226e7c5b7df64fb2.json} | 4 +- ...5ffca15f34c2af9aaeb8d31453ab364f97495.json | 14 + ...53da373b1ba803953af87ab913f22d6a1aef7.json | 14 - ...c708a4f95d15b637371f038e362241b5bd05.json} | 4 +- ...79ef9e3886268865cce559cf2268c66ea800.json} | 4 +- src/infra/datasets/postgres/Cargo.toml | 4 - .../postgres_dataset_entry_repository.rs | 26 +- .../dataset_entry_repository_test_suite.rs | 30 +- ...d811c12600aa6af7a06b226e7c5b7df64fb2.json} | 4 +- src/infra/datasets/sqlite/Cargo.toml | 4 - .../repos/sqlite_dateset_entry_repository.rs | 15 +- src/infra/flow-system/inmem/Cargo.toml | 13 - src/infra/flow-system/postgres/Cargo.toml | 5 +- .../repo-tests/src/test_flow_event_store.rs | 48 +- src/infra/flow-system/sqlite/Cargo.toml | 5 +- .../sqlite/src/sqlite_flow_event_store.rs | 74 ++-- src/infra/ingest-datafusion/Cargo.toml | 4 - src/infra/messaging-outbox/inmem/Cargo.toml | 4 - .../messaging-outbox/postgres/Cargo.toml | 5 - 
.../messaging-outbox/repo-tests/Cargo.toml | 1 - src/infra/messaging-outbox/sqlite/Cargo.toml | 4 - src/infra/task-system/inmem/Cargo.toml | 2 - src/infra/task-system/postgres/Cargo.toml | 4 +- src/infra/task-system/repo-tests/Cargo.toml | 3 + src/infra/task-system/sqlite/Cargo.toml | 3 +- src/utils/container-runtime/Cargo.toml | 1 - src/utils/data-utils/Cargo.toml | 1 - src/utils/database-common-macros/Cargo.toml | 3 + src/utils/database-common/Cargo.toml | 17 +- src/utils/database-common/src/entities.rs | 99 +++++ src/utils/database-common/src/helpers.rs | 19 + src/utils/database-common/src/lib.rs | 7 + src/utils/database-common/tests/mod.rs | 11 + src/utils/database-common/tests/tests/mod.rs | 10 + .../tests/tests/test_entries_streamer.rs | 414 ++++++++++++++++++ src/utils/datafusion-cli/Cargo.toml | 8 +- src/utils/event-sourcing-macros/Cargo.toml | 3 + src/utils/event-sourcing/Cargo.toml | 1 - src/utils/init-on-startup/Cargo.toml | 2 +- .../init-on-startup/src/init_on_startup.rs | 3 +- .../kamu-cli-puppet/src/kamu_cli_puppet.rs | 46 +- 185 files changed, 4135 insertions(+), 1547 deletions(-) create mode 100644 migrations/postgres/20241126095018_rebac_properties_reindexing.sql create mode 100644 migrations/sqlite/20241126095018_rebac_properties_reindexing.sql create mode 100644 src/adapter/auth-oso-rebac/Cargo.toml rename src/adapter/{auth-oso => auth-oso-rebac}/src/dataset_resource.rs (71%) create mode 100644 src/adapter/auth-oso-rebac/src/dependencies.rs rename src/adapter/{auth-oso => auth-oso-rebac}/src/kamu_auth_oso.rs (64%) create mode 100644 src/adapter/auth-oso-rebac/src/lib.rs create mode 100644 src/adapter/auth-oso-rebac/src/oso_dataset_authorizer.rs create mode 100644 src/adapter/auth-oso-rebac/src/oso_resource_service_impl.rs rename src/adapter/{auth-oso => auth-oso-rebac}/src/schema.polar (55%) rename src/adapter/{auth-oso => auth-oso-rebac}/src/user_actor.rs (72%) rename src/adapter/{auth-oso => auth-oso-rebac}/tests/mod.rs (100%) rename src/adapter/{auth-oso => auth-oso-rebac}/tests/tests/mod.rs (100%) create mode 100644 src/adapter/auth-oso-rebac/tests/tests/test_oso.rs create mode 100644 src/adapter/auth-oso-rebac/tests/tests/test_oso_dataset_authorizer.rs delete mode 100644 src/adapter/auth-oso/Cargo.toml delete mode 100644 src/adapter/auth-oso/src/oso_dataset_authorizer.rs delete mode 100644 src/adapter/auth-oso/tests/tests/test_oso.rs delete mode 100644 src/adapter/auth-oso/tests/tests/test_oso_dataset_authorizer.rs rename src/adapter/graphql/src/mutations/{ => dataset_mut}/dataset_mut.rs (75%) create mode 100644 src/adapter/graphql/src/mutations/dataset_mut/dataset_mut_utils.rs rename src/adapter/{auth-oso/src/lib.rs => graphql/src/mutations/dataset_mut/mod.rs} (65%) create mode 100644 src/domain/accounts/domain/src/services/account_service.rs create mode 100644 src/domain/accounts/services/src/account_service_impl.rs create mode 100644 src/domain/auth-rebac/services/src/dependencies.rs create mode 100644 src/domain/auth-rebac/services/src/jobs/mod.rs create mode 100644 src/domain/auth-rebac/services/src/rebac_indexer.rs create mode 100644 src/infra/accounts/mysql/.sqlx/query-6e609dea1564effee6f02e41a1dcb172b12f77a3fa7eccd32563238043409df1.json create mode 100644 src/infra/accounts/mysql/.sqlx/query-e6ae81849ab0194ecdfa3e69720c0635e35b499be6a025b71513d51b41ee1fb8.json create mode 100644 src/infra/accounts/postgres/.sqlx/query-4f94a12bf580d47e4a39d59fd6afc54b6e746d5ceaa2f44a88be3cefd845744a.json create mode 100644 
src/infra/accounts/postgres/.sqlx/query-6e609dea1564effee6f02e41a1dcb172b12f77a3fa7eccd32563238043409df1.json create mode 100644 src/infra/accounts/sqlite/.sqlx/query-67ddd71595415860d71c398b5db2103074dafdfc2250b9a12b63d3ef1be598c7.json create mode 100644 src/infra/accounts/sqlite/.sqlx/query-6e609dea1564effee6f02e41a1dcb172b12f77a3fa7eccd32563238043409df1.json create mode 100644 src/infra/auth-rebac/postgres/.sqlx/query-4521f606dd6c679519b7545fd2215f28850509bf940b37d53f6af91922a9c030.json create mode 100644 src/infra/auth-rebac/sqlite/.sqlx/query-4521f606dd6c679519b7545fd2215f28850509bf940b37d53f6af91922a9c030.json rename src/infra/datasets/postgres/.sqlx/{query-13fe35a7997b790566736b78e16c17cd7452d48887938a2a28cbd9a1408472e2.json => query-168c5decfa4e1abb634750d661f3d811c12600aa6af7a06b226e7c5b7df64fb2.json} (81%) create mode 100644 src/infra/datasets/postgres/.sqlx/query-30c92efe33072f0b9fa446ea3255ffca15f34c2af9aaeb8d31453ab364f97495.json delete mode 100644 src/infra/datasets/postgres/.sqlx/query-4783a9c4fca85d1965fb119ea2453da373b1ba803953af87ab913f22d6a1aef7.json rename src/infra/datasets/postgres/.sqlx/{query-2bcdb350c9c397529fafa84a0b575eca95214025291d1bd310c3900040a3c9c8.json => query-62939275935e6f623b32580d5679c708a4f95d15b637371f038e362241b5bd05.json} (85%) rename src/infra/datasets/postgres/.sqlx/{query-fcb34f3fa8f59b1f8190694fc38dc66874757b9f56f23ed86f8494c6ed4b0b7a.json => query-7954a6acf1cdb627dfe2890b042679ef9e3886268865cce559cf2268c66ea800.json} (85%) rename src/infra/datasets/sqlite/.sqlx/{query-13fe35a7997b790566736b78e16c17cd7452d48887938a2a28cbd9a1408472e2.json => query-168c5decfa4e1abb634750d661f3d811c12600aa6af7a06b226e7c5b7df64fb2.json} (80%) create mode 100644 src/utils/database-common/src/helpers.rs create mode 100644 src/utils/database-common/tests/mod.rs create mode 100644 src/utils/database-common/tests/tests/mod.rs create mode 100644 src/utils/database-common/tests/tests/test_entries_streamer.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 2b7c83077..a95d73500 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,17 @@ Recommendation: for ease of reading, use the following order: ## [Unreleased] ### Added - Console warning when deleting datasets which are out of sync with their push remotes +### Changed +- Speed up project build time by removing unused dependencies which were not detected by automated tools +- Private Datasets: + - OSO: using user actors / dateset resources that come from the database + - Thus, any access check relies on real entities + - GQL, added `DatasetMut.setVisibility()` to be able to change the dataset visibility after it has been created + - Deletion of previously created (and unused) ReBAC-properties and reindexing + - OSO: updating the schema to use identifiers instead of names + - OSO: added resource storage for access speed + - E2E: Using the correct account in multi-tenant mode + - And also the possibility of set it up ## [0.209.0] - 2024-11-25 ### Changed diff --git a/Cargo.lock b/Cargo.lock index 85a9e93e0..38527fb9d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2495,7 +2495,6 @@ dependencies = [ "thiserror 1.0.69", "tokio", "tracing", - "tracing-subscriber", "url", ] @@ -2920,15 +2919,19 @@ checksum = "e8566979429cf69b49a5c740c60791108e86440e8be149bbea4fe54d2c32d6e2" name = "database-common" version = "0.209.0" dependencies = [ + "async-stream", "async-trait", "aws-config", "aws-credential-types", "aws-sdk-secretsmanager", "chrono", "dill", + "futures", "hex", "hmac", "internal-error", + "mockall", + "pretty_assertions", 
"secrecy", "serde", "serde_json", @@ -2937,7 +2940,6 @@ dependencies = [ "thiserror 1.0.69", "tokio", "tracing", - "uuid", ] [[package]] @@ -3862,7 +3864,6 @@ dependencies = [ "event-sourcing-macros", "futures", "internal-error", - "serde", "thiserror 1.0.69", "tokio", "tokio-stream", @@ -5185,7 +5186,6 @@ dependencies = [ "aws-config", "aws-credential-types", "aws-sdk-s3", - "aws-smithy-http", "aws-smithy-types", "axum", "bytes", @@ -5210,7 +5210,6 @@ dependencies = [ "glob", "hex", "http 1.1.0", - "hyper 1.5.1", "indoc 2.0.5", "init-on-startup", "internal-error", @@ -5226,16 +5225,13 @@ dependencies = [ "kamu-ingest-datafusion", "lazy_static", "libc", - "like", "messaging-outbox", "mockall", "nanoid", "object_store", "oop", "opendatafabric", - "parking_lot", "petgraph", - "pin-project", "pretty_assertions", "rand", "random-names", @@ -5243,7 +5239,6 @@ dependencies = [ "reqwest", "ringbuf", "rumqttc", - "secrecy", "serde", "serde_json", "serde_with", @@ -5260,10 +5255,8 @@ dependencies = [ "tower 0.5.1", "tower-http", "tracing", - "tracing-subscriber", "trust-dns-resolver", "url", - "walkdir", "zip", ] @@ -5289,7 +5282,6 @@ dependencies = [ "serde_with", "sqlx", "thiserror 1.0.69", - "tracing", "uuid", ] @@ -5302,15 +5294,14 @@ dependencies = [ "database-common", "database-common-macros", "dill", + "futures", "internal-error", "kamu-accounts", "kamu-accounts-repo-tests", "opendatafabric", "test-group", "test-log", - "thiserror 1.0.69", "tokio", - "tracing", "uuid", ] @@ -5318,11 +5309,13 @@ dependencies = [ name = "kamu-accounts-mysql" version = "0.209.0" dependencies = [ + "async-stream", "async-trait", "chrono", "database-common", "database-common-macros", "dill", + "futures", "internal-error", "kamu-accounts", "kamu-accounts-repo-tests", @@ -5330,7 +5323,6 @@ dependencies = [ "sqlx", "test-group", "test-log", - "thiserror 1.0.69", "tracing", "uuid", ] @@ -5339,11 +5331,13 @@ dependencies = [ name = "kamu-accounts-postgres" version = "0.209.0" dependencies = [ + "async-stream", "async-trait", "chrono", "database-common", "database-common-macros", "dill", + "futures", "internal-error", "kamu-accounts", "kamu-accounts-repo-tests", @@ -5351,7 +5345,6 @@ dependencies = [ "sqlx", "test-group", "test-log", - "thiserror 1.0.69", "tracing", "uuid", ] @@ -5381,6 +5374,7 @@ dependencies = [ "chrono", "database-common", "dill", + "futures", "init-on-startup", "internal-error", "jsonwebtoken", @@ -5388,11 +5382,9 @@ dependencies = [ "kamu-accounts-inmem", "opendatafabric", "password-hash", - "random-names", "serde", "serde_json", "test-log", - "thiserror 1.0.69", "time-source", "tokio", "tracing", @@ -5403,11 +5395,13 @@ dependencies = [ name = "kamu-accounts-sqlite" version = "0.209.0" dependencies = [ + "async-stream", "async-trait", "chrono", "database-common", "database-common-macros", "dill", + "futures", "internal-error", "kamu-accounts", "kamu-accounts-repo-tests", @@ -5415,31 +5409,38 @@ dependencies = [ "sqlx", "test-group", "test-log", - "thiserror 1.0.69", "tracing", "uuid", ] [[package]] -name = "kamu-adapter-auth-oso" +name = "kamu-adapter-auth-oso-rebac" version = "0.209.0" dependencies = [ "async-trait", + "database-common", "dill", + "futures", + "init-on-startup", "internal-error", - "kamu", "kamu-accounts", + "kamu-accounts-inmem", + "kamu-accounts-services", + "kamu-auth-rebac", + "kamu-auth-rebac-inmem", + "kamu-auth-rebac-services", "kamu-core", + "kamu-datasets", + "kamu-datasets-inmem", + "kamu-datasets-services", "messaging-outbox", "opendatafabric", "oso", - "oso-derive", 
- "tempfile", "test-log", + "thiserror 1.0.69", "time-source", "tokio", "tracing", - "tracing-subscriber", ] [[package]] @@ -5461,7 +5462,6 @@ dependencies = [ "tokio-stream", "tonic", "tracing", - "tracing-subscriber", "uuid", ] @@ -5473,7 +5473,6 @@ dependencies = [ "async-trait", "chrono", "container-runtime", - "cron", "database-common", "datafusion", "dill", @@ -5486,6 +5485,9 @@ dependencies = [ "kamu-accounts", "kamu-accounts-inmem", "kamu-accounts-services", + "kamu-auth-rebac", + "kamu-auth-rebac-inmem", + "kamu-auth-rebac-services", "kamu-core", "kamu-data-utils", "kamu-datasets", @@ -5512,7 +5514,6 @@ dependencies = [ "tokio", "tokio-stream", "tracing", - "tracing-subscriber", "url", "uuid", ] @@ -5542,7 +5543,6 @@ dependencies = [ "http 1.1.0", "http-body-util", "http-common", - "hyper 1.5.1", "indoc 2.0.5", "init-on-startup", "internal-error", @@ -5564,7 +5564,6 @@ dependencies = [ "serde", "serde_json", "serde_with", - "sha3", "strum", "tar", "tempfile", @@ -5579,7 +5578,6 @@ dependencies = [ "tower 0.5.1", "tower-http", "tracing", - "tracing-subscriber", "url", "utoipa", "utoipa-axum", @@ -5591,7 +5589,6 @@ name = "kamu-adapter-oauth" version = "0.209.0" dependencies = [ "async-trait", - "chrono", "dill", "http 1.1.0", "internal-error", @@ -5601,8 +5598,6 @@ dependencies = [ "serde", "serde_json", "thiserror 1.0.69", - "tokio", - "tracing", ] [[package]] @@ -5619,7 +5614,6 @@ dependencies = [ "futures", "http 1.1.0", "http-common", - "hyper 1.5.1", "indoc 2.0.5", "internal-error", "kamu", @@ -5628,9 +5622,7 @@ dependencies = [ "messaging-outbox", "opendatafabric", "pretty_assertions", - "quick-xml", "reqwest", - "serde", "tempfile", "test-group", "test-log", @@ -5683,7 +5675,6 @@ dependencies = [ "sqlx", "test-group", "test-log", - "tokio", ] [[package]] @@ -5692,7 +5683,6 @@ version = "0.209.0" dependencies = [ "dill", "kamu-auth-rebac", - "tokio", ] [[package]] @@ -5700,12 +5690,17 @@ name = "kamu-auth-rebac-services" version = "0.209.0" dependencies = [ "async-trait", + "database-common", "dill", "futures", + "init-on-startup", "internal-error", + "kamu-accounts", "kamu-auth-rebac", "kamu-auth-rebac-inmem", "kamu-core", + "kamu-datasets", + "kamu-datasets-services", "messaging-outbox", "opendatafabric", "serde_json", @@ -5728,7 +5723,6 @@ dependencies = [ "sqlx", "test-group", "test-log", - "tokio", ] [[package]] @@ -5741,7 +5735,6 @@ dependencies = [ "async-trait", "async-utils", "axum", - "axum-extra", "cfg-if", "chrono", "chrono-humanize", @@ -5763,7 +5756,6 @@ dependencies = [ "http 1.1.0", "http-common", "humansize", - "hyper 1.5.1", "indicatif", "indoc 2.0.5", "init-on-startup", @@ -5776,7 +5768,7 @@ dependencies = [ "kamu-accounts-postgres", "kamu-accounts-services", "kamu-accounts-sqlite", - "kamu-adapter-auth-oso", + "kamu-adapter-auth-oso-rebac", "kamu-adapter-flight-sql", "kamu-adapter-graphql", "kamu-adapter-http", @@ -5900,7 +5892,6 @@ dependencies = [ "indoc 2.0.5", "kamu-cli-e2e-common", "kamu-cli-e2e-repo-tests", - "paste", "test-group", "test-log", "tokio", @@ -5913,7 +5904,6 @@ dependencies = [ "indoc 2.0.5", "kamu-cli-e2e-common", "kamu-cli-e2e-repo-tests", - "paste", "sqlx", "test-group", "test-log", @@ -5927,7 +5917,6 @@ dependencies = [ "indoc 2.0.5", "kamu-cli-e2e-common", "kamu-cli-e2e-repo-tests", - "paste", "sqlx", "test-group", "test-log", @@ -5941,7 +5930,6 @@ dependencies = [ "chrono", "http-common", "indoc 2.0.5", - "internal-error", "kamu", "kamu-accounts", "kamu-adapter-http", @@ -5955,7 +5943,6 @@ dependencies = [ "reqwest", "serde_json", 
"tempfile", - "tokio", "url", ] @@ -5966,7 +5953,6 @@ dependencies = [ "indoc 2.0.5", "kamu-cli-e2e-common", "kamu-cli-e2e-repo-tests", - "paste", "sqlx", "test-group", "test-log", @@ -5996,7 +5982,6 @@ dependencies = [ name = "kamu-core" version = "0.209.0" dependencies = [ - "async-stream", "async-trait", "bytes", "chrono", @@ -6012,6 +5997,7 @@ dependencies = [ "mockall", "object_store", "opendatafabric", + "oso", "pathdiff", "pin-project", "serde", @@ -6019,7 +6005,6 @@ dependencies = [ "thiserror 1.0.69", "tokio", "tokio-stream", - "tracing", "url", "utoipa", ] @@ -6045,7 +6030,6 @@ dependencies = [ "thiserror 1.0.69", "tokio", "tracing", - "tracing-subscriber", "url", ] @@ -6057,9 +6041,6 @@ dependencies = [ "async-trait", "aws-config", "aws-credential-types", - "aws-sdk-sso", - "aws-sdk-ssooidc", - "aws-sdk-sts", "clap", "datafusion", "dirs", @@ -6067,7 +6048,6 @@ dependencies = [ "object_store", "parking_lot", "parquet", - "regex", "rustyline", "tokio", "url", @@ -6098,9 +6078,7 @@ dependencies = [ name = "kamu-datasets-inmem" version = "0.209.0" dependencies = [ - "async-stream", "async-trait", - "chrono", "database-common", "database-common-macros", "dill", @@ -6110,12 +6088,9 @@ dependencies = [ "kamu-datasets", "kamu-datasets-repo-tests", "opendatafabric", - "secrecy", "test-group", "test-log", - "thiserror 1.0.69", "tokio", - "tracing", "uuid", ] @@ -6125,7 +6100,6 @@ version = "0.209.0" dependencies = [ "async-stream", "async-trait", - "chrono", "database-common", "database-common-macros", "dill", @@ -6135,12 +6109,9 @@ dependencies = [ "kamu-datasets", "kamu-datasets-repo-tests", "opendatafabric", - "secrecy", "sqlx", "test-group", "test-log", - "thiserror 1.0.69", - "tracing", "uuid", ] @@ -6181,10 +6152,7 @@ dependencies = [ "opendatafabric", "pretty_assertions", "secrecy", - "serde", - "serde_json", "test-log", - "thiserror 1.0.69", "time-source", "tokio", "tracing", @@ -6197,7 +6165,6 @@ version = "0.209.0" dependencies = [ "async-stream", "async-trait", - "chrono", "database-common", "database-common-macros", "dill", @@ -6207,12 +6174,9 @@ dependencies = [ "kamu-datasets", "kamu-datasets-repo-tests", "opendatafabric", - "secrecy", "sqlx", "test-group", "test-log", - "thiserror 1.0.69", - "tracing", "uuid", ] @@ -6224,7 +6188,6 @@ dependencies = [ "chrono", "cron", "database-common", - "datafusion", "dill", "enum-variants", "event-sourcing", @@ -6241,38 +6204,26 @@ dependencies = [ "strum", "thiserror 1.0.69", "tokio-stream", - "tracing", - "url", ] [[package]] name = "kamu-flow-system-inmem" version = "0.209.0" dependencies = [ - "async-stream", "async-trait", "chrono", - "cron", "database-common", "database-common-macros", "dill", "futures", - "internal-error", "kamu-flow-system", "kamu-flow-system-repo-tests", - "kamu-task-system", "opendatafabric", - "serde", - "serde_with", - "tempfile", "test-group", "test-log", - "thiserror 1.0.69", "tokio", "tokio-stream", "tracing", - "tracing-subscriber", - "url", ] [[package]] @@ -6290,14 +6241,10 @@ dependencies = [ "kamu-flow-system", "kamu-flow-system-repo-tests", "opendatafabric", - "serde", "serde_json", "sqlx", "test-group", "test-log", - "tokio", - "tokio-stream", - "tracing", ] [[package]] @@ -6343,18 +6290,11 @@ dependencies = [ "observability", "opendatafabric", "pretty_assertions", - "serde", - "serde_json", - "serde_with", "tempfile", "test-log", - "thiserror 1.0.69", "time-source", "tokio", - "tokio-stream", "tracing", - "tracing-subscriber", - "url", ] [[package]] @@ -6372,14 +6312,10 @@ dependencies = [ 
"kamu-flow-system", "kamu-flow-system-repo-tests", "opendatafabric", - "serde", "serde_json", "sqlx", "test-group", "test-log", - "tokio", - "tokio-stream", - "tracing", ] [[package]] @@ -6390,7 +6326,6 @@ dependencies = [ "chrono", "criterion", "datafusion", - "digest 0.10.7", "futures", "geo-types", "geojson", @@ -6404,7 +6339,6 @@ dependencies = [ "rand", "serde", "serde_json", - "sha3", "shapefile", "tempfile", "test-group", @@ -6412,8 +6346,6 @@ dependencies = [ "thiserror 1.0.69", "tokio", "tracing", - "tracing-subscriber", - "url", "walkdir", "zip", ] @@ -6423,7 +6355,6 @@ name = "kamu-messaging-outbox-inmem" version = "0.209.0" dependencies = [ "async-trait", - "chrono", "database-common-macros", "dill", "internal-error", @@ -6431,10 +6362,8 @@ dependencies = [ "messaging-outbox", "test-group", "test-log", - "thiserror 1.0.69", "tokio", "tokio-stream", - "tracing", ] [[package]] @@ -6443,7 +6372,6 @@ version = "0.209.0" dependencies = [ "async-stream", "async-trait", - "chrono", "database-common", "database-common-macros", "dill", @@ -6454,10 +6382,6 @@ dependencies = [ "sqlx", "test-group", "test-log", - "thiserror 1.0.69", - "tokio", - "tracing", - "uuid", ] [[package]] @@ -6465,7 +6389,6 @@ name = "kamu-messaging-outbox-repo-tests" version = "0.209.0" dependencies = [ "chrono", - "database-common", "dill", "futures", "messaging-outbox", @@ -6480,7 +6403,6 @@ version = "0.209.0" dependencies = [ "async-stream", "async-trait", - "chrono", "database-common", "database-common-macros", "dill", @@ -6492,9 +6414,6 @@ dependencies = [ "sqlx", "test-group", "test-log", - "thiserror 1.0.69", - "tokio", - "tracing", ] [[package]] @@ -6536,12 +6455,10 @@ name = "kamu-task-system-inmem" version = "0.209.0" dependencies = [ "async-trait", - "chrono", "database-common", "database-common-macros", "dill", "futures", - "internal-error", "kamu-task-system", "kamu-task-system-repo-tests", "opendatafabric", @@ -6556,7 +6473,6 @@ version = "0.209.0" dependencies = [ "async-stream", "async-trait", - "chrono", "database-common", "database-common-macros", "dill", @@ -6569,8 +6485,6 @@ dependencies = [ "sqlx", "test-group", "test-log", - "tokio", - "tokio-stream", ] [[package]] @@ -6589,7 +6503,6 @@ dependencies = [ name = "kamu-task-system-services" version = "0.209.0" dependencies = [ - "async-stream", "async-trait", "chrono", "database-common", @@ -6609,7 +6522,6 @@ dependencies = [ "messaging-outbox", "mockall", "observability", - "opendatafabric", "serde_json", "tempfile", "test-log", @@ -6637,8 +6549,6 @@ dependencies = [ "sqlx", "test-group", "test-log", - "tokio", - "tokio-stream", ] [[package]] @@ -7609,14 +7519,11 @@ dependencies = [ "ed25519-dalek", "enum-variants", "flatbuffers", - "futures-core", - "hex", "indoc 2.0.5", "internal-error", "like", "multiformats", "prost", - "rand", "serde", "serde_json", "serde_with", diff --git a/Cargo.toml b/Cargo.toml index 0955d277f..421eaccb3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -69,7 +69,7 @@ members = [ "src/infra/messaging-outbox/postgres", "src/infra/messaging-outbox/sqlite", # Adapters - "src/adapter/auth-oso", + "src/adapter/auth-oso-rebac", "src/adapter/flight-sql", "src/adapter/graphql", "src/adapter/http", @@ -167,12 +167,12 @@ kamu-messaging-outbox-sqlite = { version = "0.209.0", path = "src/infra/messagin kamu-messaging-outbox-repo-tests = { version = "0.209.0", path = "src/infra/messaging-outbox/repo-tests", default-features = false } # Adapters -kamu-adapter-auth-oso = { version = "0.209.0", path = "src/adapter/auth-oso", 
default-features = false } +kamu-adapter-auth-oso-rebac = { version = "0.209.0", path = "src/adapter/auth-oso-rebac", default-features = false } kamu-adapter-flight-sql = { version = "0.209.0", path = "src/adapter/flight-sql", default-features = false } kamu-adapter-graphql = { version = "0.209.0", path = "src/adapter/graphql", default-features = false } kamu-adapter-http = { version = "0.209.0", path = "src/adapter/http", default-features = false } -kamu-adapter-odata = { version = "0.209.0", path = "src/adapter/odata", default-features = false } kamu-adapter-oauth = { version = "0.209.0", path = "src/adapter/oauth", default-features = false } +kamu-adapter-odata = { version = "0.209.0", path = "src/adapter/odata", default-features = false } # E2E kamu-cli-e2e-common = { version = "0.209.0", path = "src/e2e/app/cli/common", default-features = false } diff --git a/migrations/postgres/20241126095018_rebac_properties_reindexing.sql b/migrations/postgres/20241126095018_rebac_properties_reindexing.sql new file mode 100644 index 000000000..d4cf87a7b --- /dev/null +++ b/migrations/postgres/20241126095018_rebac_properties_reindexing.sql @@ -0,0 +1,3 @@ +-- Start re-indexing. +DELETE +FROM auth_rebac_properties; diff --git a/migrations/sqlite/20241126095018_rebac_properties_reindexing.sql b/migrations/sqlite/20241126095018_rebac_properties_reindexing.sql new file mode 100644 index 000000000..d4cf87a7b --- /dev/null +++ b/migrations/sqlite/20241126095018_rebac_properties_reindexing.sql @@ -0,0 +1,3 @@ +-- Start re-indexing. +DELETE +FROM auth_rebac_properties; diff --git a/resources/openapi-mt.json b/resources/openapi-mt.json index 32c361cb3..f48ca482b 100644 --- a/resources/openapi-mt.json +++ b/resources/openapi-mt.json @@ -1398,7 +1398,7 @@ ] }, "post": { - "description": "### Regular Queries\nThis endpoint lets you execute arbitrary SQL that can access multiple\ndatasets at once.\n\nExample request body:\n```json\n{\n \"query\": \"select event_time, from, to, close from \\\"kamu/eth-to-usd\\\"\",\n \"limit\": 3,\n \"queryDialect\": \"SqlDataFusion\",\n \"dataFormat\": \"JsonAoA\",\n \"schemaFormat\": \"ArrowJson\"\n}\n```\n\nExample response:\n```json\n{\n \"output\": {\n \"data\": [\n [\"2024-09-02T21:50:00Z\", \"eth\", \"usd\", 2537.07],\n [\"2024-09-02T21:51:00Z\", \"eth\", \"usd\", 2541.37],\n [\"2024-09-02T21:52:00Z\", \"eth\", \"usd\", 2542.66]\n ],\n \"dataFormat\": \"JsonAoA\",\n \"schema\": {\"fields\": [\"...\"]},\n \"schemaFormat\": \"ArrowJson\"\n }\n}\n```\n\n### Verifiable Queries\n[Cryptographic proofs](https://docs.kamu.dev/node/commitments) can be\nalso requested to hold the node **forever accountable** for the provided\nresult.\n\nExample request body:\n```json\n{\n \"query\": \"select event_time, from, to, close from \\\"kamu/eth-to-usd\\\"\",\n \"limit\": 3,\n \"queryDialect\": \"SqlDataFusion\",\n \"dataFormat\": \"JsonAoA\",\n \"schemaFormat\": \"ArrowJson\",\n \"include\": [\"proof\"]\n}\n```\n\nCurrently we support verifiability by ensuring that queries are\ndeterministic and fully reproducible and signing the original response with\nNode's private key. 
In future more types of proofs will be supported.\n\nExample response:\n```json\n{\n \"input\": {\n \"query\": \"select event_time, from, to, close from \\\"kamu/eth-to-usd\\\"\",\n \"queryDialect\": \"SqlDataFusion\",\n \"dataFormat\": \"JsonAoA\",\n \"include\": [\"Input\", \"Proof\", \"Schema\"],\n \"schemaFormat\": \"ArrowJson\",\n \"datasets\": [{\n \"id\": \"did:odf:fed0119d20360650afd3d412c6b11529778b784c697559c0107d37ee5da61465726c4\",\n \"alias\": \"kamu/eth-to-usd\",\n \"blockHash\": \"f1620708557a44c88d23c83f2b915abc10a41cc38d2a278e851e5dc6bb02b7e1f9a1a\"\n }],\n \"skip\": 0,\n \"limit\": 3\n },\n \"output\": {\n \"data\": [\n [\"2024-09-02T21:50:00Z\", \"eth\", \"usd\", 2537.07],\n [\"2024-09-02T21:51:00Z\", \"eth\", \"usd\", 2541.37],\n [\"2024-09-02T21:52:00Z\", \"eth\", \"usd\", 2542.66]\n ],\n \"dataFormat\": \"JsonAoA\",\n \"schema\": {\"fields\": [\"...\"]},\n \"schemaFormat\": \"ArrowJson\"\n },\n \"subQueries\": [],\n \"commitment\": {\n \"inputHash\": \"f1620e23f7d8cdde7504eadb86f3cdf34b3b1a7d71f10fe5b54b528dd803387422efc\",\n \"outputHash\": \"f1620e91f4d3fa26bc4ca0c49d681c8b630550239b64d3cbcfd7c6c2d6ff45998b088\",\n \"subQueriesHash\": \"f1620ca4510738395af1429224dd785675309c344b2b549632e20275c69b15ed1d210\"\n },\n \"proof\": {\n \"type\": \"Ed25519Signature2020\",\n \"verificationMethod\": \"did:key:z6MkkhJQPHpA41mTPLFgBeygnjeeADUSwuGDoF9pbGQsfwZp\",\n \"proofValue\": \"uJfY3_g03WbmqlQG8TL-WUxKYU8ZoJaP14MzOzbnJedNiu7jpoKnCTNnDI3TYuaXv89vKlirlGs-5AN06mBseCg\"\n }\n}\n```\n\nA client that gets a proof in response should\nperform [a few basic steps](https://docs.kamu.dev/node/commitments#response-validation) to validate\nthe proof integrity. For example making sure that the DID in\n`proof.verificationMethod` actually corresponds to the node you're querying\ndata from and that the signature in `proof.proofValue` is actually valid.\nOnly after this you can use this proof to hold the node accountable for the\nresult.\n\nA proof can be stored long-term and then disputed at a later point using\nyour own node or a 3rd party node you can trust via the\n[`/verify`](#tag/odf-query/POST/verify) endpoint.\n\nSee [commitments documentation](https://docs.kamu.dev/node/commitments) for details.", + "description": "### Regular Queries\nThis endpoint lets you execute arbitrary SQL that can access multiple\ndatasets at once.\n\nExample request body:\n```json\n{\n \"query\": \"select event_time, from, to, close from \\\"kamu/eth-to-usd\\\"\",\n \"limit\": 3,\n \"queryDialect\": \"SqlDataFusion\",\n \"dataFormat\": \"JsonAoA\",\n \"schemaFormat\": \"ArrowJson\"\n}\n```\n\nExample response:\n```json\n{\n \"output\": {\n \"data\": [\n [\"2024-09-02T21:50:00Z\", \"eth\", \"usd\", 2537.07],\n [\"2024-09-02T21:51:00Z\", \"eth\", \"usd\", 2541.37],\n [\"2024-09-02T21:52:00Z\", \"eth\", \"usd\", 2542.66]\n ],\n \"dataFormat\": \"JsonAoA\",\n \"schema\": {\"fields\": [\"...\"]},\n \"schemaFormat\": \"ArrowJson\"\n }\n}\n```\n\n### Verifiable Queries\n[Cryptographic proofs](https://docs.kamu.dev/node/commitments) can be\nalso requested to hold the node **forever accountable** for the provided\nresult.\n\nExample request body:\n```json\n{\n \"query\": \"select event_time, from, to, close from \\\"kamu/eth-to-usd\\\"\",\n \"limit\": 3,\n \"queryDialect\": \"SqlDataFusion\",\n \"dataFormat\": \"JsonAoA\",\n \"schemaFormat\": \"ArrowJson\",\n \"include\": [\"proof\"]\n}\n```\n\nCurrently, we support verifiability by ensuring that queries are\ndeterministic and fully reproducible and signing the 
original response with\nNode's private key. In future more types of proofs will be supported.\n\nExample response:\n```json\n{\n \"input\": {\n \"query\": \"select event_time, from, to, close from \\\"kamu/eth-to-usd\\\"\",\n \"queryDialect\": \"SqlDataFusion\",\n \"dataFormat\": \"JsonAoA\",\n \"include\": [\"Input\", \"Proof\", \"Schema\"],\n \"schemaFormat\": \"ArrowJson\",\n \"datasets\": [{\n \"id\": \"did:odf:fed0119d20360650afd3d412c6b11529778b784c697559c0107d37ee5da61465726c4\",\n \"alias\": \"kamu/eth-to-usd\",\n \"blockHash\": \"f1620708557a44c88d23c83f2b915abc10a41cc38d2a278e851e5dc6bb02b7e1f9a1a\"\n }],\n \"skip\": 0,\n \"limit\": 3\n },\n \"output\": {\n \"data\": [\n [\"2024-09-02T21:50:00Z\", \"eth\", \"usd\", 2537.07],\n [\"2024-09-02T21:51:00Z\", \"eth\", \"usd\", 2541.37],\n [\"2024-09-02T21:52:00Z\", \"eth\", \"usd\", 2542.66]\n ],\n \"dataFormat\": \"JsonAoA\",\n \"schema\": {\"fields\": [\"...\"]},\n \"schemaFormat\": \"ArrowJson\"\n },\n \"subQueries\": [],\n \"commitment\": {\n \"inputHash\": \"f1620e23f7d8cdde7504eadb86f3cdf34b3b1a7d71f10fe5b54b528dd803387422efc\",\n \"outputHash\": \"f1620e91f4d3fa26bc4ca0c49d681c8b630550239b64d3cbcfd7c6c2d6ff45998b088\",\n \"subQueriesHash\": \"f1620ca4510738395af1429224dd785675309c344b2b549632e20275c69b15ed1d210\"\n },\n \"proof\": {\n \"type\": \"Ed25519Signature2020\",\n \"verificationMethod\": \"did:key:z6MkkhJQPHpA41mTPLFgBeygnjeeADUSwuGDoF9pbGQsfwZp\",\n \"proofValue\": \"uJfY3_g03WbmqlQG8TL-WUxKYU8ZoJaP14MzOzbnJedNiu7jpoKnCTNnDI3TYuaXv89vKlirlGs-5AN06mBseCg\"\n }\n}\n```\n\nA client that gets a proof in response should\nperform [a few basic steps](https://docs.kamu.dev/node/commitments#response-validation) to validate\nthe proof integrity. For example making sure that the DID in\n`proof.verificationMethod` actually corresponds to the node you're querying\ndata from and that the signature in `proof.proofValue` is actually valid.\nOnly after this you can use this proof to hold the node accountable for the\nresult.\n\nA proof can be stored long-term and then disputed at a later point using\nyour own node or a 3rd party node you can trust via the\n[`/verify`](#tag/odf-query/POST/verify) endpoint.\n\nSee [commitments documentation](https://docs.kamu.dev/node/commitments) for details.", "operationId": "query_handler_post", "requestBody": { "content": { diff --git a/resources/openapi.json b/resources/openapi.json index e6ff20989..7c0d45455 100644 --- a/resources/openapi.json +++ b/resources/openapi.json @@ -1367,7 +1367,7 @@ ] }, "post": { - "description": "### Regular Queries\nThis endpoint lets you execute arbitrary SQL that can access multiple\ndatasets at once.\n\nExample request body:\n```json\n{\n \"query\": \"select event_time, from, to, close from \\\"kamu/eth-to-usd\\\"\",\n \"limit\": 3,\n \"queryDialect\": \"SqlDataFusion\",\n \"dataFormat\": \"JsonAoA\",\n \"schemaFormat\": \"ArrowJson\"\n}\n```\n\nExample response:\n```json\n{\n \"output\": {\n \"data\": [\n [\"2024-09-02T21:50:00Z\", \"eth\", \"usd\", 2537.07],\n [\"2024-09-02T21:51:00Z\", \"eth\", \"usd\", 2541.37],\n [\"2024-09-02T21:52:00Z\", \"eth\", \"usd\", 2542.66]\n ],\n \"dataFormat\": \"JsonAoA\",\n \"schema\": {\"fields\": [\"...\"]},\n \"schemaFormat\": \"ArrowJson\"\n }\n}\n```\n\n### Verifiable Queries\n[Cryptographic proofs](https://docs.kamu.dev/node/commitments) can be\nalso requested to hold the node **forever accountable** for the provided\nresult.\n\nExample request body:\n```json\n{\n \"query\": \"select event_time, from, to, close from 
\\\"kamu/eth-to-usd\\\"\",\n \"limit\": 3,\n \"queryDialect\": \"SqlDataFusion\",\n \"dataFormat\": \"JsonAoA\",\n \"schemaFormat\": \"ArrowJson\",\n \"include\": [\"proof\"]\n}\n```\n\nCurrently we support verifiability by ensuring that queries are\ndeterministic and fully reproducible and signing the original response with\nNode's private key. In future more types of proofs will be supported.\n\nExample response:\n```json\n{\n \"input\": {\n \"query\": \"select event_time, from, to, close from \\\"kamu/eth-to-usd\\\"\",\n \"queryDialect\": \"SqlDataFusion\",\n \"dataFormat\": \"JsonAoA\",\n \"include\": [\"Input\", \"Proof\", \"Schema\"],\n \"schemaFormat\": \"ArrowJson\",\n \"datasets\": [{\n \"id\": \"did:odf:fed0119d20360650afd3d412c6b11529778b784c697559c0107d37ee5da61465726c4\",\n \"alias\": \"kamu/eth-to-usd\",\n \"blockHash\": \"f1620708557a44c88d23c83f2b915abc10a41cc38d2a278e851e5dc6bb02b7e1f9a1a\"\n }],\n \"skip\": 0,\n \"limit\": 3\n },\n \"output\": {\n \"data\": [\n [\"2024-09-02T21:50:00Z\", \"eth\", \"usd\", 2537.07],\n [\"2024-09-02T21:51:00Z\", \"eth\", \"usd\", 2541.37],\n [\"2024-09-02T21:52:00Z\", \"eth\", \"usd\", 2542.66]\n ],\n \"dataFormat\": \"JsonAoA\",\n \"schema\": {\"fields\": [\"...\"]},\n \"schemaFormat\": \"ArrowJson\"\n },\n \"subQueries\": [],\n \"commitment\": {\n \"inputHash\": \"f1620e23f7d8cdde7504eadb86f3cdf34b3b1a7d71f10fe5b54b528dd803387422efc\",\n \"outputHash\": \"f1620e91f4d3fa26bc4ca0c49d681c8b630550239b64d3cbcfd7c6c2d6ff45998b088\",\n \"subQueriesHash\": \"f1620ca4510738395af1429224dd785675309c344b2b549632e20275c69b15ed1d210\"\n },\n \"proof\": {\n \"type\": \"Ed25519Signature2020\",\n \"verificationMethod\": \"did:key:z6MkkhJQPHpA41mTPLFgBeygnjeeADUSwuGDoF9pbGQsfwZp\",\n \"proofValue\": \"uJfY3_g03WbmqlQG8TL-WUxKYU8ZoJaP14MzOzbnJedNiu7jpoKnCTNnDI3TYuaXv89vKlirlGs-5AN06mBseCg\"\n }\n}\n```\n\nA client that gets a proof in response should\nperform [a few basic steps](https://docs.kamu.dev/node/commitments#response-validation) to validate\nthe proof integrity. 
For example making sure that the DID in\n`proof.verificationMethod` actually corresponds to the node you're querying\ndata from and that the signature in `proof.proofValue` is actually valid.\nOnly after this you can use this proof to hold the node accountable for the\nresult.\n\nA proof can be stored long-term and then disputed at a later point using\nyour own node or a 3rd party node you can trust via the\n[`/verify`](#tag/odf-query/POST/verify) endpoint.\n\nSee [commitments documentation](https://docs.kamu.dev/node/commitments) for details.", + "description": "### Regular Queries\nThis endpoint lets you execute arbitrary SQL that can access multiple\ndatasets at once.\n\nExample request body:\n```json\n{\n \"query\": \"select event_time, from, to, close from \\\"kamu/eth-to-usd\\\"\",\n \"limit\": 3,\n \"queryDialect\": \"SqlDataFusion\",\n \"dataFormat\": \"JsonAoA\",\n \"schemaFormat\": \"ArrowJson\"\n}\n```\n\nExample response:\n```json\n{\n \"output\": {\n \"data\": [\n [\"2024-09-02T21:50:00Z\", \"eth\", \"usd\", 2537.07],\n [\"2024-09-02T21:51:00Z\", \"eth\", \"usd\", 2541.37],\n [\"2024-09-02T21:52:00Z\", \"eth\", \"usd\", 2542.66]\n ],\n \"dataFormat\": \"JsonAoA\",\n \"schema\": {\"fields\": [\"...\"]},\n \"schemaFormat\": \"ArrowJson\"\n }\n}\n```\n\n### Verifiable Queries\n[Cryptographic proofs](https://docs.kamu.dev/node/commitments) can be\nalso requested to hold the node **forever accountable** for the provided\nresult.\n\nExample request body:\n```json\n{\n \"query\": \"select event_time, from, to, close from \\\"kamu/eth-to-usd\\\"\",\n \"limit\": 3,\n \"queryDialect\": \"SqlDataFusion\",\n \"dataFormat\": \"JsonAoA\",\n \"schemaFormat\": \"ArrowJson\",\n \"include\": [\"proof\"]\n}\n```\n\nCurrently, we support verifiability by ensuring that queries are\ndeterministic and fully reproducible and signing the original response with\nNode's private key. 
In future more types of proofs will be supported.\n\nExample response:\n```json\n{\n \"input\": {\n \"query\": \"select event_time, from, to, close from \\\"kamu/eth-to-usd\\\"\",\n \"queryDialect\": \"SqlDataFusion\",\n \"dataFormat\": \"JsonAoA\",\n \"include\": [\"Input\", \"Proof\", \"Schema\"],\n \"schemaFormat\": \"ArrowJson\",\n \"datasets\": [{\n \"id\": \"did:odf:fed0119d20360650afd3d412c6b11529778b784c697559c0107d37ee5da61465726c4\",\n \"alias\": \"kamu/eth-to-usd\",\n \"blockHash\": \"f1620708557a44c88d23c83f2b915abc10a41cc38d2a278e851e5dc6bb02b7e1f9a1a\"\n }],\n \"skip\": 0,\n \"limit\": 3\n },\n \"output\": {\n \"data\": [\n [\"2024-09-02T21:50:00Z\", \"eth\", \"usd\", 2537.07],\n [\"2024-09-02T21:51:00Z\", \"eth\", \"usd\", 2541.37],\n [\"2024-09-02T21:52:00Z\", \"eth\", \"usd\", 2542.66]\n ],\n \"dataFormat\": \"JsonAoA\",\n \"schema\": {\"fields\": [\"...\"]},\n \"schemaFormat\": \"ArrowJson\"\n },\n \"subQueries\": [],\n \"commitment\": {\n \"inputHash\": \"f1620e23f7d8cdde7504eadb86f3cdf34b3b1a7d71f10fe5b54b528dd803387422efc\",\n \"outputHash\": \"f1620e91f4d3fa26bc4ca0c49d681c8b630550239b64d3cbcfd7c6c2d6ff45998b088\",\n \"subQueriesHash\": \"f1620ca4510738395af1429224dd785675309c344b2b549632e20275c69b15ed1d210\"\n },\n \"proof\": {\n \"type\": \"Ed25519Signature2020\",\n \"verificationMethod\": \"did:key:z6MkkhJQPHpA41mTPLFgBeygnjeeADUSwuGDoF9pbGQsfwZp\",\n \"proofValue\": \"uJfY3_g03WbmqlQG8TL-WUxKYU8ZoJaP14MzOzbnJedNiu7jpoKnCTNnDI3TYuaXv89vKlirlGs-5AN06mBseCg\"\n }\n}\n```\n\nA client that gets a proof in response should\nperform [a few basic steps](https://docs.kamu.dev/node/commitments#response-validation) to validate\nthe proof integrity. For example making sure that the DID in\n`proof.verificationMethod` actually corresponds to the node you're querying\ndata from and that the signature in `proof.proofValue` is actually valid.\nOnly after this you can use this proof to hold the node accountable for the\nresult.\n\nA proof can be stored long-term and then disputed at a later point using\nyour own node or a 3rd party node you can trust via the\n[`/verify`](#tag/odf-query/POST/verify) endpoint.\n\nSee [commitments documentation](https://docs.kamu.dev/node/commitments) for details.", "operationId": "query_handler_post", "requestBody": { "content": { diff --git a/resources/schema.gql b/resources/schema.gql index 1dfc33404..c987dd9bd 100644 --- a/resources/schema.gql +++ b/resources/schema.gql @@ -718,6 +718,10 @@ type DatasetMut { Manually advances the watermark of a root dataset """ setWatermark(watermark: DateTime!): SetWatermarkResult! + """ + Set visibility for the dataset + """ + setVisibility(visibility: DatasetVisibilityInput!): SetDatasetPropertyResultSuccess! } scalar DatasetName @@ -764,6 +768,11 @@ enum DatasetVisibility { PUBLIC } +input DatasetVisibilityInput @oneOf { + private: PrivateDatasetVisibilityInput + public: PublicDatasetVisibilityInput +} + type Datasets { """ Returns dataset by its ID @@ -1515,11 +1524,19 @@ type PrepStepPipe { command: [String!]! } +input PrivateDatasetVisibilityInput { + dummy: String +} + input PropagationMode @oneOf { custom: FlowConfigurationResetCustom toSeed: FlowConfigurationResetToSeedDummy } +input PublicDatasetVisibilityInput { + anonymousAvailable: Boolean! +} + type Query { """ Returns the version of the GQL API @@ -1727,6 +1744,11 @@ type SetDataSchema { schema: DataSchema! } +type SetDatasetPropertyResultSuccess { + dummy: String + message: String! +} + interface SetFlowCompactionConfigResult { message: String! 
} diff --git a/src/adapter/auth-oso-rebac/Cargo.toml b/src/adapter/auth-oso-rebac/Cargo.toml new file mode 100644 index 000000000..0929b23aa --- /dev/null +++ b/src/adapter/auth-oso-rebac/Cargo.toml @@ -0,0 +1,53 @@ +[package] +name = "kamu-adapter-auth-oso-rebac" +description = "Authorization adapter for Kamu domain based on OSO library & ReBAC properties" +version = { workspace = true } +homepage = { workspace = true } +repository = { workspace = true } +authors = { workspace = true } +readme = { workspace = true } +license-file = { workspace = true } +keywords = { workspace = true } +include = { workspace = true } +edition = { workspace = true } +publish = { workspace = true } + + +[lints] +workspace = true + + +[lib] +doctest = false + +[dependencies] +database-common = { workspace = true } +init-on-startup = { workspace = true } +internal-error = { workspace = true } +kamu-accounts = { workspace = true } +kamu-auth-rebac = { workspace = true } +kamu-auth-rebac-services = { workspace = true } +kamu-core = { workspace = true, default-features = false, features = ["oso"] } +kamu-datasets = { workspace = true } +messaging-outbox = { workspace = true } +opendatafabric = { workspace = true } + +async-trait = "0.1" +dill = "0.9" +futures = { version = "0.3", default-features = false } +oso = { version = "0.27", default-features = false, features = ["derive"] } +thiserror = { version = "1", default-features = false } +tokio = { version = "1", default-features = false, features = ["macros"] } +tracing = { version = "0.1", default-features = false } + +[dev-dependencies] +kamu-accounts-inmem = { workspace = true } +kamu-accounts-services = { workspace = true } +kamu-auth-rebac-inmem = { workspace = true } +kamu-auth-rebac-services = { workspace = true } +kamu-core = { workspace = true, default-features = false, features = ["oso", "testing"] } +kamu-datasets-inmem = { workspace = true } +kamu-datasets-services = { workspace = true } +time-source = { workspace = true } + +test-log = { version = "0.2", features = ["trace"] } diff --git a/src/adapter/auth-oso/src/dataset_resource.rs b/src/adapter/auth-oso-rebac/src/dataset_resource.rs similarity index 71% rename from src/adapter/auth-oso/src/dataset_resource.rs rename to src/adapter/auth-oso-rebac/src/dataset_resource.rs index 3ca2f861b..7aa4db3f0 100644 --- a/src/adapter/auth-oso/src/dataset_resource.rs +++ b/src/adapter/auth-oso-rebac/src/dataset_resource.rs @@ -9,6 +9,7 @@ use std::collections::HashMap; +use opendatafabric as odf; use oso::PolarClass; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -19,7 +20,7 @@ const ROLE_EDITOR: &str = "Editor"; #[derive(PolarClass, Debug, Clone)] pub struct DatasetResource { #[polar(attribute)] - pub created_by: String, + pub owner_account_id: String, #[polar(attribute)] pub allows_public_read: bool, #[polar(attribute)] @@ -29,24 +30,23 @@ pub struct DatasetResource { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// impl DatasetResource { - pub fn new(created_by: &str, allows_public_read: bool) -> Self { + pub fn new(owner_account_id: &odf::AccountID, allows_public_read: bool) -> Self { Self { - created_by: created_by.to_string(), + owner_account_id: owner_account_id.to_string(), allows_public_read, authorized_users: HashMap::new(), } } - #[allow(dead_code)] - pub fn authorize_reader(&mut self, reader: &str) { + // TODO: Private Datasets: use for relations + pub 
fn authorize_reader(&mut self, reader_account_id: &odf::AccountID) { self.authorized_users - .insert(reader.to_string(), ROLE_READER); + .insert(reader_account_id.to_string(), ROLE_READER); } - #[allow(dead_code)] - pub fn authorize_editor(&mut self, editor: &str) { + pub fn authorize_editor(&mut self, editor_account_id: &odf::AccountID) { self.authorized_users - .insert(editor.to_string(), ROLE_EDITOR); + .insert(editor_account_id.to_string(), ROLE_EDITOR); } } @@ -56,8 +56,8 @@ impl std::fmt::Display for DatasetResource { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, - "Dataset(created_by='{}', allows_public_read={}, num_authorizations={})", - &self.created_by, + "Dataset(owner_account_id='{}', allows_public_read={}, num_authorizations={})", + &self.owner_account_id, self.allows_public_read, self.authorized_users.len(), ) diff --git a/src/adapter/auth-oso-rebac/src/dependencies.rs b/src/adapter/auth-oso-rebac/src/dependencies.rs new file mode 100644 index 000000000..7d88e90cb --- /dev/null +++ b/src/adapter/auth-oso-rebac/src/dependencies.rs @@ -0,0 +1,22 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use dill::CatalogBuilder; + +use crate::*; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub fn register_dependencies(catalog_builder: &mut CatalogBuilder) { + catalog_builder.add::(); + catalog_builder.add::(); + catalog_builder.add::(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/adapter/auth-oso/src/kamu_auth_oso.rs b/src/adapter/auth-oso-rebac/src/kamu_auth_oso.rs similarity index 64% rename from src/adapter/auth-oso/src/kamu_auth_oso.rs rename to src/adapter/auth-oso-rebac/src/kamu_auth_oso.rs index f0047e80e..0264c53b4 100644 --- a/src/adapter/auth-oso/src/kamu_auth_oso.rs +++ b/src/adapter/auth-oso-rebac/src/kamu_auth_oso.rs @@ -7,6 +7,7 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. 
+use std::ops::Deref; use std::sync::Arc; use dill::component; @@ -21,6 +22,8 @@ pub struct KamuAuthOso { pub oso: Arc<Oso>, } +// TODO: Private Datasets: move from stateless component to stateful +// (do not parse Polar every time) #[component(pub)] impl KamuAuthOso { pub fn new() -> Self { @@ -35,11 +38,16 @@ impl KamuAuthOso { } fn load_oso() -> Result<Oso, OsoError> { + // TODO: Private Datasets: make a patch for OSO: + // - remove extra allocations (check tracing logs) + // - add removing/updating for cached instances let mut oso = Oso::new(); oso.register_class(DatasetResource::get_polar_class())?; oso.register_class(UserActor::get_polar_class())?; + // TODO: Private Datasets: add Polar-related tests: + // https://www.osohq.com/docs/modeling-in-polar/conceptual-overview/test#policy-tests oso.load_str(include_str!("schema.polar"))?; Ok(oso) @@ -47,3 +55,13 @@ impl KamuAuthOso { } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +impl Deref for KamuAuthOso { + type Target = Arc<Oso>; + + fn deref(&self) -> &Self::Target { + &self.oso + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/adapter/auth-oso-rebac/src/lib.rs b/src/adapter/auth-oso-rebac/src/lib.rs new file mode 100644 index 000000000..0ddb42220 --- /dev/null +++ b/src/adapter/auth-oso-rebac/src/lib.rs @@ -0,0 +1,24 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +#![feature(lint_reasons)] + +mod dataset_resource; +mod dependencies; +mod kamu_auth_oso; +mod oso_dataset_authorizer; +mod oso_resource_service_impl; +mod user_actor; + +pub use dataset_resource::*; +pub use dependencies::*; +pub use kamu_auth_oso::*; +pub use oso_dataset_authorizer::*; +pub use oso_resource_service_impl::*; +pub use user_actor::*; diff --git a/src/adapter/auth-oso-rebac/src/oso_dataset_authorizer.rs b/src/adapter/auth-oso-rebac/src/oso_dataset_authorizer.rs new file mode 100644 index 000000000..113549ed5 --- /dev/null +++ b/src/adapter/auth-oso-rebac/src/oso_dataset_authorizer.rs @@ -0,0 +1,240 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0.
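The Deref impl above is what lets downstream code treat an injected KamuAuthOso as the Oso handle itself. A minimal sketch of the call-site effect, built only from types introduced in this patch (the function name and the owner-reads-own-dataset scenario are illustrative, not part of the change):

use kamu_adapter_auth_oso_rebac::{DatasetResource, KamuAuthOso, UserActor};
use kamu_core::auth::DatasetAction;
use opendatafabric as odf;

fn is_owner_allowed_to_read() -> Result<bool, oso::OsoError> {
    let owner_account_id = odf::AccountID::new_generated_ed25519().1;
    let actor = UserActor::logged(&owner_account_id, /* is_admin */ false);
    let resource = DatasetResource::new(&owner_account_id, /* allows_public_read */ false);

    let kamu_auth_oso = KamuAuthOso::new();
    // Previously callers had to reach through the field: kamu_auth_oso.oso.is_allowed(...).
    // With Deref<Target = Arc<Oso>>, the wrapper forwards directly to the Oso instance:
    kamu_auth_oso.is_allowed(actor, DatasetAction::Read, resource)
}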
+ +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; + +use dill::*; +use internal_error::{ErrorIntoInternal, InternalError, ResultIntoInternal}; +use kamu_accounts::CurrentAccountSubject; +use kamu_core::auth::*; +use kamu_core::AccessError; +use opendatafabric as odf; +use tokio::try_join; + +use crate::dataset_resource::*; +use crate::user_actor::*; +use crate::{KamuAuthOso, OsoResourceServiceImpl}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub struct OsoDatasetAuthorizer { + kamu_auth_oso: Arc, + current_account_subject: Arc, + oso_resource_service: Arc, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[component(pub)] +#[interface(dyn DatasetActionAuthorizer)] +impl OsoDatasetAuthorizer { + pub fn new( + kamu_auth_oso: Arc, + current_account_subject: Arc, + oso_resource_service: Arc, + ) -> Self { + Self { + kamu_auth_oso, + current_account_subject, + oso_resource_service, + } + } + + async fn user_actor(&self) -> Result { + let maybe_account_id = self.get_maybe_logged_account_id(); + + let user_actor = self + .oso_resource_service + .user_actor(maybe_account_id) + .await + .int_err()?; + + Ok(user_actor) + } + + async fn dataset_resource( + &self, + dataset_handle: &odf::DatasetHandle, + ) -> Result { + let dataset_id = &dataset_handle.id; + + let dataset_resource = self + .oso_resource_service + .dataset_resource(dataset_id) + .await + .int_err()?; + + Ok(dataset_resource) + } + + fn get_maybe_logged_account_id(&self) -> Option<&odf::AccountID> { + match self.current_account_subject.as_ref() { + CurrentAccountSubject::Anonymous(_) => None, + CurrentAccountSubject::Logged(logged_account) => Some(&logged_account.account_id), + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl DatasetActionAuthorizer for OsoDatasetAuthorizer { + #[tracing::instrument(level = "debug", skip_all, fields(%dataset_handle, ?action))] + async fn check_action_allowed( + &self, + dataset_handle: &odf::DatasetHandle, + action: DatasetAction, + ) -> Result<(), DatasetActionUnauthorizedError> { + let (user_actor, dataset_resource) = + try_join!(self.user_actor(), self.dataset_resource(dataset_handle))?; + + match self + .kamu_auth_oso + .is_allowed(user_actor, action.to_string(), dataset_resource) + { + Ok(allowed) if allowed => Ok(()), + Ok(_not_allowed) => Err(DatasetActionUnauthorizedError::Access( + AccessError::Forbidden( + DatasetActionNotEnoughPermissionsError { + action, + dataset_ref: dataset_handle.as_local_ref(), + } + .into(), + ), + )), + Err(e) => Err(DatasetActionUnauthorizedError::Internal(e.int_err())), + } + } + + #[tracing::instrument(level = "debug", skip_all, fields(%dataset_handle))] + async fn get_allowed_actions( + &self, + dataset_handle: &odf::DatasetHandle, + ) -> Result, InternalError> { + let (user_actor, dataset_resource) = + try_join!(self.user_actor(), self.dataset_resource(dataset_handle))?; + + self.kamu_auth_oso + .get_allowed_actions(user_actor, dataset_resource) + .int_err() + } + + #[tracing::instrument(level = "debug", skip_all, fields(dataset_handles=?dataset_handles, action=%action))] + async fn filter_datasets_allowing( + &self, + dataset_handles: Vec, + action: DatasetAction, + ) -> Result, InternalError> { + let user_actor = self.user_actor().await?; + let mut 
matched_dataset_handles = Vec::with_capacity(dataset_handles.len()); + + let dataset_ids = dataset_handles + .iter() + .map(|hdl| hdl.id.clone()) + .collect::<Vec<_>>(); + let dataset_resources_resolution = self + .oso_resource_service + .get_multiple_dataset_resources(&dataset_ids) + .await + .int_err()?; + let mut dataset_handle_id_mapping = + dataset_handles + .into_iter() + .fold(HashMap::new(), |mut acc, hdl| { + acc.insert(hdl.id.clone(), hdl); + acc + }); + + for (dataset_id, dataset_resource) in dataset_resources_resolution.resolved_resources { + let is_allowed = self + .kamu_auth_oso + .is_allowed(user_actor.clone(), action, dataset_resource) + .int_err()?; + + if is_allowed { + let dataset_handle = dataset_handle_id_mapping + // Thus we obtain the value without cloning + .remove(&dataset_id) + .ok_or_else(|| { + format!("Unexpectedly, dataset_handle was not found: {dataset_id}").int_err() + })?; + + matched_dataset_handles.push(dataset_handle); + } + } + + Ok(matched_dataset_handles) + } + + #[tracing::instrument(level = "debug", skip_all, fields(dataset_handles=?dataset_handles, action=%action))] + async fn classify_datasets_by_allowance( + &self, + dataset_handles: Vec<odf::DatasetHandle>, + action: DatasetAction, + ) -> Result<ClassifyByAllowanceResponse, InternalError> { + let user_actor = self.user_actor().await?; + let mut matched_dataset_handles = Vec::with_capacity(dataset_handles.len()); + let mut unmatched_results = Vec::new(); + + let dataset_ids = dataset_handles + .iter() + .map(|hdl| hdl.id.clone()) + .collect::<Vec<_>>(); + let dataset_resources_resolution = self + .oso_resource_service + .get_multiple_dataset_resources(&dataset_ids) + .await + .int_err()?; + let mut dataset_handle_id_mapping = + dataset_handles + .into_iter() + .fold(HashMap::new(), |mut acc, hdl| { + acc.insert(hdl.id.clone(), hdl); + acc + }); + + for (dataset_id, dataset_resource) in dataset_resources_resolution.resolved_resources { + let dataset_handle = dataset_handle_id_mapping + // Thus we obtain the value without cloning + .remove(&dataset_id) + .ok_or_else(|| { + format!("Unexpectedly, dataset_handle was not found: {dataset_id}").int_err() + })?; + + let is_allowed = self + .kamu_auth_oso + .is_allowed(user_actor.clone(), action, dataset_resource) + .int_err()?; + + if is_allowed { + matched_dataset_handles.push(dataset_handle); + } else { + let dataset_ref = dataset_handle.as_local_ref(); + unmatched_results.push(( + dataset_handle, + DatasetActionUnauthorizedError::Access(AccessError::Forbidden( + DatasetActionNotEnoughPermissionsError { + action, + dataset_ref, + } + .into(), + )), + )); + } + } + + Ok(ClassifyByAllowanceResponse { + authorized_handles: matched_dataset_handles, + unauthorized_handles_with_errors: unmatched_results, + }) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/adapter/auth-oso-rebac/src/oso_resource_service_impl.rs b/src/adapter/auth-oso-rebac/src/oso_resource_service_impl.rs new file mode 100644 index 000000000..cc4287e21 --- /dev/null +++ b/src/adapter/auth-oso-rebac/src/oso_resource_service_impl.rs @@ -0,0 +1,270 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0.
+ +use std::collections::HashMap; +use std::sync::Arc; + +use database_common::{EntityPageListing, EntityPageStreamer}; +use dill::*; +use internal_error::{ErrorIntoInternal, InternalError, ResultIntoInternal}; +use kamu_accounts::{AccountNotFoundByIdError, AccountRepository, GetAccountByIdError}; +use kamu_auth_rebac::RebacService; +use kamu_datasets::{ + DatasetEntriesResolution, + DatasetEntryNotFoundError, + DatasetEntryRepository, + GetDatasetEntryError, +}; +use opendatafabric as odf; +use thiserror::Error; +use tokio::sync::RwLock; + +use crate::{DatasetResource, UserActor}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +type EntityId = String; + +#[derive(Debug, Default)] +struct State { + user_actor_cache_map: HashMap, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +// TODO: Private Datasets: add Service trait? +pub struct OsoResourceServiceImpl { + state: RwLock, + dataset_entry_repo: Arc, + rebac_service: Arc, + account_repo: Arc, +} + +#[component(pub)] +// TODO: Private Datasets: This service should be a singleton +// Alternative: put the state into a separate component +// #[scope(Singleton)] +impl OsoResourceServiceImpl { + pub fn new( + dataset_entry_repo: Arc, + rebac_service: Arc, + account_repo: Arc, + ) -> Self { + Self { + state: RwLock::new(State::default()), + dataset_entry_repo, + rebac_service, + account_repo, + } + } + + pub async fn user_actor( + &self, + maybe_account_id: Option<&odf::AccountID>, + ) -> Result { + let Some(account_id) = maybe_account_id else { + return Ok(UserActor::anonymous()); + }; + + // First, an attempt to get from the cache + { + let readable_state = self.state.read().await; + + let account_id_stack = account_id.as_did_str().to_stack_string(); + let maybe_cached_user_actor = readable_state + .user_actor_cache_map + .get(account_id_stack.as_str()) + .cloned(); + + if let Some(cached_user_actor) = maybe_cached_user_actor { + return Ok(cached_user_actor); + } + } + + // The second attempt is from the database + let user_actor = { + let account = match self.account_repo.get_account_by_id(account_id).await { + Ok(found_account) => found_account, + Err(e) => return Err(e.into()), + }; + + let account_properties = self + .rebac_service + .get_account_properties(&account.id) + .await + .int_err()?; + + UserActor::logged(&account.id, account_properties.is_admin) + }; + + // Lastly, caching + let mut writable_state = self.state.write().await; + + writable_state + .user_actor_cache_map + .insert(user_actor.account_id.clone(), user_actor.clone()); + + Ok(user_actor) + } + + pub async fn dataset_resource( + &self, + dataset_id: &odf::DatasetID, + ) -> Result { + let dataset_entry = match self.dataset_entry_repo.get_dataset_entry(dataset_id).await { + Ok(found_dataset_entry) => found_dataset_entry, + Err(e) => return Err(e.into()), + }; + let dataset_properties = self + .rebac_service + .get_dataset_properties(&dataset_entry.id) + .await + .int_err()?; + + let dataset_resource = DatasetResource::new( + &dataset_entry.owner_id, + dataset_properties.allows_public_read, + ); + + Ok(dataset_resource) + } + + pub async fn get_multiple_dataset_resources( + &self, + dataset_ids: &[odf::DatasetID], + ) -> Result { + let DatasetEntriesResolution { + resolved_entries, + unresolved_entries, + } = self + .dataset_entry_repo + .get_multiple_dataset_entries(dataset_ids) + .await + .int_err()?; + + let 
dataset_resources_stream = EntityPageStreamer::default().into_stream( + || async { Ok(Arc::new(resolved_entries)) }, + |dataset_entries, pagination| { + let dataset_entries_page = dataset_entries + .iter() + .skip(pagination.offset) + .take(pagination.safe_limit(dataset_entries.len())) + .collect::>(); + + let dataset_id_owner_id_mapping = + dataset_entries_page + .iter() + .fold(HashMap::new(), |mut acc, dataset_entry| { + acc.insert(dataset_entry.id.clone(), dataset_entry.owner_id.clone()); + acc + }); + let dataset_ids = dataset_entries_page + .iter() + .map(|dataset_entry| dataset_entry.id.clone()) + .collect::>(); + + async move { + let dataset_properties_map = self + .rebac_service + .get_dataset_properties_by_ids(&dataset_ids) + .await + .int_err()?; + + let mut dataset_resources = Vec::with_capacity(dataset_properties_map.len()); + + for (dataset_id, dataset_properties) in dataset_properties_map { + let owner_id = + dataset_id_owner_id_mapping + .get(&dataset_id) + .ok_or_else(|| { + format!("Unexpectedly, owner_id not found: {dataset_id}") + .int_err() + })?; + + let dataset_resource = + DatasetResource::new(owner_id, dataset_properties.allows_public_read); + + dataset_resources.push((dataset_id, dataset_resource)); + } + + Ok(EntityPageListing { + list: dataset_resources, + total_count: dataset_entries.len(), + }) + } + }, + ); + + use futures::TryStreamExt; + + Ok(DatasetResourcesResolution { + resolved_resources: dataset_resources_stream.try_collect().await?, + unresolved_resources: unresolved_entries, + }) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub struct DatasetResourcesResolution { + pub resolved_resources: Vec<(odf::DatasetID, DatasetResource)>, + pub unresolved_resources: Vec, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Errors +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Error, Debug)] +pub enum GetUserActorError { + #[error(transparent)] + NotFound(#[from] AccountNotFoundByIdError), + + #[error(transparent)] + Internal(#[from] InternalError), +} + +impl From for GetUserActorError { + fn from(err: GetAccountByIdError) -> Self { + match err { + GetAccountByIdError::NotFound(e) => Self::NotFound(e), + GetAccountByIdError::Internal(e) => Self::Internal(e), + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Error, Debug)] +pub enum GetDatasetResourceError { + #[error(transparent)] + NotFound(#[from] DatasetEntryNotFoundError), + + #[error(transparent)] + Internal(#[from] InternalError), +} + +impl From for GetDatasetResourceError { + fn from(err: GetDatasetEntryError) -> Self { + match err { + GetDatasetEntryError::NotFound(e) => Self::NotFound(e), + GetDatasetEntryError::Internal(e) => Self::Internal(e), + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Error, Debug)] +pub enum GetMultipleDatasetResourcesError { + #[error(transparent)] + Internal(#[from] InternalError), +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/adapter/auth-oso/src/schema.polar b/src/adapter/auth-oso-rebac/src/schema.polar similarity index 55% rename 
from src/adapter/auth-oso/src/schema.polar rename to src/adapter/auth-oso-rebac/src/schema.polar index f5db4ee24..7dff23966 100644 --- a/src/adapter/auth-oso/src/schema.polar +++ b/src/adapter/auth-oso-rebac/src/schema.polar @@ -7,16 +7,16 @@ resource DatasetResource { has_permission(actor: UserActor, "read", dataset: DatasetResource) if actor.is_admin or dataset.allows_public_read or - dataset.created_by == actor.name or ( - actor_name = actor.name and - dataset.authorized_users.(actor_name) in ["Reader", "Editor"] + dataset.owner_account_id == actor.account_id or ( + actor_account_id = actor.account_id and + dataset.authorized_users.(actor_account_id) in ["Reader", "Editor"] ); has_permission(actor: UserActor, "write", dataset: DatasetResource) if actor.is_admin or - dataset.created_by == actor.name or ( - actor_name = actor.name and - dataset.authorized_users.(actor_name) == "Editor" + dataset.owner_account_id == actor.account_id or ( + actor_account_id = actor.account_id and + dataset.authorized_users.(actor_account_id) == "Editor" ); allow(actor: UserActor, action: String, dataset: DatasetResource) if diff --git a/src/adapter/auth-oso/src/user_actor.rs b/src/adapter/auth-oso-rebac/src/user_actor.rs similarity index 72% rename from src/adapter/auth-oso/src/user_actor.rs rename to src/adapter/auth-oso-rebac/src/user_actor.rs index 78c426b13..dc5b3be58 100644 --- a/src/adapter/auth-oso/src/user_actor.rs +++ b/src/adapter/auth-oso-rebac/src/user_actor.rs @@ -7,6 +7,7 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. +use opendatafabric as odf; use oso::PolarClass; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -14,7 +15,7 @@ use oso::PolarClass; #[derive(PolarClass, Debug, Clone)] pub struct UserActor { #[polar(attribute)] - pub name: String, + pub account_id: String, #[polar(attribute)] pub anonymous: bool, #[polar(attribute)] @@ -24,10 +25,18 @@ pub struct UserActor { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// impl UserActor { - pub fn new(name: &str, anonymous: bool, is_admin: bool) -> Self { + pub fn anonymous() -> Self { + UserActor { + account_id: String::new(), + anonymous: true, + is_admin: false, + } + } + + pub fn logged(account_id: &odf::AccountID, is_admin: bool) -> Self { Self { - name: name.to_string(), - anonymous, + account_id: account_id.to_string(), + anonymous: false, is_admin, } } @@ -39,8 +48,8 @@ impl std::fmt::Display for UserActor { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, - "User(name='{}', anonymous={}, is_admin={})", - &self.name, self.anonymous, self.is_admin + "User(account_id={}, anonymous={}, is_admin={})", + &self.account_id, self.anonymous, self.is_admin ) } } diff --git a/src/adapter/auth-oso/tests/mod.rs b/src/adapter/auth-oso-rebac/tests/mod.rs similarity index 100% rename from src/adapter/auth-oso/tests/mod.rs rename to src/adapter/auth-oso-rebac/tests/mod.rs diff --git a/src/adapter/auth-oso/tests/tests/mod.rs b/src/adapter/auth-oso-rebac/tests/tests/mod.rs similarity index 100% rename from src/adapter/auth-oso/tests/tests/mod.rs rename to src/adapter/auth-oso-rebac/tests/tests/mod.rs diff --git a/src/adapter/auth-oso-rebac/tests/tests/test_oso.rs b/src/adapter/auth-oso-rebac/tests/tests/test_oso.rs new file mode 100644 index 000000000..c112cc5e0 --- /dev/null +++ 
b/src/adapter/auth-oso-rebac/tests/tests/test_oso.rs @@ -0,0 +1,207 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use kamu_adapter_auth_oso_rebac::{DatasetResource, KamuAuthOso, UserActor}; +use kamu_core::auth::DatasetAction; +use opendatafabric as odf; + +// TODO: Private Datasets: cover all other schema branches + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +macro_rules! assert_allowed { + ($check_result: expr) => { + assert!($check_result.is_ok() && $check_result.unwrap()); + }; +} + +macro_rules! assert_forbidden { + ($check_result: expr) => { + assert!($check_result.is_ok() && !$check_result.unwrap()); + }; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[test_log::test(tokio::test)] +async fn test_owner_can_read_and_write() { + let owner_account_id = random_account_id(); + let is_admin = false; + let owner_user_actor = UserActor::logged(&owner_account_id, is_admin); + + let allows_public_read = false; + let owned_dataset_resource = DatasetResource::new(&owner_account_id, allows_public_read); + + let oso = KamuAuthOso::new(); + + let write_result = oso.is_allowed( + owner_user_actor.clone(), + DatasetAction::Write, + owned_dataset_resource.clone(), + ); + let read_result = oso.is_allowed( + owner_user_actor.clone(), + DatasetAction::Read, + owned_dataset_resource.clone(), + ); + + assert_allowed!(write_result); + assert_allowed!(read_result); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[test_log::test(tokio::test)] +async fn test_unrelated_can_read_public() { + let is_admin = false; + let unrelated_user_actor = UserActor::logged(&random_account_id(), is_admin); + + let allows_public_read = true; + let public_dataset_resource = DatasetResource::new(&random_account_id(), allows_public_read); + + let oso = KamuAuthOso::new(); + + let write_result = oso.is_allowed( + unrelated_user_actor.clone(), + DatasetAction::Write, + public_dataset_resource.clone(), + ); + let read_result = oso.is_allowed( + unrelated_user_actor.clone(), + DatasetAction::Read, + public_dataset_resource.clone(), + ); + + assert_forbidden!(write_result); + assert_allowed!(read_result); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[test_log::test(tokio::test)] +async fn test_unrelated_cannot_read_private() { + let is_admin = false; + let unrelated_user_actor = UserActor::logged(&random_account_id(), is_admin); + + let allows_public_read = false; + let private_dataset_resource = DatasetResource::new(&random_account_id(), allows_public_read); + + let oso = KamuAuthOso::new(); + + let write_result = oso.is_allowed( + unrelated_user_actor.clone(), + DatasetAction::Write, + private_dataset_resource.clone(), + ); + let read_result = oso.is_allowed( + unrelated_user_actor.clone(), + DatasetAction::Read, + private_dataset_resource.clone(), + ); + + assert_forbidden!(write_result); + assert_forbidden!(read_result); +} + 
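The TODO at the top of this test file notes that not all policy branches are covered yet. One missing branch is the anonymous actor; below is a hedged sketch of such a test in the same style, assuming the allow rule keeps delegating to has_permission so that allows_public_read alone grants read access. The test name is illustrative, and random_account_id() is the helper defined at the bottom of this file.

#[test_log::test(tokio::test)]
async fn test_anonymous_can_read_public_but_not_write() {
    // Anonymous actor: empty account_id, not an admin
    let anonymous_user_actor = UserActor::anonymous();

    let allows_public_read = true;
    let public_dataset_resource = DatasetResource::new(&random_account_id(), allows_public_read);

    let oso = KamuAuthOso::new();

    let write_result = oso.is_allowed(
        anonymous_user_actor.clone(),
        DatasetAction::Write,
        public_dataset_resource.clone(),
    );
    let read_result = oso.is_allowed(
        anonymous_user_actor.clone(),
        DatasetAction::Read,
        public_dataset_resource.clone(),
    );

    // Public read is granted by allows_public_read; write still requires ownership,
    // an explicit Editor role, or admin rights.
    assert_forbidden!(write_result);
    assert_allowed!(read_result);
}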
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[test_log::test(tokio::test)] +async fn test_having_explicit_read_permission_in_private_dataset() { + let reader_account_id = random_account_id(); + let is_admin = false; + let reader_user_actor = UserActor::logged(&reader_account_id, is_admin); + + let allows_public_read = false; + let mut private_dataset_resource = + DatasetResource::new(&random_account_id(), allows_public_read); + private_dataset_resource.authorize_reader(&reader_account_id); + + let oso = KamuAuthOso::new(); + + let write_result = oso.is_allowed( + reader_user_actor.clone(), + DatasetAction::Write, + private_dataset_resource.clone(), + ); + let read_result = oso.is_allowed( + reader_user_actor.clone(), + DatasetAction::Read, + private_dataset_resource.clone(), + ); + + assert_forbidden!(write_result); + assert_allowed!(read_result); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[test_log::test(tokio::test)] +async fn test_having_explicit_write_permission_in_private_dataset() { + let editor_account_id = random_account_id(); + let is_admin = false; + let editor_user_actor = UserActor::logged(&editor_account_id, is_admin); + + let allows_public_read = false; + let mut private_dataset_resource = + DatasetResource::new(&random_account_id(), allows_public_read); + private_dataset_resource.authorize_editor(&editor_account_id); + + let oso = KamuAuthOso::new(); + + let write_result = oso.is_allowed( + editor_user_actor.clone(), + DatasetAction::Write, + private_dataset_resource.clone(), + ); + let read_result = oso.is_allowed( + editor_user_actor.clone(), + DatasetAction::Read, + private_dataset_resource.clone(), + ); + + assert_allowed!(write_result); + assert_allowed!(read_result); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[test_log::test(tokio::test)] +async fn test_admin_can_read_and_write_another_private_dataset() { + let is_admin = true; + let admin_user_actor = UserActor::logged(&random_account_id(), is_admin); + + let allows_public_read = false; + let dataset_resource = DatasetResource::new(&random_account_id(), allows_public_read); + + let oso = KamuAuthOso::new(); + + let write_result = oso.is_allowed( + admin_user_actor.clone(), + DatasetAction::Write, + dataset_resource.clone(), + ); + let read_result = oso.is_allowed( + admin_user_actor.clone(), + DatasetAction::Read, + dataset_resource.clone(), + ); + + assert_allowed!(write_result); + assert_allowed!(read_result); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Helpers +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +fn random_account_id() -> odf::AccountID { + odf::AccountID::new_generated_ed25519().1 +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/adapter/auth-oso-rebac/tests/tests/test_oso_dataset_authorizer.rs b/src/adapter/auth-oso-rebac/tests/tests/test_oso_dataset_authorizer.rs new file mode 100644 index 000000000..1685259fb --- /dev/null +++ b/src/adapter/auth-oso-rebac/tests/tests/test_oso_dataset_authorizer.rs @@ -0,0 +1,248 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. 
+// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use std::assert_matches::assert_matches; +use std::collections::HashSet; +use std::sync::Arc; + +use dill::Component; +use kamu_accounts::{ + AccountConfig, + AnonymousAccountReason, + CurrentAccountSubject, + PredefinedAccountsConfig, +}; +use kamu_accounts_inmem::InMemoryAccountRepository; +use kamu_accounts_services::{LoginPasswordAuthProvider, PredefinedAccountsRegistrator}; +use kamu_auth_rebac_inmem::InMemoryRebacRepository; +use kamu_core::auth::{DatasetAction, DatasetActionAuthorizer, DatasetActionUnauthorizedError}; +use kamu_core::testing::MockDatasetRepository; +use kamu_core::{ + AccessError, + DatasetLifecycleMessage, + DatasetRepository, + DatasetVisibility, + TenancyConfig, + MESSAGE_PRODUCER_KAMU_CORE_DATASET_SERVICE, +}; +use kamu_datasets_inmem::InMemoryDatasetEntryRepository; +use kamu_datasets_services::DatasetEntryServiceImpl; +use messaging_outbox::{ + register_message_dispatcher, + ConsumerFilter, + Outbox, + OutboxExt, + OutboxImmediateImpl, +}; +use opendatafabric as odf; +use time_source::SystemTimeSourceDefault; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[test_log::test(tokio::test)] +async fn test_owner_can_read_and_write_private_dataset() { + let harness = DatasetAuthorizerHarness::new(logged("john")).await; + let dataset_handle = harness + .create_private_dataset(dataset_alias("john/foo")) + .await; + + let read_result = harness + .dataset_authorizer + .check_action_allowed(&dataset_handle, DatasetAction::Read) + .await; + + let write_result = harness + .dataset_authorizer + .check_action_allowed(&dataset_handle, DatasetAction::Write) + .await; + + let allowed_actions = harness + .dataset_authorizer + .get_allowed_actions(&dataset_handle) + .await; + + assert_matches!(read_result, Ok(())); + assert_matches!(write_result, Ok(())); + assert_matches!( + allowed_actions, + Ok(actual_actions) + if actual_actions == HashSet::from([DatasetAction::Read, DatasetAction::Write]) + ); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[test_log::test(tokio::test)] +async fn test_guest_can_read_but_not_write_public_dataset() { + let harness = DatasetAuthorizerHarness::new(anonymous()).await; + let dataset_handle = harness + .create_public_dataset(dataset_alias("john/foo")) + .await; + + let read_result = harness + .dataset_authorizer + .check_action_allowed(&dataset_handle, DatasetAction::Read) + .await; + + let write_result = harness + .dataset_authorizer + .check_action_allowed(&dataset_handle, DatasetAction::Write) + .await; + + let allowed_actions = harness + .dataset_authorizer + .get_allowed_actions(&dataset_handle) + .await; + + assert_matches!(read_result, Ok(())); + assert_matches!( + write_result, + Err(DatasetActionUnauthorizedError::Access( + AccessError::Forbidden(_) + )) + ); + assert_matches!( + allowed_actions, + Ok(actual_actions) + if actual_actions == HashSet::from([DatasetAction::Read]) + ); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[allow(dead_code)] +pub struct DatasetAuthorizerHarness { + dataset_authorizer: Arc, 
+ outbox: Arc, +} + +impl DatasetAuthorizerHarness { + pub async fn new(current_account_subject: CurrentAccountSubject) -> Self { + let mut predefined_accounts_config = PredefinedAccountsConfig::new(); + + if let CurrentAccountSubject::Logged(logged_account) = ¤t_account_subject { + predefined_accounts_config + .predefined + .push(AccountConfig::from_name( + logged_account.account_name.clone(), + )); + } + + let catalog = { + let tenancy_config = TenancyConfig::MultiTenant; + + let mut b = dill::CatalogBuilder::new(); + + b.add::() + .add_value(current_account_subject) + .add_value(predefined_accounts_config) + .add::() + .add::() + .add_builder( + OutboxImmediateImpl::builder() + .with_consumer_filter(ConsumerFilter::AllConsumers), + ) + .add_value(MockDatasetRepository::new()) + .bind::() + .add_value(tenancy_config) + .add::() + .add::() + .add::() + .add::() + .bind::(); + + kamu_adapter_auth_oso_rebac::register_dependencies(&mut b); + + kamu_auth_rebac_services::register_dependencies(&mut b, tenancy_config); + + register_message_dispatcher::( + &mut b, + MESSAGE_PRODUCER_KAMU_CORE_DATASET_SERVICE, + ); + + b.build() + }; + + { + use init_on_startup::InitOnStartup; + catalog + .get_one::() + .unwrap() + .run_initialization() + .await + .unwrap(); + }; + + Self { + dataset_authorizer: catalog.get_one().unwrap(), + outbox: catalog.get_one().unwrap(), + } + } + + async fn create_public_dataset(&self, alias: odf::DatasetAlias) -> odf::DatasetHandle { + self.create_dataset(alias, DatasetVisibility::Public).await + } + + async fn create_private_dataset(&self, alias: odf::DatasetAlias) -> odf::DatasetHandle { + self.create_dataset(alias, DatasetVisibility::Private).await + } + + async fn create_dataset( + &self, + alias: odf::DatasetAlias, + visibility: DatasetVisibility, + ) -> odf::DatasetHandle { + let dataset_id = dataset_id(&alias); + + self.outbox + .post_message( + MESSAGE_PRODUCER_KAMU_CORE_DATASET_SERVICE, + DatasetLifecycleMessage::created( + dataset_id.clone(), + account_id(&alias), + visibility, + alias.dataset_name.clone(), + ), + ) + .await + .unwrap(); + + odf::DatasetHandle::new(dataset_id, alias) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Helpers +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +fn logged(account_name: &str) -> CurrentAccountSubject { + CurrentAccountSubject::logged( + odf::AccountID::new_seeded_ed25519(account_name.as_bytes()), + odf::AccountName::new_unchecked(account_name), + false, + ) +} + +fn anonymous() -> CurrentAccountSubject { + CurrentAccountSubject::anonymous(AnonymousAccountReason::NoAuthenticationProvided) +} + +fn dataset_alias(raw_alias: &str) -> odf::DatasetAlias { + odf::DatasetAlias::try_from(raw_alias).unwrap() +} + +fn dataset_id(alias: &odf::DatasetAlias) -> odf::DatasetID { + odf::DatasetID::new_seeded_ed25519(alias.to_string().as_bytes()) +} + +fn account_id(alias: &odf::DatasetAlias) -> odf::AccountID { + odf::AccountID::new_seeded_ed25519(alias.account_name.as_ref().unwrap().as_bytes()) +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/adapter/auth-oso/Cargo.toml b/src/adapter/auth-oso/Cargo.toml deleted file mode 100644 index 1516c5a80..000000000 --- a/src/adapter/auth-oso/Cargo.toml +++ /dev/null @@ -1,43 +0,0 @@ -[package] -name = "kamu-adapter-auth-oso" -description = 
"Authorization adapter for Kamu domain based on OSO library" -version = { workspace = true } -homepage = { workspace = true } -repository = { workspace = true } -authors = { workspace = true } -readme = { workspace = true } -license-file = { workspace = true } -keywords = { workspace = true } -include = { workspace = true } -edition = { workspace = true } -publish = { workspace = true } - - -[lints] -workspace = true - - -[lib] -doctest = false - -[dependencies] -internal-error = { workspace = true } -messaging-outbox = { workspace = true } -opendatafabric = { workspace = true } -kamu-accounts = { workspace = true } -kamu-core = { workspace = true } - -async-trait = "0.1" -dill = "0.9" -oso = "0.27" -oso-derive = "0.27" -tracing = { version = "0.1", default-features = false } - -[dev-dependencies] -kamu = { workspace = true, features = ["testing"] } -time-source = { workspace = true } - -tempfile = "3" -test-log = { version = "0.2", features = ["trace"] } -tokio = { version = "1", default-features = false, features = [] } -tracing-subscriber = { version = "0.3", features = ["env-filter"] } diff --git a/src/adapter/auth-oso/src/oso_dataset_authorizer.rs b/src/adapter/auth-oso/src/oso_dataset_authorizer.rs deleted file mode 100644 index dc09d71c8..000000000 --- a/src/adapter/auth-oso/src/oso_dataset_authorizer.rs +++ /dev/null @@ -1,196 +0,0 @@ -// Copyright Kamu Data, Inc. and contributors. All rights reserved. -// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0. - -use std::collections::HashSet; -use std::str::FromStr; -use std::sync::Arc; - -use dill::*; -use internal_error::{ErrorIntoInternal, InternalError, ResultIntoInternal}; -use kamu_accounts::CurrentAccountSubject; -use kamu_core::auth::*; -use kamu_core::AccessError; -use opendatafabric::DatasetHandle; -use oso::Oso; - -use crate::dataset_resource::*; -use crate::user_actor::*; -use crate::KamuAuthOso; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -pub struct OsoDatasetAuthorizer { - oso: Arc, - current_account_subject: Arc, -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[component(pub)] -#[interface(dyn DatasetActionAuthorizer)] -impl OsoDatasetAuthorizer { - #[allow(clippy::needless_pass_by_value)] - pub fn new( - kamu_auth_oso: Arc, - current_account_subject: Arc, - ) -> Self { - Self { - oso: kamu_auth_oso.oso.clone(), - current_account_subject, - } - } - - fn actor(&self) -> UserActor { - match self.current_account_subject.as_ref() { - CurrentAccountSubject::Anonymous(_) => UserActor::new("", true, false), - CurrentAccountSubject::Logged(l) => { - UserActor::new(l.account_name.as_str(), false, l.is_admin) - } - } - } - - fn dataset_resource(&self, dataset_handle: &DatasetHandle) -> DatasetResource { - let dataset_alias = &dataset_handle.alias; - let creator = dataset_alias.account_name.as_ref().map_or_else( - || { - self.current_account_subject - .account_name_or_default() - .as_str() - }, - |a| a.as_str(), - ); - - // TODO: for now let's treat all datasets as public - // TODO: explicit read/write permissions - DatasetResource::new(creator, true) - } -} - 
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[async_trait::async_trait] -impl DatasetActionAuthorizer for OsoDatasetAuthorizer { - #[tracing::instrument(level = "debug", skip_all, fields(%dataset_handle, ?action))] - async fn check_action_allowed( - &self, - dataset_handle: &DatasetHandle, - action: DatasetAction, - ) -> Result<(), DatasetActionUnauthorizedError> { - let actor = self.actor(); - let dataset_resource = self.dataset_resource(dataset_handle); - - match self - .oso - .is_allowed(actor, action.to_string(), dataset_resource) - { - Ok(r) => { - if r { - Ok(()) - } else { - Err(DatasetActionUnauthorizedError::Access( - AccessError::Forbidden( - DatasetActionNotEnoughPermissionsError { - action, - dataset_ref: dataset_handle.as_local_ref(), - } - .into(), - ), - )) - } - } - Err(e) => Err(DatasetActionUnauthorizedError::Internal(e.int_err())), - } - } - - #[tracing::instrument(level = "debug", skip_all, fields(%dataset_handle))] - async fn get_allowed_actions(&self, dataset_handle: &DatasetHandle) -> HashSet { - let actor = self.actor(); - let dataset_resource = self.dataset_resource(dataset_handle); - - let allowed_action_names: HashSet = self - .oso - .get_allowed_actions(actor, dataset_resource) - .unwrap(); - - let mut allowed_actions = HashSet::new(); - for action_name in allowed_action_names { - let action = DatasetAction::from_str(action_name.as_str()).unwrap(); - allowed_actions.insert(action); - } - - allowed_actions - } - - #[tracing::instrument(level = "debug", skip_all, fields(dataset_handles=?dataset_handles, action=%action))] - async fn filter_datasets_allowing( - &self, - dataset_handles: Vec, - action: DatasetAction, - ) -> Result, InternalError> { - let mut matched_dataset_handles = Vec::new(); - for hdl in dataset_handles { - let is_allowed = self - .oso - .is_allowed( - self.actor(), - action.to_string(), - self.dataset_resource(&hdl), - ) - .int_err()?; - if is_allowed { - matched_dataset_handles.push(hdl); - } - } - - Ok(matched_dataset_handles) - } - - #[tracing::instrument(level = "debug", skip_all, fields(dataset_handles=?dataset_handles, action=%action))] - async fn classify_datasets_by_allowance( - &self, - dataset_handles: Vec, - action: DatasetAction, - ) -> Result { - let mut matched_dataset_handles = Vec::with_capacity(dataset_handles.len()); - let mut unmatched_results = Vec::new(); - - for hdl in dataset_handles { - let is_allowed = self - .oso - .is_allowed( - self.actor(), - action.to_string(), - self.dataset_resource(&hdl), - ) - .int_err()?; - if is_allowed { - matched_dataset_handles.push(hdl); - } else { - let dataset_ref = hdl.as_local_ref(); - unmatched_results.push(( - hdl, - DatasetActionUnauthorizedError::Access(AccessError::Forbidden( - DatasetActionNotEnoughPermissionsError { - action, - dataset_ref, - } - .into(), - )), - )); - } - } - - Ok(ClassifyByAllowanceResponse { - authorized_handles: matched_dataset_handles, - unauthorized_handles_with_errors: unmatched_results, - }) - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/adapter/auth-oso/tests/tests/test_oso.rs b/src/adapter/auth-oso/tests/tests/test_oso.rs deleted file mode 100644 index 42136d51e..000000000 --- a/src/adapter/auth-oso/tests/tests/test_oso.rs +++ /dev/null @@ -1,181 +0,0 @@ -// Copyright Kamu Data, Inc. and contributors. All rights reserved. 
-// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0. - -use kamu_adapter_auth_oso::dataset_resource::DatasetResource; -use kamu_adapter_auth_oso::user_actor::UserActor; -use kamu_adapter_auth_oso::KamuAuthOso; -use kamu_core::auth::DatasetAction; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -macro_rules! assert_allowed { - ($check_result: expr) => { - assert!($check_result.is_ok() && $check_result.unwrap()); - }; -} - -macro_rules! assert_forbidden { - ($check_result: expr) => { - assert!($check_result.is_ok() && !$check_result.unwrap()); - }; -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[test_log::test(tokio::test)] -async fn test_owner_can_read_and_write() { - let is_admin = false; - let user_actor = UserActor::new("foo", false, is_admin); - let dataset_resource = DatasetResource::new("foo", false); - - let oso = KamuAuthOso::new().oso; - - let write_result = oso.is_allowed( - user_actor.clone(), - format!("{}", DatasetAction::Write), - dataset_resource.clone(), - ); - let read_result = oso.is_allowed( - user_actor.clone(), - format!("{}", DatasetAction::Read), - dataset_resource.clone(), - ); - - assert_allowed!(write_result); - assert_allowed!(read_result); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[test_log::test(tokio::test)] -async fn test_unrelated_can_read_public() { - let is_admin = false; - let user_actor = UserActor::new("foo", false, is_admin); - let dataset_resource = DatasetResource::new("bar", true); - - let oso = KamuAuthOso::new().oso; - - let write_result = oso.is_allowed( - user_actor.clone(), - format!("{}", DatasetAction::Write), - dataset_resource.clone(), - ); - let read_result = oso.is_allowed( - user_actor.clone(), - format!("{}", DatasetAction::Read), - dataset_resource.clone(), - ); - - assert_forbidden!(write_result); - assert_allowed!(read_result); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[test_log::test(tokio::test)] -async fn test_unrelated_cannot_read_private() { - let is_admin = false; - let user_actor = UserActor::new("foo", false, is_admin); - let dataset_resource = DatasetResource::new("bar", false); - - let oso = KamuAuthOso::new().oso; - - let write_result = oso.is_allowed( - user_actor.clone(), - format!("{}", DatasetAction::Write), - dataset_resource.clone(), - ); - let read_result = oso.is_allowed( - user_actor.clone(), - format!("{}", DatasetAction::Read), - dataset_resource.clone(), - ); - - assert_forbidden!(write_result); - assert_forbidden!(read_result); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[test_log::test(tokio::test)] -async fn test_having_explicit_read_permission_in_private_dataset() { - let is_admin = false; - let user_actor = UserActor::new("foo", false, is_admin); - let mut dataset_resource = DatasetResource::new("bar", false); - dataset_resource.authorize_reader("foo"); - - let oso = KamuAuthOso::new().oso; - - let write_result = oso.is_allowed( - user_actor.clone(), - 
format!("{}", DatasetAction::Write), - dataset_resource.clone(), - ); - let read_result = oso.is_allowed( - user_actor.clone(), - format!("{}", DatasetAction::Read), - dataset_resource.clone(), - ); - - assert_forbidden!(write_result); - assert_allowed!(read_result); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[test_log::test(tokio::test)] -async fn test_having_explicit_write_permission_in_private_dataset() { - let is_admin = false; - let user_actor = UserActor::new("foo", false, is_admin); - let mut dataset_resource = DatasetResource::new("bar", false); - dataset_resource.authorize_editor("foo"); - - let oso = KamuAuthOso::new().oso; - - let write_result = oso.is_allowed( - user_actor.clone(), - format!("{}", DatasetAction::Write), - dataset_resource.clone(), - ); - let read_result = oso.is_allowed( - user_actor.clone(), - format!("{}", DatasetAction::Read), - dataset_resource.clone(), - ); - - assert_allowed!(write_result); - assert_allowed!(read_result); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[test_log::test(tokio::test)] -async fn test_admin_can_read_and_write_another_private_dataset() { - let is_admin = true; - let user_actor = UserActor::new("foo", false, is_admin); - let dataset_resource = DatasetResource::new("bar", false); - - let oso = KamuAuthOso::new().oso; - - let write_result = oso.is_allowed( - user_actor.clone(), - format!("{}", DatasetAction::Write), - dataset_resource.clone(), - ); - let read_result = oso.is_allowed( - user_actor.clone(), - format!("{}", DatasetAction::Read), - dataset_resource.clone(), - ); - - assert_allowed!(write_result); - assert_allowed!(read_result); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/adapter/auth-oso/tests/tests/test_oso_dataset_authorizer.rs b/src/adapter/auth-oso/tests/tests/test_oso_dataset_authorizer.rs deleted file mode 100644 index 82d156f24..000000000 --- a/src/adapter/auth-oso/tests/tests/test_oso_dataset_authorizer.rs +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright Kamu Data, Inc. and contributors. All rights reserved. -// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0. 
- -use std::assert_matches::assert_matches; -use std::collections::HashSet; -use std::sync::Arc; - -use dill::{Catalog, Component}; -use kamu::testing::MetadataFactory; -use kamu::{CreateDatasetUseCaseImpl, DatasetRepositoryLocalFs, DatasetRepositoryWriter}; -use kamu_accounts::CurrentAccountSubject; -use kamu_adapter_auth_oso::{KamuAuthOso, OsoDatasetAuthorizer}; -use kamu_core::auth::{DatasetAction, DatasetActionAuthorizer, DatasetActionUnauthorizedError}; -use kamu_core::{AccessError, CreateDatasetUseCase, DatasetRepository, TenancyConfig}; -use messaging_outbox::DummyOutboxImpl; -use opendatafabric::{AccountID, AccountName, DatasetAlias, DatasetHandle, DatasetKind}; -use tempfile::TempDir; -use time_source::SystemTimeSourceDefault; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[test_log::test(tokio::test)] -async fn test_owner_can_read_and_write() { - let harness = DatasetAuthorizerHarness::new("john"); - let dataset_handle = harness - .create_dataset(&DatasetAlias::try_from("john/foo").unwrap()) - .await; - - let read_result = harness - .dataset_authorizer - .check_action_allowed(&dataset_handle, DatasetAction::Read) - .await; - - let write_result = harness - .dataset_authorizer - .check_action_allowed(&dataset_handle, DatasetAction::Write) - .await; - - let allowed_actions = harness - .dataset_authorizer - .get_allowed_actions(&dataset_handle) - .await; - - assert_matches!(read_result, Ok(())); - assert_matches!(write_result, Ok(())); - - assert_eq!( - allowed_actions, - HashSet::from([DatasetAction::Read, DatasetAction::Write]) - ); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[test_log::test(tokio::test)] -async fn test_guest_can_read_but_not_write() { - let harness = DatasetAuthorizerHarness::new("kate"); - let dataset_handle = harness - .create_dataset(&DatasetAlias::try_from("john/foo").unwrap()) - .await; - - let read_result = harness - .dataset_authorizer - .check_action_allowed(&dataset_handle, DatasetAction::Read) - .await; - - let write_result = harness - .dataset_authorizer - .check_action_allowed(&dataset_handle, DatasetAction::Write) - .await; - - let allowed_actions = harness - .dataset_authorizer - .get_allowed_actions(&dataset_handle) - .await; - - assert_matches!(read_result, Ok(())); - assert_matches!( - write_result, - Err(DatasetActionUnauthorizedError::Access( - AccessError::Forbidden(_) - )) - ); - - assert_eq!(allowed_actions, HashSet::from([DatasetAction::Read])); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[allow(dead_code)] -pub struct DatasetAuthorizerHarness { - tempdir: TempDir, - catalog: Catalog, - dataset_authorizer: Arc, -} - -impl DatasetAuthorizerHarness { - pub fn new(current_account_name: &str) -> Self { - let tempdir = tempfile::tempdir().unwrap(); - let datasets_dir = tempdir.path().join("datasets"); - std::fs::create_dir(&datasets_dir).unwrap(); - - let catalog = dill::CatalogBuilder::new() - .add::() - .add::() - .add_value(CurrentAccountSubject::logged( - AccountID::new_seeded_ed25519(current_account_name.as_bytes()), - AccountName::new_unchecked(current_account_name), - false, - )) - .add::() - .add::() - .add_value(TenancyConfig::MultiTenant) - .add_builder(DatasetRepositoryLocalFs::builder().with_root(datasets_dir)) - .bind::() - .bind::() - .add::() - .build(); - - let dataset_authorizer 
= catalog.get_one::().unwrap(); - - Self { - tempdir, - catalog, - dataset_authorizer, - } - } - - pub async fn create_dataset(&self, alias: &DatasetAlias) -> DatasetHandle { - let create_dataset = self.catalog.get_one::().unwrap(); - - create_dataset - .execute( - alias, - MetadataFactory::metadata_block(MetadataFactory::seed(DatasetKind::Root).build()) - .build_typed(), - Default::default(), - ) - .await - .unwrap() - .dataset_handle - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/adapter/flight-sql/Cargo.toml b/src/adapter/flight-sql/Cargo.toml index 4d0fcd528..bcd307d02 100644 --- a/src/adapter/flight-sql/Cargo.toml +++ b/src/adapter/flight-sql/Cargo.toml @@ -42,4 +42,3 @@ indoc = "2" test-log = { version = "0.2", features = ["trace"] } tokio = { version = "1", default-features = false, features = [] } tokio-stream = { version = "0.1", default-features = false, features = ["net"] } -tracing-subscriber = { version = "0.3", features = ["env-filter"] } diff --git a/src/adapter/flight-sql/src/lib.rs b/src/adapter/flight-sql/src/lib.rs index 0d65ef68a..4d0e1720c 100644 --- a/src/adapter/flight-sql/src/lib.rs +++ b/src/adapter/flight-sql/src/lib.rs @@ -7,6 +7,8 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. +#![feature(lint_reasons)] + mod service; mod service_builder; mod session_factory; diff --git a/src/adapter/flight-sql/src/service.rs b/src/adapter/flight-sql/src/service.rs index 64f563bab..70bebaddd 100644 --- a/src/adapter/flight-sql/src/service.rs +++ b/src/adapter/flight-sql/src/service.rs @@ -670,7 +670,7 @@ impl KamuFlightSqlService { // FlightSqlService //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[allow(unused_variables)] +#[expect(unused_variables)] #[tonic::async_trait] impl FlightSqlService for KamuFlightSqlService { type FlightService = KamuFlightSqlService; diff --git a/src/adapter/flight-sql/src/session_factory.rs b/src/adapter/flight-sql/src/session_factory.rs index d78c7b313..cbfce9a18 100644 --- a/src/adapter/flight-sql/src/session_factory.rs +++ b/src/adapter/flight-sql/src/session_factory.rs @@ -17,13 +17,14 @@ use tonic::Status; pub type Token = String; #[async_trait::async_trait] -#[allow(unused_variables)] pub trait SessionFactory: Send + Sync { - async fn authenticate(&self, username: &str, password: &str) -> Result { + async fn authenticate(&self, _username: &str, _password: &str) -> Result { Err(Status::unauthenticated("Invalid credentials!")) } - async fn get_context(&self, token: &Token) -> Result, Status> { + async fn get_context(&self, _token: &Token) -> Result, Status> { Err(Status::unauthenticated("Invalid credentials!"))? 
} } + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/adapter/graphql/Cargo.toml b/src/adapter/graphql/Cargo.toml index 4264fd3e8..df96b6c34 100644 --- a/src/adapter/graphql/Cargo.toml +++ b/src/adapter/graphql/Cargo.toml @@ -26,15 +26,16 @@ database-common = { workspace = true } internal-error = { workspace = true } opendatafabric = { workspace = true, features = ["arrow"] } +event-sourcing = { workspace = true } kamu = { workspace = true } kamu-accounts = { workspace = true } -kamu-data-utils = { workspace = true } +kamu-auth-rebac = { workspace = true } kamu-core = { workspace = true } +kamu-data-utils = { workspace = true } kamu-datasets = { workspace = true } -kamu-task-system = { workspace = true } kamu-flow-system = { workspace = true } kamu-flow-system-services = { workspace = true } -event-sourcing = { workspace = true } +kamu-task-system = { workspace = true } async-graphql = { version = "7", features = [ "chrono", @@ -42,7 +43,6 @@ async-graphql = { version = "7", features = [ "apollo_tracing", ] } async-trait = { version = "0.1", default-features = false } -cron = { version = "0.12", default-features = false } chrono = "0.4" datafusion = { version = "42", default-features = false, features = [ "serde", @@ -68,6 +68,8 @@ kamu = { workspace = true, features = ["testing"] } kamu-accounts = { workspace = true, features = ["testing"] } kamu-accounts-inmem = { workspace = true } kamu-accounts-services = { workspace = true } +kamu-auth-rebac-inmem = { workspace = true } +kamu-auth-rebac-services = { workspace = true } kamu-datasets-inmem = { workspace = true } kamu-datasets-services = { workspace = true } kamu-flow-system-inmem = { workspace = true } @@ -83,4 +85,3 @@ tempfile = "3" test-group = { version = "1" } test-log = { version = "0.2", features = ["trace"] } tokio = { version = "1", default-features = false, features = [] } -tracing-subscriber = { version = "0.3", features = ["env-filter"] } diff --git a/src/adapter/graphql/src/guards.rs b/src/adapter/graphql/src/guards.rs index d6818d2b6..0b45a47c1 100644 --- a/src/adapter/graphql/src/guards.rs +++ b/src/adapter/graphql/src/guards.rs @@ -18,6 +18,8 @@ pub const ANONYMOUS_ACCESS_FORBIDDEN_MESSAGE: &str = "Anonymous access forbidden pub const INVALID_ACCESS_TOKEN_MESSAGE: &str = "Invalid access token"; pub const EXPIRED_ACCESS_TOKEN_MESSAGE: &str = "Expired access token"; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + pub struct LoggedInGuard {} impl LoggedInGuard { diff --git a/src/adapter/graphql/src/lib.rs b/src/adapter/graphql/src/lib.rs index be9371c80..0603ec397 100644 --- a/src/adapter/graphql/src/lib.rs +++ b/src/adapter/graphql/src/lib.rs @@ -10,6 +10,7 @@ #![feature(error_generic_member_access)] #![feature(int_roundings)] #![feature(let_chains)] +#![feature(lint_reasons)] pub mod extensions; pub(crate) mod mutations; diff --git a/src/adapter/graphql/src/mutations/auth_mut.rs b/src/adapter/graphql/src/mutations/auth_mut.rs index fee06066b..e833ba2b2 100644 --- a/src/adapter/graphql/src/mutations/auth_mut.rs +++ b/src/adapter/graphql/src/mutations/auth_mut.rs @@ -174,7 +174,7 @@ pub struct RevokeResultSuccess { #[ComplexObject] impl RevokeResultSuccess { async fn message(&self) -> String { - "Access token revoked succesfully".to_string() + "Access token revoked successfully".to_string() } } diff --git a/src/adapter/graphql/src/mutations/dataset_mut.rs 
b/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut.rs similarity index 75% rename from src/adapter/graphql/src/mutations/dataset_mut.rs rename to src/adapter/graphql/src/mutations/dataset_mut/dataset_mut.rs index 387d8a06d..8ec9b5f87 100644 --- a/src/adapter/graphql/src/mutations/dataset_mut.rs +++ b/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut.rs @@ -12,11 +12,18 @@ use domain::{DeleteDatasetError, RenameDatasetError}; use kamu_core::{self as domain, SetWatermarkUseCase}; use opendatafabric as odf; -use super::{DatasetEnvVarsMut, DatasetFlowsMut, DatasetMetadataMut}; +use crate::mutations::{ + ensure_account_owns_dataset, + DatasetEnvVarsMut, + DatasetFlowsMut, + DatasetMetadataMut, +}; use crate::prelude::*; -use crate::utils::ensure_dataset_env_vars_enabled; +use crate::utils::{ensure_dataset_env_vars_enabled, from_catalog_n}; use crate::LoggedInGuard; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[derive(Debug, Clone)] pub struct DatasetMut { dataset_handle: odf::DatasetHandle, @@ -147,6 +154,39 @@ impl DatasetMut { Err(e) => Err(e.int_err().into()), } } + + /// Set visibility for the dataset + #[graphql(guard = "LoggedInGuard::new()")] + async fn set_visibility( + &self, + ctx: &Context<'_>, + visibility: DatasetVisibilityInput, + ) -> Result { + ensure_account_owns_dataset(ctx, &self.dataset_handle).await?; + + let rebac_svc = from_catalog_n!(ctx, dyn kamu_auth_rebac::RebacService); + + let (allows_public_read, allows_anonymous_read) = match visibility { + DatasetVisibilityInput::Private(_) => (false, false), + DatasetVisibilityInput::Public(PublicDatasetVisibility { + anonymous_available, + }) => (true, anonymous_available), + }; + + use kamu_auth_rebac::DatasetPropertyName; + + for (name, value) in [ + DatasetPropertyName::allows_public_read(allows_public_read), + DatasetPropertyName::allows_anonymous_read(allows_anonymous_read), + ] { + rebac_svc + .set_dataset_property(&self.dataset_handle.id, name, &value) + .await + .int_err()?; + } + + Ok(SetDatasetPropertyResultSuccess::default()) + } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -241,6 +281,53 @@ impl DeleteResultDanglingReference { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#[derive(OneofObject)] +pub enum DatasetVisibilityInput { + Private(PrivateDatasetVisibility), + Public(PublicDatasetVisibility), +} + +#[derive(Union, Debug, Clone, PartialEq, Eq)] +pub enum DatasetVisibility { + Private(PrivateDatasetVisibility), + Public(PublicDatasetVisibility), +} + +#[derive(SimpleObject, InputObject, Debug, Clone, PartialEq, Eq)] +#[graphql(input_name = "PrivateDatasetVisibilityInput")] +pub struct PrivateDatasetVisibility { + _dummy: Option, +} + +#[derive(SimpleObject, InputObject, Debug, Clone, PartialEq, Eq)] +#[graphql(input_name = "PublicDatasetVisibilityInput")] +pub struct PublicDatasetVisibility { + anonymous_available: bool, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Interface, Debug)] +#[graphql(field(name = "message", ty = "String"))] +pub enum SetDatasetPropertyResult { + Success(SetDatasetPropertyResultSuccess), +} + +#[derive(SimpleObject, Debug, Default)] +#[graphql(complex)] +pub struct SetDatasetPropertyResultSuccess { + _dummy: Option, +} + +#[ComplexObject] 
+impl SetDatasetPropertyResultSuccess { + async fn message(&self) -> String { + "Updated".to_string() + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[derive(Interface, Debug, Clone)] #[graphql(field(name = "message", ty = "String"))] pub enum SetWatermarkResult { diff --git a/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut_utils.rs b/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut_utils.rs new file mode 100644 index 000000000..9386c08c7 --- /dev/null +++ b/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut_utils.rs @@ -0,0 +1,36 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use kamu_core::DatasetOwnershipService; +use opendatafabric as odf; + +use crate::prelude::*; +use crate::utils; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub(crate) async fn ensure_account_owns_dataset( + ctx: &Context<'_>, + dataset_handle: &odf::DatasetHandle, +) -> Result<()> { + let dataset_ownership_service = from_catalog::(ctx).unwrap(); + let logged_account = utils::get_logged_account(ctx); + + let not_owner = !dataset_ownership_service + .is_dataset_owned_by(&dataset_handle.id, &logged_account.account_id) + .await?; + + if not_owner { + return Err(Error::new("Only the dataset owner can perform this action").into()); + } + + Ok(()) +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/adapter/auth-oso/src/lib.rs b/src/adapter/graphql/src/mutations/dataset_mut/mod.rs similarity index 65% rename from src/adapter/auth-oso/src/lib.rs rename to src/adapter/graphql/src/mutations/dataset_mut/mod.rs index 095198549..618e23b72 100644 --- a/src/adapter/auth-oso/src/lib.rs +++ b/src/adapter/graphql/src/mutations/dataset_mut/mod.rs @@ -7,11 +7,8 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -pub mod dataset_resource; -pub mod user_actor; +mod dataset_mut; +mod dataset_mut_utils; -pub mod kamu_auth_oso; -pub use kamu_auth_oso::*; - -pub mod oso_dataset_authorizer; -pub use oso_dataset_authorizer::OsoDatasetAuthorizer; +pub(crate) use dataset_mut::*; +pub(crate) use dataset_mut_utils::*; diff --git a/src/adapter/graphql/src/mutations/datasets_mut.rs b/src/adapter/graphql/src/mutations/datasets_mut.rs index 80c0470a2..fc6a752b4 100644 --- a/src/adapter/graphql/src/mutations/datasets_mut.rs +++ b/src/adapter/graphql/src/mutations/datasets_mut.rs @@ -114,7 +114,6 @@ impl DatasetsMut { // TODO: Multi-tenant resolution for derivative dataset inputs (should it only // work by ID?) 
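Note on the set_visibility mutation above: it reduces the GraphQL visibility input to two ReBAC flags, (allows_public_read, allows_anonymous_read), before writing them via the RebacService. A minimal self-contained sketch of that mapping, using simplified stand-ins rather than the actual kamu_adapter_graphql / kamu_auth_rebac types:

/// Simplified stand-in for the DatasetVisibilityInput one-of object.
enum DatasetVisibilityInput {
    Private,
    Public { anonymous_available: bool },
}

/// Returns (allows_public_read, allows_anonymous_read) for a visibility choice:
/// private datasets clear both flags, public datasets always allow public reads
/// and optionally allow anonymous reads.
fn visibility_to_rebac_flags(visibility: &DatasetVisibilityInput) -> (bool, bool) {
    match visibility {
        DatasetVisibilityInput::Private => (false, false),
        DatasetVisibilityInput::Public { anonymous_available } => (true, *anonymous_available),
    }
}

fn main() {
    assert_eq!(visibility_to_rebac_flags(&DatasetVisibilityInput::Private), (false, false));
    assert_eq!(
        visibility_to_rebac_flags(&DatasetVisibilityInput::Public { anonymous_available: false }),
        (true, false)
    );
}
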
- #[allow(unused_variables)] #[graphql(skip)] async fn create_from_snapshot_impl( &self, diff --git a/src/adapter/graphql/src/queries/accounts/accounts.rs b/src/adapter/graphql/src/queries/accounts/accounts.rs index bc1216511..e163ab715 100644 --- a/src/adapter/graphql/src/queries/accounts/accounts.rs +++ b/src/adapter/graphql/src/queries/accounts/accounts.rs @@ -19,8 +19,6 @@ pub struct Accounts; #[Object] impl Accounts { /// Returns account by its ID - #[allow(unused_variables)] - #[allow(clippy::unused_async)] async fn by_id(&self, ctx: &Context<'_>, account_id: AccountID) -> Result> { let authentication_service = from_catalog::(ctx).unwrap(); diff --git a/src/adapter/graphql/src/queries/datasets/dataset.rs b/src/adapter/graphql/src/queries/datasets/dataset.rs index 27129c282..28ff9448c 100644 --- a/src/adapter/graphql/src/queries/datasets/dataset.rs +++ b/src/adapter/graphql/src/queries/datasets/dataset.rs @@ -15,6 +15,8 @@ use crate::prelude::*; use crate::queries::*; use crate::utils::ensure_dataset_env_vars_enabled; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[derive(Debug, Clone)] pub struct Dataset { owner: Account, @@ -137,12 +139,13 @@ impl Dataset { /// Permissions of the current user async fn permissions(&self, ctx: &Context<'_>) -> Result { use kamu_core::auth; + let dataset_action_authorizer = from_catalog::(ctx).unwrap(); let allowed_actions = dataset_action_authorizer .get_allowed_actions(&self.dataset_handle) - .await; + .await?; let can_read = allowed_actions.contains(&auth::DatasetAction::Read); let can_write = allowed_actions.contains(&auth::DatasetAction::Write); @@ -163,6 +166,8 @@ impl Dataset { } } +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[derive(SimpleObject, Debug, Clone, PartialEq, Eq)] pub struct DatasetPermissions { can_view: bool, @@ -171,3 +176,5 @@ pub struct DatasetPermissions { can_commit: bool, can_schedule: bool, } + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/adapter/graphql/src/queries/datasets/dataset_metadata.rs b/src/adapter/graphql/src/queries/datasets/dataset_metadata.rs index f19ccd3db..968293eb9 100644 --- a/src/adapter/graphql/src/queries/datasets/dataset_metadata.rs +++ b/src/adapter/graphql/src/queries/datasets/dataset_metadata.rs @@ -23,6 +23,8 @@ use crate::prelude::*; use crate::queries::*; use crate::scalars::DatasetPushStatuses; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + pub struct DatasetMetadata { dataset_handle: odf::DatasetHandle, } @@ -272,3 +274,5 @@ impl DatasetMetadata { .map(Into::into)) } } + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/adapter/graphql/src/queries/datasets/datasets.rs b/src/adapter/graphql/src/queries/datasets/datasets.rs index 727dc5d4c..e807aadab 100644 --- a/src/adapter/graphql/src/queries/datasets/datasets.rs +++ b/src/adapter/graphql/src/queries/datasets/datasets.rs @@ -43,7 +43,6 @@ impl Datasets { } /// Returns dataset by its owner and name - #[allow(unused_variables)] async fn by_owner_and_name( &self, ctx: &Context<'_>, @@ -102,8 +101,6 @@ impl Datasets { } /// Returns datasets belonging to the specified account - #[allow(unused_variables)] - #[allow(clippy::unused_async)] 
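Note on Dataset::permissions above: it now propagates authorizer errors (get_allowed_actions returns a Result) and derives the permission flags from the allowed-action set. A self-contained sketch of that derivation with toy types; the exact flag mapping (read grants viewing, write grants the mutating permissions) is an assumption for illustration:

use std::collections::HashSet;

#[derive(Hash, PartialEq, Eq)]
enum DatasetAction {
    Read,
    Write,
}

#[derive(Debug, PartialEq, Eq)]
struct DatasetPermissions {
    can_view: bool,
    can_delete: bool,
    can_rename: bool,
    can_commit: bool,
    can_schedule: bool,
}

/// Collapses the allowed-action set into coarse permission flags.
fn permissions_from_actions(allowed: &HashSet<DatasetAction>) -> DatasetPermissions {
    let can_read = allowed.contains(&DatasetAction::Read);
    let can_write = allowed.contains(&DatasetAction::Write);

    DatasetPermissions {
        can_view: can_read,
        can_delete: can_write,
        can_rename: can_write,
        can_commit: can_write,
        can_schedule: can_write,
    }
}

fn main() {
    let allowed: HashSet<_> = [DatasetAction::Read].into_iter().collect();
    let perms = permissions_from_actions(&allowed);
    assert!(perms.can_view && !perms.can_commit);
}
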
async fn by_account_id( &self, ctx: &Context<'_>, @@ -139,7 +136,6 @@ impl Datasets { } /// Returns datasets belonging to the specified account - #[allow(unused_variables)] async fn by_account_name( &self, ctx: &Context<'_>, diff --git a/src/adapter/graphql/src/queries/search.rs b/src/adapter/graphql/src/queries/search.rs index 363752b3f..f8aa5e336 100644 --- a/src/adapter/graphql/src/queries/search.rs +++ b/src/adapter/graphql/src/queries/search.rs @@ -13,6 +13,7 @@ use kamu_core::{self as domain, TryStreamExtExt}; use crate::prelude::*; use crate::queries::{Account, Dataset}; +use crate::utils::from_catalog_n; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Search @@ -32,13 +33,19 @@ impl Search { page: Option, per_page: Option, ) -> Result { - let dataset_registry = from_catalog::(ctx).unwrap(); - let dataset_action_authorizer = - from_catalog::(ctx).unwrap(); + let (dataset_registry, dataset_action_authorizer) = from_catalog_n!( + ctx, + dyn domain::DatasetRegistry, + dyn domain::auth::DatasetActionAuthorizer + ); let page = page.unwrap_or(0); let per_page = per_page.unwrap_or(Self::DEFAULT_RESULTS_PER_PAGE); + // TODO: Private Datasets: PERF: find a way to narrow down the number of records + // to filter, e.g.: + // - Anonymous: get all the public + // - Logged: all owned datasets and datasets with relations let filtered_dataset_handles: Vec<_> = dataset_registry .all_dataset_handles() .filter_ok(|hdl| hdl.alias.dataset_name.contains(&query)) diff --git a/src/adapter/graphql/src/scalars/account.rs b/src/adapter/graphql/src/scalars/account.rs index ba29fcc7d..a9f206b9b 100644 --- a/src/adapter/graphql/src/scalars/account.rs +++ b/src/adapter/graphql/src/scalars/account.rs @@ -40,7 +40,7 @@ impl From<&AccountID> for odf::AccountID { impl From for String { fn from(val: AccountID) -> Self { - val.0.as_did_str().to_string() + val.0.to_string() } } diff --git a/src/adapter/graphql/src/scalars/dataset_id_name.rs b/src/adapter/graphql/src/scalars/dataset_id_name.rs index 73feec314..2e9d43865 100644 --- a/src/adapter/graphql/src/scalars/dataset_id_name.rs +++ b/src/adapter/graphql/src/scalars/dataset_id_name.rs @@ -35,7 +35,7 @@ impl From for odf::DatasetID { impl From for String { fn from(val: DatasetID) -> Self { - val.0.as_did_str().to_string() + val.0.to_string() } } diff --git a/src/adapter/graphql/src/utils.rs b/src/adapter/graphql/src/utils.rs index b332fde89..94ede247d 100644 --- a/src/adapter/graphql/src/utils.rs +++ b/src/adapter/graphql/src/utils.rs @@ -22,7 +22,8 @@ use crate::prelude::{AccessTokenID, AccountID, AccountName}; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// TODO: Return gql-specific error and get rid of unwraps +// TODO: Replace with from_catalog_n!() macro +// Return gql-specific error and get rid of unwraps pub(crate) fn from_catalog(ctx: &Context<'_>) -> Result, dill::InjectionError> where T: ?Sized + Send + Sync + 'static, @@ -33,6 +34,23 @@ where //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +macro_rules! from_catalog_n { + ($gql_ctx:ident, $T:ty ) => {{ + let catalog = $gql_ctx.data::().unwrap(); + + catalog.get_one::<$T>().int_err()? + }}; + ($gql_ctx:ident, $T:ty, $($Ts:ty),+) => {{ + let catalog = $gql_ctx.data::().unwrap(); + + ( catalog.get_one::<$T>().int_err()?, $( catalog.get_one::<$Ts>().int_err()? 
),+ ) + }}; +} + +pub(crate) use from_catalog_n; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + pub(crate) fn get_dataset(ctx: &Context<'_>, dataset_handle: &DatasetHandle) -> ResolvedDataset { let dataset_registry = from_catalog::(ctx).unwrap(); dataset_registry.get_dataset_by_handle(dataset_handle) @@ -233,3 +251,5 @@ impl From for async_graphql::Error { } } } + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/adapter/graphql/tests/tests/test_auth.rs b/src/adapter/graphql/tests/tests/test_auth.rs index 1a7a601b6..09c5b01f6 100644 --- a/src/adapter/graphql/tests/tests/test_auth.rs +++ b/src/adapter/graphql/tests/tests/test_auth.rs @@ -303,7 +303,7 @@ async fn test_revoke_access_token() { "auth": { "revokeAccessToken": { "__typename": "RevokeResultSuccess", - "message": "Access token revoked succesfully" + "message": "Access token revoked successfully" } } }) diff --git a/src/adapter/graphql/tests/tests/test_gql_datasets.rs b/src/adapter/graphql/tests/tests/test_gql_datasets.rs index e2fa28010..b3b1e2ab5 100644 --- a/src/adapter/graphql/tests/tests/test_gql_datasets.rs +++ b/src/adapter/graphql/tests/tests/test_gql_datasets.rs @@ -7,7 +7,6 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -use async_graphql::*; use database_common::NoOpDatabasePlugin; use dill::Component; use indoc::indoc; @@ -15,6 +14,8 @@ use kamu::testing::MetadataFactory; use kamu::*; use kamu_accounts::testing::MockAuthenticationService; use kamu_accounts::*; +use kamu_auth_rebac_inmem::InMemoryRebacRepository; +use kamu_auth_rebac_services::{MultiTenantRebacDatasetLifecycleMessageConsumer, RebacServiceImpl}; use kamu_core::*; use messaging_outbox::{register_message_dispatcher, Outbox, OutboxImmediateImpl}; use mockall::predicate::eq; @@ -25,10 +26,102 @@ use time_source::SystemTimeSourceDefault; use crate::utils::{authentication_catalogs, expect_anonymous_access_error}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Implementations +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +macro_rules! test_dataset_create_empty_without_visibility { + ($tenancy_config:expr) => { + let harness = GraphQLDatasetsHarness::new($tenancy_config).await; + + let request_code = indoc::indoc!( + r#" + mutation { + datasets { + createEmpty(datasetKind: ROOT, datasetAlias: "foo") { + ... on CreateDatasetResultSuccess { + dataset { + name + alias + } + } + } + } + } + "# + ); + + expect_anonymous_access_error(harness.execute_anonymous_query(request_code).await); + + let res = harness.execute_authorized_query(request_code).await; + + assert!(res.is_ok(), "{res:?}"); + pretty_assertions::assert_eq!( + async_graphql::value!({ + "datasets": { + "createEmpty": { + "dataset": { + "name": "foo", + "alias": "foo", + } + } + } + }), + res.data, + ); + }; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +macro_rules! 
test_dataset_create_empty_public { + ($tenancy_config:expr) => { + let harness = GraphQLDatasetsHarness::new($tenancy_config).await; + + let request_code = indoc::indoc!( + r#" + mutation { + datasets { + createEmpty(datasetKind: ROOT, datasetAlias: "foo", datasetVisibility: PUBLIC) { + ... on CreateDatasetResultSuccess { + dataset { + name + alias + } + } + } + } + } + "# + ); + + expect_anonymous_access_error(harness.execute_anonymous_query(request_code).await); + + let res = harness.execute_authorized_query(request_code).await; + + assert!(res.is_ok(), "{res:?}"); + pretty_assertions::assert_eq!( + async_graphql::value!({ + "datasets": { + "createEmpty": { + "dataset": { + "name": "foo", + "alias": "foo", + } + } + } + }), + res.data, + ); + }; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Tests //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[test_log::test(tokio::test)] -async fn dataset_by_id_does_not_exist() { +async fn test_dataset_by_id_does_not_exist() { let harness = GraphQLDatasetsHarness::new(TenancyConfig::SingleTenant).await; let res = harness.execute_anonymous_query(indoc!( r#" @@ -42,21 +135,22 @@ async fn dataset_by_id_does_not_exist() { "# )) .await; + assert!(res.is_ok(), "{res:?}"); - assert_eq!( - res.data, - value!({ + pretty_assertions::assert_eq!( + async_graphql::value!({ "datasets": { "byId": null, } - }) + }), + res.data, ); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[test_log::test(tokio::test)] -async fn dataset_by_id() { +async fn test_dataset_by_id() { let harness = GraphQLDatasetsHarness::new(TenancyConfig::SingleTenant).await; let foo_result = harness @@ -82,23 +176,24 @@ async fn dataset_by_id() { ), ) .await; + assert!(res.is_ok(), "{res:?}"); - assert_eq!( - res.data, - value!({ + pretty_assertions::assert_eq!( + async_graphql::value!({ "datasets": { "byId": { "name": "foo", } } - }) + }), + res.data, ); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[test_log::test(tokio::test)] -async fn dataset_by_account_and_name_case_insensitive() { +async fn test_dataset_by_account_and_name_case_insensitive() { let account_name = AccountName::new_unchecked("KaMu"); let mut mock_authentication_service = MockAuthenticationService::new(); @@ -137,10 +232,10 @@ async fn dataset_by_account_and_name_case_insensitive() { .replace("", "FoO"), ) .await; + assert!(res.is_ok(), "{res:?}"); - assert_eq!( - res.data, - value!({ + pretty_assertions::assert_eq!( + async_graphql::value!({ "datasets": { "byOwnerAndName": { "name": "Foo", @@ -149,14 +244,15 @@ async fn dataset_by_account_and_name_case_insensitive() { } } } - }) + }), + res.data, ); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[test_log::test(tokio::test)] -async fn dataset_by_account_id() { +async fn test_dataset_by_account_id() { let mut mock_authentication_service = MockAuthenticationService::new(); mock_authentication_service .expect_find_account_name_by_id() @@ -191,10 +287,10 @@ async fn dataset_by_account_id() { .replace("", DEFAULT_ACCOUNT_ID.to_string().as_str()), ) .await; + assert!(res.is_ok(), "{res:?}"); - assert_eq!( - res.data, - value!({ + pretty_assertions::assert_eq!( + async_graphql::value!({ "datasets": { 
"byAccountId": { "nodes": [ @@ -208,56 +304,43 @@ async fn dataset_by_account_id() { } } - }) + }), + res.data, ); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[test_log::test(tokio::test)] -async fn dataset_create_empty() { - let harness = GraphQLDatasetsHarness::new(TenancyConfig::SingleTenant).await; +async fn test_dataset_create_empty_without_visibility_st() { + test_dataset_create_empty_without_visibility!(TenancyConfig::SingleTenant); +} - let request_code = indoc::indoc!( - r#" - mutation { - datasets { - createEmpty (datasetKind: ROOT, datasetAlias: "foo") { - ... on CreateDatasetResultSuccess { - dataset { - name - alias - } - } - } - } - } - "# - ); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - expect_anonymous_access_error(harness.execute_anonymous_query(request_code).await); +#[test_log::test(tokio::test)] +async fn test_dataset_create_empty_without_visibility_mt() { + test_dataset_create_empty_without_visibility!(TenancyConfig::MultiTenant); +} - let res = harness.execute_authorized_query(request_code).await; - assert!(res.is_ok(), "{res:?}"); - assert_eq!( - res.data, - value!({ - "datasets": { - "createEmpty": { - "dataset": { - "name": "foo", - "alias": "foo", - } - } - } - }) - ); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[test_log::test(tokio::test)] +async fn test_dataset_create_empty_public_st() { + test_dataset_create_empty_public!(TenancyConfig::SingleTenant); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[test_log::test(tokio::test)] +async fn test_dataset_create_empty_public_mt() { + test_dataset_create_empty_public!(TenancyConfig::MultiTenant); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[test_log::test(tokio::test)] -async fn dataset_create_from_snapshot() { +async fn test_dataset_create_from_snapshot() { let harness = GraphQLDatasetsHarness::new(TenancyConfig::MultiTenant).await; let snapshot = MetadataFactory::dataset_snapshot() @@ -294,10 +377,10 @@ async fn dataset_create_from_snapshot() { expect_anonymous_access_error(harness.execute_anonymous_query(request_code.clone()).await); let res = harness.execute_authorized_query(request_code).await; + assert!(res.is_ok(), "{res:?}"); - assert_eq!( - res.data, - value!({ + pretty_assertions::assert_eq!( + async_graphql::value!({ "datasets": { "createFromSnapshot": { "dataset": { @@ -306,14 +389,15 @@ async fn dataset_create_from_snapshot() { } } } - }) + }), + res.data, ); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[test_log::test(tokio::test)] -async fn dataset_create_from_snapshot_malformed() { +async fn test_dataset_create_from_snapshot_malformed() { let harness = GraphQLDatasetsHarness::new(TenancyConfig::SingleTenant).await; let res = harness @@ -331,23 +415,24 @@ async fn dataset_create_from_snapshot_malformed() { "# )) .await; + assert!(res.is_ok(), "{res:?}"); - assert_eq!( - res.data, - value!({ + pretty_assertions::assert_eq!( + async_graphql::value!({ "datasets": { "createFromSnapshot": { "__typename": "MetadataManifestMalformed", } } - }) + }), + res.data, ); } 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[test_log::test(tokio::test)] -async fn dataset_rename_success() { +async fn test_dataset_rename_success() { let harness = GraphQLDatasetsHarness::new(TenancyConfig::SingleTenant).await; let foo_result = harness @@ -378,10 +463,10 @@ async fn dataset_rename_success() { expect_anonymous_access_error(harness.execute_anonymous_query(request_code.clone()).await); let res = harness.execute_authorized_query(request_code).await; + assert!(res.is_ok(), "{res:?}"); - assert_eq!( - res.data, - value!({ + pretty_assertions::assert_eq!( + async_graphql::value!({ "datasets": { "byId": { "rename": { @@ -392,14 +477,15 @@ async fn dataset_rename_success() { } } } - }) + }), + res.data, ); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[test_log::test(tokio::test)] -async fn dataset_rename_no_changes() { +async fn test_dataset_rename_no_changes() { let harness = GraphQLDatasetsHarness::new(TenancyConfig::SingleTenant).await; let foo_result = harness @@ -429,10 +515,10 @@ async fn dataset_rename_no_changes() { .replace("", "foo"), ) .await; + assert!(res.is_ok(), "{res:?}"); - assert_eq!( - res.data, - value!({ + pretty_assertions::assert_eq!( + async_graphql::value!({ "datasets": { "byId": { "rename": { @@ -442,14 +528,15 @@ async fn dataset_rename_no_changes() { } } } - }) + }), + res.data, ); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[test_log::test(tokio::test)] -async fn dataset_rename_name_collision() { +async fn test_dataset_rename_name_collision() { let harness = GraphQLDatasetsHarness::new(TenancyConfig::SingleTenant).await; let foo_result = harness @@ -482,10 +569,10 @@ async fn dataset_rename_name_collision() { .replace("", "bar"), ) .await; + assert!(res.is_ok(), "{res:?}"); - assert_eq!( - res.data, - value!({ + pretty_assertions::assert_eq!( + async_graphql::value!({ "datasets": { "byId": { "rename": { @@ -495,14 +582,15 @@ async fn dataset_rename_name_collision() { } } } - }) + }), + res.data, ); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[test_log::test(tokio::test)] -async fn dataset_delete_success() { +async fn test_dataset_delete_success() { let harness = GraphQLDatasetsHarness::new(TenancyConfig::SingleTenant).await; harness.init_dependencies_graph().await; @@ -532,10 +620,10 @@ async fn dataset_delete_success() { expect_anonymous_access_error(harness.execute_anonymous_query(request_code.clone()).await); let res = harness.execute_authorized_query(request_code).await; + assert!(res.is_ok(), "{res:?}"); - assert_eq!( - res.data, - value!({ + pretty_assertions::assert_eq!( + async_graphql::value!({ "datasets": { "byId": { "delete": { @@ -545,14 +633,15 @@ async fn dataset_delete_success() { } } } - }) + }), + res.data, ); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[test_log::test(tokio::test)] -async fn dataset_delete_dangling_ref() { +async fn test_dataset_delete_dangling_ref() { let harness = GraphQLDatasetsHarness::new(TenancyConfig::SingleTenant).await; harness.init_dependencies_graph().await; @@ -589,10 +678,10 @@ async fn dataset_delete_dangling_ref() { .replace("", &foo_result.dataset_handle.id.to_string()), ) .await; + assert!(res.is_ok(), 
"{res:?}"); - assert_eq!( - res.data, - value!({ + pretty_assertions::assert_eq!( + async_graphql::value!({ "datasets": { "byId": { "delete": { @@ -603,14 +692,15 @@ async fn dataset_delete_dangling_ref() { } } } - }) + }), + res.data, ); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[test_log::test(tokio::test)] -async fn dataset_view_permissions() { +async fn test_dataset_view_permissions() { let harness = GraphQLDatasetsHarness::new(TenancyConfig::SingleTenant).await; let foo_result = harness @@ -637,10 +727,10 @@ async fn dataset_view_permissions() { .replace("", &foo_result.dataset_handle.id.to_string()); let res = harness.execute_authorized_query(request_code).await; + assert!(res.is_ok(), "{res:?}"); - assert_eq!( - res.data, - value!({ + pretty_assertions::assert_eq!( + async_graphql::value!({ "datasets": { "byId": { "permissions": { @@ -652,10 +742,13 @@ async fn dataset_view_permissions() { } } } - }) + }), + res.data, ); } +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Harness //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// struct GraphQLDatasetsHarness { @@ -683,7 +776,7 @@ impl GraphQLDatasetsHarness { b.add::() .add_builder( - messaging_outbox::OutboxImmediateImpl::builder() + OutboxImmediateImpl::builder() .with_consumer_filter(messaging_outbox::ConsumerFilter::AllConsumers), ) .bind::() @@ -698,7 +791,13 @@ impl GraphQLDatasetsHarness { .add::() .add_value(mock_authentication_service) .bind::() - .add::(); + .add::() + .add::() + .add::(); + + if tenancy_config == TenancyConfig::MultiTenant { + b.add::(); + } NoOpDatabasePlugin::init_database_components(&mut b); diff --git a/src/adapter/graphql/tests/utils/auth_utils.rs b/src/adapter/graphql/tests/utils/auth_utils.rs index c99ea262e..84e5b53c6 100644 --- a/src/adapter/graphql/tests/utils/auth_utils.rs +++ b/src/adapter/graphql/tests/utils/auth_utils.rs @@ -54,14 +54,15 @@ pub async fn authentication_catalogs( //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub fn expect_anonymous_access_error(response: async_graphql::Response) { - assert!(response.is_err()); - assert_eq!( + assert!(response.is_err(), "{response:#?}"); + + pretty_assertions::assert_eq!( + vec![ANONYMOUS_ACCESS_FORBIDDEN_MESSAGE.to_string()], response .errors .into_iter() .map(|e| e.message) .collect::>(), - vec![ANONYMOUS_ACCESS_FORBIDDEN_MESSAGE.to_string()] ); } diff --git a/src/adapter/http/Cargo.toml b/src/adapter/http/Cargo.toml index 727483d14..2f207961b 100644 --- a/src/adapter/http/Cargo.toml +++ b/src/adapter/http/Cargo.toml @@ -60,7 +60,6 @@ flate2 = "1" # GZip decoder futures = "0.3" headers = { version = "0.4", default-features = false } http = "1" -hyper = { version = "1", default-features = false, features = [] } indoc = "2" reqwest = { version = "0.12", default-features = false, features = [ "rustls-tls", @@ -120,8 +119,6 @@ serde_json = "1" tempfile = "3" test-group = { version = "1" } test-log = { version = "0.2", features = ["trace"] } -tracing-subscriber = { version = "0.3", features = ["env-filter"] } tower-http = { version = "0.6", features = ["trace", "cors"] } rand = "0.8" -sha3 = "0.10" mockall = "0.13" diff --git a/src/adapter/http/src/data/query_handler.rs b/src/adapter/http/src/data/query_handler.rs index 8060f0d12..2c53079e9 100644 
--- a/src/adapter/http/src/data/query_handler.rs +++ b/src/adapter/http/src/data/query_handler.rs @@ -70,7 +70,7 @@ use super::query_types::{QueryResponse, *}; /// } /// ``` /// -/// Currently we support verifiability by ensuring that queries are +/// Currently, we support verifiability by ensuring that queries are /// deterministic and fully reproducible and signing the original response with /// Node's private key. In future more types of proofs will be supported. /// diff --git a/src/adapter/http/src/middleware/dataset_authorization_layer.rs b/src/adapter/http/src/middleware/dataset_authorization_layer.rs index 43d393141..e8f9534d1 100644 --- a/src/adapter/http/src/middleware/dataset_authorization_layer.rs +++ b/src/adapter/http/src/middleware/dataset_authorization_layer.rs @@ -136,18 +136,14 @@ where { if let Err(err_result) = Self::check_logged_in(catalog) { tracing::error!( - "Dataset '{}' {} access denied: user not logged in", - dataset_ref, - action + "Dataset '{dataset_ref}' {action} access denied: user \ + not logged in", ); return Ok(CheckResult::ErrorResponse(err_result)); } tracing::error!( - "Dataset '{}' {} access denied: {:?}", - dataset_ref, - action, - err + "Dataset '{dataset_ref}' {action} access denied: {err:?}", ); return Ok(CheckResult::ErrorResponse( forbidden_access_response(), @@ -175,3 +171,5 @@ where }) } } + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/adapter/http/src/smart_protocol/axum_server_push_protocol.rs b/src/adapter/http/src/smart_protocol/axum_server_push_protocol.rs index be7ab9ebd..e495cdbcb 100644 --- a/src/adapter/http/src/smart_protocol/axum_server_push_protocol.rs +++ b/src/adapter/http/src/smart_protocol/axum_server_push_protocol.rs @@ -240,13 +240,13 @@ impl AxumServerPushProtocolInstance { Ok(create_result) => { self.maybe_dataset = Some(create_result.dataset); } - Err(ref _e @ CreateDatasetError::RefCollision(ref err)) => { + Err(CreateDatasetError::RefCollision(err)) => { return Err(PushServerError::RefCollision(RefCollisionError { - id: err.id.clone(), + id: err.id, })); } - Err(ref _e @ CreateDatasetError::NameCollision(ref err)) => { - return Err(PushServerError::NameCollision(err.clone())); + Err(CreateDatasetError::NameCollision(err)) => { + return Err(PushServerError::NameCollision(err)); } Err(e) => { return Err(PushServerError::Internal(PhaseInternalError { diff --git a/src/adapter/http/tests/tests/test_data_query.rs b/src/adapter/http/tests/tests/test_data_query.rs index 14b171992..36c85c528 100644 --- a/src/adapter/http/tests/tests/test_data_query.rs +++ b/src/adapter/http/tests/tests/test_data_query.rs @@ -407,7 +407,7 @@ async fn test_data_query_handler() { "datasets": [{ "alias": "kamu-server/population", "blockHash": head, - "id": harness.dataset_handle.id.as_did_str().to_string(), + "id": harness.dataset_handle.id.to_string(), }], }, "output": { @@ -453,7 +453,7 @@ async fn test_data_query_handler() { "datasets": [{ "alias": "kamu-server/population", "blockHash": head, - "id": harness.dataset_handle.id.as_did_str().to_string(), + "id": harness.dataset_handle.id.to_string(), }], }, "output": { @@ -609,7 +609,7 @@ async fn test_data_verify_handler() { "datasets": [{ "alias": "kamu-server/population", "blockHash": head, - "id": harness.dataset_handle.id.as_did_str().to_string(), + "id": harness.dataset_handle.id.to_string(), }], }, "output": { diff --git a/src/adapter/http/tests/tests/test_routing.rs 
b/src/adapter/http/tests/tests/test_routing.rs index 4d06f36f2..f7d7c62d1 100644 --- a/src/adapter/http/tests/tests/test_routing.rs +++ b/src/adapter/http/tests/tests/test_routing.rs @@ -102,7 +102,7 @@ where kamu_adapter_http::smart_transfer_protocol_router() .layer(kamu_adapter_http::DatasetResolverLayer::new( identity_extractor, - |_| false, /* does not mater for routing tests */ + |_| false, /* does not matter for routing tests */ )) .layer(axum::extract::Extension(catalog)), ) diff --git a/src/adapter/oauth/Cargo.toml b/src/adapter/oauth/Cargo.toml index d2a87b346..4d12f84f9 100644 --- a/src/adapter/oauth/Cargo.toml +++ b/src/adapter/oauth/Cargo.toml @@ -20,23 +20,22 @@ workspace = true [lib] doctest = false + [dependencies] internal-error = { workspace = true } -opendatafabric = { workspace = true } kamu-accounts = { workspace = true } +opendatafabric = { workspace = true } async-trait = "0.1" -chrono = "0.4" dill = "0.9" http = "1" reqwest = { version = "0.12", default-features = false, features = [ "rustls-tls", "json", ] } -serde = "1" -serde_json = "1" -thiserror = "1" +serde = { version = "1", default-features = false } +serde_json = { version = "1", default-features = false } +thiserror = { version = "1", default-features = false } + [dev-dependencies] -tokio = { version = "1", default-features = false, features = [] } -tracing = "0.1" diff --git a/src/adapter/oauth/src/lib.rs b/src/adapter/oauth/src/lib.rs index 7f059c6c6..9424a2c57 100644 --- a/src/adapter/oauth/src/lib.rs +++ b/src/adapter/oauth/src/lib.rs @@ -7,5 +7,6 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -pub mod oauth_github; +mod oauth_github; + pub use oauth_github::*; diff --git a/src/adapter/odata/Cargo.toml b/src/adapter/odata/Cargo.toml index 8539c313f..35db251d2 100644 --- a/src/adapter/odata/Cargo.toml +++ b/src/adapter/odata/Cargo.toml @@ -28,7 +28,7 @@ http-common = { workspace = true } internal-error = { workspace = true } kamu-accounts = { workspace = true } kamu-core = { workspace = true } -opendatafabric = { workspace = true } +opendatafabric = { workspace = true, default-features = false, features = ["arrow"] } axum = { version = "0.7", default-features = false, features = [] } chrono = { version = "0.4", default-features = false } @@ -37,8 +37,6 @@ datafusion-odata = { version = "42", default-features = false } dill = { version = "0.9" } futures = { version = "0.3", default-features = false } http = "1" -quick-xml = { version = "0.36", features = ["serialize"] } -serde = { version = "1", features = ["derive"] } tracing = "0.1" utoipa = { version = "5", default-features = false, features = [] } utoipa-axum = { version = "0.1", default-features = false, features = [] } @@ -49,7 +47,6 @@ kamu = { workspace = true, features = ["testing"] } messaging-outbox = { workspace = true } time-source = { workspace = true } -hyper = { version = "1", default-features = false } indoc = { version = "2" } pretty_assertions = { version = "1" } reqwest = { version = "0.12", default-features = false } diff --git a/src/app/cli/Cargo.toml b/src/app/cli/Cargo.toml index 1897422dd..5406b6221 100644 --- a/src/app/cli/Cargo.toml +++ b/src/app/cli/Cargo.toml @@ -54,7 +54,7 @@ time-source = { workspace = true } kamu = { workspace = true } kamu-data-utils = { workspace = true } -kamu-adapter-auth-oso = { workspace = true } +kamu-adapter-auth-oso-rebac = { workspace = true } kamu-adapter-flight-sql = { optional = true, workspace = true } kamu-adapter-graphql = { 
workspace = true } kamu-adapter-http = { workspace = true, features = [ @@ -122,9 +122,7 @@ async-graphql = { version = "7", features = [ ] } async-graphql-axum = "7" axum = { version = "0.7", features = ["ws"] } -axum-extra = { version = "0.9", features = ["async-read-body"] } http = "1" -hyper = "1" reqwest = { version = "0.12", default-features = false, features = [] } serde_json = "1" tonic = { version = "0.12", default-features = false } diff --git a/src/app/cli/src/app.rs b/src/app/cli/src/app.rs index ba3b1d6ca..e3c7722b9 100644 --- a/src/app/cli/src/app.rs +++ b/src/app/cli/src/app.rs @@ -16,6 +16,7 @@ use chrono::{DateTime, Duration, Utc}; use container_runtime::{ContainerRuntime, ContainerRuntimeConfig}; use database_common::DatabaseTransactionRunner; use dill::*; +use init_on_startup::InitOnStartup; use internal_error::{InternalError, ResultIntoInternal}; use kamu::domain::*; use kamu::*; @@ -23,9 +24,7 @@ use kamu_accounts::*; use kamu_accounts_services::PredefinedAccountsRegistrator; use kamu_adapter_http::{FileUploadLimitConfig, UploadServiceLocal}; use kamu_adapter_oauth::GithubAuthenticationConfig; -use kamu_auth_rebac_services::{MultiTenantRebacDatasetLifecycleMessageConsumer, RebacServiceImpl}; use kamu_datasets::DatasetEnvVar; -use kamu_datasets_services::{DatasetEntryIndexer, DatasetEntryServiceImpl}; use kamu_flow_system_inmem::domain::{FlowConfigurationUpdatedMessage, FlowProgressMessage}; use kamu_flow_system_services::{ MESSAGE_PRODUCER_KAMU_FLOW_CONFIGURATION_SERVICE, @@ -33,6 +32,7 @@ use kamu_flow_system_services::{ }; use kamu_task_system_inmem::domain::{TaskProgressMessage, MESSAGE_PRODUCER_KAMU_TASK_EXECUTOR}; use messaging_outbox::{register_message_dispatcher, Outbox, OutboxDispatchingImpl}; +use opendatafabric as odf; use time_source::{SystemTimeSource, SystemTimeSourceDefault, SystemTimeSourceStub}; use tracing::{warn, Instrument}; @@ -128,17 +128,26 @@ pub async fn run(workspace_layout: WorkspaceLayout, args: cli::Cli) -> Result<() tenancy_config, current_account.to_current_account_subject(), ); + let is_e2e_testing = args.e2e_output_data_path.is_some(); let mut base_catalog_builder = configure_base_catalog( &workspace_layout, tenancy_config, args.system_time.map(Into::into), - args.e2e_output_data_path.is_some(), + is_e2e_testing, ); - if workspace_svc.is_in_workspace() { - base_catalog_builder.add::(); - } + // NOTE: Register DatasetEntryIndexer in DI, since it is referenced by other + // components (via InitOnStartup) + // TODO: PERF: Do not register InitOnStartup-components if we are not inside the + // workspace + base_catalog_builder.add_builder( + kamu_datasets_services::DatasetEntryIndexer::builder() + .with_is_in_workspace(workspace_svc.is_in_workspace()), + ); + // The indexer has no other interfaces + base_catalog_builder + .bind::(); base_catalog_builder.add_value(JwtAuthenticationConfig::load_from_env()); base_catalog_builder.add_value(GithubAuthenticationConfig::load_from_env()); @@ -171,7 +180,12 @@ pub async fn run(workspace_layout: WorkspaceLayout, args: cli::Cli) -> Result<() "Initializing {BINARY_NAME}" ); - register_config_in_catalog(&config, &mut base_catalog_builder, tenancy_config); + register_config_in_catalog( + &config, + &mut base_catalog_builder, + tenancy_config, + is_e2e_testing, + ); let base_catalog = base_catalog_builder.build(); @@ -492,24 +506,20 @@ pub fn configure_base_catalog( b.add::(); b.add::(); + b.add::(); b.add::(); // Give both CLI and server access to stored repo access tokens b.add::(); b.add::(); - b.add::(); 
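Note on the app.rs changes above: register_config_in_catalog gains an is_e2e_testing flag (derived from the e2e output data path being present on the command line) and, in multi-tenant mode, appends an extra predefined "e2e-user" account. A simplified sketch of that conditional registration; the config types here are toy stand-ins for the kamu_accounts config structs, and the default account name is taken as a parameter rather than resolved from AccountService:

#[derive(Debug)]
struct AccountConfig {
    name: String,
}

#[derive(Debug, Default)]
struct PredefinedAccountsConfig {
    predefined: Vec<AccountConfig>,
}

/// Builds the implicit predefined-accounts config for multi-tenant mode:
/// the default account is always present, the "e2e-user" account only when
/// the CLI runs with e2e output enabled.
fn implicit_accounts(default_account_name: &str, is_e2e_testing: bool) -> PredefinedAccountsConfig {
    let mut config = PredefinedAccountsConfig::default();

    config.predefined.push(AccountConfig {
        name: default_account_name.to_string(),
    });

    if is_e2e_testing {
        config.predefined.push(AccountConfig {
            name: "e2e-user".to_string(),
        });
    }

    config
}

fn main() {
    let config = implicit_accounts("kamu", /* is_e2e_testing */ true);
    assert_eq!(config.predefined.len(), 2);
}
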
- b.add::(); - - b.add::(); + kamu_auth_rebac_services::register_dependencies(&mut b, tenancy_config); - b.add::(); + kamu_adapter_auth_oso_rebac::register_dependencies(&mut b); - if tenancy_config == TenancyConfig::MultiTenant { - b.add::(); - } + b.add::(); - b.add::(); + b.add::(); b.add_builder( messaging_outbox::OutboxImmediateImpl::builder() @@ -613,6 +623,7 @@ pub fn register_config_in_catalog( config: &config::CLIConfig, catalog_builder: &mut CatalogBuilder, tenancy_config: TenancyConfig, + is_e2e_testing: bool, ) { let network_ns = config.engine.as_ref().unwrap().network_ns.unwrap(); @@ -729,7 +740,7 @@ pub fn register_config_in_catalog( if tenancy_config == TenancyConfig::MultiTenant { let mut implicit_user_config = PredefinedAccountsConfig::new(); implicit_user_config.predefined.push( - AccountConfig::from_name(opendatafabric::AccountName::new_unchecked( + AccountConfig::from_name(odf::AccountName::new_unchecked( AccountService::default_account_name(TenancyConfig::MultiTenant).as_str(), )) .set_display_name(AccountService::default_user_name( @@ -737,6 +748,13 @@ pub fn register_config_in_catalog( )), ); + if is_e2e_testing { + let e2e_user_config = + AccountConfig::from_name(odf::AccountName::new_unchecked("e2e-user")); + + implicit_user_config.predefined.push(e2e_user_config); + } + use merge::Merge; let mut user_config = config.users.clone().unwrap(); user_config.merge(implicit_user_config); diff --git a/src/app/cli/src/commands/list_command.rs b/src/app/cli/src/commands/list_command.rs index 92cd8b569..5abb0dcd9 100644 --- a/src/app/cli/src/commands/list_command.rs +++ b/src/app/cli/src/commands/list_command.rs @@ -283,7 +283,7 @@ impl Command for ListCommand { .into_event() .and_then(|event| event.new_watermark.map(|t| t.timestamp_micros())); - id.push(hdl.id.as_did_str().to_string()); + id.push(hdl.id.to_string()); head.push(current_head.as_multibase().to_string()); blocks.push(num_blocks); watermark.push(last_watermark); diff --git a/src/app/cli/tests/tests/test_di_graph.rs b/src/app/cli/tests/tests/test_di_graph.rs index da654ebf8..43264b8c7 100644 --- a/src/app/cli/tests/tests/test_di_graph.rs +++ b/src/app/cli/tests/tests/test_di_graph.rs @@ -57,6 +57,7 @@ fn test_di_cli_graph_validates(tenancy_config: TenancyConfig) { &kamu_cli::config::CLIConfig::default(), &mut base_catalog_builder, tenancy_config, + false, ); base_catalog_builder.add_value(Interact::new(false, false)); let base_catalog = base_catalog_builder.build(); @@ -90,6 +91,7 @@ fn test_di_server_graph_validates(tenancy_config: TenancyConfig) { &kamu_cli::config::CLIConfig::default(), &mut base_catalog_builder, tenancy_config, + false, ); let base_catalog = base_catalog_builder.build(); diff --git a/src/domain/accounts/domain/Cargo.toml b/src/domain/accounts/domain/Cargo.toml index cd65c65c1..8fca119f4 100644 --- a/src/domain/accounts/domain/Cargo.toml +++ b/src/domain/accounts/domain/Cargo.toml @@ -46,7 +46,6 @@ reusable = "0.1" serde = "1" serde_with = { version = "3", default-features = false } thiserror = { version = "1", default-features = false } -tracing = { version = "0.1", default-features = false } uuid = { version = "1", default-features = false, features = ["v4"] } # Optional diff --git a/src/domain/accounts/domain/src/entities/account.rs b/src/domain/accounts/domain/src/entities/account.rs index c2e627927..3ac52005d 100644 --- a/src/domain/accounts/domain/src/entities/account.rs +++ b/src/domain/accounts/domain/src/entities/account.rs @@ -39,7 +39,7 @@ pub struct Account { pub account_type: 
AccountType, pub avatar_url: Option, pub registered_at: DateTime, - // TODO: ReBAC: absorb the `is_admin` attribute from the Accounts domain + // TODO: Private Datasets: absorb the `is_admin` attribute from the Accounts domain // https://github.com/kamu-data/kamu-cli/issues/766 pub is_admin: bool, pub provider: String, diff --git a/src/domain/accounts/domain/src/repos/account_repository.rs b/src/domain/accounts/domain/src/repos/account_repository.rs index 5e43f3d66..cea6ad3aa 100644 --- a/src/domain/accounts/domain/src/repos/account_repository.rs +++ b/src/domain/accounts/domain/src/repos/account_repository.rs @@ -9,6 +9,7 @@ use std::fmt::Display; +use database_common::{EntityPageStream, PaginationOpts}; use internal_error::InternalError; use opendatafabric::{AccountID, AccountName}; use thiserror::Error; @@ -19,8 +20,14 @@ use crate::Account; #[async_trait::async_trait] pub trait AccountRepository: Send + Sync { + // TODO: Private Datasets: tests + async fn accounts_count(&self) -> Result; + async fn create_account(&self, account: &Account) -> Result<(), CreateAccountError>; + // TODO: Private Datasets: tests + async fn get_accounts(&self, pagination: PaginationOpts) -> AccountPageStream; + async fn get_account_by_id( &self, account_id: &AccountID, @@ -54,6 +61,20 @@ pub trait AccountRepository: Send + Sync { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +pub type AccountPageStream<'a> = EntityPageStream<'a, Account>; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Errors +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Error, Debug)] +pub enum AccountsCountError { + #[error(transparent)] + Internal(#[from] InternalError), +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[derive(Error, Debug)] pub enum CreateAccountError { #[error(transparent)] @@ -94,6 +115,14 @@ impl Display for CreateAccountDuplicateField { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#[derive(Error, Debug)] +pub enum GetAccountsError { + #[error(transparent)] + Internal(#[from] InternalError), +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[derive(Error, Debug)] pub enum GetAccountByIdError { #[error(transparent)] diff --git a/src/domain/accounts/domain/src/services/account_service.rs b/src/domain/accounts/domain/src/services/account_service.rs new file mode 100644 index 000000000..8bd680027 --- /dev/null +++ b/src/domain/accounts/domain/src/services/account_service.rs @@ -0,0 +1,40 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
+ +use database_common::{EntityPageListing, PaginationOpts}; +use internal_error::InternalError; +use thiserror::Error; + +use crate::{Account, AccountPageStream}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +// TODO: Private Datasets: tests +#[async_trait::async_trait] +pub trait AccountService: Sync + Send { + // TODO: Private Datasets: extract to AccountRegistry? + fn all_accounts(&self) -> AccountPageStream; + + async fn list_all_accounts( + &self, + pagination: PaginationOpts, + ) -> Result, ListAccountError>; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Error +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Error)] +pub enum ListAccountError { + #[error(transparent)] + Internal(#[from] InternalError), +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/accounts/domain/src/services/mod.rs b/src/domain/accounts/domain/src/services/mod.rs index 6ef779bbe..a68c33032 100644 --- a/src/domain/accounts/domain/src/services/mod.rs +++ b/src/domain/accounts/domain/src/services/mod.rs @@ -8,12 +8,14 @@ // by the Apache License, Version 2.0. mod access_token_service; +mod account_service; mod authentication_config; mod authentication_errors; mod authentication_provider; mod authentication_service; pub use access_token_service::*; +pub use account_service::*; pub use authentication_config::*; pub use authentication_errors::*; pub use authentication_provider::*; diff --git a/src/domain/accounts/services/Cargo.toml b/src/domain/accounts/services/Cargo.toml index d8ec8bd2c..f49f5e275 100644 --- a/src/domain/accounts/services/Cargo.toml +++ b/src/domain/accounts/services/Cargo.toml @@ -28,14 +28,13 @@ internal-error = { workspace = true } kamu-accounts = { workspace = true } opendatafabric = { workspace = true } time-source = { workspace = true } -random-names = { workspace = true } argon2 = { version = "0.5" } async-trait = { version = "0.1", default-features = false } chrono = { version = "0.4", default-features = false } dill = "0.9" +futures = { version = "0.3", default-features = false } jsonwebtoken = "9" -thiserror = { version = "1", default-features = false } password-hash = { version = "0.5", default-features = false } serde = "1" serde_json = "1" diff --git a/src/domain/accounts/services/src/account_service_impl.rs b/src/domain/accounts/services/src/account_service_impl.rs new file mode 100644 index 000000000..c1bd8b168 --- /dev/null +++ b/src/domain/accounts/services/src/account_service_impl.rs @@ -0,0 +1,72 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
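Note on the AccountServiceImpl that follows: it combines accounts_count() with a paged get_accounts() call to produce an EntityPageListing, and exposes all_accounts() as a page-by-page stream via EntityPageStreamer. A minimal synchronous sketch of the same offset/limit paging contract, with toy stand-ins for the repository and listing types (the PaginationOpts fields offset and limit are assumptions for illustration):

/// Assumed shape of the pagination options.
#[derive(Clone, Copy)]
struct PaginationOpts {
    offset: usize,
    limit: usize,
}

#[derive(Debug, Clone)]
struct Account {
    name: String,
}

/// Toy listing type mirroring EntityPageListing: one page plus the total count.
struct EntityPageListing<T> {
    list: Vec<T>,
    total_count: usize,
}

struct InMemoryAccountRepository {
    accounts: Vec<Account>,
}

impl InMemoryAccountRepository {
    fn accounts_count(&self) -> usize {
        self.accounts.len()
    }

    fn get_accounts(&self, pagination: PaginationOpts) -> Vec<Account> {
        self.accounts
            .iter()
            .skip(pagination.offset)
            .take(pagination.limit)
            .cloned()
            .collect()
    }
}

/// One page of the listing: a single count query plus a single page query.
fn list_accounts_page(
    repo: &InMemoryAccountRepository,
    pagination: PaginationOpts,
) -> EntityPageListing<Account> {
    EntityPageListing {
        total_count: repo.accounts_count(),
        list: repo.get_accounts(pagination),
    }
}

fn main() {
    let repo = InMemoryAccountRepository {
        accounts: (0..5).map(|i| Account { name: format!("acc-{i}") }).collect(),
    };

    // Walk the listing page by page, advancing the offset by the page limit.
    let mut pagination = PaginationOpts { offset: 0, limit: 2 };
    let mut seen = 0;
    loop {
        let page = list_accounts_page(&repo, pagination);
        seen += page.list.len();
        if pagination.offset + page.list.len() >= page.total_count {
            break;
        }
        pagination.offset += pagination.limit;
    }
    assert_eq!(seen, 5);
}
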
+ +use std::sync::Arc; + +use database_common::{EntityPageListing, EntityPageStreamer, PaginationOpts}; +use dill::*; +use internal_error::ResultIntoInternal; +use kamu_accounts::{ + Account, + AccountPageStream, + AccountRepository, + AccountService, + ListAccountError, +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub struct AccountServiceImpl { + account_repo: Arc, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[component(pub)] +#[interface(dyn AccountService)] +impl AccountServiceImpl { + pub fn new(account_repo: Arc) -> Self { + Self { account_repo } + } +} + +#[async_trait::async_trait] +impl AccountService for AccountServiceImpl { + fn all_accounts(&self) -> AccountPageStream { + EntityPageStreamer::default().into_stream( + || async { Ok(()) }, + |_, pagination| { + let list_fut = self.list_all_accounts(pagination); + async { list_fut.await.int_err() } + }, + ) + } + + async fn list_all_accounts( + &self, + pagination: PaginationOpts, + ) -> Result, ListAccountError> { + use futures::TryStreamExt; + + let total_count = self.account_repo.accounts_count().await.int_err()?; + let entries = self + .account_repo + .get_accounts(pagination) + .await + .try_collect() + .await?; + + Ok(EntityPageListing { + list: entries, + total_count, + }) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/accounts/services/src/lib.rs b/src/domain/accounts/services/src/lib.rs index 0658522cf..072eb3fc1 100644 --- a/src/domain/accounts/services/src/lib.rs +++ b/src/domain/accounts/services/src/lib.rs @@ -13,11 +13,13 @@ pub use kamu_accounts as domain; mod access_token_service_impl; +mod account_service_impl; mod authentication_service_impl; mod login_password_auth_provider; mod predefined_accounts_registrator; pub use access_token_service_impl::*; +pub use account_service_impl::*; pub use authentication_service_impl::*; pub use login_password_auth_provider::*; pub use predefined_accounts_registrator::*; diff --git a/src/domain/auth-rebac/domain/src/entities/property.rs b/src/domain/auth-rebac/domain/src/entities/property.rs index 5bf471b60..222b2f07f 100644 --- a/src/domain/auth-rebac/domain/src/entities/property.rs +++ b/src/domain/auth-rebac/domain/src/entities/property.rs @@ -18,6 +18,9 @@ pub const PROPERTY_GROUP_SEPARATOR: &str = "/"; const PROPERTY_GROUP_DATASET: &str = "dataset"; const PROPERTY_GROUP_ACCOUNT: &str = "account"; +pub const PROPERTY_VALUE_BOOLEAN_TRUE: &str = "true"; +pub const PROPERTY_VALUE_BOOLEAN_FALSE: &str = "false"; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub type PropertyValue<'a> = Cow<'a, str>; @@ -32,15 +35,21 @@ pub enum PropertyName { impl PropertyName { pub fn dataset_allows_anonymous_read<'a>(allows: bool) -> (Self, PropertyValue<'a>) { - let property = DatasetPropertyName::allows_anonymous_read(allows); + let (name, value) = DatasetPropertyName::allows_anonymous_read(allows); - (Self::Dataset(property.0), property.1) + (Self::Dataset(name), value) } pub fn dataset_allows_public_read<'a>(allows: bool) -> (Self, PropertyValue<'a>) { - let property = DatasetPropertyName::allows_public_read(allows); + let (name, value) = DatasetPropertyName::allows_public_read(allows); + + (Self::Dataset(name), value) + } + + pub fn 
account_is_admin<'a>(yes: bool) -> (Self, PropertyValue<'a>) { + let (name, value) = AccountPropertyName::is_admin(yes); - (Self::Dataset(property.0), property.1) + (Self::Account(name), value) } pub fn property_group(&self) -> &'static str { @@ -118,13 +127,13 @@ pub enum DatasetPropertyName { impl DatasetPropertyName { pub fn allows_anonymous_read<'a>(allows: bool) -> (Self, PropertyValue<'a>) { - let value = if allows { "true" } else { "false" }; + let value = boolean_property_value(allows); (DatasetPropertyName::AllowsAnonymousRead, value.into()) } pub fn allows_public_read<'a>(allows: bool) -> (Self, PropertyValue<'a>) { - let value = if allows { "true" } else { "false" }; + let value = boolean_property_value(allows); (DatasetPropertyName::AllowsPublicRead, value.into()) } @@ -143,11 +152,19 @@ impl From for PropertyName { )] #[strum(serialize_all = "snake_case")] pub enum AccountPropertyName { - // TODO: ReBAC: absorb the `is_admin` attribute from the Accounts domain + // TODO: Private Datasets: absorb the `is_admin` attribute from the Accounts domain // https://github.com/kamu-data/kamu-cli/issues/766 IsAnAdmin, } +impl AccountPropertyName { + pub fn is_admin<'a>(yes: bool) -> (Self, PropertyValue<'a>) { + let value = boolean_property_value(yes); + + (AccountPropertyName::IsAnAdmin, value.into()) + } +} + impl From for PropertyName { fn from(value: AccountPropertyName) -> PropertyName { PropertyName::Account(value) @@ -176,3 +193,13 @@ impl TryFrom for (PropertyName, PropertyValue<'static>) { } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +fn boolean_property_value(value: bool) -> &'static str { + if value { + PROPERTY_VALUE_BOOLEAN_TRUE + } else { + PROPERTY_VALUE_BOOLEAN_FALSE + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/auth-rebac/domain/src/repos/rebac_repository.rs b/src/domain/auth-rebac/domain/src/repos/rebac_repository.rs index b03484657..2b902c47b 100644 --- a/src/domain/auth-rebac/domain/src/repos/rebac_repository.rs +++ b/src/domain/auth-rebac/domain/src/repos/rebac_repository.rs @@ -17,6 +17,8 @@ use crate::{Entity, EntityType, EntityWithRelation, PropertyName, PropertyValue, #[async_trait::async_trait] pub trait RebacRepository: Send + Sync { // Properties + // TODO: Private Datasets: tests + async fn properties_count(&self) -> Result; async fn set_entity_property( &self, @@ -41,6 +43,12 @@ pub trait RebacRepository: Send + Sync { entity: &Entity, ) -> Result, GetEntityPropertiesError>; + // TODO: Private Datasets: tests + async fn get_entity_properties_by_ids( + &self, + entities: &[Entity], + ) -> Result, GetEntityPropertiesError>; + // Relations async fn insert_entities_relation( @@ -79,6 +87,14 @@ pub trait RebacRepository: Send + Sync { // Errors //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#[derive(Error, Debug)] +pub enum PropertiesCountError { + #[error(transparent)] + Internal(#[from] InternalError), +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[derive(Error, Debug)] pub enum SetEntityPropertyError { #[error(transparent)] diff --git a/src/domain/auth-rebac/domain/src/services/rebac_service.rs b/src/domain/auth-rebac/domain/src/services/rebac_service.rs index 8b29db652..8f77c3f0d 100644 --- 
a/src/domain/auth-rebac/domain/src/services/rebac_service.rs +++ b/src/domain/auth-rebac/domain/src/services/rebac_service.rs @@ -7,8 +7,10 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. +use std::collections::HashMap; + use internal_error::InternalError; -use opendatafabric::{AccountID, DatasetID}; +use opendatafabric as odf; use thiserror::Error; use crate::{ @@ -17,8 +19,6 @@ use crate::{ DatasetPropertyName, EntityNotFoundError, EntityWithRelation, - GetEntityPropertiesError, - PropertyName, PropertyValue, SetEntityPropertyError, SubjectEntityRelationsError, @@ -31,67 +31,87 @@ pub trait RebacService: Send + Sync { // Account async fn set_account_property( &self, - account_id: &AccountID, + account_id: &odf::AccountID, property_name: AccountPropertyName, property_value: &PropertyValue, ) -> Result<(), SetEntityPropertyError>; async fn unset_account_property( &self, - account_id: &AccountID, + account_id: &odf::AccountID, property_name: AccountPropertyName, ) -> Result<(), UnsetEntityPropertyError>; async fn get_account_properties( &self, - account_id: &AccountID, - ) -> Result, GetEntityPropertiesError>; + account_id: &odf::AccountID, + ) -> Result; // Dataset async fn set_dataset_property( &self, - dataset_id: &DatasetID, + dataset_id: &odf::DatasetID, property_name: DatasetPropertyName, property_value: &PropertyValue, ) -> Result<(), SetEntityPropertyError>; async fn unset_dataset_property( &self, - dataset_id: &DatasetID, + dataset_id: &odf::DatasetID, property_name: DatasetPropertyName, ) -> Result<(), UnsetEntityPropertyError>; async fn delete_dataset_properties( &self, - dataset_id: &DatasetID, + dataset_id: &odf::DatasetID, ) -> Result<(), DeletePropertiesError>; async fn get_dataset_properties( &self, - dataset_id: &DatasetID, - ) -> Result, GetEntityPropertiesError>; + dataset_id: &odf::DatasetID, + ) -> Result; + + async fn get_dataset_properties_by_ids( + &self, + dataset_ids: &[odf::DatasetID], + ) -> Result, GetPropertiesError>; // Relations async fn insert_account_dataset_relation( &self, - account_id: &AccountID, + account_id: &odf::AccountID, relationship: AccountToDatasetRelation, - dataset_id: &DatasetID, + dataset_id: &odf::DatasetID, ) -> Result<(), InsertRelationError>; async fn delete_account_dataset_relation( &self, - account_id: &AccountID, + account_id: &odf::AccountID, relationship: AccountToDatasetRelation, - dataset_id: &DatasetID, + dataset_id: &odf::DatasetID, ) -> Result<(), DeleteRelationError>; async fn get_account_dataset_relations( &self, - account_id: &AccountID, + account_id: &odf::AccountID, ) -> Result, SubjectEntityRelationsError>; } +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Default)] +pub struct AccountProperties { + pub is_admin: bool, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Default)] +pub struct DatasetProperties { + pub allows_anonymous_read: bool, + pub allows_public_read: bool, +} + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Errors //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -115,6 +135,14 @@ pub enum DeletePropertiesError { 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#[derive(Error, Debug)] +pub enum GetPropertiesError { + #[error(transparent)] + Internal(#[from] InternalError), +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[derive(Error, Debug)] pub enum InsertRelationError { #[error(transparent)] diff --git a/src/domain/auth-rebac/services/Cargo.toml b/src/domain/auth-rebac/services/Cargo.toml index 91c5d7f0d..c9f3ecabc 100644 --- a/src/domain/auth-rebac/services/Cargo.toml +++ b/src/domain/auth-rebac/services/Cargo.toml @@ -22,9 +22,14 @@ doctest = false [dependencies] +database-common = { workspace = true } +init-on-startup = { workspace = true } internal-error = { workspace = true } +kamu-accounts = { workspace = true } kamu-auth-rebac = { workspace = true } kamu-core = { workspace = true } +kamu-datasets = { workspace = true } +kamu-datasets-services = { workspace = true } messaging-outbox = { workspace = true } opendatafabric = { workspace = true } diff --git a/src/domain/auth-rebac/services/src/dependencies.rs b/src/domain/auth-rebac/services/src/dependencies.rs new file mode 100644 index 000000000..10dbd085b --- /dev/null +++ b/src/domain/auth-rebac/services/src/dependencies.rs @@ -0,0 +1,26 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use dill::CatalogBuilder; +use kamu_core::TenancyConfig; + +use crate::*; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub fn register_dependencies(catalog_builder: &mut CatalogBuilder, tenancy_config: TenancyConfig) { + catalog_builder.add::(); + catalog_builder.add::(); + + if tenancy_config == TenancyConfig::MultiTenant { + catalog_builder.add::(); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/auth-rebac/services/src/jobs/mod.rs b/src/domain/auth-rebac/services/src/jobs/mod.rs new file mode 100644 index 000000000..e9ddfa018 --- /dev/null +++ b/src/domain/auth-rebac/services/src/jobs/mod.rs @@ -0,0 +1,14 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
+ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub const JOB_KAMU_REBAC_INDEXER: &str = "dev.kamu.rebac.RebacIndexer"; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/auth-rebac/services/src/lib.rs b/src/domain/auth-rebac/services/src/lib.rs index 3d9f0620f..f328f6fdf 100644 --- a/src/domain/auth-rebac/services/src/lib.rs +++ b/src/domain/auth-rebac/services/src/lib.rs @@ -10,10 +10,16 @@ // Re-exports pub use kamu_auth_rebac as domain; +mod dependencies; +mod jobs; mod messages; mod multi_tenant_rebac_dataset_lifecycle_message_consumer; +mod rebac_indexer; mod rebac_service_impl; +pub use dependencies::*; +pub use jobs::*; pub use messages::*; pub use multi_tenant_rebac_dataset_lifecycle_message_consumer::*; +pub use rebac_indexer::*; pub use rebac_service_impl::*; diff --git a/src/domain/auth-rebac/services/src/multi_tenant_rebac_dataset_lifecycle_message_consumer.rs b/src/domain/auth-rebac/services/src/multi_tenant_rebac_dataset_lifecycle_message_consumer.rs index 2c8fb3108..1323f2ae0 100644 --- a/src/domain/auth-rebac/services/src/multi_tenant_rebac_dataset_lifecycle_message_consumer.rs +++ b/src/domain/auth-rebac/services/src/multi_tenant_rebac_dataset_lifecycle_message_consumer.rs @@ -53,12 +53,19 @@ impl MultiTenantRebacDatasetLifecycleMessageConsumer { message: &DatasetLifecycleMessageCreated, ) -> Result<(), InternalError> { let allows = message.dataset_visibility.is_public(); - let (name, value) = DatasetPropertyName::allows_public_read(allows); - self.rebac_service - .set_dataset_property(&message.dataset_id, name, &value) - .await - .int_err() + for (name, value) in [ + DatasetPropertyName::allows_public_read(allows), + // TODO: Private Datasets: Read from a specific environment's config + DatasetPropertyName::allows_anonymous_read(false), + ] { + self.rebac_service + .set_dataset_property(&message.dataset_id, name, &value) + .await + .int_err()?; + } + + Ok(()) } async fn handle_dataset_lifecycle_deleted_message( diff --git a/src/domain/auth-rebac/services/src/rebac_indexer.rs b/src/domain/auth-rebac/services/src/rebac_indexer.rs new file mode 100644 index 000000000..f2fb26e9b --- /dev/null +++ b/src/domain/auth-rebac/services/src/rebac_indexer.rs @@ -0,0 +1,124 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
+ +use std::sync::Arc; + +use dill::{component, interface, meta}; +use init_on_startup::{InitOnStartup, InitOnStartupMeta}; +use internal_error::{InternalError, ResultIntoInternal}; +use kamu_accounts::{AccountService, JOB_KAMU_ACCOUNTS_PREDEFINED_ACCOUNTS_REGISTRATOR}; +use kamu_auth_rebac::{AccountPropertyName, DatasetPropertyName, RebacRepository, RebacService}; +use kamu_datasets::DatasetEntryService; +use kamu_datasets_services::JOB_KAMU_DATASETS_DATASET_ENTRY_INDEXER; + +use crate::JOB_KAMU_REBAC_INDEXER; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub struct RebacIndexer { + rebac_repo: Arc<dyn RebacRepository>, + rebac_service: Arc<dyn RebacService>, + dataset_entry_service: Arc<dyn DatasetEntryService>, + account_service: Arc<dyn AccountService>, +} + +#[component(pub)] +#[interface(dyn InitOnStartup)] +#[meta(InitOnStartupMeta { + job_name: JOB_KAMU_REBAC_INDEXER, + depends_on: &[ + JOB_KAMU_ACCOUNTS_PREDEFINED_ACCOUNTS_REGISTRATOR, + JOB_KAMU_DATASETS_DATASET_ENTRY_INDEXER + ], + requires_transaction: true, +})] +impl RebacIndexer { + pub fn new( + rebac_repo: Arc<dyn RebacRepository>, + rebac_service: Arc<dyn RebacService>, + dataset_entry_service: Arc<dyn DatasetEntryService>, + account_service: Arc<dyn AccountService>, + ) -> Self { + Self { + rebac_repo, + rebac_service, + dataset_entry_service, + account_service, + } + } + + async fn has_entities_indexed(&self) -> Result<bool, InternalError> { + let properties_count = self.rebac_repo.properties_count().await.int_err()?; + + Ok(properties_count > 0) + } + + async fn index_entities(&self) -> Result<(), InternalError> { + self.index_dataset_entries().await?; + self.index_accounts().await?; + + Ok(()) + } + + #[tracing::instrument(level = "debug", skip_all)] + async fn index_dataset_entries(&self) -> Result<(), InternalError> { + use futures::TryStreamExt; + + let mut dataset_entry_stream = self.dataset_entry_service.all_entries(); + + while let Some(dataset_entry) = dataset_entry_stream.try_next().await? { + for (name, value) in [ + DatasetPropertyName::allows_public_read(false), + DatasetPropertyName::allows_anonymous_read(false), + ] { + self.rebac_service + .set_dataset_property(&dataset_entry.id, name, &value) + .await + .int_err()?; + } + } + + Ok(()) + } + + #[tracing::instrument(level = "debug", skip_all)] + async fn index_accounts(&self) -> Result<(), InternalError> { + use futures::TryStreamExt; + + let mut accounts_stream = self.account_service.all_accounts(); + + while let Some(account) = accounts_stream.try_next().await? { + for (name, value) in [AccountPropertyName::is_admin(account.is_admin)] { + self.rebac_service + .set_account_property(&account.id, name, &value) + .await + .int_err()?; + } + } + + Ok(()) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl InitOnStartup for RebacIndexer { + #[tracing::instrument(level = "debug", skip_all, name = "RebacIndexer::run_initialization")] + async fn run_initialization(&self) -> Result<(), InternalError> { + if self.has_entities_indexed().await? 
{ + tracing::debug!("Skip initialization: entities have already been indexed"); + return Ok(()); + } + + self.index_entities().await + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/auth-rebac/services/src/rebac_service_impl.rs b/src/domain/auth-rebac/services/src/rebac_service_impl.rs index 563ca43d5..1d2870cb9 100644 --- a/src/domain/auth-rebac/services/src/rebac_service_impl.rs +++ b/src/domain/auth-rebac/services/src/rebac_service_impl.rs @@ -7,12 +7,16 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. +use std::collections::HashMap; use std::sync::Arc; use dill::{component, interface}; +use internal_error::{ErrorIntoInternal, ResultIntoInternal}; use kamu_auth_rebac::{ + AccountProperties, AccountPropertyName, AccountToDatasetRelation, + DatasetProperties, DatasetPropertyName, DeleteEntitiesRelationError, DeleteEntityPropertiesError, @@ -21,7 +25,7 @@ use kamu_auth_rebac::{ DeleteRelationError, Entity, EntityWithRelation, - GetEntityPropertiesError, + GetPropertiesError, InsertEntitiesRelationError, InsertRelationError, PropertyName, @@ -32,8 +36,9 @@ use kamu_auth_rebac::{ SetEntityPropertyError, SubjectEntityRelationsError, UnsetEntityPropertyError, + PROPERTY_VALUE_BOOLEAN_TRUE, }; -use opendatafabric::{AccountID, DatasetID}; +use opendatafabric as odf; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -53,7 +58,7 @@ impl RebacServiceImpl { impl RebacService for RebacServiceImpl { async fn set_account_property( &self, - account_id: &AccountID, + account_id: &odf::AccountID, property_name: AccountPropertyName, property_value: &PropertyValue, ) -> Result<(), SetEntityPropertyError> { @@ -67,7 +72,7 @@ impl RebacService for RebacServiceImpl { async fn unset_account_property( &self, - account_id: &AccountID, + account_id: &odf::AccountID, property_name: AccountPropertyName, ) -> Result<(), UnsetEntityPropertyError> { use futures::FutureExt; @@ -83,59 +88,75 @@ impl RebacService for RebacServiceImpl { async fn get_account_properties( &self, - account_id: &AccountID, - ) -> Result, GetEntityPropertiesError> { + account_id: &odf::AccountID, + ) -> Result<AccountProperties, GetPropertiesError> { let account_id = account_id.as_did_str().to_stack_string(); let account_entity = Entity::new_account(account_id.as_str()); - let properties = self + let entity_properties = self .rebac_repo .get_entity_properties(&account_entity) - .await?; + .await + .int_err()?; + + let account_properties = entity_properties + .into_iter() + .map(|(name, value)| match name { + PropertyName::Dataset(_) => unreachable!(), + PropertyName::Account(account_property_name) => (account_property_name, value), + }) + .fold(AccountProperties::default(), |mut acc, (name, value)| { + match name { + AccountPropertyName::IsAnAdmin => { + acc.is_admin = value == PROPERTY_VALUE_BOOLEAN_TRUE; + } + }; + acc + }); - Ok(properties) + Ok(account_properties) } async fn set_dataset_property( &self, - dataset_id: &DatasetID, + dataset_id: &odf::DatasetID, property_name: DatasetPropertyName, property_value: &PropertyValue, ) -> Result<(), SetEntityPropertyError> { let dataset_id = dataset_id.as_did_str().to_stack_string(); - let dataset_id_entity = Entity::new_dataset(dataset_id.as_str()); + let dataset_entity = Entity::new_dataset(dataset_id.as_str()); self.rebac_repo - .set_entity_property(&dataset_id_entity, property_name.into(), 
property_value) + .set_entity_property(&dataset_entity, property_name.into(), property_value) .await } async fn unset_dataset_property( &self, - dataset_id: &DatasetID, + dataset_id: &odf::DatasetID, property_name: DatasetPropertyName, ) -> Result<(), UnsetEntityPropertyError> { use futures::FutureExt; let dataset_id = dataset_id.as_did_str().to_stack_string(); - let dataset_id_entity = Entity::new_dataset(dataset_id.as_str()); + let dataset_entity = Entity::new_dataset(dataset_id.as_str()); self.rebac_repo - .delete_entity_property(&dataset_id_entity, property_name.into()) + .delete_entity_property(&dataset_entity, property_name.into()) .map(map_delete_entity_property_result) .await } async fn delete_dataset_properties( &self, - dataset_id: &DatasetID, + dataset_id: &odf::DatasetID, ) -> Result<(), DeletePropertiesError> { let dataset_id = dataset_id.as_did_str().to_stack_string(); - let dataset_id_entity = Entity::new_dataset(dataset_id.as_str()); + let dataset_entity = Entity::new_dataset(dataset_id.as_str()); match self .rebac_repo - .delete_entity_properties(&dataset_id_entity) + .delete_entity_properties(&dataset_entity) .await { Ok(_) => Ok(()), @@ -148,24 +169,93 @@ impl RebacService for RebacServiceImpl { async fn get_dataset_properties( &self, - dataset_id: &DatasetID, - ) -> Result, GetEntityPropertiesError> { + dataset_id: &odf::DatasetID, + ) -> Result { let dataset_id = dataset_id.as_did_str().to_stack_string(); - let dataset_id_entity = Entity::new_dataset(dataset_id.as_str()); + let dataset_entity = Entity::new_dataset(dataset_id.as_str()); - let properties = self + let entity_properties = self .rebac_repo - .get_entity_properties(&dataset_id_entity) - .await?; + .get_entity_properties(&dataset_entity) + .await + .int_err()?; + + let dataset_properties = entity_properties + .into_iter() + .map(|(name, value)| match name { + PropertyName::Dataset(dataset_property_name) => (dataset_property_name, value), + PropertyName::Account(_) => unreachable!(), + }) + .fold(DatasetProperties::default(), |mut acc, (name, value)| { + match name { + DatasetPropertyName::AllowsAnonymousRead => { + acc.allows_anonymous_read = value == PROPERTY_VALUE_BOOLEAN_TRUE; + } + DatasetPropertyName::AllowsPublicRead => { + acc.allows_public_read = value == PROPERTY_VALUE_BOOLEAN_TRUE; + } + }; + acc + }); + + Ok(dataset_properties) + } + + async fn get_dataset_properties_by_ids( + &self, + dataset_ids: &[odf::DatasetID], + ) -> Result, GetPropertiesError> { + let dataset_entities = dataset_ids + .iter() + .map(|id| Entity::new_dataset(id.to_string())) + .collect::>(); + + let entity_properties = self + .rebac_repo + .get_entity_properties_by_ids(&dataset_entities) + .await + .int_err()?; + + let mut dataset_properties_map = HashMap::new(); + + for dataset_id in dataset_ids { + dataset_properties_map.insert(dataset_id.clone(), DatasetProperties::default()); + } + + let entity_properties_it = + entity_properties + .into_iter() + .map(|(entity, name, value)| match name { + PropertyName::Dataset(dataset_property_name) => { + (entity.entity_id, dataset_property_name, value) + } + PropertyName::Account(_) => unreachable!(), + }); + + for (entity_id, name, value) in entity_properties_it { + let dataset_id = odf::DatasetID::from_did_str(&entity_id).int_err()?; + let dataset_properties = dataset_properties_map + .get_mut(&dataset_id) + .ok_or_else(|| format!("dataset_id not found: {dataset_id}").int_err())?; + + match name { + DatasetPropertyName::AllowsAnonymousRead => { + 
dataset_properties.allows_anonymous_read = value == PROPERTY_VALUE_BOOLEAN_TRUE; + } + DatasetPropertyName::AllowsPublicRead => { + dataset_properties.allows_public_read = value == PROPERTY_VALUE_BOOLEAN_TRUE; + } + }; + } - Ok(properties) + Ok(dataset_properties_map) } async fn insert_account_dataset_relation( &self, - account_id: &AccountID, + account_id: &odf::AccountID, relationship: AccountToDatasetRelation, - dataset_id: &DatasetID, + dataset_id: &odf::DatasetID, ) -> Result<(), InsertRelationError> { use futures::FutureExt; @@ -173,13 +263,13 @@ impl RebacService for RebacServiceImpl { let account_entity = Entity::new_account(account_id.as_str()); let dataset_id = dataset_id.as_did_str().to_stack_string(); - let dataset_id_entity = Entity::new_dataset(dataset_id.as_str()); + let dataset_entity = Entity::new_dataset(dataset_id.as_str()); self.rebac_repo .insert_entities_relation( &account_entity, Relation::AccountToDataset(relationship), - &dataset_id_entity, + &dataset_entity, ) .map(|res| match res { Ok(_) => Ok(()), @@ -195,22 +285,22 @@ impl RebacService for RebacServiceImpl { async fn delete_account_dataset_relation( &self, - account_id: &AccountID, + account_id: &odf::AccountID, relationship: AccountToDatasetRelation, - dataset_id: &DatasetID, + dataset_id: &odf::DatasetID, ) -> Result<(), DeleteRelationError> { let account_id = account_id.as_did_str().to_stack_string(); let account_entity = Entity::new_account(account_id.as_str()); let dataset_id = dataset_id.as_did_str().to_stack_string(); - let dataset_id_entity = Entity::new_dataset(dataset_id.as_str()); + let dataset_entity = Entity::new_dataset(dataset_id.as_str()); match self .rebac_repo .delete_entities_relation( &account_entity, Relation::AccountToDataset(relationship), - &dataset_id_entity, + &dataset_entity, ) .await { @@ -224,7 +314,7 @@ impl RebacService for RebacServiceImpl { async fn get_account_dataset_relations( &self, - account_id: &AccountID, + account_id: &odf::AccountID, ) -> Result, SubjectEntityRelationsError> { let account_id = account_id.as_did_str().to_stack_string(); let account_entity = Entity::new_account(account_id.as_str()); diff --git a/src/domain/auth-rebac/services/tests/tests/test_multi_tenant_rebac_dataset_lifecycle_message_consumer.rs b/src/domain/auth-rebac/services/tests/tests/test_multi_tenant_rebac_dataset_lifecycle_message_consumer.rs index 18bce4f31..d9edee196 100644 --- a/src/domain/auth-rebac/services/tests/tests/test_multi_tenant_rebac_dataset_lifecycle_message_consumer.rs +++ b/src/domain/auth-rebac/services/tests/tests/test_multi_tenant_rebac_dataset_lifecycle_message_consumer.rs @@ -11,12 +11,12 @@ use std::assert_matches::assert_matches; use std::sync::Arc; use dill::{Catalog, CatalogBuilder}; -use kamu_auth_rebac::{PropertyName, RebacService}; +use kamu_auth_rebac::{DatasetProperties, Entity, RebacRepository, RebacService}; use kamu_auth_rebac_inmem::InMemoryRebacRepository; use kamu_auth_rebac_services::{MultiTenantRebacDatasetLifecycleMessageConsumer, RebacServiceImpl}; use kamu_core::{DatasetLifecycleMessage, DatasetVisibility}; use messaging_outbox::{consume_deserialized_message, ConsumerFilter, Message}; -use opendatafabric::{AccountID, DatasetID, DatasetName}; +use opendatafabric as odf; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -28,46 +28,49 @@ type Harness = MultiTenantRebacDatasetLifecycleMessageConsumerHarness; async fn test_rebac_properties_added() { let harness = Harness::new(); - let 
(_, public_dataset_id) = DatasetID::new_generated_ed25519(); - let (_, private_dataset_id) = DatasetID::new_generated_ed25519(); - let (_, owner_id) = AccountID::new_generated_ed25519(); + let (_, public_dataset_id) = odf::DatasetID::new_generated_ed25519(); + let (_, private_dataset_id) = odf::DatasetID::new_generated_ed25519(); + let (_, owner_id) = odf::AccountID::new_generated_ed25519(); // Pre-checks { + let public_dataset_entity = Entity::new_dataset(public_dataset_id.to_string()); + let private_dataset_entity = Entity::new_dataset(private_dataset_id.to_string()); + assert_matches!( harness - .rebac_service - .get_dataset_properties(&public_dataset_id) - .await, - Ok(props) - if props.is_empty() + .rebac_repo + .get_entity_properties(&public_dataset_entity) + .await + .as_deref(), + Ok([]) ); assert_matches!( harness - .rebac_service - .get_dataset_properties(&private_dataset_id) - .await, - Ok(props) - if props.is_empty() + .rebac_repo + .get_entity_properties(&private_dataset_entity) + .await + .as_deref(), + Ok([]) ); } // Simulate creations { harness - .consume_message(DatasetLifecycleMessage::created( + .mimic(DatasetLifecycleMessage::created( public_dataset_id.clone(), owner_id.clone(), DatasetVisibility::Public, - DatasetName::new_unchecked("public-dataset"), + odf::DatasetName::new_unchecked("public-dataset"), )) .await; harness - .consume_message(DatasetLifecycleMessage::created( + .mimic(DatasetLifecycleMessage::created( private_dataset_id.clone(), owner_id, DatasetVisibility::Private, - DatasetName::new_unchecked("private-dataset"), + odf::DatasetName::new_unchecked("private-dataset"), )) .await; } @@ -79,16 +82,20 @@ async fn test_rebac_properties_added() { .rebac_service .get_dataset_properties(&public_dataset_id) .await, - Ok(props) - if props == vec![PropertyName::dataset_allows_public_read(true)] + Ok(DatasetProperties { + allows_anonymous_read: false, + allows_public_read: true + }) ); assert_matches!( harness .rebac_service .get_dataset_properties(&private_dataset_id) .await, - Ok(props) - if props == vec![PropertyName::dataset_allows_public_read(false)] + Ok(DatasetProperties { + allows_anonymous_read: false, + allows_public_read: false + }) ); } } @@ -99,17 +106,17 @@ async fn test_rebac_properties_added() { async fn test_rebac_properties_deleted() { let harness = Harness::new(); - let (_, dataset_id) = DatasetID::new_generated_ed25519(); - let (_, owner_id) = AccountID::new_generated_ed25519(); + let (_, dataset_id) = odf::DatasetID::new_generated_ed25519(); + let (_, owner_id) = odf::AccountID::new_generated_ed25519(); // Simulate creation { harness - .consume_message(DatasetLifecycleMessage::created( + .mimic(DatasetLifecycleMessage::created( dataset_id.clone(), owner_id.clone(), DatasetVisibility::Public, - DatasetName::new_unchecked("public-dataset"), + odf::DatasetName::new_unchecked("public-dataset"), )) .await; } @@ -121,34 +128,38 @@ async fn test_rebac_properties_deleted() { .rebac_service .get_dataset_properties(&dataset_id) .await, - Ok(props) - if props == vec![PropertyName::dataset_allows_public_read(true)] + Ok(DatasetProperties { + allows_anonymous_read: false, + allows_public_read: true + }) ); } // Simulate deletion { harness - .consume_message(DatasetLifecycleMessage::deleted(dataset_id.clone())) + .mimic(DatasetLifecycleMessage::deleted(dataset_id.clone())) .await; } + let dataset_entity = Entity::new_dataset(dataset_id.to_string()); + // Validate { assert_matches!( harness - .rebac_service - .get_dataset_properties(&dataset_id) - .await, - 
Ok(props) - if props.is_empty() + .rebac_repo + .get_entity_properties(&dataset_entity) + .await + .as_deref(), + Ok([]) ); } // Simulate deletion again to check idempotency { harness - .consume_message(DatasetLifecycleMessage::deleted(dataset_id.clone())) + .mimic(DatasetLifecycleMessage::deleted(dataset_id.clone())) .await; } @@ -156,11 +167,11 @@ async fn test_rebac_properties_deleted() { { assert_matches!( harness - .rebac_service - .get_dataset_properties(&dataset_id) - .await, - Ok(props) - if props.is_empty() + .rebac_repo + .get_entity_properties(&dataset_entity) + .await + .as_deref(), + Ok([]) ); } } @@ -169,6 +180,7 @@ async fn test_rebac_properties_deleted() { struct MultiTenantRebacDatasetLifecycleMessageConsumerHarness { catalog: Catalog, + rebac_repo: Arc, rebac_service: Arc, } @@ -184,12 +196,13 @@ impl MultiTenantRebacDatasetLifecycleMessageConsumerHarness { let catalog = catalog_builder.build(); Self { + rebac_repo: catalog.get_one().unwrap(), rebac_service: catalog.get_one().unwrap(), catalog, } } - pub async fn consume_message(&self, message: TMessage) { + pub async fn mimic(&self, message: TMessage) { let content_json = serde_json::to_string(&message).unwrap(); consume_deserialized_message::( diff --git a/src/domain/core/Cargo.toml b/src/domain/core/Cargo.toml index 4d4ad5a24..70c340b26 100644 --- a/src/domain/core/Cargo.toml +++ b/src/domain/core/Cargo.toml @@ -26,6 +26,7 @@ default = [] testing = ["dep:mockall"] utoipa = ["dep:utoipa"] +oso = ["dep:oso"] [dependencies] @@ -36,7 +37,6 @@ messaging-outbox = { workspace = true } internal-error = { workspace = true } opendatafabric = { workspace = true } -async-stream = { version = "0.3", default-features = false } async-trait = { version = "0.1", default-features = false } bytes = { version = "1", default-features = false } chrono = { version = "0.4", default-features = false } @@ -48,7 +48,6 @@ pin-project = { version = "1", default-features = false } thiserror = { version = "1", default-features = false } tokio = { version = "1", default-features = false } tokio-stream = { version = "0.1", default-features = false } -tracing = { version = "0.1", default-features = false } url = { version = "2", default-features = false, features = ["serde"] } # TODO: Avoid this dependency or depend on sub-crates @@ -63,6 +62,7 @@ serde_with = { version = "3", default-features = false } # Optional mockall = { optional = true, version = "0.13", default-features = false } +oso = { optional = true, version = "0.27", default-features = false } utoipa = { optional = true, version = "5", default-features = false, features = [ ] } diff --git a/src/domain/core/src/auth/dataset_action_authorizer.rs b/src/domain/core/src/auth/dataset_action_authorizer.rs index 7e7f09d23..cc452af02 100644 --- a/src/domain/core/src/auth/dataset_action_authorizer.rs +++ b/src/domain/core/src/auth/dataset_action_authorizer.rs @@ -12,7 +12,7 @@ use std::str::FromStr; use dill::*; use internal_error::{ErrorIntoInternal, InternalError}; -use opendatafabric::{DatasetHandle, DatasetRef}; +use opendatafabric as odf; use thiserror::Error; use crate::AccessError; @@ -23,13 +23,14 @@ use crate::AccessError; pub trait DatasetActionAuthorizer: Sync + Send { async fn check_action_allowed( &self, - dataset_handle: &DatasetHandle, + // TODO: Private Datasets: use odf::DatasetID, here and below + dataset_handle: &odf::DatasetHandle, action: DatasetAction, ) -> Result<(), DatasetActionUnauthorizedError>; async fn is_action_allowed( &self, - dataset_handle: &DatasetHandle, + 
dataset_handle: &odf::DatasetHandle, action: DatasetAction, ) -> Result<bool, InternalError> { match self.check_action_allowed(dataset_handle, action).await { @@ -39,17 +40,24 @@ pub trait DatasetActionAuthorizer: Sync + Send { } } - async fn get_allowed_actions(&self, dataset_handle: &DatasetHandle) -> HashSet<DatasetAction>; + // TODO: Private Datasets: tests + async fn get_allowed_actions( + &self, + dataset_handle: &odf::DatasetHandle, + ) -> Result<HashSet<DatasetAction>, InternalError>; + // TODO: Private Datasets: tests async fn filter_datasets_allowing( &self, - dataset_handles: Vec<DatasetHandle>, + // TODO: Private Datasets: use slice? here and above + dataset_handles: Vec<odf::DatasetHandle>, action: DatasetAction, - ) -> Result<Vec<DatasetHandle>, InternalError>; + ) -> Result<Vec<odf::DatasetHandle>, InternalError>; + // TODO: Private Datasets: tests async fn classify_datasets_by_allowance( &self, - dataset_handles: Vec<DatasetHandle>, + dataset_handles: Vec<odf::DatasetHandle>, action: DatasetAction, ) -> Result<ClassifyByAllowanceResponse, InternalError>; } @@ -85,6 +93,29 @@ impl std::fmt::Display for DatasetAction { } } +#[cfg(feature = "oso")] +impl oso::FromPolar for DatasetAction { + fn from_polar(polar_value: oso::PolarValue) -> oso::Result<Self> { + use oso::errors::{OsoError, TypeError}; + use oso::PolarValue; + + let PolarValue::String(raw_dataset_action) = polar_value else { + return Err(TypeError::expected("String").user()); + }; + + Self::from_str(&raw_dataset_action).map_err(|e| OsoError::Custom { + message: e.to_string(), + }) + } +} + +#[cfg(feature = "oso")] +impl oso::ToPolar for DatasetAction { + fn to_polar(self) -> oso::PolarValue { + self.to_string().to_polar() + } +} + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[derive(Debug, Error)] @@ -93,22 +124,22 @@ pub enum DatasetActionUnauthorizedError { Access(AccessError), #[error(transparent)] - Internal(InternalError), + Internal(#[from] InternalError), } #[derive(Debug, Error)] #[error("User has no '{action}' permission in dataset '{dataset_ref}'")] pub struct DatasetActionNotEnoughPermissionsError { pub action: DatasetAction, - pub dataset_ref: DatasetRef, + pub dataset_ref: odf::DatasetRef, } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[derive(Debug)] pub struct ClassifyByAllowanceResponse { - pub authorized_handles: Vec<DatasetHandle>, - pub unauthorized_handles_with_errors: Vec<(DatasetHandle, DatasetActionUnauthorizedError)>, + pub authorized_handles: Vec<odf::DatasetHandle>, + pub unauthorized_handles_with_errors: Vec<(odf::DatasetHandle, DatasetActionUnauthorizedError)>, } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -127,28 +158,31 @@ impl AlwaysHappyDatasetActionAuthorizer { impl DatasetActionAuthorizer for AlwaysHappyDatasetActionAuthorizer { async fn check_action_allowed( &self, - _dataset_handle: &DatasetHandle, + _dataset_handle: &odf::DatasetHandle, _action: DatasetAction, ) -> Result<(), DatasetActionUnauthorizedError> { // Ignore rules Ok(()) } - async fn get_allowed_actions(&self, _dataset_handle: &DatasetHandle) -> HashSet<DatasetAction> { - HashSet::from([DatasetAction::Read, DatasetAction::Write]) + async fn get_allowed_actions( + &self, + _dataset_handle: &odf::DatasetHandle, + ) -> Result<HashSet<DatasetAction>, InternalError> { + Ok(HashSet::from([DatasetAction::Read, DatasetAction::Write])) } async fn filter_datasets_allowing( &self, - dataset_handles: Vec<DatasetHandle>, + dataset_handles: Vec<odf::DatasetHandle>, _action: DatasetAction, - ) -> Result<Vec<DatasetHandle>, InternalError> { + ) -> Result<Vec<odf::DatasetHandle>, InternalError> { Ok(dataset_handles) } async fn classify_datasets_by_allowance( &self, - 
dataset_handles: Vec, + dataset_handles: Vec, _action: DatasetAction, ) -> Result { Ok(ClassifyByAllowanceResponse { diff --git a/src/domain/core/src/services/dataset_ownership_service.rs b/src/domain/core/src/services/dataset_ownership_service.rs index 2cdfbd7c4..384b698c2 100644 --- a/src/domain/core/src/services/dataset_ownership_service.rs +++ b/src/domain/core/src/services/dataset_ownership_service.rs @@ -12,6 +12,7 @@ use opendatafabric::{AccountID, DatasetID}; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// TODO: Private Datasets: replace with DatasetEntry-related service #[async_trait::async_trait] pub trait DatasetOwnershipService: Sync + Send { async fn get_dataset_owners( diff --git a/src/domain/core/src/services/dataset_registry.rs b/src/domain/core/src/services/dataset_registry.rs index 8953b6bf7..a413f7988 100644 --- a/src/domain/core/src/services/dataset_registry.rs +++ b/src/domain/core/src/services/dataset_registry.rs @@ -19,6 +19,7 @@ use crate::{DatasetHandleStream, GetDatasetError, ResolvedDataset}; pub trait DatasetRegistry: Send + Sync { fn all_dataset_handles(&self) -> DatasetHandleStream<'_>; + // TODO: Private Datasets: replace AccountName with AccountID? fn all_dataset_handles_by_owner(&self, owner_name: &AccountName) -> DatasetHandleStream<'_>; async fn resolve_dataset_handle_by_ref( diff --git a/src/domain/core/src/services/ingest/polling_ingest_service.rs b/src/domain/core/src/services/ingest/polling_ingest_service.rs index be5e18ed1..091310f48 100644 --- a/src/domain/core/src/services/ingest/polling_ingest_service.rs +++ b/src/domain/core/src/services/ingest/polling_ingest_service.rs @@ -111,13 +111,12 @@ pub enum PollingIngestStage { Commit, } -#[allow(unused_variables)] pub trait PollingIngestListener: Send + Sync { fn begin(&self) {} - fn on_cache_hit(&self, created_at: &DateTime) {} - fn on_stage_progress(&self, stage: PollingIngestStage, _progress: u64, _out_of: TotalSteps) {} - fn success(&self, result: &PollingIngestResult) {} - fn error(&self, error: &PollingIngestError) {} + fn on_cache_hit(&self, _created_at: &DateTime) {} + fn on_stage_progress(&self, _stage: PollingIngestStage, _progress: u64, _out_of: TotalSteps) {} + fn success(&self, _result: &PollingIngestResult) {} + fn error(&self, _error: &PollingIngestError) {} fn get_pull_image_listener(self: Arc) -> Option> { None diff --git a/src/domain/core/src/services/ingest/push_ingest_service.rs b/src/domain/core/src/services/ingest/push_ingest_service.rs index b284d042c..f2e6fa7c3 100644 --- a/src/domain/core/src/services/ingest/push_ingest_service.rs +++ b/src/domain/core/src/services/ingest/push_ingest_service.rs @@ -94,12 +94,11 @@ pub enum PushIngestStage { Commit, } -#[allow(unused_variables)] pub trait PushIngestListener: Send + Sync { fn begin(&self) {} - fn on_stage_progress(&self, stage: PushIngestStage, _progress: u64, _out_of: TotalSteps) {} - fn success(&self, result: &PushIngestResult) {} - fn error(&self, error: &PushIngestError) {} + fn on_stage_progress(&self, _stage: PushIngestStage, _progress: u64, _out_of: TotalSteps) {} + fn success(&self, _result: &PushIngestResult) {} + fn error(&self, _error: &PushIngestError) {} fn get_pull_image_listener(self: Arc) -> Option> { None diff --git a/src/domain/datasets/domain/src/repos/dataset_entry_repository.rs b/src/domain/datasets/domain/src/repos/dataset_entry_repository.rs index f7ec4a80b..af8036d61 100644 --- 
a/src/domain/datasets/domain/src/repos/dataset_entry_repository.rs +++ b/src/domain/datasets/domain/src/repos/dataset_entry_repository.rs @@ -9,7 +9,7 @@ use database_common::PaginationOpts; use internal_error::InternalError; -use opendatafabric::{AccountID, DatasetID, DatasetName}; +use opendatafabric as odf; use thiserror::Error; use crate::DatasetEntry; @@ -19,35 +19,38 @@ use crate::DatasetEntry; #[cfg_attr(any(feature = "testing", test), mockall::automock)] #[async_trait::async_trait] pub trait DatasetEntryRepository: Send + Sync { - async fn dataset_entries_count(&self) -> Result; + async fn dataset_entries_count(&self) -> Result; async fn dataset_entries_count_by_owner_id( &self, - owner_id: &AccountID, + owner_id: &odf::AccountID, ) -> Result; - fn get_dataset_entries(&self, pagination: PaginationOpts) -> DatasetEntryStream<'_>; + async fn get_dataset_entries<'a>( + &'a self, + pagination: PaginationOpts, + ) -> DatasetEntryStream<'a>; - fn get_dataset_entries_by_owner_id( - &self, - owner_id: &AccountID, + async fn get_dataset_entries_by_owner_id<'a>( + &'a self, + owner_id: &odf::AccountID, pagination: PaginationOpts, - ) -> DatasetEntryStream<'_>; + ) -> DatasetEntryStream<'a>; async fn get_dataset_entry( &self, - dataset_id: &DatasetID, + dataset_id: &odf::DatasetID, ) -> Result; async fn get_multiple_dataset_entries( &self, - dataset_ids: &[DatasetID], + dataset_ids: &[odf::DatasetID], ) -> Result; async fn get_dataset_entry_by_owner_and_name( &self, - owner_id: &AccountID, - name: &DatasetName, + owner_id: &odf::AccountID, + name: &odf::DatasetName, ) -> Result; async fn save_dataset_entry( @@ -57,13 +60,13 @@ pub trait DatasetEntryRepository: Send + Sync { async fn update_dataset_entry_name( &self, - dataset_id: &DatasetID, - new_name: &DatasetName, + dataset_id: &odf::DatasetID, + new_name: &odf::DatasetName, ) -> Result<(), UpdateDatasetEntryNameError>; async fn delete_dataset_entry( &self, - dataset_id: &DatasetID, + dataset_id: &odf::DatasetID, ) -> Result<(), DeleteEntryDatasetError>; } @@ -78,7 +81,17 @@ pub type DatasetEntryStream<'a> = std::pin::Pin< #[derive(Default, Debug, Eq, PartialEq)] pub struct DatasetEntriesResolution { pub resolved_entries: Vec, - pub unresolved_entries: Vec, + pub unresolved_entries: Vec, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Errors +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Error, Debug)] +pub enum DatasetEntriesCountError { + #[error(transparent)] + Internal(#[from] InternalError), } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -101,11 +114,11 @@ pub enum GetMultipleDatasetEntriesError { #[derive(Error, Debug)] #[error("Dataset entry with dataset_id '{dataset_id}' not found")] pub struct DatasetEntryNotFoundError { - pub dataset_id: DatasetID, + pub dataset_id: odf::DatasetID, } impl DatasetEntryNotFoundError { - pub fn new(dataset_id: DatasetID) -> Self { + pub fn new(dataset_id: odf::DatasetID) -> Self { Self { dataset_id } } } @@ -124,12 +137,12 @@ pub enum GetDatasetEntryByNameError { #[derive(Error, Debug)] #[error("Dataset entry with owner_id '{owner_id}' and name '{dataset_name}' not found")] pub struct DatasetEntryByNameNotFoundError { - pub owner_id: AccountID, - pub dataset_name: DatasetName, + pub owner_id: odf::AccountID, + pub dataset_name: odf::DatasetName, } impl 
DatasetEntryByNameNotFoundError { - pub fn new(owner_id: AccountID, dataset_name: DatasetName) -> Self { + pub fn new(owner_id: odf::AccountID, dataset_name: odf::DatasetName) -> Self { Self { owner_id, dataset_name, @@ -154,11 +167,11 @@ pub enum SaveDatasetEntryError { #[derive(Error, Debug)] #[error("Dataset entry with dataset_id '{dataset_id}' already exists")] pub struct SaveDatasetEntryErrorDuplicate { - pub dataset_id: DatasetID, + pub dataset_id: odf::DatasetID, } impl SaveDatasetEntryErrorDuplicate { - pub fn new(dataset_id: DatasetID) -> Self { + pub fn new(dataset_id: odf::DatasetID) -> Self { Self { dataset_id } } } @@ -180,11 +193,11 @@ pub enum UpdateDatasetEntryNameError { #[derive(Error, Debug)] #[error("Dataset entry with name {dataset_name} for same owner already exists")] pub struct DatasetEntryNameCollisionError { - pub dataset_name: DatasetName, + pub dataset_name: odf::DatasetName, } impl DatasetEntryNameCollisionError { - pub fn new(dataset_name: DatasetName) -> Self { + pub fn new(dataset_name: odf::DatasetName) -> Self { Self { dataset_name } } } diff --git a/src/domain/datasets/domain/src/services/dataset_entry_service.rs b/src/domain/datasets/domain/src/services/dataset_entry_service.rs index fb04f2acc..0da7d1587 100644 --- a/src/domain/datasets/domain/src/services/dataset_entry_service.rs +++ b/src/domain/datasets/domain/src/services/dataset_entry_service.rs @@ -7,36 +7,35 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -use database_common::PaginationOpts; +use database_common::{EntityPageListing, PaginationOpts}; use internal_error::InternalError; -use opendatafabric::AccountID; +use opendatafabric as odf; use thiserror::Error; -use crate::DatasetEntry; +use crate::{DatasetEntry, DatasetEntryStream}; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] pub trait DatasetEntryService: Sync + Send { + // TODO: Private Datasets: tests + // TODO: Private Datasets: extract to DatasetEntryRegistry? 
+ fn all_entries(&self) -> DatasetEntryStream; + async fn list_all_entries( &self, pagination: PaginationOpts, - ) -> Result; + ) -> Result, ListDatasetEntriesError>; async fn list_entries_owned_by( &self, - owner_id: AccountID, + owner_id: &odf::AccountID, pagination: PaginationOpts, - ) -> Result; + ) -> Result, ListDatasetEntriesError>; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -pub struct DatasetEntryListing { - pub list: Vec, - pub total_count: usize, -} - +// Errors //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[derive(Error, Debug)] diff --git a/src/domain/datasets/services/Cargo.toml b/src/domain/datasets/services/Cargo.toml index c1c36c136..cc64dd0d5 100644 --- a/src/domain/datasets/services/Cargo.toml +++ b/src/domain/datasets/services/Cargo.toml @@ -38,9 +38,6 @@ chrono = { version = "0.4", default-features = false } dill = "0.9" futures = { version = "0.3", default-features = false } secrecy = "0.10" -serde = "1" -serde_json = "1" -thiserror = { version = "1", default-features = false } tokio = { version = "1", default-features = false } tracing = { version = "0.1", default-features = false } uuid = { version = "1", default-features = false } diff --git a/src/domain/datasets/services/src/dataset_entry_indexer.rs b/src/domain/datasets/services/src/dataset_entry_indexer.rs index e4bf2d9b5..86fa4efba 100644 --- a/src/domain/datasets/services/src/dataset_entry_indexer.rs +++ b/src/domain/datasets/services/src/dataset_entry_indexer.rs @@ -35,6 +35,7 @@ pub struct DatasetEntryIndexer { time_source: Arc, dataset_repo: Arc, account_repository: Arc, + is_in_workspace: bool, } #[component(pub)] @@ -53,12 +54,14 @@ impl DatasetEntryIndexer { time_source: Arc, dataset_repo: Arc, account_repository: Arc, + is_in_workspace: bool, ) -> Self { Self { dataset_entry_repo, time_source, dataset_repo, account_repository, + is_in_workspace, } } @@ -93,7 +96,7 @@ impl DatasetEntryIndexer { .get(&dataset_handle.alias.account_name) .cloned() else { - tracing::debug!(dataset_handle=%dataset_handle, "Skipped indexing dataset due to unresolved owner"); + tracing::warn!(dataset_handle=%dataset_handle, "Skipped indexing dataset due to unresolved owner"); continue; }; @@ -175,6 +178,12 @@ impl InitOnStartup for DatasetEntryIndexer { name = "DatasetEntryIndexer::run_initialization" )] async fn run_initialization(&self) -> Result<(), InternalError> { + if !self.is_in_workspace { + tracing::debug!("Skip initialization: not in a workspace"); + + return Ok(()); + } + if self.has_datasets_indexed().await? { tracing::debug!("Skip initialization: datasets already have indexed"); diff --git a/src/domain/datasets/services/src/dataset_entry_service_impl.rs b/src/domain/datasets/services/src/dataset_entry_service_impl.rs index dc4c21b52..429b13416 100644 --- a/src/domain/datasets/services/src/dataset_entry_service_impl.rs +++ b/src/domain/datasets/services/src/dataset_entry_service_impl.rs @@ -8,9 +8,9 @@ // by the Apache License, Version 2.0. 
use std::collections::{HashMap, HashSet}; -use std::sync::{Arc, Mutex}; +use std::sync::Arc; -use database_common::PaginationOpts; +use database_common::{EntityPageListing, EntityPageStreamer, PaginationOpts}; use dill::{component, interface, meta, Catalog}; use internal_error::{InternalError, ResultIntoInternal}; use kamu_accounts::{AccountRepository, CurrentAccountSubject}; @@ -37,16 +37,9 @@ use messaging_outbox::{ MessageConsumerT, MessageDeliveryMechanism, }; -use opendatafabric::{ - AccountID, - AccountName, - DatasetAlias, - DatasetHandle, - DatasetID, - DatasetName, - DatasetRef, -}; +use opendatafabric as odf; use time_source::SystemTimeSource; +use tokio::sync::RwLock; use crate::MESSAGE_CONSUMER_KAMU_DATASET_ENTRY_SERVICE; @@ -59,15 +52,15 @@ pub struct DatasetEntryServiceImpl { account_repo: Arc, current_account_subject: Arc, tenancy_config: Arc, - accounts_cache: Arc>, + accounts_cache: Arc>, } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[derive(Default)] struct AccountsCache { - id2names: HashMap, - names2ids: HashMap, + id2names: HashMap, + names2ids: HashMap, } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -163,14 +156,17 @@ impl DatasetEntryServiceImpl { async fn entries_as_handles( &self, entries: Vec, - ) -> Result, ListDatasetEntriesError> { + ) -> Result, ListDatasetEntriesError> { // Select which accounts haven't been processed yet let first_seen_account_ids = { - let accounts_cache = self.accounts_cache.lock().unwrap(); + let readable_accounts_cache = self.accounts_cache.read().await; - let mut first_seen_account_ids: HashSet = HashSet::new(); + let mut first_seen_account_ids = HashSet::new(); for entry in &entries { - if !accounts_cache.id2names.contains_key(&entry.owner_id) { + if !readable_accounts_cache + .id2names + .contains_key(&entry.owner_id) + { first_seen_account_ids.insert(entry.owner_id.clone()); } } @@ -187,12 +183,12 @@ impl DatasetEntryServiceImpl { .await .int_err()?; - let mut accounts_cache = self.accounts_cache.lock().unwrap(); + let mut writable_accounts_cache = self.accounts_cache.write().await; for account in accounts { - accounts_cache + writable_accounts_cache .id2names .insert(account.id.clone(), account.account_name.clone()); - accounts_cache + writable_accounts_cache .names2ids .insert(account.account_name, account.id); } @@ -200,13 +196,13 @@ impl DatasetEntryServiceImpl { // Convert the entries to handles let mut handles = Vec::new(); - let accounts_cache = self.accounts_cache.lock().unwrap(); + let readable_accounts_cache = self.accounts_cache.read().await; for entry in &entries { // By now we should now the account name - let maybe_owner_name = accounts_cache.id2names.get(&entry.owner_id); + let maybe_owner_name = readable_accounts_cache.id2names.get(&entry.owner_id); if let Some(owner_name) = maybe_owner_name { // Form DatasetHandle - handles.push(DatasetHandle::new( + handles.push(odf::DatasetHandle::new( entry.id.clone(), self.make_alias(owner_name.clone(), entry.name.clone()), )); @@ -219,11 +215,11 @@ impl DatasetEntryServiceImpl { async fn resolve_account_name_by_id( &self, - account_id: &AccountID, - ) -> Result { + account_id: &odf::AccountID, + ) -> Result { let maybe_cached_name = { - let accounts_cache = self.accounts_cache.lock().unwrap(); - accounts_cache.id2names.get(account_id).cloned() + let readable_accounts_cache = self.accounts_cache.read().await; + 
readable_accounts_cache.id2names.get(account_id).cloned() }; if let Some(name) = maybe_cached_name { @@ -235,11 +231,11 @@ impl DatasetEntryServiceImpl { .await .int_err()?; - let mut accounts_cache = self.accounts_cache.lock().unwrap(); - accounts_cache + let mut writable_accounts_cache = self.accounts_cache.write().await; + writable_accounts_cache .id2names .insert(account_id.clone(), account.account_name.clone()); - accounts_cache + writable_accounts_cache .names2ids .insert(account.account_name.clone(), account_id.clone()); @@ -249,14 +245,14 @@ impl DatasetEntryServiceImpl { async fn resolve_account_id_by_maybe_name( &self, - maybe_account_name: Option<&AccountName>, - ) -> Result { + maybe_account_name: Option<&odf::AccountName>, + ) -> Result { let account_name = maybe_account_name .unwrap_or_else(|| self.current_account_subject.account_name_or_default()); let maybe_cached_id = { - let accounts_cache = self.accounts_cache.lock().unwrap(); - accounts_cache.names2ids.get(account_name).cloned() + let readable_accounts_cache = self.accounts_cache.read().await; + readable_accounts_cache.names2ids.get(account_name).cloned() }; if let Some(id) = maybe_cached_id { @@ -268,11 +264,11 @@ impl DatasetEntryServiceImpl { .await .int_err()?; - let mut accounts_cache = self.accounts_cache.lock().unwrap(); - accounts_cache + let mut writable_accounts_cache = self.accounts_cache.write().await; + writable_accounts_cache .id2names .insert(account.id.clone(), account_name.clone()); - accounts_cache + writable_accounts_cache .names2ids .insert(account_name.clone(), account.id.clone()); @@ -280,58 +276,48 @@ impl DatasetEntryServiceImpl { } } - fn stream_datasets<'a, Args, HInitArgs, HInitArgsFut, HListing, HListingFut>( - &'a self, - get_args_callback: HInitArgs, - next_entries_callback: HListing, - ) -> DatasetHandleStream<'a> - where - Args: Clone + Send + 'a, - HInitArgs: FnOnce() -> HInitArgsFut + Send + 'a, - HInitArgsFut: std::future::Future> + Send + 'a, - HListing: Fn(Args, PaginationOpts) -> HListingFut + Send + 'a, - HListingFut: std::future::Future> - + Send - + 'a, - { - Box::pin(async_stream::try_stream! 
{ - // Init arguments - let args = get_args_callback().await?; - - // Tracking pagination progress - let mut offset = 0; - let limit = 100; - - loop { - // Load a page of dataset entries - let entries_page = next_entries_callback(args.clone(), PaginationOpts { limit, offset }) - .await - .int_err()?; - - // Actually read entires - let loaded_entries_count = entries_page.list.len(); + async fn list_all_dataset_handles( + &self, + pagination: PaginationOpts, + ) -> Result, InternalError> { + let dataset_entry_listing = self.list_all_entries(pagination).await.int_err()?; - // Convert entries to handles - let handles = self.entries_as_handles(entries_page.list).await.int_err()?; + Ok(EntityPageListing { + total_count: dataset_entry_listing.total_count, + list: self + .entries_as_handles(dataset_entry_listing.list) + .await + .int_err()?, + }) + } - // Stream the entries - for hdl in handles { - yield hdl; - } + async fn list_all_dataset_handles_by_owner_name( + &self, + owner_id: &odf::AccountID, + pagination: PaginationOpts, + ) -> Result, InternalError> { + let dataset_entry_listing = self + .list_entries_owned_by(owner_id, pagination) + .await + .int_err()?; - // Next page - offset += loaded_entries_count; - if offset >= entries_page.total_count { - break; - } - } + Ok(EntityPageListing { + total_count: dataset_entry_listing.total_count, + list: self + .entries_as_handles(dataset_entry_listing.list) + .await + .int_err()?, }) } - fn make_alias(&self, owner_name: AccountName, dataset_name: DatasetName) -> DatasetAlias { + fn make_alias( + &self, + owner_name: odf::AccountName, + dataset_name: odf::DatasetName, + ) -> odf::DatasetAlias { match *self.tenancy_config { - TenancyConfig::MultiTenant => DatasetAlias::new(Some(owner_name), dataset_name), - TenancyConfig::SingleTenant => DatasetAlias::new(None, dataset_name), + TenancyConfig::MultiTenant => odf::DatasetAlias::new(Some(owner_name), dataset_name), + TenancyConfig::SingleTenant => odf::DatasetAlias::new(None, dataset_name), } } } @@ -340,20 +326,35 @@ impl DatasetEntryServiceImpl { #[async_trait::async_trait] impl DatasetEntryService for DatasetEntryServiceImpl { + fn all_entries(&self) -> DatasetEntryStream { + EntityPageStreamer::default().into_stream( + || async { Ok(()) }, + |_, pagination| { + let list_fut = self.list_all_entries(pagination); + async { list_fut.await.int_err() } + }, + ) + } + async fn list_all_entries( &self, pagination: PaginationOpts, - ) -> Result { + ) -> Result, ListDatasetEntriesError> { use futures::TryStreamExt; - let total_count = self.dataset_entry_repo.dataset_entries_count().await?; + let total_count = self + .dataset_entry_repo + .dataset_entries_count() + .await + .int_err()?; let entries = self .dataset_entry_repo .get_dataset_entries(pagination) + .await .try_collect() .await?; - Ok(DatasetEntryListing { + Ok(EntityPageListing { list: entries, total_count, }) @@ -361,22 +362,23 @@ impl DatasetEntryService for DatasetEntryServiceImpl { async fn list_entries_owned_by( &self, - owner_id: AccountID, + owner_id: &odf::AccountID, pagination: PaginationOpts, - ) -> Result { + ) -> Result, ListDatasetEntriesError> { use futures::TryStreamExt; let total_count = self .dataset_entry_repo - .dataset_entries_count_by_owner_id(&owner_id) + .dataset_entries_count_by_owner_id(owner_id) .await?; let entries = self .dataset_entry_repo - .get_dataset_entries_by_owner_id(&owner_id, pagination) + .get_dataset_entries_by_owner_id(owner_id, pagination) + .await .try_collect() .await?; - Ok(DatasetEntryListing { + 
Ok(EntityPageListing { list: entries, total_count, }) @@ -388,44 +390,46 @@ impl DatasetEntryService for DatasetEntryServiceImpl { #[async_trait::async_trait] impl DatasetRegistry for DatasetEntryServiceImpl { #[tracing::instrument(level = "debug", skip_all)] - fn all_dataset_handles<'a>(&'a self) -> DatasetHandleStream<'a> { - #[derive(Clone)] - struct NoArgs {} - - self.stream_datasets( - || async { Ok(NoArgs {}) }, - |_, pagination| self.list_all_entries(pagination), + fn all_dataset_handles(&self) -> DatasetHandleStream { + EntityPageStreamer::default().into_stream( + || async { Ok(()) }, + |_, pagination| self.list_all_dataset_handles(pagination), ) } #[tracing::instrument(level = "debug", skip_all, fields(%owner_name))] - fn all_dataset_handles_by_owner(&self, owner_name: &AccountName) -> DatasetHandleStream<'_> { - #[derive(Clone)] + fn all_dataset_handles_by_owner(&self, owner_name: &odf::AccountName) -> DatasetHandleStream { struct OwnerArgs { - owner_id: AccountID, + owner_id: odf::AccountID, } let owner_name = owner_name.clone(); - self.stream_datasets( + EntityPageStreamer::default().into_stream( move || async move { let owner_id = self .resolve_account_id_by_maybe_name(Some(&owner_name)) .await?; - Ok(OwnerArgs { owner_id }) + Ok(Arc::new(OwnerArgs { owner_id })) + }, + move |args, pagination| { + let args = args.clone(); + async move { + self.list_all_dataset_handles_by_owner_name(&args.owner_id, pagination) + .await + } }, - |args, pagination| self.list_entries_owned_by(args.owner_id, pagination), ) } #[tracing::instrument(level = "debug", skip_all, fields(%dataset_ref))] async fn resolve_dataset_handle_by_ref( &self, - dataset_ref: &DatasetRef, - ) -> Result { + dataset_ref: &odf::DatasetRef, + ) -> Result { match dataset_ref { - DatasetRef::Handle(h) => Ok(h.clone()), - DatasetRef::Alias(alias) => { + odf::DatasetRef::Handle(h) => Ok(h.clone()), + odf::DatasetRef::Alias(alias) => { let owner_id = self .resolve_account_id_by_maybe_name(alias.account_name.as_ref()) .await?; @@ -434,7 +438,7 @@ impl DatasetRegistry for DatasetEntryServiceImpl { .get_dataset_entry_by_owner_and_name(&owner_id, &alias.dataset_name) .await { - Ok(entry) => Ok(DatasetHandle::new(entry.id.clone(), alias.clone())), + Ok(entry) => Ok(odf::DatasetHandle::new(entry.id.clone(), alias.clone())), Err(GetDatasetEntryByNameError::NotFound(_)) => { Err(GetDatasetError::NotFound(DatasetNotFoundError { dataset_ref: dataset_ref.clone(), @@ -445,10 +449,10 @@ impl DatasetRegistry for DatasetEntryServiceImpl { } } } - DatasetRef::ID(id) => match self.dataset_entry_repo.get_dataset_entry(id).await { + odf::DatasetRef::ID(id) => match self.dataset_entry_repo.get_dataset_entry(id).await { Ok(entry) => { let owner_name = self.resolve_account_name_by_id(&entry.owner_id).await?; - Ok(DatasetHandle::new( + Ok(odf::DatasetHandle::new( entry.id.clone(), self.make_alias(owner_name, entry.name.clone()), )) @@ -466,7 +470,7 @@ impl DatasetRegistry for DatasetEntryServiceImpl { #[tracing::instrument(level = "debug", skip_all, fields(?dataset_ids))] async fn resolve_multiple_dataset_handles_by_ids( &self, - dataset_ids: Vec, + dataset_ids: Vec, ) -> Result { let entries_resolution = self .dataset_entry_repo @@ -506,7 +510,7 @@ impl DatasetRegistry for DatasetEntryServiceImpl { // Note: in future we will be resolving storage repository, // but for now we have just a single one - fn get_dataset_by_handle(&self, dataset_handle: &DatasetHandle) -> ResolvedDataset { + fn get_dataset_by_handle(&self, dataset_handle: 
&odf::DatasetHandle) -> ResolvedDataset { let dataset = self.dataset_repo.get_dataset_by_handle(dataset_handle); ResolvedDataset::new(dataset, dataset_handle.clone()) } diff --git a/src/domain/datasets/services/tests/tests/test_dataset_entry_service.rs b/src/domain/datasets/services/tests/tests/test_dataset_entry_service.rs index 8e751cb3f..dcb4f91c1 100644 --- a/src/domain/datasets/services/tests/tests/test_dataset_entry_service.rs +++ b/src/domain/datasets/services/tests/tests/test_dataset_entry_service.rs @@ -210,7 +210,7 @@ impl DatasetEntryServiceHarness { let mut b = CatalogBuilder::new(); b.add::(); - b.add::(); + b.add_builder(DatasetEntryIndexer::builder().with_is_in_workspace(true)); b.add_value(mock_dataset_entry_repository); b.bind::(); diff --git a/src/domain/flow-system/domain/Cargo.toml b/src/domain/flow-system/domain/Cargo.toml index 136ebb050..5363f95aa 100644 --- a/src/domain/flow-system/domain/Cargo.toml +++ b/src/domain/flow-system/domain/Cargo.toml @@ -41,8 +41,6 @@ sqlx = { version = "0.8", default-features = false, features = ["macros"] } strum = { version = "0.26", features = ["derive"] } thiserror = { version = "1", default-features = false } tokio-stream = { version = "0.1", default-features = false } -tracing = { version = "0.1", default-features = false } -url = { version = "2", default-features = false } # TODO: Make serde optional serde = { version = "1", default-features = false, features = ["derive"] } @@ -50,5 +48,5 @@ serde_with = { version = "3", default-features = false, features = [ "chrono_0_4", ] } + [dev-dependencies] -datafusion = { version = "42", default-features = false } diff --git a/src/domain/flow-system/services/Cargo.toml b/src/domain/flow-system/services/Cargo.toml index ab5afe299..5eb04093e 100644 --- a/src/domain/flow-system/services/Cargo.toml +++ b/src/domain/flow-system/services/Cargo.toml @@ -40,16 +40,8 @@ async-trait = { version = "0.1", default-features = false } chrono = { version = "0.4", default-features = false } dill = "0.9" futures = "0.3" -thiserror = { version = "1", default-features = false } tokio = { version = "1", default-features = false, features = [] } -tokio-stream = { version = "0.1", default-features = false } tracing = { version = "0.1", default-features = false } -url = { version = "2", default-features = false, features = ["serde"] } - -# TODO: Make serde optional -serde = { version = "1", default-features = false, features = ["derive"] } -serde_json = "1" -serde_with = { version = "3", default-features = false } [dev-dependencies] @@ -67,4 +59,3 @@ pretty_assertions = "1" tempfile = "3" test-log = { version = "0.2", features = ["trace"] } tokio = { version = "1", default-features = false, features = ["rt", "macros"] } -tracing-subscriber = { version = "0.3", features = ["env-filter"] } diff --git a/src/domain/opendatafabric/Cargo.toml b/src/domain/opendatafabric/Cargo.toml index ccc45a632..a4cf9f999 100644 --- a/src/domain/opendatafabric/Cargo.toml +++ b/src/domain/opendatafabric/Cargo.toml @@ -38,7 +38,6 @@ multiformats = { workspace = true } chrono = { version = "0.4", features = ["serde"] } digest = "0.10" -futures-core = "0.3" thiserror = { version = "1", default-features = false } bitflags = { version = "2", default-features = false } @@ -54,12 +53,10 @@ ed25519-dalek = { version = "2", default-features = false, features = [ "zeroize", "rand_core", ] } -rand = "0.8" # Serialization base64 = { version = "0.22", default-features = false, features = ["std"] } flatbuffers = "24" -hex = "0.4" serde = { version 
= "1", features = ["derive"] } serde_with = "3" serde_yaml = "0.9" diff --git a/src/domain/opendatafabric/tests/tests/test_dataset_id.rs b/src/domain/opendatafabric/tests/tests/test_dataset_id.rs index e9f35ac17..43263c7e5 100644 --- a/src/domain/opendatafabric/tests/tests/test_dataset_id.rs +++ b/src/domain/opendatafabric/tests/tests/test_dataset_id.rs @@ -43,7 +43,7 @@ fn test_did_string() { assert_eq!(value.key_type(), Multicodec::Ed25519Pub); assert_eq!( - value.as_did_str().to_string(), + value.to_string(), "did:odf:fed012e6fcce36701dc791488e0d0b1745cc1e33a4c1c9fcc41c63bd343dbbe0970e6", ); diff --git a/src/domain/task-system/domain/Cargo.toml b/src/domain/task-system/domain/Cargo.toml index db4b883ef..65ed0049e 100644 --- a/src/domain/task-system/domain/Cargo.toml +++ b/src/domain/task-system/domain/Cargo.toml @@ -37,3 +37,6 @@ serde = { version = "1", features = ["derive"] } sqlx = { version = "0.8", default-features = false, features = ["macros"] } thiserror = { version = "1", default-features = false } tokio-stream = { version = "0.1", default-features = false } + + +[dev-dependencies] diff --git a/src/domain/task-system/services/Cargo.toml b/src/domain/task-system/services/Cargo.toml index abbae3323..866fb0a6c 100644 --- a/src/domain/task-system/services/Cargo.toml +++ b/src/domain/task-system/services/Cargo.toml @@ -28,13 +28,11 @@ init-on-startup = { workspace = true } internal-error = { workspace = true } messaging-outbox = { workspace = true } observability = { workspace = true } -opendatafabric = { workspace = true } kamu-core = { workspace = true } kamu-datasets = { workspace = true } time-source = { workspace = true } kamu-task-system = { workspace = true } -async-stream = "0.3" async-trait = { version = "0.1", default-features = false } dill = "0.9" futures = "0.3" diff --git a/src/domain/task-system/services/src/task_executor_impl.rs b/src/domain/task-system/services/src/task_executor_impl.rs index 3350e2b9d..48c626ca5 100644 --- a/src/domain/task-system/services/src/task_executor_impl.rs +++ b/src/domain/task-system/services/src/task_executor_impl.rs @@ -157,7 +157,7 @@ impl TaskExecutorImpl { ) .await?; - // Run task via ldefinition + // Run task via definition let task_run_result = self.task_runner.run_task(task_definition).await; // Deal with errors: we should not interrupt the main loop if task fails diff --git a/src/e2e/app/cli/common/src/e2e_harness.rs b/src/e2e/app/cli/common/src/e2e_harness.rs index 2ea8be8f9..df5d57e63 100644 --- a/src/e2e/app/cli/common/src/e2e_harness.rs +++ b/src/e2e/app/cli/common/src/e2e_harness.rs @@ -236,6 +236,7 @@ impl KamuCliApiServerHarness { is_multi_tenant, kamu_config, env_vars, + account: None, }) .await } diff --git a/src/e2e/app/cli/common/src/kamu_api_server_client_ext.rs b/src/e2e/app/cli/common/src/kamu_api_server_client_ext.rs index 925f939dd..7a4dc89a5 100644 --- a/src/e2e/app/cli/common/src/kamu_api_server_client_ext.rs +++ b/src/e2e/app/cli/common/src/kamu_api_server_client_ext.rs @@ -446,6 +446,17 @@ impl DatasetApi<'_> { } pub async fn create_dataset(&self, dataset_snapshot_yaml: &str) -> CreateDatasetResponse { + self.create_dataset_with_visibility(dataset_snapshot_yaml, true) + .await + } + + pub async fn create_dataset_with_visibility( + &self, + dataset_snapshot_yaml: &str, + is_public: bool, + ) -> CreateDatasetResponse { + let dataset_visibility_value = if is_public { "PUBLIC" } else { "PRIVATE" }; + let create_response = self .client .graphql_api_call( @@ -453,7 +464,7 @@ impl DatasetApi<'_> { r#" mutation { 
datasets { - createFromSnapshot(snapshot: "", snapshotFormat: YAML) { + createFromSnapshot(snapshot: "", snapshotFormat: YAML, datasetVisibility: "") { message ... on CreateDatasetResultSuccess { dataset { @@ -466,6 +477,7 @@ impl DatasetApi<'_> { "#, ) .replace("", dataset_snapshot_yaml) + .replace("", dataset_visibility_value) .as_str(), ) .await; diff --git a/src/e2e/app/cli/inmem/Cargo.toml b/src/e2e/app/cli/inmem/Cargo.toml index ac229fd24..52f625e57 100644 --- a/src/e2e/app/cli/inmem/Cargo.toml +++ b/src/e2e/app/cli/inmem/Cargo.toml @@ -29,7 +29,6 @@ kamu-cli-e2e-common = { workspace = true } kamu-cli-e2e-repo-tests = { workspace = true } indoc = "2" -paste = "1" test-group = { version = "1" } test-log = { version = "0.2", features = ["trace"] } tokio = { version = "1", default-features = false, features = [] } diff --git a/src/e2e/app/cli/inmem/tests/tests/commands/test_pull_command.rs b/src/e2e/app/cli/inmem/tests/tests/commands/test_pull_command.rs index 9874ef01c..bd8675a0d 100644 --- a/src/e2e/app/cli/inmem/tests/tests/commands/test_pull_command.rs +++ b/src/e2e/app/cli/inmem/tests/tests/commands/test_pull_command.rs @@ -102,7 +102,7 @@ kamu_cli_execute_command_e2e_test!( kamu_cli_execute_command_e2e_test!( storage = inmem, - fixture = kamu_cli_e2e_repo_tests::commands::test_pull_derivative_st, + fixture = kamu_cli_e2e_repo_tests::commands::test_pull_derivative_mt, options = Options::default() .with_multi_tenant() .with_frozen_system_time(), diff --git a/src/e2e/app/cli/mysql/Cargo.toml b/src/e2e/app/cli/mysql/Cargo.toml index 8ec60d9b1..c86fb6f76 100644 --- a/src/e2e/app/cli/mysql/Cargo.toml +++ b/src/e2e/app/cli/mysql/Cargo.toml @@ -29,7 +29,6 @@ kamu-cli-e2e-common = { workspace = true } kamu-cli-e2e-repo-tests = { workspace = true } indoc = "2" -paste = "1" sqlx = { version = "0.8", default-features = false, features = [ "macros", "mysql", diff --git a/src/e2e/app/cli/postgres/Cargo.toml b/src/e2e/app/cli/postgres/Cargo.toml index 7b84d8144..50bf40331 100644 --- a/src/e2e/app/cli/postgres/Cargo.toml +++ b/src/e2e/app/cli/postgres/Cargo.toml @@ -29,7 +29,6 @@ kamu-cli-e2e-common = { workspace = true } kamu-cli-e2e-repo-tests = { workspace = true } indoc = "2" -paste = "1" sqlx = { version = "0.8", default-features = false, features = [ "macros", "postgres", diff --git a/src/e2e/app/cli/postgres/tests/tests/commands/test_pull_command.rs b/src/e2e/app/cli/postgres/tests/tests/commands/test_pull_command.rs index a592b56a5..7c5ba5f21 100644 --- a/src/e2e/app/cli/postgres/tests/tests/commands/test_pull_command.rs +++ b/src/e2e/app/cli/postgres/tests/tests/commands/test_pull_command.rs @@ -102,7 +102,7 @@ kamu_cli_execute_command_e2e_test!( kamu_cli_execute_command_e2e_test!( storage = postgres, - fixture = kamu_cli_e2e_repo_tests::commands::test_pull_derivative_st, + fixture = kamu_cli_e2e_repo_tests::commands::test_pull_derivative_mt, options = Options::default() .with_multi_tenant() .with_frozen_system_time(), diff --git a/src/e2e/app/cli/repo-tests/Cargo.toml b/src/e2e/app/cli/repo-tests/Cargo.toml index 9be6242cd..659a88727 100644 --- a/src/e2e/app/cli/repo-tests/Cargo.toml +++ b/src/e2e/app/cli/repo-tests/Cargo.toml @@ -27,7 +27,6 @@ normal = ["kamu-cli"] [dependencies] http-common = { workspace = true } -internal-error = { workspace = true } kamu = { workspace = true, features = ["testing"] } kamu-accounts = { workspace = true } kamu-adapter-http = { workspace = true } @@ -47,7 +46,6 @@ pretty_assertions = { version = "1" } reqwest = { version = "0.12", default-features = 
false, features = [] } serde_json = { version = "1", default-features = false } tempfile = { version = "3" } -tokio = { version = "1", default-features = false, features = [] } url = { version = "2", default-features = false } diff --git a/src/e2e/app/cli/repo-tests/src/commands/test_delete_command.rs b/src/e2e/app/cli/repo-tests/src/commands/test_delete_command.rs index 58ff4a35c..6913f1d89 100644 --- a/src/e2e/app/cli/repo-tests/src/commands/test_delete_command.rs +++ b/src/e2e/app/cli/repo-tests/src/commands/test_delete_command.rs @@ -7,16 +7,7 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -use kamu_cli_e2e_common::{ - KamuApiServerClient, - KamuApiServerClientExt, - DATASET_DERIVATIVE_LEADERBOARD_SNAPSHOT_STR, - DATASET_ROOT_PLAYER_SCORES_INGEST_DATA_NDJSON_CHUNK_1, - DATASET_ROOT_PLAYER_SCORES_INGEST_DATA_NDJSON_CHUNK_2, - DATASET_ROOT_PLAYER_SCORES_INGEST_DATA_NDJSON_CHUNK_3, - DATASET_ROOT_PLAYER_SCORES_INGEST_DATA_NDJSON_CHUNK_4, - DATASET_ROOT_PLAYER_SCORES_SNAPSHOT_STR, -}; +use kamu_cli_e2e_common::*; use kamu_cli_puppet::extensions::KamuCliPuppetExt; use kamu_cli_puppet::KamuCliPuppet; use opendatafabric as odf; @@ -176,10 +167,10 @@ pub async fn test_delete_dataset_all(kamu: KamuCliPuppet) { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub async fn test_delete_warning(mut kamu_node_api_client: KamuApiServerClient) { - let kamu: KamuCliPuppet = KamuCliPuppet::new_workspace_tmp(true).await; + let kamu = KamuCliPuppet::new_workspace_tmp_multi_tenant().await; let ds_name_str = "player-scores"; - let ds_name: odf::DatasetName = odf::DatasetName::new_unchecked(ds_name_str); + let ds_name = odf::DatasetName::new_unchecked(ds_name_str); kamu.execute_with_input( ["add", "--stdin", "--name", ds_name_str], diff --git a/src/e2e/app/cli/repo-tests/src/rest_api/test_accounts.rs b/src/e2e/app/cli/repo-tests/src/rest_api/test_accounts.rs index 2333020ba..d4eeba4dd 100644 --- a/src/e2e/app/cli/repo-tests/src/rest_api/test_accounts.rs +++ b/src/e2e/app/cli/repo-tests/src/rest_api/test_accounts.rs @@ -10,13 +10,7 @@ use std::assert_matches::assert_matches; use kamu_accounts::{DEFAULT_ACCOUNT_ID, DEFAULT_ACCOUNT_NAME}; -use kamu_cli_e2e_common::{ - AccountMeError, - KamuApiServerClient, - KamuApiServerClientExt, - E2E_USER_ACCOUNT_NAME_STR, -}; -use opendatafabric as odf; +use kamu_cli_e2e_common::*; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -38,7 +32,7 @@ pub async fn test_accounts_me_e2e_user(mut kamu_api_server_client: KamuApiServer assert_matches!( kamu_api_server_client.account().me().await, Ok(response) - if response.account_name == odf::AccountName::new_unchecked(E2E_USER_ACCOUNT_NAME_STR) + if response.account_name == *E2E_USER_ACCOUNT_NAME ); } diff --git a/src/e2e/app/cli/repo-tests/src/rest_api/test_auth.rs b/src/e2e/app/cli/repo-tests/src/rest_api/test_auth.rs index 8a3cb8aad..32d3f9a63 100644 --- a/src/e2e/app/cli/repo-tests/src/rest_api/test_auth.rs +++ b/src/e2e/app/cli/repo-tests/src/rest_api/test_auth.rs @@ -86,33 +86,7 @@ pub async fn test_login_enabled_methods(kamu_api_server_client: KamuApiServerCli //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub async fn test_login_dummy_github(kamu_api_server_client: KamuApiServerClient) { - // 1. 
No user - kamu_api_server_client - .graphql_api_call_assert( - indoc::indoc!( - r#" - query { - accounts { - byName(name: "e2e-user") { - accountName - } - } - } - "#, - ), - Ok(indoc::indoc!( - r#" - { - "accounts": { - "byName": null - } - } - "#, - )), - ) - .await; - - // 2. Create a user + // Create a user kamu_api_server_client .graphql_api_call_assert( indoc::indoc!( @@ -144,7 +118,7 @@ pub async fn test_login_dummy_github(kamu_api_server_client: KamuApiServerClient ) .await; - // 3. Verify that the user has been created + // Verify that the user has been created kamu_api_server_client .graphql_api_call_assert( indoc::indoc!( diff --git a/src/e2e/app/cli/repo-tests/src/test_smart_transfer_protocol.rs b/src/e2e/app/cli/repo-tests/src/test_smart_transfer_protocol.rs index 11a2ae7eb..eec7b0cc2 100644 --- a/src/e2e/app/cli/repo-tests/src/test_smart_transfer_protocol.rs +++ b/src/e2e/app/cli/repo-tests/src/test_smart_transfer_protocol.rs @@ -11,18 +11,7 @@ use std::str::FromStr; use chrono::DateTime; use kamu::testing::LocalS3Server; -use kamu_cli_e2e_common::{ - KamuApiServerClient, - KamuApiServerClientExt, - DATASET_DERIVATIVE_LEADERBOARD_NAME, - DATASET_DERIVATIVE_LEADERBOARD_SNAPSHOT_STR, - DATASET_ROOT_PLAYER_NAME, - DATASET_ROOT_PLAYER_SCORES_INGEST_DATA_NDJSON_CHUNK_1, - DATASET_ROOT_PLAYER_SCORES_INGEST_DATA_NDJSON_CHUNK_2, - DATASET_ROOT_PLAYER_SCORES_SNAPSHOT_STR, - E2E_USER_ACCOUNT_NAME, - E2E_USER_ACCOUNT_NAME_STR, -}; +use kamu_cli_e2e_common::*; use kamu_cli_puppet::extensions::{KamuCliPuppetExt, RepoAlias}; use kamu_cli_puppet::KamuCliPuppet; use opendatafabric as odf; @@ -171,6 +160,8 @@ async fn test_smart_push_smart_pull_sequence( dataset_alias.dataset_name.as_str(), "--to", kamu_api_server_dataset_endpoint.as_str(), + "--visibility", + "public", ], None, Some(["1 dataset(s) pushed"]), @@ -323,6 +314,8 @@ async fn test_smart_push_force_smart_pull_force( dataset_alias.dataset_name.as_str(), "--to", kamu_api_server_dataset_endpoint.as_str(), + "--visibility", + "public", ], None, Some(["1 dataset(s) pushed"]), @@ -476,6 +469,8 @@ async fn test_smart_push_no_alias_smart_pull_no_alias( "--to", kamu_api_server_dataset_endpoint.as_str(), "--no-alias", + "--visibility", + "public", ], None, Some(["1 dataset(s) pushed"]), @@ -607,9 +602,7 @@ async fn test_smart_pull_as( kamu_api_server_client .dataset() - .create_player_scores_dataset_with_data(Some(odf::AccountName::new_unchecked( - E2E_USER_ACCOUNT_NAME_STR, - ))) + .create_player_scores_dataset_with_data(Some(E2E_USER_ACCOUNT_NAME.clone())) .await; { @@ -729,6 +722,8 @@ async fn test_smart_push_all_smart_pull_all( root_dataset_alias.dataset_name.as_str(), "--to", kamu_api_server_root_dataset_endpoint.as_str(), + "--visibility", + "public", ], None, Some(["1 dataset(s) pushed"]), @@ -747,6 +742,8 @@ async fn test_smart_push_all_smart_pull_all( derivative_dataset_alias.dataset_name.as_str(), "--to", kamu_api_server_derivative_dataset_endpoint.as_str(), + "--visibility", + "public", ], None, Some(["1 dataset(s) pushed"]), @@ -1011,6 +1008,8 @@ async fn test_smart_push_recursive_smart_pull_recursive( root_dataset_alias.dataset_name.as_str(), "--to", kamu_api_server_root_dataset_endpoint.as_str(), + "--visibility", + "public", ], None, Some(["1 dataset(s) pushed"]), @@ -1384,7 +1383,12 @@ async fn test_smart_push_to_registered_repo_smart_pull( // 2.3. 
Push the dataset to the API server without too argument kamu_in_push_workspace .assert_success_command_execution( - ["push", dataset_alias.dataset_name.as_str()], + [ + "push", + "--visibility", + "public", + dataset_alias.dataset_name.as_str(), + ], None, Some(["1 dataset(s) pushed"]), ) diff --git a/src/e2e/app/cli/sqlite/Cargo.toml b/src/e2e/app/cli/sqlite/Cargo.toml index c4b1b0e16..21141d282 100644 --- a/src/e2e/app/cli/sqlite/Cargo.toml +++ b/src/e2e/app/cli/sqlite/Cargo.toml @@ -29,7 +29,6 @@ kamu-cli-e2e-common = { workspace = true } kamu-cli-e2e-repo-tests = { workspace = true } indoc = "2" -paste = "1" sqlx = { version = "0.8", default-features = false, features = [ "macros", "sqlite", diff --git a/src/e2e/app/cli/sqlite/tests/tests/commands/test_pull_command.rs b/src/e2e/app/cli/sqlite/tests/tests/commands/test_pull_command.rs index 52e266372..fda398f11 100644 --- a/src/e2e/app/cli/sqlite/tests/tests/commands/test_pull_command.rs +++ b/src/e2e/app/cli/sqlite/tests/tests/commands/test_pull_command.rs @@ -102,7 +102,7 @@ kamu_cli_execute_command_e2e_test!( kamu_cli_execute_command_e2e_test!( storage = sqlite, - fixture = kamu_cli_e2e_repo_tests::commands::test_pull_derivative_st, + fixture = kamu_cli_e2e_repo_tests::commands::test_pull_derivative_mt, options = Options::default() .with_multi_tenant() .with_frozen_system_time(), diff --git a/src/infra/accounts/inmem/Cargo.toml b/src/infra/accounts/inmem/Cargo.toml index 2c9411e46..81e86bb13 100644 --- a/src/infra/accounts/inmem/Cargo.toml +++ b/src/infra/accounts/inmem/Cargo.toml @@ -30,8 +30,7 @@ internal-error = { workspace = true } async-trait = { version = "0.1", default-features = false } chrono = { version = "0.4", default-features = false } dill = "0.9" -thiserror = { version = "1", default-features = false } -tracing = { version = "0.1", default-features = false } +futures = { version = "0.3", default-features = false } uuid = "1" diff --git a/src/infra/accounts/inmem/src/repos/inmem_account_repository.rs b/src/infra/accounts/inmem/src/repos/inmem_account_repository.rs index ca0e09ca2..829ecaf49 100644 --- a/src/infra/accounts/inmem/src/repos/inmem_account_repository.rs +++ b/src/infra/accounts/inmem/src/repos/inmem_account_repository.rs @@ -10,6 +10,7 @@ use std::collections::HashMap; use std::sync::{Arc, Mutex}; +use database_common::PaginationOpts; use dill::*; use opendatafabric::{AccountID, AccountName}; @@ -60,6 +61,14 @@ impl InMemoryAccountRepository { #[async_trait::async_trait] impl AccountRepository for InMemoryAccountRepository { + async fn accounts_count(&self) -> Result { + let readable_state = self.state.lock().unwrap(); + + let accounts_count = readable_state.accounts_by_id.len(); + + Ok(accounts_count) + } + async fn create_account(&self, account: &Account) -> Result<(), CreateAccountError> { let mut guard = self.state.lock().unwrap(); if guard.accounts_by_id.contains_key(&account.id) { @@ -105,6 +114,23 @@ impl AccountRepository for InMemoryAccountRepository { Ok(()) } + async fn get_accounts(&self, pagination: PaginationOpts) -> AccountPageStream { + let dataset_entries_page = { + let readable_state = self.state.lock().unwrap(); + + readable_state + .accounts_by_id + .values() + .skip(pagination.offset) + .take(pagination.limit) + .cloned() + .map(Ok) + .collect::>() + }; + + Box::pin(futures::stream::iter(dataset_entries_page)) + } + async fn get_account_by_id( &self, account_id: &AccountID, diff --git 
a/src/infra/accounts/mysql/.sqlx/query-6e609dea1564effee6f02e41a1dcb172b12f77a3fa7eccd32563238043409df1.json b/src/infra/accounts/mysql/.sqlx/query-6e609dea1564effee6f02e41a1dcb172b12f77a3fa7eccd32563238043409df1.json new file mode 100644 index 000000000..ca91fd9a4 --- /dev/null +++ b/src/infra/accounts/mysql/.sqlx/query-6e609dea1564effee6f02e41a1dcb172b12f77a3fa7eccd32563238043409df1.json @@ -0,0 +1,24 @@ +{ + "db_name": "MySQL", + "query": "\n SELECT COUNT(*)\n FROM accounts\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "COUNT(*)", + "type_info": { + "type": "LongLong", + "flags": "NOT_NULL | BINARY", + "max_size": 21 + } + } + ], + "parameters": { + "Right": 0 + }, + "nullable": [ + false + ] + }, + "hash": "6e609dea1564effee6f02e41a1dcb172b12f77a3fa7eccd32563238043409df1" +} diff --git a/src/infra/accounts/mysql/.sqlx/query-e6ae81849ab0194ecdfa3e69720c0635e35b499be6a025b71513d51b41ee1fb8.json b/src/infra/accounts/mysql/.sqlx/query-e6ae81849ab0194ecdfa3e69720c0635e35b499be6a025b71513d51b41ee1fb8.json new file mode 100644 index 000000000..3cab6db85 --- /dev/null +++ b/src/infra/accounts/mysql/.sqlx/query-e6ae81849ab0194ecdfa3e69720c0635e35b499be6a025b71513d51b41ee1fb8.json @@ -0,0 +1,114 @@ +{ + "db_name": "MySQL", + "query": "\n SELECT id AS \"id: _\",\n account_name,\n email AS \"email?\",\n display_name,\n account_type AS \"account_type: AccountType\",\n avatar_url,\n registered_at AS \"registered_at: _\",\n is_admin AS \"is_admin: _\",\n provider,\n provider_identity_key\n FROM accounts\n ORDER BY registered_at ASC\n LIMIT ? OFFSET ?\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id: _", + "type_info": { + "type": "VarString", + "flags": "NOT_NULL | PRIMARY_KEY | NO_DEFAULT_VALUE", + "max_size": 400 + } + }, + { + "ordinal": 1, + "name": "account_name", + "type_info": { + "type": "VarString", + "flags": "NOT_NULL | UNIQUE_KEY | NO_DEFAULT_VALUE", + "max_size": 400 + } + }, + { + "ordinal": 2, + "name": "email?", + "type_info": { + "type": "VarString", + "flags": "UNIQUE_KEY", + "max_size": 1280 + } + }, + { + "ordinal": 3, + "name": "display_name", + "type_info": { + "type": "VarString", + "flags": "NOT_NULL | NO_DEFAULT_VALUE", + "max_size": 800 + } + }, + { + "ordinal": 4, + "name": "account_type: AccountType", + "type_info": { + "type": "String", + "flags": "NOT_NULL | ENUM | NO_DEFAULT_VALUE", + "max_size": 48 + } + }, + { + "ordinal": 5, + "name": "avatar_url", + "type_info": { + "type": "VarString", + "flags": "", + "max_size": 4000 + } + }, + { + "ordinal": 6, + "name": "registered_at: _", + "type_info": { + "type": "Timestamp", + "flags": "NOT_NULL | UNSIGNED | BINARY | NO_DEFAULT_VALUE", + "max_size": 26 + } + }, + { + "ordinal": 7, + "name": "is_admin: _", + "type_info": { + "type": "Tiny", + "flags": "NOT_NULL | NO_DEFAULT_VALUE", + "max_size": 4 + } + }, + { + "ordinal": 8, + "name": "provider", + "type_info": { + "type": "VarString", + "flags": "NOT_NULL | NO_DEFAULT_VALUE", + "max_size": 100 + } + }, + { + "ordinal": 9, + "name": "provider_identity_key", + "type_info": { + "type": "VarString", + "flags": "NOT_NULL | UNIQUE_KEY | NO_DEFAULT_VALUE", + "max_size": 400 + } + } + ], + "parameters": { + "Right": 2 + }, + "nullable": [ + false, + false, + true, + false, + false, + true, + false, + false, + false, + false + ] + }, + "hash": "e6ae81849ab0194ecdfa3e69720c0635e35b499be6a025b71513d51b41ee1fb8" +} diff --git a/src/infra/accounts/mysql/Cargo.toml b/src/infra/accounts/mysql/Cargo.toml index 77e8d5af1..a9645e847 100644 --- 
a/src/infra/accounts/mysql/Cargo.toml +++ b/src/infra/accounts/mysql/Cargo.toml @@ -27,16 +27,17 @@ internal-error = { workspace = true } kamu-accounts = { workspace = true, features = ["sqlx"] } opendatafabric = { workspace = true, features = ["sqlx-mysql"] } +async-stream = { version = "0.3", default-features = false } async-trait = { version = "0.1", default-features = false } chrono = { version = "0.4", default-features = false } dill = "0.9" +futures = { version = "0.3", default-features = false } sqlx = { version = "0.8", default-features = false, features = [ "runtime-tokio-rustls", "macros", "mysql", "chrono", ] } -thiserror = { version = "1", default-features = false } tracing = { version = "0.1", default-features = false } uuid = "1" diff --git a/src/infra/accounts/mysql/src/repos/mysql_account_repository.rs b/src/infra/accounts/mysql/src/repos/mysql_account_repository.rs index 45f0dfd44..355f9cb59 100644 --- a/src/infra/accounts/mysql/src/repos/mysql_account_repository.rs +++ b/src/infra/accounts/mysql/src/repos/mysql_account_repository.rs @@ -7,7 +7,7 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -use database_common::{TransactionRef, TransactionRefT}; +use database_common::{PaginationOpts, TransactionRef, TransactionRefT}; use dill::{component, interface}; use internal_error::{ErrorIntoInternal, ResultIntoInternal}; use opendatafabric::{AccountID, AccountName}; @@ -34,6 +34,24 @@ impl MySqlAccountRepository { #[async_trait::async_trait] impl AccountRepository for MySqlAccountRepository { + async fn accounts_count(&self) -> Result { + let mut tr = self.transaction.lock().await; + + let connection_mut = tr.connection_mut().await?; + + let accounts_count = sqlx::query_scalar!( + r#" + SELECT COUNT(*) + FROM accounts + "#, + ) + .fetch_one(connection_mut) + .await + .int_err()?; + + Ok(usize::try_from(accounts_count).unwrap_or(0)) + } + async fn create_account(&self, account: &Account) -> Result<(), CreateAccountError> { let mut tr = self.transaction.lock().await; @@ -92,6 +110,45 @@ impl AccountRepository for MySqlAccountRepository { Ok(()) } + async fn get_accounts(&self, pagination: PaginationOpts) -> AccountPageStream { + Box::pin(async_stream::stream! { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let limit = i64::try_from(pagination.limit).int_err()?; + let offset = i64::try_from(pagination.offset).int_err()?; + + let mut query_stream = sqlx::query_as!( + AccountRowModel, + r#" + SELECT id AS "id: _", + account_name, + email AS "email?", + display_name, + account_type AS "account_type: AccountType", + avatar_url, + registered_at AS "registered_at: _", + is_admin AS "is_admin: _", + provider, + provider_identity_key + FROM accounts + ORDER BY registered_at ASC + LIMIT ? OFFSET ? + "#, + limit, + offset, + ) + .fetch(connection_mut) + .map_err(ErrorIntoInternal::int_err); + + use futures::TryStreamExt; + + while let Some(entry) = query_stream.try_next().await? 
{ + yield Ok(entry.into()); + } + }) + } + async fn get_account_by_id( &self, account_id: &AccountID, diff --git a/src/infra/accounts/postgres/.sqlx/query-4f94a12bf580d47e4a39d59fd6afc54b6e746d5ceaa2f44a88be3cefd845744a.json b/src/infra/accounts/postgres/.sqlx/query-4f94a12bf580d47e4a39d59fd6afc54b6e746d5ceaa2f44a88be3cefd845744a.json new file mode 100644 index 000000000..2a0ace571 --- /dev/null +++ b/src/infra/accounts/postgres/.sqlx/query-4f94a12bf580d47e4a39d59fd6afc54b6e746d5ceaa2f44a88be3cefd845744a.json @@ -0,0 +1,87 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT id AS \"id: _\",\n account_name,\n email,\n display_name,\n account_type AS \"account_type: AccountType\",\n avatar_url,\n registered_at,\n is_admin,\n provider,\n provider_identity_key\n FROM accounts\n ORDER BY registered_at ASC\n LIMIT $1 OFFSET $2\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "id: _", + "type_info": "Varchar" + }, + { + "ordinal": 1, + "name": "account_name", + "type_info": "Varchar" + }, + { + "ordinal": 2, + "name": "email", + "type_info": "Varchar" + }, + { + "ordinal": 3, + "name": "display_name", + "type_info": "Varchar" + }, + { + "ordinal": 4, + "name": "account_type: AccountType", + "type_info": { + "Custom": { + "name": "account_type", + "kind": { + "Enum": [ + "user", + "organization" + ] + } + } + } + }, + { + "ordinal": 5, + "name": "avatar_url", + "type_info": "Varchar" + }, + { + "ordinal": 6, + "name": "registered_at", + "type_info": "Timestamptz" + }, + { + "ordinal": 7, + "name": "is_admin", + "type_info": "Bool" + }, + { + "ordinal": 8, + "name": "provider", + "type_info": "Varchar" + }, + { + "ordinal": 9, + "name": "provider_identity_key", + "type_info": "Varchar" + } + ], + "parameters": { + "Left": [ + "Int8", + "Int8" + ] + }, + "nullable": [ + false, + false, + true, + false, + false, + true, + false, + false, + false, + false + ] + }, + "hash": "4f94a12bf580d47e4a39d59fd6afc54b6e746d5ceaa2f44a88be3cefd845744a" +} diff --git a/src/infra/accounts/postgres/.sqlx/query-6e609dea1564effee6f02e41a1dcb172b12f77a3fa7eccd32563238043409df1.json b/src/infra/accounts/postgres/.sqlx/query-6e609dea1564effee6f02e41a1dcb172b12f77a3fa7eccd32563238043409df1.json new file mode 100644 index 000000000..9cbec000d --- /dev/null +++ b/src/infra/accounts/postgres/.sqlx/query-6e609dea1564effee6f02e41a1dcb172b12f77a3fa7eccd32563238043409df1.json @@ -0,0 +1,20 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT COUNT(*)\n FROM accounts\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "count", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [] + }, + "nullable": [ + null + ] + }, + "hash": "6e609dea1564effee6f02e41a1dcb172b12f77a3fa7eccd32563238043409df1" +} diff --git a/src/infra/accounts/postgres/Cargo.toml b/src/infra/accounts/postgres/Cargo.toml index 844ef5b41..c3838a287 100644 --- a/src/infra/accounts/postgres/Cargo.toml +++ b/src/infra/accounts/postgres/Cargo.toml @@ -27,16 +27,17 @@ internal-error = { workspace = true } kamu-accounts = { workspace = true, features = ["sqlx"] } opendatafabric = { workspace = true, features = ["sqlx-postgres"] } +async-stream = { version = "0.3", default-features = false } async-trait = { version = "0.1", default-features = false } chrono = { version = "0.4", default-features = false } dill = "0.9" +futures = { version = "0.3", default-features = false } sqlx = { version = "0.8", default-features = false, features = [ "runtime-tokio-rustls", "macros", "postgres", "chrono", ] } -thiserror = { version = "1", 
default-features = false } tracing = { version = "0.1", default-features = false } uuid = "1" diff --git a/src/infra/accounts/postgres/src/repos/postgres_account_repository.rs b/src/infra/accounts/postgres/src/repos/postgres_account_repository.rs index d1d090863..669e8cc0d 100644 --- a/src/infra/accounts/postgres/src/repos/postgres_account_repository.rs +++ b/src/infra/accounts/postgres/src/repos/postgres_account_repository.rs @@ -7,7 +7,7 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -use database_common::{TransactionRef, TransactionRefT}; +use database_common::{PaginationOpts, TransactionRef, TransactionRefT}; use dill::{component, interface}; use internal_error::{ErrorIntoInternal, ResultIntoInternal}; use opendatafabric::{AccountID, AccountName}; @@ -33,6 +33,24 @@ impl PostgresAccountRepository { #[async_trait::async_trait] impl AccountRepository for PostgresAccountRepository { + async fn accounts_count(&self) -> Result { + let mut tr = self.transaction.lock().await; + + let connection_mut = tr.connection_mut().await?; + + let accounts_count = sqlx::query_scalar!( + r#" + SELECT COUNT(*) + FROM accounts + "#, + ) + .fetch_one(connection_mut) + .await + .int_err()?; + + Ok(usize::try_from(accounts_count.unwrap()).unwrap()) + } + async fn create_account(&self, account: &Account) -> Result<(), CreateAccountError> { let mut tr = self.transaction.lock().await; @@ -86,6 +104,45 @@ impl AccountRepository for PostgresAccountRepository { Ok(()) } + async fn get_accounts(&self, pagination: PaginationOpts) -> AccountPageStream { + Box::pin(async_stream::stream! { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let limit = i64::try_from(pagination.limit).int_err()?; + let offset = i64::try_from(pagination.offset).int_err()?; + + let mut query_stream = sqlx::query_as!( + AccountRowModel, + r#" + SELECT id AS "id: _", + account_name, + email, + display_name, + account_type AS "account_type: AccountType", + avatar_url, + registered_at, + is_admin, + provider, + provider_identity_key + FROM accounts + ORDER BY registered_at ASC + LIMIT $1 OFFSET $2 + "#, + limit, + offset, + ) + .fetch(connection_mut) + .map_err(ErrorIntoInternal::int_err); + + use futures::TryStreamExt; + + while let Some(account_row_model) = query_stream.try_next().await? 
{ + yield Ok(account_row_model.into()); + } + }) + } + async fn get_account_by_id( &self, account_id: &AccountID, diff --git a/src/infra/accounts/sqlite/.sqlx/query-67ddd71595415860d71c398b5db2103074dafdfc2250b9a12b63d3ef1be598c7.json b/src/infra/accounts/sqlite/.sqlx/query-67ddd71595415860d71c398b5db2103074dafdfc2250b9a12b63d3ef1be598c7.json new file mode 100644 index 000000000..0486da2ab --- /dev/null +++ b/src/infra/accounts/sqlite/.sqlx/query-67ddd71595415860d71c398b5db2103074dafdfc2250b9a12b63d3ef1be598c7.json @@ -0,0 +1,74 @@ +{ + "db_name": "SQLite", + "query": "\n SELECT id AS \"id: _\",\n account_name,\n email,\n display_name,\n account_type AS \"account_type: AccountType\",\n avatar_url,\n registered_at AS \"registered_at: _\",\n is_admin AS \"is_admin: _\",\n provider,\n provider_identity_key\n FROM accounts\n ORDER BY registered_at ASC\n LIMIT $1 OFFSET $2\n ", + "describe": { + "columns": [ + { + "name": "id: _", + "ordinal": 0, + "type_info": "Text" + }, + { + "name": "account_name", + "ordinal": 1, + "type_info": "Text" + }, + { + "name": "email", + "ordinal": 2, + "type_info": "Text" + }, + { + "name": "display_name", + "ordinal": 3, + "type_info": "Text" + }, + { + "name": "account_type: AccountType", + "ordinal": 4, + "type_info": "Text" + }, + { + "name": "avatar_url", + "ordinal": 5, + "type_info": "Text" + }, + { + "name": "registered_at: _", + "ordinal": 6, + "type_info": "Null" + }, + { + "name": "is_admin: _", + "ordinal": 7, + "type_info": "Integer" + }, + { + "name": "provider", + "ordinal": 8, + "type_info": "Text" + }, + { + "name": "provider_identity_key", + "ordinal": 9, + "type_info": "Text" + } + ], + "parameters": { + "Right": 2 + }, + "nullable": [ + false, + false, + true, + false, + false, + true, + false, + false, + false, + false + ] + }, + "hash": "67ddd71595415860d71c398b5db2103074dafdfc2250b9a12b63d3ef1be598c7" +} diff --git a/src/infra/accounts/sqlite/.sqlx/query-6e609dea1564effee6f02e41a1dcb172b12f77a3fa7eccd32563238043409df1.json b/src/infra/accounts/sqlite/.sqlx/query-6e609dea1564effee6f02e41a1dcb172b12f77a3fa7eccd32563238043409df1.json new file mode 100644 index 000000000..bcb45ddf5 --- /dev/null +++ b/src/infra/accounts/sqlite/.sqlx/query-6e609dea1564effee6f02e41a1dcb172b12f77a3fa7eccd32563238043409df1.json @@ -0,0 +1,20 @@ +{ + "db_name": "SQLite", + "query": "\n SELECT COUNT(*)\n FROM accounts\n ", + "describe": { + "columns": [ + { + "name": "COUNT(*)", + "ordinal": 0, + "type_info": "Integer" + } + ], + "parameters": { + "Right": 0 + }, + "nullable": [ + false + ] + }, + "hash": "6e609dea1564effee6f02e41a1dcb172b12f77a3fa7eccd32563238043409df1" +} diff --git a/src/infra/accounts/sqlite/Cargo.toml b/src/infra/accounts/sqlite/Cargo.toml index a4c62e431..cb2c90879 100644 --- a/src/infra/accounts/sqlite/Cargo.toml +++ b/src/infra/accounts/sqlite/Cargo.toml @@ -27,16 +27,17 @@ internal-error = { workspace = true } kamu-accounts = { workspace = true, features = ["sqlx"] } opendatafabric = { workspace = true, features = ["sqlx-sqlite"] } +async-stream = { version = "0.3", default-features = false } async-trait = { version = "0.1", default-features = false } chrono = { version = "0.4", default-features = false } dill = "0.9" +futures = { version = "0.3", default-features = false } sqlx = { version = "0.8", default-features = false, features = [ "runtime-tokio-rustls", "macros", "sqlite", "chrono", ] } -thiserror = { version = "1", default-features = false } tracing = { version = "0.1", default-features = false } uuid = "1" diff --git 
a/src/infra/accounts/sqlite/src/repos/sqlite_account_repository.rs b/src/infra/accounts/sqlite/src/repos/sqlite_account_repository.rs index d4c494051..dad5ef75b 100644 --- a/src/infra/accounts/sqlite/src/repos/sqlite_account_repository.rs +++ b/src/infra/accounts/sqlite/src/repos/sqlite_account_repository.rs @@ -7,7 +7,7 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -use database_common::{TransactionRef, TransactionRefT}; +use database_common::{PaginationOpts, TransactionRef, TransactionRefT}; use dill::{component, interface}; use internal_error::{ErrorIntoInternal, ResultIntoInternal}; use opendatafabric::{AccountID, AccountName}; @@ -34,6 +34,24 @@ impl SqliteAccountRepository { #[async_trait::async_trait] impl AccountRepository for SqliteAccountRepository { + async fn accounts_count(&self) -> Result { + let mut tr = self.transaction.lock().await; + + let connection_mut = tr.connection_mut().await?; + + let accounts_count = sqlx::query_scalar!( + r#" + SELECT COUNT(*) + FROM accounts + "#, + ) + .fetch_one(connection_mut) + .await + .int_err()?; + + Ok(usize::try_from(accounts_count).unwrap_or(0)) + } + async fn create_account(&self, account: &Account) -> Result<(), CreateAccountError> { let mut tr = self.transaction.lock().await; @@ -104,6 +122,45 @@ impl AccountRepository for SqliteAccountRepository { Ok(()) } + async fn get_accounts(&self, pagination: PaginationOpts) -> AccountPageStream { + Box::pin(async_stream::stream! { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let limit = i64::try_from(pagination.limit).int_err()?; + let offset = i64::try_from(pagination.offset).int_err()?; + + let mut query_stream = sqlx::query_as!( + AccountRowModel, + r#" + SELECT id AS "id: _", + account_name, + email, + display_name, + account_type AS "account_type: AccountType", + avatar_url, + registered_at AS "registered_at: _", + is_admin AS "is_admin: _", + provider, + provider_identity_key + FROM accounts + ORDER BY registered_at ASC + LIMIT $1 OFFSET $2 + "#, + limit, + offset, + ) + .fetch(connection_mut) + .map_err(ErrorIntoInternal::int_err); + + use futures::TryStreamExt; + + while let Some(account_row_model) = query_stream.try_next().await? 
{ + yield Ok(account_row_model.into()); + } + }) + } + async fn get_account_by_id( &self, account_id: &AccountID, diff --git a/src/infra/auth-rebac/inmem/src/repos/inmem_rebac_repository.rs b/src/infra/auth-rebac/inmem/src/repos/inmem_rebac_repository.rs index 8f2ab9a25..641929eb4 100644 --- a/src/infra/auth-rebac/inmem/src/repos/inmem_rebac_repository.rs +++ b/src/infra/auth-rebac/inmem/src/repos/inmem_rebac_repository.rs @@ -22,6 +22,7 @@ use kamu_auth_rebac::{ GetEntityPropertiesError, GetRelationsBetweenEntitiesError, InsertEntitiesRelationError, + PropertiesCountError, PropertyName, PropertyValue, RebacRepository, @@ -77,6 +78,14 @@ impl InMemoryRebacRepository { #[async_trait::async_trait] impl RebacRepository for InMemoryRebacRepository { + async fn properties_count(&self) -> Result { + let readable_state = self.state.read().await; + + let count = readable_state.entities_properties_map.len(); + + Ok(count) + } + async fn set_entity_property( &self, entity: &Entity, @@ -157,6 +166,34 @@ impl RebacRepository for InMemoryRebacRepository { Ok(properties) } + async fn get_entity_properties_by_ids( + &self, + entities: &[Entity], + ) -> Result, GetEntityPropertiesError> { + let entities_set = entities.iter().cloned().collect::>(); + + let readable_state = self.state.read().await; + + let entities_properties = readable_state + .entities_properties_map + .iter() + .filter(|(entity, _)| entities_set.contains(entity)) + .map(|(entity, entity_properties)| { + entity_properties + .iter() + .map(|(property_name, property_value)| { + (entity.clone(), *property_name, property_value.clone()) + }) + .collect::>() + }) + .fold(Vec::new(), |mut acc, entity_properties| { + acc.extend(entity_properties); + acc + }); + + Ok(entities_properties) + } + async fn insert_entities_relation( &self, subject_entity: &Entity, diff --git a/src/infra/auth-rebac/postgres/.sqlx/query-4521f606dd6c679519b7545fd2215f28850509bf940b37d53f6af91922a9c030.json b/src/infra/auth-rebac/postgres/.sqlx/query-4521f606dd6c679519b7545fd2215f28850509bf940b37d53f6af91922a9c030.json new file mode 100644 index 000000000..d047b96da --- /dev/null +++ b/src/infra/auth-rebac/postgres/.sqlx/query-4521f606dd6c679519b7545fd2215f28850509bf940b37d53f6af91922a9c030.json @@ -0,0 +1,20 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT COUNT(*)\n FROM auth_rebac_properties\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "count", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [] + }, + "nullable": [ + null + ] + }, + "hash": "4521f606dd6c679519b7545fd2215f28850509bf940b37d53f6af91922a9c030" +} diff --git a/src/infra/auth-rebac/postgres/Cargo.toml b/src/infra/auth-rebac/postgres/Cargo.toml index 6c8a36b43..ced872985 100644 --- a/src/infra/auth-rebac/postgres/Cargo.toml +++ b/src/infra/auth-rebac/postgres/Cargo.toml @@ -34,7 +34,6 @@ sqlx = { version = "0.8", default-features = false, features = [ "postgres", "chrono", ] } -tokio = { version = "1", default-features = false, features = [] } [dev-dependencies] database-common-macros = { workspace = true } diff --git a/src/infra/auth-rebac/postgres/src/lib.rs b/src/infra/auth-rebac/postgres/src/lib.rs index 705a252e5..324139b5f 100644 --- a/src/infra/auth-rebac/postgres/src/lib.rs +++ b/src/infra/auth-rebac/postgres/src/lib.rs @@ -7,6 +7,8 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. 
+#![feature(iter_intersperse)] + // Re-exports pub use kamu_auth_rebac as domain; diff --git a/src/infra/auth-rebac/postgres/src/repos/postgres_rebac_repository.rs b/src/infra/auth-rebac/postgres/src/repos/postgres_rebac_repository.rs index 3e14ecc5a..0cf5d6413 100644 --- a/src/infra/auth-rebac/postgres/src/repos/postgres_rebac_repository.rs +++ b/src/infra/auth-rebac/postgres/src/repos/postgres_rebac_repository.rs @@ -7,10 +7,13 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. +use std::borrow::Cow; + use database_common::{TransactionRef, TransactionRefT}; use dill::{component, interface}; use internal_error::{ErrorIntoInternal, ResultIntoInternal}; use kamu_auth_rebac::*; +use sqlx::Row; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -32,6 +35,24 @@ impl PostgresRebacRepository { #[async_trait::async_trait] impl RebacRepository for PostgresRebacRepository { + async fn properties_count(&self) -> Result { + let mut tr = self.transaction.lock().await; + + let connection_mut = tr.connection_mut().await?; + + let properties_count = sqlx::query_scalar!( + r#" + SELECT COUNT(*) + FROM auth_rebac_properties + "#, + ) + .fetch_one(connection_mut) + .await + .int_err()?; + + Ok(usize::try_from(properties_count.unwrap()).unwrap()) + } + async fn set_entity_property( &self, entity: &Entity, @@ -151,6 +172,72 @@ impl RebacRepository for PostgresRebacRepository { .map_err(GetEntityPropertiesError::Internal) } + async fn get_entity_properties_by_ids( + &self, + entities: &[Entity], + ) -> Result, GetEntityPropertiesError> { + if entities.is_empty() { + return Ok(vec![]); + } + + let mut tr = self.transaction.lock().await; + + let connection_mut = tr.connection_mut().await?; + + let placeholder_list = { + (1..=entities.len()) + .map(|i| { + // i | idxs + // 1 | 1, 2 + // 2 | 3, 4 + // 3 | 5, 6 + // ... 
+ let entity_type_idx = i * 2 - 1; + let entity_id_idx = i * 2; + + format!("(${entity_type_idx},${entity_id_idx})") + }) + .intersperse(",".to_string()) + .collect::() + }; + + // TODO: replace it by macro once sqlx will support it + // https://github.com/launchbadge/sqlx/blob/main/FAQ.md#how-can-i-do-a-select--where-foo-in--query + let query_str = format!( + r#" + SELECT entity_type, entity_id, property_name, property_value + FROM auth_rebac_properties + WHERE (entity_type, entity_id) IN ({placeholder_list}) + "#, + ); + + let mut query = sqlx::query(&query_str); + for entity in entities { + query = query.bind(entity.entity_type); + query = query.bind(entity.entity_id.to_string()); + } + + let raw_rows = query.fetch_all(connection_mut).await.int_err()?; + let entity_properties: Vec<_> = raw_rows + .into_iter() + .map(|row| { + let entity_type = row.get_unchecked("entity_type"); + let entity_id = row.get_unchecked::("entity_id"); + let property_name = row.get_unchecked::("property_name").parse()?; + let property_value = Cow::Owned(row.get_unchecked("property_value")); + + Ok(( + Entity::new(entity_type, entity_id), + property_name, + property_value, + )) + }) + .collect::, _>>() + .map_err(GetEntityPropertiesError::Internal)?; + + Ok(entity_properties) + } + async fn insert_entities_relation( &self, subject_entity: &Entity, diff --git a/src/infra/auth-rebac/repo-tests/Cargo.toml b/src/infra/auth-rebac/repo-tests/Cargo.toml index f5c472d0d..4255b9aa8 100644 --- a/src/infra/auth-rebac/repo-tests/Cargo.toml +++ b/src/infra/auth-rebac/repo-tests/Cargo.toml @@ -25,4 +25,3 @@ doctest = false kamu-auth-rebac = { workspace = true } dill = "0.9" -tokio = { version = "1", default-features = false, features = ["macros"] } diff --git a/src/infra/auth-rebac/sqlite/.sqlx/query-4521f606dd6c679519b7545fd2215f28850509bf940b37d53f6af91922a9c030.json b/src/infra/auth-rebac/sqlite/.sqlx/query-4521f606dd6c679519b7545fd2215f28850509bf940b37d53f6af91922a9c030.json new file mode 100644 index 000000000..326acf7d6 --- /dev/null +++ b/src/infra/auth-rebac/sqlite/.sqlx/query-4521f606dd6c679519b7545fd2215f28850509bf940b37d53f6af91922a9c030.json @@ -0,0 +1,20 @@ +{ + "db_name": "SQLite", + "query": "\n SELECT COUNT(*)\n FROM auth_rebac_properties\n ", + "describe": { + "columns": [ + { + "name": "COUNT(*)", + "ordinal": 0, + "type_info": "Integer" + } + ], + "parameters": { + "Right": 0 + }, + "nullable": [ + false + ] + }, + "hash": "4521f606dd6c679519b7545fd2215f28850509bf940b37d53f6af91922a9c030" +} diff --git a/src/infra/auth-rebac/sqlite/Cargo.toml b/src/infra/auth-rebac/sqlite/Cargo.toml index 273f04c73..b63a3ca85 100644 --- a/src/infra/auth-rebac/sqlite/Cargo.toml +++ b/src/infra/auth-rebac/sqlite/Cargo.toml @@ -34,7 +34,7 @@ sqlx = { version = "0.8", default-features = false, features = [ "sqlite", "chrono", ] } -tokio = { version = "1", default-features = false, features = [] } + [dev-dependencies] database-common-macros = { workspace = true } diff --git a/src/infra/auth-rebac/sqlite/src/lib.rs b/src/infra/auth-rebac/sqlite/src/lib.rs index 705a252e5..324139b5f 100644 --- a/src/infra/auth-rebac/sqlite/src/lib.rs +++ b/src/infra/auth-rebac/sqlite/src/lib.rs @@ -7,6 +7,8 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. 
+#![feature(iter_intersperse)] + // Re-exports pub use kamu_auth_rebac as domain; diff --git a/src/infra/auth-rebac/sqlite/src/repos/sqlite_rebac_repository.rs b/src/infra/auth-rebac/sqlite/src/repos/sqlite_rebac_repository.rs index f5674edce..a5138ba0f 100644 --- a/src/infra/auth-rebac/sqlite/src/repos/sqlite_rebac_repository.rs +++ b/src/infra/auth-rebac/sqlite/src/repos/sqlite_rebac_repository.rs @@ -7,10 +7,13 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. +use std::borrow::Cow; + use database_common::{TransactionRef, TransactionRefT}; use dill::{component, interface}; use internal_error::{ErrorIntoInternal, ResultIntoInternal}; use kamu_auth_rebac::*; +use sqlx::Row; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -32,6 +35,24 @@ impl SqliteRebacRepository { #[async_trait::async_trait] impl RebacRepository for SqliteRebacRepository { + async fn properties_count(&self) -> Result { + let mut tr = self.transaction.lock().await; + + let connection_mut = tr.connection_mut().await?; + + let properties_count = sqlx::query_scalar!( + r#" + SELECT COUNT(*) + FROM auth_rebac_properties + "#, + ) + .fetch_one(connection_mut) + .await + .int_err()?; + + Ok(usize::try_from(properties_count).unwrap()) + } + async fn set_entity_property( &self, entity: &Entity, @@ -162,6 +183,70 @@ impl RebacRepository for SqliteRebacRepository { .map_err(GetEntityPropertiesError::Internal) } + async fn get_entity_properties_by_ids( + &self, + entities: &[Entity], + ) -> Result, GetEntityPropertiesError> { + if entities.is_empty() { + return Ok(vec![]); + } + + let mut tr = self.transaction.lock().await; + + let connection_mut = tr.connection_mut().await?; + + let placeholder_list = (1..=entities.len()) + .map(|i| { + // i | idxs + // 1 | 1, 2 + // 2 | 3, 4 + // 3 | 5, 6 + // ... 
+ let entity_type_idx = i * 2 - 1; + let entity_id_idx = i * 2; + + format!("(${entity_type_idx},${entity_id_idx})") + }) + .intersperse(",".to_string()) + .collect::(); + + // TODO: replace it by macro once sqlx will support it + // https://github.com/launchbadge/sqlx/blob/main/FAQ.md#how-can-i-do-a-select--where-foo-in--query + let query_str = format!( + r#" + SELECT entity_type, entity_id, property_name, property_value + FROM auth_rebac_properties + WHERE (entity_type, entity_id) IN ({placeholder_list}) + "#, + ); + + let mut query = sqlx::query(&query_str); + for entity in entities { + query = query.bind(entity.entity_type); + query = query.bind(entity.entity_id.to_string()); + } + + let raw_rows = query.fetch_all(connection_mut).await.int_err()?; + let entity_properties: Vec<_> = raw_rows + .into_iter() + .map(|row| { + let entity_type = row.get_unchecked("entity_type"); + let entity_id = row.get_unchecked::("entity_id"); + let property_name = row.get_unchecked::("property_name").parse()?; + let property_value = Cow::Owned(row.get_unchecked("property_value")); + + Ok(( + Entity::new(entity_type, entity_id), + property_name, + property_value, + )) + }) + .collect::, _>>() + .map_err(GetEntityPropertiesError::Internal)?; + + Ok(entity_properties) + } + async fn insert_entities_relation( &self, subject_entity: &Entity, diff --git a/src/infra/core/Cargo.toml b/src/infra/core/Cargo.toml index d9424e41c..43a9908e3 100644 --- a/src/infra/core/Cargo.toml +++ b/src/infra/core/Cargo.toml @@ -66,20 +66,17 @@ reqwest = { version = "0.12", default-features = false, features = [ "json", ] } ringbuf = "0.3" -secrecy = "0.10" zip = "2" # Data datafusion = { version = "42", default-features = false } digest = "0.10" object_store = { version = "0.11", features = ["aws"] } -parking_lot = { version = "0.12" } sha3 = "0.10" # Repositories aws-config = { version = "1" } aws-sdk-s3 = { version = "1" } -aws-smithy-http = { version = "0.60", features = ["rt-tokio"] } aws-smithy-types = { version = "1" } aws-credential-types = { version = "1" } trust-dns-resolver = "0.23" # TODO: Needed for DNSLink resolution with IPFS @@ -96,11 +93,8 @@ dashmap = { version = "6", default-features = false } dill = "0.9" futures = "0.3" glob = "0.3" # Used for glob fetch -hyper = "1" itertools = "0.13" libc = "0.2" # Signal names -like = { version = "0.3", default-features = false } -pin-project = "1" petgraph = { version = "0.6", default-features = false, features = [ "stable_graph", ] } @@ -120,7 +114,6 @@ tokio-util = { version = "0.7", default-features = false, features = [ ] } tracing = "0.1" url = { version = "2", features = ["serde"] } -walkdir = "2" # Http file server tower = "0.5" @@ -165,7 +158,7 @@ datafusion = { version = "42", default-features = false, features = [ "parquet", ] } filetime = "0.2" -fs_extra = "1.3" +fs_extra = "1.3" indoc = "2" mockall = { version = "0.13", default-features = false } nanoid = "0.4.0" @@ -174,7 +167,6 @@ pretty_assertions = { version = "1" } test-group = { version = "1" } test-log = { version = "0.2", features = ["trace"] } tokio = { version = "1", default-features = false, features = ["rt", "macros"] } -tracing-subscriber = { version = "0.3", features = ["env-filter"] } [[bench]] diff --git a/src/infra/core/src/dependency_graph_service_inmem.rs b/src/infra/core/src/dependency_graph_service_inmem.rs index 6651a671e..21266a5e5 100644 --- a/src/infra/core/src/dependency_graph_service_inmem.rs +++ b/src/infra/core/src/dependency_graph_service_inmem.rs @@ -305,7 +305,6 @@ impl 
DependencyGraphService for DependencyGraphServiceInMemory { ) -> Result { self.ensure_datasets_initially_scanned() .await - .int_err() .map_err(GetDependenciesError::Internal) .unwrap(); @@ -356,7 +355,6 @@ impl DependencyGraphService for DependencyGraphServiceInMemory { ) -> Result { self.ensure_datasets_initially_scanned() .await - .int_err() .map_err(GetDependenciesError::Internal)?; let upstream_node_datasets: Vec<_> = { diff --git a/src/infra/core/src/ingest/fetch_service/core.rs b/src/infra/core/src/ingest/fetch_service/core.rs index d0985d49f..3810f9a67 100644 --- a/src/infra/core/src/ingest/fetch_service/core.rs +++ b/src/infra/core/src/ingest/fetch_service/core.rs @@ -69,7 +69,6 @@ impl FetchService { } } - #[allow(unused_variables)] pub async fn fetch( &self, dataset_handle: &DatasetHandle, diff --git a/src/infra/core/src/testing/mock_dataset_action_authorizer.rs b/src/infra/core/src/testing/mock_dataset_action_authorizer.rs index 4f1fc6cd3..e7baae422 100644 --- a/src/infra/core/src/testing/mock_dataset_action_authorizer.rs +++ b/src/infra/core/src/testing/mock_dataset_action_authorizer.rs @@ -36,7 +36,10 @@ mockall::mock! { action: DatasetAction, ) -> Result<(), DatasetActionUnauthorizedError>; - async fn get_allowed_actions(&self, dataset_handle: &DatasetHandle) -> HashSet; + async fn get_allowed_actions( + &self, + dataset_handle: &DatasetHandle, + ) -> Result, InternalError>; async fn filter_datasets_allowing( &self, diff --git a/src/infra/core/tests/tests/test_dependency_graph_inmem.rs b/src/infra/core/tests/tests/test_dependency_graph_inmem.rs index 601d6a1d5..6dd980e13 100644 --- a/src/infra/core/tests/tests/test_dependency_graph_inmem.rs +++ b/src/infra/core/tests/tests/test_dependency_graph_inmem.rs @@ -776,7 +776,6 @@ impl DependencyGraphHarness { .dependency_graph_service .get_recursive_upstream_dependencies(dataset_ids) .await - .int_err() .unwrap() .collect() .await; diff --git a/src/infra/datasets/inmem/Cargo.toml b/src/infra/datasets/inmem/Cargo.toml index d5fa677bf..cea50d914 100644 --- a/src/infra/datasets/inmem/Cargo.toml +++ b/src/infra/datasets/inmem/Cargo.toml @@ -27,15 +27,10 @@ kamu-datasets = { workspace = true } opendatafabric = { workspace = true } internal-error = { workspace = true } -async-stream = "0.3" async-trait = { version = "0.1", default-features = false } -chrono = { version = "0.4", default-features = false } dill = "0.9" futures = "0.3" -secrecy = "0.10" -thiserror = { version = "1", default-features = false } tokio = { version = "1", default-features = false } -tracing = { version = "0.1", default-features = false } uuid = "1" diff --git a/src/infra/datasets/inmem/src/repos/inmem_dateset_entry_repository.rs b/src/infra/datasets/inmem/src/repos/inmem_dateset_entry_repository.rs index a0059d11f..a42dce5ef 100644 --- a/src/infra/datasets/inmem/src/repos/inmem_dateset_entry_repository.rs +++ b/src/infra/datasets/inmem/src/repos/inmem_dateset_entry_repository.rs @@ -8,13 +8,14 @@ // by the Apache License, Version 2.0. 
use std::collections::{BTreeMap, BTreeSet, HashMap}; -use std::sync::{Arc, Mutex}; +use std::sync::Arc; use database_common::PaginationOpts; use dill::*; use internal_error::InternalError; use kamu_datasets::*; use opendatafabric::{AccountID, DatasetID, DatasetName}; +use tokio::sync::RwLock; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -38,7 +39,7 @@ impl State { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub struct InMemoryDatasetEntryRepository { - state: Arc>, + state: Arc>, } #[component(pub)] @@ -47,7 +48,7 @@ pub struct InMemoryDatasetEntryRepository { impl InMemoryDatasetEntryRepository { pub fn new() -> Self { Self { - state: Arc::new(Mutex::new(State::new())), + state: Arc::new(RwLock::new(State::new())), } } } @@ -56,23 +57,32 @@ impl InMemoryDatasetEntryRepository { #[async_trait::async_trait] impl DatasetEntryRepository for InMemoryDatasetEntryRepository { - async fn dataset_entries_count(&self) -> Result { - let readable_state = self.state.lock().unwrap(); - Ok(readable_state.rows.len()) + async fn dataset_entries_count(&self) -> Result { + let readable_state = self.state.read().await; + + let dataset_entries_count = readable_state.rows.len(); + + Ok(dataset_entries_count) } async fn dataset_entries_count_by_owner_id( &self, owner_id: &AccountID, ) -> Result { - let readable_state = self.state.lock().unwrap(); - let owner_entires = readable_state.rows_by_owner.get(owner_id); - Ok(owner_entires.map_or(0, BTreeSet::len)) + let readable_state = self.state.read().await; + + let owner_entries = readable_state.rows_by_owner.get(owner_id); + + Ok(owner_entries.map_or(0, BTreeSet::len)) } - fn get_dataset_entries(&self, pagination: PaginationOpts) -> DatasetEntryStream { - let dataset_entries_page: Vec<_> = { - let readable_state = self.state.lock().unwrap(); + async fn get_dataset_entries<'a>( + &'a self, + pagination: PaginationOpts, + ) -> DatasetEntryStream<'a> { + let dataset_entries_page = { + let readable_state = self.state.read().await; + readable_state .rows_by_name .values() @@ -80,7 +90,7 @@ impl DatasetEntryRepository for InMemoryDatasetEntryRepository { .take(pagination.limit) .cloned() .map(Ok) - .collect() + .collect::>() }; Box::pin(futures::stream::iter(dataset_entries_page)) @@ -90,7 +100,7 @@ impl DatasetEntryRepository for InMemoryDatasetEntryRepository { &self, dataset_id: &DatasetID, ) -> Result { - let readable_state = self.state.lock().unwrap(); + let readable_state = self.state.read().await; let maybe_dataset_entry = readable_state.rows.get(dataset_id); @@ -105,7 +115,7 @@ impl DatasetEntryRepository for InMemoryDatasetEntryRepository { &self, dataset_ids: &[DatasetID], ) -> Result { - let readable_state = self.state.lock().unwrap(); + let readable_state = self.state.read().await; let mut resolution = DatasetEntriesResolution::default(); @@ -126,7 +136,7 @@ impl DatasetEntryRepository for InMemoryDatasetEntryRepository { owner_id: &AccountID, name: &DatasetName, ) -> Result { - let readable_state = self.state.lock().unwrap(); + let readable_state = self.state.read().await; let maybe_dataset_entry = readable_state .rows @@ -142,13 +152,14 @@ impl DatasetEntryRepository for InMemoryDatasetEntryRepository { Ok(dataset_entry.clone()) } - fn get_dataset_entries_by_owner_id( - &self, + async fn get_dataset_entries_by_owner_id<'a>( + &'a self, owner_id: &AccountID, pagination: PaginationOpts, - ) -> 
DatasetEntryStream<'_> { - let dataset_entries_page: Vec<_> = { - let readable_state = self.state.lock().unwrap(); + ) -> DatasetEntryStream<'a> { + let dataset_entries_page = { + let readable_state = self.state.read().await; + if let Some(dataset_ids) = readable_state.rows_by_owner.get(owner_id) { dataset_ids .iter() @@ -157,7 +168,7 @@ impl DatasetEntryRepository for InMemoryDatasetEntryRepository { .map(|dataset_id| readable_state.rows.get(dataset_id).unwrap()) .cloned() .map(Ok) - .collect() + .collect::>() } else { vec![] } @@ -170,7 +181,7 @@ impl DatasetEntryRepository for InMemoryDatasetEntryRepository { &self, dataset_entry: &DatasetEntry, ) -> Result<(), SaveDatasetEntryError> { - let mut writable_state = self.state.lock().unwrap(); + let mut writable_state = self.state.write().await; for row in writable_state.rows.values() { if row.id == dataset_entry.id { @@ -206,7 +217,7 @@ impl DatasetEntryRepository for InMemoryDatasetEntryRepository { dataset_id: &DatasetID, new_name: &DatasetName, ) -> Result<(), UpdateDatasetEntryNameError> { - let mut writable_state = self.state.lock().unwrap(); + let mut writable_state = self.state.write().await; let maybe_dataset_entry = writable_state.rows.get(dataset_id); @@ -248,7 +259,7 @@ impl DatasetEntryRepository for InMemoryDatasetEntryRepository { &self, dataset_id: &DatasetID, ) -> Result<(), DeleteEntryDatasetError> { - let mut writable_state = self.state.lock().unwrap(); + let mut writable_state = self.state.write().await; let maybe_removed_entry = writable_state.rows.remove(dataset_id); if let Some(removed_entry) = maybe_removed_entry { diff --git a/src/infra/datasets/postgres/.sqlx/query-13fe35a7997b790566736b78e16c17cd7452d48887938a2a28cbd9a1408472e2.json b/src/infra/datasets/postgres/.sqlx/query-168c5decfa4e1abb634750d661f3d811c12600aa6af7a06b226e7c5b7df64fb2.json similarity index 81% rename from src/infra/datasets/postgres/.sqlx/query-13fe35a7997b790566736b78e16c17cd7452d48887938a2a28cbd9a1408472e2.json rename to src/infra/datasets/postgres/.sqlx/query-168c5decfa4e1abb634750d661f3d811c12600aa6af7a06b226e7c5b7df64fb2.json index 128220e4a..81679caa2 100644 --- a/src/infra/datasets/postgres/.sqlx/query-13fe35a7997b790566736b78e16c17cd7452d48887938a2a28cbd9a1408472e2.json +++ b/src/infra/datasets/postgres/.sqlx/query-168c5decfa4e1abb634750d661f3d811c12600aa6af7a06b226e7c5b7df64fb2.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "\n SELECT\n dataset_id as \"id: _\",\n owner_id as \"owner_id: _\",\n dataset_name as name,\n created_at as \"created_at: _\"\n FROM dataset_entries\n ORDER BY dataset_name ASC\n LIMIT $1 OFFSET $2\n ", + "query": "\n SELECT\n dataset_id as \"id: _\",\n owner_id as \"owner_id: _\",\n dataset_name as name,\n created_at as \"created_at: _\"\n FROM dataset_entries\n ORDER BY created_at ASC\n LIMIT $1 OFFSET $2\n ", "describe": { "columns": [ { @@ -37,5 +37,5 @@ false ] }, - "hash": "13fe35a7997b790566736b78e16c17cd7452d48887938a2a28cbd9a1408472e2" + "hash": "168c5decfa4e1abb634750d661f3d811c12600aa6af7a06b226e7c5b7df64fb2" } diff --git a/src/infra/datasets/postgres/.sqlx/query-30c92efe33072f0b9fa446ea3255ffca15f34c2af9aaeb8d31453ab364f97495.json b/src/infra/datasets/postgres/.sqlx/query-30c92efe33072f0b9fa446ea3255ffca15f34c2af9aaeb8d31453ab364f97495.json new file mode 100644 index 000000000..8d99fbfce --- /dev/null +++ b/src/infra/datasets/postgres/.sqlx/query-30c92efe33072f0b9fa446ea3255ffca15f34c2af9aaeb8d31453ab364f97495.json @@ -0,0 +1,14 @@ +{ + "db_name": "PostgreSQL", + "query": "\n DELETE\n FROM 
dataset_entries\n WHERE dataset_id = $1\n ", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Text" + ] + }, + "nullable": [] + }, + "hash": "30c92efe33072f0b9fa446ea3255ffca15f34c2af9aaeb8d31453ab364f97495" +} diff --git a/src/infra/datasets/postgres/.sqlx/query-4783a9c4fca85d1965fb119ea2453da373b1ba803953af87ab913f22d6a1aef7.json b/src/infra/datasets/postgres/.sqlx/query-4783a9c4fca85d1965fb119ea2453da373b1ba803953af87ab913f22d6a1aef7.json deleted file mode 100644 index 3b0d0c2d6..000000000 --- a/src/infra/datasets/postgres/.sqlx/query-4783a9c4fca85d1965fb119ea2453da373b1ba803953af87ab913f22d6a1aef7.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "\n DELETE FROM dataset_entries WHERE dataset_id = $1\n ", - "describe": { - "columns": [], - "parameters": { - "Left": [ - "Text" - ] - }, - "nullable": [] - }, - "hash": "4783a9c4fca85d1965fb119ea2453da373b1ba803953af87ab913f22d6a1aef7" -} diff --git a/src/infra/datasets/postgres/.sqlx/query-2bcdb350c9c397529fafa84a0b575eca95214025291d1bd310c3900040a3c9c8.json b/src/infra/datasets/postgres/.sqlx/query-62939275935e6f623b32580d5679c708a4f95d15b637371f038e362241b5bd05.json similarity index 85% rename from src/infra/datasets/postgres/.sqlx/query-2bcdb350c9c397529fafa84a0b575eca95214025291d1bd310c3900040a3c9c8.json rename to src/infra/datasets/postgres/.sqlx/query-62939275935e6f623b32580d5679c708a4f95d15b637371f038e362241b5bd05.json index f9498617c..ebf49ddb9 100644 --- a/src/infra/datasets/postgres/.sqlx/query-2bcdb350c9c397529fafa84a0b575eca95214025291d1bd310c3900040a3c9c8.json +++ b/src/infra/datasets/postgres/.sqlx/query-62939275935e6f623b32580d5679c708a4f95d15b637371f038e362241b5bd05.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "\n SELECT dataset_id as \"id: _\",\n owner_id as \"owner_id: _\",\n dataset_name as name,\n created_at as \"created_at: _\"\n FROM dataset_entries\n WHERE dataset_id = ANY($1)\n ORDER BY dataset_id\n ", + "query": "\n SELECT dataset_id as \"id: _\",\n owner_id as \"owner_id: _\",\n dataset_name as name,\n created_at as \"created_at: _\"\n FROM dataset_entries\n WHERE dataset_id = ANY($1)\n ", "describe": { "columns": [ { @@ -36,5 +36,5 @@ false ] }, - "hash": "2bcdb350c9c397529fafa84a0b575eca95214025291d1bd310c3900040a3c9c8" + "hash": "62939275935e6f623b32580d5679c708a4f95d15b637371f038e362241b5bd05" } diff --git a/src/infra/datasets/postgres/.sqlx/query-fcb34f3fa8f59b1f8190694fc38dc66874757b9f56f23ed86f8494c6ed4b0b7a.json b/src/infra/datasets/postgres/.sqlx/query-7954a6acf1cdb627dfe2890b042679ef9e3886268865cce559cf2268c66ea800.json similarity index 85% rename from src/infra/datasets/postgres/.sqlx/query-fcb34f3fa8f59b1f8190694fc38dc66874757b9f56f23ed86f8494c6ed4b0b7a.json rename to src/infra/datasets/postgres/.sqlx/query-7954a6acf1cdb627dfe2890b042679ef9e3886268865cce559cf2268c66ea800.json index 1d1911431..66fcff185 100644 --- a/src/infra/datasets/postgres/.sqlx/query-fcb34f3fa8f59b1f8190694fc38dc66874757b9f56f23ed86f8494c6ed4b0b7a.json +++ b/src/infra/datasets/postgres/.sqlx/query-7954a6acf1cdb627dfe2890b042679ef9e3886268865cce559cf2268c66ea800.json @@ -1,6 +1,6 @@ { "db_name": "PostgreSQL", - "query": "\n SELECT dataset_id as \"id: _\",\n owner_id as \"owner_id: _\",\n dataset_name as name,\n created_at as \"created_at: _\"\n FROM dataset_entries\n WHERE dataset_id = $1\n ORDER BY created_at\n ", + "query": "\n SELECT dataset_id as \"id: _\",\n owner_id as \"owner_id: _\",\n dataset_name as name,\n created_at as \"created_at: _\"\n FROM dataset_entries\n 
WHERE dataset_id = $1\n ", "describe": { "columns": [ { @@ -36,5 +36,5 @@ false ] }, - "hash": "fcb34f3fa8f59b1f8190694fc38dc66874757b9f56f23ed86f8494c6ed4b0b7a" + "hash": "7954a6acf1cdb627dfe2890b042679ef9e3886268865cce559cf2268c66ea800" } diff --git a/src/infra/datasets/postgres/Cargo.toml b/src/infra/datasets/postgres/Cargo.toml index 6c2299ff8..91924d7cf 100644 --- a/src/infra/datasets/postgres/Cargo.toml +++ b/src/infra/datasets/postgres/Cargo.toml @@ -29,18 +29,14 @@ opendatafabric = { workspace = true, features = ["sqlx-postgres"] } async-stream = "0.3" async-trait = { version = "0.1", default-features = false } -chrono = { version = "0.4", default-features = false } dill = "0.9" futures = "0.3" -secrecy = "0.10" sqlx = { version = "0.8", default-features = false, features = [ "runtime-tokio-rustls", "macros", "postgres", "chrono", ] } -thiserror = { version = "1", default-features = false } -tracing = { version = "0.1", default-features = false } uuid = "1" diff --git a/src/infra/datasets/postgres/src/repos/postgres_dataset_entry_repository.rs b/src/infra/datasets/postgres/src/repos/postgres_dataset_entry_repository.rs index e77999980..abb5d46f4 100644 --- a/src/infra/datasets/postgres/src/repos/postgres_dataset_entry_repository.rs +++ b/src/infra/datasets/postgres/src/repos/postgres_dataset_entry_repository.rs @@ -35,7 +35,7 @@ impl PostgresDatasetEntryRepository { #[async_trait::async_trait] impl DatasetEntryRepository for PostgresDatasetEntryRepository { - async fn dataset_entries_count(&self) -> Result { + async fn dataset_entries_count(&self) -> Result { let mut tr = self.transaction.lock().await; let connection_mut = tr.connection_mut().await?; @@ -78,7 +78,10 @@ impl DatasetEntryRepository for PostgresDatasetEntryRepository { Ok(usize::try_from(dataset_entries_count.unwrap_or(0)).unwrap()) } - fn get_dataset_entries(&self, pagination: PaginationOpts) -> DatasetEntryStream { + async fn get_dataset_entries<'a>( + &'a self, + pagination: PaginationOpts, + ) -> DatasetEntryStream<'a> { Box::pin(async_stream::stream! 
{ let mut tr = self.transaction.lock().await; let connection_mut = tr.connection_mut().await?; @@ -95,7 +98,7 @@ impl DatasetEntryRepository for PostgresDatasetEntryRepository { dataset_name as name, created_at as "created_at: _" FROM dataset_entries - ORDER BY dataset_name ASC + ORDER BY created_at ASC LIMIT $1 OFFSET $2 "#, limit, @@ -130,7 +133,6 @@ impl DatasetEntryRepository for PostgresDatasetEntryRepository { created_at as "created_at: _" FROM dataset_entries WHERE dataset_id = $1 - ORDER BY created_at "#, stack_dataset_id.as_str(), ) @@ -153,10 +155,7 @@ impl DatasetEntryRepository for PostgresDatasetEntryRepository { let connection_mut = tr.connection_mut().await?; - let dataset_ids_search: Vec<_> = dataset_ids - .iter() - .map(|dataset_id| dataset_id.as_did_str().to_string()) - .collect(); + let dataset_ids_search: Vec<_> = dataset_ids.iter().map(ToString::to_string).collect(); let resolved_entries = sqlx::query_as!( DatasetEntryRowModel, @@ -167,7 +166,6 @@ impl DatasetEntryRepository for PostgresDatasetEntryRepository { created_at as "created_at: _" FROM dataset_entries WHERE dataset_id = ANY($1) - ORDER BY dataset_id "#, &dataset_ids_search, ) @@ -230,11 +228,11 @@ impl DatasetEntryRepository for PostgresDatasetEntryRepository { } } - fn get_dataset_entries_by_owner_id( - &self, + async fn get_dataset_entries_by_owner_id<'a>( + &'a self, owner_id: &AccountID, pagination: PaginationOpts, - ) -> DatasetEntryStream<'_> { + ) -> DatasetEntryStream<'a> { let stack_owner_id = owner_id.as_did_str().to_stack_string(); Box::pin(async_stream::stream! { @@ -353,7 +351,9 @@ impl DatasetEntryRepository for PostgresDatasetEntryRepository { let delete_result = sqlx::query!( r#" - DELETE FROM dataset_entries WHERE dataset_id = $1 + DELETE + FROM dataset_entries + WHERE dataset_id = $1 "#, stack_dataset_id.as_str(), ) diff --git a/src/infra/datasets/repo-tests/src/dataset_entry_repository_test_suite.rs b/src/infra/datasets/repo-tests/src/dataset_entry_repository_test_suite.rs index bef7e14ac..86b2c4bf7 100644 --- a/src/infra/datasets/repo-tests/src/dataset_entry_repository_test_suite.rs +++ b/src/infra/datasets/repo-tests/src/dataset_entry_repository_test_suite.rs @@ -85,12 +85,13 @@ pub async fn test_stream_many_entries(catalog: &Catalog) { use futures::TryStreamExt; { - let get_res: Result, _> = dataset_entry_repo + let get_res = dataset_entry_repo .get_dataset_entries(PaginationOpts { limit: 100, offset: 0, }) - .try_collect() + .await + .try_collect::>() .await; let expected_dataset_entries = vec![]; @@ -131,12 +132,13 @@ pub async fn test_stream_many_entries(catalog: &Catalog) { } { - let get_res: Result, _> = dataset_entry_repo + let get_res = dataset_entry_repo .get_dataset_entries(PaginationOpts { limit: 100, offset: 0, }) - .try_collect() + .await + .try_collect::>() .await; let expected_dataset_entries = vec![ dataset_entry_acc_1_1, @@ -329,7 +331,7 @@ pub async fn test_get_dataset_entries_by_owner_id(catalog: &Catalog) { 0, ); - let get_res: Result, _> = dataset_entry_repo + let get_res = dataset_entry_repo .get_dataset_entries_by_owner_id( &account_1.id, PaginationOpts { @@ -337,7 +339,8 @@ pub async fn test_get_dataset_entries_by_owner_id(catalog: &Catalog) { offset: 0, }, ) - .try_collect() + .await + .try_collect::>() .await; let expected_dataset_entries = vec![]; @@ -348,7 +351,7 @@ pub async fn test_get_dataset_entries_by_owner_id(catalog: &Catalog) { ); } { - let get_res: Result, _> = dataset_entry_repo + let get_res = dataset_entry_repo .get_dataset_entries_by_owner_id( 
&account_2.id, PaginationOpts { @@ -356,7 +359,8 @@ pub async fn test_get_dataset_entries_by_owner_id(catalog: &Catalog) { offset: 0, }, ) - .try_collect() + .await + .try_collect::>() .await; let expected_dataset_entries = vec![]; @@ -392,7 +396,7 @@ pub async fn test_get_dataset_entries_by_owner_id(catalog: &Catalog) { assert_matches!(save_res, Ok(_)); } { - let get_res: Result, _> = dataset_entry_repo + let get_res = dataset_entry_repo .get_dataset_entries_by_owner_id( &account_1.id, PaginationOpts { @@ -400,7 +404,8 @@ pub async fn test_get_dataset_entries_by_owner_id(catalog: &Catalog) { offset: 0, }, ) - .try_collect() + .await + .try_collect::>() .await; let mut expected_dataset_entries = vec![dataset_entry_acc_1_1, dataset_entry_acc_1_2]; @@ -426,7 +431,7 @@ pub async fn test_get_dataset_entries_by_owner_id(catalog: &Catalog) { ); } { - let get_res: Result, _> = dataset_entry_repo + let get_res = dataset_entry_repo .get_dataset_entries_by_owner_id( &account_2.id, PaginationOpts { @@ -434,7 +439,8 @@ pub async fn test_get_dataset_entries_by_owner_id(catalog: &Catalog) { offset: 0, }, ) - .try_collect() + .await + .try_collect::>() .await; let expected_dataset_entries = vec![dataset_entry_acc_2_3]; diff --git a/src/infra/datasets/sqlite/.sqlx/query-13fe35a7997b790566736b78e16c17cd7452d48887938a2a28cbd9a1408472e2.json b/src/infra/datasets/sqlite/.sqlx/query-168c5decfa4e1abb634750d661f3d811c12600aa6af7a06b226e7c5b7df64fb2.json similarity index 80% rename from src/infra/datasets/sqlite/.sqlx/query-13fe35a7997b790566736b78e16c17cd7452d48887938a2a28cbd9a1408472e2.json rename to src/infra/datasets/sqlite/.sqlx/query-168c5decfa4e1abb634750d661f3d811c12600aa6af7a06b226e7c5b7df64fb2.json index be47e2fc9..23640468a 100644 --- a/src/infra/datasets/sqlite/.sqlx/query-13fe35a7997b790566736b78e16c17cd7452d48887938a2a28cbd9a1408472e2.json +++ b/src/infra/datasets/sqlite/.sqlx/query-168c5decfa4e1abb634750d661f3d811c12600aa6af7a06b226e7c5b7df64fb2.json @@ -1,6 +1,6 @@ { "db_name": "SQLite", - "query": "\n SELECT\n dataset_id as \"id: _\",\n owner_id as \"owner_id: _\",\n dataset_name as name,\n created_at as \"created_at: _\"\n FROM dataset_entries\n ORDER BY dataset_name ASC\n LIMIT $1 OFFSET $2\n ", + "query": "\n SELECT\n dataset_id as \"id: _\",\n owner_id as \"owner_id: _\",\n dataset_name as name,\n created_at as \"created_at: _\"\n FROM dataset_entries\n ORDER BY created_at ASC\n LIMIT $1 OFFSET $2\n ", "describe": { "columns": [ { @@ -34,5 +34,5 @@ false ] }, - "hash": "13fe35a7997b790566736b78e16c17cd7452d48887938a2a28cbd9a1408472e2" + "hash": "168c5decfa4e1abb634750d661f3d811c12600aa6af7a06b226e7c5b7df64fb2" } diff --git a/src/infra/datasets/sqlite/Cargo.toml b/src/infra/datasets/sqlite/Cargo.toml index feb3be99f..83f58ed1d 100644 --- a/src/infra/datasets/sqlite/Cargo.toml +++ b/src/infra/datasets/sqlite/Cargo.toml @@ -29,18 +29,14 @@ opendatafabric = { workspace = true, features = ["sqlx-sqlite"] } async-stream = "0.3" async-trait = { version = "0.1", default-features = false } -chrono = { version = "0.4", default-features = false } dill = "0.9" futures = "0.3" -secrecy = "0.10" sqlx = { version = "0.8", default-features = false, features = [ "runtime-tokio-rustls", "macros", "sqlite", "chrono", ] } -thiserror = { version = "1", default-features = false } -tracing = { version = "0.1", default-features = false } uuid = "1" diff --git a/src/infra/datasets/sqlite/src/repos/sqlite_dateset_entry_repository.rs b/src/infra/datasets/sqlite/src/repos/sqlite_dateset_entry_repository.rs index 
ff0a02ffc..158016c4d 100644 --- a/src/infra/datasets/sqlite/src/repos/sqlite_dateset_entry_repository.rs +++ b/src/infra/datasets/sqlite/src/repos/sqlite_dateset_entry_repository.rs @@ -36,7 +36,7 @@ impl SqliteDatasetEntryRepository { #[async_trait::async_trait] impl DatasetEntryRepository for SqliteDatasetEntryRepository { - async fn dataset_entries_count(&self) -> Result { + async fn dataset_entries_count(&self) -> Result { let mut tr = self.transaction.lock().await; let connection_mut = tr.connection_mut().await?; @@ -80,7 +80,10 @@ impl DatasetEntryRepository for SqliteDatasetEntryRepository { Ok(usize::try_from(dataset_entries_count).unwrap()) } - fn get_dataset_entries(&self, pagination: PaginationOpts) -> DatasetEntryStream { + async fn get_dataset_entries<'a>( + &'a self, + pagination: PaginationOpts, + ) -> DatasetEntryStream<'a> { Box::pin(async_stream::stream! { let mut tr = self.transaction.lock().await; let connection_mut = tr.connection_mut().await?; @@ -97,7 +100,7 @@ impl DatasetEntryRepository for SqliteDatasetEntryRepository { dataset_name as name, created_at as "created_at: _" FROM dataset_entries - ORDER BY dataset_name ASC + ORDER BY created_at ASC LIMIT $1 OFFSET $2 "#, limit, @@ -254,11 +257,11 @@ impl DatasetEntryRepository for SqliteDatasetEntryRepository { } } - fn get_dataset_entries_by_owner_id( - &self, + async fn get_dataset_entries_by_owner_id<'a>( + &'a self, owner_id: &AccountID, pagination: PaginationOpts, - ) -> DatasetEntryStream<'_> { + ) -> DatasetEntryStream<'a> { let stack_owner_id = owner_id.as_did_str().to_stack_string(); let limit = i64::try_from(pagination.limit).unwrap(); diff --git a/src/infra/flow-system/inmem/Cargo.toml b/src/infra/flow-system/inmem/Cargo.toml index 91786d0fb..b0346f9d2 100644 --- a/src/infra/flow-system/inmem/Cargo.toml +++ b/src/infra/flow-system/inmem/Cargo.toml @@ -23,34 +23,21 @@ doctest = false [dependencies] database-common = { workspace = true } -internal-error = { workspace = true } opendatafabric = { workspace = true } -kamu-task-system = { workspace = true } kamu-flow-system = { workspace = true } -async-stream = "0.3" async-trait = { version = "0.1", default-features = false } chrono = { version = "0.4", default-features = false } dill = "0.9" futures = "0.3" -thiserror = { version = "1", default-features = false } tokio = { version = "1", default-features = false, features = [] } tokio-stream = { version = "0.1", default-features = false } tracing = { version = "0.1", default-features = false } -url = { version = "2", default-features = false, features = ["serde"] } - -# TODO: Make serde optional -serde = { version = "1", default-features = false, features = ["derive"] } -serde_with = { version = "3", default-features = false } [dev-dependencies] database-common-macros = { workspace = true } kamu-flow-system-repo-tests = { workspace = true } -cron = { version = "0.12", default-features = false } -tempfile = "3" test-group = { version = "1" } test-log = { version = "0.2", features = ["trace"] } -tokio = { version = "1", default-features = false, features = ["rt", "macros"] } -tracing-subscriber = { version = "0.3", features = ["env-filter"] } diff --git a/src/infra/flow-system/postgres/Cargo.toml b/src/infra/flow-system/postgres/Cargo.toml index 90a8bc4e9..681592635 100644 --- a/src/infra/flow-system/postgres/Cargo.toml +++ b/src/infra/flow-system/postgres/Cargo.toml @@ -39,8 +39,7 @@ sqlx = { version = "0.8", default-features = false, features = [ "postgres", "chrono" ] } -tokio-stream = { version = "0.1", 
default-features = false } -tracing = { version = "0.1", default-features = false } + [dev-dependencies] database-common-macros = { workspace = true } @@ -48,6 +47,4 @@ internal-error = { workspace = true } kamu-flow-system-repo-tests = { workspace = true } test-log = { version = "0.2", features = ["trace"] } -tokio = { version = "1", default-features = false, features = ["rt", "macros"] } test-group = { version = "1" } -serde = { version = "1", features = ["derive"] } diff --git a/src/infra/flow-system/repo-tests/src/test_flow_event_store.rs b/src/infra/flow-system/repo-tests/src/test_flow_event_store.rs index 0db1c7708..7a9e32dcf 100644 --- a/src/infra/flow-system/repo-tests/src/test_flow_event_store.rs +++ b/src/infra/flow-system/repo-tests/src/test_flow_event_store.rs @@ -29,7 +29,7 @@ pub async fn test_dataset_flow_empty_filters_distingush_dataset(catalog: &Catalo let foo_cases = make_dataset_test_case(flow_event_store.clone()).await; let bar_cases = make_dataset_test_case(flow_event_store.clone()).await; - assert_dataset_flow_expectaitons( + assert_dataset_flow_expectations( flow_event_store.clone(), &foo_cases, always_happy_filters.clone(), @@ -49,7 +49,7 @@ pub async fn test_dataset_flow_empty_filters_distingush_dataset(catalog: &Catalo ) .await; - assert_dataset_flow_expectaitons( + assert_dataset_flow_expectations( flow_event_store.clone(), &bar_cases, always_happy_filters.clone(), @@ -111,7 +111,7 @@ pub async fn test_dataset_flow_filter_by_status(catalog: &Catalog) { ]; for (filters, expected_flow_ids) in cases { - assert_dataset_flow_expectaitons( + assert_dataset_flow_expectations( flow_event_store.clone(), &foo_cases, filters, @@ -166,7 +166,7 @@ pub async fn test_dataset_flow_filter_by_flow_type(catalog: &Catalog) { ]; for (filters, expected_flow_ids) in cases { - assert_dataset_flow_expectaitons( + assert_dataset_flow_expectations( flow_event_store.clone(), &foo_cases, filters, @@ -225,7 +225,7 @@ pub async fn test_dataset_flow_filter_by_initiator(catalog: &Catalog) { ]; for (filters, expected_flow_ids) in cases { - assert_dataset_flow_expectaitons( + assert_dataset_flow_expectations( flow_event_store.clone(), &foo_cases, filters, @@ -283,7 +283,7 @@ pub async fn test_dataset_flow_filter_by_initiator_with_multiple_variants(catalo ]; for (filters, expected_flow_ids) in cases { - assert_dataset_flow_expectaitons( + assert_dataset_flow_expectations( flow_event_store.clone(), &foo_cases, filters, @@ -334,7 +334,7 @@ pub async fn test_dataset_flow_filter_combinations(catalog: &Catalog) { ]; for (filters, expected_flow_ids) in cases { - assert_dataset_flow_expectaitons( + assert_dataset_flow_expectations( flow_event_store.clone(), &foo_cases, filters, @@ -568,7 +568,7 @@ pub async fn test_dataset_flow_pagination(catalog: &Catalog) { ]; for (pagination, expected_flow_ids) in cases { - assert_dataset_flow_expectaitons( + assert_dataset_flow_expectations( flow_event_store.clone(), &foo_cases, Default::default(), @@ -630,7 +630,7 @@ pub async fn test_dataset_flow_pagination_with_filters(catalog: &Catalog) { ]; for (pagination, filters, expected_total_count, expected_flow_ids) in cases { - assert_dataset_flow_expectaitons( + assert_dataset_flow_expectations( flow_event_store.clone(), &foo_cases, filters, @@ -670,7 +670,7 @@ pub async fn test_unfiltered_system_flows(catalog: &Catalog) { let system_case = make_system_test_case(flow_event_store.clone()).await; - assert_system_flow_expectaitons( + assert_system_flow_expectations( flow_event_store.clone(), SystemFlowFilters::default(), 
PaginationOpts { @@ -707,7 +707,7 @@ pub async fn test_system_flows_filtered_by_flow_type(catalog: &Catalog) { )]; for (filters, expected_flow_ids) in cases { - assert_system_flow_expectaitons( + assert_system_flow_expectations( flow_event_store.clone(), filters, PaginationOpts { @@ -753,7 +753,7 @@ pub async fn test_system_flows_filtered_by_flow_status(catalog: &Catalog) { ]; for (filters, expected_flow_ids) in cases { - assert_system_flow_expectaitons( + assert_system_flow_expectations( flow_event_store.clone(), filters, PaginationOpts { @@ -803,7 +803,7 @@ pub async fn test_system_flows_filtered_by_initiator(catalog: &Catalog) { ]; for (filters, expected_flow_ids) in cases { - assert_system_flow_expectaitons( + assert_system_flow_expectations( flow_event_store.clone(), filters, PaginationOpts { @@ -853,7 +853,7 @@ pub async fn test_system_flows_complex_filter(catalog: &Catalog) { ]; for (filters, expected_flow_ids) in cases { - assert_system_flow_expectaitons( + assert_system_flow_expectations( flow_event_store.clone(), filters, PaginationOpts { @@ -912,7 +912,7 @@ pub async fn test_system_flow_pagination(catalog: &Catalog) { ]; for (pagination, expected_flow_ids) in cases { - assert_system_flow_expectaitons( + assert_system_flow_expectations( flow_event_store.clone(), Default::default(), pagination, @@ -973,7 +973,7 @@ pub async fn test_system_flow_pagination_with_filters(catalog: &Catalog) { ]; for (pagination, filters, expected_total_count, expected_flow_ids) in cases { - assert_system_flow_expectaitons( + assert_system_flow_expectations( flow_event_store.clone(), filters, pagination, @@ -993,7 +993,7 @@ pub async fn test_all_flows_unpaged(catalog: &Catalog) { let system_case = make_system_test_case(flow_event_store.clone()).await; - assert_all_flow_expectaitons( + assert_all_flow_expectations( flow_event_store.clone(), AllFlowFilters::default(), PaginationOpts { @@ -1072,7 +1072,7 @@ pub async fn test_all_flows_pagination(catalog: &Catalog) { ]; for (pagination, expected_flow_ids) in cases { - assert_all_flow_expectaitons( + assert_all_flow_expectations( flow_event_store.clone(), AllFlowFilters::default(), pagination, @@ -1092,7 +1092,7 @@ pub async fn test_all_flows_filters(catalog: &Catalog) { let system_case = make_system_test_case(flow_event_store.clone()).await; - assert_all_flow_expectaitons( + assert_all_flow_expectations( flow_event_store.clone(), AllFlowFilters { by_flow_status: Some(FlowStatus::Waiting), @@ -1111,7 +1111,7 @@ pub async fn test_all_flows_filters(catalog: &Catalog) { ) .await; - assert_all_flow_expectaitons( + assert_all_flow_expectations( flow_event_store.clone(), AllFlowFilters { by_flow_status: Some(FlowStatus::Running), @@ -1130,7 +1130,7 @@ pub async fn test_all_flows_filters(catalog: &Catalog) { ) .await; - assert_all_flow_expectaitons( + assert_all_flow_expectations( flow_event_store.clone(), AllFlowFilters { by_flow_status: Some(FlowStatus::Finished), @@ -2365,7 +2365,7 @@ async fn make_system_test_flows( } } -async fn assert_dataset_flow_expectaitons( +async fn assert_dataset_flow_expectations( flow_event_store: Arc, dataset_test_case: &DatasetTestCase, filters: DatasetFlowFilters, @@ -2402,7 +2402,7 @@ async fn assert_multiple_dataset_flow_expectations( assert_eq!(flow_ids, expected_flow_ids); } -async fn assert_system_flow_expectaitons( +async fn assert_system_flow_expectations( flow_event_store: Arc, filters: SystemFlowFilters, pagination: PaginationOpts, @@ -2423,7 +2423,7 @@ async fn assert_system_flow_expectaitons( assert_eq!(flow_ids, 
expected_flow_ids); } -async fn assert_all_flow_expectaitons( +async fn assert_all_flow_expectations( flow_event_store: Arc, filters: AllFlowFilters, pagination: PaginationOpts, diff --git a/src/infra/flow-system/sqlite/Cargo.toml b/src/infra/flow-system/sqlite/Cargo.toml index a6a7b9cd7..c349ff22c 100644 --- a/src/infra/flow-system/sqlite/Cargo.toml +++ b/src/infra/flow-system/sqlite/Cargo.toml @@ -39,8 +39,7 @@ sqlx = { version = "0.8", default-features = false, features = [ "sqlite", "chrono" ] } -tokio-stream = { version = "0.1", default-features = false } -tracing = { version = "0.1", default-features = false } + [dev-dependencies] database-common-macros = { workspace = true } @@ -48,6 +47,4 @@ internal-error = { workspace = true } kamu-flow-system-repo-tests = { workspace = true } test-log = { version = "0.2", features = ["trace"] } -tokio = { version = "1", default-features = false, features = ["rt", "macros"] } test-group = { version = "1" } -serde = { version = "1", features = ["derive"] } diff --git a/src/infra/flow-system/sqlite/src/sqlite_flow_event_store.rs b/src/infra/flow-system/sqlite/src/sqlite_flow_event_store.rs index e4e3eff56..44e6a2b20 100644 --- a/src/infra/flow-system/sqlite/src/sqlite_flow_event_store.rs +++ b/src/infra/flow-system/sqlite/src/sqlite_flow_event_store.rs @@ -10,7 +10,12 @@ use std::collections::HashSet; use chrono::{DateTime, Utc}; -use database_common::{PaginationOpts, TransactionRef, TransactionRefT}; +use database_common::{ + sqlite_generate_placeholders_list, + PaginationOpts, + TransactionRef, + TransactionRefT, +}; use dill::*; use futures::TryStreamExt; use kamu_flow_system::*; @@ -44,14 +49,6 @@ impl SqliteFlowEventStore { } } - fn generate_placeholders_list(args: &[T], index_offset: usize) -> String { - args.iter() - .enumerate() - .map(|(i, _)| format!("${}", i + index_offset)) - .collect::>() - .join(", ") - } - async fn register_flow( &self, tr: &mut database_common::TransactionGuard<'_, Sqlite>, @@ -659,7 +656,7 @@ impl FlowEventStore for SqliteFlowEventStore { "#, maybe_initiators .as_ref() - .map(|initiators| Self::generate_placeholders_list(initiators, 7)) + .map(|initiators| sqlite_generate_placeholders_list(initiators.len(), 7)) .unwrap_or_default(), ); @@ -677,10 +674,9 @@ impl FlowEventStore for SqliteFlowEventStore { } } - let mut query_stream = query.try_map(|event_row: SqliteRow| { - Ok(FlowID::new(event_row.get(0))) - }) - .fetch(connection_mut); + let mut query_stream = query + .try_map(|event_row: SqliteRow| Ok(FlowID::new(event_row.get(0)))) + .fetch(connection_mut); while let Some(flow_id) = query_stream.try_next().await.int_err()? { yield Ok(flow_id); @@ -716,7 +712,7 @@ impl FlowEventStore for SqliteFlowEventStore { "#, maybe_initiators .as_ref() - .map(|initiators| Self::generate_placeholders_list(initiators, 5)) + .map(|initiators| sqlite_generate_placeholders_list(initiators.len(), 5)) .unwrap_or_default() ); @@ -757,9 +753,7 @@ impl FlowEventStore for SqliteFlowEventStore { Box::pin(async_stream::stream! 
{ let mut tr = self.transaction.lock().await; - let connection_mut = tr - .connection_mut() - .await?; + let connection_mut = tr.connection_mut().await?; let query_str = format!( r#" @@ -771,10 +765,13 @@ impl FlowEventStore for SqliteFlowEventStore { ORDER BY flow_id DESC LIMIT $4 OFFSET $5 "#, - Self::generate_placeholders_list(&dataset_ids, 6), + sqlite_generate_placeholders_list(dataset_ids.len(), 6), maybe_initiators .as_ref() - .map(|initiators| Self::generate_placeholders_list(initiators, 6 + dataset_ids.len())) + .map(|initiators| sqlite_generate_placeholders_list( + initiators.len(), + 6 + dataset_ids.len() + )) .unwrap_or_default() ); @@ -795,10 +792,9 @@ impl FlowEventStore for SqliteFlowEventStore { } } - let mut query_stream = query.try_map(|event_row: SqliteRow| { - Ok(FlowID::new(event_row.get(0))) - }) - .fetch(connection_mut); + let mut query_stream = query + .try_map(|event_row: SqliteRow| Ok(FlowID::new(event_row.get(0)))) + .fetch(connection_mut); while let Some(flow_id) = query_stream.try_next().await.int_err()? { yield Ok(flow_id); @@ -853,9 +849,7 @@ impl FlowEventStore for SqliteFlowEventStore { Box::pin(async_stream::stream! { let mut tr = self.transaction.lock().await; - let connection_mut = tr - .connection_mut() - .await?; + let connection_mut = tr.connection_mut().await?; let query_str = format!( r#" @@ -869,7 +863,7 @@ impl FlowEventStore for SqliteFlowEventStore { "#, maybe_initiators .as_ref() - .map(|initiators| Self::generate_placeholders_list(initiators, 6)) + .map(|initiators| sqlite_generate_placeholders_list(initiators.len(), 6)) .unwrap_or_default() ); @@ -886,10 +880,9 @@ impl FlowEventStore for SqliteFlowEventStore { } } - let mut query_stream = query.try_map(|event_row: SqliteRow| { - Ok(FlowID::new(event_row.get(0))) - }) - .fetch(connection_mut); + let mut query_stream = query + .try_map(|event_row: SqliteRow| Ok(FlowID::new(event_row.get(0)))) + .fetch(connection_mut); while let Some(flow_id) = query_stream.try_next().await.int_err()? { yield Ok(flow_id); @@ -923,7 +916,7 @@ impl FlowEventStore for SqliteFlowEventStore { "#, maybe_initiators .as_ref() - .map(|initiators| Self::generate_placeholders_list(initiators, 4)) + .map(|initiators| sqlite_generate_placeholders_list(initiators.len(), 4)) .unwrap_or_default() ); @@ -959,9 +952,7 @@ impl FlowEventStore for SqliteFlowEventStore { Box::pin(async_stream::stream! { let mut tr = self.transaction.lock().await; - let connection_mut = tr - .connection_mut() - .await?; + let connection_mut = tr.connection_mut().await?; let query_str = format!( r#" @@ -974,7 +965,7 @@ impl FlowEventStore for SqliteFlowEventStore { "#, maybe_initiators .as_ref() - .map(|initiators| Self::generate_placeholders_list(initiators, 5)) + .map(|initiators| sqlite_generate_placeholders_list(initiators.len(), 5)) .unwrap_or_default() ); @@ -990,10 +981,9 @@ impl FlowEventStore for SqliteFlowEventStore { } } - let mut query_stream = query.try_map(|event_row: SqliteRow| { - Ok(FlowID::new(event_row.get(0))) - }) - .fetch(connection_mut); + let mut query_stream = query + .try_map(|event_row: SqliteRow| Ok(FlowID::new(event_row.get(0)))) + .fetch(connection_mut); while let Some(flow_id) = query_stream.try_next().await.int_err()? 
{ yield Ok(flow_id); @@ -1022,7 +1012,7 @@ impl FlowEventStore for SqliteFlowEventStore { "#, maybe_initiators .as_ref() - .map(|initiators| Self::generate_placeholders_list(initiators, 3)) + .map(|initiators| sqlite_generate_placeholders_list(initiators.len(), 3)) .unwrap_or_default() ); diff --git a/src/infra/ingest-datafusion/Cargo.toml b/src/infra/ingest-datafusion/Cargo.toml index d37226c45..a3384da9b 100644 --- a/src/infra/ingest-datafusion/Cargo.toml +++ b/src/infra/ingest-datafusion/Cargo.toml @@ -28,7 +28,6 @@ kamu-core = { workspace = true } kamu-data-utils = { workspace = true } datafusion = { version = "42", default-features = false } -digest = "0.10" geo-types = { version = "0.7", default-features = false, features = [] } geojson = { version = "0.24", default-features = false, features = [ "geo-types", @@ -36,7 +35,6 @@ geojson = { version = "0.24", default-features = false, features = [ glob = "0.3" serde = { version = "1" } serde_json = "1" -sha3 = "0.10" shapefile = { version = "0.6", features = ["geo-types"] } walkdir = "2" zip = { version = "2", default-features = false, features = [ @@ -55,7 +53,6 @@ tokio = { version = "1", default-features = false, features = [ "process", ] } tracing = "0.1" -url = { version = "2", features = ["serde"] } [dev-dependencies] @@ -69,7 +66,6 @@ test-group = { version = "1" } test-log = { version = "0.2", features = ["trace"] } tempfile = "3" tokio = { version = "1", default-features = false, features = ["rt", "macros"] } -tracing-subscriber = { version = "0.3", features = ["env-filter"] } [[bench]] diff --git a/src/infra/messaging-outbox/inmem/Cargo.toml b/src/infra/messaging-outbox/inmem/Cargo.toml index a9c227f78..e4377c067 100644 --- a/src/infra/messaging-outbox/inmem/Cargo.toml +++ b/src/infra/messaging-outbox/inmem/Cargo.toml @@ -26,12 +26,9 @@ messaging-outbox = { workspace = true } internal-error = { workspace = true } async-trait = { version = "0.1", default-features = false } -chrono = { version = "0.4", default-features = false } dill = "0.9" tokio = { version = "1", default-features = false } tokio-stream = "0.1" -thiserror = { version = "1", default-features = false } -tracing = { version = "0.1", default-features = false } [dev-dependencies] @@ -40,4 +37,3 @@ kamu-messaging-outbox-repo-tests = { workspace = true } test-group = { version = "1" } test-log = { version = "0.2", features = ["trace"] } -tokio = { version = "1", default-features = false, features = ["rt", "macros"] } diff --git a/src/infra/messaging-outbox/postgres/Cargo.toml b/src/infra/messaging-outbox/postgres/Cargo.toml index cb1aa6ff3..39857a771 100644 --- a/src/infra/messaging-outbox/postgres/Cargo.toml +++ b/src/infra/messaging-outbox/postgres/Cargo.toml @@ -28,7 +28,6 @@ internal-error = { workspace = true } async-stream = "0.3" async-trait = { version = "0.1", default-features = false } -chrono = { version = "0.4", default-features = false } dill = "0.9" futures = "0.3" sqlx = { version = "0.8", default-features = false, features = [ @@ -38,9 +37,6 @@ sqlx = { version = "0.8", default-features = false, features = [ "chrono", "json" ] } -thiserror = { version = "1", default-features = false } -tracing = { version = "0.1", default-features = false } -uuid = "1" [dev-dependencies] @@ -49,4 +45,3 @@ kamu-messaging-outbox-repo-tests = { workspace = true } test-group = { version = "1" } test-log = { version = "0.2", features = ["trace"] } -tokio = { version = "1", default-features = false, features = ["rt", "macros"] } diff --git 
a/src/infra/messaging-outbox/repo-tests/Cargo.toml b/src/infra/messaging-outbox/repo-tests/Cargo.toml index 986fcf97a..ff7916ec9 100644 --- a/src/infra/messaging-outbox/repo-tests/Cargo.toml +++ b/src/infra/messaging-outbox/repo-tests/Cargo.toml @@ -22,7 +22,6 @@ doctest = false [dependencies] -database-common = { workspace = true } messaging-outbox = { workspace = true } chrono = { version = "0.4", default-features = false } diff --git a/src/infra/messaging-outbox/sqlite/Cargo.toml b/src/infra/messaging-outbox/sqlite/Cargo.toml index 6371749e1..7e422314a 100644 --- a/src/infra/messaging-outbox/sqlite/Cargo.toml +++ b/src/infra/messaging-outbox/sqlite/Cargo.toml @@ -28,7 +28,6 @@ internal-error = { workspace = true } async-stream = "0.3" async-trait = { version = "0.1", default-features = false } -chrono = { version = "0.4", default-features = false } dill = "0.9" futures = "0.3" indoc = "2" @@ -39,8 +38,6 @@ sqlx = { version = "0.8", default-features = false, features = [ "chrono", "json" ] } -thiserror = { version = "1", default-features = false } -tracing = { version = "0.1", default-features = false } [dev-dependencies] @@ -49,4 +46,3 @@ kamu-messaging-outbox-repo-tests = { workspace = true } test-group = { version = "1" } test-log = { version = "0.2", features = ["trace"] } -tokio = { version = "1", default-features = false, features = ["rt", "macros"] } diff --git a/src/infra/task-system/inmem/Cargo.toml b/src/infra/task-system/inmem/Cargo.toml index eb040f170..72e6c017f 100644 --- a/src/infra/task-system/inmem/Cargo.toml +++ b/src/infra/task-system/inmem/Cargo.toml @@ -27,7 +27,6 @@ opendatafabric = { workspace = true } kamu-task-system = { workspace = true } async-trait = { version = "0.1", default-features = false } -chrono = { version = "0.4", default-features = false } dill = "0.9" futures = "0.3" @@ -35,7 +34,6 @@ futures = "0.3" [dev-dependencies] database-common = { workspace = true } database-common-macros = { workspace = true } -internal-error = { workspace = true } kamu-task-system-repo-tests = { workspace = true } test-group = { version = "1" } diff --git a/src/infra/task-system/postgres/Cargo.toml b/src/infra/task-system/postgres/Cargo.toml index 4eaca1f73..8820253ba 100644 --- a/src/infra/task-system/postgres/Cargo.toml +++ b/src/infra/task-system/postgres/Cargo.toml @@ -28,7 +28,6 @@ kamu-task-system = { workspace = true } async-stream = "0.3" async-trait = { version = "0.1", default-features = false } -chrono = { version = "0.4", default-features = false } dill = "0.9" futures = "0.3" serde_json = "1" @@ -39,7 +38,7 @@ sqlx = { version = "0.8", default-features = false, features = [ "postgres", "chrono" ] } -tokio-stream = { version = "0.1", default-features = false } + [dev-dependencies] database-common-macros = { workspace = true } @@ -47,5 +46,4 @@ internal-error = { workspace = true } kamu-task-system-repo-tests = { workspace = true } test-log = { version = "0.2", features = ["trace"] } -tokio = { version = "1", default-features = false, features = ["rt", "macros"] } test-group = { version = "1" } diff --git a/src/infra/task-system/repo-tests/Cargo.toml b/src/infra/task-system/repo-tests/Cargo.toml index 205d49019..cd2a5cce1 100644 --- a/src/infra/task-system/repo-tests/Cargo.toml +++ b/src/infra/task-system/repo-tests/Cargo.toml @@ -29,3 +29,6 @@ opendatafabric = { workspace = true } chrono = { version = "0.4", default-features = false } dill = "0.9" futures = "0.3" + + +[dev-dependencies] diff --git a/src/infra/task-system/sqlite/Cargo.toml 
b/src/infra/task-system/sqlite/Cargo.toml index ccc565ca1..7abb3fe3e 100644 --- a/src/infra/task-system/sqlite/Cargo.toml +++ b/src/infra/task-system/sqlite/Cargo.toml @@ -39,7 +39,7 @@ sqlx = { version = "0.8", default-features = false, features = [ "sqlite", "chrono" ] } -tokio-stream = { version = "0.1", default-features = false } + [dev-dependencies] database-common-macros = { workspace = true } @@ -47,5 +47,4 @@ internal-error = { workspace = true } kamu-task-system-repo-tests = { workspace = true } test-log = { version = "0.2", features = ["trace"] } -tokio = { version = "1", default-features = false, features = ["rt", "macros"] } test-group = { version = "1" } diff --git a/src/utils/container-runtime/Cargo.toml b/src/utils/container-runtime/Cargo.toml index 7a0a8c101..77932634e 100644 --- a/src/utils/container-runtime/Cargo.toml +++ b/src/utils/container-runtime/Cargo.toml @@ -44,4 +44,3 @@ test-group = { version = "1" } test-log = { version = "0.2", features = ["trace"] } tempfile = "3" tokio = { version = "1", default-features = false, features = ["rt", "macros"] } -tracing-subscriber = { version = "0.3", features = ["env-filter"] } diff --git a/src/utils/data-utils/Cargo.toml b/src/utils/data-utils/Cargo.toml index b68c78ec4..1842629da 100644 --- a/src/utils/data-utils/Cargo.toml +++ b/src/utils/data-utils/Cargo.toml @@ -55,4 +55,3 @@ indoc = "2" pretty_assertions = { version = "1" } test-log = { version = "0.2", features = ["trace"] } tokio = { version = "1", default-features = false, features = ["rt", "macros"] } -tracing-subscriber = { version = "0.3", features = ["env-filter"] } diff --git a/src/utils/database-common-macros/Cargo.toml b/src/utils/database-common-macros/Cargo.toml index 108599223..69feea44a 100644 --- a/src/utils/database-common-macros/Cargo.toml +++ b/src/utils/database-common-macros/Cargo.toml @@ -29,3 +29,6 @@ syn = { version = "2", default-features = false, features = [ "printing", "proc-macro", ] } + + +[dev-dependencies] diff --git a/src/utils/database-common/Cargo.toml b/src/utils/database-common/Cargo.toml index cccf41a4a..e2d7eecea 100644 --- a/src/utils/database-common/Cargo.toml +++ b/src/utils/database-common/Cargo.toml @@ -22,23 +22,24 @@ doctest = false [dependencies] +async-stream = "0.3" +async-trait = "0.1" aws-config = "1" -aws-sdk-secretsmanager = "1" aws-credential-types = "1" -async-trait = "0.1" +aws-sdk-secretsmanager = "1" chrono = { version = "0.4", default-features = false } dill = "0.9" +futures = { version = "0.3", default-features = false } hex = "0.4" hmac = "0.12" internal-error = { workspace = true } secrecy = "0.10" +serde = "1" serde_json = "1" +sha2 = "0.10" thiserror = { version = "1", default-features = false } tokio = { version = "1", default-features = false, features = ["sync"] } tracing = "0.1" -serde = "1" -sha2 = "0.10" -uuid = "1" [dependencies.sqlx] version = "0.8" @@ -53,3 +54,9 @@ features = [ "chrono", "migrate", ] + + +[dev-dependencies] +mockall = { version = "0.13", default-features = false } +pretty_assertions = { version = "1" } +tokio = { version = "1", default-features = true, features = ["macros"] } diff --git a/src/utils/database-common/src/entities.rs b/src/utils/database-common/src/entities.rs index 1c4d25570..4f4a361f9 100644 --- a/src/utils/database-common/src/entities.rs +++ b/src/utils/database-common/src/entities.rs @@ -7,6 +7,12 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. 
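The `PaginationOpts::safe_limit` helper added just below clamps a page request against the total number of entities, so the final page never reads past the end of a collection. A small illustrative sketch; the `page_of` helper is hypothetical and mirrors how the streamer tests later in this patch slice pages out of in-memory data:

```rust
use database_common::PaginationOpts;

// Hypothetical helper: slice one page out of a full in-memory collection.
fn page_of<T: Clone>(all: &[T], pagination: PaginationOpts) -> Vec<T> {
    all.iter()
        .skip(pagination.offset)
        .take(pagination.safe_limit(all.len()))
        .cloned()
        .collect()
}

// With 12 items and limit 5, offsets 0, 5 and 10 yield pages of 5, 5 and 2 items;
// an offset past the end (e.g. 15) simply yields an empty page.
```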
+use std::future::Future;
+use std::pin::Pin;
+
+use futures::Stream;
+use internal_error::InternalError;
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
 #[derive(Debug, Copy, Clone)]
@@ -15,4 +21,97 @@ pub struct PaginationOpts {
     pub offset: usize,
 }
 
+impl PaginationOpts {
+    pub fn safe_limit(&self, total: usize) -> usize {
+        let rest = total.saturating_sub(self.offset);
+
+        self.limit.min(rest)
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+pub struct EntityPageListing<Entity> {
+    pub list: Vec<Entity>,
+    pub total_count: usize,
+}
+
+pub type EntityPageStream<'a, Entity> =
+    Pin<Box<dyn Stream<Item = Result<Entity, InternalError>> + Send + 'a>>;
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+pub struct EntityPageStreamer {
+    start_offset: usize,
+    page_limit: usize,
+}
+
+impl Default for EntityPageStreamer {
+    fn default() -> Self {
+        Self {
+            start_offset: 0,
+            page_limit: 100,
+        }
+    }
+}
+
+impl EntityPageStreamer {
+    pub fn new(start_offset: usize, page_limit: usize) -> Self {
+        Self {
+            start_offset,
+            page_limit,
+        }
+    }
+
+    pub fn into_stream<'a, Entity, Args, HInitArgs, HInitArgsFut, HListing, HListingFut>(
+        self,
+        get_args_callback: HInitArgs,
+        next_entities_callback: HListing,
+    ) -> EntityPageStream<'a, Entity>
+    where
+        Entity: Send + 'a,
+
+        Args: Clone + Send + 'a,
+
+        HInitArgs: FnOnce() -> HInitArgsFut + Send + 'a,
+        HInitArgsFut: Future<Output = Result<Args, InternalError>> + Send + 'a,
+
+        HListing: Fn(Args, PaginationOpts) -> HListingFut + Send + 'a,
+        HListingFut: Future<Output = Result<EntityPageListing<Entity>, InternalError>> + Send + 'a,
+    {
+        let init_offset = self.start_offset;
+        let init_limit = self.page_limit;
+
+        Box::pin(async_stream::try_stream! {
+            // Init arguments
+            let args = get_args_callback().await?;
+
+            // Tracking pagination progress
+            let mut offset = init_offset;
+            let limit = init_limit;
+
+            loop {
+                // Load a page of dataset entities
+                let entities_page =
+                    next_entities_callback(args.clone(), PaginationOpts { limit, offset })
+                        .await?;
+
+                // Actually read entities
+                let loaded_entries_count = entities_page.list.len();
+
+                // Stream the entities
+                for entity in entities_page.list {
+                    yield entity;
+                }
+
+                // Next page
+                offset += loaded_entries_count;
+                if offset >= entities_page.total_count {
+                    break;
+                }
+            }
+        })
+    }
+}
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/utils/database-common/src/helpers.rs b/src/utils/database-common/src/helpers.rs
new file mode 100644
index 000000000..160971f4b
--- /dev/null
+++ b/src/utils/database-common/src/helpers.rs
@@ -0,0 +1,19 @@
+// Copyright Kamu Data, Inc. and contributors. All rights reserved.
+//
+// Use of this software is governed by the Business Source License
+// included in the LICENSE file.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0.
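A minimal usage sketch of the `EntityPageStreamer` added above: it repeatedly asks a paged callback for an `EntityPageListing` and flattens the pages into a single entity stream, which lets repository implementations keep simple LIMIT/OFFSET queries while callers consume a flat stream. The toy page source and numbers here are illustrative, not part of this patch:

```rust
use database_common::{EntityPageListing, EntityPageStreamer, PaginationOpts};
use futures::TryStreamExt;
use internal_error::InternalError;

// Toy page source standing in for a repository method such as a paged SQL query.
async fn load_page(pagination: PaginationOpts) -> Result<EntityPageListing<u32>, InternalError> {
    let all: Vec<u32> = (0..42).collect();
    Ok(EntityPageListing {
        list: all
            .iter()
            .skip(pagination.offset)
            .take(pagination.safe_limit(all.len()))
            .copied()
            .collect(),
        total_count: all.len(),
    })
}

async fn collect_all() -> Result<Vec<u32>, InternalError> {
    // Start at offset 0 and fetch pages of 10 entities at a time.
    let stream = EntityPageStreamer::new(0, 10).into_stream(
        // No shared arguments are needed for this toy source.
        || async { Ok::<_, InternalError>(()) },
        |(), pagination| load_page(pagination),
    );
    stream.try_collect().await
}
```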
+ +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub fn sqlite_generate_placeholders_list(arguments_count: usize, index_offset: usize) -> String { + (0..arguments_count) + .map(|i| format!("${}", i + index_offset)) + .intersperse(",".to_string()) + .collect() +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/utils/database-common/src/lib.rs b/src/utils/database-common/src/lib.rs index 7ea140fd7..d31627c52 100644 --- a/src/utils/database-common/src/lib.rs +++ b/src/utils/database-common/src/lib.rs @@ -7,12 +7,18 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. +#![feature(lint_reasons)] +#![expect(incomplete_features)] +#![feature(inherent_associated_types)] +#![feature(iter_intersperse)] + mod db_connection_settings; mod db_credentials; mod db_error; mod db_provider; mod entities; +mod helpers; mod password; mod plugins; mod transactions; @@ -22,6 +28,7 @@ pub use db_credentials::*; pub use db_error::*; pub use db_provider::*; pub use entities::*; +pub use helpers::*; pub use password::*; pub use plugins::*; pub use transactions::*; diff --git a/src/utils/database-common/tests/mod.rs b/src/utils/database-common/tests/mod.rs new file mode 100644 index 000000000..8c3d1733a --- /dev/null +++ b/src/utils/database-common/tests/mod.rs @@ -0,0 +1,11 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +#![feature(assert_matches)] +mod tests; diff --git a/src/utils/database-common/tests/tests/mod.rs b/src/utils/database-common/tests/tests/mod.rs new file mode 100644 index 000000000..e6dbbae29 --- /dev/null +++ b/src/utils/database-common/tests/tests/mod.rs @@ -0,0 +1,10 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +mod test_entries_streamer; diff --git a/src/utils/database-common/tests/tests/test_entries_streamer.rs b/src/utils/database-common/tests/tests/test_entries_streamer.rs new file mode 100644 index 000000000..b80e6294f --- /dev/null +++ b/src/utils/database-common/tests/tests/test_entries_streamer.rs @@ -0,0 +1,414 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
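The `sqlite_generate_placeholders_list` helper added above produces the `$N` placeholder list that SQLite needs for dynamically sized `IN (...)` clauses; the index offset accounts for bind parameters that precede the list. A rough sketch of how such a query string can be assembled (the table and column names here are illustrative, not the actual kamu schema):

```rust
use database_common::sqlite_generate_placeholders_list;

// Two fixed bind parameters ($1 = dataset_id, $2 = flow_status) come first,
// so the initiator placeholders start at index 3.
fn build_query(initiator_count: usize) -> String {
    format!(
        "SELECT flow_id FROM flows \
         WHERE dataset_id = $1 AND flow_status = $2 \
           AND initiator IN ({})",
        sqlite_generate_placeholders_list(initiator_count, 3)
    )
}

// build_query(3) produces `... AND initiator IN ($3,$4,$5)`.
```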
+ +use std::sync::Arc; + +use database_common::{EntityPageListing, EntityPageStreamer, PaginationOpts}; +use futures::TryStreamExt; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +struct TestPaginationOpts { + total_entity_count: usize, + expected_entities_call_count: usize, + start_offset: usize, + page_limit: usize, + expected_entities: Vec, +} + +macro_rules! test_pagination { + ($test_pagination_opts: expr) => { + let TestPaginationOpts { + total_entity_count, + expected_entities_call_count, + start_offset, + page_limit, + expected_entities, + } = $test_pagination_opts; + + let entity_source = entity_source(total_entity_count, expected_entities_call_count); + let streamer = EntityPageStreamer::new(start_offset, page_limit); + + let stream = streamer.into_stream( + || async { + let arguments = entity_source.init_arguments().await; + Ok(arguments) + }, + |_, pagination| { + let entity_source = entity_source.clone(); + async move { + let listing = entity_source.entities(pagination).await; + Ok(listing) + } + }, + ); + + let actual_entries = stream.try_collect::>().await.unwrap(); + + pretty_assertions::assert_eq!(expected_entities, actual_entries); + }; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[tokio::test] +async fn test_pagination_less_than_a_page() { + test_pagination!(TestPaginationOpts { + total_entity_count: 3, + start_offset: 0, + page_limit: 5, + expected_entities_call_count: 1, + expected_entities: vec![ + TestEntity { id: 0 }, + TestEntity { id: 1 }, + TestEntity { id: 2 }, + ], + }); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[tokio::test] +async fn test_pagination_fits_on_one_page() { + test_pagination!(TestPaginationOpts { + total_entity_count: 5, + start_offset: 0, + page_limit: 5, + expected_entities_call_count: 1, + expected_entities: vec![ + TestEntity { id: 0 }, + TestEntity { id: 1 }, + TestEntity { id: 2 }, + TestEntity { id: 3 }, + TestEntity { id: 4 }, + ], + }); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[tokio::test] +async fn test_pagination_more_than_a_page() { + test_pagination!(TestPaginationOpts { + total_entity_count: 7, + start_offset: 0, + page_limit: 5, + expected_entities_call_count: 2, + expected_entities: vec![ + TestEntity { id: 0 }, + TestEntity { id: 1 }, + TestEntity { id: 2 }, + TestEntity { id: 3 }, + TestEntity { id: 4 }, + TestEntity { id: 5 }, + TestEntity { id: 6 }, + ], + }); +} + +#[tokio::test] +async fn test_pagination_fits_on_few_pages() { + test_pagination!(TestPaginationOpts { + total_entity_count: 10, + start_offset: 0, + page_limit: 5, + expected_entities_call_count: 2, + expected_entities: vec![ + TestEntity { id: 0 }, + TestEntity { id: 1 }, + TestEntity { id: 2 }, + TestEntity { id: 3 }, + TestEntity { id: 4 }, + TestEntity { id: 5 }, + TestEntity { id: 6 }, + TestEntity { id: 7 }, + TestEntity { id: 8 }, + TestEntity { id: 9 }, + ], + }); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[tokio::test] +async fn test_pagination_start_offset_in_the_page_middle() { + test_pagination!(TestPaginationOpts { + total_entity_count: 10, + start_offset: 5, + page_limit: 10, + expected_entities_call_count: 1, + 
expected_entities: vec![ + TestEntity { id: 5 }, + TestEntity { id: 6 }, + TestEntity { id: 7 }, + TestEntity { id: 8 }, + TestEntity { id: 9 }, + ], + }); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[tokio::test] +async fn test_pagination_start_offset_is_greater_than_the_total_entity_count() { + test_pagination!(TestPaginationOpts { + total_entity_count: 10, + start_offset: 11, + page_limit: 10, + expected_entities_call_count: 1, + expected_entities: vec![], + }); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[tokio::test] +async fn test_paged_page_processing_of_input_data_by_ref() { + fn assert_page(page: &[&TestEntity], pagination: &PaginationOpts) { + match pagination.offset { + 0 => { + pretty_assertions::assert_eq!( + vec![ + &TestEntity { id: 0 }, + &TestEntity { id: 1 }, + &TestEntity { id: 2 }, + ], + page + ); + } + 3 => { + pretty_assertions::assert_eq!( + vec![ + &TestEntity { id: 3 }, + &TestEntity { id: 4 }, + &TestEntity { id: 5 }, + ], + page + ); + } + 6 => { + pretty_assertions::assert_eq!( + vec![ + &TestEntity { id: 6 }, + &TestEntity { id: 7 }, + &TestEntity { id: 8 }, + ], + page + ); + } + 9 => { + pretty_assertions::assert_eq!(vec![&TestEntity { id: 9 },], page); + } + _ => { + unreachable!() + } + } + } + + let input_data = vec![ + TestEntity { id: 0 }, + TestEntity { id: 1 }, + TestEntity { id: 2 }, + TestEntity { id: 3 }, + TestEntity { id: 4 }, + TestEntity { id: 5 }, + TestEntity { id: 6 }, + TestEntity { id: 7 }, + TestEntity { id: 8 }, + TestEntity { id: 9 }, + ]; + + struct CollectionArgs<'a> { + pub input_data: &'a Vec, + } + + let streamer = EntityPageStreamer::new(0, 3); + + let stream = streamer.into_stream( + || async { + Ok(Arc::new(CollectionArgs { + input_data: &input_data, + })) + }, + |input, pagination| { + let input_len = input.input_data.len(); + + let input_page = input + .input_data + .iter() + .skip(pagination.offset) + .take(pagination.safe_limit(input_len)) + .collect::>(); + + assert_page(&input_page, &pagination); + + async move { + Ok(EntityPageListing { + list: input_page, + total_count: input_len, + }) + } + }, + ); + + stream.try_collect::>().await.unwrap(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[tokio::test] +async fn test_paged_page_processing_of_input_data_by_value() { + #[derive(Debug, Clone, PartialEq)] + struct ClonableTestEntity { + id: usize, + } + + fn assert_page(page: &[ClonableTestEntity], pagination: &PaginationOpts) { + match pagination.offset { + 0 => { + pretty_assertions::assert_eq!( + vec![ + ClonableTestEntity { id: 0 }, + ClonableTestEntity { id: 1 }, + ClonableTestEntity { id: 2 }, + ], + page + ); + } + 3 => { + pretty_assertions::assert_eq!( + vec![ + ClonableTestEntity { id: 3 }, + ClonableTestEntity { id: 4 }, + ClonableTestEntity { id: 5 }, + ], + page + ); + } + 6 => { + pretty_assertions::assert_eq!( + vec![ + ClonableTestEntity { id: 6 }, + ClonableTestEntity { id: 7 }, + ClonableTestEntity { id: 8 }, + ], + page + ); + } + 9 => { + pretty_assertions::assert_eq!(vec![ClonableTestEntity { id: 9 },], page); + } + _ => { + unreachable!() + } + } + } + + let input_data = vec![ + ClonableTestEntity { id: 0 }, + ClonableTestEntity { id: 1 }, + ClonableTestEntity { id: 2 }, + ClonableTestEntity { id: 3 }, + ClonableTestEntity { id: 4 }, + 
ClonableTestEntity { id: 5 }, + ClonableTestEntity { id: 6 }, + ClonableTestEntity { id: 7 }, + ClonableTestEntity { id: 8 }, + ClonableTestEntity { id: 9 }, + ]; + + let streamer = EntityPageStreamer::new(0, 3); + + let stream = streamer.into_stream( + || async { Ok(Arc::new(input_data)) }, + |input, pagination| { + let input_page = input + .iter() + .skip(pagination.offset) + .take(pagination.safe_limit(input.len())) + .cloned() + .collect::<Vec<_>>(); + + assert_page(&input_page, &pagination); + + async move { + Ok(EntityPageListing { + list: input_page, + total_count: input.len(), + }) + } + }, + ); + + stream.try_collect::<Vec<_>>().await.unwrap(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Helpers +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +fn entity_source( + total_entities_count: usize, + expected_entities_call_count: usize, +) -> Arc<MockEntitySource> { + let mut entity_source = MockEntitySource::new(); + + entity_source + .expect_init_arguments() + .times(1) + .returning(|| NoArgs); + + entity_source + .expect_entities() + .times(expected_entities_call_count) + .returning(move |pagination| { + let result = (0..) + .skip(pagination.offset) + .take(pagination.safe_limit(total_entities_count)) + .map(|id| TestEntity { id }) + .collect::<Vec<_>>(); + + EntityPageListing { + list: result, + total_count: total_entities_count, + } + }); + + Arc::new(entity_source) +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Clone)] +struct NoArgs; + +#[derive(Debug, PartialEq)] +struct TestEntity { + id: usize, +} + +#[async_trait::async_trait] +trait EntitySource { + async fn init_arguments(&self) -> NoArgs; + + async fn entities(&self, pagination: PaginationOpts) -> EntityPageListing<TestEntity>; +} + +mockall::mock!
{ + pub EntitySource {} + + #[async_trait::async_trait] + impl EntitySource for EntitySource { + async fn init_arguments(&self) -> NoArgs; + + async fn entities(&self, pagination: PaginationOpts) -> EntityPageListing<TestEntity>; + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/utils/datafusion-cli/Cargo.toml b/src/utils/datafusion-cli/Cargo.toml index 277f0b512..b02096c28 100644 --- a/src/utils/datafusion-cli/Cargo.toml +++ b/src/utils/datafusion-cli/Cargo.toml @@ -29,13 +29,11 @@ include = { workspace = true } edition = { workspace = true } publish = { workspace = true } + [dependencies] arrow = { version = "53" } async-trait = "0.1" aws-config = "1" -aws-sdk-sso = "1" -aws-sdk-ssooidc = "1" -aws-sdk-sts = "1" aws-credential-types = "1" clap = { version = "4", features = ["derive"] } datafusion = { version = "42", features = [ @@ -52,7 +50,6 @@ futures = "0.3" object_store = { version = "0.11", features = ["aws", "gcp", "http"] } parking_lot = { version = "0.12" } parquet = { version = "53", default-features = false } -regex = "1" rustyline = "14.0" tokio = { version = "1", features = [ "macros", @@ -63,3 +60,6 @@ tokio = { version = "1", features = [ "signal", ] } url = "2" + + +[dev-dependencies] diff --git a/src/utils/event-sourcing-macros/Cargo.toml b/src/utils/event-sourcing-macros/Cargo.toml index 1d2cf1a99..6cbf9e67e 100644 --- a/src/utils/event-sourcing-macros/Cargo.toml +++ b/src/utils/event-sourcing-macros/Cargo.toml @@ -29,3 +29,6 @@ syn = { version = "2", default-features = false, features = [ "printing", "proc-macro", ] } + + +[dev-dependencies] diff --git a/src/utils/event-sourcing/Cargo.toml b/src/utils/event-sourcing/Cargo.toml index ef959151b..d53eb1b5e 100644 --- a/src/utils/event-sourcing/Cargo.toml +++ b/src/utils/event-sourcing/Cargo.toml @@ -27,7 +27,6 @@ internal-error = { workspace = true } async-stream = "0.3" async-trait = { version = "0.1", default-features = false } -serde = { version = "1", features = ["derive"] } thiserror = { version = "1", default-features = false } tokio-stream = { version = "0.1", default-features = false } tracing = { version = "0.1", default-features = false, features = ["attributes"] } diff --git a/src/utils/init-on-startup/Cargo.toml b/src/utils/init-on-startup/Cargo.toml index aa7b106bb..5cbaf0224 100644 --- a/src/utils/init-on-startup/Cargo.toml +++ b/src/utils/init-on-startup/Cargo.toml @@ -27,7 +27,7 @@ internal-error = { workspace = true } async-trait = "0.1" dill = "0.9" -petgraph = { version = "0.6", default-features = false, features = ["stable_graph"]} +petgraph = { version = "0.6", default-features = false, features = ["stable_graph"] } thiserror = "1" tracing = "0.1" diff --git a/src/utils/init-on-startup/src/init_on_startup.rs b/src/utils/init-on-startup/src/init_on_startup.rs index 808b69200..96ef71505 100644 --- a/src/utils/init-on-startup/src/init_on_startup.rs +++ b/src/utils/init-on-startup/src/init_on_startup.rs @@ -82,8 +82,9 @@ pub async fn run_startup_jobs(catalog: &Catalog) -> Result<(), StartupJobsError> for startup_job_builder in startup_job_builders { let metadata = get_startup_job_metadata(&startup_job_builder); let job_name = metadata.job_name; + if job_builders_by_name - .insert(metadata.job_name, (startup_job_builder, metadata)) + .insert(job_name, (startup_job_builder, metadata)) .is_some() { return Err(StartupJobsError::JobNameNonUnique( diff --git a/src/utils/kamu-cli-puppet/src/kamu_cli_puppet.rs
b/src/utils/kamu-cli-puppet/src/kamu_cli_puppet.rs index 7b9846279..84730968f 100644 --- a/src/utils/kamu-cli-puppet/src/kamu_cli_puppet.rs +++ b/src/utils/kamu-cli-puppet/src/kamu_cli_puppet.rs @@ -11,6 +11,7 @@ use std::path::{Path, PathBuf}; use std::{ffi, fs}; use chrono::{DateTime, Utc}; +use opendatafabric as odf; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -21,6 +22,7 @@ pub type ExecuteCommandResult = assert_cmd::assert::Assert; pub struct KamuCliPuppet { workspace_path: PathBuf, system_time: Option<DateTime<Utc>>, + account: Option<odf::AccountName>, temp_dir: Option<tempfile::TempDir>, } @@ -31,23 +33,46 @@ impl KamuCliPuppet { Self { workspace_path, system_time: None, + account: None, temp_dir: None, } } pub async fn new_workspace_tmp(is_multi_tenant: bool) -> Self { - Self::new_workspace_tmp_inner(NewWorkspaceOptions { - is_multi_tenant, + if is_multi_tenant { + Self::new_workspace_tmp_multi_tenant().await + } else { + Self::new_workspace_tmp_single_tenant().await + } + } + + pub async fn new_workspace_tmp_single_tenant() -> Self { + Self::new_workspace_tmp_with(NewWorkspaceOptions::default()).await + } + + pub async fn new_workspace_tmp_multi_tenant() -> Self { + Self::new_workspace_tmp_with(NewWorkspaceOptions { + is_multi_tenant: true, + kamu_config: Some( + indoc::indoc!( + r#" + kind: CLIConfig + version: 1 + content: + users: + predefined: + - accountName: e2e-user + "# + ) + .into(), + ), + account: Some(odf::AccountName::new_unchecked("e2e-user")), ..Default::default() }) .await } pub async fn new_workspace_tmp_with(options: NewWorkspaceOptions) -> Self { - Self::new_workspace_tmp_inner(options).await - } - - async fn new_workspace_tmp_inner(options: NewWorkspaceOptions) -> Self { let temp_dir = tempfile::tempdir().unwrap(); if let Some(config) = options.kamu_config { @@ -79,6 +104,10 @@ impl KamuCliPuppet { self.system_time = t; } + pub fn set_account(&mut self, account: Option<odf::AccountName>) { + self.account = account; + } + pub fn workspace_path(&self) -> &Path { &self.workspace_path } @@ -158,6 +187,10 @@ impl KamuCliPuppet { command.args(["--system-time", system_time.as_str()]); } + if let Some(account) = &self.account { + command.args(["--account", account.as_str()]); + } + command.args(cmd); if let Some(input) = maybe_input { @@ -177,6 +210,7 @@ pub struct NewWorkspaceOptions { pub is_multi_tenant: bool, pub kamu_config: Option<String>, pub env_vars: Vec<(String, String)>, + pub account: Option<odf::AccountName>, } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// From 99251e7c32968a9dfb39ed058ed4a633954cf08a Mon Sep 17 00:00:00 2001 From: Dima Pristupa Date: Wed, 18 Dec 2024 21:15:39 +0200 Subject: [PATCH 02/10] GQL: Dataset.visibility(): return back, after being deleted by mistake (#997) --- CHANGELOG.md | 2 +- resources/schema.gql | 14 +++++ .../src/mutations/dataset_mut/dataset_mut.rs | 19 +------ .../graphql/src/mutations/datasets_mut.rs | 2 + .../graphql/src/queries/datasets/dataset.rs | 54 ++++++++++++++++++- 5 files changed, 71 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a95d73500..235971f83 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,10 +15,10 @@ Recommendation: for ease of reading, use the following order: ### Added - Console warning when deleting datasets which are out of sync with their push remotes ### Changed -- Speed up project build time by removing unused dependencies which were not detected by automated tools - Private Datasets: - OSO: using
user actors / dataset resources that come from the database - Thus, any access check relies on real entities + - GQL, added `Dataset.visibility()` to get the current visibility value - GQL, added `DatasetMut.setVisibility()` to be able to change the dataset visibility after it has been created - Deletion of previously created (and unused) ReBAC-properties and reindexing - OSO: updating the schema to use identifiers instead of names diff --git a/resources/schema.gql b/resources/schema.gql index c987dd9bd..8b25c2fba 100644 --- a/resources/schema.gql +++ b/resources/schema.gql @@ -451,6 +451,10 @@ type Dataset { """ kind: DatasetKind! """ + Returns the visibility of the dataset + """ + visibility: DatasetVisibilityOutput! + """ Access to the data of the dataset """ data: DatasetData! @@ -773,6 +777,8 @@ input DatasetVisibilityInput @oneOf { public: PublicDatasetVisibilityInput } +union DatasetVisibilityOutput = PrivateDatasetVisibility | PublicDatasetVisibility + type Datasets { """ Returns dataset by its ID @@ -1524,6 +1530,10 @@ type PrepStepPipe { command: [String!]! } +type PrivateDatasetVisibility { + dummy: String +} + input PrivateDatasetVisibilityInput { dummy: String } @@ -1533,6 +1543,10 @@ input PropagationMode @oneOf { toSeed: FlowConfigurationResetToSeedDummy } +type PublicDatasetVisibility { + anonymousAvailable: Boolean! +} + input PublicDatasetVisibilityInput { anonymousAvailable: Boolean! } diff --git a/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut.rs b/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut.rs index 8ec9b5f87..ada739a65 100644 --- a/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut.rs +++ b/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut.rs @@ -19,6 +19,7 @@ use crate::mutations::{ DatasetMetadataMut, }; use crate::prelude::*; +use crate::queries::*; use crate::utils::{ensure_dataset_env_vars_enabled, from_catalog_n}; use crate::LoggedInGuard; @@ -287,24 +288,6 @@ pub enum DatasetVisibilityInput { Public(PublicDatasetVisibility), } -#[derive(Union, Debug, Clone, PartialEq, Eq)] -pub enum DatasetVisibility { - Private(PrivateDatasetVisibility), - Public(PublicDatasetVisibility), -} - -#[derive(SimpleObject, InputObject, Debug, Clone, PartialEq, Eq)] -#[graphql(input_name = "PrivateDatasetVisibilityInput")] -pub struct PrivateDatasetVisibility { - _dummy: Option<String>, -} - -#[derive(SimpleObject, InputObject, Debug, Clone, PartialEq, Eq)] -#[graphql(input_name = "PublicDatasetVisibilityInput")] -pub struct PublicDatasetVisibility { - anonymous_available: bool, -} - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[derive(Interface, Debug)] diff --git a/src/adapter/graphql/src/mutations/datasets_mut.rs b/src/adapter/graphql/src/mutations/datasets_mut.rs index fc6a752b4..31284858e 100644 --- a/src/adapter/graphql/src/mutations/datasets_mut.rs +++ b/src/adapter/graphql/src/mutations/datasets_mut.rs @@ -242,3 +242,5 @@ impl CreateDatasetResultMissingInputs { ) } } + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/adapter/graphql/src/queries/datasets/dataset.rs b/src/adapter/graphql/src/queries/datasets/dataset.rs index 28ff9448c..ecdfe219b 100644 --- a/src/adapter/graphql/src/queries/datasets/dataset.rs +++ b/src/adapter/graphql/src/queries/datasets/dataset.rs @@ -13,7 +13,7 @@ use opendatafabric as odf; use crate::prelude::*; use crate::queries::*; -use
crate::utils::ensure_dataset_env_vars_enabled; +use crate::utils::{ensure_dataset_env_vars_enabled, from_catalog_n}; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -87,6 +87,26 @@ impl Dataset { Ok(summary.kind.into()) } + // TODO: Private Datasets: tests + /// Returns the visibility of the dataset + async fn visibility(&self, ctx: &Context<'_>) -> Result<DatasetVisibilityOutput> { + let rebac_svc = from_catalog_n!(ctx, dyn kamu_auth_rebac::RebacService); + + let resolved_dataset = self.get_dataset(ctx); + let properties = rebac_svc + .get_dataset_properties(resolved_dataset.get_id()) + .await + .int_err()?; + + let visibility = if properties.allows_public_read { + DatasetVisibilityOutput::public(properties.allows_anonymous_read) + } else { + DatasetVisibilityOutput::private() + }; + + Ok(visibility) + } + /// Access to the data of the dataset async fn data(&self) -> DatasetData { DatasetData::new(self.dataset_handle.clone()) @@ -168,6 +188,38 @@ impl Dataset { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#[derive(Union, Debug, Clone, PartialEq, Eq)] +pub enum DatasetVisibilityOutput { + Private(PrivateDatasetVisibility), + Public(PublicDatasetVisibility), +} + +impl DatasetVisibilityOutput { + pub fn private() -> Self { + Self::Private(PrivateDatasetVisibility { _dummy: None }) + } + + pub fn public(anonymous_available: bool) -> Self { + Self::Public(PublicDatasetVisibility { + anonymous_available, + }) + } +} + +#[derive(SimpleObject, InputObject, Debug, Clone, PartialEq, Eq)] +#[graphql(input_name = "PrivateDatasetVisibilityInput")] +pub struct PrivateDatasetVisibility { + _dummy: Option<String>, +} + +#[derive(SimpleObject, InputObject, Debug, Clone, PartialEq, Eq)] +#[graphql(input_name = "PublicDatasetVisibilityInput")] +pub struct PublicDatasetVisibility { + pub anonymous_available: bool, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[derive(SimpleObject, Debug, Clone, PartialEq, Eq)] pub struct DatasetPermissions { can_view: bool, From 8eadc3ca8d0946a96603f3af572daf6298dcee66 Mon Sep 17 00:00:00 2001 From: Dima Pristupa Date: Thu, 19 Dec 2024 13:29:41 +0200 Subject: [PATCH 03/10] Merge actual changes (#998) * Do not show usage error for --all flag (#960) * Do not show usage error for --all flag When the --all flag is set for the `repo delete` command, and there are no repositories to delete, do not show usage error. * Improve args validation * Improve args validation, e2e tests * Typo corrected in feature flags (#974) * Images, kamu-base-git: fix collision of executable files (#975) * 868 api server provide feature flags for UI (#976) Separated runtime and UI configuration flags. UI config is provided by API server too. * Release v0.210.0 + minor deps * 854 persistent storage of dataset dependencies graph (#973) Dependency graph service moved to 'datasets' domain. Defined dataset dependency repository interface and created 3 implementations. No more postponed initialization, organized initial setup in the form of an indexer. Added telemetry extensions on the way. Tests for repositories, stabilized other tests. Cascading effect on delete within the dataset entry domain.
* v0.211.0 + minor deps * Fixed image building (#977) Replaced cascade delete of dataset entries in graph with more explicit events to allow orphan upstream dependencies where only ID is given * Upgrade to datafusion 43 * Use thiserror v2 throughout * trust-dns-resolver => hickory-resolver + minor deps * Fix non-sequential offsets on ingest * 0.212.0 * Use KAMU_CONTAINER_RUNTIME_TYPE env var in Makefile (#991) * Use KAMU_CONTAINER_RUNTIME_TYPE env var in Makefile * Make podman default engine for e2e tests * Backporting changes from Private Datasets feature branch (#992) * Backport tweaks * Add doc strings * Remove unused deps * Remove obsolete test * CHANGELOG.md: update * Tips after self-review * Delete env var on dataset delete (#993) * Delete env var on dataset delete * 984 refactoring separate planning and execution phases in key dataset manipulation services (#994) * Draft split of `CompactionService` into planner and execution parts * Compaction cleanups * Compacting more cleanups * Compacting: read old HEAD on planning phase * Reset service split on planner and execution * Extracted `MetadataQueryService` - to query polling, push sources and set transform, instead of ingest/transform planners * DataWriterMetadataState became part of polling ingest item at the planning phase * Setting watermark: separate planner and execution service * Push ingest service prepared for split * Push ingest split on planning and executing * Made some order in infra/core services * {Flow,Task,Outbox}Executor=>Agent * Unified naming of planners and executors * Revised telemetry in refactored components * Review: DataWriterDataFusionBuilder flattened * changelog * v0.123.0 + minor deps * kamu-dev-base: include short commit hash as well (#995) * v0.213.1: less aggressive telemetry with `DataWriterMetadataState` --------- Co-authored-by: Andrii Demus Co-authored-by: Sergei Zaychenko Co-authored-by: Sergii Mikhtoniuk Co-authored-by: Roman Boiko --- CHANGELOG.md | 42 +- Cargo.lock | 1400 +++++++++-------- Cargo.toml | 140 +- LICENSE.txt | 4 +- Makefile | 20 +- deny.toml | 5 +- images/kamu-base-git/Dockerfile | 2 +- images/kamu-dev-base/Makefile | 35 +- images/kamu-dev-base/README.md | 61 + .../20241125193114_dataset_dependencies.sql | 19 + .../20241217112645_delete-env-var_fk.sql | 5 + .../20241217205719_executor2agent.sql | 16 + .../20241125192943_dataset_dependencies.sql | 19 + .../20241217112645_delete-env-var_fk.sql | 25 + .../sqlite/20241217205726_executor2agent.sql | 16 + resources/openapi-mt.json | 2 +- resources/openapi.json | 2 +- .../tests/test_oso_dataset_authorizer.rs | 4 +- src/adapter/flight-sql/Cargo.toml | 2 +- src/adapter/flight-sql/src/lib.rs | 1 + src/adapter/flight-sql/src/service.rs | 4 +- src/adapter/graphql/Cargo.toml | 3 +- src/adapter/graphql/src/guards.rs | 8 +- .../graphql/src/mutations/accounts_mut.rs | 10 +- src/adapter/graphql/src/mutations/auth_mut.rs | 12 +- .../src/mutations/dataset_env_vars_mut.rs | 6 +- .../src/mutations/dataset_metadata_mut.rs | 15 +- .../src/mutations/dataset_mut/dataset_mut.rs | 18 +- .../dataset_mut/dataset_mut_utils.rs | 4 +- .../graphql/src/mutations/datasets_mut.rs | 4 +- .../flows_mut/account_flow_configs_mut.rs | 10 +- .../flows_mut/dataset_flow_configs_mut.rs | 10 +- .../flows_mut/dataset_flow_runs_mut.rs | 9 +- .../mutations/flows_mut/flows_mut_utils.rs | 20 +- .../src/mutations/metadata_chain_mut.rs | 2 +- src/adapter/graphql/src/prelude.rs | 2 +- .../graphql/src/queries/accounts/account.rs | 8 +- .../queries/accounts/account_flow_configs.rs | 8
+- .../src/queries/accounts/account_flow_runs.rs | 8 +- .../graphql/src/queries/accounts/accounts.rs | 6 +- src/adapter/graphql/src/queries/auth.rs | 6 +- src/adapter/graphql/src/queries/data.rs | 6 +- .../graphql/src/queries/datasets/dataset.rs | 23 +- .../src/queries/datasets/dataset_data.rs | 10 +- .../src/queries/datasets/dataset_env_vars.rs | 5 +- .../queries/datasets/dataset_flow_configs.rs | 4 +- .../src/queries/datasets/dataset_flow_runs.rs | 8 +- .../src/queries/datasets/dataset_metadata.rs | 84 +- .../graphql/src/queries/datasets/datasets.rs | 10 +- .../src/queries/datasets/metadata_chain.rs | 16 +- src/adapter/graphql/src/queries/flows/flow.rs | 22 +- .../graphql/src/queries/flows/flow_outcome.rs | 2 +- .../src/queries/flows/flow_start_condition.rs | 3 +- .../graphql/src/queries/flows/flow_trigger.rs | 2 +- .../graphql/src/scalars/flow_configuration.rs | 5 +- src/adapter/graphql/src/utils.rs | 106 +- .../tests/tests/test_error_handling.rs | 29 +- .../tests/test_gql_account_flow_configs.rs | 41 +- .../graphql/tests/tests/test_gql_data.rs | 5 +- .../tests/tests/test_gql_dataset_env_vars.rs | 8 +- .../tests/test_gql_dataset_flow_configs.rs | 145 +- .../tests/tests/test_gql_dataset_flow_runs.rs | 153 +- .../graphql/tests/tests/test_gql_datasets.rs | 24 +- .../graphql/tests/tests/test_gql_metadata.rs | 7 +- .../tests/tests/test_gql_metadata_chain.rs | 5 +- .../graphql/tests/tests/test_gql_search.rs | 5 +- src/adapter/http/Cargo.toml | 5 +- src/adapter/http/src/data/ingest_handler.rs | 31 +- src/adapter/http/src/e2e/e2e_middleware.rs | 6 +- .../http/tests/harness/client_side_harness.rs | 19 +- .../http/tests/harness/server_side_harness.rs | 7 +- .../harness/server_side_local_fs_harness.rs | 24 +- .../tests/harness/server_side_s3_harness.rs | 24 +- .../http/tests/tests/test_data_ingest.rs | 3 +- .../http/tests/tests/test_data_query.rs | 24 +- .../tests/test_dataset_authorization_layer.rs | 6 +- src/adapter/http/tests/tests/test_routing.rs | 5 +- .../scenario_existing_diverged_dataset.rs | 15 +- .../scenario_existing_diverged_dataset.rs | 15 +- src/adapter/oauth/Cargo.toml | 2 +- src/adapter/odata/Cargo.toml | 4 +- .../odata/tests/tests/test_handlers.rs | 38 +- src/app/cli/Cargo.toml | 4 +- src/app/cli/src/app.rs | 101 +- src/app/cli/src/cli_commands.rs | 3 + src/app/cli/src/commands/delete_command.rs | 15 +- src/app/cli/src/commands/ingest_command.rs | 26 +- .../src/commands/repository_delete_command.rs | 19 +- .../cli/src/commands/set_watermark_command.rs | 11 +- .../commands/system_api_server_run_command.rs | 10 + src/app/cli/src/database.rs | 4 + src/app/cli/src/explore/api_server.rs | 58 +- src/app/cli/src/explore/mod.rs | 3 + .../cli/src/explore/ui_configuration.rs} | 27 +- src/app/cli/src/explore/web_ui_server.rs | 52 +- src/app/cli/src/output/compact_progress.rs | 2 +- .../src/services/confirm_delete_service.rs | 2 + src/domain/accounts/domain/Cargo.toml | 2 +- src/domain/auth-rebac/domain/Cargo.toml | 2 +- src/domain/core/Cargo.toml | 5 +- .../core/src/entities/metadata_chain.rs | 8 +- src/domain/core/src/entities/mod.rs | 16 +- .../src/entities/writer_metadata_state.rs | 209 +++ .../src/entities/writer_source_visitor.rs} | 19 +- .../src/messages/core_message_consumers.rs | 3 - .../core/src/messages/core_message_types.rs | 2 +- .../compaction/compaction_executor.rs | 71 + .../compaction/compaction_listener.rs | 55 + .../compaction_planner.rs} | 219 ++- .../core/src/services/compaction/mod.rs | 16 + .../src/services/dependency_graph_service.rs | 10 +- 
src/domain/core/src/services/ingest/mod.rs | 6 +- .../services/ingest/polling_ingest_service.rs | 7 +- ...est_service.rs => push_ingest_executor.rs} | 76 +- .../services/ingest/push_ingest_planner.rs | 145 ++ .../src/services/metadata_query_service.rs | 51 + src/domain/core/src/services/mod.rs | 26 +- .../core/src/services/pull_request_planner.rs | 16 + .../src/services/remote_status_service.rs | 1 + src/domain/core/src/services/reset/mod.rs | 14 + .../core/src/services/reset/reset_executor.rs | 46 + .../core/src/services/reset/reset_planner.rs | 59 + src/domain/core/src/services/reset_service.rs | 109 -- src/domain/core/src/services/transform/mod.rs | 4 +- ...ution_service.rs => transform_executor.rs} | 2 +- .../transform/transform_request_planner.rs | 6 - src/domain/core/src/services/watermark/mod.rs | 14 + .../watermark/set_watermark_executor.rs | 58 + .../watermark/set_watermark_planner.rs | 54 + .../core/src/services/watermark_service.rs | 95 -- .../src/use_cases/compact_dataset_use_case.rs | 62 +- .../src/use_cases/reset_dataset_use_case.rs | 47 +- .../src/use_cases/set_watermark_use_case.rs | 46 +- src/domain/core/src/utils/owned_file.rs | 3 +- src/domain/datasets/domain/Cargo.toml | 2 +- .../src/entities/dataset_dependency_entry.rs | 20 + .../datasets/domain/src/entities/mod.rs | 4 + .../repos/dataset_dependency_repository.rs | 87 + .../src/repos/dataset_entry_repository.rs | 10 + src/domain/datasets/domain/src/repos/mod.rs | 2 + src/domain/datasets/services/Cargo.toml | 7 + .../src/dataset_entry_service_impl.rs | 15 +- .../services/src/dependency_graph_indexer.rs | 118 ++ .../src/dependency_graph_service_impl.rs} | 155 +- src/domain/datasets/services/src/jobs/mod.rs | 3 + src/domain/datasets/services/src/lib.rs | 4 + .../dataset_services_message_consumers.rs | 3 + .../datasets/services/tests/tests/mod.rs | 1 + .../test_dependency_graph_service_impl.rs} | 50 +- src/domain/flow-system/domain/Cargo.toml | 2 +- .../flow_executor.rs => agents/flow_agent.rs} | 6 +- .../domain/src/{executors => agents}/mod.rs | 4 +- .../domain/src/entities/flow/flow_outcome.rs | 2 +- .../domain/src/flow_messages_types.rs | 12 +- src/domain/flow-system/domain/src/jobs/mod.rs | 3 +- src/domain/flow-system/domain/src/lib.rs | 4 +- .../services/flow/flow_service_test_driver.rs | 2 +- src/domain/flow-system/services/Cargo.toml | 2 + .../flow-system/services/src/dependencies.rs | 2 +- ...ow_executor_impl.rs => flow_agent_impl.rs} | 74 +- .../src/flow/flow_query_service_impl.rs | 8 +- .../src/flow/flow_scheduling_helper.rs | 10 +- .../flow-system/services/src/flow/mod.rs | 4 +- .../src/messages/flow_message_consumers.rs | 2 +- .../src/messages/flow_message_producers.rs | 2 +- .../flow-system/services/tests/tests/mod.rs | 2 +- ...ecutor_impl.rs => test_flow_agent_impl.rs} | 74 +- .../test_flow_configuration_service_impl.rs | 21 +- .../tests/tests/utils/flow_harness_shared.rs | 46 +- .../tests/utils/flow_system_test_listener.rs | 10 +- .../services/tests/tests/utils/task_driver.rs | 4 +- src/domain/opendatafabric/Cargo.toml | 2 +- src/domain/task-system/domain/Cargo.toml | 2 +- .../domain/src/entities/task_status.rs | 8 +- src/domain/task-system/domain/src/jobs/mod.rs | 3 +- .../src/messages/task_message_producers.rs | 2 +- .../task-system/domain/src/services/mod.rs | 4 +- .../{task_executor.rs => task_agent.rs} | 4 +- .../src/services/task_definition_planner.rs | 8 +- .../task-system/services/src/dependencies.rs | 2 +- src/domain/task-system/services/src/lib.rs | 4 +- ...sk_executor_impl.rs => 
task_agent_impl.rs} | 18 +- .../src/task_definition_planner_impl.rs | 27 +- .../services/src/task_runner_impl.rs | 52 +- .../services/src/task_scheduler_impl.rs | 4 +- .../task-system/services/tests/tests/mod.rs | 2 +- ...ecutor_impl.rs => test_task_agent_impl.rs} | 54 +- src/e2e/app/cli/common/Cargo.toml | 6 +- src/e2e/app/cli/inmem/Cargo.toml | 1 + .../tests/commands/test_delete_command.rs | 7 + .../tests/tests/commands/test_repo_command.rs | 7 + src/e2e/app/cli/mysql/Cargo.toml | 1 + src/e2e/app/cli/postgres/Cargo.toml | 1 + .../tests/commands/test_delete_command.rs | 7 + .../tests/tests/commands/test_repo_command.rs | 7 + .../src/commands/test_delete_command.rs | 25 + .../src/commands/test_repo_command.rs | 32 + .../src/test_smart_transfer_protocol.rs | 8 +- src/e2e/app/cli/sqlite/Cargo.toml | 1 + .../tests/commands/test_delete_command.rs | 7 + .../tests/tests/commands/test_repo_command.rs | 7 + src/infra/auth-rebac/postgres/Cargo.toml | 1 + src/infra/core/Cargo.toml | 21 +- src/infra/core/src/compaction_service_impl.rs | 508 ------ .../src/dependency_graph_repository_inmem.rs | 58 - .../src/engine/engine_datafusion_inproc.rs | 1 + .../core/src/engine/engine_io_strategy.rs | 18 +- src/infra/core/src/engine/engine_odf.rs | 11 +- src/infra/core/src/lib.rs | 56 +- .../core/src/repos/dataset_factory_impl.rs | 2 +- .../src/repos/dataset_repository_local_fs.rs | 4 + .../core/src/repos/dataset_repository_s3.rs | 4 + .../compaction/compaction_executor_impl.rs | 248 +++ .../compaction/compaction_planner_impl.rs | 247 +++ src/infra/core/src/services/compaction/mod.rs | 14 + .../dataset_changes_service_impl.rs | 0 .../dataset_ownership_service_inmem.rs | 0 .../dataset_registry_repo_bridge.rs | 0 .../ingest/data_format_registry_impl.rs | 0 .../ingest/fetch_service/configs.rs | 0 .../ingest/fetch_service/container.rs | 0 .../ingest/fetch_service/core.rs | 0 .../ingest/fetch_service/evm.rs | 5 + .../ingest/fetch_service/file.rs | 0 .../ingest/fetch_service/ftp.rs | 0 .../ingest/fetch_service/http.rs | 0 .../ingest/fetch_service/mod.rs | 0 .../ingest/fetch_service/mqtt.rs | 0 .../ingest/fetch_service/template.rs | 0 .../{ => services}/ingest/ingest_common.rs | 18 +- .../core/src/{ => services}/ingest/mod.rs | 6 +- .../ingest/polling_ingest_service_impl.rs | 76 +- .../ingest/polling_source_state.rs | 0 .../src/{ => services}/ingest/prep_service.rs | 0 .../ingest/push_ingest_executor_impl.rs} | 229 +-- .../ingest/push_ingest_planner_impl.rs | 155 ++ .../services/metadata_query_service_impl.rs | 98 ++ src/infra/core/src/services/mod.rs | 46 + .../{ => services}/provenance_service_impl.rs | 0 .../pull_request_planner_impl.rs | 32 +- .../push_request_planner_impl.rs | 0 .../core/src/{ => services}/query/mod.rs | 26 + .../src/{ => services}/query_service_impl.rs | 9 +- src/infra/core/src/services/remote/mod.rs | 22 + .../remote}/remote_alias_resolver_impl.rs | 0 .../remote}/remote_aliases_registry_impl.rs | 6 +- .../remote_repository_registry_impl.rs | 0 .../remote}/remote_status_service_impl.rs | 5 + .../remote}/resource_loader_impl.rs | 0 .../remote}/search_service_impl.rs | 0 src/infra/core/src/services/reset/mod.rs | 14 + .../src/services/reset/reset_executor_impl.rs | 47 + .../reset/reset_planner_impl.rs} | 55 +- src/infra/core/src/services/sync/mod.rs | 14 + .../sync}/sync_request_builder.rs | 0 .../{ => services/sync}/sync_service_impl.rs | 6 +- .../core/src/{ => services}/transform/mod.rs | 4 +- .../transform_elaboration_service_impl.rs | 23 +- .../transform/transform_executor_impl.rs} | 12 +- 
.../transform/transform_helpers.rs | 0 .../transform_request_planner_impl.rs | 18 - .../verification_service_impl.rs | 8 +- src/infra/core/src/services/watermark/mod.rs | 14 + .../watermark/set_watermark_executor_impl.rs | 69 + .../watermark/set_watermark_planner_impl.rs | 84 + .../testing}/base_repo_harness.rs | 5 +- .../mock_dependency_graph_repository.rs | 35 - .../testing/mock_polling_source_service.rs | 71 +- .../mock_transform_execution_service.rs | 2 +- .../testing/mock_transform_request_planner.rs | 42 - src/infra/core/src/testing/mod.rs | 4 +- .../compact_dataset_use_case_impl.rs | 29 +- .../use_cases/pull_dataset_use_case_impl.rs | 23 +- .../use_cases/reset_dataset_use_case_impl.rs | 36 +- .../use_cases/set_watermark_use_case_impl.rs | 33 +- src/infra/core/src/watermark_service_impl.rs | 152 -- .../parallel_simple_transfer_protocol.rs | 6 +- .../core/tests/tests/engine/test_engine_io.rs | 21 +- .../tests/engine/test_engine_transform.rs | 66 +- .../tests/tests/ingest/test_polling_ingest.rs | 9 +- .../tests/tests/ingest/test_push_ingest.rs | 128 +- .../core/tests/tests/ingest/test_writer.rs | 411 ++++- src/infra/core/tests/tests/mod.rs | 9 +- ...pl.rs => test_compaction_services_impl.rs} | 245 ++- .../test_dataset_changes_service_impl.rs | 4 +- .../test_dataset_ownership_service_inmem.rs | 3 +- .../tests/tests/test_datasets_filtering.rs | 3 +- .../tests/test_pull_request_planner_impl.rs | 4 +- .../tests/test_push_request_planner_impl.rs | 3 +- .../tests/tests/test_query_service_impl.rs | 77 +- .../tests/tests/test_remote_status_service.rs | 4 +- ...ce_impl.rs => test_reset_services_impl.rs} | 48 +- ...mpl.rs => test_transform_services_impl.rs} | 72 +- .../tests/test_verification_service_impl.rs | 6 +- ...mpl.rs => test_watermark_services_impl.rs} | 44 +- .../tests/use_cases/base_use_case_harness.rs | 4 +- ..._append_dataset_metadata_batch_use_case.rs | 3 +- .../test_compact_dataset_use_case.rs | 3 +- .../use_cases/test_delete_dataset_use_case.rs | 45 +- .../use_cases/test_pull_dataset_use_case.rs | 2 +- .../use_cases/test_reset_dataset_use_case.rs | 7 +- .../use_cases/test_set_watermark_use_case.rs | 15 +- .../use_cases/test_verify_dataset_use_case.rs | 2 +- src/infra/core/tests/utils/mod.rs | 3 - .../core/tests/utils/transform_test_helper.rs | 27 +- src/infra/datasets/inmem/Cargo.toml | 5 +- .../inmem_dataset_dependency_repository.rs | 192 +++ .../repos/inmem_dataset_env_var_repository.rs | 21 + .../repos/inmem_dateset_entry_repository.rs | 37 +- src/infra/datasets/inmem/src/repos/mod.rs | 2 + src/infra/datasets/inmem/tests/repos/mod.rs | 1 + ...est_inmem_dataset_dependency_repository.rs | 116 ++ .../test_inmem_dataset_env_var_repository.rs | 13 +- ...7b2b12465ab511b6f53d28289cf518fcfd748.json | 14 + ...90170110f38925697e5898b1fc9d7e7f91b39.json | 20 + ...39e6d34070193b15e7dd0ec3ccca710ec9b55.json | 15 + ...37f66a76b0af3c4a999d0e37710a89679b310.json | 26 + ...38f73eb6ea9ee52ec69c97343683c28757e4a.json | 14 + src/infra/datasets/postgres/src/repos/mod.rs | 2 + .../postgres_dataset_dependency_repository.rs | 211 +++ .../postgres_dataset_entry_repository.rs | 52 +- .../datasets/postgres/tests/repos/mod.rs | 1 + ..._postgres_dataset_dependency_repository.rs | 121 ++ .../test_postgres_dataset_entry_repository.rs | 1 + ...est_postgres_dataset_env_var_repository.rs | 14 +- src/infra/datasets/repo-tests/Cargo.toml | 1 + ...aset_dependencies_repository_test_suite.rs | 597 +++++++ .../dataset_entry_repository_test_suite.rs | 55 +- .../dataset_env_var_repository_test_suite.rs | 151 +- 
src/infra/datasets/repo-tests/src/helpers.rs | 64 + src/infra/datasets/repo-tests/src/lib.rs | 5 + ...7b2b12465ab511b6f53d28289cf518fcfd748.json | 12 + ...90170110f38925697e5898b1fc9d7e7f91b39.json | 20 + ...5ffca15f34c2af9aaeb8d31453ab364f97495.json | 12 - ...3f674909a6dcf6997e8f28476667275d104f0.json | 2 +- ...3bf9ab82360dfaa972f9bcfe80e14241afde5.json | 2 +- ...37f66a76b0af3c4a999d0e37710a89679b310.json | 26 + ...c865a15f8d6f9406f1622baba64ea0a14cebf.json | 2 +- ...38f73eb6ea9ee52ec69c97343683c28757e4a.json | 12 + src/infra/datasets/sqlite/src/repos/mod.rs | 2 + .../sqlite_dataset_dependency_repository.rs | 221 +++ .../repos/sqlite_dateset_entry_repository.rs | 53 +- src/infra/datasets/sqlite/tests/repos/mod.rs | 1 + ...st_sqlite_dataset_dependency_repository.rs | 121 ++ .../test_sqlite_dataset_env_var_repository.rs | 14 +- src/infra/ingest-datafusion/Cargo.toml | 4 +- src/infra/ingest-datafusion/src/lib.rs | 1 - src/infra/ingest-datafusion/src/writer.rs | 357 +---- .../messaging-outbox/postgres/Cargo.toml | 2 +- .../messaging-outbox/repo-tests/Cargo.toml | 3 + src/infra/messaging-outbox/sqlite/Cargo.toml | 2 +- src/utils/container-runtime/Cargo.toml | 9 +- src/utils/data-utils/Cargo.toml | 4 +- src/utils/data-utils/src/data/hash.rs | 1 + src/utils/data-utils/src/testing/mod.rs | 33 + src/utils/database-common/Cargo.toml | 6 +- src/utils/database-common/src/entities.rs | 38 + src/utils/database-common/src/helpers.rs | 11 + src/utils/database-common/src/lib.rs | 3 - src/utils/database-common/tests/tests/mod.rs | 3 +- .../tests/tests/test_entity_page_streamer.rs | 414 +++++ .../tests/tests/test_helpers.rs | 30 + src/utils/datafusion-cli/Cargo.toml | 2 +- src/utils/datafusion-cli/README.md | 4 +- src/utils/datafusion-cli/src/catalog.rs | 3 + src/utils/datafusion-cli/src/exec.rs | 2 +- src/utils/datafusion-cli/src/functions.rs | 2 + .../datafusion-cli/src/object_storage.rs | 10 +- src/utils/event-sourcing/Cargo.toml | 6 +- src/utils/http-common/Cargo.toml | 2 +- src/utils/init-on-startup/Cargo.toml | 9 +- src/utils/internal-error/Cargo.toml | 2 +- src/utils/kamu-cli-puppet/Cargo.toml | 2 +- src/utils/messaging-outbox/Cargo.toml | 2 +- .../src/{executors => agent}/mod.rs | 12 +- .../outbox_agent.rs} | 18 +- .../outbox_agent_metrics.rs} | 6 +- .../outbox_agent_shared.rs} | 0 .../outbox_consumption_iteration_planner.rs | 6 +- .../outbox_producer_consumption_job.rs | 6 +- src/utils/messaging-outbox/src/lib.rs | 4 +- src/utils/messaging-outbox/tests/tests/mod.rs | 2 +- ...utbox_executor.rs => test_outbox_agent.rs} | 40 +- src/utils/multiformats/Cargo.toml | 2 +- src/utils/observability/Cargo.toml | 2 +- 385 files changed, 8757 insertions(+), 4519 deletions(-) create mode 100644 images/kamu-dev-base/README.md create mode 100644 migrations/postgres/20241125193114_dataset_dependencies.sql create mode 100644 migrations/postgres/20241217112645_delete-env-var_fk.sql create mode 100644 migrations/postgres/20241217205719_executor2agent.sql create mode 100644 migrations/sqlite/20241125192943_dataset_dependencies.sql create mode 100644 migrations/sqlite/20241217112645_delete-env-var_fk.sql create mode 100644 migrations/sqlite/20241217205726_executor2agent.sql rename src/{domain/core/src/services/dependency_graph_repository.rs => app/cli/src/explore/ui_configuration.rs} (58%) create mode 100644 src/domain/core/src/entities/writer_metadata_state.rs rename src/{infra/ingest-datafusion/src/visitor.rs => domain/core/src/entities/writer_source_visitor.rs} (96%) create mode 100644 
src/domain/core/src/services/compaction/compaction_executor.rs create mode 100644 src/domain/core/src/services/compaction/compaction_listener.rs rename src/domain/core/src/services/{compaction_service.rs => compaction/compaction_planner.rs} (51%) create mode 100644 src/domain/core/src/services/compaction/mod.rs rename src/domain/core/src/services/ingest/{push_ingest_service.rs => push_ingest_executor.rs} (66%) create mode 100644 src/domain/core/src/services/ingest/push_ingest_planner.rs create mode 100644 src/domain/core/src/services/metadata_query_service.rs create mode 100644 src/domain/core/src/services/reset/mod.rs create mode 100644 src/domain/core/src/services/reset/reset_executor.rs create mode 100644 src/domain/core/src/services/reset/reset_planner.rs delete mode 100644 src/domain/core/src/services/reset_service.rs rename src/domain/core/src/services/transform/{transform_execution_service.rs => transform_executor.rs} (98%) create mode 100644 src/domain/core/src/services/watermark/mod.rs create mode 100644 src/domain/core/src/services/watermark/set_watermark_executor.rs create mode 100644 src/domain/core/src/services/watermark/set_watermark_planner.rs delete mode 100644 src/domain/core/src/services/watermark_service.rs create mode 100644 src/domain/datasets/domain/src/entities/dataset_dependency_entry.rs create mode 100644 src/domain/datasets/domain/src/repos/dataset_dependency_repository.rs create mode 100644 src/domain/datasets/services/src/dependency_graph_indexer.rs rename src/{infra/core/src/dependency_graph_service_inmem.rs => domain/datasets/services/src/dependency_graph_service_impl.rs} (81%) rename src/{infra/core/tests/tests/test_dependency_graph_inmem.rs => domain/datasets/services/tests/tests/test_dependency_graph_service_impl.rs} (97%) rename src/domain/flow-system/domain/src/{executors/flow_executor.rs => agents/flow_agent.rs} (94%) rename src/domain/flow-system/domain/src/{executors => agents}/mod.rs (88%) rename src/domain/flow-system/services/src/flow/{flow_executor_impl.rs => flow_agent_impl.rs} (93%) rename src/domain/flow-system/services/tests/tests/{test_flow_executor_impl.rs => test_flow_agent_impl.rs} (99%) rename src/domain/task-system/domain/src/services/{task_executor.rs => task_agent.rs} (91%) rename src/domain/task-system/services/src/{task_executor_impl.rs => task_agent_impl.rs} (95%) rename src/domain/task-system/services/tests/tests/{test_task_executor_impl.rs => test_task_agent_impl.rs} (87%) delete mode 100644 src/infra/core/src/compaction_service_impl.rs delete mode 100644 src/infra/core/src/dependency_graph_repository_inmem.rs create mode 100644 src/infra/core/src/services/compaction/compaction_executor_impl.rs create mode 100644 src/infra/core/src/services/compaction/compaction_planner_impl.rs create mode 100644 src/infra/core/src/services/compaction/mod.rs rename src/infra/core/src/{ => services}/dataset_changes_service_impl.rs (100%) rename src/infra/core/src/{ => services}/dataset_ownership_service_inmem.rs (100%) rename src/infra/core/src/{ => services}/dataset_registry_repo_bridge.rs (100%) rename src/infra/core/src/{ => services}/ingest/data_format_registry_impl.rs (100%) rename src/infra/core/src/{ => services}/ingest/fetch_service/configs.rs (100%) rename src/infra/core/src/{ => services}/ingest/fetch_service/container.rs (100%) rename src/infra/core/src/{ => services}/ingest/fetch_service/core.rs (100%) rename src/infra/core/src/{ => services}/ingest/fetch_service/evm.rs (97%) rename src/infra/core/src/{ => 
services}/ingest/fetch_service/file.rs (100%) rename src/infra/core/src/{ => services}/ingest/fetch_service/ftp.rs (100%) rename src/infra/core/src/{ => services}/ingest/fetch_service/http.rs (100%) rename src/infra/core/src/{ => services}/ingest/fetch_service/mod.rs (100%) rename src/infra/core/src/{ => services}/ingest/fetch_service/mqtt.rs (100%) rename src/infra/core/src/{ => services}/ingest/fetch_service/template.rs (100%) rename src/infra/core/src/{ => services}/ingest/ingest_common.rs (91%) rename src/infra/core/src/{ => services}/ingest/mod.rs (83%) rename src/infra/core/src/{ => services}/ingest/polling_ingest_service_impl.rs (91%) rename src/infra/core/src/{ => services}/ingest/polling_source_state.rs (100%) rename src/infra/core/src/{ => services}/ingest/prep_service.rs (100%) rename src/infra/core/src/{ingest/push_ingest_service_impl.rs => services/ingest/push_ingest_executor_impl.rs} (62%) create mode 100644 src/infra/core/src/services/ingest/push_ingest_planner_impl.rs create mode 100644 src/infra/core/src/services/metadata_query_service_impl.rs create mode 100644 src/infra/core/src/services/mod.rs rename src/infra/core/src/{ => services}/provenance_service_impl.rs (100%) rename src/infra/core/src/{ => services}/pull_request_planner_impl.rs (95%) rename src/infra/core/src/{ => services}/push_request_planner_impl.rs (100%) rename src/infra/core/src/{ => services}/query/mod.rs (94%) rename src/infra/core/src/{ => services}/query_service_impl.rs (98%) create mode 100644 src/infra/core/src/services/remote/mod.rs rename src/infra/core/src/{ => services/remote}/remote_alias_resolver_impl.rs (100%) rename src/infra/core/src/{ => services/remote}/remote_aliases_registry_impl.rs (97%) rename src/infra/core/src/{ => services/remote}/remote_repository_registry_impl.rs (100%) rename src/infra/core/src/{ => services/remote}/remote_status_service_impl.rs (94%) rename src/infra/core/src/{ => services/remote}/resource_loader_impl.rs (100%) rename src/infra/core/src/{ => services/remote}/search_service_impl.rs (100%) create mode 100644 src/infra/core/src/services/reset/mod.rs create mode 100644 src/infra/core/src/services/reset/reset_executor_impl.rs rename src/infra/core/src/{reset_service_impl.rs => services/reset/reset_planner_impl.rs} (56%) create mode 100644 src/infra/core/src/services/sync/mod.rs rename src/infra/core/src/{ => services/sync}/sync_request_builder.rs (100%) rename src/infra/core/src/{ => services/sync}/sync_service_impl.rs (99%) rename src/infra/core/src/{ => services}/transform/mod.rs (87%) rename src/infra/core/src/{ => services}/transform/transform_elaboration_service_impl.rs (93%) rename src/infra/core/src/{transform/transform_execution_service_impl.rs => services/transform/transform_executor_impl.rs} (97%) rename src/infra/core/src/{ => services}/transform/transform_helpers.rs (100%) rename src/infra/core/src/{ => services}/transform/transform_request_planner_impl.rs (94%) rename src/infra/core/src/{ => services}/verification_service_impl.rs (98%) create mode 100644 src/infra/core/src/services/watermark/mod.rs create mode 100644 src/infra/core/src/services/watermark/set_watermark_executor_impl.rs create mode 100644 src/infra/core/src/services/watermark/set_watermark_planner_impl.rs rename src/infra/core/{tests/utils => src/testing}/base_repo_harness.rs (97%) delete mode 100644 src/infra/core/src/testing/mock_dependency_graph_repository.rs delete mode 100644 src/infra/core/src/watermark_service_impl.rs rename src/infra/core/tests/tests/{test_compact_service_impl.rs 
=> test_compaction_services_impl.rs} (86%) rename src/infra/core/tests/tests/{test_reset_service_impl.rs => test_reset_services_impl.rs} (85%) rename src/infra/core/tests/tests/{test_transform_service_impl.rs => test_transform_services_impl.rs} (93%) rename src/infra/core/tests/tests/{test_watermark_service_impl.rs => test_watermark_services_impl.rs} (82%) create mode 100644 src/infra/datasets/inmem/src/repos/inmem_dataset_dependency_repository.rs create mode 100644 src/infra/datasets/inmem/tests/repos/test_inmem_dataset_dependency_repository.rs create mode 100644 src/infra/datasets/postgres/.sqlx/query-00c7d1b42566c90d6752f442fef7b2b12465ab511b6f53d28289cf518fcfd748.json create mode 100644 src/infra/datasets/postgres/.sqlx/query-2cff2b63ca18bba00390b2b61ec90170110f38925697e5898b1fc9d7e7f91b39.json create mode 100644 src/infra/datasets/postgres/.sqlx/query-2d0e3957bf855f14108200effab39e6d34070193b15e7dd0ec3ccca710ec9b55.json create mode 100644 src/infra/datasets/postgres/.sqlx/query-c1a820bf91e3efcba1d0470753a37f66a76b0af3c4a999d0e37710a89679b310.json create mode 100644 src/infra/datasets/postgres/.sqlx/query-fe344c04ab30051d00929bf196f38f73eb6ea9ee52ec69c97343683c28757e4a.json create mode 100644 src/infra/datasets/postgres/src/repos/postgres_dataset_dependency_repository.rs create mode 100644 src/infra/datasets/postgres/tests/repos/test_postgres_dataset_dependency_repository.rs create mode 100644 src/infra/datasets/repo-tests/src/dataset_dependencies_repository_test_suite.rs create mode 100644 src/infra/datasets/repo-tests/src/helpers.rs create mode 100644 src/infra/datasets/sqlite/.sqlx/query-00c7d1b42566c90d6752f442fef7b2b12465ab511b6f53d28289cf518fcfd748.json create mode 100644 src/infra/datasets/sqlite/.sqlx/query-2cff2b63ca18bba00390b2b61ec90170110f38925697e5898b1fc9d7e7f91b39.json delete mode 100644 src/infra/datasets/sqlite/.sqlx/query-30c92efe33072f0b9fa446ea3255ffca15f34c2af9aaeb8d31453ab364f97495.json create mode 100644 src/infra/datasets/sqlite/.sqlx/query-c1a820bf91e3efcba1d0470753a37f66a76b0af3c4a999d0e37710a89679b310.json create mode 100644 src/infra/datasets/sqlite/.sqlx/query-fe344c04ab30051d00929bf196f38f73eb6ea9ee52ec69c97343683c28757e4a.json create mode 100644 src/infra/datasets/sqlite/src/repos/sqlite_dataset_dependency_repository.rs create mode 100644 src/infra/datasets/sqlite/tests/repos/test_sqlite_dataset_dependency_repository.rs create mode 100644 src/utils/database-common/tests/tests/test_entity_page_streamer.rs create mode 100644 src/utils/database-common/tests/tests/test_helpers.rs rename src/utils/messaging-outbox/src/{executors => agent}/mod.rs (74%) rename src/utils/messaging-outbox/src/{executors/outbox_executor.rs => agent/outbox_agent.rs} (96%) rename src/utils/messaging-outbox/src/{executors/outbox_executor_metrics.rs => agent/outbox_agent_metrics.rs} (96%) rename src/utils/messaging-outbox/src/{executors/outbox_executor_shared.rs => agent/outbox_agent_shared.rs} (100%) rename src/utils/messaging-outbox/src/{executors => agent}/outbox_consumption_iteration_planner.rs (98%) rename src/utils/messaging-outbox/src/{executors => agent}/outbox_producer_consumption_job.rs (98%) rename src/utils/messaging-outbox/tests/tests/{test_outbox_executor.rs => test_outbox_agent.rs} (93%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 235971f83..467c1e7c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,8 +12,6 @@ Recommendation: for ease of reading, use the following order: --> ## [Unreleased] -### Added -- Console warning when deleting datasets which are out of sync 
with their push remotes ### Changed - Private Datasets: - OSO: using user actors / dataset resources that come from the database @@ -26,6 +24,42 @@ Recommendation: for ease of reading, use the following order: - E2E: Using the correct account in multi-tenant mode - And also the possibility of setting it up +## [0.213.1] - 2024-12-18 +### Fixed +- Removed all occurrences of `DataWriterMetadataState` from telemetry spans (too much pressure) + +## [0.213.0] - 2024-12-18 +### Added +- kamu-adapter-graphql: added macros (`from_catalog_n!()` & `unsafe_from_catalog_n!()`) + that simplify the extraction of components from the DI catalog +- database-common: the logic for pagination of data processing is generalized in `EntityPageStreamer` +### Changed +- Speed up project build time by removing unused dependencies which were not detected by automated tools +- Extracted "planner" and "executor" phases for compaction, reset, set watermark, and push ingest, and partially for polling ingest. +- Renamed long-running "executors" to "agents". +- Introduced `MetadataQueryService` to absorb simple queries that do not have to be defined at the level of the metadata chain from the interface point of view. +### Fixed +- `DatasetEnvVar` entities are now deleted when the corresponding `DatasetEntry` entity is deleted + +## [0.212.0] - 2024-12-11 +### Changed +- Upgraded to `datafusion v43` +### Fixed +- Ingest was sometimes producing Parquet files with a non-sequential `offset` column, which violated the ODF spec + +## [0.211.0] - 2024-12-02 +### Changed +- Dataset dependency graph is now backed by a database, removing the need for dependency scanning at startup. + +## [0.210.0] - 2024-11-28 +### Added +- Console warning when deleting datasets which are out of sync with their push remotes +### Changed +- Separated Web UI runtime and UI configuration flags. UI configuration is now provided by the API server too.
+### Fixed +- Typo in feature flags (enableDatasetEnvVarsManagement) + ^ + ## [0.209.0] - 2024-11-25 ### Changed - Improved OpenAPI integration @@ -230,7 +264,7 @@ Introduced `DatasetRegistry` abstraction, encapsulating listing and resolution o - Outbox refactoring towards true parallelism via Tokio spaned tasks instead of futures ### Fixed - Failed flows should still propagate `finishedAt` time -- Eliminate span.enter, replaced with instrument everywhere +- Eliminate `span.enter`, replaced with instrument everywhere ## [0.201.0] - 2024-09-18 ### Added @@ -245,7 +279,7 @@ Introduced `DatasetRegistry` abstraction, encapsulating listing and resolution o - Revised implementation of flow scheduling to avoid in-memory time wheel: - recording `FlowEventScheduledForActivation` event (previously, placement moment into the time wheel) - replaced binary heap based time wheel operations with event store queries - - Postgres/SQlite event stores additionally track activation time for the waiting flows + - Postgres/SQLite event stores additionally track activation time for the waiting flows - in-memory event store keeps prepared map-based lookup structures for activation time ## [0.200.0] - 2024-09-13 diff --git a/Cargo.lock b/Cargo.lock index 38527fb9d..267a57d3a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -108,15 +108,15 @@ dependencies = [ [[package]] name = "allocator-api2" -version = "0.2.20" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45862d1c77f2228b9e10bc609d5bc203d86ebc9b87ad8d5d5167a6c9abf739d9" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" [[package]] name = "alloy" -version = "0.5.4" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea8ebf106e84a1c37f86244df7da0c7587e697b71a0d565cce079449b85ac6f8" +checksum = "b5b524b8c28a7145d1fe4950f84360b5de3e307601679ff0558ddc20ea229399" dependencies = [ "alloy-consensus", "alloy-contract", @@ -136,9 +136,9 @@ dependencies = [ [[package]] name = "alloy-chains" -version = "0.1.47" +version = "0.1.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18c5c520273946ecf715c0010b4e3503d7eba9893cd9ce6b7fff5654c4a3c470" +checksum = "a0161082e0edd9013d23083465cc04b20e44b7a15646d36ba7b0cdb7cd6fe18f" dependencies = [ "alloy-primitives", "num_enum", @@ -147,9 +147,9 @@ dependencies = [ [[package]] name = "alloy-consensus" -version = "0.5.4" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41ed961a48297c732a5d97ee321aa8bb5009ecadbcb077d8bec90cb54e651629" +checksum = "ae09ffd7c29062431dd86061deefe4e3c6f07fa0d674930095f8dcedb0baf02c" dependencies = [ "alloy-eips", "alloy-primitives", @@ -163,9 +163,9 @@ dependencies = [ [[package]] name = "alloy-contract" -version = "0.5.4" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "460ab80ce4bda1c80bcf96fe7460520476f2c7b734581c6567fac2708e2a60ef" +checksum = "66430a72d5bf5edead101c8c2f0a24bada5ec9f3cf9909b3e08b6d6899b4803e" dependencies = [ "alloy-dyn-abi", "alloy-json-abi", @@ -184,9 +184,9 @@ dependencies = [ [[package]] name = "alloy-core" -version = "0.8.12" +version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8316d83e590f4163b221b8180008f302bda5cf5451202855cdd323e588849c" +checksum = "c618bd382f0bc2ac26a7e4bfae01c9b015ca8f21b37ca40059ae35a7e62b3dc6" dependencies = [ "alloy-dyn-abi", "alloy-json-abi", @@ -197,9 +197,9 @@ 
dependencies = [ [[package]] name = "alloy-dyn-abi" -version = "0.8.12" +version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef2364c782a245cf8725ea6dbfca5f530162702b5d685992ea03ce64529136cc" +checksum = "41056bde53ae10ffbbf11618efbe1e0290859e5eab0fe9ef82ebdb62f12a866f" dependencies = [ "alloy-json-abi", "alloy-primitives", @@ -225,9 +225,9 @@ dependencies = [ [[package]] name = "alloy-eip7702" -version = "0.3.2" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64ffc577390ce50234e02d841214b3dc0bea6aaaae8e04bbf3cb82e9a45da9eb" +checksum = "4c986539255fb839d1533c128e190e557e52ff652c9ef62939e233a81dd93f7e" dependencies = [ "alloy-primitives", "alloy-rlp", @@ -237,9 +237,9 @@ dependencies = [ [[package]] name = "alloy-eips" -version = "0.5.4" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b69e06cf9c37be824b9d26d6d101114fdde6af0c87de2828b414c05c4b3daa71" +checksum = "5b6aa3961694b30ba53d41006131a2fca3bdab22e4c344e46db2c639e7c2dfdd" dependencies = [ "alloy-eip2930", "alloy-eip7702", @@ -255,9 +255,9 @@ dependencies = [ [[package]] name = "alloy-genesis" -version = "0.5.4" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dde15e14944a88bd6a57d325e9a49b75558746fe16aaccc79713ae50a6a9574c" +checksum = "e53f7877ded3921d18a0a9556d55bedf84535567198c9edab2aa23106da91855" dependencies = [ "alloy-primitives", "alloy-serde", @@ -266,9 +266,9 @@ dependencies = [ [[package]] name = "alloy-json-abi" -version = "0.8.12" +version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b84c506bf264110fa7e90d9924f742f40ef53c6572ea56a0b0bd714a567ed389" +checksum = "c357da577dfb56998d01f574d81ad7a1958d248740a7981b205d69d65a7da404" dependencies = [ "alloy-primitives", "alloy-sol-type-parser", @@ -278,9 +278,9 @@ dependencies = [ [[package]] name = "alloy-json-rpc" -version = "0.5.4" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af5979e0d5a7bf9c7eb79749121e8256e59021af611322aee56e77e20776b4b3" +checksum = "3694b7e480728c0b3e228384f223937f14c10caef5a4c766021190fc8f283d35" dependencies = [ "alloy-primitives", "alloy-sol-types", @@ -292,9 +292,9 @@ dependencies = [ [[package]] name = "alloy-network" -version = "0.5.4" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "204237129086ce5dc17a58025e93739b01b45313841f98fa339eb1d780511e57" +checksum = "ea94b8ceb5c75d7df0a93ba0acc53b55a22b47b532b600a800a87ef04eb5b0b4" dependencies = [ "alloy-consensus", "alloy-eips", @@ -308,14 +308,16 @@ dependencies = [ "async-trait", "auto_impl", "futures-utils-wasm", + "serde", + "serde_json", "thiserror 1.0.69", ] [[package]] name = "alloy-network-primitives" -version = "0.5.4" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514f70ee2a953db21631cd817b13a1571474ec77ddc03d47616d5e8203489fde" +checksum = "df9f3e281005943944d15ee8491534a1c7b3cbf7a7de26f8c433b842b93eb5f9" dependencies = [ "alloy-consensus", "alloy-eips", @@ -326,9 +328,9 @@ dependencies = [ [[package]] name = "alloy-primitives" -version = "0.8.12" +version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fce5dbd6a4f118eecc4719eaa9c7ffc31c315e6c5ccde3642db927802312425" +checksum = "6259a506ab13e1d658796c31e6e39d2e2ee89243bcc505ddc613b35732e0a430" dependencies = [ "alloy-rlp", 
"bytes", @@ -338,7 +340,7 @@ dependencies = [ "foldhash", "hashbrown 0.15.2", "hex-literal", - "indexmap 2.6.0", + "indexmap 2.7.0", "itoa", "k256", "keccak-asm", @@ -354,9 +356,9 @@ dependencies = [ [[package]] name = "alloy-provider" -version = "0.5.4" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4814d141ede360bb6cd1b4b064f1aab9de391e7c4d0d4d50ac89ea4bc1e25fbd" +checksum = "40c1f9eede27bf4c13c099e8e64d54efd7ce80ef6ea47478aa75d5d74e2dba3b" dependencies = [ "alloy-chains", "alloy-consensus", @@ -390,9 +392,9 @@ dependencies = [ [[package]] name = "alloy-pubsub" -version = "0.5.4" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96ba46eb69ddf7a9925b81f15229cb74658e6eebe5dd30a5b74e2cd040380573" +checksum = "90f1f34232f77341076541c405482e4ae12f0ee7153d8f9969fc1691201b2247" dependencies = [ "alloy-json-rpc", "alloy-primitives", @@ -403,15 +405,15 @@ dependencies = [ "serde_json", "tokio", "tokio-stream", - "tower 0.5.1", + "tower 0.5.2", "tracing", ] [[package]] name = "alloy-rlp" -version = "0.3.9" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da0822426598f95e45dd1ea32a738dac057529a709ee645fcc516ffa4cbde08f" +checksum = "f542548a609dca89fcd72b3b9f355928cf844d4363c5eed9c5273a3dd225e097" dependencies = [ "alloy-rlp-derive", "arrayvec", @@ -420,20 +422,20 @@ dependencies = [ [[package]] name = "alloy-rlp-derive" -version = "0.3.9" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b09cae092c27b6f1bde952653a22708691802e57bfef4a2973b80bea21efd3f" +checksum = "5a833d97bf8a5f0f878daf2c8451fff7de7f9de38baa5a45d936ec718d81255a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] name = "alloy-rpc-client" -version = "0.5.4" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fc2bd1e7403463a5f2c61e955bcc9d3072b63aa177442b0f9aa6a6d22a941e3" +checksum = "374dbe0dc3abdc2c964f36b3d3edf9cdb3db29d16bda34aa123f03d810bec1dd" dependencies = [ "alloy-json-rpc", "alloy-primitives", @@ -447,7 +449,7 @@ dependencies = [ "serde_json", "tokio", "tokio-stream", - "tower 0.5.1", + "tower 0.5.2", "tracing", "url", "wasmtimer", @@ -455,9 +457,9 @@ dependencies = [ [[package]] name = "alloy-rpc-types" -version = "0.5.4" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eea9bf1abdd506f985a53533f5ac01296bcd6102c5e139bbc5d40bc468d2c916" +checksum = "c74832aa474b670309c20fffc2a869fa141edab7c79ff7963fad0a08de60bae1" dependencies = [ "alloy-primitives", "alloy-rpc-types-eth", @@ -467,9 +469,9 @@ dependencies = [ [[package]] name = "alloy-rpc-types-eth" -version = "0.5.4" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b034779a4850b4b03f5be5ea674a1cf7d746b2da762b34d1860ab45e48ca27" +checksum = "a8a477281940d82d29315846c7216db45b15e90bcd52309da9f54bcf7ad94a11" dependencies = [ "alloy-consensus", "alloy-eips", @@ -486,9 +488,9 @@ dependencies = [ [[package]] name = "alloy-serde" -version = "0.5.4" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "028e72eaa9703e4882344983cfe7636ce06d8cce104a78ea62fd19b46659efc4" +checksum = "4dfa4a7ccf15b2492bb68088692481fd6b2604ccbee1d0d6c44c21427ae4df83" dependencies = [ "alloy-primitives", "serde", @@ -497,9 +499,9 @@ dependencies = [ [[package]] name = "alloy-signer" -version = 
"0.5.4" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "592c185d7100258c041afac51877660c7bf6213447999787197db4842f0e938e" +checksum = "2e10aec39d60dc27edcac447302c7803d2371946fb737245320a05b78eb2fafd" dependencies = [ "alloy-primitives", "async-trait", @@ -511,42 +513,42 @@ dependencies = [ [[package]] name = "alloy-sol-macro" -version = "0.8.12" +version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9343289b4a7461ed8bab8618504c995c049c082b70c7332efd7b32125633dc05" +checksum = "d9d64f851d95619233f74b310f12bcf16e0cbc27ee3762b6115c14a84809280a" dependencies = [ "alloy-sol-macro-expander", "alloy-sol-macro-input", "proc-macro-error2", "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] name = "alloy-sol-macro-expander" -version = "0.8.12" +version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4222d70bec485ceccc5d8fd4f2909edd65b5d5e43d4aca0b5dcee65d519ae98f" +checksum = "6bf7ed1574b699f48bf17caab4e6e54c6d12bc3c006ab33d58b1e227c1c3559f" dependencies = [ "alloy-json-abi", "alloy-sol-macro-input", "const-hex", "heck 0.5.0", - "indexmap 2.6.0", + "indexmap 2.7.0", "proc-macro-error2", "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", "syn-solidity", "tiny-keccak", ] [[package]] name = "alloy-sol-macro-input" -version = "0.8.12" +version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e17f2677369571b976e51ea1430eb41c3690d344fef567b840bfc0b01b6f83a" +checksum = "8c02997ccef5f34f9c099277d4145f183b422938ed5322dc57a089fe9b9ad9ee" dependencies = [ "alloy-json-abi", "const-hex", @@ -555,15 +557,15 @@ dependencies = [ "proc-macro2", "quote", "serde_json", - "syn 2.0.89", + "syn 2.0.90", "syn-solidity", ] [[package]] name = "alloy-sol-type-parser" -version = "0.8.12" +version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa64d80ae58ffaafdff9d5d84f58d03775f66c84433916dc9a64ed16af5755da" +checksum = "ce13ff37285b0870d0a0746992a4ae48efaf34b766ae4c2640fa15e5305f8e73" dependencies = [ "serde", "winnow", @@ -571,9 +573,9 @@ dependencies = [ [[package]] name = "alloy-sol-types" -version = "0.8.12" +version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6520d427d4a8eb7aa803d852d7a52ceb0c519e784c292f64bb339e636918cf27" +checksum = "1174cafd6c6d810711b4e00383037bdb458efc4fe3dbafafa16567e0320c54d8" dependencies = [ "alloy-json-abi", "alloy-primitives", @@ -584,9 +586,9 @@ dependencies = [ [[package]] name = "alloy-transport" -version = "0.5.4" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be77579633ebbc1266ae6fd7694f75c408beb1aeb6865d0b18f22893c265a061" +checksum = "f99acddb34000d104961897dbb0240298e8b775a7efffb9fda2a1a3efedd65b3" dependencies = [ "alloy-json-rpc", "base64 0.22.1", @@ -596,7 +598,7 @@ dependencies = [ "serde_json", "thiserror 1.0.69", "tokio", - "tower 0.5.1", + "tower 0.5.2", "tracing", "url", "wasmtimer", @@ -604,9 +606,9 @@ dependencies = [ [[package]] name = "alloy-transport-http" -version = "0.5.4" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91fd1a5d0827939847983b46f2f79510361f901dc82f8e3c38ac7397af142c6e" +checksum = "5dc013132e34eeadaa0add7e74164c1503988bfba8bae885b32e0918ba85a8a6" dependencies = [ "alloy-transport", "url", @@ -614,15 +616,15 @@ dependencies = [ [[package]] name = "alloy-transport-ws" -version 
= "0.5.4" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61f27837bb4a1d6c83a28231c94493e814882f0e9058648a97e908a5f3fc9fcf" +checksum = "abd170e600801116d5efe64f74a4fc073dbbb35c807013a7d0a388742aeebba0" dependencies = [ "alloy-pubsub", "alloy-transport", "futures", - "http 1.1.0", - "rustls 0.23.18", + "http 1.2.0", + "rustls 0.23.20", "serde_json", "tokio", "tokio-tungstenite", @@ -702,9 +704,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.93" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" +checksum = "c1fd03a028ef38ba2276dce7e33fcd6369c158a1bca17946c4b1b701891c1ff7" [[package]] name = "approx" @@ -1054,7 +1056,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.6.0", + "indexmap 2.7.0", "lexical-core", "num", "serde", @@ -1181,9 +1183,9 @@ dependencies = [ [[package]] name = "async-graphql" -version = "7.0.11" +version = "7.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ba6d24703c5adc5ba9116901b92ee4e4c0643c01a56c4fd303f3818638d7449" +checksum = "59fd6bd734afb8b6e4d0f84a3e77305ce0a7ccc60d70f6001cb5e1c3f38d8ff1" dependencies = [ "async-graphql-derive", "async-graphql-parser", @@ -1198,12 +1200,11 @@ dependencies = [ "futures-timer", "futures-util", "handlebars", - "http 1.1.0", - "indexmap 2.6.0", + "http 1.2.0", + "indexmap 2.7.0", "mime", "multer", "num-traits", - "once_cell", "pin-project-lite", "regex", "serde", @@ -1217,9 +1218,9 @@ dependencies = [ [[package]] name = "async-graphql-axum" -version = "7.0.11" +version = "7.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9aa80e171205c6d562057fd5a49167c8fbe61f7db2bed6540f6d4f2234d7ff2" +checksum = "ec8c1bb47161c37286e40e2fa58055e97b2a2b6cf1022a6686967e10636fa5d7" dependencies = [ "async-graphql", "async-trait", @@ -1235,9 +1236,9 @@ dependencies = [ [[package]] name = "async-graphql-derive" -version = "7.0.11" +version = "7.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a94c2d176893486bd37cd1b6defadd999f7357bf5804e92f510c08bcf16c538f" +checksum = "ac38b4dd452d529d6c0248b51df23603f0a875770352e26ae8c346ce6c149b3e" dependencies = [ "Inflector", "async-graphql-parser", @@ -1246,15 +1247,15 @@ dependencies = [ "proc-macro2", "quote", "strum", - "syn 2.0.89", + "syn 2.0.90", "thiserror 1.0.69", ] [[package]] name = "async-graphql-parser" -version = "7.0.11" +version = "7.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79272bdbf26af97866e149f05b2b546edb5c00e51b5f916289931ed233e208ad" +checksum = "42d271ddda2f55b13970928abbcbc3423cfc18187c60e8769b48f21a93b7adaa" dependencies = [ "async-graphql-value", "pest", @@ -1264,12 +1265,12 @@ dependencies = [ [[package]] name = "async-graphql-value" -version = "7.0.11" +version = "7.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef5ec94176a12a8cbe985cd73f2e54dc9c702c88c766bdef12f1f3a67cedbee1" +checksum = "aefe909173a037eaf3281b046dc22580b59a38b765d7b8d5116f2ffef098048d" dependencies = [ "bytes", - "indexmap 2.6.0", + "indexmap 2.7.0", "serde", "serde_json", ] @@ -1282,7 +1283,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -1304,7 +1305,7 @@ checksum = 
"c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -1315,12 +1316,12 @@ checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] name = "async-utils" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", ] @@ -1359,7 +1360,7 @@ checksum = "3c87f3f15e7794432337fc718554eaa4dc8f04c9677a950ffe366f20a162ae42" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -1381,7 +1382,7 @@ dependencies = [ "aws-sdk-sts", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.60.7", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -1412,9 +1413,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.4.3" +version = "1.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a10d5c055aa540164d9561a0e2e74ad30f0dcf7393c3a92f6733ddf9c5762468" +checksum = "b5ac934720fbb46206292d2c75b57e67acfc56fe7dfd34fb9a02334af08409ea" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -1438,9 +1439,9 @@ dependencies = [ [[package]] name = "aws-sdk-s3" -version = "1.62.0" +version = "1.65.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83d3a2854c7490b4c63d2b0e8c3976d628c80afa3045d078a715b2edb2ee4e0a" +checksum = "d3ba2c5c0f2618937ce3d4a5ad574b86775576fa24006bcb3128c6e2cbf3c34e" dependencies = [ "aws-credential-types", "aws-runtime", @@ -1449,7 +1450,7 @@ dependencies = [ "aws-smithy-checksums", "aws-smithy-eventstream", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.61.1", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -1472,15 +1473,15 @@ dependencies = [ [[package]] name = "aws-sdk-secretsmanager" -version = "1.53.0" +version = "1.54.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd3370af2d5d01f9ddf1705d9896cf8c406f444c9dc33abe1d2166d4d50f0b3b" +checksum = "450b2e8cb5f0ee102e4a04c5f8e923aff8187ae9323058707c6cec238cf51699" dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.61.1", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -1495,15 +1496,15 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.49.0" +version = "1.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09677244a9da92172c8dc60109b4a9658597d4d298b188dd0018b6a66b410ca4" +checksum = "05ca43a4ef210894f93096039ef1d6fa4ad3edfabb3be92b80908b9f2e4b4eab" dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.61.1", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -1517,15 +1518,15 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.50.0" +version = "1.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81fea2f3a8bb3bd10932ae7ad59cc59f65f270fc9183a7e91f501dc5efbef7ee" +checksum = "abaf490c2e48eed0bb8e2da2fb08405647bd7f253996e0f93b981958ea0f73b0" dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.61.1", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -1539,15 +1540,15 @@ dependencies = [ [[package]] 
name = "aws-sdk-sts" -version = "1.50.0" +version = "1.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ada54e5f26ac246dc79727def52f7f8ed38915cb47781e2a72213957dc3a7d5" +checksum = "b68fde0d69c8bfdc1060ea7da21df3e39f6014da316783336deff0a9ec28f4bf" dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.61.1", "aws-smithy-query", "aws-smithy-runtime", "aws-smithy-runtime-api", @@ -1562,9 +1563,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.2.5" +version = "1.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5619742a0d8f253be760bfbb8e8e8368c69e3587e4637af5754e488a611499b1" +checksum = "7d3820e0c08d0737872ff3c7c1f21ebbb6693d832312d6152bf18ef50a5471c2" dependencies = [ "aws-credential-types", "aws-smithy-eventstream", @@ -1577,7 +1578,7 @@ dependencies = [ "hex", "hmac", "http 0.2.12", - "http 1.1.0", + "http 1.2.0", "once_cell", "p256", "percent-encoding", @@ -1591,9 +1592,9 @@ dependencies = [ [[package]] name = "aws-smithy-async" -version = "1.2.1" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62220bc6e97f946ddd51b5f1361f78996e704677afc518a4ff66b7a72ea1378c" +checksum = "8aa8ff1492fd9fb99ae28e8467af0dbbb7c31512b16fabf1a0f10d7bb6ef78bb" dependencies = [ "futures-util", "pin-project-lite", @@ -1662,6 +1663,15 @@ dependencies = [ "aws-smithy-types", ] +[[package]] +name = "aws-smithy-json" +version = "0.61.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee4e69cc50921eb913c6b662f8d909131bb3e6ad6cb6090d3a39b66fc5c52095" +dependencies = [ + "aws-smithy-types", +] + [[package]] name = "aws-smithy-query" version = "0.60.7" @@ -1674,9 +1684,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.7.3" +version = "1.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be28bd063fa91fd871d131fc8b68d7cd4c5fa0869bea68daca50dcb1cbd76be2" +checksum = "431a10d0e07e09091284ef04453dae4069283aa108d209974d67e77ae1caa658" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -1689,7 +1699,7 @@ dependencies = [ "http-body 0.4.6", "http-body 1.0.1", "httparse", - "hyper 0.14.31", + "hyper 0.14.32", "hyper-rustls 0.24.2", "once_cell", "pin-project-lite", @@ -1709,7 +1719,7 @@ dependencies = [ "aws-smithy-types", "bytes", "http 0.2.12", - "http 1.1.0", + "http 1.2.0", "pin-project-lite", "tokio", "tracing", @@ -1718,16 +1728,16 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.2.9" +version = "1.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fbd94a32b3a7d55d3806fe27d98d3ad393050439dd05eb53ece36ec5e3d3510" +checksum = "8ecbf4d5dfb169812e2b240a4350f15ad3c6b03a54074e5712818801615f2dc5" dependencies = [ "base64-simd", "bytes", "bytes-utils", "futures-core", "http 0.2.12", - "http 1.1.0", + "http 1.2.0", "http-body 0.4.6", "http-body 1.0.1", "http-body-util", @@ -1776,10 +1786,10 @@ dependencies = [ "base64 0.22.1", "bytes", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", "http-body-util", - "hyper 1.5.1", + "hyper 1.5.2", "hyper-util", "itoa", "matchit", @@ -1794,10 +1804,10 @@ dependencies = [ "serde_path_to_error", "serde_urlencoded", "sha1", - "sync_wrapper 1.0.2", + "sync_wrapper", "tokio", "tokio-tungstenite", - "tower 0.5.1", + "tower 0.5.2", "tower-layer", "tower-service", "tracing", @@ -1812,13 +1822,13 @@ dependencies = [ 
"async-trait", "bytes", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", "http-body-util", "mime", "pin-project-lite", "rustversion", - "sync_wrapper 1.0.2", + "sync_wrapper", "tower-layer", "tower-service", "tracing", @@ -1836,7 +1846,7 @@ dependencies = [ "fastrand", "futures-util", "headers", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", "http-body-util", "mime", @@ -1845,7 +1855,7 @@ dependencies = [ "serde", "tokio", "tokio-util", - "tower 0.5.1", + "tower 0.5.2", "tower-layer", "tower-service", ] @@ -1913,9 +1923,9 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" [[package]] name = "bigdecimal" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f850665a0385e070b64c38d2354e6c104c8479c59868d1e48a0c13ee2c7a1c1" +checksum = "7f31f3af01c5c65a07985c804d3366560e6fa7883d640a122819b14ec327482c" dependencies = [ "autocfg", "libm", @@ -1983,9 +1993,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.5.4" +version = "1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d82033247fd8e890df8f740e407ad4d038debb9eb1f40533fffb32e7d17dc6f7" +checksum = "b8ee0c1824c4dea5b5f81736aff91bae041d2c07ee1192bec91054e10e3e601e" dependencies = [ "arrayref", "arrayvec", @@ -2053,9 +2063,9 @@ checksum = "bf88ba1141d185c399bee5288d850d63b8369520c1eafc32a0430b5b6c287bf4" [[package]] name = "bstr" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a68f1f47cdf0ec8ee4b941b2eee2a80cb796db73118c0dd09ac63fbe405be22" +checksum = "786a307d683a5bf92e6fd5fd69a7eb613751668d1d8d67d802846dfe367c62c8" dependencies = [ "memchr", "regex-automata 0.4.9", @@ -2082,9 +2092,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.8.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" +checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" dependencies = [ "serde", ] @@ -2159,9 +2169,9 @@ dependencies = [ [[package]] name = "cargo-platform" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24b1f0365a6c6bb4020cd05806fd0d33c44d38046b8bd7f0e40814b9763cabfc" +checksum = "e35af189006b9c0f00a064685c727031e3ed2d8020f7ba284d78cc2671bd36ea" dependencies = [ "serde", ] @@ -2174,7 +2184,7 @@ checksum = "2d886547e41f740c616ae73108f6eb70afe6d940c7bc697cb30f13daec073037" dependencies = [ "camino", "cargo-platform", - "semver 1.0.23", + "semver 1.0.24", "serde", "serde_json", "thiserror 1.0.69", @@ -2188,9 +2198,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.1" +version = "1.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd9de9f2205d5ef3fd67e685b0df337994ddd4495e2a28d185500d0e1edfea47" +checksum = "9157bbaa6b165880c27a4293a474c91cdcf265cc68cc829bf10be0964a391caf" dependencies = [ "jobserver", "libc", @@ -2223,9 +2233,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.38" +version = "0.4.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +checksum = 
"7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" dependencies = [ "android-tzdata", "iana-time-zone", @@ -2327,9 +2337,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.21" +version = "4.5.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb3b4b9e5a7c7514dfa52869339ee98b3156b0bfb4e8a77c4ff4babb64b1604f" +checksum = "3135e7ec2ef7b10c6ed8950f0f792ed96ee093fa088608f1c76e569722700c84" dependencies = [ "clap_builder", "clap_derive", @@ -2337,9 +2347,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.21" +version = "4.5.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b17a95aa67cc7b5ebd32aa5370189aa0d79069ef1c64ce893bd30fb24bff20ec" +checksum = "30582fc632330df2bd26877bde0c1f4470d57c582bbc070376afcd04d8cb4838" dependencies = [ "anstream", "anstyle", @@ -2349,9 +2359,9 @@ dependencies = [ [[package]] name = "clap_complete" -version = "4.5.38" +version = "4.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9647a559c112175f17cf724dc72d3645680a883c58481332779192b0d8e7a01" +checksum = "ac2e663e3e3bed2d32d065a8404024dad306e699a04263ec59919529f803aee9" dependencies = [ "clap", ] @@ -2365,14 +2375,14 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] name = "clap_lex" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afb84c814227b90d6895e01398aee0d8033c00e7466aca416fb6a8e0eb19d8a7" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" [[package]] name = "clipboard-win" @@ -2421,22 +2431,22 @@ dependencies = [ [[package]] name = "console" -version = "0.15.8" +version = "0.15.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" +checksum = "ea3c6ecd8059b57859df5c69830340ed3c41d30e3da0c1cbed90a96ac853041b" dependencies = [ - "encode_unicode 0.3.6", - "lazy_static", + "encode_unicode", "libc", - "unicode-width 0.1.14", - "windows-sys 0.52.0", + "once_cell", + "unicode-width 0.2.0", + "windows-sys 0.59.0", ] [[package]] name = "const-hex" -version = "1.13.2" +version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "487981fa1af147182687064d0a2c336586d337a606595ced9ffb0c685c250c73" +checksum = "4b0485bab839b018a8f1723fc5391819fea5f8f0f32288ef8a735fd096b6160c" dependencies = [ "cfg-if", "cpufeatures", @@ -2479,7 +2489,7 @@ checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" [[package]] name = "container-runtime" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", "cfg-if", @@ -2492,7 +2502,7 @@ dependencies = [ "tempfile", "test-group", "test-log", - "thiserror 1.0.69", + "thiserror 2.0.8", "tokio", "tracing", "url", @@ -2644,18 +2654,18 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.13" +version = "0.5.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471" dependencies = [ "crossbeam-utils", ] [[package]] name = "crossbeam-deque" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +checksum = 
"9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" dependencies = [ "crossbeam-epoch", "crossbeam-utils", @@ -2672,18 +2682,18 @@ dependencies = [ [[package]] name = "crossbeam-queue" -version = "0.3.11" +version = "0.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" dependencies = [ "crossbeam-utils", ] [[package]] name = "crossbeam-utils" -version = "0.8.20" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "crossterm" @@ -2845,7 +2855,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -2875,7 +2885,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -2886,7 +2896,7 @@ checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ "darling_core", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -2917,7 +2927,7 @@ checksum = "e8566979429cf69b49a5c740c60791108e86440e8be149bbea4fe54d2c32d6e2" [[package]] name = "database-common" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-stream", "async-trait", @@ -2937,24 +2947,24 @@ dependencies = [ "serde_json", "sha2", "sqlx", - "thiserror 1.0.69", + "thiserror 2.0.8", "tokio", "tracing", ] [[package]] name = "database-common-macros" -version = "0.209.0" +version = "0.213.1" dependencies = [ "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] name = "datafusion" -version = "42.2.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dae5f2abc725737d6e87b6d348a5aa2d0a77e4cf873045f004546da946e6e619" +checksum = "cbba0799cf6913b456ed07a94f0f3b6e12c62a5d88b10809e2284a0f2b915c05" dependencies = [ "ahash", "arrow", @@ -2987,7 +2997,7 @@ dependencies = [ "glob", "half", "hashbrown 0.14.5", - "indexmap 2.6.0", + "indexmap 2.7.0", "itertools 0.13.0", "log", "num_cpus", @@ -3009,9 +3019,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "42.2.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "998761705551f11ffa4ee692cc285b44eb1def6e0d28c4eaf5041b9e2810dc1e" +checksum = "7493c5c2d40eec435b13d92e5703554f4efc7059451fcb8d3a79580ff0e45560" dependencies = [ "arrow-schema", "async-trait", @@ -3024,9 +3034,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "42.2.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11986f191e88d950f10a5cc512a598afba27d92e04a0201215ad60785005115a" +checksum = "24953049ebbd6f8964f91f60aa3514e121b5e81e068e33b60e77815ab369b25c" dependencies = [ "ahash", "arrow", @@ -3036,6 +3046,7 @@ dependencies = [ "chrono", "half", "hashbrown 0.14.5", + "indexmap 2.7.0", "instant", "libc", "num_cpus", @@ -3048,9 +3059,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "42.2.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "694c9d7ea1b82f95768215c4cb5c2d5c613690624e832a7ee64be563139d582f" +checksum = 
"f06df4ef76872e11c924d3c814fd2a8dd09905ed2e2195f71c857d78abd19685" dependencies = [ "log", "tokio", @@ -3058,9 +3069,9 @@ dependencies = [ [[package]] name = "datafusion-ethers" -version = "42.1.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ab3507a26e64835d679ad3dec8c39331c843406313ebce3d92ce844042d6b9" +checksum = "1f33281d97d262dc10a864a696b6c04bd456e3dd4a2e0998775f32875cc9c38a" dependencies = [ "alloy", "async-stream", @@ -3068,16 +3079,16 @@ dependencies = [ "datafusion", "futures", "serde_json", - "thiserror 1.0.69", + "thiserror 2.0.8", "tokio", "tracing", ] [[package]] name = "datafusion-execution" -version = "42.2.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30b4cedcd98151e0a297f34021b6b232ff0ebc0f2f18ea5e7446b5ebda99b1a1" +checksum = "6bbdcb628d690f3ce5fea7de81642b514486d58ff9779a51f180a69a4eadb361" dependencies = [ "arrow", "chrono", @@ -3096,9 +3107,9 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "42.2.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8dd114dc0296cacaee98ad3165724529fcca9a65b2875abcd447b9cc02b2b74" +checksum = "8036495980e3131f706b7d33ab00b4492d73dc714e3cb74d11b50f9602a73246" dependencies = [ "ahash", "arrow", @@ -3108,7 +3119,9 @@ dependencies = [ "datafusion-common", "datafusion-expr-common", "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", "datafusion-physical-expr-common", + "indexmap 2.7.0", "paste", "serde_json", "sqlparser", @@ -3118,20 +3131,21 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "42.2.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d1ba2bb018218d9260bbd7de6a46a20f61b93d4911dba8aa07735625004c4fb" +checksum = "4da0f3cb4669f9523b403d6b5a0ec85023e0ab3bf0183afd1517475b3e64fdd2" dependencies = [ "arrow", "datafusion-common", + "itertools 0.13.0", "paste", ] [[package]] name = "datafusion-functions" -version = "42.2.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "547cb780a4ac51fd8e52c0fb9188bc16cea4e35aebf6c454bda0b82a7a417304" +checksum = "f52c4012648b34853e40a2c6bcaa8772f837831019b68aca384fb38436dba162" dependencies = [ "arrow", "arrow-buffer", @@ -3156,9 +3170,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "42.2.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e68cf5aa7ebcac08bd04bb709a9a6d4963eafd227da62b628133bc509c40f5a0" +checksum = "e5b8bb624597ba28ed7446df4a9bd7c7a7bde7c578b6b527da3f47371d5f6741" dependencies = [ "ahash", "arrow", @@ -3170,16 +3184,16 @@ dependencies = [ "datafusion-physical-expr", "datafusion-physical-expr-common", "half", + "indexmap 2.7.0", "log", "paste", - "sqlparser", ] [[package]] name = "datafusion-functions-aggregate-common" -version = "42.2.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2285d080dfecdfb8605b0ab2f1a41e2473208dc8e9bd6f5d1dbcfe97f517e6f" +checksum = "6fb06208fc470bc8cf1ce2d9a1159d42db591f2c7264a8c1776b53ad8f675143" dependencies = [ "ahash", "arrow", @@ -3191,9 +3205,9 @@ dependencies = [ [[package]] name = "datafusion-functions-json" -version = "0.42.0" +version = "0.43.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"532feb5c208fd1708f4d93b1984fb7a7ed678a9f0e6f799af97118d7c4e863a1" +checksum = "744cf7ae121977c453586f3f098239e50da6b0cfcf2de3ccb9338a4896f97dc0" dependencies = [ "datafusion", "jiter", @@ -3203,9 +3217,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "42.2.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b6ffbbb7cf7bf0c0e05eb6207023fef341cac83a593a5365a6fc83803c572a9" +checksum = "fca25bbb87323716d05e54114666e942172ccca23c5a507e9c7851db6e965317" dependencies = [ "arrow", "arrow-array", @@ -3226,41 +3240,54 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "42.2.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e78d30ebd6e9f74d4aeddec32744f5a18b5f9584591bc586fb5259c4848bac5" +checksum = "5ae23356c634e54c59f7c51acb7a5b9f6240ffb2cf997049a1a24a8a88598dbe" dependencies = [ "datafusion-common", "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-physical-expr", "datafusion-physical-expr-common", "log", + "paste", +] + +[[package]] +name = "datafusion-functions-window-common" +version = "43.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4b3d6ff7794acea026de36007077a06b18b89e4f9c3fea7f2215f9f7dd9059b" +dependencies = [ + "datafusion-common", + "datafusion-physical-expr-common", ] [[package]] name = "datafusion-odata" -version = "42.1.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37ab54b5cb73aebdea15ab97319c08ab2ed8b126e99f88169e9eb7f98533d96d" +checksum = "b2aec0659fb8f95aa9e6e97fb1233216cdd8508e04dd35db70e7f96755eb0f56" dependencies = [ "async-trait", "axum", "chrono", "datafusion", - "http 1.1.0", - "hyper 1.5.1", + "http 1.2.0", + "hyper 1.5.2", "odata-params", - "quick-xml", + "quick-xml 0.37.1", "regex", "serde", - "thiserror 1.0.69", + "thiserror 2.0.8", "tracing", ] [[package]] name = "datafusion-optimizer" -version = "42.2.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be172c44bf344df707e0c041fa3f41e6dc5fb0976f539c68bc442bca150ee58c" +checksum = "bec6241eb80c595fa0e1a8a6b69686b5cf3bd5fdacb8319582a0943b0bd788aa" dependencies = [ "arrow", "async-trait", @@ -3269,7 +3296,7 @@ dependencies = [ "datafusion-expr", "datafusion-physical-expr", "hashbrown 0.14.5", - "indexmap 2.6.0", + "indexmap 2.7.0", "itertools 0.13.0", "log", "paste", @@ -3278,9 +3305,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "42.2.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43b86b7fa0b8161c49b0f005b0df193fc6d9b65ceec675f155422cda5d1583ca" +checksum = "3370357b8fc75ec38577700644e5d1b0bc78f38babab99c0b8bd26bafb3e4335" dependencies = [ "ahash", "arrow", @@ -3289,30 +3316,26 @@ dependencies = [ "arrow-ord", "arrow-schema", "arrow-string", - "base64 0.22.1", "chrono", "datafusion-common", - "datafusion-execution", "datafusion-expr", "datafusion-expr-common", "datafusion-functions-aggregate-common", "datafusion-physical-expr-common", "half", "hashbrown 0.14.5", - "hex", - "indexmap 2.6.0", + "indexmap 2.7.0", "itertools 0.13.0", "log", "paste", "petgraph", - "regex", ] [[package]] name = "datafusion-physical-expr-common" -version = "42.2.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "242ba8a26351d9ca16295814c46743b0d1b00ec372174bdfbba991d0953dd596" 
+checksum = "b8b7734d94bf2fa6f6e570935b0ddddd8421179ce200065be97874e13d46a47b" dependencies = [ "ahash", "arrow", @@ -3324,13 +3347,15 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "42.2.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25ca088eb904bf1cfc9c5e5653110c70a6eaba43164085a9d180b35b77ce3b8b" +checksum = "7eee8c479522df21d7b395640dff88c5ed05361852dce6544d7c98e9dbcebffe" dependencies = [ + "arrow", "arrow-schema", "datafusion-common", "datafusion-execution", + "datafusion-expr-common", "datafusion-physical-expr", "datafusion-physical-plan", "itertools 0.13.0", @@ -3338,9 +3363,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "42.2.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4989a53b824abc759685eb643f4d604c2fc2fea4e2c309ac3473bea263ecbbeb" +checksum = "17e1fc2e2c239d14e8556f2622b19a726bf6bc6962cc00c71fc52626274bee24" dependencies = [ "ahash", "arrow", @@ -3354,14 +3379,14 @@ dependencies = [ "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", - "datafusion-functions-aggregate", "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", "datafusion-physical-expr", "datafusion-physical-expr-common", "futures", "half", "hashbrown 0.14.5", - "indexmap 2.6.0", + "indexmap 2.7.0", "itertools 0.13.0", "log", "once_cell", @@ -3373,15 +3398,16 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "42.2.0" +version = "43.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66b9b75b9da10ed656073ac0553708f17eb8fa5a7b065ef9848914c93150ab9e" +checksum = "63e3a4ed41dbee20a5d947a59ca035c225d67dc9cbe869c10f66dcdf25e7ce51" dependencies = [ "arrow", "arrow-array", "arrow-schema", "datafusion-common", "datafusion-expr", + "indexmap 2.7.0", "log", "regex", "sqlparser", @@ -3465,7 +3491,7 @@ checksum = "30542c1ad912e0e3d22a1935c290e12e8a29d704a420177a31faad4a601a0800" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -3485,7 +3511,7 @@ checksum = "cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", "unicode-xid", ] @@ -3524,24 +3550,24 @@ dependencies = [ [[package]] name = "dill" -version = "0.9.2" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d90bbdacf0338c67d313d20fcfbed9e1ea59408130fada1d60f058502a8638" +checksum = "43c4bb6baf8208749875ef15410dfab2547aa62ec31bcdddf79d8d6434a945e8" dependencies = [ "dill-impl", "multimap", - "thiserror 1.0.69", + "thiserror 2.0.8", ] [[package]] name = "dill-impl" -version = "0.9.2" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cb2553bba30bdf737ada37f4d14c1a35f29bb9203f3e5a40410a06591e37506" +checksum = "02401a1881492067da06bda737235c3325c35c35f48dce2eba4e85cefdfac674" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -3594,7 +3620,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -3738,12 +3764,6 @@ dependencies = [ "log", ] -[[package]] -name = "encode_unicode" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" - 
[[package]] name = "encode_unicode" version = "1.0.0" @@ -3783,12 +3803,12 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] name = "enum-variants" -version = "0.209.0" +version = "0.213.1" [[package]] name = "env_filter" @@ -3819,12 +3839,12 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.9" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -3857,14 +3877,14 @@ dependencies = [ [[package]] name = "event-sourcing" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-stream", "async-trait", "event-sourcing-macros", "futures", "internal-error", - "thiserror 1.0.69", + "thiserror 2.0.8", "tokio", "tokio-stream", "tracing", @@ -3872,10 +3892,10 @@ dependencies = [ [[package]] name = "event-sourcing-macros" -version = "0.209.0" +version = "0.213.1" dependencies = [ "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -3889,9 +3909,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.2.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "fastrlp" @@ -3904,6 +3924,17 @@ dependencies = [ "bytes", ] +[[package]] +name = "fastrlp" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce8dba4714ef14b8274c371879b175aa55b16b30f269663f19d576f380018dc4" +dependencies = [ + "arrayvec", + "auto_impl", + "bytes", +] + [[package]] name = "fd-lock" version = "4.0.2" @@ -4108,7 +4139,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -4304,7 +4335,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.6.0", + "indexmap 2.7.0", "slab", "tokio", "tokio-util", @@ -4322,8 +4353,8 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http 1.1.0", - "indexmap 2.6.0", + "http 1.2.0", + "indexmap 2.7.0", "slab", "tokio", "tokio-util", @@ -4407,7 +4438,7 @@ dependencies = [ "base64 0.21.7", "bytes", "headers-core", - "http 1.1.0", + "http 1.2.0", "httpdate", "mime", "sha1", @@ -4419,7 +4450,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "54b4a22553d4242c49fddb9ba998a99962b5cc6f22cb5a3482bec22522403ce4" dependencies = [ - "http 1.1.0", + "http 1.2.0", ] [[package]] @@ -4461,6 +4492,51 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fe2267d4ed49bc07b63801559be28c718ea06c4738b7a03c94df7386d2cde46" +[[package]] +name = "hickory-proto" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "447afdcdb8afb9d0a852af6dc65d9b285ce720ed7a59e42a8bf2e931c67bc1b5" +dependencies = [ + "async-trait", + "cfg-if", + "data-encoding", + "enum-as-inner", + "futures-channel", + "futures-io", + "futures-util", + "idna", + "ipnet", + "once_cell", + "rand", + "thiserror 1.0.69", + "tinyvec", + "tokio", + "tracing", + "url", +] + +[[package]] +name = 
"hickory-resolver" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a2e2aba9c389ce5267d31cf1e4dace82390ae276b0b364ea55630b1fa1b44b4" +dependencies = [ + "cfg-if", + "futures-util", + "hickory-proto", + "ipconfig", + "lru-cache", + "once_cell", + "parking_lot", + "rand", + "resolv-conf", + "smallvec", + "thiserror 1.0.69", + "tokio", + "tracing", +] + [[package]] name = "hkdf" version = "0.12.4" @@ -4481,11 +4557,11 @@ dependencies = [ [[package]] name = "home" -version = "0.5.9" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -4512,9 +4588,9 @@ dependencies = [ [[package]] name = "http" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" +checksum = "f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea" dependencies = [ "bytes", "fnv", @@ -4539,7 +4615,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http 1.1.0", + "http 1.2.0", ] [[package]] @@ -4550,32 +4626,32 @@ checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" dependencies = [ "bytes", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", "pin-project-lite", ] [[package]] name = "http-common" -version = "0.209.0" +version = "0.213.1" dependencies = [ "axum", - "http 1.1.0", + "http 1.2.0", "internal-error", "kamu-core", "serde", "serde_json", "strum", - "thiserror 1.0.69", + "thiserror 2.0.8", "tracing", "utoipa", ] [[package]] name = "http-range-header" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08a397c49fec283e3d6211adbe480be95aae5f304cfb923e9970e08956d5168a" +checksum = "9171a2ea8a68358193d15dd5d70c1c10a2afc3e7e4c5bc92bc9f025cebd7359c" [[package]] name = "httparse" @@ -4606,9 +4682,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "0.14.31" +version = "0.14.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c08302e8fa335b151b788c775ff56e7a03ae64ff85c548ee820fecb70356e85" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" dependencies = [ "bytes", "futures-channel", @@ -4630,15 +4706,15 @@ dependencies = [ [[package]] name = "hyper" -version = "1.5.1" +version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97818827ef4f364230e16705d4706e2897df2bb60617d6ca15d598025a3c481f" +checksum = "256fb8d4bd6413123cc9d91832d78325c48ff41677595be797d90f42969beae0" dependencies = [ "bytes", "futures-channel", "futures-util", "h2 0.4.7", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", "httparse", "httpdate", @@ -4657,7 +4733,7 @@ checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" dependencies = [ "futures-util", "http 0.2.12", - "hyper 0.14.31", + "hyper 0.14.32", "log", "rustls 0.21.12", "rustls-native-certs 0.6.3", @@ -4672,14 +4748,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333" dependencies = [ "futures-util", - "http 1.1.0", - "hyper 1.5.1", + "http 1.2.0", + "hyper 1.5.2", "hyper-util", - "rustls 0.23.18", + "rustls 0.23.20", "rustls-native-certs 0.8.1", "rustls-pki-types", "tokio", - "tokio-rustls 0.26.0", + "tokio-rustls 0.26.1", "tower-service", "webpki-roots", ] @@ -4690,7 +4766,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" dependencies = [ - "hyper 1.5.1", + "hyper 1.5.2", "hyper-util", "pin-project-lite", "tokio", @@ -4706,9 +4782,9 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", - "hyper 1.5.1", + "hyper 1.5.2", "pin-project-lite", "socket2", "tokio", @@ -4854,7 +4930,7 @@ checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -4863,16 +4939,6 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" -[[package]] -name = "idna" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c" -dependencies = [ - "unicode-bidi", - "unicode-normalization", -] - [[package]] name = "idna" version = "1.0.3" @@ -4911,7 +4977,7 @@ checksum = "a0eb5a3343abf848c0984fe4604b2b105da9539376e24fc0a3b0007411ae4fd9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -4934,7 +5000,7 @@ dependencies = [ "libflate", "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -4950,9 +5016,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.6.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" +checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" dependencies = [ "equivalent", "hashbrown 0.15.2", @@ -4986,7 +5052,7 @@ checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" [[package]] name = "init-on-startup" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", "database-common", @@ -4995,7 +5061,7 @@ dependencies = [ "paste", "petgraph", "test-log", - "thiserror 1.0.69", + "thiserror 2.0.8", "tokio", "tracing", ] @@ -5029,9 +5095,9 @@ checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" [[package]] name = "internal-error" -version = "0.209.0" +version = "0.213.1" dependencies = [ - "thiserror 1.0.69", + "thiserror 2.0.8", ] [[package]] @@ -5089,9 +5155,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "540654e97a3f4470a492cd30ff187bc95d89557a903a2bbf112e2fae98104ef2" +checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" [[package]] name = "jiter" @@ -5140,10 +5206,11 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.72" +version = "0.3.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" +checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7" dependencies = [ + "once_cell", 
"wasm-bindgen", ] @@ -5177,7 +5244,7 @@ dependencies = [ [[package]] name = "kamu" -version = "0.209.0" +version = "0.213.1" dependencies = [ "alloy", "async-recursion", @@ -5209,7 +5276,8 @@ dependencies = [ "futures", "glob", "hex", - "http 1.1.0", + "hickory-resolver", + "http 1.2.0", "indoc 2.0.5", "init-on-startup", "internal-error", @@ -5221,17 +5289,19 @@ dependencies = [ "kamu-core", "kamu-data-utils", "kamu-datasets", + "kamu-datasets-inmem", "kamu-datasets-services", "kamu-ingest-datafusion", "lazy_static", "libc", + "like", "messaging-outbox", "mockall", "nanoid", "object_store", "oop", "opendatafabric", - "petgraph", + "pin-project", "pretty_assertions", "rand", "random-names", @@ -5247,22 +5317,22 @@ dependencies = [ "tempfile", "test-group", "test-log", - "thiserror 1.0.69", + "testing_logger", + "thiserror 2.0.8", "time-source", "tokio", "tokio-stream", "tokio-util", - "tower 0.5.1", + "tower 0.5.2", "tower-http", "tracing", - "trust-dns-resolver", "url", "zip", ] [[package]] name = "kamu-accounts" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", "base32", @@ -5281,13 +5351,13 @@ dependencies = [ "serde", "serde_with", "sqlx", - "thiserror 1.0.69", + "thiserror 2.0.8", "uuid", ] [[package]] name = "kamu-accounts-inmem" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", "chrono", @@ -5307,7 +5377,7 @@ dependencies = [ [[package]] name = "kamu-accounts-mysql" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-stream", "async-trait", @@ -5329,7 +5399,7 @@ dependencies = [ [[package]] name = "kamu-accounts-postgres" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-stream", "async-trait", @@ -5351,7 +5421,7 @@ dependencies = [ [[package]] name = "kamu-accounts-repo-tests" -version = "0.209.0" +version = "0.213.1" dependencies = [ "argon2", "chrono", @@ -5367,7 +5437,7 @@ dependencies = [ [[package]] name = "kamu-accounts-services" -version = "0.209.0" +version = "0.213.1" dependencies = [ "argon2", "async-trait", @@ -5393,7 +5463,7 @@ dependencies = [ [[package]] name = "kamu-accounts-sqlite" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-stream", "async-trait", @@ -5415,7 +5485,7 @@ dependencies = [ [[package]] name = "kamu-adapter-auth-oso-rebac" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", "database-common", @@ -5445,7 +5515,7 @@ dependencies = [ [[package]] name = "kamu-adapter-flight-sql" -version = "0.209.0" +version = "0.213.1" dependencies = [ "arrow-flight", "async-trait", @@ -5467,7 +5537,7 @@ dependencies = [ [[package]] name = "kamu-adapter-graphql" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-graphql", "async-trait", @@ -5509,7 +5579,6 @@ dependencies = [ "tempfile", "test-group", "test-log", - "thiserror 1.0.69", "time-source", "tokio", "tokio-stream", @@ -5520,7 +5589,7 @@ dependencies = [ [[package]] name = "kamu-adapter-http" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", "aws-sdk-s3", @@ -5540,7 +5609,7 @@ dependencies = [ "fs_extra", "futures", "headers", - "http 1.1.0", + "http 1.2.0", "http-body-util", "http-common", "indoc 2.0.5", @@ -5552,6 +5621,7 @@ dependencies = [ "kamu-accounts-services", "kamu-core", "kamu-data-utils", + "kamu-datasets-inmem", "kamu-datasets-services", "kamu-ingest-datafusion", "messaging-outbox", @@ -5569,13 +5639,13 @@ dependencies = [ "tempfile", "test-group", "test-log", - "thiserror 1.0.69", + "thiserror 2.0.8", "time-source", "tokio", "tokio-stream", 
"tokio-tungstenite", "tokio-util", - "tower 0.5.1", + "tower 0.5.2", "tower-http", "tracing", "url", @@ -5586,23 +5656,23 @@ dependencies = [ [[package]] name = "kamu-adapter-oauth" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", "dill", - "http 1.1.0", + "http 1.2.0", "internal-error", "kamu-accounts", "opendatafabric", "reqwest", "serde", "serde_json", - "thiserror 1.0.69", + "thiserror 2.0.8", ] [[package]] name = "kamu-adapter-odata" -version = "0.209.0" +version = "0.213.1" dependencies = [ "axum", "chrono", @@ -5612,7 +5682,7 @@ dependencies = [ "datafusion-odata", "dill", "futures", - "http 1.1.0", + "http 1.2.0", "http-common", "indoc 2.0.5", "internal-error", @@ -5637,19 +5707,19 @@ dependencies = [ [[package]] name = "kamu-auth-rebac" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", "internal-error", "opendatafabric", "sqlx", "strum", - "thiserror 1.0.69", + "thiserror 2.0.8", ] [[package]] name = "kamu-auth-rebac-inmem" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", "database-common-macros", @@ -5663,7 +5733,7 @@ dependencies = [ [[package]] name = "kamu-auth-rebac-postgres" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", "database-common", @@ -5679,7 +5749,7 @@ dependencies = [ [[package]] name = "kamu-auth-rebac-repo-tests" -version = "0.209.0" +version = "0.213.1" dependencies = [ "dill", "kamu-auth-rebac", @@ -5687,7 +5757,7 @@ dependencies = [ [[package]] name = "kamu-auth-rebac-services" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", "database-common", @@ -5711,7 +5781,7 @@ dependencies = [ [[package]] name = "kamu-auth-rebac-sqlite" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", "database-common", @@ -5727,7 +5797,7 @@ dependencies = [ [[package]] name = "kamu-cli" -version = "0.209.0" +version = "0.213.1" dependencies = [ "arrow-flight", "async-graphql", @@ -5753,7 +5823,7 @@ dependencies = [ "futures", "glob", "hex", - "http 1.1.0", + "http 1.2.0", "http-common", "humansize", "indicatif", @@ -5824,13 +5894,13 @@ dependencies = [ "tempfile", "test-group", "test-log", - "thiserror 1.0.69", + "thiserror 2.0.8", "time-source", "tokio", "tokio-stream", "tokio-util", "tonic", - "tower 0.5.1", + "tower 0.5.2", "tower-http", "tracing", "tracing-appender", @@ -5849,7 +5919,7 @@ dependencies = [ [[package]] name = "kamu-cli-e2e-common" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", "chrono", @@ -5872,22 +5942,22 @@ dependencies = [ "serde_urlencoded", "serde_yaml", "sqlx", - "thiserror 1.0.69", + "thiserror 2.0.8", "tokio", "tokio-retry", ] [[package]] name = "kamu-cli-e2e-common-macros" -version = "0.209.0" +version = "0.213.1" dependencies = [ "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] name = "kamu-cli-e2e-inmem" -version = "0.209.0" +version = "0.213.1" dependencies = [ "indoc 2.0.5", "kamu-cli-e2e-common", @@ -5899,7 +5969,7 @@ dependencies = [ [[package]] name = "kamu-cli-e2e-mysql" -version = "0.209.0" +version = "0.213.1" dependencies = [ "indoc 2.0.5", "kamu-cli-e2e-common", @@ -5912,7 +5982,7 @@ dependencies = [ [[package]] name = "kamu-cli-e2e-postgres" -version = "0.209.0" +version = "0.213.1" dependencies = [ "indoc 2.0.5", "kamu-cli-e2e-common", @@ -5925,7 +5995,7 @@ dependencies = [ [[package]] name = "kamu-cli-e2e-repo-tests" -version = "0.209.0" +version = "0.213.1" dependencies = [ "chrono", "http-common", @@ -5948,7 +6018,7 @@ dependencies = [ [[package]] name = 
"kamu-cli-e2e-sqlite" -version = "0.209.0" +version = "0.213.1" dependencies = [ "indoc 2.0.5", "kamu-cli-e2e-common", @@ -5961,7 +6031,7 @@ dependencies = [ [[package]] name = "kamu-cli-puppet" -version = "0.209.0" +version = "0.213.1" dependencies = [ "assert_cmd", "async-trait", @@ -5980,7 +6050,7 @@ dependencies = [ [[package]] name = "kamu-core" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", "bytes", @@ -5989,7 +6059,7 @@ dependencies = [ "datafusion", "dill", "futures", - "http 1.1.0", + "http 1.2.0", "internal-error", "kamu-accounts", "kamu-datasets", @@ -6002,16 +6072,17 @@ dependencies = [ "pin-project", "serde", "serde_with", - "thiserror 1.0.69", + "thiserror 2.0.8", "tokio", "tokio-stream", + "tracing", "url", "utoipa", ] [[package]] name = "kamu-data-utils" -version = "0.209.0" +version = "0.213.1" dependencies = [ "arrow", "arrow-digest", @@ -6027,7 +6098,7 @@ dependencies = [ "serde_json", "sha3", "test-log", - "thiserror 1.0.69", + "thiserror 2.0.8", "tokio", "tracing", "url", @@ -6035,7 +6106,7 @@ dependencies = [ [[package]] name = "kamu-datafusion-cli" -version = "0.209.0" +version = "0.213.1" dependencies = [ "arrow", "async-trait", @@ -6055,7 +6126,7 @@ dependencies = [ [[package]] name = "kamu-datasets" -version = "0.209.0" +version = "0.213.1" dependencies = [ "aes-gcm", "async-trait", @@ -6069,14 +6140,14 @@ dependencies = [ "serde", "serde_with", "sqlx", - "thiserror 1.0.69", + "thiserror 2.0.8", "tokio-stream", "uuid", ] [[package]] name = "kamu-datasets-inmem" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", "database-common", @@ -6091,12 +6162,13 @@ dependencies = [ "test-group", "test-log", "tokio", + "tokio-stream", "uuid", ] [[package]] name = "kamu-datasets-postgres" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-stream", "async-trait", @@ -6117,12 +6189,13 @@ dependencies = [ [[package]] name = "kamu-datasets-repo-tests" -version = "0.209.0" +version = "0.213.1" dependencies = [ "chrono", "database-common", "dill", "futures", + "itertools 0.13.0", "kamu-accounts", "kamu-datasets", "opendatafabric", @@ -6132,7 +6205,7 @@ dependencies = [ [[package]] name = "kamu-datasets-services" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-stream", "async-trait", @@ -6140,6 +6213,7 @@ dependencies = [ "database-common", "dill", "futures", + "indoc 2.0.5", "init-on-startup", "internal-error", "kamu", @@ -6147,21 +6221,25 @@ dependencies = [ "kamu-accounts-inmem", "kamu-core", "kamu-datasets", + "kamu-datasets-inmem", "messaging-outbox", "mockall", + "oop", "opendatafabric", + "petgraph", "pretty_assertions", "secrecy", "test-log", "time-source", "tokio", + "tokio-stream", "tracing", "uuid", ] [[package]] name = "kamu-datasets-sqlite" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-stream", "async-trait", @@ -6182,7 +6260,7 @@ dependencies = [ [[package]] name = "kamu-flow-system" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", "chrono", @@ -6202,13 +6280,13 @@ dependencies = [ "serde_with", "sqlx", "strum", - "thiserror 1.0.69", + "thiserror 2.0.8", "tokio-stream", ] [[package]] name = "kamu-flow-system-inmem" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", "chrono", @@ -6228,7 +6306,7 @@ dependencies = [ [[package]] name = "kamu-flow-system-postgres" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-stream", "async-trait", @@ -6249,7 +6327,7 @@ dependencies = [ [[package]] name = 
"kamu-flow-system-repo-tests" -version = "0.209.0" +version = "0.213.1" dependencies = [ "chrono", "database-common", @@ -6262,7 +6340,7 @@ dependencies = [ [[package]] name = "kamu-flow-system-services" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-stream", "async-trait", @@ -6280,6 +6358,8 @@ dependencies = [ "kamu-accounts-inmem", "kamu-accounts-services", "kamu-core", + "kamu-datasets-inmem", + "kamu-datasets-services", "kamu-flow-system", "kamu-flow-system-inmem", "kamu-task-system", @@ -6299,7 +6379,7 @@ dependencies = [ [[package]] name = "kamu-flow-system-sqlite" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-stream", "async-trait", @@ -6320,7 +6400,7 @@ dependencies = [ [[package]] name = "kamu-ingest-datafusion" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", "chrono", @@ -6343,7 +6423,7 @@ dependencies = [ "tempfile", "test-group", "test-log", - "thiserror 1.0.69", + "thiserror 2.0.8", "tokio", "tracing", "walkdir", @@ -6352,7 +6432,7 @@ dependencies = [ [[package]] name = "kamu-messaging-outbox-inmem" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", "database-common-macros", @@ -6368,7 +6448,7 @@ dependencies = [ [[package]] name = "kamu-messaging-outbox-postgres" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-stream", "async-trait", @@ -6386,7 +6466,7 @@ dependencies = [ [[package]] name = "kamu-messaging-outbox-repo-tests" -version = "0.209.0" +version = "0.213.1" dependencies = [ "chrono", "dill", @@ -6399,7 +6479,7 @@ dependencies = [ [[package]] name = "kamu-messaging-outbox-sqlite" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-stream", "async-trait", @@ -6418,7 +6498,7 @@ dependencies = [ [[package]] name = "kamu-repo-tools" -version = "0.209.0" +version = "0.213.1" dependencies = [ "chrono", "clap", @@ -6427,13 +6507,13 @@ dependencies = [ "grep-searcher", "indoc 2.0.5", "regex", - "semver 1.0.23", + "semver 1.0.24", "toml", ] [[package]] name = "kamu-task-system" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", "chrono", @@ -6446,13 +6526,13 @@ dependencies = [ "opendatafabric", "serde", "sqlx", - "thiserror 1.0.69", + "thiserror 2.0.8", "tokio-stream", ] [[package]] name = "kamu-task-system-inmem" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", "database-common", @@ -6469,7 +6549,7 @@ dependencies = [ [[package]] name = "kamu-task-system-postgres" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-stream", "async-trait", @@ -6489,7 +6569,7 @@ dependencies = [ [[package]] name = "kamu-task-system-repo-tests" -version = "0.209.0" +version = "0.213.1" dependencies = [ "chrono", "database-common", @@ -6501,7 +6581,7 @@ dependencies = [ [[package]] name = "kamu-task-system-services" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", "chrono", @@ -6532,7 +6612,7 @@ dependencies = [ [[package]] name = "kamu-task-system-sqlite" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-stream", "async-trait", @@ -6609,13 +6689,13 @@ dependencies = [ [[package]] name = "lexical-core" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0431c65b318a590c1de6b8fd6e72798c92291d27762d94c9e6c37ed7a73d8458" +checksum = "b765c31809609075565a70b4b71402281283aeda7ecaf4818ac14a7b2ade8958" dependencies = [ - "lexical-parse-float 1.0.2", - "lexical-parse-integer 1.0.2", - "lexical-util 1.0.3", + 
"lexical-parse-float 1.0.5", + "lexical-parse-integer 1.0.5", + "lexical-util 1.0.6", "lexical-write-float", "lexical-write-integer", ] @@ -6633,12 +6713,12 @@ dependencies = [ [[package]] name = "lexical-parse-float" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb17a4bdb9b418051aa59d41d65b1c9be5affab314a872e5ad7f06231fb3b4e0" +checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2" dependencies = [ - "lexical-parse-integer 1.0.2", - "lexical-util 1.0.3", + "lexical-parse-integer 1.0.5", + "lexical-util 1.0.6", "static_assertions", ] @@ -6654,11 +6734,11 @@ dependencies = [ [[package]] name = "lexical-parse-integer" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5df98f4a4ab53bf8b175b363a34c7af608fe31f93cc1fb1bf07130622ca4ef61" +checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e" dependencies = [ - "lexical-util 1.0.3", + "lexical-util 1.0.6", "static_assertions", ] @@ -6673,39 +6753,39 @@ dependencies = [ [[package]] name = "lexical-util" -version = "1.0.3" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85314db53332e5c192b6bca611fb10c114a80d1b831ddac0af1e9be1b9232ca0" +checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3" dependencies = [ "static_assertions", ] [[package]] name = "lexical-write-float" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e7c3ad4e37db81c1cbe7cf34610340adc09c322871972f74877a712abc6c809" +checksum = "c5afc668a27f460fb45a81a757b6bf2f43c2d7e30cb5a2dcd3abf294c78d62bd" dependencies = [ - "lexical-util 1.0.3", + "lexical-util 1.0.6", "lexical-write-integer", "static_assertions", ] [[package]] name = "lexical-write-integer" -version = "1.0.2" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb89e9f6958b83258afa3deed90b5de9ef68eef090ad5086c791cd2345610162" +checksum = "629ddff1a914a836fb245616a7888b62903aae58fa771e1d83943035efa0f978" dependencies = [ - "lexical-util 1.0.3", + "lexical-util 1.0.6", "static_assertions", ] [[package]] name = "libc" -version = "0.2.164" +version = "0.2.168" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "433bfe06b8c75da9b2e3fbea6e5329ff87748f0b144ef75306e674c3f6f7c13f" +checksum = "5aaeb2981e0606ca11d79718f8bb01164f1d6ed75080182d3abf017e6d244b6d" [[package]] name = "libflate" @@ -6963,7 +7043,7 @@ dependencies = [ [[package]] name = "messaging-outbox" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", "chrono", @@ -6982,7 +7062,7 @@ dependencies = [ "serde", "serde_json", "test-log", - "thiserror 1.0.69", + "thiserror 2.0.8", "time-source", "tokio", "tokio-stream", @@ -7013,9 +7093,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.8.0" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +checksum = "4ffbe83022cedc1d264172192511ae958937694cd57ce297164951b8b3568394" dependencies = [ "adler2", ] @@ -7049,11 +7129,10 @@ dependencies = [ [[package]] name = "mio" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" dependencies = [ - "hermit-abi 0.3.9", "libc", "wasi", "windows-sys 0.52.0", @@ -7082,7 +7161,7 @@ dependencies = [ "cfg-if", "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -7094,7 +7173,7 @@ dependencies = [ "bytes", "encoding_rs", "futures-util", - "http 1.1.0", + "http 1.2.0", "httparse", "memchr", "mime", @@ -7104,7 +7183,7 @@ dependencies = [ [[package]] name = "multiformats" -version = "0.209.0" +version = "0.213.1" dependencies = [ "base64 0.22.1", "bs58", @@ -7115,7 +7194,7 @@ dependencies = [ "serde", "serde_json", "sha3", - "thiserror 1.0.69", + "thiserror 2.0.8", "unsigned-varint", "utoipa", ] @@ -7337,7 +7416,7 @@ checksum = "af1844ef2428cc3e1cb900be36181049ef3d3193c63e43026cfe202983b27a56" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -7410,12 +7489,12 @@ dependencies = [ "chrono", "futures", "humantime", - "hyper 1.5.1", + "hyper 1.5.2", "itertools 0.13.0", "md-5", "parking_lot", "percent-encoding", - "quick-xml", + "quick-xml 0.36.2", "rand", "reqwest", "ring", @@ -7431,12 +7510,12 @@ dependencies = [ [[package]] name = "observability" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", "axum", "dill", - "http 1.1.0", + "http 1.2.0", "opentelemetry", "opentelemetry-otlp", "opentelemetry-semantic-conventions", @@ -7444,7 +7523,7 @@ dependencies = [ "prometheus", "serde", "serde_json", - "thiserror 1.0.69", + "thiserror 2.0.8", "tower-http", "tracing", "tracing-appender", @@ -7492,7 +7571,7 @@ checksum = "e493043dcf88be852a9716f7b065640867a72cab403f360003534e8cbf11bf84" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -7509,7 +7588,7 @@ checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" [[package]] name = "opendatafabric" -version = "0.209.0" +version = "0.213.1" dependencies = [ "arrow", "base64 0.22.1", @@ -7530,7 +7609,7 @@ dependencies = [ "serde_yaml", "sha3", "sqlx", - "thiserror 1.0.69", + "thiserror 2.0.8", "tonic", "url", "utoipa", @@ -7586,7 +7665,7 @@ checksum = "29e1f9c8b032d4f635c730c0efcf731d5e2530ea13fa8bef7939ddc8420696bd" dependencies = [ "async-trait", "futures-core", - "http 1.1.0", + "http 1.2.0", "opentelemetry", "opentelemetry-proto", "opentelemetry_sdk", @@ -7698,29 +7777,28 @@ dependencies = [ [[package]] name = "parity-scale-codec" -version = "3.7.0" +version = "3.6.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8be4817d39f3272f69c59fe05d0535ae6456c2dc2fa1ba02910296c7e0a5c590" +checksum = "306800abfa29c7f16596b5970a588435e3d5b3149683d00c12b699cc19f895ee" dependencies = [ "arrayvec", "bitvec", "byte-slice-cast", "impl-trait-for-tuples", "parity-scale-codec-derive", - "rustversion", "serde", ] [[package]] name = "parity-scale-codec-derive" -version = "3.7.0" +version = "3.6.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8781a75c6205af67215f382092b6e0a4ff3734798523e69073d4bcd294ec767b" +checksum = "d830939c76d294956402033aee57a6da7b438f2294eb94864c37b0569053a42c" dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.89", + "syn 1.0.109", ] [[package]] @@ -7816,9 +7894,9 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "pathdiff" -version = "0.2.2" +version = "0.2.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "d61c5ce1153ab5b689d0c074c4e7fc613e942dfb7dd9eea5ab202d2ad91fe361" +checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" [[package]] name = "pbkdf2" @@ -7884,20 +7962,20 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "pest" -version = "2.7.14" +version = "2.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "879952a81a83930934cbf1786752d6dedc3b1f29e8f8fb2ad1d0a36f377cf442" +checksum = "8b7cafe60d6cf8e62e1b9b2ea516a089c008945bb5a275416789e7db0bc199dc" dependencies = [ "memchr", - "thiserror 1.0.69", + "thiserror 2.0.8", "ucd-trie", ] [[package]] name = "pest_derive" -version = "2.7.14" +version = "2.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d214365f632b123a47fd913301e14c946c61d1c183ee245fa76eb752e59a02dd" +checksum = "816518421cfc6887a0d62bf441b6ffb4536fcc926395a69e1a85852d4363f57e" dependencies = [ "pest", "pest_generator", @@ -7905,22 +7983,22 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.7.14" +version = "2.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb55586734301717aea2ac313f50b2eb8f60d2fc3dc01d190eefa2e625f60c4e" +checksum = "7d1396fd3a870fc7838768d171b4616d5c91f6cc25e377b673d714567d99377b" dependencies = [ "pest", "pest_meta", "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] name = "pest_meta" -version = "2.7.14" +version = "2.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b75da2a70cf4d9cb76833c990ac9cd3923c9a8905a8929789ce347c84564d03d" +checksum = "e1e58089ea25d717bfd31fb534e4f3afcc2cc569c70de3e239778991ea3b7dea" dependencies = [ "once_cell", "pest", @@ -7934,7 +8012,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ "fixedbitset", - "indexmap 2.6.0", + "indexmap 2.7.0", ] [[package]] @@ -8011,7 +8089,7 @@ checksum = "3c0f5fad0874fc7abcd4d750e76917eaebbecaa2c20bde22e1dbeeba8beb758c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -8190,7 +8268,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eea25e07510aa6ab6547308ebe3c036016d162b8da920dbb079e3ba8acf3d95a" dependencies = [ "csv", - "encode_unicode 1.0.0", + "encode_unicode", "is-terminal", "lazy_static", "term", @@ -8260,7 +8338,7 @@ dependencies = [ "proc-macro-error-attr2", "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -8308,9 +8386,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.13.3" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" +checksum = "2c0fef6c4230e4ccf618a35c59d7ede15dea37de8427500f50aff708806e42ec" dependencies = [ "bytes", "prost-derive", @@ -8318,22 +8396,22 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.13.3" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" +checksum = "157c5a9d7ea5c2ed2d9fb8f495b64759f7816c7eaea54ba3978f0d63000162e3" dependencies = [ "anyhow", "itertools 0.13.0", "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] name = "prost-types" -version = "0.13.3" +version = 
"0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" +checksum = "cc2f1e56baa61e93533aebc21af4d2134b70f66275e0fcdf3cbe43d77ff7e8fc" dependencies = [ "prost", ] @@ -8354,6 +8432,16 @@ dependencies = [ "serde", ] +[[package]] +name = "quick-xml" +version = "0.37.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f22f29bdff3987b4d8632ef95fd6424ec7e4e0a57e2f4fc63e489e75357f6a03" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "quinn" version = "0.11.6" @@ -8365,9 +8453,9 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls 0.23.18", + "rustls 0.23.20", "socket2", - "thiserror 2.0.3", + "thiserror 2.0.8", "tokio", "tracing", ] @@ -8383,10 +8471,10 @@ dependencies = [ "rand", "ring", "rustc-hash", - "rustls 0.23.18", + "rustls 0.23.20", "rustls-pki-types", "slab", - "thiserror 2.0.3", + "thiserror 2.0.8", "tinyvec", "tracing", "web-time", @@ -8394,9 +8482,9 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.7" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d5a626c6807713b15cac82a6acaccd6043c9a5408c24baae07611fec3f243da" +checksum = "1c40286217b4ba3a71d644d752e6a0b71f13f1b6a2c5311acfcbe0c2418ed904" dependencies = [ "cfg_aliases 0.2.1", "libc", @@ -8473,7 +8561,7 @@ dependencies = [ [[package]] name = "random-names" -version = "0.209.0" +version = "0.213.1" dependencies = [ "rand", ] @@ -8506,9 +8594,9 @@ checksum = "2f178674da3d005db760b30d6735a989d692da37b86337daec6f2e311223d608" [[package]] name = "redox_syscall" -version = "0.5.7" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" +checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" dependencies = [ "bitflags 2.6.0", ] @@ -8586,10 +8674,10 @@ dependencies = [ "futures-core", "futures-util", "h2 0.4.7", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", "http-body-util", - "hyper 1.5.1", + "hyper 1.5.2", "hyper-rustls 0.27.3", "hyper-util", "ipnet", @@ -8601,16 +8689,16 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.18", + "rustls 0.23.20", "rustls-native-certs 0.8.1", "rustls-pemfile 2.2.0", "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", - "sync_wrapper 1.0.2", + "sync_wrapper", "tokio", - "tokio-rustls 0.26.0", + "tokio-rustls 0.26.1", "tokio-util", "tower-service", "url", @@ -8706,9 +8794,9 @@ dependencies = [ [[package]] name = "rsa" -version = "0.9.6" +version = "0.9.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d0e5124fcb30e76a7e79bfee683a2746db83784b86289f6251b54b7950a0dfc" +checksum = "47c75d7c5c6b673e58bf54d8544a9f432e3a925b0e80f7cd3602ab5c50c55519" dependencies = [ "const-oid", "digest 0.10.7", @@ -8726,16 +8814,18 @@ dependencies = [ [[package]] name = "ruint" -version = "1.12.3" +version = "1.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c3cc4c2511671f327125da14133d0c5c5d137f006a1017a16f557bc85b16286" +checksum = "f5ef8fb1dd8de3870cb8400d51b4c2023854bbafd5431a3ac7e7317243e22d2f" dependencies = [ "alloy-rlp", "ark-ff 0.3.0", "ark-ff 0.4.2", "bytes", - "fastrlp", + "fastrlp 0.3.1", + "fastrlp 0.4.0", "num-bigint", + "num-integer", "num-traits", "parity-scale-codec", "primitive-types", @@ -8794,7 +8884,7 @@ dependencies = [ "quote", 
"rust-embed-utils", "shellexpand", - "syn 2.0.89", + "syn 2.0.90", "walkdir", ] @@ -8816,9 +8906,9 @@ checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" [[package]] name = "rustc-hash" -version = "2.0.0" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" +checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497" [[package]] name = "rustc-hex" @@ -8841,20 +8931,20 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" dependencies = [ - "semver 1.0.23", + "semver 1.0.24", ] [[package]] name = "rustix" -version = "0.38.41" +version = "0.38.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6" +checksum = "f93dc38ecbab2eb790ff964bb77fa94faf256fd3e73285fd7ba0903b76bedb85" dependencies = [ "bitflags 2.6.0", "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -8885,9 +8975,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.18" +version = "0.23.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c9cc1d47e243d655ace55ed38201c19ae02c148ae56412ab8750e8f0166ab7f" +checksum = "5065c3f250cbd332cd894be57c40fa52387247659b14a2d6041d121547903b1b" dependencies = [ "once_cell", "ring", @@ -8931,7 +9021,7 @@ dependencies = [ "openssl-probe", "rustls-pki-types", "schannel", - "security-framework 3.0.1", + "security-framework 3.1.0", ] [[package]] @@ -8954,9 +9044,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.10.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" +checksum = "d2bf47e6ff922db3825eb750c4e2ff784c6ff8fb9e13046ef6a1d1c5401b0b37" dependencies = [ "web-time", ] @@ -9125,9 +9215,9 @@ dependencies = [ [[package]] name = "security-framework" -version = "3.0.1" +version = "3.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1415a607e92bec364ea2cf9264646dcce0f91e6d65281bd6f2819cca3bf39c8" +checksum = "81d3f8c9bfcc3cbb6b0179eb57042d75b1582bdc65c3cb95f3fa999509c03cbc" dependencies = [ "bitflags 2.6.0", "core-foundation 0.10.0", @@ -9138,9 +9228,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.12.1" +version = "2.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa39c7303dc58b5543c94d22c1766b0d31f2ee58306363ea622b10bbc075eaa2" +checksum = "1863fd3768cd83c56a7f60faa4dc0d403f1b6df0a38c3c25f44b7894e45370d5" dependencies = [ "core-foundation-sys", "libc", @@ -9157,9 +9247,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.23" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" +checksum = "3cb6eb87a131f756572d7fb904f6e7b68633f09cca868c5df1c4b8d1a694bbba" dependencies = [ "serde", ] @@ -9187,22 +9277,22 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.215" +version = "1.0.216" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" +checksum = 
"0b9781016e935a97e8beecf0c933758c97a5520d32930e460142b4cd80c6338e" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.215" +version = "1.0.216" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" +checksum = "46f859dbbf73865c6627ed570e78961cd3ac92407a2d117204c49232485da55e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -9258,7 +9348,7 @@ dependencies = [ "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.6.0", + "indexmap 2.7.0", "serde", "serde_derive", "serde_json", @@ -9275,7 +9365,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -9284,7 +9374,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.6.0", + "indexmap 2.7.0", "itoa", "ryu", "serde", @@ -9478,7 +9568,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -9489,9 +9579,9 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "socket2" -version = "0.5.7" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" dependencies = [ "libc", "windows-sys 0.52.0", @@ -9538,9 +9628,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.50.0" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2e5b515a2bd5168426033e9efbfd05500114833916f1d5c268f938b4ee130ac" +checksum = "5fe11944a61da0da3f592e19a45ebe5ab92dc14a779907ff1f08fbb797bfefc7" dependencies = [ "log", "sqlparser_derive", @@ -9554,7 +9644,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -9587,12 +9677,12 @@ dependencies = [ "futures-util", "hashbrown 0.14.5", "hashlink", - "indexmap 2.6.0", + "indexmap 2.7.0", "log", "memchr", "once_cell", "percent-encoding", - "rustls 0.23.18", + "rustls 0.23.20", "rustls-pemfile 2.2.0", "serde", "serde_json", @@ -9617,7 +9707,7 @@ dependencies = [ "quote", "sqlx-core", "sqlx-macros-core", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -9639,7 +9729,7 @@ dependencies = [ "sqlx-mysql", "sqlx-postgres", "sqlx-sqlite", - "syn 2.0.89", + "syn 2.0.90", "tempfile", "tokio", "url", @@ -9831,7 +9921,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -9853,9 +9943,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.89" +version = "2.0.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d46482f1c1c87acd84dea20c1bf5ebff4c757009ed6bf19cfd36fb10e92c4e" +checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31" dependencies = [ "proc-macro2", "quote", @@ -9864,22 +9954,16 @@ dependencies = [ [[package]] name = "syn-solidity" -version = "0.8.12" +version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f76fe0a3e1476bdaa0775b9aec5b869ed9520c2b2fedfe9c6df3618f8ea6290b" +checksum = "219389c1ebe89f8333df8bdfb871f6631c552ff399c23cac02480b6088aad8f0" dependencies = [ "paste", 
"proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] -[[package]] -name = "sync_wrapper" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" - [[package]] name = "sync_wrapper" version = "1.0.2" @@ -9897,7 +9981,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -9957,7 +10041,7 @@ dependencies = [ "proc-macro2", "quote", "sha2", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -9979,7 +10063,16 @@ checksum = "5999e24eaa32083191ba4e425deb75cdf25efefabe5aaccb7446dd0d4122a3f5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", +] + +[[package]] +name = "testing_logger" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d92b727cb45d33ae956f7f46b966b25f1bc712092aeef9dba5ac798fc89f720" +dependencies = [ + "log", ] [[package]] @@ -10002,11 +10095,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.3" +version = "2.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c006c85c7651b3cf2ada4584faa36773bd07bac24acfb39f3c431b36d7e667aa" +checksum = "08f5383f3e0071702bf93ab5ee99b52d26936be9dedd9413067cbdcddcb6141a" dependencies = [ - "thiserror-impl 2.0.3", + "thiserror-impl 2.0.8", ] [[package]] @@ -10017,18 +10110,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] name = "thiserror-impl" -version = "2.0.3" +version = "2.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f077553d607adc1caf65430528a576c757a71ed73944b66ebb58ef2bbd243568" +checksum = "f2f357fcec90b3caef6623a099691be676d033b40a058ac95d2a6ade6fa0c943" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -10063,9 +10156,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.36" +version = "0.3.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" +checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21" dependencies = [ "deranged", "itoa", @@ -10086,9 +10179,9 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" +checksum = "2834e6017e3e5e4b9834939793b282bc03b37a3336245fa820e35e233e2a85de" dependencies = [ "num-conv", "time-core", @@ -10096,7 +10189,7 @@ dependencies = [ [[package]] name = "time-source" -version = "0.209.0" +version = "0.213.1" dependencies = [ "async-trait", "chrono", @@ -10151,14 +10244,14 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.41.1" +version = "1.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33" +checksum = "5cec9b21b0450273377fc97bd4c33a8acffc8c996c987a7c5b319a0083707551" dependencies = [ "backtrace", "bytes", "libc", - "mio 1.0.2", + "mio 1.0.3", "parking_lot", "pin-project-lite", "signal-hook-registry", @@ -10175,7 +10268,7 @@ checksum = 
"693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -10212,20 +10305,19 @@ dependencies = [ [[package]] name = "tokio-rustls" -version = "0.26.0" +version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" +checksum = "5f6d0975eaace0cf0fcadee4e4aaa5da15b5c079146f2cffb67c113be122bf37" dependencies = [ - "rustls 0.23.18", - "rustls-pki-types", + "rustls 0.23.20", "tokio", ] [[package]] name = "tokio-stream" -version = "0.1.16" +version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f4e6ce100d0eb49a2734f8c0812bcd324cf357d21810932c5df6b96ef2b86f1" +checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" dependencies = [ "futures-core", "pin-project-lite", @@ -10241,20 +10333,20 @@ checksum = "edc5f74e248dc973e0dbb7b74c7e0d6fcc301c694ff50049504004ef4d0cdcd9" dependencies = [ "futures-util", "log", - "rustls 0.23.18", + "rustls 0.23.20", "rustls-native-certs 0.8.1", "rustls-pki-types", "tokio", - "tokio-rustls 0.26.0", + "tokio-rustls 0.26.1", "tungstenite", "webpki-roots", ] [[package]] name = "tokio-util" -version = "0.7.12" +version = "0.7.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" +checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078" dependencies = [ "bytes", "futures-core", @@ -10291,7 +10383,7 @@ version = "0.22.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" dependencies = [ - "indexmap 2.6.0", + "indexmap 2.7.0", "serde", "serde_spanned", "toml_datetime", @@ -10310,10 +10402,10 @@ dependencies = [ "base64 0.22.1", "bytes", "h2 0.4.7", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", "http-body-util", - "hyper 1.5.1", + "hyper 1.5.2", "hyper-timeout", "hyper-util", "percent-encoding", @@ -10350,14 +10442,14 @@ dependencies = [ [[package]] name = "tower" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2873938d487c3cfb9aed7546dc9f2711d867c9f90c46b889989a2cb84eba6b4f" +checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" dependencies = [ "futures-core", "futures-util", "pin-project-lite", - "sync_wrapper 0.1.2", + "sync_wrapper", "tokio", "tower-layer", "tower-service", @@ -10373,7 +10465,7 @@ dependencies = [ "bitflags 2.6.0", "bytes", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", "http-body-util", "http-range-header", @@ -10403,9 +10495,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.40" +version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ "log", "pin-project-lite", @@ -10427,20 +10519,20 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.27" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +checksum = 
"395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] name = "tracing-bunyan-formatter" -version = "0.3.9" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5c266b9ac83dedf0e0385ad78514949e6d89491269e7065bee51d2bb8ec7373" +checksum = "2d637245a0d8774bd48df6482e086c59a8b5348a910c3b0579354045a9d82411" dependencies = [ "ahash", "gethostname", @@ -10456,9 +10548,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.32" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" dependencies = [ "once_cell", "valuable", @@ -10504,7 +10596,7 @@ dependencies = [ [[package]] name = "tracing-perfetto" -version = "0.209.0" +version = "0.213.1" dependencies = [ "conv", "serde", @@ -10516,9 +10608,9 @@ dependencies = [ [[package]] name = "tracing-serde" -version = "0.1.3" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc6b213177105856957181934e4920de57730fc69bf42c37ee5bb664d406d9e1" +checksum = "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1" dependencies = [ "serde", "tracing-core", @@ -10526,9 +10618,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.18" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" dependencies = [ "matchers", "nu-ansi-term", @@ -10545,52 +10637,6 @@ dependencies = [ "tracing-serde", ] -[[package]] -name = "trust-dns-proto" -version = "0.23.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3119112651c157f4488931a01e586aa459736e9d6046d3bd9105ffb69352d374" -dependencies = [ - "async-trait", - "cfg-if", - "data-encoding", - "enum-as-inner", - "futures-channel", - "futures-io", - "futures-util", - "idna 0.4.0", - "ipnet", - "once_cell", - "rand", - "smallvec", - "thiserror 1.0.69", - "tinyvec", - "tokio", - "tracing", - "url", -] - -[[package]] -name = "trust-dns-resolver" -version = "0.23.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10a3e6c3aff1718b3c73e395d1f35202ba2ffa847c6a62eea0db8fb4cfe30be6" -dependencies = [ - "cfg-if", - "futures-util", - "ipconfig", - "lru-cache", - "once_cell", - "parking_lot", - "rand", - "resolv-conf", - "smallvec", - "thiserror 1.0.69", - "tokio", - "tracing", - "trust-dns-proto", -] - [[package]] name = "try-lock" version = "0.2.5" @@ -10606,11 +10652,11 @@ dependencies = [ "byteorder", "bytes", "data-encoding", - "http 1.1.0", + "http 1.2.0", "httparse", "log", "rand", - "rustls 0.23.18", + "rustls 0.23.20", "rustls-pki-types", "sha1", "thiserror 1.0.69", @@ -10666,9 +10712,9 @@ checksum = "7e51b68083f157f853b6379db119d1c1be0e6e4dec98101079dec41f6f5cf6df" [[package]] name = "unicode-bidi" -version = "0.3.17" +version = "0.3.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ab17db44d7388991a428b2ee655ce0c212e862eff1768a455c58f9aad6e7893" +checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" [[package]] name = "unicode-ident" @@ -10756,7 +10802,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" dependencies = [ "form_urlencoded", - "idna 1.0.3", + "idna", "percent-encoding", "serde", ] @@ -10797,7 +10843,7 @@ version = "5.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "514a48569e4e21c86d0b84b5612b5e73c0b2cf09db63260134ba426d4e8ea714" dependencies = [ - "indexmap 2.6.0", + "indexmap 2.7.0", "serde", "serde_json", "utoipa-gen", @@ -10824,7 +10870,7 @@ checksum = "5629efe65599d0ccd5d493688cbf6e03aa7c1da07fe59ff97cf5977ed0637f66" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -10917,9 +10963,9 @@ checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" [[package]] name = "wasm-bindgen" -version = "0.2.95" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" +checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396" dependencies = [ "cfg-if", "once_cell", @@ -10928,36 +10974,36 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.95" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" +checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79" dependencies = [ "bumpalo", "log", - "once_cell", "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.45" +version = "0.4.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc7ec4f8827a71586374db3e87abdb5a2bb3a15afed140221307c3ec06b1f63b" +checksum = "38176d9b44ea84e9184eff0bc34cc167ed044f816accfe5922e54d84cf48eca2" dependencies = [ "cfg-if", "js-sys", + "once_cell", "wasm-bindgen", "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.95" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" +checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -10965,22 +11011,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.95" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" +checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.95" +version = "0.2.99" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" +checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6" [[package]] name = "wasm-streams" @@ -10997,9 +11043,9 @@ dependencies = [ [[package]] name = "wasmtimer" -version = "0.2.1" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7ed9d8b15c7fb594d72bfb4b5a276f3d2029333cd93a932f376f5937f6f80ee" +checksum = "0048ad49a55b9deb3953841fa1fc5858f0efbcb7a18868c899a360269fac1b23" dependencies = [ "futures", "js-sys", @@ -11011,9 
+11057,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.72" +version = "0.3.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" +checksum = "04dd7223427d52553d3702c004d3b2fe07c148165faa56313cb00211e31c12bc" dependencies = [ "js-sys", "wasm-bindgen", @@ -11031,9 +11077,9 @@ dependencies = [ [[package]] name = "webbrowser" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e5f07fb9bc8de2ddfe6b24a71a75430673fd679e568c48b52716cef1cfae923" +checksum = "ea9fe1ebb156110ff855242c1101df158b822487e4957b0556d9ffce9db0f535" dependencies = [ "block2", "core-foundation 0.10.0", @@ -11468,7 +11514,7 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", "synstructure", ] @@ -11490,7 +11536,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -11510,7 +11556,7 @@ checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", "synstructure", ] @@ -11531,7 +11577,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -11553,14 +11599,14 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] name = "zip" -version = "2.2.1" +version = "2.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d52293fc86ea7cf13971b3bb81eb21683636e7ae24c729cdaf1b7c4157a352" +checksum = "ae9c1ea7b3a5e1f4b922ff856a129881167511563dc219869afe3787fc0c1a45" dependencies = [ "aes", "arbitrary", @@ -11572,13 +11618,13 @@ dependencies = [ "displaydoc", "flate2", "hmac", - "indexmap 2.6.0", + "indexmap 2.7.0", "lzma-rs", "memchr", "pbkdf2", "rand", "sha1", - "thiserror 2.0.3", + "thiserror 2.0.8", "time", "zeroize", "zopfli", diff --git a/Cargo.toml b/Cargo.toml index 421eaccb3..f20460423 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -92,95 +92,95 @@ resolver = "2" [workspace.dependencies] # Apps -kamu-cli = { version = "0.209.0", path = "src/app/cli", default-features = false } +kamu-cli = { version = "0.213.1", path = "src/app/cli", default-features = false } # Utils -async-utils = { version = "0.209.0", path = "src/utils/async-utils", default-features = false } -container-runtime = { version = "0.209.0", path = "src/utils/container-runtime", default-features = false } -database-common = { version = "0.209.0", path = "src/utils/database-common", default-features = false } -database-common-macros = { version = "0.209.0", path = "src/utils/database-common-macros", default-features = false } -enum-variants = { version = "0.209.0", path = "src/utils/enum-variants", default-features = false } -event-sourcing = { version = "0.209.0", path = "src/utils/event-sourcing", default-features = false } -event-sourcing-macros = { version = "0.209.0", path = "src/utils/event-sourcing-macros", default-features = false } -http-common = { version = "0.209.0", path = "src/utils/http-common", default-features = false } -init-on-startup = { version = "0.209.0", path = "src/utils/init-on-startup", default-features = false } 
-internal-error = { version = "0.209.0", path = "src/utils/internal-error", default-features = false } -kamu-cli-puppet = { version = "0.209.0", path = "src/utils/kamu-cli-puppet", default-features = false } -kamu-data-utils = { version = "0.209.0", path = "src/utils/data-utils", default-features = false } -kamu-datafusion-cli = { version = "0.209.0", path = "src/utils/datafusion-cli", default-features = false } -messaging-outbox = { version = "0.209.0", path = "src/utils/messaging-outbox", default-features = false } -multiformats = { version = "0.209.0", path = "src/utils/multiformats", default-features = false } -observability = { version = "0.209.0", path = "src/utils/observability", default-features = false } -random-names = { version = "0.209.0", path = "src/utils/random-names", default-features = false } -time-source = { version = "0.209.0", path = "src/utils/time-source", default-features = false } -tracing-perfetto = { version = "0.209.0", path = "src/utils/tracing-perfetto", default-features = false } +async-utils = { version = "0.213.1", path = "src/utils/async-utils", default-features = false } +container-runtime = { version = "0.213.1", path = "src/utils/container-runtime", default-features = false } +database-common = { version = "0.213.1", path = "src/utils/database-common", default-features = false } +database-common-macros = { version = "0.213.1", path = "src/utils/database-common-macros", default-features = false } +enum-variants = { version = "0.213.1", path = "src/utils/enum-variants", default-features = false } +event-sourcing = { version = "0.213.1", path = "src/utils/event-sourcing", default-features = false } +event-sourcing-macros = { version = "0.213.1", path = "src/utils/event-sourcing-macros", default-features = false } +http-common = { version = "0.213.1", path = "src/utils/http-common", default-features = false } +init-on-startup = { version = "0.213.1", path = "src/utils/init-on-startup", default-features = false } +internal-error = { version = "0.213.1", path = "src/utils/internal-error", default-features = false } +kamu-cli-puppet = { version = "0.213.1", path = "src/utils/kamu-cli-puppet", default-features = false } +kamu-data-utils = { version = "0.213.1", path = "src/utils/data-utils", default-features = false } +kamu-datafusion-cli = { version = "0.213.1", path = "src/utils/datafusion-cli", default-features = false } +messaging-outbox = { version = "0.213.1", path = "src/utils/messaging-outbox", default-features = false } +multiformats = { version = "0.213.1", path = "src/utils/multiformats", default-features = false } +observability = { version = "0.213.1", path = "src/utils/observability", default-features = false } +random-names = { version = "0.213.1", path = "src/utils/random-names", default-features = false } +time-source = { version = "0.213.1", path = "src/utils/time-source", default-features = false } +tracing-perfetto = { version = "0.213.1", path = "src/utils/tracing-perfetto", default-features = false } # Domain -kamu-accounts = { version = "0.209.0", path = "src/domain/accounts/domain", default-features = false } -kamu-auth-rebac = { version = "0.209.0", path = "src/domain/auth-rebac/domain", default-features = false } -kamu-core = { version = "0.209.0", path = "src/domain/core", default-features = false } -kamu-datasets = { version = "0.209.0", path = "src/domain/datasets/domain", default-features = false } -kamu-flow-system = { version = "0.209.0", path = "src/domain/flow-system/domain", default-features = false } -kamu-task-system = { 
version = "0.209.0", path = "src/domain/task-system/domain", default-features = false } -opendatafabric = { version = "0.209.0", path = "src/domain/opendatafabric", default-features = false } +kamu-accounts = { version = "0.213.1", path = "src/domain/accounts/domain", default-features = false } +kamu-auth-rebac = { version = "0.213.1", path = "src/domain/auth-rebac/domain", default-features = false } +kamu-core = { version = "0.213.1", path = "src/domain/core", default-features = false } +kamu-datasets = { version = "0.213.1", path = "src/domain/datasets/domain", default-features = false } +kamu-flow-system = { version = "0.213.1", path = "src/domain/flow-system/domain", default-features = false } +kamu-task-system = { version = "0.213.1", path = "src/domain/task-system/domain", default-features = false } +opendatafabric = { version = "0.213.1", path = "src/domain/opendatafabric", default-features = false } # Domain service layer -kamu-accounts-services = { version = "0.209.0", path = "src/domain/accounts/services", default-features = false } -kamu-auth-rebac-services = { version = "0.209.0", path = "src/domain/auth-rebac/services", default-features = false } -kamu-datasets-services = { version = "0.209.0", path = "src/domain/datasets/services", default-features = false } -kamu-flow-system-services = { version = "0.209.0", path = "src/domain/flow-system/services", default-features = false } -kamu-task-system-services = { version = "0.209.0", path = "src/domain/task-system/services", default-features = false } +kamu-accounts-services = { version = "0.213.1", path = "src/domain/accounts/services", default-features = false } +kamu-auth-rebac-services = { version = "0.213.1", path = "src/domain/auth-rebac/services", default-features = false } +kamu-datasets-services = { version = "0.213.1", path = "src/domain/datasets/services", default-features = false } +kamu-flow-system-services = { version = "0.213.1", path = "src/domain/flow-system/services", default-features = false } +kamu-task-system-services = { version = "0.213.1", path = "src/domain/task-system/services", default-features = false } # Infra -kamu = { version = "0.209.0", path = "src/infra/core", default-features = false } -kamu-ingest-datafusion = { version = "0.209.0", path = "src/infra/ingest-datafusion", default-features = false } +kamu = { version = "0.213.1", path = "src/infra/core", default-features = false } +kamu-ingest-datafusion = { version = "0.213.1", path = "src/infra/ingest-datafusion", default-features = false } ## Flow System -kamu-flow-system-repo-tests = { version = "0.209.0", path = "src/infra/flow-system/repo-tests", default-features = false } -kamu-flow-system-inmem = { version = "0.209.0", path = "src/infra/flow-system/inmem", default-features = false } -kamu-flow-system-postgres = { version = "0.209.0", path = "src/infra/flow-system/postgres", default-features = false } -kamu-flow-system-sqlite = { version = "0.209.0", path = "src/infra/flow-system/sqlite", default-features = false } +kamu-flow-system-repo-tests = { version = "0.213.1", path = "src/infra/flow-system/repo-tests", default-features = false } +kamu-flow-system-inmem = { version = "0.213.1", path = "src/infra/flow-system/inmem", default-features = false } +kamu-flow-system-postgres = { version = "0.213.1", path = "src/infra/flow-system/postgres", default-features = false } +kamu-flow-system-sqlite = { version = "0.213.1", path = "src/infra/flow-system/sqlite", default-features = false } ## Accounts -kamu-accounts-inmem = { version = "0.209.0", path 
= "src/infra/accounts/inmem", default-features = false } -kamu-accounts-mysql = { version = "0.209.0", path = "src/infra/accounts/mysql", default-features = false } -kamu-accounts-postgres = { version = "0.209.0", path = "src/infra/accounts/postgres", default-features = false } -kamu-accounts-sqlite = { version = "0.209.0", path = "src/infra/accounts/sqlite", default-features = false } -kamu-accounts-repo-tests = { version = "0.209.0", path = "src/infra/accounts/repo-tests", default-features = false } +kamu-accounts-inmem = { version = "0.213.1", path = "src/infra/accounts/inmem", default-features = false } +kamu-accounts-mysql = { version = "0.213.1", path = "src/infra/accounts/mysql", default-features = false } +kamu-accounts-postgres = { version = "0.213.1", path = "src/infra/accounts/postgres", default-features = false } +kamu-accounts-sqlite = { version = "0.213.1", path = "src/infra/accounts/sqlite", default-features = false } +kamu-accounts-repo-tests = { version = "0.213.1", path = "src/infra/accounts/repo-tests", default-features = false } ## Datasets -kamu-datasets-inmem = { version = "0.209.0", path = "src/infra/datasets/inmem", default-features = false } -kamu-datasets-postgres = { version = "0.209.0", path = "src/infra/datasets/postgres", default-features = false } -kamu-datasets-sqlite = { version = "0.209.0", path = "src/infra/datasets/sqlite", default-features = false } -kamu-datasets-repo-tests = { version = "0.209.0", path = "src/infra/datasets/repo-tests", default-features = false } +kamu-datasets-inmem = { version = "0.213.1", path = "src/infra/datasets/inmem", default-features = false } +kamu-datasets-postgres = { version = "0.213.1", path = "src/infra/datasets/postgres", default-features = false } +kamu-datasets-sqlite = { version = "0.213.1", path = "src/infra/datasets/sqlite", default-features = false } +kamu-datasets-repo-tests = { version = "0.213.1", path = "src/infra/datasets/repo-tests", default-features = false } ## Task System -kamu-task-system-inmem = { version = "0.209.0", path = "src/infra/task-system/inmem", default-features = false } -kamu-task-system-postgres = { version = "0.209.0", path = "src/infra/task-system/postgres", default-features = false } -kamu-task-system-sqlite = { version = "0.209.0", path = "src/infra/task-system/sqlite", default-features = false } -kamu-task-system-repo-tests = { version = "0.209.0", path = "src/infra/task-system/repo-tests", default-features = false } +kamu-task-system-inmem = { version = "0.213.1", path = "src/infra/task-system/inmem", default-features = false } +kamu-task-system-postgres = { version = "0.213.1", path = "src/infra/task-system/postgres", default-features = false } +kamu-task-system-sqlite = { version = "0.213.1", path = "src/infra/task-system/sqlite", default-features = false } +kamu-task-system-repo-tests = { version = "0.213.1", path = "src/infra/task-system/repo-tests", default-features = false } ## ReBAC -kamu-auth-rebac-inmem = { version = "0.209.0", path = "src/infra/auth-rebac/inmem", default-features = false } -kamu-auth-rebac-repo-tests = { version = "0.209.0", path = "src/infra/auth-rebac/repo-tests", default-features = false } -kamu-auth-rebac-postgres = { version = "0.209.0", path = "src/infra/auth-rebac/postgres", default-features = false } -kamu-auth-rebac-sqlite = { version = "0.209.0", path = "src/infra/auth-rebac/sqlite", default-features = false } +kamu-auth-rebac-inmem = { version = "0.213.1", path = "src/infra/auth-rebac/inmem", default-features = false } +kamu-auth-rebac-repo-tests 
= { version = "0.213.1", path = "src/infra/auth-rebac/repo-tests", default-features = false } +kamu-auth-rebac-postgres = { version = "0.213.1", path = "src/infra/auth-rebac/postgres", default-features = false } +kamu-auth-rebac-sqlite = { version = "0.213.1", path = "src/infra/auth-rebac/sqlite", default-features = false } ## Outbox -kamu-messaging-outbox-inmem = { version = "0.209.0", path = "src/infra/messaging-outbox/inmem", default-features = false } -kamu-messaging-outbox-postgres = { version = "0.209.0", path = "src/infra/messaging-outbox/postgres", default-features = false } -kamu-messaging-outbox-sqlite = { version = "0.209.0", path = "src/infra/messaging-outbox/sqlite", default-features = false } -kamu-messaging-outbox-repo-tests = { version = "0.209.0", path = "src/infra/messaging-outbox/repo-tests", default-features = false } +kamu-messaging-outbox-inmem = { version = "0.213.1", path = "src/infra/messaging-outbox/inmem", default-features = false } +kamu-messaging-outbox-postgres = { version = "0.213.1", path = "src/infra/messaging-outbox/postgres", default-features = false } +kamu-messaging-outbox-sqlite = { version = "0.213.1", path = "src/infra/messaging-outbox/sqlite", default-features = false } +kamu-messaging-outbox-repo-tests = { version = "0.213.1", path = "src/infra/messaging-outbox/repo-tests", default-features = false } # Adapters -kamu-adapter-auth-oso-rebac = { version = "0.209.0", path = "src/adapter/auth-oso-rebac", default-features = false } -kamu-adapter-flight-sql = { version = "0.209.0", path = "src/adapter/flight-sql", default-features = false } -kamu-adapter-graphql = { version = "0.209.0", path = "src/adapter/graphql", default-features = false } -kamu-adapter-http = { version = "0.209.0", path = "src/adapter/http", default-features = false } -kamu-adapter-oauth = { version = "0.209.0", path = "src/adapter/oauth", default-features = false } -kamu-adapter-odata = { version = "0.209.0", path = "src/adapter/odata", default-features = false } +kamu-adapter-auth-oso-rebac = { version = "0.213.1", path = "src/adapter/auth-oso-rebac", default-features = false } +kamu-adapter-flight-sql = { version = "0.213.1", path = "src/adapter/flight-sql", default-features = false } +kamu-adapter-graphql = { version = "0.213.1", path = "src/adapter/graphql", default-features = false } +kamu-adapter-http = { version = "0.213.1", path = "src/adapter/http", default-features = false } +kamu-adapter-odata = { version = "0.213.1", path = "src/adapter/odata", default-features = false } +kamu-adapter-oauth = { version = "0.213.1", path = "src/adapter/oauth", default-features = false } # E2E -kamu-cli-e2e-common = { version = "0.209.0", path = "src/e2e/app/cli/common", default-features = false } -kamu-cli-e2e-common-macros = { version = "0.209.0", path = "src/e2e/app/cli/common-macros", default-features = false } -kamu-cli-e2e-repo-tests = { version = "0.209.0", path = "src/e2e/app/cli/repo-tests", default-features = false } +kamu-cli-e2e-common = { version = "0.213.1", path = "src/e2e/app/cli/common", default-features = false } +kamu-cli-e2e-common-macros = { version = "0.213.1", path = "src/e2e/app/cli/common-macros", default-features = false } +kamu-cli-e2e-repo-tests = { version = "0.213.1", path = "src/e2e/app/cli/repo-tests", default-features = false } [workspace.package] -version = "0.209.0" +version = "0.213.1" edition = "2021" homepage = "https://github.com/kamu-data/kamu-cli" repository = "https://github.com/kamu-data/kamu-cli" @@ -264,7 +264,7 @@ debug = "line-tables-only" # 
Use this section to test or apply emergency overrides to dependencies # See: https://doc.rust-lang.org/cargo/reference/overriding-dependencies.html [patch.crates-io] -# datafusion = { git = 'https://github.com/apache/datafusion.git', tag = '42.0.0-rc1' } +# datafusion = { git = 'https://github.com/apache/datafusion.git', branch = 'main' } # datafusion-common = { git = 'https://github.com/apache/datafusion.git', tag = '42.0.0-rc1' } # datafusion-execution = { git = 'https://github.com/apache/datafusion.git', tag = '42.0.0-rc1' } # datafusion-expr = { git = 'https://github.com/apache/datafusion.git', tag = '42.0.0-rc1' } diff --git a/LICENSE.txt b/LICENSE.txt index 55ec9e2c1..be2fa7cc1 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -11,7 +11,7 @@ Business Source License 1.1 Licensor: Kamu Data, Inc. -Licensed Work: Kamu CLI Version 0.209.0 +Licensed Work: Kamu CLI Version 0.213.1 The Licensed Work is © 2023 Kamu Data, Inc. Additional Use Grant: You may use the Licensed Work for any purpose, @@ -24,7 +24,7 @@ Additional Use Grant: You may use the Licensed Work for any purpose, Licensed Work where data or transformations are controlled by such third parties. -Change Date: 2028-11-26 +Change Date: 2028-12-18 Change License: Apache License, Version 2.0 diff --git a/Makefile b/Makefile index 63305e03e..8641fe8c2 100644 --- a/Makefile +++ b/Makefile @@ -12,6 +12,8 @@ SQLITE_CRATES := ./src/infra/accounts/sqlite ./src/infra/auth-rebac/sqlite ./src ALL_DATABASE_CRATES := $(POSTGRES_CRATES) $(MYSQL_CRATES) $(SQLITE_CRATES) MIGRATION_DIRS := ./migrations/mysql ./migrations/postgres ./migrations/sqlite +KAMU_CONTAINER_RUNTIME_TYPE ?= podman + ############################################################################### # Lint ############################################################################### @@ -48,7 +50,7 @@ clippy: # See: https://github.com/IBM/openapi-validator .PHONY: lint-openapi lint-openapi: - docker run --rm -t \ + $(KAMU_CONTAINER_RUNTIME_TYPE) run --rm -t \ -v "${PWD}:/data:ro" \ ibmdevxsdk/openapi-validator:latest \ --config src/adapter/http/resources/openapi/linter-config.yaml \ @@ -89,9 +91,9 @@ sqlx-local-setup: sqlx-local-setup-postgres sqlx-local-setup-mariadb sqlx-local- .PHONY: sqlx-local-setup-postgres sqlx-local-setup-postgres: - docker pull postgres:latest - docker stop kamu-postgres || true && docker rm kamu-postgres || true - docker run --name kamu-postgres -p 5432:5432 -e POSTGRES_USER=root -e POSTGRES_PASSWORD=root -d postgres:latest + $(KAMU_CONTAINER_RUNTIME_TYPE) pull postgres:latest + $(KAMU_CONTAINER_RUNTIME_TYPE) stop kamu-postgres || true && $(KAMU_CONTAINER_RUNTIME_TYPE) rm kamu-postgres || true + $(KAMU_CONTAINER_RUNTIME_TYPE) run --name kamu-postgres -p 5432:5432 -e POSTGRES_USER=root -e POSTGRES_PASSWORD=root -d postgres:latest $(foreach crate,$(POSTGRES_CRATES),$(call Setup_EnvFile,postgres,5432,$(crate))) sleep 3 # Letting the container to start until PGPASSWORD=root psql -h localhost -U root -p 5432 -d root -c '\q'; do sleep 3; done @@ -100,9 +102,9 @@ sqlx-local-setup-postgres: .PHONY: sqlx-local-setup-mariadb sqlx-local-setup-mariadb: - docker pull mariadb:latest - docker stop kamu-mariadb || true && docker rm kamu-mariadb || true - docker run --name kamu-mariadb -p 3306:3306 -e MARIADB_ROOT_PASSWORD=root -d mariadb:latest + $(KAMU_CONTAINER_RUNTIME_TYPE) pull mariadb:latest + $(KAMU_CONTAINER_RUNTIME_TYPE) stop kamu-mariadb || true && $(KAMU_CONTAINER_RUNTIME_TYPE) rm kamu-mariadb || true + $(KAMU_CONTAINER_RUNTIME_TYPE) run --name kamu-mariadb -p 
3306:3306 -e MARIADB_ROOT_PASSWORD=root -d mariadb:latest $(foreach crate,$(MYSQL_CRATES),$(call Setup_EnvFile,mysql,3306,$(crate))) sleep 10 # Letting the container to start until mariadb -h localhost -P 3306 -u root --password=root sys --protocol=tcp -e "SELECT 'Hello'" -b; do sleep 3; done @@ -121,12 +123,12 @@ sqlx-local-clean: sqlx-local-clean-postgres sqlx-local-clean-mariadb sqlx-local- .PHONY: sqlx-local-clean-postgres sqlx-local-clean-postgres: - docker stop kamu-postgres || true && docker rm kamu-postgres || true + $(KAMU_CONTAINER_RUNTIME_TYPE) stop kamu-postgres || true && $(KAMU_CONTAINER_RUNTIME_TYPE) rm kamu-postgres || true $(foreach crate,$(POSTGRES_CRATES),rm $(crate)/.env -f ;) .PHONY: sqlx-local-clean-mariadb sqlx-local-clean-mariadb: - docker stop kamu-mariadb || true && docker rm kamu-mariadb || true + $(KAMU_CONTAINER_RUNTIME_TYPE) stop kamu-mariadb || true && $(KAMU_CONTAINER_RUNTIME_TYPE) rm kamu-mariadb || true $(foreach crate,$(MYSQL_CRATES),rm $(crate)/.env -f ;) .PHONY: sqlx-local-clean-sqlite diff --git a/deny.toml b/deny.toml index 016b1dd71..2fb06c86b 100644 --- a/deny.toml +++ b/deny.toml @@ -99,5 +99,8 @@ ignore = [ "RUSTSEC-2024-0370", # Unmaintained (instant) # https://rustsec.org/advisories/RUSTSEC-2024-0384.html - "RUSTSEC-2024-0384" + "RUSTSEC-2024-0384", + # Security: requires update in hickory-resolver, which is not available yet + # https://github.com/rust-lang/crates.io-index + "RUSTSEC-2024-0421" ] diff --git a/images/kamu-base-git/Dockerfile b/images/kamu-base-git/Dockerfile index fb1e7364a..0544a6403 100644 --- a/images/kamu-base-git/Dockerfile +++ b/images/kamu-base-git/Dockerfile @@ -24,4 +24,4 @@ RUN apt update \ FROM ghcr.io/kamu-data/kamu-base:latest -COPY --from=builder /app/target/debug/kamu-cli /usr/bin/kamu +COPY --from=builder /app/target/debug/kamu-cli /usr/local/bin/kamu diff --git a/images/kamu-dev-base/Makefile b/images/kamu-dev-base/Makefile index 201f525a6..1b9d5848f 100644 --- a/images/kamu-dev-base/Makefile +++ b/images/kamu-dev-base/Makefile @@ -1,9 +1,23 @@ -KAMU_BASE_VERSION = $(shell cargo metadata --format-version 1 | jq -r '.packages[] | select( .name == "kamu") | .version') +KAMU_BASE_VERSION=$(shell cargo metadata --format-version 1 | jq -r '.packages[] | select( .name == "kamu") | .version') IMAGE_REPO=ghcr.io/kamu-data IMAGE_NAME=kamu-dev-base TARGET=x86_64-unknown-linux-gnu -# Branch name -TAG=$(shell git rev-parse --abbrev-ref HEAD | sed 's/\//-/g') + +# Branch name (feature/private-dataset -> feature-private-dataset) +BRANCH=$(shell git rev-parse --abbrev-ref HEAD | sed 's/\//-/g') +# Short commit hash (603bf0885) +COMMIT=$(shell git rev-parse --short HEAD) +# Example: feature-private-dataset-603bf0885 +TAG=$(BRANCH)-$(COMMIT) +# Example: feature-private-dataset-latest +TAG_LATEST=$(BRANCH)-latest + +ESC=\033 +GREEN=$(ESC)[32m +BOLD=$(ESC)[1m +RESET=$(ESC)[0m + +################################################################################ .PHONY: build @@ -21,8 +35,23 @@ image: build -t $(IMAGE_REPO)/$(IMAGE_NAME):$(TAG) \ . 
rm -rf ./tmp + @echo + @echo -e "$(GREEN)✨ Image successfully built:$(RESET)" + @echo -e "· $(IMAGE_REPO)/$(IMAGE_NAME):$(TAG)" + @echo .PHONY: image-push image-push: docker push $(IMAGE_REPO)/$(IMAGE_NAME):$(TAG) + docker tag $(IMAGE_REPO)/$(IMAGE_NAME):$(TAG) $(IMAGE_REPO)/$(IMAGE_NAME):$(TAG_LATEST) + docker push $(IMAGE_REPO)/$(IMAGE_NAME):$(TAG_LATEST) + + @echo + @echo -e "$(GREEN)✨ Image successfully pushed:$(RESET)" + @echo -e "· $(BOLD)https://$(IMAGE_REPO)/$(IMAGE_NAME):$(TAG) $(RESET)" + @echo -e "· $(BOLD)https://$(IMAGE_REPO)/$(IMAGE_NAME):$(TAG_LATEST) $(RESET)" + @echo + + +################################################################################ diff --git a/images/kamu-dev-base/README.md b/images/kamu-dev-base/README.md new file mode 100644 index 000000000..6fba962fd --- /dev/null +++ b/images/kamu-dev-base/README.md @@ -0,0 +1,61 @@ +# Building `kamu` dev images + +This can be useful if you want to provide an image built from changes that are not yet in the main branch. +For example, during the development of a feature. + +⚠️ Please note: for flexibility, this image does NOT include any kamu workspace. + +### Build + +```shell +# During the release build, we will be using cached SQLX queries +make sqlx-local-clean + +cd ./images/kamu-dev-base/ +make image +# Outputs: +# +# ✨ Image successfully built: +# · ghcr.io/kamu-data/kamu-dev-base:feature-private-datasets-603bf0885 +``` + +### Verification + +```shell +# We're taking IMAGE from the output of the previous command +IMAGE=ghcr.io/kamu-data/kamu-dev-base:feature-private-datasets-603bf0885 + +docker run --rm \ + ${IMAGE} \ + kamu version + +# Outputs: +# +# ... +# gitSha: 603bf08854ad2f93f2c20070bbcd7e8c7c547173 +# gitBranch: feature/private-datasets +# +``` + +### Upload + +Push the image to the registry (the branch's `-latest` tag is pushed as well): +```shell +make image-push +# Outputs: +# +# ✨ Image successfully pushed: +# · https://ghcr.io/kamu-data/kamu-dev-base:feature-private-datasets-603bf0885 +# · https://ghcr.io/kamu-data/kamu-dev-base:feature-private-datasets-latest +``` + + +### Running with workspace attached + +⚠️ Make sure you are in the directory where `.kamu` is located.
+ +```shell +docker run --rm -v ./.kamu:/opt/kamu/workspace/.kamu kamu-base:git kamu ls -o table + +# Outputs datasets +``` diff --git a/migrations/postgres/20241125193114_dataset_dependencies.sql b/migrations/postgres/20241125193114_dataset_dependencies.sql new file mode 100644 index 000000000..ad7b62b8d --- /dev/null +++ b/migrations/postgres/20241125193114_dataset_dependencies.sql @@ -0,0 +1,19 @@ +/* ------------------------------ */ + +CREATE TABLE dataset_dependencies +( + -- Note: no foreign keys here, as external orphans are possible in the graph + upstream_dataset_id VARCHAR(100) NOT NULL, + downstream_dataset_id VARCHAR(100) NOT NULL +); + +CREATE UNIQUE INDEX idx_dataset_dependencies + ON dataset_dependencies (upstream_dataset_id, downstream_dataset_id); + +CREATE INDEX idx_dataset_dependencies_upstream_dataset_id + ON dataset_dependencies(upstream_dataset_id); + +CREATE INDEX idx_dataset_dependencies_downstream_dataset_id + ON dataset_dependencies (downstream_dataset_id); + +/* ------------------------------ */ diff --git a/migrations/postgres/20241217112645_delete-env-var_fk.sql b/migrations/postgres/20241217112645_delete-env-var_fk.sql new file mode 100644 index 000000000..4b75fe9b7 --- /dev/null +++ b/migrations/postgres/20241217112645_delete-env-var_fk.sql @@ -0,0 +1,5 @@ +alter table dataset_env_vars +add constraint dataset_env_var_dataset_entry + foreign key (dataset_id) + references dataset_entries(dataset_id) + on delete cascade; \ No newline at end of file diff --git a/migrations/postgres/20241217205719_executor2agent.sql b/migrations/postgres/20241217205719_executor2agent.sql new file mode 100644 index 000000000..208a5fe19 --- /dev/null +++ b/migrations/postgres/20241217205719_executor2agent.sql @@ -0,0 +1,16 @@ +/* ------------------------------ */ + +UPDATE outbox_messages SET producer_name = 'dev.kamu.domain.task-system.TaskAgent' + WHERE producer_name = 'dev.kamu.domain.task-system.TaskExecutor'; +UPDATE outbox_messages SET producer_name = 'dev.kamu.domain.flow-system.FlowAgent' + WHERE producer_name = 'dev.kamu.domain.flow-system.FlowExecutor'; + +UPDATE outbox_message_consumptions SET producer_name = 'dev.kamu.domain.task-system.TaskAgent' + WHERE producer_name = 'dev.kamu.domain.task-system.TaskExecutor'; +UPDATE outbox_message_consumptions SET producer_name = 'dev.kamu.domain.flow-system.FlowAgent' + WHERE producer_name = 'dev.kamu.domain.flow-system.FlowExecutor'; + +UPDATE outbox_message_consumptions SET consumer_name = 'dev.kamu.domain.flow-system.FlowAgent' + WHERE consumer_name = 'dev.kamu.domain.flow-system.FlowExecutor'; + +/* ------------------------------ */ diff --git a/migrations/sqlite/20241125192943_dataset_dependencies.sql b/migrations/sqlite/20241125192943_dataset_dependencies.sql new file mode 100644 index 000000000..ad7b62b8d --- /dev/null +++ b/migrations/sqlite/20241125192943_dataset_dependencies.sql @@ -0,0 +1,19 @@ +/* ------------------------------ */ + +CREATE TABLE dataset_dependencies +( + -- Note: no foreign keys here, as external orphans are possible in the graph + upstream_dataset_id VARCHAR(100) NOT NULL, + downstream_dataset_id VARCHAR(100) NOT NULL +); + +CREATE UNIQUE INDEX idx_dataset_dependencies + ON dataset_dependencies (upstream_dataset_id, downstream_dataset_id); + +CREATE INDEX idx_dataset_dependencies_upstream_dataset_id + ON dataset_dependencies(upstream_dataset_id); + +CREATE INDEX idx_dataset_dependencies_downstream_dataset_id + ON dataset_dependencies (downstream_dataset_id); + +/* ------------------------------ */ 
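Both the Postgres and SQLite `dataset_dependencies` migrations above store bare graph edges with no foreign keys, since orphaned upstream references are allowed. As an illustrative sketch only (not part of this patch), the indexed columns are sufficient to walk transitive downstream dependencies with a recursive query; the dataset ID literal below is a hypothetical placeholder:

```sql
-- Illustrative sketch: list all transitive downstream dependencies of one dataset.
-- Assumes the dataset_dependencies schema introduced by the migrations above;
-- 'did:odf:example' is a hypothetical placeholder ID.
WITH RECURSIVE downstream(dataset_id) AS (
    -- Direct downstream edges of the starting dataset
    SELECT downstream_dataset_id
    FROM dataset_dependencies
    WHERE upstream_dataset_id = 'did:odf:example'

    UNION

    -- Follow edges from already-discovered datasets
    SELECT d.downstream_dataset_id
    FROM dataset_dependencies AS d
    JOIN downstream ON d.upstream_dataset_id = downstream.dataset_id
)
SELECT dataset_id
FROM downstream;
```

Both Postgres and SQLite accept `WITH RECURSIVE`, and the `idx_dataset_dependencies_upstream_dataset_id` index covers the join in each recursive step.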
diff --git a/migrations/sqlite/20241217112645_delete-env-var_fk.sql b/migrations/sqlite/20241217112645_delete-env-var_fk.sql new file mode 100644 index 000000000..fdd2f420d --- /dev/null +++ b/migrations/sqlite/20241217112645_delete-env-var_fk.sql @@ -0,0 +1,25 @@ +CREATE TABLE dataset_env_vars_tmp( + id VARCHAR(36) NOT NULL PRIMARY KEY, + key VARCHAR(200) NOT NULL, + value BLOB NOT NULL, + secret_nonce BLOB, + created_at timestamptz NOT NULL, + dataset_id VARCHAR(100) NOT NULL, + CONSTRAINT dataset_env_var_dataset_entry + FOREIGN KEY (dataset_id) + REFERENCES dataset_entries(dataset_id) + ON DELETE CASCADE +); + +DROP INDEX idx_dataset_env_vars_key_dataset; +CREATE UNIQUE INDEX idx_dataset_env_vars_key_dataset ON dataset_env_vars_tmp(dataset_id, key); +DROP INDEX idx_dataset_env_vars_dataset_id; +CREATE INDEX idx_dataset_env_vars_dataset_id ON dataset_env_vars_tmp(dataset_id); + +INSERT INTO dataset_env_vars_tmp (id, key, value, secret_nonce, created_at, dataset_id) +SELECT id, key, value, secret_nonce, created_at, dataset_id +FROM dataset_env_vars; + +DROP TABLE dataset_env_vars; +ALTER TABLE dataset_env_vars_tmp +RENAME TO dataset_env_vars; \ No newline at end of file diff --git a/migrations/sqlite/20241217205726_executor2agent.sql b/migrations/sqlite/20241217205726_executor2agent.sql new file mode 100644 index 000000000..0e93608ef --- /dev/null +++ b/migrations/sqlite/20241217205726_executor2agent.sql @@ -0,0 +1,16 @@ +/* ------------------------------ */ + +UPDATE outbox_messages SET producer_name = 'dev.kamu.domain.task-system.TaskAgent' + WHERE producer_name = 'dev.kamu.domain.task-system.TaskExecutor'; +UPDATE outbox_messages SET producer_name = 'dev.kamu.domain.flow-system.FlowAgent' + WHERE producer_name = 'dev.kamu.domain.flow-system.FlowExecutor'; + +UPDATE outbox_message_consumptions SET producer_name = 'dev.kamu.domain.task-system.TaskAgent' + WHERE producer_name = 'dev.kamu.domain.task-system.TaskExecutor'; +UPDATE outbox_message_consumptions SET producer_name = 'dev.kamu.domain.flow-system.FlowAgent' + WHERE producer_name = 'dev.kamu.domain.flow-system.FlowExecutor'; + +UPDATE outbox_message_consumptions SET consumer_name = 'dev.kamu.domain.flow-system.FlowAgent' + WHERE consumer_name = 'dev.kamu.domain.flow-system.FlowExecutor'; + +/* ------------------------------ */ diff --git a/resources/openapi-mt.json b/resources/openapi-mt.json index f48ca482b..8dff21bf7 100644 --- a/resources/openapi-mt.json +++ b/resources/openapi-mt.json @@ -818,7 +818,7 @@ }, "termsOfService": "https://docs.kamu.dev/terms-of-service/", "title": "Kamu REST API", - "version": "0.209.0" + "version": "0.213.1" }, "openapi": "3.1.0", "paths": { diff --git a/resources/openapi.json b/resources/openapi.json index 7c0d45455..16a825304 100644 --- a/resources/openapi.json +++ b/resources/openapi.json @@ -818,7 +818,7 @@ }, "termsOfService": "https://docs.kamu.dev/terms-of-service/", "title": "Kamu REST API", - "version": "0.209.0" + "version": "0.213.1" }, "openapi": "3.1.0", "paths": { diff --git a/src/adapter/auth-oso-rebac/tests/tests/test_oso_dataset_authorizer.rs b/src/adapter/auth-oso-rebac/tests/tests/test_oso_dataset_authorizer.rs index 1685259fb..b412b6771 100644 --- a/src/adapter/auth-oso-rebac/tests/tests/test_oso_dataset_authorizer.rs +++ b/src/adapter/auth-oso-rebac/tests/tests/test_oso_dataset_authorizer.rs @@ -143,6 +143,8 @@ impl DatasetAuthorizerHarness { .add_value(current_account_subject) .add_value(predefined_accounts_config) .add::() + .add::() + .add::() .add::() .add_builder( 
OutboxImmediateImpl::builder() @@ -159,8 +161,6 @@ impl DatasetAuthorizerHarness { kamu_adapter_auth_oso_rebac::register_dependencies(&mut b); - kamu_auth_rebac_services::register_dependencies(&mut b, tenancy_config); - register_message_dispatcher::( &mut b, MESSAGE_PRODUCER_KAMU_CORE_DATASET_SERVICE, diff --git a/src/adapter/flight-sql/Cargo.toml b/src/adapter/flight-sql/Cargo.toml index bcd307d02..f6ec0eca2 100644 --- a/src/adapter/flight-sql/Cargo.toml +++ b/src/adapter/flight-sql/Cargo.toml @@ -26,7 +26,7 @@ arrow-flight = { version = "53", features = ["flight-sql-experimental"] } async-trait = { version = "0.1", default-features = false } base64 = { version = "0.22", default-features = false } dashmap = { version = "6", default-features = false } -datafusion = { version = "42", default-features = false } +datafusion = { version = "43", default-features = false } futures = "0.3" like = { version = "0.3", default-features = false } prost = { version = "0.13", default-features = false } diff --git a/src/adapter/flight-sql/src/lib.rs b/src/adapter/flight-sql/src/lib.rs index 4d0e1720c..cd79789cc 100644 --- a/src/adapter/flight-sql/src/lib.rs +++ b/src/adapter/flight-sql/src/lib.rs @@ -12,6 +12,7 @@ mod service; mod service_builder; mod session_factory; + pub use service::*; pub use service_builder::*; pub use session_factory::*; diff --git a/src/adapter/flight-sql/src/service.rs b/src/adapter/flight-sql/src/service.rs index 70bebaddd..eb542cd37 100644 --- a/src/adapter/flight-sql/src/service.rs +++ b/src/adapter/flight-sql/src/service.rs @@ -137,7 +137,7 @@ impl KamuFlightSqlService { .map_err(|e| Status::internal(format!("Error: {e}"))) } - #[allow(clippy::trivially_copy_pass_by_ref)] + #[expect(clippy::trivially_copy_pass_by_ref)] fn get_catalogs( &self, ctx: &SessionContext, @@ -1201,3 +1201,5 @@ impl FlightSqlService for KamuFlightSqlService { #[tracing::instrument(level = "debug", skip_all, fields(%id, ?result))] async fn register_sql_info(&self, id: i32, result: &SqlInfo) {} } + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/adapter/graphql/Cargo.toml b/src/adapter/graphql/Cargo.toml index df96b6c34..dd40ef1df 100644 --- a/src/adapter/graphql/Cargo.toml +++ b/src/adapter/graphql/Cargo.toml @@ -44,7 +44,7 @@ async-graphql = { version = "7", features = [ ] } async-trait = { version = "0.1", default-features = false } chrono = "0.4" -datafusion = { version = "42", default-features = false, features = [ +datafusion = { version = "43", default-features = false, features = [ "serde", ] } # TODO: Currently needed for type conversions but ideally should be encapsulated by kamu-core dill = "0.9" @@ -55,7 +55,6 @@ serde_json = "1" tokio = { version = "1", default-features = false, features = [] } tokio-stream = { version = "0.1", default-features = false } tracing = "0.1" -thiserror = { version = "1", default-features = false } url = { version = "2", default-features = false } uuid = { version = "1", default-features = false } diff --git a/src/adapter/graphql/src/guards.rs b/src/adapter/graphql/src/guards.rs index 0b45a47c1..8329b1db2 100644 --- a/src/adapter/graphql/src/guards.rs +++ b/src/adapter/graphql/src/guards.rs @@ -10,7 +10,7 @@ use async_graphql::{Context, Guard, Result}; use kamu_accounts::{AnonymousAccountReason, CurrentAccountSubject}; -use crate::prelude::from_catalog; +use crate::prelude::*; 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -30,7 +30,7 @@ impl LoggedInGuard { impl Guard for LoggedInGuard { async fn check(&self, ctx: &Context<'_>) -> Result<()> { - let current_account_subject = from_catalog::(ctx).unwrap(); + let current_account_subject = from_catalog_n!(ctx, CurrentAccountSubject); if let CurrentAccountSubject::Anonymous(reason) = current_account_subject.as_ref() { Err(async_graphql::Error::new(match reason { AnonymousAccountReason::NoAuthenticationProvided => { @@ -49,6 +49,8 @@ impl Guard for LoggedInGuard { pub const STAFF_ONLY_MESSAGE: &str = "Access restricted to administrators only"; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + pub struct AdminGuard {} impl AdminGuard { @@ -59,7 +61,7 @@ impl AdminGuard { impl Guard for AdminGuard { async fn check(&self, ctx: &Context<'_>) -> Result<()> { - let current_account_subject = from_catalog::(ctx).unwrap(); + let current_account_subject = from_catalog_n!(ctx, CurrentAccountSubject); match current_account_subject.as_ref() { CurrentAccountSubject::Logged(a) if a.is_admin => Ok(()), diff --git a/src/adapter/graphql/src/mutations/accounts_mut.rs b/src/adapter/graphql/src/mutations/accounts_mut.rs index d108668ce..469a9c193 100644 --- a/src/adapter/graphql/src/mutations/accounts_mut.rs +++ b/src/adapter/graphql/src/mutations/accounts_mut.rs @@ -18,15 +18,14 @@ use crate::utils::{check_logged_account_id_match, check_logged_account_name_matc pub struct AccountsMut; -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - #[Object] impl AccountsMut { /// Returns a mutable account by its id async fn by_id(&self, ctx: &Context<'_>, account_id: AccountID) -> Result> { - let authentication_service = from_catalog::(ctx).unwrap(); check_logged_account_id_match(ctx, &account_id)?; + let authentication_service = from_catalog_n!(ctx, dyn AuthenticationService); + let account_maybe = authentication_service.account_by_id(&account_id).await?; Ok(account_maybe.map(AccountMut::new)) } @@ -37,12 +36,15 @@ impl AccountsMut { ctx: &Context<'_>, account_name: AccountName, ) -> Result> { - let authentication_service = from_catalog::(ctx).unwrap(); check_logged_account_name_match(ctx, &account_name)?; + let authentication_service = from_catalog_n!(ctx, dyn AuthenticationService); + let account_maybe = authentication_service .account_by_name(&account_name) .await?; Ok(account_maybe.map(AccountMut::new)) } } + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/adapter/graphql/src/mutations/auth_mut.rs b/src/adapter/graphql/src/mutations/auth_mut.rs index e833ba2b2..bed46415f 100644 --- a/src/adapter/graphql/src/mutations/auth_mut.rs +++ b/src/adapter/graphql/src/mutations/auth_mut.rs @@ -25,8 +25,7 @@ impl AuthMut { login_method: String, login_credentials_json: String, ) -> Result { - let authentication_service = - from_catalog::(ctx).unwrap(); + let authentication_service = from_catalog_n!(ctx, dyn kamu_accounts::AuthenticationService); let login_result = authentication_service .login(login_method.as_str(), login_credentials_json) @@ -39,8 +38,7 @@ impl AuthMut { } async fn account_details(&self, ctx: &Context<'_>, access_token: String) -> Result { - let authentication_service = - from_catalog::(ctx).unwrap(); + let authentication_service 
= from_catalog_n!(ctx, dyn kamu_accounts::AuthenticationService); match authentication_service.account_by_token(access_token).await { Ok(a) => Ok(Account::from_account(a)), @@ -56,8 +54,7 @@ impl AuthMut { ) -> Result { check_logged_account_id_match(ctx, &account_id)?; - let access_token_service = - from_catalog::(ctx).unwrap(); + let access_token_service = from_catalog_n!(ctx, dyn kamu_accounts::AccessTokenService); match access_token_service .create_access_token(&token_name, &account_id) @@ -84,8 +81,7 @@ impl AuthMut { ) -> Result { check_access_token_valid(ctx, &token_id).await?; - let access_token_service = - from_catalog::(ctx).unwrap(); + let access_token_service = from_catalog_n!(ctx, dyn kamu_accounts::AccessTokenService); match access_token_service.revoke_access_token(&token_id).await { Ok(_) => Ok(RevokeResult::Success(RevokeResultSuccess { token_id })), diff --git a/src/adapter/graphql/src/mutations/dataset_env_vars_mut.rs b/src/adapter/graphql/src/mutations/dataset_env_vars_mut.rs index 5c786d636..52d2e3726 100644 --- a/src/adapter/graphql/src/mutations/dataset_env_vars_mut.rs +++ b/src/adapter/graphql/src/mutations/dataset_env_vars_mut.rs @@ -43,7 +43,7 @@ impl DatasetEnvVarsMut { ) -> Result { utils::check_dataset_write_access(ctx, &self.dataset_handle).await?; - let dataset_env_var_service = from_catalog::(ctx).unwrap(); + let dataset_env_var_service = from_catalog_n!(ctx, dyn DatasetEnvVarService); let dataset_env_var_value = if is_secret { DatasetEnvVarValue::Secret(SecretString::from(value)) @@ -85,7 +85,7 @@ impl DatasetEnvVarsMut { ) -> Result { utils::check_dataset_write_access(ctx, &self.dataset_handle).await?; - let dataset_env_var_service = from_catalog::(ctx).unwrap(); + let dataset_env_var_service = from_catalog_n!(ctx, dyn DatasetEnvVarService); match dataset_env_var_service .delete_dataset_env_var(&id.clone().into()) @@ -118,7 +118,7 @@ impl DatasetEnvVarsMut { ) -> Result { utils::check_dataset_write_access(ctx, &self.dataset_handle).await?; - let dataset_env_var_service = from_catalog::(ctx).unwrap(); + let dataset_env_var_service = from_catalog_n!(ctx, dyn DatasetEnvVarService); let dataset_env_var_value = if is_secret { DatasetEnvVarValue::Secret(SecretString::from(new_value)) } else { diff --git a/src/adapter/graphql/src/mutations/dataset_metadata_mut.rs b/src/adapter/graphql/src/mutations/dataset_metadata_mut.rs index df40c836c..2cc452ee8 100644 --- a/src/adapter/graphql/src/mutations/dataset_metadata_mut.rs +++ b/src/adapter/graphql/src/mutations/dataset_metadata_mut.rs @@ -18,9 +18,11 @@ use opendatafabric as odf; use super::{CommitResultAppendError, CommitResultSuccess, NoChanges}; use crate::mutations::MetadataChainMut; use crate::prelude::*; -use crate::utils::make_dataset_access_error; +use crate::utils::{get_dataset, make_dataset_access_error}; use crate::LoggedInGuard; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + pub struct DatasetMetadataMut { dataset_handle: odf::DatasetHandle, } @@ -32,13 +34,6 @@ impl DatasetMetadataMut { Self { dataset_handle } } - #[graphql(skip)] - fn get_dataset(&self, ctx: &Context<'_>) -> domain::ResolvedDataset { - // TODO: cut off this dependency - extract a higher level use case - let dataset_registry = from_catalog::(ctx).unwrap(); - dataset_registry.get_dataset_by_handle(&self.dataset_handle) - } - /// Access to the mutable metadata chain of the dataset async fn chain(&self) -> MetadataChainMut { 
MetadataChainMut::new(self.dataset_handle.clone()) @@ -51,7 +46,7 @@ impl DatasetMetadataMut { ctx: &Context<'_>, content: Option, ) -> Result { - let resolved_dataset = self.get_dataset(ctx); + let resolved_dataset = get_dataset(ctx, &self.dataset_handle)?; let old_attachments = resolved_dataset .as_metadata_chain() @@ -112,7 +107,7 @@ impl DatasetMetadataMut { attachments: new_attachments.into(), }; - let commit_event = from_catalog::(ctx).unwrap(); + let commit_event = from_catalog_n!(ctx, dyn CommitDatasetEventUseCase); let result = match commit_event .execute( diff --git a/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut.rs b/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut.rs index ada739a65..4d77495e2 100644 --- a/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut.rs +++ b/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut.rs @@ -9,7 +9,7 @@ use chrono::{DateTime, Utc}; use domain::{DeleteDatasetError, RenameDatasetError}; -use kamu_core::{self as domain, SetWatermarkUseCase}; +use kamu_core::{self as domain, SetWatermarkPlanningError, SetWatermarkUseCase}; use opendatafabric as odf; use crate::mutations::{ @@ -70,7 +70,7 @@ impl DatasetMut { })); } - let rename_dataset = from_catalog::(ctx).unwrap(); + let rename_dataset = from_catalog_n!(ctx, dyn domain::RenameDatasetUseCase); match rename_dataset .execute(&self.dataset_handle.as_local_ref(), &new_name) .await @@ -97,7 +97,7 @@ impl DatasetMut { /// Delete the dataset #[graphql(guard = "LoggedInGuard::new()")] async fn delete(&self, ctx: &Context<'_>) -> Result { - let delete_dataset = from_catalog::(ctx).unwrap(); + let delete_dataset = from_catalog_n!(ctx, dyn domain::DeleteDatasetUseCase); match delete_dataset .execute_via_handle(&self.dataset_handle) .await @@ -132,7 +132,7 @@ impl DatasetMut { ctx: &Context<'_>, watermark: DateTime, ) -> Result { - let set_watermark_use_case = from_catalog::(ctx).unwrap(); + let set_watermark_use_case = from_catalog_n!(ctx, dyn SetWatermarkUseCase); match set_watermark_use_case .execute(&self.dataset_handle, watermark) .await @@ -147,11 +147,11 @@ impl DatasetMut { new_head: new_head.into(), })) } - Err(e @ domain::SetWatermarkError::IsDerivative) => { - Ok(SetWatermarkResult::IsDerivative(SetWatermarkIsDerivative { - message: e.to_string(), - })) - } + Err( + e @ domain::SetWatermarkError::Planning(SetWatermarkPlanningError::IsDerivative), + ) => Ok(SetWatermarkResult::IsDerivative(SetWatermarkIsDerivative { + message: e.to_string(), + })), Err(e) => Err(e.int_err().into()), } } diff --git a/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut_utils.rs b/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut_utils.rs index 9386c08c7..b156d92db 100644 --- a/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut_utils.rs +++ b/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut_utils.rs @@ -19,8 +19,8 @@ pub(crate) async fn ensure_account_owns_dataset( ctx: &Context<'_>, dataset_handle: &odf::DatasetHandle, ) -> Result<()> { - let dataset_ownership_service = from_catalog::(ctx).unwrap(); - let logged_account = utils::get_logged_account(ctx); + let dataset_ownership_service = from_catalog_n!(ctx, dyn DatasetOwnershipService); + let logged_account = utils::get_logged_account(ctx)?; let not_owner = !dataset_ownership_service .is_dataset_owned_by(&dataset_handle.id, &logged_account.account_id) diff --git a/src/adapter/graphql/src/mutations/datasets_mut.rs b/src/adapter/graphql/src/mutations/datasets_mut.rs index 31284858e..8dc171fad 100644 --- 
a/src/adapter/graphql/src/mutations/datasets_mut.rs +++ b/src/adapter/graphql/src/mutations/datasets_mut.rs @@ -25,7 +25,7 @@ pub struct DatasetsMut; impl DatasetsMut { /// Returns a mutable dataset by its ID async fn by_id(&self, ctx: &Context<'_>, dataset_id: DatasetID) -> Result> { - let dataset_registry = from_catalog::(ctx).unwrap(); + let dataset_registry = from_catalog_n!(ctx, dyn domain::DatasetRegistry); let hdl = dataset_registry .try_resolve_dataset_handle_by_ref(&dataset_id.as_local_ref()) .await?; @@ -122,7 +122,7 @@ impl DatasetsMut { dataset_visibility: domain::DatasetVisibility, ) -> Result { let create_from_snapshot = - from_catalog::(ctx).unwrap(); + from_catalog_n!(ctx, dyn domain::CreateDatasetFromSnapshotUseCase); let create_options = CreateDatasetUseCaseOptions { dataset_visibility }; diff --git a/src/adapter/graphql/src/mutations/flows_mut/account_flow_configs_mut.rs b/src/adapter/graphql/src/mutations/flows_mut/account_flow_configs_mut.rs index c9518218a..32769e6d0 100644 --- a/src/adapter/graphql/src/mutations/flows_mut/account_flow_configs_mut.rs +++ b/src/adapter/graphql/src/mutations/flows_mut/account_flow_configs_mut.rs @@ -16,6 +16,8 @@ use opendatafabric::DatasetID; use crate::prelude::*; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + pub struct AccountFlowConfigsMut { account: Account, } @@ -29,7 +31,7 @@ impl AccountFlowConfigsMut { #[graphql(skip)] async fn get_account_dataset_ids(&self, ctx: &Context<'_>) -> Result> { - let dataset_ownership_service = from_catalog::(ctx).unwrap(); + let dataset_ownership_service = from_catalog_n!(ctx, dyn DatasetOwnershipService); let dataset_ids: Vec<_> = dataset_ownership_service .get_owned_datasets(&self.account.id) .await?; @@ -38,7 +40,7 @@ impl AccountFlowConfigsMut { } async fn resume_account_dataset_flows(&self, ctx: &Context<'_>) -> Result { - let flow_config_service = from_catalog::(ctx).unwrap(); + let flow_config_service = from_catalog_n!(ctx, dyn FlowConfigurationService); let account_dataset_ids = self.get_account_dataset_ids(ctx).await?; for dataset_id in &account_dataset_ids { @@ -52,7 +54,7 @@ impl AccountFlowConfigsMut { } async fn pause_account_dataset_flows(&self, ctx: &Context<'_>) -> Result { - let flow_config_service = from_catalog::(ctx).unwrap(); + let flow_config_service = from_catalog_n!(ctx, dyn FlowConfigurationService); let account_dataset_ids = self.get_account_dataset_ids(ctx).await?; for dataset_id in &account_dataset_ids { @@ -65,3 +67,5 @@ impl AccountFlowConfigsMut { Ok(true) } } + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/adapter/graphql/src/mutations/flows_mut/dataset_flow_configs_mut.rs b/src/adapter/graphql/src/mutations/flows_mut/dataset_flow_configs_mut.rs index 6873c461f..df79b2ac5 100644 --- a/src/adapter/graphql/src/mutations/flows_mut/dataset_flow_configs_mut.rs +++ b/src/adapter/graphql/src/mutations/flows_mut/dataset_flow_configs_mut.rs @@ -76,7 +76,7 @@ impl DatasetFlowConfigsMut { return Ok(SetFlowConfigResult::PreconditionsNotMet(e)); } - let flow_config_service = from_catalog::(ctx).unwrap(); + let flow_config_service = from_catalog_n!(ctx, dyn FlowConfigurationService); let configuration_rule: IngestRule = ingest .try_into() .map_err(|e: ScheduleCronError| GqlError::Gql(e.into()))?; @@ -144,7 +144,7 @@ impl DatasetFlowConfigsMut { return Ok(SetFlowTransformConfigResult::PreconditionsNotMet(e)); } - 
let flow_config_service = from_catalog::(ctx).unwrap(); + let flow_config_service = from_catalog_n!(ctx, dyn FlowConfigurationService); let res = flow_config_service .set_configuration( @@ -212,7 +212,7 @@ impl DatasetFlowConfigsMut { } ensure_scheduling_permission(ctx, &self.dataset_handle).await?; - let flow_config_service = from_catalog::(ctx).unwrap(); + let flow_config_service = from_catalog_n!(ctx, dyn FlowConfigurationService); let res = flow_config_service .set_configuration( @@ -240,7 +240,7 @@ impl DatasetFlowConfigsMut { ) -> Result { ensure_scheduling_permission(ctx, &self.dataset_handle).await?; - let flow_config_service = from_catalog::(ctx).unwrap(); + let flow_config_service = from_catalog_n!(ctx, dyn FlowConfigurationService); flow_config_service .pause_dataset_flows( @@ -261,7 +261,7 @@ impl DatasetFlowConfigsMut { ) -> Result { ensure_scheduling_permission(ctx, &self.dataset_handle).await?; - let flow_config_service = from_catalog::(ctx).unwrap(); + let flow_config_service = from_catalog_n!(ctx, dyn FlowConfigurationService); flow_config_service .resume_dataset_flows( diff --git a/src/adapter/graphql/src/mutations/flows_mut/dataset_flow_runs_mut.rs b/src/adapter/graphql/src/mutations/flows_mut/dataset_flow_runs_mut.rs index c87fe196e..f1d592f58 100644 --- a/src/adapter/graphql/src/mutations/flows_mut/dataset_flow_runs_mut.rs +++ b/src/adapter/graphql/src/mutations/flows_mut/dataset_flow_runs_mut.rs @@ -71,8 +71,8 @@ impl DatasetFlowRunsMut { // TODO: for some datasets launching manually might not be an option: // i.e., root datasets with push sources require input data to arrive - let flow_query_service = from_catalog::(ctx).unwrap(); - let logged_account = utils::get_logged_account(ctx); + let flow_query_service = from_catalog_n!(ctx, dyn fs::FlowQueryService); + let logged_account = utils::get_logged_account(ctx)?; let flow_run_snapshot = match FlowRunConfiguration::try_into_snapshot( ctx, @@ -121,7 +121,7 @@ impl DatasetFlowRunsMut { } // Attempt cancelling scheduled tasks - let flow_query_service = from_catalog::(ctx).unwrap(); + let flow_query_service = from_catalog_n!(ctx, dyn fs::FlowQueryService); let flow_state = flow_query_service .cancel_scheduled_tasks(flow_id.into()) .await @@ -132,8 +132,7 @@ impl DatasetFlowRunsMut { // Pause flow configuration regardless of current state. // Duplicate requests are auto-ignored. 
- let flow_configuration_service = - from_catalog::(ctx).unwrap(); + let flow_configuration_service = from_catalog_n!(ctx, dyn fs::FlowConfigurationService); flow_configuration_service .pause_flow_configuration(Utc::now(), flow_state.flow_key.clone()) .await?; diff --git a/src/adapter/graphql/src/mutations/flows_mut/flows_mut_utils.rs b/src/adapter/graphql/src/mutations/flows_mut/flows_mut_utils.rs index 140b998f6..bc9fa6933 100644 --- a/src/adapter/graphql/src/mutations/flows_mut/flows_mut_utils.rs +++ b/src/adapter/graphql/src/mutations/flows_mut/flows_mut_utils.rs @@ -36,7 +36,7 @@ pub(crate) async fn check_if_flow_belongs_to_dataset( flow_id: FlowID, dataset_handle: &odf::DatasetHandle, ) -> Result> { - let flow_query_service = from_catalog::(ctx).unwrap(); + let flow_query_service = from_catalog_n!(ctx, dyn fs::FlowQueryService); match flow_query_service.get_flow(flow_id.into()).await { Ok(flow_state) => match flow_state.flow_key { @@ -76,7 +76,7 @@ pub(crate) async fn ensure_expected_dataset_kind( let dataset_flow_type: kamu_flow_system::DatasetFlowType = dataset_flow_type.into(); match dataset_flow_type.dataset_kind_restriction() { Some(expected_kind) => { - let resolved_dataset = utils::get_dataset(ctx, dataset_handle); + let resolved_dataset = utils::get_dataset(ctx, dataset_handle)?; let dataset_kind = resolved_dataset .get_summary(GetSummaryOpts::default()) @@ -106,15 +106,13 @@ pub(crate) async fn ensure_flow_preconditions( dataset_flow_type: DatasetFlowType, flow_run_configuration: Option<&FlowRunConfiguration>, ) -> Result> { - let dataset_registry = from_catalog::(ctx).unwrap(); + let dataset_registry = from_catalog_n!(ctx, dyn DatasetRegistry); let target = dataset_registry.get_dataset_by_handle(dataset_handle); match dataset_flow_type { DatasetFlowType::Ingest => { - let polling_ingest_svc = - from_catalog::(ctx).unwrap(); - - let source_res = polling_ingest_svc + let metadata_query_service = from_catalog_n!(ctx, dyn kamu_core::MetadataQueryService); + let source_res = metadata_query_service .get_active_polling_source(target) .await .int_err()?; @@ -125,12 +123,8 @@ pub(crate) async fn ensure_flow_preconditions( } } DatasetFlowType::ExecuteTransform => { - let transform_request_planner = - from_catalog::(ctx).unwrap(); - - let source_res = transform_request_planner - .get_active_transform(target) - .await?; + let metadata_query_service = from_catalog_n!(ctx, dyn kamu_core::MetadataQueryService); + let source_res = metadata_query_service.get_active_transform(target).await?; if source_res.is_none() { return Ok(Some(FlowPreconditionsNotMet { preconditions: "No SetTransform event defined".to_string(), diff --git a/src/adapter/graphql/src/mutations/metadata_chain_mut.rs b/src/adapter/graphql/src/mutations/metadata_chain_mut.rs index 596cd1837..9d69e14f7 100644 --- a/src/adapter/graphql/src/mutations/metadata_chain_mut.rs +++ b/src/adapter/graphql/src/mutations/metadata_chain_mut.rs @@ -56,7 +56,7 @@ impl MetadataChainMut { } }; - let commit_dataset_event = from_catalog::(ctx).unwrap(); + let commit_dataset_event = from_catalog_n!(ctx, dyn CommitDatasetEventUseCase); let result = match commit_dataset_event .execute(&self.dataset_handle, event, domain::CommitOpts::default()) diff --git a/src/adapter/graphql/src/prelude.rs b/src/adapter/graphql/src/prelude.rs index e20dd9ff7..e5d0064f2 100644 --- a/src/adapter/graphql/src/prelude.rs +++ b/src/adapter/graphql/src/prelude.rs @@ -11,6 +11,6 @@ pub(crate) use async_graphql::*; pub(crate) use internal_error::*; pub(crate) use 
crate::scalars::*; -pub(crate) use crate::utils::{from_catalog, GqlError}; +pub(crate) use crate::utils::{from_catalog_n, GqlError}; pub(crate) type Result = ::core::result::Result; diff --git a/src/adapter/graphql/src/queries/accounts/account.rs b/src/adapter/graphql/src/queries/accounts/account.rs index c5ba37eb0..2b004625a 100644 --- a/src/adapter/graphql/src/queries/accounts/account.rs +++ b/src/adapter/graphql/src/queries/accounts/account.rs @@ -60,7 +60,7 @@ impl Account { ctx: &Context<'_>, account_id: odf::AccountID, ) -> Result { - let authentication_service = from_catalog::(ctx).unwrap(); + let authentication_service = from_catalog_n!(ctx, dyn AuthenticationService); let account_name = authentication_service .find_account_name_by_id(&account_id) @@ -75,7 +75,7 @@ impl Account { ctx: &Context<'_>, account_name: odf::AccountName, ) -> Result, InternalError> { - let authentication_service = from_catalog::(ctx).unwrap(); + let authentication_service = from_catalog_n!(ctx, dyn AuthenticationService); let maybe_account = authentication_service .account_by_name(&account_name) @@ -92,7 +92,7 @@ impl Account { if alias.is_multi_tenant() { Ok(Self::from_account_name(ctx, alias.account_name.as_ref().unwrap().clone()).await?) } else { - let current_account_subject = from_catalog::(ctx).unwrap(); + let current_account_subject = from_catalog_n!(ctx, CurrentAccountSubject); Ok(Some(match current_account_subject.as_ref() { CurrentAccountSubject::Anonymous(_) => Self::new( @@ -108,7 +108,7 @@ impl Account { #[graphql(skip)] async fn resolve_full_account_info(&self, ctx: &Context<'_>) -> Result { - let authentication_service = from_catalog::(ctx).unwrap(); + let authentication_service = from_catalog_n!(ctx, dyn AuthenticationService); let maybe_account_info = authentication_service .account_by_id(&self.account_id) diff --git a/src/adapter/graphql/src/queries/accounts/account_flow_configs.rs b/src/adapter/graphql/src/queries/accounts/account_flow_configs.rs index e6f9b9307..d9543d2df 100644 --- a/src/adapter/graphql/src/queries/accounts/account_flow_configs.rs +++ b/src/adapter/graphql/src/queries/accounts/account_flow_configs.rs @@ -29,11 +29,15 @@ impl AccountFlowConfigs { /// Checks if all configs of all datasets in account are disabled async fn all_paused(&self, ctx: &Context<'_>) -> Result { - let dataset_ownership_service = from_catalog::(ctx).unwrap(); + let (dataset_ownership_service, flow_config_service) = from_catalog_n!( + ctx, + dyn DatasetOwnershipService, + dyn FlowConfigurationService + ); + let owned_dataset_ids: Vec<_> = dataset_ownership_service .get_owned_datasets(&self.account.id) .await?; - let flow_config_service = from_catalog::(ctx).unwrap(); let mut all_configurations = flow_config_service .find_configurations_by_datasets(owned_dataset_ids) diff --git a/src/adapter/graphql/src/queries/accounts/account_flow_runs.rs b/src/adapter/graphql/src/queries/accounts/account_flow_runs.rs index 46b6f20a4..7fb5416e8 100644 --- a/src/adapter/graphql/src/queries/accounts/account_flow_runs.rs +++ b/src/adapter/graphql/src/queries/accounts/account_flow_runs.rs @@ -21,6 +21,8 @@ use super::Account; use crate::prelude::*; use crate::queries::{Dataset, DatasetConnection, Flow, FlowConnection, InitiatorFilterInput}; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + pub struct AccountFlowRuns { account: AccountEntity, } @@ -41,7 +43,7 @@ impl AccountFlowRuns { per_page: Option, filters: Option, ) -> Result { - let 
flow_query_service = from_catalog::(ctx).unwrap(); + let flow_query_service = from_catalog_n!(ctx, dyn fs::FlowQueryService); let page = page.unwrap_or(0); let per_page = per_page.unwrap_or(Self::DEFAULT_PER_PAGE); @@ -100,7 +102,7 @@ impl AccountFlowRuns { } async fn list_datasets_with_flow(&self, ctx: &Context<'_>) -> Result { - let flow_query_service = from_catalog::(ctx).unwrap(); + let flow_query_service = from_catalog_n!(ctx, dyn fs::FlowQueryService); let datasets_with_flows: Vec<_> = flow_query_service .list_all_datasets_with_flow_by_account(&self.account.id) @@ -111,7 +113,7 @@ impl AccountFlowRuns { .try_collect() .await?; - let dataset_registry = from_catalog::(ctx).unwrap(); + let dataset_registry = from_catalog_n!(ctx, dyn DatasetRegistry); let account = Account::new( self.account.id.clone().into(), diff --git a/src/adapter/graphql/src/queries/accounts/accounts.rs b/src/adapter/graphql/src/queries/accounts/accounts.rs index e163ab715..810af225f 100644 --- a/src/adapter/graphql/src/queries/accounts/accounts.rs +++ b/src/adapter/graphql/src/queries/accounts/accounts.rs @@ -20,8 +20,7 @@ pub struct Accounts; impl Accounts { /// Returns account by its ID async fn by_id(&self, ctx: &Context<'_>, account_id: AccountID) -> Result> { - let authentication_service = - from_catalog::(ctx).unwrap(); + let authentication_service = from_catalog_n!(ctx, dyn kamu_accounts::AuthenticationService); let account_id: odf::AccountID = account_id.into(); let maybe_account_name = authentication_service @@ -34,8 +33,7 @@ impl Accounts { /// Returns account by its name async fn by_name(&self, ctx: &Context<'_>, name: AccountName) -> Result> { - let authentication_service = - from_catalog::(ctx).unwrap(); + let authentication_service = from_catalog_n!(ctx, dyn kamu_accounts::AuthenticationService); let account_name: odf::AccountName = name.into(); diff --git a/src/adapter/graphql/src/queries/auth.rs b/src/adapter/graphql/src/queries/auth.rs index 2297aa5e6..20879ce1b 100644 --- a/src/adapter/graphql/src/queries/auth.rs +++ b/src/adapter/graphql/src/queries/auth.rs @@ -23,8 +23,7 @@ impl Auth { #[allow(clippy::unused_async)] async fn enabled_login_methods(&self, ctx: &Context<'_>) -> Result> { - let authentication_service = - from_catalog::(ctx).unwrap(); + let authentication_service = from_catalog_n!(ctx, dyn kamu_accounts::AuthenticationService); Ok(authentication_service.supported_login_methods()) } @@ -38,8 +37,7 @@ impl Auth { ) -> Result { check_logged_account_id_match(ctx, &account_id)?; - let access_token_service = - from_catalog::(ctx).unwrap(); + let access_token_service = from_catalog_n!(ctx, dyn kamu_accounts::AccessTokenService); let page = page.unwrap_or(0); let per_page = per_page.unwrap_or(Self::DEFAULT_PER_PAGE); diff --git a/src/adapter/graphql/src/queries/data.rs b/src/adapter/graphql/src/queries/data.rs index f06c9ccf1..0b69bf2e2 100644 --- a/src/adapter/graphql/src/queries/data.rs +++ b/src/adapter/graphql/src/queries/data.rs @@ -42,13 +42,13 @@ impl DataQueries { "Query", ); + let query_svc = from_catalog_n!(ctx, dyn domain::QueryService); + // TODO: Default to JsonSoA format once implemented let data_format = data_format.unwrap_or(DataBatchFormat::Json); let schema_format = schema_format.unwrap_or(DataSchemaFormat::Parquet); let limit = limit.unwrap_or(Self::DEFAULT_QUERY_LIMIT); - let query_svc = from_catalog::(ctx).unwrap(); - let query_result = match query_dialect { QueryDialect::SqlDataFusion => { let sql_result = query_svc @@ -88,7 +88,7 @@ impl DataQueries { /// Lists engines 
known to the system and recommended for use async fn known_engines(&self, ctx: &Context<'_>) -> Result> { - let query_svc = from_catalog::(ctx).unwrap(); + let query_svc = from_catalog_n!(ctx, dyn domain::QueryService); Ok(query_svc .get_known_engines() .await? diff --git a/src/adapter/graphql/src/queries/datasets/dataset.rs b/src/adapter/graphql/src/queries/datasets/dataset.rs index ecdfe219b..988912ce3 100644 --- a/src/adapter/graphql/src/queries/datasets/dataset.rs +++ b/src/adapter/graphql/src/queries/datasets/dataset.rs @@ -13,7 +13,7 @@ use opendatafabric as odf; use crate::prelude::*; use crate::queries::*; -use crate::utils::{ensure_dataset_env_vars_enabled, from_catalog_n}; +use crate::utils::{ensure_dataset_env_vars_enabled, get_dataset}; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -35,7 +35,7 @@ impl Dataset { #[graphql(skip)] pub async fn from_ref(ctx: &Context<'_>, dataset_ref: &odf::DatasetRef) -> Result { - let dataset_registry = from_catalog::(ctx).unwrap(); + let dataset_registry = from_catalog_n!(ctx, dyn domain::DatasetRegistry); // TODO: Should we resolve reference at this point or allow unresolved and fail // later? @@ -49,12 +49,6 @@ impl Dataset { Ok(Dataset::new(account, hdl)) } - #[graphql(skip)] - fn get_dataset(&self, ctx: &Context<'_>) -> domain::ResolvedDataset { - let dataset_registry = from_catalog::(ctx).unwrap(); - dataset_registry.get_dataset_by_handle(&self.dataset_handle) - } - /// Unique identifier of the dataset async fn id(&self) -> DatasetID { self.dataset_handle.id.clone().into() @@ -79,7 +73,7 @@ impl Dataset { /// Returns the kind of dataset (Root or Derivative) async fn kind(&self, ctx: &Context<'_>) -> Result { - let resolved_dataset = self.get_dataset(ctx); + let resolved_dataset = get_dataset(ctx, &self.dataset_handle)?; let summary = resolved_dataset .get_summary(domain::GetSummaryOpts::default()) .await @@ -92,7 +86,7 @@ impl Dataset { async fn visibility(&self, ctx: &Context<'_>) -> Result { let rebac_svc = from_catalog_n!(ctx, dyn kamu_auth_rebac::RebacService); - let resolved_dataset = self.get_dataset(ctx); + let resolved_dataset = get_dataset(ctx, &self.dataset_handle)?; let properties = rebac_svc .get_dataset_properties(resolved_dataset.get_id()) .await @@ -133,7 +127,7 @@ impl Dataset { // TODO: PERF: Avoid traversing the entire chain /// Creation time of the first metadata block in the chain async fn created_at(&self, ctx: &Context<'_>) -> Result> { - let resolved_dataset = self.get_dataset(ctx); + let resolved_dataset = get_dataset(ctx, &self.dataset_handle)?; Ok(resolved_dataset .as_metadata_chain() @@ -147,7 +141,7 @@ impl Dataset { /// Creation time of the most recent metadata block in the chain async fn last_updated_at(&self, ctx: &Context<'_>) -> Result> { - let resolved_dataset = self.get_dataset(ctx); + let resolved_dataset = get_dataset(ctx, &self.dataset_handle)?; Ok(resolved_dataset .as_metadata_chain() @@ -160,8 +154,7 @@ impl Dataset { async fn permissions(&self, ctx: &Context<'_>) -> Result { use kamu_core::auth; - let dataset_action_authorizer = - from_catalog::(ctx).unwrap(); + let dataset_action_authorizer = from_catalog_n!(ctx, dyn auth::DatasetActionAuthorizer); let allowed_actions = dataset_action_authorizer .get_allowed_actions(&self.dataset_handle) @@ -180,7 +173,7 @@ impl Dataset { /// Various endpoints for interacting with data async fn endpoints(&self, ctx: &Context<'_>) -> DatasetEndpoints<'_> { - let config = 
from_catalog::(ctx).unwrap(); + let config = crate::utils::unsafe_from_catalog_n!(ctx, ServerUrlConfig); DatasetEndpoints::new(&self.owner, self.dataset_handle.clone(), config) } diff --git a/src/adapter/graphql/src/queries/datasets/dataset_data.rs b/src/adapter/graphql/src/queries/datasets/dataset_data.rs index 0f49175ab..7fabb982a 100644 --- a/src/adapter/graphql/src/queries/datasets/dataset_data.rs +++ b/src/adapter/graphql/src/queries/datasets/dataset_data.rs @@ -11,6 +11,7 @@ use kamu_core::{self as domain, GetSummaryOpts, QueryError}; use opendatafabric as odf; use crate::prelude::*; +use crate::utils::get_dataset; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -30,8 +31,7 @@ impl DatasetData { /// Total number of records in this dataset #[tracing::instrument(level = "info", skip_all)] async fn num_records_total(&self, ctx: &Context<'_>) -> Result { - let dataset_registry = from_catalog::(ctx).unwrap(); - let resolved_dataset = dataset_registry.get_dataset_by_handle(&self.dataset_handle); + let resolved_dataset = get_dataset(ctx, &self.dataset_handle)?; let summary = resolved_dataset .get_summary(GetSummaryOpts::default()) .await @@ -43,8 +43,7 @@ impl DatasetData { /// caching #[tracing::instrument(level = "info", skip_all)] async fn estimated_size(&self, ctx: &Context<'_>) -> Result { - let dataset_registry = from_catalog::(ctx).unwrap(); - let resolved_dataset = dataset_registry.get_dataset_by_handle(&self.dataset_handle); + let resolved_dataset = get_dataset(ctx, &self.dataset_handle)?; let summary = resolved_dataset .get_summary(GetSummaryOpts::default()) .await @@ -77,12 +76,13 @@ impl DatasetData { ) -> Result { tracing::debug!(?data_format, ?schema_format, ?skip, ?limit, "Tail query"); + let query_svc = from_catalog_n!(ctx, dyn domain::QueryService); + // TODO: Default to JsonSoA format once implemented let data_format = data_format.unwrap_or(DataBatchFormat::Json); let schema_format = schema_format.unwrap_or(DataSchemaFormat::Parquet); let limit = limit.unwrap_or(Self::DEFAULT_TAIL_LIMIT); - let query_svc = from_catalog::(ctx).unwrap(); let tail_result = query_svc .tail( &self.dataset_handle.as_local_ref(), diff --git a/src/adapter/graphql/src/queries/datasets/dataset_env_vars.rs b/src/adapter/graphql/src/queries/datasets/dataset_env_vars.rs index d966fb3c3..bcedbbf72 100644 --- a/src/adapter/graphql/src/queries/datasets/dataset_env_vars.rs +++ b/src/adapter/graphql/src/queries/datasets/dataset_env_vars.rs @@ -37,7 +37,7 @@ impl DatasetEnvVars { ) -> Result { utils::check_dataset_read_access(ctx, &self.dataset_handle).await?; - let dataset_env_var_service = from_catalog::(ctx).unwrap(); + let dataset_env_var_service = from_catalog_n!(ctx, dyn DatasetEnvVarService); let dataset_env_var = dataset_env_var_service .get_dataset_env_var_by_id(&dataset_env_var_id) .await @@ -59,10 +59,11 @@ impl DatasetEnvVars { ) -> Result { utils::check_dataset_read_access(ctx, &self.dataset_handle).await?; + let dataset_env_var_service = from_catalog_n!(ctx, dyn DatasetEnvVarService); + let page = page.unwrap_or(0); let per_page = per_page.unwrap_or(Self::DEFAULT_PER_PAGE); - let dataset_env_var_service = from_catalog::(ctx).unwrap(); let dataset_env_var_listing = dataset_env_var_service .get_all_dataset_env_vars_by_dataset_id( &self.dataset_handle.id, diff --git a/src/adapter/graphql/src/queries/datasets/dataset_flow_configs.rs b/src/adapter/graphql/src/queries/datasets/dataset_flow_configs.rs index 
340e94d63..a094fea26 100644 --- a/src/adapter/graphql/src/queries/datasets/dataset_flow_configs.rs +++ b/src/adapter/graphql/src/queries/datasets/dataset_flow_configs.rs @@ -34,7 +34,7 @@ impl DatasetFlowConfigs { ) -> Result> { check_dataset_read_access(ctx, &self.dataset_handle).await?; - let flow_config_service = from_catalog::(ctx).unwrap(); + let flow_config_service = from_catalog_n!(ctx, dyn FlowConfigurationService); let maybe_flow_config = flow_config_service .find_configuration( FlowKeyDataset::new(self.dataset_handle.id.clone(), dataset_flow_type.into()) @@ -50,7 +50,7 @@ impl DatasetFlowConfigs { async fn all_paused(&self, ctx: &Context<'_>) -> Result { check_dataset_read_access(ctx, &self.dataset_handle).await?; - let flow_config_service = from_catalog::(ctx).unwrap(); + let flow_config_service = from_catalog_n!(ctx, dyn FlowConfigurationService); for dataset_flow_type in kamu_flow_system::DatasetFlowType::all() { let maybe_flow_config = flow_config_service .find_configuration( diff --git a/src/adapter/graphql/src/queries/datasets/dataset_flow_runs.rs b/src/adapter/graphql/src/queries/datasets/dataset_flow_runs.rs index 0625c5f8c..9aad65a50 100644 --- a/src/adapter/graphql/src/queries/datasets/dataset_flow_runs.rs +++ b/src/adapter/graphql/src/queries/datasets/dataset_flow_runs.rs @@ -45,7 +45,7 @@ impl DatasetFlowRuns { }); } - let flow_query_service = from_catalog::(ctx).unwrap(); + let flow_query_service = from_catalog_n!(ctx, dyn fs::FlowQueryService); let flow_state = flow_query_service .get_flow(flow_id.into()) @@ -66,7 +66,7 @@ impl DatasetFlowRuns { ) -> Result { utils::check_dataset_read_access(ctx, &self.dataset_handle).await?; - let flow_query_service = from_catalog::(ctx).unwrap(); + let flow_query_service = from_catalog_n!(ctx, dyn fs::FlowQueryService); let page = page.unwrap_or(0); let per_page = per_page.unwrap_or(Self::DEFAULT_PER_PAGE); @@ -124,7 +124,7 @@ impl DatasetFlowRuns { async fn list_flow_initiators(&self, ctx: &Context<'_>) -> Result { utils::check_dataset_read_access(ctx, &self.dataset_handle).await?; - let flow_query_service = from_catalog::(ctx).unwrap(); + let flow_query_service = from_catalog_n!(ctx, dyn fs::FlowQueryService); let flow_initiator_ids: Vec<_> = flow_query_service .list_all_flow_initiators_by_dataset(&self.dataset_handle.id) @@ -134,7 +134,7 @@ impl DatasetFlowRuns { .try_collect() .await?; - let authentication_service = from_catalog::(ctx).unwrap(); + let authentication_service = from_catalog_n!(ctx, dyn AuthenticationService); let matched_flow_initiators: Vec<_> = authentication_service .accounts_by_ids(flow_initiator_ids) diff --git a/src/adapter/graphql/src/queries/datasets/dataset_metadata.rs b/src/adapter/graphql/src/queries/datasets/dataset_metadata.rs index 968293eb9..1d89e5fbe 100644 --- a/src/adapter/graphql/src/queries/datasets/dataset_metadata.rs +++ b/src/adapter/graphql/src/queries/datasets/dataset_metadata.rs @@ -10,7 +10,6 @@ use chrono::prelude::*; use kamu_core::{ self as domain, - DatasetRegistry, MetadataChainExt, SearchSetAttachmentsVisitor, SearchSetInfoVisitor, @@ -22,6 +21,9 @@ use opendatafabric as odf; use crate::prelude::*; use crate::queries::*; use crate::scalars::DatasetPushStatuses; +use crate::utils::get_dataset; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -36,12 +38,6 @@ impl DatasetMetadata { Self 
{ dataset_handle } } - #[graphql(skip)] - fn get_dataset(&self, ctx: &Context<'_>) -> domain::ResolvedDataset { - let dataset_registry = from_catalog::(ctx).unwrap(); - dataset_registry.get_dataset_by_handle(&self.dataset_handle) - } - /// Access to the temporal metadata chain of the dataset async fn chain(&self) -> MetadataChain { MetadataChain::new(self.dataset_handle.clone()) @@ -49,7 +45,7 @@ impl DatasetMetadata { /// Last recorded watermark async fn current_watermark(&self, ctx: &Context<'_>) -> Result>> { - let resolved_dataset = self.get_dataset(ctx); + let resolved_dataset = get_dataset(ctx, &self.dataset_handle)?; Ok(resolved_dataset .as_metadata_chain() @@ -66,11 +62,11 @@ impl DatasetMetadata { ctx: &Context<'_>, format: Option, ) -> Result> { + let query_svc = from_catalog_n!(ctx, dyn domain::QueryService); + // TODO: Default to Arrow eventually let format = format.unwrap_or(DataSchemaFormat::Parquet); - let query_svc = from_catalog::(ctx).unwrap(); - if let Some(schema) = query_svc .get_schema(&self.dataset_handle.as_local_ref()) .await @@ -87,8 +83,11 @@ impl DatasetMetadata { /// Current upstream dependencies of a dataset async fn current_upstream_dependencies(&self, ctx: &Context<'_>) -> Result> { - let dependency_graph_service = - from_catalog::(ctx).unwrap(); + let (dependency_graph_service, dataset_registry) = from_catalog_n!( + ctx, + dyn domain::DependencyGraphService, + dyn domain::DatasetRegistry + ); use tokio_stream::StreamExt; let upstream_dataset_ids: Vec<_> = dependency_graph_service @@ -98,7 +97,6 @@ impl DatasetMetadata { .collect() .await; - let dataset_registry = from_catalog::(ctx).unwrap(); let mut upstream = Vec::with_capacity(upstream_dataset_ids.len()); for upstream_dataset_id in upstream_dataset_ids { let hdl = dataset_registry @@ -122,8 +120,11 @@ impl DatasetMetadata { // TODO: Convert to collection /// Current downstream dependencies of a dataset async fn current_downstream_dependencies(&self, ctx: &Context<'_>) -> Result> { - let dependency_graph_service = - from_catalog::(ctx).unwrap(); + let (dependency_graph_service, dataset_registry) = from_catalog_n!( + ctx, + dyn domain::DependencyGraphService, + dyn domain::DatasetRegistry + ); use tokio_stream::StreamExt; let downstream_dataset_ids: Vec<_> = dependency_graph_service @@ -133,7 +134,6 @@ impl DatasetMetadata { .collect() .await; - let dataset_registry = from_catalog::(ctx).unwrap(); let mut downstream = Vec::with_capacity(downstream_dataset_ids.len()); for downstream_dataset_id in downstream_dataset_ids { let hdl = dataset_registry @@ -156,11 +156,15 @@ impl DatasetMetadata { /// Current polling source used by the root dataset async fn current_polling_source(&self, ctx: &Context<'_>) -> Result> { - let dataset_registry = from_catalog::(ctx).unwrap(); - let polling_ingest_svc = from_catalog::(ctx).unwrap(); - - let source = polling_ingest_svc - .get_active_polling_source(dataset_registry.get_dataset_by_handle(&self.dataset_handle)) + let (dataset_registry, metadata_query_service) = from_catalog_n!( + ctx, + dyn domain::DatasetRegistry, + dyn domain::MetadataQueryService + ); + + let target = dataset_registry.get_dataset_by_handle(&self.dataset_handle); + let source = metadata_query_service + .get_active_polling_source(target) .await .int_err()?; @@ -169,11 +173,15 @@ impl DatasetMetadata { /// Current push sources used by the root dataset async fn current_push_sources(&self, ctx: &Context<'_>) -> Result> { - let push_ingest_svc = from_catalog::(ctx).unwrap(); - let dataset_registry = 
from_catalog::(ctx).unwrap(); - - let mut push_sources: Vec = push_ingest_svc - .get_active_push_sources(dataset_registry.get_dataset_by_handle(&self.dataset_handle)) + let (metadata_query_service, dataset_registry) = from_catalog_n!( + ctx, + dyn domain::MetadataQueryService, + dyn domain::DatasetRegistry + ); + + let target = dataset_registry.get_dataset_by_handle(&self.dataset_handle); + let mut push_sources: Vec = metadata_query_service + .get_active_push_sources(target) .await .int_err()? .into_iter() @@ -187,7 +195,7 @@ impl DatasetMetadata { /// Sync statuses of push remotes async fn push_sync_statuses(&self, ctx: &Context<'_>) -> Result { - let service = from_catalog::(ctx).unwrap(); + let service = from_catalog_n!(ctx, dyn domain::RemoteStatusService); let statuses = service.check_remotes_status(&self.dataset_handle).await?; Ok(statuses.into()) @@ -195,21 +203,21 @@ impl DatasetMetadata { /// Current transformation used by the derivative dataset async fn current_transform(&self, ctx: &Context<'_>) -> Result> { - let transform_request_planner = - from_catalog::(ctx).unwrap(); - - let dataset_registry = from_catalog::(ctx).unwrap(); + let (metadata_query_service, dataset_registry) = from_catalog_n!( + ctx, + dyn kamu_core::MetadataQueryService, + dyn domain::DatasetRegistry + ); - let source = transform_request_planner - .get_active_transform(dataset_registry.get_dataset_by_handle(&self.dataset_handle)) - .await?; + let target = dataset_registry.get_dataset_by_handle(&self.dataset_handle); + let source = metadata_query_service.get_active_transform(target).await?; Ok(source.map(|(_hash, block)| block.event.into())) } /// Current descriptive information about the dataset async fn current_info(&self, ctx: &Context<'_>) -> Result { - let resolved_dataset = self.get_dataset(ctx); + let resolved_dataset = get_dataset(ctx, &self.dataset_handle)?; Ok(resolved_dataset .as_metadata_chain() @@ -229,7 +237,7 @@ impl DatasetMetadata { /// Current readme file as discovered from attachments associated with the /// dataset async fn current_readme(&self, ctx: &Context<'_>) -> Result> { - let resolved_dataset = self.get_dataset(ctx); + let resolved_dataset = get_dataset(ctx, &self.dataset_handle)?; Ok(resolved_dataset .as_metadata_chain() @@ -250,7 +258,7 @@ impl DatasetMetadata { /// Current license associated with the dataset async fn current_license(&self, ctx: &Context<'_>) -> Result> { - let resolved_dataset = self.get_dataset(ctx); + let resolved_dataset = get_dataset(ctx, &self.dataset_handle)?; Ok(resolved_dataset .as_metadata_chain() @@ -263,7 +271,7 @@ impl DatasetMetadata { /// Current vocabulary associated with the dataset async fn current_vocab(&self, ctx: &Context<'_>) -> Result> { - let resolved_dataset = self.get_dataset(ctx); + let resolved_dataset = get_dataset(ctx, &self.dataset_handle)?; Ok(resolved_dataset .as_metadata_chain() diff --git a/src/adapter/graphql/src/queries/datasets/datasets.rs b/src/adapter/graphql/src/queries/datasets/datasets.rs index e807aadab..546ccfa85 100644 --- a/src/adapter/graphql/src/queries/datasets/datasets.rs +++ b/src/adapter/graphql/src/queries/datasets/datasets.rs @@ -27,7 +27,7 @@ impl Datasets { /// Returns dataset by its ID async fn by_id(&self, ctx: &Context<'_>, dataset_id: DatasetID) -> Result> { - let dataset_registry = from_catalog::(ctx).unwrap(); + let dataset_registry = from_catalog_n!(ctx, dyn domain::DatasetRegistry); let hdl = dataset_registry .try_resolve_dataset_handle_by_ref(&dataset_id.as_local_ref()) .await?; @@ -49,9 +49,8 @@ 
impl Datasets { account_name: AccountName, dataset_name: DatasetName, ) -> Result> { + let dataset_registry = from_catalog_n!(ctx, dyn domain::DatasetRegistry); let dataset_alias = odf::DatasetAlias::new(Some(account_name.into()), dataset_name.into()); - - let dataset_registry = from_catalog::(ctx).unwrap(); let hdl = dataset_registry .try_resolve_dataset_handle_by_ref(&dataset_alias.into_local_ref()) .await?; @@ -76,7 +75,7 @@ impl Datasets { page: Option, per_page: Option, ) -> Result { - let dataset_registry = from_catalog::(ctx).unwrap(); + let dataset_registry = from_catalog_n!(ctx, dyn domain::DatasetRegistry); let page = page.unwrap_or(0); let per_page = per_page.unwrap_or(Self::DEFAULT_PER_PAGE); @@ -108,8 +107,7 @@ impl Datasets { page: Option, per_page: Option, ) -> Result { - let authentication_service = - from_catalog::(ctx).unwrap(); + let authentication_service = from_catalog_n!(ctx, dyn kamu_accounts::AuthenticationService); let account_id: odf::AccountID = account_id.into(); let maybe_account_name = authentication_service diff --git a/src/adapter/graphql/src/queries/datasets/metadata_chain.rs b/src/adapter/graphql/src/queries/datasets/metadata_chain.rs index 61c0636ee..3208ac2fb 100644 --- a/src/adapter/graphql/src/queries/datasets/metadata_chain.rs +++ b/src/adapter/graphql/src/queries/datasets/metadata_chain.rs @@ -13,6 +13,7 @@ use opendatafabric as odf; use crate::prelude::*; use crate::queries::Account; +use crate::utils::get_dataset; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // MetadataRef @@ -41,16 +42,10 @@ impl MetadataChain { Self { dataset_handle } } - #[graphql(skip)] - fn get_dataset(&self, ctx: &Context<'_>) -> domain::ResolvedDataset { - let dataset_registry = from_catalog::(ctx).unwrap(); - dataset_registry.get_dataset_by_handle(&self.dataset_handle) - } - /// Returns all named metadata block references #[tracing::instrument(level = "info", skip_all)] async fn refs(&self, ctx: &Context<'_>) -> Result> { - let resolved_dataset = self.get_dataset(ctx); + let resolved_dataset = get_dataset(ctx, &self.dataset_handle)?; Ok(vec![BlockRef { name: "head".to_owned(), block_hash: resolved_dataset @@ -69,7 +64,7 @@ impl MetadataChain { ctx: &Context<'_>, hash: Multihash, ) -> Result> { - let resolved_dataset = self.get_dataset(ctx); + let resolved_dataset = get_dataset(ctx, &self.dataset_handle)?; let block = resolved_dataset .as_metadata_chain() .try_get_block(&hash) @@ -91,7 +86,7 @@ impl MetadataChain { ) -> Result> { use odf::serde::MetadataBlockSerializer; - let resolved_dataset = self.get_dataset(ctx); + let resolved_dataset = get_dataset(ctx, &self.dataset_handle)?; match resolved_dataset .as_metadata_chain() .try_get_block(&hash) @@ -119,10 +114,11 @@ impl MetadataChain { page: Option, per_page: Option, ) -> Result { + let resolved_dataset = get_dataset(ctx, &self.dataset_handle)?; + let page = page.unwrap_or(0); let per_page = per_page.unwrap_or(Self::DEFAULT_BLOCKS_PER_PAGE); - let resolved_dataset = self.get_dataset(ctx); let chain = resolved_dataset.as_metadata_chain(); let head = chain.resolve_ref(&domain::BlockRef::Head).await.int_err()?; diff --git a/src/adapter/graphql/src/queries/flows/flow.rs b/src/adapter/graphql/src/queries/flows/flow.rs index ec657784a..290701007 100644 --- a/src/adapter/graphql/src/queries/flows/flow.rs +++ b/src/adapter/graphql/src/queries/flows/flow.rs @@ -8,7 +8,7 @@ // by the Apache License, Version 2.0. 
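With the per-object `#[graphql(skip)] fn get_dataset` helpers removed, both `DatasetMetadata` and `MetadataChain` now go through the shared `utils::get_dataset(ctx, &handle)` function, which, like `get_logged_account`, returns a `Result` so that catalog-lookup failures propagate as GraphQL errors rather than unwrap panics. A hedged sketch of a caller in the new style; the resolver name is illustrative and only operations visible in the hunks above are used:

```rust
/// Sketch only: the `refs` and `blocks` resolvers above follow this shape.
async fn example_head_ref(&self, ctx: &Context<'_>) -> Result<Multihash> {
    // `?` surfaces a failed catalog lookup as an internal GraphQL error.
    let resolved_dataset = get_dataset(ctx, &self.dataset_handle)?;

    // Resolve the current head of the metadata chain.
    let head = resolved_dataset
        .as_metadata_chain()
        .resolve_ref(&domain::BlockRef::Head)
        .await
        .int_err()?;

    Ok(head.into())
}
```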
use chrono::{DateTime, Utc}; -use kamu_core::{DatasetChangesService, DatasetRegistry, DatasetRegistryExt, PollingIngestService}; +use kamu_core::{DatasetChangesService, DatasetRegistry, DatasetRegistryExt, MetadataQueryService}; use kamu_flow_system::FlowResultDatasetUpdate; use {kamu_flow_system as fs, opendatafabric as odf}; @@ -58,20 +58,22 @@ impl Flow { ) -> Result { Ok(match dataset_key.flow_type { fs::DatasetFlowType::Ingest => { - let dataset_registry = from_catalog::(ctx).unwrap(); - let resolved_dataset = dataset_registry + let (dataset_registry, metadata_query_service, dataset_changes_svc) = from_catalog_n!( + ctx, + dyn DatasetRegistry, + dyn MetadataQueryService, + dyn DatasetChangesService + ); + let target = dataset_registry .get_dataset_by_ref(&dataset_key.dataset_id.as_local_ref()) .await .int_err()?; - let polling_ingest_svc = from_catalog::(ctx).unwrap(); - let maybe_polling_source = polling_ingest_svc - .get_active_polling_source(resolved_dataset) + let maybe_polling_source = metadata_query_service + .get_active_polling_source(target) .await .int_err()?; - let dataset_changes_svc = from_catalog::(ctx).unwrap(); - let ingest_result = FlowDescriptionUpdateResult::from_maybe_flow_outcome( self.flow_state.outcome.as_ref(), &dataset_key.dataset_id, @@ -96,7 +98,7 @@ impl Flow { } } fs::DatasetFlowType::ExecuteTransform => { - let dataset_changes_svc = from_catalog::(ctx).unwrap(); + let dataset_changes_svc = from_catalog_n!(ctx, dyn DatasetChangesService); FlowDescriptionDataset::ExecuteTransform(FlowDescriptionDatasetExecuteTransform { dataset_id: dataset_key.dataset_id.clone().into(), @@ -169,7 +171,7 @@ impl Flow { /// History of flow events async fn history(&self, ctx: &Context<'_>) -> Result> { - let flow_event_store = from_catalog::(ctx).unwrap(); + let flow_event_store = from_catalog_n!(ctx, dyn fs::FlowEventStore); use futures::TryStreamExt; let flow_events: Vec<_> = flow_event_store diff --git a/src/adapter/graphql/src/queries/flows/flow_outcome.rs b/src/adapter/graphql/src/queries/flows/flow_outcome.rs index 04c185369..f10416d13 100644 --- a/src/adapter/graphql/src/queries/flows/flow_outcome.rs +++ b/src/adapter/graphql/src/queries/flows/flow_outcome.rs @@ -71,7 +71,7 @@ impl FlowOutcome { }), }), FlowError::InputDatasetCompacted(err) => { - let dataset_registry = from_catalog::(ctx).unwrap(); + let dataset_registry = from_catalog_n!(ctx, dyn DatasetRegistry); let hdl = dataset_registry .resolve_dataset_handle_by_ref(&err.dataset_id.as_local_ref()) .await diff --git a/src/adapter/graphql/src/queries/flows/flow_start_condition.rs b/src/adapter/graphql/src/queries/flows/flow_start_condition.rs index f6a1d4e43..b70a87121 100644 --- a/src/adapter/graphql/src/queries/flows/flow_start_condition.rs +++ b/src/adapter/graphql/src/queries/flows/flow_start_condition.rs @@ -35,8 +35,7 @@ impl FlowStartCondition { }), fs::FlowStartCondition::Throttling(t) => Self::Throttling((*t).into()), fs::FlowStartCondition::Batching(b) => { - let dataset_changes_service = - from_catalog::(ctx).unwrap(); + let dataset_changes_service = from_catalog_n!(ctx, dyn DatasetChangesService); // Start from zero increment let mut total_increment = DatasetIntervalIncrement::default(); diff --git a/src/adapter/graphql/src/queries/flows/flow_trigger.rs b/src/adapter/graphql/src/queries/flows/flow_trigger.rs index d9c1d75ad..efd45be39 100644 --- a/src/adapter/graphql/src/queries/flows/flow_trigger.rs +++ b/src/adapter/graphql/src/queries/flows/flow_trigger.rs @@ -33,7 +33,7 @@ impl FlowTrigger { 
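For readers tracking the `from_catalog_n!` calls above: the macro (added to `utils.rs` further down in this patch) simply pulls the `dill::Catalog` out of the GraphQL context and calls `get_one` once per requested type, returning a tuple when several types are listed; its doc comment notes that a missing component is converted with `.int_err()?`. A rough hand-written equivalent of a three-service call, written as an assumption-laden sketch rather than the literal expansion:

```rust
// Approximate expansion of:
//   from_catalog_n!(ctx, dyn DatasetRegistry, dyn MetadataQueryService, dyn DatasetChangesService)
// The exact error plumbing is simplified here.
let catalog = ctx.data::<dill::Catalog>().unwrap();
let (dataset_registry, metadata_query_service, dataset_changes_svc) = (
    catalog.get_one::<dyn DatasetRegistry>().int_err()?,
    catalog.get_one::<dyn MetadataQueryService>().int_err()?,
    catalog.get_one::<dyn DatasetChangesService>().int_err()?,
);
```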
fs::FlowTrigger::AutoPolling(auto_polling) => Self::AutoPolling(auto_polling.into()), fs::FlowTrigger::Push(push) => Self::Push(push.into()), fs::FlowTrigger::InputDatasetFlow(input) => { - let dataset_registry = from_catalog::(ctx).unwrap(); + let dataset_registry = from_catalog_n!(ctx, dyn DatasetRegistry); let hdl = dataset_registry .resolve_dataset_handle_by_ref(&input.dataset_id.as_local_ref()) .await diff --git a/src/adapter/graphql/src/scalars/flow_configuration.rs b/src/adapter/graphql/src/scalars/flow_configuration.rs index d7c284a4e..6e65543ee 100644 --- a/src/adapter/graphql/src/scalars/flow_configuration.rs +++ b/src/adapter/graphql/src/scalars/flow_configuration.rs @@ -522,10 +522,11 @@ impl FlowRunConfiguration { } } DatasetFlowType::Reset => { - let dataset_registry = from_catalog::(ctx).unwrap(); + let dataset_registry = + crate::utils::unsafe_from_catalog_n!(ctx, dyn kamu_core::DatasetRegistry); let resolved_dataset = dataset_registry.get_dataset_by_handle(dataset_handle); - // Assume unwrap safe such as we have checked this existance during + // Assume unwrap safe such as we have checked this existence during // validation step let current_head_hash = resolved_dataset .as_metadata_chain() diff --git a/src/adapter/graphql/src/utils.rs b/src/adapter/graphql/src/utils.rs index 94ede247d..abf34168e 100644 --- a/src/adapter/graphql/src/utils.rs +++ b/src/adapter/graphql/src/utils.rs @@ -7,33 +7,42 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -use std::sync::Arc; - use async_graphql::{Context, ErrorExtensions}; use internal_error::*; use kamu_accounts::{CurrentAccountSubject, GetAccessTokenError, LoggedAccount}; use kamu_core::auth::DatasetActionUnauthorizedError; use kamu_core::{DatasetRegistry, ResolvedDataset}; use kamu_datasets::DatasetEnvVarsConfig; -use kamu_task_system as ts; -use opendatafabric::{AccountName as OdfAccountName, DatasetHandle}; +use {kamu_task_system as ts, opendatafabric as odf}; use crate::prelude::{AccessTokenID, AccountID, AccountName}; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// TODO: Replace with from_catalog_n!() macro -// Return gql-specific error and get rid of unwraps -pub(crate) fn from_catalog(ctx: &Context<'_>) -> Result, dill::InjectionError> -where - T: ?Sized + Send + Sync + 'static, -{ - let cat = ctx.data::().unwrap(); - cat.get_one::() -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - +/// `unwrap()`-free helper macro to hide the logic for extracting DI components +/// from a catalog using [`async_graphql::Context`] that is present for each GQL +/// request. +/// +/// If one of the required DI components is not found, `.int_err()?` will be +/// initiated. +/// +/// There is also a variant of the macro for exceptional situations that uses +/// `unwrap()` internally: [`unsafe_from_catalog_n!`]. +/// +/// # Examples +/// +/// ``` +/// // Most often, we extract only one component: +/// let current_account_subject = from_catalog_n!(ctx, CurrentAccountSubject); +/// +/// // But sometimes, three at once: +/// let (dataset_registry, polling_ingest_svc, dataset_changes_svc) = from_catalog_n!( +/// ctx, +/// dyn DatasetRegistry, +/// dyn PollingIngestService, +/// dyn DatasetChangesService +/// ); +/// ``` macro_rules! 
from_catalog_n { ($gql_ctx:ident, $T:ty ) => {{ let catalog = $gql_ctx.data::().unwrap(); @@ -51,17 +60,43 @@ pub(crate) use from_catalog_n; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -pub(crate) fn get_dataset(ctx: &Context<'_>, dataset_handle: &DatasetHandle) -> ResolvedDataset { - let dataset_registry = from_catalog::(ctx).unwrap(); - dataset_registry.get_dataset_by_handle(dataset_handle) +/// Unsafe variant of [`from_catalog_n!`] macro. +/// +/// Try to avoid using it. +macro_rules! unsafe_from_catalog_n { + ($gql_ctx:ident, $T:ty ) => {{ + let catalog = $gql_ctx.data::().unwrap(); + + catalog.get_one::<$T>().unwrap() + }}; + ($gql_ctx:ident, $T:ty, $($Ts:ty),+) => {{ + let catalog = $gql_ctx.data::().unwrap(); + + ( catalog.get_one::<$T>().unwrap(), $( catalog.get_one::<$Ts>().unwrap() ),+ ) + }}; } +pub(crate) use unsafe_from_catalog_n; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -pub(crate) fn get_logged_account(ctx: &Context<'_>) -> LoggedAccount { - let current_account_subject = from_catalog::(ctx).unwrap(); +pub(crate) fn get_dataset( + ctx: &Context<'_>, + dataset_handle: &odf::DatasetHandle, +) -> Result { + let dataset_registry = from_catalog_n!(ctx, dyn DatasetRegistry); + let resolved_dataset = dataset_registry.get_dataset_by_handle(dataset_handle); + + Ok(resolved_dataset) +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub(crate) fn get_logged_account(ctx: &Context<'_>) -> Result { + let current_account_subject = from_catalog_n!(ctx, CurrentAccountSubject); + match current_account_subject.as_ref() { - CurrentAccountSubject::Logged(la) => la.clone(), + CurrentAccountSubject::Logged(la) => Ok(la.clone()), CurrentAccountSubject::Anonymous(_) => { unreachable!("We are not expecting anonymous accounts") } @@ -72,10 +107,10 @@ pub(crate) fn get_logged_account(ctx: &Context<'_>) -> LoggedAccount { pub(crate) async fn check_dataset_read_access( ctx: &Context<'_>, - dataset_handle: &DatasetHandle, + dataset_handle: &odf::DatasetHandle, ) -> Result<(), GqlError> { let dataset_action_authorizer = - from_catalog::(ctx).int_err()?; + from_catalog_n!(ctx, dyn kamu_core::auth::DatasetActionAuthorizer); dataset_action_authorizer .check_action_allowed(dataset_handle, kamu_core::auth::DatasetAction::Read) @@ -93,10 +128,10 @@ pub(crate) async fn check_dataset_read_access( pub(crate) async fn check_dataset_write_access( ctx: &Context<'_>, - dataset_handle: &DatasetHandle, + dataset_handle: &odf::DatasetHandle, ) -> Result<(), GqlError> { let dataset_action_authorizer = - from_catalog::(ctx).int_err()?; + from_catalog_n!(ctx, dyn kamu_core::auth::DatasetActionAuthorizer); dataset_action_authorizer .check_action_allowed(dataset_handle, kamu_core::auth::DatasetAction::Write) @@ -111,7 +146,7 @@ pub(crate) async fn check_dataset_write_access( //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -pub(crate) fn make_dataset_access_error(dataset_handle: &DatasetHandle) -> GqlError { +pub(crate) fn make_dataset_access_error(dataset_handle: &odf::DatasetHandle) -> GqlError { GqlError::Gql( async_graphql::Error::new("Dataset access error") .extend_with(|_, eev| eev.set("alias", dataset_handle.alias.to_string())), @@ -124,7 +159,7 @@ pub(crate) async fn get_task( ctx: &Context<'_>, task_id: 
ts::TaskID, ) -> Result { - let task_event_store = from_catalog::(ctx).unwrap(); + let task_event_store = from_catalog_n!(ctx, dyn ts::TaskEventStore); let task = ts::Task::load(task_id, task_event_store.as_ref()) .await .int_err()?; @@ -134,7 +169,7 @@ pub(crate) async fn get_task( //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub(crate) fn ensure_dataset_env_vars_enabled(ctx: &Context<'_>) -> Result<(), GqlError> { - let dataset_env_vars_config = from_catalog::(ctx).unwrap(); + let dataset_env_vars_config = from_catalog_n!(ctx, DatasetEnvVarsConfig); if !dataset_env_vars_config.as_ref().is_enabled() { return Err(GqlError::Gql(async_graphql::Error::new( "API is unavailable", @@ -149,7 +184,7 @@ pub(crate) fn check_logged_account_id_match( ctx: &Context<'_>, account_id: &AccountID, ) -> Result<(), GqlError> { - let current_account_subject = from_catalog::(ctx).unwrap(); + let current_account_subject = from_catalog_n!(ctx, CurrentAccountSubject); if let CurrentAccountSubject::Logged(logged_account) = current_account_subject.as_ref() { if logged_account.account_id == account_id.clone().into() { @@ -168,8 +203,11 @@ pub(crate) async fn check_access_token_valid( ctx: &Context<'_>, token_id: &AccessTokenID, ) -> Result<(), GqlError> { - let current_account_subject = from_catalog::(ctx).unwrap(); - let access_token_service = from_catalog::(ctx).unwrap(); + let (current_account_subject, access_token_service) = from_catalog_n!( + ctx, + CurrentAccountSubject, + dyn kamu_accounts::AccessTokenService + ); let existing_access_token = access_token_service .get_token_by_id(token_id) @@ -199,10 +237,10 @@ pub(crate) fn check_logged_account_name_match( ctx: &Context<'_>, account_name: &AccountName, ) -> Result<(), GqlError> { - let current_account_subject = from_catalog::(ctx).unwrap(); + let current_account_subject = from_catalog_n!(ctx, CurrentAccountSubject); if let CurrentAccountSubject::Logged(logged_account) = current_account_subject.as_ref() { - if logged_account.account_name == OdfAccountName::from(account_name.clone()) { + if logged_account.account_name == odf::AccountName::from(account_name.clone()) { return Ok(()); } }; diff --git a/src/adapter/graphql/tests/tests/test_error_handling.rs b/src/adapter/graphql/tests/tests/test_error_handling.rs index 6ef57e232..f2286cb60 100644 --- a/src/adapter/graphql/tests/tests/test_error_handling.rs +++ b/src/adapter/graphql/tests/tests/test_error_handling.rs @@ -14,6 +14,8 @@ use kamu_accounts::CurrentAccountSubject; use kamu_core::{DatasetRepository, TenancyConfig}; use time_source::SystemTimeSourceDefault; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[test_log::test(tokio::test)] async fn test_malformed_argument() { let schema = kamu_adapter_graphql::schema_quiet(); @@ -54,6 +56,8 @@ async fn test_malformed_argument() { ); } +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[test_log::test(tokio::test)] async fn test_internal_error() { let tempdir = tempfile::tempdir().unwrap(); @@ -101,27 +105,4 @@ async fn test_internal_error() { ); } -// TODO: There seems to be an issue with libunwind in a version of XCode used on -// GitHub's M1 runners which results in error: -// libunwind: stepWithCompactEncoding - invalid compact unwind encoding -#[cfg(not(target_os = "macos"))] -#[test_log::test(tokio::test)] -// We use the substring 
part because we have a dynamic panic message part. -#[should_panic(expected = "called `Result::unwrap()` on an `Err` value: \ - Unregistered(UnregisteredTypeError { type_id: TypeId { t: ")] -async fn test_handler_panics() { - // Not expecting panic to be trapped - that's the job of an HTTP server - let schema = kamu_adapter_graphql::schema_quiet(); - schema.execute(async_graphql::Request::new(indoc!( - r#" - { - datasets { - byId (datasetId: "did:odf:fed012126262ba49e1ba8392c26f7a39e1ba8d756c7469786d3365200c68402ff65dc") { - name - } - } - } - "# - )).data(dill::CatalogBuilder::new().build())) - .await; -} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/adapter/graphql/tests/tests/test_gql_account_flow_configs.rs b/src/adapter/graphql/tests/tests/test_gql_account_flow_configs.rs index 0a85f1683..f5562d189 100644 --- a/src/adapter/graphql/tests/tests/test_gql_account_flow_configs.rs +++ b/src/adapter/graphql/tests/tests/test_gql_account_flow_configs.rs @@ -14,14 +14,7 @@ use chrono::Duration; use database_common::{DatabaseTransactionRunner, NoOpDatabasePlugin}; use dill::Component; use indoc::indoc; -use kamu::testing::{ - MetadataFactory, - MockDatasetActionAuthorizer, - MockDatasetChangesService, - MockDependencyGraphRepository, - MockPollingIngestService, - MockTransformRequestPlanner, -}; +use kamu::testing::{MetadataFactory, MockDatasetActionAuthorizer, MockDatasetChangesService}; use kamu::{ CreateDatasetFromSnapshotUseCaseImpl, DatasetOwnershipServiceInMemory, @@ -29,13 +22,15 @@ use kamu::{ DatasetRegistryRepoBridge, DatasetRepositoryLocalFs, DatasetRepositoryWriter, - DependencyGraphServiceInMemory, + MetadataQueryServiceImpl, }; use kamu_accounts::{JwtAuthenticationConfig, DEFAULT_ACCOUNT_NAME, DEFAULT_ACCOUNT_NAME_STR}; use kamu_accounts_inmem::InMemoryAccessTokenRepository; use kamu_accounts_services::{AccessTokenServiceImpl, AuthenticationServiceImpl}; use kamu_core::*; -use kamu_flow_system::FlowExecutorConfig; +use kamu_datasets_inmem::InMemoryDatasetDependencyRepository; +use kamu_datasets_services::DependencyGraphServiceImpl; +use kamu_flow_system::FlowAgentConfig; use kamu_flow_system_inmem::{InMemoryFlowConfigurationEventStore, InMemoryFlowEventStore}; use kamu_task_system_inmem::InMemoryTaskEventStore; use kamu_task_system_services::TaskSchedulerImpl; @@ -55,8 +50,6 @@ async fn test_list_account_flows() { let mock_dataset_action_authorizer = MockDatasetActionAuthorizer::allowing(); let harness = FlowConfigHarness::with_overrides(FlowRunsHarnessOverrides { - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), mock_dataset_action_authorizer: Some(mock_dataset_action_authorizer), ..Default::default() }) @@ -137,8 +130,6 @@ async fn test_list_account_flows() { async fn test_list_datasets_with_flow() { let mock_dataset_action_authorizer = MockDatasetActionAuthorizer::allowing(); let harness = FlowConfigHarness::with_overrides(FlowRunsHarnessOverrides { - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), mock_dataset_action_authorizer: Some(mock_dataset_action_authorizer), ..Default::default() }) @@ -249,8 +240,6 @@ async fn test_pause_resume_account_flows() { let mock_dataset_action_authorizer = MockDatasetActionAuthorizer::allowing(); let harness = 
FlowConfigHarness::with_overrides(FlowRunsHarnessOverrides { - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), mock_dataset_action_authorizer: Some(mock_dataset_action_authorizer), ..Default::default() }) @@ -455,8 +444,6 @@ async fn test_account_configs_all_paused() { let mock_dataset_action_authorizer = MockDatasetActionAuthorizer::allowing(); let harness = FlowConfigHarness::with_overrides(FlowRunsHarnessOverrides { - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), mock_dataset_action_authorizer: Some(mock_dataset_action_authorizer), ..Default::default() }) @@ -630,10 +617,7 @@ struct FlowConfigHarness { #[derive(Default)] struct FlowRunsHarnessOverrides { - dependency_graph_mock: Option, dataset_changes_mock: Option, - transform_planner_mock: Option, - polling_service_mock: Option, mock_dataset_action_authorizer: Option, } @@ -644,9 +628,6 @@ impl FlowConfigHarness { std::fs::create_dir(&datasets_dir).unwrap(); let dataset_changes_mock = overrides.dataset_changes_mock.unwrap_or_default(); - let dependency_graph_mock = overrides.dependency_graph_mock.unwrap_or_default(); - let transform_planner_mock = overrides.transform_planner_mock.unwrap_or_default(); - let polling_service_mock = overrides.polling_service_mock.unwrap_or_default(); let mock_dataset_action_authorizer = overrides.mock_dataset_action_authorizer.unwrap_or_default(); @@ -663,6 +644,7 @@ impl FlowConfigHarness { .bind::() .bind::() .add::() + .add::() .add::() .add_value(dataset_changes_mock) .bind::() @@ -673,21 +655,16 @@ impl FlowConfigHarness { .add::() .add_value(JwtAuthenticationConfig::default()) .bind::() - .add::() - .add_value(dependency_graph_mock) - .bind::() + .add::() + .add::() .add::() .add::() - .add_value(FlowExecutorConfig::new( + .add_value(FlowAgentConfig::new( Duration::seconds(1), Duration::minutes(1), )) .add::() .add::() - .add_value(transform_planner_mock) - .bind::() - .add_value(polling_service_mock) - .bind::() .add::() .add::() .add::(); diff --git a/src/adapter/graphql/tests/tests/test_gql_data.rs b/src/adapter/graphql/tests/tests/test_gql_data.rs index 62acb8a3b..45fafe0f8 100644 --- a/src/adapter/graphql/tests/tests/test_gql_data.rs +++ b/src/adapter/graphql/tests/tests/test_gql_data.rs @@ -26,6 +26,8 @@ use kamu_accounts_services::{ PredefinedAccountsRegistrator, }; use kamu_core::*; +use kamu_datasets_inmem::InMemoryDatasetDependencyRepository; +use kamu_datasets_services::DependencyGraphServiceImpl; use messaging_outbox::DummyOutboxImpl; use opendatafabric::*; use serde_json::json; @@ -56,7 +58,8 @@ async fn create_catalog_with_local_workspace( let catalog = { let mut b = dill::CatalogBuilder::new(); - b.add::() + b.add::() + .add::() .add_value(current_account_subject) .add_value(predefined_accounts_config) .add_value(tenancy_config) diff --git a/src/adapter/graphql/tests/tests/test_gql_dataset_env_vars.rs b/src/adapter/graphql/tests/tests/test_gql_dataset_env_vars.rs index 9f989b55a..69d74b3da 100644 --- a/src/adapter/graphql/tests/tests/test_gql_dataset_env_vars.rs +++ b/src/adapter/graphql/tests/tests/test_gql_dataset_env_vars.rs @@ -17,7 +17,6 @@ use kamu::{ DatasetRegistryRepoBridge, DatasetRepositoryLocalFs, DatasetRepositoryWriter, - DependencyGraphServiceInMemory, }; use kamu_core::{ auth, @@ -27,8 +26,8 @@ use kamu_core::{ TenancyConfig, }; use 
kamu_datasets::DatasetEnvVarsConfig; -use kamu_datasets_inmem::InMemoryDatasetEnvVarRepository; -use kamu_datasets_services::DatasetEnvVarServiceImpl; +use kamu_datasets_inmem::{InMemoryDatasetDependencyRepository, InMemoryDatasetEnvVarRepository}; +use kamu_datasets_services::{DatasetEnvVarServiceImpl, DependencyGraphServiceImpl}; use messaging_outbox::DummyOutboxImpl; use opendatafabric::DatasetKind; use time_source::SystemTimeSourceDefault; @@ -360,7 +359,8 @@ impl DatasetEnvVarsHarness { .add::() .add::() .add::() - .add::() + .add::() + .add::() .add::() .add::() .add::(); diff --git a/src/adapter/graphql/tests/tests/test_gql_dataset_flow_configs.rs b/src/adapter/graphql/tests/tests/test_gql_dataset_flow_configs.rs index d34cc8b93..d2499be5e 100644 --- a/src/adapter/graphql/tests/tests/test_gql_dataset_flow_configs.rs +++ b/src/adapter/graphql/tests/tests/test_gql_dataset_flow_configs.rs @@ -11,23 +11,23 @@ use async_graphql::value; use database_common::{DatabaseTransactionRunner, NoOpDatabasePlugin}; use dill::Component; use indoc::indoc; -use kamu::testing::{MetadataFactory, MockPollingIngestService, MockTransformRequestPlanner}; +use kamu::testing::MetadataFactory; use kamu::{ CreateDatasetFromSnapshotUseCaseImpl, DatasetRegistryRepoBridge, DatasetRepositoryLocalFs, DatasetRepositoryWriter, - DependencyGraphServiceInMemory, + MetadataQueryServiceImpl, }; use kamu_core::{ auth, CreateDatasetFromSnapshotUseCase, CreateDatasetResult, DatasetRepository, - PollingIngestService, TenancyConfig, - TransformRequestPlanner, }; +use kamu_datasets_inmem::InMemoryDatasetDependencyRepository; +use kamu_datasets_services::DependencyGraphServiceImpl; use kamu_flow_system_inmem::InMemoryFlowConfigurationEventStore; use kamu_flow_system_services::FlowConfigurationServiceImpl; use messaging_outbox::DummyOutboxImpl; @@ -40,11 +40,7 @@ use crate::utils::{authentication_catalogs, expect_anonymous_access_error}; #[test_log::test(tokio::test)] async fn test_crud_time_delta_root_dataset() { - let harness = FlowConfigHarness::with_overrides(FlowRunsHarnessOverrides { - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), - }) - .await; + let harness = FlowConfigHarness::make().await; let create_result = harness.create_root_dataset().await; @@ -211,11 +207,7 @@ async fn test_crud_time_delta_root_dataset() { #[test_log::test(tokio::test)] async fn test_time_delta_validation() { - let harness = FlowConfigHarness::with_overrides(FlowRunsHarnessOverrides { - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), - }) - .await; + let harness = FlowConfigHarness::make().await; let create_result = harness.create_root_dataset().await; let schema = kamu_adapter_graphql::schema_quiet(); @@ -287,11 +279,7 @@ async fn test_time_delta_validation() { #[test_log::test(tokio::test)] async fn test_crud_cron_root_dataset() { - let harness = FlowConfigHarness::with_overrides(FlowRunsHarnessOverrides { - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), - }) - .await; + let harness = FlowConfigHarness::make().await; let create_result = harness.create_root_dataset().await; let request_code = indoc!( @@ -496,11 +484,7 @@ async fn test_crud_cron_root_dataset() { #[test_log::test(tokio::test)] 
async fn test_crud_transform_derived_dataset() { - let harness = FlowConfigHarness::with_overrides(FlowRunsHarnessOverrides { - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), - }) - .await; + let harness = FlowConfigHarness::make().await; harness.create_root_dataset().await; let create_derived_result = harness.create_derived_dataset().await; @@ -615,11 +599,7 @@ async fn test_crud_transform_derived_dataset() { #[test_log::test(tokio::test)] async fn test_crud_compaction_root_dataset() { - let harness = FlowConfigHarness::with_overrides(FlowRunsHarnessOverrides { - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), - }) - .await; + let harness = FlowConfigHarness::make().await; let create_result = harness.create_root_dataset().await; let request_code = indoc!( @@ -733,11 +713,7 @@ async fn test_crud_compaction_root_dataset() { #[test_log::test(tokio::test)] async fn test_transform_config_validation() { - let harness = FlowConfigHarness::with_overrides(FlowRunsHarnessOverrides { - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), - }) - .await; + let harness = FlowConfigHarness::make().await; harness.create_root_dataset().await; let create_derived_result = harness.create_derived_dataset().await; @@ -802,11 +778,7 @@ async fn test_transform_config_validation() { #[test_log::test(tokio::test)] async fn test_compaction_config_validation() { - let harness = FlowConfigHarness::with_overrides(FlowRunsHarnessOverrides { - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), - }) - .await; + let harness = FlowConfigHarness::make().await; harness.create_root_dataset().await; let create_derived_result = harness.create_derived_dataset().await; @@ -927,11 +899,7 @@ async fn test_pause_resume_dataset_flows() { // Setup initial flow configs for datasets - let harness = FlowConfigHarness::with_overrides(FlowRunsHarnessOverrides { - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), - }) - .await; + let harness = FlowConfigHarness::make().await; let create_root_result = harness.create_root_dataset().await; let create_derived_result = harness.create_derived_dataset().await; @@ -1152,13 +1120,9 @@ async fn test_pause_resume_dataset_flows() { #[test_log::test(tokio::test)] async fn test_conditions_not_met_for_flows() { - let harness = FlowConfigHarness::with_overrides(FlowRunsHarnessOverrides { - transform_planner_mock: Some(MockTransformRequestPlanner::without_set_transform()), - polling_service_mock: Some(MockPollingIngestService::without_active_polling_source()), - }) - .await; - let create_root_result = harness.create_root_dataset().await; - let create_derived_result = harness.create_derived_dataset().await; + let harness = FlowConfigHarness::make().await; + let create_root_result = harness.create_root_dataset_no_source().await; + let create_derived_result = harness.create_derived_dataset_no_transform().await; //// @@ -1241,11 +1205,7 @@ async fn test_conditions_not_met_for_flows() { #[test_log::test(tokio::test)] async fn 
test_incorrect_dataset_kinds_for_flow_type() { - let harness = FlowConfigHarness::with_overrides(FlowRunsHarnessOverrides { - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), - }) - .await; + let harness = FlowConfigHarness::make().await; let create_root_result = harness.create_root_dataset().await; let create_derived_result = harness.create_derived_dataset().await; @@ -1399,11 +1359,7 @@ async fn test_incorrect_dataset_kinds_for_flow_type() { #[test_log::test(tokio::test)] async fn test_set_metadataonly_compaction_config_form_derivative() { - let harness = FlowConfigHarness::with_overrides(FlowRunsHarnessOverrides { - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), - }) - .await; + let harness = FlowConfigHarness::make().await; harness.create_root_dataset().await; let create_derived_result = harness.create_derived_dataset().await; @@ -1455,11 +1411,7 @@ async fn test_set_metadataonly_compaction_config_form_derivative() { #[test_log::test(tokio::test)] async fn test_set_config_for_hard_compaction_fails() { - let harness = FlowConfigHarness::with_overrides(FlowRunsHarnessOverrides { - transform_planner_mock: Some(MockTransformRequestPlanner::without_set_transform()), - polling_service_mock: Some(MockPollingIngestService::without_active_polling_source()), - }) - .await; + let harness = FlowConfigHarness::make().await; let create_root_result = harness.create_root_dataset().await; //// @@ -1543,11 +1495,7 @@ async fn test_set_config_for_hard_compaction_fails() { #[test_log::test(tokio::test)] async fn test_anonymous_setters_fail() { - let harness = FlowConfigHarness::with_overrides(FlowRunsHarnessOverrides { - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), - }) - .await; + let harness = FlowConfigHarness::make().await; let create_root_result = harness.create_root_dataset().await; let create_derived_result = harness.create_derived_dataset().await; @@ -1599,12 +1547,6 @@ async fn test_anonymous_setters_fail() { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Default)] -struct FlowRunsHarnessOverrides { - transform_planner_mock: Option, - polling_service_mock: Option, -} - struct FlowConfigHarness { _tempdir: tempfile::TempDir, _catalog_base: dill::Catalog, @@ -1613,14 +1555,11 @@ struct FlowConfigHarness { } impl FlowConfigHarness { - async fn with_overrides(overrides: FlowRunsHarnessOverrides) -> Self { + async fn make() -> Self { let tempdir = tempfile::tempdir().unwrap(); let datasets_dir = tempdir.path().join("datasets"); std::fs::create_dir(&datasets_dir).unwrap(); - let transform_planner_mock = overrides.transform_planner_mock.unwrap_or_default(); - let polling_service_mock = overrides.polling_service_mock.unwrap_or_default(); - let catalog_base = { let mut b = dill::CatalogBuilder::new(); @@ -1630,14 +1569,12 @@ impl FlowConfigHarness { .bind::() .bind::() .add::() + .add::() .add::() .add::() - .add_value(polling_service_mock) - .bind::() - .add_value(transform_planner_mock) - .bind::() .add::() - .add::() + .add::() + .add::() .add::() .add::() .add::(); @@ -1677,6 +1614,24 @@ impl FlowConfigHarness { .unwrap() } + async fn 
create_root_dataset_no_source(&self) -> CreateDatasetResult { + let create_dataset_from_snapshot = self + .catalog_authorized + .get_one::() + .unwrap(); + + create_dataset_from_snapshot + .execute( + MetadataFactory::dataset_snapshot() + .kind(DatasetKind::Root) + .name("foo") + .build(), + Default::default(), + ) + .await + .unwrap() + } + async fn create_derived_dataset(&self) -> CreateDatasetResult { let create_dataset_from_snapshot = self .catalog_authorized @@ -1700,6 +1655,24 @@ impl FlowConfigHarness { .unwrap() } + async fn create_derived_dataset_no_transform(&self) -> CreateDatasetResult { + let create_dataset_from_snapshot = self + .catalog_authorized + .get_one::() + .unwrap(); + + create_dataset_from_snapshot + .execute( + MetadataFactory::dataset_snapshot() + .name("bar") + .kind(DatasetKind::Derivative) + .build(), + Default::default(), + ) + .await + .unwrap() + } + fn extract_time_delta_from_response(response_json: &serde_json::Value) -> (u64, &str) { let schedule_json = &response_json["datasets"]["byId"]["flows"]["configs"] ["setConfigIngest"]["config"]["ingest"]["schedule"]; diff --git a/src/adapter/graphql/tests/tests/test_gql_dataset_flow_runs.rs b/src/adapter/graphql/tests/tests/test_gql_dataset_flow_runs.rs index 3412623c7..b593f0bc6 100644 --- a/src/adapter/graphql/tests/tests/test_gql_dataset_flow_runs.rs +++ b/src/adapter/graphql/tests/tests/test_gql_dataset_flow_runs.rs @@ -15,20 +15,14 @@ use database_common::{DatabaseTransactionRunner, NoOpDatabasePlugin}; use dill::Component; use futures::TryStreamExt; use indoc::indoc; -use kamu::testing::{ - MetadataFactory, - MockDatasetChangesService, - MockDependencyGraphRepository, - MockPollingIngestService, - MockTransformRequestPlanner, -}; +use kamu::testing::{MetadataFactory, MockDatasetChangesService}; use kamu::{ CreateDatasetFromSnapshotUseCaseImpl, DatasetOwnershipServiceInMemory, DatasetRegistryRepoBridge, DatasetRepositoryLocalFs, DatasetRepositoryWriter, - DependencyGraphServiceInMemory, + MetadataQueryServiceImpl, }; use kamu_accounts::{ CurrentAccountSubject, @@ -48,19 +42,19 @@ use kamu_core::{ DatasetIntervalIncrement, DatasetLifecycleMessage, DatasetRepository, - DependencyGraphRepository, - PollingIngestService, PullResult, + ResetResult, TenancyConfig, - TransformRequestPlanner, MESSAGE_PRODUCER_KAMU_CORE_DATASET_SERVICE, }; +use kamu_datasets_inmem::InMemoryDatasetDependencyRepository; +use kamu_datasets_services::DependencyGraphServiceImpl; use kamu_flow_system::{ Flow, + FlowAgentConfig, + FlowAgentTestDriver, FlowConfigurationUpdatedMessage, FlowEventStore, - FlowExecutorConfig, - FlowExecutorTestDriver, FlowID, FlowTrigger, FlowTriggerAutoPolling, @@ -82,7 +76,6 @@ use crate::utils::{authentication_catalogs, expect_anonymous_access_error}; #[test_log::test(tokio::test)] async fn test_trigger_ingest_root_dataset() { let harness = FlowRunsHarness::with_overrides(FlowRunsHarnessOverrides { - dependency_graph_mock: Some(MockDependencyGraphRepository::no_dependencies()), dataset_changes_mock: Some(MockDatasetChangesService::with_increment_between( DatasetIntervalIncrement { num_blocks: 1, @@ -90,8 +83,6 @@ async fn test_trigger_ingest_root_dataset() { updated_watermark: None, }, )), - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), }) .await; @@ -437,10 +428,7 @@ async fn test_trigger_ingest_root_dataset() { #[test_log::test(tokio::test)] async fn test_trigger_reset_root_dataset_flow() 
{ let harness = FlowRunsHarness::with_overrides(FlowRunsHarnessOverrides { - dependency_graph_mock: Some(MockDependencyGraphRepository::no_dependencies()), dataset_changes_mock: Some(MockDatasetChangesService::default()), - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), }) .await; @@ -512,7 +500,9 @@ async fn test_trigger_reset_root_dataset_flow() { complete_time, ts::TaskOutcome::Success(ts::TaskResult::ResetDatasetResult( ts::TaskResetDatasetResult { - new_head: root_dataset_blocks[1].0.clone(), + reset_result: ResetResult { + new_head: root_dataset_blocks[1].0.clone(), + }, }, )), ) @@ -601,10 +591,7 @@ async fn test_trigger_reset_root_dataset_flow() { #[test_log::test(tokio::test)] async fn test_trigger_reset_root_dataset_flow_with_invalid_head() { let harness = FlowRunsHarness::with_overrides(FlowRunsHarnessOverrides { - dependency_graph_mock: Some(MockDependencyGraphRepository::no_dependencies()), dataset_changes_mock: Some(MockDatasetChangesService::default()), - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), }) .await; @@ -697,7 +684,6 @@ async fn test_trigger_reset_root_dataset_flow_with_invalid_head() { #[test_log::test(tokio::test)] async fn test_trigger_execute_transform_derived_dataset() { let harness = FlowRunsHarness::with_overrides(FlowRunsHarnessOverrides { - dependency_graph_mock: Some(MockDependencyGraphRepository::no_dependencies()), dataset_changes_mock: Some(MockDatasetChangesService::with_increment_between( DatasetIntervalIncrement { num_blocks: 1, @@ -705,8 +691,6 @@ async fn test_trigger_execute_transform_derived_dataset() { updated_watermark: None, }, )), - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), }) .await; @@ -918,7 +902,6 @@ async fn test_trigger_execute_transform_derived_dataset() { #[test_log::test(tokio::test)] async fn test_trigger_compaction_root_dataset() { let harness = FlowRunsHarness::with_overrides(FlowRunsHarnessOverrides { - dependency_graph_mock: Some(MockDependencyGraphRepository::no_dependencies()), dataset_changes_mock: Some(MockDatasetChangesService::with_increment_between( DatasetIntervalIncrement { num_blocks: 1, @@ -926,8 +909,6 @@ async fn test_trigger_compaction_root_dataset() { updated_watermark: None, }, )), - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), }) .await; @@ -1277,10 +1258,7 @@ async fn test_trigger_compaction_root_dataset() { #[test_log::test(tokio::test)] async fn test_list_flows_with_filters_and_pagination() { let harness = FlowRunsHarness::with_overrides(FlowRunsHarnessOverrides { - dependency_graph_mock: None, dataset_changes_mock: None, - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), }) .await; let create_result = harness.create_root_dataset().await; @@ -1682,10 +1660,7 @@ async fn test_list_flows_with_filters_and_pagination() { #[test_log::test(tokio::test)] async fn test_list_flow_initiators() { let harness = FlowRunsHarness::with_overrides(FlowRunsHarnessOverrides { - dependency_graph_mock: None, dataset_changes_mock: 
None, - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), }) .await; let create_result = harness.create_root_dataset().await; @@ -1775,14 +1750,11 @@ async fn test_list_flow_initiators() { #[test_log::test(tokio::test)] async fn test_conditions_not_met_for_flows() { let harness = FlowRunsHarness::with_overrides(FlowRunsHarnessOverrides { - dependency_graph_mock: None, dataset_changes_mock: None, - transform_planner_mock: Some(MockTransformRequestPlanner::without_set_transform()), - polling_service_mock: Some(MockPollingIngestService::without_active_polling_source()), }) .await; - let create_root_result = harness.create_root_dataset().await; - let create_derived_result = harness.create_derived_dataset().await; + let create_root_result = harness.create_root_dataset_no_source().await; + let create_derived_result = harness.create_derived_dataset_no_transform().await; //// @@ -1858,10 +1830,7 @@ async fn test_conditions_not_met_for_flows() { #[test_log::test(tokio::test)] async fn test_incorrect_dataset_kinds_for_flow_type() { let harness = FlowRunsHarness::with_overrides(FlowRunsHarnessOverrides { - dependency_graph_mock: None, dataset_changes_mock: None, - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), }) .await; @@ -1975,10 +1944,7 @@ async fn test_incorrect_dataset_kinds_for_flow_type() { #[test_log::test(tokio::test)] async fn test_cancel_ingest_root_dataset() { let harness = FlowRunsHarness::with_overrides(FlowRunsHarnessOverrides { - dependency_graph_mock: None, dataset_changes_mock: None, - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), }) .await; let create_result = harness.create_root_dataset().await; @@ -2047,10 +2013,7 @@ async fn test_cancel_ingest_root_dataset() { #[test_log::test(tokio::test)] async fn test_cancel_running_transform_derived_dataset() { let harness = FlowRunsHarness::with_overrides(FlowRunsHarnessOverrides { - dependency_graph_mock: None, dataset_changes_mock: None, - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), }) .await; harness.create_root_dataset().await; @@ -2124,10 +2087,7 @@ async fn test_cancel_running_transform_derived_dataset() { #[test_log::test(tokio::test)] async fn test_cancel_hard_compaction_root_dataset() { let harness = FlowRunsHarness::with_overrides(FlowRunsHarnessOverrides { - dependency_graph_mock: None, dataset_changes_mock: None, - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), }) .await; let create_result = harness.create_root_dataset().await; @@ -2196,10 +2156,7 @@ async fn test_cancel_hard_compaction_root_dataset() { #[test_log::test(tokio::test)] async fn test_cancel_wrong_flow_id_fails() { let harness = FlowRunsHarness::with_overrides(FlowRunsHarnessOverrides { - dependency_graph_mock: None, dataset_changes_mock: None, - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), }) .await; let create_result = 
harness.create_root_dataset().await; @@ -2240,10 +2197,7 @@ async fn test_cancel_wrong_flow_id_fails() { #[test_log::test(tokio::test)] async fn test_cancel_foreign_flow_fails() { let harness = FlowRunsHarness::with_overrides(FlowRunsHarnessOverrides { - dependency_graph_mock: None, dataset_changes_mock: None, - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), }) .await; let create_root_result = harness.create_root_dataset().await; @@ -2301,10 +2255,7 @@ async fn test_cancel_foreign_flow_fails() { #[test_log::test(tokio::test)] async fn test_cancel_waiting_flow() { let harness = FlowRunsHarness::with_overrides(FlowRunsHarnessOverrides { - dependency_graph_mock: None, dataset_changes_mock: None, - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), }) .await; let create_result = harness.create_root_dataset().await; @@ -2371,10 +2322,7 @@ async fn test_cancel_waiting_flow() { #[test_log::test(tokio::test)] async fn test_cancel_already_aborted_flow() { let harness = FlowRunsHarness::with_overrides(FlowRunsHarnessOverrides { - dependency_graph_mock: None, dataset_changes_mock: None, - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), }) .await; let create_result = harness.create_root_dataset().await; @@ -2456,10 +2404,7 @@ async fn test_cancel_already_aborted_flow() { #[test_log::test(tokio::test)] async fn test_cancel_already_succeeded_flow() { let harness = FlowRunsHarness::with_overrides(FlowRunsHarnessOverrides { - dependency_graph_mock: Some(MockDependencyGraphRepository::no_dependencies()), dataset_changes_mock: None, - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), }) .await; let create_result: CreateDatasetResult = harness.create_root_dataset().await; @@ -2536,10 +2481,7 @@ async fn test_cancel_already_succeeded_flow() { #[test_log::test(tokio::test)] async fn test_history_of_completed_flow() { let harness = FlowRunsHarness::with_overrides(FlowRunsHarnessOverrides { - dependency_graph_mock: Some(MockDependencyGraphRepository::no_dependencies()), dataset_changes_mock: None, - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), }) .await; @@ -2672,7 +2614,6 @@ async fn test_history_of_completed_flow() { #[test_log::test(tokio::test)] async fn test_execute_transfrom_flow_error_after_compaction() { let harness = FlowRunsHarness::with_overrides(FlowRunsHarnessOverrides { - dependency_graph_mock: Some(MockDependencyGraphRepository::no_dependencies()), dataset_changes_mock: Some(MockDatasetChangesService::with_increment_between( DatasetIntervalIncrement { num_blocks: 1, @@ -2680,8 +2621,6 @@ async fn test_execute_transfrom_flow_error_after_compaction() { updated_watermark: None, }, )), - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), }) .await; @@ -2978,10 +2917,7 @@ async fn test_execute_transfrom_flow_error_after_compaction() { #[test_log::test(tokio::test)] async fn test_anonymous_operation_fails() 
{ let harness = FlowRunsHarness::with_overrides(FlowRunsHarnessOverrides { - dependency_graph_mock: None, dataset_changes_mock: None, - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), }) .await; @@ -3018,7 +2954,6 @@ async fn test_anonymous_operation_fails() { #[test_log::test(tokio::test)] async fn test_config_snapshot_returned_correctly() { let harness = FlowRunsHarness::with_overrides(FlowRunsHarnessOverrides { - dependency_graph_mock: Some(MockDependencyGraphRepository::no_dependencies()), dataset_changes_mock: Some(MockDatasetChangesService::with_increment_between( DatasetIntervalIncrement { num_blocks: 1, @@ -3026,8 +2961,6 @@ async fn test_config_snapshot_returned_correctly() { updated_watermark: None, }, )), - transform_planner_mock: Some(MockTransformRequestPlanner::with_set_transform()), - polling_service_mock: Some(MockPollingIngestService::with_active_polling_source()), }) .await; @@ -3155,10 +3088,7 @@ struct FlowRunsHarness { #[derive(Default)] struct FlowRunsHarnessOverrides { - dependency_graph_mock: Option, dataset_changes_mock: Option, - transform_planner_mock: Option, - polling_service_mock: Option, } impl FlowRunsHarness { @@ -3168,9 +3098,6 @@ impl FlowRunsHarness { std::fs::create_dir(&datasets_dir).unwrap(); let dataset_changes_mock = overrides.dataset_changes_mock.unwrap_or_default(); - let dependency_graph_mock = overrides.dependency_graph_mock.unwrap_or_default(); - let transform_planner_mock = overrides.transform_planner_mock.unwrap_or_default(); - let polling_service_mock = overrides.polling_service_mock.unwrap_or_default(); let catalog_base = { let mut b = dill::CatalogBuilder::new(); @@ -3185,26 +3112,22 @@ impl FlowRunsHarness { .bind::() .bind::() .add::() + .add::() .add::() .add_value(dataset_changes_mock) .bind::() .add::() .add::() - .add::() - .add_value(dependency_graph_mock) - .bind::() + .add::() + .add::() .add::() .add::() - .add_value(FlowExecutorConfig::new( + .add_value(FlowAgentConfig::new( Duration::seconds(1), Duration::minutes(1), )) .add::() .add::() - .add_value(transform_planner_mock) - .bind::() - .add_value(polling_service_mock) - .bind::() .add::() .add::() .add::() @@ -3221,7 +3144,7 @@ impl FlowRunsHarness { ); register_message_dispatcher::( &mut b, - ts::MESSAGE_PRODUCER_KAMU_TASK_EXECUTOR, + ts::MESSAGE_PRODUCER_KAMU_TASK_AGENT, ); register_message_dispatcher::( &mut b, @@ -3274,6 +3197,24 @@ impl FlowRunsHarness { .unwrap() } + async fn create_root_dataset_no_source(&self) -> CreateDatasetResult { + let create_dataset_from_snapshot = self + .catalog_authorized + .get_one::() + .unwrap(); + + create_dataset_from_snapshot + .execute( + MetadataFactory::dataset_snapshot() + .kind(DatasetKind::Root) + .name("foo") + .build(), + Default::default(), + ) + .await + .unwrap() + } + async fn create_derived_dataset(&self) -> CreateDatasetResult { let create_dataset_from_snapshot = self .catalog_authorized @@ -3297,6 +3238,24 @@ impl FlowRunsHarness { .unwrap() } + async fn create_derived_dataset_no_transform(&self) -> CreateDatasetResult { + let create_dataset_from_snapshot = self + .catalog_authorized + .get_one::() + .unwrap(); + + create_dataset_from_snapshot + .execute( + MetadataFactory::dataset_snapshot() + .name("bar") + .kind(DatasetKind::Derivative) + .build(), + Default::default(), + ) + .await + .unwrap() + } + async fn mimic_flow_scheduled( &self, flow_id: &str, @@ -3304,7 +3263,7 @@ impl FlowRunsHarness { ) -> 
ts::TaskID { let flow_service_test_driver = self .catalog_authorized - .get_one::() + .get_one::() .unwrap(); let flow_id = FlowID::new(flow_id.parse::().unwrap()); @@ -3352,7 +3311,7 @@ impl FlowRunsHarness { let outbox = self.catalog_authorized.get_one::().unwrap(); outbox .post_message( - ts::MESSAGE_PRODUCER_KAMU_TASK_EXECUTOR, + ts::MESSAGE_PRODUCER_KAMU_TASK_AGENT, ts::TaskProgressMessage::running(event_time, task_id, task_metadata), ) .await @@ -3380,7 +3339,7 @@ impl FlowRunsHarness { let outbox = self.catalog_authorized.get_one::().unwrap(); outbox .post_message( - ts::MESSAGE_PRODUCER_KAMU_TASK_EXECUTOR, + ts::MESSAGE_PRODUCER_KAMU_TASK_AGENT, ts::TaskProgressMessage::finished(event_time, task_id, task_metadata, task_outcome), ) .await diff --git a/src/adapter/graphql/tests/tests/test_gql_datasets.rs b/src/adapter/graphql/tests/tests/test_gql_datasets.rs index b3b1e2ab5..2162cf31c 100644 --- a/src/adapter/graphql/tests/tests/test_gql_datasets.rs +++ b/src/adapter/graphql/tests/tests/test_gql_datasets.rs @@ -17,6 +17,8 @@ use kamu_accounts::*; use kamu_auth_rebac_inmem::InMemoryRebacRepository; use kamu_auth_rebac_services::{MultiTenantRebacDatasetLifecycleMessageConsumer, RebacServiceImpl}; use kamu_core::*; +use kamu_datasets_inmem::InMemoryDatasetDependencyRepository; +use kamu_datasets_services::DependencyGraphServiceImpl; use messaging_outbox::{register_message_dispatcher, Outbox, OutboxImmediateImpl}; use mockall::predicate::eq; use opendatafabric::serde::yaml::YamlDatasetSnapshotSerializer; @@ -592,7 +594,6 @@ async fn test_dataset_rename_name_collision() { #[test_log::test(tokio::test)] async fn test_dataset_delete_success() { let harness = GraphQLDatasetsHarness::new(TenancyConfig::SingleTenant).await; - harness.init_dependencies_graph().await; let foo_result = harness .create_root_dataset(None, DatasetName::new_unchecked("foo")) @@ -643,7 +644,6 @@ async fn test_dataset_delete_success() { #[test_log::test(tokio::test)] async fn test_dataset_delete_dangling_ref() { let harness = GraphQLDatasetsHarness::new(TenancyConfig::SingleTenant).await; - harness.init_dependencies_graph().await; let foo_result = harness .create_root_dataset(None, DatasetName::new_unchecked("foo")) @@ -753,7 +753,6 @@ async fn test_dataset_view_permissions() { struct GraphQLDatasetsHarness { _tempdir: tempfile::TempDir, - base_catalog: dill::Catalog, catalog_authorized: dill::Catalog, catalog_anonymous: dill::Catalog, } @@ -783,7 +782,8 @@ impl GraphQLDatasetsHarness { .add::() .add::() .add::() - .add::() + .add::() + .add::() .add_value(tenancy_config) .add_builder(DatasetRepositoryLocalFs::builder().with_root(datasets_dir)) .bind::() @@ -813,27 +813,11 @@ impl GraphQLDatasetsHarness { Self { _tempdir: tempdir, - base_catalog, catalog_anonymous, catalog_authorized, } } - pub async fn init_dependencies_graph(&self) { - let dataset_repo = self - .catalog_authorized - .get_one::() - .unwrap(); - let dependency_graph_service = self - .base_catalog - .get_one::() - .unwrap(); - dependency_graph_service - .eager_initialization(&DependencyGraphRepositoryInMemory::new(dataset_repo)) - .await - .unwrap(); - } - pub async fn create_root_dataset( &self, account_name: Option, diff --git a/src/adapter/graphql/tests/tests/test_gql_metadata.rs b/src/adapter/graphql/tests/tests/test_gql_metadata.rs index f6aefedb4..10f96732e 100644 --- a/src/adapter/graphql/tests/tests/test_gql_metadata.rs +++ b/src/adapter/graphql/tests/tests/test_gql_metadata.rs @@ -14,6 +14,8 @@ use indoc::indoc; use 
kamu::testing::MetadataFactory; use kamu::*; use kamu_core::*; +use kamu_datasets_inmem::InMemoryDatasetDependencyRepository; +use kamu_datasets_services::DependencyGraphServiceImpl; use messaging_outbox::DummyOutboxImpl; use opendatafabric::*; use time_source::SystemTimeSourceDefault; @@ -38,14 +40,15 @@ async fn test_current_push_sources() { .bind::() .bind::() .add::() + .add::() .add::() - .add::() .add::() .add::() .add::() .add::() .add::() - .add::(); + .add::() + .add::(); NoOpDatabasePlugin::init_database_components(&mut b); diff --git a/src/adapter/graphql/tests/tests/test_gql_metadata_chain.rs b/src/adapter/graphql/tests/tests/test_gql_metadata_chain.rs index 805ae8d24..4e459b648 100644 --- a/src/adapter/graphql/tests/tests/test_gql_metadata_chain.rs +++ b/src/adapter/graphql/tests/tests/test_gql_metadata_chain.rs @@ -16,6 +16,8 @@ use indoc::indoc; use kamu::testing::MetadataFactory; use kamu::*; use kamu_core::*; +use kamu_datasets_inmem::InMemoryDatasetDependencyRepository; +use kamu_datasets_services::DependencyGraphServiceImpl; use messaging_outbox::DummyOutboxImpl; use opendatafabric::serde::yaml::YamlMetadataEventSerializer; use opendatafabric::*; @@ -531,7 +533,8 @@ impl GraphQLMetadataChainHarness { .add::() .add::() .add::() - .add::() + .add::() + .add::() .add_value(tenancy_config) .add_builder(DatasetRepositoryLocalFs::builder().with_root(datasets_dir)) .bind::() diff --git a/src/adapter/graphql/tests/tests/test_gql_search.rs b/src/adapter/graphql/tests/tests/test_gql_search.rs index 7d6698bfe..9b59fa1f1 100644 --- a/src/adapter/graphql/tests/tests/test_gql_search.rs +++ b/src/adapter/graphql/tests/tests/test_gql_search.rs @@ -13,6 +13,8 @@ use kamu::testing::MetadataFactory; use kamu::*; use kamu_accounts::CurrentAccountSubject; use kamu_core::*; +use kamu_datasets_inmem::InMemoryDatasetDependencyRepository; +use kamu_datasets_services::DependencyGraphServiceImpl; use messaging_outbox::DummyOutboxImpl; use opendatafabric::*; use time_source::SystemTimeSourceDefault; @@ -28,7 +30,8 @@ async fn test_search_query() { let cat = dill::CatalogBuilder::new() .add::() .add::() - .add::() + .add::() + .add::() .add_value(CurrentAccountSubject::new_test()) .add::() .add_value(TenancyConfig::SingleTenant) diff --git a/src/adapter/http/Cargo.toml b/src/adapter/http/Cargo.toml index 2f207961b..754723de2 100644 --- a/src/adapter/http/Cargo.toml +++ b/src/adapter/http/Cargo.toml @@ -49,7 +49,7 @@ base64 = { version = "0.22", default-features = false } bytes = "1" canonical_json = { version = "0.5.0", default-features = false } chrono = { version = "0.4", features = ["serde"] } -datafusion = { version = "42", default-features = false } # TODO: Currently needed for type conversions but ideally should be encapsulated by kamu-core +datafusion = { version = "43", default-features = false } # TODO: Currently needed for type conversions but ideally should be encapsulated by kamu-core dill = "0.9" ed25519-dalek = { version = "2", default-features = false, features = [ "std", @@ -75,7 +75,7 @@ serde_json = "1" serde_with = { version = "3", default-features = false } strum = { version = "0.26", features = ["derive"] } tar = "0.4" -thiserror = { version = "1", default-features = false } +thiserror = { version = "2", default-features = false, features = ["std"] } tokio = { version = "1", default-features = false, features = [] } tokio-stream = "0.1" tokio-util = { version = "0.7", default-features = false, features = [ @@ -108,6 +108,7 @@ kamu-accounts = { workspace = true, features = 
["testing"] } kamu-accounts-inmem = { workspace = true } kamu-accounts-services = { workspace = true } kamu-datasets-services = { workspace = true } +kamu-datasets-inmem = { workspace = true } kamu-ingest-datafusion = { workspace = true } messaging-outbox = { workspace = true } diff --git a/src/adapter/http/src/data/ingest_handler.rs b/src/adapter/http/src/data/ingest_handler.rs index ad2c644cd..59a8c9cd8 100644 --- a/src/adapter/http/src/data/ingest_handler.rs +++ b/src/adapter/http/src/data/ingest_handler.rs @@ -14,6 +14,7 @@ use database_common_macros::transactional_handler; use dill::Catalog; use http::HeaderMap; use http_common::*; +use internal_error::ErrorIntoInternal; use kamu_core::*; use opendatafabric::DatasetRef; use time_source::SystemTimeSource; @@ -122,7 +123,7 @@ pub async fn dataset_ingest_handler( // Resolve dataset let dataset_registry = catalog.get_one::().unwrap(); - let resolved_dataset = dataset_registry + let target = dataset_registry .get_dataset_by_ref(&dataset_ref) .await .map_err(ApiError::not_found)?; @@ -130,35 +131,45 @@ pub async fn dataset_ingest_handler( // Authorization check let authorizer = catalog.get_one::().unwrap(); authorizer - .check_action_allowed(resolved_dataset.get_handle(), auth::DatasetAction::Write) + .check_action_allowed(target.get_handle(), auth::DatasetAction::Write) .await .map_err(|e| match e { DatasetActionUnauthorizedError::Access(_) => ApiError::new_forbidden(), DatasetActionUnauthorizedError::Internal(e) => e.api_err(), })?; - // Run ingestion - let ingest_svc = catalog.get_one::().unwrap(); - match ingest_svc - .ingest_from_file_stream( - resolved_dataset, + // Plan and run ingestion + let push_ingest_planner = catalog.get_one::().unwrap(); + let ingest_plan = push_ingest_planner + .plan_ingest( + target.clone(), params.source_name.as_deref(), - arguments.data_stream, PushIngestOpts { media_type: arguments.media_type, source_event_time, auto_create_push_source: is_ingest_from_upload, schema_inference: SchemaInferenceOpts::default(), }, - None, ) .await + .map_err(|e| match e { + PushIngestPlanningError::SourceNotFound(e) => ApiError::bad_request(e), + PushIngestPlanningError::UnsupportedMediaType(_) => { + ApiError::new_unsupported_media_type() + } + PushIngestPlanningError::CommitError(e) => e.int_err().api_err(), + PushIngestPlanningError::Internal(e) => e.api_err(), + })?; + + let push_ingest_executor = catalog.get_one::().unwrap(); + match push_ingest_executor + .ingest_from_stream(target, ingest_plan, arguments.data_stream, None) + .await { // Per note above, we're not including any extra information about the result // of the ingest operation at this point to accommodate async execution Ok(_) => Ok(()), Err(PushIngestError::ReadError(e)) => Err(ApiError::bad_request(e)), - Err(PushIngestError::SourceNotFound(e)) => Err(ApiError::bad_request(e)), Err(PushIngestError::UnsupportedMediaType(_)) => { Err(ApiError::new_unsupported_media_type()) } diff --git a/src/adapter/http/src/e2e/e2e_middleware.rs b/src/adapter/http/src/e2e/e2e_middleware.rs index 22ab277d2..543755719 100644 --- a/src/adapter/http/src/e2e/e2e_middleware.rs +++ b/src/adapter/http/src/e2e/e2e_middleware.rs @@ -29,11 +29,11 @@ pub async fn e2e_middleware_fn(request: Request, next: Next) -> Result() + let outbox_agent = base_catalog + .get_one::() .unwrap(); - outbox_executor.run_while_has_tasks().await?; + outbox_agent.run_while_has_tasks().await?; } Ok(response) diff --git a/src/adapter/http/tests/harness/client_side_harness.rs 
b/src/adapter/http/tests/harness/client_side_harness.rs index 9f85679a4..442cee8a4 100644 --- a/src/adapter/http/tests/harness/client_side_harness.rs +++ b/src/adapter/http/tests/harness/client_side_harness.rs @@ -22,7 +22,8 @@ use kamu::utils::simple_transfer_protocol::SimpleTransferProtocol; use kamu::*; use kamu_accounts::CurrentAccountSubject; use kamu_adapter_http::{OdfSmtpVersion, SmartTransferProtocolClientWs}; -use kamu_datasets_services::DatasetKeyValueServiceSysEnv; +use kamu_datasets_inmem::InMemoryDatasetDependencyRepository; +use kamu_datasets_services::{DatasetKeyValueServiceSysEnv, DependencyGraphServiceImpl}; use messaging_outbox::DummyOutboxImpl; use opendatafabric::{ AccountID, @@ -80,7 +81,8 @@ impl ClientSideHarness { b.add::(); - b.add::(); + b.add::(); + b.add::(); b.add_value(CurrentAccountSubject::logged( AccountID::new_seeded_ed25519(CLIENT_ACCOUNT_NAME.as_bytes()), @@ -132,9 +134,10 @@ impl ClientSideHarness { b.add::(); b.add::(); - b.add::(); + b.add::(); - b.add::(); + b.add::(); + b.add::(); b.add::(); @@ -199,8 +202,12 @@ impl ClientSideHarness { .unwrap() } - pub fn compaction_service(&self) -> Arc { - self.catalog.get_one::().unwrap() + pub fn compaction_planner(&self) -> Arc { + self.catalog.get_one::().unwrap() + } + + pub fn compaction_executor(&self) -> Arc { + self.catalog.get_one::().unwrap() } // TODO: accept alias or handle diff --git a/src/adapter/http/tests/harness/server_side_harness.rs b/src/adapter/http/tests/harness/server_side_harness.rs index 767c86316..0bec16e83 100644 --- a/src/adapter/http/tests/harness/server_side_harness.rs +++ b/src/adapter/http/tests/harness/server_side_harness.rs @@ -20,7 +20,6 @@ use kamu::domain::auth::{ }; use kamu::domain::{ CommitDatasetEventUseCase, - CompactionService, CreateDatasetFromSnapshotUseCase, CreateDatasetUseCase, }; @@ -28,7 +27,7 @@ use kamu::testing::MockDatasetActionAuthorizer; use kamu::DatasetLayout; use kamu_accounts::testing::MockAuthenticationService; use kamu_accounts::{Account, AccountType, CurrentAccountSubject, PROVIDER_PASSWORD}; -use kamu_core::{DatasetRegistry, TenancyConfig}; +use kamu_core::{CompactionExecutor, CompactionPlanner, DatasetRegistry, TenancyConfig}; use opendatafabric::{AccountID, AccountName, DatasetAlias, DatasetHandle}; use reqwest::Url; use time_source::SystemTimeSourceStub; @@ -53,7 +52,9 @@ pub(crate) trait ServerSideHarness { fn cli_commit_dataset_event_use_case(&self) -> Arc; - fn cli_compaction_service(&self) -> Arc; + fn cli_compaction_planner(&self) -> Arc; + + fn cli_compaction_executor(&self) -> Arc; fn dataset_layout(&self, dataset_handle: &DatasetHandle) -> DatasetLayout; diff --git a/src/adapter/http/tests/harness/server_side_local_fs_harness.rs b/src/adapter/http/tests/harness/server_side_local_fs_harness.rs index 119ba57ac..cdb620211 100644 --- a/src/adapter/http/tests/harness/server_side_local_fs_harness.rs +++ b/src/adapter/http/tests/harness/server_side_local_fs_harness.rs @@ -17,7 +17,6 @@ use internal_error::{InternalError, ResultIntoInternal}; use kamu::domain::{ CacheDir, CommitDatasetEventUseCase, - CompactionService, CreateDatasetFromSnapshotUseCase, CreateDatasetUseCase, DatasetRepository, @@ -27,21 +26,23 @@ use kamu::domain::{ use kamu::{ AppendDatasetMetadataBatchUseCaseImpl, CommitDatasetEventUseCaseImpl, - CompactionServiceImpl, + CompactionExecutorImpl, + CompactionPlannerImpl, CreateDatasetFromSnapshotUseCaseImpl, CreateDatasetUseCaseImpl, DatasetLayout, DatasetRegistryRepoBridge, DatasetRepositoryLocalFs, DatasetRepositoryWriter, - 
DependencyGraphServiceInMemory, ObjectStoreBuilderLocalFs, ObjectStoreRegistryImpl, RemoteRepositoryRegistryImpl, }; use kamu_accounts::testing::MockAuthenticationService; use kamu_accounts::{Account, AuthenticationService}; -use kamu_core::{DatasetRegistry, TenancyConfig}; +use kamu_core::{CompactionExecutor, CompactionPlanner, DatasetRegistry, TenancyConfig}; +use kamu_datasets_inmem::InMemoryDatasetDependencyRepository; +use kamu_datasets_services::DependencyGraphServiceImpl; use messaging_outbox::DummyOutboxImpl; use opendatafabric::{AccountName, DatasetAlias, DatasetHandle}; use tempfile::TempDir; @@ -102,7 +103,8 @@ impl ServerSideLocalFsHarness { .add::() .add_value(time_source.clone()) .bind::() - .add::() + .add::() + .add::() .add_value(options.tenancy_config) .add_builder(DatasetRepositoryLocalFs::builder().with_root(datasets_dir)) .bind::() @@ -111,7 +113,8 @@ impl ServerSideLocalFsHarness { .add::() .bind::() .add_value(ServerUrlConfig::new_test(Some(&base_url_rest))) - .add::() + .add::() + .add::() .add::() .add::() .add::() @@ -189,9 +192,14 @@ impl ServerSideHarness for ServerSideLocalFsHarness { .unwrap() } - fn cli_compaction_service(&self) -> Arc { + fn cli_compaction_planner(&self) -> Arc { let cli_catalog = create_cli_user_catalog(&self.base_catalog); - cli_catalog.get_one::().unwrap() + cli_catalog.get_one::().unwrap() + } + + fn cli_compaction_executor(&self) -> Arc { + let cli_catalog = create_cli_user_catalog(&self.base_catalog); + cli_catalog.get_one::().unwrap() } fn api_server_addr(&self) -> String { diff --git a/src/adapter/http/tests/harness/server_side_s3_harness.rs b/src/adapter/http/tests/harness/server_side_s3_harness.rs index d969dab80..4d0f7f2b4 100644 --- a/src/adapter/http/tests/harness/server_side_s3_harness.rs +++ b/src/adapter/http/tests/harness/server_side_s3_harness.rs @@ -16,7 +16,8 @@ use dill::Component; use internal_error::{InternalError, ResultIntoInternal}; use kamu::domain::{ CommitDatasetEventUseCase, - CompactionService, + CompactionExecutor, + CompactionPlanner, CreateDatasetFromSnapshotUseCase, CreateDatasetUseCase, DatasetRepository, @@ -29,14 +30,14 @@ use kamu::utils::s3_context::S3Context; use kamu::{ AppendDatasetMetadataBatchUseCaseImpl, CommitDatasetEventUseCaseImpl, - CompactionServiceImpl, + CompactionExecutorImpl, + CompactionPlannerImpl, CreateDatasetFromSnapshotUseCaseImpl, CreateDatasetUseCaseImpl, DatasetLayout, DatasetRegistryRepoBridge, DatasetRepositoryS3, DatasetRepositoryWriter, - DependencyGraphServiceInMemory, ObjectStoreBuilderLocalFs, ObjectStoreBuilderS3, ObjectStoreRegistryImpl, @@ -44,6 +45,8 @@ use kamu::{ use kamu_accounts::testing::MockAuthenticationService; use kamu_accounts::{Account, AuthenticationService}; use kamu_core::{DatasetRegistry, TenancyConfig}; +use kamu_datasets_inmem::InMemoryDatasetDependencyRepository; +use kamu_datasets_services::DependencyGraphServiceImpl; use messaging_outbox::DummyOutboxImpl; use opendatafabric::{AccountName, DatasetAlias, DatasetHandle}; use time_source::{SystemTimeSource, SystemTimeSourceStub}; @@ -97,7 +100,8 @@ impl ServerSideS3Harness { .add_value(RunInfoDir::new(run_info_dir)) .bind::() .add::() - .add::() + .add::() + .add::() .add_value(options.tenancy_config) .add_builder(DatasetRepositoryS3::builder().with_s3_context(s3_context.clone())) .bind::() @@ -106,7 +110,8 @@ impl ServerSideS3Harness { .add_value(server_authentication_mock(&account)) .bind::() .add_value(ServerUrlConfig::new_test(Some(&base_url_rest))) - .add::() + .add::() + .add::() .add::() 
.add::() .add_value(ObjectStoreBuilderS3::new(s3_context, true)) @@ -178,9 +183,14 @@ impl ServerSideHarness for ServerSideS3Harness { .unwrap() } - fn cli_compaction_service(&self) -> Arc { + fn cli_compaction_planner(&self) -> Arc { let cli_catalog = create_cli_user_catalog(&self.base_catalog); - cli_catalog.get_one::().unwrap() + cli_catalog.get_one::().unwrap() + } + + fn cli_compaction_executor(&self) -> Arc { + let cli_catalog = create_cli_user_catalog(&self.base_catalog); + cli_catalog.get_one::().unwrap() } fn dataset_url_with_scheme(&self, dataset_alias: &DatasetAlias, scheme: &str) -> Url { diff --git a/src/adapter/http/tests/tests/test_data_ingest.rs b/src/adapter/http/tests/tests/test_data_ingest.rs index 0b25effe3..fd95572f0 100644 --- a/src/adapter/http/tests/tests/test_data_ingest.rs +++ b/src/adapter/http/tests/tests/test_data_ingest.rs @@ -602,7 +602,8 @@ impl DataIngestHarness { async fn new() -> Self { let catalog = dill::CatalogBuilder::new() .add::() - .add::() + .add::() + .add::() .add::() .add::() .add_value(FileUploadLimitConfig::new_in_bytes(1000)) diff --git a/src/adapter/http/tests/tests/test_data_query.rs b/src/adapter/http/tests/tests/test_data_query.rs index 36c85c528..cc44da0f8 100644 --- a/src/adapter/http/tests/tests/test_data_query.rs +++ b/src/adapter/http/tests/tests/test_data_query.rs @@ -53,7 +53,6 @@ impl Harness { .add_value(identity_config) .add::() .add::() - .add::() .add::() .build(); @@ -120,11 +119,14 @@ impl Harness { } let ctx = SessionContext::new(); - let mut writer = DataWriterDataFusion::builder(create_result.dataset.clone(), ctx.clone()) - .with_metadata_state_scanned(None) - .await - .unwrap() - .build(); + let mut writer = DataWriterDataFusion::from_metadata_chain( + ctx.clone(), + ResolvedDataset::from(&create_result), + &BlockRef::Head, + None, + ) + .await + .unwrap(); writer .write( @@ -149,7 +151,7 @@ impl Harness { source_event_time: system_time, new_watermark: None, new_source_state: None, - data_staging_path: run_info_dir.path().join(".temp-data"), + data_staging_path: run_info_dir.path().join(".temp-data.parquet"), }, ) .await @@ -467,14 +469,14 @@ async fn test_data_query_handler() { }, "subQueries": [], "commitment": { - "inputHash": "f16206ab5788a997a0d05c236c207ea66434f2d4ae933ad62556583979f47d2f522ed", + "inputHash": "f1620c01c8a5746b5c70f76a42cae96b97639737f597f87a74fa3f6e4c4799a130891", "outputHash": "f16208d66e08ce876ba35ce00ea56f02faf83dbc086f877c443e3d493427ccad133f1", "subQueriesHash": "f1620ca4510738395af1429224dd785675309c344b2b549632e20275c69b15ed1d210", }, "proof": { "type": "Ed25519Signature2020", "verificationMethod": "did:key:z6Mko2nqhQ9wYSTS5Giab2j1aHzGnxHimqwmFeEVY8aNsVnN", - "proofValue": "uadIt9gTyeqRUwCIHiq4ILzK79h0jnZwOSVX86NeSJrmoMAV1keK9CJl7yyH9wiJCw1AjAb84nxeqB7kPzHyaDQ", + "proofValue": "ulm5TqOk7Qkut08caRv0_vxzA8zTpScKPHvZKM0e9rO1irRylf1K_9FykhB1oWqgm51eppwFxwm5mrrv7J8BiDQ", } }), response @@ -621,14 +623,14 @@ async fn test_data_verify_handler() { }, "subQueries": [], "commitment": { - "inputHash": "f16205ad9518f5dd9fe6bbc082afc92841dff1cb2ebe85f82d12fbb3567399aa879c1", + "inputHash": "f162043b05d93c527d8573645d2529dd6678e6375f308cdfa21028c34593e1455edf4", "outputHash": "f1620ff7f5beaf16900218a3ac4aae82cdccf764816986c7c739c716cf7dc03112a2c", "subQueriesHash": "f1620ca4510738395af1429224dd785675309c344b2b549632e20275c69b15ed1d210", }, "proof": { "type": "Ed25519Signature2020", "verificationMethod": "did:key:z6Mko2nqhQ9wYSTS5Giab2j1aHzGnxHimqwmFeEVY8aNsVnN", - "proofValue": 
"uox4Jm0WJgSV-5K7Lp2jLvvRXnVTqu_N-DdGBt4NETRVN-cvZQINUDCcmBFrtmORsJV4PrjDtCNumvsEYeSLZAQ", + "proofValue": "uyK1zM9v3MlkC2-2TyCuwN5hfbsUV15GSegzGuwgaWYKsTtQj1xxUrPsTbPZs0aBSmR569ozZ8ZtAUYVAjoy3Bg", } }), response diff --git a/src/adapter/http/tests/tests/test_dataset_authorization_layer.rs b/src/adapter/http/tests/tests/test_dataset_authorization_layer.rs index 416bc02f8..285ebbdda 100644 --- a/src/adapter/http/tests/tests/test_dataset_authorization_layer.rs +++ b/src/adapter/http/tests/tests/test_dataset_authorization_layer.rs @@ -21,11 +21,12 @@ use kamu::{ DatasetRegistryRepoBridge, DatasetRepositoryLocalFs, DatasetRepositoryWriter, - DependencyGraphServiceInMemory, }; use kamu_accounts::testing::MockAuthenticationService; use kamu_accounts::*; use kamu_core::TenancyConfig; +use kamu_datasets_inmem::InMemoryDatasetDependencyRepository; +use kamu_datasets_services::DependencyGraphServiceImpl; use messaging_outbox::DummyOutboxImpl; use mockall::predicate::{eq, function}; use opendatafabric::{DatasetAlias, DatasetHandle, DatasetKind, DatasetName, DatasetRef}; @@ -221,7 +222,8 @@ impl ServerHarness { b.add::() .add::() - .add::() + .add::() + .add::() .add_value(MockAuthenticationService::resolving_token( DUMMY_ACCESS_TOKEN, Account::dummy(), diff --git a/src/adapter/http/tests/tests/test_routing.rs b/src/adapter/http/tests/tests/test_routing.rs index f7d7c62d1..d0e84a04b 100644 --- a/src/adapter/http/tests/tests/test_routing.rs +++ b/src/adapter/http/tests/tests/test_routing.rs @@ -18,6 +18,8 @@ use kamu::domain::*; use kamu::testing::*; use kamu::*; use kamu_accounts::CurrentAccountSubject; +use kamu_datasets_inmem::InMemoryDatasetDependencyRepository; +use kamu_datasets_services::DependencyGraphServiceImpl; use messaging_outbox::DummyOutboxImpl; use opendatafabric::*; use time_source::SystemTimeSourceDefault; @@ -42,7 +44,8 @@ async fn setup_repo() -> RepoFixture { let mut b = dill::CatalogBuilder::new(); b.add::() .add::() - .add::() + .add::() + .add::() .add_value(TenancyConfig::SingleTenant) .add_builder(DatasetRepositoryLocalFs::builder().with_root(datasets_dir)) .bind::() diff --git a/src/adapter/http/tests/tests/tests_pull/scenarios/scenario_existing_diverged_dataset.rs b/src/adapter/http/tests/tests/tests_pull/scenarios/scenario_existing_diverged_dataset.rs index 771a68bf4..4e5fb1b19 100644 --- a/src/adapter/http/tests/tests/tests_pull/scenarios/scenario_existing_diverged_dataset.rs +++ b/src/adapter/http/tests/tests/tests_pull/scenarios/scenario_existing_diverged_dataset.rs @@ -95,9 +95,18 @@ impl SmartPullExistingDivergedDatasetScenario .cli_dataset_registry() .get_dataset_by_handle(&server_create_result.dataset_handle); - let compaction_service = server_harness.cli_compaction_service(); - let server_compaction_result = compaction_service - .compact_dataset(server_dataset, CompactionOptions::default(), None) + let compaction_planner = server_harness.cli_compaction_planner(); + let compaction_execution_service = server_harness.cli_compaction_executor(); + + let server_compaction_result = compaction_execution_service + .execute( + server_dataset.clone(), + compaction_planner + .plan_compaction(server_dataset, CompactionOptions::default(), None) + .await + .unwrap(), + None, + ) .await .unwrap(); diff --git a/src/adapter/http/tests/tests/tests_push/scenarios/scenario_existing_diverged_dataset.rs b/src/adapter/http/tests/tests/tests_push/scenarios/scenario_existing_diverged_dataset.rs index 4010d4ec2..14241f675 100644 --- 
a/src/adapter/http/tests/tests/tests_push/scenarios/scenario_existing_diverged_dataset.rs +++ b/src/adapter/http/tests/tests/tests_push/scenarios/scenario_existing_diverged_dataset.rs @@ -98,9 +98,18 @@ impl SmartPushExistingDivergedDatasetScenario .get_dataset_by_handle(&client_create_result.dataset_handle); // Compact at client side - let compaction_service = client_harness.compaction_service(); - let client_compaction_result = compaction_service - .compact_dataset(client_dataset, CompactionOptions::default(), None) + let compaction_planner = client_harness.compaction_planner(); + let compaction_execution_service = client_harness.compaction_executor(); + + let client_compaction_result = compaction_execution_service + .execute( + client_dataset.clone(), + compaction_planner + .plan_compaction(client_dataset, CompactionOptions::default(), None) + .await + .unwrap(), + None, + ) .await .unwrap(); diff --git a/src/adapter/oauth/Cargo.toml b/src/adapter/oauth/Cargo.toml index 4d12f84f9..f9ec47e5b 100644 --- a/src/adapter/oauth/Cargo.toml +++ b/src/adapter/oauth/Cargo.toml @@ -35,7 +35,7 @@ reqwest = { version = "0.12", default-features = false, features = [ ] } serde = { version = "1", default-features = false } serde_json = { version = "1", default-features = false } -thiserror = { version = "1", default-features = false } +thiserror = { version = "2", default-features = false, features = ["std"] } [dev-dependencies] diff --git a/src/adapter/odata/Cargo.toml b/src/adapter/odata/Cargo.toml index 35db251d2..9be65c558 100644 --- a/src/adapter/odata/Cargo.toml +++ b/src/adapter/odata/Cargo.toml @@ -32,8 +32,8 @@ opendatafabric = { workspace = true, default-features = false, features = ["arro axum = { version = "0.7", default-features = false, features = [] } chrono = { version = "0.4", default-features = false } -datafusion = { version = "42", default-features = false } -datafusion-odata = { version = "42", default-features = false } +datafusion = { version = "43", default-features = false } +datafusion-odata = { version = "43", default-features = false } dill = { version = "0.9" } futures = { version = "0.3", default-features = false } http = "1" diff --git a/src/adapter/odata/tests/tests/test_handlers.rs b/src/adapter/odata/tests/tests/test_handlers.rs index da77b232b..079bd079c 100644 --- a/src/adapter/odata/tests/tests/test_handlers.rs +++ b/src/adapter/odata/tests/tests/test_handlers.rs @@ -20,6 +20,7 @@ use kamu_accounts::CurrentAccountSubject; use messaging_outbox::DummyOutboxImpl; use opendatafabric::*; use time_source::{SystemTimeSource, SystemTimeSourceStub}; +use url::Url; use super::test_api_server::TestAPIServer; @@ -314,7 +315,8 @@ async fn test_collection_handler_by_id_not_found() { struct TestHarness { temp_dir: tempfile::TempDir, catalog: Catalog, - push_ingest_svc: Arc, + push_ingest_planner: Arc, + push_ingest_executor: Arc, api_server: TestAPIServer, } @@ -357,7 +359,8 @@ impl TestHarness { )) .bind::() .add::() - .add::() + .add::() + .add::() .add::() .add_value(ServerUrlConfig::new_test(None)); @@ -366,7 +369,8 @@ impl TestHarness { b.build() }; - let push_ingest_svc = catalog.get_one::().unwrap(); + let push_ingest_planner = catalog.get_one::().unwrap(); + let push_ingest_executor = catalog.get_one::().unwrap(); let api_server = TestAPIServer::new(catalog.clone(), None, None, TenancyConfig::SingleTenant).await; @@ -374,7 +378,8 @@ impl TestHarness { Self { temp_dir, catalog, - push_ingest_svc, + push_ingest_planner, + push_ingest_executor, api_server, } } @@ -429,17 
+434,24 @@ impl TestHarness { ) .unwrap(); - self.push_ingest_svc - .ingest_from_url( - ResolvedDataset::from(&ds), - None, - url::Url::from_file_path(&src_path).unwrap(), - PushIngestOpts::default(), - None, - ) + self.ingest_from_url(&ds, url::Url::from_file_path(&src_path).unwrap()) + .await; + + ds + } + + async fn ingest_from_url(&self, created: &CreateDatasetResult, url: Url) { + let target = ResolvedDataset::from(created); + + let ingest_plan = self + .push_ingest_planner + .plan_ingest(target.clone(), None, PushIngestOpts::default()) .await .unwrap(); - ds + self.push_ingest_executor + .ingest_from_url(target, ingest_plan, url, None) + .await + .unwrap(); } } diff --git a/src/app/cli/Cargo.toml b/src/app/cli/Cargo.toml index 5406b6221..2aa1a8e1b 100644 --- a/src/app/cli/Cargo.toml +++ b/src/app/cli/Cargo.toml @@ -164,7 +164,7 @@ tracing-bunyan-formatter = "0.3" async-trait = "0.1" chrono = "0.4" cfg-if = "1" # Conditional compilation -datafusion = { version = "42", default-features = false, features = [ +datafusion = { version = "43", default-features = false, features = [ "crypto_expressions", "encoding_expressions", "parquet", @@ -186,7 +186,7 @@ secrecy = "0.10" shlex = "1" # Parsing partial input for custom completions signal-hook = "0.3" # Signal handling tempfile = "3" -thiserror = { version = "1", default-features = false } +thiserror = { version = "2", default-features = false, features = ["std"] } tokio = { version = "1", default-features = false, features = ["io-util"] } tokio-stream = { version = "0.1", default-features = false, features = ["net"] } tokio-util = { version = "0.7", default-features = false, features = ["io"] } diff --git a/src/app/cli/src/app.rs b/src/app/cli/src/app.rs index e3c7722b9..db3e65724 100644 --- a/src/app/cli/src/app.rs +++ b/src/app/cli/src/app.rs @@ -16,7 +16,6 @@ use chrono::{DateTime, Duration, Utc}; use container_runtime::{ContainerRuntime, ContainerRuntimeConfig}; use database_common::DatabaseTransactionRunner; use dill::*; -use init_on_startup::InitOnStartup; use internal_error::{InternalError, ResultIntoInternal}; use kamu::domain::*; use kamu::*; @@ -30,7 +29,7 @@ use kamu_flow_system_services::{ MESSAGE_PRODUCER_KAMU_FLOW_CONFIGURATION_SERVICE, MESSAGE_PRODUCER_KAMU_FLOW_PROGRESS_SERVICE, }; -use kamu_task_system_inmem::domain::{TaskProgressMessage, MESSAGE_PRODUCER_KAMU_TASK_EXECUTOR}; +use kamu_task_system_inmem::domain::{TaskProgressMessage, MESSAGE_PRODUCER_KAMU_TASK_AGENT}; use messaging_outbox::{register_message_dispatcher, Outbox, OutboxDispatchingImpl}; use opendatafabric as odf; use time_source::{SystemTimeSource, SystemTimeSourceDefault, SystemTimeSourceStub}; @@ -123,11 +122,6 @@ pub async fn run(workspace_layout: WorkspaceLayout, args: cli::Cli) -> Result<() // Configure application let (guards, base_catalog, cli_catalog, maybe_server_catalog, output_config) = { - let dependencies_graph_repository = prepare_dependencies_graph_repository( - &workspace_layout, - tenancy_config, - current_account.to_current_account_subject(), - ); let is_e2e_testing = args.e2e_output_data_path.is_some(); let mut base_catalog_builder = configure_base_catalog( @@ -137,17 +131,22 @@ pub async fn run(workspace_layout: WorkspaceLayout, args: cli::Cli) -> Result<() is_e2e_testing, ); - // NOTE: Register DatasetEntryIndexer in DI, since it is referenced by other - // components (via InitOnStartup) - // TODO: PERF: Do not register InitOnStartup-components if we are not inside the - // workspace - base_catalog_builder.add_builder( - 
kamu_datasets_services::DatasetEntryIndexer::builder() - .with_is_in_workspace(workspace_svc.is_in_workspace()), - ); - // The indexer has no other interfaces - base_catalog_builder - .bind::(); + if workspace_svc.is_in_workspace() { + // TODO: Private Datasets: recheck after merge + // // NOTE: Register DatasetEntryIndexer in DI, since it is referenced by other + // // components (via InitOnStartup) + // // TODO: PERF: Do not register InitOnStartup-components if we are not inside the + // // workspace + // base_catalog_builder.add_builder( + // kamu_datasets_services::DatasetEntryIndexer::builder() + // .with_is_in_workspace(workspace_svc.is_in_workspace()), + // ); + // // The indexer has no other interfaces + // base_catalog_builder + // .bind::(); + base_catalog_builder.add::(); + base_catalog_builder.add::(); + } base_catalog_builder.add_value(JwtAuthenticationConfig::load_from_env()); base_catalog_builder.add_value(GithubAuthenticationConfig::load_from_env()); @@ -162,10 +161,6 @@ pub async fn run(workspace_layout: WorkspaceLayout, args: cli::Cli) -> Result<() configure_in_memory_components(&mut base_catalog_builder); }; - base_catalog_builder - .add_value(dependencies_graph_repository) - .bind::(); - let output_config = configure_output_format(&args, &workspace_svc); base_catalog_builder.add_value(output_config.clone()); base_catalog_builder.add_value(Interact::new(args.yes, output_config.is_tty)); @@ -279,8 +274,8 @@ pub async fn run(workspace_layout: WorkspaceLayout, args: cli::Cli) -> Result<() command_result = command_result // If successful, then process the Outbox messages while they are present .and_then_async(|_| async { - let outbox_executor = cli_catalog.get_one::()?; - outbox_executor + let outbox_agent = cli_catalog.get_one::()?; + outbox_agent .run_while_has_tasks() .await .map_err(CLIError::critical) @@ -364,33 +359,6 @@ where // Catalog //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -pub fn prepare_dependencies_graph_repository( - workspace_layout: &WorkspaceLayout, - tenancy_config: TenancyConfig, - current_account_subject: CurrentAccountSubject, -) -> DependencyGraphRepositoryInMemory { - // Construct a special catalog just to create 1 object, but with a repository - // bound to CLI user. It also should be authorized to access any dataset. 
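
The hunk above swaps the unconditional registration of the startup indexer for registration that only happens when the CLI runs inside a workspace. A minimal sketch of that gating pattern, using a toy stand-in for the dill catalog builder; the component names in the sketch are assumptions for illustration (the commented-out code references DatasetEntryIndexer), not copied verbatim from the patch:

#[derive(Default)]
struct CatalogBuilder {
    components: Vec<&'static str>,
}

impl CatalogBuilder {
    fn add(&mut self, component: &'static str) -> &mut Self {
        self.components.push(component);
        self
    }
}

fn register_startup_indexers(b: &mut CatalogBuilder, is_in_workspace: bool) {
    // Startup indexers walk existing datasets, so they are only registered
    // when there is actually a workspace to scan.
    if is_in_workspace {
        // Assumed component names, for illustration only.
        b.add("DatasetEntryIndexer");
        b.add("RebacIndexer");
    }
}

fn main() {
    let mut outside = CatalogBuilder::default();
    register_startup_indexers(&mut outside, false);
    assert!(outside.components.is_empty());

    let mut inside = CatalogBuilder::default();
    register_startup_indexers(&mut inside, true);
    assert_eq!(inside.components, ["DatasetEntryIndexer", "RebacIndexer"]);
}

Outside a workspace nothing is registered, so no InitOnStartup indexing job runs against a dataset store that does not exist.
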
- - let special_catalog_for_graph = CatalogBuilder::new() - .add::() - .add_value(tenancy_config) - .add_builder( - DatasetRepositoryLocalFs::builder().with_root(workspace_layout.datasets_dir.clone()), - ) - .bind::() - .bind::() - .add_value(current_account_subject) - .add::() - .add::() - // Don't add its own initializer, leave optional dependency uninitialized - .build(); - - let dataset_repo = special_catalog_for_graph.get_one().unwrap(); - - DependencyGraphRepositoryInMemory::new(dataset_repo) -} - // Public only for tests pub fn configure_base_catalog( workspace_layout: &WorkspaceLayout, @@ -436,19 +404,23 @@ pub fn configure_base_catalog( b.add::(); + b.add::(); + b.add::(); b.add::(); - b.add::(); + b.add::(); + b.add::(); b.add::(); b.add::(); - b.add::(); + b.add::(); b.add::(); - b.add::(); + b.add::(); + b.add::(); b.add::(); @@ -459,11 +431,13 @@ pub fn configure_base_catalog( b.add::(); - b.add::(); + b.add::(); + b.add::(); b.add::(); - b.add::(); + b.add::(); + b.add::(); b.add::(); @@ -478,8 +452,6 @@ pub fn configure_base_catalog( b.add::(); b.add::(); - b.add::(); - b.add::(); b.add::(); b.add::(); @@ -519,7 +491,12 @@ pub fn configure_base_catalog( b.add::(); + if tenancy_config == TenancyConfig::MultiTenant { + b.add::(); + } + b.add::(); + b.add::(); b.add_builder( messaging_outbox::OutboxImmediateImpl::builder() @@ -528,8 +505,8 @@ pub fn configure_base_catalog( b.add::(); b.add::(); b.bind::(); - b.add::(); - b.add::(); + b.add::(); + b.add::(); register_message_dispatcher::( &mut b, @@ -568,7 +545,7 @@ pub fn configure_server_catalog(base_catalog: &Catalog) -> CatalogBuilder { kamu_task_system_services::register_dependencies(&mut b); - b.add_value(kamu_flow_system_inmem::domain::FlowExecutorConfig::new( + b.add_value(kamu_flow_system_inmem::domain::FlowAgentConfig::new( Duration::seconds(1), Duration::minutes(1), )); @@ -585,7 +562,7 @@ pub fn configure_server_catalog(base_catalog: &Catalog) -> CatalogBuilder { MESSAGE_PRODUCER_KAMU_FLOW_CONFIGURATION_SERVICE, ); - register_message_dispatcher::(&mut b, MESSAGE_PRODUCER_KAMU_TASK_EXECUTOR); + register_message_dispatcher::(&mut b, MESSAGE_PRODUCER_KAMU_TASK_AGENT); b } diff --git a/src/app/cli/src/cli_commands.rs b/src/app/cli/src/cli_commands.rs index b185b9633..8be1ba177 100644 --- a/src/app/cli/src/cli_commands.rs +++ b/src/app/cli/src/cli_commands.rs @@ -109,6 +109,7 @@ pub fn get_command( cli_catalog.get_one()?, cli_catalog.get_one()?, cli_catalog.get_one()?, + cli_catalog.get_one()?, validate_dataset_ref(cli_catalog, c.dataset)?, c.file.unwrap_or_default(), c.source_name, @@ -420,6 +421,8 @@ pub fn get_command( cli_catalog.get_one()?, cli_catalog.get_one()?, cli_catalog.get_one()?, + cli_catalog.get_one()?, + cli_catalog.get_one()?, args.e2e_output_data_path, )), Some(cli::SystemApiServerSubCommand::GqlQuery(ssc)) => Box::new( diff --git a/src/app/cli/src/commands/delete_command.rs b/src/app/cli/src/commands/delete_command.rs index fcf344140..b6de49e50 100644 --- a/src/app/cli/src/commands/delete_command.rs +++ b/src/app/cli/src/commands/delete_command.rs @@ -58,10 +58,13 @@ impl DeleteCommand { #[async_trait::async_trait(?Send)] impl Command for DeleteCommand { async fn validate_args(&self) -> Result<(), CLIError> { - if self.dataset_ref_patterns.is_empty() && !self.all { - Err(CLIError::usage_error("Specify a dataset or use --all flag")) - } else { - Ok(()) + match (self.dataset_ref_patterns.as_slice(), self.all) { + ([], false) => Err(CLIError::usage_error("Specify dataset(s) or pass --all")), + ([], true) 
=> Ok(()), + ([_head, ..], false) => Ok(()), + ([_head, ..], true) => Err(CLIError::usage_error( + "You can either specify dataset(s) or pass --all", + )), } } @@ -110,6 +113,8 @@ impl Command for DeleteCommand { .confirm_delete(&dataset_handles) .await?; + tracing::info!(?dataset_handles, "Trying to define delete order"); + // TODO: Multiple rounds of resolving IDs to handles let dataset_ids = self .dependency_graph_service @@ -120,6 +125,8 @@ impl Command for DeleteCommand { .await .map_err(CLIError::critical)?; + tracing::info!(?dataset_ids, "Delete order defined"); + for id in &dataset_ids { match self .delete_dataset diff --git a/src/app/cli/src/commands/ingest_command.rs b/src/app/cli/src/commands/ingest_command.rs index a55c16cfa..b2726c21c 100644 --- a/src/app/cli/src/commands/ingest_command.rs +++ b/src/app/cli/src/commands/ingest_command.rs @@ -26,7 +26,8 @@ use crate::OutputConfig; pub struct IngestCommand { data_format_reg: Arc, dataset_registry: Arc, - push_ingest_svc: Arc, + push_ingest_planner: Arc, + push_ingest_executor: Arc, output_config: Arc, remote_alias_reg: Arc, dataset_ref: DatasetRef, @@ -42,7 +43,8 @@ impl IngestCommand { pub fn new( data_format_reg: Arc, dataset_registry: Arc, - push_ingest_svc: Arc, + push_ingest_planner: Arc, + push_ingest_executor: Arc, output_config: Arc, remote_alias_reg: Arc, dataset_ref: DatasetRef, @@ -60,7 +62,8 @@ impl IngestCommand { Self { data_format_reg, dataset_registry, - push_ingest_svc, + push_ingest_planner, + push_ingest_executor, output_config, remote_alias_reg, dataset_ref, @@ -191,23 +194,28 @@ impl Command for IngestCommand { let mut updated = 0; for url in urls { - let result = self - .push_ingest_svc - .ingest_from_url( - self.dataset_registry.get_dataset_by_handle(&dataset_handle), + let target = self.dataset_registry.get_dataset_by_handle(&dataset_handle); + let plan = self + .push_ingest_planner + .plan_ingest( + target.clone(), self.source_name.as_deref(), - url, PushIngestOpts { media_type: self.get_media_type()?, source_event_time, auto_create_push_source: false, schema_inference: SchemaInferenceOpts::default(), }, - listener.clone(), ) .await .map_err(CLIError::failure)?; + let result = self + .push_ingest_executor + .ingest_from_url(target, plan, url, listener.clone()) + .await + .map_err(CLIError::failure)?; + match result { PushIngestResult::UpToDate { .. } => (), PushIngestResult::Updated { .. 
} => updated += 1, diff --git a/src/app/cli/src/commands/repository_delete_command.rs b/src/app/cli/src/commands/repository_delete_command.rs index c98e54d2b..273aa9ab0 100644 --- a/src/app/cli/src/commands/repository_delete_command.rs +++ b/src/app/cli/src/commands/repository_delete_command.rs @@ -45,6 +45,17 @@ impl RepositoryDeleteCommand { #[async_trait::async_trait(?Send)] impl Command for RepositoryDeleteCommand { + async fn validate_args(&self) -> Result<(), CLIError> { + match (self.names.as_slice(), self.all) { + ([], false) => Err(CLIError::usage_error("Specify repository(s) or pass --all")), + ([], true) => Ok(()), + ([_head, ..], false) => Ok(()), + ([_head, ..], true) => Err(CLIError::usage_error( + "You can either specify repository(s) or pass --all", + )), + } + } + async fn run(&mut self) -> Result<(), CLIError> { let repo_names: Vec<_> = if self.all { self.remote_repo_reg.get_all_repositories().collect() @@ -58,9 +69,11 @@ impl Command for RepositoryDeleteCommand { }; if repo_names.is_empty() { - return Err(CLIError::usage_error( - "Specify a repository or use --all flag", - )); + eprintln!( + "{}", + console::style("There are no repositories to delete").yellow() + ); + return Ok(()); } self.interact.require_confirmation(format!( diff --git a/src/app/cli/src/commands/set_watermark_command.rs b/src/app/cli/src/commands/set_watermark_command.rs index 2e7f8256e..6129a9648 100644 --- a/src/app/cli/src/commands/set_watermark_command.rs +++ b/src/app/cli/src/commands/set_watermark_command.rs @@ -110,11 +110,16 @@ impl Command for SetWatermarkCommand { Ok(()) } Err( - e @ (SetWatermarkError::IsDerivative - | SetWatermarkError::IsRemote + e @ (SetWatermarkError::Planning( + SetWatermarkPlanningError::IsRemote | SetWatermarkPlanningError::IsDerivative, + ) | SetWatermarkError::Access(_)), ) => Err(CLIError::failure(e)), - Err(e @ SetWatermarkError::Internal(_)) => Err(CLIError::critical(e)), + Err( + e @ (SetWatermarkError::Execution(SetWatermarkExecutionError::Internal(_)) + | SetWatermarkError::Planning(SetWatermarkPlanningError::Internal(_)) + | SetWatermarkError::Internal(_)), + ) => Err(CLIError::critical(e)), } } } diff --git a/src/app/cli/src/commands/system_api_server_run_command.rs b/src/app/cli/src/commands/system_api_server_run_command.rs index a8abb052a..d97af051a 100644 --- a/src/app/cli/src/commands/system_api_server_run_command.rs +++ b/src/app/cli/src/commands/system_api_server_run_command.rs @@ -18,7 +18,9 @@ use internal_error::ResultIntoInternal; use kamu::domain::TenancyConfig; use kamu_accounts::*; use kamu_accounts_services::PasswordLoginCredentials; +use kamu_adapter_http::FileUploadLimitConfig; use kamu_adapter_oauth::*; +use kamu_datasets::DatasetEnvVarsConfig; use tracing::Instrument; use super::{CLIError, Command}; @@ -36,6 +38,8 @@ pub struct APIServerRunCommand { external_address: Option, get_token: bool, predefined_accounts_config: Arc, + file_upload_limit_config: Arc, + dataset_env_vars_config: Arc, account_subject: Arc, github_auth_config: Arc, e2e_output_data_path: Option, @@ -52,6 +56,8 @@ impl APIServerRunCommand { external_address: Option, get_token: bool, predefined_accounts_config: Arc, + file_upload_limit_config: Arc, + dataset_env_vars_config: Arc, account_subject: Arc, github_auth_config: Arc, e2e_output_data_path: Option, @@ -66,6 +72,8 @@ impl APIServerRunCommand { external_address, get_token, predefined_accounts_config, + file_upload_limit_config, + dataset_env_vars_config, account_subject, github_auth_config, e2e_output_data_path, @@ 
-138,6 +146,8 @@ impl Command for APIServerRunCommand { self.tenancy_config, self.address, self.port, + self.file_upload_limit_config.clone(), + self.dataset_env_vars_config.is_enabled(), self.external_address, self.e2e_output_data_path.as_ref(), ) diff --git a/src/app/cli/src/database.rs b/src/app/cli/src/database.rs index c4cfc81ef..54410dbc7 100644 --- a/src/app/cli/src/database.rs +++ b/src/app/cli/src/database.rs @@ -115,6 +115,7 @@ pub fn configure_database_components( b.add::(); b.add::(); + b.add::(); b.add::(); b.add::(); @@ -137,6 +138,7 @@ pub fn configure_database_components( b.add::(); b.add::(); + b.add::(); b.add::(); b.add::(); @@ -156,6 +158,7 @@ pub fn configure_database_components( b.add::(); b.add::(); + b.add::(); b.add::(); b.add::(); @@ -187,6 +190,7 @@ pub fn configure_in_memory_components(b: &mut CatalogBuilder) { b.add::(); b.add::(); b.add::(); + b.add::(); b.add::(); NoOpDatabasePlugin::init_database_components(b); diff --git a/src/app/cli/src/explore/api_server.rs b/src/app/cli/src/explore/api_server.rs index 987c552e9..0e926a05b 100644 --- a/src/app/cli/src/explore/api_server.rs +++ b/src/app/cli/src/explore/api_server.rs @@ -22,22 +22,25 @@ use indoc::indoc; use internal_error::*; use kamu::domain::{Protocols, ServerUrlConfig, TenancyConfig}; use kamu_adapter_http::e2e::e2e_router; -use kamu_flow_system_inmem::domain::FlowExecutor; -use kamu_task_system_inmem::domain::TaskExecutor; -use messaging_outbox::OutboxExecutor; +use kamu_adapter_http::FileUploadLimitConfig; +use kamu_flow_system_inmem::domain::FlowAgent; +use kamu_task_system_inmem::domain::TaskAgent; +use messaging_outbox::OutboxAgent; use tokio::sync::Notify; use url::Url; use utoipa_axum::router::OpenApiRouter; use utoipa_axum::routes; +use super::{UIConfiguration, UIFeatureFlags}; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub struct APIServer { server: axum::serve::Serve, axum::Router>, local_addr: SocketAddr, - task_executor: Arc, - flow_executor: Arc, - outbox_executor: Arc, + task_agent: Arc, + flow_agent: Arc, + outbox_agent: Arc, maybe_shutdown_notify: Option>, } @@ -48,16 +51,18 @@ impl APIServer { tenancy_config: TenancyConfig, address: Option, port: Option, + file_upload_limit_config: Arc, + enable_dataset_env_vars_management: bool, external_address: Option, e2e_output_data_path: Option<&PathBuf>, ) -> Result { // Background task executor must run with server privileges to execute tasks on // behalf of the system, as they are automatically scheduled - let task_executor = cli_catalog.get_one().unwrap(); + let task_agent = cli_catalog.get_one().unwrap(); - let flow_executor = cli_catalog.get_one().unwrap(); + let flow_agent = cli_catalog.get_one().unwrap(); - let outbox_executor = cli_catalog.get_one().unwrap(); + let outbox_agent = cli_catalog.get_one().unwrap(); let gql_schema = kamu_adapter_graphql::schema(); @@ -93,6 +98,16 @@ impl APIServer { })) .build(); + let ui_configuration = UIConfiguration { + ingest_upload_file_limit_mb: file_upload_limit_config.max_file_size_in_mb(), + feature_flags: UIFeatureFlags { + enable_logout: true, + enable_scheduling: true, + enable_dataset_env_vars_management, + enable_terms_of_service: true, + }, + }; + let mut router = OpenApiRouter::with_openapi( kamu_adapter_http::openapi::spec_builder( crate::app::VERSION, @@ -116,6 +131,10 @@ impl APIServer { .build(), ) .route("/", axum::routing::get(root)) + .route( + "/ui-config", + axum::routing::get(ui_configuration_handler), 
+ ) .route( // IMPORTANT: The same name is used inside e2e_middleware_fn(). // If there is a need to change, please update there too. @@ -196,6 +215,7 @@ impl APIServer { let router = router .layer(Extension(gql_schema)) .layer(Extension(api_server_catalog)) + .layer(Extension(ui_configuration)) .layer(Extension(Arc::new(api))); let server = axum::serve(listener, router.into_make_service()); @@ -203,9 +223,9 @@ impl APIServer { Ok(Self { server, local_addr, - task_executor, - flow_executor, - outbox_executor, + task_agent, + flow_agent, + outbox_agent, maybe_shutdown_notify, }) } @@ -231,9 +251,9 @@ impl APIServer { tokio::select! { res = server_run_fut => { res.int_err() }, - res = self.outbox_executor.run() => { res.int_err() }, - res = self.task_executor.run() => { res.int_err() }, - res = self.flow_executor.run() => { res.int_err() } + res = self.outbox_agent.run() => { res.int_err() }, + res = self.task_agent.run() => { res.int_err() }, + res = self.flow_agent.run() => { res.int_err() } } } } @@ -256,6 +276,14 @@ async fn root() -> impl axum::response::IntoResponse { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +async fn ui_configuration_handler( + ui_configuration: axum::extract::Extension, +) -> axum::Json { + axum::Json(ui_configuration.0) +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[transactional_handler] async fn graphql_handler( Extension(schema): Extension, diff --git a/src/app/cli/src/explore/mod.rs b/src/app/cli/src/explore/mod.rs index bd35c03d1..1b3db3488 100644 --- a/src/app/cli/src/explore/mod.rs +++ b/src/app/cli/src/explore/mod.rs @@ -19,6 +19,9 @@ pub use notebook_server_impl::*; mod sql_shell_impl; pub use sql_shell_impl::*; +mod ui_configuration; +pub(crate) use ui_configuration::*; + #[cfg(feature = "web-ui")] mod web_ui_server; #[cfg(feature = "web-ui")] diff --git a/src/domain/core/src/services/dependency_graph_repository.rs b/src/app/cli/src/explore/ui_configuration.rs similarity index 58% rename from src/domain/core/src/services/dependency_graph_repository.rs rename to src/app/cli/src/explore/ui_configuration.rs index 79c5dc671..c30b543a2 100644 --- a/src/domain/core/src/services/dependency_graph_repository.rs +++ b/src/app/cli/src/explore/ui_configuration.rs @@ -7,27 +7,26 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. 
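
The new /ui-config route serializes a UIConfiguration value straight to JSON. A self-contained sketch of the resulting camelCase payload, assuming only serde (with derive) and serde_json; the numeric limit is made up, while the field set mirrors the UIConfiguration / UIFeatureFlags structs this patch introduces:

use serde::Serialize;

#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
struct UIConfiguration {
    ingest_upload_file_limit_mb: usize,
    feature_flags: UIFeatureFlags,
}

#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
struct UIFeatureFlags {
    enable_logout: bool,
    enable_scheduling: bool,
    enable_dataset_env_vars_management: bool,
    enable_terms_of_service: bool,
}

fn main() {
    // Example values only; the real handler reads them from the server config.
    let config = UIConfiguration {
        ingest_upload_file_limit_mb: 50,
        feature_flags: UIFeatureFlags {
            enable_logout: true,
            enable_scheduling: true,
            enable_dataset_env_vars_management: false,
            enable_terms_of_service: true,
        },
    };

    // Prints: {"ingestUploadFileLimitMb":50,"featureFlags":{"enableLogout":true,
    //          "enableScheduling":true,"enableDatasetEnvVarsManagement":false,
    //          "enableTermsOfService":true}}
    println!("{}", serde_json::to_string(&config).unwrap());
}
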
-use std::pin::Pin; - -use futures::Stream; -use internal_error::InternalError; -use opendatafabric::DatasetID; +use serde::Serialize; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[async_trait::async_trait] -pub trait DependencyGraphRepository: Sync + Send { - fn list_dependencies_of_all_datasets(&self) -> DatasetDependenciesIDStream; +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +pub(crate) struct UIConfiguration { + pub(crate) ingest_upload_file_limit_mb: usize, + pub(crate) feature_flags: UIFeatureFlags, } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -pub struct DatasetDependencies { - pub downstream_dataset_id: DatasetID, - pub upstream_dataset_ids: Vec, +#[derive(Debug, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +pub(crate) struct UIFeatureFlags { + pub(crate) enable_logout: bool, + pub(crate) enable_scheduling: bool, + pub(crate) enable_dataset_env_vars_management: bool, + pub(crate) enable_terms_of_service: bool, } -pub type DatasetDependenciesIDStream<'a> = - Pin> + Send + 'a>>; - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/app/cli/src/explore/web_ui_server.rs b/src/app/cli/src/explore/web_ui_server.rs index bd70c022f..e1a679aaf 100644 --- a/src/app/cli/src/explore/web_ui_server.rs +++ b/src/app/cli/src/explore/web_ui_server.rs @@ -34,6 +34,8 @@ use url::Url; use utoipa_axum::router::OpenApiRouter; use utoipa_axum::routes; +use super::{UIConfiguration, UIFeatureFlags}; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[derive(RustEmbed)] @@ -44,12 +46,10 @@ struct HttpRoot; #[derive(Debug, Clone, Serialize)] #[serde(rename_all = "camelCase")] -struct WebUIConfig { +struct WebUIRuntimeConfiguration { api_server_gql_url: String, api_server_http_url: String, - ingest_upload_file_limit_mb: usize, login_instructions: Option, - feature_flags: WebUIFeatureFlags, } #[derive(Debug, Clone, Serialize)] @@ -59,17 +59,6 @@ struct WebUILoginInstructions { login_credentials_json: String, } -#[derive(Debug, Clone, Serialize)] -#[serde(rename_all = "camelCase")] -struct WebUIFeatureFlags { - enable_logout: bool, - enable_scheduling: bool, - // TODO: Correct a typo in `WebUIFeatureFlags` - // (content of `assets/runtime-config.json`) - // https://github.com/kamu-data/kamu-cli/issues/841 - enable_dataset_env_vars_managment: bool, -} - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub struct WebUIServer { @@ -117,20 +106,21 @@ impl WebUIServer { let web_ui_url = format!("http://{local_addr}"); - let web_ui_config = WebUIConfig { + let web_ui_runtime_configuration = WebUIRuntimeConfiguration { api_server_gql_url: format!("http://{local_addr}/graphql"), api_server_http_url: web_ui_url.clone(), login_instructions: Some(login_instructions.clone()), + }; + + let ui_configuration = UIConfiguration { ingest_upload_file_limit_mb: file_upload_limit_config.max_file_size_in_mb(), - feature_flags: WebUIFeatureFlags { + feature_flags: UIFeatureFlags { // No way to log out, always logging in a predefined user enable_logout: false, // No way to configure scheduling of datasets enable_scheduling: false, - // TODO: Correct a typo in `WebUIFeatureFlags` - // (content of 
`assets/runtime-config.json`) - // https://github.com/kamu-data/kamu-cli/issues/841 - enable_dataset_env_vars_managment: enable_dataset_env_vars_management, + enable_dataset_env_vars_management, + enable_terms_of_service: true, }, }; @@ -156,8 +146,9 @@ impl WebUIServer { ) .route( "/assets/runtime-config.json", - axum::routing::get(runtime_config_handler), + axum::routing::get(runtime_configuration_handler), ) + .route("/ui-config", axum::routing::get(ui_configuration_handler)) .route( "/graphql", axum::routing::get(graphql_playground_handler).post(graphql_handler), @@ -211,7 +202,8 @@ impl WebUIServer { .merge(kamu_adapter_http::openapi::router().into()) .layer(axum::extract::Extension(web_ui_catalog)) .layer(axum::extract::Extension(gql_schema)) - .layer(axum::extract::Extension(web_ui_config)) + .layer(axum::extract::Extension(web_ui_runtime_configuration)) + .layer(axum::extract::Extension(ui_configuration)) .split_for_parts(); let server = axum::serve( @@ -282,10 +274,18 @@ async fn app_handler(uri: Uri) -> impl IntoResponse { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -async fn runtime_config_handler( - web_ui_config: axum::extract::Extension, -) -> axum::Json { - axum::Json(web_ui_config.0) +async fn runtime_configuration_handler( + web_ui_runtime_configuration: axum::extract::Extension, +) -> axum::Json { + axum::Json(web_ui_runtime_configuration.0) +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +async fn ui_configuration_handler( + ui_configuration: axum::extract::Extension, +) -> axum::Json { + axum::Json(ui_configuration.0) } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/app/cli/src/output/compact_progress.rs b/src/app/cli/src/output/compact_progress.rs index 5e780bc21..5f0eb0243 100644 --- a/src/app/cli/src/output/compact_progress.rs +++ b/src/app/cli/src/output/compact_progress.rs @@ -102,7 +102,7 @@ impl CompactionListener for CompactionProgress { .set_message(self.spinner_message("Compaction dataset")); } - fn success(&self, res: &CompactionResult) { + fn execute_success(&self, res: &CompactionResult) { match res { CompactionResult::NothingToDo => { self.curr_progress.finish_with_message( diff --git a/src/app/cli/src/services/confirm_delete_service.rs b/src/app/cli/src/services/confirm_delete_service.rs index d7fc0eb42..08919d13a 100644 --- a/src/app/cli/src/services/confirm_delete_service.rs +++ b/src/app/cli/src/services/confirm_delete_service.rs @@ -30,6 +30,7 @@ impl ConfirmDeleteService { } } + #[tracing::instrument(level = "debug", skip_all, fields(?dataset_handles))] pub async fn confirm_delete(&self, dataset_handles: &[DatasetHandle]) -> Result<(), CLIError> { for hdl in dataset_handles { let statuses = self.push_status_service.check_remotes_status(hdl).await?; @@ -59,6 +60,7 @@ impl ConfirmDeleteService { } let all_synced = out_of_sync.is_empty() && unknown.is_empty(); + tracing::debug!(%all_synced, ?out_of_sync, ?unknown, "Checking remote status finished"); if !all_synced { eprintln!( diff --git a/src/domain/accounts/domain/Cargo.toml b/src/domain/accounts/domain/Cargo.toml index 8fca119f4..fe320c9a5 100644 --- a/src/domain/accounts/domain/Cargo.toml +++ b/src/domain/accounts/domain/Cargo.toml @@ -45,7 +45,7 @@ rand = "0.8" reusable = "0.1" serde = "1" serde_with = { version = "3", default-features = false } 
-thiserror = { version = "1", default-features = false } +thiserror = { version = "2", default-features = false, features = ["std"] } uuid = { version = "1", default-features = false, features = ["v4"] } # Optional diff --git a/src/domain/auth-rebac/domain/Cargo.toml b/src/domain/auth-rebac/domain/Cargo.toml index 3cd1a36f8..200335053 100644 --- a/src/domain/auth-rebac/domain/Cargo.toml +++ b/src/domain/auth-rebac/domain/Cargo.toml @@ -31,7 +31,7 @@ opendatafabric = { workspace = true } async-trait = "0.1" strum = { version = "0.26", features = ["derive"] } -thiserror = { version = "1", default-features = false } +thiserror = { version = "2", default-features = false, features = ["std"] } # Optional sqlx = { optional = true, version = "0.8", default-features = false, features = [ diff --git a/src/domain/core/Cargo.toml b/src/domain/core/Cargo.toml index 70c340b26..ec81ca034 100644 --- a/src/domain/core/Cargo.toml +++ b/src/domain/core/Cargo.toml @@ -45,13 +45,14 @@ futures = { version = "0.3", default-features = false } http = { version = "1" } pathdiff = { version = "0.2", default-features = false } pin-project = { version = "1", default-features = false } -thiserror = { version = "1", default-features = false } +thiserror = { version = "2", default-features = false, features = ["std"] } tokio = { version = "1", default-features = false } tokio-stream = { version = "0.1", default-features = false } +tracing = { version = "0.1", default-features = false } url = { version = "2", default-features = false, features = ["serde"] } # TODO: Avoid this dependency or depend on sub-crates -datafusion = { version = "42", default-features = false, features = [ +datafusion = { version = "43", default-features = false, features = [ "parquet", ] } object_store = { version = "0.11", default-features = false } diff --git a/src/domain/core/src/entities/metadata_chain.rs b/src/domain/core/src/entities/metadata_chain.rs index b63c6727c..9d130acab 100644 --- a/src/domain/core/src/entities/metadata_chain.rs +++ b/src/domain/core/src/entities/metadata_chain.rs @@ -162,17 +162,17 @@ pub trait MetadataChainExt: MetadataChain { async fn accept_by_ref( &self, visitors: &mut [&mut dyn MetadataChainVisitor], - head: &BlockRef, + block_ref: &BlockRef, ) -> Result<(), AcceptVisitorError> where E: Error + Send, { - let head_hash = self - .resolve_ref(head) + let block_ref_hash = self + .resolve_ref(block_ref) .await .map_err(IterBlocksError::from)?; - self.accept_by_hash(visitors, &head_hash).await + self.accept_by_hash(visitors, &block_ref_hash).await } /// Same as [Self::accept()], allowing us to define the block interval under diff --git a/src/domain/core/src/entities/mod.rs b/src/domain/core/src/entities/mod.rs index ae32ef0e8..90b8e1737 100644 --- a/src/domain/core/src/entities/mod.rs +++ b/src/domain/core/src/entities/mod.rs @@ -7,13 +7,15 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. 
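
The entities/mod.rs hunk below turns the module into a facade: submodules become private and only their re-exported items stay public. A tiny sketch of the resulting layout, with illustrative item names standing in for the real entities:

mod dataset {
    pub struct Dataset;
}
mod writer_metadata_state {
    pub struct DataWriterMetadataState;
}

pub use dataset::*;
pub use writer_metadata_state::*;

fn main() {
    // Callers reach the items through the parent module's re-exports;
    // the submodules themselves are no longer nameable from outside.
    let _d = Dataset;
    let _s = DataWriterMetadataState;
}
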
-pub mod dataset; -pub mod dataset_summary; +mod dataset; +mod dataset_summary; pub mod engine; -pub mod metadata_chain; -pub mod metadata_stream; -pub mod resolved_dataset; -pub mod resolved_datasets_map; +mod metadata_chain; +mod metadata_stream; +mod resolved_dataset; +mod resolved_datasets_map; +mod writer_metadata_state; +mod writer_source_visitor; pub use dataset::*; pub use dataset_summary::*; @@ -21,3 +23,5 @@ pub use metadata_chain::*; pub use metadata_stream::*; pub use resolved_dataset::*; pub use resolved_datasets_map::*; +pub use writer_metadata_state::*; +pub use writer_source_visitor::*; diff --git a/src/domain/core/src/entities/writer_metadata_state.rs b/src/domain/core/src/entities/writer_metadata_state.rs new file mode 100644 index 000000000..031de8f34 --- /dev/null +++ b/src/domain/core/src/entities/writer_metadata_state.rs @@ -0,0 +1,209 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use chrono::{DateTime, Utc}; +use internal_error::{ErrorIntoInternal, InternalError, ResultIntoInternal}; +use opendatafabric as odf; + +use super::{ + AcceptVisitorError, + BlockRef, + MetadataChainExt, + ResolvedDataset, + WriterSourceEventVisitor, +}; +use crate::{ + GenericCallbackVisitor, + MetadataChainVisitorExtInfallible, + MetadataVisitorDecision, + PushSourceNotFoundError, + SearchAddDataVisitor, + SearchSeedVisitor, + SearchSetDataSchemaVisitor, + SearchSetVocabVisitor, + SearchSourceStateVisitor, +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Contains a projection of the metadata needed for [`DataWriter`] to function +#[derive(Debug, Clone)] +pub struct DataWriterMetadataState { + pub block_ref: BlockRef, + pub head: odf::Multihash, + pub schema: Option, + pub source_event: Option, + pub merge_strategy: odf::MergeStrategy, + pub vocab: odf::DatasetVocabulary, + pub data_slices: Vec, + pub prev_offset: Option, + pub prev_checkpoint: Option, + pub prev_watermark: Option>, + pub prev_source_state: Option, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +impl DataWriterMetadataState { + /// Scans metadata chain to populate the needed metadata + /// + /// * `source_name` - name of the source to use when extracting the metadata + /// needed for writing. Leave empty for polling sources or to use the only + /// push source defined when there is no ambiguity. + #[tracing::instrument( + level = "debug", + name="DataWriterMetadataState::build", + skip_all, + fields(target=%target.get_handle(), %block_ref, ?source_name) + )] + pub async fn build( + target: ResolvedDataset, + block_ref: &BlockRef, + source_name: Option<&str>, + ) -> Result { + // TODO: PERF: Full metadata scan below - this is expensive and should be + // improved using skip lists. 
+ + let head = target + .as_metadata_chain() + .resolve_ref(block_ref) + .await + .int_err()?; + let mut seed_visitor = SearchSeedVisitor::new().adapt_err(); + let mut set_vocab_visitor = SearchSetVocabVisitor::new().adapt_err(); + let mut set_data_schema_visitor = SearchSetDataSchemaVisitor::new().adapt_err(); + let mut prev_source_state_visitor = SearchSourceStateVisitor::new(source_name).adapt_err(); + let mut add_data_visitor = SearchAddDataVisitor::new().adapt_err(); + let mut add_data_collection_visitor = GenericCallbackVisitor::new( + Vec::new(), + MetadataVisitorDecision::NextOfType(odf::MetadataEventTypeFlags::ADD_DATA), + |state, _, block| { + let odf::MetadataEvent::AddData(e) = &block.event else { + unreachable!() + }; + + if let Some(output_data) = &e.new_data { + state.push(output_data.physical_hash.clone()); + } + + MetadataVisitorDecision::NextOfType(odf::MetadataEventTypeFlags::ADD_DATA) + }, + ) + .adapt_err(); + let mut source_event_visitor = WriterSourceEventVisitor::new(source_name); + + target + .as_metadata_chain() + .accept_by_hash( + &mut [ + &mut source_event_visitor, + &mut seed_visitor, + &mut set_vocab_visitor, + &mut add_data_visitor, + &mut set_data_schema_visitor, + &mut prev_source_state_visitor, + &mut add_data_collection_visitor, + ], + &head, + ) + .await?; + + { + let seed = seed_visitor + .into_inner() + .into_event() + .expect("Dataset without blocks"); + + assert_eq!(seed.dataset_kind, odf::DatasetKind::Root); + } + + let (source_event, merge_strategy) = + source_event_visitor.get_source_event_and_merge_strategy()?; + let (prev_offset, prev_watermark, prev_checkpoint) = { + match add_data_visitor.into_inner().into_event() { + Some(e) => ( + e.last_offset(), + e.new_watermark, + e.new_checkpoint.map(|cp| cp.physical_hash), + ), + None => (None, None, None), + } + }; + Ok(Self { + block_ref: block_ref.clone(), + head, + schema: set_data_schema_visitor.into_inner().into_event(), + source_event, + merge_strategy, + vocab: set_vocab_visitor + .into_inner() + .into_event() + .unwrap_or_default() + .into(), + data_slices: add_data_collection_visitor.into_inner().into_state(), + prev_offset, + prev_checkpoint, + prev_watermark, + prev_source_state: prev_source_state_visitor.into_inner().into_state(), + }) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, thiserror::Error)] +pub enum ScanMetadataError { + #[error(transparent)] + SourceNotFound( + #[from] + #[backtrace] + SourceNotFoundError, + ), + #[error(transparent)] + Internal( + #[from] + #[backtrace] + InternalError, + ), +} + +impl From> for ScanMetadataError { + fn from(v: AcceptVisitorError) -> Self { + match v { + AcceptVisitorError::Visitor(err) => err, + AcceptVisitorError::Traversal(err) => Self::Internal(err.int_err()), + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, thiserror::Error)] +#[error("{message}")] +pub struct SourceNotFoundError { + pub source_name: Option, + message: String, +} + +impl SourceNotFoundError { + pub fn new(source_name: Option>, message: impl Into) -> Self { + Self { + source_name: source_name.map(std::convert::Into::into), + message: message.into(), + } + } +} + +impl From for PushSourceNotFoundError { + fn from(val: SourceNotFoundError) -> Self { + PushSourceNotFoundError::new(val.source_name) + } +} + 
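
The conversion above flattens both visitor errors and chain-traversal failures into ScanMetadataError, so the accept_by_hash() call inside DataWriterMetadataState::build() can use the ? operator. A compilable stand-alone sketch of that pattern, with simplified error types standing in for the real ones:

use std::fmt;

// Stand-ins: `VisitError` plays the role of ScanMetadataError and
// `AcceptVisitorError` mirrors the two-way split used by the chain visitors.
#[derive(Debug)]
enum AcceptVisitorError<E> {
    Visitor(E),
    Traversal(String),
}

#[derive(Debug)]
enum VisitError {
    SourceNotFound(String),
    Internal(String),
}

impl fmt::Display for VisitError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            VisitError::SourceNotFound(s) => write!(f, "source not found: {s}"),
            VisitError::Internal(s) => write!(f, "internal error: {s}"),
        }
    }
}

impl From<AcceptVisitorError<VisitError>> for VisitError {
    fn from(v: AcceptVisitorError<VisitError>) -> Self {
        match v {
            // Errors raised by a visitor are already domain errors: pass through.
            AcceptVisitorError::Visitor(err) => err,
            // Chain-traversal failures are infrastructure problems: wrap as internal.
            AcceptVisitorError::Traversal(err) => VisitError::Internal(err),
        }
    }
}

fn main() {
    let traversal: VisitError =
        AcceptVisitorError::<VisitError>::Traversal("broken block ref".to_string()).into();
    println!("{traversal}");

    let visitor: VisitError =
        AcceptVisitorError::Visitor(VisitError::SourceNotFound("weather".to_string())).into();
    println!("{visitor}");
}
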
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/ingest-datafusion/src/visitor.rs b/src/domain/core/src/entities/writer_source_visitor.rs similarity index 96% rename from src/infra/ingest-datafusion/src/visitor.rs rename to src/domain/core/src/entities/writer_source_visitor.rs index cf90aacf9..a76a09cef 100644 --- a/src/infra/ingest-datafusion/src/visitor.rs +++ b/src/domain/core/src/entities/writer_source_visitor.rs @@ -7,11 +7,6 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -use kamu_core::{ - HashedMetadataBlockRef, - MetadataChainVisitor, - MetadataVisitorDecision as Decision, -}; use opendatafabric::{ AddPushSource, MergeStrategy, @@ -21,17 +16,23 @@ use opendatafabric::{ SetPollingSource, }; -use crate::{ScanMetadataError, SourceNotFoundError}; +use crate::{ + HashedMetadataBlockRef, + MetadataChainVisitor, + MetadataVisitorDecision as Decision, + ScanMetadataError, + SourceNotFoundError, +}; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -pub struct SourceEventVisitor<'a> { +pub struct WriterSourceEventVisitor<'a> { maybe_source_name: Option<&'a str>, next_block_flags: Flag, maybe_source_event: Option, } -impl<'a> SourceEventVisitor<'a> { +impl<'a> WriterSourceEventVisitor<'a> { pub fn new(maybe_source_name: Option<&'a str>) -> Self { const INITIAL_NEXT_BLOCK_FLAGS: Flag = Flag::SET_POLLING_SOURCE .union(Flag::DISABLE_POLLING_SOURCE) @@ -106,7 +107,7 @@ impl<'a> SourceEventVisitor<'a> { } } -impl<'a> MetadataChainVisitor for SourceEventVisitor<'a> { +impl<'a> MetadataChainVisitor for WriterSourceEventVisitor<'a> { type Error = ScanMetadataError; fn initial_decision(&self) -> Decision { diff --git a/src/domain/core/src/messages/core_message_consumers.rs b/src/domain/core/src/messages/core_message_consumers.rs index 758dac0b5..ec02486e5 100644 --- a/src/domain/core/src/messages/core_message_consumers.rs +++ b/src/domain/core/src/messages/core_message_consumers.rs @@ -12,7 +12,4 @@ pub const MESSAGE_CONSUMER_KAMU_CORE_DATASET_OWNERSHIP_SERVICE: &str = "dev.kamu.domain.core.services.DatasetOwnershipService"; -pub const MESSAGE_CONSUMER_KAMU_CORE_DEPENDENCY_GRAPH_SERVICE: &str = - "dev.kamu.domain.core.services.DependencyGraphService"; - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/core/src/messages/core_message_types.rs b/src/domain/core/src/messages/core_message_types.rs index bf7029297..c37b0316e 100644 --- a/src/domain/core/src/messages/core_message_types.rs +++ b/src/domain/core/src/messages/core_message_types.rs @@ -23,8 +23,8 @@ const DATASET_LIFECYCLE_OUTBOX_VERSION: u32 = 1; pub enum DatasetLifecycleMessage { Created(DatasetLifecycleMessageCreated), DependenciesUpdated(DatasetLifecycleMessageDependenciesUpdated), - Deleted(DatasetLifecycleMessageDeleted), Renamed(DatasetLifecycleMessageRenamed), + Deleted(DatasetLifecycleMessageDeleted), } impl DatasetLifecycleMessage { diff --git a/src/domain/core/src/services/compaction/compaction_executor.rs b/src/domain/core/src/services/compaction/compaction_executor.rs new file mode 100644 index 000000000..1bbb2f4d0 --- /dev/null +++ b/src/domain/core/src/services/compaction/compaction_executor.rs @@ -0,0 +1,71 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. 
+// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use std::sync::Arc; + +use internal_error::{ErrorIntoInternal, InternalError}; +use opendatafabric as odf; +use serde::{Deserialize, Serialize}; +use thiserror::Error; + +use crate::{AccessError, CompactionListener, CompactionPlan, ResolvedDataset, SetRefError}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +pub trait CompactionExecutor: Send + Sync { + async fn execute( + &self, + target: ResolvedDataset, + plan: CompactionPlan, + maybe_listener: Option>, + ) -> Result; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] +pub enum CompactionResult { + NothingToDo, + Success { + old_head: odf::Multihash, + new_head: odf::Multihash, + old_num_blocks: usize, + new_num_blocks: usize, + }, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Error)] +pub enum CompactionExecutionError { + #[error(transparent)] + Access( + #[from] + #[backtrace] + AccessError, + ), + + #[error(transparent)] + Internal(#[from] InternalError), +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +impl From for CompactionExecutionError { + fn from(v: SetRefError) -> Self { + match v { + SetRefError::Access(e) => Self::Access(e), + SetRefError::Internal(e) => Self::Internal(e), + _ => Self::Internal(v.int_err()), + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/core/src/services/compaction/compaction_listener.rs b/src/domain/core/src/services/compaction/compaction_listener.rs new file mode 100644 index 000000000..1e610373a --- /dev/null +++ b/src/domain/core/src/services/compaction/compaction_listener.rs @@ -0,0 +1,55 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
+ +use std::sync::Arc; + +use opendatafabric as odf; + +use super::{CompactionExecutionError, CompactionPlan, CompactionPlanningError}; +use crate::CompactionResult; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub trait CompactionListener: Send + Sync { + fn begin(&self) {} + + fn plan_success(&self, _plan: &CompactionPlan) {} + fn execute_success(&self, _res: &CompactionResult) {} + + fn plan_error(&self, _err: &CompactionPlanningError) {} + fn execute_error(&self, _err: &CompactionExecutionError) {} + + fn begin_phase(&self, _phase: CompactionPhase) {} + fn end_phase(&self, _phase: CompactionPhase) {} +} + +pub struct NullCompactionListener; +impl CompactionListener for NullCompactionListener {} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub trait CompactionMultiListener: Send + Sync { + fn begin_compact(&self, _dataset: &odf::DatasetHandle) -> Option> { + None + } +} + +pub struct NullCompactionMultiListener; +impl CompactionMultiListener for NullCompactionMultiListener {} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CompactionPhase { + GatherChainInfo, + MergeDataslices, + CommitNewBlocks, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/core/src/services/compaction_service.rs b/src/domain/core/src/services/compaction/compaction_planner.rs similarity index 51% rename from src/domain/core/src/services/compaction_service.rs rename to src/domain/core/src/services/compaction/compaction_planner.rs index 30e742007..4a8a56ebc 100644 --- a/src/domain/core/src/services/compaction_service.rs +++ b/src/domain/core/src/services/compaction/compaction_planner.rs @@ -9,12 +9,14 @@ use std::sync::Arc; -use ::serde::{Deserialize, Serialize}; +use chrono::{DateTime, Utc}; use internal_error::{ErrorIntoInternal, InternalError}; -use opendatafabric::*; +use opendatafabric as odf; +use serde::{Deserialize, Serialize}; use thiserror::Error; +use url::Url; -use crate::*; +use crate::{AccessError, CompactionListener, GetRefError, IterBlocksError, ResolvedDataset}; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -24,167 +26,134 @@ pub const DEFAULT_MAX_SLICE_RECORDS: u64 = 10_000; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -pub trait CompactionService: Send + Sync { - async fn compact_dataset( +pub trait CompactionPlanner: Send + Sync { + async fn plan_compaction( &self, target: ResolvedDataset, options: CompactionOptions, - listener: Option>, - ) -> Result; + maybe_listener: Option>, + ) -> Result; } -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Errors //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, Error)] -pub enum CompactionError { - #[error(transparent)] - DatasetNotFound( - #[from] - #[backtrace] - DatasetNotFoundError, - ), - #[error(transparent)] - Access( - #[from] - #[backtrace] - AccessError, - ), - #[error(transparent)] - Internal( - #[from] - 
#[backtrace]
-        InternalError,
-    ),
-    #[error(transparent)]
-    InvalidDatasetKind(
-        #[from]
-        #[backtrace]
-        InvalidDatasetKindError,
-    ),
+#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
+pub struct CompactionOptions {
+    pub max_slice_size: Option<u64>,
+    pub max_slice_records: Option<u64>,
+    pub keep_metadata_only: bool,
 }
 
-impl From<GetDatasetError> for CompactionError {
-    fn from(v: GetDatasetError) -> Self {
-        match v {
-            GetDatasetError::NotFound(e) => Self::DatasetNotFound(e),
-            GetDatasetError::Internal(e) => Self::Internal(e),
+impl Default for CompactionOptions {
+    fn default() -> Self {
+        Self {
+            max_slice_size: Some(DEFAULT_MAX_SLICE_SIZE),
+            max_slice_records: Some(DEFAULT_MAX_SLICE_RECORDS),
+            keep_metadata_only: false,
         }
     }
 }
 
-impl From<auth::DatasetActionUnauthorizedError> for CompactionError {
-    fn from(v: auth::DatasetActionUnauthorizedError) -> Self {
-        match v {
-            auth::DatasetActionUnauthorizedError::Access(e) => Self::Access(e),
-            auth::DatasetActionUnauthorizedError::Internal(e) => Self::Internal(e),
-        }
-    }
-}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-impl From<GetRefError> for CompactionError {
-    fn from(v: GetRefError) -> Self {
-        match v {
-            GetRefError::NotFound(e) => Self::Internal(e.int_err()),
-            GetRefError::Access(e) => Self::Access(e),
-            GetRefError::Internal(e) => Self::Internal(e),
-        }
-    }
+#[derive(Debug)]
+pub struct CompactionPlan {
+    pub seed: odf::Multihash,
+    pub old_head: odf::Multihash,
+    pub old_num_blocks: usize,
+    pub offset_column_name: String,
+    pub data_slice_batches: Vec<CompactionDataSliceBatch>,
 }
 
-impl From<IterBlocksError> for CompactionError {
-    fn from(v: IterBlocksError) -> Self {
-        match v {
-            IterBlocksError::Access(e) => CompactionError::Access(e),
-            IterBlocksError::Internal(e) => CompactionError::Internal(e),
-            _ => CompactionError::Internal(v.int_err()),
-        }
+impl CompactionPlan {
+    pub fn has_no_effect(&self) -> bool {
+        // If the number of batches plus one (the seed block) equals the total block count, there is nothing to compact
+        self.data_slice_batches.len() + 1 == self.old_num_blocks
    }
 }
 
-impl From<SetRefError> for CompactionError {
-    fn from(v: SetRefError) -> Self {
-        match v {
-            SetRefError::Access(e) => CompactionError::Access(e),
-            SetRefError::Internal(e) => CompactionError::Internal(e),
-            _ => CompactionError::Internal(v.int_err()),
-        }
-    }
+#[allow(clippy::large_enum_variant)]
+#[derive(Debug)]
+pub enum CompactionDataSliceBatch {
+    CompactedBatch(CompactionDataSliceBatchInfo),
+    // Holds the hash of the batch's only block, so that
+    // single-block batches can be kept as-is
+    // instead of being rewritten during compaction
+    SingleBlock(odf::Multihash),
 }
 
-#[derive(Error, Debug)]
-#[error("Dataset '{dataset_alias}' in not root kind")]
-pub struct InvalidDatasetKindError {
-    pub dataset_alias: DatasetAlias,
+#[derive(Debug, Default, Clone)]
+pub struct CompactionDataSliceBatchInfo {
+    pub data_slices_batch: Vec<Url>,
+    pub upper_bound: CompactionDataSliceBatchUpperBound,
+    pub lower_bound: CompactionDataSliceBatchLowerBound,
 }
 
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-// Progress bar
-////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-pub trait CompactionListener: Send + Sync {
-    fn begin(&self) {}
-    fn success(&self, _res: &CompactionResult) {}
-    fn error(&self, _err: &CompactionError) {}
-
-    fn begin_phase(&self, _phase: CompactionPhase) {}
-    fn end_phase(&self, _phase: CompactionPhase) {}
+#[derive(Debug, Default, Clone)]
+pub struct CompactionDataSliceBatchUpperBound {
+    pub new_source_state: Option<odf::SourceState>,
+    pub new_watermark: Option<DateTime<Utc>>,
+    pub new_checkpoint: Option<odf::Checkpoint>,
+    pub end_offset: u64,
 }
 
-pub struct NullCompactionListener;
-impl CompactionListener for NullCompactionListener {}
+#[derive(Debug, Default, Clone)]
+pub struct CompactionDataSliceBatchLowerBound {
+    pub prev_offset: Option<u64>,
+    pub prev_checkpoint: Option<odf::Multihash>,
+    pub start_offset: u64,
+}
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-pub trait CompactionMultiListener: Send + Sync {
-    fn begin_compact(&self, _dataset: &DatasetHandle) -> Option<Arc<dyn CompactionListener>> {
-        None
-    }
-}
+#[derive(Debug, Error)]
+pub enum CompactionPlanningError {
+    #[error(transparent)]
+    InvalidDatasetKind(
+        #[from]
+        #[backtrace]
+        InvalidDatasetKindError,
+    ),
 
-pub struct NullCompactionMultiListener;
-impl CompactionMultiListener for NullCompactionMultiListener {}
+    #[error(transparent)]
+    Access(
+        #[from]
+        #[backtrace]
+        AccessError,
+    ),
 
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum CompactionPhase {
-    GatherChainInfo,
-    MergeDataslices,
-    CommitNewBlocks,
+    #[error(transparent)]
+    Internal(#[from] InternalError),
 }
 
-#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
-pub enum CompactionResult {
-    NothingToDo,
-    Success {
-        old_head: Multihash,
-        new_head: Multihash,
-        old_num_blocks: usize,
-        new_num_blocks: usize,
-    },
-}
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-#[derive(Debug)]
-pub struct CompactionResponse {
-    pub dataset_ref: DatasetRef,
-    pub result: Result<CompactionResult, CompactionError>,
+#[derive(Error, Debug)]
+#[error("Dataset '{dataset_alias}' is not of root kind")]
+pub struct InvalidDatasetKindError {
+    pub dataset_alias: odf::DatasetAlias,
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
-pub struct CompactionOptions {
-    pub max_slice_size: Option<u64>,
-    pub max_slice_records: Option<u64>,
-    pub keep_metadata_only: bool,
+impl From<GetRefError> for CompactionPlanningError {
+    fn from(v: GetRefError) -> Self {
+        match v {
+            GetRefError::NotFound(e) => Self::Internal(e.int_err()),
+            GetRefError::Access(e) => Self::Access(e),
+            GetRefError::Internal(e) => Self::Internal(e),
+        }
+    }
 }
 
-impl Default for CompactionOptions {
-    fn default() -> Self {
-        Self {
-            max_slice_size: Some(DEFAULT_MAX_SLICE_SIZE),
-            max_slice_records: Some(DEFAULT_MAX_SLICE_RECORDS),
-            keep_metadata_only: false,
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+impl From<IterBlocksError> for CompactionPlanningError {
+    fn from(v: IterBlocksError) -> Self {
+        match v {
+            IterBlocksError::Access(e) => Self::Access(e),
+            IterBlocksError::Internal(e) => Self::Internal(e),
+            _ => CompactionPlanningError::Internal(v.int_err()),
+        }
     }
 }
diff --git a/src/domain/core/src/services/compaction/mod.rs b/src/domain/core/src/services/compaction/mod.rs
new file mode 100644
index 000000000..92940d671
--- /dev/null
+++ b/src/domain/core/src/services/compaction/mod.rs
@@ -0,0 +1,16 @@
+// Copyright Kamu Data, Inc. and contributors. All rights reserved.
+//
+// Use of this software is governed by the Business Source License
+// included in the LICENSE file.
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +mod compaction_executor; +mod compaction_listener; +mod compaction_planner; + +pub use compaction_executor::*; +pub use compaction_listener::*; +pub use compaction_planner::*; diff --git a/src/domain/core/src/services/dependency_graph_service.rs b/src/domain/core/src/services/dependency_graph_service.rs index 2dfc25831..ae3d47822 100644 --- a/src/domain/core/src/services/dependency_graph_service.rs +++ b/src/domain/core/src/services/dependency_graph_service.rs @@ -12,19 +12,10 @@ use opendatafabric::DatasetID; use thiserror::Error; use tokio_stream::Stream; -use crate::DependencyGraphRepository; - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] pub trait DependencyGraphService: Sync + Send { - /// Forces initialization of graph data, if it wasn't initialized already. - /// Ignored if called multiple times - async fn eager_initialization( - &self, - repository: &dyn DependencyGraphRepository, - ) -> Result<(), InternalError>; - /// Iterates over 1st level of dataset's downstream dependencies async fn get_downstream_dependencies( &self, @@ -68,6 +59,7 @@ pub type DatasetIDStream<'a> = std::pin::Pin + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#[derive(Debug)] pub enum DependencyOrder { BreadthFirst, DepthFirst, diff --git a/src/domain/core/src/services/ingest/mod.rs b/src/domain/core/src/services/ingest/mod.rs index dbec7de9d..a56984b36 100644 --- a/src/domain/core/src/services/ingest/mod.rs +++ b/src/domain/core/src/services/ingest/mod.rs @@ -11,12 +11,14 @@ mod data_format_registry; mod data_writer; mod merge_strategy; mod polling_ingest_service; -mod push_ingest_service; +mod push_ingest_executor; +mod push_ingest_planner; mod reader; pub use data_format_registry::*; pub use data_writer::*; pub use merge_strategy::*; pub use polling_ingest_service::*; -pub use push_ingest_service::*; +pub use push_ingest_executor::*; +pub use push_ingest_planner::*; pub use reader::*; diff --git a/src/domain/core/src/services/ingest/polling_ingest_service.rs b/src/domain/core/src/services/ingest/polling_ingest_service.rs index 091310f48..ed9332285 100644 --- a/src/domain/core/src/services/ingest/polling_ingest_service.rs +++ b/src/domain/core/src/services/ingest/polling_ingest_service.rs @@ -28,17 +28,12 @@ use crate::*; #[async_trait::async_trait] pub trait PollingIngestService: Send + Sync { - /// Returns an active polling source, if any - async fn get_active_polling_source( - &self, - target: ResolvedDataset, - ) -> Result)>, GetDatasetError>; - /// Uses polling source definition in metadata to ingest data from an /// external source async fn ingest( &self, target: ResolvedDataset, + metadata_state: Box, options: PollingIngestOptions, maybe_listener: Option>, ) -> Result; diff --git a/src/domain/core/src/services/ingest/push_ingest_service.rs b/src/domain/core/src/services/ingest/push_ingest_executor.rs similarity index 66% rename from src/domain/core/src/services/ingest/push_ingest_service.rs rename to src/domain/core/src/services/ingest/push_ingest_executor.rs index f2e6fa7c3..d0c1ba2ec 100644 --- a/src/domain/core/src/services/ingest/push_ingest_service.rs +++ b/src/domain/core/src/services/ingest/push_ingest_executor.rs @@ -9,7 +9,6 @@ 
use std::sync::Arc; -use chrono::{DateTime, Utc}; use internal_error::InternalError; use opendatafabric::*; use thiserror::Error; @@ -22,13 +21,7 @@ use crate::*; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -pub trait PushIngestService: Send + Sync { - /// Returns the set of active push sources - async fn get_active_push_sources( - &self, - target: ResolvedDataset, - ) -> Result)>, GetDatasetError>; - +pub trait PushIngestExecutor: Send + Sync { /// Uses push source definition in metadata to ingest data from the /// specified source. /// @@ -36,9 +29,8 @@ pub trait PushIngestService: Send + Sync { async fn ingest_from_url( &self, target: ResolvedDataset, - source_name: Option<&str>, + plan: PushIngestPlan, url: url::Url, - opts: PushIngestOpts, listener: Option>, ) -> Result; @@ -46,32 +38,17 @@ pub trait PushIngestService: Send + Sync { /// in-band as a file stream. /// /// See also [MediaType]. - async fn ingest_from_file_stream( + async fn ingest_from_stream( &self, target: ResolvedDataset, - source_name: Option<&str>, + plan: PushIngestPlan, data: Box, - opts: PushIngestOpts, listener: Option>, ) -> Result; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -#[derive(Debug, Default)] -pub struct PushIngestOpts { - /// MIME type of the content - pub media_type: Option, - /// Event time to use if data does not contain such column itself - pub source_event_time: Option>, - /// Whether to automatically create a push source if it doesn't exist - pub auto_create_push_source: bool, - /// Schema inference configuration - pub schema_inference: SchemaInferenceOpts, -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - #[derive(Debug)] pub enum PushIngestResult { UpToDate, @@ -121,13 +98,6 @@ impl PushIngestListener for NullPushIngestListener {} // TODO: Revisit error granularity #[derive(Debug, Error)] pub enum PushIngestError { - #[error(transparent)] - SourceNotFound( - #[from] - #[backtrace] - PushSourceNotFoundError, - ), - #[error(transparent)] UnsupportedMediaType( #[from] @@ -193,41 +163,3 @@ pub enum PushIngestError { } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[derive(Debug, Error, Default)] -pub struct PushSourceNotFoundError { - source_name: Option, -} - -impl PushSourceNotFoundError { - pub fn new(source_name: Option>) -> Self { - Self { - source_name: source_name.map(Into::into), - } - } -} - -impl std::fmt::Display for PushSourceNotFoundError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match &self.source_name { - None => write!( - f, - "Dataset does not define a default push source, consider specifying the source \ - name" - ), - Some(s) => write!(f, "Dataset does not define a push source '{s}'"), - } - } -} - -#[derive(Debug, Error)] -#[error("Unsupported media type {media_type}")] -pub struct UnsupportedMediaTypeError { - pub media_type: MediaType, -} - -impl UnsupportedMediaTypeError { - pub fn new(media_type: MediaType) -> Self { - Self { media_type } - } -} diff --git a/src/domain/core/src/services/ingest/push_ingest_planner.rs b/src/domain/core/src/services/ingest/push_ingest_planner.rs new file mode 100644 index 000000000..b244eac16 --- /dev/null +++ b/src/domain/core/src/services/ingest/push_ingest_planner.rs @@ 
-0,0 +1,145 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use std::path::PathBuf; + +use chrono::{DateTime, Utc}; +use internal_error::InternalError; +use opendatafabric as odf; +use thiserror::Error; + +use crate::{ + CommitError, + DataWriterMetadataState, + MediaType, + ResolvedDataset, + SchemaInferenceOpts, +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +pub trait PushIngestPlanner: Send + Sync { + /// Uses or auto-creates push source definition in metadata to plan + /// ingestion + async fn plan_ingest( + &self, + target: ResolvedDataset, + source_name: Option<&str>, + opts: PushIngestOpts, + ) -> Result; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Default)] +pub struct PushIngestOpts { + /// MIME type of the content + pub media_type: Option, + /// Event time to use if data does not contain such column itself + pub source_event_time: Option>, + /// Whether to automatically create a push source if it doesn't exist + pub auto_create_push_source: bool, + /// Schema inference configuration + pub schema_inference: SchemaInferenceOpts, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug)] +pub struct PushIngestPlan { + pub args: PushIngestArgs, + pub metadata_state: Box, +} + +#[derive(Debug)] +pub struct PushIngestArgs { + pub operation_id: String, + pub operation_dir: PathBuf, + pub system_time: DateTime, + pub opts: PushIngestOpts, + pub push_source: odf::AddPushSource, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Error)] +pub enum PushIngestPlanningError { + #[error(transparent)] + SourceNotFound( + #[from] + #[backtrace] + PushSourceNotFoundError, + ), + + #[error(transparent)] + UnsupportedMediaType( + #[from] + #[backtrace] + UnsupportedMediaTypeError, + ), + + #[error(transparent)] + CommitError( + #[from] + #[backtrace] + CommitError, + ), + + #[error(transparent)] + Internal( + #[from] + #[backtrace] + InternalError, + ), +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Error, Default)] +pub struct PushSourceNotFoundError { + source_name: Option, +} + +impl PushSourceNotFoundError { + pub fn new(source_name: Option>) -> Self { + Self { + source_name: source_name.map(Into::into), + } + } +} + +impl std::fmt::Display for PushSourceNotFoundError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match &self.source_name { + None => write!( + f, + "Dataset does not define a default push source, consider specifying the source \ + name" + ), + Some(s) => write!(f, "Dataset does not define a push source '{s}'"), + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Error)] +#[error("Unsupported media type {media_type}")] +pub struct UnsupportedMediaTypeError 
{ + pub media_type: MediaType, +} + +impl UnsupportedMediaTypeError { + pub fn new(media_type: MediaType) -> Self { + Self { media_type } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/core/src/services/metadata_query_service.rs b/src/domain/core/src/services/metadata_query_service.rs new file mode 100644 index 000000000..cc1c28cf2 --- /dev/null +++ b/src/domain/core/src/services/metadata_query_service.rs @@ -0,0 +1,51 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use chrono::{DateTime, Utc}; +use internal_error::InternalError; +use opendatafabric as odf; + +use crate::ResolvedDataset; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +pub trait MetadataQueryService: Send + Sync { + /// Returns an active polling source, if any + async fn get_active_polling_source( + &self, + target: ResolvedDataset, + ) -> Result< + Option<( + odf::Multihash, + odf::MetadataBlockTyped, + )>, + InternalError, + >; + + /// Returns the set of active push sources + async fn get_active_push_sources( + &self, + target: ResolvedDataset, + ) -> Result)>, InternalError>; + + /// Returns an active transform, if any + async fn get_active_transform( + &self, + target: ResolvedDataset, + ) -> Result)>, InternalError>; + + /// Attempt reading watermark that is currently associated with a dataset + async fn try_get_current_watermark( + &self, + dataset: ResolvedDataset, + ) -> Result>, InternalError>; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/core/src/services/mod.rs b/src/domain/core/src/services/mod.rs index ab4781184..289e2c27c 100644 --- a/src/domain/core/src/services/mod.rs +++ b/src/domain/core/src/services/mod.rs @@ -10,14 +10,24 @@ // Re-exports pub use container_runtime::{NullPullImageListener, PullImageListener}; -pub mod compaction_service; +pub mod compaction; +pub mod ingest; +pub mod reset; +pub mod transform; +pub mod watermark; + +pub use compaction::*; +pub use ingest::*; +pub use reset::*; +pub use transform::*; +pub use watermark::*; + pub mod dataset_changes_service; pub mod dataset_ownership_service; pub mod dataset_registry; -pub mod dependency_graph_repository; pub mod dependency_graph_service; pub mod engine_provisioner; -pub mod ingest; +pub mod metadata_query_service; pub mod provenance_service; pub mod pull_request_planner; pub mod push_request_planner; @@ -26,23 +36,18 @@ pub mod remote_aliases; pub mod remote_aliases_registry; pub mod remote_repository_registry; pub mod remote_status_service; -pub mod reset_service; pub mod resource_loader; pub mod search_service; pub mod server_url_config; pub mod sync_service; -pub mod transform; pub mod verification_service; -pub mod watermark_service; -pub use compaction_service::*; pub use dataset_changes_service::*; pub use dataset_ownership_service::*; pub use dataset_registry::*; -pub use dependency_graph_repository::*; pub use dependency_graph_service::*; pub use engine_provisioner::*; -pub use ingest::*; +pub use 
metadata_query_service::*; pub use provenance_service::*; pub use pull_request_planner::*; pub use push_request_planner::*; @@ -51,11 +56,8 @@ pub use remote_aliases::*; pub use remote_aliases_registry::*; pub use remote_repository_registry::*; pub use remote_status_service::*; -pub use reset_service::*; pub use resource_loader::*; pub use search_service::*; pub use server_url_config::*; pub use sync_service::*; -pub use transform::*; pub use verification_service::*; -pub use watermark_service::*; diff --git a/src/domain/core/src/services/pull_request_planner.rs b/src/domain/core/src/services/pull_request_planner.rs index c5b3c6953..c072070e2 100644 --- a/src/domain/core/src/services/pull_request_planner.rs +++ b/src/domain/core/src/services/pull_request_planner.rs @@ -56,6 +56,7 @@ pub struct PullPlanIteration { pub jobs: Vec, } +#[allow(clippy::large_enum_variant)] #[derive(Debug)] pub enum PullPlanIterationJob { Ingest(PullIngestItem), @@ -87,6 +88,7 @@ impl PullPlanIterationJob { pub struct PullIngestItem { pub depth: i32, pub target: ResolvedDataset, + pub metadata_state: Box, pub maybe_original_request: Option, } @@ -388,34 +390,48 @@ pub enum PullError { #[backtrace] DatasetNotFoundError, ), + #[error("Cannot choose between multiple pull aliases")] AmbiguousSource, + #[error("{0}")] InvalidOperation(String), + + #[error(transparent)] + ScanMetadata( + #[from] + #[backtrace] + ScanMetadataError, + ), + #[error(transparent)] PollingIngestError( #[from] #[backtrace] PollingIngestError, ), + #[error(transparent)] TransformError( #[from] #[backtrace] TransformError, ), + #[error(transparent)] SyncError( #[from] #[backtrace] SyncError, ), + #[error(transparent)] Access( #[from] #[backtrace] AccessError, ), + #[error(transparent)] Internal( #[from] diff --git a/src/domain/core/src/services/remote_status_service.rs b/src/domain/core/src/services/remote_status_service.rs index 8df49e377..583054a9a 100644 --- a/src/domain/core/src/services/remote_status_service.rs +++ b/src/domain/core/src/services/remote_status_service.rs @@ -27,6 +27,7 @@ pub trait RemoteStatusService: Send + Sync { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#[derive(Debug)] pub struct PushStatus { pub remote: DatasetRefRemote, pub check_result: Result, diff --git a/src/domain/core/src/services/reset/mod.rs b/src/domain/core/src/services/reset/mod.rs new file mode 100644 index 000000000..cb815e599 --- /dev/null +++ b/src/domain/core/src/services/reset/mod.rs @@ -0,0 +1,14 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +mod reset_executor; +mod reset_planner; + +pub use reset_executor::*; +pub use reset_planner::*; diff --git a/src/domain/core/src/services/reset/reset_executor.rs b/src/domain/core/src/services/reset/reset_executor.rs new file mode 100644 index 000000000..21a1770df --- /dev/null +++ b/src/domain/core/src/services/reset/reset_executor.rs @@ -0,0 +1,46 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use internal_error::InternalError; +use opendatafabric as odf; +use serde::{Deserialize, Serialize}; +use thiserror::Error; + +use crate::{ResetPlan, ResolvedDataset, SetRefError}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +pub trait ResetExecutor: Send + Sync { + async fn execute( + &self, + target: ResolvedDataset, + plan: ResetPlan, + ) -> Result; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct ResetResult { + pub new_head: odf::Multihash, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Error)] +pub enum ResetExecutionError { + #[error(transparent)] + SetReferenceFailed(#[from] SetRefError), + + #[error(transparent)] + Internal(#[from] InternalError), +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/core/src/services/reset/reset_planner.rs b/src/domain/core/src/services/reset/reset_planner.rs new file mode 100644 index 000000000..edbb138c6 --- /dev/null +++ b/src/domain/core/src/services/reset/reset_planner.rs @@ -0,0 +1,59 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
+ +use internal_error::InternalError; +use opendatafabric as odf; +use thiserror::Error; + +use crate::ResolvedDataset; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +pub trait ResetPlanner: Send + Sync { + async fn plan_reset( + &self, + target: ResolvedDataset, + maybe_new_head: Option<&odf::Multihash>, + maybe_old_head: Option<&odf::Multihash>, + ) -> Result; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug)] +pub struct ResetPlan { + pub new_head: odf::Multihash, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Error)] +pub enum ResetPlanningError { + #[error(transparent)] + OldHeadMismatch( + #[from] + #[backtrace] + ResetOldHeadMismatchError, + ), + + #[error(transparent)] + Internal(#[from] InternalError), +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Error, Debug)] +#[error("Current head is {current_head} but expected head is {old_head}")] +pub struct ResetOldHeadMismatchError { + pub current_head: odf::Multihash, + pub old_head: odf::Multihash, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/core/src/services/reset_service.rs b/src/domain/core/src/services/reset_service.rs deleted file mode 100644 index 0ba45e205..000000000 --- a/src/domain/core/src/services/reset_service.rs +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright Kamu Data, Inc. and contributors. All rights reserved. -// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0. 
- -use internal_error::InternalError; -use opendatafabric::*; -use thiserror::Error; - -use crate::entities::SetRefError; -use crate::*; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[async_trait::async_trait] -pub trait ResetService: Send + Sync { - async fn reset_dataset( - &self, - target: ResolvedDataset, - block_hash: Option<&Multihash>, - old_head_maybe: Option<&Multihash>, - ) -> Result; -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Errors -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[derive(Debug, Error)] -pub enum ResetError { - #[error(transparent)] - DatasetNotFound( - #[from] - #[backtrace] - DatasetNotFoundError, - ), - #[error(transparent)] - CASFailed( - #[from] - #[backtrace] - RefCASError, - ), - #[error(transparent)] - BlockNotFound( - #[from] - #[backtrace] - BlockNotFoundError, - ), - #[error(transparent)] - Access( - #[from] - #[backtrace] - AccessError, - ), - #[error(transparent)] - Internal( - #[from] - #[backtrace] - InternalError, - ), - #[error(transparent)] - OldHeadMismatch( - #[from] - #[backtrace] - OldHeadMismatchError, - ), -} - -impl From for ResetError { - fn from(v: GetDatasetError) -> Self { - match v { - GetDatasetError::NotFound(e) => Self::DatasetNotFound(e), - GetDatasetError::Internal(e) => Self::Internal(e), - } - } -} - -impl From for ResetError { - fn from(v: auth::DatasetActionUnauthorizedError) -> Self { - match v { - auth::DatasetActionUnauthorizedError::Access(e) => Self::Access(e), - auth::DatasetActionUnauthorizedError::Internal(e) => Self::Internal(e), - } - } -} - -impl From for ResetError { - fn from(v: SetRefError) -> Self { - match v { - SetRefError::CASFailed(e) => Self::CASFailed(e), - SetRefError::BlockNotFound(e) => Self::BlockNotFound(e), - SetRefError::Access(e) => Self::Access(e), - SetRefError::Internal(e) => Self::Internal(e), - } - } -} - -#[derive(Error, Debug)] -#[error("Current head is {current_head} but expected head is {old_head}")] -pub struct OldHeadMismatchError { - pub current_head: Multihash, - pub old_head: Multihash, -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/core/src/services/transform/mod.rs b/src/domain/core/src/services/transform/mod.rs index d7f598d24..f9b94a668 100644 --- a/src/domain/core/src/services/transform/mod.rs +++ b/src/domain/core/src/services/transform/mod.rs @@ -8,13 +8,13 @@ // by the Apache License, Version 2.0. 
mod transform_elaboration_service; -mod transform_execution_service; +mod transform_executor; mod transform_listener; mod transform_request_planner; mod transform_types; pub use transform_elaboration_service::*; -pub use transform_execution_service::*; +pub use transform_executor::*; pub use transform_listener::*; pub use transform_request_planner::*; pub use transform_types::*; diff --git a/src/domain/core/src/services/transform/transform_execution_service.rs b/src/domain/core/src/services/transform/transform_executor.rs similarity index 98% rename from src/domain/core/src/services/transform/transform_execution_service.rs rename to src/domain/core/src/services/transform/transform_executor.rs index 199fd0100..9c8065694 100644 --- a/src/domain/core/src/services/transform/transform_execution_service.rs +++ b/src/domain/core/src/services/transform/transform_executor.rs @@ -28,7 +28,7 @@ use crate::{ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -pub trait TransformExecutionService: Send + Sync { +pub trait TransformExecutor: Send + Sync { async fn execute_transform( &self, target: ResolvedDataset, diff --git a/src/domain/core/src/services/transform/transform_request_planner.rs b/src/domain/core/src/services/transform/transform_request_planner.rs index 3d9e547b1..46d42dcad 100644 --- a/src/domain/core/src/services/transform/transform_request_planner.rs +++ b/src/domain/core/src/services/transform/transform_request_planner.rs @@ -20,12 +20,6 @@ use crate::*; #[async_trait::async_trait] pub trait TransformRequestPlanner: Send + Sync { - /// Returns an active transform, if any - async fn get_active_transform( - &self, - target: ResolvedDataset, - ) -> Result)>, InternalError>; - async fn build_transform_preliminary_plan( &self, target: ResolvedDataset, diff --git a/src/domain/core/src/services/watermark/mod.rs b/src/domain/core/src/services/watermark/mod.rs new file mode 100644 index 000000000..bb8ad1979 --- /dev/null +++ b/src/domain/core/src/services/watermark/mod.rs @@ -0,0 +1,14 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +mod set_watermark_executor; +mod set_watermark_planner; + +pub use set_watermark_executor::*; +pub use set_watermark_planner::*; diff --git a/src/domain/core/src/services/watermark/set_watermark_executor.rs b/src/domain/core/src/services/watermark/set_watermark_executor.rs new file mode 100644 index 000000000..dfc5c164a --- /dev/null +++ b/src/domain/core/src/services/watermark/set_watermark_executor.rs @@ -0,0 +1,58 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use internal_error::InternalError; +use opendatafabric as odf; +use thiserror::Error; + +use crate::{ResolvedDataset, SetWatermarkPlan}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +pub trait SetWatermarkExecutor: Send + Sync { + async fn execute( + &self, + target: ResolvedDataset, + plan: SetWatermarkPlan, + ) -> Result; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug)] +pub enum SetWatermarkResult { + UpToDate, + Updated { + old_head: Option, + new_head: odf::Multihash, + }, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Error)] +pub enum SetWatermarkExecutionError { + #[error(transparent)] + Internal( + #[from] + #[backtrace] + InternalError, + ), +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/core/src/services/watermark/set_watermark_planner.rs b/src/domain/core/src/services/watermark/set_watermark_planner.rs new file mode 100644 index 000000000..58d6e4afa --- /dev/null +++ b/src/domain/core/src/services/watermark/set_watermark_planner.rs @@ -0,0 +1,54 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use chrono::{DateTime, Utc}; +use internal_error::InternalError; +use thiserror::Error; + +use crate::{DataWriterMetadataState, ResolvedDataset}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +pub trait SetWatermarkPlanner: Send + Sync { + async fn plan_set_watermark( + &self, + target: ResolvedDataset, + new_watermark: DateTime, + ) -> Result; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug)] +pub struct SetWatermarkPlan { + pub system_time: DateTime, + pub new_watermark: DateTime, + pub metadata_state: Box, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Error)] +pub enum SetWatermarkPlanningError { + #[error("Attempting to set watermark on a derivative dataset")] + IsDerivative, + + #[error("Attempting to set watermark on a remote dataset")] + IsRemote, + + #[error(transparent)] + Internal( + #[from] + #[backtrace] + InternalError, + ), +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/core/src/services/watermark_service.rs b/src/domain/core/src/services/watermark_service.rs deleted file mode 100644 index 2e35d9310..000000000 --- a/src/domain/core/src/services/watermark_service.rs +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright Kamu Data, Inc. and contributors. All rights reserved. 
-// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0. - -use chrono::{DateTime, Utc}; -use internal_error::InternalError; -use opendatafabric::Multihash; -use thiserror::Error; - -use crate::auth::DatasetActionUnauthorizedError; -use crate::{AccessError, ResolvedDataset}; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[async_trait::async_trait] -pub trait WatermarkService: Send + Sync { - /// Attempt reading watermark that is currently associated with a dataset - async fn try_get_current_watermark( - &self, - dataset: ResolvedDataset, - ) -> Result>, GetWatermarkError>; - - /// Manually advances the watermark of a root dataset - async fn set_watermark( - &self, - target: ResolvedDataset, - new_watermark: DateTime, - ) -> Result; -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[derive(Debug)] -pub enum SetWatermarkResult { - UpToDate, - Updated { - old_head: Option, - new_head: Multihash, - }, -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[derive(Debug, Error)] -pub enum GetWatermarkError { - #[error(transparent)] - Internal( - #[from] - #[backtrace] - InternalError, - ), -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[derive(Debug, Error)] -pub enum SetWatermarkError { - #[error("Attempting to set watermark on a derivative dataset")] - IsDerivative, - - #[error("Attempting to set watermark on a remote dataset")] - IsRemote, - - #[error(transparent)] - Access( - #[from] - #[backtrace] - AccessError, - ), - - #[error(transparent)] - Internal( - #[from] - #[backtrace] - InternalError, - ), -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -impl From for SetWatermarkError { - fn from(v: DatasetActionUnauthorizedError) -> Self { - match v { - DatasetActionUnauthorizedError::Access(e) => Self::Access(e), - DatasetActionUnauthorizedError::Internal(e) => Self::Internal(e), - } - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/core/src/use_cases/compact_dataset_use_case.rs b/src/domain/core/src/use_cases/compact_dataset_use_case.rs index 43355b65c..0c8f1fe9a 100644 --- a/src/domain/core/src/use_cases/compact_dataset_use_case.rs +++ b/src/domain/core/src/use_cases/compact_dataset_use_case.rs @@ -9,14 +9,18 @@ use std::sync::Arc; -use opendatafabric::DatasetHandle; +use internal_error::InternalError; +use opendatafabric as odf; +use thiserror::Error; +use crate::auth::DatasetActionUnauthorizedError; use crate::{ - CompactionError, + AccessError, + CompactionExecutionError, CompactionListener, CompactionMultiListener, CompactionOptions, - CompactionResponse, + CompactionPlanningError, CompactionResult, }; @@ -26,17 +30,65 @@ use crate::{ pub trait CompactDatasetUseCase: Send + Sync { async fn execute( &self, - dataset_handle: &DatasetHandle, + dataset_handle: &odf::DatasetHandle, options: CompactionOptions, maybe_listener: Option>, ) -> Result; async fn 
execute_multi(
         &self,
-        dataset_handles: Vec<DatasetHandle>,
+        dataset_handles: Vec<odf::DatasetHandle>,
         options: CompactionOptions,
         multi_listener: Option<Arc<dyn CompactionMultiListener>>,
     ) -> Vec<CompactionResponse>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#[derive(Debug)]
+pub struct CompactionResponse {
+    pub dataset_ref: odf::DatasetRef,
+    pub result: Result<CompactionResult, CompactionError>,
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#[derive(Debug, Error)]
+pub enum CompactionError {
+    #[error(transparent)]
+    Planning(
+        #[from]
+        #[backtrace]
+        CompactionPlanningError,
+    ),
+
+    #[error(transparent)]
+    Execution(
+        #[from]
+        #[backtrace]
+        CompactionExecutionError,
+    ),
+
+    #[error(transparent)]
+    Access(
+        #[from]
+        #[backtrace]
+        AccessError,
+    ),
+
+    #[error(transparent)]
+    Internal(#[from] InternalError),
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+impl From<DatasetActionUnauthorizedError> for CompactionError {
+    fn from(v: DatasetActionUnauthorizedError) -> Self {
+        match v {
+            DatasetActionUnauthorizedError::Access(e) => Self::Access(e),
+            DatasetActionUnauthorizedError::Internal(e) => Self::Internal(e),
+        }
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/domain/core/src/use_cases/reset_dataset_use_case.rs b/src/domain/core/src/use_cases/reset_dataset_use_case.rs
index edf19b0b0..9fae26e14 100644
--- a/src/domain/core/src/use_cases/reset_dataset_use_case.rs
+++ b/src/domain/core/src/use_cases/reset_dataset_use_case.rs
@@ -7,9 +7,12 @@
 // the Business Source License, use of this software will be governed
 // by the Apache License, Version 2.0.
+use internal_error::InternalError; use opendatafabric::{DatasetHandle, Multihash}; +use thiserror::Error; -use crate::ResetError; +use crate::auth::DatasetActionUnauthorizedError; +use crate::{AccessError, ResetExecutionError, ResetPlanningError, ResetResult}; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -20,7 +23,47 @@ pub trait ResetDatasetUseCase: Send + Sync { dataset_handle: &DatasetHandle, maybe_new_head: Option<&Multihash>, maybe_old_head: Option<&Multihash>, - ) -> Result; + ) -> Result; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Error)] +pub enum ResetError { + #[error(transparent)] + Planning( + #[from] + #[backtrace] + ResetPlanningError, + ), + + #[error(transparent)] + Execution( + #[from] + #[backtrace] + ResetExecutionError, + ), + + #[error(transparent)] + Access( + #[from] + #[backtrace] + AccessError, + ), + + #[error(transparent)] + Internal(#[from] InternalError), +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +impl From for ResetError { + fn from(v: DatasetActionUnauthorizedError) -> Self { + match v { + DatasetActionUnauthorizedError::Access(e) => Self::Access(e), + DatasetActionUnauthorizedError::Internal(e) => Self::Internal(e), + } + } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/core/src/use_cases/set_watermark_use_case.rs b/src/domain/core/src/use_cases/set_watermark_use_case.rs index e5cd0cdcc..22f99c96a 100644 --- a/src/domain/core/src/use_cases/set_watermark_use_case.rs +++ b/src/domain/core/src/use_cases/set_watermark_use_case.rs @@ -8,9 +8,17 @@ // by the Apache License, Version 2.0. 
use chrono::{DateTime, Utc}; +use internal_error::InternalError; use opendatafabric::DatasetHandle; +use thiserror::Error; -use crate::{SetWatermarkError, SetWatermarkResult}; +use crate::auth::DatasetActionUnauthorizedError; +use crate::{ + AccessError, + SetWatermarkExecutionError, + SetWatermarkPlanningError, + SetWatermarkResult, +}; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -24,3 +32,39 @@ pub trait SetWatermarkUseCase: Send + Sync { } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Error)] +pub enum SetWatermarkError { + #[error(transparent)] + Planning(#[from] SetWatermarkPlanningError), + + #[error(transparent)] + Execution(#[from] SetWatermarkExecutionError), + + #[error(transparent)] + Access( + #[from] + #[backtrace] + AccessError, + ), + + #[error(transparent)] + Internal( + #[from] + #[backtrace] + InternalError, + ), +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +impl From for SetWatermarkError { + fn from(v: DatasetActionUnauthorizedError) -> Self { + match v { + DatasetActionUnauthorizedError::Access(e) => Self::Access(e), + DatasetActionUnauthorizedError::Internal(e) => Self::Internal(e), + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/core/src/utils/owned_file.rs b/src/domain/core/src/utils/owned_file.rs index 788f12ed3..cad896e6c 100644 --- a/src/domain/core/src/utils/owned_file.rs +++ b/src/domain/core/src/utils/owned_file.rs @@ -23,7 +23,8 @@ impl OwnedFile { pub fn new(path: impl Into) -> Self { let path = path.into(); - assert!(path.exists(), "path: {}", path.display()); + assert!(path.exists(), "Path does not exist: {}", path.display()); + assert!(path.is_file(), "Path is not a file: {}", path.display()); Self { path: Some(path) } } diff --git a/src/domain/datasets/domain/Cargo.toml b/src/domain/datasets/domain/Cargo.toml index 7cb737c0f..35e6dae87 100644 --- a/src/domain/datasets/domain/Cargo.toml +++ b/src/domain/datasets/domain/Cargo.toml @@ -39,7 +39,7 @@ merge = "0.1" secrecy = "0.10" serde = "1" serde_with = { version = "3", default-features = false } -thiserror = { version = "1", default-features = false } +thiserror = { version = "2", default-features = false } tokio-stream = "0.1" uuid = { version = "1", default-features = false, features = ["v4"] } diff --git a/src/domain/datasets/domain/src/entities/dataset_dependency_entry.rs b/src/domain/datasets/domain/src/entities/dataset_dependency_entry.rs new file mode 100644 index 000000000..8c582ae42 --- /dev/null +++ b/src/domain/datasets/domain/src/entities/dataset_dependency_entry.rs @@ -0,0 +1,20 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
+ +use opendatafabric::DatasetID; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Clone, sqlx::FromRow)] +pub struct DatasetDependencyEntryRowModel { + pub downstream_dataset_id: DatasetID, + pub upstream_dataset_id: DatasetID, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/datasets/domain/src/entities/mod.rs b/src/domain/datasets/domain/src/entities/mod.rs index bc4b51507..774c30ac3 100644 --- a/src/domain/datasets/domain/src/entities/mod.rs +++ b/src/domain/datasets/domain/src/entities/mod.rs @@ -7,8 +7,12 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. +#[cfg(feature = "sqlx")] +mod dataset_dependency_entry; mod dataset_entry; mod dataset_env_var; +#[cfg(feature = "sqlx")] +pub use dataset_dependency_entry::*; pub use dataset_entry::*; pub use dataset_env_var::*; diff --git a/src/domain/datasets/domain/src/repos/dataset_dependency_repository.rs b/src/domain/datasets/domain/src/repos/dataset_dependency_repository.rs new file mode 100644 index 000000000..fff9d0e9c --- /dev/null +++ b/src/domain/datasets/domain/src/repos/dataset_dependency_repository.rs @@ -0,0 +1,87 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use std::pin::Pin; + +use internal_error::InternalError; +use opendatafabric::DatasetID; +use thiserror::Error; +use tokio_stream::Stream; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +pub trait DatasetDependencyRepository: Send + Sync { + async fn stores_any_dependencies(&self) -> Result; + + fn list_all_dependencies(&self) -> DatasetDependenciesIDStream; + + async fn add_upstream_dependencies( + &self, + downstream_dataset_id: &DatasetID, + new_upstream_dataset_ids: &[&DatasetID], + ) -> Result<(), AddDependenciesError>; + + async fn remove_upstream_dependencies( + &self, + downstream_dataset_id: &DatasetID, + obsolete_upstream_dataset_ids: &[&DatasetID], + ) -> Result<(), RemoveDependenciesError>; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Eq, PartialEq, Ord, PartialOrd)] +pub struct DatasetDependencies { + pub downstream_dataset_id: DatasetID, + pub upstream_dataset_ids: Vec, +} + +pub type DatasetDependenciesIDStream<'a> = + Pin> + Send + 'a>>; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Error)] +pub enum AddDependenciesError { + #[error(transparent)] + Duplicate(#[from] AddDependencyDuplicateError), + + #[error(transparent)] + Internal(#[from] InternalError), +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Debug, Error)] +pub enum RemoveDependenciesError { + #[error(transparent)] + NotFound(#[from] RemoveDependencyMissingError), + + #[error(transparent)] + Internal(#[from] InternalError), +} + 
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Error, Debug)] +#[error("Upstream dependency duplicate for dataset '{downstream_dataset_id}'")] +pub struct AddDependencyDuplicateError { + pub downstream_dataset_id: DatasetID, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Error, Debug)] +#[error("Upstream dependency not found for dataset '{downstream_dataset_id}'")] +pub struct RemoveDependencyMissingError { + pub downstream_dataset_id: DatasetID, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/datasets/domain/src/repos/dataset_entry_repository.rs b/src/domain/datasets/domain/src/repos/dataset_entry_repository.rs index af8036d61..4809e3320 100644 --- a/src/domain/datasets/domain/src/repos/dataset_entry_repository.rs +++ b/src/domain/datasets/domain/src/repos/dataset_entry_repository.rs @@ -214,3 +214,13 @@ pub enum DeleteEntryDatasetError { } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +pub trait DatasetEntryRemovalListener: Send + Sync { + async fn on_dataset_entry_removed( + &self, + dataset_id: &odf::DatasetID, + ) -> Result<(), InternalError>; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/datasets/domain/src/repos/mod.rs b/src/domain/datasets/domain/src/repos/mod.rs index eeaf74714..82cd7be94 100644 --- a/src/domain/datasets/domain/src/repos/mod.rs +++ b/src/domain/datasets/domain/src/repos/mod.rs @@ -7,8 +7,10 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. 
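The new DatasetDependencyRepository above makes dependency edges durable, with dedicated errors for duplicate additions and missing removals. A minimal in-memory sketch of the storage shape this trait describes, with plain string ids standing in for DatasetID and without the async/stream plumbing (an illustration only, not the actual InMemoryDatasetDependencyRepository):

    use std::collections::{BTreeMap, BTreeSet};

    // Downstream dataset id -> set of upstream dataset ids.
    #[derive(Default)]
    struct InMemoryDeps {
        deps: BTreeMap<String, BTreeSet<String>>,
    }

    impl InMemoryDeps {
        fn stores_any_dependencies(&self) -> bool {
            self.deps.values().any(|upstream| !upstream.is_empty())
        }

        fn add_upstream_dependencies(&mut self, downstream: &str, upstream: &[&str]) {
            let entry = self.deps.entry(downstream.to_string()).or_default();
            for id in upstream {
                entry.insert((*id).to_string());
            }
        }

        fn remove_upstream_dependencies(&mut self, downstream: &str, obsolete: &[&str]) {
            if let Some(entry) = self.deps.get_mut(downstream) {
                for id in obsolete {
                    entry.remove(*id);
                }
            }
        }
    }

    fn main() {
        let mut repo = InMemoryDeps::default();
        repo.add_upstream_dependencies("bar", &["foo"]);
        assert!(repo.stores_any_dependencies());
        repo.remove_upstream_dependencies("bar", &["foo"]);
        assert!(!repo.stores_any_dependencies());
    }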
+mod dataset_dependency_repository; mod dataset_entry_repository; mod dataset_env_var_repository; +pub use dataset_dependency_repository::*; pub use dataset_entry_repository::*; pub use dataset_env_var_repository::*; diff --git a/src/domain/datasets/services/Cargo.toml b/src/domain/datasets/services/Cargo.toml index cc64dd0d5..097d15d65 100644 --- a/src/domain/datasets/services/Cargo.toml +++ b/src/domain/datasets/services/Cargo.toml @@ -37,8 +37,12 @@ async-trait = { version = "0.1", default-features = false } chrono = { version = "0.4", default-features = false } dill = "0.9" futures = { version = "0.3", default-features = false } +petgraph = { version = "0.6", default-features = false, features = [ + "stable_graph", +] } secrecy = "0.10" tokio = { version = "1", default-features = false } +tokio-stream = { version = "0.1", default-features = false } tracing = { version = "0.1", default-features = false } uuid = { version = "1", default-features = false } @@ -49,7 +53,10 @@ kamu-accounts = { workspace = true, features = ["testing"] } kamu-accounts-inmem = { workspace = true } kamu-core = { workspace = true, features = ["testing"] } kamu-datasets = { workspace = true, features = ["testing"] } +kamu-datasets-inmem = { workspace = true } +indoc = "2" mockall = "0.13" +oop = "0.0.2" pretty_assertions = { version = "1" } test-log = { version = "0.2", features = ["trace"] } diff --git a/src/domain/datasets/services/src/dataset_entry_service_impl.rs b/src/domain/datasets/services/src/dataset_entry_service_impl.rs index 429b13416..b1cbe6e4e 100644 --- a/src/domain/datasets/services/src/dataset_entry_service_impl.rs +++ b/src/domain/datasets/services/src/dataset_entry_service_impl.rs @@ -399,10 +399,6 @@ impl DatasetRegistry for DatasetEntryServiceImpl { #[tracing::instrument(level = "debug", skip_all, fields(%owner_name))] fn all_dataset_handles_by_owner(&self, owner_name: &odf::AccountName) -> DatasetHandleStream { - struct OwnerArgs { - owner_id: odf::AccountID, - } - let owner_name = owner_name.clone(); EntityPageStreamer::default().into_stream( @@ -410,14 +406,11 @@ impl DatasetRegistry for DatasetEntryServiceImpl { let owner_id = self .resolve_account_id_by_maybe_name(Some(&owner_name)) .await?; - Ok(Arc::new(OwnerArgs { owner_id })) + Ok(Arc::new(owner_id)) }, - move |args, pagination| { - let args = args.clone(); - async move { - self.list_all_dataset_handles_by_owner_name(&args.owner_id, pagination) - .await - } + move |owner_id, pagination| async move { + self.list_all_dataset_handles_by_owner_name(&owner_id, pagination) + .await }, ) } diff --git a/src/domain/datasets/services/src/dependency_graph_indexer.rs b/src/domain/datasets/services/src/dependency_graph_indexer.rs new file mode 100644 index 000000000..db4899936 --- /dev/null +++ b/src/domain/datasets/services/src/dependency_graph_indexer.rs @@ -0,0 +1,118 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
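The services crate now depends on petgraph with the stable_graph feature; the dependency graph service further below keeps a StableDiGraph of dataset ids plus an id-to-node index, and answers upstream queries by walking the reversed graph. A compact sketch of that representation with string ids instead of DatasetID:

    use std::collections::HashMap;

    use petgraph::stable_graph::{NodeIndex, StableDiGraph};
    use petgraph::visit::{Bfs, Reversed};

    fn main() {
        let mut graph: StableDiGraph<String, ()> = StableDiGraph::new();
        let mut index: HashMap<String, NodeIndex> = HashMap::new();

        // Edges point upstream -> downstream: foo -> bar -> foo-bar.
        for id in ["foo", "bar", "foo-bar"] {
            index.insert(id.to_string(), graph.add_node(id.to_string()));
        }
        graph.add_edge(index["foo"], index["bar"], ());
        graph.add_edge(index["bar"], index["foo-bar"], ());

        // Upstream dependencies of "foo-bar": breadth-first over the reversed graph.
        let reversed = Reversed(&graph);
        let mut bfs = Bfs::new(reversed, index["foo-bar"]);
        let mut upstream = vec![];
        while let Some(node) = bfs.next(reversed) {
            upstream.push(graph[node].clone());
        }
        assert_eq!(upstream, ["foo-bar", "bar", "foo"]);
    }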
+ +use std::sync::Arc; + +use dill::{component, interface, meta}; +use init_on_startup::{InitOnStartup, InitOnStartupMeta}; +use internal_error::{InternalError, ResultIntoInternal}; +use kamu_core::{DatasetRegistry, GetSummaryOpts}; +use kamu_datasets::DatasetDependencyRepository; + +use crate::{ + DependencyGraphServiceImpl, + JOB_KAMU_DATASETS_DATASET_ENTRY_INDEXER, + JOB_KAMU_DATASETS_DEPENDENCY_GRAPH_INDEXER, +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub struct DependencyGraphIndexer { + dataset_registry: Arc, + dependency_graph_service: Arc, + dataset_dependency_repo: Arc, +} + +#[component(pub)] +#[interface(dyn InitOnStartup)] +#[meta(InitOnStartupMeta { + job_name: JOB_KAMU_DATASETS_DEPENDENCY_GRAPH_INDEXER, + depends_on: &[ + JOB_KAMU_DATASETS_DATASET_ENTRY_INDEXER, + ], + requires_transaction: true, +})] +impl DependencyGraphIndexer { + pub fn new( + dataset_registry: Arc, + dependency_graph_service: Arc, + dataset_dependency_repo: Arc, + ) -> Self { + Self { + dataset_registry, + dependency_graph_service, + dataset_dependency_repo, + } + } + + async fn was_indexed(&self) -> Result { + self.dataset_dependency_repo + .stores_any_dependencies() + .await + .int_err() + } + + async fn index_dependencies_from_storage(&self) -> Result<(), InternalError> { + use tokio_stream::StreamExt; + use tracing::Instrument; + + let mut datasets_stream = self.dataset_registry.all_dataset_handles(); + + while let Some(Ok(dataset_handle)) = datasets_stream.next().await { + let span = + tracing::debug_span!("Scanning dataset dependencies", dataset = %dataset_handle); + + let summary = self + .dataset_registry + .get_dataset_by_handle(&dataset_handle) + .get_summary(GetSummaryOpts::default()) + .instrument(span) + .await + .int_err()?; + + let upstream_dependencies: Vec<_> = summary.dependencies.iter().collect(); + + self.dataset_dependency_repo + .add_upstream_dependencies(&dataset_handle.id, &upstream_dependencies) + .await + .int_err()?; + } + + Ok(()) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl InitOnStartup for DependencyGraphIndexer { + #[tracing::instrument( + level = "debug", + skip_all, + name = "DependencyGraphIndexer::run_initialization" + )] + async fn run_initialization(&self) -> Result<(), InternalError> { + if self.was_indexed().await? 
{ + tracing::debug!("Skip initialization: dependency graph was already indexed"); + } else { + self.index_dependencies_from_storage().await?; + } + + self.dependency_graph_service + .load_dependency_graph( + self.dataset_registry.as_ref(), + self.dataset_dependency_repo.as_ref(), + ) + .await?; + + Ok(()) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/core/src/dependency_graph_service_inmem.rs b/src/domain/datasets/services/src/dependency_graph_service_impl.rs similarity index 81% rename from src/infra/core/src/dependency_graph_service_inmem.rs rename to src/domain/datasets/services/src/dependency_graph_service_impl.rs index 21266a5e5..00c82fc21 100644 --- a/src/infra/core/src/dependency_graph_service_inmem.rs +++ b/src/domain/datasets/services/src/dependency_graph_service_impl.rs @@ -12,7 +12,17 @@ use std::sync::Arc; use dill::*; use internal_error::{InternalError, ResultIntoInternal}; -use kamu_core::*; +use kamu_core::{ + DatasetIDStream, + DatasetLifecycleMessage, + DatasetNodeNotFoundError, + DatasetRegistry, + DependencyGraphService, + DependencyOrder, + GetDependenciesError, + MESSAGE_PRODUCER_KAMU_CORE_DATASET_SERVICE, +}; +use kamu_datasets::{DatasetDependencies, DatasetDependencyRepository}; use messaging_outbox::{ MessageConsumer, MessageConsumerMeta, @@ -24,10 +34,11 @@ use petgraph::stable_graph::{NodeIndex, StableDiGraph}; use petgraph::visit::{depth_first_search, Bfs, DfsEvent, Reversed}; use petgraph::Direction; +use crate::MESSAGE_CONSUMER_KAMU_DEPENDENCY_GRAPH_SERVICE; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -pub struct DependencyGraphServiceInMemory { - repository: Option>, +pub struct DependencyGraphServiceImpl { state: Arc>, } @@ -37,7 +48,6 @@ pub struct DependencyGraphServiceInMemory { struct State { datasets_graph: StableDiGraph, dataset_node_indices: HashMap, - initially_scanned: bool, } impl State { @@ -60,6 +70,7 @@ impl State { let node_index = self.datasets_graph.add_node(dataset_id.clone()); self.dataset_node_indices .insert(dataset_id.clone(), node_index); + tracing::debug!(%dataset_id, "Inserted new dependency graph node"); node_index } } @@ -73,45 +84,39 @@ impl State { #[interface(dyn MessageConsumer)] #[interface(dyn MessageConsumerT)] #[meta(MessageConsumerMeta { - consumer_name: MESSAGE_CONSUMER_KAMU_CORE_DEPENDENCY_GRAPH_SERVICE, + consumer_name: MESSAGE_CONSUMER_KAMU_DEPENDENCY_GRAPH_SERVICE, feeding_producers: &[MESSAGE_PRODUCER_KAMU_CORE_DATASET_SERVICE], delivery: MessageDeliveryMechanism::Immediate, })] #[scope(Singleton)] -impl DependencyGraphServiceInMemory { - pub fn new(repository: Option>) -> Self { +impl DependencyGraphServiceImpl { + pub fn new() -> Self { Self { - repository, state: Arc::new(tokio::sync::RwLock::new(State::default())), } } - async fn ensure_datasets_initially_scanned(&self) -> Result<(), InternalError> { - let mut state = self.state.write().await; - if state.initially_scanned { - return Ok(()); - } - - self.ensure_datasets_initially_scanned_with( - &mut state, - self.repository - .as_ref() - .expect("Dependencies graph repository not present") - .as_ref(), - ) - .await - } - #[tracing::instrument(level = "debug", skip_all)] - async fn ensure_datasets_initially_scanned_with( + pub async fn load_dependency_graph( &self, - state: &mut State, - repository: &dyn DependencyGraphRepository, + dataset_registry: &dyn DatasetRegistry, + 
dependency_repository: &dyn DatasetDependencyRepository, ) -> Result<(), InternalError> { use tokio_stream::StreamExt; - let mut dependencies_stream = repository.list_dependencies_of_all_datasets(); + let mut state = self.state.write().await; + assert!(state.datasets_graph.node_count() == 0 && state.datasets_graph.edge_count() == 0); + + tracing::debug!("Restoring dataset nodes in dependency graph"); + + let mut datasets_stream = dataset_registry.all_dataset_handles(); + while let Some(Ok(dataset_handle)) = datasets_stream.next().await { + state.get_or_create_dataset_node(&dataset_handle.id); + } + + tracing::debug!("Restoring dependency graph edges"); + let mut dependencies_stream = dependency_repository.list_all_dependencies(); while let Some(Ok(dataset_dependencies)) = dependencies_stream.next().await { let DatasetDependencies { downstream_dataset_id, @@ -120,15 +125,13 @@ impl DependencyGraphServiceInMemory { if !upstream_dataset_ids.is_empty() { for upstream_dataset_id in upstream_dataset_ids { - self.add_dependency(state, &upstream_dataset_id, &downstream_dataset_id); + self.add_dependency(&mut state, &upstream_dataset_id, &downstream_dataset_id); } } else { state.get_or_create_dataset_node(&downstream_dataset_id); } } - state.initially_scanned = true; - tracing::debug!( num_nodes = % state.datasets_graph.node_count(), num_edges = % state.datasets_graph.edge_count(), @@ -206,6 +209,12 @@ impl DependencyGraphServiceInMemory { ) -> Result, GetDependenciesError> { let state = self.state.read().await; + tracing::debug!( + num_nodes = % state.datasets_graph.node_count(), + num_edges = % state.datasets_graph.edge_count(), + "Graph state before breadth first search" + ); + let reversed_graph = Reversed(&state.datasets_graph); let nodes_to_search = self.get_nodes_from_dataset_ids(&dataset_ids, &state)?; @@ -241,6 +250,12 @@ impl DependencyGraphServiceInMemory { ) -> Result, GetDependenciesError> { let state = self.state.read().await; + tracing::debug!( + num_nodes = % state.datasets_graph.node_count(), + num_edges = % state.datasets_graph.edge_count(), + "Graph state before depth first search" + ); + let nodes_to_search = self.get_nodes_from_dataset_ids(&dataset_ids, &state)?; let mut result = vec![]; @@ -265,33 +280,11 @@ impl DependencyGraphServiceInMemory { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -impl DependencyGraphService for DependencyGraphServiceInMemory { - /// Forces initialization of graph data, if it wasn't initialized already. 
- /// Ignored if called multiple times - #[tracing::instrument(level = "debug", skip_all)] - async fn eager_initialization( - &self, - repository: &dyn DependencyGraphRepository, - ) -> Result<(), InternalError> { - let mut state = self.state.write().await; - if state.initially_scanned { - return Ok(()); - } - - self.ensure_datasets_initially_scanned_with(&mut state, repository) - .await - } - +impl DependencyGraphService for DependencyGraphServiceImpl { async fn get_recursive_upstream_dependencies( &self, dataset_ids: Vec, ) -> Result { - self.ensure_datasets_initially_scanned() - .await - .int_err() - .map_err(GetDependenciesError::Internal) - .unwrap(); - let result = self .run_recursive_reversed_breadth_first_search(dataset_ids) .await?; @@ -303,11 +296,6 @@ impl DependencyGraphService for DependencyGraphServiceInMemory { &self, dataset_ids: Vec, ) -> Result { - self.ensure_datasets_initially_scanned() - .await - .map_err(GetDependenciesError::Internal) - .unwrap(); - let result = self.run_recursive_depth_first_search(dataset_ids).await?; Ok(Box::pin(tokio_stream::iter(result))) @@ -319,11 +307,6 @@ impl DependencyGraphService for DependencyGraphServiceInMemory { &self, dataset_id: &DatasetID, ) -> Result { - self.ensure_datasets_initially_scanned() - .await - .int_err() - .map_err(GetDependenciesError::Internal)?; - let downstream_node_datasets: Vec<_> = { let state = self.state.read().await; @@ -353,10 +336,6 @@ impl DependencyGraphService for DependencyGraphServiceInMemory { &self, dataset_id: &DatasetID, ) -> Result { - self.ensure_datasets_initially_scanned() - .await - .map_err(GetDependenciesError::Internal)?; - let upstream_node_datasets: Vec<_> = { let state = self.state.read().await; @@ -380,17 +359,12 @@ impl DependencyGraphService for DependencyGraphServiceInMemory { Ok(Box::pin(tokio_stream::iter(upstream_node_datasets))) } + #[tracing::instrument(level = "debug", skip_all, fields(?dataset_ids, ?order))] async fn in_dependency_order( &self, dataset_ids: Vec, order: DependencyOrder, ) -> Result, GetDependenciesError> { - self.ensure_datasets_initially_scanned() - .await - .int_err() - .map_err(GetDependenciesError::Internal) - .unwrap(); - let original_set: std::collections::HashSet<_> = dataset_ids.iter().cloned().collect(); let mut result = match order { @@ -411,24 +385,28 @@ impl DependencyGraphService for DependencyGraphServiceInMemory { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -impl MessageConsumer for DependencyGraphServiceInMemory {} +impl MessageConsumer for DependencyGraphServiceImpl {} //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -impl MessageConsumerT for DependencyGraphServiceInMemory { +impl MessageConsumerT for DependencyGraphServiceImpl { #[tracing::instrument( level = "debug", skip_all, - name = "DependencyGraphServiceInMemory[DatasetLifecycleMessage]" + name = "DependencyGraphServiceImpl[DatasetLifecycleMessage]" )] async fn consume_message( &self, - _: &Catalog, + catalog: &Catalog, message: &DatasetLifecycleMessage, ) -> Result<(), InternalError> { tracing::debug!(received_message = ?message, "Received dataset lifecycle message"); + let repository = catalog + .get_one::() + .unwrap(); + let mut state = self.state.write().await; match message { @@ -461,11 +439,28 @@ impl MessageConsumerT for DependencyGraphServiceInMemor let new_upstream_ids: HashSet<_> = 
message.new_upstream_ids.iter().cloned().collect(); - for obsolete_upstream_id in existing_upstream_ids.difference(&new_upstream_ids) { + let obsolete_dependencies: Vec<_> = existing_upstream_ids + .difference(&new_upstream_ids) + .collect(); + let added_dependencies: Vec<_> = new_upstream_ids + .difference(&existing_upstream_ids) + .collect(); + + repository + .remove_upstream_dependencies(&message.dataset_id, &obsolete_dependencies) + .await + .int_err()?; + + repository + .add_upstream_dependencies(&message.dataset_id, &added_dependencies) + .await + .int_err()?; + + for obsolete_upstream_id in obsolete_dependencies { self.remove_dependency(&mut state, obsolete_upstream_id, &message.dataset_id); } - for added_id in new_upstream_ids.difference(&existing_upstream_ids) { + for added_id in added_dependencies { self.add_dependency(&mut state, added_id, &message.dataset_id); } } diff --git a/src/domain/datasets/services/src/jobs/mod.rs b/src/domain/datasets/services/src/jobs/mod.rs index dd96b9205..2ba07df46 100644 --- a/src/domain/datasets/services/src/jobs/mod.rs +++ b/src/domain/datasets/services/src/jobs/mod.rs @@ -12,4 +12,7 @@ pub const JOB_KAMU_DATASETS_DATASET_ENTRY_INDEXER: &str = "dev.kamu.domain.datasets.DatasetEntryIndexer"; +pub const JOB_KAMU_DATASETS_DEPENDENCY_GRAPH_INDEXER: &str = + "dev.kamu.domain.datasets.DependencyGraphIndexer"; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/datasets/services/src/lib.rs b/src/domain/datasets/services/src/lib.rs index 2664e1606..ba39e55a7 100644 --- a/src/domain/datasets/services/src/lib.rs +++ b/src/domain/datasets/services/src/lib.rs @@ -18,6 +18,8 @@ mod dataset_env_var_service_impl; mod dataset_env_var_service_null; mod dataset_key_value_service_impl; mod dataset_key_value_service_sys_env; +mod dependency_graph_indexer; +mod dependency_graph_service_impl; mod jobs; mod messages; @@ -27,5 +29,7 @@ pub use dataset_env_var_service_impl::*; pub use dataset_env_var_service_null::*; pub use dataset_key_value_service_impl::*; pub use dataset_key_value_service_sys_env::*; +pub use dependency_graph_indexer::*; +pub use dependency_graph_service_impl::*; pub use jobs::*; pub use messages::*; diff --git a/src/domain/datasets/services/src/messages/dataset_services_message_consumers.rs b/src/domain/datasets/services/src/messages/dataset_services_message_consumers.rs index d4ec6510a..62e6b0f77 100644 --- a/src/domain/datasets/services/src/messages/dataset_services_message_consumers.rs +++ b/src/domain/datasets/services/src/messages/dataset_services_message_consumers.rs @@ -12,4 +12,7 @@ pub const MESSAGE_CONSUMER_KAMU_DATASET_ENTRY_SERVICE: &str = "dev.kamu.domain.datasets.DatasetEntryService"; +pub const MESSAGE_CONSUMER_KAMU_DEPENDENCY_GRAPH_SERVICE: &str = + "dev.kamu.domain.datasets.DependencyGraphService"; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/datasets/services/tests/tests/mod.rs b/src/domain/datasets/services/tests/tests/mod.rs index e206896c5..4bd756220 100644 --- a/src/domain/datasets/services/tests/tests/mod.rs +++ b/src/domain/datasets/services/tests/tests/mod.rs @@ -8,3 +8,4 @@ // by the Apache License, Version 2.0. 
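When a derived dataset's inputs change, the consumer above diffs the old and new upstream id sets, persists both deltas through the repository, and only then mutates the in-memory graph. The diffing step in isolation, with plain &str ids instead of DatasetID:

    use std::collections::HashSet;

    fn main() {
        let existing: HashSet<&str> = ["foo", "bar"].into_iter().collect();
        let new: HashSet<&str> = ["bar", "baz"].into_iter().collect();

        // Edges to drop from the repository and the graph.
        let obsolete: Vec<&str> = existing.difference(&new).copied().collect();
        // Edges to insert.
        let added: Vec<&str> = new.difference(&existing).copied().collect();

        assert_eq!(obsolete, ["foo"]);
        assert_eq!(added, ["baz"]);
    }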
mod test_dataset_entry_service; +mod test_dependency_graph_service_impl; diff --git a/src/infra/core/tests/tests/test_dependency_graph_inmem.rs b/src/domain/datasets/services/tests/tests/test_dependency_graph_service_impl.rs similarity index 97% rename from src/infra/core/tests/tests/test_dependency_graph_inmem.rs rename to src/domain/datasets/services/tests/tests/test_dependency_graph_service_impl.rs index 6dd980e13..a7c8bac44 100644 --- a/src/infra/core/tests/tests/test_dependency_graph_inmem.rs +++ b/src/domain/datasets/services/tests/tests/test_dependency_graph_service_impl.rs @@ -11,16 +11,17 @@ use std::collections::HashMap; use std::sync::Arc; use dill::Component; -use futures::{future, StreamExt, TryStreamExt}; +use futures::{future, StreamExt}; use internal_error::ResultIntoInternal; -use kamu::testing::MetadataFactory; +use kamu::testing::{BaseRepoHarness, MetadataFactory}; use kamu::*; use kamu_core::*; +use kamu_datasets::{DatasetDependencies, DatasetDependencyRepository}; +use kamu_datasets_inmem::InMemoryDatasetDependencyRepository; +use kamu_datasets_services::DependencyGraphServiceImpl; use messaging_outbox::{register_message_dispatcher, Outbox, OutboxImmediateImpl}; use opendatafabric::*; -use crate::BaseRepoHarness; - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[test_log::test(tokio::test)] @@ -85,7 +86,6 @@ async fn test_multi_tenant_repository() { async fn test_service_queries() { let harness = DependencyGraphHarness::new(TenancyConfig::SingleTenant); harness.create_single_tenant_graph().await; - harness.eager_initialization().await; assert_eq!( harness.dataset_dependencies_report("foo").await, @@ -124,7 +124,6 @@ async fn test_service_queries() { async fn test_service_new_datasets() { let harness = DependencyGraphHarness::new(TenancyConfig::SingleTenant); harness.create_single_tenant_graph().await; - harness.eager_initialization().await; harness.create_root_dataset(None, "test-root").await; @@ -168,7 +167,6 @@ async fn test_service_new_datasets() { async fn test_service_derived_dataset_modifies_links() { let harness = DependencyGraphHarness::new(TenancyConfig::SingleTenant); harness.create_single_tenant_graph().await; - harness.eager_initialization().await; assert_eq!( harness.dataset_dependencies_report("bar").await, @@ -260,7 +258,6 @@ async fn test_service_derived_dataset_modifies_links() { async fn test_service_dataset_deleted() { let harness = DependencyGraphHarness::new(TenancyConfig::SingleTenant); harness.create_single_tenant_graph().await; - harness.eager_initialization().await; assert_eq!( harness.dataset_dependencies_report("foo-bar").await, @@ -296,6 +293,8 @@ async fn test_service_dataset_deleted() { ); } +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[test_log::test(tokio::test)] async fn test_get_recursive_downstream_dependencies() { let harness = create_large_dataset_graph().await; @@ -502,6 +501,8 @@ async fn test_get_recursive_downstream_dependencies() { assert_eq!(result, expected_result); } +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[test_log::test(tokio::test)] async fn test_get_recursive_upstream_dependencies() { let harness = create_large_dataset_graph().await; @@ -556,6 +557,8 @@ async fn test_get_recursive_upstream_dependencies() { assert_eq!(result, expected_result); } 
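The in_dependency_order query exercised further below returns the requested datasets in an order that respects upstream/downstream edges; the service does this with its own breadth/depth-first traversal. For intuition only, a short petgraph-based equivalent that topologically sorts the whole graph and then filters it down to the requested subset:

    use std::collections::HashSet;

    use petgraph::algo::toposort;
    use petgraph::stable_graph::StableDiGraph;

    fn main() {
        let mut graph: StableDiGraph<&str, ()> = StableDiGraph::new();
        let foo = graph.add_node("foo");
        let bar = graph.add_node("bar");
        let foo_bar = graph.add_node("foo-bar");
        graph.add_edge(foo, bar, ());     // foo is upstream of bar
        graph.add_edge(bar, foo_bar, ()); // bar is upstream of foo-bar

        // Upstream-first order over the whole graph...
        let order = toposort(&graph, None).expect("dependency graph must stay acyclic");
        // ...then keep only the datasets that were actually requested.
        let requested: HashSet<&str> = ["foo-bar", "foo"].into_iter().collect();
        let filtered: Vec<&str> = order
            .into_iter()
            .map(|ix| graph[ix])
            .filter(|id| requested.contains(id))
            .collect();
        assert_eq!(filtered, ["foo", "foo-bar"]);
    }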
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[test_log::test(tokio::test)] async fn test_in_dependency_order() { let harness = create_large_dataset_graph().await; @@ -605,10 +608,10 @@ async fn test_in_dependency_order() { #[oop::extend(BaseRepoHarness, base_repo_harness)] struct DependencyGraphHarness { - base_repo_harness: BaseRepoHarness, + base_repo_harness: kamu::testing::BaseRepoHarness, catalog: dill::Catalog, dependency_graph_service: Arc, - dependency_graph_repository: Arc, + dataset_dependency_repo: Arc, } impl DependencyGraphHarness { @@ -622,7 +625,8 @@ impl DependencyGraphHarness { ) .bind::() .add::() - .add::() + .add::() + .add::() .add::() .add::() .add::(); @@ -634,26 +638,26 @@ impl DependencyGraphHarness { let catalog = b.build(); - let dataset_repo = catalog.get_one::().unwrap(); + let dataset_dependency_repo = catalog + .get_one::() + .unwrap(); let dependency_graph_service = catalog.get_one::().unwrap(); - // Note: don't place into catalog, avoid cyclic dependency - let dependency_graph_repository = - Arc::new(DependencyGraphRepositoryInMemory::new(dataset_repo.clone())); - Self { base_repo_harness, catalog, dependency_graph_service, - dependency_graph_repository, + dataset_dependency_repo, } } async fn list_all_dependencies(&self) -> Vec<(String, String)> { + use futures::TryStreamExt; + let dependencies: Vec<_> = self - .dependency_graph_repository - .list_dependencies_of_all_datasets() + .dataset_dependency_repo + .list_all_dependencies() .try_collect() .await .unwrap(); @@ -697,13 +701,6 @@ impl DependencyGraphHarness { .join("\n") } - async fn eager_initialization(&self) { - self.dependency_graph_service - .eager_initialization(self.dependency_graph_repository.as_ref()) - .await - .unwrap(); - } - async fn dataset_dependencies_report(&self, dataset_name: &str) -> String { let downstream = self.get_downstream_dependencies(dataset_name).await; let upstream = self.get_upstream_dependencies(dataset_name).await; @@ -1023,7 +1020,6 @@ impl DependencyGraphHarness { async fn create_large_dataset_graph() -> DependencyGraphHarness { let dependency_harness = DependencyGraphHarness::new(TenancyConfig::SingleTenant); dependency_harness.create_single_tenant_graph().await; - dependency_harness.eager_initialization().await; /* Graph representation: diff --git a/src/domain/flow-system/domain/Cargo.toml b/src/domain/flow-system/domain/Cargo.toml index 5363f95aa..91dad0772 100644 --- a/src/domain/flow-system/domain/Cargo.toml +++ b/src/domain/flow-system/domain/Cargo.toml @@ -39,7 +39,7 @@ dill = { version = "0.9" } lazy_static = { version = "1" } sqlx = { version = "0.8", default-features = false, features = ["macros"] } strum = { version = "0.26", features = ["derive"] } -thiserror = { version = "1", default-features = false } +thiserror = { version = "2", default-features = false, features = ["std"] } tokio-stream = { version = "0.1", default-features = false } # TODO: Make serde optional diff --git a/src/domain/flow-system/domain/src/executors/flow_executor.rs b/src/domain/flow-system/domain/src/agents/flow_agent.rs similarity index 94% rename from src/domain/flow-system/domain/src/executors/flow_executor.rs rename to src/domain/flow-system/domain/src/agents/flow_agent.rs index 6fb4e8729..2c81c22f5 100644 --- a/src/domain/flow-system/domain/src/executors/flow_executor.rs +++ b/src/domain/flow-system/domain/src/agents/flow_agent.rs @@ -13,7 +13,7 @@ use internal_error::{InternalError, 
ResultIntoInternal}; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -pub trait FlowExecutor: Sync + Send { +pub trait FlowAgent: Sync + Send { /// Runs the update main loop async fn run(&self) -> Result<(), InternalError>; } @@ -21,7 +21,7 @@ pub trait FlowExecutor: Sync + Send { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[derive(Debug)] -pub struct FlowExecutorConfig { +pub struct FlowAgentConfig { /// Defines discretion for main scheduling loop: how often new data is /// checked and processed pub awaiting_step: chrono::Duration, @@ -29,7 +29,7 @@ pub struct FlowExecutorConfig { pub mandatory_throttling_period: chrono::Duration, } -impl FlowExecutorConfig { +impl FlowAgentConfig { pub fn new( awaiting_step: chrono::Duration, mandatory_throttling_period: chrono::Duration, diff --git a/src/domain/flow-system/domain/src/executors/mod.rs b/src/domain/flow-system/domain/src/agents/mod.rs similarity index 88% rename from src/domain/flow-system/domain/src/executors/mod.rs rename to src/domain/flow-system/domain/src/agents/mod.rs index 54e3893c3..321d0000e 100644 --- a/src/domain/flow-system/domain/src/executors/mod.rs +++ b/src/domain/flow-system/domain/src/agents/mod.rs @@ -7,6 +7,6 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -mod flow_executor; +mod flow_agent; -pub use flow_executor::*; +pub use flow_agent::*; diff --git a/src/domain/flow-system/domain/src/entities/flow/flow_outcome.rs b/src/domain/flow-system/domain/src/entities/flow/flow_outcome.rs index 0601a692f..ca46c1b6f 100644 --- a/src/domain/flow-system/domain/src/entities/flow/flow_outcome.rs +++ b/src/domain/flow-system/domain/src/entities/flow/flow_outcome.rs @@ -142,7 +142,7 @@ impl From for FlowResult { } ts::TaskResult::ResetDatasetResult(task_reset_result) => { Self::DatasetReset(FlowResultDatasetReset { - new_head: task_reset_result.new_head, + new_head: task_reset_result.reset_result.new_head, }) } ts::TaskResult::CompactionDatasetResult(task_compaction_result) => { diff --git a/src/domain/flow-system/domain/src/flow_messages_types.rs b/src/domain/flow-system/domain/src/flow_messages_types.rs index 5b9506505..e88cadc99 100644 --- a/src/domain/flow-system/domain/src/flow_messages_types.rs +++ b/src/domain/flow-system/domain/src/flow_messages_types.rs @@ -15,7 +15,7 @@ use crate::{FlowConfigurationRule, FlowID, FlowKey, FlowOutcome}; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -const FLOW_EXECUTOR_UPDATE_OUTBOX_VERSION: u32 = 1; +const FLOW_AGENT_UPDATE_OUTBOX_VERSION: u32 = 1; const FLOW_CONFIGURATION_UPDATE_OUTBOX_VERSION: u32 = 1; const FLOW_PROGRESS_OUTBOX_VERSION: u32 = 1; @@ -38,21 +38,21 @@ impl Message for FlowConfigurationUpdatedMessage { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct FlowExecutorUpdatedMessage { +pub struct FlowAgentUpdatedMessage { pub update_time: DateTime, - pub update_details: FlowExecutorUpdateDetails, + pub update_details: FlowAgentUpdateDetails, } -impl Message for FlowExecutorUpdatedMessage { +impl Message for FlowAgentUpdatedMessage { fn version() -> u32 { - FLOW_EXECUTOR_UPDATE_OUTBOX_VERSION + FLOW_AGENT_UPDATE_OUTBOX_VERSION } } 
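The executor-to-agent rename keeps the outbox contract unchanged: each message type still reports an explicit wire version so consumers can evolve payloads deliberately. A stripped-down sketch of that pattern, with a local Message trait standing in for messaging_outbox::Message and a plain integer timestamp instead of DateTime<Utc>:

    use serde::{Deserialize, Serialize};

    // Stand-in for the messaging_outbox::Message trait used in the patch.
    trait Message {
        fn version() -> u32;
    }

    const FLOW_AGENT_UPDATE_OUTBOX_VERSION: u32 = 1;

    #[derive(Debug, Clone, Serialize, Deserialize)]
    enum FlowAgentUpdateDetails {
        Loaded,
        ExecutedTimeslot,
    }

    #[derive(Debug, Clone, Serialize, Deserialize)]
    struct FlowAgentUpdatedMessage {
        // Unix timestamp keeps the sketch dependency-light.
        update_time_unix: i64,
        update_details: FlowAgentUpdateDetails,
    }

    impl Message for FlowAgentUpdatedMessage {
        fn version() -> u32 {
            FLOW_AGENT_UPDATE_OUTBOX_VERSION
        }
    }

    fn main() {
        let msg = FlowAgentUpdatedMessage {
            update_time_unix: 0,
            update_details: FlowAgentUpdateDetails::Loaded,
        };
        println!(
            "v{}: {}",
            FlowAgentUpdatedMessage::version(),
            serde_json::to_string(&msg).unwrap()
        );
    }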
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[derive(Debug, Clone, Serialize, Deserialize)] -pub enum FlowExecutorUpdateDetails { +pub enum FlowAgentUpdateDetails { Loaded, ExecutedTimeslot, } diff --git a/src/domain/flow-system/domain/src/jobs/mod.rs b/src/domain/flow-system/domain/src/jobs/mod.rs index 2aaf127e0..16411d543 100644 --- a/src/domain/flow-system/domain/src/jobs/mod.rs +++ b/src/domain/flow-system/domain/src/jobs/mod.rs @@ -9,7 +9,6 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -pub const JOB_KAMU_FLOW_EXECUTOR_RECOVERY: &str = - "dev.kamu.domain.flow-system.FlowExecutorRecovery"; +pub const JOB_KAMU_FLOW_AGENT_RECOVERY: &str = "dev.kamu.domain.flow-system.FlowAgentRecovery"; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/flow-system/domain/src/lib.rs b/src/domain/flow-system/domain/src/lib.rs index f5414ef37..b563adbcc 100644 --- a/src/domain/flow-system/domain/src/lib.rs +++ b/src/domain/flow-system/domain/src/lib.rs @@ -15,18 +15,18 @@ pub use event_sourcing::*; mod flow_messages_types; +mod agents; mod aggregates; mod dataset_flow_key; mod entities; -mod executors; mod jobs; mod repos; mod services; +pub use agents::*; pub use aggregates::*; pub use dataset_flow_key::*; pub use entities::*; -pub use executors::*; pub use flow_messages_types::*; pub use jobs::*; pub use repos::*; diff --git a/src/domain/flow-system/domain/src/services/flow/flow_service_test_driver.rs b/src/domain/flow-system/domain/src/services/flow/flow_service_test_driver.rs index 71a96bec1..8dc2d6e73 100644 --- a/src/domain/flow-system/domain/src/services/flow/flow_service_test_driver.rs +++ b/src/domain/flow-system/domain/src/services/flow/flow_service_test_driver.rs @@ -17,7 +17,7 @@ use crate::FlowID; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -pub trait FlowExecutorTestDriver: Sync + Send { +pub trait FlowAgentTestDriver: Sync + Send { /// Pretends it is time to schedule the given flow that was in Queued state async fn mimic_flow_scheduled( &self, diff --git a/src/domain/flow-system/services/Cargo.toml b/src/domain/flow-system/services/Cargo.toml index 5eb04093e..8384e7a30 100644 --- a/src/domain/flow-system/services/Cargo.toml +++ b/src/domain/flow-system/services/Cargo.toml @@ -48,6 +48,8 @@ tracing = { version = "0.1", default-features = false } kamu = { workspace = true, features = ["testing"] } kamu-accounts-inmem = { workspace = true } kamu-accounts-services = { workspace = true } +kamu-datasets-inmem = { workspace = true } +kamu-datasets-services = { workspace = true } kamu-flow-system-inmem = { workspace = true } kamu-task-system-inmem = { workspace = true } kamu-task-system-services = { workspace = true } diff --git a/src/domain/flow-system/services/src/dependencies.rs b/src/domain/flow-system/services/src/dependencies.rs index da47b5d8a..cfcb0a862 100644 --- a/src/domain/flow-system/services/src/dependencies.rs +++ b/src/domain/flow-system/services/src/dependencies.rs @@ -15,7 +15,7 @@ use crate::*; pub fn register_dependencies(catalog_builder: &mut CatalogBuilder) { catalog_builder.add::(); - catalog_builder.add::(); + catalog_builder.add::(); catalog_builder.add::(); catalog_builder.add::(); diff --git 
a/src/domain/flow-system/services/src/flow/flow_executor_impl.rs b/src/domain/flow-system/services/src/flow/flow_agent_impl.rs similarity index 93% rename from src/domain/flow-system/services/src/flow/flow_executor_impl.rs rename to src/domain/flow-system/services/src/flow/flow_agent_impl.rs index 347da3b74..42df2ecfb 100644 --- a/src/domain/flow-system/services/src/flow/flow_executor_impl.rs +++ b/src/domain/flow-system/services/src/flow/flow_agent_impl.rs @@ -35,55 +35,55 @@ use tracing::Instrument as _; use crate::{ FlowAbortHelper, FlowSchedulingHelper, - MESSAGE_CONSUMER_KAMU_FLOW_EXECUTOR, + MESSAGE_CONSUMER_KAMU_FLOW_AGENT, + MESSAGE_PRODUCER_KAMU_FLOW_AGENT, MESSAGE_PRODUCER_KAMU_FLOW_CONFIGURATION_SERVICE, - MESSAGE_PRODUCER_KAMU_FLOW_EXECUTOR, MESSAGE_PRODUCER_KAMU_FLOW_PROGRESS_SERVICE, }; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -pub struct FlowExecutorImpl { +pub struct FlowAgentImpl { catalog: Catalog, time_source: Arc, - executor_config: Arc, + agent_config: Arc, } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[component(pub)] -#[interface(dyn FlowExecutor)] -#[interface(dyn FlowExecutorTestDriver)] +#[interface(dyn FlowAgent)] +#[interface(dyn FlowAgentTestDriver)] #[interface(dyn MessageConsumer)] #[interface(dyn MessageConsumerT)] #[interface(dyn MessageConsumerT)] #[interface(dyn MessageConsumerT)] #[meta(MessageConsumerMeta { - consumer_name: MESSAGE_CONSUMER_KAMU_FLOW_EXECUTOR, + consumer_name: MESSAGE_CONSUMER_KAMU_FLOW_AGENT, feeding_producers: &[ MESSAGE_PRODUCER_KAMU_CORE_DATASET_SERVICE, - MESSAGE_PRODUCER_KAMU_TASK_EXECUTOR, + MESSAGE_PRODUCER_KAMU_TASK_AGENT, MESSAGE_PRODUCER_KAMU_FLOW_CONFIGURATION_SERVICE ], delivery: MessageDeliveryMechanism::Transactional, })] #[interface(dyn InitOnStartup)] #[meta(InitOnStartupMeta { - job_name: JOB_KAMU_FLOW_EXECUTOR_RECOVERY, + job_name: JOB_KAMU_FLOW_AGENT_RECOVERY, depends_on: &[], requires_transaction: false, })] #[scope(Singleton)] -impl FlowExecutorImpl { +impl FlowAgentImpl { pub fn new( catalog: Catalog, time_source: Arc, - executor_config: Arc, + agent_config: Arc, ) -> Self { Self { catalog, time_source, - executor_config, + agent_config, } } @@ -106,10 +106,10 @@ impl FlowExecutorImpl { let outbox = transaction_catalog.get_one::().unwrap(); outbox .post_message( - MESSAGE_PRODUCER_KAMU_FLOW_EXECUTOR, - FlowExecutorUpdatedMessage { + MESSAGE_PRODUCER_KAMU_FLOW_AGENT, + FlowAgentUpdatedMessage { update_time: start_time, - update_details: FlowExecutorUpdateDetails::Loaded, + update_details: FlowAgentUpdateDetails::Loaded, }, ) .await?; @@ -253,9 +253,7 @@ impl FlowExecutorImpl { flow_event_store, transaction_catalog, ) - .instrument(observability::tracing::root_span!( - "FlowExecutor::activation" - )) + .instrument(observability::tracing::root_span!("FlowAgent::activation")) .await } @@ -311,10 +309,10 @@ impl FlowExecutorImpl { let outbox = transaction_catalog.get_one::().unwrap(); outbox .post_message( - MESSAGE_PRODUCER_KAMU_FLOW_EXECUTOR, - FlowExecutorUpdatedMessage { + MESSAGE_PRODUCER_KAMU_FLOW_AGENT, + FlowAgentUpdatedMessage { update_time: activation_moment, - update_details: FlowExecutorUpdateDetails::ExecutedTimeslot, + update_details: FlowAgentUpdateDetails::ExecutedTimeslot, }, ) .await?; @@ -446,18 +444,18 @@ impl FlowExecutorImpl { 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -impl FlowExecutor for FlowExecutorImpl { +impl FlowAgent for FlowAgentImpl { /// Runs the update main loop async fn run(&self) -> Result<(), InternalError> { // Main scanning loop loop { // Run scheduling for current time slot self.tick_current_timeslot() - .instrument(tracing::debug_span!("FlowExecutor::tick")) + .instrument(tracing::debug_span!("FlowAgent::tick")) .await?; self.time_source - .sleep(self.executor_config.awaiting_step) + .sleep(self.agent_config.awaiting_step) .await; } } @@ -466,9 +464,9 @@ impl FlowExecutor for FlowExecutorImpl { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -impl InitOnStartup for FlowExecutorImpl { +impl InitOnStartup for FlowAgentImpl { async fn run_initialization(&self) -> Result<(), InternalError> { - let start_time = self.executor_config.round_time(self.time_source.now())?; + let start_time = self.agent_config.round_time(self.time_source.now())?; self.recover_initial_flows_state(start_time).await } } @@ -476,7 +474,7 @@ impl InitOnStartup for FlowExecutorImpl { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -impl FlowExecutorTestDriver for FlowExecutorImpl { +impl FlowAgentTestDriver for FlowAgentImpl { /// Pretends it is time to schedule the given flow that was not waiting for /// anything else async fn mimic_flow_scheduled( @@ -501,17 +499,13 @@ impl FlowExecutorTestDriver for FlowExecutorImpl { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -impl MessageConsumer for FlowExecutorImpl {} +impl MessageConsumer for FlowAgentImpl {} //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -impl MessageConsumerT for FlowExecutorImpl { - #[tracing::instrument( - level = "debug", - skip_all, - name = "FlowExecutorImpl[TaskProgressMessage]" - )] +impl MessageConsumerT for FlowAgentImpl { + #[tracing::instrument(level = "debug", skip_all, name = "FlowAgentImpl[TaskProgressMessage]")] async fn consume_message( &self, target_catalog: &Catalog, @@ -570,7 +564,7 @@ impl MessageConsumerT for FlowExecutorImpl { let scheduling_helper = target_catalog.get_one::().unwrap(); - let finish_time = self.executor_config.round_time(message.event_time)?; + let finish_time = self.agent_config.round_time(message.event_time)?; // In case of success: // - execute followup method @@ -633,11 +627,11 @@ impl MessageConsumerT for FlowExecutorImpl { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -impl MessageConsumerT for FlowExecutorImpl { +impl MessageConsumerT for FlowAgentImpl { #[tracing::instrument( level = "debug", skip_all, - name = "FlowExecutorImpl[FlowConfigurationUpdatedMessage]" + name = "FlowAgentImpl[FlowConfigurationUpdatedMessage]" )] async fn consume_message( &self, @@ -662,7 +656,7 @@ impl MessageConsumerT for FlowExecutorImpl { let scheduling_helper = target_catalog.get_one::().unwrap(); scheduling_helper .activate_flow_configuration( - self.executor_config.round_time(message.event_time)?, + self.agent_config.round_time(message.event_time)?, 
message.flow_key.clone(), message.rule.clone(), ) @@ -676,11 +670,11 @@ impl MessageConsumerT for FlowExecutorImpl { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -impl MessageConsumerT for FlowExecutorImpl { +impl MessageConsumerT for FlowAgentImpl { #[tracing::instrument( level = "debug", skip_all, - name = "FlowExecutorImpl[DatasetLifecycleMessage]" + name = "FlowAgentImpl[DatasetLifecycleMessage]" )] async fn consume_message( &self, diff --git a/src/domain/flow-system/services/src/flow/flow_query_service_impl.rs b/src/domain/flow-system/services/src/flow/flow_query_service_impl.rs index 20b021966..7e25ce58c 100644 --- a/src/domain/flow-system/services/src/flow/flow_query_service_impl.rs +++ b/src/domain/flow-system/services/src/flow/flow_query_service_impl.rs @@ -28,7 +28,7 @@ pub struct FlowQueryServiceImpl { catalog: Catalog, flow_event_store: Arc, dataset_ownership_service: Arc, - executor_config: Arc, + agent_config: Arc, } #[component(pub)] @@ -38,13 +38,13 @@ impl FlowQueryServiceImpl { catalog: Catalog, flow_event_store: Arc, dataset_ownership_service: Arc, - executor_config: Arc, + agent_config: Arc, ) -> Self { Self { catalog, flow_event_store, dataset_ownership_service, - executor_config, + agent_config, } } } @@ -283,7 +283,7 @@ impl FlowQueryService for FlowQueryServiceImpl { initiator_account_id: AccountID, config_snapshot_maybe: Option, ) -> Result { - let activation_time = self.executor_config.round_time(trigger_time)?; + let activation_time = self.agent_config.round_time(trigger_time)?; let scheduling_helper = self.catalog.get_one::().unwrap(); scheduling_helper diff --git a/src/domain/flow-system/services/src/flow/flow_scheduling_helper.rs b/src/domain/flow-system/services/src/flow/flow_scheduling_helper.rs index eabd42833..001113c7b 100644 --- a/src/domain/flow-system/services/src/flow/flow_scheduling_helper.rs +++ b/src/domain/flow-system/services/src/flow/flow_scheduling_helper.rs @@ -30,7 +30,7 @@ pub(crate) struct FlowSchedulingHelper { dependency_graph_service: Arc, dataset_ownership_service: Arc, time_source: Arc, - executor_config: Arc, + agent_config: Arc, } #[component(pub)] @@ -43,7 +43,7 @@ impl FlowSchedulingHelper { dependency_graph_service: Arc, dataset_ownership_service: Arc, time_source: Arc, - executor_config: Arc, + agent_config: Arc, ) -> Self { Self { flow_event_store, @@ -53,7 +53,7 @@ impl FlowSchedulingHelper { dependency_graph_service, dataset_ownership_service, time_source, - executor_config, + agent_config, } } @@ -346,7 +346,7 @@ impl FlowSchedulingHelper { let trigger_time = trigger.trigger_time(); let mut throttling_boundary_time = flow_run_stats.last_attempt_time.map_or(trigger_time, |t| { - t + self.executor_config.mandatory_throttling_period + t + self.agent_config.mandatory_throttling_period }); // It's also possible we are waiting for some start condition much longer.. 
if throttling_boundary_time < trigger_time { @@ -630,7 +630,7 @@ impl FlowSchedulingHelper { flow.set_relevant_start_condition( self.time_source.now(), FlowStartCondition::Throttling(FlowStartConditionThrottling { - interval: self.executor_config.mandatory_throttling_period, + interval: self.agent_config.mandatory_throttling_period, wake_up_at, shifted_from, }), diff --git a/src/domain/flow-system/services/src/flow/mod.rs b/src/domain/flow-system/services/src/flow/mod.rs index adff1a19c..098471329 100644 --- a/src/domain/flow-system/services/src/flow/mod.rs +++ b/src/domain/flow-system/services/src/flow/mod.rs @@ -8,11 +8,11 @@ // by the Apache License, Version 2.0. mod flow_abort_helper; -mod flow_executor_impl; +mod flow_agent_impl; mod flow_query_service_impl; mod flow_scheduling_helper; pub(crate) use flow_abort_helper::*; -pub use flow_executor_impl::*; +pub use flow_agent_impl::*; pub use flow_query_service_impl::*; pub(crate) use flow_scheduling_helper::*; diff --git a/src/domain/flow-system/services/src/messages/flow_message_consumers.rs b/src/domain/flow-system/services/src/messages/flow_message_consumers.rs index 64eb11480..3112be9a4 100644 --- a/src/domain/flow-system/services/src/messages/flow_message_consumers.rs +++ b/src/domain/flow-system/services/src/messages/flow_message_consumers.rs @@ -12,6 +12,6 @@ pub const MESSAGE_CONSUMER_KAMU_FLOW_CONFIGURATION_SERVICE: &str = "dev.kamu.domain.flow-system.FlowConfigurationService"; -pub const MESSAGE_CONSUMER_KAMU_FLOW_EXECUTOR: &str = "dev.kamu.domain.flow-system.FlowExecutor"; +pub const MESSAGE_CONSUMER_KAMU_FLOW_AGENT: &str = "dev.kamu.domain.flow-system.FlowAgent"; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/flow-system/services/src/messages/flow_message_producers.rs b/src/domain/flow-system/services/src/messages/flow_message_producers.rs index d816233a2..84f15cb17 100644 --- a/src/domain/flow-system/services/src/messages/flow_message_producers.rs +++ b/src/domain/flow-system/services/src/messages/flow_message_producers.rs @@ -12,7 +12,7 @@ pub const MESSAGE_PRODUCER_KAMU_FLOW_CONFIGURATION_SERVICE: &str = "dev.kamu.domain.flow-system.FlowConfigurationService"; -pub const MESSAGE_PRODUCER_KAMU_FLOW_EXECUTOR: &str = "dev.kamu.domain.flow-system.FlowExecutor"; +pub const MESSAGE_PRODUCER_KAMU_FLOW_AGENT: &str = "dev.kamu.domain.flow-system.FlowAgent"; pub const MESSAGE_PRODUCER_KAMU_FLOW_PROGRESS_SERVICE: &str = "dev.kamu.domain.flow-system.FlowProgressService"; diff --git a/src/domain/flow-system/services/tests/tests/mod.rs b/src/domain/flow-system/services/tests/tests/mod.rs index c3a4c5e76..8906add01 100644 --- a/src/domain/flow-system/services/tests/tests/mod.rs +++ b/src/domain/flow-system/services/tests/tests/mod.rs @@ -7,8 +7,8 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. 
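The scheduling helper around this hunk derives a throttling boundary from the last attempt time and the agent's mandatory throttling period, falling back to the trigger time when the flow has never run. A self-contained rendering of that computation with chrono (the real code keeps the comparison explicit rather than using max, and reads the period from FlowAgentConfig):

    use chrono::{DateTime, Duration, TimeZone, Utc};

    // Earliest time a flow may run again: either the trigger time itself, or
    // last attempt + mandatory throttling period, whichever is later.
    fn throttling_boundary(
        trigger_time: DateTime<Utc>,
        last_attempt_time: Option<DateTime<Utc>>,
        mandatory_throttling_period: Duration,
    ) -> DateTime<Utc> {
        let boundary =
            last_attempt_time.map_or(trigger_time, |t| t + mandatory_throttling_period);
        boundary.max(trigger_time)
    }

    fn main() {
        let trigger = Utc.with_ymd_and_hms(2024, 12, 16, 12, 0, 0).unwrap();
        let last = Utc.with_ymd_and_hms(2024, 12, 16, 11, 59, 0).unwrap();

        // A 10-minute throttling period pushes the boundary past the trigger time.
        let boundary = throttling_boundary(trigger, Some(last), Duration::minutes(10));
        assert_eq!(boundary, last + Duration::minutes(10));

        // No previous attempt: the trigger time itself is the boundary.
        assert_eq!(throttling_boundary(trigger, None, Duration::minutes(10)), trigger);
    }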
+mod test_flow_agent_impl; mod test_flow_configuration_service_impl; -mod test_flow_executor_impl; mod utils; pub(crate) use utils::*; diff --git a/src/domain/flow-system/services/tests/tests/test_flow_executor_impl.rs b/src/domain/flow-system/services/tests/tests/test_flow_agent_impl.rs similarity index 99% rename from src/domain/flow-system/services/tests/tests/test_flow_executor_impl.rs rename to src/domain/flow-system/services/tests/tests/test_flow_agent_impl.rs index d71df5f46..544defc29 100644 --- a/src/domain/flow-system/services/tests/tests/test_flow_executor_impl.rs +++ b/src/domain/flow-system/services/tests/tests/test_flow_agent_impl.rs @@ -58,7 +58,7 @@ async fn test_read_initial_config_and_queue_without_waiting() { // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -233,7 +233,7 @@ async fn test_read_initial_config_shouldnt_queue_in_recovery_case() { // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -322,7 +322,7 @@ async fn test_cron_config() { // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -452,7 +452,7 @@ async fn test_manual_trigger() { // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -663,7 +663,7 @@ async fn test_ingest_trigger_with_ingest_config() { // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -864,7 +864,7 @@ async fn test_manual_trigger_compaction() { // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -1025,7 +1025,7 @@ async fn test_manual_trigger_reset() { // Run scheduler concurrently with manual triggers script tokio::select! 
{ // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -1035,7 +1035,11 @@ async fn test_manual_trigger_reset() { task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(create_dataset_result.dataset_handle.id.clone()), run_since_start: Duration::milliseconds(20), - finish_in_with: Some((Duration::milliseconds(90), TaskOutcome::Success(TaskResult::ResetDatasetResult(TaskResetDatasetResult { new_head: Multihash::from_digest_sha3_256(b"new-head") })))), + finish_in_with: Some((Duration::milliseconds(90), TaskOutcome::Success( + TaskResult::ResetDatasetResult(TaskResetDatasetResult { + reset_result: ResetResult { new_head: Multihash::from_digest_sha3_256(b"new-head") }, + }) + ))), expected_logical_plan: LogicalPlan::ResetDataset(LogicalPlanResetDataset { dataset_id: create_dataset_result.dataset_handle.id.clone(), // By deafult should reset to seed block @@ -1160,7 +1164,7 @@ async fn test_reset_trigger_keep_metadata_compaction_for_derivatives() { // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -1177,7 +1181,11 @@ async fn test_reset_trigger_keep_metadata_compaction_for_derivatives() { task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "0")]), dataset_id: Some(create_foo_result.dataset_handle.id.clone()), run_since_start: Duration::milliseconds(20), - finish_in_with: Some((Duration::milliseconds(70), TaskOutcome::Success(TaskResult::ResetDatasetResult(TaskResetDatasetResult { new_head: Multihash::from_digest_sha3_256(b"new-head") })))), + finish_in_with: Some((Duration::milliseconds(70), TaskOutcome::Success( + TaskResult::ResetDatasetResult(TaskResetDatasetResult { + reset_result: ResetResult { new_head: Multihash::from_digest_sha3_256(b"new-head") } + }) + ))), expected_logical_plan: LogicalPlan::ResetDataset(LogicalPlanResetDataset { dataset_id: create_foo_result.dataset_handle.id.clone(), new_head_hash: Some(dataset_blocks[1].0.clone()), @@ -1358,7 +1366,7 @@ async fn test_manual_trigger_compaction_with_config() { // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -1479,7 +1487,7 @@ async fn test_full_hard_compaction_trigger_keep_metadata_compaction_for_derivati // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -1707,7 +1715,7 @@ async fn test_manual_trigger_keep_metadata_only_with_recursive_compaction() { // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -1937,7 +1945,7 @@ async fn test_manual_trigger_keep_metadata_only_without_recursive_compaction() { // Run scheduler concurrently with manual triggers script tokio::select! 
{ // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -2072,7 +2080,7 @@ async fn test_manual_trigger_keep_metadata_only_compaction_multiple_accounts() { // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -2241,7 +2249,7 @@ async fn test_dataset_flow_configuration_paused_resumed_modified() { // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -2473,7 +2481,7 @@ async fn test_respect_last_success_time_when_schedule_resumes() { // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -2694,7 +2702,7 @@ async fn test_dataset_deleted() { // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -2883,7 +2891,7 @@ async fn test_task_completions_trigger_next_loop_on_success() { // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -3108,7 +3116,7 @@ async fn test_derived_dataset_triggered_initially_and_after_input_change() { // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -3336,7 +3344,7 @@ async fn test_throttling_manual_triggers() { // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -3516,7 +3524,7 @@ async fn test_throttling_derived_dataset_with_2_parents() { // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -3983,7 +3991,7 @@ async fn test_batching_condition_records_reached() { // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -4306,7 +4314,7 @@ async fn test_batching_condition_timeout() { // Run scheduler concurrently with manual triggers script tokio::select! 
{ // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -4580,7 +4588,7 @@ async fn test_batching_condition_watermark() { // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -4924,7 +4932,7 @@ async fn test_batching_condition_with_2_inputs() { // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -5388,7 +5396,7 @@ async fn test_list_all_flow_initiators() { // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -5548,7 +5556,7 @@ async fn test_list_all_datasets_with_flow() { // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -5696,7 +5704,7 @@ async fn test_abort_flow_before_scheduling_tasks() { // Run scheduler concurrently with manual aborts script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -5792,7 +5800,7 @@ async fn test_abort_flow_after_scheduling_still_waiting_for_executor() { // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -5893,7 +5901,7 @@ async fn test_abort_flow_after_task_running_has_started() { // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -5983,7 +5991,7 @@ async fn test_abort_flow_after_task_finishes() { // Run scheduler concurrently with manual triggers script tokio::select! { // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { @@ -6135,7 +6143,7 @@ async fn test_respect_last_success_time_when_activate_configuration() { // Run scheduler concurrently with manual triggers script tokio::select! 
{ // Run API service - res = harness.flow_executor.run() => res.int_err(), + res = harness.flow_agent.run() => res.int_err(), // Run simulation script and task drivers _ = async { diff --git a/src/domain/flow-system/services/tests/tests/test_flow_configuration_service_impl.rs b/src/domain/flow-system/services/tests/tests/test_flow_configuration_service_impl.rs index 068aac9f4..314edbea7 100644 --- a/src/domain/flow-system/services/tests/tests/test_flow_configuration_service_impl.rs +++ b/src/domain/flow-system/services/tests/tests/test_flow_configuration_service_impl.rs @@ -19,6 +19,8 @@ use kamu::testing::MetadataFactory; use kamu::*; use kamu_accounts::CurrentAccountSubject; use kamu_core::*; +use kamu_datasets_inmem::InMemoryDatasetDependencyRepository; +use kamu_datasets_services::DependencyGraphServiceImpl; use kamu_flow_system::*; use kamu_flow_system_inmem::*; use kamu_flow_system_services::*; @@ -426,7 +428,6 @@ async fn test_dataset_deleted() { struct FlowConfigurationHarness { _tmp_dir: tempfile::TempDir, catalog: Catalog, - dataset_repo: Arc, flow_configuration_service: Arc, flow_configuration_event_store: Arc, config_listener: Arc, @@ -457,7 +458,8 @@ impl FlowConfigurationHarness { .add::() .add_value(CurrentAccountSubject::new_test()) .add::() - .add::() + .add::() + .add::() .add::() .add::(); @@ -479,7 +481,6 @@ impl FlowConfigurationHarness { let flow_configuration_event_store = catalog .get_one::() .unwrap(); - let dataset_repo = catalog.get_one::().unwrap(); let flow_config_events_listener = catalog.get_one::().unwrap(); Self { @@ -487,7 +488,6 @@ impl FlowConfigurationHarness { catalog, flow_configuration_service, flow_configuration_event_store, - dataset_repo, config_listener: flow_config_events_listener, } } @@ -663,19 +663,6 @@ impl FlowConfigurationHarness { } async fn delete_dataset(&self, dataset_id: &DatasetID) { - // Eagerly push dependency graph initialization before deletes. 
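// Illustrative sketch, not part of the applied diff: the harness changes in this
// file replace the eager DependencyGraphRepositoryInMemory initialization (removed
// in the surrounding hunks) with plain catalog registrations of the new
// dependency-graph components. Assuming the dill CatalogBuilder these tests
// already use; the helper name is hypothetical.
use dill::CatalogBuilder;
use kamu_datasets_inmem::InMemoryDatasetDependencyRepository;
use kamu_datasets_services::DependencyGraphServiceImpl;

fn register_dependency_graph(b: &mut CatalogBuilder) {
    // The service resolves dataset dependencies through the repository, so both
    // components need to be present in the catalog.
    b.add::<DependencyGraphServiceImpl>();
    b.add::<InMemoryDatasetDependencyRepository>();
}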
- // It's ignored, if requested 2nd time - let dependency_graph_service = self - .catalog - .get_one::() - .unwrap(); - let dependency_graph_repository = - DependencyGraphRepositoryInMemory::new(self.dataset_repo.clone()); - dependency_graph_service - .eager_initialization(&dependency_graph_repository) - .await - .unwrap(); - // Do the actual deletion let delete_dataset = self.catalog.get_one::().unwrap(); delete_dataset diff --git a/src/domain/flow-system/services/tests/tests/utils/flow_harness_shared.rs b/src/domain/flow-system/services/tests/tests/utils/flow_harness_shared.rs index f70eaadf7..589f37e17 100644 --- a/src/domain/flow-system/services/tests/tests/utils/flow_harness_shared.rs +++ b/src/domain/flow-system/services/tests/tests/utils/flow_harness_shared.rs @@ -29,10 +29,12 @@ use kamu_accounts_services::{ PredefinedAccountsRegistrator, }; use kamu_core::*; +use kamu_datasets_inmem::InMemoryDatasetDependencyRepository; +use kamu_datasets_services::DependencyGraphServiceImpl; use kamu_flow_system::*; use kamu_flow_system_inmem::*; use kamu_flow_system_services::*; -use kamu_task_system::{TaskProgressMessage, MESSAGE_PRODUCER_KAMU_TASK_EXECUTOR}; +use kamu_task_system::{TaskProgressMessage, MESSAGE_PRODUCER_KAMU_TASK_AGENT}; use kamu_task_system_inmem::InMemoryTaskEventStore; use kamu_task_system_services::TaskSchedulerImpl; use messaging_outbox::{register_message_dispatcher, Outbox, OutboxImmediateImpl}; @@ -60,10 +62,9 @@ pub(crate) const SCHEDULING_MANDATORY_THROTTLING_PERIOD_MS: i64 = SCHEDULING_ALI pub(crate) struct FlowHarness { _tmp_dir: tempfile::TempDir, pub catalog: dill::Catalog, - pub dataset_repo: Arc, pub flow_configuration_service: Arc, pub flow_configuration_event_store: Arc, - pub flow_executor: Arc, + pub flow_agent: Arc, pub flow_query_service: Arc, pub flow_event_store: Arc, pub auth_svc: Arc, @@ -142,7 +143,7 @@ impl FlowHarness { ) .bind::() .add::() - .add_value(FlowExecutorConfig::new( + .add_value(FlowAgentConfig::new( awaiting_step, mandatory_throttling_period, )) @@ -165,7 +166,8 @@ impl FlowHarness { .add_value(JwtAuthenticationConfig::default()) .add::() .add::() - .add::() + .add::() + .add::() .add::() .add::() .add::() @@ -180,15 +182,15 @@ impl FlowHarness { ); register_message_dispatcher::( &mut b, - MESSAGE_PRODUCER_KAMU_TASK_EXECUTOR, + MESSAGE_PRODUCER_KAMU_TASK_AGENT, ); register_message_dispatcher::( &mut b, MESSAGE_PRODUCER_KAMU_FLOW_CONFIGURATION_SERVICE, ); - register_message_dispatcher::( + register_message_dispatcher::( &mut b, - MESSAGE_PRODUCER_KAMU_FLOW_EXECUTOR, + MESSAGE_PRODUCER_KAMU_FLOW_AGENT, ); register_message_dispatcher::( &mut b, @@ -198,25 +200,23 @@ impl FlowHarness { b.build() }; - let flow_executor = catalog.get_one::().unwrap(); + let flow_agent = catalog.get_one::().unwrap(); let flow_query_service = catalog.get_one::().unwrap(); let flow_configuration_service = catalog.get_one::().unwrap(); let flow_configuration_event_store = catalog .get_one::() .unwrap(); let flow_event_store = catalog.get_one::().unwrap(); - let dataset_repo = catalog.get_one::().unwrap(); let auth_svc = catalog.get_one::().unwrap(); Self { _tmp_dir: tmp_dir, catalog, - flow_executor, + flow_agent, flow_query_service, flow_configuration_service, flow_configuration_event_store, flow_event_store, - dataset_repo, fake_system_time_source, auth_svc, } @@ -271,8 +271,6 @@ impl FlowHarness { } pub async fn eager_initialization(&self) { - self.initialize_dependency_graph().await; - use init_on_startup::InitOnStartup; let dataset_ownership_initializer = 
self .catalog @@ -283,28 +281,10 @@ impl FlowHarness { .await .unwrap(); - self.flow_executor.run_initialization().await.unwrap(); - } - - pub async fn initialize_dependency_graph(&self) { - let dependency_graph_service = self - .catalog - .get_one::() - .unwrap(); - let dependency_graph_repository = - DependencyGraphRepositoryInMemory::new(self.dataset_repo.clone()); - - dependency_graph_service - .eager_initialization(&dependency_graph_repository) - .await - .unwrap(); + self.flow_agent.run_initialization().await.unwrap(); } pub async fn delete_dataset(&self, dataset_id: &DatasetID) { - // Eagerly push dependency graph initialization before deletes. - // It's ignored, if requested 2nd time - self.initialize_dependency_graph().await; - // Do the actual deletion let delete_dataset = self.catalog.get_one::().unwrap(); delete_dataset diff --git a/src/domain/flow-system/services/tests/tests/utils/flow_system_test_listener.rs b/src/domain/flow-system/services/tests/tests/utils/flow_system_test_listener.rs index 50be32a16..9ba6fdc94 100644 --- a/src/domain/flow-system/services/tests/tests/utils/flow_system_test_listener.rs +++ b/src/domain/flow-system/services/tests/tests/utils/flow_system_test_listener.rs @@ -16,7 +16,7 @@ use dill::*; use internal_error::InternalError; use kamu_flow_system::*; use kamu_flow_system_services::{ - MESSAGE_PRODUCER_KAMU_FLOW_EXECUTOR, + MESSAGE_PRODUCER_KAMU_FLOW_AGENT, MESSAGE_PRODUCER_KAMU_FLOW_PROGRESS_SERVICE, }; use messaging_outbox::{ @@ -47,11 +47,11 @@ struct FlowSystemTestListenerState { #[component(pub)] #[scope(Singleton)] #[interface(dyn MessageConsumer)] -#[interface(dyn MessageConsumerT)] +#[interface(dyn MessageConsumerT)] #[interface(dyn MessageConsumerT)] #[meta(MessageConsumerMeta { consumer_name: "FlowSystemTestListener", - feeding_producers: &[MESSAGE_PRODUCER_KAMU_FLOW_EXECUTOR, MESSAGE_PRODUCER_KAMU_FLOW_PROGRESS_SERVICE], + feeding_producers: &[MESSAGE_PRODUCER_KAMU_FLOW_AGENT, MESSAGE_PRODUCER_KAMU_FLOW_PROGRESS_SERVICE], delivery: MessageDeliveryMechanism::Immediate, })] impl FlowSystemTestListener { @@ -240,11 +240,11 @@ impl std::fmt::Display for FlowSystemTestListener { impl MessageConsumer for FlowSystemTestListener {} #[async_trait::async_trait] -impl MessageConsumerT for FlowSystemTestListener { +impl MessageConsumerT for FlowSystemTestListener { async fn consume_message( &self, _: &Catalog, - message: &FlowExecutorUpdatedMessage, + message: &FlowAgentUpdatedMessage, ) -> Result<(), InternalError> { self.make_a_snapshot(message.update_time).await; Ok(()) diff --git a/src/domain/flow-system/services/tests/tests/utils/task_driver.rs b/src/domain/flow-system/services/tests/tests/utils/task_driver.rs index e648a2192..fbcd290ac 100644 --- a/src/domain/flow-system/services/tests/tests/utils/task_driver.rs +++ b/src/domain/flow-system/services/tests/tests/utils/task_driver.rs @@ -63,7 +63,7 @@ impl TaskDriver { // with assumed immediate delivery self.outbox .post_message( - MESSAGE_PRODUCER_KAMU_TASK_EXECUTOR, + MESSAGE_PRODUCER_KAMU_TASK_AGENT, TaskProgressMessage::running( start_time + self.args.run_since_start, self.args.task_id, @@ -80,7 +80,7 @@ impl TaskDriver { // with assummed immediate delivery self.outbox .post_message( - MESSAGE_PRODUCER_KAMU_TASK_EXECUTOR, + MESSAGE_PRODUCER_KAMU_TASK_AGENT, TaskProgressMessage::finished( start_time + self.args.run_since_start + finish_in, self.args.task_id, diff --git a/src/domain/opendatafabric/Cargo.toml b/src/domain/opendatafabric/Cargo.toml index a4cf9f999..c89beeb6a 100644 --- 
a/src/domain/opendatafabric/Cargo.toml +++ b/src/domain/opendatafabric/Cargo.toml @@ -38,7 +38,7 @@ multiformats = { workspace = true } chrono = { version = "0.4", features = ["serde"] } digest = "0.10" -thiserror = { version = "1", default-features = false } +thiserror = { version = "2", default-features = false, features = ["std"] } bitflags = { version = "2", default-features = false } diff --git a/src/domain/task-system/domain/Cargo.toml b/src/domain/task-system/domain/Cargo.toml index 65ed0049e..a53f8807d 100644 --- a/src/domain/task-system/domain/Cargo.toml +++ b/src/domain/task-system/domain/Cargo.toml @@ -35,7 +35,7 @@ async-trait = { version = "0.1", default-features = false } chrono = { version = "0.4", default-features = false } serde = { version = "1", features = ["derive"] } sqlx = { version = "0.8", default-features = false, features = ["macros"] } -thiserror = { version = "1", default-features = false } +thiserror = { version = "2", default-features = false, features = ["std"] } tokio-stream = { version = "0.1", default-features = false } diff --git a/src/domain/task-system/domain/src/entities/task_status.rs b/src/domain/task-system/domain/src/entities/task_status.rs index be48897e5..5a68b0d19 100644 --- a/src/domain/task-system/domain/src/entities/task_status.rs +++ b/src/domain/task-system/domain/src/entities/task_status.rs @@ -7,8 +7,8 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -use kamu_core::{CompactionResult, PullResult}; -use opendatafabric::{DatasetID, Multihash}; +use kamu_core::{CompactionResult, PullResult, ResetResult}; +use opendatafabric as odf; use serde::{Deserialize, Serialize}; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -61,7 +61,7 @@ pub struct TaskUpdateDatasetResult { #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct TaskResetDatasetResult { - pub new_head: Multihash, + pub reset_result: ResetResult, } #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] @@ -93,7 +93,7 @@ pub enum UpdateDatasetTaskError { #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct InputDatasetCompactedError { - pub dataset_id: DatasetID, + pub dataset_id: odf::DatasetID, } #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] diff --git a/src/domain/task-system/domain/src/jobs/mod.rs b/src/domain/task-system/domain/src/jobs/mod.rs index 2579431be..f3b6e0253 100644 --- a/src/domain/task-system/domain/src/jobs/mod.rs +++ b/src/domain/task-system/domain/src/jobs/mod.rs @@ -9,7 +9,6 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -pub const JOB_KAMU_TASKS_EXECUTOR_RECOVERY: &str = - "dev.kamu.domain.task-system.TaskExecutorRecovery"; +pub const JOB_KAMU_TASKS_AGENT_RECOVERY: &str = "dev.kamu.domain.task-system.TaskAgentRecovery"; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/task-system/domain/src/messages/task_message_producers.rs b/src/domain/task-system/domain/src/messages/task_message_producers.rs index 88c2888b9..a4b9a6055 100644 --- a/src/domain/task-system/domain/src/messages/task_message_producers.rs +++ b/src/domain/task-system/domain/src/messages/task_message_producers.rs @@ -9,6 +9,6 @@ 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -pub const MESSAGE_PRODUCER_KAMU_TASK_EXECUTOR: &str = "dev.kamu.domain.task-system.TaskExecutor"; +pub const MESSAGE_PRODUCER_KAMU_TASK_AGENT: &str = "dev.kamu.domain.task-system.TaskAgent"; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/task-system/domain/src/services/mod.rs b/src/domain/task-system/domain/src/services/mod.rs index 58ed9cb87..8b03d8dbb 100644 --- a/src/domain/task-system/domain/src/services/mod.rs +++ b/src/domain/task-system/domain/src/services/mod.rs @@ -7,12 +7,12 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. +mod task_agent; mod task_definition_planner; -mod task_executor; mod task_runner; mod task_scheduler; +pub use task_agent::*; pub use task_definition_planner::*; -pub use task_executor::*; pub use task_runner::*; pub use task_scheduler::*; diff --git a/src/domain/task-system/domain/src/services/task_executor.rs b/src/domain/task-system/domain/src/services/task_agent.rs similarity index 91% rename from src/domain/task-system/domain/src/services/task_executor.rs rename to src/domain/task-system/domain/src/services/task_agent.rs index 989c9cb2b..36f2075c9 100644 --- a/src/domain/task-system/domain/src/services/task_executor.rs +++ b/src/domain/task-system/domain/src/services/task_agent.rs @@ -12,8 +12,8 @@ use crate::*; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -pub trait TaskExecutor: Sync + Send { - /// Runs the executor main loop +pub trait TaskAgent: Sync + Send { + /// Runs the agent main loop async fn run(&self) -> Result<(), InternalError>; /// Runs single task only, blocks until it is available (for tests only!) diff --git a/src/domain/task-system/domain/src/services/task_definition_planner.rs b/src/domain/task-system/domain/src/services/task_definition_planner.rs index 6354a5333..6e931a41f 100644 --- a/src/domain/task-system/domain/src/services/task_definition_planner.rs +++ b/src/domain/task-system/domain/src/services/task_definition_planner.rs @@ -8,8 +8,7 @@ // by the Apache License, Version 2.0. 
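// Illustrative sketch, not part of the applied diff: after the executor -> agent
// rename, callers resolve the TaskAgent trait from the dill catalog and drive its
// main loop, mirroring what the test harnesses in this patch do; the function name
// below is hypothetical.
use std::sync::Arc;

use internal_error::InternalError;
use kamu_task_system::TaskAgent;

async fn run_task_agent(catalog: &dill::Catalog) -> Result<(), InternalError> {
    // TaskAgentImpl registers itself for `dyn TaskAgent`, so it can be resolved
    // by interface, just as the harnesses do with `catalog.get_one()`.
    let task_agent: Arc<dyn TaskAgent> = catalog.get_one().unwrap();

    // Run the agent's main loop; recovery of previously running tasks happens
    // separately through InitOnStartup (JOB_KAMU_TASKS_AGENT_RECOVERY).
    task_agent.run().await
}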
use internal_error::InternalError; -use kamu_core::{CompactionOptions, PullOptions, PullPlanIterationJob, ResolvedDataset}; -use opendatafabric::Multihash; +use kamu_core::{CompactionPlan, PullOptions, PullPlanIterationJob, ResetPlan, ResolvedDataset}; use crate::{LogicalPlan, LogicalPlanProbe}; @@ -53,8 +52,7 @@ pub struct TaskDefinitionUpdate { #[derive(Debug)] pub struct TaskDefinitionReset { pub target: ResolvedDataset, - pub new_head_hash: Option, - pub old_head_hash: Option, + pub reset_plan: ResetPlan, } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -62,7 +60,7 @@ pub struct TaskDefinitionReset { #[derive(Debug)] pub struct TaskDefinitionHardCompact { pub target: ResolvedDataset, - pub compaction_options: CompactionOptions, + pub compaction_plan: CompactionPlan, } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/task-system/services/src/dependencies.rs b/src/domain/task-system/services/src/dependencies.rs index 7edf1c354..b59801853 100644 --- a/src/domain/task-system/services/src/dependencies.rs +++ b/src/domain/task-system/services/src/dependencies.rs @@ -14,7 +14,7 @@ use crate::*; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub fn register_dependencies(catalog_builder: &mut CatalogBuilder) { - catalog_builder.add::(); + catalog_builder.add::(); catalog_builder.add::(); catalog_builder.add::(); catalog_builder.add::(); diff --git a/src/domain/task-system/services/src/lib.rs b/src/domain/task-system/services/src/lib.rs index 2b53b3bcc..05ff4e0cb 100644 --- a/src/domain/task-system/services/src/lib.rs +++ b/src/domain/task-system/services/src/lib.rs @@ -11,13 +11,13 @@ pub use kamu_task_system as domain; mod dependencies; +mod task_agent_impl; mod task_definition_planner_impl; -mod task_executor_impl; mod task_runner_impl; mod task_scheduler_impl; pub use dependencies::*; +pub use task_agent_impl::*; pub use task_definition_planner_impl::*; -pub use task_executor_impl::*; pub use task_runner_impl::*; pub use task_scheduler_impl::*; diff --git a/src/domain/task-system/services/src/task_executor_impl.rs b/src/domain/task-system/services/src/task_agent_impl.rs similarity index 95% rename from src/domain/task-system/services/src/task_executor_impl.rs rename to src/domain/task-system/services/src/task_agent_impl.rs index 48c626ca5..13c09dae8 100644 --- a/src/domain/task-system/services/src/task_executor_impl.rs +++ b/src/domain/task-system/services/src/task_agent_impl.rs @@ -20,7 +20,7 @@ use tracing::Instrument as _; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -pub struct TaskExecutorImpl { +pub struct TaskAgentImpl { catalog: Catalog, task_runner: Arc, time_source: Arc, @@ -29,15 +29,15 @@ pub struct TaskExecutorImpl { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[component(pub)] -#[interface(dyn TaskExecutor)] +#[interface(dyn TaskAgent)] #[interface(dyn InitOnStartup)] #[meta(InitOnStartupMeta { - job_name: JOB_KAMU_TASKS_EXECUTOR_RECOVERY, + job_name: JOB_KAMU_TASKS_AGENT_RECOVERY, depends_on: &[], requires_transaction: false, })] #[scope(Singleton)] -impl TaskExecutorImpl { +impl TaskAgentImpl { pub fn new( catalog: Catalog, task_runner: Arc, @@ -56,7 +56,7 @@ impl TaskExecutorImpl { 
let task_outcome = self .run_task(&task) .instrument(observability::tracing::root_span!( - "TaskExecutor::run_task", + "TaskAgent::run_task", task_id = %task.task_id, )) .await?; @@ -127,7 +127,7 @@ impl TaskExecutorImpl { outbox .post_message( - MESSAGE_PRODUCER_KAMU_TASK_EXECUTOR, + MESSAGE_PRODUCER_KAMU_TASK_AGENT, TaskProgressMessage::running( self.time_source.now(), task.task_id, @@ -199,7 +199,7 @@ impl TaskExecutorImpl { outbox .post_message( - MESSAGE_PRODUCER_KAMU_TASK_EXECUTOR, + MESSAGE_PRODUCER_KAMU_TASK_AGENT, TaskProgressMessage::finished( self.time_source.now(), task.task_id, @@ -216,7 +216,7 @@ impl TaskExecutorImpl { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -impl TaskExecutor for TaskExecutorImpl { +impl TaskAgent for TaskAgentImpl { // TODO: Error and panic handling strategy async fn run(&self) -> Result<(), InternalError> { loop { @@ -234,7 +234,7 @@ impl TaskExecutor for TaskExecutorImpl { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -impl InitOnStartup for TaskExecutorImpl { +impl InitOnStartup for TaskAgentImpl { async fn run_initialization(&self) -> Result<(), InternalError> { self.recover_running_tasks().await } diff --git a/src/domain/task-system/services/src/task_definition_planner_impl.rs b/src/domain/task-system/services/src/task_definition_planner_impl.rs index 641fa6f97..6eab0210c 100644 --- a/src/domain/task-system/services/src/task_definition_planner_impl.rs +++ b/src/domain/task-system/services/src/task_definition_planner_impl.rs @@ -22,6 +22,8 @@ pub struct TaskDefinitionPlannerImpl { dataset_registry: Arc, dataset_env_vars_svc: Arc, pull_request_planner: Arc, + compaction_planner: Arc, + reset_planner: Arc, tenancy_config: Arc, } @@ -34,12 +36,16 @@ impl TaskDefinitionPlannerImpl { dataset_registry: Arc, dataset_env_vars_svc: Arc, pull_request_planner: Arc, + compaction_planner: Arc, + reset_planner: Arc, tenancy_config: Arc, ) -> Self { Self { dataset_registry, dataset_env_vars_svc, pull_request_planner, + compaction_planner, + reset_planner, tenancy_config, } } @@ -115,10 +121,19 @@ impl TaskDefinitionPlannerImpl { .await .int_err()?; + let reset_plan = self + .reset_planner + .plan_reset( + target.clone(), + args.new_head_hash.as_ref(), + args.old_head_hash.as_ref(), + ) + .await + .int_err()?; + Ok(TaskDefinition::Reset(TaskDefinitionReset { target, - new_head_hash: args.new_head_hash.clone(), - old_head_hash: args.old_head_hash.clone(), + reset_plan, })) } @@ -139,9 +154,15 @@ impl TaskDefinitionPlannerImpl { keep_metadata_only: args.keep_metadata_only, }; + let compaction_plan = self + .compaction_planner + .plan_compaction(target.clone(), compaction_options, None) + .await + .int_err()?; + Ok(TaskDefinition::HardCompact(TaskDefinitionHardCompact { target, - compaction_options, + compaction_plan, })) } } diff --git a/src/domain/task-system/services/src/task_runner_impl.rs b/src/domain/task-system/services/src/task_runner_impl.rs index 8afd2e15e..7cba2912e 100644 --- a/src/domain/task-system/services/src/task_runner_impl.rs +++ b/src/domain/task-system/services/src/task_runner_impl.rs @@ -19,9 +19,9 @@ use kamu_task_system::*; pub struct TaskRunnerImpl { polling_ingest_service: Arc, transform_elaboration_service: Arc, - transform_execution_service: Arc, - reset_service: Arc, - compaction_service: Arc, + transform_executor: Arc, + 
reset_executor: Arc, + compaction_executor: Arc, } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -32,16 +32,16 @@ impl TaskRunnerImpl { pub fn new( polling_ingest_service: Arc, transform_elaboration_service: Arc, - transform_execution_service: Arc, - reset_service: Arc, - compaction_service: Arc, + transform_executor: Arc, + reset_executor: Arc, + compaction_executor: Arc, ) -> Self { Self { polling_ingest_service, transform_elaboration_service, - transform_execution_service, - reset_service, - compaction_service, + transform_executor, + reset_executor, + compaction_executor, } } @@ -86,7 +86,12 @@ impl TaskRunnerImpl { ) -> Result { let ingest_response = self .polling_ingest_service - .ingest(ingest_item.target, ingest_options, None) + .ingest( + ingest_item.target, + ingest_item.metadata_state, + ingest_options, + None, + ) .await; match ingest_response { Ok(ingest_result) => Ok(TaskOutcome::Success(TaskResult::UpdateDatasetResult( @@ -130,7 +135,7 @@ impl TaskRunnerImpl { match transform_elaboration { TransformElaboration::Elaborated(transform_plan) => { let (_, execution_result) = self - .transform_execution_service + .transform_executor .execute_transform(transform_item.target, transform_plan, None) .await; @@ -156,21 +161,20 @@ impl TaskRunnerImpl { task_reset: TaskDefinitionReset, ) -> Result { let reset_result_maybe = self - .reset_service - .reset_dataset( - task_reset.target, - task_reset.new_head_hash.as_ref(), - task_reset.old_head_hash.as_ref(), - ) + .reset_executor + .execute(task_reset.target, task_reset.reset_plan) .await; + match reset_result_maybe { - Ok(new_head) => Ok(TaskOutcome::Success(TaskResult::ResetDatasetResult( - TaskResetDatasetResult { new_head }, + Ok(reset_result) => Ok(TaskOutcome::Success(TaskResult::ResetDatasetResult( + TaskResetDatasetResult { reset_result }, ))), Err(err) => match err { - ResetError::BlockNotFound(_) => Ok(TaskOutcome::Failed( - TaskError::ResetDatasetError(ResetDatasetTaskError::ResetHeadNotFound), - )), + ResetExecutionError::SetReferenceFailed(SetRefError::BlockNotFound(_)) => { + Ok(TaskOutcome::Failed(TaskError::ResetDatasetError( + ResetDatasetTaskError::ResetHeadNotFound, + ))) + } err => { tracing::error!( error = ?err, @@ -190,8 +194,8 @@ impl TaskRunnerImpl { task_compact: TaskDefinitionHardCompact, ) -> Result { let compaction_result = self - .compaction_service - .compact_dataset(task_compact.target, task_compact.compaction_options, None) + .compaction_executor + .execute(task_compact.target, task_compact.compaction_plan, None) .await; match compaction_result { diff --git a/src/domain/task-system/services/src/task_scheduler_impl.rs b/src/domain/task-system/services/src/task_scheduler_impl.rs index bbb79cad6..ced2e349e 100644 --- a/src/domain/task-system/services/src/task_scheduler_impl.rs +++ b/src/domain/task-system/services/src/task_scheduler_impl.rs @@ -94,7 +94,7 @@ impl TaskScheduler for TaskSchedulerImpl { return Ok(None); }; - // Mark the task as running and hand it over to Executor + // Mark the task as running and hand it over to Agent let mut task = Task::load(task_id, self.task_event_store.as_ref()) .await .int_err()?; @@ -104,7 +104,7 @@ impl TaskScheduler for TaskSchedulerImpl { tracing::info!( %task_id, logical_plan = ?task.logical_plan, - "Handing over a task to an executor", + "Handing over a task to an agent", ); Ok(Some(task)) diff --git a/src/domain/task-system/services/tests/tests/mod.rs 
b/src/domain/task-system/services/tests/tests/mod.rs index 7b40bdb45..96d712d88 100644 --- a/src/domain/task-system/services/tests/tests/mod.rs +++ b/src/domain/task-system/services/tests/tests/mod.rs @@ -9,5 +9,5 @@ mod test_task_aggregate; -mod test_task_executor_impl; +mod test_task_agent_impl; mod test_task_scheduler_impl; diff --git a/src/domain/task-system/services/tests/tests/test_task_executor_impl.rs b/src/domain/task-system/services/tests/tests/test_task_agent_impl.rs similarity index 87% rename from src/domain/task-system/services/tests/tests/test_task_executor_impl.rs rename to src/domain/task-system/services/tests/tests/test_task_agent_impl.rs index 8c7fea618..63b1d52a9 100644 --- a/src/domain/task-system/services/tests/tests/test_task_executor_impl.rs +++ b/src/domain/task-system/services/tests/tests/test_task_agent_impl.rs @@ -13,19 +13,7 @@ use std::sync::Arc; use database_common::NoOpDatabasePlugin; use dill::{Catalog, CatalogBuilder, Component}; use kamu::utils::ipfs_wrapper::IpfsClient; -use kamu::{ - DatasetFactoryImpl, - DatasetRegistryRepoBridge, - DatasetRepositoryLocalFs, - DatasetRepositoryWriter, - IpfsGateway, - PullRequestPlannerImpl, - RemoteAliasesRegistryImpl, - RemoteReposDir, - RemoteRepositoryRegistryImpl, - SyncRequestBuilder, - TransformRequestPlannerImpl, -}; +use kamu::*; use kamu_accounts::CurrentAccountSubject; use kamu_core::auth::DummyOdfServerAccessTokenResolver; use kamu_core::{DatasetRepository, TenancyConfig}; @@ -44,7 +32,7 @@ use time_source::SystemTimeSourceDefault; #[test_log::test(tokio::test)] async fn test_pre_run_requeues_running_tasks() { - let harness = TaskExecutorHarness::new(MockOutbox::new(), MockTaskRunner::new()); + let harness = TaskAgentHarness::new(MockOutbox::new(), MockTaskRunner::new()); // Schedule 3 tasks let task_id_1 = harness.schedule_probe_task().await; @@ -85,24 +73,24 @@ async fn test_pre_run_requeues_running_tasks() { async fn test_run_single_task() { // Expect the only task to notify about Running and Finished transitions let mut mock_outbox = MockOutbox::new(); - TaskExecutorHarness::add_outbox_task_expectations(&mut mock_outbox, TaskID::new(0)); + TaskAgentHarness::add_outbox_task_expectations(&mut mock_outbox, TaskID::new(0)); // Expect logical plan runner to run probe let mut mock_task_runner = MockTaskRunner::new(); - TaskExecutorHarness::add_run_probe_plan_expectations( + TaskAgentHarness::add_run_probe_plan_expectations( &mut mock_task_runner, LogicalPlanProbe::default(), 1, ); // Schedule the only task - let harness = TaskExecutorHarness::new(mock_outbox, mock_task_runner); + let harness = TaskAgentHarness::new(mock_outbox, mock_task_runner); let task_id = harness.schedule_probe_task().await; let task = harness.get_task(task_id).await; assert_eq!(task.status(), TaskStatus::Queued); // Run execution loop - harness.task_executor.run_single_task().await.unwrap(); + harness.task_agent.run_single_task().await.unwrap(); // Check the task has Finished status at the end let task = harness.get_task(task_id).await; @@ -115,19 +103,19 @@ async fn test_run_single_task() { async fn test_run_two_of_three_tasks() { // Expect 2 of 3 tasks to notify about Running and Finished transitions let mut mock_outbox = MockOutbox::new(); - TaskExecutorHarness::add_outbox_task_expectations(&mut mock_outbox, TaskID::new(0)); - TaskExecutorHarness::add_outbox_task_expectations(&mut mock_outbox, TaskID::new(1)); + TaskAgentHarness::add_outbox_task_expectations(&mut mock_outbox, TaskID::new(0)); + 
TaskAgentHarness::add_outbox_task_expectations(&mut mock_outbox, TaskID::new(1)); // Expect logical plan runner to run probe twice let mut mock_task_runner = MockTaskRunner::new(); - TaskExecutorHarness::add_run_probe_plan_expectations( + TaskAgentHarness::add_run_probe_plan_expectations( &mut mock_task_runner, LogicalPlanProbe::default(), 2, ); // Schedule 3 tasks - let harness = TaskExecutorHarness::new(mock_outbox, mock_task_runner); + let harness = TaskAgentHarness::new(mock_outbox, mock_task_runner); let task_id_1 = harness.schedule_probe_task().await; let task_id_2 = harness.schedule_probe_task().await; let task_id_3 = harness.schedule_probe_task().await; @@ -141,8 +129,8 @@ async fn test_run_two_of_three_tasks() { assert_eq!(task_3.status(), TaskStatus::Queued); // Run execution loop twice - harness.task_executor.run_single_task().await.unwrap(); - harness.task_executor.run_single_task().await.unwrap(); + harness.task_agent.run_single_task().await.unwrap(); + harness.task_agent.run_single_task().await.unwrap(); // Check the 2 tasks Finished, 3rd is still Queued let task_1 = harness.get_task(task_id_1).await; @@ -155,14 +143,14 @@ async fn test_run_two_of_three_tasks() { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -struct TaskExecutorHarness { +struct TaskAgentHarness { _tempdir: TempDir, catalog: Catalog, - task_executor: Arc, + task_agent: Arc, task_scheduler: Arc, } -impl TaskExecutorHarness { +impl TaskAgentHarness { pub fn new(mock_outbox: MockOutbox, mock_task_runner: MockTaskRunner) -> Self { let tempdir = tempfile::tempdir().unwrap(); @@ -173,7 +161,7 @@ impl TaskExecutorHarness { std::fs::create_dir(&repos_dir).unwrap(); let mut b = CatalogBuilder::new(); - b.add::() + b.add::() .add::() .add::() .add::() @@ -183,6 +171,8 @@ impl TaskExecutorHarness { .bind::() .add::() .add::() + .add::() + .add::() .add::() .add::() .add::() @@ -206,13 +196,13 @@ impl TaskExecutorHarness { let catalog = b.build(); - let task_executor = catalog.get_one().unwrap(); + let task_agent = catalog.get_one().unwrap(); let task_scheduler = catalog.get_one().unwrap(); Self { _tempdir: tempdir, catalog, - task_executor, + task_agent, task_scheduler, } } @@ -243,7 +233,7 @@ impl TaskExecutorHarness { mock_outbox .expect_post_message_as_json() .with( - eq(MESSAGE_PRODUCER_KAMU_TASK_EXECUTOR), + eq(MESSAGE_PRODUCER_KAMU_TASK_AGENT), function(move |message_as_json: &serde_json::Value| { matches!( serde_json::from_value::(message_as_json.clone()), @@ -261,7 +251,7 @@ impl TaskExecutorHarness { mock_outbox .expect_post_message_as_json() .with( - eq(MESSAGE_PRODUCER_KAMU_TASK_EXECUTOR), + eq(MESSAGE_PRODUCER_KAMU_TASK_AGENT), function(move |message_as_json: &serde_json::Value| { matches!( serde_json::from_value::(message_as_json.clone()), diff --git a/src/e2e/app/cli/common/Cargo.toml b/src/e2e/app/cli/common/Cargo.toml index 86b30571d..372e21adf 100644 --- a/src/e2e/app/cli/common/Cargo.toml +++ b/src/e2e/app/cli/common/Cargo.toml @@ -26,7 +26,9 @@ http-common = { workspace = true } internal-error = { workspace = true } kamu-adapter-http = { workspace = true } kamu-cli-e2e-common-macros = { workspace = true } -kamu-cli-puppet = { workspace = true, default-features = false, features = ["extensions"] } +kamu-cli-puppet = { workspace = true, default-features = false, features = [ + "extensions", +] } kamu-core = { workspace = true } kamu-flow-system = { workspace = true } opendatafabric = { workspace = true } @@ -48,7 +50,7 @@ sqlx = 
{ version = "0.8", default-features = false, features = [ "mysql", "sqlite", ] } -thiserror = { version = "1", default-features = false } +thiserror = { version = "2", default-features = false } tokio = { version = "1", default-features = false, features = ["macros", "fs"] } tokio-retry = "0.3" diff --git a/src/e2e/app/cli/inmem/Cargo.toml b/src/e2e/app/cli/inmem/Cargo.toml index 52f625e57..bbd059bcc 100644 --- a/src/e2e/app/cli/inmem/Cargo.toml +++ b/src/e2e/app/cli/inmem/Cargo.toml @@ -24,6 +24,7 @@ doctest = false [dependencies] # We have only tests in this crate + [dev-dependencies] kamu-cli-e2e-common = { workspace = true } kamu-cli-e2e-repo-tests = { workspace = true } diff --git a/src/e2e/app/cli/inmem/tests/tests/commands/test_delete_command.rs b/src/e2e/app/cli/inmem/tests/tests/commands/test_delete_command.rs index e59810f66..4a5d59de7 100644 --- a/src/e2e/app/cli/inmem/tests/tests/commands/test_delete_command.rs +++ b/src/e2e/app/cli/inmem/tests/tests/commands/test_delete_command.rs @@ -42,3 +42,10 @@ kamu_cli_run_api_server_e2e_test!( ); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +kamu_cli_execute_command_e2e_test!( + storage = inmem, + fixture = kamu_cli_e2e_repo_tests::commands::test_delete_args_validation +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/e2e/app/cli/inmem/tests/tests/commands/test_repo_command.rs b/src/e2e/app/cli/inmem/tests/tests/commands/test_repo_command.rs index 44f6069ad..656bebf44 100644 --- a/src/e2e/app/cli/inmem/tests/tests/commands/test_repo_command.rs +++ b/src/e2e/app/cli/inmem/tests/tests/commands/test_repo_command.rs @@ -24,3 +24,10 @@ kamu_cli_execute_command_e2e_test!( ); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +kamu_cli_execute_command_e2e_test!( + storage = inmem, + fixture = kamu_cli_e2e_repo_tests::commands::test_repo_delete_args_validation +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/e2e/app/cli/mysql/Cargo.toml b/src/e2e/app/cli/mysql/Cargo.toml index c86fb6f76..f7efe7b23 100644 --- a/src/e2e/app/cli/mysql/Cargo.toml +++ b/src/e2e/app/cli/mysql/Cargo.toml @@ -24,6 +24,7 @@ doctest = false [dependencies] # We have only tests in this crate + [dev-dependencies] kamu-cli-e2e-common = { workspace = true } kamu-cli-e2e-repo-tests = { workspace = true } diff --git a/src/e2e/app/cli/postgres/Cargo.toml b/src/e2e/app/cli/postgres/Cargo.toml index 50bf40331..1320d4f93 100644 --- a/src/e2e/app/cli/postgres/Cargo.toml +++ b/src/e2e/app/cli/postgres/Cargo.toml @@ -24,6 +24,7 @@ doctest = false [dependencies] # We have only tests in this crate + [dev-dependencies] kamu-cli-e2e-common = { workspace = true } kamu-cli-e2e-repo-tests = { workspace = true } diff --git a/src/e2e/app/cli/postgres/tests/tests/commands/test_delete_command.rs b/src/e2e/app/cli/postgres/tests/tests/commands/test_delete_command.rs index 42c3dea36..cc902c5b7 100644 --- a/src/e2e/app/cli/postgres/tests/tests/commands/test_delete_command.rs +++ b/src/e2e/app/cli/postgres/tests/tests/commands/test_delete_command.rs @@ -42,3 +42,10 @@ kamu_cli_run_api_server_e2e_test!( ); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +kamu_cli_execute_command_e2e_test!( + 
storage = inmem, + fixture = kamu_cli_e2e_repo_tests::commands::test_delete_args_validation +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/e2e/app/cli/postgres/tests/tests/commands/test_repo_command.rs b/src/e2e/app/cli/postgres/tests/tests/commands/test_repo_command.rs index 3e2b47072..892761875 100644 --- a/src/e2e/app/cli/postgres/tests/tests/commands/test_repo_command.rs +++ b/src/e2e/app/cli/postgres/tests/tests/commands/test_repo_command.rs @@ -24,3 +24,10 @@ kamu_cli_execute_command_e2e_test!( ); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +kamu_cli_execute_command_e2e_test!( + storage = postgres, + fixture = kamu_cli_e2e_repo_tests::commands::test_repo_delete_args_validation +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/e2e/app/cli/repo-tests/src/commands/test_delete_command.rs b/src/e2e/app/cli/repo-tests/src/commands/test_delete_command.rs index 6913f1d89..216254530 100644 --- a/src/e2e/app/cli/repo-tests/src/commands/test_delete_command.rs +++ b/src/e2e/app/cli/repo-tests/src/commands/test_delete_command.rs @@ -324,3 +324,28 @@ pub async fn test_delete_warning(mut kamu_node_api_client: KamuApiServerClient) } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_delete_args_validation(kamu: KamuCliPuppet) { + kamu.assert_success_command_execution( + ["delete", "--all"], + None, + Some(["There are no datasets matching the pattern"]), + ) + .await; + + kamu.assert_failure_command_execution( + ["delete", "player-scores", "--all"], + None, + Some(["You can either specify dataset(s) or pass --all"]), + ) + .await; + + kamu.assert_failure_command_execution( + ["delete"], + None, + Some(["Specify dataset(s) or pass --all"]), + ) + .await; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/e2e/app/cli/repo-tests/src/commands/test_repo_command.rs b/src/e2e/app/cli/repo-tests/src/commands/test_repo_command.rs index 580560c27..2b85f6eb4 100644 --- a/src/e2e/app/cli/repo-tests/src/commands/test_repo_command.rs +++ b/src/e2e/app/cli/repo-tests/src/commands/test_repo_command.rs @@ -163,3 +163,35 @@ pub async fn test_repository_push_aliases_commands(kamu: KamuCliPuppet) { } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_repo_delete_args_validation(kamu: KamuCliPuppet) { + kamu.assert_success_command_execution( + ["repo", "delete", "--all"], + None, + Some(["There are no repositories to delete"]), + ) + .await; + + kamu.assert_failure_command_execution( + ["repo", "delete", "some-repo"], + None, + Some(["Error: Repository some-repo does not exist"]), + ) + .await; + + kamu.assert_failure_command_execution( + ["repo", "delete", "some-repo", "--all"], + None, + Some(["You can either specify repository(s) or pass --all"]), + ) + .await; + + kamu.assert_failure_command_execution( + ["repo", "delete"], + None, + Some(["Specify repository(s) or pass --all"]), + ) + .await; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git 
a/src/e2e/app/cli/repo-tests/src/test_smart_transfer_protocol.rs b/src/e2e/app/cli/repo-tests/src/test_smart_transfer_protocol.rs index eec7b0cc2..ea3aa35d5 100644 --- a/src/e2e/app/cli/repo-tests/src/test_smart_transfer_protocol.rs +++ b/src/e2e/app/cli/repo-tests/src/test_smart_transfer_protocol.rs @@ -894,8 +894,8 @@ async fn test_smart_push_all_smart_pull_all( +--------+----+----------------------+----------------------+----------+-----------+-------+ | offset | op | system_time | match_time | match_id | player_id | score | +--------+----+----------------------+----------------------+----------+-----------+-------+ - | 2 | 0 | 2050-01-02T03:04:05Z | 2000-01-02T00:00:00Z | 2 | Charlie | 90 | - | 3 | 0 | 2050-01-02T03:04:05Z | 2000-01-02T00:00:00Z | 2 | Alice | 70 | + | 2 | 0 | 2050-01-02T03:04:05Z | 2000-01-02T00:00:00Z | 2 | Alice | 70 | + | 3 | 0 | 2050-01-02T03:04:05Z | 2000-01-02T00:00:00Z | 2 | Charlie | 90 | +--------+----+----------------------+----------------------+----------+-----------+-------+ "# ); @@ -1154,8 +1154,8 @@ async fn test_smart_push_recursive_smart_pull_recursive( +--------+----+----------------------+----------------------+----------+-----------+-------+ | offset | op | system_time | match_time | match_id | player_id | score | +--------+----+----------------------+----------------------+----------+-----------+-------+ - | 2 | 0 | 2050-01-02T03:04:05Z | 2000-01-02T00:00:00Z | 2 | Charlie | 90 | - | 3 | 0 | 2050-01-02T03:04:05Z | 2000-01-02T00:00:00Z | 2 | Alice | 70 | + | 2 | 0 | 2050-01-02T03:04:05Z | 2000-01-02T00:00:00Z | 2 | Alice | 70 | + | 3 | 0 | 2050-01-02T03:04:05Z | 2000-01-02T00:00:00Z | 2 | Charlie | 90 | +--------+----+----------------------+----------------------+----------+-----------+-------+ "# ); diff --git a/src/e2e/app/cli/sqlite/Cargo.toml b/src/e2e/app/cli/sqlite/Cargo.toml index 21141d282..cf2bd069c 100644 --- a/src/e2e/app/cli/sqlite/Cargo.toml +++ b/src/e2e/app/cli/sqlite/Cargo.toml @@ -24,6 +24,7 @@ doctest = false [dependencies] # We have only tests in this crate + [dev-dependencies] kamu-cli-e2e-common = { workspace = true } kamu-cli-e2e-repo-tests = { workspace = true } diff --git a/src/e2e/app/cli/sqlite/tests/tests/commands/test_delete_command.rs b/src/e2e/app/cli/sqlite/tests/tests/commands/test_delete_command.rs index de6cd3a85..5757fc2a6 100644 --- a/src/e2e/app/cli/sqlite/tests/tests/commands/test_delete_command.rs +++ b/src/e2e/app/cli/sqlite/tests/tests/commands/test_delete_command.rs @@ -42,3 +42,10 @@ kamu_cli_run_api_server_e2e_test!( ); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +kamu_cli_execute_command_e2e_test!( + storage = inmem, + fixture = kamu_cli_e2e_repo_tests::commands::test_delete_args_validation +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/e2e/app/cli/sqlite/tests/tests/commands/test_repo_command.rs b/src/e2e/app/cli/sqlite/tests/tests/commands/test_repo_command.rs index 87ae4dfe3..b6c2caab4 100644 --- a/src/e2e/app/cli/sqlite/tests/tests/commands/test_repo_command.rs +++ b/src/e2e/app/cli/sqlite/tests/tests/commands/test_repo_command.rs @@ -24,3 +24,10 @@ kamu_cli_execute_command_e2e_test!( ); //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +kamu_cli_execute_command_e2e_test!( + storage = sqlite, + fixture = 
kamu_cli_e2e_repo_tests::commands::test_repo_delete_args_validation +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/auth-rebac/postgres/Cargo.toml b/src/infra/auth-rebac/postgres/Cargo.toml index ced872985..d7b8c609b 100644 --- a/src/infra/auth-rebac/postgres/Cargo.toml +++ b/src/infra/auth-rebac/postgres/Cargo.toml @@ -35,6 +35,7 @@ sqlx = { version = "0.8", default-features = false, features = [ "chrono", ] } + [dev-dependencies] database-common-macros = { workspace = true } kamu-auth-rebac-repo-tests = { workspace = true } diff --git a/src/infra/core/Cargo.toml b/src/infra/core/Cargo.toml index 43a9908e3..3f37b84e1 100644 --- a/src/infra/core/Cargo.toml +++ b/src/infra/core/Cargo.toml @@ -69,7 +69,7 @@ ringbuf = "0.3" zip = "2" # Data -datafusion = { version = "42", default-features = false } +datafusion = { version = "43", default-features = false } digest = "0.10" object_store = { version = "0.11", features = ["aws"] } sha3 = "0.10" @@ -79,7 +79,7 @@ aws-config = { version = "1" } aws-sdk-s3 = { version = "1" } aws-smithy-types = { version = "1" } aws-credential-types = { version = "1" } -trust-dns-resolver = "0.23" # TODO: Needed for DNSLink resolution with IPFS +hickory-resolver = "0.24" # TODO: Needed for DNSLink resolution with IPFS http = "1" # Utils @@ -95,13 +95,12 @@ futures = "0.3" glob = "0.3" # Used for glob fetch itertools = "0.13" libc = "0.2" # Signal names -petgraph = { version = "0.6", default-features = false, features = [ - "stable_graph", -] } +like = { version = "0.3", default-features = false } +pin-project = "1" rand = "0.8" regex = "1" tempfile = "3" -thiserror = { version = "1", default-features = false } +thiserror = { version = "2", default-features = false, features = ["std"] } tokio = { version = "1", default-features = false, features = [ "fs", "process", @@ -121,7 +120,7 @@ tower-http = { version = "0.6", features = ["fs", "trace"] } axum = "0.7" # Optional dependencies -alloy = { optional = true, version = "0.5", default-features = false, features = [ +alloy = { optional = true, version = "0.6", default-features = false, features = [ "std", "provider-http", "provider-ws", @@ -134,8 +133,8 @@ curl = { optional = true, version = "0.4", features = [ "static-ssl", ] } curl-sys = { optional = true, version = "0.4" } -datafusion-ethers = { optional = true, version = "42" } -datafusion-functions-json = { optional = true, version = "0.42" } +datafusion-ethers = { optional = true, version = "43" } +datafusion-functions-json = { optional = true, version = "0.43" } rumqttc = { optional = true, version = "0.24" } mockall = { optional = true, version = "0.13", default-features = false } lazy_static = { version = "1" } @@ -152,9 +151,10 @@ kamu-accounts-inmem = { workspace = true } kamu-accounts-services = { workspace = true } kamu-data-utils = { workspace = true, features = ["testing"] } kamu-datasets-services = { workspace = true } +kamu-datasets-inmem = { workspace = true } criterion = { version = "0.5", features = ["async_tokio"] } -datafusion = { version = "42", default-features = false, features = [ +datafusion = { version = "43", default-features = false, features = [ "parquet", ] } filetime = "0.2" @@ -166,6 +166,7 @@ oop = "0.0.2" pretty_assertions = { version = "1" } test-group = { version = "1" } test-log = { version = "0.2", features = ["trace"] } +testing_logger = { version = "0.1" } tokio = { version = "1", default-features = false, features = ["rt", 
"macros"] } diff --git a/src/infra/core/src/compaction_service_impl.rs b/src/infra/core/src/compaction_service_impl.rs deleted file mode 100644 index cd76b2a93..000000000 --- a/src/infra/core/src/compaction_service_impl.rs +++ /dev/null @@ -1,508 +0,0 @@ -// Copyright Kamu Data, Inc. and contributors. All rights reserved. -// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0. - -use std::cmp::Ordering; -use std::fs; -use std::path::{Path, PathBuf}; -use std::sync::Arc; - -use chrono::{DateTime, Utc}; -use datafusion::prelude::*; -use dill::{component, interface}; -use domain::{ - CompactionError, - CompactionListener, - CompactionOptions, - CompactionPhase, - CompactionResult, - CompactionService, - InvalidDatasetKindError, - NullCompactionListener, - DEFAULT_MAX_SLICE_RECORDS, - DEFAULT_MAX_SLICE_SIZE, -}; -use futures::stream::TryStreamExt; -use internal_error::ResultIntoInternal; -use kamu_core::*; -use opendatafabric::{ - Checkpoint, - DatasetKind, - DatasetVocabulary, - MetadataEvent, - Multihash, - OffsetInterval, - SetVocab, - SourceState, -}; -use random_names::get_random_name; -use time_source::SystemTimeSource; -use url::Url; - -use crate::*; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -pub struct CompactionServiceImpl { - object_store_registry: Arc, - time_source: Arc, - run_info_dir: Arc, -} - -#[allow(clippy::large_enum_variant)] -#[derive(Debug)] -enum DataSliceBatch { - CompactedBatch(DataSliceBatchInfo), - // Hash of block will not be None value in case - // when we will get only one block in batch - // and will be used tp not rewriting such blocks - SingleBlock(Multihash), -} - -#[derive(Debug, Default, Clone)] -struct DataSliceBatchUpperBound { - pub new_source_state: Option, - pub new_watermark: Option>, - pub new_checkpoint: Option, - pub end_offset: u64, -} - -#[derive(Debug, Default, Clone)] -struct DataSliceBatchLowerBound { - pub prev_offset: Option, - pub prev_checkpoint: Option, - pub start_offset: u64, -} - -#[derive(Debug, Default, Clone)] -struct DataSliceBatchInfo { - pub data_slices_batch: Vec, - pub upper_bound: DataSliceBatchUpperBound, - pub lower_bound: DataSliceBatchLowerBound, - pub new_file_path: Option, -} - -struct ChainFilesInfo { - old_head: Multihash, - old_num_blocks: usize, - offset_column: String, - data_slice_batches: Vec, -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[component(pub)] -#[interface(dyn CompactionService)] -impl CompactionServiceImpl { - pub fn new( - object_store_registry: Arc, - time_source: Arc, - run_info_dir: Arc, - ) -> Self { - Self { - object_store_registry, - time_source, - run_info_dir, - } - } - - async fn gather_chain_info( - &self, - target: &ResolvedDataset, - max_slice_size: u64, - max_slice_records: u64, - keep_metadata_only: bool, - ) -> Result { - // Declare mut values for result - - let mut old_num_blocks: usize = 0; - let mut old_head: Option = None; - let mut current_hash: Option = None; - let mut vocab_event: Option = None; - let mut data_slice_batch_info: DataSliceBatchInfo = DataSliceBatchInfo::default(); - let mut data_slice_batches: Vec = vec![]; - let (mut batch_size, mut batch_records) = (0u64, 0u64); - - 
//////////////////////////////////////////////////////////////////////////////// - - let chain = target.as_metadata_chain(); - let head = chain.resolve_ref(&BlockRef::Head).await?; - let mut block_stream = chain.iter_blocks_interval(&head, None, false); - let object_data_repo = target.as_data_repo(); - - while let Some((block_hash, block)) = block_stream.try_next().await? { - old_num_blocks += 1; - match block.event { - MetadataEvent::AddData(add_data_event) => { - if !keep_metadata_only && let Some(output_slice) = &add_data_event.new_data { - let data_slice_url = object_data_repo - .get_internal_url(&output_slice.physical_hash) - .await; - - // Setting the end offset interval needs to be here because we - // have to get it at the beginning of iteration unlike - // other values which will be set at the end of iteration - if data_slice_batch_info.data_slices_batch.is_empty() { - data_slice_batch_info.upper_bound.end_offset = - output_slice.offset_interval.end; - } - - let current_records = output_slice.num_records(); - - if batch_size + output_slice.size > max_slice_size - || batch_records + current_records > max_slice_records - { - let is_appended = - CompactionServiceImpl::append_add_data_batch_to_chain_info( - &mut data_slice_batches, - ¤t_hash, - &mut data_slice_batch_info, - ); - if is_appended { - // Reset values for next batch - data_slice_batch_info = DataSliceBatchInfo::default(); - data_slice_batch_info.upper_bound.end_offset = - output_slice.offset_interval.end; - } - - data_slice_batch_info.data_slices_batch = vec![data_slice_url]; - batch_size = output_slice.size; - batch_records = current_records; - } else { - data_slice_batch_info.data_slices_batch.push(data_slice_url); - batch_size += output_slice.size; - batch_records += current_records; - } - - // Set lower bound values - data_slice_batch_info.lower_bound.prev_checkpoint = - add_data_event.prev_checkpoint; - data_slice_batch_info.lower_bound.prev_offset = add_data_event.prev_offset; - data_slice_batch_info.lower_bound.start_offset = - output_slice.offset_interval.start; - current_hash = Some(block_hash); - } - // Set upper bound values - if data_slice_batch_info.upper_bound.new_checkpoint.is_none() { - data_slice_batch_info.upper_bound.new_checkpoint = - add_data_event.new_checkpoint; - } - if data_slice_batch_info.upper_bound.new_source_state.is_none() { - data_slice_batch_info.upper_bound.new_source_state = - add_data_event.new_source_state; - } - if data_slice_batch_info.upper_bound.new_watermark.is_none() { - data_slice_batch_info.upper_bound.new_watermark = - add_data_event.new_watermark; - } - } - MetadataEvent::Seed(_) => old_head = Some(block_hash), - MetadataEvent::ExecuteTransform(_) => { - if keep_metadata_only { - continue; - } - } - event => { - if let MetadataEvent::SetVocab(set_vocab_event) = event { - vocab_event = Some(set_vocab_event); - } - let is_appended = CompactionServiceImpl::append_add_data_batch_to_chain_info( - &mut data_slice_batches, - ¤t_hash, - &mut data_slice_batch_info, - ); - data_slice_batches.push(DataSliceBatch::SingleBlock(block_hash.clone())); - if is_appended { - data_slice_batch_info = DataSliceBatchInfo::default(); - } - } - } - } - - let vocab: DatasetVocabulary = vocab_event.unwrap_or_default().into(); - - Ok(ChainFilesInfo { - data_slice_batches, - offset_column: vocab.offset_column, - old_head: old_head.unwrap(), - old_num_blocks, - }) - } - - fn append_add_data_batch_to_chain_info( - data_slice_batches: &mut Vec, - hash: &Option, - data_slice_batch_info: &mut 
DataSliceBatchInfo, - ) -> bool { - match data_slice_batch_info.data_slices_batch.len().cmp(&1) { - Ordering::Equal => { - data_slice_batches - .push(DataSliceBatch::SingleBlock(hash.as_ref().unwrap().clone())); - } - Ordering::Greater => { - data_slice_batches.push(DataSliceBatch::CompactedBatch( - data_slice_batch_info.clone(), - )); - } - _ => return false, - } - true - } - - async fn merge_files( - &self, - data_slice_batches: &mut [DataSliceBatch], - offset_column: &str, - compaction_dir_path: &Path, - ) -> Result<(), CompactionError> { - let ctx = new_session_context(self.object_store_registry.clone()); - - for (index, data_slice_batch) in data_slice_batches.iter_mut().enumerate() { - if let DataSliceBatch::CompactedBatch(data_slice_batch_info) = data_slice_batch { - let data_frame = ctx - .read_parquet( - data_slice_batch_info.data_slices_batch.clone(), - datafusion::execution::options::ParquetReadOptions { - file_extension: "", - ..Default::default() - }, - ) - .await - .int_err()? - // TODO: PERF: Consider passing sort order hint to `read_parquet` to let DF now - // that the data is already pre-sorted - .sort(vec![col(Column::from_name(offset_column)).sort(true, false)]) - .int_err()?; - - let new_file_path = - compaction_dir_path.join(format!("merge-slice-{index}").as_str()); - - data_frame - .write_parquet( - new_file_path.to_str().unwrap(), - datafusion::dataframe::DataFrameWriteOptions::new() - .with_single_file_output(true), - None, - ) - .await - .int_err()?; - data_slice_batch_info.new_file_path = Some(new_file_path); - } - } - - Ok(()) - } - - fn create_run_compaction_dir(&self) -> Result { - let compaction_dir_path = self - .run_info_dir - .join(get_random_name(Some("compaction-"), 10)); - fs::create_dir_all(&compaction_dir_path).int_err()?; - Ok(compaction_dir_path) - } - - async fn commit_new_blocks( - &self, - target: &ResolvedDataset, - chain_files_info: &ChainFilesInfo, - ) -> Result<(Vec, Multihash, usize), CompactionError> { - let chain = target.as_metadata_chain(); - let mut current_head = chain_files_info.old_head.clone(); - let mut old_data_slices: Vec = vec![]; - // set it to 1 to include seed block - let mut new_num_blocks: usize = 1; - - for data_slice_batch in chain_files_info.data_slice_batches.iter().rev() { - match data_slice_batch { - DataSliceBatch::SingleBlock(block_hash) => { - let block = chain.get_block(block_hash).await.int_err()?; - - let commit_result = target - .commit_event( - block.event, - CommitOpts { - block_ref: &BlockRef::Head, - system_time: Some(self.time_source.now()), - prev_block_hash: Some(Some(¤t_head)), - check_object_refs: false, - update_block_ref: false, - }, - ) - .await - .int_err()?; - current_head = commit_result.new_head; - } - DataSliceBatch::CompactedBatch(data_slice_batch_info) => { - let new_offset_interval = OffsetInterval { - start: data_slice_batch_info.lower_bound.start_offset, - end: data_slice_batch_info.upper_bound.end_offset, - }; - - let add_data_params = AddDataParams { - prev_checkpoint: data_slice_batch_info.lower_bound.prev_checkpoint.clone(), - prev_offset: data_slice_batch_info.lower_bound.prev_offset, - new_offset_interval: Some(new_offset_interval), - new_source_state: data_slice_batch_info - .upper_bound - .new_source_state - .clone(), - new_watermark: data_slice_batch_info.upper_bound.new_watermark, - }; - let new_checkpoint_ref = data_slice_batch_info - .upper_bound - .new_checkpoint - .clone() - .map(|r| CheckpointRef::Existed(r.physical_hash)); - - let commit_result = target - .commit_add_data( - 
add_data_params, - Some(OwnedFile::new( - data_slice_batch_info.new_file_path.as_ref().unwrap(), - )), - new_checkpoint_ref, - CommitOpts { - block_ref: &BlockRef::Head, - system_time: Some(self.time_source.now()), - prev_block_hash: Some(Some(¤t_head)), - check_object_refs: false, - update_block_ref: false, - }, - ) - .await - .int_err()?; - - current_head = commit_result.new_head; - old_data_slices.extend(data_slice_batch_info.data_slices_batch.clone()); - } - } - new_num_blocks += 1; - } - - Ok((old_data_slices, current_head, new_num_blocks)) - } - - #[tracing::instrument(level = "debug", skip_all)] - async fn compact_dataset_impl( - &self, - target: ResolvedDataset, - max_slice_size: u64, - max_slice_records: u64, - keep_metadata_only: bool, - listener: Arc, - ) -> Result { - let compaction_dir_path = self.create_run_compaction_dir()?; - - listener.begin_phase(CompactionPhase::GatherChainInfo); - let mut chain_files_info = self - .gather_chain_info( - &target, - max_slice_size, - max_slice_records, - keep_metadata_only, - ) - .await?; - - // if slices amount +1(seed block) eq to amount of blocks we will not perform - // compaction - if chain_files_info.data_slice_batches.len() + 1 == chain_files_info.old_num_blocks { - return Ok(CompactionResult::NothingToDo); - } - - listener.begin_phase(CompactionPhase::MergeDataslices); - self.merge_files( - &mut chain_files_info.data_slice_batches, - chain_files_info.offset_column.as_str(), - &compaction_dir_path, - ) - .await?; - - listener.begin_phase(CompactionPhase::CommitNewBlocks); - let (_old_data_slices, new_head, new_num_blocks) = - self.commit_new_blocks(&target, &chain_files_info).await?; - - target - .as_metadata_chain() - .set_ref( - &BlockRef::Head, - &new_head, - SetRefOpts { - validate_block_present: true, - check_ref_is: None, - }, - ) - .await?; - - let res = CompactionResult::Success { - old_head: chain_files_info.old_head, - new_head, - old_num_blocks: chain_files_info.old_num_blocks, - new_num_blocks, - }; - - listener.success(&res); - - Ok(res) - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[async_trait::async_trait] -impl CompactionService for CompactionServiceImpl { - #[tracing::instrument(level = "info", skip_all, fields(target=?target.get_handle(), ?options))] - async fn compact_dataset( - &self, - target: ResolvedDataset, - options: CompactionOptions, - maybe_listener: Option>, - ) -> Result { - let dataset_kind = target - .get_summary(GetSummaryOpts::default()) - .await - .int_err()? 
- .kind; - - if !options.keep_metadata_only && dataset_kind != DatasetKind::Root { - return Err(CompactionError::InvalidDatasetKind( - InvalidDatasetKindError { - dataset_alias: target.get_alias().clone(), - }, - )); - } - - let listener = maybe_listener.unwrap_or(Arc::new(NullCompactionListener {})); - - let max_slice_size = options.max_slice_size.unwrap_or(DEFAULT_MAX_SLICE_SIZE); - let max_slice_records = options - .max_slice_records - .unwrap_or(DEFAULT_MAX_SLICE_RECORDS); - - match self - .compact_dataset_impl( - target, - max_slice_size, - max_slice_records, - options.keep_metadata_only, - listener.clone(), - ) - .await - { - Ok(res) => { - listener.success(&res); - Ok(res) - } - Err(err) => { - listener.error(&err); - Err(err) - } - } - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/core/src/dependency_graph_repository_inmem.rs b/src/infra/core/src/dependency_graph_repository_inmem.rs deleted file mode 100644 index d0011968e..000000000 --- a/src/infra/core/src/dependency_graph_repository_inmem.rs +++ /dev/null @@ -1,58 +0,0 @@ -// Copyright Kamu Data, Inc. and contributors. All rights reserved. -// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0. - -use std::sync::Arc; - -use internal_error::ResultIntoInternal; -use kamu_core::*; -use tracing::Instrument as _; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -pub struct DependencyGraphRepositoryInMemory { - dataset_repo: Arc, -} - -#[dill::component(pub)] -#[dill::interface(dyn DependencyGraphRepository)] -impl DependencyGraphRepositoryInMemory { - pub fn new(dataset_repo: Arc) -> Self { - Self { dataset_repo } - } -} - -impl DependencyGraphRepository for DependencyGraphRepositoryInMemory { - #[tracing::instrument(level = "debug", skip_all)] - fn list_dependencies_of_all_datasets(&self) -> DatasetDependenciesIDStream { - use tokio_stream::StreamExt; - - Box::pin(async_stream::try_stream! 
{ - let mut datasets_stream = self.dataset_repo.all_dataset_handles(); - - while let Some(Ok(dataset_handle)) = datasets_stream.next().await { - let span = tracing::debug_span!("Scanning dataset dependencies", dataset = %dataset_handle); - - let summary = self - .dataset_repo - .get_dataset_by_handle(&dataset_handle) - .get_summary(GetSummaryOpts::default()) - .instrument(span) - .await - .int_err()?; - - yield DatasetDependencies { - downstream_dataset_id: dataset_handle.id.clone(), - upstream_dataset_ids: summary.dependencies, - }; - } - }) - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/core/src/engine/engine_datafusion_inproc.rs b/src/infra/core/src/engine/engine_datafusion_inproc.rs index 90d57ca16..3b4534389 100644 --- a/src/infra/core/src/engine/engine_datafusion_inproc.rs +++ b/src/infra/core/src/engine/engine_datafusion_inproc.rs @@ -61,6 +61,7 @@ impl EngineDatafusionInproc { input: Arc::new(logical_plan), or_replace: false, definition: Some(query.to_string()), + temporary: false, })); ctx.execute_logical_plan(create_view).await.int_err()?; diff --git a/src/infra/core/src/engine/engine_io_strategy.rs b/src/infra/core/src/engine/engine_io_strategy.rs index 6d0ed6702..8d1988347 100644 --- a/src/infra/core/src/engine/engine_io_strategy.rs +++ b/src/infra/core/src/engine/engine_io_strategy.rs @@ -142,7 +142,10 @@ impl EngineIoStrategy for EngineIoStrategyLocalVolume { }; let schema_file = { - let name = format!("schema-{}", input.dataset_handle.id.as_multibase()); + // FIXME: The .parquet extension is currently necessary for DataFusion to + // respect the single-file output + // See: https://github.com/apache/datafusion/issues/13323 + let name = format!("schema-{}.parquet", input.dataset_handle.id.as_multibase()); let host_path = host_in_dir.join(&name); let container_path = container_in_dir.join(&name); write_schema_file(&input.schema, &host_path).await?; @@ -309,7 +312,10 @@ impl EngineIoStrategy for EngineIoStrategyRemoteProxy { }; let schema_file = { - let name = format!("schema-{}", input.dataset_handle.id.as_multibase()); + // FIXME: The .parquet extension is currently necessary for DataFusion to + // respect the single-file output + // See: https://github.com/apache/datafusion/issues/13323 + let name = format!("schema-{}.parquet", input.dataset_handle.id.as_multibase()); let host_path = host_in_dir.join(&name); let container_path = container_in_dir.join(&name); write_schema_file(&input.schema, &host_path).await?; @@ -358,6 +364,14 @@ impl EngineIoStrategy for EngineIoStrategyRemoteProxy { async fn write_schema_file(schema: &SchemaRef, path: &Path) -> Result<(), InternalError> { use datafusion::prelude::*; + // FIXME: The extension is currently necessary for DataFusion to + // respect the single-file output + // See: https://github.com/apache/datafusion/issues/13323 + assert!( + path.extension().is_some(), + "Output file name must have an extension" + ); + let ctx = SessionContext::new(); let df = ctx .read_batch(datafusion::arrow::array::RecordBatch::new_empty( diff --git a/src/infra/core/src/engine/engine_odf.rs b/src/infra/core/src/engine/engine_odf.rs index 1adda5153..260f5659c 100644 --- a/src/infra/core/src/engine/engine_odf.rs +++ b/src/infra/core/src/engine/engine_odf.rs @@ -255,14 +255,17 @@ impl Engine for ODFEngine { let _ = std::fs::create_dir_all(&host_in_dir); let _ = std::fs::create_dir_all(&host_out_dir); - let host_input_data_path =
host_in_dir.join("input"); - let host_output_data_path = host_out_dir.join("output"); + // FIXME: The .parquet extension is currently necessary for DataFusion to + // respect the single-file output + // See: https://github.com/apache/datafusion/issues/13323 + let host_input_data_path = host_in_dir.join("input.parquet"); + let host_output_data_path = host_out_dir.join("output.parquet"); // Note: not using `PathBuf::join()` below to ensure linux style paths let container_in_dir = PathBuf::from("/opt/engine/in"); let container_out_dir = PathBuf::from("/opt/engine/out"); - let container_input_data_path = PathBuf::from("/opt/engine/in/input"); - let container_output_data_path = PathBuf::from("/opt/engine/out/output"); + let container_input_data_path = PathBuf::from("/opt/engine/in/input.parquet"); + let container_output_data_path = PathBuf::from("/opt/engine/out/output.parquet"); let volumes = vec![ VolumeSpec { diff --git a/src/infra/core/src/lib.rs b/src/infra/core/src/lib.rs index 8b924203f..0bd4173e1 100644 --- a/src/infra/core/src/lib.rs +++ b/src/infra/core/src/lib.rs @@ -20,64 +20,20 @@ pub use kamu_core as domain; mod engine; -pub mod ingest; -mod query; mod repos; +mod services; #[cfg(any(feature = "testing", test))] pub mod testing; -mod transform; mod use_cases; pub mod utils; -mod compaction_service_impl; -mod dataset_changes_service_impl; +pub use engine::*; +pub use repos::*; +pub use services::*; +pub use use_cases::*; + mod dataset_config; mod dataset_layout; -mod dataset_ownership_service_inmem; -mod dataset_registry_repo_bridge; -mod dependency_graph_repository_inmem; -mod dependency_graph_service_inmem; -mod provenance_service_impl; -mod pull_request_planner_impl; -mod push_request_planner_impl; -mod query_service_impl; -mod remote_alias_resolver_impl; -mod remote_aliases_registry_impl; -mod remote_repository_registry_impl; -mod remote_status_service_impl; -mod reset_service_impl; -mod resource_loader_impl; -mod search_service_impl; -mod sync_request_builder; -mod sync_service_impl; -mod verification_service_impl; -mod watermark_service_impl; -pub use compaction_service_impl::*; -pub use dataset_changes_service_impl::*; pub use dataset_config::*; pub use dataset_layout::*; -pub use dataset_ownership_service_inmem::*; -pub use dataset_registry_repo_bridge::*; -pub use dependency_graph_repository_inmem::*; -pub use dependency_graph_service_inmem::*; -pub use engine::*; -pub use ingest::*; -pub use provenance_service_impl::*; -pub use pull_request_planner_impl::*; -pub use push_request_planner_impl::*; -pub use query_service_impl::*; -pub use remote_alias_resolver_impl::*; -pub use remote_aliases_registry_impl::*; -pub use remote_repository_registry_impl::*; -pub use remote_status_service_impl::*; -pub use repos::*; -pub use reset_service_impl::*; -pub use resource_loader_impl::*; -pub use search_service_impl::*; -pub use sync_request_builder::*; -pub use sync_service_impl::*; -pub use transform::*; -pub use use_cases::*; -pub use verification_service_impl::*; -pub use watermark_service_impl::*; diff --git a/src/infra/core/src/repos/dataset_factory_impl.rs b/src/infra/core/src/repos/dataset_factory_impl.rs index e149c08f1..19feb099b 100644 --- a/src/infra/core/src/repos/dataset_factory_impl.rs +++ b/src/infra/core/src/repos/dataset_factory_impl.rs @@ -220,7 +220,7 @@ impl DatasetFactoryImpl { } async fn resolve_ipns_dnslink(&self, domain: &str) -> Result { - let r = trust_dns_resolver::TokioAsyncResolver::tokio_from_system_conf().int_err()?; + let r = 
hickory_resolver::TokioAsyncResolver::tokio_from_system_conf().int_err()?; let query = format!("_dnslink.{domain}"); let result = r.txt_lookup(&query).await.int_err()?; diff --git a/src/infra/core/src/repos/dataset_repository_local_fs.rs b/src/infra/core/src/repos/dataset_repository_local_fs.rs index d59a249d9..7ac5f0489 100644 --- a/src/infra/core/src/repos/dataset_repository_local_fs.rs +++ b/src/infra/core/src/repos/dataset_repository_local_fs.rs @@ -155,6 +155,7 @@ impl DatasetRepository for DatasetRepositoryLocalFs { #[async_trait] impl DatasetRepositoryWriter for DatasetRepositoryLocalFs { + #[tracing::instrument(level = "debug", skip_all, fields(%dataset_alias, ?seed_block))] async fn create_dataset( &self, dataset_alias: &DatasetAlias, @@ -272,6 +273,7 @@ impl DatasetRepositoryWriter for DatasetRepositoryLocalFs { }) } + #[tracing::instrument(level = "debug", skip_all, fields(?snapshot))] async fn create_dataset_from_snapshot( &self, snapshot: DatasetSnapshot, @@ -279,6 +281,7 @@ impl DatasetRepositoryWriter for DatasetRepositoryLocalFs { create_dataset_from_snapshot_impl(self, snapshot, self.system_time_source.now()).await } + #[tracing::instrument(level = "debug", skip_all, fields(%dataset_handle, %new_name))] async fn rename_dataset( &self, dataset_handle: &DatasetHandle, @@ -310,6 +313,7 @@ impl DatasetRepositoryWriter for DatasetRepositoryLocalFs { Ok(()) } + #[tracing::instrument(level = "debug", skip_all, fields(%dataset_handle))] async fn delete_dataset( &self, dataset_handle: &DatasetHandle, diff --git a/src/infra/core/src/repos/dataset_repository_s3.rs b/src/infra/core/src/repos/dataset_repository_s3.rs index 4ef8a821e..080b008b9 100644 --- a/src/infra/core/src/repos/dataset_repository_s3.rs +++ b/src/infra/core/src/repos/dataset_repository_s3.rs @@ -299,6 +299,7 @@ impl DatasetRepository for DatasetRepositoryS3 { #[async_trait] impl DatasetRepositoryWriter for DatasetRepositoryS3 { + #[tracing::instrument(level = "debug", skip_all, fields(%dataset_alias, ?seed_block))] async fn create_dataset( &self, dataset_alias: &DatasetAlias, @@ -411,6 +412,7 @@ impl DatasetRepositoryWriter for DatasetRepositoryS3 { }) } + #[tracing::instrument(level = "debug", skip_all, fields(?snapshot))] async fn create_dataset_from_snapshot( &self, snapshot: DatasetSnapshot, @@ -418,6 +420,7 @@ impl DatasetRepositoryWriter for DatasetRepositoryS3 { create_dataset_from_snapshot_impl(self, snapshot, self.system_time_source.now()).await } + #[tracing::instrument(level = "debug", skip_all, fields(%dataset_handle, %new_name))] async fn rename_dataset( &self, dataset_handle: &DatasetHandle, @@ -459,6 +462,7 @@ impl DatasetRepositoryWriter for DatasetRepositoryS3 { Ok(()) } + #[tracing::instrument(level = "debug", skip_all, fields(%dataset_handle))] async fn delete_dataset( &self, dataset_handle: &DatasetHandle, diff --git a/src/infra/core/src/services/compaction/compaction_executor_impl.rs b/src/infra/core/src/services/compaction/compaction_executor_impl.rs new file mode 100644 index 000000000..1fd0b9600 --- /dev/null +++ b/src/infra/core/src/services/compaction/compaction_executor_impl.rs @@ -0,0 +1,248 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
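+
+// Note: Executes a previously prepared `CompactionPlan`: merges each batch of
+// data slices into a single Parquet file and re-commits the metadata chain
+// with the compacted blocks (see `merge_files()` and `commit_new_blocks()`)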
+ +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +use datafusion::prelude::*; +use dill::{component, interface}; +use internal_error::ResultIntoInternal; +use kamu_core::*; +use opendatafabric as odf; +use random_names::get_random_name; +use time_source::SystemTimeSource; +use url::Url; + +use crate::new_session_context; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub struct CompactionExecutorImpl { + object_store_registry: Arc, + time_source: Arc, + run_info_dir: Arc, +} + +#[component(pub)] +#[interface(dyn CompactionExecutor)] +impl CompactionExecutorImpl { + pub fn new( + object_store_registry: Arc, + time_source: Arc, + run_info_dir: Arc, + ) -> Self { + Self { + object_store_registry, + time_source, + run_info_dir, + } + } + + fn create_run_compaction_dir(&self) -> Result { + let compaction_dir_path = self + .run_info_dir + .join(get_random_name(Some("compaction-"), 10)); + std::fs::create_dir_all(&compaction_dir_path).int_err()?; + Ok(compaction_dir_path) + } + + async fn merge_files( + &self, + plan: &CompactionPlan, + compaction_dir_path: &Path, + ) -> Result, CompactionExecutionError> { + let ctx = new_session_context(self.object_store_registry.clone()); + + let mut new_file_paths = HashMap::new(); + + for (index, data_slice_batch) in plan.data_slice_batches.iter().enumerate() { + if let CompactionDataSliceBatch::CompactedBatch(data_slice_batch_info) = + data_slice_batch + { + let data_frame = ctx + .read_parquet( + data_slice_batch_info.data_slices_batch.clone(), + datafusion::execution::options::ParquetReadOptions { + file_extension: "", + ..Default::default() + }, + ) + .await + .int_err()? + // TODO: PERF: Consider passing sort order hint to `read_parquet` to let DF know + // that the data is already pre-sorted + .sort(vec![ + col(Column::from_name(&plan.offset_column_name)).sort(true, false) + ]) + .int_err()?; + + // FIXME: The .parquet extension is currently necessary for DataFusion to + // respect the single-file output + // See: https://github.com/apache/datafusion/issues/13323 + let new_file_path = + compaction_dir_path.join(format!("merge-slice-{index}.parquet").as_str()); + + data_frame + .write_parquet( + new_file_path.to_str().unwrap(), + datafusion::dataframe::DataFrameWriteOptions::new() + .with_single_file_output(true), + None, + ) + .await + .int_err()?; + new_file_paths.insert(index, new_file_path); + } + } + + Ok(new_file_paths) + } + + async fn commit_new_blocks( + &self, + target: &ResolvedDataset, + plan: &CompactionPlan, + new_file_paths: HashMap, + ) -> Result<(Vec, odf::Multihash, usize), CompactionExecutionError> { + let chain = target.as_metadata_chain(); + let mut current_head = plan.seed.clone(); + let mut old_data_slices: Vec = vec![]; + // set it to 1 to include seed block + let mut new_num_blocks: usize = 1; + + for (index, data_slice_batch) in plan.data_slice_batches.iter().enumerate().rev() { + match data_slice_batch { + CompactionDataSliceBatch::SingleBlock(block_hash) => { + let block = chain.get_block(block_hash).await.int_err()?; + + let commit_result = target + .commit_event( + block.event, + CommitOpts { + block_ref: &BlockRef::Head, + system_time: Some(self.time_source.now()), + prev_block_hash: Some(Some(&current_head)), + check_object_refs: false, + update_block_ref: false, + }, + ) + .await + .int_err()?; + current_head = commit_result.new_head; + } + CompactionDataSliceBatch::CompactedBatch(data_slice_batch_info) => {
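+                    // Replace the whole batch of source slices with a single
+                    // AddData block that points at the merged file and covers
+                    // the combined offset interval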
let new_offset_interval = odf::OffsetInterval { + start: data_slice_batch_info.lower_bound.start_offset, + end: data_slice_batch_info.upper_bound.end_offset, + }; + + let add_data_params = AddDataParams { + prev_checkpoint: data_slice_batch_info.lower_bound.prev_checkpoint.clone(), + prev_offset: data_slice_batch_info.lower_bound.prev_offset, + new_offset_interval: Some(new_offset_interval), + new_source_state: data_slice_batch_info + .upper_bound + .new_source_state + .clone(), + new_watermark: data_slice_batch_info.upper_bound.new_watermark, + }; + let new_checkpoint_ref = data_slice_batch_info + .upper_bound + .new_checkpoint + .clone() + .map(|r| CheckpointRef::Existed(r.physical_hash)); + + let commit_result = target + .commit_add_data( + add_data_params, + Some(OwnedFile::new(new_file_paths.get(&index).expect( + "File path for the compacted chunk should be defined", + ))), + new_checkpoint_ref, + CommitOpts { + block_ref: &BlockRef::Head, + system_time: Some(self.time_source.now()), + prev_block_hash: Some(Some(¤t_head)), + check_object_refs: false, + update_block_ref: false, + }, + ) + .await + .int_err()?; + + current_head = commit_result.new_head; + old_data_slices.extend(data_slice_batch_info.data_slices_batch.clone()); + } + } + new_num_blocks += 1; + } + + Ok((old_data_slices, current_head, new_num_blocks)) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl CompactionExecutor for CompactionExecutorImpl { + #[tracing::instrument(level = "debug", skip_all, fields(target=%target.get_handle()))] + async fn execute( + &self, + target: ResolvedDataset, + plan: CompactionPlan, + maybe_listener: Option>, + ) -> Result { + // if slices amount +1(seed block) eq to amount of blocks we will not perform + // compaction + if plan.data_slice_batches.len() + 1 == plan.old_num_blocks { + return Ok(CompactionResult::NothingToDo); + } + + let listener = maybe_listener.unwrap_or(Arc::new(NullCompactionListener {})); + + let compaction_dir_path = self.create_run_compaction_dir()?; + + tracing::debug!("Merging data slices"); + listener.begin_phase(CompactionPhase::MergeDataslices); + let new_file_paths = self.merge_files(&plan, &compaction_dir_path).await?; + + tracing::debug!("Committing new compacted blocks"); + listener.begin_phase(CompactionPhase::CommitNewBlocks); + let (_old_data_slices, new_head, new_num_blocks) = self + .commit_new_blocks(&target, &plan, new_file_paths) + .await?; + + tracing::debug!("Setting new head"); + target + .as_metadata_chain() + .set_ref( + &BlockRef::Head, + &new_head, + SetRefOpts { + validate_block_present: true, + check_ref_is: None, + }, + ) + .await?; + + let res = CompactionResult::Success { + old_head: plan.old_head, + new_head, + old_num_blocks: plan.old_num_blocks, + new_num_blocks, + }; + + listener.execute_success(&res); + + Ok(res) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/core/src/services/compaction/compaction_planner_impl.rs b/src/infra/core/src/services/compaction/compaction_planner_impl.rs new file mode 100644 index 000000000..bc9c53dea --- /dev/null +++ b/src/infra/core/src/services/compaction/compaction_planner_impl.rs @@ -0,0 +1,247 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use std::cmp::Ordering; +use std::sync::Arc; + +use dill::{component, interface}; +use internal_error::ResultIntoInternal; +use kamu_core::*; +use opendatafabric as odf; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[component(pub)] +#[interface(dyn CompactionPlanner)] +pub struct CompactionPlannerImpl {} + +impl CompactionPlannerImpl { + #[tracing::instrument( + level = "debug", + skip_all, + fields( + target=%target.get_handle(), + max_slice_size, + max_slice_records, + keep_metadata_only + ) + )] + async fn plan_dataset_compaction( + &self, + target: ResolvedDataset, + max_slice_size: u64, + max_slice_records: u64, + keep_metadata_only: bool, + listener: Arc, + ) -> Result { + listener.begin_phase(CompactionPhase::GatherChainInfo); + + // Declare mut values for result + + let mut old_num_blocks: usize = 0; + let mut maybe_seed: Option = None; + + let mut current_hash: Option = None; + let mut vocab_event: Option = None; + let mut data_slice_batch_info = CompactionDataSliceBatchInfo::default(); + let mut data_slice_batches: Vec = vec![]; + let (mut batch_size, mut batch_records) = (0u64, 0u64); + + //////////////////////////////////////////////////////////////////////////////// + + let chain = target.as_metadata_chain(); + let head = chain.resolve_ref(&BlockRef::Head).await?; + + let object_data_repo = target.as_data_repo(); + + { + use futures::TryStreamExt; + let mut block_stream = chain.iter_blocks_interval(&head, None, false); + while let Some((block_hash, block)) = block_stream.try_next().await? 
{ + old_num_blocks += 1; + match block.event { + odf::MetadataEvent::AddData(add_data_event) => { + if !keep_metadata_only && let Some(output_slice) = &add_data_event.new_data + { + let data_slice_url = object_data_repo + .get_internal_url(&output_slice.physical_hash) + .await; + + // Setting the end offset interval needs to be here because we + // have to get it at the beginning of iteration unlike + // other values which will be set at the end of iteration + if data_slice_batch_info.data_slices_batch.is_empty() { + data_slice_batch_info.upper_bound.end_offset = + output_slice.offset_interval.end; + } + + let current_records = output_slice.num_records(); + + if batch_size + output_slice.size > max_slice_size + || batch_records + current_records > max_slice_records + { + let is_appended = self.append_add_data_batch_to_chain_info( + &mut data_slice_batches, + ¤t_hash, + &mut data_slice_batch_info, + ); + if is_appended { + // Reset values for next batch + data_slice_batch_info = CompactionDataSliceBatchInfo::default(); + data_slice_batch_info.upper_bound.end_offset = + output_slice.offset_interval.end; + } + + data_slice_batch_info.data_slices_batch = vec![data_slice_url]; + batch_size = output_slice.size; + batch_records = current_records; + } else { + data_slice_batch_info.data_slices_batch.push(data_slice_url); + batch_size += output_slice.size; + batch_records += current_records; + } + + // Set lower bound values + data_slice_batch_info.lower_bound.prev_checkpoint = + add_data_event.prev_checkpoint; + data_slice_batch_info.lower_bound.prev_offset = + add_data_event.prev_offset; + data_slice_batch_info.lower_bound.start_offset = + output_slice.offset_interval.start; + current_hash = Some(block_hash); + } + // Set upper bound values + if data_slice_batch_info.upper_bound.new_checkpoint.is_none() { + data_slice_batch_info.upper_bound.new_checkpoint = + add_data_event.new_checkpoint; + } + if data_slice_batch_info.upper_bound.new_source_state.is_none() { + data_slice_batch_info.upper_bound.new_source_state = + add_data_event.new_source_state; + } + if data_slice_batch_info.upper_bound.new_watermark.is_none() { + data_slice_batch_info.upper_bound.new_watermark = + add_data_event.new_watermark; + } + } + odf::MetadataEvent::Seed(_) => maybe_seed = Some(block_hash), + odf::MetadataEvent::ExecuteTransform(_) => { + if keep_metadata_only { + continue; + } + } + event => { + if let odf::MetadataEvent::SetVocab(set_vocab_event) = event { + vocab_event = Some(set_vocab_event); + } + let is_appended = self.append_add_data_batch_to_chain_info( + &mut data_slice_batches, + ¤t_hash, + &mut data_slice_batch_info, + ); + data_slice_batches + .push(CompactionDataSliceBatch::SingleBlock(block_hash.clone())); + if is_appended { + data_slice_batch_info = CompactionDataSliceBatchInfo::default(); + } + } + } + } + } + + let vocab: odf::DatasetVocabulary = vocab_event.unwrap_or_default().into(); + + Ok(CompactionPlan { + data_slice_batches, + old_head: head, + offset_column_name: vocab.offset_column, + seed: maybe_seed.expect("Seed must be present"), + old_num_blocks, + }) + } + + fn append_add_data_batch_to_chain_info( + &self, + data_slice_batches: &mut Vec, + hash: &Option, + data_slice_batch_info: &mut CompactionDataSliceBatchInfo, + ) -> bool { + match data_slice_batch_info.data_slices_batch.len().cmp(&1) { + Ordering::Equal => { + data_slice_batches.push(CompactionDataSliceBatch::SingleBlock( + hash.as_ref().unwrap().clone(), + )); + true + } + Ordering::Greater => { + 
data_slice_batches.push(CompactionDataSliceBatch::CompactedBatch( + data_slice_batch_info.clone(), + )); + true + } + _ => false, + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl CompactionPlanner for CompactionPlannerImpl { + #[tracing::instrument(level = "debug", skip_all, fields(target=%target.get_handle(), ?options))] + async fn plan_compaction( + &self, + target: ResolvedDataset, + options: CompactionOptions, + maybe_listener: Option>, + ) -> Result { + let dataset_kind = target + .get_summary(GetSummaryOpts::default()) + .await + .int_err()? + .kind; + + if !options.keep_metadata_only && dataset_kind != odf::DatasetKind::Root { + return Err(CompactionPlanningError::InvalidDatasetKind( + InvalidDatasetKindError { + dataset_alias: target.get_alias().clone(), + }, + )); + } + + let listener = maybe_listener.unwrap_or(Arc::new(NullCompactionListener {})); + + let max_slice_size = options.max_slice_size.unwrap_or(DEFAULT_MAX_SLICE_SIZE); + let max_slice_records = options + .max_slice_records + .unwrap_or(DEFAULT_MAX_SLICE_RECORDS); + + match self + .plan_dataset_compaction( + target, + max_slice_size, + max_slice_records, + options.keep_metadata_only, + listener.clone(), + ) + .await + { + Ok(plan) => { + listener.plan_success(&plan); + Ok(plan) + } + Err(err) => { + listener.plan_error(&err); + Err(err) + } + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/core/src/services/compaction/mod.rs b/src/infra/core/src/services/compaction/mod.rs new file mode 100644 index 000000000..fc5929d6a --- /dev/null +++ b/src/infra/core/src/services/compaction/mod.rs @@ -0,0 +1,14 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
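+
+// Note: Compaction is split into a planning stage (deciding which slices to
+// merge) and an execution stage (merging the data files and rewriting the
+// metadata chain)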
+ +mod compaction_executor_impl; +mod compaction_planner_impl; + +pub use compaction_executor_impl::*; +pub use compaction_planner_impl::*; diff --git a/src/infra/core/src/dataset_changes_service_impl.rs b/src/infra/core/src/services/dataset_changes_service_impl.rs similarity index 100% rename from src/infra/core/src/dataset_changes_service_impl.rs rename to src/infra/core/src/services/dataset_changes_service_impl.rs diff --git a/src/infra/core/src/dataset_ownership_service_inmem.rs b/src/infra/core/src/services/dataset_ownership_service_inmem.rs similarity index 100% rename from src/infra/core/src/dataset_ownership_service_inmem.rs rename to src/infra/core/src/services/dataset_ownership_service_inmem.rs diff --git a/src/infra/core/src/dataset_registry_repo_bridge.rs b/src/infra/core/src/services/dataset_registry_repo_bridge.rs similarity index 100% rename from src/infra/core/src/dataset_registry_repo_bridge.rs rename to src/infra/core/src/services/dataset_registry_repo_bridge.rs diff --git a/src/infra/core/src/ingest/data_format_registry_impl.rs b/src/infra/core/src/services/ingest/data_format_registry_impl.rs similarity index 100% rename from src/infra/core/src/ingest/data_format_registry_impl.rs rename to src/infra/core/src/services/ingest/data_format_registry_impl.rs diff --git a/src/infra/core/src/ingest/fetch_service/configs.rs b/src/infra/core/src/services/ingest/fetch_service/configs.rs similarity index 100% rename from src/infra/core/src/ingest/fetch_service/configs.rs rename to src/infra/core/src/services/ingest/fetch_service/configs.rs diff --git a/src/infra/core/src/ingest/fetch_service/container.rs b/src/infra/core/src/services/ingest/fetch_service/container.rs similarity index 100% rename from src/infra/core/src/ingest/fetch_service/container.rs rename to src/infra/core/src/services/ingest/fetch_service/container.rs diff --git a/src/infra/core/src/ingest/fetch_service/core.rs b/src/infra/core/src/services/ingest/fetch_service/core.rs similarity index 100% rename from src/infra/core/src/ingest/fetch_service/core.rs rename to src/infra/core/src/services/ingest/fetch_service/core.rs diff --git a/src/infra/core/src/ingest/fetch_service/evm.rs b/src/infra/core/src/services/ingest/fetch_service/evm.rs similarity index 97% rename from src/infra/core/src/ingest/fetch_service/evm.rs rename to src/infra/core/src/services/ingest/fetch_service/evm.rs index 742243f77..73bd3e1a0 100644 --- a/src/infra/core/src/ingest/fetch_service/evm.rs +++ b/src/infra/core/src/services/ingest/fetch_service/evm.rs @@ -123,6 +123,11 @@ impl FetchService { // options in transform DTOs) cfg.options_mut().sql_parser.enable_ident_normalization = false; + // TODO: Disabling Utf8View types due to unresolved issues + // See: https://github.com/apache/datafusion/issues/13510 + // See: https://github.com/apache/datafusion/issues/13504 + cfg.options_mut().execution.parquet.schema_force_view_types = false; + let mut ctx = SessionContext::new_with_config(cfg); datafusion_ethers::udf::register_all(&mut ctx).unwrap(); ctx.register_catalog( diff --git a/src/infra/core/src/ingest/fetch_service/file.rs b/src/infra/core/src/services/ingest/fetch_service/file.rs similarity index 100% rename from src/infra/core/src/ingest/fetch_service/file.rs rename to src/infra/core/src/services/ingest/fetch_service/file.rs diff --git a/src/infra/core/src/ingest/fetch_service/ftp.rs b/src/infra/core/src/services/ingest/fetch_service/ftp.rs similarity index 100% rename from src/infra/core/src/ingest/fetch_service/ftp.rs rename to 
src/infra/core/src/services/ingest/fetch_service/ftp.rs diff --git a/src/infra/core/src/ingest/fetch_service/http.rs b/src/infra/core/src/services/ingest/fetch_service/http.rs similarity index 100% rename from src/infra/core/src/ingest/fetch_service/http.rs rename to src/infra/core/src/services/ingest/fetch_service/http.rs diff --git a/src/infra/core/src/ingest/fetch_service/mod.rs b/src/infra/core/src/services/ingest/fetch_service/mod.rs similarity index 100% rename from src/infra/core/src/ingest/fetch_service/mod.rs rename to src/infra/core/src/services/ingest/fetch_service/mod.rs diff --git a/src/infra/core/src/ingest/fetch_service/mqtt.rs b/src/infra/core/src/services/ingest/fetch_service/mqtt.rs similarity index 100% rename from src/infra/core/src/ingest/fetch_service/mqtt.rs rename to src/infra/core/src/services/ingest/fetch_service/mqtt.rs diff --git a/src/infra/core/src/ingest/fetch_service/template.rs b/src/infra/core/src/services/ingest/fetch_service/template.rs similarity index 100% rename from src/infra/core/src/ingest/fetch_service/template.rs rename to src/infra/core/src/services/ingest/fetch_service/template.rs diff --git a/src/infra/core/src/ingest/ingest_common.rs b/src/infra/core/src/services/ingest/ingest_common.rs similarity index 91% rename from src/infra/core/src/ingest/ingest_common.rs rename to src/infra/core/src/services/ingest/ingest_common.rs index 9269ae03f..f215398ae 100644 --- a/src/infra/core/src/ingest/ingest_common.rs +++ b/src/infra/core/src/services/ingest/ingest_common.rs @@ -178,7 +178,12 @@ pub fn new_session_context(object_store_registry: Arc) use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv}; use datafusion::prelude::*; - let mut config = SessionConfig::new().with_default_catalog_and_schema("kamu", "kamu"); + // Note: We use single partition as ingest currently always reads one file at a + // time and repartitioning of data likely to hurt performance rather than + // improve it + let mut config = SessionConfig::new() + .with_target_partitions(1) + .with_default_catalog_and_schema("kamu", "kamu"); // Forcing cese-sensitive identifiers in case-insensitive language seems to // be a lesser evil than following DataFusion's default behavior of forcing @@ -189,12 +194,21 @@ pub fn new_session_context(object_store_registry: Arc) // options in transform DTOs) config.options_mut().sql_parser.enable_ident_normalization = false; + // TODO: Disabling Utf8View types due to unresolved issues + // See: https://github.com/apache/datafusion/issues/13510 + // See: https://github.com/apache/datafusion/issues/13504 + config + .options_mut() + .execution + .parquet + .schema_force_view_types = false; + let runtime_config = RuntimeConfig { object_store_registry: object_store_registry.as_datafusion_registry(), ..RuntimeConfig::default() }; - let runtime = Arc::new(RuntimeEnv::new(runtime_config).unwrap()); + let runtime = Arc::new(RuntimeEnv::try_new(runtime_config).unwrap()); #[allow(unused_mut)] let mut ctx = SessionContext::new_with_config_rt(config, runtime); diff --git a/src/infra/core/src/ingest/mod.rs b/src/infra/core/src/services/ingest/mod.rs similarity index 83% rename from src/infra/core/src/ingest/mod.rs rename to src/infra/core/src/services/ingest/mod.rs index 9ea43476a..ba8b1fed1 100644 --- a/src/infra/core/src/ingest/mod.rs +++ b/src/infra/core/src/services/ingest/mod.rs @@ -13,7 +13,8 @@ mod ingest_common; mod polling_ingest_service_impl; mod polling_source_state; mod prep_service; -mod push_ingest_service_impl; +mod 
push_ingest_executor_impl; +mod push_ingest_planner_impl; pub use data_format_registry_impl::*; pub use fetch_service::*; @@ -21,4 +22,5 @@ pub use ingest_common::*; pub use polling_ingest_service_impl::*; pub use polling_source_state::*; pub use prep_service::*; -pub use push_ingest_service_impl::*; +pub use push_ingest_executor_impl::*; +pub use push_ingest_planner_impl::*; diff --git a/src/infra/core/src/ingest/polling_ingest_service_impl.rs b/src/infra/core/src/services/ingest/polling_ingest_service_impl.rs similarity index 91% rename from src/infra/core/src/ingest/polling_ingest_service_impl.rs rename to src/infra/core/src/services/ingest/polling_ingest_service_impl.rs index c8db4de93..56f5704c6 100644 --- a/src/infra/core/src/ingest/polling_ingest_service_impl.rs +++ b/src/infra/core/src/services/ingest/polling_ingest_service_impl.rs @@ -61,40 +61,16 @@ impl PollingIngestServiceImpl { } } - async fn do_ingest( + async fn ingest_loop( &self, target: ResolvedDataset, + metadata_state: Box, options: PollingIngestOptions, - get_listener: impl FnOnce(&DatasetHandle) -> Option>, - ) -> Result { - let listener = get_listener(target.get_handle()) - .unwrap_or_else(|| Arc::new(NullPollingIngestListener)); - - self.ingest_loop(IngestLoopArgs { - target, - options, - listener, - }) - .await - } - - #[tracing::instrument( - level = "info", - skip_all, - fields( - dataset_handle = %args.target.get_handle(), - ) - )] - async fn ingest_loop( - &self, - args: IngestLoopArgs, + listener: Arc, ) -> Result { let ctx = ingest_common::new_session_context(self.object_store_registry.clone()); - let mut data_writer = DataWriterDataFusion::builder((*args.target).clone(), ctx.clone()) - .with_metadata_state_scanned(None) - .await - .int_err()? - .build(); + let mut data_writer = + DataWriterDataFusion::from_metadata_state(ctx.clone(), target.clone(), *metadata_state); let Some(MetadataEvent::SetPollingSource(polling_source)) = data_writer.source_event().cloned() @@ -106,8 +82,8 @@ impl PollingIngestServiceImpl { uncacheable: false, }; - args.listener.begin(); - args.listener.success(&result); + listener.begin(); + listener.success(&result); return Ok(result); }; @@ -125,14 +101,14 @@ impl PollingIngestServiceImpl { // TODO: Avoid excessive cloning let iteration_args = IngestIterationArgs { - dataset_handle: args.target.get_handle().clone(), + dataset_handle: target.get_handle().clone(), iteration, operation_id, operation_dir, system_time: self.time_source.now(), - options: args.options.clone(), + options: options.clone(), polling_source: polling_source.clone(), - listener: args.listener.clone(), + listener: listener.clone(), ctx: new_ctx, data_writer: &mut data_writer, }; @@ -147,7 +123,7 @@ impl PollingIngestServiceImpl { None => unreachable!(), }; - if !has_more || !args.options.exhaust_sources { + if !has_more || !options.exhaust_sources { break; } } @@ -268,7 +244,7 @@ impl PollingIngestServiceImpl { let new_source_state = savepoint.source_state.map(|ss| ss.to_source_state()); let out_dir = args.operation_dir.join("out"); - let data_staging_path = out_dir.join("data"); + let data_staging_path = out_dir.join("data.parquet"); std::fs::create_dir(&out_dir).int_err()?; let stage_result = args @@ -285,6 +261,8 @@ impl PollingIngestServiceImpl { ) .await; + tracing::info!(?stage_result, "Stage result"); + // Clean up intermediate files // Note that we are leaving the fetch data and savepoint intact // in case user wants to iterate on the dataset. 
@@ -594,28 +572,18 @@ impl PollingIngestServiceImpl { #[async_trait::async_trait] impl PollingIngestService for PollingIngestServiceImpl { - #[tracing::instrument(level = "info", skip_all, fields(target=%target.get_handle()))] - async fn get_active_polling_source( - &self, - target: ResolvedDataset, - ) -> Result)>, GetDatasetError> { - // TODO: Support source evolution - Ok(target - .as_metadata_chain() - .accept_one(SearchSetPollingSourceVisitor::new()) - .await - .int_err()? - .into_hashed_block()) - } - #[tracing::instrument(level = "info", skip_all, fields(target=%target.get_handle()))] async fn ingest( &self, target: ResolvedDataset, + metadata_state: Box, options: PollingIngestOptions, maybe_listener: Option>, ) -> Result { - self.do_ingest(target, options, |_| maybe_listener).await + let listener = maybe_listener.unwrap_or_else(|| Arc::new(NullPollingIngestListener)); + + self.ingest_loop(target, metadata_state, options, listener) + .await } } @@ -632,12 +600,6 @@ pub(crate) struct PrepStepResult { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -struct IngestLoopArgs { - target: ResolvedDataset, - options: PollingIngestOptions, - listener: Arc, -} - struct IngestIterationArgs<'a> { dataset_handle: DatasetHandle, iteration: usize, diff --git a/src/infra/core/src/ingest/polling_source_state.rs b/src/infra/core/src/services/ingest/polling_source_state.rs similarity index 100% rename from src/infra/core/src/ingest/polling_source_state.rs rename to src/infra/core/src/services/ingest/polling_source_state.rs diff --git a/src/infra/core/src/ingest/prep_service.rs b/src/infra/core/src/services/ingest/prep_service.rs similarity index 100% rename from src/infra/core/src/ingest/prep_service.rs rename to src/infra/core/src/services/ingest/prep_service.rs diff --git a/src/infra/core/src/ingest/push_ingest_service_impl.rs b/src/infra/core/src/services/ingest/push_ingest_executor_impl.rs similarity index 62% rename from src/infra/core/src/ingest/push_ingest_service_impl.rs rename to src/infra/core/src/services/ingest/push_ingest_executor_impl.rs index e8bfd9982..509bdd9b5 100644 --- a/src/infra/core/src/ingest/push_ingest_service_impl.rs +++ b/src/infra/core/src/services/ingest/push_ingest_executor_impl.rs @@ -10,108 +10,63 @@ use std::path::{Path, PathBuf}; use std::sync::Arc; -use chrono::{DateTime, Utc}; use datafusion::arrow::array::RecordBatch; use datafusion::prelude::{DataFrame, SessionContext}; use internal_error::{ErrorIntoInternal, InternalError, ResultIntoInternal}; use kamu_core::ingest::*; use kamu_core::*; use kamu_ingest_datafusion::*; -use opendatafabric::*; -use random_names::get_random_name; -use time_source::SystemTimeSource; use tokio::io::AsyncRead; use super::ingest_common; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -pub struct PushIngestServiceImpl { +pub struct PushIngestExecutorImpl { object_store_registry: Arc, data_format_registry: Arc, - time_source: Arc, engine_provisioner: Arc, - run_info_dir: Arc, } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[dill::component(pub)] -#[dill::interface(dyn PushIngestService)] -impl PushIngestServiceImpl { +#[dill::interface(dyn PushIngestExecutor)] +impl PushIngestExecutorImpl { pub fn new( object_store_registry: Arc, data_format_registry: Arc, - time_source: Arc, engine_provisioner: Arc, - run_info_dir: Arc, 
) -> Self { Self { object_store_registry, data_format_registry, - time_source, engine_provisioner, - run_info_dir, } } async fn do_ingest( &self, target: ResolvedDataset, - source_name: Option<&str>, + plan: PushIngestPlan, source: DataSource, - opts: PushIngestOpts, listener: Arc, ) -> Result { - let operation_id = get_random_name(None, 10); - let operation_dir = self.run_info_dir.join(format!("ingest-{operation_id}")); - std::fs::create_dir_all(&operation_dir).int_err()?; - let ctx: SessionContext = ingest_common::new_session_context(self.object_store_registry.clone()); - let mut data_writer = self - .make_data_writer((*target).clone(), source_name, ctx.clone()) - .await?; - - let push_source = match (data_writer.source_event(), opts.auto_create_push_source) { - // No push source, and it's allowed to create - (None, true) => { - let add_push_source_event = self - .auto_create_push_source((*target).clone(), "auto", &opts) - .await?; - - // Update data writer, as we've modified the dataset - data_writer = self - .make_data_writer((*target).clone(), source_name, ctx.clone()) - .await?; - Ok(add_push_source_event) - } - - // Got existing push source - (Some(MetadataEvent::AddPushSource(e)), _) => Ok(e.clone()), - - // No push source and not allowed to create - _ => Err(PushIngestError::SourceNotFound( - PushSourceNotFoundError::new(source_name), - )), - }?; - - let args = PushIngestArgs { - operation_id, - operation_dir, - system_time: self.time_source.now(), - opts, - listener, - ctx, - data_writer, - push_source, - }; + let data_writer = DataWriterDataFusion::from_metadata_state( + ctx.clone(), + target.clone(), + *plan.metadata_state, + ); - let listener = args.listener.clone(); listener.begin(); - match self.do_ingest_inner(source, args).await { + match self + .do_ingest_inner(plan.args, source, data_writer, ctx, listener.clone()) + .await + { Ok(res) => { tracing::info!(result = ?res, "Ingest iteration successful"); listener.success(&res); @@ -125,67 +80,6 @@ impl PushIngestServiceImpl { } } - async fn make_data_writer( - &self, - dataset: Arc, - source_name: Option<&str>, - ctx: SessionContext, - ) -> Result { - match DataWriterDataFusion::builder(dataset, ctx) - .with_metadata_state_scanned(source_name) - .await - { - Ok(b) => Ok(b.build()), - Err(ScanMetadataError::SourceNotFound(err)) => { - Err(PushIngestError::SourceNotFound(err.into())) - } - Err(ScanMetadataError::Internal(err)) => Err(PushIngestError::Internal(err)), - } - } - - async fn auto_create_push_source( - &self, - dataset: Arc, - source_name: &str, - opts: &PushIngestOpts, - ) -> Result { - let read = match &opts.media_type { - Some(media_type) => { - match self - .data_format_registry - .get_best_effort_config(None, media_type) - { - Ok(read_step) => Ok(read_step), - Err(e) => Err(PushIngestError::UnsupportedMediaType(e)), - } - } - None => Err(PushIngestError::SourceNotFound( - PushSourceNotFoundError::new(Some(source_name)), - )), - }?; - - let add_push_source_event = AddPushSource { - source_name: String::from("auto"), - read, - preprocess: None, - merge: opendatafabric::MergeStrategy::Append(opendatafabric::MergeStrategyAppend {}), - }; - - let commit_result = dataset - .commit_event( - MetadataEvent::AddPushSource(add_push_source_event.clone()), - CommitOpts { - system_time: opts.source_event_time, - ..CommitOpts::default() - }, - ) - .await; - match commit_result { - Ok(_) => Ok(add_push_source_event), - Err(e) => Err(PushIngestError::CommitError(e)), - } - } - #[tracing::instrument( level = "info", skip_all, @@ 
-195,29 +89,29 @@ impl PushIngestServiceImpl { )] async fn do_ingest_inner( &self, + args: PushIngestArgs, source: DataSource, - mut args: PushIngestArgs, + mut data_writer: DataWriterDataFusion, + ctx: SessionContext, + listener: Arc, ) -> Result { - args.listener - .on_stage_progress(PushIngestStage::Read, 0, TotalSteps::Exact(1)); + listener.on_stage_progress(PushIngestStage::Read, 0, TotalSteps::Exact(1)); + + std::fs::create_dir_all(&args.operation_dir).int_err()?; let input_data_path = self.maybe_fetch(source, &args).await?; - let df = if let Some(df) = self.read(&input_data_path, &args).await? { + let df = if let Some(df) = self.read(&input_data_path, &ctx, &args).await? { if let Some(transform) = &args.push_source.preprocess { - args.listener.on_stage_progress( - PushIngestStage::Preprocess, - 0, - TotalSteps::Exact(1), - ); + listener.on_stage_progress(PushIngestStage::Preprocess, 0, TotalSteps::Exact(1)); ingest_common::preprocess( &args.operation_id, self.engine_provisioner.as_ref(), - &args.ctx, + &ctx, transform, df, - args.listener.clone().get_engine_provisioning_listener(), + listener.clone().get_engine_provisioning_listener(), ) .await? } else { @@ -225,7 +119,7 @@ impl PushIngestServiceImpl { ingest_common::preprocess_default( df, &args.push_source.read, - args.data_writer.vocab(), + data_writer.vocab(), &args.opts.schema_inference, ) .int_err()?, @@ -237,11 +131,10 @@ impl PushIngestServiceImpl { }; let out_dir = args.operation_dir.join("out"); - let data_staging_path = out_dir.join("data"); + let data_staging_path = out_dir.join("data.parquet"); std::fs::create_dir(&out_dir).int_err()?; - let stage_result = args - .data_writer + let stage_result = data_writer .stage( df, WriteDataOpts { @@ -258,10 +151,9 @@ impl PushIngestServiceImpl { match stage_result { Ok(staged) => { - args.listener - .on_stage_progress(PushIngestStage::Commit, 0, TotalSteps::Exact(1)); + listener.on_stage_progress(PushIngestStage::Commit, 0, TotalSteps::Exact(1)); - let res = args.data_writer.commit(staged).await?; + let res = data_writer.commit(staged).await?; Ok(PushIngestResult::Updated { old_head: res.old_head, @@ -336,6 +228,7 @@ impl PushIngestServiceImpl { async fn read( &self, input_data_path: &Path, + ctx: &SessionContext, args: &PushIngestArgs, ) -> Result, PushIngestError> { let conf = if let Some(media_type) = &args.opts.media_type { @@ -355,7 +248,7 @@ impl PushIngestServiceImpl { let temp_path = args.operation_dir.join("reader.tmp"); let reader = self .data_format_registry - .get_reader(args.ctx.clone(), conf, temp_path) + .get_reader(ctx.clone(), conf, temp_path) .await?; if input_data_path.metadata().int_err()?.len() == 0 { @@ -365,8 +258,7 @@ impl PushIngestServiceImpl { "Returning an empty data frame as input file is empty", ); - let df = args - .ctx + let df = ctx .read_batch(RecordBatch::new_empty(read_schema)) .int_err()?; @@ -417,76 +309,41 @@ impl PushIngestServiceImpl { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -impl PushIngestService for PushIngestServiceImpl { - #[tracing::instrument(level = "info", skip_all, fields(target=%target.get_handle()))] - async fn get_active_push_sources( - &self, - target: ResolvedDataset, - ) -> Result)>, GetDatasetError> { - use futures::TryStreamExt; - - // TODO: Support source disabling and evolution - let stream = target - .as_metadata_chain() - .iter_blocks() - .filter_map_ok(|(h, b)| b.into_typed().map(|b| (h, b))); - - 
Ok(stream.try_collect().await.int_err()?) - } - - #[tracing::instrument(level = "info", skip_all, fields(target=%target.get_handle()))] +impl PushIngestExecutor for PushIngestExecutorImpl { + #[tracing::instrument(level = "info", skip_all, fields(target=%target.get_handle(), %url))] async fn ingest_from_url( &self, target: ResolvedDataset, - source_name: Option<&str>, + plan: PushIngestPlan, url: url::Url, - opts: PushIngestOpts, listener: Option>, ) -> Result { let listener = listener.unwrap_or_else(|| Arc::new(NullPushIngestListener)); - tracing::info!(%url, ?opts, "Ingesting from url"); - - self.do_ingest(target, source_name, DataSource::Url(url), opts, listener) + self.do_ingest(target, plan, DataSource::Url(url), listener) .await } #[tracing::instrument(level = "info", skip_all, fields(target=%target.get_handle()))] - async fn ingest_from_file_stream( + async fn ingest_from_stream( &self, target: ResolvedDataset, - source_name: Option<&str>, + plan: PushIngestPlan, data: Box, - opts: PushIngestOpts, listener: Option>, ) -> Result { let listener = listener.unwrap_or_else(|| Arc::new(NullPushIngestListener)); - tracing::info!(?opts, "Ingesting from file stream"); - - self.do_ingest( - target, - source_name, - DataSource::Stream(data), - opts, - listener, - ) - .await + self.do_ingest(target, plan, DataSource::Stream(data), listener) + .await } } -struct PushIngestArgs { - operation_id: String, - operation_dir: PathBuf, - system_time: DateTime, - opts: PushIngestOpts, - listener: Arc, - ctx: SessionContext, - data_writer: DataWriterDataFusion, - push_source: AddPushSource, -} +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// enum DataSource { Url(url::Url), Stream(Box), } + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/core/src/services/ingest/push_ingest_planner_impl.rs b/src/infra/core/src/services/ingest/push_ingest_planner_impl.rs new file mode 100644 index 000000000..cfc50339f --- /dev/null +++ b/src/infra/core/src/services/ingest/push_ingest_planner_impl.rs @@ -0,0 +1,155 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
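+
+// Note: Prepares a `PushIngestPlan`: scans the target dataset's metadata for a
+// push source (auto-creating one when allowed by the options) and captures the
+// operation parameters that the push ingest executor will run with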
+ +use std::sync::Arc; + +use kamu_core::*; +use opendatafabric as odf; +use random_names::get_random_name; +use time_source::SystemTimeSource; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub struct PushIngestPlannerImpl { + data_format_registry: Arc, + time_source: Arc, + run_info_dir: Arc, +} + +#[dill::component(pub)] +#[dill::interface(dyn PushIngestPlanner)] +impl PushIngestPlannerImpl { + pub fn new( + data_format_registry: Arc, + time_source: Arc, + run_info_dir: Arc, + ) -> Self { + Self { + data_format_registry, + time_source, + run_info_dir, + } + } + + async fn prepare_metadata_state( + &self, + target: ResolvedDataset, + source_name: Option<&str>, + ) -> Result { + let metadata_state = DataWriterMetadataState::build(target, &BlockRef::Head, source_name) + .await + .map_err(|e| match e { + ScanMetadataError::SourceNotFound(err) => { + PushIngestPlanningError::SourceNotFound(err.into()) + } + ScanMetadataError::Internal(err) => PushIngestPlanningError::Internal(err), + })?; + Ok(metadata_state) + } + + async fn auto_create_push_source( + &self, + target: ResolvedDataset, + source_name: &str, + opts: &PushIngestOpts, + ) -> Result { + let read = match &opts.media_type { + Some(media_type) => { + match self + .data_format_registry + .get_best_effort_config(None, media_type) + { + Ok(read_step) => Ok(read_step), + Err(e) => Err(PushIngestPlanningError::UnsupportedMediaType(e)), + } + } + None => Err(PushIngestPlanningError::SourceNotFound( + PushSourceNotFoundError::new(Some(source_name)), + )), + }?; + + let add_push_source_event = odf::AddPushSource { + source_name: String::from("auto"), + read, + preprocess: None, + merge: opendatafabric::MergeStrategy::Append(opendatafabric::MergeStrategyAppend {}), + }; + + let commit_result = target + .commit_event( + odf::MetadataEvent::AddPushSource(add_push_source_event.clone()), + CommitOpts { + system_time: opts.source_event_time, + ..CommitOpts::default() + }, + ) + .await; + match commit_result { + Ok(_) => Ok(add_push_source_event), + Err(e) => Err(PushIngestPlanningError::CommitError(e)), + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl PushIngestPlanner for PushIngestPlannerImpl { + /// Uses or auto-creates push source definition in metadata to plan + /// ingestion + #[tracing::instrument(level = "debug", skip_all, fields(target=%target.get_handle(), ?source_name, ?opts))] + async fn plan_ingest( + &self, + target: ResolvedDataset, + source_name: Option<&str>, + opts: PushIngestOpts, + ) -> Result { + let mut metadata_state = self + .prepare_metadata_state(target.clone(), source_name) + .await?; + + let push_source = match (&metadata_state.source_event, opts.auto_create_push_source) { + // No push source, and it's allowed to create + (None, true) => { + tracing::debug!("Auto-creating new push source"); + let add_push_source_event = self + .auto_create_push_source(target.clone(), "auto", &opts) + .await?; + + // Update data writer, as we've modified the dataset + metadata_state = self.prepare_metadata_state(target, source_name).await?; + Ok(add_push_source_event) + } + + // Got existing push source + (Some(odf::MetadataEvent::AddPushSource(e)), _) => Ok(e.clone()), + + // No push source and not allowed to create + _ => Err(PushIngestPlanningError::SourceNotFound( + PushSourceNotFoundError::new(source_name), + )), + }?; + + let 
operation_id = get_random_name(None, 10); + let operation_dir = self.run_info_dir.join(format!("ingest-{operation_id}")); + + Ok(PushIngestPlan { + args: PushIngestArgs { + operation_id, + operation_dir, + system_time: self.time_source.now(), + opts, + push_source, + }, + metadata_state: Box::new(metadata_state), + }) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/core/src/services/metadata_query_service_impl.rs b/src/infra/core/src/services/metadata_query_service_impl.rs new file mode 100644 index 000000000..e9bc47175 --- /dev/null +++ b/src/infra/core/src/services/metadata_query_service_impl.rs @@ -0,0 +1,98 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use chrono::{DateTime, Utc}; +use dill::*; +use internal_error::{InternalError, ResultIntoInternal}; +use kamu_core::*; +use opendatafabric::{self as odf, AsTypedBlock}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[component(pub)] +#[interface(dyn MetadataQueryService)] +pub struct MetadataQueryServiceImpl {} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl MetadataQueryService for MetadataQueryServiceImpl { + /// Returns an active polling source, if any + async fn get_active_polling_source( + &self, + target: ResolvedDataset, + ) -> Result< + Option<( + odf::Multihash, + odf::MetadataBlockTyped, + )>, + InternalError, + > { + // TODO: Support source evolution + Ok(target + .as_metadata_chain() + .accept_one(SearchSetPollingSourceVisitor::new()) + .await + .int_err()? + .into_hashed_block()) + } + + /// Returns the set of active push sources + async fn get_active_push_sources( + &self, + target: ResolvedDataset, + ) -> Result)>, InternalError> + { + use futures::TryStreamExt; + + // TODO: Support source disabling and evolution + let stream = target + .as_metadata_chain() + .iter_blocks() + .filter_map_ok(|(h, b)| b.into_typed().map(|b| (h, b))); + + Ok(stream.try_collect().await.int_err()?) + } + + /// Returns an active transform, if any + async fn get_active_transform( + &self, + target: ResolvedDataset, + ) -> Result)>, InternalError> + { + // TODO: Support transform evolution + Ok(target + .as_metadata_chain() + .accept_one(SearchSetTransformVisitor::new()) + .await + .int_err()? 
+ .into_hashed_block()) + } + + /// Attempt reading watermark that is currently associated with a dataset + #[tracing::instrument(level = "info", skip_all)] + async fn try_get_current_watermark( + &self, + resolved_dataset: ResolvedDataset, + ) -> Result>, InternalError> { + let mut add_data_visitor = SearchAddDataVisitor::new(); + + resolved_dataset + .as_metadata_chain() + .accept(&mut [&mut add_data_visitor]) + .await + .int_err()?; + + let current_watermark = add_data_visitor.into_event().and_then(|e| e.new_watermark); + + Ok(current_watermark) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/core/src/services/mod.rs b/src/infra/core/src/services/mod.rs new file mode 100644 index 000000000..e98a3f0f0 --- /dev/null +++ b/src/infra/core/src/services/mod.rs @@ -0,0 +1,46 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +mod compaction; +pub mod ingest; +mod query; +mod remote; +mod reset; +mod sync; +mod transform; +mod watermark; + +pub use compaction::*; +pub use ingest::*; +pub use remote::*; +pub use reset::*; +pub use sync::*; +pub use transform::*; +pub use watermark::*; + +mod dataset_changes_service_impl; +mod dataset_ownership_service_inmem; +mod dataset_registry_repo_bridge; +mod metadata_query_service_impl; +mod provenance_service_impl; +mod pull_request_planner_impl; +mod push_request_planner_impl; +mod query_service_impl; + +mod verification_service_impl; + +pub use dataset_changes_service_impl::*; +pub use dataset_ownership_service_inmem::*; +pub use dataset_registry_repo_bridge::*; +pub use metadata_query_service_impl::*; +pub use provenance_service_impl::*; +pub use pull_request_planner_impl::*; +pub use push_request_planner_impl::*; +pub use query_service_impl::*; +pub use verification_service_impl::*; diff --git a/src/infra/core/src/provenance_service_impl.rs b/src/infra/core/src/services/provenance_service_impl.rs similarity index 100% rename from src/infra/core/src/provenance_service_impl.rs rename to src/infra/core/src/services/provenance_service_impl.rs diff --git a/src/infra/core/src/pull_request_planner_impl.rs b/src/infra/core/src/services/pull_request_planner_impl.rs similarity index 95% rename from src/infra/core/src/pull_request_planner_impl.rs rename to src/infra/core/src/services/pull_request_planner_impl.rs index 815f04e6d..2bfe173ef 100644 --- a/src/infra/core/src/pull_request_planner_impl.rs +++ b/src/infra/core/src/services/pull_request_planner_impl.rs @@ -142,7 +142,7 @@ impl PullRequestPlannerImpl { } #[tracing::instrument(level = "debug", skip_all, fields(?pi))] - fn build_ingest_item(&self, pi: PullItem) -> PullIngestItem { + async fn build_ingest_item(&self, pi: PullItem) -> Result { assert!(pi.maybe_remote_ref.is_none()); let hdl = match pi.local_target { @@ -152,10 +152,20 @@ impl PullRequestPlannerImpl { } }; - PullIngestItem { - depth: pi.depth, - target: self.dataset_registry.get_dataset_by_handle(&hdl), - maybe_original_request: pi.maybe_original_request, + let target = self.dataset_registry.get_dataset_by_handle(&hdl); + match DataWriterMetadataState::build(target.clone(), &BlockRef::Head, None).await { + Ok(metadata_state) => 
Ok(PullIngestItem { + depth: pi.depth, + target, + metadata_state: Box::new(metadata_state), + maybe_original_request: pi.maybe_original_request, + }), + Err(e) => Err(PullResponse { + maybe_original_request: pi.maybe_original_request, + maybe_local_ref: Some(hdl.as_local_ref()), + maybe_remote_ref: None, + result: Err(PullError::ScanMetadata(e)), + }), } } @@ -309,9 +319,15 @@ impl PullRequestPlanner for PullRequestPlannerImpl { for item in batch { // Ingest? if depth == 0 && item.maybe_remote_ref.is_none() { - let pii = self.build_ingest_item(item); - tracing::debug!(depth, ?pii, "Added ingest item to pull plan"); - jobs.push(PullPlanIterationJob::Ingest(pii)); + match self.build_ingest_item(item).await { + Ok(pii) => { + tracing::debug!(depth, ?pii, "Added ingest item to pull plan"); + jobs.push(PullPlanIterationJob::Ingest(pii)); + } + Err(ingest_error) => { + errors.push(ingest_error); + } + } // Sync? } else if depth == 0 && item.maybe_remote_ref.is_some() { diff --git a/src/infra/core/src/push_request_planner_impl.rs b/src/infra/core/src/services/push_request_planner_impl.rs similarity index 100% rename from src/infra/core/src/push_request_planner_impl.rs rename to src/infra/core/src/services/push_request_planner_impl.rs diff --git a/src/infra/core/src/query/mod.rs b/src/infra/core/src/services/query/mod.rs similarity index 94% rename from src/infra/core/src/query/mod.rs rename to src/infra/core/src/services/query/mod.rs index 941e7489e..017a50a6a 100644 --- a/src/infra/core/src/query/mod.rs +++ b/src/infra/core/src/services/query/mod.rs @@ -66,6 +66,14 @@ impl CatalogProvider for KamuCatalog { } } +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +impl std::fmt::Debug for KamuCatalog { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("KamuCatalog").finish_non_exhaustive() + } +} + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Schema //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -232,6 +240,14 @@ impl SchemaProvider for KamuSchema { } } +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +impl std::fmt::Debug for KamuSchema { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("KamuSchema").finish_non_exhaustive() + } +} + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Table //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -487,3 +503,13 @@ impl TableProvider for KamuTable { } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +impl std::fmt::Debug for KamuTable { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("KamuTable") + .field("resolved_dataset", &self.resolved_dataset) + .finish_non_exhaustive() + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/core/src/query_service_impl.rs b/src/infra/core/src/services/query_service_impl.rs similarity index 98% rename from src/infra/core/src/query_service_impl.rs rename to 
src/infra/core/src/services/query_service_impl.rs index 3498446de..3086866e1 100644 --- a/src/infra/core/src/query_service_impl.rs +++ b/src/infra/core/src/services/query_service_impl.rs @@ -25,7 +25,7 @@ use kamu_core::auth::{DatasetAction, DatasetActionAuthorizer}; use kamu_core::*; use opendatafabric::*; -use crate::query::*; +use crate::services::query::*; use crate::utils::docker_images; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -68,11 +68,16 @@ impl QueryServiceImpl { // options in transform DTOs) cfg.options_mut().sql_parser.enable_ident_normalization = false; + // TODO: Disabling Utf8View types due to unresolved issues + // See: https://github.com/apache/datafusion/issues/13510 + // See: https://github.com/apache/datafusion/issues/13504 + cfg.options_mut().execution.parquet.schema_force_view_types = false; + let runtime_config = RuntimeConfig { object_store_registry: self.object_store_registry.clone().as_datafusion_registry(), ..RuntimeConfig::default() }; - let runtime = Arc::new(RuntimeEnv::new(runtime_config).unwrap()); + let runtime = Arc::new(RuntimeEnv::try_new(runtime_config).unwrap()); let session_context = SessionContext::new_with_config_rt(cfg, runtime); let schema = KamuSchema::prepare( diff --git a/src/infra/core/src/services/remote/mod.rs b/src/infra/core/src/services/remote/mod.rs new file mode 100644 index 000000000..1bd4b96bd --- /dev/null +++ b/src/infra/core/src/services/remote/mod.rs @@ -0,0 +1,22 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
+ +mod remote_alias_resolver_impl; +mod remote_aliases_registry_impl; +mod remote_repository_registry_impl; +mod remote_status_service_impl; +mod resource_loader_impl; +mod search_service_impl; + +pub use remote_alias_resolver_impl::*; +pub use remote_aliases_registry_impl::*; +pub use remote_repository_registry_impl::*; +pub use remote_status_service_impl::*; +pub use resource_loader_impl::*; +pub use search_service_impl::*; diff --git a/src/infra/core/src/remote_alias_resolver_impl.rs b/src/infra/core/src/services/remote/remote_alias_resolver_impl.rs similarity index 100% rename from src/infra/core/src/remote_alias_resolver_impl.rs rename to src/infra/core/src/services/remote/remote_alias_resolver_impl.rs diff --git a/src/infra/core/src/remote_aliases_registry_impl.rs b/src/infra/core/src/services/remote/remote_aliases_registry_impl.rs similarity index 97% rename from src/infra/core/src/remote_aliases_registry_impl.rs rename to src/infra/core/src/services/remote/remote_aliases_registry_impl.rs index 5d5b52ed7..6d2711fd6 100644 --- a/src/infra/core/src/remote_aliases_registry_impl.rs +++ b/src/infra/core/src/services/remote/remote_aliases_registry_impl.rs @@ -16,7 +16,7 @@ use opendatafabric::serde::yaml::Manifest; use opendatafabric::*; use thiserror::Error; -use super::*; +use crate::DatasetConfig; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -66,12 +66,16 @@ impl RemoteAliasesRegistryImpl { #[async_trait::async_trait] impl RemoteAliasesRegistry for RemoteAliasesRegistryImpl { + #[tracing::instrument(level = "debug", skip_all, fields(?dataset_handle))] async fn get_remote_aliases( &self, dataset_handle: &DatasetHandle, ) -> Result, GetAliasesError> { let resolved_dataset = self.dataset_registry.get_dataset_by_handle(dataset_handle); + let config = Self::read_config(resolved_dataset.as_ref()).await?; + tracing::debug!(?config, "Loaded dataset config"); + Ok(Box::new(RemoteAliasesImpl::new(resolved_dataset, config))) } } diff --git a/src/infra/core/src/remote_repository_registry_impl.rs b/src/infra/core/src/services/remote/remote_repository_registry_impl.rs similarity index 100% rename from src/infra/core/src/remote_repository_registry_impl.rs rename to src/infra/core/src/services/remote/remote_repository_registry_impl.rs diff --git a/src/infra/core/src/remote_status_service_impl.rs b/src/infra/core/src/services/remote/remote_status_service_impl.rs similarity index 94% rename from src/infra/core/src/remote_status_service_impl.rs rename to src/infra/core/src/services/remote/remote_status_service_impl.rs index 86886a8de..5c0757e75 100644 --- a/src/infra/core/src/remote_status_service_impl.rs +++ b/src/infra/core/src/services/remote/remote_status_service_impl.rs @@ -94,6 +94,7 @@ impl RemoteStatusServiceImpl { #[async_trait] impl RemoteStatusService for RemoteStatusServiceImpl { + #[tracing::instrument(level = "debug", skip_all, fields(%dataset_handle))] async fn check_remotes_status( &self, dataset_handle: &DatasetHandle, @@ -110,6 +111,8 @@ impl RemoteStatusService for RemoteStatusServiceImpl { let push_aliases: Vec<&DatasetRefRemote> = aliases.get_by_kind(RemoteAliasKind::Push).collect(); + tracing::debug!(?push_aliases, "Fetched dataset remote push aliases"); + let mut statuses = vec![]; for alias in push_aliases { @@ -119,6 +122,8 @@ impl RemoteStatusService for RemoteStatusServiceImpl { }); } + tracing::debug!(?statuses, "Determined push alias statuses"); + Ok(DatasetPushStatuses { statuses }) } } diff --git 
a/src/infra/core/src/resource_loader_impl.rs b/src/infra/core/src/services/remote/resource_loader_impl.rs similarity index 100% rename from src/infra/core/src/resource_loader_impl.rs rename to src/infra/core/src/services/remote/resource_loader_impl.rs diff --git a/src/infra/core/src/search_service_impl.rs b/src/infra/core/src/services/remote/search_service_impl.rs similarity index 100% rename from src/infra/core/src/search_service_impl.rs rename to src/infra/core/src/services/remote/search_service_impl.rs diff --git a/src/infra/core/src/services/reset/mod.rs b/src/infra/core/src/services/reset/mod.rs new file mode 100644 index 000000000..22c826796 --- /dev/null +++ b/src/infra/core/src/services/reset/mod.rs @@ -0,0 +1,14 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +mod reset_executor_impl; +mod reset_planner_impl; + +pub use reset_executor_impl::*; +pub use reset_planner_impl::*; diff --git a/src/infra/core/src/services/reset/reset_executor_impl.rs b/src/infra/core/src/services/reset/reset_executor_impl.rs new file mode 100644 index 000000000..1b7af6fff --- /dev/null +++ b/src/infra/core/src/services/reset/reset_executor_impl.rs @@ -0,0 +1,47 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
+ +use dill::*; +use kamu_core::*; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[component(pub)] +#[interface(dyn ResetExecutor)] +pub struct ResetExecutorImpl {} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl ResetExecutor for ResetExecutorImpl { + #[tracing::instrument(level = "debug", skip_all, fields(target=%target.get_handle(), new_head=%plan.new_head))] + async fn execute( + &self, + target: ResolvedDataset, + plan: ResetPlan, + ) -> Result { + target + .as_metadata_chain() + .set_ref( + &BlockRef::Head, + &plan.new_head, + SetRefOpts { + validate_block_present: true, + check_ref_is: None, + }, + ) + .await?; + + Ok(ResetResult { + new_head: plan.new_head, + }) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/core/src/reset_service_impl.rs b/src/infra/core/src/services/reset/reset_planner_impl.rs similarity index 56% rename from src/infra/core/src/reset_service_impl.rs rename to src/infra/core/src/services/reset/reset_planner_impl.rs index a4cf3b06d..ab331d55c 100644 --- a/src/infra/core/src/reset_service_impl.rs +++ b/src/infra/core/src/services/reset/reset_planner_impl.rs @@ -10,24 +10,30 @@ use dill::*; use internal_error::ResultIntoInternal; use kamu_core::*; -use opendatafabric::*; +use opendatafabric as odf; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[component(pub)] -#[interface(dyn ResetService)] -pub struct ResetServiceImpl {} +#[interface(dyn ResetPlanner)] +pub struct ResetPlannerImpl {} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -impl ResetService for ResetServiceImpl { - #[tracing::instrument(level = "info", skip_all, fields(new_head = ?new_head_maybe, old_head = ?old_head_maybe))] - async fn reset_dataset( +impl ResetPlanner for ResetPlannerImpl { + #[tracing::instrument( + level = "debug", + skip_all, + fields(target=%target.get_handle(), ?maybe_old_head, ?maybe_new_head) + )] + async fn plan_reset( &self, target: ResolvedDataset, - new_head_maybe: Option<&Multihash>, - old_head_maybe: Option<&Multihash>, - ) -> Result { - let new_head = if let Some(new_head) = new_head_maybe { + maybe_new_head: Option<&odf::Multihash>, + maybe_old_head: Option<&odf::Multihash>, + ) -> Result { + let new_head = if let Some(new_head) = maybe_new_head { new_head } else { &target @@ -39,32 +45,25 @@ impl ResetService for ResetServiceImpl { .unwrap() .0 }; - if let Some(old_head) = old_head_maybe + + if let Some(old_head) = maybe_old_head && let Some(current_head) = target .as_metadata_chain() .try_get_ref(&BlockRef::Head) .await? 
&& old_head != &current_head { - return Err(ResetError::OldHeadMismatch(OldHeadMismatchError { - current_head, - old_head: old_head.clone(), - })); - } - - target - .as_metadata_chain() - .set_ref( - &BlockRef::Head, - new_head, - SetRefOpts { - validate_block_present: true, - check_ref_is: None, + return Err(ResetPlanningError::OldHeadMismatch( + ResetOldHeadMismatchError { + current_head, + old_head: old_head.clone(), }, - ) - .await?; + )); + } - Ok(new_head.clone()) + Ok(ResetPlan { + new_head: new_head.clone(), + }) } } diff --git a/src/infra/core/src/services/sync/mod.rs b/src/infra/core/src/services/sync/mod.rs new file mode 100644 index 000000000..c6444ab6b --- /dev/null +++ b/src/infra/core/src/services/sync/mod.rs @@ -0,0 +1,14 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +mod sync_request_builder; +mod sync_service_impl; + +pub use sync_request_builder::*; +pub use sync_service_impl::*; diff --git a/src/infra/core/src/sync_request_builder.rs b/src/infra/core/src/services/sync/sync_request_builder.rs similarity index 100% rename from src/infra/core/src/sync_request_builder.rs rename to src/infra/core/src/services/sync/sync_request_builder.rs diff --git a/src/infra/core/src/sync_service_impl.rs b/src/infra/core/src/services/sync/sync_service_impl.rs similarity index 99% rename from src/infra/core/src/sync_service_impl.rs rename to src/infra/core/src/services/sync/sync_service_impl.rs index 363c2063b..685fa718b 100644 --- a/src/infra/core/src/sync_service_impl.rs +++ b/src/infra/core/src/services/sync/sync_service_impl.rs @@ -18,14 +18,16 @@ use kamu_core::*; use opendatafabric::*; use url::Url; -use super::utils::smart_transfer_protocol::SmartTransferProtocolClient; use crate::resolve_remote_dataset_url; use crate::utils::ipfs_wrapper::*; use crate::utils::simple_transfer_protocol::{ SimpleProtocolTransferOptions, SimpleTransferProtocol, }; -use crate::utils::smart_transfer_protocol::TransferOptions as SmartTransferOptions; +use crate::utils::smart_transfer_protocol::{ + SmartTransferProtocolClient, + TransferOptions as SmartTransferOptions, +}; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/core/src/transform/mod.rs b/src/infra/core/src/services/transform/mod.rs similarity index 87% rename from src/infra/core/src/transform/mod.rs rename to src/infra/core/src/services/transform/mod.rs index 77b70bb4d..a6834b4e2 100644 --- a/src/infra/core/src/transform/mod.rs +++ b/src/infra/core/src/services/transform/mod.rs @@ -8,11 +8,11 @@ // by the Apache License, Version 2.0. 
mod transform_elaboration_service_impl; -mod transform_execution_service_impl; +mod transform_executor_impl; mod transform_helpers; mod transform_request_planner_impl; pub use transform_elaboration_service_impl::*; -pub use transform_execution_service_impl::*; +pub use transform_executor_impl::*; pub(crate) use transform_helpers::*; pub use transform_request_planner_impl::*; diff --git a/src/infra/core/src/transform/transform_elaboration_service_impl.rs b/src/infra/core/src/services/transform/transform_elaboration_service_impl.rs similarity index 93% rename from src/infra/core/src/transform/transform_elaboration_service_impl.rs rename to src/infra/core/src/services/transform/transform_elaboration_service_impl.rs index 09c831572..2817489e5 100644 --- a/src/infra/core/src/transform/transform_elaboration_service_impl.rs +++ b/src/infra/core/src/services/transform/transform_elaboration_service_impl.rs @@ -22,7 +22,8 @@ use crate::build_preliminary_request_ext; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub struct TransformElaborationServiceImpl { - compaction_svc: Arc, + compaction_planner: Arc, + compaction_executor: Arc, time_source: Arc, } @@ -30,11 +31,13 @@ pub struct TransformElaborationServiceImpl { #[interface(dyn TransformElaborationService)] impl TransformElaborationServiceImpl { pub fn new( - compaction_svc: Arc, + compaction_planner: Arc, + compaction_executor: Arc, time_source: Arc, ) -> Self { Self { - compaction_svc, + compaction_planner, + compaction_executor, time_source, } } @@ -148,7 +151,7 @@ impl TransformElaborationServiceImpl { #[async_trait::async_trait] impl TransformElaborationService for TransformElaborationServiceImpl { - #[tracing::instrument(level = "info", skip_all, fields(target=%target.get_handle(), ?plan, ?options))] + #[tracing::instrument(level = "info", skip_all, fields(target=%target.get_handle(), ?options))] async fn elaborate_transform( &self, target: ResolvedDataset, @@ -186,9 +189,9 @@ impl TransformElaborationService for TransformElaborationServiceImpl { "Interval error detected - resetting on diverged input", ); - let compaction_result = self - .compaction_svc - .compact_dataset( + let compaction_plan = self + .compaction_planner + .plan_compaction( target.clone(), CompactionOptions { keep_metadata_only: true, @@ -199,6 +202,12 @@ impl TransformElaborationService for TransformElaborationServiceImpl { .await .int_err()?; + let compaction_result = self + .compaction_executor + .execute(target.clone(), compaction_plan, None) + .await + .int_err()?; + if let CompactionResult::Success { .. 
} = compaction_result { // Recursing to try again after compaction self.elaborate_transform( diff --git a/src/infra/core/src/transform/transform_execution_service_impl.rs b/src/infra/core/src/services/transform/transform_executor_impl.rs similarity index 97% rename from src/infra/core/src/transform/transform_execution_service_impl.rs rename to src/infra/core/src/services/transform/transform_executor_impl.rs index 1cbb79a45..de25ce6e7 100644 --- a/src/infra/core/src/transform/transform_execution_service_impl.rs +++ b/src/infra/core/src/services/transform/transform_executor_impl.rs @@ -18,13 +18,13 @@ use opendatafabric::{EnumWithVariants, ExecuteTransform, SetDataSchema, Transfor //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -pub struct TransformExecutionServiceImpl { +pub struct TransformExecutorImpl { engine_provisioner: Arc, } #[component(pub)] -#[interface(dyn TransformExecutionService)] -impl TransformExecutionServiceImpl { +#[interface(dyn TransformExecutor)] +impl TransformExecutorImpl { pub fn new(engine_provisioner: Arc) -> Self { Self { engine_provisioner } } @@ -184,8 +184,8 @@ impl TransformExecutionServiceImpl { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -impl TransformExecutionService for TransformExecutionServiceImpl { - #[tracing::instrument(level = "info", skip_all, fields(target=%target.get_handle(), ?plan))] +impl TransformExecutor for TransformExecutorImpl { + #[tracing::instrument(level = "info", skip_all, fields(target=%target.get_handle()))] async fn execute_transform( &self, target: ResolvedDataset, @@ -212,7 +212,7 @@ impl TransformExecutionService for TransformExecutionServiceImpl { ) } - #[tracing::instrument(level = "info", skip_all, fields(target=%target.get_handle(), ?verification_operation))] + #[tracing::instrument(level = "info", skip_all, fields(target=%target.get_handle()))] async fn execute_verify_transform( &self, target: ResolvedDataset, diff --git a/src/infra/core/src/transform/transform_helpers.rs b/src/infra/core/src/services/transform/transform_helpers.rs similarity index 100% rename from src/infra/core/src/transform/transform_helpers.rs rename to src/infra/core/src/services/transform/transform_helpers.rs diff --git a/src/infra/core/src/transform/transform_request_planner_impl.rs b/src/infra/core/src/services/transform/transform_request_planner_impl.rs similarity index 94% rename from src/infra/core/src/transform/transform_request_planner_impl.rs rename to src/infra/core/src/services/transform/transform_request_planner_impl.rs index 52f06bc60..f6f3043e3 100644 --- a/src/infra/core/src/transform/transform_request_planner_impl.rs +++ b/src/infra/core/src/services/transform/transform_request_planner_impl.rs @@ -20,11 +20,9 @@ use opendatafabric::{ DatasetVocabulary, ExecuteTransform, MetadataBlock, - MetadataBlockTyped, MetadataEventTypeFlags, Multihash, SetDataSchema, - SetTransform, }; use random_names::get_random_name; use time_source::SystemTimeSource; @@ -99,22 +97,6 @@ impl TransformRequestPlannerImpl { #[async_trait::async_trait] impl TransformRequestPlanner for TransformRequestPlannerImpl { - /// Returns an active transform, if any - #[tracing::instrument(level = "debug", skip_all, fields(target=%target.get_handle()))] - /// - async fn get_active_transform( - &self, - target: ResolvedDataset, - ) -> Result)>, InternalError> { - // TODO: Support transform evolution - 
Ok(target - .as_metadata_chain() - .accept_one(SearchSetTransformVisitor::new()) - .await - .int_err()? - .into_hashed_block()) - } - #[tracing::instrument(level = "info", skip_all, fields(target=%target.get_handle()))] async fn build_transform_preliminary_plan( &self, diff --git a/src/infra/core/src/verification_service_impl.rs b/src/infra/core/src/services/verification_service_impl.rs similarity index 98% rename from src/infra/core/src/verification_service_impl.rs rename to src/infra/core/src/services/verification_service_impl.rs index ffd9051cc..7b6bcf397 100644 --- a/src/infra/core/src/verification_service_impl.rs +++ b/src/infra/core/src/services/verification_service_impl.rs @@ -22,7 +22,7 @@ use crate::*; pub struct VerificationServiceImpl { transform_request_planner: Arc, - transform_execution_svc: Arc, + transform_executor: Arc, } #[component(pub)] @@ -30,11 +30,11 @@ pub struct VerificationServiceImpl { impl VerificationServiceImpl { pub fn new( transform_request_planner: Arc, - transform_execution_svc: Arc, + transform_executor: Arc, ) -> Self { Self { transform_request_planner, - transform_execution_svc, + transform_executor, } } @@ -304,7 +304,7 @@ impl VerificationService for VerificationServiceImpl { VerificationError::VerifyTransform(VerifyTransformError::Plan(e)) })?; - self.transform_execution_svc + self.transform_executor .execute_verify_transform(request.target.clone(), plan, Some(listener.clone())) .await .map_err(|e| { diff --git a/src/infra/core/src/services/watermark/mod.rs b/src/infra/core/src/services/watermark/mod.rs new file mode 100644 index 000000000..151b6cb63 --- /dev/null +++ b/src/infra/core/src/services/watermark/mod.rs @@ -0,0 +1,14 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +mod set_watermark_executor_impl; +mod set_watermark_planner_impl; + +pub use set_watermark_executor_impl::*; +pub use set_watermark_planner_impl::*; diff --git a/src/infra/core/src/services/watermark/set_watermark_executor_impl.rs b/src/infra/core/src/services/watermark/set_watermark_executor_impl.rs new file mode 100644 index 000000000..9e15e9678 --- /dev/null +++ b/src/infra/core/src/services/watermark/set_watermark_executor_impl.rs @@ -0,0 +1,69 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
+ +use dill::*; +use internal_error::ErrorIntoInternal; +use kamu_core::*; +use kamu_ingest_datafusion::DataWriterDataFusion; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[component(pub)] +#[interface(dyn SetWatermarkExecutor)] +pub struct SetWatermarkExecutorImpl {} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl SetWatermarkExecutor for SetWatermarkExecutorImpl { + #[tracing::instrument( + level = "debug", + skip_all, + fields( + target=%target.get_handle(), + new_watermark=%plan.new_watermark + ) + )] + async fn execute( + &self, + target: ResolvedDataset, + plan: SetWatermarkPlan, + ) -> Result { + let mut writer = DataWriterDataFusion::from_metadata_state( + datafusion::prelude::SessionContext::new(), + target.clone(), + *plan.metadata_state, + ); + + match writer + .write_watermark( + plan.new_watermark, + WriteWatermarkOpts { + system_time: plan.system_time, + new_source_state: None, + }, + ) + .await + { + Ok(res) => Ok(SetWatermarkResult::Updated { + old_head: Some(res.old_head), + new_head: res.new_head, + }), + Err( + WriteWatermarkError::EmptyCommit(_) + | WriteWatermarkError::CommitError(CommitError::MetadataAppendError( + AppendError::InvalidBlock(AppendValidationError::WatermarkIsNotMonotonic), + )), + ) => Ok(SetWatermarkResult::UpToDate), + Err(e) => Err(e.int_err().into()), + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/core/src/services/watermark/set_watermark_planner_impl.rs b/src/infra/core/src/services/watermark/set_watermark_planner_impl.rs new file mode 100644 index 000000000..633ed86aa --- /dev/null +++ b/src/infra/core/src/services/watermark/set_watermark_planner_impl.rs @@ -0,0 +1,84 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
+ +use std::sync::Arc; + +use chrono::{DateTime, Utc}; +use dill::*; +use internal_error::ResultIntoInternal; +use kamu_core::*; +use opendatafabric as odf; +use time_source::SystemTimeSource; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub struct SetWatermarkPlannerImpl { + remote_alias_reg: Arc, + system_time_source: Arc, +} + +#[component(pub)] +#[interface(dyn SetWatermarkPlanner)] +impl SetWatermarkPlannerImpl { + pub fn new( + remote_alias_reg: Arc, + system_time_source: Arc, + ) -> Self { + Self { + remote_alias_reg, + system_time_source, + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl SetWatermarkPlanner for SetWatermarkPlannerImpl { + #[tracing::instrument(level = "debug", skip_all, fields(target=%target.get_handle(), new_watermark))] + async fn plan_set_watermark( + &self, + target: ResolvedDataset, + new_watermark: DateTime, + ) -> Result { + let aliases = match self + .remote_alias_reg + .get_remote_aliases(target.get_handle()) + .await + { + Ok(v) => Ok(v), + Err(GetAliasesError::Internal(e)) => Err(SetWatermarkPlanningError::Internal(e)), + }?; + + if !aliases.is_empty(RemoteAliasKind::Pull) { + return Err(SetWatermarkPlanningError::IsRemote); + } + + let summary = target + .get_summary(GetSummaryOpts::default()) + .await + .int_err()?; + + if summary.kind != odf::DatasetKind::Root { + return Err(SetWatermarkPlanningError::IsDerivative); + } + + let metadata_state = DataWriterMetadataState::build(target.clone(), &BlockRef::Head, None) + .await + .int_err()?; + + Ok(SetWatermarkPlan { + system_time: self.system_time_source.now(), + new_watermark, + metadata_state: Box::new(metadata_state), + }) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/core/tests/utils/base_repo_harness.rs b/src/infra/core/src/testing/base_repo_harness.rs similarity index 97% rename from src/infra/core/tests/utils/base_repo_harness.rs rename to src/infra/core/src/testing/base_repo_harness.rs index 9ec640e53..0b80f3505 100644 --- a/src/infra/core/tests/utils/base_repo_harness.rs +++ b/src/infra/core/src/testing/base_repo_harness.rs @@ -11,8 +11,6 @@ use std::path::Path; use std::sync::Arc; use dill::{Catalog, Component}; -use kamu::testing::MetadataFactory; -use kamu::{DatasetRegistryRepoBridge, DatasetRepositoryLocalFs, DatasetRepositoryWriter}; use kamu_accounts::CurrentAccountSubject; use kamu_core::{ CreateDatasetResult, @@ -30,6 +28,9 @@ use opendatafabric::serde::MetadataBlockSerializer; use opendatafabric::{DatasetAlias, DatasetKind, DatasetRef, MetadataBlock, Multicodec, Multihash}; use time_source::SystemTimeSourceDefault; +use crate::testing::MetadataFactory; +use crate::{DatasetRegistryRepoBridge, DatasetRepositoryLocalFs, DatasetRepositoryWriter}; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub struct BaseRepoHarness { diff --git a/src/infra/core/src/testing/mock_dependency_graph_repository.rs b/src/infra/core/src/testing/mock_dependency_graph_repository.rs deleted file mode 100644 index 103cf1901..000000000 --- a/src/infra/core/src/testing/mock_dependency_graph_repository.rs +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright Kamu Data, Inc. and contributors. All rights reserved. 
-// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0. - -use kamu_core::{DatasetDependenciesIDStream, DependencyGraphRepository}; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -mockall::mock! { - pub DependencyGraphRepository {} - - #[async_trait::async_trait] - impl DependencyGraphRepository for DependencyGraphRepository { - fn list_dependencies_of_all_datasets(&self) -> DatasetDependenciesIDStream<'_>; - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -impl MockDependencyGraphRepository { - pub fn no_dependencies() -> Self { - let mut dependency_graph_repo_mock = MockDependencyGraphRepository::default(); - dependency_graph_repo_mock - .expect_list_dependencies_of_all_datasets() - .return_once(|| Box::pin(futures::stream::empty())); - dependency_graph_repo_mock - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/core/src/testing/mock_polling_source_service.rs b/src/infra/core/src/testing/mock_polling_source_service.rs index f992a8d9f..35bdfd341 100644 --- a/src/infra/core/src/testing/mock_polling_source_service.rs +++ b/src/infra/core/src/testing/mock_polling_source_service.rs @@ -9,9 +9,8 @@ use std::sync::Arc; -use chrono::Utc; use kamu_core::{ - GetDatasetError, + DataWriterMetadataState, PollingIngestError, PollingIngestListener, PollingIngestOptions, @@ -19,18 +18,7 @@ use kamu_core::{ PollingIngestService, ResolvedDataset, }; -use opendatafabric::{ - DatasetAlias, - FetchStep, - FetchStepUrl, - MergeStrategy, - MergeStrategyAppend, - MetadataBlockTyped, - Multihash, - ReadStep, - ReadStepJson, - SetPollingSource, -}; +use opendatafabric as odf; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -39,14 +27,10 @@ mockall::mock! { #[async_trait::async_trait] impl PollingIngestService for PollingIngestService { - async fn get_active_polling_source( - &self, - target: ResolvedDataset, - ) -> Result)>, GetDatasetError>; - async fn ingest( &self, target: ResolvedDataset, + metadata_state: Box, options: PollingIngestOptions, listener: Option>, ) -> Result; @@ -56,11 +40,11 @@ mockall::mock! 
{ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// impl MockPollingIngestService { - pub fn make_expect_ingest(mut self, dataset_alias: DatasetAlias) -> Self { + pub fn make_expect_ingest(mut self, dataset_alias: odf::DatasetAlias) -> Self { self.expect_ingest() - .withf(move |target, _, _| target.get_alias() == &dataset_alias) + .withf(move |target, _, _, _| target.get_alias() == &dataset_alias) .times(1) - .returning(|_, _, _| { + .returning(|_, _, _, _| { Ok(PollingIngestResult::UpToDate { no_source_defined: false, uncacheable: false, @@ -68,49 +52,6 @@ impl MockPollingIngestService { }); self } - - pub fn without_active_polling_source() -> Self { - let mut dependency_graph_repo_mock = MockPollingIngestService::default(); - dependency_graph_repo_mock - .expect_get_active_polling_source() - .returning(|_| Ok(None)); - dependency_graph_repo_mock - } - - pub fn with_active_polling_source() -> Self { - let mut dependency_graph_repo_mock = MockPollingIngestService::default(); - dependency_graph_repo_mock - .expect_get_active_polling_source() - .returning(|_| { - Ok(Some(( - Multihash::from_digest_sha3_256(b"a"), - MetadataBlockTyped { - system_time: Utc::now(), - prev_block_hash: None, - event: SetPollingSource { - fetch: FetchStep::Url(FetchStepUrl { - url: "http://foo".to_string(), - event_time: None, - cache: None, - headers: None, - }), - prepare: None, - read: ReadStep::Json(ReadStepJson { - sub_path: None, - schema: None, - date_format: None, - encoding: None, - timestamp_format: None, - }), - preprocess: None, - merge: MergeStrategy::Append(MergeStrategyAppend {}), - }, - sequence_number: 0, - }, - ))) - }); - dependency_graph_repo_mock - } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/core/src/testing/mock_transform_execution_service.rs b/src/infra/core/src/testing/mock_transform_execution_service.rs index d3e08ece1..91fa719a0 100644 --- a/src/infra/core/src/testing/mock_transform_execution_service.rs +++ b/src/infra/core/src/testing/mock_transform_execution_service.rs @@ -18,7 +18,7 @@ mockall::mock! { pub TransformExecutionService {} #[async_trait::async_trait] - impl TransformExecutionService for TransformExecutionService { + impl TransformExecutor for TransformExecutionService { async fn execute_transform( &self, target: ResolvedDataset, diff --git a/src/infra/core/src/testing/mock_transform_request_planner.rs b/src/infra/core/src/testing/mock_transform_request_planner.rs index 00468b589..c99c2dfe9 100644 --- a/src/infra/core/src/testing/mock_transform_request_planner.rs +++ b/src/infra/core/src/testing/mock_transform_request_planner.rs @@ -7,8 +7,6 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -use chrono::Utc; -use internal_error::InternalError; use kamu_core::*; use opendatafabric::*; @@ -18,11 +16,6 @@ mockall::mock! { pub TransformRequestPlanner {} #[async_trait::async_trait] impl TransformRequestPlanner for TransformRequestPlanner { - async fn get_active_transform( - &self, - target: ResolvedDataset, - ) -> Result)>, InternalError>; - async fn build_transform_preliminary_plan( &self, target: ResolvedDataset, @@ -37,38 +30,3 @@ mockall::mock! 
{ } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -impl MockTransformRequestPlanner { - pub fn without_set_transform() -> Self { - let mut mock = Self::default(); - mock.expect_get_active_transform().return_once(|_| Ok(None)); - mock - } - - pub fn with_set_transform() -> Self { - let mut mock = Self::default(); - mock.expect_get_active_transform().return_once(|_| { - Ok(Some(( - Multihash::from_digest_sha3_256(b"a"), - MetadataBlockTyped { - system_time: Utc::now(), - prev_block_hash: None, - event: SetTransform { - inputs: vec![], - transform: Transform::Sql(TransformSql { - engine: "spark".to_string(), - version: None, - query: None, - queries: None, - temporal_tables: None, - }), - }, - sequence_number: 0, - }, - ))) - }); - mock - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/core/src/testing/mod.rs b/src/infra/core/src/testing/mod.rs index fa0514576..0ba2ff76d 100644 --- a/src/infra/core/src/testing/mod.rs +++ b/src/infra/core/src/testing/mod.rs @@ -7,6 +7,7 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. +mod base_repo_harness; mod dataset_data_helper; mod dataset_test_helper; mod dummy_smart_transfer_protocol_client; @@ -16,7 +17,6 @@ mod metadata_factory; mod minio_server; mod mock_dataset_action_authorizer; mod mock_dataset_changes_service; -mod mock_dependency_graph_repository; mod mock_odf_server_access_token_resolver; mod mock_polling_source_service; mod mock_sync_service; @@ -26,6 +26,7 @@ mod mock_transform_request_planner; mod parquet_reader_helper; mod parquet_writer_helper; +pub use base_repo_harness::*; pub use dataset_data_helper::*; pub use dataset_test_helper::*; pub use dummy_smart_transfer_protocol_client::*; @@ -35,7 +36,6 @@ pub use metadata_factory::*; pub use minio_server::*; pub use mock_dataset_action_authorizer::*; pub use mock_dataset_changes_service::*; -pub use mock_dependency_graph_repository::*; pub use mock_odf_server_access_token_resolver::*; pub use mock_polling_source_service::*; pub use mock_sync_service::*; diff --git a/src/infra/core/src/use_cases/compact_dataset_use_case_impl.rs b/src/infra/core/src/use_cases/compact_dataset_use_case_impl.rs index ec7e8292b..8f496e3b3 100644 --- a/src/infra/core/src/use_cases/compact_dataset_use_case_impl.rs +++ b/src/infra/core/src/use_cases/compact_dataset_use_case_impl.rs @@ -14,12 +14,13 @@ use kamu_core::auth::{DatasetAction, DatasetActionAuthorizer}; use kamu_core::{ CompactDatasetUseCase, CompactionError, + CompactionExecutor, CompactionListener, CompactionMultiListener, CompactionOptions, + CompactionPlanner, CompactionResponse, CompactionResult, - CompactionService, DatasetRegistry, NullCompactionMultiListener, }; @@ -30,19 +31,22 @@ use opendatafabric::DatasetHandle; #[component(pub)] #[interface(dyn CompactDatasetUseCase)] pub struct CompactDatasetUseCaseImpl { - compaction_service: Arc, + compaction_planner: Arc, + compaction_executor: Arc, dataset_registry: Arc, dataset_action_authorizer: Arc, } impl CompactDatasetUseCaseImpl { pub fn new( - compaction_service: Arc, + compaction_planner: Arc, + compaction_executor: Arc, dataset_registry: Arc, dataset_action_authorizer: Arc, ) -> Self { Self { - compaction_service, + compaction_planner, + compaction_executor, dataset_registry, dataset_action_authorizer, } @@ -71,10 +75,19 @@ impl CompactDatasetUseCase for 
CompactDatasetUseCaseImpl { // Resolve dataset let target = self.dataset_registry.get_dataset_by_handle(dataset_handle); - // Actual action - self.compaction_service - .compact_dataset(target, options, maybe_listener) - .await + // Plan compacting + let compaction_plan = self + .compaction_planner + .plan_compaction(target.clone(), options, maybe_listener.clone()) + .await?; + + // Execute compacting + let compaction_result = self + .compaction_executor + .execute(target, compaction_plan, maybe_listener) + .await?; + + Ok(compaction_result) } #[tracing::instrument( diff --git a/src/infra/core/src/use_cases/pull_dataset_use_case_impl.rs b/src/infra/core/src/use_cases/pull_dataset_use_case_impl.rs index 477ca2909..bc12edb64 100644 --- a/src/infra/core/src/use_cases/pull_dataset_use_case_impl.rs +++ b/src/infra/core/src/use_cases/pull_dataset_use_case_impl.rs @@ -32,7 +32,7 @@ pub struct PullDatasetUseCaseImpl { remote_alias_registry: Arc, polling_ingest_svc: Arc, transform_elaboration_svc: Arc, - transform_execution_svc: Arc, + transform_executor: Arc, sync_svc: Arc, tenancy_config: Arc, } @@ -45,7 +45,7 @@ impl PullDatasetUseCaseImpl { remote_alias_registry: Arc, polling_ingest_svc: Arc, transform_elaboration_svc: Arc, - transform_execution_svc: Arc, + transform_executor: Arc, sync_svc: Arc, tenancy_config: Arc, ) -> Self { @@ -56,7 +56,7 @@ impl PullDatasetUseCaseImpl { remote_alias_registry, polling_ingest_svc, transform_elaboration_svc, - transform_execution_svc, + transform_executor, sync_svc, tenancy_config, } @@ -120,7 +120,7 @@ impl PullDatasetUseCaseImpl { pti, options.transform_options, self.transform_elaboration_svc.clone(), - self.transform_execution_svc.clone(), + self.transform_executor.clone(), maybe_listener, )) } @@ -339,7 +339,12 @@ impl PullDatasetUseCaseImpl { maybe_listener: Option>, ) -> Result { let ingest_response = polling_ingest_svc - .ingest(pii.target.clone(), ingest_options, maybe_listener) + .ingest( + pii.target.clone(), + pii.metadata_state, + ingest_options, + maybe_listener, + ) .await; Ok(PullResponse { @@ -357,7 +362,7 @@ impl PullDatasetUseCaseImpl { pti: PullTransformItem, transform_options: TransformOptions, transform_elaboration_svc: Arc, - transform_execution_svc: Arc, + transform_executor: Arc, maybe_listener: Option>, ) -> Result { // Remember original request @@ -367,7 +372,7 @@ impl PullDatasetUseCaseImpl { async fn run_transform( pti: PullTransformItem, transform_elaboration_svc: Arc, - transform_execution_svc: Arc, + transform_executor: Arc, transform_options: TransformOptions, maybe_listener: Option>, ) -> (ResolvedDataset, Result) { @@ -384,7 +389,7 @@ impl PullDatasetUseCaseImpl { // Elaborate success Ok(TransformElaboration::Elaborated(plan)) => { // Execute phase - let (target, result) = transform_execution_svc + let (target, result) = transform_executor .execute_transform(pti.target, plan, maybe_listener) .await; ( @@ -405,7 +410,7 @@ impl PullDatasetUseCaseImpl { let transform_result = run_transform( pti, transform_elaboration_svc, - transform_execution_svc, + transform_executor, transform_options, maybe_listener, ) diff --git a/src/infra/core/src/use_cases/reset_dataset_use_case_impl.rs b/src/infra/core/src/use_cases/reset_dataset_use_case_impl.rs index e275a2570..74fbb79ba 100644 --- a/src/infra/core/src/use_cases/reset_dataset_use_case_impl.rs +++ b/src/infra/core/src/use_cases/reset_dataset_use_case_impl.rs @@ -11,7 +11,14 @@ use std::sync::Arc; use dill::{component, interface}; use kamu_core::auth::{DatasetAction, 
DatasetActionAuthorizer}; -use kamu_core::{DatasetRegistry, ResetDatasetUseCase, ResetError, ResetService}; +use kamu_core::{ + DatasetRegistry, + ResetDatasetUseCase, + ResetError, + ResetExecutor, + ResetPlanner, + ResetResult, +}; use opendatafabric::{DatasetHandle, Multihash}; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -19,19 +26,22 @@ use opendatafabric::{DatasetHandle, Multihash}; #[component(pub)] #[interface(dyn ResetDatasetUseCase)] pub struct ResetDatasetUseCaseImpl { - reset_service: Arc, + reset_planner: Arc, + reset_executor: Arc, dataset_registry: Arc, dataset_action_authorizer: Arc, } impl ResetDatasetUseCaseImpl { pub fn new( - reset_service: Arc, + reset_planner: Arc, + reset_executor: Arc, dataset_registry: Arc, dataset_action_authorizer: Arc, ) -> Self { Self { - reset_service, + reset_planner, + reset_executor, dataset_registry, dataset_action_authorizer, } @@ -51,19 +61,25 @@ impl ResetDatasetUseCase for ResetDatasetUseCaseImpl { dataset_handle: &DatasetHandle, maybe_new_head: Option<&Multihash>, maybe_old_head: Option<&Multihash>, - ) -> Result { + ) -> Result { // Permission check self.dataset_action_authorizer .check_action_allowed(dataset_handle, DatasetAction::Write) .await?; // Resolve dataset - let resolved_dataset = self.dataset_registry.get_dataset_by_handle(dataset_handle); + let target = self.dataset_registry.get_dataset_by_handle(dataset_handle); - // Actual action - self.reset_service - .reset_dataset(resolved_dataset, maybe_new_head, maybe_old_head) - .await + // Make a plan + let reset_plan = self + .reset_planner + .plan_reset(target.clone(), maybe_new_head, maybe_old_head) + .await?; + + // Execute the plan + let reset_result = self.reset_executor.execute(target, reset_plan).await?; + + Ok(reset_result) } } diff --git a/src/infra/core/src/use_cases/set_watermark_use_case_impl.rs b/src/infra/core/src/use_cases/set_watermark_use_case_impl.rs index 6d6de3f1b..2195ec1ff 100644 --- a/src/infra/core/src/use_cases/set_watermark_use_case_impl.rs +++ b/src/infra/core/src/use_cases/set_watermark_use_case_impl.rs @@ -12,13 +12,7 @@ use std::sync::Arc; use chrono::{DateTime, Utc}; use dill::{component, interface}; use kamu_core::auth::{DatasetAction, DatasetActionAuthorizer}; -use kamu_core::{ - DatasetRegistry, - SetWatermarkError, - SetWatermarkResult, - SetWatermarkUseCase, - WatermarkService, -}; +use kamu_core::*; use opendatafabric::DatasetHandle; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -26,19 +20,22 @@ use opendatafabric::DatasetHandle; #[component(pub)] #[interface(dyn SetWatermarkUseCase)] pub struct SetWatermarkUseCaseImpl { - watermark_service: Arc, + set_watermark_planner: Arc, + set_watermark_executor: Arc, dataset_registry: Arc, dataset_action_authorizer: Arc, } impl SetWatermarkUseCaseImpl { pub fn new( - watermark_service: Arc, + set_watermark_planner: Arc, + set_watermark_executor: Arc, dataset_registry: Arc, dataset_action_authorizer: Arc, ) -> Self { Self { - watermark_service, + set_watermark_planner, + set_watermark_executor, dataset_registry, dataset_action_authorizer, } @@ -64,12 +61,18 @@ impl SetWatermarkUseCase for SetWatermarkUseCaseImpl { .await?; // Resolve dataset - let resolved_dataset = self.dataset_registry.get_dataset_by_handle(dataset_handle); + let target = self.dataset_registry.get_dataset_by_handle(dataset_handle); - // Actual action - self.watermark_service - 
.set_watermark(resolved_dataset, new_watermark) - .await + // Make a plan + let plan = self + .set_watermark_planner + .plan_set_watermark(target.clone(), new_watermark) + .await?; + + // Execute the plan + let result = self.set_watermark_executor.execute(target, plan).await?; + + Ok(result) } } diff --git a/src/infra/core/src/watermark_service_impl.rs b/src/infra/core/src/watermark_service_impl.rs deleted file mode 100644 index ae1fb5cdf..000000000 --- a/src/infra/core/src/watermark_service_impl.rs +++ /dev/null @@ -1,152 +0,0 @@ -// Copyright Kamu Data, Inc. and contributors. All rights reserved. -// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0. - -use std::sync::Arc; - -use chrono::{DateTime, Utc}; -use dill::*; -use internal_error::{ErrorIntoInternal, ResultIntoInternal}; -use kamu_core::{ - AppendError, - AppendValidationError, - BlockRef, - CommitError, - DataWriter, - GetAliasesError, - GetSummaryOpts, - GetWatermarkError, - MetadataChainExt, - RemoteAliasKind, - RemoteAliasesRegistry, - ResolvedDataset, - SearchAddDataVisitor, - SetWatermarkError, - SetWatermarkResult, - WatermarkService, - WriteWatermarkError, - WriteWatermarkOpts, -}; -use kamu_ingest_datafusion::DataWriterDataFusion; -use opendatafabric::DatasetKind; -use time_source::SystemTimeSource; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -pub struct WatermarkServiceImpl { - remote_alias_reg: Arc, - system_time_source: Arc, -} - -#[component(pub)] -#[interface(dyn WatermarkService)] -impl WatermarkServiceImpl { - pub fn new( - remote_alias_reg: Arc, - system_time_source: Arc, - ) -> Self { - Self { - remote_alias_reg, - system_time_source, - } - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[async_trait::async_trait] -impl WatermarkService for WatermarkServiceImpl { - /// Attempt reading watermark that is currently associated with a dataset - #[tracing::instrument(level = "info", skip_all)] - async fn try_get_current_watermark( - &self, - resolved_dataset: ResolvedDataset, - ) -> Result>, GetWatermarkError> { - let head = resolved_dataset - .as_metadata_chain() - .resolve_ref(&BlockRef::Head) - .await - .int_err()?; - - let mut add_data_visitor = SearchAddDataVisitor::new(); - - resolved_dataset - .as_metadata_chain() - .accept_by_hash(&mut [&mut add_data_visitor], &head) - .await - .int_err()?; - - let current_watermark = add_data_visitor.into_event().and_then(|e| e.new_watermark); - - Ok(current_watermark) - } - - /// Manually advances the watermark of a root dataset - #[tracing::instrument(level = "info", skip_all, fields(%new_watermark))] - async fn set_watermark( - &self, - target: ResolvedDataset, - new_watermark: DateTime, - ) -> Result { - let aliases = match self - .remote_alias_reg - .get_remote_aliases(target.get_handle()) - .await - { - Ok(v) => Ok(v), - Err(GetAliasesError::Internal(e)) => Err(SetWatermarkError::Internal(e)), - }?; - - if !aliases.is_empty(RemoteAliasKind::Pull) { - return Err(SetWatermarkError::IsRemote); - } - - let summary = target - .get_summary(GetSummaryOpts::default()) - .await - .int_err()?; - - if summary.kind != DatasetKind::Root { - return Err(SetWatermarkError::IsDerivative); - } 
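////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Illustrative sketch, not part of the patch: the plan/execute split that the compaction, reset,
// and set-watermark use cases above now follow. The types below (`SamplePlan`, `SamplePlanner`,
// `SampleExecutor`) are simplified placeholders; the real traits are async, dependency-injected,
// and return domain-specific planning/execution errors.

pub struct SamplePlan {
    pub steps: Vec<String>,
}

pub trait SamplePlanner {
    fn plan(&self, target: &str) -> SamplePlan;
}

pub trait SampleExecutor {
    fn execute(&self, target: &str, plan: SamplePlan) -> usize;
}

// A use case only orchestrates: authorize, resolve the dataset, build a plan, then execute it.
pub fn run_use_case(
    planner: &dyn SamplePlanner,
    executor: &dyn SampleExecutor,
    target: &str,
) -> usize {
    let plan = planner.plan(target);
    executor.execute(target, plan)
}
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////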
- - let mut writer = DataWriterDataFusion::builder( - (*target).clone(), - datafusion::prelude::SessionContext::new(), - ) - .with_metadata_state_scanned(None) - .await - .int_err()? - .build(); - - match writer - .write_watermark( - new_watermark, - WriteWatermarkOpts { - system_time: self.system_time_source.now(), - new_source_state: None, - }, - ) - .await - { - Ok(res) => Ok(SetWatermarkResult::Updated { - old_head: Some(res.old_head), - new_head: res.new_head, - }), - Err( - WriteWatermarkError::EmptyCommit(_) - | WriteWatermarkError::CommitError(CommitError::MetadataAppendError( - AppendError::InvalidBlock(AppendValidationError::WatermarkIsNotMonotonic), - )), - ) => Ok(SetWatermarkResult::UpToDate), - Err(e) => Err(e.int_err().into()), - } - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/core/tests/benches/parallel_simple_transfer_protocol.rs b/src/infra/core/tests/benches/parallel_simple_transfer_protocol.rs index 4f9d95fa5..6cb41f007 100644 --- a/src/infra/core/tests/benches/parallel_simple_transfer_protocol.rs +++ b/src/infra/core/tests/benches/parallel_simple_transfer_protocol.rs @@ -27,7 +27,6 @@ use kamu::{ DatasetRegistryRepoBridge, DatasetRepositoryLocalFs, DatasetRepositoryWriter, - DependencyGraphServiceInMemory, IpfsGateway, RemoteReposDir, RemoteRepositoryRegistryImpl, @@ -35,6 +34,8 @@ use kamu::{ SyncServiceImpl, }; use kamu_accounts::CurrentAccountSubject; +use kamu_datasets_inmem::InMemoryDatasetDependencyRepository; +use kamu_datasets_services::DependencyGraphServiceImpl; use opendatafabric::*; use time_source::SystemTimeSourceDefault; use url::Url; @@ -64,7 +65,8 @@ async fn setup_dataset( std::fs::create_dir(&repos_dir).unwrap(); let catalog = dill::CatalogBuilder::new() - .add::() + .add::() + .add::() .add_value(ipfs_gateway) .add_value(ipfs_client) .add_value(CurrentAccountSubject::new_test()) diff --git a/src/infra/core/tests/tests/engine/test_engine_io.rs b/src/infra/core/tests/tests/engine/test_engine_io.rs index c5466c360..5489919a1 100644 --- a/src/infra/core/tests/tests/engine/test_engine_io.rs +++ b/src/infra/core/tests/tests/engine/test_engine_io.rs @@ -69,7 +69,8 @@ async fn test_engine_io_common< let transform_helper = TransformTestHelper::build( Arc::new(DatasetRegistryRepoBridge::new(dataset_repo.clone())), time_source.clone(), - Arc::new(CompactionServiceImpl::new( + Arc::new(CompactionPlannerImpl {}), + Arc::new(CompactionExecutorImpl::new( object_store_registry.clone(), time_source.clone(), run_info_dir.clone(), @@ -125,9 +126,17 @@ async fn test_engine_io_common< .unwrap() .create_dataset_result; + let root_target = ResolvedDataset::from(&root_created); + + let root_metadata_state = + DataWriterMetadataState::build(root_target.clone(), &BlockRef::Head, None) + .await + .unwrap(); + ingest_svc .ingest( - ResolvedDataset::from(&root_created), + root_target.clone(), + Box::new(root_metadata_state), PollingIngestOptions::default(), None, ) @@ -193,9 +202,15 @@ async fn test_engine_io_common< ) .unwrap(); + let root_metadata_state = + DataWriterMetadataState::build(root_target.clone(), &BlockRef::Head, None) + .await + .unwrap(); + ingest_svc .ingest( - ResolvedDataset::from(&root_created), + root_target, + Box::new(root_metadata_state), PollingIngestOptions::default(), None, ) diff --git a/src/infra/core/tests/tests/engine/test_engine_transform.rs b/src/infra/core/tests/tests/engine/test_engine_transform.rs index 001ee3bb3..0b7134373 100644 --- 
a/src/infra/core/tests/tests/engine/test_engine_transform.rs +++ b/src/infra/core/tests/tests/engine/test_engine_transform.rs @@ -219,7 +219,8 @@ struct TestHarness { tempdir: tempfile::TempDir, dataset_repo_writer: Arc, ingest_svc: Arc, - push_ingest_svc: Arc, + push_ingest_planner: Arc, + push_ingest_executor: Arc, transform_helper: TransformTestHelper, time_source: Arc, } @@ -255,11 +256,13 @@ impl TestHarness { .add::() .add::() .add::() - .add::() + .add::() + .add::() .add::() .add::() - .add::() - .add::() + .add::() + .add::() + .add::() .add::() .add_value(SystemTimeSourceStub::new_set( Utc.with_ymd_and_hms(2050, 1, 1, 12, 0, 0).unwrap(), @@ -273,11 +276,18 @@ impl TestHarness { tempdir, dataset_repo_writer: catalog.get_one().unwrap(), ingest_svc: catalog.get_one().unwrap(), - push_ingest_svc: catalog.get_one().unwrap(), + push_ingest_planner: catalog.get_one().unwrap(), + push_ingest_executor: catalog.get_one().unwrap(), time_source: catalog.get_one().unwrap(), transform_helper, } } + + async fn build_metadata_state(&self, created: &CreateDatasetResult) -> DataWriterMetadataState { + DataWriterMetadataState::build(ResolvedDataset::from(created), &BlockRef::Head, None) + .await + .unwrap() + } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -337,10 +347,15 @@ async fn test_transform_common(transform: Transform, test_retractions: bool) { .unwrap() .create_dataset_result; + let root_target = ResolvedDataset::from(&root_created); + + let root_metadata_state = harness.build_metadata_state(&root_created).await; + harness .ingest_svc .ingest( - ResolvedDataset::from(&root_created), + root_target.clone(), + Box::new(root_metadata_state), PollingIngestOptions::default(), None, ) @@ -439,10 +454,13 @@ async fn test_transform_common(transform: Transform, test_retractions: bool) { ) .unwrap(); + let root_metadata_state = harness.build_metadata_state(&root_created).await; + harness .ingest_svc .ingest( - ResolvedDataset::from(&root_created), + root_target.clone(), + Box::new(root_metadata_state), PollingIngestOptions::default(), None, ) @@ -755,13 +773,22 @@ async fn test_transform_empty_inputs() { .await .unwrap(); - let ingest_result = harness - .push_ingest_svc - .ingest_from_file_stream( + let ingest_plan = harness + .push_ingest_planner + .plan_ingest( ResolvedDataset::from(&root), None, - Box::new(tokio::io::BufReader::new(std::io::Cursor::new(b""))), PushIngestOpts::default(), + ) + .await + .unwrap(); + + let ingest_result = harness + .push_ingest_executor + .ingest_from_stream( + ResolvedDataset::from(&root), + ingest_plan, + Box::new(tokio::io::BufReader::new(std::io::Cursor::new(b""))), None, ) .await @@ -796,15 +823,24 @@ async fn test_transform_empty_inputs() { // 3: Input gets some data /////////////////////////////////////////////////////////////////////////// - let ingest_result = harness - .push_ingest_svc - .ingest_from_file_stream( + let ingest_plan = harness + .push_ingest_planner + .plan_ingest( ResolvedDataset::from(&root), None, + PushIngestOpts::default(), + ) + .await + .unwrap(); + + let ingest_result = harness + .push_ingest_executor + .ingest_from_stream( + ResolvedDataset::from(&root), + ingest_plan, Box::new(tokio::io::BufReader::new(std::io::Cursor::new( br#"{"city": "A", "population": 100}"#, ))), - PushIngestOpts::default(), None, ) .await diff --git a/src/infra/core/tests/tests/ingest/test_polling_ingest.rs b/src/infra/core/tests/tests/ingest/test_polling_ingest.rs index 
90e5478eb..29d7aee1f 100644 --- a/src/infra/core/tests/tests/ingest/test_polling_ingest.rs +++ b/src/infra/core/tests/tests/ingest/test_polling_ingest.rs @@ -1270,9 +1270,16 @@ impl IngestTestHarness { &self, created: &CreateDatasetResult, ) -> Result { + let target = ResolvedDataset::from(created); + + let metadata_state = DataWriterMetadataState::build(target.clone(), &BlockRef::Head, None) + .await + .unwrap(); + self.ingest_svc .ingest( - ResolvedDataset::from(created), + target, + Box::new(metadata_state), PollingIngestOptions::default(), None, ) diff --git a/src/infra/core/tests/tests/ingest/test_push_ingest.rs b/src/infra/core/tests/tests/ingest/test_push_ingest.rs index 846693ddc..0809c2b41 100644 --- a/src/infra/core/tests/tests/ingest/test_push_ingest.rs +++ b/src/infra/core/tests/tests/ingest/test_push_ingest.rs @@ -20,6 +20,8 @@ use kamu_accounts::CurrentAccountSubject; use opendatafabric::*; use tempfile::TempDir; use time_source::{SystemTimeSource, SystemTimeSourceStub}; +use tokio::io::AsyncRead; +use url::Url; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -74,16 +76,13 @@ async fn test_ingest_push_url_stream() { .unwrap(); harness - .push_ingest_svc .ingest_from_url( - ResolvedDataset::from(&created), + &created, None, url::Url::from_file_path(&src_path).unwrap(), PushIngestOpts::default(), - None, ) - .await - .unwrap(); + .await; data_helper .assert_last_data_eq( @@ -134,16 +133,8 @@ async fn test_ingest_push_url_stream() { )); harness - .push_ingest_svc - .ingest_from_file_stream( - ResolvedDataset::from(&created), - None, - Box::new(data), - PushIngestOpts::default(), - None, - ) - .await - .unwrap(); + .ingest_from_stream(&created, None, Box::new(data), PushIngestOpts::default()) + .await; data_helper .assert_last_data_records_eq(indoc!( @@ -217,19 +208,16 @@ async fn test_ingest_push_media_type_override() { .unwrap(); harness - .push_ingest_svc .ingest_from_url( - ResolvedDataset::from(&created), + &created, None, url::Url::from_file_path(&src_path).unwrap(), PushIngestOpts { media_type: Some(MediaType::CSV.to_owned()), ..Default::default() }, - None, ) - .await - .unwrap(); + .await; data_helper .assert_last_data_eq( @@ -270,19 +258,16 @@ async fn test_ingest_push_media_type_override() { .unwrap(); harness - .push_ingest_svc .ingest_from_url( - ResolvedDataset::from(&created), + &created, None, url::Url::from_file_path(&src_path).unwrap(), PushIngestOpts { media_type: Some(MediaType::NDJSON.to_owned()), ..Default::default() }, - None, ) - .await - .unwrap(); + .await; data_helper .assert_last_data_eq( @@ -325,19 +310,16 @@ async fn test_ingest_push_media_type_override() { .unwrap(); harness - .push_ingest_svc .ingest_from_url( - ResolvedDataset::from(&created), + &created, None, url::Url::from_file_path(&src_path).unwrap(), PushIngestOpts { media_type: Some(MediaType::JSON.to_owned()), ..Default::default() }, - None, ) - .await - .unwrap(); + .await; data_helper .assert_last_data_eq( @@ -413,16 +395,13 @@ async fn test_ingest_push_schema_stability() { .unwrap(); harness - .push_ingest_svc .ingest_from_url( - ResolvedDataset::from(&created), + &created, None, url::Url::from_file_path(&src_path).unwrap(), PushIngestOpts::default(), - None, ) - .await - .unwrap(); + .await; let set_data_schema = data_helper.get_last_set_data_schema_block().await.event; @@ -498,9 +477,8 @@ async fn test_ingest_inference_automatic_coercion_of_event_time_from_string() { .unwrap(); harness - .push_ingest_svc 
.ingest_from_url( - ResolvedDataset::from(&created), + &created, None, url::Url::from_file_path(&src_path).unwrap(), PushIngestOpts { @@ -510,10 +488,8 @@ async fn test_ingest_inference_automatic_coercion_of_event_time_from_string() { }, ..Default::default() }, - None, ) - .await - .unwrap(); + .await; data_helper .assert_last_data_eq( @@ -576,9 +552,8 @@ async fn test_ingest_inference_automatic_coercion_of_event_time_from_unixtime() .unwrap(); harness - .push_ingest_svc .ingest_from_url( - ResolvedDataset::from(&created), + &created, None, url::Url::from_file_path(&src_path).unwrap(), PushIngestOpts { @@ -588,10 +563,8 @@ async fn test_ingest_inference_automatic_coercion_of_event_time_from_unixtime() }, ..Default::default() }, - None, ) - .await - .unwrap(); + .await; data_helper .assert_last_data_eq( @@ -654,9 +627,8 @@ async fn test_ingest_inference_automatic_renaming_of_conflicting_columns() { .unwrap(); harness - .push_ingest_svc .ingest_from_url( - ResolvedDataset::from(&created), + &created, None, url::Url::from_file_path(&src_path).unwrap(), PushIngestOpts { @@ -666,10 +638,8 @@ async fn test_ingest_inference_automatic_renaming_of_conflicting_columns() { }, ..Default::default() }, - None, ) - .await - .unwrap(); + .await; data_helper .assert_last_data_eq( @@ -753,16 +723,13 @@ async fn test_ingest_sql_case_sensitivity() { .unwrap(); harness - .push_ingest_svc .ingest_from_url( - ResolvedDataset::from(&created), + &created, None, url::Url::from_file_path(&src_path).unwrap(), PushIngestOpts::default(), - None, ) - .await - .unwrap(); + .await; data_helper .assert_last_data_eq( @@ -800,7 +767,8 @@ struct IngestTestHarness { temp_dir: TempDir, dataset_registry: Arc, dataset_repo_writer: Arc, - push_ingest_svc: Arc, + push_ingest_planner: Arc, + push_ingest_executor: Arc, ctx: SessionContext, } @@ -832,14 +800,16 @@ impl IngestTestHarness { .add::() .add::() .add::() - .add::() + .add::() + .add::() .build(); Self { temp_dir, dataset_registry: catalog.get_one().unwrap(), dataset_repo_writer: catalog.get_one().unwrap(), - push_ingest_svc: catalog.get_one().unwrap(), + push_ingest_planner: catalog.get_one().unwrap(), + push_ingest_executor: catalog.get_one().unwrap(), ctx: SessionContext::new_with_config(SessionConfig::new().with_target_partitions(1)), } } @@ -861,4 +831,46 @@ impl IngestTestHarness { DatasetDataHelper::new_with_context((*resolved_dataset).clone(), self.ctx.clone()) } + + async fn ingest_from_stream( + &self, + created: &CreateDatasetResult, + source_name: Option<&str>, + data: Box, + opts: PushIngestOpts, + ) { + let target = ResolvedDataset::from(created); + + let ingest_plan = self + .push_ingest_planner + .plan_ingest(target.clone(), source_name, opts) + .await + .unwrap(); + + self.push_ingest_executor + .ingest_from_stream(target, ingest_plan, data, None) + .await + .unwrap(); + } + + async fn ingest_from_url( + &self, + created: &CreateDatasetResult, + source_name: Option<&str>, + url: Url, + opts: PushIngestOpts, + ) { + let target = ResolvedDataset::from(created); + + let ingest_plan = self + .push_ingest_planner + .plan_ingest(target.clone(), source_name, opts) + .await + .unwrap(); + + self.push_ingest_executor + .ingest_from_url(target, ingest_plan, url, None) + .await + .unwrap(); + } } diff --git a/src/infra/core/tests/tests/ingest/test_writer.rs b/src/infra/core/tests/tests/ingest/test_writer.rs index 45e1238e8..c2aeb7492 100644 --- a/src/infra/core/tests/tests/ingest/test_writer.rs +++ b/src/infra/core/tests/tests/ingest/test_writer.rs @@ -9,10 +9,8 @@ use 
std::assert_matches::assert_matches; use std::path::PathBuf; -use std::sync::Arc; use chrono::{DateTime, TimeZone, Utc}; -use datafusion::arrow::datatypes::SchemaRef; use datafusion::prelude::*; use dill::Component; use indoc::indoc; @@ -20,10 +18,11 @@ use kamu::testing::MetadataFactory; use kamu::{DatasetRepositoryLocalFs, DatasetRepositoryWriter}; use kamu_accounts::CurrentAccountSubject; use kamu_core::*; -use kamu_data_utils::testing::{assert_data_eq, assert_schema_eq}; +use kamu_data_utils::testing::{assert_arrow_schema_eq, assert_data_eq, assert_schema_eq}; use kamu_ingest_datafusion::*; use odf::{AsTypedBlock, DatasetAlias}; use opendatafabric as odf; +use serde_json::json; use time_source::SystemTimeSourceDefault; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -63,20 +62,65 @@ async fn test_data_writer_happy_path() { let df = harness.get_last_data().await; - assert_schema_eq( - df.schema(), - indoc!( - r#" - message arrow_schema { - REQUIRED INT64 offset; - REQUIRED INT32 op; - REQUIRED INT64 system_time (TIMESTAMP(MILLIS,true)); - OPTIONAL INT64 event_time (TIMESTAMP(MILLIS,true)); - OPTIONAL BYTE_ARRAY city (STRING); - OPTIONAL INT64 population; - } - "# - ), + // Check schema of the data + assert_arrow_schema_eq( + df.schema().as_arrow(), + json!({ + "fields": [{ + "name": "offset", + "data_type": "Int64", + "dict_id": 0, + "dict_is_ordered": false, + "metadata": {}, + "nullable": false, + }, { + "name": "op", + "data_type": "Int32", + "dict_id": 0, + "dict_is_ordered": false, + "metadata": {}, + "nullable": false, + }, { + "name": "system_time", + "data_type": { + "Timestamp": [ + "Millisecond", + "UTC", + ], + }, + "dict_id": 0, + "dict_is_ordered": false, + "metadata": {}, + "nullable": false, + }, { + "name": "event_time", + "data_type": { + "Timestamp": [ + "Millisecond", + "UTC", + ], + }, + "dict_id": 0, + "dict_is_ordered": false, + "metadata": {}, + "nullable": true, + }, { + "name": "city", + "data_type": "Utf8View", + "dict_id": 0, + "dict_is_ordered": false, + "metadata": {}, + "nullable": true, + }, { + "name": "population", + "data_type": "Int64", + "dict_id": 0, + "dict_is_ordered": false, + "metadata": {}, + "nullable": true, + }], + "metadata": {}, + }), ); assert_data_eq( @@ -100,11 +144,68 @@ async fn test_data_writer_happy_path() { Some(&harness.source_event_time) ); - // Compare schemas in block and in data + // Check schema in block SetDataSchema block let (schema_block_hash, schema_block) = harness.get_last_schema_block().await; - let schema_in_block = schema_block.event.schema_as_arrow().unwrap(); - let schema_in_data = SchemaRef::new(df.schema().into()); - assert_eq!(schema_in_block, schema_in_data); + assert_arrow_schema_eq( + &schema_block.event.schema_as_arrow().unwrap(), + json!({ + "fields": [{ + "name": "offset", + "data_type": "Int64", + "dict_id": 0, + "dict_is_ordered": false, + "metadata": {}, + "nullable": false, + }, { + "name": "op", + "data_type": "Int32", + "dict_id": 0, + "dict_is_ordered": false, + "metadata": {}, + "nullable": false, + }, { + "name": "system_time", + "data_type": { + "Timestamp": [ + "Millisecond", + "UTC", + ], + }, + "dict_id": 0, + "dict_is_ordered": false, + "metadata": {}, + "nullable": false, + }, { + "name": "event_time", + "data_type": { + "Timestamp": [ + "Millisecond", + "UTC", + ], + }, + "dict_id": 0, + "dict_is_ordered": false, + "metadata": {}, + "nullable": true, + }, { + "name": "city", + // NOTE: The difference between Utf8 
and Utf8View is expected + "data_type": "Utf8", + "dict_id": 0, + "dict_is_ordered": false, + "metadata": {}, + "nullable": true, + }, { + "name": "population", + "data_type": "Int64", + "dict_id": 0, + "dict_is_ordered": false, + "metadata": {}, + "nullable": true, + }], + "metadata": {}, + }), + ); // Round 2 harness.set_system_time(Utc.with_ymd_and_hms(2010, 1, 2, 12, 0, 0).unwrap()); @@ -419,11 +520,179 @@ async fn test_data_writer_rejects_incompatible_schema() { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#[test_group::group(engine, ingest, datafusion)] +#[test] +fn test_data_writer_offsets_are_sequential_partitioned() { + // Ensure our logic is resistant to partitioning + let ctx = SessionContext::new_with_config(SessionConfig::new().with_target_partitions(4)); + + // Ensure we run with multiple threads + // otherwise `target_partitions` doesn't matter + let plan = tokio::runtime::Builder::new_multi_thread() + .worker_threads(4) + .build() + .unwrap() + .block_on(test_data_writer_offsets_are_sequential_impl(ctx)); + + pretty_assertions::assert_eq!( + indoc::indoc!( + r#" + Optimized physical plan: + DataSinkExec: sink=ParquetSink(file_groups=[]) + SortPreservingMergeExec: [offset@0 ASC] + SortExec: expr=[offset@0 ASC], preserve_partitioning=[true] + ProjectionExec: expr=[CAST(row_number() PARTITION BY [Int32(1)] ORDER BY [event_time ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 AS Int64) + -1 as offset, op@0 as op, system_time@4 as system_time, event_time@1 as event_time, city@2 as city, population@3 as population] + BoundedWindowAggExec: wdw=[row_number() PARTITION BY [Int32(1)] ORDER BY [event_time ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() PARTITION BY [Int32(1)] ORDER BY [event_time ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted] + SortExec: expr=[event_time@1 ASC], preserve_partitioning=[true] + CoalesceBatchesExec: target_batch_size=8192 + RepartitionExec: partitioning=Hash([1], 4), input_partitions=4 + ProjectionExec: expr=[0 as op, CASE WHEN event_time@0 IS NULL THEN 946728000000 ELSE event_time@0 END as event_time, city@1 as city, population@2 as population, 1262347200000 as system_time] + ProjectionExec: expr=[CAST(event_time@0 AS Timestamp(Millisecond, Some("UTC"))) as event_time, city@1 as city, population@2 as population] + JsonExec: file_groups={4 groups: [[tmp/data.ndjson:0..2991668], [tmp/data.ndjson:2991668..5983336], [tmp/data.ndjson:5983336..8975004], [tmp/data.ndjson:8975004..11966670]]}, projection=[event_time, city, population] + "# + ).trim(), + plan + ); +} + +#[test_group::group(engine, ingest, datafusion)] +#[test] +fn test_data_writer_offsets_are_sequential_serialized() { + let ctx = SessionContext::new_with_config(SessionConfig::new().with_target_partitions(1)); + + // Ensure we run with multiple threads + let plan = tokio::runtime::Builder::new_multi_thread() + .worker_threads(4) + .build() + .unwrap() + .block_on(test_data_writer_offsets_are_sequential_impl(ctx)); + + pretty_assertions::assert_eq!( + indoc::indoc!( + r#" + Optimized physical plan: + DataSinkExec: sink=ParquetSink(file_groups=[]) + SortExec: expr=[offset@0 ASC], preserve_partitioning=[false] + 
ProjectionExec: expr=[CAST(row_number() PARTITION BY [Int32(1)] ORDER BY [event_time ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 AS Int64) + -1 as offset, op@0 as op, system_time@4 as system_time, event_time@1 as event_time, city@2 as city, population@3 as population] + BoundedWindowAggExec: wdw=[row_number() PARTITION BY [Int32(1)] ORDER BY [event_time ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() PARTITION BY [Int32(1)] ORDER BY [event_time ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow, is_causal: true }], mode=[Sorted] + SortExec: expr=[event_time@1 ASC], preserve_partitioning=[false] + ProjectionExec: expr=[0 as op, CASE WHEN event_time@0 IS NULL THEN 946728000000 ELSE event_time@0 END as event_time, city@1 as city, population@2 as population, 1262347200000 as system_time] + ProjectionExec: expr=[CAST(event_time@0 AS Timestamp(Millisecond, Some("UTC"))) as event_time, city@1 as city, population@2 as population] + JsonExec: file_groups={1 group: [[tmp/data.ndjson:0..11966670]]}, projection=[event_time, city, population] + "# + ).trim(), + plan + ); +} + +async fn test_data_writer_offsets_are_sequential_impl(ctx: SessionContext) -> String { + use std::io::Write; + + testing_logger::setup(); + + let harness = Harness::new(vec![MetadataFactory::set_polling_source() + .merge(odf::MergeStrategyLedger { + primary_key: vec!["event_time".to_string(), "city".to_string()], + }) + .build() + .into()]) + .await; + + let mut writer = DataWriterDataFusion::from_metadata_chain( + ctx.clone(), + harness.target.clone(), + &BlockRef::Head, + None, + ) + .await + .unwrap(); + + let mut event_time = Utc.with_ymd_and_hms(2010, 1, 1, 0, 0, 0).unwrap(); + let data_path = harness.temp_dir.path().join("data.ndjson"); + let mut file = std::fs::File::create_new(&data_path).unwrap(); + + // Generate a lot of data to make parquet split it into chunks + for i in 0..50_000 { + for city in ["A", "B", "C"] { + writeln!( + &mut file, + "{{\"event_time\": \"{}\", \"city\": \"{}\", \"population\": \"{}\"}}", + event_time.to_rfc3339(), + city, + i, + ) + .unwrap(); + } + event_time += chrono::Duration::minutes(1); + } + + let df = ReaderNdJson::new( + ctx.clone(), + odf::ReadStepNdJson { + schema: Some(vec![ + "event_time TIMESTAMP".to_string(), + "city STRING".to_string(), + "population BIGINT".to_string(), + ]), + ..Default::default() + }, + ) + .await + .unwrap() + .read(&data_path) + .await + .unwrap(); + + writer + .write( + Some(df), + WriteDataOpts { + system_time: harness.system_time, + source_event_time: harness.source_event_time, + new_watermark: None, + new_source_state: None, + data_staging_path: harness.temp_dir.path().join("data.parquet"), + }, + ) + .await + .unwrap(); + + let data_path = harness.get_last_data_file().await; + + kamu_data_utils::testing::assert_parquet_offsets_are_in_order(&data_path); + + let plan = std::sync::Mutex::new(String::new()); + testing_logger::validate(|capture| { + let p = capture + .iter() + .find(|c| c.body.contains("Optimized physical plan:")) + .unwrap() + .body + .trim() + .replace( + harness + .temp_dir + .path() + .display() + .to_string() + .trim_start_matches('/'), + "tmp", + ); + + *plan.lock().unwrap() = p; + }); + plan.into_inner().unwrap() +} + 
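////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Illustrative sketch, not part of the patch: the invariant the two offset tests above pin down.
// Regardless of how DataFusion partitions the input, the written offsets must form a gapless,
// strictly increasing sequence; `assert_parquet_offsets_are_in_order` is assumed to verify this
// over the produced Parquet file. A hypothetical in-memory version of the same check:

pub fn assert_offsets_are_sequential(offsets: &[i64]) {
    for pair in offsets.windows(2) {
        assert_eq!(
            pair[1],
            pair[0] + 1,
            "offsets must increase by exactly one, got {} after {}",
            pair[1],
            pair[0]
        );
    }
}

// Usage: a slice appended after earlier blocks may start at any offset (e.g. 190),
// but must continue without gaps or reordering.
// assert_offsets_are_sequential(&[190, 191, 192, 193]);
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////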
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[test_group::group(engine, ingest, datafusion)] #[test_log::test(tokio::test)] async fn test_data_writer_ledger_orders_by_event_time() { let mut harness = Harness::new(vec![MetadataFactory::set_polling_source() - .merge(odf::MergeStrategyAppend {}) + .merge(odf::MergeStrategyLedger { + primary_key: vec!["event_time".to_string(), "city".to_string()], + }) .build() .into()]) .await; @@ -775,20 +1044,20 @@ async fn test_data_writer_builder_scan_no_source() { .into()]) .await; - let b = DataWriterDataFusion::builder(harness.dataset.clone(), harness.ctx.clone()) - .with_metadata_state_scanned(None) - .await - .unwrap(); + let metadata_state = + DataWriterMetadataState::build(harness.target.clone(), &BlockRef::Head, None) + .await + .unwrap(); let head = harness - .dataset + .target .as_metadata_chain() .resolve_ref(&BlockRef::Head) .await .unwrap(); assert_matches!( - b.metadata_state().unwrap(), + metadata_state, DataWriterMetadataState { head: h, schema: None, @@ -800,7 +1069,7 @@ async fn test_data_writer_builder_scan_no_source() { prev_watermark: None, prev_source_state: None, .. - } if *h == head && vocab.event_time_column == "foo" + } if h == head && vocab.event_time_column == "foo" ); } @@ -817,13 +1086,13 @@ async fn test_data_writer_builder_scan_polling_source() { .into()]) .await; - let b = DataWriterDataFusion::builder(harness.dataset.clone(), harness.ctx.clone()) - .with_metadata_state_scanned(None) - .await - .unwrap(); + let metadata_state = + DataWriterMetadataState::build(harness.target.clone(), &BlockRef::Head, None) + .await + .unwrap(); assert_matches!( - b.metadata_state().unwrap(), + metadata_state, DataWriterMetadataState { schema: None, source_event: Some(_), @@ -834,7 +1103,7 @@ async fn test_data_writer_builder_scan_polling_source() { prev_watermark: None, prev_source_state: None, .. - } if *vocab == odf::DatasetVocabulary::default() + } if vocab == odf::DatasetVocabulary::default() ); } @@ -858,13 +1127,13 @@ async fn test_data_writer_builder_scan_push_source() { .into()]) .await; - let b = DataWriterDataFusion::builder(harness.dataset.clone(), harness.ctx.clone()) - .with_metadata_state_scanned(None) - .await - .unwrap(); + let metadata_state = + DataWriterMetadataState::build(harness.target.clone(), &BlockRef::Head, None) + .await + .unwrap(); assert_matches!( - b.metadata_state().unwrap(), + metadata_state, DataWriterMetadataState { schema: None, source_event: Some(_), @@ -875,7 +1144,7 @@ async fn test_data_writer_builder_scan_push_source() { prev_watermark: None, prev_source_state: None, .. - } if *vocab == odf::DatasetVocabulary::default() + } if vocab == odf::DatasetVocabulary::default() ); } @@ -908,13 +1177,13 @@ async fn test_data_writer_builder_scan_push_source_with_extra_events() { ]) .await; - let b = DataWriterDataFusion::builder(harness.dataset.clone(), harness.ctx.clone()) - .with_metadata_state_scanned(None) - .await - .unwrap(); + let metadata_state = + DataWriterMetadataState::build(harness.target.clone(), &BlockRef::Head, None) + .await + .unwrap(); assert_matches!( - b.metadata_state().unwrap(), + metadata_state, DataWriterMetadataState { schema: None, source_event: Some(_), @@ -925,7 +1194,7 @@ async fn test_data_writer_builder_scan_push_source_with_extra_events() { prev_watermark: None, prev_source_state: None, .. 
- } if *vocab == odf::DatasetVocabulary::default() + } if vocab == odf::DatasetVocabulary::default() ); } @@ -933,7 +1202,7 @@ async fn test_data_writer_builder_scan_push_source_with_extra_events() { struct Harness { temp_dir: tempfile::TempDir, - dataset: Arc, + target: ResolvedDataset, writer: DataWriterDataFusion, ctx: SessionContext, @@ -959,7 +1228,7 @@ impl Harness { let dataset_repo = catalog.get_one::().unwrap(); - let dataset = dataset_repo + let foo_created = dataset_repo .create_dataset( &DatasetAlias::new(None, odf::DatasetName::new_unchecked("foo")), MetadataFactory::metadata_block( @@ -969,11 +1238,11 @@ impl Harness { .build_typed(), ) .await - .unwrap() - .dataset; + .unwrap(); for event in dataset_events { - dataset + foo_created + .dataset .commit_event( event, CommitOpts { @@ -985,17 +1254,22 @@ impl Harness { .unwrap(); } + let foo_target = ResolvedDataset::from(&foo_created); + let ctx = SessionContext::new_with_config(SessionConfig::new().with_target_partitions(1)); - let writer = DataWriterDataFusion::builder(dataset.clone(), ctx.clone()) - .with_metadata_state_scanned(None) - .await - .unwrap() - .build(); + let writer = DataWriterDataFusion::from_metadata_chain( + ctx.clone(), + foo_target.clone(), + &BlockRef::Head, + None, + ) + .await + .unwrap(); Self { temp_dir, - dataset, + target: foo_target, writer, ctx, system_time, @@ -1012,11 +1286,14 @@ impl Harness { } async fn reset_writer(&mut self) { - self.writer = DataWriterDataFusion::builder(self.dataset.clone(), self.ctx.clone()) - .with_metadata_state_scanned(None) - .await - .unwrap() - .build(); + self.writer = DataWriterDataFusion::from_metadata_chain( + self.ctx.clone(), + self.target.clone(), + &BlockRef::Head, + None, + ) + .await + .unwrap(); } async fn write_opts( @@ -1056,7 +1333,7 @@ impl Harness { source_event_time: self.source_event_time, new_watermark: None, new_source_state, - data_staging_path: self.temp_dir.path().join("write.tmp"), + data_staging_path: self.temp_dir.path().join("data.parquet"), }, ) .await @@ -1072,7 +1349,7 @@ impl Harness { use futures::StreamExt; let (hash, block) = self - .dataset + .target .as_metadata_chain() .iter_blocks() .filter_ok(|(_, b)| b.as_typed::().is_some()) @@ -1088,7 +1365,7 @@ impl Harness { use futures::StreamExt; let (_, block) = self - .dataset + .target .as_metadata_chain() .iter_blocks() .next() @@ -1102,7 +1379,7 @@ impl Harness { let block = self.get_last_data_block().await; kamu_data_utils::data::local_url::into_local_path( - self.dataset + self.target .as_data_repo() .get_internal_url(&block.event.new_data.unwrap().physical_hash) .await, diff --git a/src/infra/core/tests/tests/mod.rs b/src/infra/core/tests/tests/mod.rs index cd87628a3..387f00765 100644 --- a/src/infra/core/tests/tests/mod.rs +++ b/src/infra/core/tests/tests/mod.rs @@ -10,24 +10,23 @@ mod engine; mod ingest; mod repos; -mod test_compact_service_impl; +mod test_compaction_services_impl; mod test_dataset_changes_service_impl; mod test_dataset_ownership_service_inmem; mod test_datasets_filtering; -mod test_dependency_graph_inmem; mod test_metadata_chain_comparator; mod test_pull_request_planner_impl; mod test_push_request_planner_impl; mod test_query_service_impl; mod test_remote_status_service; -mod test_reset_service_impl; +mod test_reset_services_impl; mod test_resource_loader_impl; mod test_schema_utils; mod test_search_service_impl; mod test_serde_yaml; mod test_setup; mod test_sync_service_impl; -mod test_transform_service_impl; +mod test_transform_services_impl; mod 
test_verification_service_impl; -mod test_watermark_service_impl; +mod test_watermark_services_impl; mod use_cases; diff --git a/src/infra/core/tests/tests/test_compact_service_impl.rs b/src/infra/core/tests/tests/test_compaction_services_impl.rs similarity index 86% rename from src/infra/core/tests/tests/test_compact_service_impl.rs rename to src/infra/core/tests/tests/test_compaction_services_impl.rs index 227b4a4ca..0e50a0fdf 100644 --- a/src/infra/core/tests/tests/test_compact_service_impl.rs +++ b/src/infra/core/tests/tests/test_compaction_services_impl.rs @@ -10,13 +10,13 @@ use std::assert_matches::assert_matches; use std::sync::Arc; -use chrono::{DateTime, NaiveDate, TimeDelta, TimeZone, Utc}; +use chrono::{DateTime, TimeZone, Utc}; use datafusion::execution::config::SessionConfig; use datafusion::execution::context::SessionContext; use dill::Component; -use domain::{CompactionError, CompactionOptions, CompactionResult, CompactionService}; +use domain::{CompactionError, CompactionOptions, CompactionResult}; use futures::TryStreamExt; -use indoc::{formatdoc, indoc}; +use indoc::indoc; use kamu::domain::*; use kamu::testing::{DatasetDataHelper, LocalS3Server, MetadataFactory}; use kamu::utils::s3_context::S3Context; @@ -727,7 +727,9 @@ async fn test_dataset_compaction_derive_error() { harness .compact_dataset(&created, CompactionOptions::default(),) .await, - Err(CompactionError::InvalidDatasetKind(_)), + Err(CompactionError::Planning( + CompactionPlanningError::InvalidDatasetKind(_) + )), ); } @@ -741,7 +743,7 @@ async fn test_large_dataset_compact() { let created = harness.create_test_root_dataset().await; let dataset_ref = created.dataset_handle.as_local_ref(); - harness.ingest_multiple_blocks(&created, 100).await; + harness.ingest_multiple_blocks(&created, 100, 2).await; let data_helper = harness.dataset_data_helper(&dataset_ref).await; @@ -769,8 +771,8 @@ async fn test_large_dataset_compact() { +--------+----+----------------------+----------------------+------+------------+ | offset | op | system_time | date | city | population | +--------+----+----------------------+----------------------+------+------------+ - | 198 | 0 | 2050-01-01T12:00:00Z | 2020-04-09T00:00:00Z | A | 1000 | - | 199 | 0 | 2050-01-01T12:00:00Z | 2020-04-10T00:00:00Z | B | 2000 | + | 198 | 0 | 2050-01-01T12:00:00Z | 2010-01-01T03:18:00Z | A | 198 | + | 199 | 0 | 2050-01-01T12:00:00Z | 2010-01-01T03:19:00Z | B | 199 | +--------+----+----------------------+----------------------+------+------------+ "# ), @@ -819,16 +821,16 @@ async fn test_large_dataset_compact() { +--------+----+----------------------+----------------------+------+------------+ | offset | op | system_time | date | city | population | +--------+----+----------------------+----------------------+------+------------+ - | 190 | 0 | 2050-01-01T12:00:00Z | 2020-04-05T00:00:00Z | A | 1000 | - | 191 | 0 | 2050-01-01T12:00:00Z | 2020-04-06T00:00:00Z | B | 2000 | - | 192 | 0 | 2050-01-01T12:00:00Z | 2020-04-06T00:00:00Z | A | 1000 | - | 193 | 0 | 2050-01-01T12:00:00Z | 2020-04-07T00:00:00Z | B | 2000 | - | 194 | 0 | 2050-01-01T12:00:00Z | 2020-04-07T00:00:00Z | A | 1000 | - | 195 | 0 | 2050-01-01T12:00:00Z | 2020-04-08T00:00:00Z | B | 2000 | - | 196 | 0 | 2050-01-01T12:00:00Z | 2020-04-08T00:00:00Z | A | 1000 | - | 197 | 0 | 2050-01-01T12:00:00Z | 2020-04-09T00:00:00Z | B | 2000 | - | 198 | 0 | 2050-01-01T12:00:00Z | 2020-04-09T00:00:00Z | A | 1000 | - | 199 | 0 | 2050-01-01T12:00:00Z | 2020-04-10T00:00:00Z | B | 2000 | + | 190 | 0 | 
2050-01-01T12:00:00Z | 2010-01-01T03:10:00Z | A | 190 | + | 191 | 0 | 2050-01-01T12:00:00Z | 2010-01-01T03:11:00Z | B | 191 | + | 192 | 0 | 2050-01-01T12:00:00Z | 2010-01-01T03:12:00Z | A | 192 | + | 193 | 0 | 2050-01-01T12:00:00Z | 2010-01-01T03:13:00Z | B | 193 | + | 194 | 0 | 2050-01-01T12:00:00Z | 2010-01-01T03:14:00Z | A | 194 | + | 195 | 0 | 2050-01-01T12:00:00Z | 2010-01-01T03:15:00Z | B | 195 | + | 196 | 0 | 2050-01-01T12:00:00Z | 2010-01-01T03:16:00Z | A | 196 | + | 197 | 0 | 2050-01-01T12:00:00Z | 2010-01-01T03:17:00Z | B | 197 | + | 198 | 0 | 2050-01-01T12:00:00Z | 2010-01-01T03:18:00Z | A | 198 | + | 199 | 0 | 2050-01-01T12:00:00Z | 2010-01-01T03:19:00Z | B | 199 | +--------+----+----------------------+----------------------+------+------------+ "# ), @@ -843,6 +845,85 @@ async fn test_large_dataset_compact() { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#[test_group::group(ingest, datafusion, compact)] +#[test] +fn test_compact_offsets_are_sequential() { + // Ensure we run with multiple threads otherwise DataFusion's + // `target_partitions` setting doesn't matter + tokio::runtime::Builder::new_multi_thread() + .worker_threads(4) + .build() + .unwrap() + .block_on(test_compact_offsets_are_sequential_impl()); +} + +async fn test_compact_offsets_are_sequential_impl() { + testing_logger::setup(); + let harness = CompactTestHarness::new(); + + let created = harness.create_test_root_dataset().await; + let dataset_ref = created.dataset_handle.as_local_ref(); + + harness.ingest_multiple_blocks(&created, 10, 10000).await; + + let data_helper = harness.dataset_data_helper(&dataset_ref).await; + + let old_blocks = harness.get_dataset_blocks(&dataset_ref).await; + + assert_eq!(old_blocks.len(), 14); + + assert_matches!( + harness + .compact_dataset( + &created, + CompactionOptions { + max_slice_records: Some(u64::MAX), + max_slice_size: Some(u64::MAX), + ..CompactionOptions::default() + }, + ) + .await, + Ok(CompactionResult::Success { + new_head, + old_head, + new_num_blocks: 5, + old_num_blocks: 14 + }) if new_head != old_head, + ); + + testing_logger::validate(|capture| { + let plan = capture + .iter() + .filter(|c| c.body.contains("Optimized physical plan:")) + .last() + .unwrap() + .body + .trim(); + + let end = plan.find("...").unwrap(); + let start = plan[0..end].rfind('[').unwrap(); + let plan_clean = plan[0..=start].to_string() + &plan[end..plan.len()]; + + pretty_assertions::assert_eq!( + indoc::indoc!( + r#" + Optimized physical plan: + DataSinkExec: sink=ParquetSink(file_groups=[]) + SortExec: expr=[offset@0 ASC NULLS LAST], preserve_partitioning=[false] + ParquetExec: file_groups={1 group: [[...]]}, projection=[offset, op, system_time, date, city, population] + "# + ) + .trim(), + plan_clean + ); + }); + + let data_path = data_helper.get_last_data_file().await; + kamu_data_utils::testing::assert_parquet_offsets_are_in_order(&data_path); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[test_group::group(ingest, datafusion, compact)] #[tokio::test] async fn test_dataset_keep_metadata_only_compact() { @@ -983,8 +1064,10 @@ struct CompactTestHarness { _temp_dir: tempfile::TempDir, dataset_registry: Arc, dataset_repo_writer: Arc, - compaction_svc: Arc, - push_ingest_svc: Arc, + compaction_planner: Arc, + compaction_executor: Arc, + push_ingest_planner: Arc, + push_ingest_executor: Arc, transform_helper: 
TransformTestHelper, verification_svc: Arc, current_date_time: DateTime, @@ -1018,11 +1101,13 @@ impl CompactTestHarness { .add::() .add::() .add::() - .add::() - .add::() + .add::() + .add::() + .add::() + .add::() .add::() .add::() - .add::() + .add::() .add_value( mock_engine_provisioner::MockEngineProvisioner::new().stub_provision_engine(), ) @@ -1030,22 +1115,18 @@ impl CompactTestHarness { .add::() .build(); - let dataset_registry = catalog.get_one::().unwrap(); - let dataset_repo_writer = catalog.get_one::().unwrap(); - let compaction_svc = catalog.get_one::().unwrap(); - let push_ingest_svc = catalog.get_one::().unwrap(); - let verification_svc = catalog.get_one::().unwrap(); - let transform_helper = TransformTestHelper::from_catalog(&catalog); Self { _temp_dir: temp_dir, - dataset_registry, - dataset_repo_writer, - compaction_svc, - push_ingest_svc, + dataset_registry: catalog.get_one().unwrap(), + dataset_repo_writer: catalog.get_one().unwrap(), + compaction_planner: catalog.get_one().unwrap(), + compaction_executor: catalog.get_one().unwrap(), + push_ingest_planner: catalog.get_one().unwrap(), + push_ingest_executor: catalog.get_one().unwrap(), + verification_svc: catalog.get_one().unwrap(), transform_helper, - verification_svc, current_date_time, ctx: SessionContext::new_with_config(SessionConfig::new().with_target_partitions(1)), } @@ -1074,12 +1155,14 @@ impl CompactTestHarness { .add_value(ObjectStoreBuilderS3::new(s3_context.clone(), true)) .bind::() .add::() - .add::() + .add::() + .add::() .add::() .add::() - .add::() + .add::() .add::() - .add::() + .add::() + .add::() .add_value(CurrentAccountSubject::new_test()) .build(); @@ -1091,8 +1174,10 @@ impl CompactTestHarness { _temp_dir: temp_dir, dataset_registry: catalog.get_one().unwrap(), dataset_repo_writer: catalog.get_one().unwrap(), - compaction_svc: catalog.get_one().unwrap(), - push_ingest_svc: catalog.get_one().unwrap(), + compaction_planner: catalog.get_one().unwrap(), + compaction_executor: catalog.get_one().unwrap(), + push_ingest_planner: catalog.get_one().unwrap(), + push_ingest_executor: catalog.get_one().unwrap(), transform_helper, verification_svc: catalog.get_one().unwrap(), current_date_time, @@ -1208,37 +1293,51 @@ impl CompactTestHarness { DatasetDataHelper::new_with_context((*resolved_dataset).clone(), self.ctx.clone()) } - async fn ingest_multiple_blocks(&self, dataset_created: &CreateDatasetResult, amount: i64) { - let start_date = NaiveDate::parse_from_str("2020-01-01", "%Y-%m-%d").unwrap(); - - for i in 0..amount { - let a_date = start_date + TimeDelta::try_days(i).unwrap(); - let b_date = start_date + TimeDelta::try_days(i + 1).unwrap(); - - let start_date_str = formatdoc!( - " - date,city,population - {},A,1000 - {},B,2000 - ", - a_date.to_string(), - b_date.to_string() - ); - self.ingest_data(start_date_str, dataset_created).await; + async fn ingest_multiple_blocks( + &self, + dataset_created: &CreateDatasetResult, + blocks: i64, + records_per_block: i64, + ) { + use std::io::Write; + + let mut event_time = Utc.with_ymd_and_hms(2010, 1, 1, 0, 0, 0).unwrap(); + let cities = ["A", "B", "C", "D"]; + + for b in 0..blocks { + let mut data = Vec::new(); + writeln!(&mut data, "date,city,population").unwrap(); + + for r in 0..records_per_block { + writeln!( + &mut data, + "{},{},{}", + event_time.to_rfc3339(), + cities[usize::try_from(r).unwrap() % cities.len()], + b * records_per_block + r + ) + .unwrap(); + + event_time += chrono::Duration::minutes(1); + } + 
self.ingest_data(String::from_utf8(data).unwrap(), dataset_created) + .await; } } async fn ingest_data(&self, data_str: String, dataset_created: &CreateDatasetResult) { let data = std::io::Cursor::new(data_str); - self.push_ingest_svc - .ingest_from_file_stream( - ResolvedDataset::from(dataset_created), - None, - Box::new(data), - PushIngestOpts::default(), - None, - ) + let target = ResolvedDataset::from(dataset_created); + + let ingest_plan = self + .push_ingest_planner + .plan_ingest(target.clone(), None, PushIngestOpts::default()) + .await + .unwrap(); + + self.push_ingest_executor + .ingest_from_stream(target, ingest_plan, Box::new(data), None) .await .unwrap(); } @@ -1336,12 +1435,24 @@ impl CompactTestHarness { dataset_create_result: &CreateDatasetResult, compaction_options: CompactionOptions, ) -> Result { - self.compaction_svc - .compact_dataset( + let compaction_plan = self + .compaction_planner + .plan_compaction( ResolvedDataset::from(dataset_create_result), compaction_options, - Some(Arc::new(NullCompactionListener {})), + None, ) - .await + .await?; + + let result = self + .compaction_executor + .execute( + ResolvedDataset::from(dataset_create_result), + compaction_plan, + None, + ) + .await?; + + Ok(result) } } diff --git a/src/infra/core/tests/tests/test_dataset_changes_service_impl.rs b/src/infra/core/tests/tests/test_dataset_changes_service_impl.rs index 85288efe1..471ccf7fc 100644 --- a/src/infra/core/tests/tests/test_dataset_changes_service_impl.rs +++ b/src/infra/core/tests/tests/test_dataset_changes_service_impl.rs @@ -10,13 +10,11 @@ use std::sync::Arc; use chrono::Utc; -use kamu::testing::MetadataFactory; +use kamu::testing::{BaseRepoHarness, MetadataFactory}; use kamu::DatasetChangesServiceImpl; use kamu_core::{CommitOpts, DatasetChangesService, DatasetIntervalIncrement, TenancyConfig}; use opendatafabric::{Checkpoint, DatasetAlias, DatasetID, DatasetName, MetadataEvent, Multihash}; -use crate::BaseRepoHarness; - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[test_log::test(tokio::test)] diff --git a/src/infra/core/tests/tests/test_dataset_ownership_service_inmem.rs b/src/infra/core/tests/tests/test_dataset_ownership_service_inmem.rs index fccf03d58..8bfc2c0ea 100644 --- a/src/infra/core/tests/tests/test_dataset_ownership_service_inmem.rs +++ b/src/infra/core/tests/tests/test_dataset_ownership_service_inmem.rs @@ -11,6 +11,7 @@ use std::collections::HashMap; use std::sync::Arc; use database_common::{DatabaseTransactionRunner, NoOpDatabasePlugin}; +use kamu::testing::BaseRepoHarness; use kamu::{DatasetOwnershipServiceInMemory, DatasetOwnershipServiceInMemoryStateInitializer}; use kamu_accounts::{ AccountConfig, @@ -29,8 +30,6 @@ use kamu_accounts_services::{ use kamu_core::{DatasetOwnershipService, TenancyConfig}; use opendatafabric::{AccountID, AccountName, DatasetAlias, DatasetID, DatasetName}; -use crate::BaseRepoHarness; - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[test_log::test(tokio::test)] diff --git a/src/infra/core/tests/tests/test_datasets_filtering.rs b/src/infra/core/tests/tests/test_datasets_filtering.rs index 4c9a34186..255036e80 100644 --- a/src/infra/core/tests/tests/test_datasets_filtering.rs +++ b/src/infra/core/tests/tests/test_datasets_filtering.rs @@ -10,6 +10,7 @@ use std::str::FromStr; use futures::TryStreamExt; +use kamu::testing::BaseRepoHarness; use kamu::utils::datasets_filtering::{ 
get_local_datasets_stream, matches_local_ref_pattern, @@ -19,8 +20,6 @@ use kamu_accounts::DEFAULT_ACCOUNT_NAME; use kamu_core::TenancyConfig; use opendatafabric::*; -use crate::BaseRepoHarness; - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[test] diff --git a/src/infra/core/tests/tests/test_pull_request_planner_impl.rs b/src/infra/core/tests/tests/test_pull_request_planner_impl.rs index 2ad4bc2e7..5b2b5dff9 100644 --- a/src/infra/core/tests/tests/test_pull_request_planner_impl.rs +++ b/src/infra/core/tests/tests/test_pull_request_planner_impl.rs @@ -13,7 +13,7 @@ use std::sync::{Arc, Mutex}; use std::time::Duration; use kamu::domain::*; -use kamu::testing::*; +use kamu::testing::{BaseRepoHarness, *}; use kamu::utils::ipfs_wrapper::IpfsClient; use kamu::utils::simple_transfer_protocol::SimpleTransferProtocol; use kamu::*; @@ -22,8 +22,6 @@ use messaging_outbox::DummyOutboxImpl; use opendatafabric::*; use time_source::SystemTimeSourceDefault; -use crate::BaseRepoHarness; - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// macro_rules! n { diff --git a/src/infra/core/tests/tests/test_push_request_planner_impl.rs b/src/infra/core/tests/tests/test_push_request_planner_impl.rs index 56e135d43..af5b044f4 100644 --- a/src/infra/core/tests/tests/test_push_request_planner_impl.rs +++ b/src/infra/core/tests/tests/test_push_request_planner_impl.rs @@ -11,6 +11,7 @@ use std::assert_matches::assert_matches; use std::sync::Arc; use auth::DummyOdfServerAccessTokenResolver; +use kamu::testing::BaseRepoHarness; use kamu::*; use kamu_core::*; use opendatafabric::{ @@ -24,8 +25,6 @@ use opendatafabric::{ use tempfile::TempDir; use url::Url; -use crate::BaseRepoHarness; - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[test_log::test(tokio::test)] diff --git a/src/infra/core/tests/tests/test_query_service_impl.rs b/src/infra/core/tests/tests/test_query_service_impl.rs index 9402c8ea1..4b6451928 100644 --- a/src/infra/core/tests/tests/test_query_service_impl.rs +++ b/src/infra/core/tests/tests/test_query_service_impl.rs @@ -705,14 +705,16 @@ async fn test_sql_statement_with_state_simple() { ) .await .unwrap(); - let foo_id = foo_create.dataset_handle.id; - let foo_dataset = foo_create.dataset; + let foo_id = &foo_create.dataset_handle.id; - let mut writer = DataWriterDataFusion::builder(foo_dataset.clone(), ctx.clone()) - .with_metadata_state_scanned(None) - .await - .unwrap() - .build(); + let mut writer = DataWriterDataFusion::from_metadata_chain( + ctx.clone(), + ResolvedDataset::from(&foo_create), + &BlockRef::Head, + None, + ) + .await + .unwrap(); writer .write( @@ -737,7 +739,7 @@ async fn test_sql_statement_with_state_simple() { source_event_time: Utc::now(), new_watermark: None, new_source_state: None, - data_staging_path: tempdir.path().join(".temp-data"), + data_staging_path: tempdir.path().join(".temp-data.parquet"), }, ) .await @@ -783,7 +785,8 @@ async fn test_sql_statement_with_state_simple() { foo_id.clone(), QueryStateDataset { alias: "foo".to_string(), - block_hash: foo_dataset + block_hash: foo_create + .dataset .as_metadata_chain() .resolve_ref(&BlockRef::Head) .await @@ -816,7 +819,7 @@ async fn test_sql_statement_with_state_simple() { source_event_time: Utc::now(), new_watermark: None, new_source_state: None, - data_staging_path: tempdir.path().join(".temp-data"), + 
data_staging_path: tempdir.path().join(".temp-data.parquet"), }, ) .await @@ -923,7 +926,7 @@ async fn test_sql_statement_with_state_cte() { // Dataset `foo` let foo_alias = DatasetAlias::new(None, DatasetName::new_unchecked("foo")); - let foo_create = dataset_repo_writer + let foo_created = dataset_repo_writer .create_dataset( &foo_alias, MetadataFactory::metadata_block( @@ -933,14 +936,16 @@ async fn test_sql_statement_with_state_cte() { ) .await .unwrap(); - let foo_id = foo_create.dataset_handle.id; - let foo_dataset = foo_create.dataset; + let foo_id = &foo_created.dataset_handle.id; - let mut writer_foo = DataWriterDataFusion::builder(foo_dataset.clone(), ctx.clone()) - .with_metadata_state_scanned(None) - .await - .unwrap() - .build(); + let mut writer_foo = DataWriterDataFusion::from_metadata_chain( + ctx.clone(), + ResolvedDataset::from(&foo_created), + &BlockRef::Head, + None, + ) + .await + .unwrap(); writer_foo .write( @@ -965,7 +970,7 @@ async fn test_sql_statement_with_state_cte() { source_event_time: Utc::now(), new_watermark: None, new_source_state: None, - data_staging_path: tempdir.path().join(".temp-data"), + data_staging_path: tempdir.path().join(".temp-data.parquet"), }, ) .await @@ -973,7 +978,7 @@ async fn test_sql_statement_with_state_cte() { // Dataset `bar` let bar_alias = DatasetAlias::new(None, DatasetName::new_unchecked("bar")); - let bar_create = dataset_repo_writer + let bar_created = dataset_repo_writer .create_dataset( &bar_alias, MetadataFactory::metadata_block( @@ -983,14 +988,16 @@ async fn test_sql_statement_with_state_cte() { ) .await .unwrap(); - let bar_id = bar_create.dataset_handle.id; - let bar_dataset = bar_create.dataset; + let bar_id = &bar_created.dataset_handle.id; - let mut writer_bar = DataWriterDataFusion::builder(bar_dataset.clone(), ctx.clone()) - .with_metadata_state_scanned(None) - .await - .unwrap() - .build(); + let mut writer_bar = DataWriterDataFusion::from_metadata_chain( + ctx.clone(), + ResolvedDataset::from(&bar_created), + &BlockRef::Head, + None, + ) + .await + .unwrap(); writer_bar .write( @@ -1015,7 +1022,7 @@ async fn test_sql_statement_with_state_cte() { source_event_time: Utc::now(), new_watermark: None, new_source_state: None, - data_staging_path: tempdir.path().join(".temp-data"), + data_staging_path: tempdir.path().join(".temp-data.parquet"), }, ) .await @@ -1068,7 +1075,8 @@ async fn test_sql_statement_with_state_cte() { foo_id.clone(), QueryStateDataset { alias: "foo".to_string(), - block_hash: foo_dataset + block_hash: foo_created + .dataset .as_metadata_chain() .resolve_ref(&BlockRef::Head) .await @@ -1079,7 +1087,8 @@ async fn test_sql_statement_with_state_cte() { bar_id.clone(), QueryStateDataset { alias: "bar".to_string(), - block_hash: bar_dataset + block_hash: bar_created + .dataset .as_metadata_chain() .resolve_ref(&BlockRef::Head) .await @@ -1113,7 +1122,7 @@ async fn test_sql_statement_with_state_cte() { source_event_time: Utc::now(), new_watermark: None, new_source_state: None, - data_staging_path: tempdir.path().join(".temp-data"), + data_staging_path: tempdir.path().join(".temp-data.parquet"), }, ) .await @@ -1142,7 +1151,7 @@ async fn test_sql_statement_with_state_cte() { source_event_time: Utc::now(), new_watermark: None, new_source_state: None, - data_staging_path: tempdir.path().join(".temp-data"), + data_staging_path: tempdir.path().join(".temp-data.parquet"), }, ) .await @@ -1270,7 +1279,7 @@ async fn test_sql_statement_with_state_cte() { block_hash: Some( res.state .input_datasets - .get(&foo_id) 
+ .get(foo_id) .unwrap() .block_hash .clone(), @@ -1285,7 +1294,7 @@ async fn test_sql_statement_with_state_cte() { block_hash: Some( res.state .input_datasets - .get(&bar_id) + .get(bar_id) .unwrap() .block_hash .clone(), diff --git a/src/infra/core/tests/tests/test_remote_status_service.rs b/src/infra/core/tests/tests/test_remote_status_service.rs index b02891975..963bd6130 100644 --- a/src/infra/core/tests/tests/test_remote_status_service.rs +++ b/src/infra/core/tests/tests/test_remote_status_service.rs @@ -14,7 +14,7 @@ use std::sync::Arc; use datafusion::arrow::datatypes::{DataType, Field, Schema}; use dill::CatalogBuilder; -use kamu::testing::{DummySmartTransferProtocolClient, MetadataFactory}; +use kamu::testing::{BaseRepoHarness, DummySmartTransferProtocolClient, MetadataFactory}; use kamu::utils::ipfs_wrapper::IpfsClient; use kamu::utils::simple_transfer_protocol::SimpleTransferProtocol; use kamu::{ @@ -33,8 +33,6 @@ use kamu_core::*; use opendatafabric::*; use url::Url; -use crate::BaseRepoHarness; - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[tokio::test] diff --git a/src/infra/core/tests/tests/test_reset_service_impl.rs b/src/infra/core/tests/tests/test_reset_services_impl.rs similarity index 85% rename from src/infra/core/tests/tests/test_reset_service_impl.rs rename to src/infra/core/tests/tests/test_reset_services_impl.rs index 6dadd66fb..d0f2d7e42 100644 --- a/src/infra/core/tests/tests/test_reset_service_impl.rs +++ b/src/infra/core/tests/tests/test_reset_services_impl.rs @@ -11,12 +11,10 @@ use std::assert_matches::assert_matches; use std::sync::Arc; use kamu::domain::*; -use kamu::testing::*; +use kamu::testing::{BaseRepoHarness, *}; use kamu::*; use opendatafabric::*; -use crate::BaseRepoHarness; - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[test_log::test(tokio::test)] @@ -86,9 +84,16 @@ async fn test_reset_dataset_to_non_existing_block_fails() { None, ) .await; - assert_matches!(result, Err(ResetError::BlockNotFound(_))); + assert_matches!( + result, + Err(ResetError::Execution( + ResetExecutionError::SetReferenceFailed(SetRefError::BlockNotFound(_)) + )) + ); } +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[test_log::test(tokio::test)] async fn test_reset_dataset_with_wrong_head() { let harness = ResetTestHarness::new(); @@ -101,9 +106,14 @@ async fn test_reset_dataset_with_wrong_head() { Some(&test_case.hash_seed_block), ) .await; - assert_matches!(result, Err(ResetError::OldHeadMismatch(_))); + assert_matches!( + result, + Err(ResetError::Planning(ResetPlanningError::OldHeadMismatch(_))) + ); } +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[test_log::test(tokio::test)] async fn test_reset_dataset_with_default_seed_block() { let harness = ResetTestHarness::new(); @@ -155,7 +165,8 @@ impl ChainWith2BlocksTestCase { #[oop::extend(BaseRepoHarness, base_repo_harness)] struct ResetTestHarness { base_repo_harness: BaseRepoHarness, - reset_svc: Arc, + reset_planner: Arc, + reset_executor: Arc, } impl ResetTestHarness { @@ -163,14 +174,17 @@ impl ResetTestHarness { let base_repo_harness = BaseRepoHarness::new(TenancyConfig::SingleTenant); let catalog = dill::CatalogBuilder::new_chained(base_repo_harness.catalog()) - .add::() + .add::() + .add::() .build(); - let 
reset_svc = catalog.get_one::().unwrap(); + let reset_planner = catalog.get_one::().unwrap(); + let reset_executor = catalog.get_one::().unwrap(); Self { base_repo_harness, - reset_svc, + reset_planner, + reset_executor, } } @@ -210,11 +224,17 @@ impl ResetTestHarness { dataset_handle: &DatasetHandle, block_hash: Option<&Multihash>, old_head_maybe: Option<&Multihash>, - ) -> Result { - let resolved_dataset = self.resolve_dataset(dataset_handle); - self.reset_svc - .reset_dataset(resolved_dataset, block_hash, old_head_maybe) - .await + ) -> Result { + let target = self.resolve_dataset(dataset_handle); + + let reset_plan = self + .reset_planner + .plan_reset(target.clone(), block_hash, old_head_maybe) + .await?; + + let reset_result = self.reset_executor.execute(target, reset_plan).await?; + + Ok(reset_result) } async fn get_dataset_head(&self, dataset_handle: &DatasetHandle) -> Multihash { diff --git a/src/infra/core/tests/tests/test_transform_service_impl.rs b/src/infra/core/tests/tests/test_transform_services_impl.rs similarity index 93% rename from src/infra/core/tests/tests/test_transform_service_impl.rs rename to src/infra/core/tests/tests/test_transform_services_impl.rs index 7180333ad..f1d979e0b 100644 --- a/src/infra/core/tests/tests/test_transform_service_impl.rs +++ b/src/infra/core/tests/tests/test_transform_services_impl.rs @@ -33,9 +33,11 @@ struct TransformTestHarness { dataset_repo_writer: Arc, transform_request_planner: Arc, transform_elab_svc: Arc, - transform_exec_svc: Arc, - compaction_service: Arc, - push_ingest_svc: Arc, + transform_executor: Arc, + compaction_planner: Arc, + compaction_executor: Arc, + push_ingest_planner: Arc, + push_ingest_executor: Arc, } impl TransformTestHarness { @@ -59,15 +61,16 @@ impl TransformTestHarness { .add::() .add::() .add::() - .add::() + .add::() + .add::() .add::() - .add::() - .bind::() + .add::() + .add::() .add_value(engine_provisioner) .bind::() .add::() .add::() - .add::() + .add::() .add::() .build(); @@ -75,11 +78,13 @@ impl TransformTestHarness { _tempdir: tempdir, dataset_registry: catalog.get_one().unwrap(), dataset_repo_writer: catalog.get_one().unwrap(), - compaction_service: catalog.get_one().unwrap(), - push_ingest_svc: catalog.get_one().unwrap(), + compaction_planner: catalog.get_one().unwrap(), + compaction_executor: catalog.get_one().unwrap(), + push_ingest_planner: catalog.get_one().unwrap(), + push_ingest_executor: catalog.get_one().unwrap(), transform_request_planner: catalog.get_one().unwrap(), transform_elab_svc: catalog.get_one().unwrap(), - transform_exec_svc: catalog.get_one().unwrap(), + transform_executor: catalog.get_one().unwrap(), } } @@ -187,14 +192,16 @@ impl TransformTestHarness { async fn ingest_data(&self, data_str: String, dataset_created: &CreateDatasetResult) { let data = std::io::Cursor::new(data_str); - self.push_ingest_svc - .ingest_from_file_stream( - ResolvedDataset::from(dataset_created), - None, - Box::new(data), - PushIngestOpts::default(), - None, - ) + let target = ResolvedDataset::from(dataset_created); + + let ingest_plan = self + .push_ingest_planner + .plan_ingest(target.clone(), None, PushIngestOpts::default()) + .await + .unwrap(); + + self.push_ingest_executor + .ingest_from_stream(target, ingest_plan, Box::new(data), None) .await .unwrap(); } @@ -231,13 +238,30 @@ impl TransformTestHarness { match elaboration { TransformElaboration::UpToDate => Ok(TransformResult::UpToDate), TransformElaboration::Elaborated(plan) => self - .transform_exec_svc + .transform_executor 
.execute_transform(target, plan, None) .await .1 .map_err(TransformError::Execute), } } + + async fn compact(&self, dataset: &CreateDatasetResult) { + let compaction_plan = self + .compaction_planner + .plan_compaction( + ResolvedDataset::from(dataset), + CompactionOptions::default(), + None, + ) + .await + .unwrap(); + + self.compaction_executor + .execute(ResolvedDataset::from(dataset), compaction_plan, None) + .await + .unwrap(); + } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -700,15 +724,7 @@ async fn test_transform_with_compaction_retry() { let transform_result = harness.transform(&bar, TransformOptions::default()).await; assert_matches!(transform_result, Ok(TransformResult::Updated { .. })); - let foo_dataset = harness - .dataset_registry - .get_dataset_by_handle(&foo_created_result.dataset_handle); - - harness - .compaction_service - .compact_dataset(foo_dataset, CompactionOptions::default(), None) - .await - .unwrap(); + harness.compact(&foo_created_result).await; let transform_result = harness.transform(&bar, TransformOptions::default()).await; diff --git a/src/infra/core/tests/tests/test_verification_service_impl.rs b/src/infra/core/tests/tests/test_verification_service_impl.rs index 89d79a5cf..f0a4b17a5 100644 --- a/src/infra/core/tests/tests/test_verification_service_impl.rs +++ b/src/infra/core/tests/tests/test_verification_service_impl.rs @@ -14,12 +14,10 @@ use datafusion::arrow::array::{Array, Int32Array, StringArray}; use datafusion::arrow::datatypes::{DataType, Field, Schema}; use datafusion::arrow::record_batch::RecordBatch; use kamu::domain::*; -use kamu::testing::{MetadataFactory, ParquetWriterHelper}; +use kamu::testing::{BaseRepoHarness, MetadataFactory, ParquetWriterHelper}; use kamu::*; use opendatafabric::*; -use crate::BaseRepoHarness; - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[tokio::test] @@ -199,7 +197,7 @@ impl VerifyHarness { let catalog = dill::CatalogBuilder::new_chained(base_repo_harness.catalog()) .add::() - .add::() + .add::() .add::() .add::() .build(); diff --git a/src/infra/core/tests/tests/test_watermark_service_impl.rs b/src/infra/core/tests/tests/test_watermark_services_impl.rs similarity index 82% rename from src/infra/core/tests/tests/test_watermark_service_impl.rs rename to src/infra/core/tests/tests/test_watermark_services_impl.rs index 59ebe448a..19f141b1f 100644 --- a/src/infra/core/tests/tests/test_watermark_service_impl.rs +++ b/src/infra/core/tests/tests/test_watermark_services_impl.rs @@ -11,18 +11,25 @@ use std::assert_matches::assert_matches; use std::sync::Arc; use chrono::{DateTime, TimeZone, Utc}; -use kamu::{RemoteAliasesRegistryImpl, WatermarkServiceImpl}; +use kamu::testing::BaseRepoHarness; +use kamu::{ + MetadataQueryServiceImpl, + RemoteAliasesRegistryImpl, + SetWatermarkExecutorImpl, + SetWatermarkPlannerImpl, +}; use kamu_core::{ + MetadataQueryService, ResolvedDataset, SetWatermarkError, + SetWatermarkExecutor, + SetWatermarkPlanner, + SetWatermarkPlanningError, SetWatermarkResult, TenancyConfig, - WatermarkService, }; use opendatafabric::{DatasetAlias, DatasetName}; -use crate::BaseRepoHarness; - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[tokio::test] @@ -136,7 +143,9 @@ async fn test_set_watermark_rejects_on_derivative() { Utc.with_ymd_and_hms(2000, 1, 2, 0, 0, 0).unwrap() ) 
.await, - Err(SetWatermarkError::IsDerivative) + Err(SetWatermarkError::Planning( + SetWatermarkPlanningError::IsDerivative + )) ); assert_eq!(harness.num_blocks(ResolvedDataset::from(&derived)).await, 2); @@ -153,7 +162,9 @@ async fn test_set_watermark_rejects_on_derivative() { #[oop::extend(BaseRepoHarness, base_repo_harness)] struct WatermarkTestHarness { base_repo_harness: BaseRepoHarness, - watermark_svc: Arc, + set_watermark_planner: Arc, + set_watermark_executor: Arc, + metadata_query_svc: Arc, } impl WatermarkTestHarness { @@ -162,12 +173,16 @@ impl WatermarkTestHarness { let catalog = dill::CatalogBuilder::new_chained(base_repo_harness.catalog()) .add::() - .add::() + .add::() + .add::() + .add::() .build(); Self { base_repo_harness, - watermark_svc: catalog.get_one().unwrap(), + set_watermark_planner: catalog.get_one().unwrap(), + set_watermark_executor: catalog.get_one().unwrap(), + metadata_query_svc: catalog.get_one().unwrap(), } } @@ -176,13 +191,18 @@ impl WatermarkTestHarness { target: ResolvedDataset, new_watermark: DateTime, ) -> Result { - self.watermark_svc - .set_watermark(target, new_watermark) - .await + let plan = self + .set_watermark_planner + .plan_set_watermark(target.clone(), new_watermark) + .await?; + + let result = self.set_watermark_executor.execute(target, plan).await?; + + Ok(result) } async fn current_watermark(&self, target: ResolvedDataset) -> Option> { - self.watermark_svc + self.metadata_query_svc .try_get_current_watermark(target) .await .unwrap() diff --git a/src/infra/core/tests/tests/use_cases/base_use_case_harness.rs b/src/infra/core/tests/tests/use_cases/base_use_case_harness.rs index 1573c09bb..2a5ad0c42 100644 --- a/src/infra/core/tests/tests/use_cases/base_use_case_harness.rs +++ b/src/infra/core/tests/tests/use_cases/base_use_case_harness.rs @@ -8,13 +8,11 @@ // by the Apache License, Version 2.0. 
use dill::Catalog; -use kamu::testing::MockDatasetActionAuthorizer; +use kamu::testing::{BaseRepoHarness, MockDatasetActionAuthorizer}; use kamu_core::auth::DatasetActionAuthorizer; use kamu_core::TenancyConfig; use messaging_outbox::{MockOutbox, Outbox}; -use crate::BaseRepoHarness; - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub(crate) struct BaseUseCaseHarnessOptions { diff --git a/src/infra/core/tests/tests/use_cases/test_append_dataset_metadata_batch_use_case.rs b/src/infra/core/tests/tests/use_cases/test_append_dataset_metadata_batch_use_case.rs index 3a6b29a16..2723c0a48 100644 --- a/src/infra/core/tests/tests/use_cases/test_append_dataset_metadata_batch_use_case.rs +++ b/src/infra/core/tests/tests/use_cases/test_append_dataset_metadata_batch_use_case.rs @@ -12,14 +12,13 @@ use std::collections::VecDeque; use std::sync::Arc; use chrono::Utc; -use kamu::testing::MetadataFactory; +use kamu::testing::{BaseRepoHarness, MetadataFactory}; use kamu::AppendDatasetMetadataBatchUseCaseImpl; use kamu_core::AppendDatasetMetadataBatchUseCase; use messaging_outbox::MockOutbox; use opendatafabric::*; use crate::tests::use_cases::*; -use crate::BaseRepoHarness; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/core/tests/tests/use_cases/test_compact_dataset_use_case.rs b/src/infra/core/tests/tests/use_cases/test_compact_dataset_use_case.rs index 4a1ffba01..18c9cbacb 100644 --- a/src/infra/core/tests/tests/use_cases/test_compact_dataset_use_case.rs +++ b/src/infra/core/tests/tests/use_cases/test_compact_dataset_use_case.rs @@ -153,7 +153,8 @@ impl CompactUseCaseHarness { let catalog = dill::CatalogBuilder::new_chained(base_harness.catalog()) .add::() - .add::() + .add::() + .add::() .add::() .build(); diff --git a/src/infra/core/tests/tests/use_cases/test_delete_dataset_use_case.rs b/src/infra/core/tests/tests/use_cases/test_delete_dataset_use_case.rs index cdb5acd0c..2e3594417 100644 --- a/src/infra/core/tests/tests/use_cases/test_delete_dataset_use_case.rs +++ b/src/infra/core/tests/tests/use_cases/test_delete_dataset_use_case.rs @@ -12,19 +12,15 @@ use std::sync::Arc; use dill::Catalog; use kamu::testing::MockDatasetActionAuthorizer; -use kamu::{ - DeleteDatasetUseCaseImpl, - DependencyGraphRepositoryInMemory, - DependencyGraphServiceInMemory, -}; +use kamu::DeleteDatasetUseCaseImpl; use kamu_core::{ DatasetLifecycleMessage, - DatasetRepository, DeleteDatasetError, DeleteDatasetUseCase, - DependencyGraphService, GetDatasetError, }; +use kamu_datasets_inmem::InMemoryDatasetDependencyRepository; +use kamu_datasets_services::{DependencyGraphIndexer, DependencyGraphServiceImpl}; use messaging_outbox::{consume_deserialized_message, ConsumerFilter, Message, MockOutbox}; use opendatafabric::{DatasetAlias, DatasetName}; @@ -45,7 +41,7 @@ async fn test_delete_dataset_success_via_ref() { let harness = DeleteUseCaseHarness::new(mock_authorizer, mock_outbox); harness.create_root_dataset(&alias_foo).await; - harness.dependencies_eager_initialization().await; + harness.reindex_dependency_graph().await; harness .use_case @@ -74,7 +70,7 @@ async fn test_delete_dataset_success_via_handle() { let harness = DeleteUseCaseHarness::new(mock_authorizer, mock_outbox); let foo = harness.create_root_dataset(&alias_foo).await; - harness.dependencies_eager_initialization().await; + harness.reindex_dependency_graph().await; harness .use_case @@ -116,7 
+112,7 @@ async fn test_delete_unauthorized() { ); let foo = harness.create_root_dataset(&alias_foo).await; - harness.dependencies_eager_initialization().await; + harness.reindex_dependency_graph().await; assert_matches!( harness @@ -145,7 +141,7 @@ async fn test_delete_dataset_respects_dangling_refs() { let derived = harness .create_derived_dataset(&alias_bar, vec![alias_foo.as_local_ref()]) .await; - harness.dependencies_eager_initialization().await; + harness.reindex_dependency_graph().await; assert_matches!( harness.use_case.execute_via_handle(&root.dataset_handle).await, @@ -197,9 +193,8 @@ async fn test_delete_dataset_respects_dangling_refs() { struct DeleteUseCaseHarness { base_harness: BaseUseCaseHarness, catalog: Catalog, - dependency_graph_service: Arc, - dataset_repo: Arc, use_case: Arc, + indexer: Arc, } impl DeleteUseCaseHarness { @@ -215,31 +210,22 @@ impl DeleteUseCaseHarness { let catalog = dill::CatalogBuilder::new_chained(base_harness.catalog()) .add::() - .add::() + .add::() + .add::() + .add::() .build(); - let dependency_graph_service = catalog.get_one().unwrap(); let use_case = catalog.get_one().unwrap(); - let dataset_repo = catalog.get_one().unwrap(); + let indexer = catalog.get_one().unwrap(); Self { base_harness, catalog, - dependency_graph_service, - dataset_repo, use_case, + indexer, } } - async fn dependencies_eager_initialization(&self) { - self.dependency_graph_service - .eager_initialization(&DependencyGraphRepositoryInMemory::new( - self.dataset_repo.clone(), - )) - .await - .unwrap(); - } - async fn consume_message(&self, message: TMessage) { let content_json = serde_json::to_string(&message).unwrap(); consume_deserialized_message::( @@ -251,6 +237,11 @@ impl DeleteUseCaseHarness { .await .unwrap(); } + + async fn reindex_dependency_graph(&self) { + use init_on_startup::InitOnStartup; + self.indexer.run_initialization().await.unwrap(); + } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/core/tests/tests/use_cases/test_pull_dataset_use_case.rs b/src/infra/core/tests/tests/use_cases/test_pull_dataset_use_case.rs index 4ff453b7e..8af02e3d0 100644 --- a/src/infra/core/tests/tests/use_cases/test_pull_dataset_use_case.rs +++ b/src/infra/core/tests/tests/use_cases/test_pull_dataset_use_case.rs @@ -674,7 +674,7 @@ impl PullUseCaseHarness { .add_value(mocks.mock_transform_elaboration_service) .bind::() .add_value(mocks.mock_transform_execution_service) - .bind::() + .bind::() .add_value(mocks.mock_sync_service) .bind::() .add::() diff --git a/src/infra/core/tests/tests/use_cases/test_reset_dataset_use_case.rs b/src/infra/core/tests/tests/use_cases/test_reset_dataset_use_case.rs index ea89f4137..3459d7f94 100644 --- a/src/infra/core/tests/tests/use_cases/test_reset_dataset_use_case.rs +++ b/src/infra/core/tests/tests/use_cases/test_reset_dataset_use_case.rs @@ -38,13 +38,13 @@ async fn test_reset_success() { assert_eq!(harness.num_blocks(ResolvedDataset::from(&foo)).await, 3); - let new_head = harness + let reset_result = harness .use_case .execute(&foo.dataset_handle, Some(&foo.head), None) .await .unwrap(); - assert_eq!(new_head, foo.head); + assert_eq!(reset_result.new_head, foo.head); assert_eq!(harness.num_blocks(ResolvedDataset::from(&foo)).await, 2); } @@ -85,7 +85,8 @@ impl ResetUseCaseHarness { let catalog = dill::CatalogBuilder::new_chained(base_harness.catalog()) .add::() - .add::() + .add::() + .add::() .build(); let use_case = catalog.get_one::().unwrap(); 
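// Illustrative sketch (not taken from the patch): the reset flow is now split into a
// planning step and an execution step, as the updated reset harness and use-case test
// above show. A caller composes the two services roughly as below. Trait and type names
// that are not spelled out in the diff text (ResetPlanner, ResetExecutor, ResetResult)
// and the import paths are assumptions; the method calls mirror the harness code.
use std::sync::Arc;

use kamu_core::{ResetError, ResetExecutor, ResetPlanner, ResetResult, ResolvedDataset};
use opendatafabric::Multihash;

async fn plan_and_execute_reset(
    reset_planner: Arc<dyn ResetPlanner>,
    reset_executor: Arc<dyn ResetExecutor>,
    target: ResolvedDataset,
    new_head: Option<&Multihash>,
    old_head: Option<&Multihash>,
) -> Result<ResetResult, ResetError> {
    // Planning validates the request without touching the dataset
    // (failures surface as ResetError::Planning, e.g. OldHeadMismatch in the tests above).
    let plan = reset_planner
        .plan_reset(target.clone(), new_head, old_head)
        .await?;

    // Execution applies the plan and yields the new head
    // (failures surface as ResetError::Execution, e.g. SetReferenceFailed).
    let result = reset_executor.execute(target, plan).await?;

    Ok(result)
}

// Separating planning from execution lets callers run all checks before any mutation;
// the compaction, watermark, and push-ingest changes in this patch follow the same pattern.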
diff --git a/src/infra/core/tests/tests/use_cases/test_set_watermark_use_case.rs b/src/infra/core/tests/tests/use_cases/test_set_watermark_use_case.rs index d5debe946..99cef4210 100644 --- a/src/infra/core/tests/tests/use_cases/test_set_watermark_use_case.rs +++ b/src/infra/core/tests/tests/use_cases/test_set_watermark_use_case.rs @@ -69,7 +69,7 @@ async fn test_set_watermark_unauthorized() { struct SetWatermarkUseCaseHarness { base_harness: BaseUseCaseHarness, use_case: Arc, - watermark_svc: Arc, + metadata_query_svc: Arc, } impl SetWatermarkUseCaseHarness { @@ -80,17 +80,16 @@ impl SetWatermarkUseCaseHarness { let catalog = dill::CatalogBuilder::new_chained(base_harness.catalog()) .add::() - .add::() + .add::() + .add::() .add::() + .add::() .build(); - let use_case = catalog.get_one().unwrap(); - let watermark_svc = catalog.get_one().unwrap(); - Self { base_harness, - use_case, - watermark_svc, + use_case: catalog.get_one().unwrap(), + metadata_query_svc: catalog.get_one().unwrap(), } } @@ -98,7 +97,7 @@ impl SetWatermarkUseCaseHarness { &self, created_result: &CreateDatasetResult, ) -> Option> { - self.watermark_svc + self.metadata_query_svc .try_get_current_watermark(ResolvedDataset::from(created_result)) .await .unwrap() diff --git a/src/infra/core/tests/tests/use_cases/test_verify_dataset_use_case.rs b/src/infra/core/tests/tests/use_cases/test_verify_dataset_use_case.rs index e2b9f4a07..7f1e82aaf 100644 --- a/src/infra/core/tests/tests/use_cases/test_verify_dataset_use_case.rs +++ b/src/infra/core/tests/tests/use_cases/test_verify_dataset_use_case.rs @@ -178,7 +178,7 @@ impl VerifyUseCaseHarness { .add::() .add::() .add::() - .add::() + .add::() .add::() .build(); diff --git a/src/infra/core/tests/utils/mod.rs b/src/infra/core/tests/utils/mod.rs index eb28ace95..d47dc2b02 100644 --- a/src/infra/core/tests/utils/mod.rs +++ b/src/infra/core/tests/utils/mod.rs @@ -25,6 +25,3 @@ pub use mqtt_broker::*; mod transform_test_helper; pub use transform_test_helper::*; - -mod base_repo_harness; -pub use base_repo_harness::*; diff --git a/src/infra/core/tests/utils/transform_test_helper.rs b/src/infra/core/tests/utils/transform_test_helper.rs index a36bd1a16..48f6cedcc 100644 --- a/src/infra/core/tests/utils/transform_test_helper.rs +++ b/src/infra/core/tests/utils/transform_test_helper.rs @@ -10,20 +10,17 @@ use std::sync::Arc; use dill::Catalog; -use kamu::{ - TransformElaborationServiceImpl, - TransformExecutionServiceImpl, - TransformRequestPlannerImpl, -}; +use kamu::{TransformElaborationServiceImpl, TransformExecutorImpl, TransformRequestPlannerImpl}; use kamu_core::{ - CompactionService, + CompactionExecutor, + CompactionPlanner, CreateDatasetResult, DatasetRegistry, EngineProvisioner, ResolvedDataset, TransformElaboration, TransformElaborationService, - TransformExecutionService, + TransformExecutor, TransformOptions, TransformRequestPlanner, TransformResult, @@ -36,14 +33,15 @@ use time_source::SystemTimeSource; pub struct TransformTestHelper { transform_request_planner: Arc, transform_elab_svc: Arc, - transform_exec_svc: Arc, + transform_executor: Arc, } impl TransformTestHelper { pub fn build( dataset_registry: Arc, system_time_source: Arc, - compaction_svc: Arc, + compaction_planner: Arc, + compaction_executor: Arc, engine_provisioner: Arc, ) -> Self { Self { @@ -52,10 +50,11 @@ impl TransformTestHelper { system_time_source.clone(), )), transform_elab_svc: Arc::new(TransformElaborationServiceImpl::new( - compaction_svc, + compaction_planner, + compaction_executor, 
system_time_source, )), - transform_exec_svc: Arc::new(TransformExecutionServiceImpl::new(engine_provisioner)), + transform_executor: Arc::new(TransformExecutorImpl::new(engine_provisioner)), } } @@ -63,7 +62,7 @@ impl TransformTestHelper { Self { transform_request_planner: catalog.get_one().unwrap(), transform_elab_svc: catalog.get_one().unwrap(), - transform_exec_svc: catalog.get_one().unwrap(), + transform_executor: catalog.get_one().unwrap(), } } @@ -91,7 +90,7 @@ impl TransformTestHelper { TransformElaboration::UpToDate => return TransformResult::UpToDate, }; - self.transform_exec_svc + self.transform_executor .execute_transform(deriv_target, plan, None) .await .1 @@ -110,7 +109,7 @@ impl TransformTestHelper { .await .map_err(VerifyTransformError::Plan)?; - self.transform_exec_svc + self.transform_executor .execute_verify_transform(deriv_target, verify_plan, None) .await .map_err(VerifyTransformError::Execute) diff --git a/src/infra/datasets/inmem/Cargo.toml b/src/infra/datasets/inmem/Cargo.toml index cea50d914..733b84f92 100644 --- a/src/infra/datasets/inmem/Cargo.toml +++ b/src/infra/datasets/inmem/Cargo.toml @@ -29,9 +29,10 @@ internal-error = { workspace = true } async-trait = { version = "0.1", default-features = false } dill = "0.9" -futures = "0.3" +futures = { version = "0.3", default-features = false } tokio = { version = "1", default-features = false } -uuid = "1" +tokio-stream = { version = "0.1", default-features = false } +uuid = { version = "1", default-features = false } [dev-dependencies] diff --git a/src/infra/datasets/inmem/src/repos/inmem_dataset_dependency_repository.rs b/src/infra/datasets/inmem/src/repos/inmem_dataset_dependency_repository.rs new file mode 100644 index 000000000..f49371a7f --- /dev/null +++ b/src/infra/datasets/inmem/src/repos/inmem_dataset_dependency_repository.rs @@ -0,0 +1,192 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
+ +use std::collections::{HashMap, HashSet}; +use std::sync::{Arc, Mutex}; + +use dill::*; +use internal_error::InternalError; +use kamu_datasets::*; +use opendatafabric::DatasetID; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Default)] +struct State { + upstream_by_downstream: HashMap>, + downstream_by_upstream: HashMap>, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub struct InMemoryDatasetDependencyRepository { + state: Arc>, +} + +#[component(pub)] +#[interface(dyn DatasetDependencyRepository)] +#[interface(dyn DatasetEntryRemovalListener)] +#[scope(Singleton)] +impl InMemoryDatasetDependencyRepository { + pub fn new() -> Self { + Self { + state: Arc::new(Mutex::new(State::default())), + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl DatasetDependencyRepository for InMemoryDatasetDependencyRepository { + async fn stores_any_dependencies(&self) -> Result { + let guard = self.state.lock().unwrap(); + let has_dependencies = !guard.upstream_by_downstream.is_empty(); + Ok(has_dependencies) + } + + fn list_all_dependencies(&self) -> DatasetDependenciesIDStream { + let dependencies: Vec<_> = { + let guard = self.state.lock().unwrap(); + guard + .upstream_by_downstream + .iter() + .filter(|(_, upstreams)| !upstreams.is_empty()) + .map(|(downstream_dataset_id, upstreams)| { + let mut upstream_dataset_ids: Vec<_> = upstreams.iter().cloned().collect(); + upstream_dataset_ids.sort(); + + Ok(DatasetDependencies { + downstream_dataset_id: downstream_dataset_id.clone(), + upstream_dataset_ids, + }) + }) + .collect() + }; + + Box::pin(tokio_stream::iter(dependencies)) + } + + async fn add_upstream_dependencies( + &self, + downstream_dataset_id: &DatasetID, + new_upstream_dataset_ids: &[&DatasetID], + ) -> Result<(), AddDependenciesError> { + if new_upstream_dataset_ids.is_empty() { + return Ok(()); + } + + let mut guard = self.state.lock().unwrap(); + + let upstreams = guard + .upstream_by_downstream + .entry(downstream_dataset_id.clone()) + .or_default(); + + for new_upstream_dataset_id in new_upstream_dataset_ids { + upstreams.insert((*new_upstream_dataset_id).clone()); + } + + for new_upstream_dataset_id in new_upstream_dataset_ids { + let downstreams = guard + .downstream_by_upstream + .entry((*new_upstream_dataset_id).clone()) + .or_default(); + let inserted = downstreams.insert(downstream_dataset_id.clone()); + if !inserted { + return Err(AddDependenciesError::Duplicate( + AddDependencyDuplicateError { + downstream_dataset_id: downstream_dataset_id.clone(), + }, + )); + } + } + + Ok(()) + } + + async fn remove_upstream_dependencies( + &self, + downstream_dataset_id: &DatasetID, + obsolete_upstream_dataset_ids: &[&DatasetID], + ) -> Result<(), RemoveDependenciesError> { + if obsolete_upstream_dataset_ids.is_empty() { + return Ok(()); + } + + let mut guard = self.state.lock().unwrap(); + + let maybe_current_upstreams = guard.upstream_by_downstream.get_mut(downstream_dataset_id); + if let Some(current_upstreams) = maybe_current_upstreams { + let some_missing = obsolete_upstream_dataset_ids + .iter() + .any(|id| !current_upstreams.contains(*id)); + if some_missing { + return Err(RemoveDependenciesError::NotFound( + RemoveDependencyMissingError { + downstream_dataset_id: downstream_dataset_id.clone(), + }, 
+ )); + } + + for obsolete_upstream_id in obsolete_upstream_dataset_ids { + current_upstreams.remove(*obsolete_upstream_id); + } + + for obsolete_upstream_id in obsolete_upstream_dataset_ids { + if let Some(downstreams) = + guard.downstream_by_upstream.get_mut(*obsolete_upstream_id) + { + downstreams.remove(downstream_dataset_id); + } + } + } else { + return Err(RemoveDependenciesError::NotFound( + RemoveDependencyMissingError { + downstream_dataset_id: downstream_dataset_id.clone(), + }, + )); + } + + Ok(()) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl DatasetEntryRemovalListener for InMemoryDatasetDependencyRepository { + async fn on_dataset_entry_removed(&self, dataset_id: &DatasetID) -> Result<(), InternalError> { + let mut guard = self.state.lock().unwrap(); + + if let Some(upstreams) = guard.upstream_by_downstream.remove(dataset_id) { + for upstream_dataset_id in upstreams { + if let Some(downstreams) = + guard.downstream_by_upstream.get_mut(&upstream_dataset_id) + { + downstreams.remove(dataset_id); + } + } + } + + if let Some(downstreams) = guard.downstream_by_upstream.remove(dataset_id) { + for downstream_dataset_id in downstreams { + if let Some(upstreams) = + guard.upstream_by_downstream.get_mut(&downstream_dataset_id) + { + upstreams.remove(dataset_id); + } + } + } + + Ok(()) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/datasets/inmem/src/repos/inmem_dataset_env_var_repository.rs b/src/infra/datasets/inmem/src/repos/inmem_dataset_env_var_repository.rs index 8bb6bfccf..65bd9719a 100644 --- a/src/infra/datasets/inmem/src/repos/inmem_dataset_env_var_repository.rs +++ b/src/infra/datasets/inmem/src/repos/inmem_dataset_env_var_repository.rs @@ -13,6 +13,7 @@ use std::sync::{Arc, Mutex}; use database_common::PaginationOpts; use dill::*; +use internal_error::InternalError; use opendatafabric::DatasetID; use uuid::Uuid; @@ -47,6 +48,7 @@ impl State { #[component(pub)] #[interface(dyn DatasetEnvVarRepository)] +#[interface(dyn DatasetEntryRemovalListener)] #[scope(Singleton)] impl InMemoryDatasetEnvVarRepository { pub fn new() -> Self { @@ -228,3 +230,22 @@ impl DatasetEnvVarRepository for InMemoryDatasetEnvVarRepository { )); } } + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl DatasetEntryRemovalListener for InMemoryDatasetEnvVarRepository { + async fn on_dataset_entry_removed(&self, dataset_id: &DatasetID) -> Result<(), InternalError> { + let mut guard = self.state.lock().unwrap(); + + if let Some(env_var_ids) = guard.dataset_env_var_ids_by_dataset_id.remove(dataset_id) { + for env_var_id in env_var_ids { + if let Some(env_var) = guard.dataset_env_vars_by_ids.remove(&env_var_id) { + guard.dataset_env_var_ids_by_keys.remove(&env_var.key); + } + } + } + + Ok(()) + } +} diff --git a/src/infra/datasets/inmem/src/repos/inmem_dateset_entry_repository.rs b/src/infra/datasets/inmem/src/repos/inmem_dateset_entry_repository.rs index a42dce5ef..26368dc56 100644 --- a/src/infra/datasets/inmem/src/repos/inmem_dateset_entry_repository.rs +++ b/src/infra/datasets/inmem/src/repos/inmem_dateset_entry_repository.rs @@ -12,7 +12,7 @@ use std::sync::Arc; use database_common::PaginationOpts; use dill::*; -use internal_error::InternalError; +use 
internal_error::{InternalError, ResultIntoInternal}; use kamu_datasets::*; use opendatafabric::{AccountID, DatasetID, DatasetName}; use tokio::sync::RwLock; @@ -39,6 +39,7 @@ impl State { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub struct InMemoryDatasetEntryRepository { + listeners: Vec>, state: Arc>, } @@ -46,8 +47,9 @@ pub struct InMemoryDatasetEntryRepository { #[interface(dyn DatasetEntryRepository)] #[scope(Singleton)] impl InMemoryDatasetEntryRepository { - pub fn new() -> Self { + pub fn new(listeners: Vec>) -> Self { Self { + listeners, state: Arc::new(RwLock::new(State::new())), } } @@ -259,18 +261,27 @@ impl DatasetEntryRepository for InMemoryDatasetEntryRepository { &self, dataset_id: &DatasetID, ) -> Result<(), DeleteEntryDatasetError> { - let mut writable_state = self.state.write().await; + { + let mut writable_state = self.state.write().await; + + let maybe_removed_entry = writable_state.rows.remove(dataset_id); + if let Some(removed_entry) = maybe_removed_entry { + writable_state.rows_by_name.remove(&removed_entry.name); + writable_state + .rows_by_owner + .get_mut(&removed_entry.owner_id) + .unwrap() + .remove(&removed_entry.id); + } else { + return Err(DatasetEntryNotFoundError::new(dataset_id.clone()).into()); + } + } - let maybe_removed_entry = writable_state.rows.remove(dataset_id); - if let Some(removed_entry) = maybe_removed_entry { - writable_state.rows_by_name.remove(&removed_entry.name); - writable_state - .rows_by_owner - .get_mut(&removed_entry.owner_id) - .unwrap() - .remove(&removed_entry.id); - } else { - return Err(DatasetEntryNotFoundError::new(dataset_id.clone()).into()); + for listener in &self.listeners { + listener + .on_dataset_entry_removed(dataset_id) + .await + .int_err()?; } Ok(()) diff --git a/src/infra/datasets/inmem/src/repos/mod.rs b/src/infra/datasets/inmem/src/repos/mod.rs index 3d6a04f4d..51b94594f 100644 --- a/src/infra/datasets/inmem/src/repos/mod.rs +++ b/src/infra/datasets/inmem/src/repos/mod.rs @@ -7,8 +7,10 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. +mod inmem_dataset_dependency_repository; mod inmem_dataset_env_var_repository; mod inmem_dateset_entry_repository; +pub use inmem_dataset_dependency_repository::*; pub use inmem_dataset_env_var_repository::*; pub use inmem_dateset_entry_repository::*; diff --git a/src/infra/datasets/inmem/tests/repos/mod.rs b/src/infra/datasets/inmem/tests/repos/mod.rs index 411ed2912..c0d5e3e7f 100644 --- a/src/infra/datasets/inmem/tests/repos/mod.rs +++ b/src/infra/datasets/inmem/tests/repos/mod.rs @@ -7,5 +7,6 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. +mod test_inmem_dataset_dependency_repository; mod test_inmem_dataset_entry_repository; mod test_inmem_dataset_env_var_repository; diff --git a/src/infra/datasets/inmem/tests/repos/test_inmem_dataset_dependency_repository.rs b/src/infra/datasets/inmem/tests/repos/test_inmem_dataset_dependency_repository.rs new file mode 100644 index 000000000..a49d1a867 --- /dev/null +++ b/src/infra/datasets/inmem/tests/repos/test_inmem_dataset_dependency_repository.rs @@ -0,0 +1,116 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use database_common_macros::database_transactional_test; +use dill::{Catalog, CatalogBuilder}; +use kamu_accounts_inmem::InMemoryAccountRepository; +use kamu_datasets_inmem::{InMemoryDatasetDependencyRepository, InMemoryDatasetEntryRepository}; +use kamu_datasets_repo_tests::dataset_dependency_repo; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = inmem, + fixture = dataset_dependency_repo::test_crud_single_dependency, + harness = InMemoryDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = inmem, + fixture = dataset_dependency_repo::test_several_unrelated_dependencies, + harness = InMemoryDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = inmem, + fixture = dataset_dependency_repo::test_dependency_chain, + harness = InMemoryDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = inmem, + fixture = dataset_dependency_repo::test_dependency_fanins, + harness = InMemoryDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = inmem, + fixture = dataset_dependency_repo::test_dependency_fanouts, + harness = InMemoryDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = inmem, + fixture = dataset_dependency_repo::test_add_duplicate_dependency, + harness = InMemoryDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = inmem, + fixture = dataset_dependency_repo::test_remove_dependency, + harness = InMemoryDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = inmem, + fixture = dataset_dependency_repo::test_remove_missing_dependency, + harness = InMemoryDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = inmem, + fixture = dataset_dependency_repo::test_remove_all_dataset_dependencies, + harness = InMemoryDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = inmem, + fixture = dataset_dependency_repo::test_remove_orphan_dependencies, + harness = InMemoryDatasetDependencyRepositoryHarness +); + 
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +struct InMemoryDatasetDependencyRepositoryHarness { + catalog: Catalog, +} + +impl InMemoryDatasetDependencyRepositoryHarness { + pub fn new() -> Self { + let mut catalog_builder = CatalogBuilder::new(); + + catalog_builder.add::(); + catalog_builder.add::(); + catalog_builder.add::(); + + Self { + catalog: catalog_builder.build(), + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/datasets/inmem/tests/repos/test_inmem_dataset_env_var_repository.rs b/src/infra/datasets/inmem/tests/repos/test_inmem_dataset_env_var_repository.rs index 4f2e2e9cf..466385806 100644 --- a/src/infra/datasets/inmem/tests/repos/test_inmem_dataset_env_var_repository.rs +++ b/src/infra/datasets/inmem/tests/repos/test_inmem_dataset_env_var_repository.rs @@ -9,7 +9,8 @@ use database_common_macros::database_transactional_test; use dill::{Catalog, CatalogBuilder}; -use kamu_datasets_inmem::InMemoryDatasetEnvVarRepository; +use kamu_accounts_inmem::InMemoryAccountRepository; +use kamu_datasets_inmem::{InMemoryDatasetEntryRepository, InMemoryDatasetEnvVarRepository}; use kamu_datasets_repo_tests::dataset_env_var_repo; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -54,6 +55,14 @@ database_transactional_test!( //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +database_transactional_test!( + storage = inmem, + fixture = dataset_env_var_repo::test_delete_all_dataset_env_vars, + harness = InMemoryDatasetEnvVarRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct InMemoryDatasetEnvVarRepositoryHarness { catalog: Catalog, } @@ -61,6 +70,8 @@ struct InMemoryDatasetEnvVarRepositoryHarness { impl InMemoryDatasetEnvVarRepositoryHarness { pub fn new() -> Self { let mut catalog_builder = CatalogBuilder::new(); + catalog_builder.add::(); + catalog_builder.add::(); catalog_builder.add::(); Self { diff --git a/src/infra/datasets/postgres/.sqlx/query-00c7d1b42566c90d6752f442fef7b2b12465ab511b6f53d28289cf518fcfd748.json b/src/infra/datasets/postgres/.sqlx/query-00c7d1b42566c90d6752f442fef7b2b12465ab511b6f53d28289cf518fcfd748.json new file mode 100644 index 000000000..a9144722f --- /dev/null +++ b/src/infra/datasets/postgres/.sqlx/query-00c7d1b42566c90d6752f442fef7b2b12465ab511b6f53d28289cf518fcfd748.json @@ -0,0 +1,14 @@ +{ + "db_name": "PostgreSQL", + "query": "\n DELETE FROM dataset_dependencies WHERE downstream_dataset_id = $1 OR upstream_dataset_id = $1\n ", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Text" + ] + }, + "nullable": [] + }, + "hash": "00c7d1b42566c90d6752f442fef7b2b12465ab511b6f53d28289cf518fcfd748" +} diff --git a/src/infra/datasets/postgres/.sqlx/query-2cff2b63ca18bba00390b2b61ec90170110f38925697e5898b1fc9d7e7f91b39.json b/src/infra/datasets/postgres/.sqlx/query-2cff2b63ca18bba00390b2b61ec90170110f38925697e5898b1fc9d7e7f91b39.json new file mode 100644 index 000000000..4afc3e124 --- /dev/null +++ b/src/infra/datasets/postgres/.sqlx/query-2cff2b63ca18bba00390b2b61ec90170110f38925697e5898b1fc9d7e7f91b39.json @@ -0,0 +1,20 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT EXISTS (SELECT * FROM dataset_dependencies LIMIT 1) 
as has_data\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "has_data", + "type_info": "Bool" + } + ], + "parameters": { + "Left": [] + }, + "nullable": [ + null + ] + }, + "hash": "2cff2b63ca18bba00390b2b61ec90170110f38925697e5898b1fc9d7e7f91b39" +} diff --git a/src/infra/datasets/postgres/.sqlx/query-2d0e3957bf855f14108200effab39e6d34070193b15e7dd0ec3ccca710ec9b55.json b/src/infra/datasets/postgres/.sqlx/query-2d0e3957bf855f14108200effab39e6d34070193b15e7dd0ec3ccca710ec9b55.json new file mode 100644 index 000000000..813b00c35 --- /dev/null +++ b/src/infra/datasets/postgres/.sqlx/query-2d0e3957bf855f14108200effab39e6d34070193b15e7dd0ec3ccca710ec9b55.json @@ -0,0 +1,15 @@ +{ + "db_name": "PostgreSQL", + "query": "\n DELETE FROM dataset_dependencies WHERE downstream_dataset_id = $1 AND upstream_dataset_id = ANY($2)\n ", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Text", + "TextArray" + ] + }, + "nullable": [] + }, + "hash": "2d0e3957bf855f14108200effab39e6d34070193b15e7dd0ec3ccca710ec9b55" +} diff --git a/src/infra/datasets/postgres/.sqlx/query-c1a820bf91e3efcba1d0470753a37f66a76b0af3c4a999d0e37710a89679b310.json b/src/infra/datasets/postgres/.sqlx/query-c1a820bf91e3efcba1d0470753a37f66a76b0af3c4a999d0e37710a89679b310.json new file mode 100644 index 000000000..d4f11153c --- /dev/null +++ b/src/infra/datasets/postgres/.sqlx/query-c1a820bf91e3efcba1d0470753a37f66a76b0af3c4a999d0e37710a89679b310.json @@ -0,0 +1,26 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT\n downstream_dataset_id as \"downstream_dataset_id: _\",\n upstream_dataset_id as \"upstream_dataset_id: _\"\n FROM dataset_dependencies\n ORDER BY downstream_dataset_id, upstream_dataset_id\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "downstream_dataset_id: _", + "type_info": "Varchar" + }, + { + "ordinal": 1, + "name": "upstream_dataset_id: _", + "type_info": "Varchar" + } + ], + "parameters": { + "Left": [] + }, + "nullable": [ + false, + false + ] + }, + "hash": "c1a820bf91e3efcba1d0470753a37f66a76b0af3c4a999d0e37710a89679b310" +} diff --git a/src/infra/datasets/postgres/.sqlx/query-fe344c04ab30051d00929bf196f38f73eb6ea9ee52ec69c97343683c28757e4a.json b/src/infra/datasets/postgres/.sqlx/query-fe344c04ab30051d00929bf196f38f73eb6ea9ee52ec69c97343683c28757e4a.json new file mode 100644 index 000000000..28832a50c --- /dev/null +++ b/src/infra/datasets/postgres/.sqlx/query-fe344c04ab30051d00929bf196f38f73eb6ea9ee52ec69c97343683c28757e4a.json @@ -0,0 +1,14 @@ +{ + "db_name": "PostgreSQL", + "query": "\n DELETE FROM dataset_entries WHERE dataset_id = $1\n ", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Text" + ] + }, + "nullable": [] + }, + "hash": "fe344c04ab30051d00929bf196f38f73eb6ea9ee52ec69c97343683c28757e4a" +} diff --git a/src/infra/datasets/postgres/src/repos/mod.rs b/src/infra/datasets/postgres/src/repos/mod.rs index 73103b0cc..1e69a08b9 100644 --- a/src/infra/datasets/postgres/src/repos/mod.rs +++ b/src/infra/datasets/postgres/src/repos/mod.rs @@ -7,8 +7,10 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. 
+mod postgres_dataset_dependency_repository; mod postgres_dataset_entry_repository; mod postgres_dataset_env_var_repository; +pub use postgres_dataset_dependency_repository::*; pub use postgres_dataset_entry_repository::*; pub use postgres_dataset_env_var_repository::*; diff --git a/src/infra/datasets/postgres/src/repos/postgres_dataset_dependency_repository.rs b/src/infra/datasets/postgres/src/repos/postgres_dataset_dependency_repository.rs new file mode 100644 index 000000000..876a6a324 --- /dev/null +++ b/src/infra/datasets/postgres/src/repos/postgres_dataset_dependency_repository.rs @@ -0,0 +1,211 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use database_common::{TransactionRef, TransactionRefT}; +use dill::{component, interface}; +use internal_error::{ErrorIntoInternal, InternalError, ResultIntoInternal}; +use kamu_datasets::*; +use opendatafabric::DatasetID; +use sqlx::{Postgres, QueryBuilder}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub struct PostgresDatasetDependencyRepository { + transaction: TransactionRefT, +} + +#[component(pub)] +#[interface(dyn DatasetDependencyRepository)] +#[interface(dyn DatasetEntryRemovalListener)] +impl PostgresDatasetDependencyRepository { + pub fn new(transaction: TransactionRef) -> Self { + Self { + transaction: transaction.into(), + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl DatasetDependencyRepository for PostgresDatasetDependencyRepository { + async fn stores_any_dependencies(&self) -> Result { + let mut tr = self.transaction.lock().await; + + let connection_mut = tr.connection_mut().await?; + + let has_data = sqlx::query_scalar!( + r#" + SELECT EXISTS (SELECT * FROM dataset_dependencies LIMIT 1) as has_data + "#, + ) + .fetch_one(connection_mut) + .await + .int_err()?; + + Ok(has_data.unwrap_or(false)) + } + + fn list_all_dependencies(&self) -> DatasetDependenciesIDStream { + Box::pin(async_stream::stream! { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let mut query_stream = sqlx::query_as!( + DatasetDependencyEntryRowModel, + r#" + SELECT + downstream_dataset_id as "downstream_dataset_id: _", + upstream_dataset_id as "upstream_dataset_id: _" + FROM dataset_dependencies + ORDER BY downstream_dataset_id, upstream_dataset_id + "#, + ) + .fetch(connection_mut) + .map_err(ErrorIntoInternal::int_err); + + use futures::TryStreamExt; + + let mut maybe_last_downstream_id: Option = None; + let mut current_upstreams = Vec::new(); + + while let Some(entry) = query_stream.try_next().await? 
{
+                if let Some(last_downstream_id) = &maybe_last_downstream_id {
+                    if *last_downstream_id == entry.downstream_dataset_id {
+                        current_upstreams.push(entry.upstream_dataset_id);
+                        continue;
+                    }
+
+                    yield Ok(DatasetDependencies {
+                        downstream_dataset_id: last_downstream_id.clone(),
+                        upstream_dataset_ids: current_upstreams,
+                    });
+
+                    current_upstreams = Vec::new();
+                }
+
+                maybe_last_downstream_id = Some(entry.downstream_dataset_id);
+                current_upstreams.push(entry.upstream_dataset_id);
+            }
+
+            if !current_upstreams.is_empty() {
+                yield Ok(DatasetDependencies {
+                    downstream_dataset_id: maybe_last_downstream_id.expect("last downstream id to be present"),
+                    upstream_dataset_ids: current_upstreams,
+                });
+            }
+        })
+    }
+
+    async fn add_upstream_dependencies(
+        &self,
+        downstream_dataset_id: &DatasetID,
+        new_upstream_dataset_ids: &[&DatasetID],
+    ) -> Result<(), AddDependenciesError> {
+        if new_upstream_dataset_ids.is_empty() {
+            return Ok(());
+        }
+
+        let mut tr = self.transaction.lock().await;
+        let connection_mut = tr.connection_mut().await?;
+
+        let mut query_builder = QueryBuilder::<Postgres>::new(
+            r#"
+            INSERT INTO dataset_dependencies(downstream_dataset_id, upstream_dataset_id)
+            "#,
+        );
+
+        query_builder.push_values(new_upstream_dataset_ids, |mut b, upstream_dataset_id| {
+            b.push_bind(downstream_dataset_id.as_did_str().to_string());
+            b.push_bind(upstream_dataset_id.as_did_str().to_string());
+        });
+
+        let query_result = query_builder.build().execute(connection_mut).await;
+        if let Err(e) = query_result {
+            return Err(match e {
+                sqlx::Error::Database(e) if e.is_unique_violation() => {
+                    AddDependencyDuplicateError {
+                        downstream_dataset_id: downstream_dataset_id.clone(),
+                    }
+                    .into()
+                }
+                _ => AddDependenciesError::Internal(e.int_err()),
+            });
+        }
+
+        Ok(())
+    }
+
+    async fn remove_upstream_dependencies(
+        &self,
+        downstream_dataset_id: &DatasetID,
+        obsolete_upstream_dataset_ids: &[&DatasetID],
+    ) -> Result<(), RemoveDependenciesError> {
+        if obsolete_upstream_dataset_ids.is_empty() {
+            return Ok(());
+        }
+
+        let mut tr = self.transaction.lock().await;
+
+        let connection_mut = tr.connection_mut().await?;
+
+        let stack_downstream_dataset_id = downstream_dataset_id.as_did_str().to_stack_string();
+        let upstream_dataset_ids: Vec<_> = obsolete_upstream_dataset_ids
+            .iter()
+            .map(|id| id.as_did_str().to_string())
+            .collect();
+
+        let delete_result = sqlx::query!(
+            r#"
+            DELETE FROM dataset_dependencies WHERE downstream_dataset_id = $1 AND upstream_dataset_id = ANY($2)
+            "#,
+            stack_downstream_dataset_id.as_str(),
+            &upstream_dataset_ids,
+        )
+        .execute(&mut *connection_mut)
+        .await
+        .int_err()?;
+
+        if delete_result.rows_affected() == 0 {
+            return Err(RemoveDependencyMissingError {
+                downstream_dataset_id: downstream_dataset_id.clone(),
+            }
+            .into());
+        }
+
+        Ok(())
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#[async_trait::async_trait]
+impl DatasetEntryRemovalListener for PostgresDatasetDependencyRepository {
+    async fn on_dataset_entry_removed(&self, dataset_id: &DatasetID) -> Result<(), InternalError> {
+        let mut tr = self.transaction.lock().await;
+
+        let connection_mut = tr.connection_mut().await?;
+
+        let stack_dataset_id = dataset_id.as_did_str().to_stack_string();
+
+        sqlx::query!(
+            r#"
+            DELETE FROM dataset_dependencies WHERE downstream_dataset_id = $1 OR upstream_dataset_id = $1
+            "#,
+            stack_dataset_id.as_str(),
+        )
+        .execute(&mut *connection_mut)
+        .await
+        .int_err()?;
+
+        Ok(())
+    }
+} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/datasets/postgres/src/repos/postgres_dataset_entry_repository.rs b/src/infra/datasets/postgres/src/repos/postgres_dataset_entry_repository.rs index abb5d46f4..cdc842cf7 100644 --- a/src/infra/datasets/postgres/src/repos/postgres_dataset_entry_repository.rs +++ b/src/infra/datasets/postgres/src/repos/postgres_dataset_entry_repository.rs @@ -8,6 +8,7 @@ // by the Apache License, Version 2.0. use std::collections::HashSet; +use std::sync::Arc; use database_common::{PaginationOpts, TransactionRef, TransactionRefT}; use dill::{component, interface}; @@ -19,14 +20,19 @@ use opendatafabric::{AccountID, DatasetID, DatasetName}; pub struct PostgresDatasetEntryRepository { transaction: TransactionRefT, + listeners: Vec>, } #[component(pub)] #[interface(dyn DatasetEntryRepository)] impl PostgresDatasetEntryRepository { - pub fn new(transaction: TransactionRef) -> Self { + pub fn new( + transaction: TransactionRef, + listeners: Vec>, + ) -> Self { Self { transaction: transaction.into(), + listeners, } } } @@ -155,7 +161,10 @@ impl DatasetEntryRepository for PostgresDatasetEntryRepository { let connection_mut = tr.connection_mut().await?; - let dataset_ids_search: Vec<_> = dataset_ids.iter().map(ToString::to_string).collect(); + let dataset_ids_search = dataset_ids + .iter() + .map(ToString::to_string) + .collect::>(); let resolved_entries = sqlx::query_as!( DatasetEntryRowModel, @@ -343,26 +352,33 @@ impl DatasetEntryRepository for PostgresDatasetEntryRepository { &self, dataset_id: &DatasetID, ) -> Result<(), DeleteEntryDatasetError> { - let mut tr = self.transaction.lock().await; + { + let mut tr = self.transaction.lock().await; - let connection_mut = tr.connection_mut().await?; + let connection_mut = tr.connection_mut().await?; - let stack_dataset_id = dataset_id.as_did_str().to_stack_string(); + let stack_dataset_id = dataset_id.as_did_str().to_stack_string(); - let delete_result = sqlx::query!( - r#" - DELETE - FROM dataset_entries - WHERE dataset_id = $1 - "#, - stack_dataset_id.as_str(), - ) - .execute(&mut *connection_mut) - .await - .int_err()?; + let delete_result = sqlx::query!( + r#" + DELETE FROM dataset_entries WHERE dataset_id = $1 + "#, + stack_dataset_id.as_str(), + ) + .execute(&mut *connection_mut) + .await + .int_err()?; - if delete_result.rows_affected() == 0 { - return Err(DatasetEntryNotFoundError::new(dataset_id.clone()).into()); + if delete_result.rows_affected() == 0 { + return Err(DatasetEntryNotFoundError::new(dataset_id.clone()).into()); + } + } + + for listener in &self.listeners { + listener + .on_dataset_entry_removed(dataset_id) + .await + .int_err()?; } Ok(()) diff --git a/src/infra/datasets/postgres/tests/repos/mod.rs b/src/infra/datasets/postgres/tests/repos/mod.rs index 1c53cb288..41c4e5875 100644 --- a/src/infra/datasets/postgres/tests/repos/mod.rs +++ b/src/infra/datasets/postgres/tests/repos/mod.rs @@ -7,5 +7,6 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. 
+mod test_postgres_dataset_dependency_repository; mod test_postgres_dataset_entry_repository; mod test_postgres_dataset_env_var_repository; diff --git a/src/infra/datasets/postgres/tests/repos/test_postgres_dataset_dependency_repository.rs b/src/infra/datasets/postgres/tests/repos/test_postgres_dataset_dependency_repository.rs new file mode 100644 index 000000000..91a0d8adb --- /dev/null +++ b/src/infra/datasets/postgres/tests/repos/test_postgres_dataset_dependency_repository.rs @@ -0,0 +1,121 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use database_common::PostgresTransactionManager; +use database_common_macros::database_transactional_test; +use dill::{Catalog, CatalogBuilder}; +use kamu_accounts_postgres::PostgresAccountRepository; +use kamu_datasets_postgres::{PostgresDatasetDependencyRepository, PostgresDatasetEntryRepository}; +use kamu_datasets_repo_tests::dataset_dependency_repo; +use sqlx::PgPool; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = dataset_dependency_repo::test_crud_single_dependency, + harness = PostgresDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = dataset_dependency_repo::test_several_unrelated_dependencies, + harness = PostgresDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = dataset_dependency_repo::test_dependency_chain, + harness = PostgresDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = dataset_dependency_repo::test_dependency_fanins, + harness = PostgresDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = dataset_dependency_repo::test_dependency_fanouts, + harness = PostgresDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = dataset_dependency_repo::test_add_duplicate_dependency, + harness = PostgresDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = dataset_dependency_repo::test_remove_dependency, + harness = PostgresDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = 
dataset_dependency_repo::test_remove_missing_dependency, + harness = PostgresDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = dataset_dependency_repo::test_remove_all_dataset_dependencies, + harness = PostgresDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = postgres, + fixture = dataset_dependency_repo::test_remove_orphan_dependencies, + harness = PostgresDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +struct PostgresDatasetDependencyRepositoryHarness { + catalog: Catalog, +} + +impl PostgresDatasetDependencyRepositoryHarness { + pub fn new(pg_pool: PgPool) -> Self { + let mut catalog_builder = CatalogBuilder::new(); + + catalog_builder.add_value(pg_pool); + catalog_builder.add::(); + + catalog_builder.add::(); + catalog_builder.add::(); + catalog_builder.add::(); + + Self { + catalog: catalog_builder.build(), + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/datasets/postgres/tests/repos/test_postgres_dataset_entry_repository.rs b/src/infra/datasets/postgres/tests/repos/test_postgres_dataset_entry_repository.rs index 7759310ab..b2efd5ecc 100644 --- a/src/infra/datasets/postgres/tests/repos/test_postgres_dataset_entry_repository.rs +++ b/src/infra/datasets/postgres/tests/repos/test_postgres_dataset_entry_repository.rs @@ -107,6 +107,7 @@ impl PostgresDatasetEntryRepositoryHarness { catalog_builder.add_value(pg_pool); catalog_builder.add::(); + catalog_builder.add::(); catalog_builder.add::(); diff --git a/src/infra/datasets/postgres/tests/repos/test_postgres_dataset_env_var_repository.rs b/src/infra/datasets/postgres/tests/repos/test_postgres_dataset_env_var_repository.rs index e17c33a9c..548d6f432 100644 --- a/src/infra/datasets/postgres/tests/repos/test_postgres_dataset_env_var_repository.rs +++ b/src/infra/datasets/postgres/tests/repos/test_postgres_dataset_env_var_repository.rs @@ -10,7 +10,8 @@ use database_common::PostgresTransactionManager; use database_common_macros::database_transactional_test; use dill::{Catalog, CatalogBuilder}; -use kamu_datasets_postgres::PostgresDatasetEnvVarRepository; +use kamu_accounts_postgres::PostgresAccountRepository; +use kamu_datasets_postgres::{PostgresDatasetEntryRepository, PostgresDatasetEnvVarRepository}; use kamu_datasets_repo_tests::dataset_env_var_repo; use sqlx::PgPool; @@ -56,6 +57,14 @@ database_transactional_test!( //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +database_transactional_test!( + storage = postgres, + fixture = dataset_env_var_repo::test_delete_all_dataset_env_vars, + harness = PostgresDatasetEnvVarRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct PostgresDatasetEnvVarRepositoryHarness { catalog: Catalog, } @@ -66,6 +75,9 @@ impl PostgresDatasetEnvVarRepositoryHarness { let mut catalog_builder = CatalogBuilder::new(); catalog_builder.add_value(pg_pool); catalog_builder.add::(); + + 
catalog_builder.add::(); + catalog_builder.add::(); catalog_builder.add::(); Self { diff --git a/src/infra/datasets/repo-tests/Cargo.toml b/src/infra/datasets/repo-tests/Cargo.toml index 99a46d9f9..264c52a4e 100644 --- a/src/infra/datasets/repo-tests/Cargo.toml +++ b/src/infra/datasets/repo-tests/Cargo.toml @@ -29,6 +29,7 @@ opendatafabric = { workspace = true } chrono = { version = "0.4", default-features = false } dill = "0.9" +itertools = "0.13" futures = "0.3" secrecy = "0.10" uuid = "1" diff --git a/src/infra/datasets/repo-tests/src/dataset_dependencies_repository_test_suite.rs b/src/infra/datasets/repo-tests/src/dataset_dependencies_repository_test_suite.rs new file mode 100644 index 000000000..d9c8c8670 --- /dev/null +++ b/src/infra/datasets/repo-tests/src/dataset_dependencies_repository_test_suite.rs @@ -0,0 +1,597 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use std::assert_matches::assert_matches; + +use dill::Catalog; +use itertools::{assert_equal, sorted}; +use kamu_accounts::AccountRepository; +use kamu_datasets::*; + +use crate::helpers::*; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_crud_single_dependency(catalog: &Catalog) { + let account_repo = catalog.get_one::().unwrap(); + let dataset_entry_repo = catalog.get_one::().unwrap(); + let dataset_dependency_repo = catalog + .get_one::() + .unwrap(); + + let account = new_account(&account_repo).await; + let entry_foo = new_dataset_entry_with(&account, "foo"); + let entry_bar = new_dataset_entry_with(&account, "bar"); + + for entry in [&entry_foo, &entry_bar] { + dataset_entry_repo.save_dataset_entry(entry).await.unwrap(); + } + + assert_matches!( + dataset_dependency_repo.stores_any_dependencies().await, + Ok(false) + ); + + let res = dataset_dependency_repo + .add_upstream_dependencies(&entry_bar.id, &[&entry_foo.id]) + .await; + assert_matches!(res, Ok(())); + + assert_matches!( + dataset_dependency_repo.stores_any_dependencies().await, + Ok(true) + ); + + use futures::TryStreamExt; + let dependencies: Vec<_> = dataset_dependency_repo + .list_all_dependencies() + .try_collect() + .await + .unwrap(); + + assert_eq!( + dependencies, + vec![DatasetDependencies { + downstream_dataset_id: entry_bar.id, + upstream_dataset_ids: vec![entry_foo.id] + }] + ); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_several_unrelated_dependencies(catalog: &Catalog) { + let account_repo = catalog.get_one::().unwrap(); + let dataset_entry_repo = catalog.get_one::().unwrap(); + let dataset_dependency_repo = catalog + .get_one::() + .unwrap(); + + let account = new_account(&account_repo).await; + let entry_alpha = new_dataset_entry_with(&account, "alpha"); + let entry_beta = new_dataset_entry_with(&account, "beta"); + + let entry_phi = new_dataset_entry_with(&account, "phi"); + let entry_ksi = new_dataset_entry_with(&account, "ksi"); + + for entry in [&entry_alpha, &entry_beta, &entry_phi, &entry_ksi] { + dataset_entry_repo.save_dataset_entry(entry).await.unwrap(); + } + + let res = dataset_dependency_repo + 
.add_upstream_dependencies(&entry_beta.id, &[&entry_alpha.id]) + .await; + assert_matches!(res, Ok(())); + + let res = dataset_dependency_repo + .add_upstream_dependencies(&entry_ksi.id, &[&entry_phi.id]) + .await; + assert_matches!(res, Ok(())); + + use futures::TryStreamExt; + let dependencies: Vec<_> = dataset_dependency_repo + .list_all_dependencies() + .try_collect() + .await + .unwrap(); + + assert_equal( + sorted(dependencies), + sorted(vec![ + DatasetDependencies { + downstream_dataset_id: entry_beta.id, + upstream_dataset_ids: vec![entry_alpha.id], + }, + DatasetDependencies { + downstream_dataset_id: entry_ksi.id, + upstream_dataset_ids: vec![entry_phi.id], + }, + ]), + ); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_dependency_chain(catalog: &Catalog) { + let account_repo = catalog.get_one::().unwrap(); + let dataset_entry_repo = catalog.get_one::().unwrap(); + let dataset_dependency_repo = catalog + .get_one::() + .unwrap(); + + let account = new_account(&account_repo).await; + let entry_foo = new_dataset_entry_with(&account, "foo"); + let entry_bar = new_dataset_entry_with(&account, "bar"); + let entry_baz = new_dataset_entry_with(&account, "baz"); + + for entry in [&entry_foo, &entry_bar, &entry_baz] { + dataset_entry_repo.save_dataset_entry(entry).await.unwrap(); + } + + let res = dataset_dependency_repo + .add_upstream_dependencies(&entry_bar.id, &[&entry_foo.id]) + .await; + assert_matches!(res, Ok(())); + + let res = dataset_dependency_repo + .add_upstream_dependencies(&entry_baz.id, &[&entry_bar.id]) + .await; + assert_matches!(res, Ok(())); + + use futures::TryStreamExt; + let dependencies: Vec<_> = dataset_dependency_repo + .list_all_dependencies() + .try_collect() + .await + .unwrap(); + + assert_equal( + sorted(dependencies), + sorted(vec![ + DatasetDependencies { + downstream_dataset_id: entry_bar.id.clone(), + upstream_dataset_ids: vec![entry_foo.id], + }, + DatasetDependencies { + downstream_dataset_id: entry_baz.id, + upstream_dataset_ids: vec![entry_bar.id], + }, + ]), + ); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_dependency_fanins(catalog: &Catalog) { + let account_repo = catalog.get_one::().unwrap(); + let dataset_entry_repo = catalog.get_one::().unwrap(); + let dataset_dependency_repo = catalog + .get_one::() + .unwrap(); + + let account = new_account(&account_repo).await; + + let entry_a = new_dataset_entry_with(&account, "a"); + let entry_b = new_dataset_entry_with(&account, "b"); + let entry_c = new_dataset_entry_with(&account, "c"); + let entry_d = new_dataset_entry_with(&account, "d"); + let entry_e = new_dataset_entry_with(&account, "e"); + let entry_abc = new_dataset_entry_with(&account, "abc"); + let entry_de = new_dataset_entry_with(&account, "de"); + let entry_abc_de = new_dataset_entry_with(&account, "abc-de"); + + for entry in [ + &entry_a, + &entry_b, + &entry_c, + &entry_d, + &entry_e, + &entry_abc, + &entry_de, + &entry_abc_de, + ] { + dataset_entry_repo.save_dataset_entry(entry).await.unwrap(); + } + + let res = dataset_dependency_repo + .add_upstream_dependencies(&entry_abc.id, &[&entry_a.id, &entry_b.id, &entry_c.id]) + .await; + assert_matches!(res, Ok(())); + + let res = dataset_dependency_repo + .add_upstream_dependencies(&entry_de.id, &[&entry_d.id, &entry_e.id]) + .await; + assert_matches!(res, Ok(())); + + let res = 
dataset_dependency_repo + .add_upstream_dependencies(&entry_abc_de.id, &[&entry_abc.id, &entry_de.id]) + .await; + assert_matches!(res, Ok(())); + + use futures::TryStreamExt; + let dependencies: Vec<_> = dataset_dependency_repo + .list_all_dependencies() + .try_collect() + .await + .unwrap(); + + assert_equal( + sorted(dependencies), + sorted(vec![ + DatasetDependencies { + downstream_dataset_id: entry_abc.id.clone(), + upstream_dataset_ids: sorted(vec![entry_a.id, entry_b.id, entry_c.id]).collect(), + }, + DatasetDependencies { + downstream_dataset_id: entry_de.id.clone(), + upstream_dataset_ids: sorted(vec![entry_d.id, entry_e.id]).collect(), + }, + DatasetDependencies { + downstream_dataset_id: entry_abc_de.id, + upstream_dataset_ids: sorted(vec![entry_abc.id, entry_de.id]).collect(), + }, + ]), + ); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_dependency_fanouts(catalog: &Catalog) { + let account_repo = catalog.get_one::().unwrap(); + let dataset_entry_repo = catalog.get_one::().unwrap(); + let dataset_dependency_repo = catalog + .get_one::() + .unwrap(); + + let account = new_account(&account_repo).await; + + let entry_a = new_dataset_entry_with(&account, "a"); + let entry_a1 = new_dataset_entry_with(&account, "a1"); + let entry_a2 = new_dataset_entry_with(&account, "a2"); + let entry_a1_1 = new_dataset_entry_with(&account, "a1_1"); + let entry_a1_2 = new_dataset_entry_with(&account, "a1_2"); + let entry_a2_1 = new_dataset_entry_with(&account, "a2_1"); + let entry_a2_2 = new_dataset_entry_with(&account, "a2_2"); + + for entry in [ + &entry_a, + &entry_a1, + &entry_a2, + &entry_a1_1, + &entry_a1_2, + &entry_a2_1, + &entry_a2_2, + ] { + dataset_entry_repo.save_dataset_entry(entry).await.unwrap(); + } + + let res = dataset_dependency_repo + .add_upstream_dependencies(&entry_a1.id, &[&entry_a.id]) + .await; + assert_matches!(res, Ok(())); + + let res = dataset_dependency_repo + .add_upstream_dependencies(&entry_a2.id, &[&entry_a.id]) + .await; + assert_matches!(res, Ok(())); + + let res = dataset_dependency_repo + .add_upstream_dependencies(&entry_a1_1.id, &[&entry_a1.id]) + .await; + assert_matches!(res, Ok(())); + + let res = dataset_dependency_repo + .add_upstream_dependencies(&entry_a1_2.id, &[&entry_a1.id]) + .await; + assert_matches!(res, Ok(())); + + let res = dataset_dependency_repo + .add_upstream_dependencies(&entry_a2_1.id, &[&entry_a2.id]) + .await; + assert_matches!(res, Ok(())); + + let res = dataset_dependency_repo + .add_upstream_dependencies(&entry_a2_2.id, &[&entry_a2.id]) + .await; + assert_matches!(res, Ok(())); + + use futures::TryStreamExt; + let dependencies: Vec<_> = dataset_dependency_repo + .list_all_dependencies() + .try_collect() + .await + .unwrap(); + + assert_equal( + sorted(dependencies), + sorted(vec![ + DatasetDependencies { + downstream_dataset_id: entry_a1.id.clone(), + upstream_dataset_ids: vec![entry_a.id.clone()], + }, + DatasetDependencies { + downstream_dataset_id: entry_a2.id.clone(), + upstream_dataset_ids: vec![entry_a.id], + }, + DatasetDependencies { + downstream_dataset_id: entry_a1_1.id, + upstream_dataset_ids: vec![entry_a1.id.clone()], + }, + DatasetDependencies { + downstream_dataset_id: entry_a1_2.id, + upstream_dataset_ids: vec![entry_a1.id], + }, + DatasetDependencies { + downstream_dataset_id: entry_a2_1.id, + upstream_dataset_ids: vec![entry_a2.id.clone()], + }, + DatasetDependencies { + downstream_dataset_id: entry_a2_2.id, + 
upstream_dataset_ids: vec![entry_a2.id], + }, + ]), + ); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_add_duplicate_dependency(catalog: &Catalog) { + let account_repo = catalog.get_one::().unwrap(); + let dataset_entry_repo = catalog.get_one::().unwrap(); + let dataset_dependency_repo = catalog + .get_one::() + .unwrap(); + + let account = new_account(&account_repo).await; + + let entry_foo = new_dataset_entry_with(&account, "foo"); + let entry_bar = new_dataset_entry_with(&account, "bar"); + + for entry in [&entry_foo, &entry_bar] { + dataset_entry_repo.save_dataset_entry(entry).await.unwrap(); + } + + let res = dataset_dependency_repo + .add_upstream_dependencies(&entry_bar.id, &[&entry_foo.id]) + .await; + assert_matches!(res, Ok(())); + + let res = dataset_dependency_repo + .add_upstream_dependencies(&entry_bar.id, &[&entry_foo.id]) + .await; + assert_matches!( + res, + Err(AddDependenciesError::Duplicate( + AddDependencyDuplicateError { + downstream_dataset_id + } + )) if downstream_dataset_id == entry_bar.id + ); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_remove_dependency(catalog: &Catalog) { + let account_repo = catalog.get_one::().unwrap(); + let dataset_entry_repo = catalog.get_one::().unwrap(); + let dataset_dependency_repo = catalog + .get_one::() + .unwrap(); + + let account = new_account(&account_repo).await; + + let entry_foo = new_dataset_entry_with(&account, "foo"); + let entry_bar = new_dataset_entry_with(&account, "bar"); + let entry_baz = new_dataset_entry_with(&account, "baz"); + + for entry in [&entry_foo, &entry_bar, &entry_baz] { + dataset_entry_repo.save_dataset_entry(entry).await.unwrap(); + } + + let res = dataset_dependency_repo + .add_upstream_dependencies(&entry_baz.id, &[&entry_foo.id, &entry_bar.id]) + .await; + assert_matches!(res, Ok(())); + + use futures::TryStreamExt; + let dependencies: Vec<_> = dataset_dependency_repo + .list_all_dependencies() + .try_collect() + .await + .unwrap(); + + assert_equal( + dependencies, + vec![DatasetDependencies { + downstream_dataset_id: entry_baz.id.clone(), + upstream_dataset_ids: sorted(vec![entry_foo.id.clone(), entry_bar.id.clone()]) + .collect(), + }], + ); + + let res = dataset_dependency_repo + .remove_upstream_dependencies(&entry_baz.id, &[&entry_foo.id]) + .await; + assert_matches!(res, Ok(())); + + let dependencies: Vec<_> = dataset_dependency_repo + .list_all_dependencies() + .try_collect() + .await + .unwrap(); + + assert_equal( + dependencies, + vec![DatasetDependencies { + downstream_dataset_id: entry_baz.id.clone(), + upstream_dataset_ids: vec![entry_bar.id.clone()], + }], + ); + + let res = dataset_dependency_repo + .remove_upstream_dependencies(&entry_baz.id, &[&entry_bar.id]) + .await; + assert_matches!(res, Ok(())); + + let dependencies: Vec<_> = dataset_dependency_repo + .list_all_dependencies() + .try_collect() + .await + .unwrap(); + + assert!(dependencies.is_empty()); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_remove_missing_dependency(catalog: &Catalog) { + let account_repo = catalog.get_one::().unwrap(); + let dataset_entry_repo = catalog.get_one::().unwrap(); + let dataset_dependency_repo = catalog + .get_one::() + .unwrap(); + + let account = new_account(&account_repo).await; + + let 
entry_foo = new_dataset_entry_with(&account, "foo"); + let entry_bar = new_dataset_entry_with(&account, "bar"); + let entry_baz = new_dataset_entry_with(&account, "baz"); + + for entry in [&entry_foo, &entry_bar, &entry_baz] { + dataset_entry_repo.save_dataset_entry(entry).await.unwrap(); + } + + let res = dataset_dependency_repo + .add_upstream_dependencies(&entry_bar.id, &[&entry_foo.id]) + .await; + assert_matches!(res, Ok(())); + + let res = dataset_dependency_repo + .remove_upstream_dependencies(&entry_baz.id, &[&entry_foo.id]) + .await; + assert_matches!(res, Err(RemoveDependenciesError::NotFound(RemoveDependencyMissingError { + downstream_dataset_id + })) if downstream_dataset_id == entry_baz.id); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_remove_all_dataset_dependencies(catalog: &Catalog) { + let account_repo = catalog.get_one::().unwrap(); + let dataset_entry_repo = catalog.get_one::().unwrap(); + let dataset_dependency_repo = catalog + .get_one::() + .unwrap(); + + let account = new_account(&account_repo).await; + + let entry_foo = new_dataset_entry_with(&account, "foo"); + let entry_bar = new_dataset_entry_with(&account, "bar"); + let entry_baz = new_dataset_entry_with(&account, "baz"); + + for entry in [&entry_foo, &entry_bar, &entry_baz] { + dataset_entry_repo.save_dataset_entry(entry).await.unwrap(); + } + + let res = dataset_dependency_repo + .add_upstream_dependencies(&entry_bar.id, &[&entry_foo.id]) + .await; + assert_matches!(res, Ok(())); + + let res = dataset_dependency_repo + .add_upstream_dependencies(&entry_baz.id, &[&entry_foo.id, &entry_bar.id]) + .await; + assert_matches!(res, Ok(())); + + use futures::TryStreamExt; + let dependencies: Vec<_> = dataset_dependency_repo + .list_all_dependencies() + .try_collect() + .await + .unwrap(); + + assert_equal( + sorted(dependencies), + sorted(vec![ + DatasetDependencies { + downstream_dataset_id: entry_bar.id.clone(), + upstream_dataset_ids: vec![entry_foo.id.clone()], + }, + DatasetDependencies { + downstream_dataset_id: entry_baz.id.clone(), + upstream_dataset_ids: sorted(vec![entry_foo.id.clone(), entry_bar.id.clone()]) + .collect(), + }, + ]), + ); + + ///// + + let res = dataset_entry_repo.delete_dataset_entry(&entry_foo.id).await; + assert_matches!(res, Ok(())); + + let dependencies: Vec<_> = dataset_dependency_repo + .list_all_dependencies() + .try_collect() + .await + .unwrap(); + + assert_equal( + dependencies, + vec![DatasetDependencies { + downstream_dataset_id: entry_baz.id.clone(), + upstream_dataset_ids: vec![entry_bar.id.clone()], + }], + ); + + ///// + + let res = dataset_entry_repo.delete_dataset_entry(&entry_baz.id).await; + assert_matches!(res, Ok(())); + + let dependencies: Vec<_> = dataset_dependency_repo + .list_all_dependencies() + .try_collect() + .await + .unwrap(); + + assert!(dependencies.is_empty()); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_remove_orphan_dependencies(catalog: &Catalog) { + let account_repo = catalog.get_one::().unwrap(); + let dataset_entry_repo = catalog.get_one::().unwrap(); + let dataset_dependency_repo = catalog + .get_one::() + .unwrap(); + + let account = new_account(&account_repo).await; + + let entry_foo = new_dataset_entry_with(&account, "foo"); + dataset_entry_repo + .save_dataset_entry(&entry_foo) + .await + .unwrap(); + + let res = 
dataset_entry_repo.delete_dataset_entry(&entry_foo.id).await; + assert_matches!(res, Ok(())); + + use futures::TryStreamExt; + let dependencies: Vec<_> = dataset_dependency_repo + .list_all_dependencies() + .try_collect() + .await + .unwrap(); + + assert!(dependencies.is_empty()); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/datasets/repo-tests/src/dataset_entry_repository_test_suite.rs b/src/infra/datasets/repo-tests/src/dataset_entry_repository_test_suite.rs index 86b2c4bf7..913b69243 100644 --- a/src/infra/datasets/repo-tests/src/dataset_entry_repository_test_suite.rs +++ b/src/infra/datasets/repo-tests/src/dataset_entry_repository_test_suite.rs @@ -8,15 +8,12 @@ // by the Apache License, Version 2.0. use std::assert_matches::assert_matches; -use std::sync::Arc; -use chrono::{SubsecRound, Utc}; use database_common::PaginationOpts; use dill::Catalog; -use kamu_accounts::{Account, AccountRepository, AccountType}; +use kamu_accounts::AccountRepository; use kamu_datasets::{ DatasetEntriesResolution, - DatasetEntry, DatasetEntryByNameNotFoundError, DatasetEntryNotFoundError, DatasetEntryRepository, @@ -26,7 +23,9 @@ use kamu_datasets::{ SaveDatasetEntryError, UpdateDatasetEntryNameError, }; -use opendatafabric::{AccountID, AccountName, DatasetID, DatasetName}; +use opendatafabric::{DatasetID, DatasetName}; + +use crate::helpers::*; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -677,49 +676,3 @@ pub async fn test_delete_dataset_entry(catalog: &Catalog) { } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Helpers -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -async fn new_account_with_name( - account_repo: &Arc, - account_name: &str, -) -> Account { - let (_, id) = AccountID::new_generated_ed25519(); - - let account = Account { - id, - account_name: AccountName::new_unchecked(account_name), - email: None, - display_name: String::new(), - account_type: AccountType::User, - avatar_url: None, - registered_at: Default::default(), - is_admin: false, - provider: "unit-test-provider".to_string(), - provider_identity_key: account_name.to_string(), - }; - let create_res = account_repo.create_account(&account).await; - - assert_matches!(create_res, Ok(_)); - - account -} - -async fn new_account(account_repo: &Arc) -> Account { - new_account_with_name(account_repo, "unit-test-user").await -} - -fn new_dataset_entry_with(owner: &Account, dataset_name: &str) -> DatasetEntry { - let (_, dataset_id) = DatasetID::new_generated_ed25519(); - let owner_id = owner.id.clone(); - let dataset_alias = DatasetName::new_unchecked(dataset_name); - let created_at = Utc::now().round_subsecs(6); - - DatasetEntry::new(dataset_id, owner_id, dataset_alias, created_at) -} - -fn new_dataset_entry(owner: &Account) -> DatasetEntry { - new_dataset_entry_with(owner, "dataset") -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/datasets/repo-tests/src/dataset_env_var_repository_test_suite.rs b/src/infra/datasets/repo-tests/src/dataset_env_var_repository_test_suite.rs index af1262d8b..ba8510411 100644 --- a/src/infra/datasets/repo-tests/src/dataset_env_var_repository_test_suite.rs +++ 
b/src/infra/datasets/repo-tests/src/dataset_env_var_repository_test_suite.rs @@ -12,7 +12,9 @@ use std::assert_matches::assert_matches; use chrono::{SubsecRound, Utc}; use database_common::PaginationOpts; use dill::Catalog; +use kamu_accounts::AccountRepository; use kamu_datasets::{ + DatasetEntryRepository, DatasetEnvVar, DatasetEnvVarRepository, DatasetEnvVarValue, @@ -25,6 +27,8 @@ use opendatafabric::DatasetID; use secrecy::SecretString; use uuid::Uuid; +use crate::helpers::{new_account, new_dataset_entry_with}; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// pub async fn test_missing_dataset_env_var_not_found(catalog: &Catalog) { @@ -62,17 +66,26 @@ pub async fn test_missing_dataset_env_var_not_found(catalog: &Catalog) { pub async fn test_insert_and_get_dataset_env_var(catalog: &Catalog) { let dataset_env_var_repo = catalog.get_one::().unwrap(); + let dataset_entry_repo = catalog.get_one::().unwrap(); + let account_repo = catalog.get_one::().unwrap(); + let dataset_env_var_key = "foo"; let dataset_env_var_key_string = "foo_value".to_string(); let dataset_env_var_value = DatasetEnvVarValue::Secret(SecretString::from(dataset_env_var_key_string.clone())); - let dataset_id = DatasetID::new_seeded_ed25519(b"foo"); + + let account = new_account(&account_repo).await; + let entry_foo = new_dataset_entry_with(&account, "foo"); + dataset_entry_repo + .save_dataset_entry(&entry_foo) + .await + .unwrap(); let new_dataset_env_var = DatasetEnvVar::new( dataset_env_var_key, Utc::now().round_subsecs(6), &dataset_env_var_value, - &dataset_id, + &entry_foo.id, SAMPLE_DATASET_ENV_VAR_ENCRYPTION_KEY, ) .unwrap(); @@ -89,14 +102,14 @@ pub async fn test_insert_and_get_dataset_env_var(catalog: &Catalog) { assert_eq!(db_dataset_env_var, new_dataset_env_var); let db_dataset_env_var = dataset_env_var_repo - .get_dataset_env_var_by_key_and_dataset_id(dataset_env_var_key, &dataset_id) + .get_dataset_env_var_by_key_and_dataset_id(dataset_env_var_key, &entry_foo.id) .await .unwrap(); assert_eq!(db_dataset_env_var, new_dataset_env_var); let db_dataset_env_vars = dataset_env_var_repo .get_all_dataset_env_vars_by_dataset_id( - &dataset_id, + &entry_foo.id, &PaginationOpts { offset: 0, limit: 5, @@ -111,18 +124,28 @@ pub async fn test_insert_and_get_dataset_env_var(catalog: &Catalog) { pub async fn test_insert_and_get_multiple_dataset_env_vars(catalog: &Catalog) { let dataset_env_var_repo = catalog.get_one::().unwrap(); + let dataset_entry_repo = catalog.get_one::().unwrap(); + let account_repo = catalog.get_one::().unwrap(); + let secret_dataset_env_var_key = "foo"; let secret_dataset_env_var_key_string = "foo_value".to_string(); let secret_dataset_env_var_value = DatasetEnvVarValue::Secret(SecretString::from( secret_dataset_env_var_key_string.clone(), )); - let dataset_id = DatasetID::new_seeded_ed25519(b"foo"); + + let account = new_account(&account_repo).await; + let entry_foo = new_dataset_entry_with(&account, "foo"); + + dataset_entry_repo + .save_dataset_entry(&entry_foo) + .await + .unwrap(); let new_secret_dataset_env_var = DatasetEnvVar::new( secret_dataset_env_var_key, Utc::now().round_subsecs(6), &secret_dataset_env_var_value, - &dataset_id, + &entry_foo.id, SAMPLE_DATASET_ENV_VAR_ENCRYPTION_KEY, ) .unwrap(); @@ -135,7 +158,7 @@ pub async fn test_insert_and_get_multiple_dataset_env_vars(catalog: &Catalog) { dataset_env_var_key, Utc::now().round_subsecs(6), &dataset_env_var_value, - &dataset_id, + &entry_foo.id, 
SAMPLE_DATASET_ENV_VAR_ENCRYPTION_KEY, ) .unwrap(); @@ -151,7 +174,7 @@ pub async fn test_insert_and_get_multiple_dataset_env_vars(catalog: &Catalog) { let mut db_dataset_env_vars = dataset_env_var_repo .get_all_dataset_env_vars_by_dataset_id( - &dataset_id, + &entry_foo.id, &PaginationOpts { offset: 0, limit: 5, @@ -167,7 +190,7 @@ pub async fn test_insert_and_get_multiple_dataset_env_vars(catalog: &Catalog) { ); let db_dataset_env_vars_count = dataset_env_var_repo - .get_all_dataset_env_vars_count_by_dataset_id(&dataset_id) + .get_all_dataset_env_vars_count_by_dataset_id(&entry_foo.id) .await .unwrap(); @@ -178,13 +201,22 @@ pub async fn test_insert_and_get_multiple_dataset_env_vars(catalog: &Catalog) { pub async fn test_delete_dataset_env_vars(catalog: &Catalog) { let dataset_env_var_repo = catalog.get_one::().unwrap(); + let dataset_entry_repo = catalog.get_one::().unwrap(); + let account_repo = catalog.get_one::().unwrap(); + + let account = new_account(&account_repo).await; + let entry_foo = new_dataset_entry_with(&account, "foo"); + + dataset_entry_repo + .save_dataset_entry(&entry_foo) + .await + .unwrap(); - let dataset_id = DatasetID::new_seeded_ed25519(b"foo"); let new_dataset_env_var = DatasetEnvVar::new( "foo", Utc::now().round_subsecs(6), &DatasetEnvVarValue::Regular("foo".to_string()), - &dataset_id, + &entry_foo.id, SAMPLE_DATASET_ENV_VAR_ENCRYPTION_KEY, ) .unwrap(); @@ -192,7 +224,7 @@ pub async fn test_delete_dataset_env_vars(catalog: &Catalog) { "bar", Utc::now().round_subsecs(6), &DatasetEnvVarValue::Regular("bar".to_string()), - &dataset_id, + &entry_foo.id, SAMPLE_DATASET_ENV_VAR_ENCRYPTION_KEY, ) .unwrap(); @@ -219,7 +251,7 @@ pub async fn test_delete_dataset_env_vars(catalog: &Catalog) { let db_dataset_env_vars = dataset_env_var_repo .get_all_dataset_env_vars_by_dataset_id( - &dataset_id, + &entry_foo.id, &PaginationOpts { offset: 0, limit: 5, @@ -235,12 +267,22 @@ pub async fn test_delete_dataset_env_vars(catalog: &Catalog) { pub async fn test_modify_dataset_env_vars(catalog: &Catalog) { let dataset_env_var_repo = catalog.get_one::().unwrap(); + let dataset_entry_repo = catalog.get_one::().unwrap(); + let account_repo = catalog.get_one::().unwrap(); + + let account = new_account(&account_repo).await; + let entry_foo = new_dataset_entry_with(&account, "foo"); + + dataset_entry_repo + .save_dataset_entry(&entry_foo) + .await + .unwrap(); let new_dataset_env_var = DatasetEnvVar::new( "foo", Utc::now().round_subsecs(6), &DatasetEnvVarValue::Regular("foo".to_string()), - &DatasetID::new_seeded_ed25519(b"foo"), + &entry_foo.id, SAMPLE_DATASET_ENV_VAR_ENCRYPTION_KEY, ) .unwrap(); @@ -285,3 +327,84 @@ pub async fn test_modify_dataset_env_vars(catalog: &Catalog) { } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub async fn test_delete_all_dataset_env_vars(catalog: &Catalog) { + let dataset_env_var_repo = catalog.get_one::().unwrap(); + let dataset_entry_repo = catalog.get_one::().unwrap(); + let account_repo = catalog.get_one::().unwrap(); + + let account = new_account(&account_repo).await; + let entry_foo = new_dataset_entry_with(&account, "foo"); + + dataset_entry_repo + .save_dataset_entry(&entry_foo) + .await + .unwrap(); + + let new_dataset_env_var = DatasetEnvVar::new( + "foo", + Utc::now().round_subsecs(6), + &DatasetEnvVarValue::Regular("foo".to_string()), + &entry_foo.id, + SAMPLE_DATASET_ENV_VAR_ENCRYPTION_KEY, + ) + .unwrap(); + let new_bar_dataset_env_var = DatasetEnvVar::new( + 
"bar", + Utc::now().round_subsecs(6), + &DatasetEnvVarValue::Regular("bar".to_string()), + &entry_foo.id, + SAMPLE_DATASET_ENV_VAR_ENCRYPTION_KEY, + ) + .unwrap(); + let save_result = dataset_env_var_repo + .save_dataset_env_var(&new_dataset_env_var) + .await; + assert!(save_result.is_ok()); + let save_result = dataset_env_var_repo + .save_dataset_env_var(&new_bar_dataset_env_var) + .await; + assert!(save_result.is_ok()); + + let db_dataset_env_vars = dataset_env_var_repo + .get_all_dataset_env_vars_by_dataset_id( + &entry_foo.id, + &PaginationOpts { + offset: 0, + limit: 5, + }, + ) + .await + .unwrap(); + + assert_eq!( + db_dataset_env_vars, + vec![new_dataset_env_var.clone(), new_bar_dataset_env_var.clone()] + ); + + let res = dataset_entry_repo.delete_dataset_entry(&entry_foo.id).await; + assert_matches!(res, Ok(())); + + let db_dataset_env_vars = dataset_env_var_repo + .get_all_dataset_env_vars_by_dataset_id( + &entry_foo.id, + &PaginationOpts { + offset: 0, + limit: 5, + }, + ) + .await + .unwrap(); + + assert!(db_dataset_env_vars.is_empty()); + let res = dataset_env_var_repo + .get_dataset_env_var_by_id(&new_bar_dataset_env_var.id) + .await; + assert_matches!(res, Err(GetDatasetEnvVarError::NotFound(_))); + let res = dataset_env_var_repo + .get_dataset_env_var_by_key_and_dataset_id(&new_dataset_env_var.key, &entry_foo.id) + .await; + assert_matches!(res, Err(GetDatasetEnvVarError::NotFound(_))); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/datasets/repo-tests/src/helpers.rs b/src/infra/datasets/repo-tests/src/helpers.rs new file mode 100644 index 000000000..f7a4344bb --- /dev/null +++ b/src/infra/datasets/repo-tests/src/helpers.rs @@ -0,0 +1,64 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
+ +use std::assert_matches::assert_matches; +use std::sync::Arc; + +use chrono::{SubsecRound, Utc}; +use kamu_accounts::{Account, AccountRepository, AccountType}; +use kamu_datasets::DatasetEntry; +use opendatafabric::{AccountID, AccountName, DatasetID, DatasetName}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub(crate) async fn new_account_with_name( + account_repo: &Arc, + account_name: &str, +) -> Account { + let (_, id) = AccountID::new_generated_ed25519(); + + let account = Account { + id, + account_name: AccountName::new_unchecked(account_name), + email: None, + display_name: String::new(), + account_type: AccountType::User, + avatar_url: None, + registered_at: Default::default(), + is_admin: false, + provider: "unit-test-provider".to_string(), + provider_identity_key: account_name.to_string(), + }; + let create_res = account_repo.create_account(&account).await; + + assert_matches!(create_res, Ok(_)); + + account +} + +pub(crate) async fn new_account(account_repo: &Arc) -> Account { + new_account_with_name(account_repo, "unit-test-user").await +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub(crate) fn new_dataset_entry_with(owner: &Account, dataset_name: &str) -> DatasetEntry { + let (_, dataset_id) = DatasetID::new_generated_ed25519(); + let owner_id = owner.id.clone(); + let dataset_alias = DatasetName::new_unchecked(dataset_name); + let created_at = Utc::now().round_subsecs(6); + + DatasetEntry::new(dataset_id, owner_id, dataset_alias, created_at) +} + +pub(crate) fn new_dataset_entry(owner: &Account) -> DatasetEntry { + new_dataset_entry_with(owner, "dataset") +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/datasets/repo-tests/src/lib.rs b/src/infra/datasets/repo-tests/src/lib.rs index f4de3fbae..5a112b5fa 100644 --- a/src/infra/datasets/repo-tests/src/lib.rs +++ b/src/infra/datasets/repo-tests/src/lib.rs @@ -9,9 +9,14 @@ #![feature(assert_matches)] +mod dataset_dependencies_repository_test_suite; mod dataset_entry_repository_test_suite; mod dataset_env_var_repository_test_suite; +mod helpers; +pub mod dataset_dependency_repo { + pub use crate::dataset_dependencies_repository_test_suite::*; +} pub mod dataset_entry_repo { pub use crate::dataset_entry_repository_test_suite::*; } diff --git a/src/infra/datasets/sqlite/.sqlx/query-00c7d1b42566c90d6752f442fef7b2b12465ab511b6f53d28289cf518fcfd748.json b/src/infra/datasets/sqlite/.sqlx/query-00c7d1b42566c90d6752f442fef7b2b12465ab511b6f53d28289cf518fcfd748.json new file mode 100644 index 000000000..fdab5a447 --- /dev/null +++ b/src/infra/datasets/sqlite/.sqlx/query-00c7d1b42566c90d6752f442fef7b2b12465ab511b6f53d28289cf518fcfd748.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "\n DELETE FROM dataset_dependencies WHERE downstream_dataset_id = $1 OR upstream_dataset_id = $1\n ", + "describe": { + "columns": [], + "parameters": { + "Right": 1 + }, + "nullable": [] + }, + "hash": "00c7d1b42566c90d6752f442fef7b2b12465ab511b6f53d28289cf518fcfd748" +} diff --git a/src/infra/datasets/sqlite/.sqlx/query-2cff2b63ca18bba00390b2b61ec90170110f38925697e5898b1fc9d7e7f91b39.json b/src/infra/datasets/sqlite/.sqlx/query-2cff2b63ca18bba00390b2b61ec90170110f38925697e5898b1fc9d7e7f91b39.json new file mode 100644 index 000000000..733475bf6 --- /dev/null +++ 
b/src/infra/datasets/sqlite/.sqlx/query-2cff2b63ca18bba00390b2b61ec90170110f38925697e5898b1fc9d7e7f91b39.json @@ -0,0 +1,20 @@ +{ + "db_name": "SQLite", + "query": "\n SELECT EXISTS (SELECT * FROM dataset_dependencies LIMIT 1) as has_data\n ", + "describe": { + "columns": [ + { + "name": "has_data", + "ordinal": 0, + "type_info": "Integer" + } + ], + "parameters": { + "Right": 0 + }, + "nullable": [ + false + ] + }, + "hash": "2cff2b63ca18bba00390b2b61ec90170110f38925697e5898b1fc9d7e7f91b39" +} diff --git a/src/infra/datasets/sqlite/.sqlx/query-30c92efe33072f0b9fa446ea3255ffca15f34c2af9aaeb8d31453ab364f97495.json b/src/infra/datasets/sqlite/.sqlx/query-30c92efe33072f0b9fa446ea3255ffca15f34c2af9aaeb8d31453ab364f97495.json deleted file mode 100644 index 2c185e067..000000000 --- a/src/infra/datasets/sqlite/.sqlx/query-30c92efe33072f0b9fa446ea3255ffca15f34c2af9aaeb8d31453ab364f97495.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "db_name": "SQLite", - "query": "\n DELETE\n FROM dataset_entries\n WHERE dataset_id = $1\n ", - "describe": { - "columns": [], - "parameters": { - "Right": 1 - }, - "nullable": [] - }, - "hash": "30c92efe33072f0b9fa446ea3255ffca15f34c2af9aaeb8d31453ab364f97495" -} diff --git a/src/infra/datasets/sqlite/.sqlx/query-b3a72232b45f12e81c27b06dfe13f674909a6dcf6997e8f28476667275d104f0.json b/src/infra/datasets/sqlite/.sqlx/query-b3a72232b45f12e81c27b06dfe13f674909a6dcf6997e8f28476667275d104f0.json index b45126ad7..538c4b005 100644 --- a/src/infra/datasets/sqlite/.sqlx/query-b3a72232b45f12e81c27b06dfe13f674909a6dcf6997e8f28476667275d104f0.json +++ b/src/infra/datasets/sqlite/.sqlx/query-b3a72232b45f12e81c27b06dfe13f674909a6dcf6997e8f28476667275d104f0.json @@ -21,7 +21,7 @@ { "name": "secret_nonce: _", "ordinal": 3, - "type_info": "Null" + "type_info": "Blob" }, { "name": "created_at: _", diff --git a/src/infra/datasets/sqlite/.sqlx/query-b53a84cd33e8bacb8a7b8de7f5a3bf9ab82360dfaa972f9bcfe80e14241afde5.json b/src/infra/datasets/sqlite/.sqlx/query-b53a84cd33e8bacb8a7b8de7f5a3bf9ab82360dfaa972f9bcfe80e14241afde5.json index f6dd4fe87..64cd330d3 100644 --- a/src/infra/datasets/sqlite/.sqlx/query-b53a84cd33e8bacb8a7b8de7f5a3bf9ab82360dfaa972f9bcfe80e14241afde5.json +++ b/src/infra/datasets/sqlite/.sqlx/query-b53a84cd33e8bacb8a7b8de7f5a3bf9ab82360dfaa972f9bcfe80e14241afde5.json @@ -21,7 +21,7 @@ { "name": "secret_nonce: _", "ordinal": 3, - "type_info": "Null" + "type_info": "Blob" }, { "name": "created_at: _", diff --git a/src/infra/datasets/sqlite/.sqlx/query-c1a820bf91e3efcba1d0470753a37f66a76b0af3c4a999d0e37710a89679b310.json b/src/infra/datasets/sqlite/.sqlx/query-c1a820bf91e3efcba1d0470753a37f66a76b0af3c4a999d0e37710a89679b310.json new file mode 100644 index 000000000..4b324e403 --- /dev/null +++ b/src/infra/datasets/sqlite/.sqlx/query-c1a820bf91e3efcba1d0470753a37f66a76b0af3c4a999d0e37710a89679b310.json @@ -0,0 +1,26 @@ +{ + "db_name": "SQLite", + "query": "\n SELECT\n downstream_dataset_id as \"downstream_dataset_id: _\",\n upstream_dataset_id as \"upstream_dataset_id: _\"\n FROM dataset_dependencies\n ORDER BY downstream_dataset_id, upstream_dataset_id\n ", + "describe": { + "columns": [ + { + "name": "downstream_dataset_id: _", + "ordinal": 0, + "type_info": "Text" + }, + { + "name": "upstream_dataset_id: _", + "ordinal": 1, + "type_info": "Text" + } + ], + "parameters": { + "Right": 0 + }, + "nullable": [ + false, + false + ] + }, + "hash": "c1a820bf91e3efcba1d0470753a37f66a76b0af3c4a999d0e37710a89679b310" +} diff --git 
a/src/infra/datasets/sqlite/.sqlx/query-cdaa31bfc6b5fe558f0cc88fc8dc865a15f8d6f9406f1622baba64ea0a14cebf.json b/src/infra/datasets/sqlite/.sqlx/query-cdaa31bfc6b5fe558f0cc88fc8dc865a15f8d6f9406f1622baba64ea0a14cebf.json index 6fd425d83..cf8377968 100644 --- a/src/infra/datasets/sqlite/.sqlx/query-cdaa31bfc6b5fe558f0cc88fc8dc865a15f8d6f9406f1622baba64ea0a14cebf.json +++ b/src/infra/datasets/sqlite/.sqlx/query-cdaa31bfc6b5fe558f0cc88fc8dc865a15f8d6f9406f1622baba64ea0a14cebf.json @@ -21,7 +21,7 @@ { "name": "secret_nonce: _", "ordinal": 3, - "type_info": "Null" + "type_info": "Blob" }, { "name": "created_at: _", diff --git a/src/infra/datasets/sqlite/.sqlx/query-fe344c04ab30051d00929bf196f38f73eb6ea9ee52ec69c97343683c28757e4a.json b/src/infra/datasets/sqlite/.sqlx/query-fe344c04ab30051d00929bf196f38f73eb6ea9ee52ec69c97343683c28757e4a.json new file mode 100644 index 000000000..f2c0e0dde --- /dev/null +++ b/src/infra/datasets/sqlite/.sqlx/query-fe344c04ab30051d00929bf196f38f73eb6ea9ee52ec69c97343683c28757e4a.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "\n DELETE FROM dataset_entries WHERE dataset_id = $1\n ", + "describe": { + "columns": [], + "parameters": { + "Right": 1 + }, + "nullable": [] + }, + "hash": "fe344c04ab30051d00929bf196f38f73eb6ea9ee52ec69c97343683c28757e4a" +} diff --git a/src/infra/datasets/sqlite/src/repos/mod.rs b/src/infra/datasets/sqlite/src/repos/mod.rs index 48bf05ad5..8663ba23d 100644 --- a/src/infra/datasets/sqlite/src/repos/mod.rs +++ b/src/infra/datasets/sqlite/src/repos/mod.rs @@ -7,8 +7,10 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. +mod sqlite_dataset_dependency_repository; mod sqlite_dataset_env_var_repository; mod sqlite_dateset_entry_repository; +pub use sqlite_dataset_dependency_repository::*; pub use sqlite_dataset_env_var_repository::*; pub use sqlite_dateset_entry_repository::*; diff --git a/src/infra/datasets/sqlite/src/repos/sqlite_dataset_dependency_repository.rs b/src/infra/datasets/sqlite/src/repos/sqlite_dataset_dependency_repository.rs new file mode 100644 index 000000000..97ad5d07c --- /dev/null +++ b/src/infra/datasets/sqlite/src/repos/sqlite_dataset_dependency_repository.rs @@ -0,0 +1,221 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. 
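Besides DatasetDependencyRepository, the repository defined below also registers itself as a DatasetEntryRemovalListener, which is how deleting a dataset entry cascades into the dataset_dependencies table (the env-var repository behaves analogously, as the test_delete_all_dataset_env_vars fixture above exercises). As a rough illustration of that extension point: the trait name and method signature are taken from the implementation further down, while the listener struct itself is purely hypothetical.

    use std::sync::Mutex;

    use internal_error::InternalError;
    use kamu_datasets::DatasetEntryRemovalListener;
    use opendatafabric::DatasetID;

    // Hypothetical listener that merely records removals; the real listeners
    // (dependencies, env vars) delete their own rows for the removed dataset.
    #[derive(Default)]
    struct RemovalLog {
        removed: Mutex<Vec<DatasetID>>,
    }

    #[async_trait::async_trait]
    impl DatasetEntryRemovalListener for RemovalLog {
        async fn on_dataset_entry_removed(
            &self,
            dataset_id: &DatasetID,
        ) -> Result<(), InternalError> {
            self.removed.lock().unwrap().push(dataset_id.clone());
            Ok(())
        }
    }

The SQLite dataset entry repository (see its updated delete_dataset_entry() further below) receives the registered listeners through its constructor and invokes on_dataset_entry_removed() after a successful delete, which is what the cascade tests above rely on.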
+ +use database_common::{TransactionRef, TransactionRefT}; +use dill::{component, interface}; +use internal_error::{ErrorIntoInternal, InternalError, ResultIntoInternal}; +use kamu_datasets::*; +use opendatafabric::DatasetID; +use sqlx::{QueryBuilder, Sqlite}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub struct SqliteDatasetDependencyRepository { + transaction: TransactionRefT, +} + +#[component(pub)] +#[interface(dyn DatasetDependencyRepository)] +#[interface(dyn DatasetEntryRemovalListener)] +impl SqliteDatasetDependencyRepository { + pub fn new(transaction: TransactionRef) -> Self { + Self { + transaction: transaction.into(), + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl DatasetDependencyRepository for SqliteDatasetDependencyRepository { + async fn stores_any_dependencies(&self) -> Result { + let mut tr = self.transaction.lock().await; + + let connection_mut = tr.connection_mut().await?; + + let has_data = sqlx::query_scalar!( + r#" + SELECT EXISTS (SELECT * FROM dataset_dependencies LIMIT 1) as has_data + "#, + ) + .fetch_one(connection_mut) + .await + .int_err()?; + + Ok(has_data != 0) + } + + fn list_all_dependencies(&self) -> DatasetDependenciesIDStream { + Box::pin(async_stream::stream! { + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let mut query_stream = sqlx::query_as!( + DatasetDependencyEntryRowModel, + r#" + SELECT + downstream_dataset_id as "downstream_dataset_id: _", + upstream_dataset_id as "upstream_dataset_id: _" + FROM dataset_dependencies + ORDER BY downstream_dataset_id, upstream_dataset_id + "#, + ) + .fetch(connection_mut) + .map_err(ErrorIntoInternal::int_err); + + use futures::TryStreamExt; + + let mut maybe_last_downstream_id: Option = None; + let mut current_upstreams = Vec::new(); + + while let Some(entry) = query_stream.try_next().await? 
{ + if let Some(last_downstream_id) = &maybe_last_downstream_id { + if *last_downstream_id == entry.downstream_dataset_id { + current_upstreams.push(entry.upstream_dataset_id); + continue; + } + + yield Ok(DatasetDependencies { + downstream_dataset_id: last_downstream_id.clone(), + upstream_dataset_ids: current_upstreams, + }); + + current_upstreams = Vec::new(); + } + + maybe_last_downstream_id = Some(entry.downstream_dataset_id); + current_upstreams.push(entry.upstream_dataset_id); + } + + if !current_upstreams.is_empty() { + let last_downstream_id = maybe_last_downstream_id.expect("last downstream id to be present"); + yield Ok(DatasetDependencies { + downstream_dataset_id: last_downstream_id, + upstream_dataset_ids: current_upstreams, + }); + } + }) + } + + async fn add_upstream_dependencies( + &self, + downstream_dataset_id: &DatasetID, + new_upstream_dataset_ids: &[&DatasetID], + ) -> Result<(), AddDependenciesError> { + if new_upstream_dataset_ids.is_empty() { + return Ok(()); + } + + let mut tr = self.transaction.lock().await; + let connection_mut = tr.connection_mut().await?; + + let mut query_builder = QueryBuilder::::new( + r#" + INSERT INTO dataset_dependencies(downstream_dataset_id, upstream_dataset_id) + "#, + ); + + query_builder.push_values(new_upstream_dataset_ids, |mut b, upsteam_dataset_id| { + b.push_bind(downstream_dataset_id.as_did_str().to_string()); + b.push_bind(upsteam_dataset_id.as_did_str().to_string()); + }); + + let query_result = query_builder.build().execute(connection_mut).await; + if let Err(e) = query_result { + return Err(match e { + sqlx::Error::Database(e) if e.is_unique_violation() => { + AddDependencyDuplicateError { + downstream_dataset_id: downstream_dataset_id.clone(), + } + .into() + } + _ => AddDependenciesError::Internal(e.int_err()), + }); + } + + Ok(()) + } + + async fn remove_upstream_dependencies( + &self, + downstream_dataset_id: &DatasetID, + obsolete_upstream_dataset_ids: &[&DatasetID], + ) -> Result<(), RemoveDependenciesError> { + if obsolete_upstream_dataset_ids.is_empty() { + return Ok(()); + } + + let mut tr = self.transaction.lock().await; + + let connection_mut = tr.connection_mut().await?; + + let placeholders = obsolete_upstream_dataset_ids + .iter() + .enumerate() + .map(|(i, _)| format!("${}", i + 2)) + .collect::>() + .join(", "); + + let query_str = format!( + r#" + DELETE FROM dataset_dependencies + WHERE + downstream_dataset_id = $1 AND + upstream_dataset_id IN ({placeholders}) + "#, + ); + + // ToDo replace it by macro once sqlx will support it + // https://github.com/launchbadge/sqlx/blob/main/FAQ.md#how-can-i-do-a-select--where-foo-in--query + let mut query = sqlx::query(&query_str); + query = query.bind(downstream_dataset_id.to_string()); + for upstream_dataset_id in obsolete_upstream_dataset_ids { + query = query.bind(upstream_dataset_id.to_string()); + } + + let delete_result = query.execute(&mut *connection_mut).await.int_err()?; + if delete_result.rows_affected() == 0 { + return Err(RemoveDependencyMissingError { + downstream_dataset_id: downstream_dataset_id.clone(), + } + .into()); + } + + Ok(()) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[async_trait::async_trait] +impl DatasetEntryRemovalListener for SqliteDatasetDependencyRepository { + async fn on_dataset_entry_removed(&self, dataset_id: &DatasetID) -> Result<(), InternalError> { + let mut tr = self.transaction.lock().await; + + let connection_mut = 
tr.connection_mut().await?; + + let stack_dataset_id = dataset_id.as_did_str().to_stack_string(); + let stack_dataset_id_as_str = stack_dataset_id.as_str(); + + sqlx::query!( + r#" + DELETE FROM dataset_dependencies WHERE downstream_dataset_id = $1 OR upstream_dataset_id = $1 + "#, + stack_dataset_id_as_str, + ) + .execute(&mut *connection_mut) + .await + .int_err()?; + + Ok(()) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/datasets/sqlite/src/repos/sqlite_dateset_entry_repository.rs b/src/infra/datasets/sqlite/src/repos/sqlite_dateset_entry_repository.rs index 158016c4d..eecc4e200 100644 --- a/src/infra/datasets/sqlite/src/repos/sqlite_dateset_entry_repository.rs +++ b/src/infra/datasets/sqlite/src/repos/sqlite_dateset_entry_repository.rs @@ -8,6 +8,7 @@ // by the Apache License, Version 2.0. use std::collections::HashSet; +use std::sync::Arc; use database_common::{PaginationOpts, TransactionRef, TransactionRefT}; use dill::{component, interface}; @@ -20,14 +21,19 @@ use sqlx::Row; pub struct SqliteDatasetEntryRepository { transaction: TransactionRefT, + listeners: Vec>, } #[component(pub)] #[interface(dyn DatasetEntryRepository)] impl SqliteDatasetEntryRepository { - pub fn new(transaction: TransactionRef) -> Self { + pub fn new( + transaction: TransactionRef, + listeners: Vec>, + ) -> Self { Self { transaction: transaction.into(), + listeners, } } } @@ -382,29 +388,36 @@ impl DatasetEntryRepository for SqliteDatasetEntryRepository { &self, dataset_id: &DatasetID, ) -> Result<(), DeleteEntryDatasetError> { - let mut tr = self.transaction.lock().await; + { + let mut tr = self.transaction.lock().await; + + let connection_mut = tr + .connection_mut() + .await + .map_err(DeleteEntryDatasetError::Internal)?; - let connection_mut = tr - .connection_mut() + let stack_dataset_id = dataset_id.as_did_str().to_stack_string(); + let dataset_id_as_str = stack_dataset_id.as_str(); + let delete_result = sqlx::query!( + r#" + DELETE FROM dataset_entries WHERE dataset_id = $1 + "#, + dataset_id_as_str, + ) + .execute(&mut *connection_mut) .await - .map_err(DeleteEntryDatasetError::Internal)?; + .int_err()?; - let stack_dataset_id = dataset_id.as_did_str().to_stack_string(); - let dataset_id_as_str = stack_dataset_id.as_str(); - let delete_result = sqlx::query!( - r#" - DELETE - FROM dataset_entries - WHERE dataset_id = $1 - "#, - dataset_id_as_str, - ) - .execute(&mut *connection_mut) - .await - .int_err()?; + if delete_result.rows_affected() == 0 { + return Err(DatasetEntryNotFoundError::new(dataset_id.clone()).into()); + } + } - if delete_result.rows_affected() == 0 { - return Err(DatasetEntryNotFoundError::new(dataset_id.clone()).into()); + for listener in &self.listeners { + listener + .on_dataset_entry_removed(dataset_id) + .await + .int_err()?; } Ok(()) diff --git a/src/infra/datasets/sqlite/tests/repos/mod.rs b/src/infra/datasets/sqlite/tests/repos/mod.rs index 7e64014da..711f00a6e 100644 --- a/src/infra/datasets/sqlite/tests/repos/mod.rs +++ b/src/infra/datasets/sqlite/tests/repos/mod.rs @@ -7,5 +7,6 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. 
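Taken together, the repository above and its Postgres counterpart serve the same small trait surface, and the shared fixtures in kamu_datasets_repo_tests drive it identically against both backends. For orientation, the calls compose roughly as follows; this is a minimal sketch in which the helper function is hypothetical, while the method names, argument shapes, and the DatasetDependencies record mirror the test suite earlier in this patch.

    use futures::TryStreamExt;
    use kamu_datasets::{DatasetDependencies, DatasetDependencyRepository};
    use opendatafabric::DatasetID;

    // Hypothetical helper: link one upstream dataset to a downstream one,
    // observe it in the flattened listing, then unlink it again.
    async fn link_list_unlink(
        repo: &dyn DatasetDependencyRepository,
        downstream: &DatasetID,
        upstream: &DatasetID,
    ) {
        // Both IDs must already be saved as dataset entries, as the fixtures
        // do via DatasetEntryRepository::save_dataset_entry().
        repo.add_upstream_dependencies(downstream, &[upstream])
            .await
            .unwrap();

        // list_all_dependencies() streams one DatasetDependencies record per
        // downstream dataset with all of its upstream IDs grouped together.
        let all: Vec<DatasetDependencies> = repo
            .list_all_dependencies()
            .try_collect()
            .await
            .unwrap();
        assert!(all.iter().any(|d| &d.downstream_dataset_id == downstream));

        // Removing the only upstream link empties the graph again; removing a
        // link that was never added yields RemoveDependenciesError::NotFound.
        repo.remove_upstream_dependencies(downstream, &[upstream])
            .await
            .unwrap();
    }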
+mod test_sqlite_dataset_dependency_repository; mod test_sqlite_dataset_entry_repository; mod test_sqlite_dataset_env_var_repository; diff --git a/src/infra/datasets/sqlite/tests/repos/test_sqlite_dataset_dependency_repository.rs b/src/infra/datasets/sqlite/tests/repos/test_sqlite_dataset_dependency_repository.rs new file mode 100644 index 000000000..0432b3da7 --- /dev/null +++ b/src/infra/datasets/sqlite/tests/repos/test_sqlite_dataset_dependency_repository.rs @@ -0,0 +1,121 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use database_common::SqliteTransactionManager; +use database_common_macros::database_transactional_test; +use dill::{Catalog, CatalogBuilder}; +use kamu_accounts_sqlite::SqliteAccountRepository; +use kamu_datasets_repo_tests::dataset_dependency_repo; +use kamu_datasets_sqlite::{SqliteDatasetDependencyRepository, SqliteDatasetEntryRepository}; +use sqlx::SqlitePool; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = dataset_dependency_repo::test_crud_single_dependency, + harness = SqliteDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = dataset_dependency_repo::test_several_unrelated_dependencies, + harness = SqliteDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = dataset_dependency_repo::test_dependency_chain, + harness = SqliteDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = dataset_dependency_repo::test_dependency_fanins, + harness = SqliteDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = dataset_dependency_repo::test_dependency_fanouts, + harness = SqliteDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = dataset_dependency_repo::test_add_duplicate_dependency, + harness = SqliteDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = dataset_dependency_repo::test_remove_dependency, + harness = SqliteDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = dataset_dependency_repo::test_remove_missing_dependency, + 
harness = SqliteDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = dataset_dependency_repo::test_remove_all_dataset_dependencies, + harness = SqliteDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +database_transactional_test!( + storage = sqlite, + fixture = dataset_dependency_repo::test_remove_orphan_dependencies, + harness = SqliteDatasetDependencyRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +struct SqliteDatasetDependencyRepositoryHarness { + catalog: Catalog, +} + +impl SqliteDatasetDependencyRepositoryHarness { + pub fn new(sqlite_pool: SqlitePool) -> Self { + let mut catalog_builder = CatalogBuilder::new(); + + catalog_builder.add_value(sqlite_pool); + catalog_builder.add::(); + + catalog_builder.add::(); + catalog_builder.add::(); + catalog_builder.add::(); + + Self { + catalog: catalog_builder.build(), + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/datasets/sqlite/tests/repos/test_sqlite_dataset_env_var_repository.rs b/src/infra/datasets/sqlite/tests/repos/test_sqlite_dataset_env_var_repository.rs index f990a6bea..2117dc000 100644 --- a/src/infra/datasets/sqlite/tests/repos/test_sqlite_dataset_env_var_repository.rs +++ b/src/infra/datasets/sqlite/tests/repos/test_sqlite_dataset_env_var_repository.rs @@ -10,8 +10,9 @@ use database_common::SqliteTransactionManager; use database_common_macros::database_transactional_test; use dill::{Catalog, CatalogBuilder}; +use kamu_accounts_sqlite::SqliteAccountRepository; use kamu_datasets_repo_tests::dataset_env_var_repo; -use kamu_datasets_sqlite::SqliteDatasetEnvVarRepository; +use kamu_datasets_sqlite::{SqliteDatasetEntryRepository, SqliteDatasetEnvVarRepository}; use sqlx::SqlitePool; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -56,6 +57,14 @@ database_transactional_test!( //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +database_transactional_test!( + storage = sqlite, + fixture = dataset_env_var_repo::test_delete_all_dataset_env_vars, + harness = SqliteDatasetEnvVarRepositoryHarness +); + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + struct SqliteDatasetEnvVarRepositoryHarness { catalog: Catalog, } @@ -66,6 +75,9 @@ impl SqliteDatasetEnvVarRepositoryHarness { let mut catalog_builder = CatalogBuilder::new(); catalog_builder.add_value(sqlite_pool); catalog_builder.add::(); + + catalog_builder.add::(); + catalog_builder.add::(); catalog_builder.add::(); Self { diff --git a/src/infra/ingest-datafusion/Cargo.toml b/src/infra/ingest-datafusion/Cargo.toml index a3384da9b..3c541d653 100644 --- a/src/infra/ingest-datafusion/Cargo.toml +++ b/src/infra/ingest-datafusion/Cargo.toml @@ -27,7 +27,7 @@ opendatafabric = { workspace = true, features = ["arrow"] } kamu-core = { workspace = true } kamu-data-utils = { workspace = true } -datafusion = { version = "42", default-features = false } +datafusion = { version = "43", 
default-features = false } geo-types = { version = "0.7", default-features = false, features = [] } geojson = { version = "0.24", default-features = false, features = [ "geo-types", @@ -47,7 +47,7 @@ zip = { version = "2", default-features = false, features = [ async-trait = "0.1" chrono = { version = "0.4", features = ["serde"] } futures = "0.3" -thiserror = "1" +thiserror = { version = "2", default-features = false, features = ["std"] } tokio = { version = "1", default-features = false, features = [ "fs", "process", diff --git a/src/infra/ingest-datafusion/src/lib.rs b/src/infra/ingest-datafusion/src/lib.rs index 0e72d54d3..5d192e165 100644 --- a/src/infra/ingest-datafusion/src/lib.rs +++ b/src/infra/ingest-datafusion/src/lib.rs @@ -12,7 +12,6 @@ pub mod merge_strategies; pub mod readers; -mod visitor; mod writer; pub use kamu_core::ingest::*; diff --git a/src/infra/ingest-datafusion/src/writer.rs b/src/infra/ingest-datafusion/src/writer.rs index 714cabfcc..997bc7542 100644 --- a/src/infra/ingest-datafusion/src/writer.rs +++ b/src/infra/ingest-datafusion/src/writer.rs @@ -23,19 +23,16 @@ use datafusion::prelude::*; use internal_error::*; use kamu_core::ingest::*; use kamu_core::*; -use odf::{AsTypedBlock, DatasetVocabulary, MetadataEvent}; +use odf::{AsTypedBlock, DatasetVocabulary}; use opendatafabric as odf; -use crate::visitor::SourceEventVisitor; - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /// Implementation of the [`DataWriter`] interface using Apache `DataFusion` /// engine pub struct DataWriterDataFusion { - dataset: Arc, + target: ResolvedDataset, merge_strategy: Arc, - block_ref: BlockRef, // Mutable meta: DataWriterMetadataState, @@ -44,41 +41,33 @@ pub struct DataWriterDataFusion { ctx: SessionContext, } -/// Contains a projection of the metadata needed for [`DataWriter`] to function -#[derive(Debug, Clone)] -pub struct DataWriterMetadataState { - pub head: odf::Multihash, - pub schema: Option, - pub source_event: Option, - pub merge_strategy: odf::MergeStrategy, - pub vocab: odf::DatasetVocabulary, - pub data_slices: Vec, - pub prev_offset: Option, - pub prev_checkpoint: Option, - pub prev_watermark: Option>, - pub prev_source_state: Option, -} - //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// impl DataWriterDataFusion { - pub fn builder(dataset: Arc, ctx: SessionContext) -> DataWriterDataFusionBuilder { - DataWriterDataFusionBuilder::new(dataset, ctx) + pub async fn from_metadata_chain( + ctx: SessionContext, + target: ResolvedDataset, + block_ref: &BlockRef, + source_name: Option<&str>, + ) -> Result { + let metadata_state = + DataWriterMetadataState::build(target.clone(), block_ref, source_name).await?; + + Ok(Self::from_metadata_state(ctx, target, metadata_state)) } - /// Use [`Self::builder`] to create an instance - fn new( + pub fn from_metadata_state( ctx: SessionContext, - dataset: Arc, - merge_strategy: Arc, - block_ref: BlockRef, + target: ResolvedDataset, metadata_state: DataWriterMetadataState, ) -> Self { + let merge_strategy = + Self::merge_strategy_for(metadata_state.merge_strategy.clone(), &metadata_state.vocab); + Self { ctx, - dataset, + target, merge_strategy, - block_ref, meta: metadata_state, } } @@ -226,7 +215,7 @@ impl DataWriterDataFusion { return Ok(None); } - let data_repo = self.dataset.as_data_repo(); + let data_repo = self.target.as_data_repo(); use futures::StreamExt; let prev_data_paths: Vec<_> = 
futures::stream::iter(prev_data_slices.iter().rev()) @@ -318,20 +307,16 @@ impl DataWriterDataFusion { ) .int_err()?; - // Offset & event time - // Note: ODF expects events within one chunk to be sorted by event time, so we - // ensure data is held in one partition to avoid reordering when saving to - // parquet. + // Assign offset based on the merge strategy's sort order // TODO: For some reason this adds two columns: the expected // "offset", but also "ROW_NUMBER()" for now we simply filter out the // latter. let df = df - .repartition(Partitioning::RoundRobinBatch(1)) - .int_err()? .with_column( &self.meta.vocab.offset_column, datafusion::functions_window::row_number::row_number() .order_by(self.merge_strategy.sort_order()) + .partition_by(vec![lit(1)]) .build() .int_err()?, ) @@ -361,6 +346,13 @@ impl DataWriterDataFusion { let full_columns_str: Vec<_> = full_columns.iter().map(String::as_str).collect(); let df = df.select_columns(&full_columns_str).int_err()?; + + // Note: As the very last step we sort the data by offset to guarantee its + // sequential layout in the parquet file + let df = df + .sort(vec![col(&self.meta.vocab.offset_column).sort(true, true)]) + .int_err()?; + Ok(df) } @@ -389,10 +381,17 @@ impl DataWriterDataFusion { } fn is_schema_equivalent_rec(lhs: &Field, rhs: &Field) -> bool { - // Ignore nullability - lhs.name() == rhs.name() - && lhs.data_type() == rhs.data_type() - && lhs.metadata() == rhs.metadata() + // Rules: + // - Ignore nullability (temporarily until we regain control over it) + // - Treat Utf8 equivalent to Utf8View + if lhs.name() != rhs.name() || lhs.metadata() != rhs.metadata() { + return false; + } + match (lhs.data_type(), rhs.data_type()) { + (l, r) if l == r => true, + (DataType::Utf8, DataType::Utf8View) | (DataType::Utf8View, DataType::Utf8) => true, + _ => false, + } } // TODO: Externalize configuration @@ -436,6 +435,14 @@ impl DataWriterDataFusion { ) -> Result, InternalError> { use datafusion::arrow::array::UInt64Array; + // FIXME: The extension is currently necessary for DataFusion to + // respect the single-file output + // See: https://github.com/apache/datafusion/issues/13323 + assert!( + path.extension().is_some(), + "Ouput file name must have an extension" + ); + let res = df .write_parquet( path.as_os_str().to_str().unwrap(), @@ -445,7 +452,11 @@ impl DataWriterDataFusion { .await .int_err()?; - let file = OwnedFile::new(path); + let file = if path.exists() { + Some(OwnedFile::new(path)) + } else { + None + }; assert_eq!(res.len(), 1); assert_eq!(res[0].num_columns(), 1); @@ -458,6 +469,7 @@ impl DataWriterDataFusion { .value(0); if num_records > 0 { + let file = file.unwrap(); tracing::info!( path = ?file.as_path(), num_records, @@ -566,6 +578,23 @@ impl DataWriterDataFusion { Ok((offset_interval, output_watermark)) } + + fn merge_strategy_for( + conf: odf::MergeStrategy, + vocab: &DatasetVocabulary, + ) -> Arc { + use crate::merge_strategies::*; + + match conf { + odf::MergeStrategy::Append(_cfg) => Arc::new(MergeStrategyAppend::new(vocab.clone())), + odf::MergeStrategy::Ledger(cfg) => { + Arc::new(MergeStrategyLedger::new(vocab.clone(), cfg)) + } + odf::MergeStrategy::Snapshot(cfg) => { + Arc::new(MergeStrategySnapshot::new(vocab.clone(), cfg)) + } + } + } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -655,7 +684,8 @@ impl DataWriter for DataWriterDataFusion { tracing::info!(schema = ?new_schema, "Final output schema"); if let Some(prev_schema) = 
&self.meta.schema { - Self::validate_output_schema_equivalence(prev_schema, &new_schema)?; + let arrow_schema = prev_schema.schema_as_arrow().int_err()?; + Self::validate_output_schema_equivalence(&arrow_schema, &new_schema)?; } // Write output @@ -752,11 +782,11 @@ impl DataWriter for DataWriterDataFusion { if let Some(new_schema) = staged.new_schema { // TODO: Make commit of schema and data atomic let commit_schema_result = self - .dataset + .target .commit_event( odf::SetDataSchema::new(&new_schema).into(), CommitOpts { - block_ref: &self.block_ref, + block_ref: &self.meta.block_ref, system_time: Some(staged.system_time), prev_block_hash: Some(Some(&self.meta.head)), check_object_refs: false, @@ -767,19 +797,19 @@ impl DataWriter for DataWriterDataFusion { // Update state self.meta.head = commit_schema_result.new_head; - self.meta.schema = Some(new_schema); + self.meta.schema = Some(odf::SetDataSchema::new(new_schema.as_ref())); } // Commit `AddData` event let add_data_block = if let Some(add_data) = staged.add_data { let commit_data_result = self - .dataset + .target .commit_add_data( add_data, staged.data_file, None, CommitOpts { - block_ref: &self.block_ref, + block_ref: &self.meta.block_ref, system_time: Some(staged.system_time), prev_block_hash: Some(Some(&self.meta.head)), check_object_refs: false, @@ -790,7 +820,7 @@ impl DataWriter for DataWriterDataFusion { // Update state for the next append let new_block = self - .dataset + .target .as_metadata_chain() .get_block(&commit_data_result.new_head) .await @@ -830,234 +860,3 @@ impl DataWriter for DataWriterDataFusion { } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Builder -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -pub struct DataWriterDataFusionBuilder { - dataset: Arc, - ctx: SessionContext, - block_ref: BlockRef, - metadata_state: Option, -} - -impl DataWriterDataFusionBuilder { - pub fn new(dataset: Arc, ctx: SessionContext) -> Self { - Self { - dataset, - ctx, - block_ref: BlockRef::Head, - metadata_state: None, - } - } - - pub fn with_block_ref(self, block_ref: BlockRef) -> Self { - Self { block_ref, ..self } - } - - pub fn metadata_state(&self) -> Option<&DataWriterMetadataState> { - self.metadata_state.as_ref() - } - - /// Use to specify all needed state for builder to avoid scanning the - /// metadata chain - pub fn with_metadata_state(self, metadata_state: DataWriterMetadataState) -> Self { - Self { - metadata_state: Some(metadata_state), - ..self - } - } - - /// Scans metadata chain to populate the needed metadata - /// - /// * `source_name` - name of the source to use when extracting the metadata - /// needed for writing. Leave empty for polling sources or to use the only - /// push source defined when there is no ambiguity. - pub async fn with_metadata_state_scanned( - self, - source_name: Option<&str>, - ) -> Result { - type Flag = odf::MetadataEventTypeFlags; - type Decision = MetadataVisitorDecision; - - // TODO: PERF: Full metadata scan below - this is expensive and should be - // improved using skip lists. 
- - let head = self - .dataset - .as_metadata_chain() - .resolve_ref(&self.block_ref) - .await - .int_err()?; - let mut seed_visitor = SearchSeedVisitor::new().adapt_err(); - let mut set_vocab_visitor = SearchSetVocabVisitor::new().adapt_err(); - let mut set_data_schema_visitor = SearchSetDataSchemaVisitor::new().adapt_err(); - let mut prev_source_state_visitor = SearchSourceStateVisitor::new(source_name).adapt_err(); - let mut add_data_visitor = SearchAddDataVisitor::new().adapt_err(); - let mut add_data_collection_visitor = GenericCallbackVisitor::new( - Vec::new(), - Decision::NextOfType(Flag::ADD_DATA), - |state, _, block| { - let MetadataEvent::AddData(e) = &block.event else { - unreachable!() - }; - - if let Some(output_data) = &e.new_data { - state.push(output_data.physical_hash.clone()); - } - - Decision::NextOfType(Flag::ADD_DATA) - }, - ) - .adapt_err(); - let mut source_event_visitor = SourceEventVisitor::new(source_name); - - self.dataset - .as_metadata_chain() - .accept_by_hash( - &mut [ - &mut source_event_visitor, - &mut seed_visitor, - &mut set_vocab_visitor, - &mut add_data_visitor, - &mut set_data_schema_visitor, - &mut prev_source_state_visitor, - &mut add_data_collection_visitor, - ], - &head, - ) - .await?; - - { - let seed = seed_visitor - .into_inner() - .into_event() - .expect("Dataset without blocks"); - - assert_eq!(seed.dataset_kind, odf::DatasetKind::Root); - } - - let (source_event, merge_strategy) = - source_event_visitor.get_source_event_and_merge_strategy()?; - let (prev_offset, prev_watermark, prev_checkpoint) = { - match add_data_visitor.into_inner().into_event() { - Some(e) => ( - e.last_offset(), - e.new_watermark, - e.new_checkpoint.map(|cp| cp.physical_hash), - ), - None => (None, None, None), - } - }; - let metadata_state = DataWriterMetadataState { - head, - schema: set_data_schema_visitor - .into_inner() - .into_event() - .as_ref() - .map(odf::SetDataSchema::schema_as_arrow) - .transpose() // Option> -> Result, E> - .int_err()?, - source_event, - merge_strategy, - vocab: set_vocab_visitor - .into_inner() - .into_event() - .unwrap_or_default() - .into(), - data_slices: add_data_collection_visitor.into_inner().into_state(), - prev_offset, - prev_checkpoint, - prev_watermark, - prev_source_state: prev_source_state_visitor.into_inner().into_state(), - }; - - Ok(self.with_metadata_state(metadata_state)) - } - - pub fn build(self) -> DataWriterDataFusion { - let Some(metadata_state) = self.metadata_state else { - // TODO: Typestate - panic!( - "Writer state is undefined - use with_metadata_state_scanned() to initialize it \ - from metadata chain or pass it explicitly via with_metadata_state()" - ) - }; - - let merge_strategy = - Self::merge_strategy_for(metadata_state.merge_strategy.clone(), &metadata_state.vocab); - - DataWriterDataFusion::new( - self.ctx, - self.dataset, - merge_strategy, - self.block_ref, - metadata_state, - ) - } - - fn merge_strategy_for( - conf: odf::MergeStrategy, - vocab: &DatasetVocabulary, - ) -> Arc { - use crate::merge_strategies::*; - - match conf { - odf::MergeStrategy::Append(_cfg) => Arc::new(MergeStrategyAppend::new(vocab.clone())), - odf::MergeStrategy::Ledger(cfg) => { - Arc::new(MergeStrategyLedger::new(vocab.clone(), cfg)) - } - odf::MergeStrategy::Snapshot(cfg) => { - Arc::new(MergeStrategySnapshot::new(vocab.clone(), cfg)) - } - } - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[derive(Debug, thiserror::Error)] -pub enum 
ScanMetadataError { - #[error(transparent)] - SourceNotFound( - #[from] - #[backtrace] - SourceNotFoundError, - ), - #[error(transparent)] - Internal( - #[from] - #[backtrace] - InternalError, - ), -} - -impl From> for ScanMetadataError { - fn from(v: AcceptVisitorError) -> Self { - match v { - AcceptVisitorError::Visitor(err) => err, - AcceptVisitorError::Traversal(err) => Self::Internal(err.int_err()), - } - } -} - -#[derive(Debug, thiserror::Error)] -#[error("{message}")] -pub struct SourceNotFoundError { - pub source_name: Option, - message: String, -} - -impl SourceNotFoundError { - pub fn new(source_name: Option>, message: impl Into) -> Self { - Self { - source_name: source_name.map(std::convert::Into::into), - message: message.into(), - } - } -} - -impl From for PushSourceNotFoundError { - fn from(val: SourceNotFoundError) -> Self { - PushSourceNotFoundError::new(val.source_name) - } -} diff --git a/src/infra/messaging-outbox/postgres/Cargo.toml b/src/infra/messaging-outbox/postgres/Cargo.toml index 39857a771..6e935a3c9 100644 --- a/src/infra/messaging-outbox/postgres/Cargo.toml +++ b/src/infra/messaging-outbox/postgres/Cargo.toml @@ -35,7 +35,7 @@ sqlx = { version = "0.8", default-features = false, features = [ "macros", "postgres", "chrono", - "json" + "json", ] } diff --git a/src/infra/messaging-outbox/repo-tests/Cargo.toml b/src/infra/messaging-outbox/repo-tests/Cargo.toml index ff7916ec9..75bc4e4df 100644 --- a/src/infra/messaging-outbox/repo-tests/Cargo.toml +++ b/src/infra/messaging-outbox/repo-tests/Cargo.toml @@ -30,3 +30,6 @@ futures = "0.3" rand = "0.8" serde = { version = "1", features = ["derive"] } serde_json = "1" + + +[dev-dependencies] diff --git a/src/infra/messaging-outbox/sqlite/Cargo.toml b/src/infra/messaging-outbox/sqlite/Cargo.toml index 7e422314a..a7bd73528 100644 --- a/src/infra/messaging-outbox/sqlite/Cargo.toml +++ b/src/infra/messaging-outbox/sqlite/Cargo.toml @@ -36,7 +36,7 @@ sqlx = { version = "0.8", default-features = false, features = [ "macros", "sqlite", "chrono", - "json" + "json", ] } diff --git a/src/utils/container-runtime/Cargo.toml b/src/utils/container-runtime/Cargo.toml index 77932634e..ec71d0e9d 100644 --- a/src/utils/container-runtime/Cargo.toml +++ b/src/utils/container-runtime/Cargo.toml @@ -30,8 +30,13 @@ dill = "0.9" libc = "0.2" regex = "1" serde = { version = "1", features = ["derive"] } -thiserror = { version = "1", default-features = false } -tokio = { version = "1", default-features = false, features = ["time", "sync", "process", "parking_lot"] } +thiserror = { version = "2", default-features = false, features = ["std"] } +tokio = { version = "1", default-features = false, features = [ + "time", + "sync", + "process", + "parking_lot", +] } tracing = "0.1" url = "2" diff --git a/src/utils/data-utils/Cargo.toml b/src/utils/data-utils/Cargo.toml index 1842629da..8f2ae7163 100644 --- a/src/utils/data-utils/Cargo.toml +++ b/src/utils/data-utils/Cargo.toml @@ -33,7 +33,7 @@ async-trait = "0.1" arrow = { version = "53", default-features = false } arrow-json = { version = "53", default-features = false } arrow-digest = { version = "53", default-features = false } -datafusion = { version = "42", default-features = false, features = [ +datafusion = { version = "43", default-features = false, features = [ "parquet", "serde", ] } @@ -41,7 +41,7 @@ digest = "0.10" hex = "0.4" sha3 = "0.10" tracing = { version = "0.1", default-features = false } -thiserror = { version = "1", default-features = false } +thiserror = { version = "2", 
default-features = false, features = ["std"] } url = "2" serde = { version = "1", default-features = false } serde_json = { version = "1" } diff --git a/src/utils/data-utils/src/data/hash.rs b/src/utils/data-utils/src/data/hash.rs index 635a5c9ca..674f91053 100644 --- a/src/utils/data-utils/src/data/hash.rs +++ b/src/utils/data-utils/src/data/hash.rs @@ -45,6 +45,7 @@ pub fn get_parquet_logical_hash( use datafusion::parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; let file = std::fs::File::open(data_path)?; + let parquet_reader = ParquetRecordBatchReaderBuilder::try_new(file)? .with_batch_size(LOGICAL_HASH_BATCH_SIZE) .build()?; diff --git a/src/utils/data-utils/src/testing/mod.rs b/src/utils/data-utils/src/testing/mod.rs index 17bbbbf5e..6f1b0e6d9 100644 --- a/src/utils/data-utils/src/testing/mod.rs +++ b/src/utils/data-utils/src/testing/mod.rs @@ -7,6 +7,7 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. +use arrow::datatypes::Schema; use datafusion::common::DFSchema; use datafusion::prelude::*; use pretty_assertions::assert_eq; @@ -19,6 +20,12 @@ pub fn assert_schema_eq(schema: &DFSchema, expected: &str) { assert_eq!(expected.trim(), actual.trim()); } +#[allow(clippy::needless_pass_by_value)] +pub fn assert_arrow_schema_eq(schema: &Schema, expected: serde_json::Value) { + let actual = serde_json::to_value(schema).unwrap(); + assert_eq!(expected, actual); +} + pub async fn assert_data_eq(df: DataFrame, expected: &str) { use datafusion::arrow::util::pretty; @@ -28,3 +35,29 @@ pub async fn assert_data_eq(df: DataFrame, expected: &str) { } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +pub fn assert_parquet_offsets_are_in_order(data_path: &std::path::Path) { + use ::datafusion::arrow::array::{downcast_array, Int64Array}; + use ::datafusion::parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; + + let reader = ParquetRecordBatchReaderBuilder::try_new(std::fs::File::open(data_path).unwrap()) + .unwrap() + .build() + .unwrap(); + + let mut expected_offset = 0; + + for batch in reader { + let batch = batch.unwrap(); + let offsets_dyn = batch.column_by_name("offset").unwrap(); + let offsets = downcast_array::(offsets_dyn); + for i in 0..offsets.len() { + let actual_offset = offsets.value(i); + assert_eq!( + actual_offset, expected_offset, + "Offset column in parquet file is not sequentially ordered" + ); + expected_offset += 1; + } + } +} diff --git a/src/utils/database-common/Cargo.toml b/src/utils/database-common/Cargo.toml index e2d7eecea..79daf4237 100644 --- a/src/utils/database-common/Cargo.toml +++ b/src/utils/database-common/Cargo.toml @@ -34,10 +34,10 @@ hex = "0.4" hmac = "0.12" internal-error = { workspace = true } secrecy = "0.10" -serde = "1" -serde_json = "1" +serde = { version = "1", default-features = false } +serde_json = { version = "1", default-features = false } sha2 = "0.10" -thiserror = { version = "1", default-features = false } +thiserror = { version = "2", default-features = false, features = ["std"] } tokio = { version = "1", default-features = false, features = ["sync"] } tracing = "0.1" diff --git a/src/utils/database-common/src/entities.rs b/src/utils/database-common/src/entities.rs index 4f4a361f9..134fe82ef 100644 --- a/src/utils/database-common/src/entities.rs +++ b/src/utils/database-common/src/entities.rs @@ -41,6 +41,36 @@ pub type EntityPageStream<'a, Entity> = 
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/// `EntityPageStreamer` is a helper hiding the pagination logic. +/// The default page size is `100` rows, but this value can be changed if +/// desired. +/// +/// The primary method is [`EntityPageStreamer::into_stream()`]. +/// +/// # Examples +/// ``` +/// // Usage without arguments for page callback: +/// let dataset_handles_stream = EntityPageStreamer::default().into_stream( +/// || async { Ok(()) }, +/// |_, pagination| self.list_all_dataset_handles(pagination), +/// ); +/// +/// // An example use case with passing some value to each page callback: +/// let dataset_handles_by_owner_name_stream = EntityPageStreamer::default().into_stream( +/// move || async move { +/// let owner_id = self +/// .resolve_account_id_by_maybe_name(Some(&owner_name)) +/// .await?; +/// Ok(Arc::new(owner_id)) +/// }, +/// move |owner_id, pagination| async move { +/// self.list_all_dataset_handles_by_owner_name(&owner_id, pagination) +/// .await +/// }, +/// ) +/// +/// // More examples can be found in unit tests. +/// ``` pub struct EntityPageStreamer { start_offset: usize, page_limit: usize, @@ -63,6 +93,14 @@ impl EntityPageStreamer { } } + /// # Arguments + /// * `get_args_callback` - a function to generating arguments for + /// `next_entities_callback`. Note, it is only called once. + /// * `next_entities_callback` - a function that will be called for each + /// page. + /// + /// # Examples + /// You can find examples of use in [`EntityPageStreamer`]. pub fn into_stream<'a, Entity, Args, HInitArgs, HInitArgsFut, HListing, HListingFut>( self, get_args_callback: HInitArgs, diff --git a/src/utils/database-common/src/helpers.rs b/src/utils/database-common/src/helpers.rs index 160971f4b..bda25574e 100644 --- a/src/utils/database-common/src/helpers.rs +++ b/src/utils/database-common/src/helpers.rs @@ -9,6 +9,17 @@ //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +/// Utility to generate the placeholder list. Helpful when using dynamic SQL +/// generation. +/// +/// # Examples +/// ``` +/// // Output for `arguments_count`=3 & `index_offset`=0 +/// "$0,$1,$2" +/// +/// // Output for `arguments_count`=2 & `index_offset`=3 +/// "$3,$4" +/// ``` pub fn sqlite_generate_placeholders_list(arguments_count: usize, index_offset: usize) -> String { (0..arguments_count) .map(|i| format!("${}", i + index_offset)) diff --git a/src/utils/database-common/src/lib.rs b/src/utils/database-common/src/lib.rs index d31627c52..88ab1bb72 100644 --- a/src/utils/database-common/src/lib.rs +++ b/src/utils/database-common/src/lib.rs @@ -7,9 +7,6 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -#![feature(lint_reasons)] -#![expect(incomplete_features)] -#![feature(inherent_associated_types)] #![feature(iter_intersperse)] mod db_connection_settings; diff --git a/src/utils/database-common/tests/tests/mod.rs b/src/utils/database-common/tests/tests/mod.rs index e6dbbae29..59f64dce9 100644 --- a/src/utils/database-common/tests/tests/mod.rs +++ b/src/utils/database-common/tests/tests/mod.rs @@ -7,4 +7,5 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. 
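A minimal usage sketch for the `sqlite_generate_placeholders_list` helper documented above, assuming the `database_common` crate is available as a dependency; the table name, column name, and the `index_offset` of 1 are illustrative assumptions only, not taken from this patch:

    use database_common::sqlite_generate_placeholders_list;

    fn build_dataset_lookup_query(dataset_ids: &[String]) -> String {
        // Placeholders are numbered starting from `index_offset`,
        // e.g. "$1,$2,$3" for three arguments with an offset of 1
        let placeholders = sqlite_generate_placeholders_list(dataset_ids.len(), 1);

        // The caller is expected to bind `dataset_ids` in the same order
        format!("SELECT * FROM dataset_entries WHERE dataset_id IN ({placeholders})")
    }
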
-mod test_entries_streamer; +mod test_entity_page_streamer; +mod test_helpers; diff --git a/src/utils/database-common/tests/tests/test_entity_page_streamer.rs b/src/utils/database-common/tests/tests/test_entity_page_streamer.rs new file mode 100644 index 000000000..b80e6294f --- /dev/null +++ b/src/utils/database-common/tests/tests/test_entity_page_streamer.rs @@ -0,0 +1,414 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use std::sync::Arc; + +use database_common::{EntityPageListing, EntityPageStreamer, PaginationOpts}; +use futures::TryStreamExt; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +struct TestPaginationOpts { + total_entity_count: usize, + expected_entities_call_count: usize, + start_offset: usize, + page_limit: usize, + expected_entities: Vec, +} + +macro_rules! test_pagination { + ($test_pagination_opts: expr) => { + let TestPaginationOpts { + total_entity_count, + expected_entities_call_count, + start_offset, + page_limit, + expected_entities, + } = $test_pagination_opts; + + let entity_source = entity_source(total_entity_count, expected_entities_call_count); + let streamer = EntityPageStreamer::new(start_offset, page_limit); + + let stream = streamer.into_stream( + || async { + let arguments = entity_source.init_arguments().await; + Ok(arguments) + }, + |_, pagination| { + let entity_source = entity_source.clone(); + async move { + let listing = entity_source.entities(pagination).await; + Ok(listing) + } + }, + ); + + let actual_entries = stream.try_collect::>().await.unwrap(); + + pretty_assertions::assert_eq!(expected_entities, actual_entries); + }; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[tokio::test] +async fn test_pagination_less_than_a_page() { + test_pagination!(TestPaginationOpts { + total_entity_count: 3, + start_offset: 0, + page_limit: 5, + expected_entities_call_count: 1, + expected_entities: vec![ + TestEntity { id: 0 }, + TestEntity { id: 1 }, + TestEntity { id: 2 }, + ], + }); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[tokio::test] +async fn test_pagination_fits_on_one_page() { + test_pagination!(TestPaginationOpts { + total_entity_count: 5, + start_offset: 0, + page_limit: 5, + expected_entities_call_count: 1, + expected_entities: vec![ + TestEntity { id: 0 }, + TestEntity { id: 1 }, + TestEntity { id: 2 }, + TestEntity { id: 3 }, + TestEntity { id: 4 }, + ], + }); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[tokio::test] +async fn test_pagination_more_than_a_page() { + test_pagination!(TestPaginationOpts { + total_entity_count: 7, + start_offset: 0, + page_limit: 5, + expected_entities_call_count: 2, + expected_entities: vec![ + TestEntity { id: 0 }, + TestEntity { id: 1 }, + TestEntity { id: 2 }, + TestEntity { id: 3 }, + TestEntity { id: 4 }, + TestEntity { id: 5 }, + TestEntity { id: 6 }, + ], + }); +} + +#[tokio::test] +async fn test_pagination_fits_on_few_pages() { + 
test_pagination!(TestPaginationOpts { + total_entity_count: 10, + start_offset: 0, + page_limit: 5, + expected_entities_call_count: 2, + expected_entities: vec![ + TestEntity { id: 0 }, + TestEntity { id: 1 }, + TestEntity { id: 2 }, + TestEntity { id: 3 }, + TestEntity { id: 4 }, + TestEntity { id: 5 }, + TestEntity { id: 6 }, + TestEntity { id: 7 }, + TestEntity { id: 8 }, + TestEntity { id: 9 }, + ], + }); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[tokio::test] +async fn test_pagination_start_offset_in_the_page_middle() { + test_pagination!(TestPaginationOpts { + total_entity_count: 10, + start_offset: 5, + page_limit: 10, + expected_entities_call_count: 1, + expected_entities: vec![ + TestEntity { id: 5 }, + TestEntity { id: 6 }, + TestEntity { id: 7 }, + TestEntity { id: 8 }, + TestEntity { id: 9 }, + ], + }); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[tokio::test] +async fn test_pagination_start_offset_is_greater_than_the_total_entity_count() { + test_pagination!(TestPaginationOpts { + total_entity_count: 10, + start_offset: 11, + page_limit: 10, + expected_entities_call_count: 1, + expected_entities: vec![], + }); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[tokio::test] +async fn test_paged_page_processing_of_input_data_by_ref() { + fn assert_page(page: &[&TestEntity], pagination: &PaginationOpts) { + match pagination.offset { + 0 => { + pretty_assertions::assert_eq!( + vec![ + &TestEntity { id: 0 }, + &TestEntity { id: 1 }, + &TestEntity { id: 2 }, + ], + page + ); + } + 3 => { + pretty_assertions::assert_eq!( + vec![ + &TestEntity { id: 3 }, + &TestEntity { id: 4 }, + &TestEntity { id: 5 }, + ], + page + ); + } + 6 => { + pretty_assertions::assert_eq!( + vec![ + &TestEntity { id: 6 }, + &TestEntity { id: 7 }, + &TestEntity { id: 8 }, + ], + page + ); + } + 9 => { + pretty_assertions::assert_eq!(vec![&TestEntity { id: 9 },], page); + } + _ => { + unreachable!() + } + } + } + + let input_data = vec![ + TestEntity { id: 0 }, + TestEntity { id: 1 }, + TestEntity { id: 2 }, + TestEntity { id: 3 }, + TestEntity { id: 4 }, + TestEntity { id: 5 }, + TestEntity { id: 6 }, + TestEntity { id: 7 }, + TestEntity { id: 8 }, + TestEntity { id: 9 }, + ]; + + struct CollectionArgs<'a> { + pub input_data: &'a Vec, + } + + let streamer = EntityPageStreamer::new(0, 3); + + let stream = streamer.into_stream( + || async { + Ok(Arc::new(CollectionArgs { + input_data: &input_data, + })) + }, + |input, pagination| { + let input_len = input.input_data.len(); + + let input_page = input + .input_data + .iter() + .skip(pagination.offset) + .take(pagination.safe_limit(input_len)) + .collect::>(); + + assert_page(&input_page, &pagination); + + async move { + Ok(EntityPageListing { + list: input_page, + total_count: input_len, + }) + } + }, + ); + + stream.try_collect::>().await.unwrap(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[tokio::test] +async fn test_paged_page_processing_of_input_data_by_value() { + #[derive(Debug, Clone, PartialEq)] + struct ClonableTestEntity { + id: usize, + } + + fn assert_page(page: &[ClonableTestEntity], pagination: &PaginationOpts) { + match pagination.offset { + 0 => { + pretty_assertions::assert_eq!( + vec![ + ClonableTestEntity 
{ id: 0 }, + ClonableTestEntity { id: 1 }, + ClonableTestEntity { id: 2 }, + ], + page + ); + } + 3 => { + pretty_assertions::assert_eq!( + vec![ + ClonableTestEntity { id: 3 }, + ClonableTestEntity { id: 4 }, + ClonableTestEntity { id: 5 }, + ], + page + ); + } + 6 => { + pretty_assertions::assert_eq!( + vec![ + ClonableTestEntity { id: 6 }, + ClonableTestEntity { id: 7 }, + ClonableTestEntity { id: 8 }, + ], + page + ); + } + 9 => { + pretty_assertions::assert_eq!(vec![ClonableTestEntity { id: 9 },], page); + } + _ => { + unreachable!() + } + } + } + + let input_data = vec![ + ClonableTestEntity { id: 0 }, + ClonableTestEntity { id: 1 }, + ClonableTestEntity { id: 2 }, + ClonableTestEntity { id: 3 }, + ClonableTestEntity { id: 4 }, + ClonableTestEntity { id: 5 }, + ClonableTestEntity { id: 6 }, + ClonableTestEntity { id: 7 }, + ClonableTestEntity { id: 8 }, + ClonableTestEntity { id: 9 }, + ]; + + let streamer = EntityPageStreamer::new(0, 3); + + let stream = streamer.into_stream( + || async { Ok(Arc::new(input_data)) }, + |input, pagination| { + let input_page = input + .iter() + .skip(pagination.offset) + .take(pagination.safe_limit(input.len())) + .cloned() + .collect::>(); + + assert_page(&input_page, &pagination); + + async move { + Ok(EntityPageListing { + list: input_page, + total_count: input.len(), + }) + } + }, + ); + + stream.try_collect::>().await.unwrap(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Helpers +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +fn entity_source( + total_entities_count: usize, + expected_entities_call_count: usize, +) -> Arc { + let mut entity_source = MockEntitySource::new(); + + entity_source + .expect_init_arguments() + .times(1) + .returning(|| NoArgs); + + entity_source + .expect_entities() + .times(expected_entities_call_count) + .returning(move |pagination| { + let result = (0..) + .skip(pagination.offset) + .take(pagination.safe_limit(total_entities_count)) + .map(|id| TestEntity { id }) + .collect::>(); + + EntityPageListing { + list: result, + total_count: total_entities_count, + } + }); + + Arc::new(entity_source) +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Clone)] +struct NoArgs; + +#[derive(Debug, PartialEq)] +struct TestEntity { + id: usize, +} + +#[async_trait::async_trait] +trait EntitySource { + async fn init_arguments(&self) -> NoArgs; + + async fn entities(&self, pagination: PaginationOpts) -> EntityPageListing; +} + +mockall::mock! { + pub EntitySource {} + + #[async_trait::async_trait] + impl EntitySource for EntitySource { + async fn init_arguments(&self) -> NoArgs; + + async fn entities(&self, pagination: PaginationOpts) -> EntityPageListing; + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/utils/database-common/tests/tests/test_helpers.rs b/src/utils/database-common/tests/tests/test_helpers.rs new file mode 100644 index 000000000..1893d1893 --- /dev/null +++ b/src/utils/database-common/tests/tests/test_helpers.rs @@ -0,0 +1,30 @@ +// Copyright Kamu Data, Inc. and contributors. All rights reserved. +// +// Use of this software is governed by the Business Source License +// included in the LICENSE file. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0. + +use database_common::sqlite_generate_placeholders_list; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[tokio::test] +async fn test_sqlite_generate_placeholders() { + for (arguments_count, index_offset, expected_result) in [ + (0, 0, ""), + (0, 1, ""), + (0, 42, ""), + (3, 0, "$0,$1,$2"), + (2, 3, "$3,$4"), + ] { + pretty_assertions::assert_eq!( + expected_result, + sqlite_generate_placeholders_list(arguments_count, index_offset) + ); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/utils/datafusion-cli/Cargo.toml b/src/utils/datafusion-cli/Cargo.toml index b02096c28..54d4542a1 100644 --- a/src/utils/datafusion-cli/Cargo.toml +++ b/src/utils/datafusion-cli/Cargo.toml @@ -36,7 +36,7 @@ async-trait = "0.1" aws-config = "1" aws-credential-types = "1" clap = { version = "4", features = ["derive"] } -datafusion = { version = "42", features = [ +datafusion = { version = "43", features = [ "crypto_expressions", "datetime_expressions", "encoding_expressions", diff --git a/src/utils/datafusion-cli/README.md b/src/utils/datafusion-cli/README.md index 2d42e92d2..4a3b41a81 100644 --- a/src/utils/datafusion-cli/README.md +++ b/src/utils/datafusion-cli/README.md @@ -1,9 +1,9 @@ # datafusion-cli -This crate is heavily based on `datafusion-cli` in the https://github.com/apache/arrow-datafusion/ repository and therefore is licensed under the same `Apache 2.0` license. +This crate is heavily based on `datafusion-cli` in the https://github.com/apache/datafusion/ repository and therefore is licensed under the same `Apache 2.0` license. We decided to copy the code instead of using the existing `datafusion-cli` crate because: -- It was hard to align the dependency versions between our and `arrow-datafusion` repo +- It was hard to align the dependency versions between our and `datafusion` repo - The crate comes with more dependencies than we actually need / want - Maintaining optional features in the upstream repo would introduce too much overhead - Maintaining a fork was problematic due to `datafusion-cli` sharing the repo with all other `datafusion` crates, not only due to slow clone speed, but also due to `cargo` also switching to use `datafusion` crates from that repo instead of using published artifacts from `crates.io`. diff --git a/src/utils/datafusion-cli/src/catalog.rs b/src/utils/datafusion-cli/src/catalog.rs index f54f05835..c456766ec 100644 --- a/src/utils/datafusion-cli/src/catalog.rs +++ b/src/utils/datafusion-cli/src/catalog.rs @@ -33,6 +33,7 @@ use crate::object_storage::{get_object_store, AwsOptions, GcpOptions}; /// Wraps another catalog, automatically register require object stores for the /// file locations +#[derive(Debug)] pub struct DynamicObjectStoreCatalog { inner: Arc, state: Weak>, @@ -70,6 +71,7 @@ impl CatalogProviderList for DynamicObjectStoreCatalog { } /// Wraps another catalog provider +#[derive(Debug)] struct DynamicObjectStoreCatalogProvider { inner: Arc, state: Weak>, @@ -109,6 +111,7 @@ impl CatalogProvider for DynamicObjectStoreCatalogProvider { /// Wraps another schema provider. [DynamicObjectStoreSchemaProvider] is /// responsible for registering the required object stores for the file /// locations. 
+#[derive(Debug)] struct DynamicObjectStoreSchemaProvider { inner: Arc, state: Weak>, diff --git a/src/utils/datafusion-cli/src/exec.rs b/src/utils/datafusion-cli/src/exec.rs index 7b754fa3f..80da51339 100644 --- a/src/utils/datafusion-cli/src/exec.rs +++ b/src/utils/datafusion-cli/src/exec.rs @@ -373,7 +373,7 @@ pub(crate) async fn register_object_store_and_config_extensions( ctx.register_table_options_extension_from_scheme(scheme); // Clone and modify the default table options based on the provided options - let mut table_options = ctx.session_state().default_table_options().clone(); + let mut table_options = ctx.session_state().default_table_options(); if let Some(format) = format { table_options.set_config_format(format); } diff --git a/src/utils/datafusion-cli/src/functions.rs b/src/utils/datafusion-cli/src/functions.rs index eb49d91ce..9a02ca2f8 100644 --- a/src/utils/datafusion-cli/src/functions.rs +++ b/src/utils/datafusion-cli/src/functions.rs @@ -214,6 +214,7 @@ pub fn display_all_functions() -> Result<()> { } /// PARQUET_META table function +#[derive(Debug)] struct ParquetMetadataTable { schema: SchemaRef, batch: RecordBatch, @@ -314,6 +315,7 @@ fn fixed_len_byte_array_to_string(val: Option<&FixedLenByteArray>) -> Option().unwrap(); let builder = get_s3_object_store_builder(table_url.as_ref(), aws_options).await?; @@ -511,7 +511,7 @@ mod tests { if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan { ctx.register_table_options_extension_from_scheme(scheme); - let mut table_options = ctx.state().default_table_options().clone(); + let mut table_options = ctx.state().default_table_options(); table_options.alter_with_string_hash_map(&cmd.options)?; let aws_options = table_options.extensions.get::().unwrap(); let err = get_s3_object_store_builder(table_url.as_ref(), aws_options) @@ -538,7 +538,7 @@ mod tests { if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan { ctx.register_table_options_extension_from_scheme(scheme); - let mut table_options = ctx.state().default_table_options().clone(); + let mut table_options = ctx.state().default_table_options(); table_options.alter_with_string_hash_map(&cmd.options)?; let aws_options = table_options.extensions.get::().unwrap(); // ensure this isn't an error @@ -570,7 +570,7 @@ mod tests { if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan { ctx.register_table_options_extension_from_scheme(scheme); - let mut table_options = ctx.state().default_table_options().clone(); + let mut table_options = ctx.state().default_table_options(); table_options.alter_with_string_hash_map(&cmd.options)?; let aws_options = table_options.extensions.get::().unwrap(); let builder = get_oss_object_store_builder(table_url.as_ref(), aws_options)?; @@ -612,7 +612,7 @@ mod tests { if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan { ctx.register_table_options_extension_from_scheme(scheme); - let mut table_options = ctx.state().default_table_options().clone(); + let mut table_options = ctx.state().default_table_options(); table_options.alter_with_string_hash_map(&cmd.options)?; let gcp_options = table_options.extensions.get::().unwrap(); let builder = get_gcs_object_store_builder(table_url.as_ref(), gcp_options)?; diff --git a/src/utils/event-sourcing/Cargo.toml b/src/utils/event-sourcing/Cargo.toml index d53eb1b5e..5240d24a9 100644 --- a/src/utils/event-sourcing/Cargo.toml +++ b/src/utils/event-sourcing/Cargo.toml @@ -27,9 +27,11 @@ internal-error = { workspace = true } 
async-stream = "0.3" async-trait = { version = "0.1", default-features = false } -thiserror = { version = "1", default-features = false } +thiserror = { version = "2", default-features = false, features = ["std"] } tokio-stream = { version = "0.1", default-features = false } -tracing = { version = "0.1", default-features = false, features = ["attributes"] } +tracing = { version = "0.1", default-features = false, features = [ + "attributes", +] } [dev-dependencies] diff --git a/src/utils/http-common/Cargo.toml b/src/utils/http-common/Cargo.toml index e29cb0ee9..8055b9cf9 100644 --- a/src/utils/http-common/Cargo.toml +++ b/src/utils/http-common/Cargo.toml @@ -28,7 +28,7 @@ kamu-core = { workspace = true } axum = { version = "0.7", default-features = false } http = { version = "1", default-features = false } serde = { version = "1", default-features = false } -thiserror = { version = "1", default-features = false } +thiserror = { version = "2", default-features = false } tracing = { version = "0.1", default-features = false } utoipa = { version = "5", default-features = false, features = [] } diff --git a/src/utils/init-on-startup/Cargo.toml b/src/utils/init-on-startup/Cargo.toml index 5cbaf0224..fb51a8599 100644 --- a/src/utils/init-on-startup/Cargo.toml +++ b/src/utils/init-on-startup/Cargo.toml @@ -27,11 +27,14 @@ internal-error = { workspace = true } async-trait = "0.1" dill = "0.9" -petgraph = { version = "0.6", default-features = false, features = ["stable_graph"] } -thiserror = "1" +petgraph = { version = "0.6", default-features = false, features = [ + "stable_graph", +] } +thiserror = { version = "2", default-features = false, features = ["std"] } tracing = "0.1" + [dev-dependencies] paste = "1" test-log = { version = "0.2", features = ["trace"] } -tokio = { version = "1", default-features = false, features = ["rt", "macros"] } \ No newline at end of file +tokio = { version = "1", default-features = false, features = ["rt", "macros"] } diff --git a/src/utils/internal-error/Cargo.toml b/src/utils/internal-error/Cargo.toml index bd59f8f70..104741c71 100644 --- a/src/utils/internal-error/Cargo.toml +++ b/src/utils/internal-error/Cargo.toml @@ -22,4 +22,4 @@ doctest = false [dependencies] -thiserror = { version = "1", default-features = false } +thiserror = { version = "2", default-features = false, features = ["std"] } diff --git a/src/utils/kamu-cli-puppet/Cargo.toml b/src/utils/kamu-cli-puppet/Cargo.toml index b931029f9..86635dd4e 100644 --- a/src/utils/kamu-cli-puppet/Cargo.toml +++ b/src/utils/kamu-cli-puppet/Cargo.toml @@ -48,7 +48,7 @@ kamu-data-utils = { optional = true, workspace = true, features = ["testing"] } opendatafabric = { optional = true, workspace = true } async-trait = { optional = true, version = "0.1" } -datafusion = { optional = true, version = "42", default-features = false } +datafusion = { optional = true, version = "43", default-features = false } indoc = { optional = true, version = "2" } pretty_assertions = { optional = true, version = "1" } serde = { optional = true, version = "1", default-features = false, features = [ diff --git a/src/utils/messaging-outbox/Cargo.toml b/src/utils/messaging-outbox/Cargo.toml index 5cd3bdd52..aac91dc91 100644 --- a/src/utils/messaging-outbox/Cargo.toml +++ b/src/utils/messaging-outbox/Cargo.toml @@ -38,7 +38,7 @@ mockall = "0.13" prometheus = { version = "0.13", default-features = false } serde = { version = "1", features = ["derive"] } serde_json = "1" -thiserror = "1" +thiserror = { version = "2", default-features = false, 
features = ["std"] } tokio = { version = "1", default-features = false } tokio-stream = { version = "0.1", default-features = false } tracing = "0.1" diff --git a/src/utils/messaging-outbox/src/executors/mod.rs b/src/utils/messaging-outbox/src/agent/mod.rs similarity index 74% rename from src/utils/messaging-outbox/src/executors/mod.rs rename to src/utils/messaging-outbox/src/agent/mod.rs index c806a780f..7653d86a6 100644 --- a/src/utils/messaging-outbox/src/executors/mod.rs +++ b/src/utils/messaging-outbox/src/agent/mod.rs @@ -7,14 +7,14 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. +mod outbox_agent; +mod outbox_agent_metrics; +mod outbox_agent_shared; mod outbox_consumption_iteration_planner; -mod outbox_executor; -mod outbox_executor_metrics; -mod outbox_executor_shared; mod outbox_producer_consumption_job; +pub use outbox_agent::*; +pub use outbox_agent_metrics::*; +pub(crate) use outbox_agent_shared::*; pub(crate) use outbox_consumption_iteration_planner::*; -pub use outbox_executor::*; -pub use outbox_executor_metrics::*; -pub(crate) use outbox_executor_shared::*; pub(crate) use outbox_producer_consumption_job::*; diff --git a/src/utils/messaging-outbox/src/executors/outbox_executor.rs b/src/utils/messaging-outbox/src/agent/outbox_agent.rs similarity index 96% rename from src/utils/messaging-outbox/src/executors/outbox_executor.rs rename to src/utils/messaging-outbox/src/agent/outbox_agent.rs index be86f7255..d51bb1dd5 100644 --- a/src/utils/messaging-outbox/src/executors/outbox_executor.rs +++ b/src/utils/messaging-outbox/src/agent/outbox_agent.rs @@ -29,16 +29,16 @@ enum RunMode { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -pub const JOB_MESSAGING_OUTBOX_STARTUP: &str = "dev.kamu.utils.outbox.OutboxExecutorStartup"; +pub const JOB_MESSAGING_OUTBOX_STARTUP: &str = "dev.kamu.utils.outbox.OutboxAgentStartup"; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -pub struct OutboxExecutor { +pub struct OutboxAgent { catalog: Catalog, config: Arc, routes_static_info: Arc, producer_consumption_jobs: Vec, - metrics: Arc, + metrics: Arc, run_lock: tokio::sync::Mutex<()>, } @@ -50,12 +50,12 @@ pub struct OutboxExecutor { requires_transaction: false, })] #[scope(Singleton)] -impl OutboxExecutor { +impl OutboxAgent { pub fn new( catalog: Catalog, config: Arc, message_dispatchers_by_producers: Vec>, - metrics: Arc, + metrics: Arc, ) -> Self { let routes_static_info = Arc::new(Self::make_static_routes_info( &catalog, @@ -128,7 +128,7 @@ impl OutboxExecutor { RunMode::WhileHasTasks => loop { let processed_consumer_tasks_count = self .run_consumption_iteration() - .instrument(tracing::debug_span!("OutboxExecutor::tick")) + .instrument(tracing::debug_span!("OutboxAgent::tick")) .await?; if processed_consumer_tasks_count == 0 { @@ -140,7 +140,7 @@ impl OutboxExecutor { loop { self.run_consumption_iteration() - .instrument(tracing::debug_span!("OutboxExecutor::tick")) + .instrument(tracing::debug_span!("OutboxAgent::tick")) .await?; tokio::time::sleep(loop_delay).await; @@ -272,8 +272,8 @@ impl OutboxExecutor { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[async_trait::async_trait] -impl InitOnStartup for OutboxExecutor { - #[tracing::instrument(level = "debug", skip_all, name = 
"OutboxExecutor::run_initialization")] +impl InitOnStartup for OutboxAgent { + #[tracing::instrument(level = "debug", skip_all, name = "OutboxAgent::run_initialization")] async fn run_initialization(&self) -> Result<(), InternalError> { // Trace current routes self.debug_outbox_routes(); diff --git a/src/utils/messaging-outbox/src/executors/outbox_executor_metrics.rs b/src/utils/messaging-outbox/src/agent/outbox_agent_metrics.rs similarity index 96% rename from src/utils/messaging-outbox/src/executors/outbox_executor_metrics.rs rename to src/utils/messaging-outbox/src/agent/outbox_agent_metrics.rs index 44c12e963..adf1f974e 100644 --- a/src/utils/messaging-outbox/src/executors/outbox_executor_metrics.rs +++ b/src/utils/messaging-outbox/src/agent/outbox_agent_metrics.rs @@ -15,7 +15,7 @@ use observability::metrics::MetricsProvider; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[derive(Debug)] -pub struct OutboxExecutorMetrics { +pub struct OutboxAgentMetrics { pub messages_processed_total: prometheus::IntCounterVec, pub messages_pending_total: prometheus::IntGaugeVec, pub failed_consumers_total: prometheus::IntGaugeVec, @@ -24,7 +24,7 @@ pub struct OutboxExecutorMetrics { #[component(pub)] #[interface(dyn MetricsProvider)] #[scope(Singleton)] -impl OutboxExecutorMetrics { +impl OutboxAgentMetrics { pub fn new() -> Self { use prometheus::*; @@ -78,7 +78,7 @@ impl OutboxExecutorMetrics { } } -impl MetricsProvider for OutboxExecutorMetrics { +impl MetricsProvider for OutboxAgentMetrics { fn register(&self, reg: &prometheus::Registry) -> prometheus::Result<()> { reg.register(Box::new(self.messages_processed_total.clone()))?; reg.register(Box::new(self.messages_pending_total.clone()))?; diff --git a/src/utils/messaging-outbox/src/executors/outbox_executor_shared.rs b/src/utils/messaging-outbox/src/agent/outbox_agent_shared.rs similarity index 100% rename from src/utils/messaging-outbox/src/executors/outbox_executor_shared.rs rename to src/utils/messaging-outbox/src/agent/outbox_agent_shared.rs diff --git a/src/utils/messaging-outbox/src/executors/outbox_consumption_iteration_planner.rs b/src/utils/messaging-outbox/src/agent/outbox_consumption_iteration_planner.rs similarity index 98% rename from src/utils/messaging-outbox/src/executors/outbox_consumption_iteration_planner.rs rename to src/utils/messaging-outbox/src/agent/outbox_consumption_iteration_planner.rs index f4809fd1c..f28d124be 100644 --- a/src/utils/messaging-outbox/src/executors/outbox_consumption_iteration_planner.rs +++ b/src/utils/messaging-outbox/src/agent/outbox_consumption_iteration_planner.rs @@ -13,7 +13,7 @@ use std::sync::Arc; use internal_error::InternalError; use super::{ - OutboxExecutorMetrics, + OutboxAgentMetrics, OutboxRoutesStaticInfo, ProducerConsumptionTask, UnconsumedProducerState, @@ -31,7 +31,7 @@ pub(crate) struct OutboxConsumptionIterationPlanner { routes_static_info: Arc, outbox_message_repository: Arc, outbox_message_consumption_repository: Arc, - metrics: Arc, + metrics: Arc, messages_batch_size: usize, } @@ -40,7 +40,7 @@ impl OutboxConsumptionIterationPlanner { routes_static_info: Arc, outbox_message_repository: Arc, outbox_message_consumption_repository: Arc, - metrics: Arc, + metrics: Arc, messages_batch_size: usize, ) -> Self { Self { diff --git a/src/utils/messaging-outbox/src/executors/outbox_producer_consumption_job.rs b/src/utils/messaging-outbox/src/agent/outbox_producer_consumption_job.rs similarity index 98% rename from 
src/utils/messaging-outbox/src/executors/outbox_producer_consumption_job.rs rename to src/utils/messaging-outbox/src/agent/outbox_producer_consumption_job.rs index 98ead958a..19d06f91e 100644 --- a/src/utils/messaging-outbox/src/executors/outbox_producer_consumption_job.rs +++ b/src/utils/messaging-outbox/src/agent/outbox_producer_consumption_job.rs @@ -15,7 +15,7 @@ use dill::Catalog; use internal_error::{InternalError, ResultIntoInternal}; use tracing::Instrument; -use super::{OutboxExecutorMetrics, OutboxRoutesStaticInfo, ProducerConsumptionTask}; +use super::{OutboxAgentMetrics, OutboxRoutesStaticInfo, ProducerConsumptionTask}; use crate::{ ConsumerFilter, MessageDispatcher, @@ -37,7 +37,7 @@ pub(crate) struct ProducerConsumptionJob { producer_name: String, consumer_names: Vec, failed_consumer_names: Mutex>, - metrics: Arc, + metrics: Arc, } impl ProducerConsumptionJob { @@ -46,7 +46,7 @@ impl ProducerConsumptionJob { routes_static_info: Arc, producer_name: String, consumer_names: Vec, - metrics: Arc, + metrics: Arc, ) -> Self { Self { catalog, diff --git a/src/utils/messaging-outbox/src/lib.rs b/src/utils/messaging-outbox/src/lib.rs index ad1dc069e..1cdbb0963 100644 --- a/src/utils/messaging-outbox/src/lib.rs +++ b/src/utils/messaging-outbox/src/lib.rs @@ -9,16 +9,16 @@ #![feature(let_chains)] +mod agent; mod consumers; mod entities; -mod executors; mod message; mod repos; mod services; +pub use agent::*; pub use consumers::*; pub use entities::*; -pub use executors::*; pub use message::*; pub use repos::*; pub use services::*; diff --git a/src/utils/messaging-outbox/tests/tests/mod.rs b/src/utils/messaging-outbox/tests/tests/mod.rs index 8e1d85f50..761003dbd 100644 --- a/src/utils/messaging-outbox/tests/tests/mod.rs +++ b/src/utils/messaging-outbox/tests/tests/mod.rs @@ -9,5 +9,5 @@ mod test_dispatching_outbox_impl; mod test_immediate_outbox_impl; -mod test_outbox_executor; +mod test_outbox_agent; mod test_transactional_outbox_impl; diff --git a/src/utils/messaging-outbox/tests/tests/test_outbox_executor.rs b/src/utils/messaging-outbox/tests/tests/test_outbox_agent.rs similarity index 93% rename from src/utils/messaging-outbox/tests/tests/test_outbox_executor.rs rename to src/utils/messaging-outbox/tests/tests/test_outbox_agent.rs index 6814c0e7f..b1cd84ef2 100644 --- a/src/utils/messaging-outbox/tests/tests/test_outbox_executor.rs +++ b/src/utils/messaging-outbox/tests/tests/test_outbox_agent.rs @@ -49,8 +49,8 @@ async fn test_deliver_messages_of_one_type() { body: "bar".to_string(), }; - let harness = OutboxExecutorHarness::new(); - harness.outbox_processor.run_initialization().await.unwrap(); + let harness = OutboxAgentHarness::new(); + harness.outbox_agent.run_initialization().await.unwrap(); harness .outbox @@ -76,7 +76,7 @@ async fn test_deliver_messages_of_one_type() { // Run iteration harness - .outbox_processor + .outbox_agent .run_single_iteration_only() .await .unwrap(); @@ -111,8 +111,8 @@ async fn test_deliver_messages_of_two_types() { body: "bar".to_string(), }; - let harness = OutboxExecutorHarness::new(); - harness.outbox_processor.run_initialization().await.unwrap(); + let harness = OutboxAgentHarness::new(); + harness.outbox_agent.run_initialization().await.unwrap(); harness .outbox @@ -138,7 +138,7 @@ async fn test_deliver_messages_of_two_types() { // Run iteration harness - .outbox_processor + .outbox_agent .run_single_iteration_only() .await .unwrap(); @@ -174,8 +174,8 @@ async fn test_deliver_messages_multiple_consumers() { body: "bar".to_string(), }; - let 
harness = OutboxExecutorHarness::new(); - harness.outbox_processor.run_initialization().await.unwrap(); + let harness = OutboxAgentHarness::new(); + harness.outbox_agent.run_initialization().await.unwrap(); harness .outbox @@ -201,7 +201,7 @@ async fn test_deliver_messages_multiple_consumers() { // Run iteration harness - .outbox_processor + .outbox_agent .run_single_iteration_only() .await .unwrap(); @@ -235,8 +235,8 @@ async fn test_deliver_messages_multiple_consumers() { #[test_log::test(tokio::test)] async fn test_deliver_messages_with_partial_consumption() { - let harness = OutboxExecutorHarness::new(); - harness.outbox_processor.run_initialization().await.unwrap(); + let harness = OutboxAgentHarness::new(); + harness.outbox_agent.run_initialization().await.unwrap(); let message_texts = ["foo", "bar", "baz", "super", "duper"]; for message_text in message_texts { @@ -285,7 +285,7 @@ async fn test_deliver_messages_with_partial_consumption() { // Run iteration harness - .outbox_processor + .outbox_agent .run_single_iteration_only() .await .unwrap(); @@ -325,19 +325,19 @@ async fn test_deliver_messages_with_partial_consumption() { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -struct OutboxExecutorHarness { +struct OutboxAgentHarness { catalog: Catalog, - outbox_processor: Arc, + outbox_agent: Arc, outbox: Arc, outbox_consumption_repository: Arc, - metrics: Arc, + metrics: Arc, } -impl OutboxExecutorHarness { +impl OutboxAgentHarness { fn new() -> Self { let mut b = CatalogBuilder::new(); - b.add::(); - b.add::(); + b.add::(); + b.add::(); b.add_value(OutboxConfig::default()); b.add::(); b.add::(); @@ -358,7 +358,7 @@ impl OutboxExecutorHarness { let catalog = b.build(); - let outbox_processor = catalog.get_one::().unwrap(); + let outbox_agent = catalog.get_one::().unwrap(); let outbox = catalog.get_one::().unwrap(); let outbox_consumption_repository = catalog .get_one::() @@ -367,7 +367,7 @@ impl OutboxExecutorHarness { Self { catalog, - outbox_processor, + outbox_agent, outbox, outbox_consumption_repository, metrics, diff --git a/src/utils/multiformats/Cargo.toml b/src/utils/multiformats/Cargo.toml index 996bfa050..99c5acae9 100644 --- a/src/utils/multiformats/Cargo.toml +++ b/src/utils/multiformats/Cargo.toml @@ -42,7 +42,7 @@ rand = { version = "0.8", default-features = false, features = [ ] } sha3 = { version = "0.10", default-features = false, features = [] } serde = { version = "1", default-features = false, features = [] } -thiserror = { version = "1", default-features = false, features = [] } +thiserror = { version = "2", default-features = false, features = ["std"] } unsigned-varint = { version = "0.8", default-features = false, features = [ "std", ] } diff --git a/src/utils/observability/Cargo.toml b/src/utils/observability/Cargo.toml index a5eec323b..1bace8c9b 100644 --- a/src/utils/observability/Cargo.toml +++ b/src/utils/observability/Cargo.toml @@ -46,7 +46,7 @@ dill = { version = "0.9", default-features = false } http = { version = "1", default-features = false } serde = { version = "1", default-features = false, features = ["derive"] } serde_json = { version = "1", default-features = false } -thiserror = { version = "1", default-features = false } +thiserror = { version = "2", default-features = false, features = ["std"] } tracing = { version = "0.1", default-features = false } tracing-appender = { version = "0.2", default-features = false } tracing-subscriber = { version = "0.3", features = 
["env-filter", "json"] } From 34217e9600e5002ed6891c5035eba1e1b6fc5b71 Mon Sep 17 00:00:00 2001 From: Dima Pristupa Date: Thu, 19 Dec 2024 13:55:26 +0200 Subject: [PATCH 04/10] Fixes after merging (#999) --- .../src/oso_resource_service_impl.rs | 17 +++++++----- src/app/cli/src/app.rs | 27 ++++++++----------- .../auth-rebac/services/src/dependencies.rs | 26 ------------------ src/domain/auth-rebac/services/src/lib.rs | 2 -- ...ebac_dataset_lifecycle_message_consumer.rs | 2 +- ...ebac_dataset_lifecycle_message_consumer.rs | 4 +-- .../services/src/dataset_entry_indexer.rs | 9 ------- .../tests/tests/test_dataset_entry_service.rs | 2 +- .../src/commands/test_init_command.rs | 2 +- .../src/repos/dataset_repository_local_fs.rs | 10 +++++++ 10 files changed, 36 insertions(+), 65 deletions(-) delete mode 100644 src/domain/auth-rebac/services/src/dependencies.rs diff --git a/src/adapter/auth-oso-rebac/src/oso_resource_service_impl.rs b/src/adapter/auth-oso-rebac/src/oso_resource_service_impl.rs index cc4287e21..73a344024 100644 --- a/src/adapter/auth-oso-rebac/src/oso_resource_service_impl.rs +++ b/src/adapter/auth-oso-rebac/src/oso_resource_service_impl.rs @@ -94,13 +94,16 @@ impl OsoResourceServiceImpl { Err(e) => return Err(e.into()), }; - let account_properties = self - .rebac_service - .get_account_properties(&account.id) - .await - .int_err()?; - - UserActor::logged(&account.id, account_properties.is_admin) + // TODO: Private Datasets: absorb the `is_admin` attribute + // from the Accounts domain + // https://github.com/kamu-data/kamu-cli/issues/766 + // let account_properties = self + // .rebac_service + // .get_account_properties(&account.id) + // .await + // .int_err()?; + + UserActor::logged(&account.id, account.is_admin) }; // Lastly, caching diff --git a/src/app/cli/src/app.rs b/src/app/cli/src/app.rs index db3e65724..77d13a653 100644 --- a/src/app/cli/src/app.rs +++ b/src/app/cli/src/app.rs @@ -131,21 +131,16 @@ pub async fn run(workspace_layout: WorkspaceLayout, args: cli::Cli) -> Result<() is_e2e_testing, ); - if workspace_svc.is_in_workspace() { - // TODO: Private Datasets: recheck after merge - // // NOTE: Register DatasetEntryIndexer in DI, since it is referenced by other - // // components (via InitOnStartup) - // // TODO: PERF: Do not register InitOnStartup-components if we are not inside the - // // workspace - // base_catalog_builder.add_builder( - // kamu_datasets_services::DatasetEntryIndexer::builder() - // .with_is_in_workspace(workspace_svc.is_in_workspace()), - // ); - // // The indexer has no other interfaces - // base_catalog_builder - // .bind::(); + // TODO: Use SQLite database in single-tenant + // https://github.com/kamu-data/kamu-cli/issues/981 + // + // After implementing this ticket, we need to use "is_init_command" + // not "init_multi_tenant_workspace" here + let is_indexing_needed = init_multi_tenant_workspace || workspace_svc.is_in_workspace(); + if is_indexing_needed { base_catalog_builder.add::(); base_catalog_builder.add::(); + base_catalog_builder.add::(); } base_catalog_builder.add_value(JwtAuthenticationConfig::load_from_env()); @@ -485,14 +480,14 @@ pub fn configure_base_catalog( b.add::(); b.add::(); - kamu_auth_rebac_services::register_dependencies(&mut b, tenancy_config); - kamu_adapter_auth_oso_rebac::register_dependencies(&mut b); b.add::(); + b.add::(); + if tenancy_config == TenancyConfig::MultiTenant { - b.add::(); + b.add::(); } b.add::(); diff --git a/src/domain/auth-rebac/services/src/dependencies.rs 
b/src/domain/auth-rebac/services/src/dependencies.rs deleted file mode 100644 index 10dbd085b..000000000 --- a/src/domain/auth-rebac/services/src/dependencies.rs +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright Kamu Data, Inc. and contributors. All rights reserved. -// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0. - -use dill::CatalogBuilder; -use kamu_core::TenancyConfig; - -use crate::*; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -pub fn register_dependencies(catalog_builder: &mut CatalogBuilder, tenancy_config: TenancyConfig) { - catalog_builder.add::(); - catalog_builder.add::(); - - if tenancy_config == TenancyConfig::MultiTenant { - catalog_builder.add::(); - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/auth-rebac/services/src/lib.rs b/src/domain/auth-rebac/services/src/lib.rs index f328f6fdf..df34f2039 100644 --- a/src/domain/auth-rebac/services/src/lib.rs +++ b/src/domain/auth-rebac/services/src/lib.rs @@ -10,14 +10,12 @@ // Re-exports pub use kamu_auth_rebac as domain; -mod dependencies; mod jobs; mod messages; mod multi_tenant_rebac_dataset_lifecycle_message_consumer; mod rebac_indexer; mod rebac_service_impl; -pub use dependencies::*; pub use jobs::*; pub use messages::*; pub use multi_tenant_rebac_dataset_lifecycle_message_consumer::*; diff --git a/src/domain/auth-rebac/services/src/multi_tenant_rebac_dataset_lifecycle_message_consumer.rs b/src/domain/auth-rebac/services/src/multi_tenant_rebac_dataset_lifecycle_message_consumer.rs index 1323f2ae0..b7cb4dc28 100644 --- a/src/domain/auth-rebac/services/src/multi_tenant_rebac_dataset_lifecycle_message_consumer.rs +++ b/src/domain/auth-rebac/services/src/multi_tenant_rebac_dataset_lifecycle_message_consumer.rs @@ -57,7 +57,7 @@ impl MultiTenantRebacDatasetLifecycleMessageConsumer { for (name, value) in [ DatasetPropertyName::allows_public_read(allows), // TODO: Private Datasets: Read from a specific environment's config - DatasetPropertyName::allows_anonymous_read(false), + DatasetPropertyName::allows_anonymous_read(allows), ] { self.rebac_service .set_dataset_property(&message.dataset_id, name, &value) diff --git a/src/domain/auth-rebac/services/tests/tests/test_multi_tenant_rebac_dataset_lifecycle_message_consumer.rs b/src/domain/auth-rebac/services/tests/tests/test_multi_tenant_rebac_dataset_lifecycle_message_consumer.rs index d9edee196..1dcdbaaef 100644 --- a/src/domain/auth-rebac/services/tests/tests/test_multi_tenant_rebac_dataset_lifecycle_message_consumer.rs +++ b/src/domain/auth-rebac/services/tests/tests/test_multi_tenant_rebac_dataset_lifecycle_message_consumer.rs @@ -83,7 +83,7 @@ async fn test_rebac_properties_added() { .get_dataset_properties(&public_dataset_id) .await, Ok(DatasetProperties { - allows_anonymous_read: false, + allows_anonymous_read: true, allows_public_read: true }) ); @@ -129,7 +129,7 @@ async fn test_rebac_properties_deleted() { .get_dataset_properties(&dataset_id) .await, Ok(DatasetProperties { - allows_anonymous_read: false, + allows_anonymous_read: true, allows_public_read: true }) ); diff --git a/src/domain/datasets/services/src/dataset_entry_indexer.rs 
b/src/domain/datasets/services/src/dataset_entry_indexer.rs index 86fa4efba..cfb4ca6be 100644 --- a/src/domain/datasets/services/src/dataset_entry_indexer.rs +++ b/src/domain/datasets/services/src/dataset_entry_indexer.rs @@ -35,7 +35,6 @@ pub struct DatasetEntryIndexer { time_source: Arc, dataset_repo: Arc, account_repository: Arc, - is_in_workspace: bool, } #[component(pub)] @@ -54,14 +53,12 @@ impl DatasetEntryIndexer { time_source: Arc, dataset_repo: Arc, account_repository: Arc, - is_in_workspace: bool, ) -> Self { Self { dataset_entry_repo, time_source, dataset_repo, account_repository, - is_in_workspace, } } @@ -178,12 +175,6 @@ impl InitOnStartup for DatasetEntryIndexer { name = "DatasetEntryIndexer::run_initialization" )] async fn run_initialization(&self) -> Result<(), InternalError> { - if !self.is_in_workspace { - tracing::debug!("Skip initialization: not in a workspace"); - - return Ok(()); - } - if self.has_datasets_indexed().await? { tracing::debug!("Skip initialization: datasets already have indexed"); diff --git a/src/domain/datasets/services/tests/tests/test_dataset_entry_service.rs b/src/domain/datasets/services/tests/tests/test_dataset_entry_service.rs index dcb4f91c1..8e751cb3f 100644 --- a/src/domain/datasets/services/tests/tests/test_dataset_entry_service.rs +++ b/src/domain/datasets/services/tests/tests/test_dataset_entry_service.rs @@ -210,7 +210,7 @@ impl DatasetEntryServiceHarness { let mut b = CatalogBuilder::new(); b.add::(); - b.add_builder(DatasetEntryIndexer::builder().with_is_in_workspace(true)); + b.add::(); b.add_value(mock_dataset_entry_repository); b.bind::(); diff --git a/src/e2e/app/cli/repo-tests/src/commands/test_init_command.rs b/src/e2e/app/cli/repo-tests/src/commands/test_init_command.rs index 89c98edbd..bb839ff91 100644 --- a/src/e2e/app/cli/repo-tests/src/commands/test_init_command.rs +++ b/src/e2e/app/cli/repo-tests/src/commands/test_init_command.rs @@ -83,7 +83,7 @@ pub async fn test_init_exist_ok_mt(mut kamu: KamuCliPuppet) { .unwrap(); // Verify that the database has not been overwritten - pretty_assertions::assert_eq!(modified_new, modified_old); + pretty_assertions::assert_eq!(modified_old, modified_new); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/core/src/repos/dataset_repository_local_fs.rs b/src/infra/core/src/repos/dataset_repository_local_fs.rs index 7ac5f0489..4c857f398 100644 --- a/src/infra/core/src/repos/dataset_repository_local_fs.rs +++ b/src/infra/core/src/repos/dataset_repository_local_fs.rs @@ -465,6 +465,11 @@ impl DatasetStorageStrategy for DatasetSingleTenantStorageStrategy { fn get_all_datasets(&self) -> DatasetHandleStream<'_> { Box::pin(async_stream::try_stream! { + // While creating a workspace, the directory has not yet been created + if !self.root.exists() { + return; + } + let read_dataset_dir = std::fs::read_dir(&self.root).int_err()?; for r_dataset_dir in read_dataset_dir { @@ -749,6 +754,11 @@ impl DatasetStorageStrategy for DatasetMultiTenantStorageStrategy { fn get_all_datasets(&self) -> DatasetHandleStream<'_> { Box::pin(async_stream::try_stream! 
{ + // While creating a workspace, the directory has not yet been created + if !self.root.exists() { + return; + } + let read_account_dir = std::fs::read_dir(&self.root).int_err()?; for r_account_dir in read_account_dir { From d7ebb404271cf54723e75d7611aeb6d43169f793 Mon Sep 17 00:00:00 2001 From: Dima Pristupa Date: Fri, 20 Dec 2024 22:57:47 +0200 Subject: [PATCH 05/10] `DatasetOwnershipService`: moved to the `kamu-dataset` area & implemented via `DatasetEntryServiceImpl` (#1004) * DatasetOwnershipService: use odf namespace * DatasetEntryServiceImpl: impl DatasetOwnershipService * DatasetOwnershipService: move to kamu-datasets scope * CHANGELOG.md: update --- CHANGELOG.md | 1 + Cargo.lock | 4 +- .../dataset_mut/dataset_mut_utils.rs | 2 +- .../flows_mut/account_flow_configs_mut.rs | 2 +- .../queries/accounts/account_flow_configs.rs | 2 +- .../tests/test_gql_account_flow_configs.rs | 12 +- .../tests/tests/test_gql_dataset_flow_runs.rs | 10 +- src/adapter/graphql/tests/utils/auth_utils.rs | 18 +- .../http/tests/harness/client_side_harness.rs | 8 +- src/app/cli/src/app.rs | 3 - src/domain/core/src/services/mod.rs | 2 - .../src/services/dataset_entry_service.rs | 2 + .../src/services/dataset_ownership_service.rs | 17 +- .../datasets/domain/src/services/mod.rs | 2 + .../src/dataset_entry_service_impl.rs | 67 ++- .../tests/tests/test_dataset_entry_service.rs | 4 +- src/domain/flow-system/services/Cargo.toml | 3 +- .../services/src/flow/flow_agent_impl.rs | 2 +- .../src/flow/flow_query_service_impl.rs | 2 +- .../src/flow/flow_scheduling_helper.rs | 51 ++- .../tests/tests/test_flow_agent_impl.rs | 243 +++++----- .../tests/tests/utils/flow_harness_shared.rs | 140 +++--- .../services/tests/tests/utils/task_driver.rs | 19 +- src/infra/core/Cargo.toml | 8 +- .../dataset_ownership_service_inmem.rs | 300 ------------- src/infra/core/src/services/mod.rs | 2 - .../core/src/testing/base_repo_harness.rs | 7 +- ...ate_dataset_from_snapshot_use_case_impl.rs | 13 +- .../use_cases/create_dataset_use_case_impl.rs | 13 +- src/infra/core/tests/tests/mod.rs | 1 - .../test_dataset_ownership_service_inmem.rs | 273 ------------ .../tests/tests/test_entries_streamer.rs | 414 ------------------ 32 files changed, 392 insertions(+), 1255 deletions(-) rename src/domain/{core => datasets/domain}/src/services/dataset_ownership_service.rs (69%) delete mode 100644 src/infra/core/src/services/dataset_ownership_service_inmem.rs delete mode 100644 src/infra/core/tests/tests/test_dataset_ownership_service_inmem.rs delete mode 100644 src/utils/database-common/tests/tests/test_entries_streamer.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 467c1e7c8..6481f3e57 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ Recommendation: for ease of reading, use the following order: - OSO: added resource storage for access speed - E2E: Using the correct account in multi-tenant mode - And also the possibility of set it up + - `DatasetOwnershipService`: moved to the `kamu-dataset` crate area & implemented via `DatasetEntryServiceImpl` ## [0.213.1] - 2024-12-18 ### Fixed diff --git a/Cargo.lock b/Cargo.lock index 267a57d3a..f60685f67 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5263,7 +5263,6 @@ dependencies = [ "curl", "curl-sys", "dashmap", - "database-common", "datafusion", "datafusion-ethers", "datafusion-functions-json", @@ -5284,8 +5283,6 @@ dependencies = [ "itertools 0.13.0", "kamu", "kamu-accounts", - "kamu-accounts-inmem", - "kamu-accounts-services", "kamu-core", "kamu-data-utils", "kamu-datasets", @@ -6358,6 +6355,7 @@ 
dependencies = [ "kamu-accounts-inmem", "kamu-accounts-services", "kamu-core", + "kamu-datasets", "kamu-datasets-inmem", "kamu-datasets-services", "kamu-flow-system", diff --git a/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut_utils.rs b/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut_utils.rs index b156d92db..4046ac95f 100644 --- a/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut_utils.rs +++ b/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut_utils.rs @@ -7,7 +7,7 @@ // the Business Source License, use of this software will be governed // by the Apache License, Version 2.0. -use kamu_core::DatasetOwnershipService; +use kamu_datasets::DatasetOwnershipService; use opendatafabric as odf; use crate::prelude::*; diff --git a/src/adapter/graphql/src/mutations/flows_mut/account_flow_configs_mut.rs b/src/adapter/graphql/src/mutations/flows_mut/account_flow_configs_mut.rs index 32769e6d0..32fa4b8c0 100644 --- a/src/adapter/graphql/src/mutations/flows_mut/account_flow_configs_mut.rs +++ b/src/adapter/graphql/src/mutations/flows_mut/account_flow_configs_mut.rs @@ -10,7 +10,7 @@ use chrono::Utc; use fs::FlowConfigurationService; use kamu_accounts::Account; -use kamu_core::DatasetOwnershipService; +use kamu_datasets::DatasetOwnershipService; use kamu_flow_system as fs; use opendatafabric::DatasetID; diff --git a/src/adapter/graphql/src/queries/accounts/account_flow_configs.rs b/src/adapter/graphql/src/queries/accounts/account_flow_configs.rs index d9543d2df..04632fcbd 100644 --- a/src/adapter/graphql/src/queries/accounts/account_flow_configs.rs +++ b/src/adapter/graphql/src/queries/accounts/account_flow_configs.rs @@ -9,7 +9,7 @@ use futures::StreamExt; use kamu_accounts::Account as AccountEntity; -use kamu_core::DatasetOwnershipService; +use kamu_datasets::DatasetOwnershipService; use kamu_flow_system::FlowConfigurationService; use crate::prelude::*; diff --git a/src/adapter/graphql/tests/tests/test_gql_account_flow_configs.rs b/src/adapter/graphql/tests/tests/test_gql_account_flow_configs.rs index f5562d189..ff68a0596 100644 --- a/src/adapter/graphql/tests/tests/test_gql_account_flow_configs.rs +++ b/src/adapter/graphql/tests/tests/test_gql_account_flow_configs.rs @@ -17,9 +17,6 @@ use indoc::indoc; use kamu::testing::{MetadataFactory, MockDatasetActionAuthorizer, MockDatasetChangesService}; use kamu::{ CreateDatasetFromSnapshotUseCaseImpl, - DatasetOwnershipServiceInMemory, - DatasetOwnershipServiceInMemoryStateInitializer, - DatasetRegistryRepoBridge, DatasetRepositoryLocalFs, DatasetRepositoryWriter, MetadataQueryServiceImpl, @@ -28,8 +25,8 @@ use kamu_accounts::{JwtAuthenticationConfig, DEFAULT_ACCOUNT_NAME, DEFAULT_ACCOU use kamu_accounts_inmem::InMemoryAccessTokenRepository; use kamu_accounts_services::{AccessTokenServiceImpl, AuthenticationServiceImpl}; use kamu_core::*; -use kamu_datasets_inmem::InMemoryDatasetDependencyRepository; -use kamu_datasets_services::DependencyGraphServiceImpl; +use kamu_datasets_inmem::{InMemoryDatasetDependencyRepository, InMemoryDatasetEntryRepository}; +use kamu_datasets_services::{DatasetEntryServiceImpl, DependencyGraphServiceImpl}; use kamu_flow_system::FlowAgentConfig; use kamu_flow_system_inmem::{InMemoryFlowConfigurationEventStore, InMemoryFlowEventStore}; use kamu_task_system_inmem::InMemoryTaskEventStore; @@ -643,7 +640,6 @@ impl FlowConfigHarness { .add_builder(DatasetRepositoryLocalFs::builder().with_root(datasets_dir)) .bind::() .bind::() - .add::() .add::() .add::() .add_value(dataset_changes_mock) @@ -665,8 
+661,8 @@ impl FlowConfigHarness { )) .add::() .add::() - .add::() - .add::() + .add::() + .add::() .add::(); NoOpDatabasePlugin::init_database_components(&mut b); diff --git a/src/adapter/graphql/tests/tests/test_gql_dataset_flow_runs.rs b/src/adapter/graphql/tests/tests/test_gql_dataset_flow_runs.rs index b593f0bc6..510a16056 100644 --- a/src/adapter/graphql/tests/tests/test_gql_dataset_flow_runs.rs +++ b/src/adapter/graphql/tests/tests/test_gql_dataset_flow_runs.rs @@ -18,8 +18,6 @@ use indoc::indoc; use kamu::testing::{MetadataFactory, MockDatasetChangesService}; use kamu::{ CreateDatasetFromSnapshotUseCaseImpl, - DatasetOwnershipServiceInMemory, - DatasetRegistryRepoBridge, DatasetRepositoryLocalFs, DatasetRepositoryWriter, MetadataQueryServiceImpl, @@ -47,8 +45,8 @@ use kamu_core::{ TenancyConfig, MESSAGE_PRODUCER_KAMU_CORE_DATASET_SERVICE, }; -use kamu_datasets_inmem::InMemoryDatasetDependencyRepository; -use kamu_datasets_services::DependencyGraphServiceImpl; +use kamu_datasets_inmem::{InMemoryDatasetDependencyRepository, InMemoryDatasetEntryRepository}; +use kamu_datasets_services::{DatasetEntryServiceImpl, DependencyGraphServiceImpl}; use kamu_flow_system::{ Flow, FlowAgentConfig, @@ -3111,7 +3109,6 @@ impl FlowRunsHarness { .add_builder(DatasetRepositoryLocalFs::builder().with_root(datasets_dir)) .bind::() .bind::() - .add::() .add::() .add::() .add_value(dataset_changes_mock) @@ -3132,7 +3129,8 @@ impl FlowRunsHarness { .add::() .add::() .add_value(JwtAuthenticationConfig::default()) - .add::() + .add::() + .add::() .add::(); NoOpDatabasePlugin::init_database_components(&mut b); diff --git a/src/adapter/graphql/tests/utils/auth_utils.rs b/src/adapter/graphql/tests/utils/auth_utils.rs index 84e5b53c6..ed3ae25fe 100644 --- a/src/adapter/graphql/tests/utils/auth_utils.rs +++ b/src/adapter/graphql/tests/utils/auth_utils.rs @@ -17,12 +17,6 @@ use kamu_adapter_graphql::ANONYMOUS_ACCESS_FORBIDDEN_MESSAGE; pub async fn authentication_catalogs( base_catalog: &dill::Catalog, ) -> (dill::Catalog, dill::Catalog) { - let catalog_anonymous = dill::CatalogBuilder::new_chained(base_catalog) - .add_value(CurrentAccountSubject::anonymous( - AnonymousAccountReason::NoAuthenticationProvided, - )) - .build(); - let current_account_subject = CurrentAccountSubject::new_test(); let mut predefined_accounts_config = PredefinedAccountsConfig::new(); @@ -36,12 +30,20 @@ pub async fn authentication_catalogs( panic!() } - let catalog_authorized = dill::CatalogBuilder::new_chained(base_catalog) + let base_auth_catalog = dill::CatalogBuilder::new_chained(base_catalog) .add::() .add::() .add::() + .add_value(predefined_accounts_config.clone()) + .build(); + + let catalog_anonymous = dill::CatalogBuilder::new_chained(&base_auth_catalog) + .add_value(CurrentAccountSubject::anonymous( + AnonymousAccountReason::NoAuthenticationProvided, + )) + .build(); + let catalog_authorized = dill::CatalogBuilder::new_chained(&base_auth_catalog) .add_value(current_account_subject) - .add_value(predefined_accounts_config) .build(); init_on_startup::run_startup_jobs(&catalog_authorized) diff --git a/src/adapter/http/tests/harness/client_side_harness.rs b/src/adapter/http/tests/harness/client_side_harness.rs index 442cee8a4..2719ffa35 100644 --- a/src/adapter/http/tests/harness/client_side_harness.rs +++ b/src/adapter/http/tests/harness/client_side_harness.rs @@ -52,7 +52,7 @@ pub(crate) struct ClientSideHarness { catalog: dill::Catalog, pull_dataset_use_case: Arc, push_dataset_use_case: Arc, - access_token_resover: Arc, + 
access_token_resolver: Arc, options: ClientSideHarnessOptions, } @@ -162,7 +162,7 @@ impl ClientSideHarness { let pull_dataset_use_case = catalog.get_one::().unwrap(); let push_dataset_use_case = catalog.get_one::().unwrap(); - let access_token_resover = catalog + let access_token_resolver = catalog .get_one::() .unwrap(); @@ -171,7 +171,7 @@ impl ClientSideHarness { catalog, pull_dataset_use_case, push_dataset_use_case, - access_token_resover, + access_token_resolver, options, } } @@ -341,7 +341,7 @@ impl ClientSideHarness { let mut ws_url = url.odf_to_transport_protocol().unwrap(); ws_url.ensure_trailing_slash(); let maybe_access_token = self - .access_token_resover + .access_token_resolver .resolve_odf_dataset_access_token(&ws_url); ws_url = ws_url.join(method).unwrap(); diff --git a/src/app/cli/src/app.rs b/src/app/cli/src/app.rs index 77d13a653..9555dae1f 100644 --- a/src/app/cli/src/app.rs +++ b/src/app/cli/src/app.rs @@ -535,9 +535,6 @@ pub fn configure_server_catalog(base_catalog: &Catalog) -> CatalogBuilder { b.add::(); - b.add::(); - b.add::(); - kamu_task_system_services::register_dependencies(&mut b); b.add_value(kamu_flow_system_inmem::domain::FlowAgentConfig::new( diff --git a/src/domain/core/src/services/mod.rs b/src/domain/core/src/services/mod.rs index 289e2c27c..734cd2137 100644 --- a/src/domain/core/src/services/mod.rs +++ b/src/domain/core/src/services/mod.rs @@ -23,7 +23,6 @@ pub use transform::*; pub use watermark::*; pub mod dataset_changes_service; -pub mod dataset_ownership_service; pub mod dataset_registry; pub mod dependency_graph_service; pub mod engine_provisioner; @@ -43,7 +42,6 @@ pub mod sync_service; pub mod verification_service; pub use dataset_changes_service::*; -pub use dataset_ownership_service::*; pub use dataset_registry::*; pub use dependency_graph_service::*; pub use engine_provisioner::*; diff --git a/src/domain/datasets/domain/src/services/dataset_entry_service.rs b/src/domain/datasets/domain/src/services/dataset_entry_service.rs index 0da7d1587..a142981c8 100644 --- a/src/domain/datasets/domain/src/services/dataset_entry_service.rs +++ b/src/domain/datasets/domain/src/services/dataset_entry_service.rs @@ -22,6 +22,8 @@ pub trait DatasetEntryService: Sync + Send { // TODO: Private Datasets: extract to DatasetEntryRegistry? fn all_entries(&self) -> DatasetEntryStream; + fn entries_owned_by(&self, owner_id: &odf::AccountID) -> DatasetEntryStream; + async fn list_all_entries( &self, pagination: PaginationOpts, diff --git a/src/domain/core/src/services/dataset_ownership_service.rs b/src/domain/datasets/domain/src/services/dataset_ownership_service.rs similarity index 69% rename from src/domain/core/src/services/dataset_ownership_service.rs rename to src/domain/datasets/domain/src/services/dataset_ownership_service.rs index 384b698c2..fe68361f7 100644 --- a/src/domain/core/src/services/dataset_ownership_service.rs +++ b/src/domain/datasets/domain/src/services/dataset_ownership_service.rs @@ -8,27 +8,26 @@ // by the Apache License, Version 2.0. 
use internal_error::InternalError; -use opendatafabric::{AccountID, DatasetID}; +use opendatafabric as odf; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// TODO: Private Datasets: replace with DatasetEntry-related service #[async_trait::async_trait] pub trait DatasetOwnershipService: Sync + Send { - async fn get_dataset_owners( + async fn get_dataset_owner( &self, - dataset_id: &DatasetID, - ) -> Result, InternalError>; + dataset_id: &odf::DatasetID, + ) -> Result; async fn get_owned_datasets( &self, - account_id: &AccountID, - ) -> Result, InternalError>; + account_id: &odf::AccountID, + ) -> Result, InternalError>; async fn is_dataset_owned_by( &self, - dataset_id: &DatasetID, - account_id: &AccountID, + dataset_id: &odf::DatasetID, + account_id: &odf::AccountID, ) -> Result; } diff --git a/src/domain/datasets/domain/src/services/mod.rs b/src/domain/datasets/domain/src/services/mod.rs index a0b3ec926..004333bdb 100644 --- a/src/domain/datasets/domain/src/services/mod.rs +++ b/src/domain/datasets/domain/src/services/mod.rs @@ -10,7 +10,9 @@ mod dataset_entry_service; mod dataset_env_var_service; mod dataset_key_value_service; +mod dataset_ownership_service; pub use dataset_entry_service::*; pub use dataset_env_var_service::*; pub use dataset_key_value_service::*; +pub use dataset_ownership_service::*; diff --git a/src/domain/datasets/services/src/dataset_entry_service_impl.rs b/src/domain/datasets/services/src/dataset_entry_service_impl.rs index b1cbe6e4e..a59b45c55 100644 --- a/src/domain/datasets/services/src/dataset_entry_service_impl.rs +++ b/src/domain/datasets/services/src/dataset_entry_service_impl.rs @@ -12,7 +12,7 @@ use std::sync::Arc; use database_common::{EntityPageListing, EntityPageStreamer, PaginationOpts}; use dill::{component, interface, meta, Catalog}; -use internal_error::{InternalError, ResultIntoInternal}; +use internal_error::{ErrorIntoInternal, InternalError, ResultIntoInternal}; use kamu_accounts::{AccountRepository, CurrentAccountSubject}; use kamu_core::{ DatasetHandleStream, @@ -68,6 +68,7 @@ struct AccountsCache { #[component(pub)] #[interface(dyn DatasetEntryService)] #[interface(dyn DatasetRegistry)] +#[interface(dyn DatasetOwnershipService)] #[interface(dyn MessageConsumer)] #[interface(dyn MessageConsumerT)] #[meta(MessageConsumerMeta { @@ -336,6 +337,19 @@ impl DatasetEntryService for DatasetEntryServiceImpl { ) } + fn entries_owned_by(&self, owner_id: &odf::AccountID) -> DatasetEntryStream { + let owner_id = owner_id.clone(); + + EntityPageStreamer::default().into_stream( + || async { Ok(Arc::new(owner_id)) }, + move |owner_id, pagination| async move { + self.list_entries_owned_by(&owner_id, pagination) + .await + .int_err() + }, + ) + } + async fn list_all_entries( &self, pagination: PaginationOpts, @@ -511,6 +525,57 @@ impl DatasetRegistry for DatasetEntryServiceImpl { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#[async_trait::async_trait] +impl DatasetOwnershipService for DatasetEntryServiceImpl { + async fn get_dataset_owner( + &self, + dataset_id: &odf::DatasetID, + ) -> Result { + let dataset_entry = self + .dataset_entry_repo + .get_dataset_entry(dataset_id) + .await + .int_err()?; + + Ok(dataset_entry.owner_id) + } + + async fn get_owned_datasets( + &self, + account_id: &odf::AccountID, + ) -> Result, InternalError> { + use futures::TryStreamExt; + + let owned_dataset_ids = self + 
.entries_owned_by(account_id) + .try_collect::>() + .await? + .into_iter() + .map(|dataset_entry| dataset_entry.id) + .collect::>(); + + Ok(owned_dataset_ids) + } + + async fn is_dataset_owned_by( + &self, + dataset_id: &odf::DatasetID, + account_id: &odf::AccountID, + ) -> Result { + let get_res = self.dataset_entry_repo.get_dataset_entry(dataset_id).await; + + match get_res { + Ok(dataset_entry) => Ok(dataset_entry.owner_id == *account_id), + Err(err) => match err { + GetDatasetEntryError::NotFound(_) => Ok(false), + unexpected_error => Err(unexpected_error.int_err()), + }, + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + impl MessageConsumer for DatasetEntryServiceImpl {} //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/datasets/services/tests/tests/test_dataset_entry_service.rs b/src/domain/datasets/services/tests/tests/test_dataset_entry_service.rs index 8e751cb3f..365996b22 100644 --- a/src/domain/datasets/services/tests/tests/test_dataset_entry_service.rs +++ b/src/domain/datasets/services/tests/tests/test_dataset_entry_service.rs @@ -10,7 +10,7 @@ use std::sync::{Arc, RwLock}; use chrono::{DateTime, TimeZone, Utc}; -use dill::{Catalog, CatalogBuilder, Component}; +use dill::{CatalogBuilder, Component}; use init_on_startup::InitOnStartup; use kamu::{DatasetRepositoryWriter, MockDatasetRepositoryWriter}; use kamu_accounts::{Account, AccountRepository, CurrentAccountSubject}; @@ -195,7 +195,6 @@ async fn test_indexes_datasets_correctly() { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// struct DatasetEntryServiceHarness { - _catalog: Catalog, outbox: Arc, dataset_entry_indexer: Arc, account_repo: Arc, @@ -252,7 +251,6 @@ impl DatasetEntryServiceHarness { outbox: catalog.get_one().unwrap(), dataset_entry_indexer: catalog.get_one().unwrap(), account_repo: catalog.get_one().unwrap(), - _catalog: catalog, } } diff --git a/src/domain/flow-system/services/Cargo.toml b/src/domain/flow-system/services/Cargo.toml index 8384e7a30..dc6a779eb 100644 --- a/src/domain/flow-system/services/Cargo.toml +++ b/src/domain/flow-system/services/Cargo.toml @@ -26,11 +26,12 @@ database-common = { workspace = true } database-common-macros = { workspace = true } init-on-startup = { workspace = true } internal-error = { workspace = true } -messaging-outbox = { workspace = true } kamu-accounts = { workspace = true } kamu-core = { workspace = true } +kamu-datasets = { workspace = true } kamu-flow-system = { workspace = true } kamu-task-system = { workspace = true } +messaging-outbox = { workspace = true } observability = { workspace = true, default-features = false } opendatafabric = { workspace = true } time-source = { workspace = true } diff --git a/src/domain/flow-system/services/src/flow/flow_agent_impl.rs b/src/domain/flow-system/services/src/flow/flow_agent_impl.rs index 42df2ecfb..454d00c96 100644 --- a/src/domain/flow-system/services/src/flow/flow_agent_impl.rs +++ b/src/domain/flow-system/services/src/flow/flow_agent_impl.rs @@ -567,7 +567,7 @@ impl MessageConsumerT for FlowAgentImpl { let finish_time = self.agent_config.round_time(message.event_time)?; // In case of success: - // - execute followup method + // - execute follow-up method if let Some(flow_result) = flow.try_result_as_ref() && !flow_result.is_empty() { diff --git 
a/src/domain/flow-system/services/src/flow/flow_query_service_impl.rs b/src/domain/flow-system/services/src/flow/flow_query_service_impl.rs index 7e25ce58c..53e820c12 100644 --- a/src/domain/flow-system/services/src/flow/flow_query_service_impl.rs +++ b/src/domain/flow-system/services/src/flow/flow_query_service_impl.rs @@ -15,7 +15,7 @@ use database_common::PaginationOpts; use dill::{component, interface, Catalog}; use futures::TryStreamExt; use internal_error::ResultIntoInternal; -use kamu_core::DatasetOwnershipService; +use kamu_datasets::DatasetOwnershipService; use kamu_flow_system::*; use opendatafabric::{AccountID, DatasetID}; diff --git a/src/domain/flow-system/services/src/flow/flow_scheduling_helper.rs b/src/domain/flow-system/services/src/flow/flow_scheduling_helper.rs index 001113c7b..f7b7713b6 100644 --- a/src/domain/flow-system/services/src/flow/flow_scheduling_helper.rs +++ b/src/domain/flow-system/services/src/flow/flow_scheduling_helper.rs @@ -12,7 +12,8 @@ use std::sync::Arc; use chrono::{DateTime, Utc}; use dill::component; use internal_error::InternalError; -use kamu_core::{DatasetChangesService, DatasetOwnershipService, DependencyGraphService}; +use kamu_core::{DatasetChangesService, DependencyGraphService}; +use kamu_datasets::DatasetOwnershipService; use kamu_flow_system::*; use messaging_outbox::{Outbox, OutboxExt}; use time_source::SystemTimeSource; @@ -221,35 +222,33 @@ impl FlowSchedulingHelper { } DownstreamDependencyTriggerType::TriggerOwnHardCompaction => { - let dataset_owner_account_ids = self + let owner_account_id = self .dataset_ownership_service - .get_dataset_owners(&fk_dataset.dataset_id) + .get_dataset_owner(&fk_dataset.dataset_id) .await?; for dependent_dataset_id in dependent_dataset_ids { - for owner_account_id in &dataset_owner_account_ids { - if self - .dataset_ownership_service - .is_dataset_owned_by(&dependent_dataset_id, owner_account_id) - .await? 
- { - plans.push(DownstreamDependencyFlowPlan { - flow_key: FlowKeyDataset::new( - dependent_dataset_id.clone(), - DatasetFlowType::HardCompaction, - ) - .into(), - flow_trigger_context: FlowTriggerContext::Unconditional, - // Currently we trigger Hard compaction recursively only in keep - // metadata only mode - maybe_config_snapshot: Some(FlowConfigurationSnapshot::Compaction( - CompactionRule::MetadataOnly(CompactionRuleMetadataOnly { - recursive: true, - }), - )), - }); - break; - } + let owned = self + .dataset_ownership_service + .is_dataset_owned_by(&dependent_dataset_id, &owner_account_id) + .await?; + + if owned { + plans.push(DownstreamDependencyFlowPlan { + flow_key: FlowKeyDataset::new( + dependent_dataset_id, + DatasetFlowType::HardCompaction, + ) + .into(), + flow_trigger_context: FlowTriggerContext::Unconditional, + // Currently we trigger Hard compaction recursively only in keep + // metadata only mode + maybe_config_snapshot: Some(FlowConfigurationSnapshot::Compaction( + CompactionRule::MetadataOnly(CompactionRuleMetadataOnly { + recursive: true, + }), + )), + }); } } } diff --git a/src/domain/flow-system/services/tests/tests/test_flow_agent_impl.rs b/src/domain/flow-system/services/tests/tests/test_flow_agent_impl.rs index 544defc29..c5cdfff7c 100644 --- a/src/domain/flow-system/services/tests/tests/test_flow_agent_impl.rs +++ b/src/domain/flow-system/services/tests/tests/test_flow_agent_impl.rs @@ -12,6 +12,7 @@ use std::str::FromStr; use chrono::{Duration, DurationRound, Utc}; use futures::TryStreamExt; use kamu::testing::MockDatasetChangesService; +use kamu_accounts::{AccountConfig, CurrentAccountSubject}; use kamu_core::*; use kamu_flow_system::*; use kamu_task_system::*; @@ -106,7 +107,6 @@ async fn test_read_initial_config_and_queue_without_waiting() { test_flow_listener.define_dataset_display_name(foo_id.clone(), "foo".to_string()); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -143,7 +143,8 @@ async fn test_read_initial_config_and_queue_without_waiting() { Flow ID = 0 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -262,7 +263,6 @@ async fn test_read_initial_config_shouldnt_queue_in_recovery_case() { test_flow_listener.define_dataset_display_name(foo_id.clone(), "foo".to_string()); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -283,7 +283,8 @@ async fn test_read_initial_config_shouldnt_queue_in_recovery_case() { Flow ID = 0 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -374,7 +375,6 @@ async fn test_cron_config() { .unwrap(); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -402,7 +402,8 @@ async fn test_cron_config() { Flow ID = 0 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -544,7 +545,6 @@ async fn test_manual_trigger() { .unwrap(); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -613,7 +613,8 @@ async fn test_manual_trigger() { Flow ID = 0 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -755,7 +756,6 @@ async fn test_ingest_trigger_with_ingest_config() { .unwrap(); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -824,7 +824,8 @@ async fn test_ingest_trigger_with_ingest_config() { Flow ID = 0 Finished Success "# - ) + ), 
+ format!("{}", test_flow_listener.as_ref()) ); } @@ -933,7 +934,6 @@ async fn test_manual_trigger_compaction() { .unwrap(); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -971,7 +971,8 @@ async fn test_manual_trigger_compaction() { Flow ID = 0 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -1071,7 +1072,6 @@ async fn test_manual_trigger_reset() { .unwrap(); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -1089,7 +1089,8 @@ async fn test_manual_trigger_reset() { Flow ID = 0 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -1256,14 +1257,12 @@ async fn test_reset_trigger_keep_metadata_compaction_for_derivatives() { harness.advance_time(Duration::milliseconds(300)).await; }; - // tokio::join!(trigger0_handle, task0_handle, main_handle) tokio::join!(trigger0_handle, task0_handle, task1_handle, task2_handle, main_handle) } => Ok(()) } .unwrap(); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -1325,7 +1324,8 @@ async fn test_reset_trigger_keep_metadata_compaction_for_derivatives() { Flow ID = 1 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -1406,7 +1406,6 @@ async fn test_manual_trigger_compaction_with_config() { .unwrap(); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -1424,7 +1423,8 @@ async fn test_manual_trigger_compaction_with_config() { Flow ID = 0 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -1526,7 +1526,7 @@ async fn test_full_hard_compaction_trigger_keep_metadata_compaction_for_derivati }); let task0_handle = task0_driver.run(); - // Task 1: "foo_bar" start running at 110ms, finish at 180sms + // Task 1: "foo_baz" start running at 110ms, finish at 180sms let task1_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(1), task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "1")]), @@ -1554,7 +1554,7 @@ async fn test_full_hard_compaction_trigger_keep_metadata_compaction_for_derivati }); let task1_handle = task1_driver.run(); - // Task 2: "foo_bar_baz" start running at 200ms, finish at 240ms + // Task 2: "foo_bar" start running at 200ms, finish at 240ms let task2_driver = harness.task_driver(TaskDriverArgs { task_id: TaskID::new(2), task_metadata: TaskMetadata::from(vec![(METADATA_TASK_FLOW_ID, "2")]), @@ -1593,7 +1593,6 @@ async fn test_full_hard_compaction_trigger_keep_metadata_compaction_for_derivati .unwrap(); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -1655,7 +1654,8 @@ async fn test_full_hard_compaction_trigger_keep_metadata_compaction_for_derivati Flow ID = 1 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()), ); } @@ -1821,7 +1821,6 @@ async fn test_manual_trigger_keep_metadata_only_with_recursive_compaction() { .unwrap(); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -1885,7 +1884,8 @@ async fn test_manual_trigger_keep_metadata_only_with_recursive_compaction() { Flow ID = 2 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -1995,7 +1995,6 @@ async fn test_manual_trigger_keep_metadata_only_without_recursive_compaction() { .unwrap(); pretty_assertions::assert_eq!( - format!("{}", 
test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -2013,7 +2012,8 @@ async fn test_manual_trigger_keep_metadata_only_without_recursive_compaction() { Flow ID = 0 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -2021,40 +2021,50 @@ async fn test_manual_trigger_keep_metadata_only_without_recursive_compaction() { #[test_log::test(tokio::test)] async fn test_manual_trigger_keep_metadata_only_compaction_multiple_accounts() { - let wasya_account_name = AccountName::new_unchecked("wasya"); - let petya_account_name = AccountName::new_unchecked("petya"); + let wasya = AccountConfig::from_name(AccountName::new_unchecked("wasya")); + let petya = AccountConfig::from_name(AccountName::new_unchecked("petya")); + + let subject_wasya = + CurrentAccountSubject::logged(wasya.get_id(), wasya.account_name.clone(), false); + let subject_petya = + CurrentAccountSubject::logged(petya.get_id(), petya.account_name.clone(), false); let harness = FlowHarness::with_overrides(FlowHarnessOverrides { tenancy_config: TenancyConfig::MultiTenant, - custom_account_names: vec![wasya_account_name.clone(), petya_account_name.clone()], + predefined_accounts: vec![wasya, petya], ..Default::default() }) .await; let foo_create_result = harness - .create_root_dataset(DatasetAlias { - dataset_name: DatasetName::new_unchecked("foo"), - account_name: Some(wasya_account_name.clone()), - }) + .create_root_dataset_using_subject( + DatasetAlias { + dataset_name: DatasetName::new_unchecked("foo"), + account_name: Some(subject_wasya.account_name().clone()), + }, + subject_wasya.clone(), + ) .await; let foo_id = foo_create_result.dataset_handle.id; let foo_bar_id = harness - .create_derived_dataset( + .create_derived_dataset_using_subject( DatasetAlias { dataset_name: DatasetName::new_unchecked("foo.bar"), - account_name: Some(wasya_account_name.clone()), + account_name: Some(subject_wasya.account_name().clone()), }, vec![foo_id.clone()], + subject_wasya, ) .await; let foo_baz_id = harness - .create_derived_dataset( + .create_derived_dataset_using_subject( DatasetAlias { dataset_name: DatasetName::new_unchecked("foo.baz"), - account_name: Some(petya_account_name.clone()), + account_name: Some(subject_petya.account_name().clone()), }, vec![foo_id.clone()], + subject_petya, ) .await; @@ -2148,7 +2158,6 @@ async fn test_manual_trigger_keep_metadata_only_compaction_multiple_accounts() { .unwrap(); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -2186,7 +2195,8 @@ async fn test_manual_trigger_keep_metadata_only_compaction_multiple_accounts() { Flow ID = 1 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -2324,7 +2334,6 @@ async fn test_dataset_flow_configuration_paused_resumed_modified() { .unwrap(); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -2414,7 +2423,8 @@ async fn test_dataset_flow_configuration_paused_resumed_modified() { Flow ID = 0 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -2555,7 +2565,6 @@ async fn test_respect_last_success_time_when_schedule_resumes() { .unwrap(); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -2645,7 +2654,8 @@ async fn test_respect_last_success_time_when_schedule_resumes() { Flow ID = 0 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -2763,7 +2773,6 @@ async fn 
test_dataset_deleted() { .unwrap(); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -2831,7 +2840,8 @@ async fn test_dataset_deleted() { Flow ID = 0 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -2964,7 +2974,6 @@ async fn test_task_completions_trigger_next_loop_on_success() { .unwrap(); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -3046,7 +3055,8 @@ async fn test_task_completions_trigger_next_loop_on_success() { Flow ID = 0 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -3201,7 +3211,6 @@ async fn test_derived_dataset_triggered_initially_and_after_input_change() { .unwrap(); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -3303,7 +3312,8 @@ async fn test_derived_dataset_triggered_initially_and_after_input_change() { Flow ID = 0 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -3401,7 +3411,6 @@ async fn test_throttling_manual_triggers() { .unwrap(); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -3429,7 +3438,8 @@ async fn test_throttling_manual_triggers() { Flow ID = 0 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -3677,7 +3687,6 @@ async fn test_throttling_derived_dataset_with_2_parents() { .unwrap(); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -3901,7 +3910,8 @@ async fn test_throttling_derived_dataset_with_2_parents() { Flow ID = 0 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -4101,7 +4111,6 @@ async fn test_batching_condition_records_reached() { .unwrap(); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -4235,7 +4244,8 @@ async fn test_batching_condition_records_reached() { Flow ID = 0 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -4407,7 +4417,6 @@ async fn test_batching_condition_timeout() { .unwrap(); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -4509,7 +4518,8 @@ async fn test_batching_condition_timeout() { Flow ID = 0 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -4681,7 +4691,6 @@ async fn test_batching_condition_watermark() { .unwrap(); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -4783,7 +4792,8 @@ async fn test_batching_condition_watermark() { Flow ID = 0 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -5080,7 +5090,6 @@ async fn test_batching_condition_with_2_inputs() { .unwrap(); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -5335,7 +5344,8 @@ async fn test_batching_condition_with_2_inputs() { Flow ID = 0 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -5343,42 +5353,51 @@ async fn test_batching_condition_with_2_inputs() { #[test_log::test(tokio::test)] async fn test_list_all_flow_initiators() { - let foo_account_name = AccountName::new_unchecked("foo"); - let bar_account_name = AccountName::new_unchecked("bar"); + let foo = AccountConfig::from_name(AccountName::new_unchecked("foo")); + let bar = 
AccountConfig::from_name(AccountName::new_unchecked("bar")); + + let subject_foo = CurrentAccountSubject::logged(foo.get_id(), foo.account_name.clone(), false); + let subject_bar = CurrentAccountSubject::logged(bar.get_id(), bar.account_name.clone(), false); let harness = FlowHarness::with_overrides(FlowHarnessOverrides { - custom_account_names: vec![foo_account_name.clone(), bar_account_name.clone()], + predefined_accounts: vec![foo, bar], tenancy_config: TenancyConfig::MultiTenant, ..Default::default() }) .await; let foo_create_result = harness - .create_root_dataset(DatasetAlias { - dataset_name: DatasetName::new_unchecked("foo"), - account_name: Some(foo_account_name.clone()), - }) + .create_root_dataset_using_subject( + DatasetAlias { + dataset_name: DatasetName::new_unchecked("foo"), + account_name: Some(subject_foo.account_name().clone()), + }, + subject_foo.clone(), + ) .await; let foo_id = foo_create_result.dataset_handle.id; let foo_account_id = harness .auth_svc - .find_account_id_by_name(&foo_account_name) + .find_account_id_by_name(subject_foo.account_name()) .await .unwrap() .unwrap(); let bar_account_id = harness .auth_svc - .find_account_id_by_name(&bar_account_name) + .find_account_id_by_name(subject_bar.account_name()) .await .unwrap() .unwrap(); let bar_create_result = harness - .create_root_dataset(DatasetAlias { - dataset_name: DatasetName::new_unchecked("bar"), - account_name: Some(bar_account_name.clone()), - }) + .create_root_dataset_using_subject( + DatasetAlias { + dataset_name: DatasetName::new_unchecked("bar"), + account_name: Some(subject_bar.account_name().clone()), + }, + subject_bar, + ) .await; let bar_id = bar_create_result.dataset_handle.id; @@ -5474,7 +5493,7 @@ async fn test_list_all_flow_initiators() { .await .unwrap(); - assert_eq!(foo_dataset_initiators_list, [foo_account_id.clone()]); + pretty_assertions::assert_eq!([foo_account_id.clone()], *foo_dataset_initiators_list); let bar_dataset_initiators_list: Vec<_> = harness .flow_query_service @@ -5486,59 +5505,69 @@ async fn test_list_all_flow_initiators() { .await .unwrap(); - assert_eq!(bar_dataset_initiators_list, [bar_account_id.clone()]); + pretty_assertions::assert_eq!([bar_account_id.clone()], *bar_dataset_initiators_list); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #[test_log::test(tokio::test)] async fn test_list_all_datasets_with_flow() { - let foo_account_name = AccountName::new_unchecked("foo"); - let bar_account_name = AccountName::new_unchecked("bar"); + let foo = AccountConfig::from_name(AccountName::new_unchecked("foo")); + let bar = AccountConfig::from_name(AccountName::new_unchecked("bar")); + + let subject_foo = CurrentAccountSubject::logged(foo.get_id(), foo.account_name.clone(), false); + let subject_bar = CurrentAccountSubject::logged(bar.get_id(), bar.account_name.clone(), false); let harness = FlowHarness::with_overrides(FlowHarnessOverrides { - custom_account_names: vec![foo_account_name.clone(), bar_account_name.clone()], + predefined_accounts: vec![foo, bar], tenancy_config: TenancyConfig::MultiTenant, ..Default::default() }) .await; let foo_create_result = harness - .create_root_dataset(DatasetAlias { - dataset_name: DatasetName::new_unchecked("foo"), - account_name: Some(foo_account_name.clone()), - }) + .create_root_dataset_using_subject( + DatasetAlias { + dataset_name: DatasetName::new_unchecked("foo"), + account_name: Some(subject_foo.account_name().clone()), + }, + subject_foo.clone(), + ) 
.await; let foo_id = foo_create_result.dataset_handle.id; let _foo_bar_id = harness - .create_derived_dataset( + .create_derived_dataset_using_subject( DatasetAlias { dataset_name: DatasetName::new_unchecked("foo.bar"), - account_name: Some(foo_account_name.clone()), + account_name: Some(subject_foo.account_name().clone()), }, vec![foo_id.clone()], + subject_foo.clone(), ) .await; let foo_account_id = harness .auth_svc - .find_account_id_by_name(&foo_account_name) + .find_account_id_by_name(subject_foo.account_name()) .await .unwrap() .unwrap(); let bar_account_id = harness .auth_svc - .find_account_id_by_name(&bar_account_name) + .find_account_id_by_name(subject_bar.account_name()) .await .unwrap() .unwrap(); let bar_create_result = harness - .create_root_dataset(DatasetAlias { - dataset_name: DatasetName::new_unchecked("bar"), - account_name: Some(bar_account_name.clone()), - }) + .create_root_dataset_using_subject( + DatasetAlias { + dataset_name: DatasetName::new_unchecked("bar"), + account_name: Some(subject_bar.account_name().clone()), + }, + subject_bar, + ) .await; let bar_id = bar_create_result.dataset_handle.id; @@ -5634,7 +5663,7 @@ async fn test_list_all_datasets_with_flow() { .await .unwrap(); - assert_eq!(foo_dataset_initiators_list, [foo_account_id.clone()]); + pretty_assertions::assert_eq!([foo_account_id.clone()], *foo_dataset_initiators_list); let bar_dataset_initiators_list: Vec<_> = harness .flow_query_service @@ -5646,7 +5675,7 @@ async fn test_list_all_datasets_with_flow() { .await .unwrap(); - assert_eq!(bar_dataset_initiators_list, [bar_account_id.clone()]); + pretty_assertions::assert_eq!([bar_account_id.clone()], *bar_dataset_initiators_list); let all_datasets_with_flow: Vec<_> = harness .flow_query_service @@ -5658,7 +5687,7 @@ async fn test_list_all_datasets_with_flow() { .await .unwrap(); - assert_eq!(all_datasets_with_flow, [foo_id]); + pretty_assertions::assert_eq!([foo_id], *all_datasets_with_flow); let all_datasets_with_flow: Vec<_> = harness .flow_query_service @@ -5670,7 +5699,7 @@ async fn test_list_all_datasets_with_flow() { .await .unwrap(); - assert_eq!(all_datasets_with_flow, [bar_id]); + pretty_assertions::assert_eq!([bar_id], *all_datasets_with_flow); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -5739,7 +5768,6 @@ async fn test_abort_flow_before_scheduling_tasks() { test_flow_listener.define_dataset_display_name(foo_id.clone(), "foo".to_string()); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -5765,7 +5793,8 @@ async fn test_abort_flow_before_scheduling_tasks() { Flow ID = 0 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -5835,7 +5864,6 @@ async fn test_abort_flow_after_scheduling_still_waiting_for_executor() { test_flow_listener.define_dataset_display_name(foo_id.clone(), "foo".to_string()); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -5866,7 +5894,8 @@ async fn test_abort_flow_after_scheduling_still_waiting_for_executor() { Flow ID = 0 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -5936,7 +5965,6 @@ async fn test_abort_flow_after_task_running_has_started() { test_flow_listener.define_dataset_display_name(foo_id.clone(), "foo".to_string()); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -5956,7 +5984,8 @@ async 
fn test_abort_flow_after_task_running_has_started() { Flow ID = 0 Finished Aborted "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -6040,7 +6069,6 @@ async fn test_abort_flow_after_task_finishes() { test_flow_listener.define_dataset_display_name(foo_id.clone(), "foo".to_string()); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -6077,7 +6105,8 @@ async fn test_abort_flow_after_task_finishes() { Flow ID = 0 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } @@ -6177,7 +6206,7 @@ async fn test_respect_last_success_time_when_activate_configuration() { // Main simulation script let main_handle = async { - // Initially both "foo" isscheduled without waiting. + // Initially both "foo" and "bar" are scheduled without waiting. // "foo": // - flow 0: task 0 starts at 10ms, finishes at 20ms // - next flow 2 queued for 120ms (20ms initiated + 100ms period) @@ -6217,7 +6246,6 @@ async fn test_respect_last_success_time_when_activate_configuration() { .unwrap(); pretty_assertions::assert_eq!( - format!("{}", test_flow_listener.as_ref()), indoc::indoc!( r#" #0: +0ms: @@ -6285,7 +6313,8 @@ async fn test_respect_last_success_time_when_activate_configuration() { Flow ID = 0 Finished Success "# - ) + ), + format!("{}", test_flow_listener.as_ref()) ); } diff --git a/src/domain/flow-system/services/tests/tests/utils/flow_harness_shared.rs b/src/domain/flow-system/services/tests/tests/utils/flow_harness_shared.rs index 589f37e17..45b3c40b8 100644 --- a/src/domain/flow-system/services/tests/tests/utils/flow_harness_shared.rs +++ b/src/domain/flow-system/services/tests/tests/utils/flow_harness_shared.rs @@ -29,8 +29,8 @@ use kamu_accounts_services::{ PredefinedAccountsRegistrator, }; use kamu_core::*; -use kamu_datasets_inmem::InMemoryDatasetDependencyRepository; -use kamu_datasets_services::DependencyGraphServiceImpl; +use kamu_datasets_inmem::{InMemoryDatasetDependencyRepository, InMemoryDatasetEntryRepository}; +use kamu_datasets_services::{DatasetEntryServiceImpl, DependencyGraphServiceImpl}; use kamu_flow_system::*; use kamu_flow_system_inmem::*; use kamu_flow_system_services::*; @@ -61,7 +61,11 @@ pub(crate) const SCHEDULING_MANDATORY_THROTTLING_PERIOD_MS: i64 = SCHEDULING_ALI pub(crate) struct FlowHarness { _tmp_dir: tempfile::TempDir, - pub catalog: dill::Catalog, + catalog_without_subject: Catalog, + delete_dataset_use_case: Arc, + create_dataset_from_snapshot_use_case: Arc, + + pub catalog: Catalog, pub flow_configuration_service: Arc, pub flow_configuration_event_store: Arc, pub flow_agent: Arc, @@ -76,7 +80,7 @@ pub(crate) struct FlowHarnessOverrides { pub awaiting_step: Option, pub mandatory_throttling_period: Option, pub mock_dataset_changes: Option, - pub custom_account_names: Vec, + pub predefined_accounts: Vec, pub tenancy_config: TenancyConfig, } @@ -91,19 +95,17 @@ impl FlowHarness { std::fs::create_dir(&datasets_dir).unwrap(); let accounts_catalog = { - let predefined_accounts_config = if overrides.custom_account_names.is_empty() { + let predefined_accounts_config = if overrides.predefined_accounts.is_empty() { PredefinedAccountsConfig::single_tenant() } else { let mut predefined_accounts_config = PredefinedAccountsConfig::new(); - for account_name in overrides.custom_account_names { - predefined_accounts_config - .predefined - .push(AccountConfig::from_name(account_name)); + for account in overrides.predefined_accounts { + predefined_accounts_config.predefined.push(account); }
predefined_accounts_config }; - let mut b = dill::CatalogBuilder::new(); + let mut b = CatalogBuilder::new(); b.add_value(predefined_accounts_config) .add::() .add::() @@ -134,8 +136,8 @@ impl FlowHarness { let mock_dataset_changes = overrides.mock_dataset_changes.unwrap_or_default(); - let catalog = { - let mut b = dill::CatalogBuilder::new_chained(&accounts_catalog); + let catalog_without_subject = { + let mut b = CatalogBuilder::new_chained(&accounts_catalog); b.add_builder( messaging_outbox::OutboxImmediateImpl::builder() @@ -155,10 +157,8 @@ impl FlowHarness { .add_builder(DatasetRepositoryLocalFs::builder().with_root(datasets_dir)) .bind::() .bind::() - .add::() .add_value(mock_dataset_changes) .bind::() - .add_value(CurrentAccountSubject::new_test()) .add::() .add::() .add::() @@ -168,11 +168,11 @@ impl FlowHarness { .add::() .add::() .add::() - .add::() + .add::() + .add::() .add::() .add::() - .add::() - .add::(); + .add::(); kamu_flow_system_services::register_dependencies(&mut b); @@ -200,35 +200,39 @@ impl FlowHarness { b.build() }; - let flow_agent = catalog.get_one::().unwrap(); - let flow_query_service = catalog.get_one::().unwrap(); - let flow_configuration_service = catalog.get_one::().unwrap(); - let flow_configuration_event_store = catalog - .get_one::() - .unwrap(); - let flow_event_store = catalog.get_one::().unwrap(); - let auth_svc = catalog.get_one::().unwrap(); + let catalog = CatalogBuilder::new_chained(&catalog_without_subject) + .add_value(CurrentAccountSubject::new_test()) + .build(); Self { _tmp_dir: tmp_dir, - catalog, - flow_agent, - flow_query_service, - flow_configuration_service, - flow_configuration_event_store, - flow_event_store, + flow_configuration_service: catalog.get_one().unwrap(), + flow_configuration_event_store: catalog.get_one().unwrap(), + flow_agent: catalog.get_one().unwrap(), + flow_query_service: catalog.get_one().unwrap(), + flow_event_store: catalog.get_one().unwrap(), + auth_svc: catalog.get_one().unwrap(), fake_system_time_source, - auth_svc, + delete_dataset_use_case: catalog.get_one().unwrap(), + create_dataset_from_snapshot_use_case: catalog.get_one().unwrap(), + catalog, + catalog_without_subject, } } - pub async fn create_root_dataset(&self, dataset_alias: DatasetAlias) -> CreateDatasetResult { - let create_dataset_from_snapshot = self - .catalog + pub async fn create_root_dataset_using_subject( + &self, + dataset_alias: DatasetAlias, + subject: CurrentAccountSubject, + ) -> CreateDatasetResult { + let subject_catalog = CatalogBuilder::new_chained(&self.catalog_without_subject) + .add_value(subject) + .build(); + let create_dataset_from_snapshot_use_case = subject_catalog .get_one::() .unwrap(); - create_dataset_from_snapshot + create_dataset_from_snapshot_use_case .execute( MetadataFactory::dataset_snapshot() .name(dataset_alias) @@ -241,17 +245,34 @@ impl FlowHarness { .unwrap() } - pub async fn create_derived_dataset( + pub async fn create_root_dataset(&self, dataset_alias: DatasetAlias) -> CreateDatasetResult { + self.create_dataset_from_snapshot_use_case + .execute( + MetadataFactory::dataset_snapshot() + .name(dataset_alias) + .kind(DatasetKind::Root) + .push_event(MetadataFactory::set_polling_source().build()) + .build(), + Default::default(), + ) + .await + .unwrap() + } + + pub async fn create_derived_dataset_using_subject( &self, dataset_alias: DatasetAlias, input_ids: Vec, + subject: CurrentAccountSubject, ) -> DatasetID { - let create_dataset_from_snapshot = self - .catalog + let subject_catalog = 
CatalogBuilder::new_chained(&self.catalog_without_subject) + .add_value(subject) + .build(); + let create_dataset_from_snapshot_use_case = subject_catalog .get_one::() .unwrap(); - let create_result = create_dataset_from_snapshot + let create_result = create_dataset_from_snapshot_use_case .execute( MetadataFactory::dataset_snapshot() .name(dataset_alias) @@ -270,24 +291,39 @@ impl FlowHarness { create_result.dataset_handle.id } - pub async fn eager_initialization(&self) { - use init_on_startup::InitOnStartup; - let dataset_ownership_initializer = self - .catalog - .get_one::() - .unwrap(); - dataset_ownership_initializer - .run_initialization() + pub async fn create_derived_dataset( + &self, + dataset_alias: DatasetAlias, + input_ids: Vec, + ) -> DatasetID { + let create_result = self + .create_dataset_from_snapshot_use_case + .execute( + MetadataFactory::dataset_snapshot() + .name(dataset_alias) + .kind(DatasetKind::Derivative) + .push_event( + MetadataFactory::set_transform() + .inputs_from_refs(input_ids) + .build(), + ) + .build(), + Default::default(), + ) .await .unwrap(); + create_result.dataset_handle.id + } + + pub async fn eager_initialization(&self) { + use init_on_startup::InitOnStartup; + self.flow_agent.run_initialization().await.unwrap(); } pub async fn delete_dataset(&self, dataset_id: &DatasetID) { - // Do the actual deletion - let delete_dataset = self.catalog.get_one::().unwrap(); - delete_dataset + self.delete_dataset_use_case .execute_via_ref(&(dataset_id.as_local_ref())) .await .unwrap(); @@ -457,3 +493,5 @@ impl FlowHarness { } } } + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/flow-system/services/tests/tests/utils/task_driver.rs b/src/domain/flow-system/services/tests/tests/utils/task_driver.rs index fbcd290ac..061cc3f67 100644 --- a/src/domain/flow-system/services/tests/tests/utils/task_driver.rs +++ b/src/domain/flow-system/services/tests/tests/utils/task_driver.rs @@ -25,6 +25,7 @@ pub(crate) struct TaskDriver { args: TaskDriverArgs, } +#[derive(Debug)] pub(crate) struct TaskDriverArgs { pub(crate) task_id: TaskID, pub(crate) task_metadata: TaskMetadata, @@ -53,14 +54,14 @@ impl TaskDriver { let start_time = self.time_source.now(); self.time_source.sleep(self.args.run_since_start).await; - while !(self.task_exists().await) { + while !self.task_exists().await { yield_now().await; } self.ensure_task_matches_logical_plan().await; - // Note: we can omit transaction, since this is a test-only abstraction - // with assumed immediate delivery + // Note: We can omit transaction, since this is a test-only abstraction + // with assumed immediate delivery self.outbox .post_message( MESSAGE_PRODUCER_KAMU_TASK_AGENT, @@ -76,8 +77,8 @@ impl TaskDriver { if let Some((finish_in, with_outcome)) = self.args.finish_in_with { self.time_source.sleep(finish_in).await; - // Note: we can omit transaction, since this is a test-only abstraction - // with assummed immediate delivery + // Note: We can omit transaction, since this is a test-only abstraction + // with assumed immediate delivery self.outbox .post_message( MESSAGE_PRODUCER_KAMU_TASK_AGENT, @@ -105,11 +106,15 @@ impl TaskDriver { .await .expect("Task does not exist yet"); - assert_eq!(self.args.expected_logical_plan, task.logical_plan); + pretty_assertions::assert_eq!(self.args.expected_logical_plan, task.logical_plan); + match &task.logical_plan { LogicalPlan::UpdateDataset(ud) => { assert!(self.args.dataset_id.is_some()); - 
assert_eq!(&ud.dataset_id, self.args.dataset_id.as_ref().unwrap()); + pretty_assertions::assert_eq!( + self.args.dataset_id.as_ref().unwrap(), + &ud.dataset_id, + ); } LogicalPlan::Probe(_) => assert!(self.args.dataset_id.is_none()), LogicalPlan::HardCompactDataset(_) | LogicalPlan::ResetDataset(_) => (), diff --git a/src/infra/core/Cargo.toml b/src/infra/core/Cargo.toml index 3f37b84e1..afe5b9a84 100644 --- a/src/infra/core/Cargo.toml +++ b/src/infra/core/Cargo.toml @@ -28,7 +28,10 @@ ingest-evm = ["dep:alloy", "dep:datafusion-ethers"] ingest-ftp = ["dep:curl", "dep:curl-sys"] ingest-mqtt = ["dep:rumqttc"] query-extensions-json = ["dep:datafusion-functions-json"] -testing = ["dep:mockall", "kamu-data-utils/testing"] +testing = [ + "dep:mockall", + "kamu-data-utils/testing", +] [dependencies] @@ -145,10 +148,7 @@ libc = "0.2" # For getting uid:gid [dev-dependencies] -database-common = { workspace = true } kamu = { workspace = true, features = ["testing"] } -kamu-accounts-inmem = { workspace = true } -kamu-accounts-services = { workspace = true } kamu-data-utils = { workspace = true, features = ["testing"] } kamu-datasets-services = { workspace = true } kamu-datasets-inmem = { workspace = true } diff --git a/src/infra/core/src/services/dataset_ownership_service_inmem.rs b/src/infra/core/src/services/dataset_ownership_service_inmem.rs deleted file mode 100644 index be6a805c6..000000000 --- a/src/infra/core/src/services/dataset_ownership_service_inmem.rs +++ /dev/null @@ -1,300 +0,0 @@ -// Copyright Kamu Data, Inc. and contributors. All rights reserved. -// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0. 
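// A minimal sketch of the create_root_dataset_using_subject helper added to
// FlowHarness above: a dataset is created under an explicit subject rather
// than the catalog-wide test subject. `harness` is assumed to be a constructed
// FlowHarness; the alias and the stub new_test() subject are illustrative, as
// real tests would pass the subject of the owning predefined account.
let subject = CurrentAccountSubject::new_test();
let create_result = harness
    .create_root_dataset_using_subject(
        DatasetAlias::new(
            Some(AccountName::new_unchecked("alice")),
            DatasetName::new_unchecked("foo"),
        ),
        subject,
    )
    .await;
let foo_dataset_id = create_result.dataset_handle.id;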
- -use std::collections::{HashMap, HashSet}; -use std::sync::Arc; - -use dill::*; -use init_on_startup::{InitOnStartup, InitOnStartupMeta}; -use internal_error::InternalError; -use kamu_accounts::{AuthenticationService, CurrentAccountSubject}; -use kamu_core::*; -use messaging_outbox::{ - MessageConsumer, - MessageConsumerMeta, - MessageConsumerT, - MessageDeliveryMechanism, -}; -use opendatafabric::{AccountID, AccountName, DatasetID}; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -pub struct DatasetOwnershipServiceInMemory { - state: Arc>, -} - -#[derive(Default)] -struct State { - dataset_ids_by_account_id: HashMap>, - account_ids_by_dataset_id: HashMap>, - initially_scanned: bool, -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[component(pub)] -#[interface(dyn DatasetOwnershipService)] -#[interface(dyn MessageConsumer)] -#[interface(dyn MessageConsumerT)] -#[meta(MessageConsumerMeta { - consumer_name: MESSAGE_CONSUMER_KAMU_CORE_DATASET_OWNERSHIP_SERVICE, - feeding_producers: &[MESSAGE_PRODUCER_KAMU_CORE_DATASET_SERVICE], - delivery: MessageDeliveryMechanism::Immediate, -})] -#[scope(Singleton)] -impl DatasetOwnershipServiceInMemory { - pub fn new() -> Self { - Self { - state: Default::default(), - } - } - - fn insert_dataset_record( - &self, - state: &mut State, - dataset_id: &DatasetID, - owner_account_id: &AccountID, - ) { - state - .account_ids_by_dataset_id - .insert(dataset_id.clone(), vec![owner_account_id.clone()]); - - state - .dataset_ids_by_account_id - .entry(owner_account_id.clone()) - .and_modify(|e| { - e.insert(dataset_id.clone()); - }) - .or_insert_with(|| { - let mut dataset_ids = HashSet::new(); - dataset_ids.insert(dataset_id.clone()); - dataset_ids - }); - } - - async fn check_has_initialized(&self) -> Result<(), InternalError> { - let has_initially_scanned = self.state.read().await.initially_scanned; - - if has_initially_scanned { - Ok(()) - } else { - InternalError::bail("The service was not previously initialized!") - } - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[async_trait::async_trait] -impl DatasetOwnershipService for DatasetOwnershipServiceInMemory { - #[tracing::instrument(level = "debug", skip_all, fields(%dataset_id))] - async fn get_dataset_owners( - &self, - dataset_id: &DatasetID, - ) -> Result, InternalError> { - self.check_has_initialized().await?; - - let guard = self.state.read().await; - let maybe_account_ids = guard.account_ids_by_dataset_id.get(dataset_id); - if let Some(account_ids) = maybe_account_ids { - Ok(account_ids.clone()) - } else { - Ok(vec![]) - } - } - - #[tracing::instrument(level = "debug", skip_all, fields(%account_id))] - async fn get_owned_datasets( - &self, - account_id: &AccountID, - ) -> Result, InternalError> { - self.check_has_initialized().await?; - - let guard = self.state.read().await; - let maybe_dataset_ids = guard.dataset_ids_by_account_id.get(account_id); - if let Some(dataset_ids) = maybe_dataset_ids { - Ok(dataset_ids.iter().cloned().collect::>()) - } else { - Ok(vec![]) - } - } - - #[tracing::instrument(level = "debug", skip_all, fields(%dataset_id, %account_id))] - async fn is_dataset_owned_by( - &self, - dataset_id: &DatasetID, - account_id: &AccountID, - ) -> Result { - self.check_has_initialized().await?; - - let guard = self.state.read().await; - - let 
maybe_account_ids = guard.account_ids_by_dataset_id.get(dataset_id); - if let Some(account_ids) = maybe_account_ids { - Ok(account_ids.contains(account_id)) - } else { - Ok(false) - } - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -impl MessageConsumer for DatasetOwnershipServiceInMemory {} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[async_trait::async_trait] -impl MessageConsumerT for DatasetOwnershipServiceInMemory { - #[tracing::instrument( - level = "debug", - skip_all, - name = "DatasetOwnershipServiceInMemory[DatasetLifecycleMessage]" - )] - async fn consume_message( - &self, - _: &Catalog, - message: &DatasetLifecycleMessage, - ) -> Result<(), InternalError> { - tracing::debug!(received_message = ?message, "Received dataset lifecycle message"); - - match message { - DatasetLifecycleMessage::Created(message) => { - let mut guard = self.state.write().await; - self.insert_dataset_record( - &mut guard, - &message.dataset_id, - &message.owner_account_id, - ); - } - DatasetLifecycleMessage::Deleted(message) => { - let account_ids = self.get_dataset_owners(&message.dataset_id).await?; - if !account_ids.is_empty() { - let mut guard = self.state.write().await; - for account_id in account_ids { - if let Some(dataset_ids) = - guard.dataset_ids_by_account_id.get_mut(&account_id) - { - dataset_ids.remove(&message.dataset_id); - } - } - guard.account_ids_by_dataset_id.remove(&message.dataset_id); - } - } - DatasetLifecycleMessage::DependenciesUpdated(_) - | DatasetLifecycleMessage::Renamed(_) => { - // No action required - } - } - - Ok(()) - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Initializer -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -pub struct DatasetOwnershipServiceInMemoryStateInitializer { - current_account_subject: Arc, - dataset_registry: Arc, - authentication_service: Arc, - dataset_ownership_service: Arc, -} - -#[component(pub)] -#[interface(dyn InitOnStartup)] -#[meta(InitOnStartupMeta { - job_name: JOB_KAMU_CORE_DATASET_OWNERSHIP_INITIALIZER, - depends_on: &[kamu_accounts::JOB_KAMU_ACCOUNTS_PREDEFINED_ACCOUNTS_REGISTRATOR], - requires_transaction: true -})] -impl DatasetOwnershipServiceInMemoryStateInitializer { - pub fn new( - current_account_subject: Arc, - dataset_registry: Arc, - authentication_service: Arc, - dataset_ownership_service: Arc, - ) -> Self { - Self { - current_account_subject, - dataset_registry, - authentication_service, - dataset_ownership_service, - } - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[async_trait::async_trait] -impl InitOnStartup for DatasetOwnershipServiceInMemoryStateInitializer { - #[tracing::instrument( - level = "debug", - skip_all, - name = "DatasetOwnershipServiceInMemoryStateInitializer::run_initialization" - )] - async fn run_initialization(&self) -> Result<(), InternalError> { - let mut guard = self.dataset_ownership_service.state.write().await; - if guard.initially_scanned { - tracing::warn!("The service has already initialized"); - - return Ok(()); - } - - use futures::StreamExt; - - tracing::debug!("Initializing dataset ownership data started"); - - let mut account_ids_by_name: HashMap = HashMap::new(); - - let 
mut datasets_stream = self.dataset_registry.all_dataset_handles(); - while let Some(Ok(dataset_handle)) = datasets_stream.next().await { - let account_name = match dataset_handle.alias.account_name { - Some(account_name) => account_name, - None => match self.current_account_subject.as_ref() { - CurrentAccountSubject::Anonymous(_) => { - panic!("Initializing dataset ownership without authorization") - } - CurrentAccountSubject::Logged(l) => l.account_name.clone(), - }, - }; - - let maybe_account_id = if let Some(account_id) = account_ids_by_name.get(&account_name) - { - Some(account_id.clone()) - } else { - let maybe_account_id = self - .authentication_service - .find_account_id_by_name(&account_name) - .await?; - if let Some(account_id) = maybe_account_id { - account_ids_by_name.insert(account_name.clone(), account_id.clone()); - Some(account_id) - } else { - None - } - }; - - if let Some(account_id) = maybe_account_id { - self.dataset_ownership_service.insert_dataset_record( - &mut guard, - &dataset_handle.id, - &account_id, - ); - } - } - - guard.initially_scanned = true; - - Ok(()) - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/core/src/services/mod.rs b/src/infra/core/src/services/mod.rs index e98a3f0f0..046649217 100644 --- a/src/infra/core/src/services/mod.rs +++ b/src/infra/core/src/services/mod.rs @@ -25,7 +25,6 @@ pub use transform::*; pub use watermark::*; mod dataset_changes_service_impl; -mod dataset_ownership_service_inmem; mod dataset_registry_repo_bridge; mod metadata_query_service_impl; mod provenance_service_impl; @@ -36,7 +35,6 @@ mod query_service_impl; mod verification_service_impl; pub use dataset_changes_service_impl::*; -pub use dataset_ownership_service_inmem::*; pub use dataset_registry_repo_bridge::*; pub use metadata_query_service_impl::*; pub use provenance_service_impl::*; diff --git a/src/infra/core/src/testing/base_repo_harness.rs b/src/infra/core/src/testing/base_repo_harness.rs index 0b80f3505..0320b42fa 100644 --- a/src/infra/core/src/testing/base_repo_harness.rs +++ b/src/infra/core/src/testing/base_repo_harness.rs @@ -61,14 +61,11 @@ impl BaseRepoHarness { .add::() .build(); - let dataset_registry = catalog.get_one().unwrap(); - let dataset_repo_writer = catalog.get_one().unwrap(); - Self { temp_dir, + dataset_registry: catalog.get_one().unwrap(), + dataset_repo_writer: catalog.get_one().unwrap(), catalog, - dataset_registry, - dataset_repo_writer, } } diff --git a/src/infra/core/src/use_cases/create_dataset_from_snapshot_use_case_impl.rs b/src/infra/core/src/use_cases/create_dataset_from_snapshot_use_case_impl.rs index 9708eed32..8aefc0b4c 100644 --- a/src/infra/core/src/use_cases/create_dataset_from_snapshot_use_case_impl.rs +++ b/src/infra/core/src/use_cases/create_dataset_from_snapshot_use_case_impl.rs @@ -57,6 +57,12 @@ impl CreateDatasetFromSnapshotUseCase for CreateDatasetFromSnapshotUseCaseImpl { snapshot: DatasetSnapshot, options: CreateDatasetUseCaseOptions, ) -> Result { + let logged_account_id = match self.current_account_subject.as_ref() { + CurrentAccountSubject::Anonymous(_) => { + panic!("Anonymous account cannot create dataset"); + } + CurrentAccountSubject::Logged(l) => l.account_id.clone(), + }; let dataset_name = snapshot.name.dataset_name.clone(); let CreateDatasetFromSnapshotResult { create_dataset_result, @@ -71,12 +77,7 @@ impl CreateDatasetFromSnapshotUseCase for CreateDatasetFromSnapshotUseCaseImpl { 
MESSAGE_PRODUCER_KAMU_CORE_DATASET_SERVICE, DatasetLifecycleMessage::created( create_dataset_result.dataset_handle.id.clone(), - match self.current_account_subject.as_ref() { - CurrentAccountSubject::Anonymous(_) => { - panic!("Anonymous account cannot create dataset"); - } - CurrentAccountSubject::Logged(l) => l.account_id.clone(), - }, + logged_account_id, options.dataset_visibility, dataset_name, ), diff --git a/src/infra/core/src/use_cases/create_dataset_use_case_impl.rs b/src/infra/core/src/use_cases/create_dataset_use_case_impl.rs index 691086a43..084cc7280 100644 --- a/src/infra/core/src/use_cases/create_dataset_use_case_impl.rs +++ b/src/infra/core/src/use_cases/create_dataset_use_case_impl.rs @@ -57,6 +57,12 @@ impl CreateDatasetUseCase for CreateDatasetUseCaseImpl { seed_block: MetadataBlockTyped, options: CreateDatasetUseCaseOptions, ) -> Result { + let logged_account_id = match self.current_account_subject.as_ref() { + CurrentAccountSubject::Anonymous(_) => { + panic!("Anonymous account cannot create dataset"); + } + CurrentAccountSubject::Logged(l) => l.account_id.clone(), + }; let create_result = self .dataset_repo_writer .create_dataset(dataset_alias, seed_block) @@ -67,12 +73,7 @@ impl CreateDatasetUseCase for CreateDatasetUseCaseImpl { MESSAGE_PRODUCER_KAMU_CORE_DATASET_SERVICE, DatasetLifecycleMessage::created( create_result.dataset_handle.id.clone(), - match self.current_account_subject.as_ref() { - CurrentAccountSubject::Anonymous(_) => { - panic!("Anonymous account cannot create dataset"); - } - CurrentAccountSubject::Logged(l) => l.account_id.clone(), - }, + logged_account_id, options.dataset_visibility, dataset_alias.dataset_name.clone(), ), diff --git a/src/infra/core/tests/tests/mod.rs b/src/infra/core/tests/tests/mod.rs index 387f00765..0388190ce 100644 --- a/src/infra/core/tests/tests/mod.rs +++ b/src/infra/core/tests/tests/mod.rs @@ -12,7 +12,6 @@ mod ingest; mod repos; mod test_compaction_services_impl; mod test_dataset_changes_service_impl; -mod test_dataset_ownership_service_inmem; mod test_datasets_filtering; mod test_metadata_chain_comparator; mod test_pull_request_planner_impl; diff --git a/src/infra/core/tests/tests/test_dataset_ownership_service_inmem.rs b/src/infra/core/tests/tests/test_dataset_ownership_service_inmem.rs deleted file mode 100644 index 8bfc2c0ea..000000000 --- a/src/infra/core/tests/tests/test_dataset_ownership_service_inmem.rs +++ /dev/null @@ -1,273 +0,0 @@ -// Copyright Kamu Data, Inc. and contributors. All rights reserved. -// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0. 
- -use std::collections::HashMap; -use std::sync::Arc; - -use database_common::{DatabaseTransactionRunner, NoOpDatabasePlugin}; -use kamu::testing::BaseRepoHarness; -use kamu::{DatasetOwnershipServiceInMemory, DatasetOwnershipServiceInMemoryStateInitializer}; -use kamu_accounts::{ - AccountConfig, - AuthenticationService, - JwtAuthenticationConfig, - PredefinedAccountsConfig, - DEFAULT_ACCOUNT_ID, -}; -use kamu_accounts_inmem::{InMemoryAccessTokenRepository, InMemoryAccountRepository}; -use kamu_accounts_services::{ - AccessTokenServiceImpl, - AuthenticationServiceImpl, - LoginPasswordAuthProvider, - PredefinedAccountsRegistrator, -}; -use kamu_core::{DatasetOwnershipService, TenancyConfig}; -use opendatafabric::{AccountID, AccountName, DatasetAlias, DatasetID, DatasetName}; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[test_log::test(tokio::test)] -async fn test_multi_tenant_dataset_owners() { - let mut harness = DatasetOwnershipHarness::new(TenancyConfig::MultiTenant).await; - - harness.create_multi_tenant_datasets().await; - harness.eager_initialization().await; - - for (account_id, mut dataset_ids) in harness.account_datasets { - let mut owner_datasets = harness - .dataset_ownership_service - .get_owned_datasets(&account_id) - .await - .unwrap(); - owner_datasets.sort(); - dataset_ids.sort(); - assert_eq!(owner_datasets, dataset_ids); - - for dataset_id in dataset_ids { - let is_owner = harness - .dataset_ownership_service - .is_dataset_owned_by(&dataset_id, &account_id) - .await - .unwrap(); - assert!(is_owner); - - let is_invalid_owner = harness - .dataset_ownership_service - .is_dataset_owned_by(&dataset_id, &DEFAULT_ACCOUNT_ID) - .await - .unwrap(); - assert!(!is_invalid_owner); - - let dataset_owners = harness - .dataset_ownership_service - .get_dataset_owners(&dataset_id) - .await - .unwrap(); - - assert_eq!(dataset_owners, [account_id.clone()]); - } - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[oop::extend(BaseRepoHarness, base_repo_harness)] -struct DatasetOwnershipHarness { - base_repo_harness: BaseRepoHarness, - catalog: dill::Catalog, - dataset_ownership_service: Arc, - auth_svc: Arc, - account_datasets: HashMap>, -} - -impl DatasetOwnershipHarness { - async fn new(tenancy_config: TenancyConfig) -> Self { - let base_repo_harness = BaseRepoHarness::new(tenancy_config); - - let predefined_accounts = [ - AccountName::new_unchecked("alice"), - AccountName::new_unchecked("bob"), - AccountName::new_unchecked("eve"), - ]; - let mut predefined_accounts_config = PredefinedAccountsConfig::new(); - for account_name in predefined_accounts { - predefined_accounts_config - .predefined - .push(AccountConfig::from_name(account_name)); - } - - let base_catalog = { - let mut b = dill::CatalogBuilder::new_chained(base_repo_harness.catalog()); - - b.add::() - .add::() - .add_value(predefined_accounts_config.clone()) - .add_value(JwtAuthenticationConfig::default()) - .add::() - .add::() - .add::() - .add::() - .add::() - .add::(); - - NoOpDatabasePlugin::init_database_components(&mut b); - - b.build() - }; - - init_on_startup::run_startup_jobs(&base_catalog) - .await - .unwrap(); - - // Attach ownership initializer in separate catalog, - // so that the startup job is not run before creating datasets - let catalog = { - let mut b = dill::CatalogBuilder::new_chained(&base_catalog); - b.add::(); - b.build() - }; - - let 
dataset_ownership_service = catalog.get_one::().unwrap(); - let auth_svc = catalog.get_one::().unwrap(); - - Self { - base_repo_harness, - catalog, - dataset_ownership_service, - auth_svc, - account_datasets: HashMap::new(), - } - } - - async fn eager_initialization(&self) { - use init_on_startup::InitOnStartup; - let initializer = self - .catalog - .get_one::() - .unwrap(); - initializer.run_initialization().await.unwrap(); - } - - async fn create_multi_tenant_datasets(&mut self) { - let alice = AccountName::new_unchecked("alice"); - let bob = AccountName::new_unchecked("bob"); - let eve: AccountName = AccountName::new_unchecked("eve"); - - let mut dataset_accounts: HashMap<&'static str, AccountName> = HashMap::new(); - dataset_accounts.insert("foo", alice.clone()); - dataset_accounts.insert("bar", alice.clone()); - dataset_accounts.insert("baz", bob.clone()); - dataset_accounts.insert("foo-bar", alice); - dataset_accounts.insert("foo-baz", bob); - dataset_accounts.insert("foo-bar-foo-baz", eve); - - self.create_datasets(|dataset_name| dataset_accounts.get(dataset_name).cloned()) - .await; - } - - async fn create_datasets(&mut self, account_getter: impl Fn(&str) -> Option) { - self.create_root_dataset(account_getter("foo"), "foo").await; - self.create_root_dataset(account_getter("bar"), "bar").await; - self.create_root_dataset(account_getter("baz"), "baz").await; - - self.create_derived_dataset( - account_getter("foo-bar"), - "foo-bar", - vec![ - DatasetAlias::new(account_getter("foo"), DatasetName::new_unchecked("foo")), - DatasetAlias::new(account_getter("bar"), DatasetName::new_unchecked("bar")), - ], - ) - .await; - - self.create_derived_dataset( - account_getter("foo-baz"), - "foo-baz", - vec![ - DatasetAlias::new(account_getter("foo"), DatasetName::new_unchecked("foo")), - DatasetAlias::new(account_getter("baz"), DatasetName::new_unchecked("baz")), - ], - ) - .await; - - self.create_derived_dataset( - account_getter("foo-bar-foo-baz"), - "foo-bar-foo-baz", - vec![ - DatasetAlias::new( - account_getter("foo-bar"), - DatasetName::new_unchecked("foo-bar"), - ), - DatasetAlias::new( - account_getter("foo-baz"), - DatasetName::new_unchecked("foo-baz"), - ), - ], - ) - .await; - } - - async fn create_root_dataset(&mut self, account_name: Option, dataset_name: &str) { - let account_id = self - .auth_svc - .find_account_id_by_name(account_name.as_ref().unwrap()) - .await - .unwrap() - .unwrap(); - - let created_dataset = self - ._super() - .create_root_dataset(&DatasetAlias::new( - account_name, - DatasetName::new_unchecked(dataset_name), - )) - .await; - - self.account_datasets - .entry(account_id.clone()) - .and_modify(|e| { - e.push(created_dataset.dataset_handle.id.clone()); - }) - .or_insert_with(|| vec![created_dataset.dataset_handle.id.clone()]); - } - - async fn create_derived_dataset( - &mut self, - account_name: Option, - dataset_name: &str, - input_aliases: Vec, - ) { - let account_id = self - .auth_svc - .find_account_id_by_name(account_name.as_ref().unwrap()) - .await - .unwrap() - .unwrap(); - - let created_dataset = self - ._super() - .create_derived_dataset( - &DatasetAlias::new(account_name, DatasetName::new_unchecked(dataset_name)), - input_aliases - .iter() - .map(DatasetAlias::as_local_ref) - .collect(), - ) - .await; - - self.account_datasets - .entry(account_id.clone()) - .and_modify(|e| { - e.push(created_dataset.dataset_handle.id.clone()); - }) - .or_insert_with(|| vec![created_dataset.dataset_handle.id.clone()]); - } -} - 
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/utils/database-common/tests/tests/test_entries_streamer.rs b/src/utils/database-common/tests/tests/test_entries_streamer.rs deleted file mode 100644 index b80e6294f..000000000 --- a/src/utils/database-common/tests/tests/test_entries_streamer.rs +++ /dev/null @@ -1,414 +0,0 @@ -// Copyright Kamu Data, Inc. and contributors. All rights reserved. -// -// Use of this software is governed by the Business Source License -// included in the LICENSE file. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0. - -use std::sync::Arc; - -use database_common::{EntityPageListing, EntityPageStreamer, PaginationOpts}; -use futures::TryStreamExt; - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -struct TestPaginationOpts { - total_entity_count: usize, - expected_entities_call_count: usize, - start_offset: usize, - page_limit: usize, - expected_entities: Vec, -} - -macro_rules! test_pagination { - ($test_pagination_opts: expr) => { - let TestPaginationOpts { - total_entity_count, - expected_entities_call_count, - start_offset, - page_limit, - expected_entities, - } = $test_pagination_opts; - - let entity_source = entity_source(total_entity_count, expected_entities_call_count); - let streamer = EntityPageStreamer::new(start_offset, page_limit); - - let stream = streamer.into_stream( - || async { - let arguments = entity_source.init_arguments().await; - Ok(arguments) - }, - |_, pagination| { - let entity_source = entity_source.clone(); - async move { - let listing = entity_source.entities(pagination).await; - Ok(listing) - } - }, - ); - - let actual_entries = stream.try_collect::>().await.unwrap(); - - pretty_assertions::assert_eq!(expected_entities, actual_entries); - }; -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[tokio::test] -async fn test_pagination_less_than_a_page() { - test_pagination!(TestPaginationOpts { - total_entity_count: 3, - start_offset: 0, - page_limit: 5, - expected_entities_call_count: 1, - expected_entities: vec![ - TestEntity { id: 0 }, - TestEntity { id: 1 }, - TestEntity { id: 2 }, - ], - }); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[tokio::test] -async fn test_pagination_fits_on_one_page() { - test_pagination!(TestPaginationOpts { - total_entity_count: 5, - start_offset: 0, - page_limit: 5, - expected_entities_call_count: 1, - expected_entities: vec![ - TestEntity { id: 0 }, - TestEntity { id: 1 }, - TestEntity { id: 2 }, - TestEntity { id: 3 }, - TestEntity { id: 4 }, - ], - }); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[tokio::test] -async fn test_pagination_more_than_a_page() { - test_pagination!(TestPaginationOpts { - total_entity_count: 7, - start_offset: 0, - page_limit: 5, - expected_entities_call_count: 2, - expected_entities: vec![ - TestEntity { id: 0 }, - TestEntity { id: 1 }, - TestEntity { id: 2 }, - TestEntity { id: 3 }, - TestEntity { id: 4 }, - TestEntity { id: 5 }, - TestEntity { id: 6 }, - ], - }); -} - -#[tokio::test] -async fn test_pagination_fits_on_few_pages() { 
- test_pagination!(TestPaginationOpts { - total_entity_count: 10, - start_offset: 0, - page_limit: 5, - expected_entities_call_count: 2, - expected_entities: vec![ - TestEntity { id: 0 }, - TestEntity { id: 1 }, - TestEntity { id: 2 }, - TestEntity { id: 3 }, - TestEntity { id: 4 }, - TestEntity { id: 5 }, - TestEntity { id: 6 }, - TestEntity { id: 7 }, - TestEntity { id: 8 }, - TestEntity { id: 9 }, - ], - }); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[tokio::test] -async fn test_pagination_start_offset_in_the_page_middle() { - test_pagination!(TestPaginationOpts { - total_entity_count: 10, - start_offset: 5, - page_limit: 10, - expected_entities_call_count: 1, - expected_entities: vec![ - TestEntity { id: 5 }, - TestEntity { id: 6 }, - TestEntity { id: 7 }, - TestEntity { id: 8 }, - TestEntity { id: 9 }, - ], - }); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[tokio::test] -async fn test_pagination_start_offset_is_greater_than_the_total_entity_count() { - test_pagination!(TestPaginationOpts { - total_entity_count: 10, - start_offset: 11, - page_limit: 10, - expected_entities_call_count: 1, - expected_entities: vec![], - }); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[tokio::test] -async fn test_paged_page_processing_of_input_data_by_ref() { - fn assert_page(page: &[&TestEntity], pagination: &PaginationOpts) { - match pagination.offset { - 0 => { - pretty_assertions::assert_eq!( - vec![ - &TestEntity { id: 0 }, - &TestEntity { id: 1 }, - &TestEntity { id: 2 }, - ], - page - ); - } - 3 => { - pretty_assertions::assert_eq!( - vec![ - &TestEntity { id: 3 }, - &TestEntity { id: 4 }, - &TestEntity { id: 5 }, - ], - page - ); - } - 6 => { - pretty_assertions::assert_eq!( - vec![ - &TestEntity { id: 6 }, - &TestEntity { id: 7 }, - &TestEntity { id: 8 }, - ], - page - ); - } - 9 => { - pretty_assertions::assert_eq!(vec![&TestEntity { id: 9 },], page); - } - _ => { - unreachable!() - } - } - } - - let input_data = vec![ - TestEntity { id: 0 }, - TestEntity { id: 1 }, - TestEntity { id: 2 }, - TestEntity { id: 3 }, - TestEntity { id: 4 }, - TestEntity { id: 5 }, - TestEntity { id: 6 }, - TestEntity { id: 7 }, - TestEntity { id: 8 }, - TestEntity { id: 9 }, - ]; - - struct CollectionArgs<'a> { - pub input_data: &'a Vec, - } - - let streamer = EntityPageStreamer::new(0, 3); - - let stream = streamer.into_stream( - || async { - Ok(Arc::new(CollectionArgs { - input_data: &input_data, - })) - }, - |input, pagination| { - let input_len = input.input_data.len(); - - let input_page = input - .input_data - .iter() - .skip(pagination.offset) - .take(pagination.safe_limit(input_len)) - .collect::>(); - - assert_page(&input_page, &pagination); - - async move { - Ok(EntityPageListing { - list: input_page, - total_count: input_len, - }) - } - }, - ); - - stream.try_collect::>().await.unwrap(); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[tokio::test] -async fn test_paged_page_processing_of_input_data_by_value() { - #[derive(Debug, Clone, PartialEq)] - struct ClonableTestEntity { - id: usize, - } - - fn assert_page(page: &[ClonableTestEntity], pagination: &PaginationOpts) { - match pagination.offset { - 0 => { - pretty_assertions::assert_eq!( - vec![ - 
ClonableTestEntity { id: 0 }, - ClonableTestEntity { id: 1 }, - ClonableTestEntity { id: 2 }, - ], - page - ); - } - 3 => { - pretty_assertions::assert_eq!( - vec![ - ClonableTestEntity { id: 3 }, - ClonableTestEntity { id: 4 }, - ClonableTestEntity { id: 5 }, - ], - page - ); - } - 6 => { - pretty_assertions::assert_eq!( - vec![ - ClonableTestEntity { id: 6 }, - ClonableTestEntity { id: 7 }, - ClonableTestEntity { id: 8 }, - ], - page - ); - } - 9 => { - pretty_assertions::assert_eq!(vec![ClonableTestEntity { id: 9 },], page); - } - _ => { - unreachable!() - } - } - } - - let input_data = vec![ - ClonableTestEntity { id: 0 }, - ClonableTestEntity { id: 1 }, - ClonableTestEntity { id: 2 }, - ClonableTestEntity { id: 3 }, - ClonableTestEntity { id: 4 }, - ClonableTestEntity { id: 5 }, - ClonableTestEntity { id: 6 }, - ClonableTestEntity { id: 7 }, - ClonableTestEntity { id: 8 }, - ClonableTestEntity { id: 9 }, - ]; - - let streamer = EntityPageStreamer::new(0, 3); - - let stream = streamer.into_stream( - || async { Ok(Arc::new(input_data)) }, - |input, pagination| { - let input_page = input - .iter() - .skip(pagination.offset) - .take(pagination.safe_limit(input.len())) - .cloned() - .collect::>(); - - assert_page(&input_page, &pagination); - - async move { - Ok(EntityPageListing { - list: input_page, - total_count: input.len(), - }) - } - }, - ); - - stream.try_collect::>().await.unwrap(); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Helpers -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -fn entity_source( - total_entities_count: usize, - expected_entities_call_count: usize, -) -> Arc { - let mut entity_source = MockEntitySource::new(); - - entity_source - .expect_init_arguments() - .times(1) - .returning(|| NoArgs); - - entity_source - .expect_entities() - .times(expected_entities_call_count) - .returning(move |pagination| { - let result = (0..) - .skip(pagination.offset) - .take(pagination.safe_limit(total_entities_count)) - .map(|id| TestEntity { id }) - .collect::>(); - - EntityPageListing { - list: result, - total_count: total_entities_count, - } - }); - - Arc::new(entity_source) -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -#[derive(Clone)] -struct NoArgs; - -#[derive(Debug, PartialEq)] -struct TestEntity { - id: usize, -} - -#[async_trait::async_trait] -trait EntitySource { - async fn init_arguments(&self) -> NoArgs; - - async fn entities(&self, pagination: PaginationOpts) -> EntityPageListing; -} - -mockall::mock! 
{ - pub EntitySource {} - - #[async_trait::async_trait] - impl EntitySource for EntitySource { - async fn init_arguments(&self) -> NoArgs; - - async fn entities(&self, pagination: PaginationOpts) -> EntityPageListing; - } -} - -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// From 301b5c113f088d0dbd3e4ae40a12fc98c6a2e433 Mon Sep 17 00:00:00 2001 From: Dima Pristupa Date: Mon, 23 Dec 2024 17:30:12 +0200 Subject: [PATCH 06/10] GQL, DatasetMut::set_visibility(): correct return type (#1007) --- resources/schema.gql | 8 ++++++-- .../graphql/src/mutations/dataset_mut/dataset_mut.rs | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/resources/schema.gql b/resources/schema.gql index 8b25c2fba..f97ea47e0 100644 --- a/resources/schema.gql +++ b/resources/schema.gql @@ -725,7 +725,7 @@ type DatasetMut { """ Set visibility for the dataset """ - setVisibility(visibility: DatasetVisibilityInput!): SetDatasetPropertyResultSuccess! + setVisibility(visibility: DatasetVisibilityInput!): SetDatasetPropertyResult! } scalar DatasetName @@ -1758,7 +1758,11 @@ type SetDataSchema { schema: DataSchema! } -type SetDatasetPropertyResultSuccess { +interface SetDatasetPropertyResult { + message: String! +} + +type SetDatasetPropertyResultSuccess implements SetDatasetPropertyResult { dummy: String message: String! } diff --git a/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut.rs b/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut.rs index 4d77495e2..89e378aba 100644 --- a/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut.rs +++ b/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut.rs @@ -162,7 +162,7 @@ impl DatasetMut { &self, ctx: &Context<'_>, visibility: DatasetVisibilityInput, - ) -> Result { + ) -> Result { ensure_account_owns_dataset(ctx, &self.dataset_handle).await?; let rebac_svc = from_catalog_n!(ctx, dyn kamu_auth_rebac::RebacService); @@ -186,7 +186,7 @@ impl DatasetMut { .int_err()?; } - Ok(SetDatasetPropertyResultSuccess::default()) + Ok(SetDatasetPropertyResultSuccess::default().into()) } } From e1b8804dd9ebb2ba58f98cf9c1f053fdd4417773 Mon Sep 17 00:00:00 2001 From: Dima Pristupa Date: Tue, 24 Dec 2024 18:00:22 +0200 Subject: [PATCH 07/10] GQL, DatasetMetadata: be prepared for not accessed datasets (#1011) --- CHANGELOG.md | 2 + resources/schema.gql | 23 ++- .../src/mutations/dataset_mut/dataset_mut.rs | 14 +- .../src/queries/datasets/dataset_metadata.rs | 142 ++++++++++++++---- 4 files changed, 144 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6481f3e57..847386e62 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,8 @@ Recommendation: for ease of reading, use the following order: - E2E: Using the correct account in multi-tenant mode - And also the possibility of set it up - `DatasetOwnershipService`: moved to the `kamu-dataset` crate area & implemented via `DatasetEntryServiceImpl` + - GQL, `DatasetMetadata.currentUpstreamDependencies`: indication if datasets not found/not accessed + - GQL, `DatasetMetadata.currentDownstreamDependencies`: exclude datasets that cannot be accessed ## [0.213.1] - 2024-12-18 ### Fixed diff --git a/resources/schema.gql b/resources/schema.gql index f97ea47e0..2cdce5126 100644 --- a/resources/schema.gql +++ b/resources/schema.gql @@ -646,7 +646,7 @@ type DatasetMetadata { """ Current upstream dependencies of a dataset """ - currentUpstreamDependencies: [Dataset!]! 
+ currentUpstreamDependencies: [UpstreamDatasetResult!]! """ Current downstream dependencies of a dataset """ @@ -725,7 +725,7 @@ type DatasetMut { """ Set visibility for the dataset """ - setVisibility(visibility: DatasetVisibilityInput!): SetDatasetPropertyResult! + setVisibility(visibility: DatasetVisibilityInput!): SetDatasetVisibilityResult! } scalar DatasetName @@ -1758,11 +1758,11 @@ type SetDataSchema { schema: DataSchema! } -interface SetDatasetPropertyResult { +interface SetDatasetVisibilityResult { message: String! } -type SetDatasetPropertyResultSuccess implements SetDatasetPropertyResult { +type SetDatasetVisibilityResultSuccess implements SetDatasetVisibilityResult { dummy: String message: String! } @@ -1996,6 +1996,21 @@ interface UpdateReadmeResult { message: String! } +interface UpstreamDatasetResult { + message: String! +} + +type UpstreamDatasetResultFound implements UpstreamDatasetResult { + dataset: Dataset! + message: String! +} + +type UpstreamDatasetResultNotFound implements UpstreamDatasetResult { + datasetId: DatasetID! + datasetAlias: DatasetAlias! + message: String! +} + type ViewAccessToken { """ Unique identifier of the access token diff --git a/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut.rs b/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut.rs index 89e378aba..c27b950e6 100644 --- a/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut.rs +++ b/src/adapter/graphql/src/mutations/dataset_mut/dataset_mut.rs @@ -162,7 +162,7 @@ impl DatasetMut { &self, ctx: &Context<'_>, visibility: DatasetVisibilityInput, - ) -> Result { + ) -> Result { ensure_account_owns_dataset(ctx, &self.dataset_handle).await?; let rebac_svc = from_catalog_n!(ctx, dyn kamu_auth_rebac::RebacService); @@ -186,7 +186,7 @@ impl DatasetMut { .int_err()?; } - Ok(SetDatasetPropertyResultSuccess::default().into()) + Ok(SetDatasetVisibilityResultSuccess::default().into()) } } @@ -292,20 +292,20 @@ pub enum DatasetVisibilityInput { #[derive(Interface, Debug)] #[graphql(field(name = "message", ty = "String"))] -pub enum SetDatasetPropertyResult { - Success(SetDatasetPropertyResultSuccess), +pub enum SetDatasetVisibilityResult { + Success(SetDatasetVisibilityResultSuccess), } #[derive(SimpleObject, Debug, Default)] #[graphql(complex)] -pub struct SetDatasetPropertyResultSuccess { +pub struct SetDatasetVisibilityResultSuccess { _dummy: Option, } #[ComplexObject] -impl SetDatasetPropertyResultSuccess { +impl SetDatasetVisibilityResultSuccess { async fn message(&self) -> String { - "Updated".to_string() + "Success".to_string() } } diff --git a/src/adapter/graphql/src/queries/datasets/dataset_metadata.rs b/src/adapter/graphql/src/queries/datasets/dataset_metadata.rs index 1d89e5fbe..427dc000d 100644 --- a/src/adapter/graphql/src/queries/datasets/dataset_metadata.rs +++ b/src/adapter/graphql/src/queries/datasets/dataset_metadata.rs @@ -8,6 +8,7 @@ // by the Apache License, Version 2.0. 
use chrono::prelude::*; +use kamu_core::auth::{ClassifyByAllowanceResponse, DatasetAction}; use kamu_core::{ self as domain, MetadataChainExt, @@ -25,8 +26,6 @@ use crate::utils::get_dataset; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - pub struct DatasetMetadata { dataset_handle: odf::DatasetHandle, } @@ -81,31 +80,50 @@ impl DatasetMetadata { } } + // TODO: Private Datasets: tests /// Current upstream dependencies of a dataset - async fn current_upstream_dependencies(&self, ctx: &Context<'_>) -> Result> { - let (dependency_graph_service, dataset_registry) = from_catalog_n!( + async fn current_upstream_dependencies( + &self, + ctx: &Context<'_>, + ) -> Result> { + let (dependency_graph_service, dataset_registry, dataset_action_authorizer) = from_catalog_n!( ctx, dyn domain::DependencyGraphService, - dyn domain::DatasetRegistry + dyn domain::DatasetRegistry, + dyn kamu_core::auth::DatasetActionAuthorizer ); - use tokio_stream::StreamExt; - let upstream_dataset_ids: Vec<_> = dependency_graph_service + use futures::{StreamExt, TryStreamExt}; + + let upstream_dataset_handles = dependency_graph_service .get_upstream_dependencies(&self.dataset_handle.id) .await .int_err()? - .collect() - .await; - - let mut upstream = Vec::with_capacity(upstream_dataset_ids.len()); - for upstream_dataset_id in upstream_dataset_ids { - let hdl = dataset_registry - .resolve_dataset_handle_by_ref(&upstream_dataset_id.as_local_ref()) - .await - .int_err()?; + .then(|upstream_dataset_id| { + let dataset_registry = dataset_registry.clone(); + async move { + dataset_registry + .resolve_dataset_handle_by_ref(&upstream_dataset_id.as_local_ref()) + .await + .int_err() + } + }) + .try_collect::>() + .await?; + + let upstream_dataset_handles_len = upstream_dataset_handles.len(); + let ClassifyByAllowanceResponse { + authorized_handles, + unauthorized_handles_with_errors, + } = dataset_action_authorizer + .classify_datasets_by_allowance(upstream_dataset_handles, DatasetAction::Read) + .await?; + + let mut upstream = Vec::with_capacity(upstream_dataset_handles_len); + for hdl in authorized_handles { let maybe_account = Account::from_dataset_alias(ctx, &hdl.alias).await?; if let Some(account) = maybe_account { - upstream.push(Dataset::new(account, hdl)); + upstream.push(UpstreamDatasetResult::found(Dataset::new(account, hdl))); } else { tracing::warn!( "Skipped upstream dataset '{}' with unresolved account", @@ -114,28 +132,51 @@ impl DatasetMetadata { } } + upstream.extend( + unauthorized_handles_with_errors + .into_iter() + .map(|(hdl, _)| UpstreamDatasetResult::not_found(hdl)), + ); + Ok(upstream) } // TODO: Convert to collection + // TODO: Private Datasets: tests /// Current downstream dependencies of a dataset async fn current_downstream_dependencies(&self, ctx: &Context<'_>) -> Result> { - let (dependency_graph_service, dataset_registry) = from_catalog_n!( + let (dependency_graph_service, dataset_registry, dataset_action_authorizer) = from_catalog_n!( ctx, dyn domain::DependencyGraphService, - dyn domain::DatasetRegistry + dyn domain::DatasetRegistry, + dyn kamu_core::auth::DatasetActionAuthorizer ); - use tokio_stream::StreamExt; - let downstream_dataset_ids: Vec<_> = dependency_graph_service + use futures::{StreamExt, TryStreamExt}; + + let downstream_dataset_handles = dependency_graph_service 
.get_downstream_dependencies(&self.dataset_handle.id) .await .int_err()? - .collect() - .await; - - let mut downstream = Vec::with_capacity(downstream_dataset_ids.len()); - for downstream_dataset_id in downstream_dataset_ids { + .then(|upstream_dataset_id| { + let dataset_registry = dataset_registry.clone(); + async move { + dataset_registry + .resolve_dataset_handle_by_ref(&upstream_dataset_id.as_local_ref()) + .await + .int_err() + } + }) + .try_collect::>() + .await?; + + let authorized_downstream_dataset_ids = dataset_action_authorizer + .classify_datasets_by_allowance(downstream_dataset_handles, DatasetAction::Read) + .await? + .authorized_handles; + + let mut downstream = Vec::with_capacity(authorized_downstream_dataset_ids.len()); + for downstream_dataset_id in authorized_downstream_dataset_ids { let hdl = dataset_registry .resolve_dataset_handle_by_ref(&downstream_dataset_id.as_local_ref()) .await @@ -284,3 +325,52 @@ impl DatasetMetadata { } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#[derive(Interface, Debug, Clone)] +#[graphql(field(name = "message", ty = "String"))] +enum UpstreamDatasetResult { + Found(UpstreamDatasetResultFound), + NotFound(UpstreamDatasetResultNotFound), +} + +impl UpstreamDatasetResult { + pub fn found(dataset: Dataset) -> Self { + Self::Found(UpstreamDatasetResultFound { dataset }) + } + + pub fn not_found(dataset_handle: odf::DatasetHandle) -> Self { + Self::NotFound(UpstreamDatasetResultNotFound { + dataset_id: dataset_handle.id.into(), + dataset_alias: dataset_handle.alias.into(), + }) + } +} + +#[derive(SimpleObject, Debug, Clone)] +#[graphql(complex)] +pub struct UpstreamDatasetResultFound { + pub dataset: Dataset, +} + +#[ComplexObject] +impl UpstreamDatasetResultFound { + async fn message(&self) -> String { + "Found".to_string() + } +} + +#[derive(SimpleObject, Debug, Clone)] +#[graphql(complex)] +pub struct UpstreamDatasetResultNotFound { + pub dataset_id: DatasetID, + pub dataset_alias: DatasetAlias, +} + +#[ComplexObject] +impl UpstreamDatasetResultNotFound { + async fn message(&self) -> String { + "Not found".to_string() + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// From ac122df46efdf43cb76ebec07607c9b1d2853c6a Mon Sep 17 00:00:00 2001 From: Dima Pristupa Date: Wed, 25 Dec 2024 18:30:35 +0200 Subject: [PATCH 08/10] GQL, DatasetMetadata: correct processing of dataset's dependencies that are not found (#1013) --- resources/schema.gql | 33 ++- .../src/oso_dataset_authorizer.rs | 44 ++++ src/adapter/graphql/Cargo.toml | 4 +- .../src/queries/datasets/dataset_metadata.rs | 233 ++++++++++++------ .../domain/src/services/account_service.rs | 17 ++ .../services/src/account_service_impl.rs | 28 +++ .../src/auth/dataset_action_authorizer.rs | 32 ++- .../testing/mock_dataset_action_authorizer.rs | 38 +-- 8 files changed, 312 insertions(+), 117 deletions(-) diff --git a/resources/schema.gql b/resources/schema.gql index 2cdce5126..d1c8f134d 100644 --- a/resources/schema.gql +++ b/resources/schema.gql @@ -646,11 +646,11 @@ type DatasetMetadata { """ Current upstream dependencies of a dataset """ - currentUpstreamDependencies: [UpstreamDatasetResult!]! + currentUpstreamDependencies: [DependencyDatasetResult!]! """ Current downstream dependencies of a dataset """ - currentDownstreamDependencies: [Dataset!]! + currentDownstreamDependencies: [DependencyDatasetResult!]! 
""" Current polling source used by the root dataset """ @@ -849,6 +849,20 @@ type DeleteResultSuccess implements DeleteResult { message: String! } +interface DependencyDatasetResult { + message: String! +} + +type DependencyDatasetResultFound implements DependencyDatasetResult { + dataset: Dataset! + message: String! +} + +type DependencyDatasetResultNotFound implements DependencyDatasetResult { + datasetId: DatasetID! + message: String! +} + type DisablePollingSource { dummy: String } @@ -1996,21 +2010,6 @@ interface UpdateReadmeResult { message: String! } -interface UpstreamDatasetResult { - message: String! -} - -type UpstreamDatasetResultFound implements UpstreamDatasetResult { - dataset: Dataset! - message: String! -} - -type UpstreamDatasetResultNotFound implements UpstreamDatasetResult { - datasetId: DatasetID! - datasetAlias: DatasetAlias! - message: String! -} - type ViewAccessToken { """ Unique identifier of the access token diff --git a/src/adapter/auth-oso-rebac/src/oso_dataset_authorizer.rs b/src/adapter/auth-oso-rebac/src/oso_dataset_authorizer.rs index 113549ed5..4ef045e30 100644 --- a/src/adapter/auth-oso-rebac/src/oso_dataset_authorizer.rs +++ b/src/adapter/auth-oso-rebac/src/oso_dataset_authorizer.rs @@ -235,6 +235,50 @@ impl DatasetActionAuthorizer for OsoDatasetAuthorizer { unauthorized_handles_with_errors: unmatched_results, }) } + + async fn classify_dataset_ids_by_allowance( + &self, + dataset_ids: Vec, + action: DatasetAction, + ) -> Result { + let user_actor = self.user_actor().await?; + let mut authorized_ids = Vec::with_capacity(dataset_ids.len()); + let mut unauthorized_ids_with_errors = Vec::new(); + + let dataset_resources_resolution = self + .oso_resource_service + .get_multiple_dataset_resources(&dataset_ids) + .await + .int_err()?; + + for (dataset_id, dataset_resource) in dataset_resources_resolution.resolved_resources { + let is_allowed = self + .kamu_auth_oso + .is_allowed(user_actor.clone(), action, dataset_resource) + .int_err()?; + + if is_allowed { + authorized_ids.push(dataset_id); + } else { + let dataset_ref = dataset_id.as_local_ref(); + unauthorized_ids_with_errors.push(( + dataset_id, + DatasetActionUnauthorizedError::Access(AccessError::Forbidden( + DatasetActionNotEnoughPermissionsError { + action, + dataset_ref, + } + .into(), + )), + )); + } + } + + Ok(ClassifyByAllowanceIdsResponse { + authorized_ids, + unauthorized_ids_with_errors, + }) + } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/adapter/graphql/Cargo.toml b/src/adapter/graphql/Cargo.toml index dd40ef1df..b871b0fd6 100644 --- a/src/adapter/graphql/Cargo.toml +++ b/src/adapter/graphql/Cargo.toml @@ -48,7 +48,9 @@ datafusion = { version = "43", default-features = false, features = [ "serde", ] } # TODO: Currently needed for type conversions but ideally should be encapsulated by kamu-core dill = "0.9" -futures = "0.3" +futures = { version = "0.3", default-features = false, features = [ + "alloc" +] } secrecy = "0.10" serde = { version = "1", default-features = false } serde_json = "1" diff --git a/src/adapter/graphql/src/queries/datasets/dataset_metadata.rs b/src/adapter/graphql/src/queries/datasets/dataset_metadata.rs index 427dc000d..fe0c4723a 100644 --- a/src/adapter/graphql/src/queries/datasets/dataset_metadata.rs +++ b/src/adapter/graphql/src/queries/datasets/dataset_metadata.rs @@ -7,8 +7,11 @@ // the Business Source License, use of this software will be governed // by the Apache 
License, Version 2.0. +use std::collections::HashSet; + use chrono::prelude::*; -use kamu_core::auth::{ClassifyByAllowanceResponse, DatasetAction}; +use kamu_accounts::AccountService; +use kamu_core::auth::{ClassifyByAllowanceIdsResponse, DatasetAction}; use kamu_core::{ self as domain, MetadataChainExt, @@ -17,6 +20,7 @@ use kamu_core::{ SearchSetLicenseVisitor, SearchSetVocabVisitor, }; +use kamu_datasets::DatasetEntriesResolution; use opendatafabric as odf; use crate::prelude::*; @@ -85,114 +89,181 @@ impl DatasetMetadata { async fn current_upstream_dependencies( &self, ctx: &Context<'_>, - ) -> Result> { - let (dependency_graph_service, dataset_registry, dataset_action_authorizer) = from_catalog_n!( + ) -> Result> { + let ( + dependency_graph_service, + dataset_action_authorizer, + dataset_entry_repository, + account_service, + ) = from_catalog_n!( ctx, dyn domain::DependencyGraphService, - dyn domain::DatasetRegistry, - dyn kamu_core::auth::DatasetActionAuthorizer + dyn kamu_core::auth::DatasetActionAuthorizer, + dyn kamu_datasets::DatasetEntryRepository, + dyn AccountService ); - use futures::{StreamExt, TryStreamExt}; + use tokio_stream::StreamExt; - let upstream_dataset_handles = dependency_graph_service + // TODO: PERF: chunk the stream + let upstream_dependency_ids = dependency_graph_service .get_upstream_dependencies(&self.dataset_handle.id) .await .int_err()? - .then(|upstream_dataset_id| { - let dataset_registry = dataset_registry.clone(); - async move { - dataset_registry - .resolve_dataset_handle_by_ref(&upstream_dataset_id.as_local_ref()) - .await - .int_err() - } - }) - .try_collect::>() - .await?; + .collect::>() + .await; - let upstream_dataset_handles_len = upstream_dataset_handles.len(); - let ClassifyByAllowanceResponse { - authorized_handles, - unauthorized_handles_with_errors, + let mut upstream_dependencies = Vec::with_capacity(upstream_dependency_ids.len()); + + let ClassifyByAllowanceIdsResponse { + authorized_ids, + unauthorized_ids_with_errors, } = dataset_action_authorizer - .classify_datasets_by_allowance(upstream_dataset_handles, DatasetAction::Read) + .classify_dataset_ids_by_allowance(upstream_dependency_ids, DatasetAction::Read) .await?; - let mut upstream = Vec::with_capacity(upstream_dataset_handles_len); - for hdl in authorized_handles { - let maybe_account = Account::from_dataset_alias(ctx, &hdl.alias).await?; + upstream_dependencies.extend(unauthorized_ids_with_errors.into_iter().map( + |(not_found_dataset_id, _)| DependencyDatasetResult::not_found(not_found_dataset_id), + )); + + let DatasetEntriesResolution { + resolved_entries, + unresolved_entries, + } = dataset_entry_repository + .get_multiple_dataset_entries(&authorized_ids) + .await + .int_err()?; + + upstream_dependencies.extend( + unresolved_entries + .into_iter() + .map(DependencyDatasetResult::not_found), + ); + + let owner_ids = resolved_entries + .iter() + .fold(HashSet::new(), |mut acc, entry| { + acc.insert(entry.owner_id.clone()); + acc + }); + let account_map = account_service + .get_account_map(owner_ids.into_iter().collect()) + .await + .int_err()?; + + for dataset_entry in resolved_entries { + let maybe_account = account_map.get(&dataset_entry.owner_id); + if let Some(account) = maybe_account { - upstream.push(UpstreamDatasetResult::found(Dataset::new(account, hdl))); + let dataset_handle = odf::DatasetHandle { + id: dataset_entry.id, + alias: odf::DatasetAlias::new( + Some(account.account_name.clone()), + dataset_entry.name, + ), + }; + let dataset = 
Dataset::new(Account::from_account(account.clone()), dataset_handle); + + upstream_dependencies.push(DependencyDatasetResult::found(dataset)); } else { tracing::warn!( - "Skipped upstream dataset '{}' with unresolved account", - hdl.alias + "Upstream owner's account not found for dataset: {:?}", + &dataset_entry ); + upstream_dependencies.push(DependencyDatasetResult::not_found(dataset_entry.id)); } } - upstream.extend( - unauthorized_handles_with_errors - .into_iter() - .map(|(hdl, _)| UpstreamDatasetResult::not_found(hdl)), - ); - - Ok(upstream) + Ok(upstream_dependencies) } // TODO: Convert to collection // TODO: Private Datasets: tests /// Current downstream dependencies of a dataset - async fn current_downstream_dependencies(&self, ctx: &Context<'_>) -> Result> { - let (dependency_graph_service, dataset_registry, dataset_action_authorizer) = from_catalog_n!( + async fn current_downstream_dependencies( + &self, + ctx: &Context<'_>, + ) -> Result> { + let ( + dependency_graph_service, + dataset_action_authorizer, + dataset_entry_repository, + account_service, + ) = from_catalog_n!( ctx, dyn domain::DependencyGraphService, - dyn domain::DatasetRegistry, - dyn kamu_core::auth::DatasetActionAuthorizer + dyn kamu_core::auth::DatasetActionAuthorizer, + dyn kamu_datasets::DatasetEntryRepository, + dyn AccountService ); - use futures::{StreamExt, TryStreamExt}; + use tokio_stream::StreamExt; - let downstream_dataset_handles = dependency_graph_service + // TODO: PERF: chunk the stream + let downstream_dependency_ids = dependency_graph_service .get_downstream_dependencies(&self.dataset_handle.id) .await .int_err()? - .then(|upstream_dataset_id| { - let dataset_registry = dataset_registry.clone(); - async move { - dataset_registry - .resolve_dataset_handle_by_ref(&upstream_dataset_id.as_local_ref()) - .await - .int_err() - } - }) - .try_collect::>() - .await?; + .collect::>() + .await; - let authorized_downstream_dataset_ids = dataset_action_authorizer - .classify_datasets_by_allowance(downstream_dataset_handles, DatasetAction::Read) + let mut downstream_dependencies = Vec::with_capacity(downstream_dependency_ids.len()); + + // Cut off datasets that we don't have access to + let authorized_ids = dataset_action_authorizer + .classify_dataset_ids_by_allowance(downstream_dependency_ids, DatasetAction::Read) .await? 
- .authorized_handles; - - let mut downstream = Vec::with_capacity(authorized_downstream_dataset_ids.len()); - for downstream_dataset_id in authorized_downstream_dataset_ids { - let hdl = dataset_registry - .resolve_dataset_handle_by_ref(&downstream_dataset_id.as_local_ref()) - .await - .int_err()?; - let maybe_account = Account::from_dataset_alias(ctx, &hdl.alias).await?; + .authorized_ids; + + let DatasetEntriesResolution { + resolved_entries, + unresolved_entries, + } = dataset_entry_repository + .get_multiple_dataset_entries(&authorized_ids) + .await + .int_err()?; + + downstream_dependencies.extend( + unresolved_entries + .into_iter() + .map(DependencyDatasetResult::not_found), + ); + + let owner_ids = resolved_entries + .iter() + .fold(HashSet::new(), |mut acc, entry| { + acc.insert(entry.owner_id.clone()); + acc + }); + let account_map = account_service + .get_account_map(owner_ids.into_iter().collect()) + .await + .int_err()?; + + for dataset_entry in resolved_entries { + let maybe_account = account_map.get(&dataset_entry.owner_id); + if let Some(account) = maybe_account { - downstream.push(Dataset::new(account, hdl)); + let dataset_handle = odf::DatasetHandle { + id: dataset_entry.id, + alias: odf::DatasetAlias::new( + Some(account.account_name.clone()), + dataset_entry.name, + ), + }; + let dataset = Dataset::new(Account::from_account(account.clone()), dataset_handle); + + downstream_dependencies.push(DependencyDatasetResult::found(dataset)); } else { tracing::warn!( - "Skipped downstream dataset '{}' with unresolved account", - hdl.alias + "Downstream owner's account not found for dataset: {:?}", + &dataset_entry ); + downstream_dependencies.push(DependencyDatasetResult::not_found(dataset_entry.id)); } } - Ok(downstream) + Ok(downstream_dependencies) } /// Current polling source used by the root dataset @@ -328,32 +399,31 @@ impl DatasetMetadata { #[derive(Interface, Debug, Clone)] #[graphql(field(name = "message", ty = "String"))] -enum UpstreamDatasetResult { - Found(UpstreamDatasetResultFound), - NotFound(UpstreamDatasetResultNotFound), +enum DependencyDatasetResult { + Found(DependencyDatasetResultFound), + NotFound(DependencyDatasetResultNotFound), } -impl UpstreamDatasetResult { +impl DependencyDatasetResult { pub fn found(dataset: Dataset) -> Self { - Self::Found(UpstreamDatasetResultFound { dataset }) + Self::Found(DependencyDatasetResultFound { dataset }) } - pub fn not_found(dataset_handle: odf::DatasetHandle) -> Self { - Self::NotFound(UpstreamDatasetResultNotFound { - dataset_id: dataset_handle.id.into(), - dataset_alias: dataset_handle.alias.into(), + pub fn not_found(dataset_id: odf::DatasetID) -> Self { + Self::NotFound(DependencyDatasetResultNotFound { + dataset_id: dataset_id.into(), }) } } #[derive(SimpleObject, Debug, Clone)] #[graphql(complex)] -pub struct UpstreamDatasetResultFound { +pub struct DependencyDatasetResultFound { pub dataset: Dataset, } #[ComplexObject] -impl UpstreamDatasetResultFound { +impl DependencyDatasetResultFound { async fn message(&self) -> String { "Found".to_string() } @@ -361,13 +431,12 @@ impl UpstreamDatasetResultFound { #[derive(SimpleObject, Debug, Clone)] #[graphql(complex)] -pub struct UpstreamDatasetResultNotFound { +pub struct DependencyDatasetResultNotFound { pub dataset_id: DatasetID, - pub dataset_alias: DatasetAlias, } #[ComplexObject] -impl UpstreamDatasetResultNotFound { +impl DependencyDatasetResultNotFound { async fn message(&self) -> String { "Not found".to_string() } diff --git 
a/src/domain/accounts/domain/src/services/account_service.rs b/src/domain/accounts/domain/src/services/account_service.rs
index 8bd680027..fedd5b13c 100644
--- a/src/domain/accounts/domain/src/services/account_service.rs
+++ b/src/domain/accounts/domain/src/services/account_service.rs
@@ -7,14 +7,18 @@
 // the Business Source License, use of this software will be governed
 // by the Apache License, Version 2.0.
 
+use std::collections::HashMap;
+
 use database_common::{EntityPageListing, PaginationOpts};
 use internal_error::InternalError;
+use opendatafabric as odf;
 use thiserror::Error;
 
 use crate::{Account, AccountPageStream};
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 
+// TODO: Private Datasets: merge with AuthenticationService?
 // TODO: Private Datasets: tests
 #[async_trait::async_trait]
 pub trait AccountService: Sync + Send {
@@ -25,6 +29,11 @@ pub trait AccountService: Sync + Send {
         &self,
         pagination: PaginationOpts,
     ) -> Result<EntityPageListing<Account>, ListAccountError>;
+
+    async fn get_account_map(
+        &self,
+        account_ids: Vec<odf::AccountID>,
+    ) -> Result<HashMap<odf::AccountID, Account>, GetAccountMapError>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -38,3 +47,11 @@ pub enum ListAccountError {
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#[derive(Debug, Error)]
+pub enum GetAccountMapError {
+    #[error(transparent)]
+    Internal(#[from] InternalError),
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/domain/accounts/services/src/account_service_impl.rs b/src/domain/accounts/services/src/account_service_impl.rs
index c1bd8b168..f4ef33913 100644
--- a/src/domain/accounts/services/src/account_service_impl.rs
+++ b/src/domain/accounts/services/src/account_service_impl.rs
@@ -7,6 +7,7 @@
 // the Business Source License, use of this software will be governed
 // by the Apache License, Version 2.0.
+use std::collections::HashMap; use std::sync::Arc; use database_common::{EntityPageListing, EntityPageStreamer, PaginationOpts}; @@ -17,8 +18,11 @@ use kamu_accounts::{ AccountPageStream, AccountRepository, AccountService, + GetAccountByIdError, + GetAccountMapError, ListAccountError, }; +use opendatafabric as odf; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -67,6 +71,30 @@ impl AccountService for AccountServiceImpl { total_count, }) } + + async fn get_account_map( + &self, + account_ids: Vec, + ) -> Result, GetAccountMapError> { + let account_map = match self.account_repo.get_accounts_by_ids(account_ids).await { + Ok(accounts) => { + let map = accounts + .into_iter() + .fold(HashMap::new(), |mut acc, account| { + acc.insert(account.id.clone(), account); + acc + }); + Ok(map) + } + Err(err) => match err { + GetAccountByIdError::NotFound(_) => Ok(HashMap::new()), + e => Err(e), + }, + } + .int_err()?; + + Ok(account_map) + } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/domain/core/src/auth/dataset_action_authorizer.rs b/src/domain/core/src/auth/dataset_action_authorizer.rs index cc452af02..bbcfbe1b2 100644 --- a/src/domain/core/src/auth/dataset_action_authorizer.rs +++ b/src/domain/core/src/auth/dataset_action_authorizer.rs @@ -23,7 +23,7 @@ use crate::AccessError; pub trait DatasetActionAuthorizer: Sync + Send { async fn check_action_allowed( &self, - // TODO: Private Datasets: use odf::DatasetID, here and below + // TODO: Private Datasets: migrate to use odf::DatasetID, here and below dataset_handle: &odf::DatasetHandle, action: DatasetAction, ) -> Result<(), DatasetActionUnauthorizedError>; @@ -60,6 +60,15 @@ pub trait DatasetActionAuthorizer: Sync + Send { dataset_handles: Vec, action: DatasetAction, ) -> Result; + + // TODO: Private Datasets: tests + // TODO: Private Datasets: use classify_datasets_by_allowance() name + // after migration + async fn classify_dataset_ids_by_allowance( + &self, + dataset_ids: Vec, + action: DatasetAction, + ) -> Result; } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -144,6 +153,16 @@ pub struct ClassifyByAllowanceResponse { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// TODO: Private Datasets: use classify_datasets_by_allowance() name +// after migration +#[derive(Debug)] +pub struct ClassifyByAllowanceIdsResponse { + pub authorized_ids: Vec, + pub unauthorized_ids_with_errors: Vec<(odf::DatasetID, DatasetActionUnauthorizedError)>, +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[component(pub)] #[interface(dyn DatasetActionAuthorizer)] pub struct AlwaysHappyDatasetActionAuthorizer {} @@ -190,6 +209,17 @@ impl DatasetActionAuthorizer for AlwaysHappyDatasetActionAuthorizer { unauthorized_handles_with_errors: vec![], }) } + + async fn classify_dataset_ids_by_allowance( + &self, + dataset_ids: Vec, + _action: DatasetAction, + ) -> Result { + Ok(ClassifyByAllowanceIdsResponse { + authorized_ids: dataset_ids, + unauthorized_ids_with_errors: vec![], + }) + } } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/infra/core/src/testing/mock_dataset_action_authorizer.rs 
b/src/infra/core/src/testing/mock_dataset_action_authorizer.rs index e7baae422..5713c1250 100644 --- a/src/infra/core/src/testing/mock_dataset_action_authorizer.rs +++ b/src/infra/core/src/testing/mock_dataset_action_authorizer.rs @@ -11,7 +11,7 @@ use std::collections::HashSet; use internal_error::InternalError; use kamu_core::auth::{ - self, + ClassifyByAllowanceIdsResponse, ClassifyByAllowanceResponse, DatasetAction, DatasetActionAuthorizer, @@ -21,7 +21,7 @@ use kamu_core::auth::{ use kamu_core::AccessError; use mockall::predicate::{always, eq, function}; use mockall::Predicate; -use opendatafabric::{DatasetAlias, DatasetHandle}; +use opendatafabric as odf; //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -32,32 +32,38 @@ mockall::mock! { impl DatasetActionAuthorizer for DatasetActionAuthorizer { async fn check_action_allowed( &self, - dataset_handle: &DatasetHandle, + dataset_handle: &odf::DatasetHandle, action: DatasetAction, ) -> Result<(), DatasetActionUnauthorizedError>; async fn get_allowed_actions( &self, - dataset_handle: &DatasetHandle, + dataset_handle: &odf::DatasetHandle, ) -> Result, InternalError>; async fn filter_datasets_allowing( &self, - dataset_handles: Vec, + dataset_handles: Vec, action: DatasetAction, - ) -> Result, InternalError>; + ) -> Result, InternalError>; async fn classify_datasets_by_allowance( &self, - dataset_handles: Vec, + dataset_handles: Vec, action: DatasetAction, ) -> Result; + + async fn classify_dataset_ids_by_allowance( + &self, + dataset_ids: Vec, + action: DatasetAction, + ) -> Result; } } impl MockDatasetActionAuthorizer { pub fn denying_error( - dataset_handle: &DatasetHandle, + dataset_handle: &odf::DatasetHandle, action: DatasetAction, ) -> DatasetActionUnauthorizedError { DatasetActionUnauthorizedError::Access(AccessError::Forbidden( @@ -87,13 +93,13 @@ impl MockDatasetActionAuthorizer { pub fn expect_check_read_dataset( self, - dataset_alias: &DatasetAlias, + dataset_alias: &odf::DatasetAlias, times: usize, success: bool, ) -> Self { let dataset_alias = dataset_alias.clone(); self.expect_check_action_allowed_internal( - function(move |dh: &DatasetHandle| dh.alias == dataset_alias), + function(move |dh: &odf::DatasetHandle| dh.alias == dataset_alias), DatasetAction::Read, times, success, @@ -102,13 +108,13 @@ impl MockDatasetActionAuthorizer { pub fn expect_check_write_dataset( self, - dataset_alias: &DatasetAlias, + dataset_alias: &odf::DatasetAlias, times: usize, success: bool, ) -> Self { let dataset_alias = dataset_alias.clone(); self.expect_check_action_allowed_internal( - function(move |dh: &DatasetHandle| dh.alias == dataset_alias), + function(move |dh: &odf::DatasetHandle| dh.alias == dataset_alias), DatasetAction::Write, times, success, @@ -126,12 +132,12 @@ impl MockDatasetActionAuthorizer { fn expect_check_action_allowed_internal
<P>
( mut self, dataset_handle_predicate: P, - action: auth::DatasetAction, + action: DatasetAction, times: usize, success: bool, ) -> Self where - P: Predicate + Sync + Send + 'static, + P: Predicate + Sync + Send + 'static, { if times > 0 { self.expect_check_action_allowed() @@ -155,9 +161,9 @@ impl MockDatasetActionAuthorizer { pub fn make_expect_classify_datasets_by_allowance( mut self, - action: auth::DatasetAction, + action: DatasetAction, times: usize, - authorized: HashSet, + authorized: HashSet, ) -> Self { self.expect_classify_datasets_by_allowance() .with(always(), eq(action)) From 607894d518b8c53bb6ec38114ea521cf18bfb57c Mon Sep 17 00:00:00 2001 From: Dima Pristupa Date: Thu, 26 Dec 2024 12:53:34 +0200 Subject: [PATCH 09/10] GQL, DatasetMetadata: update dataset's dependencies types (#1014) --- resources/schema.gql | 6 +-- .../src/queries/datasets/dataset_metadata.rs | 44 ++++++++++--------- 2 files changed, 27 insertions(+), 23 deletions(-) diff --git a/resources/schema.gql b/resources/schema.gql index d1c8f134d..39875c9ad 100644 --- a/resources/schema.gql +++ b/resources/schema.gql @@ -853,13 +853,13 @@ interface DependencyDatasetResult { message: String! } -type DependencyDatasetResultFound implements DependencyDatasetResult { +type DependencyDatasetResultAccessible implements DependencyDatasetResult { dataset: Dataset! message: String! } -type DependencyDatasetResultNotFound implements DependencyDatasetResult { - datasetId: DatasetID! +type DependencyDatasetResultNotAccessible implements DependencyDatasetResult { + id: DatasetID! message: String! } diff --git a/src/adapter/graphql/src/queries/datasets/dataset_metadata.rs b/src/adapter/graphql/src/queries/datasets/dataset_metadata.rs index fe0c4723a..54e8498fa 100644 --- a/src/adapter/graphql/src/queries/datasets/dataset_metadata.rs +++ b/src/adapter/graphql/src/queries/datasets/dataset_metadata.rs @@ -123,7 +123,9 @@ impl DatasetMetadata { .await?; upstream_dependencies.extend(unauthorized_ids_with_errors.into_iter().map( - |(not_found_dataset_id, _)| DependencyDatasetResult::not_found(not_found_dataset_id), + |(unauthorized_dataset_id, _)| { + DependencyDatasetResult::not_accessible(unauthorized_dataset_id) + }, )); let DatasetEntriesResolution { @@ -137,7 +139,7 @@ impl DatasetMetadata { upstream_dependencies.extend( unresolved_entries .into_iter() - .map(DependencyDatasetResult::not_found), + .map(DependencyDatasetResult::not_accessible), ); let owner_ids = resolved_entries @@ -164,13 +166,14 @@ impl DatasetMetadata { }; let dataset = Dataset::new(Account::from_account(account.clone()), dataset_handle); - upstream_dependencies.push(DependencyDatasetResult::found(dataset)); + upstream_dependencies.push(DependencyDatasetResult::accessible(dataset)); } else { tracing::warn!( "Upstream owner's account not found for dataset: {:?}", &dataset_entry ); - upstream_dependencies.push(DependencyDatasetResult::not_found(dataset_entry.id)); + upstream_dependencies + .push(DependencyDatasetResult::not_accessible(dataset_entry.id)); } } @@ -226,7 +229,7 @@ impl DatasetMetadata { downstream_dependencies.extend( unresolved_entries .into_iter() - .map(DependencyDatasetResult::not_found), + .map(DependencyDatasetResult::not_accessible), ); let owner_ids = resolved_entries @@ -253,13 +256,14 @@ impl DatasetMetadata { }; let dataset = Dataset::new(Account::from_account(account.clone()), dataset_handle); - downstream_dependencies.push(DependencyDatasetResult::found(dataset)); + 
downstream_dependencies.push(DependencyDatasetResult::accessible(dataset)); } else { tracing::warn!( "Downstream owner's account not found for dataset: {:?}", &dataset_entry ); - downstream_dependencies.push(DependencyDatasetResult::not_found(dataset_entry.id)); + downstream_dependencies + .push(DependencyDatasetResult::not_accessible(dataset_entry.id)); } } @@ -400,30 +404,30 @@ impl DatasetMetadata { #[derive(Interface, Debug, Clone)] #[graphql(field(name = "message", ty = "String"))] enum DependencyDatasetResult { - Found(DependencyDatasetResultFound), - NotFound(DependencyDatasetResultNotFound), + Accessible(DependencyDatasetResultAccessible), + NotAccessible(DependencyDatasetResultNotAccessible), } impl DependencyDatasetResult { - pub fn found(dataset: Dataset) -> Self { - Self::Found(DependencyDatasetResultFound { dataset }) + pub fn accessible(dataset: Dataset) -> Self { + Self::Accessible(DependencyDatasetResultAccessible { dataset }) } - pub fn not_found(dataset_id: odf::DatasetID) -> Self { - Self::NotFound(DependencyDatasetResultNotFound { - dataset_id: dataset_id.into(), + pub fn not_accessible(dataset_id: odf::DatasetID) -> Self { + Self::NotAccessible(DependencyDatasetResultNotAccessible { + id: dataset_id.into(), }) } } #[derive(SimpleObject, Debug, Clone)] #[graphql(complex)] -pub struct DependencyDatasetResultFound { +pub struct DependencyDatasetResultAccessible { pub dataset: Dataset, } #[ComplexObject] -impl DependencyDatasetResultFound { +impl DependencyDatasetResultAccessible { async fn message(&self) -> String { "Found".to_string() } @@ -431,14 +435,14 @@ impl DependencyDatasetResultFound { #[derive(SimpleObject, Debug, Clone)] #[graphql(complex)] -pub struct DependencyDatasetResultNotFound { - pub dataset_id: DatasetID, +pub struct DependencyDatasetResultNotAccessible { + pub id: DatasetID, } #[ComplexObject] -impl DependencyDatasetResultNotFound { +impl DependencyDatasetResultNotAccessible { async fn message(&self) -> String { - "Not found".to_string() + "Not Accessible".to_string() } } From 4524c6b6edc0e2df44654d8878b15e2bf7f42b51 Mon Sep 17 00:00:00 2001 From: Dima Pristupa Date: Thu, 26 Dec 2024 17:08:48 +0200 Subject: [PATCH 10/10] Private Datasets: absorb helpful commits from command updates (#1016) * E2E: added the ability to create an account using CLI * OutboxImmediateImpl::post_message_as_json(): return a dispatch error, if present --- CHANGELOG.md | 1 + src/app/cli/src/cli.rs | 5 ++- src/app/cli/src/cli_commands.rs | 3 ++ .../cli/src/commands/system_e2e_command.rs | 42 +++++++++++++++++-- .../src/services/accounts/account_service.rs | 2 +- .../accounts/domain/src/entities/account.rs | 21 ++++++++++ .../src/predefined_accounts_registrator.rs | 13 +----- .../implementation/outbox_dispatching_impl.rs | 8 ++-- .../implementation/outbox_immediate_impl.rs | 3 +- 9 files changed, 75 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 847386e62..2a0a1999a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ Recommendation: for ease of reading, use the following order: - `DatasetOwnershipService`: moved to the `kamu-dataset` crate area & implemented via `DatasetEntryServiceImpl` - GQL, `DatasetMetadata.currentUpstreamDependencies`: indication if datasets not found/not accessed - GQL, `DatasetMetadata.currentDownstreamDependencies`: exclude datasets that cannot be accessed + - E2E: added the ability to create an account using CLI ## [0.213.1] - 2024-12-18 ### Fixed diff --git a/src/app/cli/src/cli.rs b/src/app/cli/src/cli.rs index 
6891f7dfa..14924bef2 100644 --- a/src/app/cli/src/cli.rs +++ b/src/app/cli/src/cli.rs @@ -1405,7 +1405,10 @@ pub struct SystemDiagnose {} #[derive(Debug, clap::Args)] #[command(hide = true)] pub struct SystemE2e { - #[arg(long, value_name = "ACT", value_parser = ["get-last-data-block-path"])] + #[arg()] + pub arguments: Option>, + + #[arg(long, value_name = "ACT", value_parser = ["get-last-data-block-path", "account-add"])] pub action: String, /// Local dataset reference diff --git a/src/app/cli/src/cli_commands.rs b/src/app/cli/src/cli_commands.rs index 8be1ba177..37160bab0 100644 --- a/src/app/cli/src/cli_commands.rs +++ b/src/app/cli/src/cli_commands.rs @@ -455,8 +455,11 @@ pub fn get_command( } cli::SystemSubCommand::E2e(sc) => Box::new(SystemE2ECommand::new( sc.action, + sc.arguments.unwrap_or_default(), sc.dataset, cli_catalog.get_one()?, + cli_catalog.get_one()?, + cli_catalog.get_one()?, )), cli::SystemSubCommand::Gc(_) => Box::new(GcCommand::new(cli_catalog.get_one()?)), cli::SystemSubCommand::GenerateToken(sc) => Box::new(GenerateTokenCommand::new( diff --git a/src/app/cli/src/commands/system_e2e_command.rs b/src/app/cli/src/commands/system_e2e_command.rs index b14a3f0bd..73cdb6ebf 100644 --- a/src/app/cli/src/commands/system_e2e_command.rs +++ b/src/app/cli/src/commands/system_e2e_command.rs @@ -9,9 +9,11 @@ use std::sync::Arc; -use internal_error::ResultIntoInternal; +use internal_error::{ErrorIntoInternal, ResultIntoInternal}; use kamu::domain::{DatasetRegistry, DatasetRegistryExt, MetadataChainExt}; -use opendatafabric::DatasetRef; +use kamu_accounts::{AccountConfig, AccountRepository, PROVIDER_PASSWORD}; +use kamu_accounts_services::LoginPasswordAuthProvider; +use opendatafabric as odf; use super::{CLIError, Command}; @@ -19,23 +21,32 @@ use super::{CLIError, Command}; pub struct SystemE2ECommand { action: String, - dataset_ref: Option, + arguments: Vec, + dataset_ref: Option, dataset_registry: Arc, + account_repo: Arc, + login_password_auth_provider: Arc, } impl SystemE2ECommand { pub fn new( action: S, - dataset_ref: Option, + arguments: Vec, + dataset_ref: Option, dataset_registry: Arc, + account_repo: Arc, + login_password_auth_provider: Arc, ) -> Self where S: Into, { Self { action: action.into(), + arguments, dataset_ref, dataset_registry, + account_repo, + login_password_auth_provider, } } } @@ -76,6 +87,29 @@ impl Command for SystemE2ECommand { println!("{}", path.display()); } + "account-add" => { + if self.arguments.is_empty() { + return Err("Account names have not been provided".int_err().into()); + }; + + for account_name in &self.arguments { + eprint!("Add {account_name}... 
"); + + let account_config = + AccountConfig::from_name(odf::AccountName::new_unchecked(account_name)); + let account = (&account_config).into(); + + self.account_repo.create_account(&account).await.int_err()?; + + if account_config.provider == PROVIDER_PASSWORD { + self.login_password_auth_provider + .save_password(&account.account_name, account_config.get_password()) + .await?; + } + + eprintln!("{}", console::style("Done").green()); + } + } unexpected_action => panic!("Unexpected action: '{unexpected_action}'"), } diff --git a/src/app/cli/src/services/accounts/account_service.rs b/src/app/cli/src/services/accounts/account_service.rs index 7814a276c..89ef8794f 100644 --- a/src/app/cli/src/services/accounts/account_service.rs +++ b/src/app/cli/src/services/accounts/account_service.rs @@ -46,7 +46,7 @@ impl AccountService { default_account_name } else { // Use account as username, when there is no data - account.clone() + account }, true, ) diff --git a/src/domain/accounts/domain/src/entities/account.rs b/src/domain/accounts/domain/src/entities/account.rs index 3ac52005d..43dae2f0f 100644 --- a/src/domain/accounts/domain/src/entities/account.rs +++ b/src/domain/accounts/domain/src/entities/account.rs @@ -12,6 +12,8 @@ use lazy_static::lazy_static; use opendatafabric::{AccountID, AccountName}; use serde::{Deserialize, Serialize}; +use crate::AccountConfig; + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // TODO: have some length restrictions (0 < .. < limit) @@ -48,6 +50,25 @@ pub struct Account { //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +impl From<&AccountConfig> for Account { + fn from(account_config: &AccountConfig) -> Self { + Account { + id: account_config.get_id(), + account_name: account_config.account_name.clone(), + email: account_config.email.clone(), + display_name: account_config.get_display_name(), + account_type: account_config.account_type, + avatar_url: account_config.avatar_url.clone(), + registered_at: account_config.registered_at, + is_admin: account_config.is_admin, + provider: account_config.provider.clone(), + provider_identity_key: account_config.account_name.to_string(), + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] #[cfg_attr( feature = "sqlx", diff --git a/src/domain/accounts/services/src/predefined_accounts_registrator.rs b/src/domain/accounts/services/src/predefined_accounts_registrator.rs index a0c6d78b5..7b92892af 100644 --- a/src/domain/accounts/services/src/predefined_accounts_registrator.rs +++ b/src/domain/accounts/services/src/predefined_accounts_registrator.rs @@ -49,18 +49,7 @@ impl PredefinedAccountsRegistrator { &self, account_config: &AccountConfig, ) -> Result<(), InternalError> { - let account = Account { - id: account_config.get_id(), - account_name: account_config.account_name.clone(), - email: account_config.email.clone(), - display_name: account_config.get_display_name(), - account_type: account_config.account_type, - avatar_url: account_config.avatar_url.clone(), - registered_at: account_config.registered_at, - is_admin: account_config.is_admin, - provider: account_config.provider.clone(), - provider_identity_key: account_config.account_name.to_string(), - }; + let account = account_config.into(); self.account_repository 
.create_account(&account) diff --git a/src/utils/messaging-outbox/src/services/implementation/outbox_dispatching_impl.rs b/src/utils/messaging-outbox/src/services/implementation/outbox_dispatching_impl.rs index c93bc8d28..8dd3e26c5 100644 --- a/src/utils/messaging-outbox/src/services/implementation/outbox_dispatching_impl.rs +++ b/src/utils/messaging-outbox/src/services/implementation/outbox_dispatching_impl.rs @@ -87,14 +87,14 @@ impl Outbox for OutboxDispatchingImpl { ) -> Result<(), InternalError> { tracing::debug!(content_json = %content_json, "Dispatching outbox message"); - if self.transactional_producers.contains(producer_name) { - self.transactional_outbox + if self.immediate_producers.contains(producer_name) { + self.immediate_outbox .post_message_as_json(producer_name, content_json, version) .await?; } - if self.immediate_producers.contains(producer_name) { - self.immediate_outbox + if self.transactional_producers.contains(producer_name) { + self.transactional_outbox .post_message_as_json(producer_name, content_json, version) .await?; } diff --git a/src/utils/messaging-outbox/src/services/implementation/outbox_immediate_impl.rs b/src/utils/messaging-outbox/src/services/implementation/outbox_immediate_impl.rs index 44616e784..62b6b8b0c 100644 --- a/src/utils/messaging-outbox/src/services/implementation/outbox_immediate_impl.rs +++ b/src/utils/messaging-outbox/src/services/implementation/outbox_immediate_impl.rs @@ -59,7 +59,7 @@ impl Outbox for OutboxImmediateImpl { let dispatch_result = dispatcher .dispatch_message(&self.catalog, self.consumer_filter, &content_json, version) .await; - if let Err(e) = dispatch_result { + if let Err(e) = &dispatch_result { tracing::error!( error = ?e, error_msg = %e, @@ -67,6 +67,7 @@ impl Outbox for OutboxImmediateImpl { ?content_json, "Immediate outbox message dispatching failed" ); + return dispatch_result; } }
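
The ID-based authorization path introduced in this series replaces per-dependency handle resolution with a single classification call, so datasets the caller cannot read are reported without ever resolving their aliases. Below is a minimal sketch of how a caller might consume classify_dataset_ids_by_allowance; the helper name split_readable_dependencies is hypothetical, and the InternalError error type of the trait method is assumed from the surrounding code rather than confirmed by this patch.

use internal_error::InternalError;
use kamu_core::auth::{ClassifyByAllowanceIdsResponse, DatasetAction, DatasetActionAuthorizer};
use opendatafabric as odf;

// Hypothetical helper: splits dependency IDs into readable and non-readable
// sets, mirroring how the GQL layer maps the latter to
// DependencyDatasetResultNotAccessible.
async fn split_readable_dependencies(
    authorizer: &dyn DatasetActionAuthorizer,
    dependency_ids: Vec<odf::DatasetID>,
) -> Result<(Vec<odf::DatasetID>, Vec<odf::DatasetID>), InternalError> {
    let ClassifyByAllowanceIdsResponse {
        authorized_ids,
        unauthorized_ids_with_errors,
    } = authorizer
        .classify_dataset_ids_by_allowance(dependency_ids, DatasetAction::Read)
        .await?;

    // The concrete unauthorized error is dropped here: the GQL layer reports
    // these dependencies uniformly as "not accessible".
    let unauthorized_ids = unauthorized_ids_with_errors
        .into_iter()
        .map(|(dataset_id, _)| dataset_id)
        .collect();

    Ok((authorized_ids, unauthorized_ids))
}

Compared with the previous handle-based flow, the caller no longer needs a DatasetRegistry lookup per dependency before it knows whether the dataset is visible, which is what lets the upstream and downstream queries batch dataset-entry and account resolution after classification.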