From 31fb911c2a7d5ded15281706eaeaf522b9e65507 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 13 Dec 2024 14:28:18 -0800 Subject: [PATCH] Add example to top level table_changes --- .../Cargo.toml | 2 +- .../src/main.rs | 0 kernel/src/lib.rs | 6 +++- kernel/src/table_changes/mod.rs | 32 +++++++++++++++++++ kernel/src/table_changes/scan.rs | 10 +++--- 5 files changed, 44 insertions(+), 6 deletions(-) rename kernel/examples/{read_table_changes => read-table-changes}/Cargo.toml (93%) rename kernel/examples/{read_table_changes => read-table-changes}/src/main.rs (100%) diff --git a/kernel/examples/read_table_changes/Cargo.toml b/kernel/examples/read-table-changes/Cargo.toml similarity index 93% rename from kernel/examples/read_table_changes/Cargo.toml rename to kernel/examples/read-table-changes/Cargo.toml index 934becae3..f9f980dc2 100644 --- a/kernel/examples/read_table_changes/Cargo.toml +++ b/kernel/examples/read-table-changes/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "read_table_changes" +name = "read-table-changes" version = "0.1.0" edition = "2021" publish = false diff --git a/kernel/examples/read_table_changes/src/main.rs b/kernel/examples/read-table-changes/src/main.rs similarity index 100% rename from kernel/examples/read_table_changes/src/main.rs rename to kernel/examples/read-table-changes/src/main.rs diff --git a/kernel/src/lib.rs b/kernel/src/lib.rs index e771379d1..4e2e3a174 100644 --- a/kernel/src/lib.rs +++ b/kernel/src/lib.rs @@ -10,12 +10,16 @@ //! //! A full `rust` example for reading table data using the default engine can be found in the //! [read-table-single-threaded] example (and for a more complex multi-threaded reader see the -//! [read-table-multi-threaded] example). +//! [read-table-multi-threaded] example). An example for reading the table changes for a table +//! using the default engine can be found in the [read-table-changes] example. +//! //! //! [read-table-single-threaded]: //! https://github.com/delta-io/delta-kernel-rs/tree/main/kernel/examples/read-table-single-threaded //! [read-table-multi-threaded]: //! https://github.com/delta-io/delta-kernel-rs/tree/main/kernel/examples/read-table-multi-threaded +//![read-table-changes]: +//! https://github.com/delta-io/delta-kernel-rs/tree/main/kernel/examples/read-table-changes //! //! Simple write examples can be found in the [`write.rs`] integration tests. Standalone write //! examples are coming soon! diff --git a/kernel/src/table_changes/mod.rs b/kernel/src/table_changes/mod.rs index 2c15bd537..3ecbc0969 100644 --- a/kernel/src/table_changes/mod.rs +++ b/kernel/src/table_changes/mod.rs @@ -1,4 +1,36 @@ //! Provides an API to read the table's change data feed between two versions. +//! +//! # Example +//! ```rust +//! # use std::sync::Arc; +//! # use delta_kernel::engine::sync::SyncEngine; +//! # use delta_kernel::expressions::{column_expr, Scalar}; +//! # use delta_kernel::{Expression, Table, Error}; +//! # let path = "./tests/data/table-with-cdf"; +//! # let engine = Arc::new(SyncEngine::new()); +//! // Construct a table from a path oaeuhoanut +//! let table = Table::try_from_uri(path)?; +//! +//! // Declare the version range for the table's change data feed +//! let table_changes = table.table_changes(engine.as_ref(), 0, 1)?; +//! +//! // Optionally specify a schema and predicate for the table changes scan +//! let schema = table_changes +//! .schema() +//! .project(&["id", "_commit_version"])?; +//! let predicate = Arc::new(Expression::gt(column_expr!("id"), Scalar::from(10))); +//! +//! // Construct the table changes scan +//! let table_changes_scan = table_changes +//! .into_scan_builder() +//! .with_schema(schema) +//! .with_predicate(predicate.clone()) +//! .build()?; +//! +//! // Execute the table changes scan to get a fallible iterator of `ScanResult`s +//! let batches = table_changes_scan.execute(engine.clone())?; +//! # Ok::<(), Error>(()) +//! ``` use std::collections::HashSet; use std::sync::{Arc, LazyLock}; diff --git a/kernel/src/table_changes/scan.rs b/kernel/src/table_changes/scan.rs index 0902bd2d0..9b0ba3067 100644 --- a/kernel/src/table_changes/scan.rs +++ b/kernel/src/table_changes/scan.rs @@ -1,3 +1,5 @@ +//! Functionality to create and execute table changes scans over the data in the delta table + use std::sync::Arc; use itertools::Itertools; @@ -16,8 +18,8 @@ use super::resolve_dvs::{resolve_scan_file_dv, ResolvedCdfScanFile}; use super::scan_file::scan_data_to_scan_file; use super::{TableChanges, CDF_FIELDS}; -/// The result of building a [`TableChanges`] scan over a table. This can be used to get a change -/// data feed from the table +/// The result of building a [`TableChanges`] scan over a table. This can be used to get the change +/// data feed from the table. #[derive(Debug)] pub struct TableChangesScan { // The [`TableChanges`] that specifies this scan's start and end versions @@ -37,7 +39,7 @@ pub struct TableChangesScan { /// This builder constructs a [`TableChangesScan`] that can be used to read the [`TableChanges`] /// of a table. [`TableChangesScanBuilder`] allows you to specify a schema to project the columns -/// or specify a predicate to filter rows in the Change Data Feed. Note that predicates over Change +/// or specify a predicate to filter rows in the Change Data Feed. Note that predicates containing Change /// Data Feed columns `_change_type`, `_commit_version`, and `_commit_timestamp` are not currently /// allowed. See issue [#525](https://github.com/delta-io/delta-kernel-rs/issues/525). /// @@ -45,7 +47,7 @@ pub struct TableChangesScan { /// [`ScanBuilder`]. /// /// [`ScanBuilder`]: crate::scan::ScanBuilder -/// #Examples +/// # Example /// Construct a [`TableChangesScan`] from `table_changes` with a given schema and predicate /// ```rust /// # use std::sync::Arc;