From d65fc1831986453433d130e080f0ad474aaaf716 Mon Sep 17 00:00:00 2001 From: Stephen Carman Date: Tue, 16 Jan 2024 11:28:44 -0500 Subject: [PATCH] docs: add usage guide for check constraints (#2079) # Description Added a usage guide for adding check constraints to tables # Related Issue(s) Closes #2063 # Documentation I just wrote it --- docs/src/python/check_constraints.py | 22 ++++++++++++++++++++++ docs/src/rust/check_constraints.rs | 23 +++++++++++++++++++++++ docs/usage/constraints.md | 17 +++++++++++++++++ mkdocs.yml | 1 + 4 files changed, 63 insertions(+) create mode 100644 docs/src/python/check_constraints.py create mode 100644 docs/src/rust/check_constraints.rs create mode 100644 docs/usage/constraints.md diff --git a/docs/src/python/check_constraints.py b/docs/src/python/check_constraints.py new file mode 100644 index 0000000000..16fb8bf374 --- /dev/null +++ b/docs/src/python/check_constraints.py @@ -0,0 +1,22 @@ +def add_constraint(): + # --8<-- [start:add_constraint] + from deltalake import DeltaTable + + dt = DeltaTable("../rust/tests/data/simple_table") + + # Check the schema before hand + print(dt.schema()) + # Add the constraint to the table. 
+ dt.alter.add_constraint({"id_gt_0": "id > 0"}) + # --8<-- [end:add_constraint] + + +def add_data(): + # --8<-- [start:add_data] + from deltalake import write_deltalake + import pandas as pd + + df = pd.DataFrame({"id": [-1]}) + write_deltalake(dt, df, mode="append", engine="rust") + # _internal.DeltaProtocolError: Invariant violations: ["Check or Invariant (id > 0) violated by value in row: [-1]"] + # --8<-- [end:add_data] diff --git a/docs/src/rust/check_constraints.rs b/docs/src/rust/check_constraints.rs new file mode 100644 index 0000000000..fbc2cf18d6 --- /dev/null +++ b/docs/src/rust/check_constraints.rs @@ -0,0 +1,23 @@ +use std::sync::Arc; + +#[tokio::main] +async fn main() -> Result<(), Box<dyn std::error::Error>> { + + // --8<-- [start:add_constraint] + let table = deltalake::open_table("../rust/tests/data/simple_table").await?; + let ops = DeltaOps(table); + ops.with_constraint("id_gt_0", "id > 0").await?; + // --8<-- [end:add_constraint] + + // --8<-- [start:add_data] + let table = deltalake::open_table("../rust/tests/data/simple_table").await?; + let schema = table.get_state().arrow_schema()?; + let invalid_values: Vec<Arc<dyn Array>> = vec![ + Arc::new(Int32Array::from(vec![-10])) + ]; + let batch = RecordBatch::try_new(schema, invalid_values)?; + table.write(vec![batch]).await?; + // --8<-- [end:add_data] + + Ok(()) +} \ No newline at end of file diff --git a/docs/usage/constraints.md b/docs/usage/constraints.md new file mode 100644 index 0000000000..3e8bc8122c --- /dev/null +++ b/docs/usage/constraints.md @@ -0,0 +1,17 @@ +# Adding a Constraint to a table + +Check constraints are a way to enforce that only data that meets the constraint is allowed to be added to the table. + +## Add the Constraint + +{{ code_example('check_constraints', 'add_constraint', ['DeltaTable']) }} + +After you have added the constraint to the table attempting to append data to the table that violates the constraint
+ +## Verify the constraint by trying to add some data + +{{ code_example('check_constraints', 'add_data', []) }} + +Note: ensure you use the `engine='rust'` parameter when writing to the table as this feature is not supported in the +default pyarrow writer. \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 10e1dc4df5..c9164be25d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -52,6 +52,7 @@ nav: - Creating a table: usage/create-delta-lake-table.md - Loading a table: usage/loading-table.md - Append/overwrite tables: usage/appending-overwriting-delta-lake-table.md + - Adding a constraint: usage/constraints.md - Examining a table: usage/examining-table.md - Querying a table: usage/querying-delta-tables.md - Managing a table: usage/managing-tables.md