table_uri |
- Union[str, Path]
+ Union[str, Path, PathLike[str]]
|
@@ -1727,13 +1535,13 @@
-
-
-
-
-
-
-
-
-
-
-
-
- Metadata
-
-
-
- dataclass
-
-
-
- Metadata(table: RawDeltaTable)
-
-
-
-
-
- Create a Metadata instance.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- configuration
-
-
-
- property
-
-
-
- configuration: Dict[str, str]
-
-
-
-
- Return the DeltaTable properties.
-
-
-
-
-
-
-
-
-
-
- created_time
-
-
-
- property
-
-
-
-
-
-
-
- Return the time when this metadata action of the DeltaTable was created, in milliseconds since the Unix epoch.
-
-
-
-
-
-
-
-
-
-
- description
-
-
-
- property
-
-
-
-
-
-
-
- Return the user-provided description of the DeltaTable.
-
-
-
-
-
-
-
-
-
-
- id
-
-
-
- property
-
-
-
-
-
-
-
- Return the unique identifier of the DeltaTable.
-
-
-
-
-
-
-
-
-
-
- name
-
-
-
- property
-
-
-
-
-
-
-
- Return the user-provided identifier of the DeltaTable.
-
-
-
-
-
-
-
-
-
-
- partition_columns
-
-
-
- property
-
-
-
- partition_columns: List[str]
-
-
-
-
- Return an array containing the names of the partitioned columns of the DeltaTable.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- TableMerger
-
-
-
- TableMerger(table: DeltaTable, source: pyarrow.RecordBatchReader, predicate: str, source_alias: Optional[str] = None, target_alias: Optional[str] = None, safe_cast: bool = True)
-
-
-
-
-
- API for various table MERGE commands.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- execute
-
-
-
- execute() -> Dict[str, Any]
-
-
-
-
- Executes MERGE with the previously provided settings in Rust with the Apache DataFusion query engine.
-
-
-
- Returns:
-
-
-
- Type |
- Description |
-
-
-
+ None
+
+
+ log_buffer_size |
- Dict[str, Any]
+ Optional[int]
|
- Dict[str, Any]: metrics
+ Number of files to buffer when reading the commit log. A positive integer.
+ Setting a value greater than 1 results in concurrent calls to the storage api.
+ This can decrease latency if there are many files in the log since the last checkpoint,
+ but will also increase memory usage. Possible rate limits of the storage backend should
+ also be considered for optimal performance. Defaults to 4 * number of cpus.
|
+
+ None
+ |
@@ -3777,18 +2156,19 @@
-
- when_matched_delete
+
+ get_add_actions
-when_matched_delete(predicate: Optional[str] = None) -> TableMerger
-
+get_add_actions(flatten: bool = False) -> pyarrow.RecordBatch
+
- Delete a matched row from the table only if the given predicate (if specified) is
-true for the matched row. If not specified it deletes all matches.
+ Return a dataframe with all current add actions.
+ Add actions represent the files that currently make up the table. This
+data is a low-level representation parsed from the transaction log.
@@ -3804,17 +2184,20 @@
- predicate |
+ flatten |
- (str | None, Optional)
+ bool
|
- SQL like predicate on when to delete. Defaults to None.
+ whether to flatten the schema. Partition values columns are
+ given the prefix `partition.`, statistics (null_count, min, and max) are
+ given the prefixes `null_count.`, `min.`, and `max.`, and tags the
+ prefix `tags.`. Nested field names are concatenated with `.`.
|
- None
+ False
|
@@ -3826,66 +2209,45 @@
-Name | Type |
+ Type |
Description |
-TableMerger |
- TableMerger
+ |
+ RecordBatch
|
- TableMerger Object
+ a PyArrow RecordBatch containing the add action data.
|
- Examples:
- Delete on a predicate
-
->>> from deltalake import DeltaTable, write_deltalake
->>> import pyarrow as pa
->>> data = pa.table({"x": [1, 2, 3], "y": [4, 5, 6]})
->>> write_deltalake("tmp", data)
->>> dt = DeltaTable("tmp")
->>> new_data = pa.table({"x": [2, 3], "deleted": [False, True]})
->>> (
-... dt.merge(
-... source=new_data,
-... predicate='target.x = source.x',
-... source_alias='source',
-... target_alias='target')
-... .when_matched_delete(
-... predicate="source.deleted = true")
-... .execute()
-... )
-{'num_source_rows': 2, 'num_target_rows_inserted': 0, 'num_target_rows_updated': 0, 'num_target_rows_deleted': 1, 'num_target_rows_copied': 2, 'num_output_rows': 2, 'num_target_files_added': 1, 'num_target_files_removed': 1, 'execution_time_ms': ..., 'scan_time_ms': ..., 'rewrite_time_ms': ...}
->>> dt.to_pandas().sort_values("x", ignore_index=True)
- x y
-0 1 4
-1 2 5
-
-Delete all records that were matched
-
->>> dt = DeltaTable("tmp")
->>> (
-... dt.merge(
-... source=new_data,
-... predicate='target.x = source.x',
-... source_alias='source',
-... target_alias='target')
-... .when_matched_delete()
-... .execute()
-... )
-{'num_source_rows': 2, 'num_target_rows_inserted': 0, 'num_target_rows_updated': 0, 'num_target_rows_deleted': 1, 'num_target_rows_copied': 1, 'num_output_rows': 1, 'num_target_files_added': 1, 'num_target_files_removed': 1, 'execution_time_ms': ..., 'scan_time_ms': ..., 'rewrite_time_ms': ...}
->>> dt.to_pandas()
- x y
-0 1 4
-
+
+ Example
+ from pprint import pprint
+from deltalake import DeltaTable, write_deltalake
+import pyarrow as pa
+data = pa.table({"x": [1, 2, 3], "y": [4, 5, 6]})
+write_deltalake("tmp", data, partition_by=["x"])
+dt = DeltaTable("tmp")
+df = dt.get_add_actions().to_pandas()
+df["path"].sort_values(ignore_index=True)
+0 x=1/0
+1 x=2/0
+2 x=3/0
+
+df = dt.get_add_actions(flatten=True).to_pandas()
+df["partition.x"].sort_values(ignore_index=True)
+0 1
+1 2
+2 3
+
+
@@ -3896,18 +2258,18 @@
- when_matched_update
+
+ history
-when_matched_update(updates: Dict[str, str], predicate: Optional[str] = None) -> TableMerger
-
+history(limit: Optional[int] = None) -> List[Dict[str, Any]]
+
- Update a matched table row based on the rules defined by `updates`.
-If a predicate is specified, then it must evaluate to true for the row to be updated.
+ Run the history command on the DeltaTable.
+The operations are returned in reverse chronological order.
@@ -3923,27 +2285,13 @@
- updates |
-
- dict
- |
-
-
- a mapping of column name to update SQL expression.
-
- |
-
- required
- |
-
-
- predicate |
+ limit |
- (str | None, Optional)
+ Optional[int]
|
- SQL like predicate on when to update. Defaults to None.
+ the maximum number of commit infos to return
|
@@ -3959,46 +2307,23 @@
-Name | Type |
+ Type |
Description |
-TableMerger |
- TableMerger
+ |
+ List[Dict[str, Any]]
|
- TableMerger Object
+ list of the commit infos registered in the transaction log
|
- Examples:
->>> from deltalake import DeltaTable, write_deltalake
->>> import pyarrow as pa
->>> data = pa.table({"x": [1, 2, 3], "y": [4, 5, 6]})
->>> write_deltalake("tmp", data)
->>> dt = DeltaTable("tmp")
->>> new_data = pa.table({"x": [1], "y": [7]})
->>> (
-... dt.merge(
-... source=new_data,
-... predicate="target.x = source.x",
-... source_alias="source",
-... target_alias="target")
-... .when_matched_update(updates={"x": "source.x", "y": "source.y"})
-... .execute()
-... )
-{'num_source_rows': 1, 'num_target_rows_inserted': 0, 'num_target_rows_updated': 1, 'num_target_rows_deleted': 0, 'num_target_rows_copied': 2, 'num_output_rows': 3, 'num_target_files_added': 1, 'num_target_files_removed': 1, 'execution_time_ms': ..., 'scan_time_ms': ..., 'rewrite_time_ms': ...}
->>> dt.to_pandas()
- x y
-0 1 7
-1 2 5
-2 3 6
-
@@ -4010,18 +2335,17 @@
- when_matched_update_all
+
+ load_version
-when_matched_update_all(predicate: Optional[str] = None) -> TableMerger
-
+load_version(version: int) -> None
+
- Update all source fields to target fields; source and target are required to have the same field names.
-If a predicate is specified, then it must evaluate to true for the row to be updated.
+ Load a DeltaTable with a specified version.
@@ -4037,68 +2361,21 @@ Optional)
+ int
|
- SQL like predicate on when to update all columns. Defaults to None.
+ the identifier of the version of the DeltaTable to load
|
- None
- |
-
-
-
-
-
-
- Returns:
-
-
-
-Name | Type |
- Description |
-
-
-
-
-TableMerger |
- TableMerger
- |
-
-
+ required
|
- Examples:
- >>> from deltalake import DeltaTable, write_deltalake
->>> import pyarrow as pa
->>> data = pa.table({"x": [1, 2, 3], "y": [4, 5, 6]})
->>> write_deltalake("tmp", data)
->>> dt = DeltaTable("tmp")
->>> new_data = pa.table({"x": [1], "y": [7]})
->>> (
-... dt.merge(
-... source=new_data,
-... predicate="target.x = source.x",
-... source_alias="source",
-... target_alias="target")
-... .when_matched_update_all()
-... .execute()
-... )
-{'num_source_rows': 1, 'num_target_rows_inserted': 0, 'num_target_rows_updated': 1, 'num_target_rows_deleted': 0, 'num_target_rows_copied': 2, 'num_output_rows': 3, 'num_target_files_added': 1, 'num_target_files_removed': 1, 'execution_time_ms': ..., 'scan_time_ms': ..., 'rewrite_time_ms': ...}
->>> dt.to_pandas()
- x y
-0 1 7
-1 2 5
-2 3 6
-
@@ -4110,18 +2387,18 @@
- when_not_matched_by_source_delete
+
+ load_with_datetime
-when_not_matched_by_source_delete(predicate: Optional[str] = None) -> TableMerger
-
+load_with_datetime(datetime_string: str) -> None
+
- Delete a target row that has no matches in the source from the table only if the given
-predicate (if specified) is true for the target row.
+ Time travel the Delta table to the latest version that was created at or before the provided datetime_string argument.
+The datetime_string argument should be an RFC 3339 and ISO 8601 date and time string.
@@ -4137,46 +2414,29 @@ Optional)
+ str
|
- SQL like predicate on when to delete when not matched by source. Defaults to None.
+ the identifier of the datetime point of the DeltaTable to load
|
- None
- |
-
-
-
-
-
-
-