diff --git a/python/deltalake/table.py b/python/deltalake/table.py
index 3aba38bff0..e3b91f8447 100644
--- a/python/deltalake/table.py
+++ b/python/deltalake/table.py
@@ -511,6 +511,25 @@ def version(self) -> int:
         """
         return self._table.version()
 
+    def partitions(
+        self,
+        partition_filters: Optional[List[Tuple[str, str, Any]]] = None,
+    ) -> List[Dict[str, str]]:
+        """
+        Returns the partitions as a list of dicts. Example: `[{'month': '1', 'year': '2020', 'day': '1'}, ...]`
+
+        Args:
+            partition_filters: The partition filters that will be used for getting the matched partitions, defaults to `None` (no filtering).
+
+        Returns:
+            One dict per active partition, mapping each partition column name to its value. Empty partition entries are skipped.
+        """
+        return [
+            dict(partition)
+            for partition in self._table.get_active_partitions(partition_filters)
+            if partition
+        ]
+
     def files(
         self, partition_filters: Optional[List[Tuple[str, str, Any]]] = None
     ) -> List[str]:
diff --git a/python/tests/test_table_read.py b/python/tests/test_table_read.py
index cc36fc0274..5ff07ed9e8 100644
--- a/python/tests/test_table_read.py
+++ b/python/tests/test_table_read.py
@@ -1,4 +1,5 @@
 import os
+import tempfile
 from datetime import date, datetime, timezone
 from pathlib import Path
 from random import random
@@ -839,6 +840,83 @@ def test_encode_partition_value(input_value: Any, expected: str) -> None:
         assert encode_partition_value(input_value) == expected
 
 
+def test_partitions_partitioned_table():
+    table_path = "../crates/test/tests/data/delta-0.8.0-partitioned"
+    dt = DeltaTable(table_path)
+    expected = [
+        {"year": "2020", "month": "2", "day": "5"},
+        {"year": "2021", "month": "12", "day": "4"},
+        {"year": "2020", "month": "2", "day": "3"},
+        {"year": "2021", "month": "4", "day": "5"},
+        {"year": "2020", "month": "1", "day": "1"},
+        {"year": "2021", "month": "12", "day": "20"},
+    ]
+    actual = dt.partitions()
+    for partition in expected:
+        assert partition in actual
+
+
+def test_partitions_filtering_partitioned_table():
+    table_path = "../crates/test/tests/data/delta-0.8.0-partitioned"
+    dt = DeltaTable(table_path)
+    expected = [
+        {"day": "5", "month": "4", "year": "2021"},
+        {"day": "20", "month": "12", "year": "2021"},
+        {"day": "4", "month": "12", "year": "2021"},
+    ]
+
+    partition_filters = [("year", ">=", "2021")]
+    actual = dt.partitions(partition_filters=partition_filters)
+    assert len(expected) == len(actual)
+    for partition in expected:
+        assert partition in actual
+
+
+def test_partitions_date_partitioned_table():
+    table_path = tempfile.gettempdir() + "/date_partition_table"
+    date_partitions = [
+        date(2024, 8, 1),
+        date(2024, 8, 2),
+        date(2024, 8, 3),
+        date(2024, 8, 4),
+    ]
+    sample_data = pa.table(
+        {
+            "date_field": pa.array(date_partitions, pa.date32()),
+            "numeric_data": pa.array([1, 2, 3, 4], pa.int64()),
+        }
+    )
+    write_deltalake(
+        table_path, sample_data, mode="overwrite", partition_by=["date_field"]
+    )
+
+    delta_table = DeltaTable(table_path)
+    expected = [
+        {"date_field": "2024-08-01"},
+        {"date_field": "2024-08-02"},
+        {"date_field": "2024-08-03"},
+        {"date_field": "2024-08-04"},
+    ]
+    actual = sorted(delta_table.partitions(), key=lambda x: x["date_field"])
+    assert expected == actual
+
+
+def test_partitions_special_partitioned_table():
+    table_path = "../crates/test/tests/data/delta-0.8.0-special-partition"
+    dt = DeltaTable(table_path)
+
+    expected = [{"x": "A/A"}, {"x": "B B"}]
+    actual = dt.partitions()
+    for partition in expected:
+        assert partition in actual
+
+
+def test_partitions_unpartitioned_table():
+    table_path = "../crates/test/tests/data/simple_table"
+    dt = DeltaTable(table_path)
+    assert len(dt.partitions()) == 0
+
+
 def test_read_table_last_checkpoint_not_updated():
     dt = DeltaTable("../crates/test/tests/data/table_failed_last_checkpoint_update")
 