Skip to content

Commit 36897b7

Browse files
authored
Add tests for Partition Spec Evolution (#2479)
<!-- Thanks for opening a pull request! -->
<!-- In the case this PR will resolve an issue, please replace ${GITHUB_ISSUE_ID} below with the actual Github issue id. -->
<!-- Closes #${GITHUB_ISSUE_ID} -->

# Rationale for this change

The Java implementation has several tests around updating table partition specs. This adds three of them to the Python catalog integration tests.

## Are these changes tested?

Yes — the change itself consists of tests, which should pass.

## Are there any user-facing changes?

No, just tests.

<!-- In the case of user-facing changes, please add the changelog label. -->
1 parent 94ce205 commit 36897b7

File tree

1 file changed

+64
-1
lines changed

1 file changed

+64
-1
lines changed

tests/integration/test_catalog.py

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
from pyiceberg.schema import INITIAL_SCHEMA_ID, Schema
4040
from pyiceberg.table.metadata import INITIAL_SPEC_ID
4141
from pyiceberg.table.sorting import INITIAL_SORT_ORDER_ID, SortField, SortOrder
42-
from pyiceberg.transforms import DayTransform, IdentityTransform
42+
from pyiceberg.transforms import BucketTransform, DayTransform, IdentityTransform
4343
from pyiceberg.types import IntegerType, LongType, NestedField, TimestampType, UUIDType
4444
from tests.conftest import clean_up
4545

@@ -503,6 +503,69 @@ def test_update_namespace_properties(test_catalog: Catalog, database_name: str)
503503
assert "updated test description" == test_catalog.load_namespace_properties(database_name)["comment"]
504504

505505

506+
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_update_table_spec(test_catalog: Catalog, test_schema: Schema, table_name: str, database_name: str) -> None:
    """Add a bucket partition field and verify it is visible after reloading the table.

    The table is created without an explicit partition spec, a ``shard`` field
    (16-way bucket on ``VendorID``) is added through ``update_spec()``, and the
    table is then loaded again from the catalog to check the committed fields.
    """
    identifier = (database_name, table_name)
    test_catalog.create_namespace(database_name)
    table = test_catalog.create_table(identifier, test_schema)

    # Leaving the ``with`` block commits the staged spec change.
    with table.update_spec() as update:
        update.add_field(source_column_name="VendorID", transform=BucketTransform(16), partition_field_name="shard")

    loaded = test_catalog.load_table(identifier)
    expected_spec = PartitionSpec(
        PartitionField(source_id=1, field_id=1000, transform=BucketTransform(16), name="shard"), spec_id=1
    )
    # The spec ID may not match, so check equality of the fields
    assert loaded.spec().fields == expected_spec.fields
522+
523+
524+
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_update_table_spec_conflict(test_catalog: Catalog, test_schema: Schema, table_name: str, database_name: str) -> None:
    """A staged spec update must fail to commit after a competing spec update lands first.

    One update is staged (but not committed) on the original table handle, a
    second handle to the same table commits a different spec change, and the
    staged update is then expected to raise ``CommitFailedException``.
    """
    bucketed_spec = PartitionSpec(PartitionField(source_id=1, field_id=1000, transform=BucketTransform(16), name="id_bucket"))
    identifier = (database_name, table_name)
    test_catalog.create_namespace(database_name)
    table = test_catalog.create_table(identifier, test_schema, partition_spec=bucketed_spec)

    # Stage a change without committing it, so the competing commit below goes first.
    stale_update = table.update_spec()
    stale_update.add_field(
        source_column_name="tpep_pickup_datetime",
        transform=BucketTransform(16),
        partition_field_name="shard",
    )

    # update with conflict
    other_handle = test_catalog.load_table(identifier)
    with other_handle.update_spec() as winning_update:
        winning_update.remove_field("id_bucket")

    # Either message is accepted; the alternation is part of the regex.
    expected_error = "Requirement failed: default spec id has changed|default partition spec changed"
    with pytest.raises(CommitFailedException, match=expected_error):
        stale_update.commit()

    # Only the competing update should be reflected in the catalog.
    reloaded = test_catalog.load_table(identifier)
    assert reloaded.spec() == PartitionSpec(spec_id=1)
547+
548+
549+
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_update_table_spec_then_revert(test_catalog: Catalog, test_schema: Schema, table_name: str, database_name: str) -> None:
    """Adding a partition field and then removing it leaves the table on its initial spec.

    The table is created as format version 2 with a single bucket field; an
    identity field is added and removed in two separate commits, after which
    the table's spec is expected to equal the one it was created with.
    """
    initial_spec = PartitionSpec(PartitionField(source_id=1, field_id=1000, transform=BucketTransform(16), name="id_bucket"))
    identifier = (database_name, table_name)
    test_catalog.create_namespace(database_name)

    table = test_catalog.create_table(
        identifier,
        test_schema,
        partition_spec=initial_spec,
        properties={"format-version": "2"},
    )
    assert table.format_version == 2

    # First commit: add the identity partition field.
    with table.update_spec() as add_step:
        add_step.add_identity(source_column_name="tpep_pickup_datetime")

    # Second commit: remove that same field again.
    with table.update_spec() as revert_step:
        revert_step.remove_field("tpep_pickup_datetime")

    assert table.spec() == initial_spec
567+
568+
506569
@pytest.mark.integration
507570
@pytest.mark.parametrize("test_catalog", CATALOGS)
508571
def test_register_table(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None:

0 commit comments

Comments
 (0)