diff --git a/api/src/main/java/org/apache/iceberg/PartitionSpec.java b/api/src/main/java/org/apache/iceberg/PartitionSpec.java index 08a1c4f9ecfd..9b74893f1831 100644 --- a/api/src/main/java/org/apache/iceberg/PartitionSpec.java +++ b/api/src/main/java/org/apache/iceberg/PartitionSpec.java @@ -371,6 +371,7 @@ public static class Builder { new AtomicInteger(unpartitionedLastAssignedId()); // check if there are conflicts between partition and schema field name private boolean checkConflicts = true; + private boolean caseSensitive = true; private Builder(Schema schema) { this.schema = schema; @@ -390,7 +391,8 @@ Builder checkConflicts(boolean check) { } private void checkAndAddPartitionName(String name, Integer sourceColumnId) { - Types.NestedField schemaField = schema.findField(name); + Types.NestedField schemaField = + this.caseSensitive ? schema.findField(name) : schema.caseInsensitiveFindField(name); if (checkConflicts) { if (sourceColumnId != null) { // for identity transform case we allow conflicts between partition and schema field name @@ -427,20 +429,31 @@ private void checkForRedundantPartitions(PartitionField field) { dedupFields.put(dedupKey, field); } + public Builder caseSensitive(boolean sensitive) { + this.caseSensitive = sensitive; + return this; + } + public Builder withSpecId(int newSpecId) { this.specId = newSpecId; return this; } private Types.NestedField findSourceColumn(String sourceName) { - Types.NestedField sourceColumn = schema.findField(sourceName); + Types.NestedField sourceColumn = + this.caseSensitive + ? 
schema.findField(sourceName) + : schema.caseInsensitiveFindField(sourceName); Preconditions.checkArgument( sourceColumn != null, "Cannot find source column: %s", sourceName); return sourceColumn; } Builder identity(String sourceName, String targetName) { - Types.NestedField sourceColumn = findSourceColumn(sourceName); + return identity(findSourceColumn(sourceName), targetName); + } + + private Builder identity(Types.NestedField sourceColumn, String targetName) { checkAndAddPartitionName(targetName, sourceColumn.fieldId()); PartitionField field = new PartitionField( @@ -451,12 +464,16 @@ Builder identity(String sourceName, String targetName) { } public Builder identity(String sourceName) { - return identity(sourceName, sourceName); + Types.NestedField sourceColumn = findSourceColumn(sourceName); + return identity(sourceColumn, schema.findColumnName(sourceColumn.fieldId())); } public Builder year(String sourceName, String targetName) { + return year(findSourceColumn(sourceName), targetName); + } + + private Builder year(Types.NestedField sourceColumn, String targetName) { checkAndAddPartitionName(targetName); - Types.NestedField sourceColumn = findSourceColumn(sourceName); PartitionField field = new PartitionField(sourceColumn.fieldId(), nextFieldId(), targetName, Transforms.year()); checkForRedundantPartitions(field); @@ -465,12 +482,17 @@ public Builder year(String sourceName, String targetName) { } public Builder year(String sourceName) { - return year(sourceName, sourceName + "_year"); + Types.NestedField sourceColumn = findSourceColumn(sourceName); + String columnName = schema.findColumnName(sourceColumn.fieldId()); + return year(sourceColumn, columnName + "_year"); } public Builder month(String sourceName, String targetName) { + return month(findSourceColumn(sourceName), targetName); + } + + private Builder month(Types.NestedField sourceColumn, String targetName) { checkAndAddPartitionName(targetName); - Types.NestedField sourceColumn = 
findSourceColumn(sourceName); PartitionField field = new PartitionField(sourceColumn.fieldId(), nextFieldId(), targetName, Transforms.month()); checkForRedundantPartitions(field); @@ -479,12 +501,17 @@ public Builder month(String sourceName, String targetName) { } public Builder month(String sourceName) { - return month(sourceName, sourceName + "_month"); + Types.NestedField sourceColumn = findSourceColumn(sourceName); + String columnName = schema.findColumnName(sourceColumn.fieldId()); + return month(sourceColumn, columnName + "_month"); } public Builder day(String sourceName, String targetName) { + return day(findSourceColumn(sourceName), targetName); + } + + private Builder day(Types.NestedField sourceColumn, String targetName) { checkAndAddPartitionName(targetName); - Types.NestedField sourceColumn = findSourceColumn(sourceName); PartitionField field = new PartitionField(sourceColumn.fieldId(), nextFieldId(), targetName, Transforms.day()); checkForRedundantPartitions(field); @@ -493,12 +520,17 @@ public Builder day(String sourceName, String targetName) { } public Builder day(String sourceName) { - return day(sourceName, sourceName + "_day"); + Types.NestedField sourceColumn = findSourceColumn(sourceName); + String columnName = schema.findColumnName(sourceColumn.fieldId()); + return day(sourceColumn, columnName + "_day"); } public Builder hour(String sourceName, String targetName) { + return hour(findSourceColumn(sourceName), targetName); + } + + private Builder hour(Types.NestedField sourceColumn, String targetName) { checkAndAddPartitionName(targetName); - Types.NestedField sourceColumn = findSourceColumn(sourceName); PartitionField field = new PartitionField(sourceColumn.fieldId(), nextFieldId(), targetName, Transforms.hour()); checkForRedundantPartitions(field); @@ -507,12 +539,17 @@ public Builder hour(String sourceName, String targetName) { } public Builder hour(String sourceName) { - return hour(sourceName, sourceName + "_hour"); + Types.NestedField 
sourceColumn = findSourceColumn(sourceName); + String columnName = schema.findColumnName(sourceColumn.fieldId()); + return hour(sourceColumn, columnName + "_hour"); } public Builder bucket(String sourceName, int numBuckets, String targetName) { + return bucket(findSourceColumn(sourceName), numBuckets, targetName); + } + + private Builder bucket(Types.NestedField sourceColumn, int numBuckets, String targetName) { checkAndAddPartitionName(targetName); - Types.NestedField sourceColumn = findSourceColumn(sourceName); fields.add( new PartitionField( sourceColumn.fieldId(), nextFieldId(), targetName, Transforms.bucket(numBuckets))); @@ -520,12 +557,17 @@ public Builder bucket(String sourceName, int numBuckets, String targetName) { } public Builder bucket(String sourceName, int numBuckets) { - return bucket(sourceName, numBuckets, sourceName + "_bucket"); + Types.NestedField sourceColumn = findSourceColumn(sourceName); + String columnName = schema.findColumnName(sourceColumn.fieldId()); + return bucket(sourceColumn, numBuckets, columnName + "_bucket"); } public Builder truncate(String sourceName, int width, String targetName) { + return truncate(findSourceColumn(sourceName), width, targetName); + } + + private Builder truncate(Types.NestedField sourceColumn, int width, String targetName) { checkAndAddPartitionName(targetName); - Types.NestedField sourceColumn = findSourceColumn(sourceName); fields.add( new PartitionField( sourceColumn.fieldId(), nextFieldId(), targetName, Transforms.truncate(width))); @@ -533,11 +575,16 @@ public Builder truncate(String sourceName, int width, String targetName) { } public Builder truncate(String sourceName, int width) { - return truncate(sourceName, width, sourceName + "_trunc"); + Types.NestedField sourceColumn = findSourceColumn(sourceName); + String columnName = schema.findColumnName(sourceColumn.fieldId()); + return truncate(sourceColumn, width, columnName + "_trunc"); } public Builder alwaysNull(String sourceName, String targetName) 
{ - Types.NestedField sourceColumn = findSourceColumn(sourceName); + return alwaysNull(findSourceColumn(sourceName), targetName); + } + + private Builder alwaysNull(Types.NestedField sourceColumn, String targetName) { checkAndAddPartitionName( targetName, sourceColumn.fieldId()); // can duplicate a source column name fields.add( @@ -547,7 +594,9 @@ public Builder alwaysNull(String sourceName, String targetName) { } public Builder alwaysNull(String sourceName) { - return alwaysNull(sourceName, sourceName + "_null"); + Types.NestedField sourceColumn = findSourceColumn(sourceName); + String columnName = schema.findColumnName(sourceColumn.fieldId()); + return alwaysNull(sourceColumn, columnName + "_null"); } // add a partition field with an auto-increment partition field id starting from diff --git a/api/src/main/java/org/apache/iceberg/types/TypeUtil.java b/api/src/main/java/org/apache/iceberg/types/TypeUtil.java index 07d06dcc5a89..7298dfca4c03 100644 --- a/api/src/main/java/org/apache/iceberg/types/TypeUtil.java +++ b/api/src/main/java/org/apache/iceberg/types/TypeUtil.java @@ -181,11 +181,37 @@ public static Map indexQuotedNameById( return indexer.byId(); } + /** + * Creates a mapping from lower-case field names to their corresponding field IDs. + * + *

<p>This method iterates over the fields of the provided struct and maps each field's name + * (converted to lower-case) to its ID. If two fields have the same lower-case name, an + * {@code IllegalArgumentException} is thrown. + * + * @param struct the struct type whose fields are to be indexed + * @return a map where the keys are lower-case field names and the values are field IDs + * @throws IllegalArgumentException if two fields have the same lower-case name + */ public static Map indexByLowerCaseName(Types.StructType struct) { Map indexByLowerCaseName = Maps.newHashMap(); + + IndexByName indexer = new IndexByName(); + visit(struct, indexer); + Map byName = indexer.byName(); + Map byId = indexer.byId(); + indexByName(struct) .forEach( - (name, integer) -> indexByLowerCaseName.put(name.toLowerCase(Locale.ROOT), integer)); + (name, fieldId) -> { + String key = name.toLowerCase(Locale.ROOT); + Integer existingId = indexByLowerCaseName.put(key, fieldId); + Preconditions.checkArgument( + existingId == null || existingId.equals(fieldId), + "Cannot build lower case index: %s and %s collide", + byId.get(existingId), + byId.get(fieldId)); + indexByLowerCaseName.put(key, fieldId); + }); return indexByLowerCaseName; } diff --git a/api/src/test/java/org/apache/iceberg/TestSchemaCaseSensitivity.java b/api/src/test/java/org/apache/iceberg/TestSchemaCaseSensitivity.java new file mode 100644 index 000000000000..bdb73374bf21 --- /dev/null +++ b/api/src/test/java/org/apache/iceberg/TestSchemaCaseSensitivity.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatIllegalArgumentException; + +import org.apache.iceberg.types.Types; +import org.junit.jupiter.api.Test; + +public class TestSchemaCaseSensitivity { + + @Test + public void testCaseInsensitiveFieldCollision() { + Schema schema = + new Schema( + required(1, "id", Types.LongType.get()), + required(2, "data", Types.StringType.get()), + required(3, "DATA", Types.StringType.get())); + assertThatIllegalArgumentException() + .isThrownBy(() -> schema.caseInsensitiveFindField("DATA")) + .withMessage("Cannot build lower case index: data and DATA collide"); + } + + @Test + public void testCaseSensitiveFindField() { + Schema schema = + new Schema( + required(1, "id", Types.LongType.get()), + required(2, "data", Types.StringType.get()), + required(3, "DATA", Types.StringType.get())); + + Types.NestedField actual1 = schema.findField("data"); + assertThat(actual1).isEqualTo(Types.NestedField.required(2, "data", Types.StringType.get())); + Types.NestedField actual2 = schema.findField("DATA"); + assertThat(actual2).isEqualTo(Types.NestedField.required(3, "DATA", Types.StringType.get())); + } + + @Test + public void testCaseInsensitiveField() { + Schema schema = + new Schema( + required(1, "id", Types.LongType.get()), required(2, "data", Types.StringType.get())); + + Types.NestedField actual1 = schema.caseInsensitiveFindField("DATA"); + 
assertThat(actual1).isEqualTo(Types.NestedField.required(2, "data", Types.StringType.get())); + } +} diff --git a/core/src/test/java/org/apache/iceberg/TestPartitionSpecBuilderCaseSensitivity.java b/core/src/test/java/org/apache/iceberg/TestPartitionSpecBuilderCaseSensitivity.java new file mode 100644 index 000000000000..349912bd78df --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/TestPartitionSpecBuilderCaseSensitivity.java @@ -0,0 +1,873 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg; + +import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatIllegalArgumentException; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.types.Types.NestedField; +import org.apache.iceberg.types.Types.StructType; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +public class TestPartitionSpecBuilderCaseSensitivity { + + private static final int V2_FORMAT_VERSION = 2; + private static final Schema SCHEMA_WITHOUT_NAME_CONFLICTS = + new Schema( + required(1, "id", Types.IntegerType.get()), + required(2, "data", Types.StringType.get()), + required(3, "category", Types.StringType.get()), + required(4, "order_date", Types.DateType.get()), + required(5, "order_time", Types.TimestampType.withoutZone()), + required(6, "ship_date", Types.DateType.get()), + required(7, "ship_time", Types.TimestampType.withoutZone())); + + private static final Schema SCHEMA_WITH_NAME_CONFLICTS = + new Schema( + required(1, "id", Types.IntegerType.get()), + required(2, "data", Types.StringType.get()), + required(3, "DATA", Types.StringType.get()), + required(4, "order_date", Types.DateType.get()), + required(5, "ORDER_DATE", Types.DateType.get()), + required(6, "order_time", Types.TimestampType.withoutZone()), + required(7, "ORDER_TIME", Types.TimestampType.withoutZone())); + + @TempDir private Path temp; + private File tableDir = null; + + @BeforeEach + public void setupTableDir() throws IOException { + this.tableDir = Files.createTempDirectory(temp, "junit").toFile(); + } + + @AfterEach + public void cleanupTables() { + TestTables.clearTables(); + } + + @Test + public void 
testPartitionTypeWithColumnNamesThatDifferOnlyInLetterCase() { + Schema schema = + new Schema( + required(1, "id", Types.IntegerType.get()), + required(2, "data", Types.StringType.get()), + required(3, "DATA", Types.StringType.get()), + required(4, "order_date", Types.DateType.get())); + PartitionSpec spec = PartitionSpec.builderFor(schema).identity("data").identity("DATA").build(); + + StructType expectedType = + StructType.of( + NestedField.optional(1000, "data", Types.StringType.get()), + NestedField.optional(1001, "DATA", Types.StringType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testPartitionTypeWithIdentityTargetName() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .identity("data", "partition1") + .build(); + + StructType expectedType = + StructType.of(NestedField.optional(1000, "partition1", Types.StringType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testBucketSourceNameAllowsExactDuplicateWhenCaseSensitive() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITH_NAME_CONFLICTS) + .bucket("data", 10, "partition1") + .bucket("data", 10, "PARTITION1") + .build(); + + StructType expectedType = + StructType.of( + NestedField.optional(1000, "partition1", Types.IntegerType.get()), + NestedField.optional(1001, "PARTITION1", Types.IntegerType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testBucketTargetNameDefaultValue() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITH_NAME_CONFLICTS).bucket("data", 10).build(); + + StructType expectedType = + StructType.of(NestedField.optional(1000, "data_bucket", Types.IntegerType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test 
+ public void testBucketTargetNameDefaultValueCaseInsensitive() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .bucket("DATA", 10) + .build(); + + StructType expectedType = + StructType.of(NestedField.optional(1000, "data_bucket", Types.IntegerType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testBucketSourceNameAllowsInexactDuplicateWhenCaseInsensitive() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .bucket("data", 10, "partition1") + .bucket("DATA", 10, "PARTITION1") + .build(); + + StructType expectedType = + StructType.of( + NestedField.optional(1000, "partition1", Types.IntegerType.get()), + NestedField.optional(1001, "PARTITION1", Types.IntegerType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testBucketTargetNameAllowsInexactDuplicateWhenCaseInsensitive() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .bucket("data", 10, "partition1") + .bucket("category", 10, "PARTITION1") + .build(); + + StructType expectedType = + StructType.of( + NestedField.optional(1000, "partition1", Types.IntegerType.get()), + NestedField.optional(1001, "PARTITION1", Types.IntegerType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testBucketTargetNameDoesNotAllowExactDuplicateWhenCaseInsensitive() { + assertThatIllegalArgumentException() + .isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .bucket("data", 10, "partition1") + .bucket("category", 10, "partition1") + .build()) + .withMessage("Cannot use partition name more than once: partition1"); + } + + @Test + public void 
testBucketTargetNameDoesNotAllowExactDuplicateWhenCaseSensitive() { + assertThatIllegalArgumentException() + .isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITH_NAME_CONFLICTS) + .bucket("data", 10, "partition1") + .bucket("DATA", 10, "partition1") + .build()) + .withMessage("Cannot use partition name more than once: partition1"); + } + + @Test + public void testTruncateTargetNameDefaultValue() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITH_NAME_CONFLICTS).truncate("data", 10).build(); + + StructType expectedType = + StructType.of(NestedField.optional(1000, "data_trunc", Types.StringType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testTruncateTargetNameDefaultValueCaseInsensitive() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .truncate("DATA", 10) + .build(); + + StructType expectedType = + StructType.of(NestedField.optional(1000, "data_trunc", Types.StringType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testTruncateSourceNameAllowsExactDuplicateWhenCaseSensitive() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITH_NAME_CONFLICTS) + .truncate("data", 10, "partition1") + .truncate("data", 10, "PARTITION1") + .build(); + + StructType expectedType = + StructType.of( + NestedField.optional(1000, "partition1", Types.StringType.get()), + NestedField.optional(1001, "PARTITION1", Types.StringType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testTruncateSourceNameAllowsInexactDuplicateWhenCaseInsensitive() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .truncate("data", 10, "partition1") + .truncate("DATA", 10, "PARTITION1") + .build(); + + 
StructType expectedType = + StructType.of( + NestedField.optional(1000, "partition1", Types.StringType.get()), + NestedField.optional(1001, "PARTITION1", Types.StringType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testTruncateTargetNameAllowsInexactDuplicateWhenCaseInsensitive() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .truncate("data", 10, "partition1") + .truncate("category", 10, "PARTITION1") + .build(); + + StructType expectedType = + StructType.of( + NestedField.optional(1000, "partition1", Types.StringType.get()), + NestedField.optional(1001, "PARTITION1", Types.StringType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testTruncateTargetNameDoesNotAllowExactDuplicateWhenCaseInsensitive() { + assertThatIllegalArgumentException() + .isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .truncate("data", 10, "partition1") + .truncate("category", 10, "partition1") + .build()) + .withMessage("Cannot use partition name more than once: partition1"); + } + + @Test + public void testTruncateTargetNameDoesNotAllowExactDuplicateWhenCaseSensitive() { + assertThatIllegalArgumentException() + .isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITH_NAME_CONFLICTS) + .truncate("data", 10, "partition1") + .truncate("DATA", 10, "partition1") + .build()) + .withMessage("Cannot use partition name more than once: partition1"); + } + + @Test + public void testIdentityTargetNameDefaultValue() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITH_NAME_CONFLICTS).identity("data").build(); + + StructType expectedType = + StructType.of(NestedField.optional(1000, "data", Types.StringType.get())); + StructType actualType = spec.partitionType(); + 
assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testIdentityTargetNameDefaultValueCaseInsensitive() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .identity("DATA") + .build(); + + StructType expectedType = + StructType.of(NestedField.optional(1000, "data", Types.StringType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testIdentitySourceNameDoesNotAllowExactDuplicateWhenCaseSensitive() { + assertThatIllegalArgumentException() + .isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITH_NAME_CONFLICTS) + .identity("data", "partition1") + .identity("data", "PARTITION1") + .build()) + .withMessage( + "Cannot add redundant partition: 1000: partition1: identity(2) conflicts with 1001: PARTITION1: identity(2)"); + } + + @Test + public void testIdentitySourceNameDoesNotAllowInexactDuplicateWhenCaseInsensitive() { + assertThatIllegalArgumentException() + .isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .identity("data", "partition1") + .identity("DATA", "PARTITION1") + .build()) + .withMessage( + "Cannot add redundant partition: 1000: partition1: identity(2) conflicts with 1001: PARTITION1: identity(2)"); + } + + @Test + public void testIdentityTargetNameAllowsInexactDuplicateWhenCaseInsensitive() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .identity("data", "partition1") + .identity("category", "PARTITION1") + .build(); + + StructType expectedType = + StructType.of( + NestedField.optional(1000, "partition1", Types.StringType.get()), + NestedField.optional(1001, "PARTITION1", Types.StringType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void 
testIdentityTargetNameDoesNotAllowExactDuplicateWhenCaseInsensitive() { + assertThatIllegalArgumentException() + .isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .identity("data", "partition1") + .identity("category", "partition1") + .build()) + .withMessage("Cannot use partition name more than once: partition1"); + } + + @Test + public void testIdentityTargetNameDoesNotAllowExactDuplicateWhenCaseSensitive() { + assertThatIllegalArgumentException() + .isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITH_NAME_CONFLICTS) + .identity("data", "partition1") + .identity("DATA", "partition1") + .build()) + .withMessage("Cannot use partition name more than once: partition1"); + } + + @Test + public void testAlwaysNullTargetNameDefaultValue() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITH_NAME_CONFLICTS).alwaysNull("data").build(); + + StructType expectedType = + StructType.of(NestedField.optional(1000, "data_null", Types.StringType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testAlwaysNullTargetNameDefaultValueCaseInsensitive() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .alwaysNull("DATA") + .build(); + + StructType expectedType = + StructType.of(NestedField.optional(1000, "data_null", Types.StringType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testAlwaysNullSourceNameAllowsExactDuplicateWhenCaseSensitive() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITH_NAME_CONFLICTS) + .alwaysNull("data", "partition1") + .alwaysNull("data", "PARTITION1") + .build(); + + StructType expectedType = + StructType.of( + NestedField.optional(1000, "partition1", Types.StringType.get()), + NestedField.optional(1001, "PARTITION1", Types.StringType.get())); + 
StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testAlwaysNullSourceNameAllowsInexactDuplicateWhenCaseInsensitive() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .alwaysNull("data", "partition1") + .alwaysNull("DATA", "PARTITION1") + .build(); + + StructType expectedType = + StructType.of( + NestedField.optional(1000, "partition1", Types.StringType.get()), + NestedField.optional(1001, "PARTITION1", Types.StringType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testAlwaysNullTargetNameAllowsInexactDuplicateWhenCaseInsensitive() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .alwaysNull("data", "partition1") + .alwaysNull("category", "PARTITION1") + .build(); + + StructType expectedType = + StructType.of( + NestedField.optional(1000, "partition1", Types.StringType.get()), + NestedField.optional(1001, "PARTITION1", Types.StringType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testAlwaysNullTargetNameDoesNotAllowExactDuplicateWhenCaseInsensitive() { + assertThatIllegalArgumentException() + .isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .alwaysNull("data", "partition1") + .alwaysNull("category", "partition1") + .build()) + .withMessage("Cannot use partition name more than once: partition1"); + } + + @Test + public void testAlwaysNullTargetNameDoesNotAllowExactDuplicateWhenCaseSensitive() { + assertThatIllegalArgumentException() + .isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITH_NAME_CONFLICTS) + .alwaysNull("data", "partition1") + .alwaysNull("DATA", "partition1") + .build()) + .withMessage("Cannot use partition name more than 
once: partition1"); + } + + @Test + public void testYearTargetNameDefaultValue() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITH_NAME_CONFLICTS).year("order_date").build(); + + StructType expectedType = + StructType.of(NestedField.optional(1000, "order_date_year", Types.IntegerType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testYearTargetNameDefaultValueCaseInsensitive() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .year("ORDER_DATE") + .build(); + + StructType expectedType = + StructType.of(NestedField.optional(1000, "order_date_year", Types.IntegerType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testYearSourceNameDoesNotAllowExactDuplicateWhenCaseSensitive() { + assertThatIllegalArgumentException() + .isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITH_NAME_CONFLICTS) + .year("order_date", "partition1") + .year("order_date", "PARTITION1") + .build()) + .withMessage( + "Cannot add redundant partition: 1000: partition1: year(4) conflicts with 1001: PARTITION1: year(4)"); + } + + @Test + public void testYearSourceNameDoesNotAllowInexactDuplicateWhenCaseInsensitive() { + assertThatIllegalArgumentException() + .isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .year("order_date", "partition1") + .year("ORDER_DATE", "PARTITION1") + .build()) + .withMessage( + "Cannot add redundant partition: 1000: partition1: year(4) conflicts with 1001: PARTITION1: year(4)"); + } + + @Test + public void testYearTargetNameAllowsInexactDuplicateWhenCaseInsensitive() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .year("order_date", "partition1") + .year("ship_date", "PARTITION1") + .build(); + + StructType 
expectedType = + StructType.of( + NestedField.optional(1000, "partition1", Types.IntegerType.get()), + NestedField.optional(1001, "PARTITION1", Types.IntegerType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testYearTargetNameDoesNotAllowExactDuplicateWhenCaseInsensitive() { + assertThatIllegalArgumentException() + .isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .year("order_date", "partition1") + .year("ship_date", "partition1") + .build()) + .withMessage("Cannot use partition name more than once: partition1"); + } + + @Test + public void testYearTargetNameDoesNotAllowExactDuplicateWhenCaseSensitive() { + assertThatIllegalArgumentException() + .isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITH_NAME_CONFLICTS) + .year("order_date", "partition1") + .year("ORDER_DATE", "partition1") + .build()) + .withMessage("Cannot use partition name more than once: partition1"); + } + + @Test + public void testMonthTargetNameDefaultValue() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITH_NAME_CONFLICTS).month("order_date").build(); + + StructType expectedType = + StructType.of(NestedField.optional(1000, "order_date_month", Types.IntegerType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testMonthTargetNameDefaultValueCaseInsensitive() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .month("ORDER_DATE") + .build(); + + StructType expectedType = + StructType.of(NestedField.optional(1000, "order_date_month", Types.IntegerType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testMonthSourceNameDoesNotAllowExactDuplicateWhenCaseSensitive() { + assertThatIllegalArgumentException() + 
.isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITH_NAME_CONFLICTS) + .month("order_date", "partition1") + .month("order_date", "PARTITION1") + .build()) + .withMessage( + "Cannot add redundant partition: 1000: partition1: month(4) conflicts with 1001: PARTITION1: month(4)"); + } + + @Test + public void testMonthSourceNameDoesNotAllowInexactDuplicateWhenCaseInsensitive() { + assertThatIllegalArgumentException() + .isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .month("order_date", "partition1") + .month("ORDER_DATE", "PARTITION1") + .build()) + .withMessage( + "Cannot add redundant partition: 1000: partition1: month(4) conflicts with 1001: PARTITION1: month(4)"); + } + + @Test + public void testMonthTargetNameAllowsInexactDuplicateWhenCaseInsensitive() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .month("order_date", "partition1") + .month("ship_date", "PARTITION1") + .build(); + + StructType expectedType = + StructType.of( + NestedField.optional(1000, "partition1", Types.IntegerType.get()), + NestedField.optional(1001, "PARTITION1", Types.IntegerType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testMonthTargetNameDoesNotAllowExactDuplicateWhenCaseInsensitive() { + assertThatIllegalArgumentException() + .isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .month("order_date", "partition1") + .month("ship_date", "partition1") + .build()) + .withMessage("Cannot use partition name more than once: partition1"); + } + + @Test + public void testMonthTargetNameDoesNotAllowExactDuplicateWhenCaseSensitive() { + assertThatIllegalArgumentException() + .isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITH_NAME_CONFLICTS) + .month("order_date", "partition1") + .month("ORDER_DATE", "partition1") + .build()) 
+ .withMessage("Cannot use partition name more than once: partition1"); + } + + @Test + public void testDayTargetNameDefaultValue() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITH_NAME_CONFLICTS).day("order_date").build(); + + StructType expectedType = + StructType.of(NestedField.optional(1000, "order_date_day", Types.DateType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testDayTargetNameDefaultValueCaseInsensitive() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .day("ORDER_DATE") + .build(); + + StructType expectedType = + StructType.of(NestedField.optional(1000, "order_date_day", Types.DateType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testDaySourceNameDoesNotAllowExactDuplicateWhenCaseSensitive() { + assertThatIllegalArgumentException() + .isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITH_NAME_CONFLICTS) + .day("order_date", "partition1") + .day("order_date", "PARTITION1") + .build()) + .withMessage( + "Cannot add redundant partition: 1000: partition1: day(4) conflicts with 1001: PARTITION1: day(4)"); + } + + @Test + public void testDaySourceNameDoesNotAllowInexactDuplicateWhenCaseInsensitive() { + assertThatIllegalArgumentException() + .isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .day("order_date", "partition1") + .day("ORDER_DATE", "PARTITION1") + .build()) + .withMessage( + "Cannot add redundant partition: 1000: partition1: day(4) conflicts with 1001: PARTITION1: day(4)"); + } + + @Test + public void testDayTargetNameAllowsInexactDuplicateWhenCaseInsensitive() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .day("order_date", "partition1") + .day("ship_date", "PARTITION1") + 
.build(); + + StructType expectedType = + StructType.of( + NestedField.optional(1000, "partition1", Types.DateType.get()), + NestedField.optional(1001, "PARTITION1", Types.DateType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testDayTargetNameDoesNotAllowExactDuplicateWhenCaseInsensitive() { + assertThatIllegalArgumentException() + .isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .day("order_date", "partition1") + .day("ship_date", "partition1") + .build()) + .withMessage("Cannot use partition name more than once: partition1"); + } + + @Test + public void testDayTargetNameDoesNotAllowExactDuplicateWhenCaseSensitive() { + assertThatIllegalArgumentException() + .isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITH_NAME_CONFLICTS) + .day("order_date", "partition1") + .day("ORDER_DATE", "partition1") + .build()) + .withMessage("Cannot use partition name more than once: partition1"); + } + + @Test + public void testHourTargetNameDefaultValue() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITH_NAME_CONFLICTS).hour("order_time").build(); + + StructType expectedType = + StructType.of(NestedField.optional(1000, "order_time_hour", Types.IntegerType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testHourTargetNameDefaultValueCaseInsensitive() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .hour("ORDER_TIME") + .build(); + + StructType expectedType = + StructType.of(NestedField.optional(1000, "order_time_hour", Types.IntegerType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testHourSourceNameDoesNotAllowExactDuplicateWhenCaseSensitive() { + assertThatIllegalArgumentException() + 
.isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITH_NAME_CONFLICTS) + .hour("order_time", "partition1") + .hour("order_time", "PARTITION1") + .build()) + .withMessage( + "Cannot add redundant partition: 1000: partition1: hour(6) conflicts with 1001: PARTITION1: hour(6)"); + } + + @Test + public void testHourSourceNameDoesNotAllowInexactDuplicateWhenCaseInsensitive() { + assertThatIllegalArgumentException() + .isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .hour("order_time", "partition1") + .hour("ORDER_TIME", "PARTITION1") + .build()) + .withMessage( + "Cannot add redundant partition: 1000: partition1: hour(5) conflicts with 1001: PARTITION1: hour(5)"); + } + + @Test + public void testHourTargetNameAllowsInexactDuplicateWhenCaseInsensitive() { + PartitionSpec spec = + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .hour("order_time", "partition1") + .hour("ship_time", "PARTITION1") + .build(); + + StructType expectedType = + StructType.of( + NestedField.optional(1000, "partition1", Types.IntegerType.get()), + NestedField.optional(1001, "PARTITION1", Types.IntegerType.get())); + StructType actualType = spec.partitionType(); + assertThat(actualType).isEqualTo(expectedType); + } + + @Test + public void testHourTargetNameDoesNotAllowExactDuplicateWhenCaseInsensitive() { + assertThatIllegalArgumentException() + .isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITHOUT_NAME_CONFLICTS) + .caseSensitive(false) + .hour("order_time", "partition1") + .hour("ship_time", "partition1") + .build()) + .withMessage("Cannot use partition name more than once: partition1"); + } + + @Test + public void testHourTargetNameDoesNotAllowExactDuplicateWhenCaseSensitive() { + assertThatIllegalArgumentException() + .isThrownBy( + () -> + PartitionSpec.builderFor(SCHEMA_WITH_NAME_CONFLICTS) + .hour("order_time", "partition1") + .hour("ORDER_TIME", "partition1") + .build()) + 
.withMessage("Cannot use partition name more than once: partition1");
+  }
+}
diff --git a/core/src/test/java/org/apache/iceberg/TestPartitionSpecInfo.java b/core/src/test/java/org/apache/iceberg/TestPartitionSpecInfo.java
index a324b8af2e70..f5636a77be61 100644
--- a/core/src/test/java/org/apache/iceberg/TestPartitionSpecInfo.java
+++ b/core/src/test/java/org/apache/iceberg/TestPartitionSpecInfo.java
@@ -20,6 +20,7 @@
 
 import static org.apache.iceberg.types.Types.NestedField.required;
 import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatIllegalArgumentException;
 import static org.assertj.core.api.Assertions.entry;
 
 import java.io.File;
@@ -95,6 +96,29 @@ public void testSpecInfoPartitionedTable() {
         .doesNotContainKey(Integer.MAX_VALUE);
   }
 
+  // Builds an identity spec from "DATA" case-insensitively and checks the table reports it.
+  @TestTemplate
+  public void testSpecInfoPartitionedTableCaseInsensitive() {
+    PartitionSpec spec =
+        PartitionSpec.builderFor(schema).caseSensitive(false).identity("DATA").build();
+    TestTables.TestTable table = TestTables.create(tableDir, "test", schema, spec, formatVersion);
+
+    assertThat(table.spec()).isEqualTo(spec);
+    assertThat(table.spec().lastAssignedFieldId()).isEqualTo(spec.lastAssignedFieldId());
+    assertThat(table.specs())
+        .containsExactly(entry(spec.specId(), spec))
+        .doesNotContainKey(Integer.MAX_VALUE);
+  }
+
+  // With case-sensitive matching, "DATA" must be rejected as an unknown source column.
+  @TestTemplate
+  public void testSpecInfoPartitionedTableCaseSensitiveFails() {
+    assertThatIllegalArgumentException()
+        .isThrownBy(
+            () -> PartitionSpec.builderFor(schema).caseSensitive(true).identity("DATA").build())
+        .withMessage("Cannot find source column: DATA");
+  }
+
   @TestTemplate
   public void testColumnDropWithPartitionSpecEvolution() {
     PartitionSpec spec = PartitionSpec.builderFor(schema).identity("id").build();
diff --git a/core/src/test/java/org/apache/iceberg/TestPartitioning.java b/core/src/test/java/org/apache/iceberg/TestPartitioning.java
index 91f0fe95c2fa..a4df125f1de2 100644
---
a/core/src/test/java/org/apache/iceberg/TestPartitioning.java +++ b/core/src/test/java/org/apache/iceberg/TestPartitioning.java @@ -122,6 +122,25 @@ public void testPartitionTypeWithRenamesInV1Table() { assertThat(actualType).isEqualTo(expectedType); } + @Test + public void testPartitionTypeWithRenamesInV1TableCaseInsensitive() { + PartitionSpec initialSpec = + PartitionSpec.builderFor(SCHEMA).caseSensitive(false).identity("DATA", "p1").build(); + TestTables.TestTable table = + TestTables.create(tableDir, "test", SCHEMA, initialSpec, V1_FORMAT_VERSION); + + table.updateSpec().addField("category").commit(); + + table.updateSpec().renameField("p1", "p2").commit(); + + StructType expectedType = + StructType.of( + NestedField.optional(1000, "p2", Types.StringType.get()), + NestedField.optional(1001, "category", Types.StringType.get())); + StructType actualType = Partitioning.partitionType(table); + assertThat(actualType).isEqualTo(expectedType); + } + @Test public void testPartitionTypeWithAddingBackSamePartitionFieldInV1Table() { TestTables.TestTable table = @@ -252,6 +271,23 @@ public void testGroupingKeyTypeWithRenamesInV1Table() { assertThat(actualType).isEqualTo(expectedType); } + @Test + public void testGroupingKeyTypeWithRenamesInV1TableCaseInsensitive() { + PartitionSpec initialSpec = + PartitionSpec.builderFor(SCHEMA).caseSensitive(false).identity("DATA", "p1").build(); + TestTables.TestTable table = + TestTables.create(tableDir, "test", SCHEMA, initialSpec, V1_FORMAT_VERSION); + + table.updateSpec().addField("category").commit(); + + table.updateSpec().renameField("p1", "p2").commit(); + + StructType expectedType = + StructType.of(NestedField.optional(1000, "p2", Types.StringType.get())); + StructType actualType = Partitioning.groupingKeyType(table.schema(), table.specs().values()); + assertThat(actualType).isEqualTo(expectedType); + } + @Test public void testGroupingKeyTypeWithRenamesInV2Table() { PartitionSpec initialSpec = 
PartitionSpec.builderFor(SCHEMA).identity("data", "p1").build();