diff --git a/api/src/main/java/org/apache/iceberg/actions/RewriteDataFiles.java b/api/src/main/java/org/apache/iceberg/actions/RewriteDataFiles.java index 589b9017741e..a6f65c836663 100644 --- a/api/src/main/java/org/apache/iceberg/actions/RewriteDataFiles.java +++ b/api/src/main/java/org/apache/iceberg/actions/RewriteDataFiles.java @@ -118,6 +118,15 @@ public interface RewriteDataFiles boolean REMOVE_DANGLING_DELETES_DEFAULT = false; + /** + * If set to true, the rewrite skips validation of option keys instead of rejecting unknown ones. + * + * <p>

Defaults to false. + */ + String IGNORE_INVALID_OPTIONS = "ignore-invalid-options"; + + boolean IGNORE_INVALID_OPTIONS_DEFAULT = false; + /** * Forces the rewrite job order based on the value. * diff --git a/api/src/main/java/org/apache/iceberg/actions/RewritePositionDeleteFiles.java b/api/src/main/java/org/apache/iceberg/actions/RewritePositionDeleteFiles.java index 0c05433a8b96..244b47a5b660 100644 --- a/api/src/main/java/org/apache/iceberg/actions/RewritePositionDeleteFiles.java +++ b/api/src/main/java/org/apache/iceberg/actions/RewritePositionDeleteFiles.java @@ -78,6 +78,15 @@ public interface RewritePositionDeleteFiles String REWRITE_JOB_ORDER_DEFAULT = RewriteJobOrder.NONE.orderName(); + /** + * If set to true, the rewrite skips validation of option keys instead of rejecting unknown ones. + * + * <p>

Defaults to false. + */ + String IGNORE_INVALID_OPTIONS = "ignore-invalid-options"; + + boolean IGNORE_INVALID_OPTIONS_DEFAULT = false; + /** * A filter for finding deletes to rewrite. * diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java index e04a0c88b4bb..28256973bde6 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteDataFilesSparkAction.java @@ -87,7 +87,8 @@ public class RewriteDataFilesSparkAction USE_STARTING_SEQUENCE_NUMBER, REWRITE_JOB_ORDER, OUTPUT_SPEC_ID, - REMOVE_DANGLING_DELETES); + REMOVE_DANGLING_DELETES, + IGNORE_INVALID_OPTIONS); private static final RewriteDataFilesSparkAction.Result EMPTY_RESULT = ImmutableRewriteDataFiles.Result.builder().rewriteResults(ImmutableList.of()).build(); @@ -435,17 +436,22 @@ private Iterable toRewriteResults(List } void validateAndInitOptions() { - Set validOptions = Sets.newHashSet(rewriter.validOptions()); - validOptions.addAll(VALID_OPTIONS); - - Set invalidKeys = Sets.newHashSet(options().keySet()); - invalidKeys.removeAll(validOptions); - - Preconditions.checkArgument( - invalidKeys.isEmpty(), - "Cannot use options %s, they are not supported by the action or the rewriter %s", - invalidKeys, - rewriter.description()); + boolean ignoreInvalidOptions = + PropertyUtil.propertyAsBoolean( + options(), IGNORE_INVALID_OPTIONS, IGNORE_INVALID_OPTIONS_DEFAULT); + if (!ignoreInvalidOptions) { + Set validOptions = Sets.newHashSet(rewriter.validOptions()); + validOptions.addAll(VALID_OPTIONS); + + Set invalidKeys = Sets.newHashSet(options().keySet()); + invalidKeys.removeAll(validOptions); + + Preconditions.checkArgument( + invalidKeys.isEmpty(), + "Cannot use options %s, they are not supported by the action or the rewriter %s", + 
invalidKeys, + rewriter.description()); + } rewriter.init(options()); diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewritePositionDeleteFilesSparkAction.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewritePositionDeleteFilesSparkAction.java index 2562c74eafcc..dc850487928c 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewritePositionDeleteFilesSparkAction.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewritePositionDeleteFilesSparkAction.java @@ -81,7 +81,8 @@ public class RewritePositionDeleteFilesSparkAction MAX_CONCURRENT_FILE_GROUP_REWRITES, PARTIAL_PROGRESS_ENABLED, PARTIAL_PROGRESS_MAX_COMMITS, - REWRITE_JOB_ORDER); + REWRITE_JOB_ORDER, + IGNORE_INVALID_OPTIONS); private static final Result EMPTY_RESULT = ImmutableRewritePositionDeleteFiles.Result.builder().build(); @@ -358,17 +359,22 @@ private RewritePositionDeletesGroup newRewriteGroup( } private void validateAndInitOptions() { - Set validOptions = Sets.newHashSet(rewriter.validOptions()); - validOptions.addAll(VALID_OPTIONS); - - Set invalidKeys = Sets.newHashSet(options().keySet()); - invalidKeys.removeAll(validOptions); - - Preconditions.checkArgument( - invalidKeys.isEmpty(), - "Cannot use options %s, they are not supported by the action or the rewriter %s", - invalidKeys, - rewriter.description()); + boolean ignoreInvalidOptions = + PropertyUtil.propertyAsBoolean( + options(), IGNORE_INVALID_OPTIONS, IGNORE_INVALID_OPTIONS_DEFAULT); + if (!ignoreInvalidOptions) { + Set validOptions = Sets.newHashSet(rewriter.validOptions()); + validOptions.addAll(VALID_OPTIONS); + + Set invalidKeys = Sets.newHashSet(options().keySet()); + invalidKeys.removeAll(validOptions); + + Preconditions.checkArgument( + invalidKeys.isEmpty(), + "Cannot use options %s, they are not supported by the action or the rewriter %s", + invalidKeys, + rewriter.description()); + } rewriter.init(options()); diff --git 
a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java index 38c4d32a90d2..efb8e039cce0 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewriteDataFilesAction.java @@ -1214,6 +1214,15 @@ public void testInvalidOptions() { .hasMessageContaining("requires enabling Iceberg Spark session extensions"); } + @TestTemplate + public void testIgnoreInvalidOptions() { + Table table = createTable(20); + basicRewrite(table) + .option("ignore-invalid-options", "true") + .option("foobarity", "-5") + .execute(); + } + @TestTemplate public void testSortMultipleGroups() { Table table = createTable(20); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewritePositionDeleteFilesAction.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewritePositionDeleteFilesAction.java index 12b104fca27c..29a8cdd067cf 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewritePositionDeleteFilesAction.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/actions/TestRewritePositionDeleteFilesAction.java @@ -728,6 +728,27 @@ public void testRewriteManyColumns() throws Exception { assertEquals("Position deletes must match", expectedDeletes, actualDeletes); } + @TestTemplate + public void testIgnoreInvalidOptions() { + Table table = createTableUnpartitioned(2, SCALE); + assertThatThrownBy( + () -> { + SparkActions.get(spark) + .rewritePositionDeletes(table) + .option("foobarity", "-5") + .execute(); + }) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage( + "Cannot use options [foobarity], they are not supported by the action or the rewriter BIN-PACK"); + + SparkActions.get(spark) + .rewritePositionDeletes(table) + 
.option("ignore-invalid-options", "true") + .option("foobarity", "-5") + .execute(); + } + private Table createTablePartitioned(int partitions, int files, int numRecords) { PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).identity("c1").build(); Table table =