Commit 6d5ae0d

Fix commit duplication in DynamicIcebergSink
Change-Id: I3b97982e3dcc180bc82c36f97f335d873fbbde57
1 parent c4e480d commit 6d5ae0d

File tree: 8 files changed, +381 −42 lines

api/src/main/java/org/apache/iceberg/ReplacePartitions.java

Lines changed: 16 additions & 0 deletions

@@ -18,6 +18,8 @@
   */
  package org.apache.iceberg;
 
+import java.util.function.Consumer;
+
 /**
  * API for overwriting files in a table by partition.
  *
@@ -71,6 +73,20 @@ public interface ReplacePartitions extends SnapshotUpdate<ReplacePartitions> {
    */
   ReplacePartitions validateFromSnapshot(long snapshotId);
 
+  /**
+   * Enables snapshot validation with a user-provided function, which must throw an exception on
+   * validation failures.
+   *
+   * <p>Clients can use this method to validate summary and other metadata of parent snapshots.
+   *
+   * @param snapshotValidator a user function to validate parent snapshots
+   * @return this for method chaining
+   */
+  default ReplacePartitions validateSnapshot(Consumer<Snapshot> snapshotValidator) {
+    throw new UnsupportedOperationException(
+        getClass().getName() + " does not implement validateSnapshot");
+  }
+
   /**
    * Enables validation that deletes that happened concurrently do not conflict with this commit's
    * operation.
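
As a rough usage sketch (not part of this commit; the summary key is an assumption), a client could pass a validator that inspects parent snapshot metadata and throws to abort the commit:

    // Hypothetical validator: fail if any parent snapshot carries a marker
    // this application treats as a conflicting or duplicate commit.
    Consumer<Snapshot> validator =
        snapshot ->
            ValidationException.check(
                !"true".equals(snapshot.summary().get("my-app.duplicate")), // assumed key
                "Snapshot %s looks like a duplicate commit",
                snapshot.snapshotId());

    table
        .newReplacePartitions()
        .validateSnapshot(validator) // the default method above throws UnsupportedOperationException;
        .commit();                   // BaseReplacePartitions overrides it, as shown below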

api/src/main/java/org/apache/iceberg/RowDelta.java

Lines changed: 15 additions & 0 deletions

@@ -18,6 +18,7 @@
   */
  package org.apache.iceberg;
 
+import java.util.function.Consumer;
 import org.apache.iceberg.expressions.Expression;
 
 /**
@@ -79,6 +80,20 @@ default RowDelta removeDeletes(DeleteFile deletes) {
    */
   RowDelta validateFromSnapshot(long snapshotId);
 
+  /**
+   * Enables snapshot validation with a user-provided function, which must throw an exception on
+   * validation failures.
+   *
+   * <p>Clients can use this method to validate summary and other metadata of parent snapshots.
+   *
+   * @param snapshotValidator a user function to validate parent snapshots
+   * @return this for method chaining
+   */
+  default RowDelta validateSnapshot(Consumer<Snapshot> snapshotValidator) {
+    throw new UnsupportedOperationException(
+        getClass().getName() + " does not implement validateSnapshot");
+  }
+
   /**
    * Enables or disables case sensitive expression binding for validations that accept expressions.
    *
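
A sketch of the intended calling pattern (hypothetical here, but it mirrors what DynamicCommitter does later in this commit): scoping the walk with validateFromSnapshot so the validator only sees snapshots committed after the operation began.

    RowDelta rowDelta =
        table
            .newRowDelta()
            .validateSnapshot(snap -> { /* inspect snap.summary(); throw to abort */ });
    Snapshot tip = table.snapshot("main"); // assumed branch name
    if (tip != null) {
      // Only ancestors added after this snapshot are passed to the validator.
      rowDelta = rowDelta.validateFromSnapshot(tip.snapshotId());
    }
    rowDelta.addRows(dataFile).commit(); // dataFile: a DataFile written earlier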

core/src/main/java/org/apache/iceberg/BaseReplacePartitions.java

Lines changed: 13 additions & 0 deletions

@@ -19,6 +19,8 @@
 package org.apache.iceberg;
 
 import java.util.List;
+import java.util.function.Consumer;
+import javax.annotation.Nullable;
 import org.apache.iceberg.exceptions.ValidationException;
 import org.apache.iceberg.expressions.Expressions;
 import org.apache.iceberg.util.PartitionSet;
@@ -28,6 +30,7 @@ public class BaseReplacePartitions extends MergingSnapshotProducer<ReplacePartit
 
   private final PartitionSet replacedPartitions;
   private Long startingSnapshotId;
+  @Nullable private Consumer<Snapshot> snapshotValidator = null;
   private boolean validateConflictingData = false;
   private boolean validateConflictingDeletes = false;
 
@@ -67,6 +70,12 @@ public ReplacePartitions validateFromSnapshot(long newStartingSnapshotId) {
     return this;
   }
 
+  @Override
+  public ReplacePartitions validateSnapshot(Consumer<Snapshot> validator) {
+    this.snapshotValidator = validator;
+    return this;
+  }
+
   @Override
   public ReplacePartitions validateNoConflictingDeletes() {
     this.validateConflictingDeletes = true;
@@ -87,6 +96,10 @@ public BaseReplacePartitions toBranch(String branch) {
 
   @Override
   public void validate(TableMetadata currentMetadata, Snapshot parent) {
+    if (snapshotValidator != null) {
+      validateSnapshots(snapshotValidator, currentMetadata, startingSnapshotId, parent);
+    }
+
     if (validateConflictingData) {
       if (dataSpec().isUnpartitioned()) {
         validateAddedDataFiles(

core/src/main/java/org/apache/iceberg/BaseRowDelta.java

Lines changed: 13 additions & 0 deletions

@@ -19,7 +19,9 @@
 package org.apache.iceberg;
 
 import java.util.List;
+import java.util.function.Consumer;
 import java.util.stream.Collectors;
+import javax.annotation.Nullable;
 import org.apache.iceberg.exceptions.ValidationException;
 import org.apache.iceberg.expressions.Expression;
 import org.apache.iceberg.expressions.Expressions;
@@ -30,6 +32,7 @@
 
 public class BaseRowDelta extends MergingSnapshotProducer<RowDelta> implements RowDelta {
   private Long startingSnapshotId = null; // check all versions by default
+  @Nullable private Consumer<Snapshot> snapshotValidator = null;
   private final CharSequenceSet referencedDataFiles = CharSequenceSet.empty();
   private final DataFileSet removedDataFiles = DataFileSet.create();
   private boolean validateDeletes = false;
@@ -86,6 +89,12 @@ public RowDelta validateFromSnapshot(long snapshotId) {
     return this;
   }
 
+  @Override
+  public RowDelta validateSnapshot(Consumer<Snapshot> validator) {
+    this.snapshotValidator = validator;
+    return this;
+  }
+
   @Override
   public RowDelta validateDeletedFiles() {
     this.validateDeletes = true;
@@ -144,6 +153,10 @@ protected void validate(TableMetadata base, Snapshot parent) {
         parent);
     }
 
+    if (snapshotValidator != null) {
+      validateSnapshots(snapshotValidator, base, startingSnapshotId, parent);
+    }
+
     if (validateDeletes) {
       failMissingDeletePaths();
     }

core/src/main/java/org/apache/iceberg/MergingSnapshotProducer.java

Lines changed: 23 additions & 0 deletions

@@ -30,6 +30,8 @@
 import java.util.Map;
 import java.util.Objects;
 import java.util.Set;
+import java.util.function.Consumer;
+import javax.annotation.Nullable;
 import org.apache.iceberg.encryption.EncryptedOutputFile;
 import org.apache.iceberg.events.CreateSnapshotEvent;
 import org.apache.iceberg.exceptions.ValidationException;
@@ -674,6 +676,27 @@ protected void validateDeletedDataFiles(
     }
   }
 
+  /**
+   * Validates parent snapshots with a user-provided function.
+   *
+   * @param validator the validation function
+   * @param base table metadata to validate
+   * @param startingSnapshotId id of the snapshot current at the start of the operation
+   * @param parent ending snapshot on the branch being validated
+   */
+  protected void validateSnapshots(
+      Consumer<Snapshot> validator,
+      TableMetadata base,
+      @Nullable Long startingSnapshotId,
+      @Nullable Snapshot parent) {
+    if (parent == null) {
+      return;
+    }
+
+    SnapshotUtil.ancestorsBetween(parent.snapshotId(), startingSnapshotId, base::snapshot)
+        .forEach(validator);
+  }
+
   /**
    * Returns an iterable of files matching a filter have been added to the table since a starting
    * snapshot.
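
For intuition about which snapshots the helper visits, here is a hypothetical walk-through (the ancestry and variable names are illustrative, not code from this commit). SnapshotUtil.ancestorsBetween walks from the tip back to, but not including, the starting snapshot:

    // Assumed ancestry on the branch: s1 <- s2 <- s3 (s3 is the tip).
    // With startingSnapshotId = s1.snapshotId() and parent = s3, the helper
    // applies the validator to s3 and s2 -- exactly the snapshots committed
    // after the operation began -- and never to s1 or older ancestors.
    // With startingSnapshotId = null, it walks all ancestors of s3.
    validateSnapshots(
        snap -> System.out.println("validating " + snap.snapshotId()), // toy validator
        base,            // current TableMetadata
        s1.snapshotId(), // exclusive lower bound
        s3);             // inclusive upper bound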

flink/v1.20/flink/src/main/java/org/apache/iceberg/flink/sink/dynamic/DynamicCommitter.java

Lines changed: 81 additions & 15 deletions

@@ -28,6 +28,8 @@
 import java.util.Objects;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.TimeUnit;
+import java.util.function.Consumer;
+import javax.annotation.Nullable;
 import org.apache.flink.annotation.Internal;
 import org.apache.flink.api.connector.sink2.Committer;
 import org.apache.flink.core.io.SimpleVersionedSerialization;
@@ -40,6 +42,7 @@
 import org.apache.iceberg.Table;
 import org.apache.iceberg.catalog.Catalog;
 import org.apache.iceberg.catalog.TableIdentifier;
+import org.apache.iceberg.exceptions.ValidationException;
 import org.apache.iceberg.flink.sink.CommitSummary;
 import org.apache.iceberg.flink.sink.DeltaManifests;
 import org.apache.iceberg.flink.sink.DeltaManifestsSerializer;
@@ -158,26 +161,36 @@ public void commit(Collection<CommitRequest<DynamicCommittable>> commitRequests)
   private static long getMaxCommittedCheckpointId(
       Table table, String flinkJobId, String operatorId, String branch) {
     Snapshot snapshot = table.snapshot(branch);
-    long lastCommittedCheckpointId = INITIAL_CHECKPOINT_ID;
 
     while (snapshot != null) {
-      Map<String, String> summary = snapshot.summary();
-      String snapshotFlinkJobId = summary.get(FLINK_JOB_ID);
-      String snapshotOperatorId = summary.get(OPERATOR_ID);
-      if (flinkJobId.equals(snapshotFlinkJobId)
-          && (snapshotOperatorId == null || snapshotOperatorId.equals(operatorId))) {
-        String value = summary.get(MAX_COMMITTED_CHECKPOINT_ID);
-        if (value != null) {
-          lastCommittedCheckpointId = Long.parseLong(value);
-          break;
-        }
+      @Nullable
+      Long committedCheckpointId = extractCommittedCheckpointId(snapshot, flinkJobId, operatorId);
+      if (committedCheckpointId != null) {
+        return committedCheckpointId;
       }
 
       Long parentSnapshotId = snapshot.parentId();
       snapshot = parentSnapshotId != null ? table.snapshot(parentSnapshotId) : null;
     }
 
-    return lastCommittedCheckpointId;
+    return INITIAL_CHECKPOINT_ID;
+  }
+
+  @Nullable
+  private static Long extractCommittedCheckpointId(
+      Snapshot snapshot, String flinkJobId, String operatorId) {
+    Map<String, String> summary = snapshot.summary();
+    String snapshotFlinkJobId = summary.get(FLINK_JOB_ID);
+    String snapshotOperatorId = summary.get(OPERATOR_ID);
+    if (flinkJobId.equals(snapshotFlinkJobId)
+        && (snapshotOperatorId == null || snapshotOperatorId.equals(operatorId))) {
+      String value = summary.get(MAX_COMMITTED_CHECKPOINT_ID);
+      if (value != null) {
+        return Long.parseLong(value);
+      }
+    }
+
+    return null;
   }
 
   /**
@@ -276,7 +289,17 @@ private void replacePartitions(
       String operatorId) {
     // Iceberg tables are unsorted. So the order of the append data does not matter.
     // Hence, we commit everything in one snapshot.
-    ReplacePartitions dynamicOverwrite = table.newReplacePartitions().scanManifestsWith(workerPool);
+    long checkpointId = pendingResults.lastKey();
+    ReplacePartitions dynamicOverwrite =
+        table
+            .newReplacePartitions()
+            .scanManifestsWith(workerPool)
+            .validateSnapshot(
+                new MaxCommittedCheckpointIdValidator(checkpointId, newFlinkJobId, operatorId));
+    @Nullable Snapshot latestSnapshot = table.snapshot(branch);
+    if (latestSnapshot != null) {
+      dynamicOverwrite = dynamicOverwrite.validateFromSnapshot(latestSnapshot.snapshotId());
+    }
 
     for (List<WriteResult> writeResults : pendingResults.values()) {
       for (WriteResult result : writeResults) {
@@ -292,7 +315,7 @@
         "dynamic partition overwrite",
         newFlinkJobId,
         operatorId,
-        pendingResults.lastKey());
+        checkpointId);
   }
 
   private void commitDeltaTxn(
@@ -306,7 +329,17 @@ private void commitDeltaTxn(
       long checkpointId = e.getKey();
       List<WriteResult> writeResults = e.getValue();
 
-      RowDelta rowDelta = table.newRowDelta().scanManifestsWith(workerPool);
+      RowDelta rowDelta =
+          table
+              .newRowDelta()
+              .scanManifestsWith(workerPool)
+              .validateSnapshot(
+                  new MaxCommittedCheckpointIdValidator(checkpointId, newFlinkJobId, operatorId));
+      @Nullable Snapshot latestSnapshot = table.snapshot(branch);
+      if (latestSnapshot != null) {
+        rowDelta = rowDelta.validateFromSnapshot(latestSnapshot.snapshotId());
+      }
+
       for (WriteResult result : writeResults) {
         // Row delta validations are not needed for streaming changes that write equality deletes.
         // Equality deletes are applied to data in all previous sequence numbers, so retries may
@@ -329,6 +362,39 @@
     }
   }
 
+  static class MaxCommittedCheckpointIdValidator implements Consumer<Snapshot> {
+    private final long stagedCheckpointId;
+    private final String flinkJobId;
+    private final String flinkOperatorId;
+
+    MaxCommittedCheckpointIdValidator(
+        long stagedCheckpointId, String flinkJobId, String flinkOperatorId) {
+      this.stagedCheckpointId = stagedCheckpointId;
+      this.flinkJobId = flinkJobId;
+      this.flinkOperatorId = flinkOperatorId;
+    }
+
+    @Override
+    public void accept(Snapshot snapshot) {
+      @Nullable
+      Long checkpointId = extractCommittedCheckpointId(snapshot, flinkJobId, flinkOperatorId);
+      if (checkpointId == null) {
+        return;
+      }
+
+      ValidationException.check(
+          checkpointId < stagedCheckpointId,
+          "The new parent snapshot '%s' has '%s': '%s' >= '%s' of the currently staged committable."
+              + "\nThis can happen, for example, when using the REST catalog: if the previous commit request failed"
+              + " in the Flink client but succeeded on the server after the Flink job decided to retry it with the new request."
+              + "\nFlink should retry this exception, and the committer should skip the duplicate request during the next retry.",
+          snapshot.snapshotId(),
+          MAX_COMMITTED_CHECKPOINT_ID,
+          checkpointId,
+          stagedCheckpointId);
+    }
+  }
+
   @VisibleForTesting
   void commitOperation(
       Table table,
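
To make the failure mode concrete, here is a hedged replay of the race this commit guards against (the checkpoint id and variable names are illustrative only):

    // 1. The committer stages checkpoint 7 and sends the commit to a REST catalog.
    // 2. The request fails on the Flink side (e.g. a timeout) but succeeds on the
    //    server, so the branch tip now records max-committed-checkpoint-id = 7
    //    for this job and operator.
    // 3. Flink retries checkpoint 7. Without validation, the retry would append
    //    the same files again. With the validator attached, every snapshot added
    //    since validateFromSnapshot(...) is checked before the retry can commit:
    Consumer<Snapshot> validator =
        new MaxCommittedCheckpointIdValidator(7L, flinkJobId, operatorId);
    validator.accept(snapshotFromStep2); // throws ValidationException: 7 >= 7
    // Flink surfaces the exception and retries; on that attempt,
    // getMaxCommittedCheckpointId(...) returns 7, so the committer recognizes
    // checkpoint 7 as already committed and skips the duplicate request.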
