-
Notifications
You must be signed in to change notification settings - Fork 153
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Enhancement] Support restore/rollback sync during conversion (1/2) #569
base: main
Are you sure you want to change the base?
Changes from all commits
e0a8710
9bc8256
76e0bc0
45b279a
8ac5717
53a58b6
bb5ed78
9b8b71e
252ca2c
d9cb5fd
c8dde9e
bfbb78f
002d91b
bf5f0a0
56fe85e
a40cd6c
1e39d1a
0af0a82
82b2d2d
7ffda25
4198e5b
b181a07
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -78,4 +78,13 @@ public interface ConversionSource<COMMIT> extends Closeable { | |
* false. | ||
*/ | ||
boolean isIncrementalSyncSafeFrom(Instant instant); | ||
|
||
/** | ||
* Extract the identifier of the provided commit, the identifier defined as 1. Snapshot ID in | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can use |
||
* Iceberg 2. Version ID in Delta 3. timestamp in Hudi | ||
* | ||
* @param commit The provided commit | ||
* @return the string version of commit identifier | ||
*/ | ||
String getCommitIdentifier(COMMIT commit); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -73,7 +73,8 @@ public Map<String, SyncResult> syncSnapshot( | |
internalTable, | ||
target -> target.syncFilesForSnapshot(snapshot.getPartitionedDataFiles()), | ||
startTime, | ||
snapshot.getPendingCommits())); | ||
snapshot.getPendingCommits(), | ||
snapshot.getSourceIdentifier())); | ||
} catch (Exception e) { | ||
log.error("Failed to sync snapshot", e); | ||
results.put( | ||
|
@@ -121,7 +122,8 @@ public Map<String, List<SyncResult>> syncChanges( | |
change.getTableAsOfChange(), | ||
target -> target.syncFilesForDiff(change.getFilesDiff()), | ||
startTime, | ||
changes.getPendingCommits())); | ||
changes.getPendingCommits(), | ||
change.getSourceIdentifier())); | ||
} catch (Exception e) { | ||
log.error("Failed to sync table changes", e); | ||
resultsForFormat.add(buildResultForError(SyncMode.INCREMENTAL, startTime, e)); | ||
|
@@ -149,19 +151,26 @@ private SyncResult getSyncResult( | |
InternalTable tableState, | ||
SyncFiles fileSyncMethod, | ||
Instant startTime, | ||
List<Instant> pendingCommits) { | ||
List<Instant> pendingCommits, | ||
String sourceIdentifier) { | ||
// initialize the sync | ||
conversionTarget.beginSync(tableState); | ||
// Persist the latest commit time in table properties for incremental syncs | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here We need to move the Metadata set operation earlier because it will be required during the |
||
// Syncing metadata must precede the following steps to ensure that the metadata is available | ||
// before committing | ||
TableSyncMetadata latestState = | ||
TableSyncMetadata.of( | ||
tableState.getLatestCommitTime(), | ||
pendingCommits, | ||
tableState.getTableFormat(), | ||
sourceIdentifier); | ||
conversionTarget.syncMetadata(latestState); | ||
// sync schema updates | ||
conversionTarget.syncSchema(tableState.getReadSchema()); | ||
// sync partition updates | ||
conversionTarget.syncPartitionSpec(tableState.getPartitioningFields()); | ||
// Update the files in the target table | ||
fileSyncMethod.sync(conversionTarget); | ||
// Persist the latest commit time in table properties for incremental syncs. | ||
TableSyncMetadata latestState = | ||
TableSyncMetadata.of(tableState.getLatestCommitTime(), pendingCommits); | ||
conversionTarget.syncMetadata(latestState); | ||
conversionTarget.completeSync(); | ||
|
||
return SyncResult.builder() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this may be safer without a default and require non-null, what do you think?
Similar note for
InternalSnapshot