-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Loading status checks…
[Kernel] Add an end2end prototype of Coordinated Commit read support …
…in kernel
1 parent
ad8d1cb
commit 323f08d
Showing
33 changed files
with
2,279 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
57 changes: 57 additions & 0 deletions
57
kernel/kernel-api/src/main/java/io/delta/kernel/commit/Commit.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
/* | ||
* Copyright (2021) The Delta Lake Project Authors. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package io.delta.kernel.commit; | ||
|
||
import io.delta.kernel.utils.FileStatus; | ||
|
||
/** | ||
* Representation of a commit file | ||
*/ | ||
public class Commit { | ||
|
||
private long version; | ||
|
||
private FileStatus fileStatus; | ||
|
||
private long commitTimestamp; | ||
|
||
public Commit(long version, FileStatus fileStatus, long commitTimestamp) { | ||
this.version = version; | ||
this.fileStatus = fileStatus; | ||
this.commitTimestamp = commitTimestamp; | ||
} | ||
|
||
public long getVersion() { | ||
return version; | ||
} | ||
|
||
public FileStatus getFileStatus() { | ||
return fileStatus; | ||
} | ||
|
||
public long getCommitTimestamp() { | ||
return commitTimestamp; | ||
} | ||
|
||
public Commit withFileStatus(FileStatus fileStatus) { | ||
return new Commit(version, fileStatus, commitTimestamp); | ||
} | ||
|
||
public Commit withCommitTimestamp(long commitTimestamp) { | ||
return new Commit(version, fileStatus, commitTimestamp); | ||
} | ||
} |
60 changes: 60 additions & 0 deletions
60
kernel/kernel-api/src/main/java/io/delta/kernel/commit/CommitFailedException.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
/* | ||
* Copyright (2021) The Delta Lake Project Authors. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package io.delta.kernel.commit; | ||
|
||
import java.util.Iterator; | ||
import java.util.Map; | ||
|
||
/**
 * Exception raised by
 * {@link io.delta.kernel.engine.CommitCoordinatorClientHandler#commit(
 * String, Map, long, Iterator, UpdatedActions)} when a commit attempt fails.
 *
 * <p>The two flags together tell the caller how to react:
 * <pre>
 * | retryable | conflict  | meaning                                                         |
 * |   no      |   no      | something bad happened (e.g. auth failure)                      |
 * |   no      |   yes     | permanent transaction conflict (e.g. multi-table commit failed) |
 * |   yes     |   no      | transient error (e.g. network hiccup)                           |
 * |   yes     |   yes     | physical conflict (allowed to rebase and retry)                 |
 * </pre>
 */
public class CommitFailedException extends Exception {

    /** Whether the caller may retry the commit. */
    private final boolean retryable;

    /** Whether the failure was caused by a conflict. */
    private final boolean conflict;

    /**
     * Creates an exception without an underlying cause.
     *
     * @param retryable whether the commit may be retried
     * @param conflict  whether the failure is a conflict
     * @param message   detail message, returned by {@link #getMessage()}
     */
    public CommitFailedException(boolean retryable, boolean conflict, String message) {
        // Deliberately super(message) rather than super(message, null): this leaves the cause
        // uninitialised so callers can still attach one later through initCause(...).
        super(message);
        this.retryable = retryable;
        this.conflict = conflict;
    }

    /**
     * Creates an exception that keeps the underlying failure as its cause.
     *
     * @param retryable whether the commit may be retried
     * @param conflict  whether the failure is a conflict
     * @param message   detail message, returned by {@link #getMessage()}
     * @param cause     the failure that triggered this exception; may be null
     */
    public CommitFailedException(
            boolean retryable, boolean conflict, String message, Throwable cause) {
        super(message, cause);
        this.retryable = retryable;
        this.conflict = conflict;
    }

    /** @return {@code true} if the commit may be retried */
    public boolean getRetryable() {
        return retryable;
    }

    /** @return {@code true} if the failure is a conflict */
    public boolean getConflict() {
        return conflict;
    }
}
38 changes: 38 additions & 0 deletions
38
kernel/kernel-api/src/main/java/io/delta/kernel/commit/CommitResponse.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
/* | ||
* Copyright (2021) The Delta Lake Project Authors. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package io.delta.kernel.commit; | ||
|
||
import java.util.Iterator; | ||
import java.util.Map; | ||
|
||
/** | ||
* Response container for | ||
* {@link io.delta.kernel.engine.CommitCoordinatorClientHandler#commit( | ||
* String, Map, long, Iterator, UpdatedActions)}. | ||
*/ | ||
public class CommitResponse { | ||
|
||
private Commit commit; | ||
|
||
public CommitResponse(Commit commit) { | ||
this.commit = commit; | ||
} | ||
|
||
public Commit getCommit() { | ||
return commit; | ||
} | ||
} |
44 changes: 44 additions & 0 deletions
44
kernel/kernel-api/src/main/java/io/delta/kernel/commit/GetCommitsResponse.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
/* | ||
* Copyright (2021) The Delta Lake Project Authors. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package io.delta.kernel.commit; | ||
|
||
import java.util.List; | ||
import java.util.Map; | ||
|
||
/** | ||
* Response container for | ||
* {@link io.delta.kernel.engine.CommitCoordinatorClientHandler#getCommits( | ||
* String, Map, Long, Long)}. | ||
*/ | ||
public class GetCommitsResponse { | ||
private List<Commit> commits; | ||
|
||
private long latestTableVersion; | ||
|
||
public GetCommitsResponse(List<Commit> commits, long latestTableVersion) { | ||
this.commits = commits; | ||
this.latestTableVersion = latestTableVersion; | ||
} | ||
|
||
public List<Commit> getCommits() { | ||
return commits; | ||
} | ||
|
||
public long getLatestTableVersion() { | ||
return latestTableVersion; | ||
} | ||
} |
70 changes: 70 additions & 0 deletions
70
kernel/kernel-api/src/main/java/io/delta/kernel/commit/UpdatedActions.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
/* | ||
* Copyright (2021) The Delta Lake Project Authors. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package io.delta.kernel.commit; | ||
|
||
import io.delta.kernel.commit.actions.AbstractCommitInfo; | ||
import io.delta.kernel.commit.actions.AbstractMetadata; | ||
import io.delta.kernel.commit.actions.AbstractProtocol; | ||
|
||
/** | ||
* A container class to inform the CommitCoordinatorClientHandler about any changes in | ||
* Protocol/Metadata | ||
*/ | ||
public class UpdatedActions { | ||
private AbstractCommitInfo commitInfo; | ||
|
||
private AbstractMetadata newMetadata; | ||
|
||
private AbstractProtocol newProtocol; | ||
|
||
private AbstractMetadata oldMetadata; | ||
|
||
private AbstractProtocol oldProtocol; | ||
|
||
public UpdatedActions( | ||
AbstractCommitInfo commitInfo, | ||
AbstractMetadata newMetadata, | ||
AbstractProtocol newProtocol, | ||
AbstractMetadata oldMetadata, | ||
AbstractProtocol oldProtocol) { | ||
this.commitInfo = commitInfo; | ||
this.newMetadata = newMetadata; | ||
this.newProtocol = newProtocol; | ||
this.oldMetadata = oldMetadata; | ||
this.oldProtocol = oldProtocol; | ||
} | ||
|
||
public AbstractCommitInfo getCommitInfo() { | ||
return commitInfo; | ||
} | ||
|
||
public AbstractMetadata getNewMetadata() { | ||
return newMetadata; | ||
} | ||
|
||
public AbstractProtocol getNewProtocol() { | ||
return newProtocol; | ||
} | ||
|
||
public AbstractMetadata getOldMetadata() { | ||
return oldMetadata; | ||
} | ||
|
||
public AbstractProtocol getOldProtocol() { | ||
return oldProtocol; | ||
} | ||
} |
31 changes: 31 additions & 0 deletions
31
kernel/kernel-api/src/main/java/io/delta/kernel/commit/actions/AbstractCommitInfo.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
/* | ||
* Copyright (2021) The Delta Lake Project Authors. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package io.delta.kernel.commit.actions; | ||
|
||
/**
 * Base information every commit must expose. A commit provides an in-commit timestamp,
 * which pins the exact time the commit happened and decides which version a time-based
 * time travel query resolves to.
 */
public interface AbstractCommitInfo {

    /**
     * @return the commit timestamp, expressed as milliseconds after the epoch
     */
    long getCommitTimestamp();
}
68 changes: 68 additions & 0 deletions
68
kernel/kernel-api/src/main/java/io/delta/kernel/commit/actions/AbstractMetadata.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
/* | ||
* Copyright (2021) The Delta Lake Project Authors. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package io.delta.kernel.commit.actions; | ||
|
||
import java.util.List; | ||
import java.util.Map; | ||
|
||
/**
 * Contract for the metadata action of a Delta table, i.e. the action that carries the
 * table-level metadata.
 */
public interface AbstractMetadata {

    /** @return the unique identifier of the table */
    String getId();

    /** @return the table identifier chosen by the user */
    String getName();

    /** @return the table description supplied by the user */
    String getDescription();

    /** @return the provider format of the table */
    String getProvider();

    /** @return the options of the format */
    Map<String, String> getFormatOptions();

    /** @return the schema of the table, in its string representation */
    String getSchemaString();

    /** @return the partition columns, as a list */
    List<String> getPartitionColumns();

    /** @return the table properties that are defined on the table */
    Map<String, String> getConfiguration();

    /** @return the timestamp at which this metadata was created */
    Long getCreatedTime();
}
46 changes: 46 additions & 0 deletions
46
kernel/kernel-api/src/main/java/io/delta/kernel/commit/actions/AbstractProtocol.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
/* | ||
* Copyright (2021) The Delta Lake Project Authors. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package io.delta.kernel.commit.actions; | ||
|
||
import java.util.Set; | ||
|
||
/**
 * Interface for protocol actions in Delta. The protocol defines the requirements
 * that readers and writers of the table need to meet.
 */
public interface AbstractProtocol {

    /**
     * The minimum reader version required to read the table.
     */
    int getMinReaderVersion();

    /**
     * The minimum writer version required to write the table.
     */
    int getMinWriterVersion();

    /**
     * The reader features that need to be supported to read the table.
     */
    Set<String> getReaderFeatures();

    /**
     * The writer features that need to be supported to write the table.
     */
    Set<String> getWriterFeatures();
}
153 changes: 153 additions & 0 deletions
153
kernel/kernel-api/src/main/java/io/delta/kernel/engine/CommitCoordinatorClientHandler.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
/* | ||
* Copyright (2023) The Delta Lake Project Authors. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package io.delta.kernel.engine; | ||
|
||
import java.io.IOException; | ||
import java.util.Iterator; | ||
import java.util.Map; | ||
|
||
import io.delta.kernel.annotation.Evolving; | ||
import io.delta.kernel.commit.Commit; | ||
import io.delta.kernel.commit.CommitFailedException; | ||
import io.delta.kernel.commit.CommitResponse; | ||
import io.delta.kernel.commit.GetCommitsResponse; | ||
import io.delta.kernel.commit.UpdatedActions; | ||
import io.delta.kernel.commit.actions.AbstractMetadata; | ||
import io.delta.kernel.commit.actions.AbstractProtocol; | ||
|
||
/** | ||
* Provides coordinated commits related functionalities to Delta Kernel. | ||
* | ||
* @since 3.0.0 | ||
*/ | ||
@Evolving | ||
public interface CommitCoordinatorClientHandler { | ||
|
||
/** | ||
* API to register the table represented by the given `logPath` at the provided | ||
* currentTableVersion with the commit coordinator this commit coordinator client represents. | ||
* <p> | ||
* This API is called when the table is being converted from a file system table to a | ||
* coordinated-commit table. | ||
* <p> | ||
* When a new coordinated-commit table is being created, the currentTableVersion will be -1 and | ||
* the upgrade commit needs to be a file system commit which will write the backfilled file | ||
* directly. | ||
* | ||
* @param logPath The path to the delta log of the table that should be converted | ||
* @param currentVersion The currentTableVersion is the version of the table just before | ||
* conversion. currentTableVersion + 1 represents the commit that | ||
* will do the conversion. This must be backfilled atomically. | ||
* currentTableVersion + 2 represents the first commit after conversion. | ||
* This will go through the CommitCoordinatorClient and the client is | ||
* free to choose when it wants to backfill this commit. | ||
* @param currentMetadata The metadata of the table at currentTableVersion | ||
* @param currentProtocol The protocol of the table at currentTableVersion | ||
* @return A map of key-value pairs which is issued by the commit coordinator to identify the | ||
* table. This should be stored in the table's metadata. This information needs to be | ||
* passed to the {@link #commit}, {@link #getCommits}, and {@link #backfillToVersion} | ||
* APIs to identify the table. | ||
*/ | ||
Map<String, String> registerTable( | ||
String logPath, | ||
long currentVersion, | ||
AbstractMetadata currentMetadata, | ||
AbstractProtocol currentProtocol); | ||
|
||
/** | ||
* API to commit the given set of actions to the table represented by logPath at the | ||
* given commitVersion. | ||
* | ||
* @param logPath The path to the delta log of the table that should be committed to. | ||
* @param tableConf The table configuration that was returned by the commit coordinator | ||
* client during registration. | ||
* @param commitVersion The version of the commit that is being committed. | ||
* @param actions The actions that need to be committed. | ||
* @param updatedActions The commit info and any metadata or protocol changes that are made | ||
* as part of this commit. | ||
* @return CommitResponse which contains the file status of the committed commit file. If the | ||
* commit is already backfilled, then the file status could be omitted from the response | ||
* and the client could retrieve the information by itself. | ||
*/ | ||
CommitResponse commit( | ||
String logPath, | ||
Map<String, String> tableConf, | ||
long commitVersion, | ||
Iterator<String> actions, | ||
UpdatedActions updatedActions) throws IOException, CommitFailedException; | ||
|
||
/** | ||
* API to get the unbackfilled commits for the table represented by the given logPath. | ||
* Commits older than startVersion or newer than endVersion (if given) are ignored. The | ||
* returned commits are contiguous and in ascending version order. | ||
* | ||
* Note that the first version returned by this API may not be equal to startVersion. This | ||
* happens when some versions starting from startVersion have already been backfilled and so | ||
* the commit coordinator may have stopped tracking them. | ||
* | ||
* The returned latestTableVersion is the maximum commit version ratified by the commit | ||
* coordinator. Note that returning latestTableVersion as -1 is acceptable only if the commit | ||
* coordinator never ratified any version, i.e. it never accepted any unbackfilled commit. | ||
* | ||
* @param tablePath The path to the delta log of the table for which the unbackfilled | ||
* commits should be retrieved. | ||
* @param tableConf The table configuration that was returned by the commit coordinator | ||
* during registration. | ||
* @param startVersion The minimum version of the commit that should be returned. Can be null. | ||
* @param endVersion The maximum version of the commit that should be returned. Can be null. | ||
* @return GetCommitsResponse which has a list of {@link Commit}s and the latestTableVersion | ||
* which is tracked by {@link CommitCoordinatorClientHandler}. | ||
*/ | ||
GetCommitsResponse getCommits( | ||
String tablePath, | ||
Map<String, String> tableConf, | ||
Long startVersion, | ||
Long endVersion); | ||
|
||
/** | ||
* API to ask the commit coordinator client to backfill all commits up to {@code version} | ||
* and notify the commit coordinator. | ||
* | ||
* If this API returns successfully, that means the backfill must have been completed, although | ||
* the commit coordinator may not be aware of it yet. | ||
* | ||
* @param logPath The path to the delta log of the table that should be backfilled. | ||
* @param tableConf The table configuration that was returned by the commit coordinator | ||
* during registration. | ||
* @param version The version till which the commit coordinator client should backfill. | ||
* @param lastKnownBackfilledVersion The last known version that was backfilled before this API | ||
* was called. If it is None or invalid, then the commit | ||
* coordinator client should backfill from the beginning of | ||
* the table. Can be null. | ||
*/ | ||
void backfillToVersion( | ||
String logPath, | ||
Map<String, String> tableConf, | ||
long version, | ||
Long lastKnownBackfilledVersion) throws IOException; | ||
|
||
/** | ||
* Determines whether this CommitCoordinatorClient is semantically equal to another | ||
* CommitCoordinatorClient. | ||
* | ||
* Semantic equality is determined by each CommitCoordinatorClient implementation based on | ||
* whether the two instances can be used interchangeably when invoking any of the | ||
* CommitCoordinatorClient APIs, such as {@link #commit}, {@link #getCommits}, etc. For example, | ||
* both instances might be pointing to the same underlying endpoint. | ||
*/ | ||
Boolean semanticEquals(CommitCoordinatorClientHandler other); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
194 changes: 194 additions & 0 deletions
194
.../src/main/java/io/delta/kernel/defaults/engine/DefaultCommitCoordinatorClientHandler.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,194 @@ | ||
/* | ||
* Copyright (2023) The Delta Lake Project Authors. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package io.delta.kernel.defaults.engine; | ||
|
||
import java.io.IOException; | ||
import java.util.Iterator; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.stream.Collectors; | ||
|
||
import io.delta.storage.LogStore; | ||
import io.delta.storage.commit.CommitCoordinatorClient; | ||
import io.delta.storage.commit.CommitFailedException; | ||
import org.apache.hadoop.conf.Configuration; | ||
import org.apache.hadoop.fs.Path; | ||
|
||
import io.delta.kernel.commit.Commit; | ||
import io.delta.kernel.commit.CommitResponse; | ||
import io.delta.kernel.commit.GetCommitsResponse; | ||
import io.delta.kernel.commit.UpdatedActions; | ||
import io.delta.kernel.commit.actions.AbstractMetadata; | ||
import io.delta.kernel.commit.actions.AbstractProtocol; | ||
import io.delta.kernel.engine.CommitCoordinatorClientHandler; | ||
import io.delta.kernel.utils.FileStatus; | ||
import io.delta.kernel.defaults.internal.coordinatedcommits.CommitCoordinatorProvider; | ||
import io.delta.kernel.defaults.internal.logstore.LogStoreProvider; | ||
|
||
/** | ||
* Default implementation of {@link CommitCoordinatorClientHandler} based on Hadoop APIs. | ||
* It takes a Hadoop {@link Configuration} object to interact with the commit coordinator client. | ||
* The following optional configurations can be set to customize the behavior of the client: | ||
* <ul> | ||
* <li>{@code io.delta.kernel.logStore.<scheme>.impl} - The class name of the custom | ||
* {@link LogStore} implementation to use for operations on storage systems with the | ||
* specified {@code scheme}. For example, to use a custom {@link LogStore} for S3 storage | ||
* objects: | ||
* <pre>{@code | ||
* <property> | ||
* <name>io.delta.kernel.logStore.s3.impl</name> | ||
* <value>com.example.S3LogStore</value> | ||
* </property> | ||
* }</pre> | ||
* If not set, the default LogStore implementation for the scheme will be used. | ||
* </li> | ||
* <li>{@code delta.enableFastS3AListFrom} - Set to {@code true} to enable fast listing | ||
* functionality when using a {@link LogStore} created for S3 storage objects. | ||
* </li> | ||
* </ul> | ||
*/ | ||
public class DefaultCommitCoordinatorClientHandler implements CommitCoordinatorClientHandler { | ||
private final Configuration hadoopConf; | ||
private final CommitCoordinatorClient commitCoordinatorClient; | ||
|
||
/** | ||
* Create an instance of the default {@link DefaultCommitCoordinatorClientHandler} | ||
* implementation. | ||
* | ||
* @param hadoopConf Configuration to use. List of options to customize the behavior of | ||
* the client can be found in the class documentation. | ||
*/ | ||
public DefaultCommitCoordinatorClientHandler( | ||
Configuration hadoopConf, String name, Map<String, String> conf) { | ||
this.hadoopConf = hadoopConf; | ||
this.commitCoordinatorClient = CommitCoordinatorProvider | ||
.getCommitCoordinatorClient(name, conf); | ||
} | ||
|
||
@Override | ||
public Map<String, String> registerTable( | ||
String logPath, | ||
long currentVersion, | ||
AbstractMetadata currentMetadata, | ||
AbstractProtocol currentProtocol) { | ||
return commitCoordinatorClient.registerTable( | ||
new Path(logPath), | ||
currentVersion, | ||
(io.delta.storage.commit.actions.AbstractMetadata) currentMetadata, | ||
(io.delta.storage.commit.actions.AbstractProtocol) currentProtocol); | ||
} | ||
|
||
@Override | ||
public CommitResponse commit( | ||
String logPath, | ||
Map<String, String> tableConf, | ||
long commitVersion, | ||
Iterator<String> actions, | ||
UpdatedActions updatedActions) | ||
throws IOException, io.delta.kernel.commit.CommitFailedException { | ||
Path path = new Path(logPath); | ||
LogStore logStore = LogStoreProvider.getLogStore(hadoopConf, path.toUri().getScheme()); | ||
try { | ||
return convertCommitResponse(commitCoordinatorClient.commit( | ||
logStore, | ||
hadoopConf, | ||
path, | ||
tableConf, | ||
commitVersion, | ||
actions, | ||
convertUpdatedActions(updatedActions))); | ||
} catch (CommitFailedException e) { | ||
throw new io.delta.kernel.commit.CommitFailedException( | ||
e.getRetryable(), e.getConflict(), e.getMessage()); | ||
} | ||
} | ||
|
||
@Override | ||
public GetCommitsResponse getCommits( | ||
String tablePath, | ||
Map<String, String> tableConf, | ||
Long startVersion, | ||
Long endVersion) { | ||
return convertGetCommitsResponse(commitCoordinatorClient.getCommits( | ||
new Path(tablePath), | ||
tableConf, | ||
startVersion, | ||
endVersion)); | ||
} | ||
|
||
@Override | ||
public void backfillToVersion( | ||
String logPath, | ||
Map<String, String> tableConf, | ||
long version, | ||
Long lastKnownBackfilledVersion) throws IOException { | ||
Path path = new Path(logPath); | ||
LogStore logStore = LogStoreProvider.getLogStore(hadoopConf, path.toUri().getScheme()); | ||
commitCoordinatorClient.backfillToVersion( | ||
logStore, | ||
hadoopConf, | ||
path, | ||
tableConf, | ||
version, | ||
lastKnownBackfilledVersion); | ||
} | ||
|
||
@Override | ||
public Boolean semanticEquals(CommitCoordinatorClientHandler other) { | ||
return commitCoordinatorClient.semanticEquals( | ||
((DefaultCommitCoordinatorClientHandler) other).getCommitCoordinatorClient()); | ||
} | ||
|
||
public CommitCoordinatorClient getCommitCoordinatorClient() { | ||
return commitCoordinatorClient; | ||
} | ||
|
||
private io.delta.storage.commit.UpdatedActions convertUpdatedActions( | ||
UpdatedActions updatedActions) { | ||
return new io.delta.storage.commit.UpdatedActions( | ||
(io.delta.storage.commit.actions.AbstractCommitInfo) updatedActions.getCommitInfo(), | ||
(io.delta.storage.commit.actions.AbstractMetadata) updatedActions.getNewMetadata(), | ||
(io.delta.storage.commit.actions.AbstractProtocol) updatedActions.getNewProtocol(), | ||
(io.delta.storage.commit.actions.AbstractMetadata) updatedActions.getOldMetadata(), | ||
(io.delta.storage.commit.actions.AbstractProtocol) updatedActions.getOldProtocol()); | ||
} | ||
|
||
private CommitResponse convertCommitResponse(io.delta.storage.commit.CommitResponse response) { | ||
return new CommitResponse(convertCommit(response.getCommit())); | ||
} | ||
|
||
private Commit convertCommit(io.delta.storage.commit.Commit commit) { | ||
return new Commit( | ||
commit.getVersion(), | ||
convertFileStatus(commit.getFileStatus()), | ||
commit.getCommitTimestamp()); | ||
} | ||
|
||
private FileStatus convertFileStatus(org.apache.hadoop.fs.FileStatus hadoopFileStatus) { | ||
return FileStatus.of( | ||
hadoopFileStatus.getPath().toString(), | ||
hadoopFileStatus.getLen(), | ||
hadoopFileStatus.getModificationTime()); | ||
} | ||
|
||
private GetCommitsResponse convertGetCommitsResponse( | ||
io.delta.storage.commit.GetCommitsResponse response) { | ||
List<Commit> commits = response.getCommits().stream() | ||
.map(this::convertCommit) | ||
.collect(Collectors.toList()); | ||
return new GetCommitsResponse(commits, response.getLatestTableVersion()); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
216 changes: 216 additions & 0 deletions
216
...defaults/internal/coordinatedcommits/AbstractBatchBackfillingCommitCoordinatorClient.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,216 @@ | ||
/* | ||
* Copyright (2023) The Delta Lake Project Authors. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package io.delta.kernel.defaults.internal.coordinatedcommits; | ||
|
||
import java.io.IOException; | ||
import java.nio.file.FileAlreadyExistsException; | ||
import java.util.Iterator; | ||
import java.util.Map; | ||
import java.util.UUID; | ||
|
||
import io.delta.storage.CloseableIterator; | ||
import io.delta.storage.LogStore; | ||
import io.delta.storage.commit.Commit; | ||
import io.delta.storage.commit.CommitCoordinatorClient; | ||
import io.delta.storage.commit.CommitFailedException; | ||
import io.delta.storage.commit.CommitResponse; | ||
import io.delta.storage.commit.GetCommitsResponse; | ||
import io.delta.storage.commit.UpdatedActions; | ||
import org.apache.hadoop.conf.Configuration; | ||
import org.apache.hadoop.fs.FileStatus; | ||
import org.apache.hadoop.fs.FileSystem; | ||
import org.apache.hadoop.fs.Path; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
/** | ||
* An abstract {@link CommitCoordinatorClient} which triggers backfills every n commits. | ||
* - every commit version which satisfies `commitVersion % batchSize == 0` will trigger a backfill. | ||
*/ | ||
public abstract class AbstractBatchBackfillingCommitCoordinatorClient | ||
implements CommitCoordinatorClient { | ||
|
||
protected static final Logger logger = | ||
LoggerFactory.getLogger(AbstractBatchBackfillingCommitCoordinatorClient.class); | ||
|
||
/** | ||
* Size of batch that should be backfilled. So every commit version which satisfies | ||
* `commitVersion % batchSize == 0` will trigger a backfill. | ||
*/ | ||
protected long batchSize; | ||
|
||
/** | ||
* Commit a given `commitFile` to the table represented by given `logPath` at the | ||
* given `commitVersion` | ||
*/ | ||
protected abstract CommitResponse commitImpl( | ||
LogStore logStore, | ||
Configuration hadoopConf, | ||
Path logPath, | ||
Map<String, String> coordinatedCommitsTableConf, | ||
long commitVersion, | ||
FileStatus commitFile, | ||
long commitTimestamp) throws CommitFailedException; | ||
|
||
@Override | ||
public CommitResponse commit( | ||
LogStore logStore, | ||
Configuration hadoopConf, | ||
Path logPath, | ||
Map<String, String> coordinatedCommitsTableConf, | ||
long commitVersion, | ||
Iterator<String> actions, | ||
UpdatedActions updatedActions) throws CommitFailedException, IOException { | ||
Path tablePath = CoordinatedCommitsUtils.getTablePath(logPath); | ||
if (commitVersion == 0) { | ||
throw new CommitFailedException( | ||
false, false, "Commit version 0 must go via filesystem."); | ||
} | ||
logger.info("Attempting to commit version {} on table {}", commitVersion, tablePath); | ||
FileSystem fs = logPath.getFileSystem(hadoopConf); | ||
if (batchSize <= 1) { | ||
// Backfill until `commitVersion - 1` | ||
logger.info("Making sure commits are backfilled until {}" + | ||
" version for table {}", commitVersion - 1, tablePath); | ||
backfillToVersion( | ||
logStore, | ||
hadoopConf, | ||
logPath, | ||
coordinatedCommitsTableConf, | ||
commitVersion - 1, | ||
null); | ||
} | ||
|
||
// Write new commit file in _commits directory | ||
FileStatus fileStatus = CoordinatedCommitsUtils.writeCommitFile( | ||
logStore, hadoopConf, logPath.toString(), commitVersion, actions, generateUUID()); | ||
|
||
// Do the actual commit | ||
long commitTimestamp = updatedActions.getCommitInfo().getCommitTimestamp(); | ||
CommitResponse commitResponse = | ||
commitImpl( | ||
logStore, | ||
hadoopConf, | ||
logPath, | ||
coordinatedCommitsTableConf, | ||
commitVersion, | ||
fileStatus, | ||
commitTimestamp); | ||
|
||
boolean mcToFsConversion = isCoordinatedCommitsToFSConversion( | ||
commitVersion, updatedActions); | ||
// Backfill if needed | ||
if (batchSize <= 1) { | ||
// Always backfill when batch size is configured as 1 | ||
backfill(logStore, hadoopConf, logPath, commitVersion, fileStatus); | ||
Path targetFile = CoordinatedCommitsUtils.getHadoopDeltaFile(logPath, commitVersion); | ||
FileStatus targetFileStatus = fs.getFileStatus(targetFile); | ||
Commit newCommit = commitResponse.getCommit().withFileStatus(targetFileStatus); | ||
return new CommitResponse(newCommit); | ||
} else if (commitVersion % batchSize == 0 || mcToFsConversion) { | ||
logger.info("Making sure commits are backfilled till {} version for table {}", | ||
commitVersion, | ||
tablePath); | ||
backfillToVersion( | ||
logStore, | ||
hadoopConf, | ||
logPath, | ||
coordinatedCommitsTableConf, | ||
commitVersion, | ||
null); | ||
} | ||
logger.info("Commit {} done successfully on table {}", commitVersion, tablePath); | ||
return commitResponse; | ||
} | ||
|
||
@Override | ||
public void backfillToVersion( | ||
LogStore logStore, | ||
Configuration hadoopConf, | ||
Path logPath, | ||
Map<String, String> coordinatedCommitsTableConf, | ||
long version, | ||
Long lastKnownBackfilledVersion) throws IOException { | ||
// Confirm the last backfilled version by checking the backfilled delta file's existence. | ||
if (lastKnownBackfilledVersion != null) { | ||
try { | ||
FileSystem fs = logPath.getFileSystem(hadoopConf); | ||
if (!fs.exists(CoordinatedCommitsUtils.getHadoopDeltaFile(logPath, version))) { | ||
lastKnownBackfilledVersion = null; | ||
} | ||
} catch (IOException e) { | ||
throw new RuntimeException(e); | ||
} | ||
} | ||
Long startVersion = null; | ||
if (lastKnownBackfilledVersion != null) { | ||
startVersion = lastKnownBackfilledVersion + 1; | ||
} | ||
GetCommitsResponse commitsResponse = | ||
getCommits(logPath,coordinatedCommitsTableConf, startVersion, version); | ||
for (Commit commit : commitsResponse.getCommits()) { | ||
backfill(logStore, hadoopConf, logPath, commit.getVersion(), commit.getFileStatus()); | ||
} | ||
} | ||
|
||
protected String generateUUID() { | ||
return UUID.randomUUID().toString(); | ||
} | ||
|
||
/** Backfills a given `fileStatus` to `version`.json */ | ||
protected void backfill( | ||
LogStore logStore, | ||
Configuration hadoopConf, | ||
Path logPath, | ||
long version, | ||
FileStatus fileStatus) throws IOException { | ||
Path targetFile = CoordinatedCommitsUtils.getHadoopDeltaFile(logPath, version); | ||
logger.info("Backfilling commit " + fileStatus.getPath() + " to " + targetFile); | ||
CloseableIterator<String> commitContentIterator = logStore | ||
.read(fileStatus.getPath(), hadoopConf); | ||
try { | ||
logStore.write( | ||
targetFile, | ||
commitContentIterator, | ||
false, | ||
hadoopConf); | ||
registerBackfill(logPath, version); | ||
} catch (FileAlreadyExistsException e) { | ||
logger.info("The backfilled file " + targetFile + " already exists."); | ||
} finally { | ||
commitContentIterator.close(); | ||
} | ||
} | ||
|
||
/** | ||
* Callback to tell the CommitCoordinator that all commits <= `backfilledVersion` are | ||
* backfilled. | ||
*/ | ||
protected abstract void registerBackfill(Path logPath, long backfilledVersion); | ||
|
||
private boolean isCoordinatedCommitsToFSConversion( | ||
long commitVersion, UpdatedActions updatedActions) { | ||
boolean oldMetadataHasCoordinatedCommits = | ||
CoordinatedCommitsUtils | ||
.getCommitCoordinatorName(updatedActions.getOldMetadata()).isPresent(); | ||
boolean newMetadataHasCoordinatedCommits = | ||
CoordinatedCommitsUtils | ||
.getCommitCoordinatorName(updatedActions.getNewMetadata()).isPresent(); | ||
return oldMetadataHasCoordinatedCommits | ||
&& !newMetadataHasCoordinatedCommits | ||
&& commitVersion > 0; | ||
} | ||
} |
29 changes: 29 additions & 0 deletions
29
...n/java/io/delta/kernel/defaults/internal/coordinatedcommits/CommitCoordinatorBuilder.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
/* | ||
* Copyright (2023) The Delta Lake Project Authors. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package io.delta.kernel.defaults.internal.coordinatedcommits; | ||
|
||
import java.util.*; | ||
|
||
import io.delta.storage.commit.CommitCoordinatorClient; | ||
|
||
/** A builder interface for {@link CommitCoordinatorClient} */ | ||
public interface CommitCoordinatorBuilder { | ||
/** Name of the commit-coordinator */ | ||
String getName(); | ||
|
||
/** Returns a commit-coordinator client based on the given conf */ | ||
CommitCoordinatorClient build(Map<String, String> conf); | ||
} |
57 changes: 57 additions & 0 deletions
57
.../java/io/delta/kernel/defaults/internal/coordinatedcommits/CommitCoordinatorProvider.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
/* | ||
* Copyright (2023) The Delta Lake Project Authors. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package io.delta.kernel.defaults.internal.coordinatedcommits; | ||
|
||
import java.util.HashMap; | ||
import java.util.Map; | ||
|
||
import io.delta.storage.commit.CommitCoordinatorClient; | ||
|
||
/** Factory to get the correct {@link CommitCoordinatorClient} for a table */ | ||
public class CommitCoordinatorProvider { | ||
// mapping from different commit-coordinator names to the corresponding | ||
// {@link CommitCoordinatorBuilder}s. | ||
private static final Map<String, CommitCoordinatorBuilder> nameToBuilderMapping = | ||
new HashMap<>(); | ||
|
||
/** | ||
* Registers a new {@link CommitCoordinatorBuilder} with the {@link CommitCoordinatorProvider}. | ||
*/ | ||
public static synchronized void registerBuilder( | ||
CommitCoordinatorBuilder commitCoordinatorBuilder) { | ||
String name = commitCoordinatorBuilder.getName(); | ||
if (nameToBuilderMapping.containsKey(name)) { | ||
throw new IllegalArgumentException( | ||
"commit-coordinator: " + | ||
name + | ||
" already registered with builder " + | ||
commitCoordinatorBuilder.getClass().getName()); | ||
} else { | ||
nameToBuilderMapping.put(name, commitCoordinatorBuilder); | ||
} | ||
} | ||
|
||
/** Returns a {@link CommitCoordinatorClient} for the given `name` and `conf` */ | ||
public static synchronized CommitCoordinatorClient getCommitCoordinatorClient( | ||
String name, Map<String, String> conf) { | ||
CommitCoordinatorBuilder builder = nameToBuilderMapping.get(name); | ||
if (builder == null) { | ||
throw new IllegalArgumentException("Unknown commit-coordinator: " + name); | ||
} else { | ||
return builder.build(conf); | ||
} | ||
} | ||
} |
182 changes: 182 additions & 0 deletions
182
...in/java/io/delta/kernel/defaults/internal/coordinatedcommits/CoordinatedCommitsUtils.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,182 @@ | ||
/* | ||
* Copyright (2023) The Delta Lake Project Authors. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package io.delta.kernel.defaults.internal.coordinatedcommits; | ||
|
||
import java.io.IOException; | ||
import java.util.*; | ||
import java.util.function.Function; | ||
|
||
import io.delta.storage.LogStore; | ||
import io.delta.storage.commit.actions.AbstractCommitInfo; | ||
import io.delta.storage.commit.actions.AbstractMetadata; | ||
import io.delta.storage.commit.actions.AbstractProtocol; | ||
import org.apache.hadoop.conf.Configuration; | ||
import org.apache.hadoop.fs.FileStatus; | ||
import org.apache.hadoop.fs.FileSystem; | ||
import org.apache.hadoop.fs.Path; | ||
|
||
import io.delta.kernel.internal.TableConfig; | ||
import io.delta.kernel.internal.actions.CommitInfo; | ||
import io.delta.kernel.internal.actions.Metadata; | ||
import io.delta.kernel.internal.actions.Protocol; | ||
import io.delta.kernel.internal.util.FileNames; | ||
import io.delta.kernel.internal.util.VectorUtils; | ||
import static io.delta.kernel.internal.TableConfig.COORDINATED_COMMITS_COORDINATOR_NAME; | ||
|
||
public class CoordinatedCommitsUtils { | ||
|
||
/** | ||
* Write a UUID-based commit file for the specified version to the table at logPath. | ||
*/ | ||
public static FileStatus writeCommitFile( | ||
LogStore logStore, | ||
Configuration hadoopConf, | ||
String logPath, | ||
long commitVersion, | ||
Iterator<String> actions, | ||
String uuid) throws IOException { | ||
Path commitPath = new Path( | ||
FileNames.unbackfilledDeltaFile( | ||
new io.delta.kernel.internal.fs.Path(logPath), | ||
commitVersion, | ||
Optional.of(uuid)).toString()); | ||
FileSystem fs = commitPath.getFileSystem(hadoopConf); | ||
if (!fs.exists(commitPath.getParent())) { | ||
fs.mkdirs(commitPath.getParent()); | ||
} | ||
logStore.write(commitPath, actions, false, hadoopConf); | ||
return commitPath.getFileSystem(hadoopConf).getFileStatus(commitPath); | ||
} | ||
|
||
/** | ||
* Get the table path from the provided log path. | ||
*/ | ||
public static Path getTablePath(Path logPath) { | ||
return logPath.getParent(); | ||
} | ||
|
||
/** | ||
* Helper method to recover the saved value of `tableConfig` from `abstractMetadata`. | ||
* Return defaultValue if the key is not in the configuration. | ||
*/ | ||
public static <T> T fromAbstractMetadataAndTableConfig( | ||
AbstractMetadata abstractMetadata, TableConfig<T> tableConfig) { | ||
Map<String, String> conf = abstractMetadata.getConfiguration(); | ||
String value = conf.getOrDefault(tableConfig.getKey(), tableConfig.getDefaultValue()); | ||
Function<String, T> fromString = tableConfig.getFromString(); | ||
return fromString.apply(value); | ||
} | ||
|
||
/** | ||
* Get the commit coordinator name from the provided abstract metadata. | ||
*/ | ||
public static Optional<String> getCommitCoordinatorName(AbstractMetadata abstractMetadata) { | ||
return fromAbstractMetadataAndTableConfig( | ||
abstractMetadata, COORDINATED_COMMITS_COORDINATOR_NAME); | ||
} | ||
|
||
/** | ||
* Get the hadoop file path for the delta file for the specified version. | ||
* | ||
* @param logPath The root path of the delta log. | ||
* @param version The version of the delta file. | ||
* @return The hadoop file path for the delta file. | ||
*/ | ||
public static Path getHadoopDeltaFile(Path logPath, long version) { | ||
return new Path(FileNames | ||
.deltaFile(new io.delta.kernel.internal.fs.Path(logPath.toString()), version)); | ||
} | ||
|
||
public static AbstractMetadata convertMetadataToAbstractMetadata(Metadata metadata) { | ||
return new AbstractMetadata() { | ||
@Override | ||
public String getId() { | ||
return metadata.getId(); | ||
} | ||
|
||
@Override | ||
public String getName() { | ||
return metadata.getName().orElse(null); | ||
} | ||
|
||
@Override | ||
public String getDescription() { | ||
return metadata.getDescription().orElse(null); | ||
} | ||
|
||
@Override | ||
public String getProvider() { | ||
return metadata.getFormat().getProvider(); | ||
} | ||
|
||
@Override | ||
public Map<String, String> getFormatOptions() { | ||
// Assuming Format class has a method to get format options | ||
return metadata.getFormat().getOptions(); | ||
} | ||
|
||
@Override | ||
public String getSchemaString() { | ||
// Assuming Metadata class has a method to get schema string | ||
return metadata.getSchemaString(); | ||
} | ||
|
||
@Override | ||
public List<String> getPartitionColumns() { | ||
// Assuming Metadata class has a method to get partition columns | ||
return VectorUtils.toJavaList(metadata.getPartitionColumns()); | ||
} | ||
|
||
@Override | ||
public Map<String, String> getConfiguration() { | ||
return metadata.getConfiguration(); | ||
} | ||
|
||
@Override | ||
public Long getCreatedTime() { | ||
return metadata.getCreatedTime().orElse(null); | ||
} | ||
}; | ||
} | ||
|
||
public static AbstractProtocol convertProtocolToAbstractProtocol(Protocol protocol) { | ||
return new AbstractProtocol() { | ||
@Override | ||
public int getMinReaderVersion() { | ||
return protocol.getMinReaderVersion(); | ||
} | ||
|
||
@Override | ||
public int getMinWriterVersion() { | ||
return protocol.getMinWriterVersion(); | ||
} | ||
|
||
@Override | ||
public Set<String> getReaderFeatures() { | ||
return new HashSet<>(protocol.getReaderFeatures()); | ||
} | ||
|
||
@Override | ||
public Set<String> getWriterFeatures() { | ||
return new HashSet<>(protocol.getWriterFeatures()); | ||
} | ||
}; | ||
} | ||
|
||
public static AbstractCommitInfo convertCommitInfoToAbstractCommitInfo(CommitInfo commitInfo) { | ||
return () -> commitInfo.getInCommitTimestamp().orElse(commitInfo.getTimestamp()); | ||
} | ||
} |
252 changes: 252 additions & 0 deletions
252
.../java/io/delta/kernel/defaults/internal/coordinatedcommits/InMemoryCommitCoordinator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,252 @@ | ||
/* | ||
* Copyright (2023) The Delta Lake Project Authors. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package io.delta.kernel.defaults.internal.coordinatedcommits; | ||
|
||
import java.util.*; | ||
import java.util.concurrent.ConcurrentHashMap; | ||
import java.util.concurrent.locks.ReentrantReadWriteLock; | ||
import java.util.function.Supplier; | ||
|
||
import io.delta.storage.LogStore; | ||
import io.delta.storage.commit.Commit; | ||
import io.delta.storage.commit.CommitCoordinatorClient; | ||
import io.delta.storage.commit.CommitFailedException; | ||
import io.delta.storage.commit.CommitResponse; | ||
import io.delta.storage.commit.GetCommitsResponse; | ||
import io.delta.storage.commit.actions.AbstractMetadata; | ||
import io.delta.storage.commit.actions.AbstractProtocol; | ||
import org.apache.hadoop.conf.Configuration; | ||
import org.apache.hadoop.fs.FileStatus; | ||
import org.apache.hadoop.fs.Path; | ||
|
||
import io.delta.kernel.internal.util.Tuple2; | ||
|
||
public class InMemoryCommitCoordinator extends AbstractBatchBackfillingCommitCoordinatorClient { | ||
|
||
/** | ||
* @param maxCommitVersion represents the max commit version known for the table. This is | ||
* initialized at the time of pre-registration and updated whenever a | ||
* commit is successfully added to the commit-coordinator. | ||
* @param active represents whether this commit-coordinator has ratified any commit or not. | ||
* |----------------------------|------------------|---------------------------| | ||
* | State | maxCommitVersion | active | | ||
* |----------------------------|------------------|---------------------------| | ||
* | Table is pre-registered | currentVersion+1 | false | | ||
* |----------------------------|------------------|---------------------------| | ||
* | Table is pre-registered | X | true | | ||
* | and more commits are done | | | | ||
* |----------------------------|------------------|---------------------------| | ||
*/ | ||
private ConcurrentHashMap<String, PerTableData> perTableMap; | ||
|
||
public InMemoryCommitCoordinator(long batchSize) { | ||
this.batchSize = batchSize; | ||
this.perTableMap = new ConcurrentHashMap<>(); | ||
} | ||
|
||
private class PerTableData { | ||
private long maxCommitVersion; | ||
private boolean active; | ||
private TreeMap<Long, Commit> commitsMap; | ||
private ReentrantReadWriteLock lock; | ||
|
||
PerTableData(long maxCommitVersion) { | ||
this(maxCommitVersion, false); | ||
} | ||
|
||
PerTableData(long maxCommitVersion, boolean active) { | ||
this.maxCommitVersion = maxCommitVersion; | ||
this.active = active; | ||
this.commitsMap = new TreeMap<>(); | ||
this.lock = new ReentrantReadWriteLock(); | ||
} | ||
|
||
public void updateLastRatifiedCommit(long commitVersion) { | ||
this.active = true; | ||
this.maxCommitVersion = commitVersion; | ||
} | ||
|
||
public long lastRatifiedCommitVersion() { | ||
return this.active ? this.maxCommitVersion : -1; | ||
} | ||
|
||
public long getMaxCommitVersion() { | ||
return maxCommitVersion; | ||
} | ||
|
||
public TreeMap<Long, Commit> getCommitsMap() { | ||
return commitsMap; | ||
} | ||
} | ||
|
||
/** | ||
* This method acquires a write lock, validates the commit version is next in line, | ||
* updates commit maps, and releases the lock. | ||
* | ||
*/ | ||
@Override | ||
protected CommitResponse commitImpl( | ||
LogStore logStore, | ||
Configuration hadoopConf, | ||
Path logPath, | ||
Map<String, String> coordinatedCommitsTableConf, | ||
long commitVersion, | ||
FileStatus commitFile, | ||
long commitTimestamp) throws CommitFailedException { | ||
Tuple2<CommitResponse, CommitFailedException> ret = | ||
addToMap(logPath, commitVersion, commitFile, commitTimestamp); | ||
if (ret._2 != null) { | ||
throw ret._2; | ||
} else { | ||
return ret._1; | ||
} | ||
} | ||
|
||
private Tuple2<CommitResponse, CommitFailedException> addToMap( | ||
Path logPath, | ||
long commitVersion, | ||
FileStatus commitFile, | ||
long commitTimestamp) { | ||
|
||
return withWriteLock(logPath, () -> { | ||
PerTableData tableData = perTableMap.get(logPath.toString()); | ||
long expectedVersion = tableData.maxCommitVersion + 1; | ||
if (commitVersion != expectedVersion) { | ||
return new Tuple2<>(null, new CommitFailedException( | ||
commitVersion < expectedVersion, | ||
commitVersion < expectedVersion, | ||
"Commit version " + | ||
commitVersion + | ||
" is not valid. Expected version: " + | ||
expectedVersion)); | ||
} | ||
|
||
Commit commit = new Commit(commitVersion, commitFile, commitTimestamp); | ||
tableData.commitsMap.put(commitVersion, commit); | ||
tableData.updateLastRatifiedCommit(commitVersion); | ||
|
||
logger.info("Added commit file " + commitFile.getPath() + " to commit-coordinator."); | ||
return new Tuple2<>(new CommitResponse(commit), null); | ||
}); | ||
} | ||
|
||
@Override | ||
public GetCommitsResponse getCommits( | ||
Path logPath, | ||
Map<String, String> coordinatedCommitsTableConf, | ||
Long startVersion, | ||
Long endVersion) { | ||
return withReadLock(logPath, () -> { | ||
PerTableData tableData = perTableMap.get(logPath.toString()); | ||
Optional<Long> startVersionOpt = Optional.ofNullable(startVersion); | ||
Optional<Long> endVersionOpt = Optional.ofNullable(endVersion); | ||
long effectiveStartVersion = startVersionOpt.orElse(0L); | ||
// Calculate the end version for the range, or use the last key if endVersion is not | ||
// provided | ||
long effectiveEndVersion = endVersionOpt.orElseGet(() -> | ||
tableData.commitsMap.isEmpty() | ||
? effectiveStartVersion : tableData.commitsMap.lastKey()); | ||
SortedMap<Long, Commit> commitsInRange = tableData.commitsMap.subMap( | ||
effectiveStartVersion, effectiveEndVersion + 1); | ||
return new GetCommitsResponse( | ||
new ArrayList<>(commitsInRange.values()), | ||
tableData.lastRatifiedCommitVersion()); | ||
}); | ||
} | ||
|
||
@Override | ||
protected void registerBackfill(Path logPath, long backfilledVersion) { | ||
withWriteLock(logPath, () -> { | ||
PerTableData tableData = perTableMap.get(logPath.toString()); | ||
if (backfilledVersion > tableData.lastRatifiedCommitVersion()) { | ||
throw new IllegalArgumentException( | ||
"Unexpected backfill version: " + backfilledVersion + ". " + | ||
"Max backfill version: " + tableData.getMaxCommitVersion()); | ||
} | ||
// Remove keys with versions less than or equal to 'untilVersion' | ||
Iterator<Long> iterator = tableData.getCommitsMap().keySet().iterator(); | ||
while (iterator.hasNext()) { | ||
Long version = iterator.next(); | ||
if (version <= backfilledVersion) { | ||
iterator.remove(); | ||
} else { | ||
break; | ||
} | ||
} | ||
return null; | ||
}); | ||
} | ||
|
||
@Override | ||
public Map<String, String> registerTable( | ||
Path logPath, | ||
long currentVersion, | ||
AbstractMetadata currentMetadata, | ||
AbstractProtocol currentProtocol) { | ||
PerTableData newPerTableData = new PerTableData(currentVersion + 1); | ||
perTableMap.compute(logPath.toString(), (key, existingData) -> { | ||
if (existingData != null) { | ||
if (existingData.lastRatifiedCommitVersion() != -1) { | ||
throw new IllegalStateException( | ||
"Table " + logPath + " already exists in the commit-coordinator."); | ||
} | ||
// If lastRatifiedCommitVersion is -1 i.e. the commit-coordinator has never | ||
// attempted any commit for this table => this table was just pre-registered. If | ||
// there is another pre-registration request for an older version, we reject it and | ||
// table can't go backward. | ||
if (currentVersion < existingData.getMaxCommitVersion()) { | ||
throw new IllegalStateException( | ||
"Table " + logPath + " already registered with commit-coordinator"); | ||
} | ||
} | ||
return newPerTableData; | ||
}); | ||
return Collections.emptyMap(); | ||
} | ||
|
||
@Override | ||
public Boolean semanticEquals(CommitCoordinatorClient other) { | ||
return this.equals(other); | ||
} | ||
|
||
private <T> T withReadLock(Path logPath, Supplier<T> operation) { | ||
PerTableData tableData = perTableMap.get(logPath.toString()); | ||
if (tableData == null) { | ||
throw new IllegalArgumentException("Unknown table " + logPath + "."); | ||
} | ||
ReentrantReadWriteLock.ReadLock lock = tableData.lock.readLock(); | ||
lock.lock(); | ||
try { | ||
return operation.get(); | ||
} finally { | ||
lock.unlock(); | ||
} | ||
} | ||
|
||
private <T> T withWriteLock(Path logPath, Supplier<T> operation) { | ||
PerTableData tableData = perTableMap.get(logPath.toString()); | ||
if (tableData == null) { | ||
throw new IllegalArgumentException("Unknown table " + logPath + "."); | ||
} | ||
ReentrantReadWriteLock.WriteLock lock = tableData.lock.writeLock(); | ||
lock.lock(); | ||
try { | ||
return operation.get(); | ||
} finally { | ||
lock.unlock(); | ||
} | ||
} | ||
} |
42 changes: 42 additions & 0 deletions
42
...o/delta/kernel/defaults/internal/coordinatedcommits/InMemoryCommitCoordinatorBuilder.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
/* | ||
* Copyright (2023) The Delta Lake Project Authors. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package io.delta.kernel.defaults.internal.coordinatedcommits; | ||
|
||
import java.util.Map; | ||
|
||
import io.delta.storage.commit.CommitCoordinatorClient; | ||
|
||
import io.delta.kernel.internal.lang.Lazy; | ||
|
||
public class InMemoryCommitCoordinatorBuilder implements CommitCoordinatorBuilder { | ||
private final long batchSize; | ||
private Lazy<InMemoryCommitCoordinator> inMemoryStore; | ||
|
||
public InMemoryCommitCoordinatorBuilder(long batchSize) { | ||
this.batchSize = batchSize; | ||
this.inMemoryStore = new Lazy<>(() -> new InMemoryCommitCoordinator(batchSize)); | ||
} | ||
|
||
/** Name of the commit-coordinator */ | ||
public String getName() { | ||
return "in-memory"; | ||
} | ||
|
||
/** Returns a commit-coordinator based on the given conf */ | ||
public CommitCoordinatorClient build(Map<String, String> conf) { | ||
return inMemoryStore.get(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
120 changes: 120 additions & 0 deletions
120
.../scala/io/delta/kernel/defaults/internal/coordinatedcommits/CoordinatedCommitsSuite.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
/* | ||
* Copyright (2021) The Delta Lake Project Authors. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package io.delta.kernel.defaults.internal.coordinatedcommits | ||
|
||
import io.delta.kernel.defaults.DeltaTableWriteSuiteBase | ||
import io.delta.kernel.defaults.internal.logstore.LogStoreProvider | ||
import io.delta.kernel.defaults.utils.TestRow | ||
import io.delta.kernel.internal.TableConfig._ | ||
import io.delta.kernel.Table | ||
import io.delta.kernel.internal.SnapshotImpl | ||
import io.delta.kernel.internal.actions.{CommitInfo, Metadata, Protocol} | ||
import org.apache.hadoop.fs.Path | ||
|
||
import java.util | ||
import java.util.{Collections, Optional} | ||
import scala.collection.convert.ImplicitConversions.`iterator asScala` | ||
import scala.collection.JavaConverters._ | ||
|
||
/**
 * Tests for the coordinated-commits read path: recovering coordinator settings from table
 * metadata, and building snapshots for a table whose later commits were made through the
 * in-memory commit-coordinator.
 */
class CoordinatedCommitsSuite extends DeltaTableWriteSuiteBase
    with CoordinatedCommitsTestUtils {

  test("helper method that recovers config from abstract metadata works properly") {
    // A plain string value comes back wrapped in an Optional.
    val m1 = Metadata.empty.withNewConfiguration(
      Map(COORDINATED_COMMITS_COORDINATOR_NAME.getKey -> "string_value").asJava
    )
    assert(CoordinatedCommitsUtils.fromAbstractMetadataAndTableConfig(
      CoordinatedCommitsUtils.convertMetadataToAbstractMetadata(m1),
      COORDINATED_COMMITS_COORDINATOR_NAME) === Optional.of("string_value"))

    // An empty string is still a present value.
    val m2 = Metadata.empty.withNewConfiguration(
      Map(COORDINATED_COMMITS_COORDINATOR_NAME.getKey -> "").asJava
    )
    assert(CoordinatedCommitsUtils.fromAbstractMetadataAndTableConfig(
      CoordinatedCommitsUtils.convertMetadataToAbstractMetadata(m2),
      COORDINATED_COMMITS_COORDINATOR_NAME) === Optional.of(""))

    // The coordinator conf is a JSON object; every value is expected back as a string.
    val m3 = Metadata.empty.withNewConfiguration(
      Map(COORDINATED_COMMITS_COORDINATOR_CONF.getKey ->
        """{"key1": "string_value", "key2Int": 2, "key3ComplexStr": "\"hello\""}""").asJava
    )
    assert(CoordinatedCommitsUtils.fromAbstractMetadataAndTableConfig(
      CoordinatedCommitsUtils.convertMetadataToAbstractMetadata(m3),
      COORDINATED_COMMITS_COORDINATOR_CONF) ===
      Map("key1" -> "string_value", "key2Int" -> "2", "key3ComplexStr" -> "\"hello\"").asJava)
  }

  test("cold snapshot initialization") {
    val builder = new InMemoryCommitCoordinatorBuilder(10)
    val commitCoordinatorClient = builder.build(Collections.emptyMap())
    CommitCoordinatorProvider.registerBuilder(builder)
    withTempDirAndEngine { (tablePath, engine) =>
      val logPath = new Path("file:" + tablePath, "_delta_log")
      val table = Table.forPath(engine, tablePath)

      spark.range(0, 10).write.format("delta").mode("overwrite").save(tablePath) // version 0
      checkAnswer(
        spark.sql(s"SELECT * FROM delta.`$tablePath`").collect().map(TestRow(_)),
        (0L to 9L).map(TestRow(_)))
      spark.range(10, 20).write.format("delta").mode("overwrite").save(tablePath) // version 1
      spark.range(20, 30).write.format("delta").mode("append").save(tablePath) // version 2
      checkAnswer(
        spark.sql(s"SELECT * FROM delta.`$tablePath`").collect().map(TestRow(_)),
        (10L to 29L).map(TestRow(_)))

      var tableConf: util.Map[String, String] = null
      val logStore = LogStoreProvider.getLogStore(hadoopConf, logPath.toUri.getScheme)

      // Replay the three commits written above, with the coordinated-commits settings injected
      // into their metadata rows.
      (0 to 2).foreach { version =>
        val delta = CoordinatedCommitsUtils.getHadoopDeltaFile(logPath, version)

        // Materialize the rows before the file is overwritten or deleted below.
        val rows = addCoordinatedCommitToMetadataRow(logStore.read(delta, hadoopConf).toList)

        if (version == 0) {
          // Version 0 is written straight to the file system after registering the table.
          tableConf = commitCoordinatorClient.registerTable(
            logPath,
            -1L,
            CoordinatedCommitsUtils.convertMetadataToAbstractMetadata(Metadata.empty()),
            CoordinatedCommitsUtils.convertProtocolToAbstractProtocol(new Protocol(1, 1)))
          writeCommitZero(engine, logPath, rows.asJava)
        } else {
          commit(logPath, tableConf, version, version, rows.asJava, commitCoordinatorClient)
          // Remove the original delta file for this version (presumably so the commit is only
          // discoverable through the commit-coordinator). A single file needs no recursive
          // delete, so the non-deprecated two-argument overload is used.
          logPath.getFileSystem(hadoopConf).delete(
            CoordinatedCommitsUtils.getHadoopDeltaFile(logPath, version),
            false)
        }
      }
      val snapshot0 = table.getSnapshotAsOfVersion(engine, 0)
      val result0 = readSnapshot(snapshot0, snapshot0.getSchema(engine), null, null, engine)
      checkAnswer(result0, (0L to 9L).map(TestRow(_)))

      val snapshot1 = table.getSnapshotAsOfVersion(engine, 1)
      val result1 = readSnapshot(snapshot1, snapshot1.getSchema(engine), null, null, engine)
      checkAnswer(result1, (10L to 19L).map(TestRow(_)))

      val snapshot2 = table.getLatestSnapshot(engine)
      val result2 = readSnapshot(snapshot2, snapshot2.getSchema(engine), null, null, engine)
      checkAnswer(result2, (10L to 29L).map(TestRow(_)))
    }
  }

  /**
   * Rewrites every row containing "metaData" so that an empty `"configuration":{}` object
   * instead carries an empty coordinator conf and the coordinator name "in-memory". Other rows
   * are returned unchanged.
   *
   * The configuration keys are taken from the TableConfig constants rather than spelled out by
   * hand, so they cannot drift from the keys used elsewhere in these tests.
   */
  def addCoordinatedCommitToMetadataRow(rows: List[String]): List[String] = {
    val coordinatedCommitsConfiguration =
      "\"configuration\":{\"" + COORDINATED_COMMITS_COORDINATOR_CONF.getKey + "\":\"{}\"," +
        "\"" + COORDINATED_COMMITS_COORDINATOR_NAME.getKey + "\":\"in-memory\"}"
    rows.map { row =>
      if (row.contains("metaData")) {
        row.replace("\"configuration\":{}", coordinatedCommitsConfiguration)
      } else {
        row
      }
    }
  }
}
97 changes: 97 additions & 0 deletions
97
...la/io/delta/kernel/defaults/internal/coordinatedcommits/CoordinatedCommitsTestUtils.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
/* | ||
* Copyright (2021) The Delta Lake Project Authors. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package io.delta.kernel.defaults.internal.coordinatedcommits | ||
|
||
import java.util | ||
import io.delta.kernel.defaults.internal.logstore.LogStoreProvider | ||
import io.delta.kernel.engine.Engine | ||
import io.delta.kernel.internal.actions.{CommitInfo, Metadata, Protocol} | ||
import io.delta.kernel.internal.TableConfig | ||
import io.delta.storage.commit.{Commit, CommitCoordinatorClient, UpdatedActions} | ||
import org.apache.hadoop.conf.Configuration | ||
import org.apache.hadoop.fs.Path | ||
|
||
import scala.collection.JavaConverters._ | ||
|
||
/**
 * Helpers shared by the coordinated-commits test suites: committing through a
 * commit-coordinator client, writing version 0 directly to the log, and building the
 * `UpdatedActions` passed along with a commit.
 */
trait CoordinatedCommitsTestUtils {

  val hadoopConf = new Configuration()

  /**
   * Commits `commit` as `version` through `commitCoordinatorClient` and returns the resulting
   * Commit. The commit info passed along is an empty CommitInfo stamped with `timestamp`.
   */
  def commit(
      logPath: Path,
      tableConf: util.Map[String, String],
      version: Long,
      timestamp: Long,
      commit: util.List[String],
      commitCoordinatorClient: CommitCoordinatorClient): Commit = {
    val store = LogStoreProvider.getLogStore(hadoopConf, logPath.toUri.getScheme)
    val stampedCommitInfo = CommitInfo.empty().withTimestamp(timestamp)
    val actions =
      if (version != 0) {
        getUpdatedActionsForNonZerothCommit(stampedCommitInfo)
      } else {
        getUpdatedActionsForZerothCommit(stampedCommitInfo)
      }
    val response = commitCoordinatorClient.commit(
      store,
      hadoopConf,
      logPath,
      tableConf,
      version,
      commit.iterator(),
      actions)
    response.getCommit
  }

  /** Creates the log directory and writes `commit` as the version-0 delta file. */
  def writeCommitZero(engine: Engine, logPath: Path, commit: util.List[String]): Unit = {
    createLogPath(engine, logPath)
    val store = LogStoreProvider.getLogStore(hadoopConf, logPath.toUri.getScheme)
    val versionZeroFile = CoordinatedCommitsUtils.getHadoopDeltaFile(logPath, 0)
    store.write(versionZeroFile, commit.iterator(), true, hadoopConf)
  }

  /** Creates the delta log directory for a new table; fails if `mkdirs` reports false. */
  def createLogPath(engine: Engine, logPath: Path): Unit = {
    val created = engine.getFileSystemClient.mkdirs(logPath.toString)
    if (!created) {
      throw new RuntimeException("Failed to create delta log directory: " + logPath)
    }
  }

  /**
   * Builds the UpdatedActions for a version-0 commit: the new metadata is `oldMetadata` with the
   * coordinator name set to "in-memory" and an empty coordinator conf, and both protocols are
   * empty.
   */
  def getUpdatedActionsForZerothCommit(
      commitInfo: CommitInfo,
      oldMetadata: Metadata = Metadata.empty()): UpdatedActions = {
    val coordinatorSettings = Map(
      TableConfig.COORDINATED_COMMITS_COORDINATOR_NAME.getKey -> "in-memory",
      TableConfig.COORDINATED_COMMITS_COORDINATOR_CONF.getKey -> "{}")
    val metadataWithCoordinator = oldMetadata.withNewConfiguration(coordinatorSettings.asJava)

    val abstractCommitInfo =
      CoordinatedCommitsUtils.convertCommitInfoToAbstractCommitInfo(commitInfo)
    val abstractNewMetadata =
      CoordinatedCommitsUtils.convertMetadataToAbstractMetadata(metadataWithCoordinator)
    val abstractNewProtocol =
      CoordinatedCommitsUtils.convertProtocolToAbstractProtocol(Protocol.empty())
    val abstractOldMetadata =
      CoordinatedCommitsUtils.convertMetadataToAbstractMetadata(oldMetadata)
    val abstractOldProtocol =
      CoordinatedCommitsUtils.convertProtocolToAbstractProtocol(Protocol.empty())

    new UpdatedActions(
      abstractCommitInfo,
      abstractNewMetadata,
      abstractNewProtocol,
      abstractOldMetadata,
      abstractOldProtocol)
  }

  /**
   * Builds the UpdatedActions for a commit after version 0: same as the version-0 actions,
   * except that the old metadata is the new metadata.
   */
  def getUpdatedActionsForNonZerothCommit(commitInfo: CommitInfo): UpdatedActions = {
    val zeroth = getUpdatedActionsForZerothCommit(commitInfo)
    val abstractCommitInfo = zeroth.getCommitInfo
    val abstractNewMetadata = zeroth.getNewMetadata
    val abstractNewProtocol = zeroth.getNewProtocol
    // oldMetadata is replaced with newMetadata
    val abstractOldMetadata = zeroth.getNewMetadata
    val abstractOldProtocol = zeroth.getOldProtocol
    new UpdatedActions(
      abstractCommitInfo,
      abstractNewMetadata,
      abstractNewProtocol,
      abstractOldMetadata,
      abstractOldProtocol)
  }
}
178 changes: 178 additions & 0 deletions
178
...io/delta/kernel/defaults/internal/coordinatedcommits/InMemoryCommitCoordinatorSuite.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,178 @@ | ||
/* | ||
* Copyright (2021) The Delta Lake Project Authors. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package io.delta.kernel.defaults.internal.coordinatedcommits | ||
|
||
import io.delta.kernel.Table | ||
import io.delta.kernel.defaults.engine.DefaultJsonHandler | ||
import io.delta.kernel.defaults.internal.logstore.LogStoreProvider | ||
import io.delta.kernel.defaults.utils.DefaultVectorTestUtils | ||
import io.delta.kernel.defaults.DeltaTableWriteSuiteBase | ||
import io.delta.kernel.engine.Engine | ||
import io.delta.kernel.internal.actions.{CommitInfo, Metadata, Protocol} | ||
import io.delta.kernel.types.{StringType, StructType} | ||
import io.delta.storage.commit.{Commit, CommitCoordinatorClient, CommitFailedException, GetCommitsResponse} | ||
import org.apache.hadoop.conf.Configuration | ||
import org.apache.hadoop.fs.Path | ||
|
||
import java.util | ||
import java.util.{Collections, Optional} | ||
import scala.collection.JavaConverters._ | ||
import scala.collection.convert.ImplicitConversions.{`iterator asScala`, `list asScalaBuffer`} | ||
|
||
/**
 * Commit and backfill tests for the in-memory commit-coordinator, parameterized by the batch
 * size handed to InMemoryCommitCoordinatorBuilder.
 */
abstract class InMemoryCommitCoordinatorSuite(batchSize: Int) extends DeltaTableWriteSuiteBase
    with CoordinatedCommitsTestUtils
    with DefaultVectorTestUtils {

  val jsonHandler = new DefaultJsonHandler(hadoopConf)

  /**
   * Asserts that two responses agree on the latest table version and, commit by commit, on
   * version, file path, file length, file modification time and commit timestamp.
   */
  private def assertGetCommitResponseEqual(x: GetCommitsResponse, y: GetCommitsResponse): Unit = {
    assert(x.getLatestTableVersion == y.getLatestTableVersion)
    assert(x.getCommits.size() == y.getCommits.size())
    (0 until x.getCommits.size()).foreach { i =>
      val left = x.getCommits.get(i)
      val right = y.getCommits.get(i)
      assert(left.getVersion == right.getVersion)
      assert(left.getFileStatus.getPath == right.getFileStatus.getPath)
      assert(left.getFileStatus.getLen == right.getFileStatus.getLen)
      assert(left.getFileStatus.getModificationTime == right.getFileStatus.getModificationTime)
      assert(left.getCommitTimestamp == right.getCommitTimestamp)
    }
  }

  /**
   * Asserts that the delta file for `version` exists in the log with the expected content: the
   * version followed by the timestamp when `timestampOpt` is given, otherwise only the first
   * line is checked against the version.
   */
  protected def assertBackfilled(
      version: Long,
      logPath: Path,
      timestampOpt: Option[Long] = None): Unit = {
    val logStore = LogStoreProvider.getLogStore(hadoopConf, logPath.toUri.getScheme)
    val delta = CoordinatedCommitsUtils.getHadoopDeltaFile(logPath, version)
    timestampOpt match {
      case Some(timestamp) =>
        assert(logStore.read(delta, hadoopConf).toSeq == Seq(s"$version", s"$timestamp"))
      case None =>
        assert(logStore.read(delta, hadoopConf).take(1).toSeq == Seq(s"$version"))
    }
  }

  /** Calls `registerBackfill` on the client, which must be an InMemoryCommitCoordinator. */
  protected def registerBackfillOp(
      commitCoordinatorClient: CommitCoordinatorClient,
      logPath: Path,
      version: Long): Unit = {
    val coordinator = commitCoordinatorClient.asInstanceOf[InMemoryCommitCoordinator]
    coordinator.registerBackfill(logPath, version)
  }

  /**
   * After committing `version`, checks that the coordinator still tracks exactly the versions
   * above the last multiple of `batchSize`, and that everything up to that multiple is
   * backfilled with the version as its timestamp.
   */
  protected def validateBackfillStrategy(
      engine: Engine,
      commitCoordinatorClient: CommitCoordinatorClient,
      logPath: Path,
      tableConf: util.Map[String, String],
      version: Long): Unit = {
    val lastBackfilled = (version - (version % batchSize)).toInt
    val trackedVersions = commitCoordinatorClient
      .getCommits(logPath, tableConf, null, null)
      .getCommits.map(_.getVersion)
    val expectedTracked = lastBackfilled + 1 to version.toInt

    assert(trackedVersions == expectedTracked)
    for (v <- 0 to lastBackfilled) {
      assertBackfilled(v, logPath, Some(v))
    }
  }

  /**
   * Checks the commit-coordinator state:
   *  - the last tracked commit matches the latest table version it reports
   *  - the tracked commits form a contiguous range
   *  - every version below the tracked range is backfilled, with its content verified when
   *    commitTimestampsOpt is provided
   *
   * Implementing classes may override this to check more specific invariants.
   */
  protected def assertInvariants(
      logPath: Path,
      tableConf: util.Map[String, String],
      commitCoordinatorClient: CommitCoordinatorClient,
      commitTimestampsOpt: Option[Array[Long]] = None): Unit = {
    val response = commitCoordinatorClient.getCommits(logPath, tableConf, null, null)
    val maxUntrackedVersion: Int =
      if (response.getCommits.isEmpty) {
        response.getLatestTableVersion.toInt
      } else {
        assert(
          response.getCommits.last.getVersion == response.getLatestTableVersion,
          s"Max commit tracked by the commit coordinator ${response.getCommits.last} must " +
            s"match latestTableVersion tracked by the commit coordinator " +
            s"${response.getLatestTableVersion}."
        )
        val minVersion = response.getCommits.head.getVersion
        assert(
          response.getLatestTableVersion - minVersion + 1 == response.getCommits.size,
          "Commit map should have a contiguous range of unbackfilled commits."
        )
        minVersion.toInt - 1
      }
    for (version <- 0 to maxUntrackedVersion) {
      assertBackfilled(version, logPath, commitTimestampsOpt.map(_(version)))
    }
  }

  test("test basic commit and backfill functionality") {
    withTempDirAndEngine { (tablePath, engine) =>
      val coordinator =
        new InMemoryCommitCoordinatorBuilder(batchSize).build(Collections.emptyMap())
      val logPath = new Path(tablePath, "_delta_log")

      val tableConf = coordinator.registerTable(
        logPath,
        -1L,
        CoordinatedCommitsUtils.convertMetadataToAbstractMetadata(Metadata.empty()),
        CoordinatedCommitsUtils.convertProtocolToAbstractProtocol(new Protocol(1, 1)))

      // Version 0 may not go through the coordinator; it has to be written to the file system.
      val failure = intercept[CommitFailedException] {
        commit(
          logPath,
          tableConf, version = 0, timestamp = 0, util.Arrays.asList("0", "0"), coordinator)
      }
      assert(failure.getMessage === "Commit version 0 must go via filesystem.")
      writeCommitZero(engine, logPath, util.Arrays.asList("0", "0"))
      assertGetCommitResponseEqual(
        coordinator.getCommits(logPath, tableConf, null, null),
        new GetCommitsResponse(Collections.emptyList(), -1))
      assertBackfilled(version = 0, logPath, Some(0L))

      // Test backfilling functionality for commits 1 - 8
      (1 to 8).foreach { version =>
        commit(
          logPath,
          tableConf,
          version, version, util.Arrays.asList(s"$version", s"$version"), coordinator)
        validateBackfillStrategy(engine, coordinator, logPath, tableConf, version)
        val latest = coordinator.getCommits(logPath, tableConf, null, null).getLatestTableVersion
        assert(latest == version)
      }

      // Test that out-of-order backfill is rejected
      intercept[IllegalArgumentException] {
        registerBackfillOp(coordinator, logPath, 10)
      }
      assertInvariants(logPath, tableConf, coordinator)
    }
  }
}
|
||
/** Runs the in-memory commit-coordinator suite with a batch size of 1. */
class InMemoryCommitCoordinator1Suite extends InMemoryCommitCoordinatorSuite(batchSize = 1)
/** Runs the in-memory commit-coordinator suite with a batch size of 5. */
class InMemoryCommitCoordinator5Suite extends InMemoryCommitCoordinatorSuite(batchSize = 5)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters