Skip to content

Commit 34fd618

Browse files
[Kernel][Refactor] Factor out some common utils to LogDataUtils (#5295)
<!-- Thanks for sending a pull request! Here are some tips for you: 1. If this is your first time, please read our contributor guidelines: https://github.com/delta-io/delta/blob/master/CONTRIBUTING.md 2. If the PR is unfinished, add '[WIP]' in your PR title, e.g., '[WIP] Your PR title ...'. 3. Be sure to keep the PR description updated to reflect all changes. 4. Please write your PR title to summarize what this PR proposes. 5. If possible, provide a concise example to reproduce the issue for a faster review. 6. If applicable, include the corresponding issue number in the PR title and link it in the body. --> #### Which Delta project/connector is this regarding? <!-- Please add the component selected below to the beginning of the pull request title For example: [Spark] Title of my pull request --> - [ ] Spark - [ ] Standalone - [ ] Flink - [x] Kernel - [ ] Other (fill in here) ## Description Factors out some common utilities to a utils class. These will also be used in the CCV2 commitRange implementation. <!-- - Describe what this PR changes. - Describe why we need the change. If this PR resolves an issue be sure to include "Resolves #XXX" to correctly link and close the issue upon merge. --> ## How was this patch tested? Existing tests, plus newly added unit tests. <!-- If tests were added, say they were added here. Please make sure to test the changes thoroughly including negative and positive cases if possible. If the changes were tested in any way other than unit tests, please clarify how you tested step by step (ideally copy and paste-able, so that other reviewers can test and check, and descendants can verify in the future). If the changes were not tested, please explain why. --> ## Does this PR introduce _any_ user-facing changes? No <!-- If yes, please clarify the previous behavior and the change this PR proposes - provide the console output, description and/or an example to show the behavior difference if possible. 
If possible, please also clarify if this is a user-facing change compared to the released Delta Lake versions or within the unreleased branches such as master. If no, write 'No'. -->
1 parent 6236fdf commit 34fd618

File tree

5 files changed

+333
-46
lines changed

5 files changed

+333
-46
lines changed

kernel/kernel-api/src/main/java/io/delta/kernel/internal/DeltaHistoryManager.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import io.delta.kernel.internal.actions.CommitInfo;
2727
import io.delta.kernel.internal.actions.Metadata;
2828
import io.delta.kernel.internal.checkpoints.CheckpointInstance;
29+
import io.delta.kernel.internal.files.LogDataUtils;
2930
import io.delta.kernel.internal.files.ParsedCatalogCommitData;
3031
import io.delta.kernel.internal.files.ParsedLogData;
3132
import io.delta.kernel.internal.fs.Path;
@@ -163,9 +164,7 @@ public static Commit getActiveCommitAtTimestamp(
163164
List<ParsedCatalogCommitData> parsedCatalogCommits)
164165
throws TableNotFoundException {
165166
// For now, we only accept *staged* ratified commits (not inline)
166-
checkArgument(
167-
parsedCatalogCommits.stream().allMatch(ParsedCatalogCommitData::isFile),
168-
"Currently getActiveCommitAtTimestamp only accepts ratified staged file commits");
167+
LogDataUtils.validateLogDataContainsOnlyRatifiedStagedCommits(parsedCatalogCommits);
169168

170169
// Create a mapper for delta version -> file status that takes into account ratified commits
171170
Function<Long, FileStatus> versionToFileStatusFunction =
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/*
2+
* Copyright (2025) The Delta Lake Project Authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package io.delta.kernel.internal.files;
17+
18+
import static io.delta.kernel.internal.util.Preconditions.checkArgument;
19+
20+
import io.delta.kernel.internal.lang.ListUtils;
21+
import java.util.List;
22+
import java.util.function.Function;
23+
import java.util.stream.Collectors;
24+
import java.util.stream.Stream;
25+
26+
public final class LogDataUtils {
27+
28+
private LogDataUtils() {}
29+
30+
public static void validateLogDataContainsOnlyRatifiedStagedCommits(
31+
List<? extends ParsedLogData> logDatas) {
32+
for (ParsedLogData logData : logDatas) {
33+
checkArgument(
34+
logData instanceof ParsedCatalogCommitData && logData.isFile(),
35+
"Only staged ratified commits are supported, but found: " + logData);
36+
}
37+
}
38+
39+
public static void validateLogDataIsSortedContiguous(List<? extends ParsedLogData> logDatas) {
40+
if (logDatas.size() > 1) {
41+
for (int i = 1; i < logDatas.size(); i++) {
42+
final ParsedLogData prev = logDatas.get(i - 1);
43+
final ParsedLogData curr = logDatas.get(i);
44+
checkArgument(
45+
prev.getVersion() + 1 == curr.getVersion(),
46+
String.format(
47+
"Log data must be sorted and contiguous, but found: %s and %s", prev, curr));
48+
}
49+
}
50+
}
51+
52+
/**
53+
* Combines a list of published Deltas and ratified Deltas into a single list of Deltas such that
54+
* there is exactly one {@link ParsedDeltaData} per version. When there is both a published Delta
55+
* and a ratified staged Delta for the same version, prioritizes the ratified Delta.
56+
*
57+
* <p>The method requires but does not validate the following:
58+
*
59+
* <ul>
60+
* <li>{@code publishedDeltas} are sorted and contiguous
61+
* <li>{@code ratifiedDeltas} are sorted and contiguous
62+
* <li>the commit versions present in {@code publishedDeltas} and {@code ratifiedDeltas}, when
63+
* combined, reflect a contiguous version range. In other words, if the two do not overlap,
64+
* then {@code ratifiedDeltas.first = publishedDeltas.last + 1}.
65+
* </ul>
66+
*/
67+
public static List<ParsedDeltaData> combinePublishedAndRatifiedDeltasWithCatalogPriority(
68+
List<ParsedDeltaData> publishedDeltas, List<ParsedDeltaData> ratifiedDeltas) {
69+
if (ratifiedDeltas.isEmpty()) {
70+
return publishedDeltas;
71+
}
72+
73+
if (publishedDeltas.isEmpty()) {
74+
return ratifiedDeltas;
75+
}
76+
77+
final long firstRatified = ratifiedDeltas.get(0).getVersion();
78+
final long lastRatified = ListUtils.getLast(ratifiedDeltas).getVersion();
79+
80+
return Stream.of(
81+
publishedDeltas.stream().filter(x -> x.getVersion() < firstRatified),
82+
ratifiedDeltas.stream(),
83+
publishedDeltas.stream().filter(x -> x.getVersion() > lastRatified))
84+
.flatMap(Function.identity())
85+
.collect(Collectors.toList());
86+
}
87+
}

kernel/kernel-api/src/main/java/io/delta/kernel/internal/snapshot/SnapshotManager.java

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,7 @@
4141
import io.delta.kernel.internal.util.Tuple2;
4242
import io.delta.kernel.utils.FileStatus;
4343
import java.util.*;
44-
import java.util.function.Function;
4544
import java.util.stream.Collectors;
46-
import java.util.stream.Stream;
4745
import org.slf4j.Logger;
4846
import org.slf4j.LoggerFactory;
4947

@@ -593,23 +591,8 @@ private List<ParsedDeltaData> getAllDeltasAfterCheckpointWithCatalogPriority(
593591
.map(ParsedCatalogCommitData.class::cast)
594592
.collect(Collectors.toList());
595593

596-
if (allRatifiedCommitsAfterCheckpoint.isEmpty()) {
597-
return allPublishedDeltasAfterCheckpoint;
598-
}
599-
600-
if (allPublishedDeltasAfterCheckpoint.isEmpty()) {
601-
return allRatifiedCommitsAfterCheckpoint;
602-
}
603-
604-
final long firstRatified = allRatifiedCommitsAfterCheckpoint.get(0).getVersion();
605-
final long lastRatified = ListUtils.getLast(allRatifiedCommitsAfterCheckpoint).getVersion();
606-
607-
return Stream.of(
608-
allPublishedDeltasAfterCheckpoint.stream().filter(x -> x.getVersion() < firstRatified),
609-
allRatifiedCommitsAfterCheckpoint.stream(),
610-
allPublishedDeltasAfterCheckpoint.stream().filter(x -> x.getVersion() > lastRatified))
611-
.flatMap(Function.identity())
612-
.collect(Collectors.toList());
594+
return LogDataUtils.combinePublishedAndRatifiedDeltasWithCatalogPriority(
595+
allPublishedDeltasAfterCheckpoint, allRatifiedCommitsAfterCheckpoint);
613596
}
614597

615598
/**

kernel/kernel-api/src/main/java/io/delta/kernel/internal/table/SnapshotBuilderImpl.java

Lines changed: 3 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
import io.delta.kernel.internal.SnapshotImpl;
2828
import io.delta.kernel.internal.actions.Metadata;
2929
import io.delta.kernel.internal.actions.Protocol;
30-
import io.delta.kernel.internal.files.ParsedCatalogCommitData;
30+
import io.delta.kernel.internal.files.LogDataUtils;
3131
import io.delta.kernel.internal.files.ParsedLogData;
3232
import io.delta.kernel.internal.tablefeatures.TableFeatures;
3333
import io.delta.kernel.internal.util.Tuple2;
@@ -122,8 +122,8 @@ private void validateInputOnBuild(Engine engine) {
122122
validateProtocolAndMetadataOnlyIfVersionProvided();
123123
validateProtocolRead();
124124
// TODO: delta-io/delta#4765 support other types
125-
validateLogDataContainsOnlyStagedRatifiedCommits();
126-
validateLogDataIsSortedContiguous();
125+
LogDataUtils.validateLogDataContainsOnlyRatifiedStagedCommits(ctx.logDatas);
126+
LogDataUtils.validateLogDataIsSortedContiguous(ctx.logDatas);
127127
}
128128

129129
private void validateVersionNonNegative() {
@@ -171,25 +171,4 @@ private void validateProtocolRead() {
171171
ctx.protocolAndMetadataOpt.ifPresent(
172172
x -> TableFeatures.validateKernelCanReadTheTable(x._1, ctx.unresolvedPath));
173173
}
174-
175-
private void validateLogDataContainsOnlyStagedRatifiedCommits() {
176-
for (ParsedLogData logData : ctx.logDatas) {
177-
checkArgument(
178-
logData instanceof ParsedCatalogCommitData && logData.isFile(),
179-
"Only staged ratified commits are supported, but found: " + logData);
180-
}
181-
}
182-
183-
private void validateLogDataIsSortedContiguous() {
184-
if (ctx.logDatas.size() > 1) {
185-
for (int i = 1; i < ctx.logDatas.size(); i++) {
186-
final ParsedLogData prev = ctx.logDatas.get(i - 1);
187-
final ParsedLogData curr = ctx.logDatas.get(i);
188-
checkArgument(
189-
prev.getVersion() + 1 == curr.getVersion(),
190-
String.format(
191-
"Log data must be sorted and contiguous, but found: %s and %s", prev, curr));
192-
}
193-
}
194-
}
195174
}

0 commit comments

Comments
 (0)