-
Notifications
You must be signed in to change notification settings - Fork 2.9k
REST: Add Support for Custom Operations Builders in RESTCatalog #14465
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,145 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. The ASF licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
| package org.apache.iceberg.rest; | ||
|
|
||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.Set; | ||
| import java.util.function.Supplier; | ||
| import org.apache.iceberg.MetadataUpdate; | ||
| import org.apache.iceberg.TableMetadata; | ||
| import org.apache.iceberg.io.FileIO; | ||
| import org.apache.iceberg.view.ViewMetadata; | ||
|
|
||
| /** | ||
| * A factory interface for creating {@link RESTTableOperations} and {@link RESTViewOperations} | ||
| * instances for REST catalogs. | ||
| * | ||
| * <p>This interface allows custom implementations of table and view operations to be injected into | ||
| * {@link RESTSessionCatalog} and {@link RESTCatalog}, enabling extensibility for specialized use | ||
| * cases. | ||
| * | ||
| * <p>Every method has a default implementation that returns the standard operations class, so an | ||
| * implementation only needs to override the methods it wants to customize. | ||
| * | ||
| * <p>Example usage. Note that this example does not override {@link | ||
| * #createTableOperationsForTransaction}, so that method keeps its default and returns a standard | ||
| * {@link RESTTableOperations}: | ||
| * | ||
| * <pre> | ||
| * RESTOperationsFactory customFactory = new RESTOperationsFactory() { | ||
| * {@literal @}Override | ||
| * public RESTTableOperations createTableOperations( | ||
| * RESTClient client, | ||
| * String path, | ||
| * Supplier&lt;Map&lt;String, String&gt;&gt; headers, | ||
| * FileIO io, | ||
| * TableMetadata current, | ||
| * Set&lt;Endpoint&gt; endpoints) { | ||
| * return new CustomRESTTableOperations(client, path, headers, io, current, endpoints); | ||
| * } | ||
| * | ||
| * {@literal @}Override | ||
| * public RESTViewOperations createViewOperations( | ||
| * RESTClient client, | ||
| * String path, | ||
| * Supplier&lt;Map&lt;String, String&gt;&gt; headers, | ||
| * ViewMetadata current, | ||
| * Set&lt;Endpoint&gt; endpoints) { | ||
| * return new CustomRESTViewOperations(client, path, headers, current, endpoints); | ||
| * } | ||
| * }; | ||
| * | ||
| * RESTSessionCatalog catalog = new RESTSessionCatalog(clientBuilder, ioBuilder, customFactory); | ||
| * </pre> | ||
| */ | ||
| public interface RESTOperationsFactory { | ||
|
|
||
| /** | ||
| * Create a new {@link RESTTableOperations} instance for simple table operations. | ||
| * | ||
| * <p>The default implementation creates a standard {@link RESTTableOperations} instance, passing | ||
| * all arguments through to its constructor unchanged. | ||
| * | ||
| * @param client the REST client to use for communicating with the catalog server | ||
| * @param path the REST path for the table | ||
| * @param headers a supplier for additional HTTP headers to include in requests | ||
| * @param io the FileIO implementation for reading and writing table metadata and data files | ||
| * @param current the current table metadata | ||
| * @param endpoints the set of supported REST endpoints | ||
| * @return a new RESTTableOperations instance | ||
| */ | ||
| default RESTTableOperations createTableOperations( | ||
| RESTClient client, | ||
| String path, | ||
| Supplier<Map<String, String>> headers, | ||
| FileIO io, | ||
| TableMetadata current, | ||
| Set<Endpoint> endpoints) { | ||
| return new RESTTableOperations(client, path, headers, io, current, endpoints); | ||
| } | ||
|
|
||
| /** | ||
| * Create a new {@link RESTTableOperations} instance for transaction-based operations (create or | ||
| * replace). | ||
| * | ||
| * <p>This method is used when creating tables or replacing table metadata within a transaction. | ||
| * The default implementation creates a standard {@link RESTTableOperations} instance, passing | ||
| * all arguments through to its constructor unchanged. | ||
| * | ||
| * @param client the REST client to use for communicating with the catalog server | ||
| * @param path the REST path for the table | ||
| * @param headers a supplier for additional HTTP headers to include in requests | ||
| * @param io the FileIO implementation for reading and writing table metadata and data files | ||
| * @param updateType the type of update being performed (CREATE, REPLACE, or SIMPLE) | ||
| * @param createChanges the list of metadata updates to apply during table creation or replacement | ||
| * @param current the current table metadata (may be null for CREATE operations) | ||
| * @param endpoints the set of supported REST endpoints | ||
| * @return a new RESTTableOperations instance | ||
| */ | ||
| default RESTTableOperations createTableOperationsForTransaction( | ||
| RESTClient client, | ||
| String path, | ||
| Supplier<Map<String, String>> headers, | ||
| FileIO io, | ||
| RESTTableOperations.UpdateType updateType, | ||
| List<MetadataUpdate> createChanges, | ||
| TableMetadata current, | ||
| Set<Endpoint> endpoints) { | ||
| return new RESTTableOperations( | ||
| client, path, headers, io, updateType, createChanges, current, endpoints); | ||
| } | ||
|
|
||
| /** | ||
| * Create a new {@link RESTViewOperations} instance. | ||
| * | ||
| * <p>The default implementation creates a standard {@link RESTViewOperations} instance, passing | ||
| * all arguments through to its constructor unchanged. Unlike the table factory methods, this | ||
| * method takes no FileIO argument. | ||
| * | ||
| * @param client the REST client to use for communicating with the catalog server | ||
| * @param path the REST path for the view | ||
| * @param headers a supplier for additional HTTP headers to include in requests | ||
| * @param current the current view metadata | ||
| * @param endpoints the set of supported REST endpoints | ||
| * @return a new RESTViewOperations instance | ||
| */ | ||
| default RESTViewOperations createViewOperations( | ||
| RESTClient client, | ||
| String path, | ||
| Supplier<Map<String, String>> headers, | ||
| ViewMetadata current, | ||
| Set<Endpoint> endpoints) { | ||
| return new RESTViewOperations(client, path, headers, current, endpoints); | ||
| } | ||
|
|
||
| /** Default {@link RESTOperationsFactory} instance; overrides nothing, so every method uses its default. */ | ||
| RESTOperationsFactory DEFAULT = new RESTOperationsFactory() {}; | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -44,10 +44,10 @@ | |
| import org.apache.iceberg.rest.responses.LoadTableResponse; | ||
| import org.apache.iceberg.util.LocationUtil; | ||
|
|
||
| class RESTTableOperations implements TableOperations { | ||
| public class RESTTableOperations implements TableOperations { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need this scope change?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If users only want to make small adjustments to RESTTableOperations (for example, injecting a custom header), they can simply provide a custom implementation that extends RESTTableOperations, without having to copy the entire class. This makes it much easier for them to upgrade to newer Iceberg SDK versions without dealing with merge conflicts or duplicated code. I'm okay with either approach here, don't have a strong preference. WDYT? |
||
| private static final String METADATA_FOLDER_NAME = "metadata"; | ||
|
|
||
| enum UpdateType { | ||
| public enum UpdateType { | ||
| CREATE, | ||
| REPLACE, | ||
| SIMPLE | ||
|
|
@@ -63,7 +63,7 @@ enum UpdateType { | |
| private UpdateType updateType; | ||
| private TableMetadata current; | ||
|
|
||
| RESTTableOperations( | ||
| public RESTTableOperations( | ||
| RESTClient client, | ||
| String path, | ||
| Supplier<Map<String, String>> headers, | ||
|
|
@@ -73,7 +73,7 @@ enum UpdateType { | |
| this(client, path, headers, io, UpdateType.SIMPLE, Lists.newArrayList(), current, endpoints); | ||
| } | ||
|
|
||
| RESTTableOperations( | ||
| public RESTTableOperations( | ||
| RESTClient client, | ||
| String path, | ||
| Supplier<Map<String, String>> headers, | ||
|
|
@@ -183,7 +183,7 @@ public void commit(TableMetadata base, TableMetadata metadata) { | |
| * refresh fails. In case of refresh failure, the failure is recorded as suppressed on the | ||
| * provided {@code original} exception to aid diagnostics. | ||
| */ | ||
| private boolean reconcileOnSimpleUpdate( | ||
| protected boolean reconcileOnSimpleUpdate( | ||
| List<MetadataUpdate> updates, CommitStateUnknownException original) { | ||
| Long expectedSnapshotId = expectedSnapshotIdIfSnapshotAddOnly(updates); | ||
| if (expectedSnapshotId == null) { | ||
|
|
@@ -241,7 +241,7 @@ private static Long expectedSnapshotIdIfSnapshotAddOnly(List<MetadataUpdate> upd | |
| return addedSnapshotId; | ||
| } | ||
|
|
||
| private TableMetadata updateCurrentMetadata(LoadTableResponse response) { | ||
| protected TableMetadata updateCurrentMetadata(LoadTableResponse response) { | ||
| // LoadTableResponse is used to deserialize the response, but config is not allowed by the REST | ||
| // spec so it can be | ||
| // safely ignored. there is no requirement to update config on refresh or commit. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -29,14 +29,14 @@ | |
| import org.apache.iceberg.view.ViewMetadata; | ||
| import org.apache.iceberg.view.ViewOperations; | ||
|
|
||
| class RESTViewOperations implements ViewOperations { | ||
| public class RESTViewOperations implements ViewOperations { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need this scope change?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as above |
||
| private final RESTClient client; | ||
| private final String path; | ||
| private final Supplier<Map<String, String>> headers; | ||
| private final Set<Endpoint> endpoints; | ||
| private ViewMetadata current; | ||
|
|
||
| RESTViewOperations( | ||
| public RESTViewOperations( | ||
| RESTClient client, | ||
| String path, | ||
| Supplier<Map<String, String>> headers, | ||
|
|
@@ -79,7 +79,7 @@ public void commit(ViewMetadata base, ViewMetadata metadata) { | |
| updateCurrentMetadata(response); | ||
| } | ||
|
|
||
| private ViewMetadata updateCurrentMetadata(LoadViewResponse response) { | ||
| protected ViewMetadata updateCurrentMetadata(LoadViewResponse response) { | ||
| if (!Objects.equals(current.metadataFileLocation(), response.metadataLocation())) { | ||
| this.current = response.metadata(); | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How is FileIO handled for view operations?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IIUC,
`FileIO` is not required for view operations, because Iceberg views are logical objects that contain only metadata (SQL definitions, schemas, and versions) and do not read or write any physical files. When a user runs a query against a view, the query engine expands the view's SQL definition, compiles it into a query plan, and resolves the underlying tables. At that point, the engine loads the actual table objects (which include
`TableOperations` and `FileIO`) to read the physical data files.