forked from unitycatalog/unitycatalog
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add support for tables located in S3 in Iceberg REST catalog (unitycatalog#123)
**PR Checklist** - [x] A description of the changes is added to the description of this PR. - [x] If there is a related issue, make sure it is linked to this PR. - [x] If you've fixed a bug or added code that should be tested, add tests! - [ ] If you've added or modified a feature, documentation in `docs` is updated **Description of changes** This PR introduces the ability to load Iceberg tables that are located in S3. (Previously, only local paths were supported.) This adds new dependencies on the iceberg-aws module as well as AWS SDK v2. This also introduces new testing frameworks - Mockito and S3Mock - in order to better test and simulate reading Iceberg metadata out of S3 with S3FileIO (as well as starting to use JUnit5 instead of JUnit4). **Related Issues** This PR covers the basic and initial S3 support, addressing issue unitycatalog#105
- Loading branch information
Showing
10 changed files
with
300 additions
and
30 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
64 changes: 64 additions & 0 deletions
64
server/src/main/java/io/unitycatalog/server/service/iceberg/FileIOFactory.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
package io.unitycatalog.server.service.iceberg; | ||
|
||
import io.unitycatalog.server.exception.BaseException; | ||
import io.unitycatalog.server.model.AwsCredentials; | ||
import io.unitycatalog.server.persist.utils.ServerPropertiesUtils; | ||
import io.unitycatalog.server.utils.TemporaryCredentialUtils; | ||
import org.apache.iceberg.aws.s3.S3FileIO; | ||
import org.apache.iceberg.io.FileIO; | ||
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; | ||
import software.amazon.awssdk.auth.credentials.AwsSessionCredentials; | ||
import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider; | ||
import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; | ||
import software.amazon.awssdk.regions.Region; | ||
import software.amazon.awssdk.services.s3.S3Client; | ||
|
||
import java.net.URI; | ||
import java.util.Map; | ||
|
||
public class FileIOFactory { | ||
|
||
private static final String S3 = "s3"; | ||
|
||
public FileIOFactory() { | ||
} | ||
|
||
// TODO: Cache fileIOs | ||
public FileIO getFileIO(URI tableLocationUri) { | ||
switch (tableLocationUri.getScheme()) { | ||
case S3: return getS3FileIO(tableLocationUri); | ||
// TODO: should we default/fallback to HadoopFileIO ? | ||
default: return new SimpleLocalFileIO(); | ||
} | ||
} | ||
|
||
protected S3FileIO getS3FileIO(URI tableLocationUri) { | ||
String region = ServerPropertiesUtils.getInstance().getProperty("aws.region", System.getenv("AWS_REGION")); | ||
|
||
// FIXME!! - proper credential vending and region settings | ||
S3FileIO s3FileIO = new S3FileIO(() -> getS3Client(getAwsCredentialsProvider(tableLocationUri), region)); | ||
|
||
s3FileIO.initialize(Map.of()); | ||
|
||
return s3FileIO; | ||
} | ||
|
||
protected S3Client getS3Client(AwsCredentialsProvider awsCredentialsProvider, String region) { | ||
return S3Client.builder() | ||
.region(Region.of(region)) | ||
.credentialsProvider(awsCredentialsProvider) | ||
.forcePathStyle(false) | ||
.build(); | ||
} | ||
|
||
private AwsCredentialsProvider getAwsCredentialsProvider(URI tableLocationUri) { | ||
try { | ||
AwsCredentials credentials = TemporaryCredentialUtils.findS3BucketConfig(tableLocationUri.toString()); | ||
return StaticCredentialsProvider.create( | ||
AwsSessionCredentials.create( | ||
credentials.getAccessKeyId(),credentials.getSecretAccessKey(),credentials.getSessionToken())); | ||
} catch (BaseException e) { | ||
return DefaultCredentialsProvider.create(); | ||
} | ||
} | ||
} |
25 changes: 25 additions & 0 deletions
25
server/src/main/java/io/unitycatalog/server/service/iceberg/MetadataService.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
package io.unitycatalog.server.service.iceberg; | ||
|
||
import org.apache.iceberg.TableMetadata; | ||
import org.apache.iceberg.TableMetadataParser; | ||
import org.apache.iceberg.io.FileIO; | ||
|
||
import java.io.IOException; | ||
import java.net.URI; | ||
|
||
public class MetadataService { | ||
|
||
private final FileIOFactory fileIOFactory; | ||
|
||
public MetadataService(FileIOFactory fileIOFactory) { | ||
this.fileIOFactory = fileIOFactory; | ||
} | ||
|
||
public TableMetadata readTableMetadata(String metadataLocation) { | ||
URI metadataLocationUri = URI.create(metadataLocation); | ||
// TODO: cache fileIO | ||
FileIO fileIO = fileIOFactory.getFileIO(metadataLocationUri); | ||
|
||
return TableMetadataParser.read(fileIO, metadataLocation); | ||
} | ||
} |
23 changes: 23 additions & 0 deletions
23
server/src/main/java/io/unitycatalog/server/service/iceberg/SimpleLocalFileIO.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
package io.unitycatalog.server.service.iceberg; | ||
|
||
import org.apache.iceberg.Files; | ||
import org.apache.iceberg.io.FileIO; | ||
import org.apache.iceberg.io.InputFile; | ||
import org.apache.iceberg.io.OutputFile; | ||
|
||
public class SimpleLocalFileIO implements FileIO { | ||
@Override | ||
public InputFile newInputFile(String path) { | ||
return Files.localInput(path); | ||
} | ||
|
||
@Override | ||
public OutputFile newOutputFile(String path) { | ||
throw new UnsupportedOperationException(); | ||
} | ||
|
||
@Override | ||
public void deleteFile(String path) { | ||
throw new UnsupportedOperationException(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
68 changes: 68 additions & 0 deletions
68
server/src/test/java/io/unitycatalog/server/service/iceberg/MetadataServiceTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
package io.unitycatalog.server.service.iceberg; | ||
|
||
import com.adobe.testing.s3mock.junit5.S3MockExtension; | ||
import com.amazonaws.util.IOUtils; | ||
import lombok.SneakyThrows; | ||
import org.apache.iceberg.TableMetadata; | ||
import org.apache.iceberg.aws.s3.S3FileIO; | ||
import org.junit.jupiter.api.BeforeEach; | ||
import org.junit.jupiter.api.Test; | ||
import org.junit.jupiter.api.extension.ExtendWith; | ||
import org.junit.jupiter.api.extension.RegisterExtension; | ||
import software.amazon.awssdk.core.sync.RequestBody; | ||
import software.amazon.awssdk.services.s3.S3Client; | ||
|
||
import java.util.Objects; | ||
|
||
import static org.assertj.core.api.Assertions.assertThat; | ||
import static org.mockito.ArgumentMatchers.any; | ||
import static org.mockito.Mockito.mock; | ||
import static org.mockito.Mockito.when; | ||
|
||
@ExtendWith(S3MockExtension.class) | ||
public class MetadataServiceTest { | ||
@RegisterExtension | ||
public static final S3MockExtension S3_MOCK = S3MockExtension.builder().silent().build(); | ||
|
||
public static final String TEST_BUCKET = "test-bucket"; | ||
public static final String TEST_LOCATION = "test-bucket"; | ||
public static final String TEST_SIMPLE_ICEBERG_V1_METADATA_FILE_NAME = "simple-v1-iceberg.metadata.json"; | ||
|
||
private final FileIOFactory mockFileIOFactory = mock(); | ||
private final S3Client mockS3Client = S3_MOCK.createS3ClientV2(); | ||
|
||
private MetadataService metadataService; | ||
|
||
@SneakyThrows | ||
@BeforeEach | ||
public void setUp() { | ||
metadataService = new MetadataService(mockFileIOFactory); | ||
} | ||
|
||
@SneakyThrows | ||
@Test | ||
public void testGetTableMetadataFromS3() { | ||
when(mockFileIOFactory.getFileIO(any())).thenReturn(new S3FileIO(() -> mockS3Client)); | ||
mockS3Client.createBucket(builder -> builder.bucket(TEST_BUCKET).build()); | ||
String simpleMetadataJson = IOUtils.toString( | ||
Objects.requireNonNull(this.getClass().getResourceAsStream("/" + TEST_SIMPLE_ICEBERG_V1_METADATA_FILE_NAME))); | ||
mockS3Client.putObject( | ||
builder -> builder.bucket(TEST_BUCKET).key(TEST_LOCATION + "/" + TEST_SIMPLE_ICEBERG_V1_METADATA_FILE_NAME).build(), | ||
RequestBody.fromString(simpleMetadataJson)); | ||
|
||
String metadataLocation = "s3://" + TEST_BUCKET + "/" + TEST_LOCATION + "/" + TEST_SIMPLE_ICEBERG_V1_METADATA_FILE_NAME; | ||
TableMetadata tableMetadata = metadataService.readTableMetadata(metadataLocation); | ||
assertThat(tableMetadata.uuid()).isEqualTo("11111111-2222-3333-4444-555555555555"); | ||
} | ||
|
||
@SneakyThrows | ||
@Test | ||
public void testGetTableMetadataFromLocalFS() { | ||
when(mockFileIOFactory.getFileIO(any())).thenReturn(new SimpleLocalFileIO()); | ||
String metadataLocation = Objects.requireNonNull( | ||
this.getClass().getResource("/iceberg.metadata.json")).toURI().toString(); | ||
TableMetadata tableMetadata = metadataService.readTableMetadata(metadataLocation); | ||
assertThat(tableMetadata.uuid()).isEqualTo("55d4dc69-5b14-4483-bfc8-f33b80f99f99"); | ||
} | ||
|
||
} |
File renamed without changes.
Oops, something went wrong.