Skip to content

Commit

Permalink
Remove /metadata and /data subdirectory locations from generated subscoped policy (#244)

Browse files Browse the repository at this point in the history


---------

Co-authored-by: Michael Collado <[email protected]>
  • Loading branch information
collado-mike and sfc-gh-mcollado authored Sep 9, 2024
1 parent ec6ffd0 commit 17dd740
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@
*/
package org.apache.polaris.service.catalog;

import static org.apache.polaris.core.storage.StorageUtil.concatFilePrefixes;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.base.Objects;
Expand Down Expand Up @@ -364,10 +362,8 @@ private String defaultNamespaceLocation(Namespace namespace) {
}

private Set<String> getLocationsAllowedToBeAccessed(TableMetadata tableMetadata) {
String basicLocation = tableMetadata.location();
Set<String> locations = new HashSet<>();
locations.add(concatFilePrefixes(basicLocation, "data/", "/"));
locations.add(concatFilePrefixes(basicLocation, "metadata/", "/"));
locations.add(tableMetadata.location());
if (tableMetadata
.properties()
.containsKey(TableLikeEntity.USER_SPECIFIED_WRITE_DATA_LOCATION_KEY)) {
Expand All @@ -386,11 +382,7 @@ private Set<String> getLocationsAllowedToBeAccessed(TableMetadata tableMetadata)
}

private Set<String> getLocationsAllowedToBeAccessed(ViewMetadata viewMetadata) {
String basicLocation = viewMetadata.location();
Set<String> locations = new HashSet<>();
// a view won't have a "data" location, so only allowed to access "metadata"
locations.add(concatFilePrefixes(basicLocation, "metadata/", "/"));
return locations;
return Set.of(viewMetadata.location());
}

@Override
Expand Down
22 changes: 12 additions & 10 deletions regtests/t_pyspark/src/test_spark_sql_s3_with_privileges.py
Original file line number Diff line number Diff line change
Expand Up @@ -805,6 +805,12 @@ def test_spark_credentials_s3_direct_with_write(root_client, snowflake_catalog,
aws_secret_access_key=response.config['s3.secret-access-key'],
aws_session_token=response.config['s3.session-token'])

objects = s3.list_objects(Bucket=test_bucket, Delimiter='/',
Prefix='polaris_test/snowflake_catalog/db1/schema/iceberg_table/')
assert objects is not None
assert 'CommonPrefixes' in objects
assert len(objects['CommonPrefixes']) > 0

objects = s3.list_objects(Bucket=test_bucket, Delimiter='/',
Prefix='polaris_test/snowflake_catalog/db1/schema/iceberg_table/metadata/')
assert objects is not None
Expand Down Expand Up @@ -1051,23 +1057,19 @@ def test_spark_credentials_s3_scoped_to_metadata_data_locations(root_client, sno
objects = client.list_objects(Bucket=test_bucket, Delimiter='/',
Prefix=f'{prefix}/metadata/')
assert objects is not None
assert 'Contents' in objects , f'list medata files failed in prefix: {prefix}/metadata/'
assert 'Contents' in objects , f'list metadata files failed in prefix: {prefix}/metadata/'

objects = client.list_objects(Bucket=test_bucket, Delimiter='/',
Prefix=f'{prefix}/data/')
assert objects is not None
# no insert executed, so should not have any data files
assert 'Contents' not in objects , f'No contents should be in prefix: {prefix}/data/'

# list files fail in the same table's other directory. The access policy should restrict this
# even metadata and data, it needs an ending `/`
for invalidPrefix in [f'{prefix}/other_directory/', f'{prefix}/metadata', f'{prefix}/data']:
try:
client.list_objects(Bucket=test_bucket, Delimiter='/',
Prefix=invalidPrefix)
pytest.fail(f'Expected exception listing files outside of allowed table directories, but succeeds on location: {invalidPrefix}')
except botocore.exceptions.ClientError as error:
assert error.response['Error']['Code'] == 'AccessDenied', 'Expected exception AccessDenied, but got: ' + error.response['Error']['Code'] + ' on location: ' + invalidPrefix
objects = client.list_objects(Bucket=test_bucket, Delimiter='/',
Prefix=f'{prefix}/')
assert objects is not None
assert 'CommonPrefixes' in objects , f'list prefixes failed in prefix: {prefix}/'
assert len(objects['CommonPrefixes']) > 0

with IcebergSparkSession(credentials=f'{snowman.principal.client_id}:{snowman.credentials.client_secret}',
catalog_name=snowflake_catalog.name,
Expand Down

0 comments on commit 17dd740

Please sign in to comment.