Skip to content

Commit

Permalink
aws requester pays (#1173)
Browse files Browse the repository at this point in the history
* add functionality for requester pays buckets

* add removed dependency

* add debug for requester pays

* remove debug statements and fix url concat

* fix presigned url tests and clean up

* resolve poetry.lock conflict

* add mock presigned urls for blank tests

* add functionality for requester pays buckets

* add removed dependency

* add debug for requester pays

* remove debug statements and fix url concat

* fix presigned url tests and clean up

* add mock presigned urls for blank tests

* clean up and fix function case

* adding config to boto clients

* add handler for custom parameters for s3 presigned urls

* fix tests

* clean up and add extra params for requester pays

* remove custom parameters for upload presigned url

* fix requester pays params logic

* add session token (#1176)

* add session token

* fix multipart upload

* rename var

* fix update

* fix bucket name

* fix

* refactor logic

* update lock

* address pr comments

* update

* update lock

* test

* fix

* test

* restore

* try authlib version

* update comment

* update lock

* update version

* Update pyproject.toml

Co-authored-by: Pauline Ribeyre <[email protected]>

* Update pyproject.toml

Co-authored-by: Pauline Ribeyre <[email protected]>

* redo lock

* add to default config

---------

Co-authored-by: Mingfei Shao <[email protected]>
Co-authored-by: Mingfei Shao <[email protected]>
Co-authored-by: Pauline Ribeyre <[email protected]>
  • Loading branch information
4 people authored Aug 27, 2024
1 parent e1a568d commit cd66a72
Show file tree
Hide file tree
Showing 9 changed files with 219 additions and 137 deletions.
6 changes: 3 additions & 3 deletions .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -268,14 +268,14 @@
"filename": "tests/conftest.py",
"hashed_secret": "1348b145fa1a555461c1b790a2f66614781091e9",
"is_verified": false,
"line_number": 1569
"line_number": 1570
},
{
"type": "Base64 High Entropy String",
"filename": "tests/conftest.py",
"hashed_secret": "227dea087477346785aefd575f91dd13ab86c108",
"is_verified": false,
"line_number": 1593
"line_number": 1594
}
],
"tests/credentials/google/test_credentials.py": [
Expand Down Expand Up @@ -422,5 +422,5 @@
}
]
},
"generated_at": "2024-07-25T17:19:58Z"
"generated_at": "2024-08-22T19:43:39Z"
}
57 changes: 41 additions & 16 deletions fence/blueprints/data/indexd.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
import re
import time
import json
import boto3
from botocore.client import Config
from urllib.parse import urlparse, ParseResult, urlunparse
from datetime import datetime, timedelta

from sqlalchemy.sql.functions import user
from cached_property import cached_property
import gen3cirrus
from gen3cirrus import GoogleCloudManager
from gen3cirrus import AwsService
from cdislogging import get_logger
from cdispyutils.config import get_value
from cdispyutils.hmac4 import generate_aws_presigned_url
import flask
from flask import current_app
import requests
Expand Down Expand Up @@ -396,7 +398,7 @@ def make_signed_url(self, file_name, protocol=None, expires_in=None, bucket=None
@staticmethod
def init_multipart_upload(key, expires_in=None, bucket=None):
"""
Initilize multipart upload given key
Initialize multipart upload given key
Args:
key(str): object key
Expand Down Expand Up @@ -441,7 +443,7 @@ def generate_aws_presigned_url_for_part(
Args:
key(str): object key of `guid/filename`
uploadID(str): uploadId of the current upload.
uploadId(str): uploadId of the current upload.
partNumber(int): the part number
Returns:
Expand Down Expand Up @@ -1061,6 +1063,8 @@ def get_signed_url(
bucket_name = self.bucket_name()
bucket = s3_buckets.get(bucket_name)

object_id = self.parsed_url.path.strip("/")

if bucket and bucket.get("endpoint_url"):
http_url = bucket["endpoint_url"].strip("/") + "/{}/{}".format(
self.parsed_url.netloc, self.parsed_url.path.strip("/")
Expand Down Expand Up @@ -1092,18 +1096,38 @@ def get_signed_url(
region = flask.current_app.boto.get_bucket_region(
self.parsed_url.netloc, credential
)
s3client = boto3.client(
"s3",
aws_access_key_id=credential["aws_access_key_id"],
aws_secret_access_key=credential["aws_secret_access_key"],
aws_session_token=credential.get("aws_session_token", None),
region_name=region,
config=Config(s3={"addressing_style": "path"}, signature_version="s3v4"),
)

cirrus_aws = AwsService(s3client)
auth_info = _get_auth_info_for_id_or_from_request(user=authorized_user)

url = generate_aws_presigned_url(
http_url,
ACTION_DICT["s3"][action],
credential,
"s3",
region,
expires_in,
auth_info,
)
action = ACTION_DICT["s3"][action]

# get presigned url for upload
if action == "PUT":
url = cirrus_aws.upload_presigned_url(
bucket_name, object_id, expires_in, None
)
# get presigned url for download
else:
if bucket.get("requester_pays") is True:
# requester pays downloads need the extra `x-amz-request-payer` parameter
# included when signing the url; see https://github.com/boto/boto3/issues/3685
auth_info["x-amz-request-payer"] = "requester"
url = cirrus_aws.requester_pays_download_presigned_url(
bucket_name, object_id, expires_in, auth_info
)
else:
url = cirrus_aws.download_presigned_url(
bucket_name, object_id, expires_in, auth_info
)

return url

Expand All @@ -1115,7 +1139,7 @@ def init_multipart_upload(self, expires_in):
expires_in(int): expiration time
Returns:
UploadId(str)
uploadId(str)
"""
aws_creds = get_value(
config, "AWS_CREDENTIALS", InternalError("credentials not configured")
Expand All @@ -1133,18 +1157,19 @@ def generate_presigned_url_for_part_upload(self, uploadId, partNumber, expires_i
Generate presigned url for uploading object part given uploadId and part number
Args:
uploadId(str): uploadID of the multipart upload
uploadId(str): uploadId of the multipart upload
partNumber(int): part number
expires_in(int): expiration time
Returns:
presigned_url(str)
"""
bucket_name = self.bucket_name()
aws_creds = get_value(
config, "AWS_CREDENTIALS", InternalError("credentials not configured")
)
credential = S3IndexedFileLocation.get_credential_to_access_bucket(
self.bucket_name(), aws_creds, expires_in
bucket_name, aws_creds, expires_in
)

region = self.get_bucket_region()
Expand All @@ -1154,7 +1179,7 @@ def generate_presigned_url_for_part_upload(self, uploadId, partNumber, expires_i
)

return multipart_upload.generate_presigned_url_for_uploading_part(
self.parsed_url.netloc,
bucket_name,
self.parsed_url.path.strip("/"),
credential,
uploadId,
Expand Down
36 changes: 15 additions & 21 deletions fence/blueprints/data/multipart_upload.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import boto3
from botocore.client import Config
from botocore.exceptions import ClientError
from retry.api import retry_call

from cdispyutils.hmac4 import generate_aws_presigned_url
from cdispyutils.config import get_value
from cdislogging import get_logger
from gen3cirrus import AwsService
from fence.config import config
from fence.errors import InternalError

Expand Down Expand Up @@ -58,7 +59,7 @@ def initialize_multipart_upload(bucket_name, key, credentials):
key, error
)
)
raise InternalError("Can not initilize multipart upload for {}".format(key))
raise InternalError("Can not initialize multipart upload for {}".format(key))

return multipart_upload.get("UploadId")

Expand Down Expand Up @@ -140,28 +141,21 @@ def generate_presigned_url_for_uploading_part(
Returns:
presigned_url(str)
"""
s3_buckets = get_value(
config, "S3_BUCKETS", InternalError("S3_BUCKETS not configured")
)
bucket = s3_buckets.get(bucket_name)

s3_buckets = get_value(
config, "S3_BUCKETS", InternalError("S3_BUCKETS not configured")
)
bucket = s3_buckets.get(bucket_name)

if bucket.get("endpoint_url"):
url = bucket["endpoint_url"].strip("/") + "/{}/{}".format(
bucket_name, key.strip("/")
try:
s3client = boto3.client(
"s3",
aws_access_key_id=credentials["aws_access_key_id"],
aws_secret_access_key=credentials["aws_secret_access_key"],
aws_session_token=credentials.get("aws_session_token", None),
region_name=region,
config=Config(s3={"addressing_style": "path"}, signature_version="s3v4"),
)
else:
url = "https://{}.s3.amazonaws.com/{}".format(bucket_name, key)
additional_signed_qs = {"partNumber": str(partNumber), "uploadId": uploadId}
cirrus_aws = AwsService(s3client)

try:
presigned_url = generate_aws_presigned_url(
url, "PUT", credentials, "s3", region, expires, additional_signed_qs
presigned_url = cirrus_aws.multipart_upload_presigned_url(
bucket_name, key, expires, uploadId, partNumber
)

return presigned_url
except Exception as e:
raise InternalError(
Expand Down
4 changes: 4 additions & 0 deletions fence/config-default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -676,6 +676,10 @@ S3_BUCKETS: {}
# cred: 'CRED1'
# region: 'us-east-1'
# role-arn: 'arn:aws:iam::role1'
# bucket5:
# cred: 'CRED3'
# region: 'us-east-1'
# requester_pays: true # indicates that this S3 bucket has Requester Pays enabled
GS_BUCKETS: {}
# NOTE: Remove the {} and supply buckets if needed. Example in comments below
# bucket1:
Expand Down
Loading

0 comments on commit cd66a72

Please sign in to comment.