Make CRC32 the default checksum and remove MD5
jonathan343 committed Oct 8, 2024
1 parent 2d66b37 commit d9031ea
Showing 7 changed files with 486 additions and 349 deletions.

botocore/handlers.py (19 changes: 14 additions & 5 deletions)
@@ -52,6 +52,7 @@
     ParamValidationError,
     UnsupportedTLSVersionWarning,
 )
+from botocore.httpchecksum import DEFAULT_CHECKSUM_ALGORITHM
 from botocore.regions import EndpointResolverBuiltins
 from botocore.signers import (
     add_generate_db_auth_token,
@@ -61,8 +62,6 @@
 from botocore.utils import (
     SAFE_CHARS,
     ArnParser,
-    conditionally_calculate_checksum,
-    conditionally_calculate_md5,
     percent_encode,
     switch_host_with_param,
 )
@@ -1239,6 +1238,15 @@ def document_expires_shape(section, event_name, **kwargs):
         )


+def set_default_multipart_checksum_algorithm(params, **kwargs):
+    """Sets the ``ChecksumAlgorithm`` parameter to the SDKs default checksum.
+
+    The ``CreateMultipartUpload`` operation isn't modeled with the ``httpchecksum``
+    trait and requires us to set a default checksum algorithm when none is provided.
+    """
+    params.setdefault('ChecksumAlgorithm', DEFAULT_CHECKSUM_ALGORITHM)
+
+
 # This is a list of (event_name, handler).
 # When a Session is created, everything in this list will be
 # automatically registered with that Session.
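
The handler added above exists because CreateMultipartUpload is not modeled with the httpchecksum trait, so nothing else would choose an algorithm for it. A minimal, self-contained sketch of the resulting behavior (illustration only, not part of the commit; bucket and key values are placeholders):

def set_default_multipart_checksum_algorithm(params, **kwargs):
    # Fill in the SDK default only when the caller did not pick an algorithm.
    params.setdefault('ChecksumAlgorithm', 'CRC32')

params = {'Bucket': 'example-bucket', 'Key': 'example-key'}
set_default_multipart_checksum_algorithm(params)
assert params['ChecksumAlgorithm'] == 'CRC32'

params = {'Bucket': 'example-bucket', 'Key': 'example-key', 'ChecksumAlgorithm': 'SHA256'}
set_default_multipart_checksum_algorithm(params)
assert params['ChecksumAlgorithm'] == 'SHA256'  # an explicit choice is left alone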

@@ -1292,6 +1300,10 @@ def document_expires_shape(section, event_name, **kwargs):
         'before-parameter-build.s3.CreateMultipartUpload',
         validate_ascii_metadata,
     ),
+    (
+        'before-parameter-build.s3.CreateMultipartUpload',
+        set_default_multipart_checksum_algorithm,
+    ),
     ('before-parameter-build.s3-control', remove_accid_host_prefix_from_model),
     ('docs.*.s3.CopyObject.complete-section', document_copy_source_form),
     ('docs.*.s3.UploadPartCopy.complete-section', document_copy_source_form),
@@ -1302,10 +1314,7 @@ def document_expires_shape(section, event_name, **kwargs):
     ('before-call.s3', add_expect_header),
     ('before-call.glacier', add_glacier_version),
     ('before-call.apigateway', add_accept_header),
-    ('before-call.s3.PutObject', conditionally_calculate_checksum),
-    ('before-call.s3.UploadPart', conditionally_calculate_md5),
     ('before-call.s3.DeleteObjects', escape_xml_payload),
-    ('before-call.s3.DeleteObjects', conditionally_calculate_checksum),
     ('before-call.s3.PutBucketLifecycleConfiguration', escape_xml_payload),
     ('before-call.glacier.UploadArchive', add_glacier_checksums),
     ('before-call.glacier.UploadMultipartPart', add_glacier_checksums),
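
As the comment above BUILTIN_HANDLERS notes, each (event_name, handler) pair is registered automatically on every new Session, and the removed before-call MD5 handlers simply drop out of that wiring. The same registration can be done from user code through the events API; a hedged sketch (the region name is a placeholder and no request is actually sent):

import botocore.session


def set_default_multipart_checksum_algorithm(params, **kwargs):
    params.setdefault('ChecksumAlgorithm', 'CRC32')


session = botocore.session.get_session()
# Mirrors the BUILTIN_HANDLERS entry added in this commit rather than
# introducing any new behavior.
session.register(
    'before-parameter-build.s3.CreateMultipartUpload',
    set_default_multipart_checksum_algorithm,
)
s3 = session.create_client('s3', region_name='us-east-1')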

botocore/httpchecksum.py (72 changes: 37 additions & 35 deletions)
@@ -32,10 +32,7 @@
     MissingDependencyException,
 )
 from botocore.response import StreamingBody
-from botocore.utils import (
-    conditionally_calculate_md5,
-    determine_content_length,
-)
+from botocore.utils import _has_checksum_header, determine_content_length

 if HAS_CRT:
     from awscrt import checksums as crt_checksums
@@ -44,6 +41,8 @@

 logger = logging.getLogger(__name__)

+DEFAULT_CHECKSUM_ALGORITHM = "CRC32"
+

 class BaseChecksum:
     _CHUNK_SIZE = 1024 * 1024
@@ -259,7 +258,18 @@ def resolve_request_checksum_algorithm(
     params,
     supported_algorithms=None,
 ):
+    # If the header is already set by the customer, skip calculation
+    if _has_checksum_header(request):
+        return
+
+    request_checksum_calculation = request["context"][
+        "client_config"
+    ].request_checksum_calculation
     http_checksum = operation_model.http_checksum
+    request_checksum_required = (
+        operation_model.http_checksum_required
+        or http_checksum.get("requestChecksumRequired")
+    )
     algorithm_member = http_checksum.get("requestAlgorithmMember")
     if algorithm_member and algorithm_member in params:
         # If the client has opted into using flexible checksums and the
@@ -280,35 +290,30 @@
             raise FlexibleChecksumError(
                 error_msg=f"Unsupported checksum algorithm: {algorithm_name}"
             )
+    elif request_checksum_required or (
+        algorithm_member and request_checksum_calculation == "when_supported"
+    ):
+        algorithm_name = "crc32"
+    else:
+        return

-        location_type = "header"
-        if operation_model.has_streaming_input:
-            # Operations with streaming input must support trailers.
-            if request["url"].startswith("https:"):
-                # We only support unsigned trailer checksums currently. As this
-                # disables payload signing we'll only use trailers over TLS.
-                location_type = "trailer"
-
-        algorithm = {
-            "algorithm": algorithm_name,
-            "in": location_type,
-            "name": f"x-amz-checksum-{algorithm_name}",
-        }
-
-        if algorithm["name"] in request["headers"]:
-            # If the header is already set by the customer, skip calculation
-            return
+    location_type = "header"
+    if operation_model.has_streaming_input:
+        # Operations with streaming input must support trailers.
+        if request["url"].startswith("https:"):
+            # We only support unsigned trailer checksums currently. As this
+            # disables payload signing we'll only use trailers over TLS.
+            location_type = "trailer"
+
+    algorithm = {
+        "algorithm": algorithm_name,
+        "in": location_type,
+        "name": f"x-amz-checksum-{algorithm_name}",
+    }

-        checksum_context = request["context"].get("checksum", {})
-        checksum_context["request_algorithm"] = algorithm
-        request["context"]["checksum"] = checksum_context
-    elif operation_model.http_checksum_required or http_checksum.get(
-        "requestChecksumRequired"
-    ):
-        # Otherwise apply the old http checksum behavior via Content-MD5
-        checksum_context = request["context"].get("checksum", {})
-        checksum_context["request_algorithm"] = "conditional-md5"
-        request["context"]["checksum"] = checksum_context
+    checksum_context = request["context"].get("checksum", {})
+    checksum_context["request_algorithm"] = algorithm
+    request["context"]["checksum"] = checksum_context


 def apply_request_checksum(request):
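
Taken together, the hunk above replaces the old "conditional-md5" fallback with an explicit selection order: a checksum header supplied by the caller wins, an explicit ChecksumAlgorithm parameter is used next, and otherwise CRC32 is chosen when the operation requires a checksum or the client is configured to add one whenever supported. A standalone sketch of that order, assuming request_checksum_calculation takes the documented values "when_supported" and "when_required":

def choose_request_checksum(
    has_user_checksum_header,      # caller already sent an x-amz-checksum-* header
    requested_algorithm,           # value of the modeled ChecksumAlgorithm parameter, if any
    checksum_required,             # operation is modeled as requiring a checksum
    request_checksum_calculation,  # client config: "when_supported" or "when_required"
    models_algorithm_param=True,   # operation exposes a ChecksumAlgorithm member
):
    # Mirrors the diff above; it does not call botocore.
    if has_user_checksum_header:
        return None
    if models_algorithm_param and requested_algorithm:
        return requested_algorithm.lower()
    if checksum_required or (
        models_algorithm_param and request_checksum_calculation == "when_supported"
    ):
        return "crc32"  # the new SDK default
    return None


assert choose_request_checksum(False, None, False, "when_supported") == "crc32"
assert choose_request_checksum(False, None, False, "when_required") is None
assert choose_request_checksum(False, "SHA256", False, "when_required") == "sha256"
assert choose_request_checksum(True, None, True, "when_supported") is None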

@@ -318,10 +323,7 @@ def apply_request_checksum(request):
     if not algorithm:
         return

-    if algorithm == "conditional-md5":
-        # Special case to handle the http checksum required trait
-        conditionally_calculate_md5(request)
-    elif algorithm["in"] == "header":
+    if algorithm["in"] == "header":
         _apply_request_header_checksum(request)
     elif algorithm["in"] == "trailer":
         _apply_request_trailer_checksum(request)
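
For reference, the DEFAULT_CHECKSUM_ALGORITHM introduced above reaches the wire as an x-amz-checksum-crc32 header (or trailer) whose value is the base64-encoded, big-endian 4-byte CRC32 of the payload. A small sketch of that encoding using only the standard library, for an in-memory body:

import base64
import zlib

body = b"example body"
crc32 = zlib.crc32(body) & 0xFFFFFFFF  # unsigned 32-bit CRC of the payload
header_value = base64.b64encode(crc32.to_bytes(4, "big")).decode("ascii")
print({"x-amz-checksum-crc32": header_value})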

botocore/utils.py (83 changes: 1 addition & 82 deletions)
@@ -10,7 +10,6 @@
 # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
 # ANY KIND, either express or implied. See the License for the specific
 # language governing permissions and limitations under the License.
-import base64
 import binascii
 import datetime
 import email.message
@@ -50,10 +49,8 @@
     HAS_CRT,
     IPV4_RE,
     IPV6_ADDRZ_RE,
-    MD5_AVAILABLE,
     UNSAFE_URL_CHARS,
     OrderedDict,
-    get_md5,
     get_tzinfo_options,
     json,
     quote,
@@ -3223,96 +3220,18 @@ def get_encoding_from_headers(headers, default='ISO-8859-1'):
     return default


-def calculate_md5(body, **kwargs):
-    if isinstance(body, (bytes, bytearray)):
-        binary_md5 = _calculate_md5_from_bytes(body)
-    else:
-        binary_md5 = _calculate_md5_from_file(body)
-    return base64.b64encode(binary_md5).decode('ascii')
-
-
-def _calculate_md5_from_bytes(body_bytes):
-    md5 = get_md5(body_bytes)
-    return md5.digest()
-
-
-def _calculate_md5_from_file(fileobj):
-    start_position = fileobj.tell()
-    md5 = get_md5()
-    for chunk in iter(lambda: fileobj.read(1024 * 1024), b''):
-        md5.update(chunk)
-    fileobj.seek(start_position)
-    return md5.digest()
-
-
-def _is_s3express_request(params):
-    endpoint_properties = params.get('context', {}).get(
-        'endpoint_properties', {}
-    )
-    return endpoint_properties.get('backend') == 'S3Express'
-
-
 def _has_checksum_header(params):
     headers = params['headers']
-    # If a user provided Content-MD5 is present,
-    # don't try to compute a new one.
-    if 'Content-MD5' in headers:
-        return True

     # If a header matching the x-amz-checksum-* pattern is present, we
-    # assume a checksum has already been provided and an md5 is not needed
+    # assume a checksum has already been provided by the user.
     for header in headers:
         if CHECKSUM_HEADER_PATTERN.match(header):
             return True

     return False
-
-
-def conditionally_calculate_checksum(params, **kwargs):
-    if not _has_checksum_header(params):
-        conditionally_calculate_md5(params, **kwargs)
-        conditionally_enable_crc32(params, **kwargs)
-
-
-def conditionally_enable_crc32(params, **kwargs):
-    checksum_context = params.get('context', {}).get('checksum', {})
-    checksum_algorithm = checksum_context.get('request_algorithm')
-    if (
-        _is_s3express_request(params)
-        and params['body'] is not None
-        and checksum_algorithm in (None, "conditional-md5")
-    ):
-        params['context']['checksum'] = {
-            'request_algorithm': {
-                'algorithm': 'crc32',
-                'in': 'header',
-                'name': 'x-amz-checksum-crc32',
-            }
-        }
-
-
-def conditionally_calculate_md5(params, **kwargs):
-    """Only add a Content-MD5 if the system supports it."""
-    body = params['body']
-    checksum_context = params.get('context', {}).get('checksum', {})
-    checksum_algorithm = checksum_context.get('request_algorithm')
-    if checksum_algorithm and checksum_algorithm != 'conditional-md5':
-        # Skip for requests that will have a flexible checksum applied
-        return
-
-    if _has_checksum_header(params):
-        # Don't add a new header if one is already available.
-        return
-
-    if _is_s3express_request(params):
-        # S3Express doesn't support MD5
-        return
-
-    if MD5_AVAILABLE and body is not None:
-        md5_digest = calculate_md5(body, **kwargs)
-        params['headers']['Content-MD5'] = md5_digest


 class FileWebIdentityTokenLoader:
     def __init__(self, web_identity_token_path, _open=open):
         self._web_identity_token_path = web_identity_token_path
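
With the helpers above removed, botocore no longer computes Content-MD5 for these S3 operations. A caller that still needs to send one (for example, to an S3-compatible service that requires it) can compute the same value directly; this sketch mirrors the deleted calculate_md5 for a bytes payload:

import base64
import hashlib

body = b"example body"
# Content-MD5 is the base64-encoded MD5 digest of the body; note that MD5 may
# be unavailable on FIPS-restricted builds, which is what MD5_AVAILABLE guarded.
content_md5 = base64.b64encode(hashlib.md5(body).digest()).decode('ascii')
# e.g. client.put_object(Bucket='example-bucket', Key='example-key',
#                        Body=body, ContentMD5=content_md5)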