
CASMCMS-9201 - Fix orphaned artifacts in S3. #155

Merged: 2 commits merged on Jan 29, 2025
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -6,6 +6,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
### Fixed
- CASMCMS-9201 - large artifacts orphaned by delete operation

## [3.21.0] - 2024-12-10
### Fixed
6 changes: 3 additions & 3 deletions constraints.txt
@@ -1,7 +1,7 @@
# Constraints updated 8/27/2024
aniso8601==9.0.1
boto3==1.34.114
botocore==1.34.114
boto3==1.36.2
botocore==1.36.2
cachetools==5.3.3
certifi==2024.2.2
chardet==5.2.0
@@ -32,7 +32,7 @@ PyYAML==6.0.1
requests==2.31.0
requests-oauthlib==1.3.1
rsa==4.9
s3transfer==0.10.1
s3transfer==0.11.1
setuptools >= 70.0
urllib3==1.26.18 # most recent 2.2.1, botocore 1.34.114 requires <1.27.0
websocket-client==1.8.0
22 changes: 21 additions & 1 deletion kubernetes/cray-ims/values.yaml
@@ -1,7 +1,7 @@
#
# MIT License
#
# (C) Copyright 2021-2024 Hewlett Packard Enterprise Development LP
# (C) Copyright 2021-2025 Hewlett Packard Enterprise Development LP
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
@@ -129,6 +129,26 @@ cray-service:
secretKeyRef:
name: ims-s3-credentials
key: ssl_validate
- name: S3_STS_ACCESS_KEY
valueFrom:
secretKeyRef:
name: sts-s3-credentials
key: access_key
- name: S3_STS_SECRET_KEY
valueFrom:
secretKeyRef:
name: sts-s3-credentials
key: secret_key
- name: S3_STS_ENDPOINT
valueFrom:
secretKeyRef:
name: sts-s3-credentials
key: s3_endpoint
- name: S3_STS_SSL_VALIDATE
valueFrom:
secretKeyRef:
name: sts-s3-credentials
key: ssl_validate
volumeMounts:
- mountPath: /var/ims/data
name: cray-ims-data
51 changes: 40 additions & 11 deletions src/server/app.py
@@ -1,7 +1,7 @@
#
# MIT License
#
# (C) Copyright 2018-2024 Hewlett Packard Enterprise Development LP
# (C) Copyright 2018-2025 Hewlett Packard Enterprise Development LP
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
@@ -27,8 +27,9 @@
"""

import os

import http.client
import logging

from flask import Flask
from flask_restful import Api

@@ -109,29 +110,57 @@ def load_boto3(_app):
""" Utility function to initialize S3 client objects. """
boto3.set_stream_logger('boto3.resources', _app.config['LOG_LEVEL'])
boto3.set_stream_logger("botocore", _app.config['LOG_LEVEL'])
s3_config = BotoConfig(
connect_timeout=int(_app.config['S3_CONNECT_TIMEOUT']),
read_timeout=int(_app.config['S3_READ_TIMEOUT'])
)
_app.s3 = boto3.client(
's3',
endpoint_url=_app.config['S3_ENDPOINT'],
aws_access_key_id=_app.config['S3_ACCESS_KEY'],
aws_secret_access_key=_app.config['S3_SECRET_KEY'],
verify=_app.config['S3_SSL_VALIDATE'],
config=BotoConfig(
connect_timeout=int(_app.config['S3_CONNECT_TIMEOUT']),
read_timeout=int(_app.config['S3_READ_TIMEOUT']),
),
config=s3_config
)
_app.s3resource = boto3.resource(
service_name='s3',
verify=_app.config['S3_SSL_VALIDATE'],
endpoint_url=_app.config['S3_ENDPOINT'],
aws_access_key_id=_app.config['S3_ACCESS_KEY'],
aws_secret_access_key=_app.config['S3_SECRET_KEY'],
config=BotoConfig(
connect_timeout=int(_app.config['S3_CONNECT_TIMEOUT']),
read_timeout=int(_app.config['S3_READ_TIMEOUT']),
)
config=s3_config
)
# NOTE: Only present for multi-part file copy of artifacts that are uploaded
# through 'cray artifacts create boot-images...' and end up with STS as the
# artifact owner.
_app.s3_sts_resource = boto3.resource(
service_name='s3',
verify=_app.config['S3_STS_SSL_VALIDATE'],
endpoint_url=_app.config['S3_ENDPOINT'],
aws_access_key_id=_app.config['S3_STS_ACCESS_KEY'],
aws_secret_access_key=_app.config['S3_STS_SECRET_KEY'],
config=s3_config
)

def str_to_log_level(level:str) -> int:
# NOTE: we only have to do this until we upgrade to Flask:3.2 or later, then the
# _app.logger.setLevel will take the string version of the logging level
name_to_level = {
'CRITICAL': logging.CRITICAL,
'FATAL': logging.FATAL,
'ERROR': logging.ERROR,
'WARN': logging.WARNING,
'WARNING': logging.WARNING,
'INFO': logging.INFO,
'DEBUG': logging.DEBUG,
'NOTSET': logging.NOTSET,
}

# default to INFO if something unexpected is here
ret_val = name_to_level.get(level)
if ret_val is None:
ret_val = logging.INFO
return ret_val

def create_app():
"""
Expand All @@ -150,7 +179,7 @@ def create_app():
_app.config.from_object(APP_SETTINGS[os.getenv('FLASK_ENV', 'production')])

# pylint: disable=E1101
_app.logger.setLevel(_app.config['LOG_LEVEL'])
_app.logger.setLevel(str_to_log_level(_app.config['LOG_LEVEL']))
_app.logger.info('Image management service configured in {} mode'.format(os.getenv('FLASK_ENV', 'production')))

# dictionary to all the data store objects
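As a point of comparison, here is a minimal sketch of the same name-to-level lookup using only the standard library; it is illustrative rather than part of this change, and assumes Python 3.11 or newer for `logging.getLevelNamesMapping()`:

```python
import logging

def log_level_from_name(level: str) -> int:
    """Resolve a logging level name (e.g. 'DEBUG', 'WARN') to its numeric value.

    Unknown names fall back to INFO, mirroring str_to_log_level() above.
    """
    # getLevelNamesMapping() (Python 3.11+) returns the same name-to-level table
    # that str_to_log_level() builds by hand, including 'WARN' and 'FATAL'.
    return logging.getLevelNamesMapping().get(level.upper(), logging.INFO)

# Example: log_level_from_name('warn') returns 30 (logging.WARNING).
```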
24 changes: 19 additions & 5 deletions src/server/config.py
@@ -1,7 +1,7 @@
#
# MIT License
#
# (C) Copyright 2018-2022 Hewlett Packard Enterprise Development LP
# (C) Copyright 2018-2022, 2025 Hewlett Packard Enterprise Development LP
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
@@ -51,14 +51,28 @@ class Config:
DEBUG = False
TESTING = False
MAX_CONTENT_LENGTH = None # Unlimited
LOG_LEVEL = logging.INFO
LOG_LEVEL = os.getenv('LOG_LEVEL','INFO')

# S3 creds for 'IMS' user
S3_ENDPOINT = os.getenv('S3_ENDPOINT')
S3_ACCESS_KEY = os.getenv('S3_ACCESS_KEY')
S3_SECRET_KEY = os.getenv('S3_SECRET_KEY')
S3_SSL_VALIDATE = \
False if os.getenv('S3_SSL_VALIDATE', 'False').lower() in ('false', 'off', 'no', 'f', '0') \
else os.getenv('S3_SSL_VALIDATE')

# S3 creds for 'STS' user
# NOTE: artifacts in boot-images that are uploaded via 'cray artifacts create...' will be
# assigned to the STS owner. A multi-part copy must be done as the STS user for this to
# succeed. This should not be required and may be a bug in boto3 so this may be able to
# be removed at some time in the future.
S3_STS_ENDPOINT = os.getenv('S3_STS_ENDPOINT')
S3_STS_ACCESS_KEY = os.getenv('S3_STS_ACCESS_KEY')
S3_STS_SECRET_KEY = os.getenv('S3_STS_SECRET_KEY')
S3_STS_SSL_VALIDATE = \
False if os.getenv('S3_STS_SSL_VALIDATE', 'False').lower() in ('false', 'off', 'no', 'f', '0') \
else os.getenv('S3_STS_SSL_VALIDATE')

Contributor comment: You're hashing all of these potential false values to False, but you're not doing the same for, presumably, the true values. Why not? You're just taking whatever is in that environment variable.

S3_IMS_BUCKET = os.getenv('S3_IMS_BUCKET', 'ims')
S3_BOOT_IMAGES_BUCKET = os.getenv('S3_BOOT_IMAGES_BUCKET', 'boot-images')

@@ -83,7 +97,7 @@ class DevelopmentConfig(Config):
"""
DEBUG = True
ENV = 'development'
LOG_LEVEL = logging.DEBUG
LOG_LEVEL = 'DEBUG'
HACK_DATA_STORE = os.path.join(os.path.expanduser("~"), 'ims', 'data')


@@ -92,13 +106,13 @@ class TestingConfig(Config):
TESTING = True
DEBUG = True
ENV = 'development'
LOG_LEVEL = logging.DEBUG
LOG_LEVEL = 'DEBUG'


class StagingConfig(Config):
"""Configuration for Staging."""
DEBUG = True
LOG_LEVEL = logging.DEBUG
LOG_LEVEL = 'DEBUG'


class ProductionConfig(Config):
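To illustrate the point raised in the contributor comment above, here is a hedged sketch of a symmetric parser that maps both falsy and truthy strings explicitly while still letting a CA-bundle path pass through; the `env_flag` helper is hypothetical and not part of this change:

```python
import os

def env_flag(name: str, default: str = 'False'):
    """Interpret an environment variable as a boolean where possible.

    Recognized falsy strings map to False and truthy strings map to True; any
    other value (for example a path to a CA bundle, which boto3's `verify`
    parameter accepts) is returned unchanged.
    """
    raw = os.getenv(name, default)
    lowered = raw.lower()
    if lowered in ('false', 'off', 'no', 'f', '0'):
        return False
    if lowered in ('true', 'on', 'yes', 't', '1'):
        return True
    return raw

# Hypothetical usage mirroring the settings above:
# S3_STS_SSL_VALIDATE = env_flag('S3_STS_SSL_VALIDATE')
```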
34 changes: 31 additions & 3 deletions src/server/helper.py
@@ -1,7 +1,7 @@
#
# MIT License
#
# (C) Copyright 2018-2023 Hewlett Packard Enterprise Development LP
# (C) Copyright 2018-2023, 2025 Hewlett Packard Enterprise Development LP
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
@@ -426,9 +426,37 @@ def _delete_s3_artifact():

def s3_move_artifact(origin_url, destination_path):
""" Utility function to orchestrate moving/renaming a S3 artifact to a new key value. """
new_object = app.s3resource.Object(origin_url.bucket, destination_path)
new_object.copy_from(CopySource='/'.join([origin_url.bucket, origin_url.key]))

# NOTE: there is no 'move' or 'rename' function for S3 objects, so we have to copy then
# delete the old object. For artifacts smaller than 5GB, the Object.copy_from function
# works and does this in a single transfer operation. For artifacts larger than 5GB, it has to
# use a multi-part copy operation. This is automatically handled by the Object.copy
# function. The multi-part copy requires greater levels of permissions than the single
# transfer copy. It will currently not work if the owner of the object is something
# other than IMS. When an object is copied into S3 via 'cray artifacts create ...' it
# has an owner of 'STS' and can't be copied with the multi-part transfer.

Contributor suggested change:
# other than IMS. When an object is copied into S3 via 'cray artifacts create ...', it

# Find the owner of the object
sts_owned = False
orig_object_owner = app.s3resource.ObjectAcl(origin_url.bucket, origin_url.key)
if orig_object_owner.owner is not None and 'ID' in orig_object_owner.owner and orig_object_owner.owner['ID'] == 'STS':
app.logger.info(f"Source object owner: {orig_object_owner.owner['ID']}")
sts_owned = True

# if the object is owned by 'STS', then a multi-part copy needs to be done with 'STS' creds
new_object = None
if not sts_owned:
new_object = app.s3resource.Object(origin_url.bucket, destination_path)
else:
new_object = app.s3_sts_resource.Object(origin_url.bucket, destination_path)

# Copy - should do multi-part copy if needed
copy_source = {'Bucket':origin_url.bucket, 'Key':origin_url.key}
new_object.copy(copy_source)

# delete the original object
app.s3resource.Object(origin_url.bucket, origin_url.key).delete()

return new_object


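For clarity, a minimal standalone sketch of the copy-then-delete pattern described in the comment above; the bucket and key names are illustrative, and the real helper additionally switches to STS credentials when the source object is STS-owned:

```python
import boto3

# Endpoint and credentials would come from the environment in the real service.
s3 = boto3.resource('s3', region_name='us-east-1')

bucket = 'boot-images'
old_key = 'deleted/1fb58f4e/rootfs'   # hypothetical soft-deleted artifact
new_key = '1fb58f4e/rootfs'           # restored location

# Object.copy() performs a single-request copy for small objects and switches
# to a multi-part copy for objects above the 5GB single-copy limit.
s3.Object(bucket, new_key).copy({'Bucket': bucket, 'Key': old_key})

# S3 has no rename operation, so the original object is deleted after the copy.
s3.Object(bucket, old_key).delete()
```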
28 changes: 15 additions & 13 deletions tests/v3/test_v3_deleted_images.py
@@ -1,7 +1,7 @@
#
# MIT License
#
# (C) Copyright 2020-2024 Hewlett Packard Enterprise Development LP
# (C) Copyright 2020-2025 Hewlett Packard Enterprise Development LP
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
@@ -23,6 +23,7 @@
#
import io
import json
import pytest
from datetime import datetime, timedelta
from uuid import uuid4

@@ -120,20 +121,19 @@ def stub_soft_undelete(self, bucket, key, etag):
{"ETag": etag},
{'Bucket': bucket, 'Key': key})

self.s3resource_stub.add_response(method='copy_object',
# NOTE: this isn't correct. The 'copy' method looks at the size of the artifact
# being copied and either completes it as a single transaction, or breaks it
# into multiple transactions. That type of interaction with boto3 does not
# stub out correctly and at this time there is no good solution.
self.s3resource_stub.add_response(method='copy',
service_response={
'CopyObjectResult': {
'ETag': f"\"{etag}\"",
},
'ResponseMetadata': {
'HTTPStatusCode': 200,
}
},
'ResponseMetadata': {
'HTTPStatusCode': 200,
}
},
expected_params={
'Bucket': bucket,
'CopySource': '/'.join([bucket, key]),
'Key': key.replace('deleted/', '')
})
'CopySource': {'Bucket':bucket, 'Key':key},
})

self.s3resource_stub.add_response(method='delete_object',
service_response={},
@@ -184,6 +184,7 @@ def test_get_404_bad_id(self):
response = self.app.get('/v3/deleted/images/{}'.format(str(uuid4())))
check_error_responses(self, response, 404, ['status', 'title', 'detail'])

@pytest.mark.skip(reason="Boto3 Stubber can't handle multi-part copy command")
def test_soft_undelete(self):
""" PATCH /v3/deleted/images/{image_id} """

@@ -311,6 +312,7 @@ def test_get_all(self):
'resource field "{}" returned was not equal'.format(key))
assert match_found

@pytest.mark.skip(reason="Boto3 Stubber can't handle multi-part copy command")
def test_soft_undelete_all(self):
""" PATCH /v3/deleted/images """

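Since the botocore Stubber cannot intercept the resource-level `copy()` (it is a managed transfer rather than a single stubbable API call), one possible way to re-enable the skipped tests would be to mock the resource's `Object()` factory with `unittest.mock`. This is a hedged sketch of the technique only; the fixture wiring and key names are assumptions and would need to match how the Flask app exposes its S3 resources:

```python
import boto3
from unittest import mock

# Build a resource the same way the app does (region and credentials illustrative).
s3resource = boto3.resource('s3', region_name='us-east-1',
                            aws_access_key_id='test', aws_secret_access_key='test')

fake_object = mock.MagicMock()
with mock.patch.object(s3resource, 'Object', return_value=fake_object):
    # Code under test would run here; s3_move_artifact() would end up calling
    # Object(...).copy(...) and Object(...).delete() on the patched resource.
    s3resource.Object('boot-images', '1fb58f4e/rootfs').copy(
        {'Bucket': 'boot-images', 'Key': 'deleted/1fb58f4e/rootfs'})
    s3resource.Object('boot-images', 'deleted/1fb58f4e/rootfs').delete()

fake_object.copy.assert_called_once()     # multi-part-capable copy was invoked
fake_object.delete.assert_called_once()   # original object removed afterwards
```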