Skip to content

Commit

Permalink
tests: add check for keeping latest objects in S3
Browse files Browse the repository at this point in the history
  • Loading branch information
paulmueller committed Mar 5, 2024
1 parent 254b9b9 commit d5a5e48
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 8 deletions.
25 changes: 17 additions & 8 deletions dcor_control/inspect/data_ckan_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,15 @@ def check_orphaned_s3_artifacts(assume_yes=False, older_than_days=7,
click.secho(f"Found S3 bucket for non-existent circle {cs3}")
request_bucket_removal(
bucket_name=bucket_name,
older_than_days=older_than_days,
autocorrect=assume_yes)
continue
# Iterate through the resources of that circle
circle_resources = list_group_resources_ckan(cs3)

invalid_artifacts = []
for object_name in iter_bucket_objects_s3(bucket_name):
for object_name in iter_bucket_objects_s3(
bucket_name, older_than_days=older_than_days):
artifact = object_name.split("/")[0]
if artifact in ARTIFACT_NAMES:
rid = "".join(object_name.split("/")[1:])
Expand All @@ -82,7 +84,6 @@ def get_circles_ckan():
f"ckan -c {ckan_ini} list-circles",
shell=True).decode().split("\n")
circle_list = [f.split()[0] for f in data if f.strip()]
print("CKAN circles", circle_list)
return circle_list


Expand All @@ -103,18 +104,17 @@ def get_circles_s3(older_than_days=0):
tz = creation_date.tzinfo
if creation_date > (datetime.now(tz=tz)
- timedelta(days=older_than_days)):
# Ignore circles that are younger than a week
# Ignore circles that are younger than `older_than_days`
continue
# Find circles that match our regular expression scheme
r_match = bucket_regexp.match(bdict["Name"])
if r_match is not None:
circle_id = r_match.group(1)
circle_list.append(circle_id)
print("S3 circles", circle_list)
return circle_list


def iter_bucket_objects_s3(bucket_name):
def iter_bucket_objects_s3(bucket_name, older_than_days=7):
"""Return iterator over all objects in a Bucket"""
s3_client, _, s3_resource = s3.get_s3()
kwargs = {"Bucket": bucket_name,
Expand All @@ -129,6 +129,12 @@ def iter_bucket_objects_s3(bucket_name):

for obj in resp.get("Contents", []):
object_name = obj["Key"]
creation_date = obj.get("LastModified", obj.get("CreationDate"))
tz = creation_date.tzinfo
if creation_date > (datetime.now(tz=tz)
- timedelta(days=older_than_days)):
# Ignore objects that are younger than `older_than_days`
continue
yield object_name

if not resp.get("IsTruncated"):
Expand All @@ -151,7 +157,7 @@ def list_group_resources_ckan(group_name_or_id):
return resources


def request_bucket_removal(bucket_name, autocorrect=False):
def request_bucket_removal(bucket_name, older_than_days=7, autocorrect=False):
"""Request (user interaction) the removal of an entire bucket"""
if autocorrect:
print(f"Deleting bucket {bucket_name}")
Expand All @@ -164,11 +170,14 @@ def request_bucket_removal(bucket_name, autocorrect=False):
# Delete the objects
request_removal_from_bucket(
bucket_name=bucket_name,
objects=iter_bucket_objects_s3(bucket_name),
objects=iter_bucket_objects_s3(bucket_name,
older_than_days=older_than_days),
autocorrect=True
)
# Delete the bucket if it is empty
if len(list(iter_bucket_objects_s3(bucket_name))) == 0:
if len(list(
iter_bucket_objects_s3(bucket_name,
older_than_days=older_than_days))) == 0:
try:
s3_client.delete_bucket(Bucket=bucket_name)
except s3_client.exceptions.NoSuchBucket:
Expand Down
7 changes: 7 additions & 0 deletions tests/test_inspect_data_ckan.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,13 @@ def test_check_orphaned_s3_artifacts(enqueue_job_mock, create_with_upload,
# Make sure that the S3 object is still there
assert s3.object_exists(bucket_name, object_name)

# Perform a cleanup that does not take into account the new data
inspect.check_orphaned_s3_artifacts(assume_yes=True,
older_than_days=1) # [sic]

# Make sure that the S3 object is still there
assert s3.object_exists(bucket_name, object_name)

# Perform the actual cleanup
inspect.check_orphaned_s3_artifacts(assume_yes=True,
older_than_days=0)
Expand Down

0 comments on commit d5a5e48

Please sign in to comment.