From b71ece9962411ea0573c57e0892551c1f850276f Mon Sep 17 00:00:00 2001 From: "Nicholas H.Tollervey" Date: Tue, 26 May 2020 15:54:38 +0100 Subject: [PATCH 1/3] Add versioning and simple lifecycle to S3 buckets. Also include a script to update existing buckets to this state. --- controlpanel/api/aws.py | 28 +++++++++++++++++++++++++- scripts/README.md | 4 ++++ scripts/version_buckets.py | 41 ++++++++++++++++++++++++++++++++++++++ tests/api/test_aws.py | 11 ++++++++++ 4 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 scripts/README.md create mode 100644 scripts/version_buckets.py diff --git a/controlpanel/api/aws.py b/controlpanel/api/aws.py index 4a8e6f284..34e57e981 100644 --- a/controlpanel/api/aws.py +++ b/controlpanel/api/aws.py @@ -199,13 +199,39 @@ def delete_role(name): def create_bucket(bucket_name, is_data_warehouse=False): try: - bucket = boto3.resource('s3').create_bucket( + bucket = boto3.resource("s3").create_bucket( Bucket=bucket_name, ACL='private', CreateBucketConfiguration={ 'LocationConstraint': settings.BUCKET_REGION, }, ) + # Enable versioning by default. + # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html?highlight=s3#S3.BucketVersioning + versioning = bucket.Versioning() + versioning.enable() + # Set bucket lifecycle. Send non-current versions of files to glacier + # storage after 30 days. + # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.put_bucket_lifecycle_configuration + lifecycle_id = f"{bucket_name}_lifecycle_configuration" + lifecycle_conf = boto3.client("s3").put_bucket_lifecycle_configuration( + Bucket=bucket_name, + LifecycleConfiguration={ + "Rules": [ + { + "ID": lifecycle_id, + "Status": "Enabled", + "Prefix": "", + "NoncurrentVersionTransitions": [ + { + 'NoncurrentDays': 30, + 'StorageClass': 'GLACIER', + }, + ] + }, + ] + } + ) if is_data_warehouse: _tag_bucket(bucket, {"buckettype": "datawarehouse"}) diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 000000000..83f7e80d4 --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,4 @@ +# Scripts + +A directory containing Python scripts used to automate various administrative +process needed during the life of the analytic platform. diff --git a/scripts/version_buckets.py b/scripts/version_buckets.py new file mode 100644 index 000000000..d5e7f92a8 --- /dev/null +++ b/scripts/version_buckets.py @@ -0,0 +1,41 @@ +""" +Reads all the current buckets, ensures that versioning is switched on for those +that don't yet have it and sets the life cycle configuration to send +non-current versions of files to glacier storage after 30 days. +""" +import boto3 + + +s3 = boto3.client('s3') +buckets = s3.list_buckets() +for bucket_dict in buckets["Buckets"]: + bucket_name = bucket_dict["Name"] + print("Working on {bucket_name}.") + bucket = boto3.resource("s3").Bucket(bucket_name) + # Add versioning if not already set. + versioning = bucket.Versioning() + if not versioning.status == "Enabled": + print("Enabling versioning for {bucket_name}.") + versioning.enable() + # Set life cycle rule to send non-current versions of files to glacier + # storage after 30 days. + lifecycle_id = f"{bucket_name}_lifecycle_configuration" + print("Setting lifecycle {lifecycle_id} for bucket {bucket_name}.") + lifecycle_conf = boto3.client("s3").put_bucket_lifecycle_configuration( + Bucket=bucket_name, + LifecycleConfiguration={ + "Rules": [ + { + "ID": lifecycle_id, + "Status": "Enabled", + "Prefix": "", + "NoncurrentVersionTransitions": [ + { + 'NoncurrentDays': 30, + 'StorageClass': 'GLACIER', + }, + ] + }, + ] + } + ) diff --git a/tests/api/test_aws.py b/tests/api/test_aws.py index 697bacfbf..c93b6259f 100644 --- a/tests/api/test_aws.py +++ b/tests/api/test_aws.py @@ -253,6 +253,17 @@ def test_create_bucket(logs_bucket, s3): aws.create_bucket(bucket_name, is_data_warehouse=True) + # Check versioning. + assert bucket.Versioning().status == "Enabled" + + # Check lifecycle. + versioning = bucket.LifecycleConfiguration() + rule = versioning.rules[0] + assert rule["ID"].endswith("_lifecycle_configuration") + assert rule["Status"] == "Enabled" + assert rule["NoncurrentVersionTransitions"][0]["NoncurrentDays"] == 30 + assert rule["NoncurrentVersionTransitions"][0]["StorageClass"] == "GLACIER" + # Check logging assert bucket.Logging().logging_enabled['TargetBucket'] == settings.LOGS_BUCKET_NAME # Check tagging From 81024802e86d943b8c5c9eb9249a273ed00876c2 Mon Sep 17 00:00:00 2001 From: "Nicholas H.Tollervey" Date: Wed, 27 May 2020 13:19:59 +0100 Subject: [PATCH 2/3] Remove single-shot script (moved to analytics-platform-ops - see PR therein). --- scripts/README.md | 4 ---- scripts/version_buckets.py | 41 -------------------------------------- 2 files changed, 45 deletions(-) delete mode 100644 scripts/README.md delete mode 100644 scripts/version_buckets.py diff --git a/scripts/README.md b/scripts/README.md deleted file mode 100644 index 83f7e80d4..000000000 --- a/scripts/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# Scripts - -A directory containing Python scripts used to automate various administrative -process needed during the life of the analytic platform. diff --git a/scripts/version_buckets.py b/scripts/version_buckets.py deleted file mode 100644 index d5e7f92a8..000000000 --- a/scripts/version_buckets.py +++ /dev/null @@ -1,41 +0,0 @@ -""" -Reads all the current buckets, ensures that versioning is switched on for those -that don't yet have it and sets the life cycle configuration to send -non-current versions of files to glacier storage after 30 days. -""" -import boto3 - - -s3 = boto3.client('s3') -buckets = s3.list_buckets() -for bucket_dict in buckets["Buckets"]: - bucket_name = bucket_dict["Name"] - print("Working on {bucket_name}.") - bucket = boto3.resource("s3").Bucket(bucket_name) - # Add versioning if not already set. - versioning = bucket.Versioning() - if not versioning.status == "Enabled": - print("Enabling versioning for {bucket_name}.") - versioning.enable() - # Set life cycle rule to send non-current versions of files to glacier - # storage after 30 days. - lifecycle_id = f"{bucket_name}_lifecycle_configuration" - print("Setting lifecycle {lifecycle_id} for bucket {bucket_name}.") - lifecycle_conf = boto3.client("s3").put_bucket_lifecycle_configuration( - Bucket=bucket_name, - LifecycleConfiguration={ - "Rules": [ - { - "ID": lifecycle_id, - "Status": "Enabled", - "Prefix": "", - "NoncurrentVersionTransitions": [ - { - 'NoncurrentDays': 30, - 'StorageClass': 'GLACIER', - }, - ] - }, - ] - } - ) From 42f236dd94648af7eaddb8b41dbb9c19e246571d Mon Sep 17 00:00:00 2001 From: "Nicholas H.Tollervey" Date: Mon, 22 Jun 2020 13:41:55 +0100 Subject: [PATCH 3/3] Fix error with exception namespacing. --- controlpanel/api/aws.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/controlpanel/api/aws.py b/controlpanel/api/aws.py index 34e57e981..367872e30 100644 --- a/controlpanel/api/aws.py +++ b/controlpanel/api/aws.py @@ -198,8 +198,10 @@ def delete_role(name): def create_bucket(bucket_name, is_data_warehouse=False): + s3_resource = boto3.resource("s3") + s3_client = boto3.client('s3') try: - bucket = boto3.resource("s3").create_bucket( + bucket = s3_resource.create_bucket( Bucket=bucket_name, ACL='private', CreateBucketConfiguration={ @@ -214,7 +216,7 @@ def create_bucket(bucket_name, is_data_warehouse=False): # storage after 30 days. # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.put_bucket_lifecycle_configuration lifecycle_id = f"{bucket_name}_lifecycle_configuration" - lifecycle_conf = boto3.client("s3").put_bucket_lifecycle_configuration( + lifecycle_conf = s3_client.put_bucket_lifecycle_configuration( Bucket=bucket_name, LifecycleConfiguration={ "Rules": [ @@ -235,7 +237,7 @@ def create_bucket(bucket_name, is_data_warehouse=False): if is_data_warehouse: _tag_bucket(bucket, {"buckettype": "datawarehouse"}) - except bucket.meta.client.exceptions.BucketAlreadyOwnedByYou: + except s3_resource.meta.client.exceptions.BucketAlreadyOwnedByYou: log.warning(f'Skipping creating Bucket {bucket_name}: Already exists') return