From 30657c56088e177a3b590b4f40334a81a70de48b Mon Sep 17 00:00:00 2001 From: "Colin (Wilkie) McLellan" Date: Thu, 25 Jan 2024 15:42:32 +0000 Subject: [PATCH 1/4] Change list of match ids to set for json_handler.py Increase the speed of the json_handler by migrating from a list to a set. Move from O(n) to O(1) --- backend/ecs_tasks/delete_files/json_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/ecs_tasks/delete_files/json_handler.py b/backend/ecs_tasks/delete_files/json_handler.py index 03f4236d..f84ebe32 100644 --- a/backend/ecs_tasks/delete_files/json_handler.py +++ b/backend/ecs_tasks/delete_files/json_handler.py @@ -52,7 +52,7 @@ def delete_matches_from_json_file(input_file, to_delete, compressed=False): for column in to_delete: if column["Type"] == "Simple": record = get_value(column["Column"], parsed) - if record and record in column["MatchIds"]: + if record and record in set(column["MatchIds"]): should_delete = True break else: From ca23bac7ebebc904b21ebdde3b09f58f9ae79875 Mon Sep 17 00:00:00 2001 From: "Colin (Wilkie) McLellan" Date: Fri, 26 Jan 2024 11:31:18 +0000 Subject: [PATCH 2/4] Update CHANGELOG.md --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6104c99a..6fc2378a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Change Log +## v0.66 + +- [#395](https://github.com/awslabs/amazon-s3-find-and-forget/issues/395): + Increase the speed of the json_handler by migrating from a list to a set. + Move from O(n) to O(1) + ## v0.65 - [#393](https://github.com/awslabs/amazon-s3-find-and-forget/issues/393): Fix From 05be12930bbde60b9a366227e60240acb16eb612 Mon Sep 17 00:00:00 2001 From: "Colin (Wilkie) McLellan" Date: Fri, 26 Jan 2024 12:39:53 +0000 Subject: [PATCH 3/4] Bump Version 0.65 -> 0.66 --- templates/template.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/templates/template.yaml b/templates/template.yaml index 94e717b3..5e3d3f7b 100644 --- a/templates/template.yaml +++ b/templates/template.yaml @@ -1,6 +1,6 @@ AWSTemplateFormatVersion: "2010-09-09" Transform: AWS::Serverless-2016-10-31 -Description: Amazon S3 Find and Forget (uksb-1q2j8beb0) (version:v0.65) (tag:main) +Description: Amazon S3 Find and Forget (uksb-1q2j8beb0) (version:v0.66) (tag:main) Parameters: AccessControlAllowOriginOverride: @@ -206,7 +206,7 @@ Conditions: Mappings: Solution: Constants: - Version: 'v0.65' + Version: 'v0.66' Resources: TempBucket: From c497147dfc63733c312d59934846d52597f0009d Mon Sep 17 00:00:00 2001 From: Matteo Figus Date: Fri, 26 Jan 2024 14:26:30 +0000 Subject: [PATCH 4/4] Include optimisation for composite json matches --- backend/ecs_tasks/delete_files/json_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/ecs_tasks/delete_files/json_handler.py b/backend/ecs_tasks/delete_files/json_handler.py index f84ebe32..188d8483 100644 --- a/backend/ecs_tasks/delete_files/json_handler.py +++ b/backend/ecs_tasks/delete_files/json_handler.py @@ -61,7 +61,7 @@ def delete_matches_from_json_file(input_file, to_delete, compressed=False): record = get_value(col, parsed) if record: matched.append(record) - if matched in column["MatchIds"]: + if tuple(matched) in set(map(tuple, column["MatchIds"])): should_delete = True break if should_delete: