awslabs · dontirun · Jul 19, 2024 · Jul 19, 2024
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,10 +1,10 @@
 repos:
   - repo: https://github.com/ambv/black
-    rev: 21.4b2
+    rev: 24.4.2
     hooks:
       - id: black
-  - repo: https://gitlab.com/pycqa/flake8
-    rev: 3.9.1
+  - repo: https://github.com/PyCQA/flake8
+    rev: 7.1.0
     hooks:
       - id: flake8
   - repo: https://github.com/pre-commit/pre-commit-hooks

diff --git a/assets/lambda/code/scan/lambda.py b/assets/lambda/code/scan/lambda.py
@@ -1,18 +1,19 @@
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: Apache-2.0
 
-from posixpath import join
-import time
-import boto3
-import botocore
 import glob
 import json
 import logging
 import os
 import pwd
-import subprocess
 import shutil
+import subprocess
+import time
+from posixpath import join
 from urllib.parse import unquote_plus
+
+import boto3
+import botocore
 from aws_lambda_powertools import Logger, Metrics
 
 logger = Logger()
@@ -106,6 +107,7 @@ def lambda_handler(event, context):
 
 def set_status(bucket, key, status):
     """Set the scan-status tag of the S3 Object"""
+    logger.info(f"Attempting to set scan-status of {key} to {status}")
     old_tags = {}
     try:
         response = s3_client.get_object_tagging(Bucket=bucket, Key=key)
@@ -133,6 +135,9 @@ def create_dir(input_bucket, input_key, download_path):
     full_path = download_path
     if len(sub_dir) > 0:
         full_path = os.path.join(full_path, sub_dir)
+    logger.info(
+        f"Attempting to create the EFS filepath {full_path} if it does not already exist"
+    )
     if not os.path.exists(full_path):
         try:
             os.makedirs(full_path, exist_ok=True)
@@ -142,6 +147,9 @@ def create_dir(input_bucket, input_key, download_path):
 
 def download_object(input_bucket, input_key, download_path):
     """Downloads the specified file from S3 to EFS"""
+    logger.info(
+        f"Attempting to download of {input_key} from the {input_bucket} S3 Bucket to the EFS filepath {download_path}"
+    )
     try:
         s3_resource.Bucket(input_bucket).download_file(
             input_key, f"{download_path}/{input_key}"
@@ -158,6 +166,9 @@ def download_object(input_bucket, input_key, download_path):
 def expand_if_large_archive(input_bucket, input_key, download_path, byte_size):
     """Expand the file if it is an archival type and larger than ClamAV Max Size"""
     if byte_size > MAX_BYTES:
+        logger.info(
+            f"Attempting to expand {input_key} since the filesize is {byte_size} bytes, which is greater than the ClamAV per file max of {MAX_BYTES}"
+        )
         file_name = f"{download_path}/{input_key}"
         try:
             command = ["7za", "x", "-y", f"{file_name}", f"-o{download_path}"]
@@ -193,15 +204,22 @@ def expand_if_large_archive(input_bucket, input_key, download_path, byte_size):
     else:
         return
 
+
 last_update_time = 0
 
+
 def freshclam_update(input_bucket, input_key, download_path, definitions_path):
+    """Points freshclam to the local database files and the S3 Definitions bucket.
+    Creates the database path on EFS if it does not already exist"""
     global last_update_time
     current_time = time.time()
     elapsed_time = current_time - last_update_time
-    if elapsed_time >= 3600: # Update definitions only once per hour to improve performance
-        """Points freshclam to the local database files and the S3 Definitions bucket.
-        Creates the database path on EFS if it does not already exist"""
+    if (
+        elapsed_time >= 3600
+    ):  # Update definitions only once per hour to improve performance
+        logger.info(
+            f"Attempting to update the virus database. Last update was at {last_update_time} epoch time. Current time is {current_time}"
+        )
         conf = "/tmp/freshclam.conf"
         # will already exist when Lambdas are running in same execution context
         if not os.path.exists(conf):
@@ -228,15 +246,19 @@ def freshclam_update(input_bucket, input_key, download_path, definitions_path):
                 )
             last_update_time = current_time
         except subprocess.CalledProcessError as e:
-            report_failure(input_bucket, input_key, download_path, str(e.stderr))
+            report_failure(
+                input_bucket, input_key, download_path, str(e.stderr)
+            )
         except ClamAVException as e:
             report_failure(input_bucket, input_key, download_path, e.message)
     return
 
 
 def scan(input_bucket, input_key, download_path, definitions_path, tmp_path):
     """Scans the object from S3"""
-    # Max file size support by ClamAV
+    logger.info(
+        f"Attempting to scan the file {input_key} from the {input_bucket} S3 Bucket located at the EFS path {download_path}"
+    )
     try:
         command = [
             "clamscan",
@@ -280,6 +302,9 @@ def scan(input_bucket, input_key, download_path, definitions_path, tmp_path):
 
 def delete(download_path, input_key=None):
     """Deletes the file/folder from the EFS File System"""
+    logger.info(
+        f"Attempting to delete the file located at the EFS path {download_path}"
+    )
     if input_key:
         file = f"{download_path}/{input_key}"
         if os.path.exists(file):