feat: verbose logging during scans
Arun Donti committed Jul 19, 2024
1 parent 697fec5 commit 77121ea
Showing 2 changed files with 38 additions and 13 deletions.
.pre-commit-config.yaml (6 changes: 3 additions & 3 deletions)
@@ -1,10 +1,10 @@
 repos:
   - repo: https://github.com/ambv/black
-    rev: 21.4b2
+    rev: 24.4.2
     hooks:
       - id: black
-  - repo: https://gitlab.com/pycqa/flake8
-    rev: 3.9.1
+  - repo: https://github.com/PyCQA/flake8
+    rev: 7.1.0
     hooks:
       - id: flake8
   - repo: https://github.com/pre-commit/pre-commit-hooks
assets/lambda/code/scan/lambda.py (45 changes: 35 additions & 10 deletions)
@@ -1,18 +1,19 @@
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: Apache-2.0

-from posixpath import join
-import time
-import boto3
-import botocore
 import glob
 import json
 import logging
 import os
 import pwd
-import subprocess
 import shutil
+import subprocess
+import time
+from posixpath import join
 from urllib.parse import unquote_plus
+
+import boto3
+import botocore
 from aws_lambda_powertools import Logger, Metrics

 logger = Logger()
@@ -106,6 +107,7 @@ def lambda_handler(event, context):

 def set_status(bucket, key, status):
     """Set the scan-status tag of the S3 Object"""
+    logger.info(f"Attempting to set scan-status of {key} to {status}")
     old_tags = {}
     try:
         response = s3_client.get_object_tagging(Bucket=bucket, Key=key)
@@ -133,6 +135,9 @@ def create_dir(input_bucket, input_key, download_path):
     full_path = download_path
     if len(sub_dir) > 0:
         full_path = os.path.join(full_path, sub_dir)
+    logger.info(
+        f"Attempting to create the EFS filepath {full_path} if it does not already exist"
+    )
     if not os.path.exists(full_path):
         try:
             os.makedirs(full_path, exist_ok=True)
@@ -142,6 +147,9 @@ def download_object(input_bucket, input_key, download_path):

 def download_object(input_bucket, input_key, download_path):
     """Downloads the specified file from S3 to EFS"""
+    logger.info(
+        f"Attempting to download of {input_key} from the {input_bucket} S3 Bucket to the EFS filepath {download_path}"
+    )
     try:
         s3_resource.Bucket(input_bucket).download_file(
             input_key, f"{download_path}/{input_key}"
@@ -158,6 +166,9 @@ def expand_if_large_archive(input_bucket, input_key, download_path, byte_size):
 def expand_if_large_archive(input_bucket, input_key, download_path, byte_size):
     """Expand the file if it is an archival type and larger than ClamAV Max Size"""
     if byte_size > MAX_BYTES:
+        logger.info(
+            f"Attempting to expand {input_key} since the filesize is {byte_size} bytes, which is greater than the ClamAV per file max of {MAX_BYTES}"
+        )
         file_name = f"{download_path}/{input_key}"
         try:
             command = ["7za", "x", "-y", f"{file_name}", f"-o{download_path}"]
@@ -193,15 +204,22 @@ def expand_if_large_archive(input_bucket, input_key, download_path, byte_size):
     else:
         return

+
 last_update_time = 0

+
 def freshclam_update(input_bucket, input_key, download_path, definitions_path):
+    """Points freshclam to the local database files and the S3 Definitions bucket.
+    Creates the database path on EFS if it does not already exist"""
     global last_update_time
     current_time = time.time()
     elapsed_time = current_time - last_update_time
-    if elapsed_time >= 3600: # Update definitions only once per hour to improve performance
-        """Points freshclam to the local database files and the S3 Definitions bucket.
-        Creates the database path on EFS if it does not already exist"""
+    if (
+        elapsed_time >= 3600
+    ):  # Update definitions only once per hour to improve performance
+        logger.info(
+            f"Attempting to update the virus database. Last update was at {last_update_time} epoch time. Current time is {current_time}"
+        )
         conf = "/tmp/freshclam.conf"
         # will already exist when Lambdas are running in same execution context
         if not os.path.exists(conf):
@@ -228,15 +246,19 @@ def freshclam_update(input_bucket, input_key, download_path, definitions_path):
                 )
             last_update_time = current_time
         except subprocess.CalledProcessError as e:
-            report_failure(input_bucket, input_key, download_path, str(e.stderr))
+            report_failure(
+                input_bucket, input_key, download_path, str(e.stderr)
+            )
         except ClamAVException as e:
             report_failure(input_bucket, input_key, download_path, e.message)
     return


 def scan(input_bucket, input_key, download_path, definitions_path, tmp_path):
     """Scans the object from S3"""
-    # Max file size support by ClamAV
+    logger.info(
+        f"Attempting to scan the file {input_key} from the {input_bucket} S3 Bucket located at the EFS path {download_path}"
+    )
     try:
         command = [
             "clamscan",
@@ -280,6 +302,9 @@ def scan(input_bucket, input_key, download_path, definitions_path, tmp_path):

 def delete(download_path, input_key=None):
     """Deletes the file/folder from the EFS File System"""
+    logger.info(
+        f"Attempting to delete the file located at the EFS path {download_path}"
+    )
     if input_key:
         file = f"{download_path}/{input_key}"
         if os.path.exists(file):
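For context, a minimal sketch (not part of this commit) of how the aws_lambda_powertools Logger used in lambda.py turns the added logger.info calls into structured JSON log lines in CloudWatch Logs; the service name, handler, and message below are illustrative, and the log level can be adjusted via the POWERTOOLS_LOG_LEVEL environment variable rather than code changes.

from aws_lambda_powertools import Logger

# Illustrative configuration; the scan function simply uses Logger() with defaults.
logger = Logger(service="serverless-clamscan", level="INFO")


def lambda_handler(event, context):
    # Each call emits one JSON line, e.g.
    # {"level": "INFO", "message": "Attempting to scan ...", "service": "serverless-clamscan", ...}
    logger.info("Attempting to scan the file example.txt from the example-bucket S3 Bucket")
    return {"status": "clean"}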
