Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: verbose logging during scans #1262

Merged
merged 1 commit into from
Jul 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
repos:
- repo: https://github.com/ambv/black
rev: 21.4b2
rev: 24.4.2
hooks:
- id: black
- repo: https://gitlab.com/pycqa/flake8
rev: 3.9.1
- repo: https://github.com/PyCQA/flake8
rev: 7.1.0
hooks:
- id: flake8
- repo: https://github.com/pre-commit/pre-commit-hooks
Expand Down
45 changes: 35 additions & 10 deletions assets/lambda/code/scan/lambda.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

from posixpath import join
import time
import boto3
import botocore
import glob
import json
import logging
import os
import pwd
import subprocess
import shutil
import subprocess
import time
from posixpath import join
from urllib.parse import unquote_plus

import boto3
import botocore
from aws_lambda_powertools import Logger, Metrics

logger = Logger()
Expand Down Expand Up @@ -106,6 +107,7 @@ def lambda_handler(event, context):

def set_status(bucket, key, status):
"""Set the scan-status tag of the S3 Object"""
logger.info(f"Attempting to set scan-status of {key} to {status}")
old_tags = {}
try:
response = s3_client.get_object_tagging(Bucket=bucket, Key=key)
Expand Down Expand Up @@ -133,6 +135,9 @@ def create_dir(input_bucket, input_key, download_path):
full_path = download_path
if len(sub_dir) > 0:
full_path = os.path.join(full_path, sub_dir)
logger.info(
f"Attempting to create the EFS filepath {full_path} if it does not already exist"
)
if not os.path.exists(full_path):
try:
os.makedirs(full_path, exist_ok=True)
Expand All @@ -142,6 +147,9 @@ def create_dir(input_bucket, input_key, download_path):

def download_object(input_bucket, input_key, download_path):
"""Downloads the specified file from S3 to EFS"""
logger.info(
f"Attempting to download of {input_key} from the {input_bucket} S3 Bucket to the EFS filepath {download_path}"
)
try:
s3_resource.Bucket(input_bucket).download_file(
input_key, f"{download_path}/{input_key}"
Expand All @@ -158,6 +166,9 @@ def download_object(input_bucket, input_key, download_path):
def expand_if_large_archive(input_bucket, input_key, download_path, byte_size):
"""Expand the file if it is an archival type and larger than ClamAV Max Size"""
if byte_size > MAX_BYTES:
logger.info(
f"Attempting to expand {input_key} since the filesize is {byte_size} bytes, which is greater than the ClamAV per file max of {MAX_BYTES}"
)
file_name = f"{download_path}/{input_key}"
try:
command = ["7za", "x", "-y", f"{file_name}", f"-o{download_path}"]
Expand Down Expand Up @@ -193,15 +204,22 @@ def expand_if_large_archive(input_bucket, input_key, download_path, byte_size):
else:
return


last_update_time = 0


def freshclam_update(input_bucket, input_key, download_path, definitions_path):
"""Points freshclam to the local database files and the S3 Definitions bucket.
Creates the database path on EFS if it does not already exist"""
global last_update_time
current_time = time.time()
elapsed_time = current_time - last_update_time
if elapsed_time >= 3600: # Update definitions only once per hour to improve performance
"""Points freshclam to the local database files and the S3 Definitions bucket.
Creates the database path on EFS if it does not already exist"""
if (
elapsed_time >= 3600
): # Update definitions only once per hour to improve performance
logger.info(
f"Attempting to update the virus database. Last update was at {last_update_time} epoch time. Current time is {current_time}"
)
conf = "/tmp/freshclam.conf"
# will already exist when Lambdas are running in same execution context
if not os.path.exists(conf):
Expand All @@ -228,15 +246,19 @@ def freshclam_update(input_bucket, input_key, download_path, definitions_path):
)
last_update_time = current_time
except subprocess.CalledProcessError as e:
report_failure(input_bucket, input_key, download_path, str(e.stderr))
report_failure(
input_bucket, input_key, download_path, str(e.stderr)
)
except ClamAVException as e:
report_failure(input_bucket, input_key, download_path, e.message)
return


def scan(input_bucket, input_key, download_path, definitions_path, tmp_path):
"""Scans the object from S3"""
# Max file size support by ClamAV
logger.info(
f"Attempting to scan the file {input_key} from the {input_bucket} S3 Bucket located at the EFS path {download_path}"
)
try:
command = [
"clamscan",
Expand Down Expand Up @@ -280,6 +302,9 @@ def scan(input_bucket, input_key, download_path, definitions_path, tmp_path):

def delete(download_path, input_key=None):
"""Deletes the file/folder from the EFS File System"""
logger.info(
f"Attempting to delete the file located at the EFS path {download_path}"
)
if input_key:
file = f"{download_path}/{input_key}"
if os.path.exists(file):
Expand Down
Loading