diff --git a/configs/python/backend/backend.yaml b/configs/python/backend/backend.yaml
index 57ebdc2c..8b73ea66 100644
--- a/configs/python/backend/backend.yaml
+++ b/configs/python/backend/backend.yaml
@@ -694,6 +694,10 @@ scanners:
       priority: 5
       options:
         limit: 1000
+        limit_metadata: True
+        size_limit: 250000000
+        crack_pws: False
+        log_pws: True
         password_file: '/etc/strelka/passwords.dat'
   'ScanZlib':
     - positive:
diff --git a/docs/README.md b/docs/README.md
index d69a486a..f7f12181 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -746,21 +746,21 @@ Navigate to the Jaeger UI at http://localhost:16686/ to view traces.
 ## Logging
 
 ### Local
-The historical, and default, means of logging in Strelka is via a local log file that is instatiated upon the creation of the Strelka Frontend container. While other logging methodologies have recently been added (see Kafka section), in cases where other optional logging methodologies have been enable but fail some time after the instance has started running, the instance will always default to the local log such that no data is lost in the event of the alternative logging methodology failing.
+The historical, and default, means of logging in Strelka is a local log file that is instantiated when the Strelka Frontend container is created. While other logging methodologies have recently been added (see the Kafka section), if an optional logging methodology has been enabled but fails some time after the instance has started running, the instance always falls back to the local log so that no data is lost.
 
 ### Kafka
-The Frontend allows for the creation of a Kafka producer at runtime for an alternative means of logging Strelka output such that logs can be streamed to a Kafka Topic of the user's choice. This logging option is useful when there is a high volume of data being processed by Strelka and the production of that data to a down stream analysis tool (such as a SIEM system) must be highly availible for data enrichment purposes.
+The Frontend can create a Kafka producer at runtime as an alternative means of logging Strelka output, streaming logs to a Kafka topic of the user's choice. This logging option is useful when Strelka processes a high volume of data and the delivery of that data to a downstream analysis tool (such as a SIEM system) must be highly available for data enrichment purposes.
 
-Currently this is toggled on and off in the Frontend Dockerfile, which is overwritten in the build/docker-compose.yaml file. Specifically, to toggle the Kafka Producer log option on, the locallog command line option must be set to false, and the kafkalog function must be set to true. If both command line options are set to true, then the Frontend will default to the local logging option, which is how the logging has functioned historically.
+Currently this is toggled on and off in the Frontend Dockerfile, which is overwritten in the build/docker-compose.yaml file. Specifically, to turn the Kafka producer log option on, the locallog command line option must be set to false and the kafkalog command line option must be set to true. If both command line options are set to true, the Frontend defaults to the local logging option, which is how logging has functioned historically.
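+
+As a rough sketch of this fallback contract (the Frontend implements it in Go; the class name, log file name, and broker string below are hypothetical placeholders, and a real producer would be configured from frontend.yaml):
+
+```python
+# Illustrative only: mirrors the documented local-log fallback, not Strelka code.
+import json
+import logging
+
+from confluent_kafka import Producer  # pip install confluent-kafka
+
+
+class KafkaFallbackLogger:
+    """Produce events to a Kafka topic; fall back to a local log on failure."""
+
+    def __init__(self, brokers="localhost:9092", topic="strelka", local_path="strelka.log"):
+        self.producer = Producer({"bootstrap.servers": brokers})
+        self.topic = topic
+        self.local_path = local_path
+
+    def _to_local(self, payload: bytes) -> None:
+        # Fallback path: append the event to the local log so no data is lost.
+        with open(self.local_path, "a", encoding="utf-8") as f:
+            f.write(payload.decode("utf-8") + "\n")
+
+    def _on_delivery(self, err, msg) -> None:
+        if err is not None:
+            logging.warning("Kafka delivery failed: %s", err)
+            self._to_local(msg.value())
+
+    def log(self, event: dict) -> None:
+        payload = json.dumps(event).encode("utf-8")
+        try:
+            self.producer.produce(self.topic, value=payload, on_delivery=self._on_delivery)
+            self.producer.poll(0)  # serve delivery callbacks
+        except Exception:
+            self._to_local(payload)
+```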
-The Kafka Producer that is created with the abbove command line options is fully configurable, and placeholder fields have already been added to the frontend.yaml configuration file. This file will need to be updated in order to point to an existing Kafka Topic, as desired. In cases where some fields are not used (e.g when security has not been enable on the desired Kafka Topic, etc) then unused fields in the broker configuration section of the frontend.yaml file may simply be replaced with an empty string.
+The Kafka producer created with the above command line options is fully configurable, and placeholder fields have already been added to the frontend.yaml configuration file. This file will need to be updated to point to an existing Kafka topic, as desired. In cases where some fields are not used (e.g., when security has not been enabled on the desired Kafka topic), the unused fields in the broker configuration section of the frontend.yaml file may simply be replaced with an empty string.
 
 #### Optional: S3 Redundancy
 Dependant on a Kafka producer being created and a boolean in the Kafka config set to true, S3 redundancy can be toggled on in order to account for any issues with a Kafka connection. S3, in this case, is referring to either a AWS S3 bucket, or a Ceph Opensource Object Storage bucket.
 
-Currently, if the option for S3 redundancy is toggled on, if the Kafka connection as desribed in the Kafka logging section of this document is interrupted, then, after the local log file is updated, the contents of that log file will be uploaded to the configureable S3 location. By default logs are kept for three hours after the start of the interuption of the Kafka connection, and, will rotate logs in S3 on the hour to maintain relevancy in the remote bucket location.
+Currently, if S3 redundancy is toggled on and the Kafka connection described in the Kafka logging section of this document is interrupted, then, after the local log file is updated, the contents of that log file are uploaded to the configurable S3 location. By default, logs are kept for three hours after the start of the interruption of the Kafka connection, and logs in S3 are rotated on the hour to keep the remote bucket location current.
 
-Once connection is re-established to the original Kafka broker, then the stored logs are sent in parallel to new logs to the Kafka broker. If a restart of the Frontend is required to reset the connection, then the logs will be sent to the Kafka Broker (if they are not stale) at the next start up.
+Once the connection to the original Kafka broker is re-established, the stored logs are sent to the Kafka broker in parallel with new logs. If a restart of the Frontend is required to reset the connection, the logs will be sent to the Kafka broker (if they are not stale) at the next startup.
 
 This option is set to false by default.
 
@@ -815,7 +815,7 @@ The table below describes each scanner and its options. Each scanner has the hid
 | ScanPhp | Collects metadata from PHP files | N/A |
 | ScanPkcs7 | Extracts files from PKCS7 certificate files | N/A |
 | ScanPlist | Collects attributes from binary and XML property list files | `keys` -- list of keys to log (defaults to `all`) |
-| ScanQr | Collects QR code metadata from image files | `support_inverted` -- Enable/disable image inversion to support inverted QR codes (white on black). Adds some image processing overhead.
| [Aaron Herman](https://github.com/aaronherman) +| ScanQr | Collects QR code metadata from image files | `support_inverted` -- Enable/disable image inversion to support inverted QR codes (white on black). Adds some image processing overhead. | [Aaron Herman](https://github.com/aaronherman) | ScanRar | Extracts files from RAR archives | `limit` -- maximum number of files to extract (defaults to `1000`)
`password_file` -- location of passwords file for RAR archives (defaults to `/etc/strelka/passwords.dat`) | | ScanRpm | Collects metadata and extracts files from RPM files | `tempfile_directory` -- location where `tempfile` will write temporary files (defaults to `/tmp/`) | | ScanRtf | Extracts embedded files from RTF files | `limit` -- maximum number of files to extract (defaults to `1000`) | @@ -838,7 +838,7 @@ The table below describes each scanner and its options. Each scanner has the hid | ScanXL4MA | Analyzes and parses Excel 4 Macros from XLSX files | `type` -- string that determines the type of x509 certificate being scanned (no default, assigned as either "der" or "pem" depending on flavor) | Ryan Borre | ScanXml | Log metadata and extract files from XML files | `extract_tags` -- list of XML tags that will have their text extracted as child files (defaults to empty list)
`metadata_tags` -- list of XML tags that will have their text logged as metadata (defaults to empty list) | | ScanYara | Scans files with YARA rules | `location` -- location of the YARA rules file or directory (defaults to `/etc/strelka/yara/`)
`compiled` -- Enable use of compiled YARA rules, as well as the path.
`store_offset` -- Stores file offset for YARA match
`offset_meta_key` -- YARA meta key that must exist in the YARA rule for the offset to be stored.
`offset_padding` -- Amount of data to be stored before and after offset for additional context. | -| ScanZip | Extracts files from zip archives | `limit` -- maximum number of files to extract (defaults to `1000`)
`password_file` -- location of passwords file for zip archives (defaults to `/etc/strelka/passwords.dat`) | +| ScanZip | Extracts files from zip archives | `limit` -- maximum number of files to extract (defaults to `1000`)
`limit_metadata` -- stop adding file metadata when `limit` is reached (defaults to true)
`size_limit` -- maximum size for extracted files (defaults to `250000000`)
`crack_pws` -- use a dictionary to crack encrypted files (defaults to false)
`log_pws` -- log cracked passwords (defaults to true)
`password_file` -- location of passwords file for zip archives (defaults to `/etc/strelka/passwords.dat`) | | ScanZlib | Decompresses gzip files | N/A ## Tests diff --git a/src/python/strelka/scanners/scan_docx.py b/src/python/strelka/scanners/scan_docx.py index 738806d2..ae1baabd 100644 --- a/src/python/strelka/scanners/scan_docx.py +++ b/src/python/strelka/scanners/scan_docx.py @@ -30,17 +30,17 @@ def scan(self, data, file, options, expire_at): self.event["identifier"] = docx_doc.core_properties.identifier self.event["keywords"] = docx_doc.core_properties.keywords self.event["language"] = docx_doc.core_properties.language - self.event[ - "last_modified_by" - ] = docx_doc.core_properties.last_modified_by + self.event["last_modified_by"] = ( + docx_doc.core_properties.last_modified_by + ) if docx_doc.core_properties.last_printed is not None: - self.event[ - "last_printed" - ] = docx_doc.core_properties.last_printed.isoformat() + self.event["last_printed"] = ( + docx_doc.core_properties.last_printed.isoformat() + ) if docx_doc.core_properties.modified is not None: - self.event[ - "modified" - ] = docx_doc.core_properties.modified.isoformat() + self.event["modified"] = ( + docx_doc.core_properties.modified.isoformat() + ) self.event["revision"] = docx_doc.core_properties.revision self.event["subject"] = docx_doc.core_properties.subject self.event["title"] = docx_doc.core_properties.title diff --git a/src/python/strelka/scanners/scan_encrypted_zip.py b/src/python/strelka/scanners/scan_encrypted_zip.py index 3a5fe6ee..1a816732 100644 --- a/src/python/strelka/scanners/scan_encrypted_zip.py +++ b/src/python/strelka/scanners/scan_encrypted_zip.py @@ -133,8 +133,8 @@ def scan(self, data, file, options, expire_at): is_aes = True break - with pyzipper.AESZipFile(zip_io) if is_aes else pyzipper.ZipFile( - zip_io + with ( + pyzipper.AESZipFile(zip_io) if is_aes else pyzipper.ZipFile(zip_io) ) as zip_obj: file_list = zip_obj.filelist # .filelist for file_list_item in file_list: diff --git a/src/python/strelka/scanners/scan_iso.py b/src/python/strelka/scanners/scan_iso.py index 9e825245..b949e5cc 100644 --- a/src/python/strelka/scanners/scan_iso.py +++ b/src/python/strelka/scanners/scan_iso.py @@ -27,19 +27,19 @@ def scan(self, data, file, options, expire_at): # Attempt to get Meta try: - self.event["meta"][ - "date_created" - ] = self._datetime_from_volume_date(iso.pvd.volume_creation_date) - self.event["meta"][ - "date_effective" - ] = self._datetime_from_volume_date(iso.pvd.volume_effective_date) - self.event["meta"][ - "date_expiration" - ] = self._datetime_from_volume_date(iso.pvd.volume_expiration_date) - self.event["meta"][ - "date_modification" - ] = self._datetime_from_volume_date( - iso.pvd.volume_modification_date + self.event["meta"]["date_created"] = ( + self._datetime_from_volume_date(iso.pvd.volume_creation_date) + ) + self.event["meta"]["date_effective"] = ( + self._datetime_from_volume_date(iso.pvd.volume_effective_date) + ) + self.event["meta"]["date_expiration"] = ( + self._datetime_from_volume_date(iso.pvd.volume_expiration_date) + ) + self.event["meta"]["date_modification"] = ( + self._datetime_from_volume_date( + iso.pvd.volume_modification_date + ) ) self.event["meta"][ "volume_identifier" diff --git a/src/python/strelka/scanners/scan_lnk.py b/src/python/strelka/scanners/scan_lnk.py index ed405d0d..7759529d 100644 --- a/src/python/strelka/scanners/scan_lnk.py +++ b/src/python/strelka/scanners/scan_lnk.py @@ -140,18 +140,18 @@ def scan(self, data, file, options, expire_at): try: if 
extradata.IconEnvironmentDataBlock: - self.event[ - "icon_target" - ] = extradata.IconEnvironmentDataBlock.TargetAnsi + self.event["icon_target"] = ( + extradata.IconEnvironmentDataBlock.TargetAnsi + ) except strelka.ScannerTimeout: raise except Exception: self.flags.append("Unable to parse IconEnvironmentDataBlock") if extradata.TrackerDataBlock: - self.event[ - "machine_id" - ] = extradata.TrackerDataBlock.MachineID.strip(b"\x00") + self.event["machine_id"] = ( + extradata.TrackerDataBlock.MachineID.strip(b"\x00") + ) self.event["mac"] = str( uuid.UUID(bytes_le=extradata.TrackerDataBlock.Droid[16:]) ).split("-")[-1] diff --git a/src/python/strelka/scanners/scan_pe.py b/src/python/strelka/scanners/scan_pe.py index 6d1cb443..3b873f70 100644 --- a/src/python/strelka/scanners/scan_pe.py +++ b/src/python/strelka/scanners/scan_pe.py @@ -532,18 +532,18 @@ def scan(self, data, file, options, expire_at): self.event["address_of_entry_point"] = pe.OPTIONAL_HEADER.AddressOfEntryPoint self.event["image_base"] = pe.OPTIONAL_HEADER.ImageBase self.event["size_of_code"] = pe.OPTIONAL_HEADER.SizeOfCode - self.event[ - "size_of_initialized_data" - ] = pe.OPTIONAL_HEADER.SizeOfInitializedData + self.event["size_of_initialized_data"] = ( + pe.OPTIONAL_HEADER.SizeOfInitializedData + ) self.event["size_of_headers"] = pe.OPTIONAL_HEADER.SizeOfHeaders self.event["size_of_heap_reserve"] = pe.OPTIONAL_HEADER.SizeOfHeapReserve self.event["size_of_image"] = pe.OPTIONAL_HEADER.SizeOfImage self.event["size_of_stack_commit"] = pe.OPTIONAL_HEADER.SizeOfStackCommit self.event["size_of_stack_reserve"] = pe.OPTIONAL_HEADER.SizeOfStackReserve self.event["size_of_heap_commit"] = pe.OPTIONAL_HEADER.SizeOfHeapCommit - self.event[ - "size_of_uninitialized_data" - ] = pe.OPTIONAL_HEADER.SizeOfUninitializedData + self.event["size_of_uninitialized_data"] = ( + pe.OPTIONAL_HEADER.SizeOfUninitializedData + ) self.event["file_alignment"] = pe.OPTIONAL_HEADER.FileAlignment self.event["section_alignment"] = pe.OPTIONAL_HEADER.SectionAlignment self.event["checksum"] = pe.OPTIONAL_HEADER.CheckSum @@ -552,12 +552,12 @@ def scan(self, data, file, options, expire_at): self.event["minor_image_version"] = pe.OPTIONAL_HEADER.MinorImageVersion self.event["major_linker_version"] = pe.OPTIONAL_HEADER.MajorLinkerVersion self.event["minor_linker_version"] = pe.OPTIONAL_HEADER.MinorLinkerVersion - self.event[ - "major_operating_system_version" - ] = pe.OPTIONAL_HEADER.MajorOperatingSystemVersion - self.event[ - "minor_operating_system_version" - ] = pe.OPTIONAL_HEADER.MinorOperatingSystemVersion + self.event["major_operating_system_version"] = ( + pe.OPTIONAL_HEADER.MajorOperatingSystemVersion + ) + self.event["minor_operating_system_version"] = ( + pe.OPTIONAL_HEADER.MinorOperatingSystemVersion + ) self.event["major_subsystem_version"] = pe.OPTIONAL_HEADER.MajorSubsystemVersion self.event["minor_subsystem_version"] = pe.OPTIONAL_HEADER.MinorSubsystemVersion self.event["image_version"] = float( diff --git a/src/python/strelka/scanners/scan_pgp.py b/src/python/strelka/scanners/scan_pgp.py index 1a9f4e7a..7c3259d4 100644 --- a/src/python/strelka/scanners/scan_pgp.py +++ b/src/python/strelka/scanners/scan_pgp.py @@ -74,9 +74,9 @@ def parse_pgpdump(self, data): secret_key_entry["creation_time"] = creation_time.isoformat() expiration_time = getattr(packet, "expiration_time", None) if expiration_time is not None: - secret_key_entry[ - "expiration_time" - ] = expiration_time.isoformat() + secret_key_entry["expiration_time"] = ( + 
expiration_time.isoformat() + ) if secret_key_entry not in self.event["secret_keys"]: self.event["secret_keys"].append(secret_key_entry) @@ -98,9 +98,9 @@ def parse_pgpdump(self, data): public_key_entry["creation_time"] = creation_time.isoformat() expiration_time = getattr(packet, "expiration_time", None) if expiration_time is not None: - public_key_entry[ - "expiration_time" - ] = expiration_time.isoformat() + public_key_entry["expiration_time"] = ( + expiration_time.isoformat() + ) if public_key_entry not in self.event["public_keys"]: self.event["public_keys"].append(public_key_entry) @@ -135,14 +135,14 @@ def parse_pgpdump(self, data): } creation_time = getattr(packet, "creation_time", None) if creation_time is not None: - signature_packet_entry[ - "creation_time" - ] = creation_time.isoformat() + signature_packet_entry["creation_time"] = ( + creation_time.isoformat() + ) expiration_time = getattr(packet, "expiration_time", None) if expiration_time is not None: - signature_packet_entry[ - "expiration_time" - ] = expiration_time.isoformat() + signature_packet_entry["expiration_time"] = ( + expiration_time.isoformat() + ) if signature_packet_entry not in self.event["signatures"]: self.event["signatures"].append(signature_packet_entry) diff --git a/src/python/strelka/scanners/scan_vsto.py b/src/python/strelka/scanners/scan_vsto.py index 981b7348..8e139257 100644 --- a/src/python/strelka/scanners/scan_vsto.py +++ b/src/python/strelka/scanners/scan_vsto.py @@ -7,7 +7,6 @@ """ - import base64 import hashlib diff --git a/src/python/strelka/scanners/scan_zip.py b/src/python/strelka/scanners/scan_zip.py index 58181043..a5297a0d 100644 --- a/src/python/strelka/scanners/scan_zip.py +++ b/src/python/strelka/scanners/scan_zip.py @@ -22,8 +22,13 @@ class ScanZip(strelka.Scanner): def scan(self, data, file, options, expire_at): file_limit = options.get("limit", 100) + size_limit = options.get("size_limit", 250000000) + limit_metadata = options.get("limit_metadata", True) + crack_pws = options.get("crack_pws", False) + log_pws = options.get("log_pws", True) password_file = options.get("password_file", "/etc/strelka/passwords.dat") - passwords = [] + + passwords = [None] # Gather count and list of files to be extracted self.event["total"] = {"files": 0, "extracted": 0} @@ -33,111 +38,141 @@ def scan(self, data, file, options, expire_at): compress_size_total = 0 file_size_total = 0 - if os.path.isfile(password_file): + if crack_pws and os.path.isfile(password_file): with open(password_file, "rb") as f: for line in f: passwords.append(line.strip()) with io.BytesIO(data) as zip_io: try: - is_aes = False - with pyzipper.ZipFile(zip_io) as zip_obj: - filelist = zip_obj.filelist - for file in filelist: - if not file.is_dir(): - # Check for the AES compression type - if file.compress_type == 99: - is_aes = True - break - - with pyzipper.ZipFile(zip_io) if is_aes else pyzipper.ZipFile( - zip_io - ) as zip_obj: + with pyzipper.AESZipFile(zip_io) as zip_obj: filelist = zip_obj.filelist - for file in filelist: - if not file.is_dir(): - self.event["total"]["files"] += 1 - - # For each file in zip, gather metadata metrics and pass back to Strelka for recursive extraction. 
- for i, name in enumerate(filelist): - if name.file_size > 0 and name.compress_size > 0: - compress_size_total += name.compress_size - file_size_total += name.file_size - size_difference = name.file_size - name.compress_size + # Count the file entries, in case the function encounters an unhandled exception + for compressed_file in filelist: + if compressed_file.is_dir(): + continue + self.event["total"]["files"] += 1 + + # For each file in zip, gather metadata and pass extracted file back to Strelka + for compressed_file in filelist: + if compressed_file.is_dir(): + continue + + extract = True + extracted = False + compression_rate = 0 + + if compressed_file.file_size > size_limit: + extract = False + if "file_size_limit" not in self.flags: + self.flags.append("file_size_limit") + + if self.event["total"]["extracted"] >= file_limit: + extract = False + if "file_count_limit" not in self.flags: + self.flags.append("file_count_limit") + + if ( + compressed_file.file_size > 0 + and compressed_file.compress_size > 0 + ): + compress_size_total += compressed_file.compress_size + file_size_total += compressed_file.file_size + + size_difference = ( + compressed_file.file_size + - compressed_file.compress_size + ) compression_rate = ( size_difference * 100.0 - ) / name.file_size - self.event["files"].append( - { - "file_name": name.filename, - "file_size": name.file_size, - "compression_size": name.compress_size, - "compression_rate": round(compression_rate, 2), - } - ) - - if self.event["total"]["extracted"] >= file_limit: - break - - try: - extract_data = b"" - zinfo = zip_obj.getinfo(name.filename) - - if zinfo.flag_bits & 0x1: - if "encrypted" not in self.flags: - self.flags.append("encrypted") - - if passwords: - for pw in passwords: - try: - extract_data = zip_obj.read( - name.filename, pw - ) - self.event["password"] = pw.decode( + ) / compressed_file.file_size + + try: + extract_data = b"" + zinfo = zip_obj.getinfo(compressed_file.filename) + + if zinfo.flag_bits & 0x1: + if "encrypted" not in self.flags: + self.flags.append("encrypted") + + for password in passwords: + try: + if extract: + extract_data = zip_obj.read( + compressed_file.filename, password + ) + if extract_data: + passwords.insert( + 0, + passwords.pop( + passwords.index(password) + ), + ) + if password and crack_pws and log_pws: + if "password" not in self.event.keys(): + self.event["password"] = [] + if password.decode( "utf-8" - ) - - except ( - RuntimeError, - pyzipper.BadZipFile, - zlib.error, - ): - pass - else: - try: - extract_data = zip_obj.read(name.filename) - except RuntimeError: - self.flags.append("runtime_error") - except pyzipper.BadZipFile: - self.flags.append("bad_zip_file") - except zlib.error: - self.flags.append("zlib_error") - - # Suppress sending to coordinator in favor of ScanEncryptedZip - if extract_data and "encrypted" not in self.flags: - # Send extracted file back to Strelka - self.emit_file(extract_data, name=name.filename) - - self.event["total"]["extracted"] += 1 - - except NotImplementedError: - self.flags.append("unsupported_compression") - except RuntimeError: - self.flags.append("runtime_error") - except ValueError: - self.flags.append("value_error") - except zlib.error: - self.flags.append("zlib_error") - - # Top level compression metric - try: - size_difference_total = file_size_total - compress_size_total - self.event["compression_rate"] = round( - (size_difference_total * 100.0) / file_size_total, 2 - ) - except ZeroDivisionError: - self.flags.append("file_size_zero") + ) not in 
self.event.get("password", []): + self.event["password"].append( + password.decode("utf-8") + ) + break + except RuntimeError: + pass + + # If there's data in it, and no limits have been met, emit the file + if extract_data and extract: + # Send extracted file back to Strelka + self.emit_file( + extract_data, name=compressed_file.filename + ) + extracted = True + + if not ( + limit_metadata + and self.event["total"]["extracted"] >= file_limit + ): + self.event["files"].append( + { + "file_name": compressed_file.filename, + "file_size": compressed_file.file_size, + "compression_size": compressed_file.compress_size, + "compression_rate": round(compression_rate, 2), + "extracted": extracted, + "encrypted": ( + True + if zinfo.flag_bits & 0x1 == 1 + else False + ), + } + ) + + if extracted: + self.event["total"]["extracted"] += 1 + + except NotImplementedError: + self.flags.append("unsupported_compression") + except RuntimeError: + self.flags.append("runtime_error") + except ValueError: + self.flags.append("value_error") + except zlib.error: + self.flags.append("zlib_error") + except pyzipper.BadZipFile: + self.flags.append("bad_zip_file") + + # Top level compression metric + if file_size_total > 0 and compress_size_total > 0: + size_difference_total = ( + file_size_total - compress_size_total + ) + self.event["compression_rate"] = round( + (size_difference_total * 100.0) / file_size_total, 2 + ) + else: + self.event["compression_rate"] = 0.00 except pyzipper.BadZipFile: self.flags.append("bad_zip_file") diff --git a/src/python/strelka/tests/__init__.py b/src/python/strelka/tests/__init__.py index 5873a94a..d452ce6b 100644 --- a/src/python/strelka/tests/__init__.py +++ b/src/python/strelka/tests/__init__.py @@ -95,9 +95,11 @@ def get_remote_fixture_archive( fileentry.filename: io.BytesIO( archive.read( fileentry.filename, - pwd=password.encode("utf-8") - if password - else None, + pwd=( + password.encode("utf-8") + if password + else None + ), ) ) } diff --git a/src/python/strelka/tests/fixtures/test_big.zip b/src/python/strelka/tests/fixtures/test_big.zip new file mode 100644 index 00000000..8eadbb29 Binary files /dev/null and b/src/python/strelka/tests/fixtures/test_big.zip differ diff --git a/src/python/strelka/tests/fixtures/test_empty.zip b/src/python/strelka/tests/fixtures/test_empty.zip new file mode 100644 index 00000000..a3727acb Binary files /dev/null and b/src/python/strelka/tests/fixtures/test_empty.zip differ diff --git a/src/python/strelka/tests/fixtures/test_mixedcrypto.zip b/src/python/strelka/tests/fixtures/test_mixedcrypto.zip new file mode 100644 index 00000000..6fe1747e Binary files /dev/null and b/src/python/strelka/tests/fixtures/test_mixedcrypto.zip differ diff --git a/src/python/strelka/tests/test_scan_zip.py b/src/python/strelka/tests/test_scan_zip.py index cb22ef74..0829f6f3 100644 --- a/src/python/strelka/tests/test_scan_zip.py +++ b/src/python/strelka/tests/test_scan_zip.py @@ -21,24 +21,32 @@ def test_scan_zip(mocker): "file_size": 4015, "compression_size": 1425, "compression_rate": 64.51, + "extracted": True, + "encrypted": False, }, { "file_name": "hidden/lorem-readonly.txt", "file_size": 4015, "compression_size": 1425, "compression_rate": 64.51, + "extracted": True, + "encrypted": False, }, { "file_name": "hidden/lorem.txt", "file_size": 4015, "compression_size": 1425, "compression_rate": 64.51, + "extracted": True, + "encrypted": False, }, { "file_name": "lorem.txt", "file_size": 4015, "compression_size": 1425, "compression_rate": 64.51, + "extracted": True, + 
"encrypted": False, }, ], "compression_rate": 64.51, @@ -54,6 +62,109 @@ def test_scan_zip(mocker): TestCase().assertDictEqual(test_scan_event, scanner_event) +def test_scan_zip_count_limit(mocker): + """ + Pass: Sample event matches output of scanner. + Failure: Unable to load file or sample event fails to match. + """ + + test_scan_event = { + "elapsed": mock.ANY, + "flags": ["file_count_limit"], + "total": {"files": 4, "extracted": 2}, + "files": [ + { + "file_name": "hidden/lorem-hidden.txt", + "file_size": 4015, + "compression_size": 1425, + "compression_rate": 64.51, + "extracted": True, + "encrypted": False, + }, + { + "file_name": "hidden/lorem-readonly.txt", + "file_size": 4015, + "compression_size": 1425, + "compression_rate": 64.51, + "extracted": True, + "encrypted": False, + }, + ], + "compression_rate": 64.51, + } + + scanner_event = run_test_scan( + mocker=mocker, + scan_class=ScanUnderTest, + fixture_path=Path(__file__).parent / "fixtures/test.zip", + options={"limit": 2}, + ) + + TestCase.maxDiff = None + TestCase().assertDictEqual(test_scan_event, scanner_event) + + +def test_scan_zip_metadata_limit(mocker): + """ + Pass: Sample event matches output of scanner. + Failure: Unable to load file or sample event fails to match. + """ + + test_scan_event = { + "elapsed": mock.ANY, + "flags": ["file_count_limit"], + "total": {"files": 4, "extracted": 2}, + "files": [ + { + "file_name": "hidden/lorem-hidden.txt", + "file_size": 4015, + "compression_size": 1425, + "compression_rate": 64.51, + "extracted": True, + "encrypted": False, + }, + { + "file_name": "hidden/lorem-readonly.txt", + "file_size": 4015, + "compression_size": 1425, + "compression_rate": 64.51, + "extracted": True, + "encrypted": False, + }, + { + "file_name": "hidden/lorem.txt", + "file_size": 4015, + "compression_size": 1425, + "compression_rate": 64.51, + "extracted": False, + "encrypted": False, + }, + { + "file_name": "lorem.txt", + "file_size": 4015, + "compression_size": 1425, + "compression_rate": 64.51, + "extracted": False, + "encrypted": False, + }, + ], + "compression_rate": 64.51, + } + + scanner_event = run_test_scan( + mocker=mocker, + scan_class=ScanUnderTest, + fixture_path=Path(__file__).parent / "fixtures/test.zip", + options={ + "limit": 2, + "limit_metadata": False, + }, + ) + + TestCase.maxDiff = None + TestCase().assertDictEqual(test_scan_event, scanner_event) + + def test_scan_zip_aes256(mocker): """ Pass: Sample event matches output of scanner. 
@@ -63,31 +174,40 @@ def test_scan_zip_aes256(mocker): test_scan_event = { "elapsed": mock.ANY, "flags": ["encrypted"], - "total": {"files": 4, "extracted": 0}, + "total": {"files": 4, "extracted": 4}, + "password": ["password"], "files": [ { "file_name": "hidden/lorem-hidden.txt", "file_size": 4015, "compression_size": 1453, "compression_rate": 63.81, + "extracted": True, + "encrypted": True, }, { "file_name": "hidden/lorem-readonly.txt", "file_size": 4015, "compression_size": 1453, "compression_rate": 63.81, + "extracted": True, + "encrypted": True, }, { "file_name": "hidden/lorem.txt", "file_size": 4015, "compression_size": 1453, "compression_rate": 63.81, + "extracted": True, + "encrypted": True, }, { "file_name": "lorem.txt", "file_size": 4015, "compression_size": 1453, "compression_rate": 63.81, + "extracted": True, + "encrypted": True, }, ], "compression_rate": 63.81, @@ -97,7 +217,145 @@ def test_scan_zip_aes256(mocker): mocker=mocker, scan_class=ScanUnderTest, fixture_path=Path(__file__).parent / "fixtures/test_aes256_password.zip", + options={"crack_pws": True, "log_pws": True}, + ) + + TestCase.maxDiff = None + TestCase().assertDictEqual(test_scan_event, scanner_event) + + +def test_scan_zip_big(mocker): + """ + Pass: Sample event matches output of scanner. + Failure: Unable to load file or sample event fails to match. + """ + + test_scan_event = { + "elapsed": mock.ANY, + "flags": ["file_size_limit"], + "total": {"files": 1, "extracted": 0}, + "files": [ + { + "file_name": "test_big.zero", + "file_size": 512000000, + "compression_size": 496891, + "compression_rate": 99.9, + "extracted": False, + "encrypted": False, + }, + ], + "compression_rate": 99.9, + } + + scanner_event = run_test_scan( + mocker=mocker, + scan_class=ScanUnderTest, + fixture_path=Path(__file__).parent / "fixtures/test_big.zip", + options={"size_limit": 100000000}, ) TestCase.maxDiff = None TestCase().assertDictEqual(test_scan_event, scanner_event) + + +def test_scan_zip_empty(mocker): + """ + Pass: Sample event matches output of scanner. + Failure: Unable to load file or sample event fails to match. + """ + + test_scan_event = { + "elapsed": mock.ANY, + "flags": [], + "total": {"files": 2, "extracted": 1}, + "files": [ + { + "file_name": "test_empty.bin", + "file_size": 0, + "compression_size": 0, + "compression_rate": 0, + "extracted": False, + "encrypted": False, + }, + { + "file_name": "test.txt", + "file_size": 4007, + "compression_size": 1449, + "compression_rate": 63.84, + "extracted": True, + "encrypted": False, + }, + ], + "compression_rate": 63.84, + } + + scanner_event = run_test_scan( + mocker=mocker, + scan_class=ScanUnderTest, + fixture_path=Path(__file__).parent / "fixtures/test_empty.zip", + ) + + TestCase.maxDiff = None + TestCase().assertDictEqual(test_scan_event, scanner_event) + + +def test_scan_zip_mixed_zipcrypto(mocker): + """ + Pass: Sample event matches output of scanner. + Failure: Unable to load file or sample event fails to match. 
+ """ + + test_scan_event = { + "elapsed": mock.ANY, + "flags": ["encrypted"], + "total": {"files": 4, "extracted": 3}, + "password": ["password"], + "files": [ + { + "file_name": "test.txt", + "file_size": 4007, + "compression_size": 1421, + "compression_rate": 64.54, + "extracted": True, + "encrypted": False, + }, + { + "file_name": "test_aes256.txt", + "file_size": 4007, + "compression_size": 1449, + "compression_rate": 63.84, + "extracted": True, + "encrypted": True, + }, + { + "file_name": "test_zipcrypto.txt", + "file_size": 4007, + "compression_size": 1433, + "compression_rate": 64.24, + "extracted": True, + "encrypted": True, + }, + { + "file_name": "test_zipcrypto_badpw.txt", + "file_size": 4007, + "compression_size": 1433, + "compression_rate": 64.24, + "extracted": False, + "encrypted": True, + }, + ], + "compression_rate": 64.21, + } + + scanner_event = run_test_scan( + mocker=mocker, + scan_class=ScanUnderTest, + fixture_path=Path(__file__).parent / "fixtures/test_mixedcrypto.zip", + options={"crack_pws": True, "log_pws": True}, + ) + + TestCase.maxDiff = None + TestCase().assertDictEqual(test_scan_event, scanner_event) + + +# test_aes256_password.zip
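The dictionary attack that `crack_pws` enables in ScanZip can be exercised standalone with pyzipper. A minimal sketch, assuming a newline-delimited password file such as `/etc/strelka/passwords.dat` exists; `try_extract` is a hypothetical helper, not part of the scanner:

```python
# Illustrative sketch of ScanZip's password handling: try no password first,
# then each dictionary entry, promoting a working password to the front.
import io

import pyzipper


def try_extract(zip_bytes, member_name, password_file):
    passwords = [None]  # None means "try without a password" first
    with open(password_file, "rb") as f:
        passwords.extend(line.strip() for line in f)

    with pyzipper.AESZipFile(io.BytesIO(zip_bytes)) as zip_obj:
        for password in passwords:
            try:
                data = zip_obj.read(member_name, password)
                if data:
                    # Like ScanZip, move the successful password to the front
                    # so the archive's remaining members try it first.
                    passwords.insert(0, passwords.pop(passwords.index(password)))
                    return data
            except RuntimeError:  # wrong password
                continue
    return b""
```

In the scanner itself, the successful password is additionally recorded in `self.event["password"]` when `log_pws` is true, and extraction is skipped once `size_limit` or the file-count `limit` is reached.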