From 76b9a12035f18828188e2a790054e884198e1b64 Mon Sep 17 00:00:00 2001 From: Paul Hutelmyer Date: Tue, 2 Apr 2024 09:20:27 -0400 Subject: [PATCH 1/5] Adding Broken Email Test --- src/python/strelka/scanners/scan_email.py | 270 +++++++----------- .../strelka/tests/fixtures/test_broken.eml | 33 +++ src/python/strelka/tests/test_scan_email.py | 44 ++- 3 files changed, 179 insertions(+), 168 deletions(-) create mode 100644 src/python/strelka/tests/fixtures/test_broken.eml diff --git a/src/python/strelka/scanners/scan_email.py b/src/python/strelka/scanners/scan_email.py index b5bea6a8..7f5e8152 100644 --- a/src/python/strelka/scanners/scan_email.py +++ b/src/python/strelka/scanners/scan_email.py @@ -51,11 +51,7 @@ def scan(self, data, file, options, expire_at): thumbnail_header = options.get("thumbnail_header", False) thumbnail_size = options.get("thumbnail_size", (500, 500)) - # ---------------- - # Thumbnail - # ---------------- - # Create a thumbnail from the image. - # Stores as a base64 value in the key: base64_thumbnail + # Attempt to create a thumbnail from the email if create_thumbnail: try: image = self.create_email_thumbnail(data, thumbnail_header) @@ -67,184 +63,124 @@ def scan(self, data, file, options, expire_at): self.event["base64_thumbnail"] = base64_image else: self.flags.append( - f"{self.__class__.__name__}: image_thumbnail_error: Could not generate thumbnail." + f"{self.__class__.__name__}: image_thumbnail_error: Could not generate thumbnail. No HTML found." ) except Exception as e: self.flags.append( f"{self.__class__.__name__}: image_thumbnail_error: {str(e)[:50]}" ) - # ---------------- - # Parse Email Contents - # ------------------- + # Parse email contents try: # Open and parse email byte string - # If fail to open, return. 
- try: - ep = eml_parser.EmlParser( - include_attachment_data=True, include_raw_body=True - ) - parsed_eml = ep.decode_email_bytes(data) - except strelka.ScannerTimeout: - raise - except Exception as e: - self.flags.append( - f"{self.__class__.__name__}: email_parse_error: {str(e)[:50]}" - ) + ep = eml_parser.EmlParser( + include_attachment_data=True, include_raw_body=True + ) + parsed_eml = ep.decode_email_bytes(data) # Check if email was parsed properly and attempt to deconflict and reload. - # If fail to reparse, return. - try: - if not ( - parsed_eml["header"]["subject"] and parsed_eml["header"]["header"] - ): - if b"\nReceived: from " in data: - data = ( - data.rpartition(b"\nReceived: from ")[1] - + data.rpartition(b"\nReceived: from ")[2] - )[1:] - elif b"Start mail input; end with .\n" in data: - data = data.rpartition( - b"Start mail input; end with .\n" - )[2] - parsed_eml = ep.decode_email_bytes(data) - if not ( - parsed_eml["header"]["subject"] - and parsed_eml["header"]["header"] - ): - self.flags.append( - f"{self.__class__.__name__}: email_parse_error" - ) - return - except strelka.ScannerTimeout: - raise - except Exception as e: - self.flags.append( - f"{self.__class__.__name__}: email_parse_error: {str(e)[:50]}" - ) + if not (parsed_eml["header"]["subject"] and parsed_eml["header"]["header"]): + if b"\nReceived: from " in data: + data = ( + data.rpartition(b"\nReceived: from ")[1] + + data.rpartition(b"\nReceived: from ")[2] + )[1:] + elif b"Start mail input; end with .\n" in data: + data = data.rpartition( + b"Start mail input; end with .\n" + )[2] + parsed_eml = ep.decode_email_bytes(data) - # Body - # If body exists in email, collect partial message contents and domains - try: - if "body" in parsed_eml: - for body in parsed_eml["body"]: - if "content_type" in body: - if body["content_type"] == "text/plain": - if len(body["content"]) <= 200: - self.event["body"] = body["content"] - else: - self.event["body"] = ( - body["content"][:100] - + "..." 
- + body["content"][-100:] - ) - else: - self.event["body"] = ( - body["content"][:100] + "..." + body["content"][-100:] - ) - if "domain" in body: - if "domain" in self.event: - self.event["domains"] += body["domain"] + # Extract body content and domains + if "body" in parsed_eml: + for body in parsed_eml["body"]: + if "content_type" in body: + if body["content_type"] == "text/plain": + if len(body["content"]) <= 200: + self.event["body"] = body["content"] else: - self.event["domains"] = body["domain"] - except strelka.ScannerTimeout: - raise - except Exception as e: - self.flags.append( - f"{self.__class__.__name__}: email_parse_body_error: {str(e)[:50]}" - ) - - # Attachments - # If attachments exist in email, collect attachment details and raw data to be resubmitted to pipeline. - try: - if "attachment" in parsed_eml: - self.event["attachments"] = {} - self.event["attachments"]["filenames"] = [] - self.event["attachments"]["hashes"] = [] - self.event["attachments"]["totalsize"] = 0 - for attachment in parsed_eml["attachment"]: - self.event["attachments"]["filenames"].append( - attachment["filename"] - ) - self.event["attachments"]["hashes"].append( - attachment["hash"]["md5"] - ) - self.event["attachments"]["totalsize"] += attachment["size"] - attachments.append( - { - "name": attachment["filename"], - "content-type": attachment["content_header"][ - "content-type" - ][0], - "raw": base64.b64decode(attachment["raw"]), - } + self.event["body"] = ( + body["content"][:100] + + "..." + + body["content"][-100:] + ) + else: + self.event["body"] = ( + body["content"][:100] + "..." 
+ body["content"][-100:] ) - except strelka.ScannerTimeout: - raise - except Exception as e: - self.flags.append( - f"{self.__class__.__name__}: email_parse_attachment_error: {str(e)[:50]}" - ) + if "domain" in body: + if "domain" in self.event: + self.event["domains"] += body["domain"] + else: + self.event["domains"] = body["domain"] + + # Extract attachment details and raw data + if "attachment" in parsed_eml: + self.event["attachments"] = { + "filenames": [], + "hashes": [], + "totalsize": 0, + } + for attachment in parsed_eml["attachment"]: + self.event["attachments"]["filenames"].append( + attachment["filename"] + ) + self.event["attachments"]["hashes"].append( + attachment["hash"]["md5"] + ) + self.event["attachments"]["totalsize"] += attachment["size"] + attachments.append( + { + "name": attachment["filename"], + "content-type": attachment["content_header"][ + "content-type" + ][0], + "raw": base64.b64decode(attachment["raw"]), + } + ) - # Header - # Collect email header information - try: - self.event["subject"] = parsed_eml["header"]["subject"] - self.event["to"] = parsed_eml["header"]["to"] - self.event["from"] = parsed_eml["header"]["from"] + # Extract email header information + self.event["subject"] = parsed_eml["header"].get("subject", "") + self.event["to"] = parsed_eml["header"].get("to", "") + self.event["from"] = parsed_eml["header"].get("from", "") + date_header = parsed_eml["header"].get("date") + if date_header: self.event["date_utc"] = ( - parsed_eml["header"]["date"].astimezone(pytz.utc).isoformat()[:-6] - + ".000Z" + date_header.astimezone(pytz.utc).isoformat()[:-6] + ".000Z" ) - self.event["message_id"] = str( - parsed_eml["header"]["header"]["message-id"][0] - .lstrip("<") - .rstrip(">") - ) - if "received_domain" in parsed_eml["header"]: - self.event["received_domain"] = parsed_eml["header"][ - "received_domain" - ] - if "received_ip" in parsed_eml["header"]: - self.event["received_ip"] = parsed_eml["header"]["received_ip"] - except 
strelka.ScannerTimeout: - raise - except Exception as e: - self.flags.append( - f"{self.__class__.__name__}: email_parse_header_error: {str(e)[:50]}" - ) - - # If attachments were found, submit back into pipeline - try: - if attachments: - for attachment in attachments: - self.event["total"]["attachments"] += 1 - - name = attachment["name"] - try: - flavors = [ - attachment["content-type"] - .encode("utf-8") - .partition(b";")[0] - ] - except Exception as e: - self.flags.append( - f"{self.__class__.__name__}: email_extract_attachment_error: {str(e)[:50]}" - ) - - # Send extracted file back to Strelka - self.emit_file(attachment["raw"], name=name, flavors=flavors) - - self.event["total"]["extracted"] += 1 - except strelka.ScannerTimeout: - raise - except Exception as e: - self.flags.append( - f"{self.__class__.__name__}: email_extract_attachment_error: {str(e)[:50]}" - ) - - except AssertionError: - self.flags.append(f"{self.__class__.__name__}: email_assertion_error") + header = parsed_eml.get("header", {}).get("header", {}) + message_id = header.get("message-id", [])[0] if header else None + self.event["message_id"] = ( + str(message_id.lstrip("<").rstrip(">")) if message_id else "" + ) + self.event["received_domain"] = parsed_eml["header"].get( + "received_domain", [] + ) + self.event["received_ip"] = parsed_eml["header"].get("received_ip", []) + + # Process attachments + if attachments: + for attachment in attachments: + self.event["total"]["attachments"] += 1 + name = attachment["name"] + try: + flavors = [ + attachment["content-type"] + .encode("utf-8") + .partition(b";")[0] + ] + except Exception as e: + self.flags.append( + f"{self.__class__.__name__}: email_extract_attachment_error: {str(e)[:50]}" + ) + # Send extracted file back to Strelka + self.emit_file(attachment["raw"], name=name, flavors=flavors) + self.event["total"]["extracted"] += 1 + + except Exception as e: + self.flags.append( + f"{self.__class__.__name__}: email_parse_error: {str(e)[:50]}" + ) 
def create_email_thumbnail(self, data, show_header): """ diff --git a/src/python/strelka/tests/fixtures/test_broken.eml b/src/python/strelka/tests/fixtures/test_broken.eml new file mode 100644 index 00000000..d065450f --- /dev/null +++ b/src/python/strelka/tests/fixtures/test_broken.eml @@ -0,0 +1,33 @@ +Hi Placeholder, + +Can I have access? + +Thanks, +John + + +From: Placeholder Smith > +Date: Thursday, March 28, 2024 at 1:45 PM +To: "Jane.Doe" +Subject: Fwd: [EXTERNAL] Folder shared with you: "Strelka Details" + +Begin forwarded message: +From: "Placeholder Smith (via Acme Share)" +Date: March 27, 2024 at 6:47:31 PM EST +To: "Jane.Doe" +Cc: "John.Doe" +Subject: [EXTERNAL] Folder shared with you: "Strelka Details" +Reply-To: Placeholder Smith (placeholder@acme.com + +Placeholder shared a folder +Placeholder Smith (placeholder@acme.com) added you as an editor. Verify your email to securely start contributing to this folder. You will need to verify your email every 7 days. +Hello, attached is the shared folder. +Best, +Placeholder + +Open + +Use is subject to the Google Privacy Policy. + +ACME LLC, 123 Fake Street, USA +You have received this email because placeholder@acme.com shared a file or folder located in Acme Share with you. 
Delete visitor session diff --git a/src/python/strelka/tests/test_scan_email.py b/src/python/strelka/tests/test_scan_email.py index ce026617..10e3be1d 100644 --- a/src/python/strelka/tests/test_scan_email.py +++ b/src/python/strelka/tests/test_scan_email.py @@ -142,7 +142,11 @@ def test_scan_email_with_thumbnail(mocker): "2002:a05:6500:11d0:b0:17b:2a20:6c32", ] ), - "base64_thumbnail": "", + "base64_thumbnail": "UklGRnJAAABXRUJQVlA4IGZAAAAQBwGdASqCAfQBPxF+tVOsKCSkKfxJuYAiCWlu/GwZq+tQzPE/4V/qf7/4s" + "+XXkN+//s04j7Vv7X8/vNb//+Bfyy//f3p9wXz36Vj4TrDQF+l/7798/UM/O81/3Xmu/1n7feUJ9y/3H7ifAT" + "/Vf896av3l6S/4T/4fvp8CH+K/7/7/ldkkCIGW+8Nid8kgRAy33hsTvkkCIGW+8Nid8kgRAy33hsTvkkCIGW" + "+8Nid8kfIoTYHLouMkva2/n6FHGZMLvDYnfJIEQMsbA0RoornnZFBRZqselPM/Ve" + "", } scanner_event = run_test_scan( @@ -156,3 +160,41 @@ def test_scan_email_with_thumbnail(mocker): TestCase.maxDiff = None TestCase().assertDictEqual(test_scan_event, scanner_event) + + +def test_scan_email_incomplete(mocker): + """ + Pass: Sample event matches output of scanner. + Failure: Unable to load file or sample event fails to match. + """ + + test_scan_event = { + "elapsed": mock.ANY, + "flags": [ + "ScanEmail: image_thumbnail_error: Could not generate thumbnail. No HTML found." + ], + "total": {"attachments": 0, "extracted": 0}, + "body": "Hi Placeholder,\n\nCan I have access?\n\nThanks,\nJohn\n\n\nFrom: Placeholder Smith " + " shared a file or folder located in Acme Share with you. 
Delete visitor " + "session\n", + "domains": ["acme.com", "share.acme.com"], + "subject": "", + "to": [], + "from": "", + "date_utc": "1970-01-01T00:00:00.000Z", + "message_id": "", + "received_domain": [], + "received_ip": [], + } + + scanner_event = run_test_scan( + mocker=mocker, + scan_class=ScanUnderTest, + fixture_path=Path(__file__).parent / "fixtures/test_broken.eml", + options={ + "create_thumbnail": True, + }, + ) + + TestCase.maxDiff = None + TestCase().assertDictEqual(test_scan_event, scanner_event) From 99fefeb24a5f6ac0a5d283b36e4b8d83637d5551 Mon Sep 17 00:00:00 2001 From: Paul Hutelmyer Date: Tue, 2 Apr 2024 10:50:56 -0400 Subject: [PATCH 2/5] Updating tastes --- configs/python/backend/backend.yaml | 3 ++- configs/python/backend/taste/taste.yara | 17 +++++++++++++++++ src/python/strelka/tests/test_scan_email.py | 11 ++++++----- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/configs/python/backend/backend.yaml b/configs/python/backend/backend.yaml index f8bb6847..92400677 100644 --- a/configs/python/backend/backend.yaml +++ b/configs/python/backend/backend.yaml @@ -1,4 +1,4 @@ -version: 2024.02.01.01 +version: 2024.04.02.01 logging_cfg: '/etc/strelka/logging.yaml' limits: max_files: 5000 @@ -107,6 +107,7 @@ scanners: - 'application/vnd.ms-outlook' - 'message/rfc822' - 'email_file' + - 'email_file_broad' priority: 5 options: create_thumbnail: True diff --git a/configs/python/backend/taste/taste.yara b/configs/python/backend/taste/taste.yara index baa5047e..5064d82d 100644 --- a/configs/python/backend/taste/taste.yara +++ b/configs/python/backend/taste/taste.yara @@ -464,6 +464,23 @@ rule email_file { $e in (0..2048) } +rule email_file_broad +{ + meta: + type = "email" + strings: + $ = "Received: " + $ = "Origin-messageId: " + $ = "Return-Path: " + $ = "From: " + $ = "To: " + $ = "Subject: " + $ = "Date: " + condition: + magic.mime_type() == "message/rfc822" or + all of them +} + rule tnef_file { meta: description = "Transport Neutral 
Encapsulation Format" diff --git a/src/python/strelka/tests/test_scan_email.py b/src/python/strelka/tests/test_scan_email.py index 10e3be1d..b3ddfab1 100644 --- a/src/python/strelka/tests/test_scan_email.py +++ b/src/python/strelka/tests/test_scan_email.py @@ -142,11 +142,12 @@ def test_scan_email_with_thumbnail(mocker): "2002:a05:6500:11d0:b0:17b:2a20:6c32", ] ), - "base64_thumbnail": "UklGRnJAAABXRUJQVlA4IGZAAAAQBwGdASqCAfQBPxF+tVOsKCSkKfxJuYAiCWlu/GwZq+tQzPE/4V/qf7/4s" - "+XXkN+//s04j7Vv7X8/vNb//+Bfyy//f3p9wXz36Vj4TrDQF+l/7798/UM/O81/3Xmu/1n7feUJ9y/3H7ifAT" - "/Vf896av3l6S/4T/4fvp8CH+K/7/7/ldkkCIGW+8Nid8kgRAy33hsTvkkCIGW+8Nid8kgRAy33hsTvkkCIGW" - "+8Nid8kfIoTYHLouMkva2/n6FHGZMLvDYnfJIEQMsbA0RoornnZFBRZqselPM/Ve" - "", + "base64_thumbnail": "UklGRgQ+AABXRUJQVlA4IPg9AADw/ACdASqCAfQBPxF+tFQsKCUjKfw5WYAiCWlu/F+5jutQzvEJ4w" + "/pvBHyNfRf3X/I////kchr3//S82P6//If/nrE/rO8n56f/3qBfs3789Kx771c5++wR5A/en1Jfx/Nn928trzn" + "/1n7feTf9p/2H7afAP/T/9H6bH3p6Uf4T/6/vv8C/+b/9n75FL7nZ5mu4gd9zs8zXcQO" + "+52eZruIHfc7PM13EDvudnma7iB33OzzNdxA77nZ3+e5IriyfRIt/vg3" + "+pSaeYt4QBPM13EDvuS8U1MkohIJ9SAbDbgdtEaNdcub66DxvHIoijT9sn+Ue" + "", } scanner_event = run_test_scan( From 88abc625dea321ebb1152d526cab0424385ae8fe Mon Sep 17 00:00:00 2001 From: Paul Hutelmyer Date: Tue, 2 Apr 2024 10:53:43 -0400 Subject: [PATCH 3/5] Update test_scan_email.py --- src/python/strelka/tests/test_scan_email.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/python/strelka/tests/test_scan_email.py b/src/python/strelka/tests/test_scan_email.py index b3ddfab1..1dc145b3 100644 --- a/src/python/strelka/tests/test_scan_email.py +++ b/src/python/strelka/tests/test_scan_email.py @@ -143,11 +143,11 @@ def test_scan_email_with_thumbnail(mocker): ] ), "base64_thumbnail": "UklGRgQ+AABXRUJQVlA4IPg9AADw/ACdASqCAfQBPxF+tFQsKCUjKfw5WYAiCWlu/F+5jutQzvEJ4w" - "/pvBHyNfRf3X/I////kchr3//S82P6//If/nrE/rO8n56f/3qBfs3789Kx771c5++wR5A/en1Jfx/Nn928trzn" - 
"/1n7feTf9p/2H7afAP/T/9H6bH3p6Uf4T/6/vv8C/+b/9n75FL7nZ5mu4gd9zs8zXcQO" - "+52eZruIHfc7PM13EDvudnma7iB33OzzNdxA77nZ3+e5IriyfRIt/vg3" - "+pSaeYt4QBPM13EDvuS8U1MkohIJ9SAbDbgdtEaNdcub66DxvHIoijT9sn+Ue" - "", + "/pvBHyNfRf3X/I////kchr3//S82P6//If/nrE/rO8n56f/3qBfs3789Kx771c5++wR5A/en1Jfx/Nn928trzn" + "/1n7feTf9p/2H7afAP/T/9H6bH3p6Uf4T/6/vv8C/+b/9n75FL7nZ5mu4gd9zs8zXcQO" + "+52eZruIHfc7PM13EDvudnma7iB33OzzNdxA77nZ3+e5IriyfRIt/vg3" + "+pSaeYt4QBPM13EDvuS8U1MkohIJ9SAbDbgdtEaNdcub66DxvHIoijT9sn+Ue" + "", } scanner_event = run_test_scan( From 141478689717a32cc1e9abbd7c64be7b4f0516c7 Mon Sep 17 00:00:00 2001 From: Sara Kalupa Date: Thu, 18 Apr 2024 13:48:21 -0500 Subject: [PATCH 4/5] Adding in ScanJNLP --- docs/README.md | 3 +- src/python/strelka/scanners/scan_jnlp.py | 106 ++++++++++++++++++++ src/python/strelka/tests/fixtures/test.jnlp | 20 ++++ src/python/strelka/tests/test_scan_jnlp.py | 26 +++++ 4 files changed, 154 insertions(+), 1 deletion(-) create mode 100644 src/python/strelka/scanners/scan_jnlp.py create mode 100644 src/python/strelka/tests/fixtures/test.jnlp create mode 100644 src/python/strelka/tests/test_scan_jnlp.py diff --git a/docs/README.md b/docs/README.md index 73d36e2f..4a1998c3 100644 --- a/docs/README.md +++ b/docs/README.md @@ -798,7 +798,8 @@ The table below describes each scanner and its options. 
Each scanner has the hid | ScanIso | Collects and extracts files from ISO files | `limit` -- maximum number of files to extract (defaults to `0`) | | ScanJarManifest | Collects metadata from JAR manifest files | N/A | | ScanJavascript | Collects metadata from Javascript files | `beautify` -- beautifies JavaScript before parsing (defaults to `True`) | -| ScanJpeg | Extracts data embedded in JPEG files | N/A | +| ScanJpeg | Extracts data embedded in JPEG files | N/A | +| ScanJnlp | Identifies JNLP files that reference external HTTP resources, particularly those not associated with trusted domains | N/A | Ryan Borre, [Paul Hutelmyer](https://github.com/phutelmyer) | | ScanJson | Collects keys from JSON files | N/A | | ScanLibarchive | Extracts files from libarchive-compatible archives. | `limit` -- maximum number of files to extract (defaults to `1000`) | | ScanLnk | Collects metadata from lnk files. | N/A | Ryan Borre, [DerekT2](https://github.com/Derekt2), [Nathan Icart](https://github.com/nateicart) diff --git a/src/python/strelka/scanners/scan_jnlp.py b/src/python/strelka/scanners/scan_jnlp.py new file mode 100644 index 00000000..6c365ab1 --- /dev/null +++ b/src/python/strelka/scanners/scan_jnlp.py @@ -0,0 +1,106 @@ +from io import BytesIO + +from lxml import etree + +from strelka import strelka + + +class ScanJnlp(strelka.Scanner): + """ + Analyzes Java Network Launch Protocol (JNLP) files. + + JNLP files, used by Java Web Start technology, can launch Java applications from a web browser. While facilitating + legitimate applications, they can also be abused for malicious purposes such as distributing malware or executing + phishing attacks. + + Scanner Type: Collection + + Attributes: + event (dict): Stores extracted data during the scan for further analysis. + + Detection Use Cases: + - **External Resource Reference** + - Identify JNLP files that reference external HTTP resources, particularly those not associated with trusted + domains.
+ + Known Limitations: + - **Java Dependence** + - Effectiveness is contingent on the presence and version of Java installed on the target system. + + Todo: + - Improve detection of obfuscated or sophisticated threats within JNLP files. + - Extract any other potential JNLP content / headers. + + References: + - **File Structure** + - https://docs.oracle.com/javase/tutorial/deployment/deploymentInDepth/jnlpFileSyntax.html + - **Malicious Usage** + - https://www.forcepoint.com/blog/x-labs/java-network-launch-protocol + - https://newtonpaul.com/analysing-fileless-malware-cobalt-strike-beacon + """ + + def scan(self, data, file, options, expire_at): + """ + Scans the given data for JNLP-related information. + + Extracts 'codebase' and 'href' attributes from JNLP and JAR tags to detect potential malicious activities. + + Args: + data (bytes): Data of the file being scanned. + file (File): File object being scanned. + options (dict): Options for the scanner. + expire_at (datetime): Expiration time of the scan result. + """ + # Initialize variables for 'codebase' and 'href' attributes + codebase = "" + href = "" + + # Parse the XML to find 'jnlp' and 'jar' elements + for elem, _ in iterate_xml_elements(data, tags=["jnlp", "jar"]): + if elem.tag == "jnlp": + codebase = elem.get("codebase", "").rstrip("/") + elif elem.tag == "jar": + href = elem.get("href", "").lstrip("/") + + # If both 'codebase' and 'href' are found, construct the full resource URL + if codebase and href: + self.event["resource"] = f"{codebase}/{href}" + + +def iterate_xml_elements(data, tags=None): + """ + Iterates over XML data, yielding elements with specified tags. + + This method parses the XML data byte by byte and yields elements that match the specified tags. This is useful + for extracting specific information from structured XML documents. + + Args: + data (bytes): The XML data to parse. + tags (list): List of XML tags to filter elements by. 
+ + Yields: + tuple: A tuple containing the XML element and its depth in the XML tree. + """ + # Define the events to listen for during XML parsing + events = ("start", "end") + depth = 0 + inside_tags = [] + + # Parse the XML data + for event, elem in etree.iterparse(BytesIO(data), events=events): + if event == "start": + # If the element's tag is one we're interested in, track it and its depth + if tags is None or elem.tag in tags: + inside_tags.append((elem.tag, depth)) + depth += 1 + elif event == "end": + # On end tag, reduce depth and check if the closing tag is one we're tracking + depth -= 1 + if depth < 0: + continue + + # Check if the current element should be yielded + is_wanted = tags is None or elem.tag in tags + if is_wanted and inside_tags and inside_tags[-1][0] == elem.tag: + inside_tags.pop() + yield elem, depth diff --git a/src/python/strelka/tests/fixtures/test.jnlp b/src/python/strelka/tests/fixtures/test.jnlp new file mode 100644 index 00000000..13969e77 --- /dev/null +++ b/src/python/strelka/tests/fixtures/test.jnlp @@ -0,0 +1,20 @@ + + + + SECURE DOCUMENT VIEWER + Microsoft + + Secure document viewer app + + + + + + + + + + + +1234abcdeF56789 + \ No newline at end of file diff --git a/src/python/strelka/tests/test_scan_jnlp.py b/src/python/strelka/tests/test_scan_jnlp.py new file mode 100644 index 00000000..c6c14bb4 --- /dev/null +++ b/src/python/strelka/tests/test_scan_jnlp.py @@ -0,0 +1,26 @@ +from pathlib import Path +from unittest import TestCase, mock + +from strelka.scanners.scan_jnlp import ScanJnlp as ScanUnderTest +from strelka.tests import run_test_scan + + +def test_scan_jnlp(mocker): + """ + Pass: Sample event matches output of scanner. + Failure: Unable to load file or sample event fails to match. 
+ """ + test_scan_event = { + "elapsed": mock.ANY, + "flags": [], + "resource": "https://example.com/uplib.jar", + } + + scanner_event = run_test_scan( + mocker=mocker, + scan_class=ScanUnderTest, + fixture_path=Path(__file__).parent / "fixtures/test.jnlp", + ) + + TestCase.maxDiff = None + TestCase().assertDictEqual(test_scan_event, scanner_event) From a8d847aeebf4dab7ac277716bc47ccf54b9e9675 Mon Sep 17 00:00:00 2001 From: Sara Kalupa Date: Thu, 18 Apr 2024 14:03:00 -0500 Subject: [PATCH 5/5] Enabling ScanJnlp in backend.yaml --- configs/python/backend/backend.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/configs/python/backend/backend.yaml b/configs/python/backend/backend.yaml index f8bb6847..b48ed40f 100644 --- a/configs/python/backend/backend.yaml +++ b/configs/python/backend/backend.yaml @@ -272,6 +272,11 @@ scanners: - 'application/json' - 'json_file' priority: 5 + 'ScanJnlp': + - positive: + flavors: + - "jnlp_file" + priority: 5 'ScanLibarchive': - positive: flavors: