Skip to content

Commit

Permalink
Merge pull request #454 from target/ScanJNLP
Browse files Browse the repository at this point in the history
Adding in ScanJNLP
  • Loading branch information
phutelmyer authored Apr 19, 2024
2 parents e499d29 + a8d847a commit c621fe8
Show file tree
Hide file tree
Showing 5 changed files with 159 additions and 1 deletion.
5 changes: 5 additions & 0 deletions configs/python/backend/backend.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,11 @@ scanners:
- 'application/json'
- 'json_file'
priority: 5
'ScanJnlp':
- positive:
flavors:
- "jnlp_file"
priority: 5
'ScanLibarchive':
- positive:
flavors:
Expand Down
3 changes: 2 additions & 1 deletion docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -798,7 +798,8 @@ The table below describes each scanner and its options. Each scanner has the hid
| ScanIso | Collects and extracts files from ISO files | `limit` -- maximum number of files to extract (defaults to `0`) |
| ScanJarManifest | Collects metadata from JAR manifest files | N/A |
| ScanJavascript | Collects metadata from Javascript files | `beautify` -- beautifies JavaScript before parsing (defaults to `True`) |
| ScanJpeg | Extracts data embedded in JPEG files | N/A |
| ScanJpeg | Extracts data embedded in JPEG files | N/A |
| ScanJnlp | Identifies JNLP files that reference external HTTP resources, particularly those not associated with trusted domains | N/A | Ryan Borre, [Paul Hutelmyer](https://github.com/phutelmyer) |
| ScanJson | Collects keys from JSON files | N/A |
| ScanLibarchive | Extracts files from libarchive-compatible archives. | `limit` -- maximum number of files to extract (defaults to `1000`) |
| ScanLnk | Collects metadata from lnk files. | N/A | Ryan Borre, [DerekT2](https://github.com/Derekt2), [Nathan Icart](https://github.com/nateicart)
Expand Down
106 changes: 106 additions & 0 deletions src/python/strelka/scanners/scan_jnlp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
from io import BytesIO

from lxml import etree

from strelka import strelka


class ScanJnlp(strelka.Scanner):
    """
    Analyzes Java Network Launch Protocol (JNLP) files.

    JNLP files, used by Java Web Start technology, can launch Java applications from a web browser. While facilitating
    legitimate applications, they can also be abused for malicious purposes such as distributing malware or executing
    phishing attacks.

    Scanner Type: Collection

    Attributes:
        event (dict): Stores extracted data during the scan for further analysis.

    Detection Use Cases:
        - **External Resource Reference**
            - Identify JNLP files that reference external HTTP resources, particularly those not associated with
              trusted domains.

    Known Limitations:
        - **Java Dependence**
            - Effectiveness is contingent on the presence and version of Java installed on the target system.
        - **Single Resource**
            - Only the last `<jar>` element encountered contributes to the reported resource.

    Todo:
        - Improve detection of obfuscated or sophisticated threats within JNLP files.
        - Extract any other potential JNLP content / headers.

    References:
        - **File Structure**
            - https://docs.oracle.com/javase/tutorial/deployment/deploymentInDepth/jnlpFileSyntax.html
        - **Malicious Usage**
            - https://www.forcepoint.com/blog/x-labs/java-network-launch-protocol
            - https://newtonpaul.com/analysing-fileless-malware-cobalt-strike-beacon
    """

    def scan(self, data, file, options, expire_at):
        """
        Scans the given data for JNLP-related information.

        Reads the 'codebase' attribute of the `<jnlp>` element and the 'href' attribute of the `<jar>` element and
        stores the resulting JAR location in `self.event["resource"]`. A relative 'href' is appended to 'codebase';
        an 'href' that is already an absolute URL is reported unchanged, because joining it onto 'codebase' would
        produce a URL that points nowhere.

        Errors raised by the XML parser are not caught here and propagate to the caller.

        Args:
            data (bytes): Data of the file being scanned.
            file (File): File object being scanned.
            options (dict): Options for the scanner.
            expire_at (datetime): Expiration time of the scan result.
        """
        codebase = ""
        href = ""

        # Elements are yielded as they close, so nested <jar> elements arrive before the enclosing <jnlp>.
        for elem, _ in iterate_xml_elements(data, tags=["jnlp", "jar"]):
            if elem.tag == "jnlp":
                # Trailing slashes are dropped so the join below never doubles them.
                codebase = elem.get("codebase", "").rstrip("/")
            elif elem.tag == "jar":
                # A later <jar> overrides an earlier one.
                href = elem.get("href", "").lstrip("/")

        if not href:
            return

        if self._is_absolute_url(href):
            # Absolute references are self-contained; 'codebase' only applies to relative hrefs.
            self.event["resource"] = href
        elif codebase:
            self.event["resource"] = f"{codebase}/{href}"

    @staticmethod
    def _is_absolute_url(url):
        """Return True when *url* begins with a URI scheme followed by '://'."""
        import re

        return re.match(r"[A-Za-z][A-Za-z0-9+.\-]*://", url) is not None


def iterate_xml_elements(data, tags=None):
    """
    Walks an XML document and yields the elements whose tag is of interest.

    The document is parsed incrementally from an in-memory buffer. Each matching element is yielded when its closing
    tag is reached, so children are produced before the element that contains them.

    Args:
        data (bytes): The XML data to parse.
        tags (list): Tag names to yield. When None, every element is yielded.

    Yields:
        tuple: The XML element and its depth in the tree (the root element has depth 0).
    """
    level = 0
    # Stack of (tag, level) pairs for matching elements that have been opened but not yet closed.
    open_matches = []

    def wanted(tag):
        return tags is None or tag in tags

    for action, node in etree.iterparse(BytesIO(data), events=("start", "end")):
        if action == "start":
            if wanted(node.tag):
                open_matches.append((node.tag, level))
            level += 1
            continue

        if action != "end":
            continue

        # Closing tag: step back out to the level at which this element was opened.
        level -= 1
        if level < 0:
            continue

        if not wanted(node.tag):
            continue

        # Only emit when this closing tag pairs with the most recently opened match.
        if open_matches and open_matches[-1][0] == node.tag:
            open_matches.pop()
            yield node, level
20 changes: 20 additions & 0 deletions src/python/strelka/tests/fixtures/test.jnlp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?xml version="1.0" encoding="utf-8"?>
<jnlp spec="1.0+" codebase="https://example.com/" href="file-1.jnlp">
<information>
<title>SECURE DOCUMENT VIEWER</title>
<vendor>Microsoft</vendor>
<homepage href="https://microsoft.com"/>
<description>Secure document viewer app</description>
</information>
<security>
<all-permissions/>
</security>
<resources>
<j2se version="1.6+" />
<jar href="uplib.jar" />
</resources>
<application-desc main-class="Viewer">
</application-desc>

1234abcdeF56789
</jnlp>
26 changes: 26 additions & 0 deletions src/python/strelka/tests/test_scan_jnlp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from pathlib import Path
from unittest import TestCase, mock

from strelka.scanners.scan_jnlp import ScanJnlp as ScanUnderTest
from strelka.tests import run_test_scan


def test_scan_jnlp(mocker):
    """
    Pass: The event produced by the scanner equals the expected sample event.
    Failure: The fixture cannot be loaded or the produced event differs from the sample.
    """
    fixture = Path(__file__).parent / "fixtures/test.jnlp"

    # 'elapsed' varies per run, so any value is accepted for it.
    expected_event = {
        "elapsed": mock.ANY,
        "flags": [],
        "resource": "https://example.com/uplib.jar",
    }

    actual_event = run_test_scan(
        mocker=mocker,
        scan_class=ScanUnderTest,
        fixture_path=fixture,
    )

    # Show the full dictionary diff on failure instead of a truncated one.
    TestCase.maxDiff = None
    TestCase().assertDictEqual(expected_event, actual_event)

0 comments on commit c621fe8

Please sign in to comment.