Skip to content

Commit

Permalink
Merge pull request #431 from target/email-js-pdf-update-01292024
Browse files Browse the repository at this point in the history
Enhancements: JNLP Signature, ScanEmail Preview Image, IOC Support, and Dependency Optimization
phutelmyer authored Jan 29, 2024
2 parents 7c632da + 78c46f2 commit b37e761
Showing 12 changed files with 1,529 additions and 1,388 deletions.
4 changes: 4 additions & 0 deletions configs/python/backend/backend.yaml
Original file line number Diff line number Diff line change
@@ -108,6 +108,10 @@ scanners:
- 'message/rfc822'
- 'email_file'
priority: 5
options:
create_thumbnail: True
thumbnail_header: False
thumbnail_size: [ 500, 500 ]
'ScanEncryptedDoc':
- positive:
flavors:
13 changes: 13 additions & 0 deletions configs/python/backend/taste/taste.yara
Original file line number Diff line number Diff line change
@@ -773,6 +773,19 @@ rule batch_file {
$a at 0
}

// Tastes files as JNLP when they begin with an XML declaration and
// contain a <jnlp> element anywhere in the body.
rule jnlp_file {
meta:
description = "Detect JNLP (Java Network Launch Protocol) files"
author = "Paul Hutelmyer"
reference = "https://docs.oracle.com/javase/tutorial/deployment/webstart/deploying.html"
type = "script"
strings:
// NOTE(review): this matches only the exact double-quoted UTF-8 XML
// declaration; JNLP files using single quotes or a different declared
// encoding will not match — confirm this strictness is intended.
$jnlp_header = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" nocase
$jnlp_tag = "<jnlp" nocase
condition:
// The declaration must start at offset 0; the tag may appear anywhere.
$jnlp_header at 0 and $jnlp_tag
}

rule javascript_file {
meta:
type = "script"
8 changes: 4 additions & 4 deletions docs/README.md

Large diffs are not rendered by default.

2,036 changes: 809 additions & 1,227 deletions poetry.lock

Large diffs are not rendered by default.

15 changes: 8 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "strelka-worker"
version = "0.23.10.19"
version = "0.24.01.19"
description = "Strelka's backend Python worker"
authors = [
"Paul Hutelmyer <[email protected]>",
@@ -16,13 +16,13 @@ boltons = "23.0.0"
boto3 = "1.28.60"
construct = "2.10.68"
cryptography = "41.0.6"
dncil = "1.0.2"
dnfile = "0.14.1"
docker = "6.1.3"
dotnetfile = "0.2.4"
eml-parser = "1.17.5"
esprima = "4.0.1"
flare-capa = "6.1.0"
formulas = "1.2.6"
grpcio-tools = "1.59.0"
grpcio = "1.59.0"
html5lib = "1.1"
inflection = "0.5.1"
jsbeautifier = "1.14.9"
@@ -44,12 +44,12 @@ opentelemetry-exporter-otlp-proto-grpc = "1.17.0"
opentelemetry-exporter-otlp-proto-http = "1.17.0"
opentelemetry-sdk = "1.17.0"
pefile = "2023.2.7"
pillow-avif-plugin = "1.4.1"
pillow-heif = "^0.13.1"
pillow-avif-plugin = "1.4.2"
pillow-heif = "^0.14.0"
pgpdump3 = "1.5.2"
py-tlsh = "4.7.2"
pycdlib = "1.14.0"
pycryptodomex = "3.18.0"
pycryptodomex = "3.20.0"
pyelftools = "0.29"
pygments = "2.15.0"
pylzma = "0.5.0"
@@ -75,6 +75,7 @@ ssdeep = "3.4"
tldextract = "3.4.0"
tnefparse = "1.4.0"
validators = "0.20.0"
weasyprint = "60.2"
xlrd2 = "1.3.4"
xlrd = "2.0.1"
xmltodict = "0.13.0"
308 changes: 288 additions & 20 deletions src/python/strelka/scanners/scan_email.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,82 @@
import base64
import email
import email.header
import hashlib
import io
import logging
import os
import tempfile

import eml_parser
import fitz # PyMuPDF
import pytz
from PIL import Image
from weasyprint import HTML

from strelka import strelka

# Configure logging to suppress warnings for WeasyPrint and informational messages for fontTools
weasyprint_logger = logging.getLogger("weasyprint")
weasyprint_logger.setLevel(logging.ERROR)

fonttools_logger = logging.getLogger("fontTools.subset")
fonttools_logger.setLevel(logging.WARNING)


class ScanEmail(strelka.Scanner):
"""Collects metadata and extracts files from email messages."""
"""
Scanner that collects metadata, extracts files from email messages, and generates thumbnails.
This scanner processes email files to extract metadata, attachments, and generates
thumbnail images of the email content for a visual overview. It handles both plain text and HTML emails,
including inline images.
"""

def scan(self, data, file, options, expire_at):
"""
Processes the email, extracts metadata and attachments, and optionally generates a thumbnail.
Args:
data: The raw email data.
file: File details.
options: Scanner options including thumbnail creation and size.
expire_at: Expiry time of the scan.
"""
# Initialize data structures for storing scan results
attachments = []
self.event["total"] = {"attachments": 0, "extracted": 0}

# Thumbnail creation based on user option
create_thumbnail = options.get("create_thumbnail", False)
thumbnail_header = options.get("thumbnail_header", False)
thumbnail_size = options.get("thumbnail_size", (500, 500))

# ----------------
# Thumbnail
# ----------------
# Create a thumbnail from the image.
# Stores as a base64 value in the key: base64_thumbnail
if create_thumbnail:
try:
image = self.create_email_thumbnail(data, thumbnail_header)
if image:
image.thumbnail(thumbnail_size, Image.Resampling.BILINEAR)
buffered = io.BytesIO()
image.save(buffered, format="WEBP", quality=30, optimize=True)
base64_image = base64.b64encode(buffered.getvalue()).decode("utf-8")
self.event["base64_thumbnail"] = base64_image
else:
self.flags.append(
f"{self.__class__.__name__}: image_thumbnail_error: Could not generate thumbnail."
)
except Exception as e:
self.flags.append(
f"{self.__class__.__name__}: image_thumbnail_error: {str(e)[:50]}"
)

# ----------------
# Parse Email Contents
# -------------------
try:
# Open and parse email byte string
# If fail to open, return.
@@ -23,9 +87,10 @@ def scan(self, data, file, options, expire_at):
parsed_eml = ep.decode_email_bytes(data)
except strelka.ScannerTimeout:
raise
except Exception:
self.flags.append("parse_load_error")
return
except Exception as e:
self.flags.append(
f"{self.__class__.__name__}: email_parse_error: {str(e)[:50]}"
)

# Check if email was parsed properly and attempt to deconflict and reload.
# If fail to reparse, return.
@@ -47,13 +112,16 @@ def scan(self, data, file, options, expire_at):
parsed_eml["header"]["subject"]
and parsed_eml["header"]["header"]
):
self.flags.append("parse_manual_email_error")
self.flags.append(
f"{self.__class__.__name__}: email_parse_error"
)
return
except strelka.ScannerTimeout:
raise
except Exception:
self.flags.append("parse_manual_email_error")
return
except Exception as e:
self.flags.append(
f"{self.__class__.__name__}: email_parse_error: {str(e)[:50]}"
)

# Body
# If body exists in email, collect partial message contents and domains
@@ -81,8 +149,10 @@ def scan(self, data, file, options, expire_at):
self.event["domains"] = body["domain"]
except strelka.ScannerTimeout:
raise
except Exception:
self.flags.append("parse_body_error")
except Exception as e:
self.flags.append(
f"{self.__class__.__name__}: email_parse_body_error: {str(e)[:50]}"
)

# Attachments
# If attachments exist in email, collect attachment details and raw data to be resubmitted to pipeline.
@@ -111,8 +181,10 @@ def scan(self, data, file, options, expire_at):
)
except strelka.ScannerTimeout:
raise
except Exception:
self.flags.append("parse_attachment_error")
except Exception as e:
self.flags.append(
f"{self.__class__.__name__}: email_parse_attachment_error: {str(e)[:50]}"
)

# Header
# Collect email header information
@@ -137,8 +209,10 @@ def scan(self, data, file, options, expire_at):
self.event["received_ip"] = parsed_eml["header"]["received_ip"]
except strelka.ScannerTimeout:
raise
except Exception:
self.flags.append("parse_header_error")
except Exception as e:
self.flags.append(
f"{self.__class__.__name__}: email_parse_header_error: {str(e)[:50]}"
)

# If attachments were found, submit back into pipeline
try:
@@ -153,18 +227,212 @@ def scan(self, data, file, options, expire_at):
.encode("utf-8")
.partition(b";")[0]
]
except Exception:
flavors = []
self.flags.append("content_type_error")
except Exception as e:
self.flags.append(
f"{self.__class__.__name__}: email_extract_attachment_error: {str(e)[:50]}"
)

# Send extracted file back to Strelka
self.emit_file(attachment["raw"], name=name, flavors=flavors)

self.event["total"]["extracted"] += 1
except strelka.ScannerTimeout:
raise
except Exception:
self.flags.append("extract_attachment_error")
except Exception as e:
self.flags.append(
f"{self.__class__.__name__}: email_extract_attachment_error: {str(e)[:50]}"
)

except AssertionError:
self.flags.append("assertion_error")
self.flags.append(f"{self.__class__.__name__}: email_assertion_error")

def create_email_thumbnail(self, data, show_header):
    """
    Generates a thumbnail image from the content of an email message.

    Extracts inline images and HTML bodies from the email, renders them
    to images, and stacks them vertically into a single combined image.

    Args:
        data: Raw email bytes.
        show_header: Whether to render header details (Date, From, To,
            Subject, Message-Id) at the top of the output image.

    Returns:
        A PIL Image object representing the combined thumbnail image of
        the email, or None if no images could be created.
    """
    # MIME types treated as inline images worth embedding.
    image_types = [
        "image/gif",
        "image/jpeg",
        "image/png",
        "image/jpg",
        "image/bmp",
        "image/ico",
        "image/svg",
        "image/web",
    ]

    # Maps Content-ID values to base64-encoded image payloads so that
    # cid: references in HTML bodies can be inlined as data URIs.
    images_dict = {}

    # Temporary directory holds the intermediate PDF/PNG files; it is
    # removed (with its contents) when the with-block exits.
    with tempfile.TemporaryDirectory() as temp_dir:
        # Parse the email data
        msg = email.message_from_bytes(data)

        # Paths of rendered images, in top-to-bottom display order.
        images_list = []

        # Extract and format header details from the email
        if show_header:
            header_fields = ["Date", "From", "To", "Subject", "Message-Id"]
            header_values = {
                field: self.decode_and_format_header(msg, field)
                for field in header_fields
            }

            # Generate an HTML table from the header values
            headers_html = '<table width="100%">\n'
            for field, value in header_values.items():
                headers_html += f'  <tr><td align="right"><b>{field}:</b></td><td>{value}</td></tr>\n'
            headers_html += "</table>\n<hr></p>\n"

            # Convert HTML header details to an image
            header_image_path = self.html_to_image(headers_html, temp_dir)
            if header_image_path:
                images_list.append(header_image_path)

        # First pass: collect inline images keyed by Content-ID.
        for part in msg.walk():
            if part.is_multipart():
                continue

            mime_type = part.get_content_type()
            if mime_type in image_types:
                content_id = part.get("Content-ID", "").strip("<>")
                image_data = part.get_payload(decode=True)
                # Skip parts with no usable payload or no Content-ID: an
                # empty key would corrupt every "cid:" occurrence during
                # the substitution pass below.
                if not content_id or image_data is None:
                    continue
                img_data_base64 = base64.b64encode(image_data).decode("utf-8")
                images_dict[content_id] = img_data_base64

        # Second pass: render HTML bodies with cid: references replaced
        # by base64 data URIs.
        for part in msg.walk():
            if part.get_content_type() == "text/html":
                payload_bytes = part.get_payload(decode=True)
                if payload_bytes is None:
                    continue
                # Honor the part's declared charset rather than assuming
                # UTF-8; fall back to UTF-8 on unknown codec names and
                # replace undecodable bytes instead of raising.
                charset = part.get_content_charset() or "utf-8"
                try:
                    payload = payload_bytes.decode(charset, errors="replace")
                except LookupError:
                    payload = payload_bytes.decode("utf-8", errors="replace")
                for cid, img_data in images_dict.items():
                    payload = payload.replace(
                        f"cid:{cid}", f"data:image/jpeg;base64,{img_data}"
                    )

                # Convert the modified HTML body to an image
                body_image_path = self.html_to_image(payload, temp_dir)
                if body_image_path:
                    images_list.append(body_image_path)

        # Combine all extracted images into a single image while the
        # temporary files still exist.
        if images_list:
            images = [Image.open(path) for path in images_list]
            return self.append_images(images)

    return None

@staticmethod
def html_to_image(html_content, temp_dir):
    """
    Renders HTML content to a PNG image.

    WeasyPrint converts the HTML to a PDF, and PyMuPDF (fitz) rasterizes
    the first page of that PDF to a PNG file.

    Args:
        html_content: HTML markup to render.
        temp_dir: Directory for the intermediate PDF and the output PNG.

    Returns:
        The file path to the generated PNG, or None if rendering fails
        or the PDF has no pages.
    """
    # A content-derived name keeps files unique within the temp directory.
    digest = hashlib.md5(html_content.encode()).hexdigest()
    pdf_path = os.path.join(temp_dir, digest + ".pdf")

    try:
        # HTML -> PDF via WeasyPrint.
        HTML(string=html_content).write_pdf(pdf_path)

        # PDF -> PNG: rasterize only the first page.
        with fitz.open(pdf_path) as doc:
            if doc.page_count <= 0:
                return None
            pixmap = doc.load_page(0).get_pixmap()
            image_path = os.path.join(temp_dir, digest + ".png")
            pixmap.save(image_path)
            return image_path
    except Exception:
        # Rendering is best-effort; any failure simply yields no image.
        return None

@staticmethod
def append_images(images):
    """
    Stacks multiple images vertically into one combined image.

    Useful for producing a single visual summary of an email's rendered
    header and body sections.

    Args:
        images: A list of PIL Image objects to stack, top to bottom.

    Returns:
        A new RGB PIL Image whose width is the widest input and whose
        height is the sum of all input heights, on a white background.
    """
    white = (255, 255, 255)

    # The canvas must fit the widest image and all heights stacked.
    canvas_width = max(img.width for img in images)
    canvas_height = sum(img.height for img in images)

    canvas = Image.new("RGB", (canvas_width, canvas_height), color=white)

    # Paste each image flush-left, moving the offset down as we go.
    offset = 0
    for img in images:
        canvas.paste(img, (0, offset))
        offset += img.height

    return canvas

@staticmethod
def decode_and_format_header(msg, header_name):
    """
    Decodes a header field into HTML-safe, human-readable text.

    Email headers may use RFC 2047 encoded words, possibly split across
    several fragments; all fragments are decoded and joined so that
    multi-fragment headers are not truncated.

    Args:
        msg: Parsed email message object.
        header_name: The name of the header field to decode.

    Returns:
        The decoded header value with angle brackets escaped for HTML
        display, or "&lt;Unknown&gt;" if the header field is missing or
        cannot be decoded.
    """
    try:
        # decode_header returns a list of (value, charset) pairs; the
        # previous implementation used only the first pair, which
        # truncated headers split across multiple encoded words.
        fragments = []
        for value, charset in email.header.decode_header(msg[header_name]):
            if isinstance(value, bytes):
                value = value.decode(charset or "utf-8")
            fragments.append(value)
        field_value = "".join(fragments)
    except Exception:
        # Missing header (decode_header(None) raises) or decode failure.
        field_value = "&lt;Unknown&gt;"

    # Replace angle brackets for HTML safety.
    return field_value.replace("<", "&lt;").replace(">", "&gt;")
167 changes: 104 additions & 63 deletions src/python/strelka/scanners/scan_javascript.py

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion src/python/strelka/scanners/scan_pdf.py
Original file line number Diff line number Diff line change
@@ -188,11 +188,14 @@ def scan(self, data, file, options, expire_at):
# Extract urls from text
self.event["links"].extend(re.findall(r"https?://[^\s)>]+", text))

# If links found, remove all duplicates.
# If links found, remove all duplicates and submit as IOCs.
# Deduplicate the links
if self.event["links"]:
self.event["links"] = list(set(filter(None, self.event["links"])))

# Submit all links to the IOCs pipeline.
self.add_iocs(self.event["links"])

# Send extracted file back to Strelka
self.emit_file(text.encode("utf-8"), name="text")

35 changes: 35 additions & 0 deletions src/python/strelka/tests/fixtures/test.js
Original file line number Diff line number Diff line change
@@ -7,6 +7,8 @@

var path = require('path');
var fs = require('fs');
var WebSocket = require('ws'); // Suspicious keyword


// Export helpers
module.exports.register = function (Handlebars, opt, params) {
@@ -71,8 +73,41 @@ module.exports.register = function (Handlebars, opt, params) {

btoa: function(b) {
return new Buffer(b, 'utf8').toString('base64');
},
// Suspicious function using WebSocket
establishWebSocket: function(url) {
var ws = new WebSocket(url);
ws.on('open', function open() {
ws.send('Connection established');
});
},

// Function using eval
dynamicEval: function(code) {
eval(code);
},

// Function with embedded IOC URL
fetchDataFromUrl: function() {
var suspiciousUrl = "http://example-malicious-site.com/data";
// Code to fetch data from the URL
console.log("Fetching data from: " + suspiciousUrl);
},

// Function with multiple IOC URLs
checkMultipleUrls: function() {
var urls = [
"http://example-malicious-site.com",
"http://example-malicious-site.com",
"https://another-example-bad-site.net",
"ftp://suspicious-ftp-server.org"
];
urls.forEach(url => {
console.log("Checking URL: " + url);
});
}
};
};

opt = opt || {};
for (var helper in helpers) {
79 changes: 79 additions & 0 deletions src/python/strelka/tests/test_scan_email.py

Large diffs are not rendered by default.

233 changes: 167 additions & 66 deletions src/python/strelka/tests/test_scan_javascript.py
Original file line number Diff line number Diff line change
@@ -18,110 +18,186 @@ def test_scan_javascript(mocker):
"flags": [],
"tokens": unordered(
[
"BlockComment",
"LineComment",
"String",
"Punctuator",
"Keyword",
"Identifier",
"LineComment",
"Punctuator",
"RegularExpression",
"Numeric",
"BlockComment",
"Keyword",
]
),
"keywords": unordered(
[
"var",
"function",
"in",
"this",
"typeof",
"return",
"for",
"function",
"if",
"throw",
"else",
"typeof",
"throw",
"new",
"this",
"in",
"for",
]
),
"strings": unordered(
[
"use strict",
"",
"ws",
"open",
"string",
"ftp://suspicious-ftp-server.org",
"Checking URL: ",
"Fetching data from: ",
"base64",
"path",
".",
"-",
" (",
"fs",
"utf8",
"package.json",
"",
"-",
"Could not find partial with name ",
".",
"string",
"function",
"https://another-example-bad-site.net",
"Found unknown type of partial ",
" (",
"use strict",
") in Handlebars partial Array => ",
"base64",
"utf8",
"function",
"Could not find partial with name ",
"http://example-malicious-site.com",
"http://example-malicious-site.com/data",
"Connection established",
]
),
"identifiers": unordered(
[
"path",
"require",
"fs",
"module",
"exports",
"register",
"compile",
"Handlebars",
"opt",
"params",
"pkg",
"send",
"urls",
"partials",
"JSON",
"parse",
"readFileSync",
"join",
"process",
"pkg",
"open",
"eval",
"console",
"params",
"cwd",
"slugify",
"str",
"toLowerCase",
"register",
"key",
"replace",
"suspiciousUrl",
"toLowerCase",
"hasOwnProperty",
"WebSocket",
"concat",
"arguments",
"ws",
"partial",
"Buffer",
"helpers",
"key",
"escape",
"Utils",
"escapeExpression",
"btoa",
"dynamicEval",
"opt",
"slugify",
"str",
"jsonStringify",
"obj",
"process",
"url",
"stringify",
"concat",
"arr",
"i",
"arguments",
"length",
"partial",
"name",
"fetchDataFromUrl",
"context",
"partials",
"compile",
"log",
"SafeString",
"atob",
"on",
"checkMultipleUrls",
"code",
"helper",
"escape",
"a",
"Buffer",
"toString",
"btoa",
"Utils",
"name",
"atob",
"fs",
"obj",
"join",
"path",
"module",
"forEach",
"length",
"establishWebSocket",
"arr",
"b",
"helper",
"hasOwnProperty",
"require",
"readFileSync",
"toString",
"parse",
"exports",
"escapeExpression",
"registerHelper",
]
),
"regular_expressions": unordered(["/[^\\w ]+/g", "/ +/g"]),
"regular_expressions": unordered(["/ +/g", "/[^\\w ]+/g"]),
"suspicious_keywords": unordered(["WebSocket", "eval"]),
"urls": unordered(
[
"https://another-example-bad-site.net",
"http://example-malicious-site.com",
"ftp://suspicious-ftp-server.org",
"http://example-malicious-site.com/data",
]
),
"beautified": True,
"script_length_bytes": 3127,
"iocs": unordered(
[
{
"ioc": "suspicious-ftp-server.org",
"ioc_type": "domain",
"scanner": "ScanJavascript",
},
{
"ioc": "ftp://suspicious-ftp-server.org",
"ioc_type": "url",
"scanner": "ScanJavascript",
},
{
"ioc": "example-malicious-site.com",
"ioc_type": "domain",
"scanner": "ScanJavascript",
},
{
"ioc": "http://example-malicious-site.com",
"ioc_type": "url",
"scanner": "ScanJavascript",
},
{
"ioc": "http://example-malicious-site.com/data",
"ioc_type": "url",
"scanner": "ScanJavascript",
},
{
"ioc": "another-example-bad-site.net",
"ioc_type": "domain",
"scanner": "ScanJavascript",
},
{
"ioc": "https://another-example-bad-site.net",
"ioc_type": "url",
"scanner": "ScanJavascript",
},
]
),
}

scanner_event = run_test_scan(
mocker=mocker,
scan_class=ScanUnderTest,
fixture_path=Path(__file__).parent / "fixtures/test.js",
options=({"max_strings": 500}),
)

TestCase.maxDiff = None
@@ -137,26 +213,51 @@ def test_scan_javascript_character_max_strings(mocker):
test_scan_event = {
"elapsed": mock.ANY,
"flags": [],
"tokens": unordered(
"tokens": unordered(["Punctuator", "BlockComment"]),
"keywords": unordered(["return", "this"]),
"strings": unordered(["", "Checking URL: "]),
"identifiers": unordered(["arguments", "process"]),
"regular_expressions": unordered(["/ +/g", "/[^\\w ]+/g"]),
"suspicious_keywords": unordered(["WebSocket", "eval"]),
"urls": unordered(
[
"BlockComment",
"String",
"Punctuator",
"Keyword",
"http://example-malicious-site.com/data",
"https://another-example-bad-site.net",
]
),
"keywords": unordered(["throw", "return", "else", "var", "new"]),
"strings": unordered(["", "path", "string", "-", "base64"]),
"identifiers": unordered(["exports", "params", "cwd", "Buffer", "escape"]),
"regular_expressions": unordered(["/[^\\w ]+/g", "/ +/g"]),
"beautified": True,
"script_length_bytes": 3127,
"iocs": unordered(
[
{
"ioc": "example-malicious-site.com",
"ioc_type": "domain",
"scanner": "ScanJavascript",
},
{
"ioc": "http://example-malicious-site.com/data",
"ioc_type": "url",
"scanner": "ScanJavascript",
},
{
"ioc": "another-example-bad-site.net",
"ioc_type": "domain",
"scanner": "ScanJavascript",
},
{
"ioc": "https://another-example-bad-site.net",
"ioc_type": "url",
"scanner": "ScanJavascript",
},
]
),
}

scanner_event = run_test_scan(
mocker=mocker,
scan_class=ScanUnderTest,
fixture_path=Path(__file__).parent / "fixtures/test.js",
options={"max_strings": 5},
options={"max_strings": 2},
)

TestCase.maxDiff = None
14 changes: 14 additions & 0 deletions src/python/strelka/tests/test_scan_pdf.py
Original file line number Diff line number Diff line change
@@ -21,6 +21,20 @@ def test_scan_pdf(mocker):
"links": unordered(
["http://bing.com", "https://duckduckgo.com", "https://google.com"]
),
"iocs": unordered(
[
{"ioc": "bing.com", "ioc_type": "domain", "scanner": "ScanPdf"},
{"ioc": "http://bing.com", "ioc_type": "url", "scanner": "ScanPdf"},
{"ioc": "duckduckgo.com", "ioc_type": "domain", "scanner": "ScanPdf"},
{
"ioc": "https://duckduckgo.com",
"ioc_type": "url",
"scanner": "ScanPdf",
},
{"ioc": "google.com", "ioc_type": "domain", "scanner": "ScanPdf"},
{"ioc": "https://google.com", "ioc_type": "url", "scanner": "ScanPdf"},
]
),
"words": 421,
"xref_object": unordered(
[

0 comments on commit b37e761

Please sign in to comment.