diff --git a/Dockerfile b/Dockerfile index 89590b7f..99de1044 100644 --- a/Dockerfile +++ b/Dockerfile @@ -52,14 +52,19 @@ RUN set -e; \ esac; \ echo -e "[nodejs]\nname=nodejs\nstream=21\nprofiles=\nstate=enabled\n" > /etc/dnf/modules.d/nodejs.module \ && microdnf module enable php ruby -y \ - && microdnf install -y php php-curl php-zip php-bcmath php-json php-pear php-mbstring php-devel make gcc git-core python3.11 python3.11-devel python3.11-pip ruby ruby-devel \ + && microdnf install -y php php-curl php-zip php-bcmath php-json php-pear php-mbstring php-devel make gcc git-core \ + python3.11 python3.11-devel python3.11-pip ruby ruby-devel \ + libX11-devel libXext-devel libXrender-devel libjpeg-turbo-devel \ pcre2 which tar zip unzip sudo nodejs ncurses glibc-common glibc-all-langpacks xorg-x11-fonts-75dpi xorg-x11-fonts-Type1 \ && alternatives --install /usr/bin/python3 python /usr/bin/python3.11 1 \ && python3 --version \ && python3 -m pip install --upgrade pip \ + && curl -LO https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6.1-2/wkhtmltox-0.12.6.1-2.almalinux9.${ARCH_NAME}.rpm \ + && rpm -ivh wkhtmltox-0.12.6.1-2.almalinux9.${ARCH_NAME}.rpm \ + && rm wkhtmltox-0.12.6.1-2.almalinux9.${ARCH_NAME}.rpm \ && curl -s "https://get.sdkman.io" | bash \ && source "$HOME/.sdkman/bin/sdkman-init.sh" \ - && echo -e "sdkman_auto_answer=true\nsdkman_selfupdate_feature=false\nsdkman_auto_env=true" >> $HOME/.sdkman/etc/config \ + && echo -e "sdkman_auto_answer=true\nsdkman_selfupdate_feature=false\nsdkman_auto_env=true\nsdkman_curl_connect_timeout=20\nsdkman_curl_max_time=0" >> $HOME/.sdkman/etc/config \ && sdk install java $JAVA_VERSION \ && sdk install maven $MAVEN_VERSION \ && sdk install gradle $GRADLE_VERSION \ diff --git a/README.md b/README.md index cef268d1..17907aa6 100644 --- a/README.md +++ b/README.md @@ -425,6 +425,10 @@ The license data is sourced from choosealicense.com and is quite limited. If the dep-scan could auto-detect most cloud applications and Kubernetes manifest files. Pass the argument `-t yaml-manifest` to manually specify the type. +## PDF reports + +Ensure [wkhtmltopdf](https://wkhtmltopdf.org/downloads.html) is installed or use the official container image to generate pdf reports. Use with `--explain` for more detailed reports. + ## Discord support The developers could be reached via the [discord](https://discord.gg/DCNxzaeUpd) channel for enterprise support. diff --git a/depscan/cli.py b/depscan/cli.py index 2f73ce47..eeec8114 100644 --- a/depscan/cli.py +++ b/depscan/cli.py @@ -10,7 +10,7 @@ import oras.client from quart import Quart, request from rich.panel import Panel -from rich.terminal_theme import MONOKAI +from rich.terminal_theme import DEFAULT_TERMINAL_THEME, MONOKAI from vdb.lib import config from vdb.lib import db as db_lib from vdb.lib.config import data_dir @@ -19,16 +19,16 @@ from vdb.lib.osv import OSVSource from vdb.lib.utils import parse_purl -from depscan.lib import github, utils, explainer +from depscan.lib import explainer, github, utils from depscan.lib.analysis import ( PrepareVdrOptions, analyse_licenses, analyse_pkg_risks, + find_purl_usages, jsonl_report, prepare_vdr, suggest_version, summary_stats, - find_purl_usages, ) from depscan.lib.audit import audit, risk_audit, risk_audit_map, type_audit_map from depscan.lib.bom import create_bom, get_pkg_by_type, get_pkg_list, submit_bom @@ -665,6 +665,7 @@ def main(): else os.path.join(reports_dir, "depscan.json") ) html_file = areport_file.replace(".json", ".html") + pdf_file = areport_file.replace(".json", ".pdf") # Create reports directory if reports_dir and not os.path.exists(reports_dir): os.makedirs(reports_dir, exist_ok=True) @@ -686,7 +687,6 @@ def main(): results = [] report_file = areport_file.replace(".json", f"-{project_type}.json") risk_report_file = areport_file.replace(".json", f"-risk.{project_type}.json") - console.rule(style="gray37") if args.bom and os.path.exists(args.bom): bom_file = args.bom creation_status = True @@ -732,7 +732,6 @@ def main(): analyse_licenses(project_type, licenses_results, license_report_file) if project_type in risk_audit_map: if args.risk_audit: - console.rule(style="gray37") console.print( Panel( f"Performing OSS Risk Audit for packages from " @@ -764,12 +763,11 @@ def main(): "Depscan supports OSS Risk audit for this " "project.\nTo enable set the environment variable [" "bold]ENABLE_OSS_RISK=true[/bold]", - title="New Feature", + title="Risk Audit Capability", expand=False, ) ) if project_type in type_audit_map: - console.rule(style="gray37") LOG.debug( "Performing remote audit for %s of type %s", src_dir, @@ -897,7 +895,11 @@ def main(): direct_purls=direct_purls, reached_purls=reached_purls, ) - console.save_html(html_file, theme=MONOKAI) + console.save_html( + html_file, + theme=MONOKAI if os.getenv("USE_DARK_THEME") else DEFAULT_TERMINAL_THEME, + ) + utils.export_pdf(html_file, pdf_file) # Submit vdr/vex files to threatdb server if args.threatdb_server and (args.threatdb_username or args.threatdb_token): submit_bom( diff --git a/depscan/lib/analysis.py b/depscan/lib/analysis.py index c8ff553d..955fbe63 100644 --- a/depscan/lib/analysis.py +++ b/depscan/lib/analysis.py @@ -1,6 +1,6 @@ import json import os.path -from collections import defaultdict, OrderedDict +from collections import OrderedDict, defaultdict from dataclasses import dataclass from typing import Dict, List, Optional @@ -11,7 +11,7 @@ from rich.tree import Tree from vdb.lib import CPE_FULL_REGEX from vdb.lib.config import placeholder_fix_version -from vdb.lib.utils import parse_purl, parse_cpe +from vdb.lib.utils import parse_cpe, parse_purl from depscan.lib import config from depscan.lib.logger import LOG, console @@ -587,7 +587,6 @@ def prepare_vdr(options: PrepareVdrOptions): } ) if not options.no_vuln_table: - console.rule(style="gray37") console.print() console.print(table) console.print() diff --git a/depscan/lib/config.py b/depscan/lib/config.py index 011f851a..4e4d440b 100644 --- a/depscan/lib/config.py +++ b/depscan/lib/config.py @@ -433,4 +433,4 @@ def get_int_from_env(name, default): max_reachable_explanations = get_int_from_env("max_reachable_explanations", 20) -max_reachable_explanations_purl = get_int_from_env("max_reachable_explanations_purl", 5) +max_purl_per_flow = get_int_from_env("max_purl_per_flow", 6) diff --git a/depscan/lib/explainer.py b/depscan/lib/explainer.py index 81043b21..74bfcf39 100644 --- a/depscan/lib/explainer.py +++ b/depscan/lib/explainer.py @@ -7,10 +7,8 @@ from rich.table import Table from rich.tree import Tree +from depscan.lib.config import max_purl_per_flow, max_reachable_explanations from depscan.lib.logger import console -from depscan.lib.config import ( - max_reachable_explanations, -) def explain( @@ -95,7 +93,7 @@ def explain_reachables(reachables, pkg_group_rows, project_type): if checked_flows: console.print( Panel( - "Review the detected validation/sanitization methods. Refactor the application to centralize the common valiidation operations to improve the security posture.", + "Review the detected validation/sanitization methods. Refactor the application to validate using custom middlewares to improve the security posture.", title="Recommendation", expand=False, ) @@ -135,7 +133,22 @@ def flow_to_source_sink(flow, purls, project_type): source_sink_desc = flow.get("code").split("\n")[0] elif project_type not in ("java") and flow.get("label") == "IDENTIFIER": source_sink_desc = flow.get("code").split("\n")[0] - if len(purls) == 1: + # Try to understand the source a bit more + if source_sink_desc.startswith("require("): + source_sink_desc = "Flow starts from a module import" + elif ( + ".use(" in source_sink_desc + or ".subscribe(" in source_sink_desc + or ".on(" in source_sink_desc + or ".emit(" in source_sink_desc + or " => {" in source_sink_desc + ): + source_sink_desc = "Flow starts from a callback function" + elif ( + "middleware" in source_sink_desc.lower() or "route" in source_sink_desc.lower() + ): + source_sink_desc = "Flow starts from a middlware" + elif len(purls) == 1: source_sink_desc = f"{source_sink_desc} can be used to reach this package." else: source_sink_desc = ( @@ -144,7 +157,7 @@ def flow_to_source_sink(flow, purls, project_type): return source_sink_desc -def flow_to_str(flow): +def flow_to_str(flow, project_type): """""" has_check_tag = False file_loc = "" @@ -162,11 +175,12 @@ def flow_to_str(flow): param_name = "" node_desc = f'{flow.get("parentMethodName")}([red]{param_name}[/red]) :right_arrow_curving_left:' if tags: - node_desc = ( - f"{node_desc}\n[bold]Tags :label: [/bold] [italic]{tags}[/italic]\n" - ) - elif flow.get("label") == "IDENTIFIER" and node_desc.startswith("<"): - node_desc = flow.get("name") + node_desc = f"{node_desc}\n[bold]Tags:[/bold] [italic]{tags}[/italic]\n" + elif flow.get("label") == "IDENTIFIER": + if node_desc.startswith("<"): + node_desc = flow.get("name") + if project_type not in ("java") and tags: + node_desc = f"{node_desc}\n[bold]Tags:[/bold] [italic]{tags}[/italic]\n" if flow.get("tags"): if ( "validation" in tags @@ -175,16 +189,15 @@ def flow_to_str(flow): or "sanitize" in tags ): has_check_tag = True - elif flow.get("label") in ("CALL", "RETURN"): + elif flow.get("label") in ("CALL", "RETURN") or project_type not in ("java"): code = flow.get("code", "").lower() # Let's broaden and look for more check method patterns # This is not a great logic but since we're offering some ideas this should be ok # Hopefully, the tagger would improve to handle these cases in the future if ( - "escape(" in code - or "encode(" in code - or "encrypt(" in code - or "validate" in code + re.search("(escape|encode|encrypt|validate|sanitize)", code) + or "authorize" in node_desc.lower() + or "authenticate" in node_desc.lower() ): has_check_tag = True if has_check_tag: @@ -196,6 +209,10 @@ def explain_flows(flows, purls, project_type): """""" tree = None comments = [] + if len(purls) > max_purl_per_flow: + comments.append( + ":exclamation_mark: Refactor this flow to reduce the number of external libraries used." + ) purls_str = "\n".join(purls) comments.append(f"Reachable Packages:\n{purls_str}") added_flows = [] @@ -211,7 +228,7 @@ def explain_flows(flows, purls, project_type): continue if not source_sink_desc: source_sink_desc = flow_to_source_sink(aflow, purls, project_type) - file_loc, flow_str, has_check_tag_flow = flow_to_str(aflow) + file_loc, flow_str, has_check_tag_flow = flow_to_str(aflow, project_type) if last_file_loc == file_loc: continue last_file_loc = file_loc @@ -227,6 +244,6 @@ def explain_flows(flows, purls, project_type): if has_check_tag: comments.insert( 0, - ":white_medium_small_square: Check if the mitigation used in this flow is valid and appropriate for your security requirements.", + ":white_medium_small_square: Check if the mitigation(s) used in this flow is valid and appropriate for your security requirements.", ) return tree, "\n".join(comments), source_sink_desc, has_check_tag diff --git a/depscan/lib/github.py b/depscan/lib/github.py index f3eb9e2f..7275c1d7 100644 --- a/depscan/lib/github.py +++ b/depscan/lib/github.py @@ -1,5 +1,6 @@ -import httpx import os + +import httpx from github import Auth, Github from depscan.lib import config diff --git a/depscan/lib/utils.py b/depscan/lib/utils.py index 0c532a96..d896e40e 100644 --- a/depscan/lib/utils.py +++ b/depscan/lib/utils.py @@ -2,6 +2,7 @@ import os import re from collections import defaultdict +from datetime import datetime from importlib.metadata import distribution from vdb.lib import db as db_lib @@ -374,3 +375,36 @@ def get_version(): Returns the version of depscan """ return distribution("owasp-depscan").version + + +def export_pdf( + html_file, + pdf_file, + title="DepScan Analysis", + footer=f'Report generated by OWASP dep-scan at {datetime.now().strftime("%B %d, %Y %H:%M")}', +): + """ + Method to export html as pdf using pdfkit + """ + pdf_options = { + "page-size": "A2", + "margin-top": "0.5in", + "margin-right": "0.25in", + "margin-bottom": "0.5in", + "margin-left": "0.25in", + "encoding": "UTF-8", + "outline": None, + "title": title, + "footer-right": footer, + "minimum-font-size": "12", + "disable-smart-shrinking": "", + } + try: + import pdfkit + + if not pdf_file and html_file: + pdf_file = html_file.replace(".html", ".pdf") + if os.path.exists(html_file): + pdfkit.from_file(html_file, pdf_file, options=pdf_options) + except Exception as e: + pass diff --git a/pyproject.toml b/pyproject.toml index 25f99bed..8aee336a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ authors = [ {name = "Team AppThreat", email = "cloud@appthreat.com"}, ] dependencies = [ - "appthreat-vulnerability-db>=5.5.1", + "appthreat-vulnerability-db>=5.5.2", "defusedxml", "oras", "PyYAML", @@ -14,6 +14,7 @@ dependencies = [ "quart", "PyGithub", "toml", + "pdfkit", ] requires-python = ">=3.8"