diff --git a/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/pypi_sourcecode_analyzer.py b/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/pypi_sourcecode_analyzer.py index 4fd96e23a..cfe8ade36 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/pypi_sourcecode_analyzer.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/pypi_sourcecode_analyzer.py @@ -118,21 +118,31 @@ def _load_defaults(self, resources_path: str) -> tuple[str, str | None, set[str] logger.debug(error_msg) raise ConfigurationError(error_msg) - semgrep_commands: list[str] = ["semgrep", "scan", "--validate", "--oss-only", "--config", custom_rule_path] + semgrep_commands: list[str] = [ + "semgrep", + "scan", + "--metrics", + "off", + "--disable-version-check", + "--validate", + "--oss-only", + "--config", + custom_rule_path, + ] try: process = subprocess.run(semgrep_commands, check=True, capture_output=True) # nosec B603 + if process.returncode != 0: + warning_msg = ( + f"Error running semgrep validation on {custom_rule_path} with arguments" f" {process.args}." + ) + logger.warning(warning_msg) + except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as semgrep_error: - error_msg = ( + warning_msg = ( f"Unable to run semgrep validation on {custom_rule_path} with arguments " f"{semgrep_commands}: {semgrep_error}." ) - logger.debug(error_msg) - raise ConfigurationError(error_msg) from semgrep_error - - if process.returncode != 0: - error_msg = f"Error running semgrep validation on {custom_rule_path} with arguments" f" {process.args}." - logger.debug(error_msg) - raise ConfigurationError(error_msg) + logger.warning(warning_msg) logger.debug("Including custom ruleset from %s.", custom_rule_path) @@ -248,7 +258,15 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes # Here, we disable 'nosemgrep' ignoring so that this is not an evasion method of our scan (i.e. malware includes # 'nosemgrep' comments to prevent our scan detecting those code lines). Read more about the 'nosemgrep' feature # here: https://semgrep.dev/docs/ignoring-files-folders-code - semgrep_commands: list[str] = ["semgrep", "scan", "--oss-only", "--disable-nosem"] + semgrep_commands: list[str] = [ + "semgrep", + "scan", + "--metrics", + "off", + "--disable-version-check", + "--oss-only", + "--disable-nosem", + ] result: HeuristicResult = HeuristicResult.PASS source_code_path = pypi_package_json.package_sourcecode_path diff --git a/tests/malware_analyzer/pypi/test_pypi_sourcecode_analyzer.py b/tests/malware_analyzer/pypi/test_pypi_sourcecode_analyzer.py index 295083e08..c18369bb4 100644 --- a/tests/malware_analyzer/pypi/test_pypi_sourcecode_analyzer.py +++ b/tests/malware_analyzer/pypi/test_pypi_sourcecode_analyzer.py @@ -91,7 +91,7 @@ def test_nonexistent_rule_path(mock_defaults: MagicMock) -> None: @patch("macaron.malware_analyzer.pypi_heuristics.sourcecode.pypi_sourcecode_analyzer.defaults") -def test_invalid_custom_rules(mock_defaults: MagicMock) -> None: +def test_invalid_custom_rules(mock_defaults: MagicMock, pypi_package_json: MagicMock) -> None: """Test for when the provided file is not a valid semgrep rule, so error,""" # Use this file as an invalid semgrep rule as it is most definitely not a semgrep rule, and does exist. defaults = { @@ -103,8 +103,14 @@ def test_invalid_custom_rules(mock_defaults: MagicMock) -> None: mock_defaults.has_section.side_effect = lambda section: section == "heuristic.pypi" mock_defaults.__getitem__.side_effect = lambda section: sub_section if section == "heuristic.pypi" else None - with pytest.raises(ConfigurationError): - _ = PyPISourcecodeAnalyzer(resources_path=RESOURCES_PATH) + analyzer = PyPISourcecodeAnalyzer(resources_path=RESOURCES_PATH) + pypi_package_json.package_sourcecode_path = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "resources", "sourcecode_samples" + ) + + # Semgrep should fail to run when we launch analysis + with pytest.raises(HeuristicAnalyzerValueError): + _ = analyzer.analyze(pypi_package_json) @patch("macaron.malware_analyzer.pypi_heuristics.sourcecode.pypi_sourcecode_analyzer.defaults")