diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 034608f19..414e36f03 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,6 +32,19 @@ repos: args: [--settings-path, pyproject.toml] exclude: ^tests/malware_analyzer/pypi/resources/sourcecode_samples.* +# Format comment grammar. +- repo: local + hooks: + - id: comment-checker + name: Fix comment grammar and formatting + entry: python + args: + - ./scripts/dev_scripts/format_checker.py + - tests + - src/macaron + language: system + pass_filenames: true + # Add Black code formatters. - repo: https://github.com/ambv/black rev: 25.1.0 @@ -179,7 +192,9 @@ repos: language: system pass_filenames: false -# On push to the remote, run the unit tests. + + + # On push to the remote, run the unit tests. - repo: local hooks: - id: pytest diff --git a/docs/source/pages/developers_guide/style_guide.rst b/docs/source/pages/developers_guide/style_guide.rst index 1381f6ddb..2b0dfa84c 100644 --- a/docs/source/pages/developers_guide/style_guide.rst +++ b/docs/source/pages/developers_guide/style_guide.rst @@ -76,3 +76,21 @@ For variables of a class: we do not use the ``Attribute`` section as per the `nu x: float #: The y coordinate of the point. y: float + + +-------- +Comments +-------- + +Comments should use typical grammar where appropriate. Ideally, they should be composed of sentences that start with a capital letter and end with punctuation. +We use a pre-commit hook script to help enforce this. +The script is a lightweight implementation that can make mistakes in some cases. +Python script files that have been changed in the commit will be passed to the checker if they reside in either the ``src/macaron`` or ``tests`` directories. + +'''''''''''''''''''''''' +Working with the script: +'''''''''''''''''''''''' + +- Try to avoid letting proper nouns in the middle of a sentence spill over to the start of a new line, as the script will treat them as the start of a new sentence. 
+- To prevent the script from changing a particular file, add the following comment after the copyright information: "# grammar: off" +- To disable the script for a particular comment, use a double pound sign, e.g. "## " diff --git a/scripts/dev_scripts/format_checker.py b/scripts/dev_scripts/format_checker.py new file mode 100644 index 000000000..7cf429aa2 --- /dev/null +++ b/scripts/dev_scripts/format_checker.py @@ -0,0 +1,281 @@ +# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This script checks grammar formatting in Python source files.""" +import glob +import os.path +import re +import sys +import tempfile + + +def main(source_locations: list[str] | None = None) -> int: + """Check the grammar formatting of Python source files passed directly, or in directories. + + If the passed list is empty, fall back to system arguments. + If source locations include files and directories, this script will only check files within the passed directories. + Otherwise, all files, or files within directories, will be checked. + Macaron's integration test directories are always excluded. 
+ """ + if not source_locations and len(sys.argv) >= 2: + source_locations = [] + for arg in sys.argv[1:]: + source_locations.append(arg) + + if not source_locations or not isinstance(source_locations, list): + return 0 + + restrictive_mode = False + source_directories = [] + source_files = [] + for source_location in source_locations: + if os.path.isfile(source_location): + if not os.path.basename(source_location).lower().endswith(".py"): + continue + source_files.append(source_location) + elif os.path.isdir(source_location): + source_directories.append(source_location) + if source_files and source_directories: + restrictive_mode = True + + checker = FormatChecker() + target_files = [] + + if restrictive_mode: + for source_file in source_files: + for source_directory in source_directories: + if os.path.abspath(source_file).startswith(os.path.join(os.path.abspath(source_directory), "")): + target_files.append(source_file) + break + else: + for source_directory in source_directories: + target_files = target_files + glob.glob(f"{source_directory}/**/*.py", recursive=True) + for source_file in source_files: + target_files.append(source_file) + + changed_files = 0 + for target_file in target_files: + if os.path.join("integration", "cases") in target_file: + # Exclude integration test directories. 
+ continue + with open(target_file, "r", encoding="utf-8") as file: + lines = file.readlines() + changed_files = changed_files + checker.check_file(target_file, lines) + + return 1 if changed_files else 0 + + +class FormatChecker: + + TOOL_EXCEPTIONS = {"noqa:", "pylint:", "type:", "nosec", "nosec:", "flake8:", "pragma:", "pyarmor:", ":meta"} + LOWER_CASE_EXCEPTIONS = {"npm", "http:", "https:", "git@", "jdk"} + END_PUNCTUATION = {".", "!", "?"} + OTHER_PUNCTUATION = {",", ":", ";"} + SPECIAL_EXCEPTIONS = {"e.g.", "i.e.", "n.b."} + DISABLE_KEYWORD = ["grammar:", "off"] + WRAPPERS = ["'", '"', ")"] + + def check_file(self, source_file: str, lines: list[str]) -> int: + """Check and fix the contents of the passed file.""" + grouped_comment_lines = [] + current_group = [] + inline_comment_lines = [] + start_indices = {} + for index, line in enumerate(lines): + if index < 2: + # Copyright headers are checked elsewhere. + continue + if "#" not in line: + continue + line = line.rstrip() + + line_split = re.split(r"(\s+)", line) + start_index = -1 + for part_index, part in enumerate(line_split): + if part in {"#", "#:", "##"}: + start_index = part_index + break + if start_index == -1 or line_split[start_index] == "##": + # TODO discuss this per-line disable feature. + continue + + # Check for disable keywords of other tools. + if start_index + 2 >= len(line_split): + continue + first_part = line_split[start_index + 2] + if first_part in self.TOOL_EXCEPTIONS: + continue + + # Check for this disabler. + if start_index + 4 < len(line_split): + second_part = line_split[start_index + 4] + if first_part == self.DISABLE_KEYWORD[0] and second_part == self.DISABLE_KEYWORD[1]: + return 0 + + # Separate in-line and regular comments. + start_indices[index] = start_index + if start_index > 3: + # In-line comment. + inline_comment_lines.append(index) + else: + # Regular comments. 
+ if not current_group: + current_group.append(index) + grouped_comment_lines.append(current_group) + else: + if index - current_group[-1] > 1: + current_group = [] + grouped_comment_lines.append(current_group) + current_group.append(index) + + # TODO create class for handling lines and offsets. + change_count = 0 + for index in inline_comment_lines: + line = lines[index].rstrip() + line_split = re.split(r"(\s+)", line) + start_index = -1 + for part_index, part in enumerate(line_split): + if part in {"#", "#:", "##"}: + start_index = part_index + break + if start_index == -1: + continue + changed_line, changed = self.check_line(line_split, None, None, start_index + 2) + if changed: + change_count += 1 + lines[index] = "".join(changed_line) + "\n" + + split_lines = {} + for group in grouped_comment_lines: + for index in group: + split_lines[index] = re.split(r"(\s+)", lines[index].rstrip()) + + for index, group in enumerate(grouped_comment_lines): + group_changed = False + for group_index, line_index in enumerate(group): + current_line = split_lines[line_index] + prev_line = None + if group_index > 0: + prev_line = split_lines[line_index - 1][-1] + next_line = None + if group_index < len(group) - 1: + next_line = split_lines[line_index + 1][start_indices[line_index + 1] + 2] + current_line, changed = self.check_line( + current_line, prev_line, next_line, start_indices[line_index] + 2 + ) + if changed: + lines[line_index] = "".join(current_line) + "\n" + group_changed = group_changed | changed + if group_changed: + change_count += 1 + + if change_count: + # Save changes. 
+ target_dir = os.path.dirname(source_file) + file_mode = os.stat(source_file).st_mode + print(f"*** Adjusting file: {source_file}") + with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8", dir=target_dir, delete=False) as file: + file.writelines(lines) + os.replace(file.name, source_file) + os.chmod(source_file, file_mode) + + return 1 if change_count else 0 + + def check_line( + self, current_line: list[str], prev_line: str | None, next_line: str | None, start_index: int = 0 + ) -> tuple[list[str], bool]: + """Check the current line for formatting issues.""" + if current_line[start_index] == "-": + # Ignore bullet point style lines. + return current_line, False + + has_changed = False + if ( + (prev_line and self.has_sentence_end(prev_line) or not prev_line) + and not self.has_sentence_start(current_line[start_index]) + and re.match("[a-z]", current_line[start_index][0]) + ): + current_line[start_index] = current_line[start_index][0].upper() + current_line[start_index][1:] + has_changed = True + if ( + next_line + and re.match("^[A-Z][^A-Z]*$", next_line) + and current_line[start_index][-1] not in self.END_PUNCTUATION + ): + current_line, line_changed = self.fix_sentence_end(current_line) + has_changed = has_changed | line_changed + + for index, current_word in enumerate(current_line): + if index <= start_index: + continue + if not current_word: + continue + prev_word = current_line[index - 2] + if not self.has_sentence_end(prev_word): + continue + if ( + prev_word.lower() in self.SPECIAL_EXCEPTIONS + or prev_word.startswith("(") + and prev_word[1:].lower() in self.SPECIAL_EXCEPTIONS + ): + continue + if self.has_sentence_start(current_word): + continue + if not re.match("[a-z]", current_word[0]): + continue + current_line[index] = current_word[0].upper() + current_word[1:] + has_changed = True + + if not next_line: + current_line, line_changed = self.fix_sentence_end(current_line) + has_changed = has_changed | line_changed + + return current_line, has_changed 
+ + def fix_sentence_end(self, line: list[str]) -> tuple[list[str], bool]: + """Fix the end of a sentence by adding a period when appropriate.""" + if line[-1].endswith('"""') or line[-1].endswith(":"): + return line, False + if line[-1].startswith("https://") or line[-1].startswith("http://") or line[-1].startswith("git@"): + # Allow URLs to end sentences without a period. + return line, False + if ( + line[-1][-1] not in self.END_PUNCTUATION + and line[-1][-1] not in self.OTHER_PUNCTUATION + and not (line[-1][-1] in self.WRAPPERS and line[-1][-2:-1] == ".") + and re.search("[a-zA-Z0-9]", line[-1]) + ): + # Add a period if line does not end with one, or another acceptable punctuation. + # Also check for periods within wrappers, e.g. parenthesis. + # Only add a period if the final word contains at least one alphanumeric character. + line[-1] += "." + return line, True + + return line, False + + def has_sentence_start(self, word: str) -> bool: + """Check if the passed line starts with a capital letter, etc.""" + # TODO refactor this method to distinguish between when a sentence has a capital letter start, versus when it + # is only exempt from needing one. + if re.search("[_@$+:]", word): + # Ignore non-standard words such as variable references and URLs. + return True + if word in self.LOWER_CASE_EXCEPTIONS: + return True + if len(word) >= 2 and re.match("^[A-Z][0-9A-Z]+$", word): + # Ignore words that are entirely in capitals. 
+ return True + if re.match("^[A-Z]", word): + return True + return False + + def has_sentence_end(self, line: str | list[str]) -> bool: + """Check if the passed line ends with a punctuation mark.""" + end_word = line[-1] if isinstance(line, list) else line + if end_word.lower() in self.SPECIAL_EXCEPTIONS: + return False + return end_word[-1] in self.END_PUNCTUATION + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/src/macaron/dependency_analyzer/cyclonedx.py b/src/macaron/dependency_analyzer/cyclonedx.py index bacefb99d..ad93d377a 100644 --- a/src/macaron/dependency_analyzer/cyclonedx.py +++ b/src/macaron/dependency_analyzer/cyclonedx.py @@ -248,7 +248,7 @@ def add_latest_version( latest_deps[key] = item else: try: - # These are stored as variables so mypy does not complain about None values (union-attr) + # These are stored as variables so mypy does not complain about None values (union-attr). latest_value_purl = latest_value.get("purl") item_purl = item.get("purl") if ( @@ -476,7 +476,7 @@ def _resolve_more_dependencies(dependencies: dict[str, DependencyInfo]) -> None: if outcome not in {RepoFinderInfo.FOUND, RepoFinderInfo.FOUND_FROM_PARENT}: logger.debug("Failed to find url for purl: %s", item["purl"]) else: - # TODO decide how to handle possible duplicates here + # TODO decide how to handle possible duplicates here. item["url"] = url item["available"] = SCMStatus.AVAILABLE item["note"] = "" @@ -692,7 +692,7 @@ def convert_components_to_artifacts( # submodules that produce development artifacts in the same repo. if ( "snapshot" in (purl.version or "").lower() - # or "" is not necessary but mypy produces a FP otherwise. + # Or "" is not necessary but mypy produces a FP otherwise. 
and root_component and purl.namespace == root_component.group ): diff --git a/src/macaron/malware_analyzer/pypi_heuristics/base_analyzer.py b/src/macaron/malware_analyzer/pypi_heuristics/base_analyzer.py index 0c55b03fd..a1e86063d 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/base_analyzer.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/base_analyzer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Define and initialize the base analyzer.""" @@ -23,7 +23,7 @@ def __init__( self.name: str = name self.heuristic: Heuristics = heuristic self.depends_on: list[tuple[Heuristics, HeuristicResult]] | None = ( - depends_on # Contains the dependent heuristics and the expected result of each heuristic + depends_on # Contains the dependent heuristics and the expected result of each heuristic. ) @abstractmethod diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/anomalous_version.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/anomalous_version.py index 40843297d..b29917770 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/anomalous_version.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/anomalous_version.py @@ -105,7 +105,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes if there is no release information available. """ releases = pypi_package_json.get_releases() - if releases is None: # no release information + if releases is None: # No release information. error_msg = "There is no information for any release of this package." 
logger.debug(error_msg) raise HeuristicAnalyzerValueError(error_msg) @@ -118,7 +118,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes logger.debug(error_msg) raise HeuristicAnalyzerValueError(error_msg) - # Since there is only one release, the latest version should be that release + # Since there is only one release, the latest version should be that release. release = pypi_package_json.get_latest_version() if release is None: error_msg = "No latest version information available" @@ -155,7 +155,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes raise HeuristicAnalyzerValueError(error_msg) years.append(parsed_time.year) - years.append(parsed_time.year % 100) # last 2 digits + years.append(parsed_time.year % 100) # Last 2 digits. months.append(parsed_time.month) publish_days.append(parsed_time.day) @@ -164,31 +164,31 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes calendar = False calendar_semantic = False - # check for year YY[YY]... + # Check for year YY[YY]... if version.major in years: - # calendar versioning: YY[YY].(M[M].D[D])(D[D].M[M])... + # Calendar versioning: YY[YY].(M[M].D[D])(D[D].M[M])... if (version.minor in months and version.micro in days) or ( version.minor in days and version.micro in months ): calendar = True else: calendar_semantic = True - # check for calendar versioning: M[M].D[D].YY[YY]... or D[D].M[M].YY[YY]... or the whole digit rerpesenting a datetime + # Check for calendar versioning: M[M].D[D].YY[YY]... or D[D].M[M].YY[YY]... or the whole digit representing a datetime. elif ( ((version.major in months and version.minor in days) or (version.major in days and version.minor in months)) and version.micro in years ) or self._integer_date(version.major, years, months, days): - # must include day and year for this to be calendar + # Must include day and year for this to be calendar. 
calendar = True - if calendar: # just check epoch + if calendar: # Just check epoch. detail_info: dict[str, JsonType] = {self.DETAIL_INFO_KEY: Versioning.CALENDAR.value} if version.epoch > self.epoch_threshold: return HeuristicResult.FAIL, detail_info return HeuristicResult.PASS, detail_info - if calendar_semantic: # check minor (as major) and epoch + if calendar_semantic: # Check minor (as major) and epoch. detail_info = {self.DETAIL_INFO_KEY: Versioning.CALENDAR_SEMANTIC.value} if version.epoch > self.epoch_threshold: @@ -198,7 +198,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes return HeuristicResult.PASS, detail_info - # semantic versioning + # Semantic versioning. detail_info = {self.DETAIL_INFO_KEY: Versioning.SEMANTIC.value} if version.epoch > self.epoch_threshold: diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/high_release_frequency.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/high_release_frequency.py index d5e1b7635..1050a65b7 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/high_release_frequency.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/high_release_frequency.py @@ -23,9 +23,9 @@ def __init__(self) -> None: super().__init__( name="high_release_frequency_analyzer", heuristic=Heuristics.HIGH_RELEASE_FREQUENCY, - depends_on=[(Heuristics.ONE_RELEASE, HeuristicResult.PASS)], # Analyzing when this heuristic pass + depends_on=[(Heuristics.ONE_RELEASE, HeuristicResult.PASS)], # Analyzing when this heuristic pass. ) - self.average_gap_threshold: int = self._load_defaults() # Days + self.average_gap_threshold: int = self._load_defaults() # Days. 
def _load_defaults(self) -> int: """Load the default values from defaults.ini.""" diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/one_release.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/one_release.py index 7d81ec010..269f84a32 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/one_release.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/one_release.py @@ -34,6 +34,6 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes return HeuristicResult.SKIP, {"releases": {}} if len(releases) == 1: - return HeuristicResult.FAIL, {"releases": releases} # Higher false positive, so we keep it MEDIUM + return HeuristicResult.FAIL, {"releases": releases} # Higher false positive, so we keep it MEDIUM. return HeuristicResult.PASS, {"releases": releases} diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/source_code_repo.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/source_code_repo.py index 708301807..7a8860efb 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/source_code_repo.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/source_code_repo.py @@ -40,7 +40,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes tuple[HeuristicResult, dict[str, JsonType]]: The result and related information collected during the analysis. """ - # If a sourcecode repo exists, then this will have already been validated + # If a sourcecode repo exists, then this will have already been validated. 
if not pypi_package_json.has_repository: return HeuristicResult.FAIL, {} return HeuristicResult.PASS, {} diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/unchanged_release.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/unchanged_release.py index 278f3eeb5..d6c9743ab 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/unchanged_release.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/unchanged_release.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Heuristics analyzer to check unchanged content in multiple releases.""" @@ -72,7 +72,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes frequency = Counter(digests) highest_frequency = max(frequency.values()) - if highest_frequency > 1: # Any two release are same + if highest_frequency > 1: # Any two release are same. return HeuristicResult.FAIL, {} return HeuristicResult.PASS, {} diff --git a/src/macaron/malware_analyzer/pypi_heuristics/metadata/wheel_absence.py b/src/macaron/malware_analyzer/pypi_heuristics/metadata/wheel_absence.py index 0198a932d..94f685db7 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/metadata/wheel_absence.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/metadata/wheel_absence.py @@ -24,7 +24,7 @@ class WheelAbsenceAnalyzer(BaseHeuristicAnalyzer): """ WHEEL: str = "bdist_wheel" - # as per https://github.com/pypi/inspector/blob/main/inspector/main.py line 125 + # As per https://github.com/pypi/inspector/blob/main/inspector/main.py line 125. 
INSPECTOR_TEMPLATE = ( "{inspector_url_scheme}://{inspector_url_netloc}/project/" "{name}/{version}/packages/{first}/{second}/{rest}/{filename}" @@ -56,13 +56,13 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes If there is no release information, or has other missing package information. """ releases = pypi_package_json.get_releases() - if releases is None: # no release information + if releases is None: # No release information. error_msg = "There is no information for any release of this package." logger.debug(error_msg) raise HeuristicAnalyzerValueError(error_msg) version = pypi_package_json.component_version - if version is None: # check latest release version + if version is None: # Check latest release version. version = pypi_package_json.get_latest_version() if version is None: @@ -81,7 +81,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes raise HeuristicAnalyzerValueError(error_msg) for distribution in release_distributions: - # validate data + # Validate data. package_type = json_extract(distribution, ["packagetype"], str) if package_type is None: error_msg = f"The version {version} has no 'package type' field in a distribution" @@ -120,10 +120,10 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes filename=filename, ) - # use a head request because we don't care about the response contents + # Use a head request because we don't care about the response contents. inspector_links[inspector_link] = False if send_head_http_raw(inspector_link): - inspector_links[inspector_link] = True # link was reachable + inspector_links[inspector_link] = True # Link was reachable. 
detail_info: dict[str, JsonType] = { "inspector_links": inspector_links, diff --git a/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/pypi_sourcecode_analyzer.py b/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/pypi_sourcecode_analyzer.py index 4fd96e23a..e28762b13 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/pypi_sourcecode_analyzer.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/pypi_sourcecode_analyzer.py @@ -108,7 +108,7 @@ def _load_defaults(self, resources_path: str) -> tuple[str, str | None, set[str] configuration_name = "custom_semgrep_rules_path" custom_rule_path = section.get(configuration_name) - if not custom_rule_path: # i.e. None or empty string + if not custom_rule_path: # I.e. None or empty string. logger.debug("No custom path listed under %s, using default rules only.", configuration_name) custom_rule_path = None else: @@ -308,7 +308,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes # only work if `--experimental` is also supplied to enable experimental features, which we do not use. # Semgrep provides a relative path separated by '.' to the rule ID, where the rule ID is always the # final element in that path, so we use that to match our rule IDs. - # e.g. rule_id = src.macaron.resources.pypi_malware_rules.obfuscation_decode-and-execute, which comes from + # E.g. rule_id = src.macaron.resources.pypi_malware_rules.obfuscation_decode-and-execute, which comes from # the rule ID 'obfuscation_decode-and-execute' inside 'obfuscation.yaml'. 
if rule_id.split(".")[-1] in self.disabled_rule_ids: if rule_id not in disabled_results: diff --git a/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/suspicious_setup.py b/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/suspicious_setup.py index ebde2a21f..caca1bf67 100644 --- a/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/suspicious_setup.py +++ b/src/macaron/malware_analyzer/pypi_heuristics/sourcecode/suspicious_setup.py @@ -141,7 +141,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes except SyntaxError: imports = self.extract_from_lines(content) - # Catch the install_requires packages + # Catch the install_requires packages. pattern = r"install_requires\s*=\s*\[(.*?)\]" matches = re.search(pattern, content, re.DOTALL) if matches: @@ -210,7 +210,7 @@ def extract_from_lines(self, source_content: str) -> set[str]: r"(?:(?:\s*,\s*)(?:" + module_name + r")*(?:" + alias_pattern + r")?))*)(?:(?:\s|#).*)?" ) # Allows for a standard import statement. - # E.g.: import + # E.g.: import . # Where consists of one or more . # Where consists of one or more words (a-z or 0-9 or underscore) separated by periods, # with an optional alias. @@ -222,7 +222,7 @@ def extract_from_lines(self, source_content: str) -> set[str]: + r")*)(?:\s+import\s+(\w+(?:\s+as\s+\w+)?(?:(?:\s*,\s*)(?:\w+(?:\s+as\s+\w+)?))*))" ) # Allows for a from import statement. - # E.g.: from import + # E.g.: from import . # Where is as above, but can also be preceded by any number of periods. # (Note only a single module can be placed here.) # Where consists of one or more with optional aliases. @@ -249,7 +249,7 @@ def extract_from_lines(self, source_content: str) -> set[str]: for split in splits: imports.add(split) elif match.group(2): - # From import + # From import. 
imports.add(match.group(2)) if match.group(3): splits = self._prune_aliased_lines(match.group(3), alias_pattern) diff --git a/src/macaron/parsers/github_workflow_model.py b/src/macaron/parsers/github_workflow_model.py index 9d0d86962..973b1ba31 100644 --- a/src/macaron/parsers/github_workflow_model.py +++ b/src/macaron/parsers/github_workflow_model.py @@ -2,7 +2,7 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. # pylint: skip-file # flake8: noqa - +# grammar: off # generated by datamodel-codegen: # filename: https://raw.githubusercontent.com/SchemaStore/schemastore/a1689388470d1997f2e5ebd8b430e99587b8d354/src/schemas/json/github-workflow.json diff --git a/src/macaron/parsers/yaml/loader.py b/src/macaron/parsers/yaml/loader.py index 30712caa0..7d5692d5d 100644 --- a/src/macaron/parsers/yaml/loader.py +++ b/src/macaron/parsers/yaml/loader.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2022, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the loader for YAML files.""" @@ -115,7 +115,7 @@ def load(cls, path: os.PathLike | str, schema: Schema = None) -> Any: result = None # Ensure to get the correct data - # yamale.make_data return a list of tuples: (loaded_data, file_path) + # yamale.make_data return a list of tuples: (loaded_data, file_path). for data in loaded_data: if data[1] == path: result = data[0] diff --git a/src/macaron/policy_engine/policy_engine.py b/src/macaron/policy_engine/policy_engine.py index 1b9bec29c..0df15ce98 100644 --- a/src/macaron/policy_engine/policy_engine.py +++ b/src/macaron/policy_engine/policy_engine.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. 
All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module handles invoking the souffle policy engine on a database.""" @@ -164,7 +164,7 @@ def run_policy_engine(database_path: str, policy_content: str) -> dict: The policy engine result. """ # TODO: uncomment the following line when the check is improved. - # _check_version(database_path) + # _check_version(database_path). res = run_souffle(database_path, policy_content) diff --git a/src/macaron/policy_engine/souffle.py b/src/macaron/policy_engine/souffle.py index 95d0093b3..5b2b628dc 100644 --- a/src/macaron/policy_engine/souffle.py +++ b/src/macaron/policy_engine/souffle.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """ @@ -22,7 +22,7 @@ class SouffleError(Exception): """Occurs when the souffle program contains errors, or there is an error invoking souffle.""" - # TODO: Use generic Macaron error class + # TODO: Use generic Macaron error class. def __init__( self, command: list[str] | str | None = None, message: str = "An error occurred with calling Souffle." diff --git a/src/macaron/policy_engine/souffle_code_generator.py b/src/macaron/policy_engine/souffle_code_generator.py index b768ba5a7..a9cef8795 100644 --- a/src/macaron/policy_engine/souffle_code_generator.py +++ b/src/macaron/policy_engine/souffle_code_generator.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""Generate souffle datalog for policy prelude.""" @@ -244,20 +244,20 @@ def project_join_table_souffle_relation( } ) - # Construct rule to create relations based on table + # Construct rule to create relations based on table. for value_column in right_table.columns: - # Loop over each column that gets treated as a value + # Loop over each column that gets treated as a value. if value_column.name in right_ignore_fields: continue if value_column.name in right_common_fields: continue - # Construct the relation statement containing all common_fields and the cid bound to value + # Construct the relation statement containing all common_fields and the cid bound to value. right_pattern = [] left_pattern = [] value_statement = "" - # Construct the relation statement containing all common_fields and the cid bound to value + # Construct the relation statement containing all common_fields and the cid bound to value. for column in left_table.columns: if column.name in left_common_fields: left_pattern.append(left_common_fields[column.name]) @@ -328,7 +328,7 @@ def get_table_rules_per_column( Program to declare and construct the rules common_fields PRODUCT (table.columns - common_fields - ignore_columns) """ - # Construct declaration statement + # Construct declaration statement. result = SouffleProgram( declarations={ f".decl {rule_name} (" @@ -342,15 +342,15 @@ def get_table_rules_per_column( } ) - # Construct rule to create relations based on table + # Construct rule to create relations based on table. for value_column in table.columns: - # Loop over each column that gets treated as a value + # Loop over each column that gets treated as a value. if value_column.name in ignore_columns: continue if value_column.name in common_fields: continue - # Construct the relation statement containing all common_fields and the cid bound to value + # Construct the relation statement containing all common_fields and the cid bound to value. 
pattern = [] value_statement = "" for column in table.columns: diff --git a/src/macaron/repo_finder/commit_finder.py b/src/macaron/repo_finder/commit_finder.py index 5e443e56b..e6914b8f4 100644 --- a/src/macaron/repo_finder/commit_finder.py +++ b/src/macaron/repo_finder/commit_finder.py @@ -34,12 +34,12 @@ # This part of the pattern terminates with an OR character to allow for it to be combined with the name of the target # artifact as another possible prefix match. # E.g. -# PREFIX_START + + PREFIX_END +# PREFIX_START + + PREFIX_END. PREFIX_START = "(?P(?:(?:.*(?:[a-z0-9][a-z][0-9]+|[0-9][a-z]|[a-z]{2}|[0-9]{1,2}))|[a-z]{2})|" PREFIX_END = ")?" # An alternative prefix pattern that is intended for a single use case: A prefix that contains a part that is -# difficult to distinguish from part of a version, i.e. java-v1-1.1.0 (prefix: java-v1, version: 1.1.0) +# difficult to distinguish from part of a version, i.e. java-v1-1.1.0 (prefix: java-v1, version: 1.1.0). PREFIX_WITH_SEPARATOR = "(?P(?:[a-z].*(?P[^a-z0-9])[a-z][0-9]+))(?:(?P=prefix_sep_1))" # Another alternative prefix pattern that accepts a string of any number of alphabetic characters and no separator. @@ -96,7 +96,7 @@ # If a version string has less parts than this number it will be padded with additional zeros to provide better matching # opportunities. # For this to be applied, the version string must not have any non-numeric parts. -# E.g 1.2 (2) -> 1.2.0.0 (4), 1.2.RELEASE (3) -> 1.2.RELEASE (3), 1.DEV-5 (3) -> 1.DEV-5 (3) +# E.g 1.2 (2) -> 1.2.0.0 (4), 1.2.RELEASE (3) -> 1.2.RELEASE (3), 1.DEV-5 (3) -> 1.DEV-5 (3). MAX_ZERO_DIGIT_EXTENSION = 4 split_pattern = re.compile("[^0-9a-z]", flags=re.IGNORECASE) # Used to split version strings. 
@@ -105,8 +105,8 @@ alphabetic_only_pattern = re.compile("^[a-z]+$", flags=re.IGNORECASE) hex_only_pattern = re.compile("^[0-9a-f]+$", flags=re.IGNORECASE) numeric_only_pattern = re.compile("^[0-9]+$") -special_suffix_pattern = re.compile("^([0-9]+)([a-z]+[0-9]+)$", flags=re.IGNORECASE) # E.g. 1.10rc1 -versioned_string = re.compile("^([a-z]*)(0*)([1-9]+[0-9]*)?$", flags=re.IGNORECASE) # e.g. RC1, 15, 0010, M, etc. +special_suffix_pattern = re.compile("^([0-9]+)([a-z]+[0-9]+)$", flags=re.IGNORECASE) # E.g. 1.10rc1. +versioned_string = re.compile("^([a-z]*)(0*)([1-9]+[0-9]*)?$", flags=re.IGNORECASE) # E.g. RC1, 15, 0010, M, etc. multiple_zero_pattern = re.compile("^0+$") name_version_pattern = re.compile("([0-9]+(?:[.][0-9]+)*)") # Identifies version-like parts within prefixes. @@ -323,7 +323,7 @@ def _split_name(name: str) -> list[str]: def _split_version(version: str) -> tuple[list[str], bool, set[int]]: """Split a version into its constituent parts, and flag if the version contained more than one kind of seperator.""" # The version is split on non-alphanumeric characters to separate the version parts from the non-version parts. - # e.g. 1.2.3-DEV -> [1, 2, 3, DEV] + # E.g. 1.2.3-DEV -> [1, 2, 3, DEV]. split = split_pattern.split(version) version_separators = _split_separators(version) multi_sep = False @@ -592,7 +592,7 @@ def match_tags(tag_list: list[str], name: str, version: str) -> tuple[list[str], if not prefix: continue if "/" in prefix: - # Exclude prefix parts that exists before a forward slash, e.g. rel/ + # Exclude prefix parts that exists before a forward slash, e.g. rel/. _, _, prefix = prefix.rpartition("/") if ( prefix.lower() == name.lower() @@ -654,7 +654,7 @@ def _fix_misaligned_tag_matches(matched_tags: list[dict[str, str]], version: str # Try to move any version-like strings from the end of the prefix to the version. # E.g. An optional 'v', 'r', or 'c', followed by one or more numbers. 
# TODO consider cases where multiple version-like parts exist in the prefix. - # E.g. Prefix: 'prefix-1.2' Version: '3.4' from Artifact Version 'prefix-1.2.3.4' + # E.g. Prefix: 'prefix-1.2' Version: '3.4' from Artifact Version 'prefix-1.2.3.4'. if re.match("^([vrc])?[0-9]+$", prefixes[-1], re.IGNORECASE): if version_sep and version_sep == prefix_sep: # Ensure there is a version separator and a prefix separator, and they match. @@ -762,7 +762,7 @@ def _compute_tag_version_similarity( # Try to reduce the score further based on the tag suffix. if tag_suffix: last_part = version_parts[-1].lower() - # The tag suffix might consist of multiple version parts, e.g. RC1.RELEASE + # The tag suffix might consist of multiple version parts, e.g. RC1.RELEASE. suffix_split, _, _ = _split_version(tag_suffix) # Try to match suffix parts to version. versioned_string_match = False diff --git a/src/macaron/repo_finder/repo_finder.py b/src/macaron/repo_finder/repo_finder.py index 9017a4ae0..e63cb4b7f 100644 --- a/src/macaron/repo_finder/repo_finder.py +++ b/src/macaron/repo_finder/repo_finder.py @@ -107,7 +107,7 @@ def find_repo( logger.debug("No Repo Finder found for package type: %s of %s", purl.type, purl) return "", RepoFinderInfo.UNSUPPORTED_PACKAGE_TYPE - # Call Repo Finder and return first valid URL + # Call Repo Finder and return first valid URL. logger.debug("Analyzing %s with Repo Finder: %s", purl, type(repo_finder)) found_repo, outcome = repo_finder.find_repo(purl) diff --git a/src/macaron/repo_finder/repo_finder_enums.py b/src/macaron/repo_finder/repo_finder_enums.py index 87c258491..9889cd7dd 100644 --- a/src/macaron/repo_finder/repo_finder_enums.py +++ b/src/macaron/repo_finder/repo_finder_enums.py @@ -12,8 +12,8 @@ class RepoFinderInfo(Enum): #: E.g. Maven central. NO_MAVEN_HOST_PROVIDED = "No maven host provided" - #: Reported if the list of period separated tags that point to the SCM within the POM is not provided by the user in - #: Macaron's config. E.g. 
scm.url, scm.connection + #: Reported if the list of period separated tags that point to the SCM within the POM is not provided by the user + #: in Macaron's config. E.g. scm.url, scm.connection. NO_POM_TAGS_PROVIDED = "No POM tags provided" #: Reported if the user does not provide a version for the Repo Finder via the command line, and does not allow the @@ -87,7 +87,7 @@ class RepoFinderInfo(Enum): #: Reported when a repository could only be found by checking the PyPI registry JSON. FOUND_FROM_PYPI = "Found from PyPI" - #: Default value. Reported if the Repo Finder was not called. E.g. Because the repository URL was already present. + #: Default value. Reported if the Repo Finder was not called. E.g. because the repository URL was already present. NOT_USED = "Not used" @@ -112,7 +112,7 @@ class CommitFinderInfo(Enum): NO_TAGS_WITH_COMMITS = "No Git tags with commits" #: Reported if the tag selected from the repository fails to resolve to a commit despite having one associated with - # it. + #: it. NO_TAG_COMMIT = "No valid commit found for Git tag" #: Reported if the version part of the user provided PURL is invalid. @@ -127,5 +127,5 @@ class CommitFinderInfo(Enum): #: Reported if a match was found. MATCHED = "Matched" - #: Default state. Reported if the commit finder was not called. E.g. Because the Repo Finder failed. + #: Default state. Reported if the commit finder was not called. E.g. because the Repo Finder failed. 
NOT_USED = "Not used" diff --git a/src/macaron/repo_finder/repo_finder_java.py b/src/macaron/repo_finder/repo_finder_java.py index 3a50fb9b6..c51100b87 100644 --- a/src/macaron/repo_finder/repo_finder_java.py +++ b/src/macaron/repo_finder/repo_finder_java.py @@ -53,7 +53,7 @@ def find_repo(self, purl: PackageURL) -> tuple[str, RepoFinderInfo]: if not version: logger.debug("Version missing for maven artifact: %s:%s", group, artifact) - # TODO add support for Java artifacts without a version + # TODO add support for Java artifacts without a version. return "", RepoFinderInfo.NO_VERSION_PROVIDED # Perform the following in a loop: @@ -102,7 +102,7 @@ def find_repo(self, purl: PackageURL) -> tuple[str, RepoFinderInfo]: urls, read_outcome = self._read_pom(pom, tags) if urls: - # If the found URLs fail to validate, finding can continue on to the next parent POM + # If the found URLs fail to validate, finding can continue on to the next parent POM. logger.debug("Found %s urls: %s", len(urls), urls) url = find_valid_repository_url(urls) if url: @@ -302,7 +302,7 @@ def _find_element(self, parent: Element | None, target: str) -> Element | None: # Attempt to match the target tag within the children of parent. for child in parent: - # Handle raw tags, and tags accompanied by Maven metadata enclosed in curly braces. E.g. '{metadata}tag' + # Handle raw tags, and tags accompanied by Maven metadata enclosed in curly braces. E.g. '{metadata}tag'. if child.tag == target or child.tag.endswith(f"}}{target}"): return child return None diff --git a/src/macaron/repo_verifier/repo_verifier_maven.py b/src/macaron/repo_verifier/repo_verifier_maven.py index 22c9e42b3..f7ed1c080 100644 --- a/src/macaron/repo_verifier/repo_verifier_maven.py +++ b/src/macaron/repo_verifier/repo_verifier_maven.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains code to verify whether a reported repository with Maven build system can be linked back to the artifact.""" @@ -117,9 +117,9 @@ def verify_domains_from_recognized_code_hosting_services(self) -> RepositoryVeri # but it doesn't match gitlab.com/foo or gitlab.com/bar. if ( group_parts[0].lower() in {"io", "com"} - and group_parts[1].lower() == platform.lower() # e.g., github - and group_parts[1].lower() == reported_hostname.lower() # e.g., github - and group_parts[2].lower() == reported_account.lower() # e.g., foo in github.com/foo + and group_parts[1].lower() == platform.lower() # E.g., github. + and group_parts[1].lower() == reported_hostname.lower() # E.g., github. + and group_parts[2].lower() == reported_account.lower() # E.g., foo in github.com/foo. ): return RepositoryVerificationResult( status=RepositoryVerificationStatus.PASSED, reason="git_ns_match", build_tool=self.build_tool diff --git a/src/macaron/slsa_analyzer/analyze_context.py b/src/macaron/slsa_analyzer/analyze_context.py index 93a3f48ba..73aaba8c1 100644 --- a/src/macaron/slsa_analyzer/analyze_context.py +++ b/src/macaron/slsa_analyzer/analyze_context.py @@ -82,16 +82,16 @@ def __init__( self.slsa_level = SLSALevels.LEVEL0 # Indicate whether this repo fully reach a level or - # it's just compliant for a SLSA level + # it's just compliant for a SLSA level. self.is_full_reach = False # The Macaron root path where the wrapper files exist. self.macaron_path = macaron_path - # The output dir to store all files + # The output dir to store all files. self.output_dir = output_dir - # The check results from the analysis + # The check results from the analysis. self.check_results: dict[str, CheckResult] = {} # Add the data computed at runtime to the dynamic_data attribute. 
@@ -169,7 +169,7 @@ def provenances(self) -> dict[str, list[InTotoV01Statement | InTotoV1Statement]] except KeyError: return {} - # TODO: refactor as this information is related to the reporter not analyze context + # TODO: refactor as this information is related to the reporter not analyze context. @property def is_inferred_provenance(self) -> bool: """Return True if the provenance for this repo is an inferred one. @@ -234,7 +234,7 @@ def get_dict(self) -> dict: # Remove result_tables since we don't have a good json representation for them. sorted_on_id = [] for res in _sorted_on_id: - # res is CheckResult + # Each res is a CheckResult. res_dict: dict = dict(res.get_summary()) res_dict.pop("result_tables") sorted_on_id.append(res_dict) diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index 8c0faaad8..a9c330c1b 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -66,7 +66,7 @@ from macaron.slsa_analyzer.asset import VirtualReleaseAsset from macaron.slsa_analyzer.build_tool import BUILD_TOOLS -# To load all checks into the registry +# To load all checks into the registry. from macaron.slsa_analyzer.checks import * # pylint: disable=wildcard-import,unused-wildcard-import # noqa: F401,F403 from macaron.slsa_analyzer.ci_service import CI_SERVICES from macaron.slsa_analyzer.database_store import store_analyze_context_to_db @@ -136,7 +136,7 @@ def __init__(self, output_path: str, build_log_path: str) -> None: # Get the db manager singleton object. self.db_man: DatabaseManager = get_db_manager() - # Create database tables: all checks have been registered so all tables should be mapped now + # Create database tables: all checks have been registered so all tables should be mapped now. 
self.db_man.create_tables() self.local_artifact_repo_mapper = Analyzer._get_local_artifact_repo_mapper() @@ -652,7 +652,7 @@ def add_repository(self, branch_name: str | None, git_obj: Git) -> Repository | commit_date_str = commit_date.isoformat(sep="T", timespec="seconds") # We only allow complete_name's length to be 2 or 3 because we need to construct PURL - # strings using the complete_name, i.e., type/namespace/name@commitsha + # strings using the complete_name, i.e., type/namespace/name@commitsha. if (parts_len := len(Path(complete_name).parts)) < 2 or parts_len > 3: logger.error("The repository path %s is not valid.", complete_name) return None @@ -900,7 +900,7 @@ def to_analysis_target( # As there is no repo or commit from provenance, use the Repo Finder to find the repo. converted_repo_path = repo_finder.to_repo_path(parsed_purl, available_domains) if converted_repo_path is None: - # Try to find repo from PURL + # Try to find repo from PURL. repo, repo_finder_outcome = repo_finder.find_repo( parsed_purl, package_registries_info=package_registries_info ) diff --git a/src/macaron/slsa_analyzer/build_tool/gradle.py b/src/macaron/slsa_analyzer/build_tool/gradle.py index 2cc491934..86cbed156 100644 --- a/src/macaron/slsa_analyzer/build_tool/gradle.py +++ b/src/macaron/slsa_analyzer/build_tool/gradle.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the Gradle class which inherits BaseBuildTool. @@ -92,7 +92,7 @@ def prepare_config_files(self, wrapper_path: str, build_dir: str) -> bool: bool True if succeed else False. """ - # The path of the needed wrapper files + # The path of the needed wrapper files. 
wrapper_files = self.wrapper_files if copy_file_bulk(wrapper_files, wrapper_path, build_dir): diff --git a/src/macaron/slsa_analyzer/build_tool/maven.py b/src/macaron/slsa_analyzer/build_tool/maven.py index 69323ad9c..3c0d7a56b 100644 --- a/src/macaron/slsa_analyzer/build_tool/maven.py +++ b/src/macaron/slsa_analyzer/build_tool/maven.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the Maven class which inherits BaseBuildTool. @@ -85,7 +85,7 @@ def prepare_config_files(self, wrapper_path: str, build_dir: str) -> bool: bool True if succeed else False. """ - # The path of the needed wrapper files + # The path of the needed wrapper files. wrapper_files = self.wrapper_files if copy_file_bulk(wrapper_files, wrapper_path, build_dir): diff --git a/src/macaron/slsa_analyzer/build_tool/npm.py b/src/macaron/slsa_analyzer/build_tool/npm.py index 5f575b899..4c3bc0a56 100644 --- a/src/macaron/slsa_analyzer/build_tool/npm.py +++ b/src/macaron/slsa_analyzer/build_tool/npm.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the NPM class which inherits BaseBuildTool. @@ -55,7 +55,7 @@ def is_detected(self, repo_path: str) -> bool: """ # TODO: When more complex build detection is being implemented, consider # cases like .npmrc existing but not package-lock.json and whether - # they would still count as "detected" + # they would still count as "detected". 
npm_config_files = self.build_configs + self.package_lock + self.entry_conf return any(file_exists(repo_path, file) for file in npm_config_files) diff --git a/src/macaron/slsa_analyzer/build_tool/poetry.py b/src/macaron/slsa_analyzer/build_tool/poetry.py index eeb54216b..95a84b71f 100644 --- a/src/macaron/slsa_analyzer/build_tool/poetry.py +++ b/src/macaron/slsa_analyzer/build_tool/poetry.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the Poetry class which inherits BaseBuildTool. @@ -78,7 +78,7 @@ def is_detected(self, repo_path: str) -> bool: # Take the highest level file, if there are two at the same level, take the first in the list. file_path = min(files_detected, key=lambda x: len(Path(x).parts)) try: - # Parse the .toml file + # Parse the .toml file. with open(file_path, "rb") as toml_file: try: data = tomllib.load(toml_file) diff --git a/src/macaron/slsa_analyzer/build_tool/yarn.py b/src/macaron/slsa_analyzer/build_tool/yarn.py index 90c424035..c4d8c777d 100644 --- a/src/macaron/slsa_analyzer/build_tool/yarn.py +++ b/src/macaron/slsa_analyzer/build_tool/yarn.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the Yarn class which inherits BaseBuildTool. 
@@ -36,7 +36,7 @@ def load_defaults(self) -> None: # if "builder.yarn.ci.deploy" in defaults: # for item in defaults["builder.yarn.ci.deploy"]: # if item in self.ci_deploy_kws: - # self.ci_deploy_kws[item] = defaults.get_list("builder.yarn.ci.deploy", item) + ## self.ci_deploy_kws[item] = defaults.get_list("builder.yarn.ci.deploy", item) def is_detected(self, repo_path: str) -> bool: """Return True if this build tool is used in the target repo. @@ -53,7 +53,7 @@ def is_detected(self, repo_path: str) -> bool: """ # TODO: When more complex build detection is being implemented, consider # cases like .yarnrc existing but not package-lock.json and whether - # they would still count as "detected" + # they would still count as "detected". yarn_config_files = self.build_configs + self.package_lock + self.entry_conf return any(file_exists(repo_path, file) for file in yarn_config_files) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_check.py b/src/macaron/slsa_analyzer/checks/build_as_code_check.py index 1348b1307..3e36ec179 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_check.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_check.py @@ -150,7 +150,7 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: trusted_deploy_actions = tool.ci_deploy_kws["github_actions"] or [] # Check for use of a trusted GitHub Actions workflow to publish/deploy. - # TODO: verify that deployment is legitimate and not a test + # TODO: verify that deployment is legitimate and not a test. if trusted_deploy_actions: for callee in ci_info["callgraph"].bfs(): if isinstance(callee, GitHubWorkflowNode) and callee.node_type in [ @@ -180,7 +180,7 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: # Only third-party Actions can be called from a step. # Reusable workflows have to be directly called from the job. 
# See https://docs.github.com/en/actions/sharing-automations/ \ - # reusing-workflows#calling-a-reusable-workflow + ## reusing-workflows#calling-a-reusable-workflow if callee.node_type == GitHubWorkflowType.EXTERNAL: callee_step_obj = cast(ActionStep, callee.parsed_obj) if "id" in callee_step_obj: @@ -332,7 +332,7 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: # build tool usage. # TODO: When more sophisticated build tool detection is # implemented, consider whether this should be one fail = whole - # check fails instead + # check fails instead. return CheckResultData(result_tables=result_tables, result_type=overall_res) diff --git a/src/macaron/slsa_analyzer/checks/build_service_check.py b/src/macaron/slsa_analyzer/checks/build_service_check.py index cea689a7c..3b47277f4 100644 --- a/src/macaron/slsa_analyzer/checks/build_service_check.py +++ b/src/macaron/slsa_analyzer/checks/build_service_check.py @@ -200,7 +200,7 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: # build tool usage. # TODO: When more sophisticated build tool detection is # implemented, consider whether this should be one fail = whole - # check fails instead + # check fails instead. return CheckResultData(result_tables=result_tables, result_type=overall_res) diff --git a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py index 8514a458d..f753e1f09 100644 --- a/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py +++ b/src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py @@ -172,7 +172,7 @@ def evaluate_heuristic_results( facts_list.append(f"{heuristic.value} :- true.") elif result == HeuristicResult.FAIL: facts_list.append(f"{heuristic.value} :- false.") - # Do not define for HeuristicResult.SKIP + # Do not define for HeuristicResult.SKIP. 
facts = "\n".join(facts_list) problog_code = f"{facts}\n\n{self.malware_rules_problog_model}" @@ -182,7 +182,7 @@ def evaluate_heuristic_results( problog_results: dict[Term, float] = get_evaluatable().create_from(problog_model).evaluate() confidence = problog_results.pop(Term(self.problog_result_access), 0.0) - if confidence > 0: # a rule was triggered + if confidence > 0: # A rule was triggered. for term, conf in problog_results.items(): if term.args: triggered_rules[str(term.args[0])] = conf @@ -247,7 +247,7 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: result_tables: list[CheckFacts] = [] package_registry_info_entries = ctx.dynamic_data["package_registries"] - # First check if this package is a known malware + # First check if this package is a known malware. package_exists = False try: package_exists = bool(DepsDevService.get_package_info(ctx.component.purl)) @@ -312,7 +312,7 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: confidence = Confidence.HIGH result_type = CheckResultType.PASSED - # Source code analysis + # Source code analysis. try: sourcecode_result, sourcecode_detail_info = self.analyze_source( pypi_package_json, heuristic_results, force=ctx.dynamic_data["force_analyze_source"] @@ -325,7 +325,7 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: if sourcecode_result[Heuristics.SUSPICIOUS_PATTERNS] == HeuristicResult.FAIL: if result_type == CheckResultType.PASSED: - # heuristics determined it benign, so lower the confidence + # Heuristics determined it benign, so lower the confidence. confidence = Confidence.LOW result_type = CheckResultType.FAILED @@ -345,8 +345,8 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: # Return UNKNOWN result for unsupported ecosystems. 
return CheckResultData(result_tables=[], result_type=CheckResultType.UNKNOWN) - # This list contains the heuristic analyzer classes - # When implementing new analyzer, appending the classes to this list + # This list contains the heuristic analyzer classes. + # When implementing a new analyzer, append its class to this list. analyzers: list = [ EmptyProjectLinkAnalyzer, SourceCodeRepoAnalyzer, @@ -360,7 +360,7 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData: TyposquattingPresenceAnalyzer, ] - # name used to query the result of all problog rules, so it can be accessed outside the model. + # Name used to query the result of all problog rules, so it can be accessed outside the model. problog_result_access = "result" malware_rules_problog_model = f""" diff --git a/src/macaron/slsa_analyzer/checks/provenance_witness_l1_check.py b/src/macaron/slsa_analyzer/checks/provenance_witness_l1_check.py index c1eaff4e6..44fb5443d 100644 --- a/src/macaron/slsa_analyzer/checks/provenance_witness_l1_check.py +++ b/src/macaron/slsa_analyzer/checks/provenance_witness_l1_check.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This check examines a witness provenance (https://github.com/testifysec/witness).""" @@ -79,7 +79,7 @@ def verify_artifact_assets( """ # A look-up table to verify: # 1. if the name of the artifact appears in any subject of the witness provenance, then - # 2. if the digest of the artifact could be found + # 2. if the digest of the artifact could be found. 
look_up: dict[str, dict[str, InTotoV01Subject]] = {} for subject in subjects: diff --git a/src/macaron/slsa_analyzer/ci_service/github_actions/analyzer.py b/src/macaron/slsa_analyzer/ci_service/github_actions/analyzer.py index 4565c2098..9622bf75d 100644 --- a/src/macaron/slsa_analyzer/ci_service/github_actions/analyzer.py +++ b/src/macaron/slsa_analyzer/ci_service/github_actions/analyzer.py @@ -507,7 +507,7 @@ def __init__(self, external_node: GitHubWorkflowNode): external_node: GitHubWorkflowNode The external GitHub Action workflow node. """ - # external_node is assumed to be an EXTERNAL node with ActionStep parsed_obj + # external_node is assumed to be an EXTERNAL node with ActionStep parsed_obj. step = external_node.parsed_obj if not is_parsed_obj_action_step(step): raise ValueError("Expected an action step node") @@ -603,7 +603,7 @@ def __init__(self, external_node: GitHubWorkflowNode): external_node: GitHubWorkflowNode The external GitHub Action workflow node. """ - # external_node is assumed to be an EXTERNAL node with ActionStep parsed_obj + # external_node is assumed to be an EXTERNAL node with ActionStep parsed_obj. step = external_node.parsed_obj if not is_parsed_obj_action_step(step): raise ValueError("Expected an action step node") @@ -699,7 +699,7 @@ def __init__(self, external_node: GitHubWorkflowNode): external_node: GitHubWorkflowNode The external GitHub Action workflow node. """ - # external_node is assumed to be an EXTERNAL node with ActionStep parsed_obj + # external_node is assumed to be an EXTERNAL node with ActionStep parsed_obj. 
step = external_node.parsed_obj if not is_parsed_obj_action_step(step): raise ValueError("Expected an action step node") diff --git a/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py b/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py index 43c4e3f0e..93a1acf57 100644 --- a/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py +++ b/src/macaron/slsa_analyzer/ci_service/github_actions/github_actions_ci.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module analyzes GitHub Actions CI.""" @@ -168,7 +168,7 @@ def has_latest_run_passed( try: # Setting the timezone to UTC because the date format. # We are using for GitHub Actions is in ISO format, which contains the offset - # from the UTC timezone. For example: 2022-04-10T14:10:01+07:00 + # from the UTC timezone. For example: 2022-04-10T14:10:01+07:00. current_time = datetime.now(timezone.utc) # TODO: it is safer to get commit_date as a datetime object directly. commit_date_obj = datetime.fromisoformat(commit_date) @@ -370,7 +370,7 @@ def workflow_run_in_date_time_range( logger.debug("Checking workflow run of %s.", workflow) try: - # iterate through the responses in reversed order to add the run + # Iterate through the responses in reversed order to add the run # closest to the `date_time - time_range` timestamp first. for item in reversed(run_data["workflow_runs"]): # The workflow parameter contains the URL to the workflow. @@ -459,7 +459,7 @@ def workflow_run_deleted(self, timestamp: datetime) -> bool: # GitHub retains GitHub Actions pipeline data for 400 days. So, we cannot analyze the # pipelines if artifacts are older than 400 days. 
# https://docs.github.com/en/rest/guides/using-the-rest-api-to-interact-with-checks? - # apiVersion=2022-11-28#retention-of-checks-data + ## apiVersion=2022-11-28#retention-of-checks-data # TODO: change this check if this issue is resolved: # https://github.com/orgs/community/discussions/138249 if datetime.now(timezone.utc) - timedelta(days=400) > timestamp: diff --git a/src/macaron/slsa_analyzer/git_url.py b/src/macaron/slsa_analyzer/git_url.py index baadebc97..5544733d1 100644 --- a/src/macaron/slsa_analyzer/git_url.py +++ b/src/macaron/slsa_analyzer/git_url.py @@ -303,7 +303,7 @@ def clone_remote_repo(clone_dir: str, url: str) -> Repo | None: If the repository has not been cloned and the clone attempt fails. """ # Handle the case where the repository already exists in `/git_repos`. - # This could happen when multiple runs of Macaron use the same ``, leading to + ## This could happen when multiple runs of Macaron use the same ``, leading to # Macaron attempting to clone a repository multiple times. # In these cases, we should not error since it may interrupt the analysis. if os.path.isdir(clone_dir): @@ -498,7 +498,7 @@ def get_repo_full_name_from_url(url: str) -> str: full_name = parsed_url.path.split(".git")[0] - # The full name must be in org/repo format + # The full name must be in org/repo format. if len(full_name.split("/")) != 2: logger.error("Fullname %s extract from %s is not valid.", full_name, url) return "" @@ -713,7 +713,7 @@ def parse_remote_url( res_path = "" res_netloc = "" - # e.g., https://github.com/owner/project.git + # E.g., https://github.com/owner/project.git if parsed_url.scheme in {"http", "https", "ftp", "ftps", "git+https"}: if parsed_url.netloc not in allowed_git_service_hostnames: return None @@ -727,7 +727,7 @@ def parse_remote_url( # e.g.: # ssh://git@hostname:port/owner/project.git - # ssh://git@hostname:owner/project.git + ## ssh://git@hostname:owner/project.git 
elif parsed_url.scheme in {"ssh", "git+ssh"}: user_host, _, port = parsed_url.netloc.partition(":") user, _, host = user_host.rpartition("@") @@ -754,7 +754,7 @@ def parse_remote_url( res_scheme = "https" res_netloc = host - # e.g., git@github.com:owner/project.git + # E.g., git@github.com:owner/project.git elif parsed_url.scheme == "": user_host, _, port_path = parsed_url.path.partition(":") if not user_host or not port_path: diff --git a/src/macaron/slsa_analyzer/package_registry/osv_dev.py b/src/macaron/slsa_analyzer/package_registry/osv_dev.py index 9a5c96c13..3b7d5b80c 100644 --- a/src/macaron/slsa_analyzer/package_registry/osv_dev.py +++ b/src/macaron/slsa_analyzer/package_registry/osv_dev.py @@ -375,7 +375,7 @@ def is_version_affected( if "fixed" in e: fixed = e["fixed"] - # TODO: convert commit to tag & version + # TODO: convert commit to tag & version. parsed_introduced = version.Version("0") if introduced: try: diff --git a/src/macaron/slsa_analyzer/package_registry/pypi_registry.py b/src/macaron/slsa_analyzer/package_registry/pypi_registry.py index f0cfcfbc3..afe940848 100644 --- a/src/macaron/slsa_analyzer/package_registry/pypi_registry.py +++ b/src/macaron/slsa_analyzer/package_registry/pypi_registry.py @@ -194,7 +194,7 @@ def download_package_sourcecode(self, url: str) -> str: _, _, file_name = url.rpartition("/") package_name = re.sub(r"\.tar\.gz$", "", file_name) - # temporary directory to unzip and read all source files + # Temporary directory to unzip and read all source files. temp_dir = tempfile.mkdtemp(prefix=f"{package_name}_") response = send_get_http_raw(url, stream=True) if response is None: @@ -249,7 +249,7 @@ def download_package_sourcecode(self, url: str) -> str: extracted_dir = os.listdir(temp_dir) if len(extracted_dir) == 1 and package_name == extracted_dir[0]: - # structure used package name and version as top-level directory + # Structure used package name and version as top-level directory. 
temp_dir = os.path.join(temp_dir, extracted_dir[0]) else: @@ -320,7 +320,7 @@ def get_package_page(self, package_name: str) -> str | None: str | None The package main page. """ - # Important: trailing '/' avoids JS-based redirect; ensures Macaron can access the page directly + # Important: trailing '/' avoids JS-based redirect; ensures Macaron can access the page directly. url = urllib.parse.urljoin(self.registry_url, f"project/{package_name}/") response = send_get_http_raw(url) if response: @@ -361,7 +361,7 @@ def get_maintainer_profile_page(self, username: str) -> str | None: str | None The profile page. """ - # Important: trailing '/' avoids JS-based redirect; ensures Macaron can access the page directly + # Important: trailing '/' avoids JS-based redirect; ensures Macaron can access the page directly. url = urllib.parse.urljoin(self.registry_url, f"user/{username}/") response = send_get_http_raw(url, headers=None) if response: @@ -467,7 +467,7 @@ class PyPIPackageJsonAsset: #: The asset content. package_json: dict - #: the source code temporary location name + #: The source code temporary location name. package_sourcecode_path: str #: The size of the asset (in bytes). This attribute is added to match the AssetLocator diff --git a/src/macaron/slsa_analyzer/registry.py b/src/macaron/slsa_analyzer/registry.py index 3d8b6000f..6436357dc 100644 --- a/src/macaron/slsa_analyzer/registry.py +++ b/src/macaron/slsa_analyzer/registry.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the Registry class for loading checks.""" @@ -41,7 +41,7 @@ class Registry: # Map between a check and any child checks that depend on it. 
_check_relationships_mapping: dict[str, dict[str, CheckResultType]] = {} - # The format for check id + # The format for check id. _id_format = re.compile(r"^mcn_([a-z]+_)+([0-9]+)$") def __init__(self) -> None: @@ -154,7 +154,7 @@ def _validate_check(check: Any) -> bool: ) return False - # Try to get the path to the check module file + # Try to get the path to the check module file. check_module = inspect.getmodule(check) if not check_module: logger.critical("Cannot resolve the Check module.") @@ -479,7 +479,7 @@ def scan(self, target: AnalyzeContext) -> dict[str, CheckResult]: ) continue - # Don't run excluded checks + # Don't run excluded checks. if check_id not in self.checks_to_run: logger.debug( "Check %s is disabled by user configuration.", @@ -487,7 +487,7 @@ def scan(self, target: AnalyzeContext) -> dict[str, CheckResult]: ) continue - # Look up check results to see if this check should be run based on its parent status + # Look up check results to see if this check should be run based on its parent status. skipped_info = self._should_skip_check(check, results) if skipped_info: skipped_checks.append(skipped_info) @@ -589,7 +589,7 @@ def _should_skip_check(check: BaseCheck, results: dict[str, CheckResult]) -> Ski parent_id: str = parent[0] expect_status: CheckResultType = parent[1] - # Look up the result of this parent check + # Look up the result of this parent check. parent_result = results[parent_id] got_status = parent_result.result.result_type diff --git a/src/macaron/slsa_analyzer/slsa_req.py b/src/macaron/slsa_analyzer/slsa_req.py index 0fcb16e5a..bcd6ae391 100644 --- a/src/macaron/slsa_analyzer/slsa_req.py +++ b/src/macaron/slsa_analyzer/slsa_req.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""This module contains the base classes for defining SLSA requirements.""" @@ -14,13 +14,13 @@ class ReqName(Enum): """Store the name of each requirement.""" - # Source requirements + # Source requirements. VCS = "Version controlled" VERIFIED_HISTORY = "Verified history" RETAINED_INDEFINITELY = "Retained indefinitely" TWO_PERSON_REVIEWED = "Two-person reviewed" - # Build requirements + # Build requirements. SCRIPTED_BUILD = "Scripted Build" BUILD_SERVICE = "Build service" BUILD_AS_CODE = "Build as code" @@ -30,14 +30,14 @@ class ReqName(Enum): HERMETIC = "Hermetic" REPRODUCIBLE = "Reproducible" - # Provenance requirements + # Provenance requirements. PROV_AVAILABLE = "Provenance - Available" PROV_AUTH = "Provenance - Authenticated" PROV_SERVICE_GEN = "Provenance - Service generated" PROV_NON_FALSIFIABLE = "Provenance - Non falsifiable" PROV_DEPENDENCIES_COMPLETE = "Provenance - Dependencies complete" - # Provenance content requirements + # Provenance content requirements. PROV_CONT_ARTI = "Provenance content - Identifies artifacts" PROV_CONT_BUILDER = "Provenance content - Identifies builder" PROV_CONT_BUILD_INS = "Provenance content - Identifies build instructions" @@ -48,7 +48,7 @@ class ReqName(Enum): PROV_CONT_REPRODUCIBLE_INFO = "Provenance content - Includes reproducible info" PROV_CONT_META_DATA = "Provenance content - Includes metadata" - # Common requirements + # Common requirements. SECURITY = "Security" ACCESS = "Access" SUPERUSERS = "Superusers" diff --git a/src/macaron/slsa_analyzer/specs/build_spec.py b/src/macaron/slsa_analyzer/specs/build_spec.py index bdd2defac..41e707d5c 100644 --- a/src/macaron/slsa_analyzer/specs/build_spec.py +++ b/src/macaron/slsa_analyzer/specs/build_spec.py @@ -1,5 +1,6 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. +# grammar: off """This module contains the BuildSpec class.""" diff --git a/tests/analyze_json_output/compare_analyze_json_output.py b/tests/analyze_json_output/compare_analyze_json_output.py index 322a902a8..158936b75 100755 --- a/tests/analyze_json_output/compare_analyze_json_output.py +++ b/tests/analyze_json_output/compare_analyze_json_output.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module checks the result JSON files against the expected outputs.""" @@ -17,7 +17,7 @@ def compare_check_results(result: dict, expected: dict) -> int: """Compare the content of the target.checks section.""" fail_count = 0 - # Compare summary + # Compare summary. for key, exp_val in expected["summary"].items(): if exp_val != result["summary"].get(key): logger.error( @@ -28,7 +28,7 @@ def compare_check_results(result: dict, expected: dict) -> int: ) fail_count += 1 - # Compare check results + # Compare check results. res_sorted_reqs = sorted(result["results"], key=lambda item: str(item["check_id"])) exp_sorted_reqs = sorted(expected["results"], key=lambda item: str(item["check_id"])) @@ -84,13 +84,13 @@ def compare_check_results(result: dict, expected: dict) -> int: def compare_target_info(result: dict, expected: dict) -> int: """Compare the content of the target.info section""" - # Remove nondeterministic fields + # Remove nondeterministic fields. result["local_cloned_path"] = expected["local_cloned_path"] = "" result["commit_date"] = expected["commit_date"] = "" fail_count = 0 - # Iterate through elements in the JSON dictionary + # Iterate through elements in the JSON dictionary. 
for key, exp_item in expected.items(): result_item = result.get(key) diff --git a/tests/dependency_analyzer/cyclonedx/test_cyclonedx.py b/tests/dependency_analyzer/cyclonedx/test_cyclonedx.py index ce421f56c..a54ab1295 100644 --- a/tests/dependency_analyzer/cyclonedx/test_cyclonedx.py +++ b/tests/dependency_analyzer/cyclonedx/test_cyclonedx.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module tests the CycloneDX helper functions.""" @@ -136,7 +136,7 @@ def test_convert_components_to_artifacts_java( # Path to the root bom.json. root_bom_path = Path(RESOURCES_DIR, "bom_aws_parent.json") - # Disable repo finding to prevent remote calls during testing + # Disable repo finding to prevent remote calls during testing. load_defaults(os.path.join(os.path.dirname(os.path.abspath(__file__)), "defaults.ini")) assert defaults.getboolean("repofinder.java", "find_repos") is False assert defaults.get_list("repofinder", "redirect_urls") == [] diff --git a/tests/integration/run.py b/tests/integration/run.py index 2cb77025b..573f3bf5e 100644 --- a/tests/integration/run.py +++ b/tests/integration/run.py @@ -683,7 +683,7 @@ def run(self, macaron_cmd: str, interactive: bool, dry: bool) -> int: ret = 0 - # Clean up previous results + # Clean up previous results. 
output_dir = os.path.join(self.case_dir, "output") if not dry and os.path.isdir(output_dir): remove_output = True diff --git a/tests/malware_analyzer/pypi/resources/sourcecode_samples/exfiltration/expected_results.json b/tests/malware_analyzer/pypi/resources/sourcecode_samples/exfiltration/expected_results.json index ef19d6c0e..6abc2e439 100644 --- a/tests/malware_analyzer/pypi/resources/sourcecode_samples/exfiltration/expected_results.json +++ b/tests/malware_analyzer/pypi/resources/sourcecode_samples/exfiltration/expected_results.json @@ -5,18 +5,18 @@ "detections": [ { "file": "exfiltration/remote_exfiltration.py", - "start": 31, - "end": 31 + "start": 32, + "end": 32 }, { "file": "exfiltration/remote_exfiltration.py", - "start": 42, - "end": 42 + "start": 43, + "end": 43 }, { "file": "exfiltration/remote_exfiltration.py", - "start": 50, - "end": 50 + "start": 51, + "end": 51 } ] } diff --git a/tests/malware_analyzer/pypi/resources/sourcecode_samples/exfiltration/remote_exfiltration.py b/tests/malware_analyzer/pypi/resources/sourcecode_samples/exfiltration/remote_exfiltration.py index e2602ef1f..cd87cb319 100644 --- a/tests/malware_analyzer/pypi/resources/sourcecode_samples/exfiltration/remote_exfiltration.py +++ b/tests/malware_analyzer/pypi/resources/sourcecode_samples/exfiltration/remote_exfiltration.py @@ -1,5 +1,6 @@ # Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. +# grammar: off """ Running this code will not produce any malicious behavior, but code isolation measures are @@ -8,7 +9,7 @@ import sys -# ensure no symbols are exported so this code cannot accidentally be used +# Ensure no symbols are exported so this code cannot accidentally be used. 
__all__ = [] sys.exit() diff --git a/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/decode_and_execute.py b/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/decode_and_execute.py index 114072a52..90484516c 100644 --- a/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/decode_and_execute.py +++ b/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/decode_and_execute.py @@ -8,7 +8,7 @@ import sys -# ensure no symbols are exported so this code cannot accidentally be used +# Ensure no symbols are exported so this code cannot accidentally be used. __all__ = [] sys.exit() @@ -23,7 +23,7 @@ def test_function(): # NOTE: since these are flows, all will go in subfunctions that they are supposed to be in so the detections # can be sections out. - # marshal encryption from pyobfuscate.com/marshal-encrypt, script is just print("Hello world!") + # Marshal encryption from pyobfuscate.com/marshal-encrypt, script is just print("Hello world!"). def marshal_flow(): from marshal import loads bytecode = loads(b'\xe3\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00@\x00\x00\x00s\x0c\x00\x00\x00e\x00d\x00\x83\x01\x01\x00d\x01S\x00)\x02z\x0cHello world!N)\x01\xda\x05print\xa9\x00r\x02\x00\x00\x00r\x02\x00\x00\x00\xfa\x08\xda\x08\x01\x00\x00\x00\xf3\x00\x00\x00\x00') @@ -40,9 +40,9 @@ def bytes_eval_to_soc_bind(): def map_b64_to_request(): import requests as req - # decodes to GET + # Decodes to GET. method = __import__('base64').b64decode(b'R0VU') - # just maps to a random fake link https://www.print-hello-world-sample.com/print/hello/world! + # Just maps to a random fake link https://www.print-hello-world-sample.com/print/hello/world! 
link = ''.join(map(chr, [104, 116, 116, 112, 115, 58, 47, 47, 119, 119, 119, 46, 112, 114, 105, 110, 116, 45, 104, 101, 108, 108, 111, 45, 119, 111, 114, 108, 100, 45, 115, 97, 109, 112, 108, 101, 46, 99, 111, 109, 47, 112, 114, 105, 110, 116, 47, 104, 101, 108, 108, 111, 47, 119, 111, 114, 108, 100, 33])) _ = req.Request(method, link) @@ -51,17 +51,17 @@ def zlib_ast_subprocess(): import zlib as zeeee from ast import literal_eval - # just decodes to ["echo", "Hello world!"] + # Just decodes to ["echo", "Hello world!"]. subprocess.Popen(literal_eval(zeeee.decompress(b'x\x9c\x8bVOM\xce\xc8W\xd7QP\xf7H\xcd\xc9\xc9W(\xcf/\xcaIQT\x8f\x05\x00]\xa0\x07\x9d').decode())) def propagation_to_write(): import os as e - # symbol propagations should detect assign of os as e to o and bytes to b and still trigger + # Symbol propagations should detect assign of os as e to o and bytes to b and still trigger. o = e b = bytes - # just decodes to "Hello world!" + # Just decodes to "Hello world!". contents = b.fromhex("48656C6C6F20776F726C6421") - # just decodes to "some_path" + # Just decodes to "some_path". file = o.open(''.join(chr(c) for c in [115, 111, 109, 101, 95, 112, 97, 116, 104]), o.O_RDWR) o.pwritev(file, contents, 0) diff --git a/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/inline_imports.py b/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/inline_imports.py index 80e006781..35a2eeaba 100644 --- a/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/inline_imports.py +++ b/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/inline_imports.py @@ -8,7 +8,7 @@ import sys -# ensure no symbols are exported so this code cannot accidentally be used +# Ensure no symbols are exported so this code cannot accidentally be used. 
__all__ = [] sys.exit() @@ -27,6 +27,6 @@ def test_function(): __import__('os') __import__('zlib') __import__('marshal') - # these both just import builtins + # These both just import builtins. __import__('\142\165\151\154\164\151\156\163') __import__('\x62\x75\x69\x6c\x74\x69\x6e\x73') diff --git a/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/obfuscation_tools.py b/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/obfuscation_tools.py index 270f88600..98b624938 100644 --- a/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/obfuscation_tools.py +++ b/tests/malware_analyzer/pypi/resources/sourcecode_samples/obfuscation/obfuscation_tools.py @@ -8,7 +8,7 @@ import sys -# ensure no symbols are exported so this code cannot accidentally be used +# Ensure no symbols are exported so this code cannot accidentally be used. __all__ = [] sys.exit() @@ -19,7 +19,7 @@ def test_function(): samples. """ sys.exit() - # using pyobfuscate.com/rename-obf to rename items, code is a class that has one method that prints Hello world! + # Using pyobfuscate.com/rename-obf to rename items, code is a class that has one method that prints Hello world! lllllllllllllll, llllllllllllllI = __name__, print class lIIlIIIIIIIlIlllIl: @@ -33,7 +33,7 @@ def IIlIlIIIIlIlIlIIll(IIIlIlIIllllIlIlll): llIlIIIllIIIIlIlll = lIIlIIIIIIIlIlllIl() llIlIIIllIIIIlIlll.IIlIlIIIIlIlIlIIll() - # using using pyob.oxyry.com's naming convention + # Using pyob.oxyry.com's naming convention. __O0O00O00O0OOOOO0O, __OO00000OOOO000OO0 = __name__, print class OO0OO0OOO0OOOO000: @@ -47,7 +47,7 @@ def OOOOOO000OOO0O0O0(O00O00O0O00O000O0): __OO00000O00OOOO0OO = OO0OO0OOO0OOOO000() __OO00000O00OOOO0OO.OOOOOO000OOO0O0O0() - # using pyarmor's RTF mode naming convention + # Using pyarmor's RTF mode naming convention. 
pyarmor__12, pyarmor__14 = __name__, print class pyarmor__16: @@ -61,9 +61,9 @@ def pyarmor__24(pyarmor__60): pyarmor__2 = pyarmor__16() pyarmor__2.pyarmor__24() - # inline pyarmor marker + # Inline pyarmor marker. # pyarmor: print('this script is obfuscated') - # obfuscated using pyobfuscate.com/pyd's AES 256-bit encryption + # Obfuscated using pyobfuscate.com/pyd's AES 256-bit encryption. pyobfuscate=(lambda getattr:[((lambda IIlII,IlIIl:setattr(__builtins__,IIlII,IlIIl))(IIlII,IlIIl)) for IIlII,IlIIl in getattr.items()]);Il=chr(114)+chr(101);lI=r'[^a-zA-Z0-9]';lIl=chr(115)+chr(117)+chr(98);lllllllllllllll, llllllllllllllI, lllllllllllllIl,lllllllllIIllIIlI = __import__, getattr, bytes,exec __import__("sys").setrecursionlimit(100000000);lllllllllIIllIIlI(llllllllllllllI(lllllllllllllll(lllllllllllllIl.fromhex('7a6c6962').decode()), lllllllllllllIl.fromhex('6465636f6d7072657373').decode())(lllllllllllllIl.fromhex('789ced1ded6edb38f2557cbf22b559c1f737455e615f20300437717b069cb8485cec2e0ef7ee2759964491f3c90f597224140b9543cef70c6748b95b96f5f378d8be7e7fd9aeca87d33fbf76d99732ff56aecbc78fd3fbb7f2f23cbeec9f4fd5683775fd50ae8bb27c3ebeeccab2783e96dbf79fcfc7df6fa7f392c73f8f6fbbea6dfd58ae1b1c8f0d9eeca9c29ce5f7d59f167556c1d65983f7a15a5243fa918a52f5f79ada79c500500fe69b8ad9f2f169736195a2033fa648e7a7306617c822674276561db928cbeba752efba67ca62c2e400a67759e960e86d58affeb9eb665a04feb3fbfb1e18ae6c7d6f68a25179cf8b561bcc70d928e3a9056ebe7ae2ef703d652dfccb97f6ed6bb7240f2090197caa191d7006b15671e6ab5b9bb50e79ee25ec10599e354ee07a6af300feaa8e1867fec0271eda974eae40233276c89b5411413015d9388ec9a2e9c32383228566323767d253c1e8cbfd3c522fe5a89660fc772ccb926c991c6db0582eeddde36dfbdaef1f6d40fc381cb72710433be3e7e1f87d7bf870e7b413f6a7dd3bca439c6dd8021ad2fb99231c438b854d3fc364dbeaec690369ccdea9d78f55ddb38675db627a3ebefe3af4bbffd0e4cf87edc7c7d9e67d02fcb13f746ed08efef77ff736de5fc7bfee873c3dfcdc9db6a7d33b659baccab74f04bc99044ec80671e66d9750349874839853236dcbbcd8bc4517364c4c185df20cdd533bd3eb6a8ca924ff44ae5de51
0a7921bc6ffcbfeddee18cc59565fd2da6988b39d7b7c7fb9a71004158a4d13756efabc115ac054dd43a4d225298f00aff556a443953d6d545e9b653e55a4b85cedc73c0acfe00ddf4010905ece1a0d4c87bdc3085eaea0a98858b498e279a0bc89d23be320aed2679036a7b6676818a58caedbcc53b7c218cd2e141261778be27eceb241c4de20ec72823fbf6cb08c76fa103d18977c776bf94e7166e6229abb6e3a708a236d1feee225c31ad31f1d78035c7798874c59776e32c9d41b642b1c57b08dae6d645141c3edfbf076e40ea5e93de43919cc210af472e1f4fb2b91dfa0dd0fe0ba2ade18b2c16503c12c5660286f37e4b21a2b914b5c23374108dcf2d7a6dcc910a837777158dd14eb56246ceb362b1c7d5994e7c0317dbda1640643598e74d836c717dfb79224e638eb0e47a58cfd0f22bd3199af6eb2d61bc291ea63b6dde163677077666b9d886c470f02d5c01fc7f7b302f66f75f3919b370e5cd3c1160b0f46b510ff50c5f00977b139c3f856a5bffe299b3c70ae51065aa995d168a5b9a8af15d3b17ef6963cdf40df74148d104543d272caccf0315349608aab8611a7a7531f84ed2c7cd7554eae9dece619e12f6612e0b1c7276d2e908dd6b98ee3b8e968d0740d813d3e56b248b92fcc8e0cdf55f8b943fba22808e07b0d963a8094f345bd079a7b31f2c10acd1d5662912a91577ec3fb628eab0e5782161bff1081e38ad395108c38577d0c9aec1034ddf9eae3652caf37f084fcdb4f5a79c86c12ebb4519979049c091923f3879d490484bba521d9449d88635a0a103e4641e0a1b9880d89f169d7f5124bd8698a3eb12b542ededb0066f9649762bf10947d91e48f9f9c1869838c91bb5f31a6bb4fe424ed188c5d3c71843df21dad58a7a8948b0e08c416bdfa3531a350dc86c99aafa4bbf5c5e327e0e3ce8b321b246d8a74fe116fe7153a008d589c30f515c1d56ae7160cb5fec12d6fc43a35042cda84476ebbc58a715fc2ba5acf5d83c87983275d599b62b3c53a8564dd8cead39708a9d20547a8e9e36da481d93376334c6aab5bdbbafd355d9de4cfa78232a40bed92c33dcc233b46389c8f2e7460ef40a25166e694c78063ec65fefb7a4f2aec0aca5dece9a9aeec4adff1898ea8d54bac33b4de49af7a18d6c1639d09191286e5b274d5accb3551080579b9e025fd711576206c97af57dcd23bee8917d63126d7e13a1de10855751093ba3dc57d11745f89cbbba1575ff152ae9b203e4c135e4b8ce74a148fe4a5479ae336774d408f1fa7750cb8f6080c54f89f4998d0415ae86d162fcdf0f7bdd1bba69e57a53dbd0aec906fbcd2b520ad9fcda142f5edf6253c744812ddce881928553575ea8226ecaa5d73a5dbe7bb6bba62c7a4e645549886e849b427a
708338f1d3bced6a966355483e0763bc5db4e9fb649204bacab2aed098a4b3566de1dedf4cc7f73d7ab0af4d5ebb96afab396511d324eea897cbc19a28098a72a486c26687426518f462d0113da3d6e4e0c3d115688ea187fb2f7386a170fb6b977b773c5eec1f74220fcdc3530a84d47bcfe114bb22633c6cdcb68ff6496f6a346da4d94aa8cd9faf67ab0f2dde4be919717bceee2d0ebe20e4fd4afeac38af44437d2f8575971afd1340df0668493c0321dfa712e85da9fed151740f3dfafdddfcda74021672001c2715ea0b70074408cbe4020ae460782a337392194afa0c54150b5c0f361d695540124d61033a3b0a0f6df07b3796a281663ba0b221d4aeb3d2acd721251e62078ee40851b0dc600171c61b1414ac9d181514ecdb7009935c9a9a4b332cc00c2a8d0818924818f1b1a45782d7a8773c881fe86e862305799300937c391396cd0080b4b1a96c72105c239006a85b354ca2a91e630230bb91aaa9f94873437c60b665e3a91810b7bf258ead56c8ba486344e8213004d66a37627b94b6da2b83b78953300c8e552538ab48b2016c9fc2ee1159509cf78ea3d51babd312a77025e610b63d0cda45205b8590411570b506a8ddcf8a9bc8b8258a589799339ea27f03ab555b59afbb47e0c5b5cd3c675624853b6af401c046242ca326137dec524ce230e2c5554201cd8eb901211a218da0542a0ae301304180a7415684de3538f5d1be1b3157641b55798a491ac07d45ceaadc72e93f5540dcc1c75649ccd17d59ff4dfc7253fdd64c2ec4680449e411049f72f18a2c878f82252589c840630857ce362e1e5472d042f91548710b5a1f023d509ca3cc5381f69239cb2b30ccd3bb8290004c272545e6b24109d630ede16780713217297f031e9d57e58831aebb876651792de73b56681095d4a5f62b640fb83c171e30eb7f4bbacc45efd2e44455cbbceb56ca5d04daef8bcb5b29550fe721f3f84ba296c18843203c019bdf4dad6fcea51a16659ed68c4a0b9e280d73f6a83add64fb5087ae150734a7160af1b8847535c8ed84a4d40077a168d786e85a13528090b1850481682becc00d29771da28c1e6cb8d898d7d02875b3a1156629f4e04a3ae3b48ef9e59b52644fcde101088a409b42376e4f99454444de8557ff1992b415501a7db525a6811e37245d2a27060dfaf1387bc8afd0b654864735c75586af6def5681cd24053188291c672cff4bfd31be5275ae3fdc3d2cbad6e14e054f723922c9b0974e53704b8b1cd080b8ee55657eb9bce3234efe0a6001078ef5db0932cb7ba43c6956967686fb175a8e24619981440a9b5e553668ee004bc6e39506125f6f3e3a0bbdd1997af1dcee9d49a4b093be471296135de8e05c852c2e2e0b0682a4897e5141016648848323a3
0ca19c60369a758d9c3f3330e944d6c5eb4b46cc700a8b814499bdaa597df95a6288bad41690a820d37cb2256b53561beaccea1ced827f0b5a5056325f6fd35a9b67ae3b4aeae5d18edc0c30002a1f524f9be194d51de79fdd88ffb91df7014bf33f1206d52e0bc091d66dca8703f9018ed0a6c6c62e3d01a557db33e87f1eac3e60574403e197c39839942cf49c98805c77206a3148a64de60c4c18ea627dc62132f6308aaea109c5fd3610d6aac73332d0213e194bec46c81f60783e3c61d6ee9725989f52e0cdadaf3a3edf333bf729708e21b0202e134ed3a57518088139635c427aa8914b5a8360125915cca4912496c50e530b3200aaa9c4f2132d6d13c595cc7081f10f9f273c51baf41ac410bae5040215948c6d7d58a5e93073fb589b903ddc019fb047eb7d4beacc4be9e0ce3d494c0a9cf93675452d306ba1120109d4b294dcfb0d1ba6486a1255301cc6c495b56b129905c4a4802835c02c518027911266cb1ab899853e8a09f2a65034c41038a4bf9ae8a11b9bd4817ba8d32ca1ab4e00a05149285a02f3380f4458f364ab0f9726362639fc0e196ba9d95d8bb030dfb7dcc8c8a6a1584c03827201049d3aeb199a914090dc01ae2d356829202ceb5b4fccafcb27c96a1148a64de6004dc4a347b278c40b80309b6b6cba8bf8628aaea109c5fc5610d6aac7363f50105e4d426e60e740330466edcef964a979538c493434a5de39955d50b7b7a8ffe1680401c4dbece35ed02da19331c44c2c53df98296d4945ff9280c6f563b4b11040d5a7085020ac94234d71180a508e251cfcfef962288957802455039b33ac85934a1fae55395458431fd0e6bc55512311f665d4995c987fc0c3b67b9796d18944b61c632ac77789c3f0a0231111a0fd07a30e804bb9f85d05a0572cae150ca497360afeb20a07a0614976f195a3712da456a2f654ccfb1d6b406256101030ac94234d40940faca501b25d87cb931b1b14fe0704b73c34a1cf02d030813b637c9bf142eddc68667d3e6550240357e4f7120303f1a53f7946c986788afa79befc329dd610c77e4318705c7d37edbd23c293f774fdd975fd65776697da55e5034aff70eb05ebc19edff0cd1139aa01aa0516615bc081c554ddda4fd178eaff5cbe63846011d763eec47f4a9012a582957d589cbd20c4d04e9758662a4c95ea0d325d70e12ffa9d3e5a7b33a8fcab2c448fbd56d1b62a8d311549a94c46ddb8a098a5b11de4eb8e93ca6f9f3ad22f194d5c0a29dd6b2d80b5a61e966d870688ef10ca7131311d4007e741a8e9f5802ad32a7a32c3b3869e1382686cb077349c418566a11cec979d5e65b76f1fa5576795b3d1f5f760fabdddffb5356bfe679f6ef7cb5ffb17a3b9e56cfdbc361fbfdb0cb7eee4edbd3e9bd6ae
ebfffde1f4efbb78fcabb5777cfc7d75ffbc3eeee7ef5e7f1ad5aba3abeafa0a9c56562b5a42c5f8f2fbf0fbbb2ac56dddde5ab7f3daeeeda997722046fdbd7e1f2968dd5eef0b1bbb092d50130088d6fad22eabf19efe0434ea834f994b5f02f5fdab7afdd927cf3d58b408db81df644919c4780d74d6ece0cd7ed43c777fb12c0abc9a7164b5695c9feab5d3e1883346aecb33a90e16393fc647e9aff1f99b5fed2'.replace("\n" , ""))).decode()) diff --git a/tests/malware_analyzer/pypi/test_pypi_sourcecode_analyzer.py b/tests/malware_analyzer/pypi/test_pypi_sourcecode_analyzer.py index 295083e08..d59b81c23 100644 --- a/tests/malware_analyzer/pypi/test_pypi_sourcecode_analyzer.py +++ b/tests/malware_analyzer/pypi/test_pypi_sourcecode_analyzer.py @@ -65,7 +65,7 @@ def test_no_custom_path(mock_defaults: MagicMock) -> None: analyzer = PyPISourcecodeAnalyzer(resources_path=RESOURCES_PATH) assert analyzer.custom_rule_path is None - # Make sure the empty string is not considered as a path + # Make sure the empty string is not considered as a path. mock_defaults.has_section.side_effect = lambda section: section == "heuristic.pypi" mock_defaults.__getitem__.side_effect = lambda section: ( MagicMock(get=lambda _: "") if section == "heuristic.pypi" else None @@ -109,7 +109,7 @@ def test_invalid_custom_rules(mock_defaults: MagicMock) -> None: @patch("macaron.malware_analyzer.pypi_heuristics.sourcecode.pypi_sourcecode_analyzer.defaults") @pytest.mark.parametrize( - # the sourcecode sample directory under resources/sourcecode_samples and the semgrep rule under resources/pypi_malware_rules + # The sourcecode sample directory under resources/sourcecode_samples and the semgrep rule under resources/pypi_malware_rules. 
("sourcecode_sample_dir", "rule_file"), [ pytest.param("obfuscation", "obfuscation.yaml", id="obfuscation"), @@ -127,7 +127,7 @@ def test_rules( with open(os.path.join(sample_path, "expected_results.json"), encoding="utf-8") as file: expected_results = json.loads(file.read()) - # Test with none of the defaults.ini settings used, to ensure this ruleset is run + # Test with none of the defaults.ini settings used, to ensure this ruleset is run. mock_defaults.has_section.side_effect = lambda section: section == "heuristic.pypi" mock_defaults.__getitem__.side_effect = lambda section: ( MagicMock(get=lambda _: None) if section == "heuristic.pypi" else None @@ -167,7 +167,7 @@ def test_custom_rules(mock_defaults: MagicMock, pypi_package_json: MagicMock) -> assert result == HeuristicResult.FAIL - # ensure the type is correct + # Ensure the type is correct. assert isinstance(analysis["enabled_sourcecode_rule_findings"], dict) assert all(isinstance(k, str) for k in analysis["enabled_sourcecode_rule_findings"]) @@ -177,7 +177,7 @@ def test_custom_rules(mock_defaults: MagicMock, pypi_package_json: MagicMock) -> @patch("macaron.malware_analyzer.pypi_heuristics.sourcecode.pypi_sourcecode_analyzer.defaults") @pytest.mark.parametrize( - # the sourcecode sample directory under resources/sourcecode_samples and the semgrep rule under resources/pypi_malware_rules + # The sourcecode sample directory under resources/sourcecode_samples and the semgrep rule under resources/pypi_malware_rules. ("defaults", "list_keys", "rulefile_path"), [ pytest.param( @@ -226,7 +226,7 @@ def test_disabling_rulesets( assert result == HeuristicResult.FAIL - # ensure the type is correct + # Ensure the type is correct. 
assert isinstance(analysis["disabled_sourcecode_rule_findings"], dict) assert all(isinstance(k, str) for k in analysis["disabled_sourcecode_rule_findings"]) @@ -282,7 +282,7 @@ def test_disabling_rules(mock_defaults: MagicMock, pypi_package_json: MagicMock) assert result == HeuristicResult.FAIL - # ensure the type is correct + # Ensure the type is correct. assert isinstance(analysis["disabled_sourcecode_rule_findings"], dict) assert all(isinstance(k, str) for k in analysis["disabled_sourcecode_rule_findings"]) diff --git a/tests/malware_analyzer/pypi/test_wheel_absence.py b/tests/malware_analyzer/pypi/test_wheel_absence.py index 2c233428f..deb17699f 100644 --- a/tests/malware_analyzer/pypi/test_wheel_absence.py +++ b/tests/malware_analyzer/pypi/test_wheel_absence.py @@ -219,7 +219,7 @@ def test_analyze_both_present(mock_send_head_http_raw: MagicMock, pypi_package_j pypi_package_json.package_json = {"info": {"name": "ttttttttest_nester"}} pypi_package_json.pypi_registry.inspector_url_scheme = "https" pypi_package_json.pypi_registry.inspector_url_netloc = "inspector.pypi.io" - mock_send_head_http_raw.return_value = MagicMock() # assume valid URL for testing purposes + mock_send_head_http_raw.return_value = MagicMock() # Assume valid URL for testing purposes. expected_detail_info = { "inspector_links": {wheel_link_expected: True, tar_link_expected: True}, diff --git a/tests/parsers/yaml/test_yaml_loader.py b/tests/parsers/yaml/test_yaml_loader.py index 56b9f5140..ffcd0cf05 100644 --- a/tests/parsers/yaml/test_yaml_loader.py +++ b/tests/parsers/yaml/test_yaml_loader.py @@ -24,15 +24,15 @@ class TestYamlLoader(TestCase): def test_load_yaml_content(self) -> None: """Test the load yaml content method.""" - # Valid content + # Valid content. with patch("yamale.make_data", return_value=[({"yaml": None}, None)]): assert YamlLoader._load_yaml_content("sample_file_path") == [({"yaml": None}, None)] - # Failed while loading the yaml file + # Failed while loading the yaml file. 
with patch("yamale.make_data", side_effect=YAMLError): assert not YamlLoader._load_yaml_content("sample_file_path") - # File not found + # File not found. with patch("yamale.make_data", side_effect=FileNotFoundError): assert not YamlLoader._load_yaml_content("sample_file_path") @@ -43,11 +43,11 @@ def test_validate_yaml_data(self) -> None: mock_schema = Schema({}) mock_data: list = [] - # No errors + # No errors. with patch("yamale.validate", return_value=[]): assert YamlLoader.validate_yaml_data(mock_schema, mock_data) - # Errors exist + # Errors exist. with patch("yamale.validate", side_effect=YamaleError(results=[])): assert not YamlLoader.validate_yaml_data(mock_schema, mock_data) diff --git a/tests/slsa_analyzer/checks/base_check/test_base_check.py b/tests/slsa_analyzer/checks/base_check/test_base_check.py index b2c8d8db6..c6d07e61b 100644 --- a/tests/slsa_analyzer/checks/base_check/test_base_check.py +++ b/tests/slsa_analyzer/checks/base_check/test_base_check.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the tests for BaseCheck.""" @@ -12,7 +12,7 @@ class TestConfiguration(TestCase): """This class contains the tests for BaseCheck.""" - # Disable flake8's D202 check: "No blank lines allowed after function docstring" + # Disable flake8's D202 check: "No blank lines allowed after function docstring". 
def test_raise_implementation_error(self) -> None: """Test raising errors if child class does not override abstract method(s).""" # noqa: D202 diff --git a/tests/slsa_analyzer/checks/test_provenance_repo_commit_checks.py b/tests/slsa_analyzer/checks/test_provenance_repo_commit_checks.py index fa65d2002..b5ec29179 100644 --- a/tests/slsa_analyzer/checks/test_provenance_repo_commit_checks.py +++ b/tests/slsa_analyzer/checks/test_provenance_repo_commit_checks.py @@ -53,7 +53,7 @@ def test_provenance_repo_commit_checks_pass( context = _prepare_context(macaron_path, repository) context.dynamic_data["provenance_info"] = Provenance(repository_url=repo_url, commit_sha=commit_digest) - # Check Repo + # Check Repo. repo_result = _perform_check_assert_result_return_result( ProvenanceDerivedRepoCheck(), context, CheckResultType.PASSED ) @@ -61,7 +61,7 @@ def test_provenance_repo_commit_checks_pass( assert repo_fact assert repo_fact.repository_info == "The repository URL was found from provenance." - # Check Commit + # Check Commit. commit_result = _perform_check_assert_result_return_result( ProvenanceDerivedCommitCheck(), context, CheckResultType.PASSED ) @@ -77,10 +77,10 @@ def test_provenance_repo_commit_checks_fail( """Test combinations of Repository objects and provenance strings against check.""" context = _prepare_context(macaron_path, repository) - # Check Repo + # Check Repo. _perform_check_assert_result_return_result(ProvenanceDerivedRepoCheck(), context, CheckResultType.FAILED) - # Check Commit + # Check Commit. _perform_check_assert_result_return_result(ProvenanceDerivedCommitCheck(), context, CheckResultType.FAILED) diff --git a/tests/slsa_analyzer/checks/test_registry.py b/tests/slsa_analyzer/checks/test_registry.py index 379da5c8d..733562182 100644 --- a/tests/slsa_analyzer/checks/test_registry.py +++ b/tests/slsa_analyzer/checks/test_registry.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. 
+# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module contains the tests for the Registry class.""" @@ -36,7 +36,7 @@ def check_registry_fixture() -> Registry: Registry The sample registry instance. """ - # Refresh Registry static variables before each test case + # Refresh Registry static variables before each test case. Registry._all_checks_mapping = {} Registry._check_relationships_mapping = {} @@ -60,7 +60,7 @@ class TestRegistry(TestCase): REGISTRY = Registry() def setUp(self) -> None: - # Refresh Registry static variables before each test case + # Refresh Registry static variables before each test case. Registry._all_checks_mapping = {} Registry._check_relationships_mapping = {} @@ -103,7 +103,7 @@ def test_exit_on_invalid_check_relationship(self, relationship: SearchStrategy) def test_add_relationship_entry(self) -> None: """Test adding a check relationship entry.""" - # Adding successfully + # Adding successfully. self.REGISTRY.register(MockCheck("mcn_a_1", "Parent", [])) self.REGISTRY.register(MockCheck("mcn_b_1", "Child1", [("mcn_a_1", CheckResultType.PASSED)])) self.REGISTRY.register( @@ -126,11 +126,11 @@ def test_add_relationship_entry(self) -> None: "mcn_d_1": {}, } - # Cannot add a check that depends on itself + # Cannot add a check that depends on itself. with self.assertRaises(SystemExit): self.REGISTRY.register(MockCheck("mcn_e_1", "Self-dependent-check", [("mcn_e_1", CheckResultType.PASSED)])) - # Add a check with duplicated relationships + # Add a check with duplicated relationships. 
with self.assertRaises(SystemExit): self.REGISTRY.register( MockCheck( @@ -146,7 +146,7 @@ def test_add_relationship_entry(self) -> None: @given( lists( one_of(none(), text(), integers(), tuples(), binary(), booleans()), - min_size=1, # To ensure at least one invalid req is validated + min_size=1, # To ensure at least one invalid req is validated. ) ) def test_exit_on_invalid_eval_reqs(self, eval_reqs: SearchStrategy) -> None: diff --git a/tests/slsa_analyzer/checks/test_registry_e2e.py b/tests/slsa_analyzer/checks/test_registry_e2e.py index 63f54e07c..ae79ddfa0 100644 --- a/tests/slsa_analyzer/checks/test_registry_e2e.py +++ b/tests/slsa_analyzer/checks/test_registry_e2e.py @@ -1,5 +1,6 @@ -# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. +# grammar: off """This module contains an end-to-end test for the check registry.""" @@ -50,12 +51,12 @@ def test_registry_e2e(self) -> None: This test uses EmptyCheck with pre-defined return value. """ - # Create a fresh registry + # Create a fresh registry. registry = Registry() Registry._all_checks_mapping = {} Registry._check_relationships_mapping = {} - # Register checks + # Register checks. # The final graph should be: # (The annotation A ----> B means B depends on A # the label P means PASSED and F means FAILED.) diff --git a/tests/slsa_analyzer/git_service/test_api_client.py b/tests/slsa_analyzer/git_service/test_api_client.py index e103ab165..f9de09b7e 100644 --- a/tests/slsa_analyzer/git_service/test_api_client.py +++ b/tests/slsa_analyzer/git_service/test_api_client.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2022, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2025, Oracle and/or its affiliates. All rights reserved. 
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """ @@ -38,7 +38,7 @@ def test_init(self) -> None: } assert client.query_list == self.mock_query_list - # Invalid profile + # Invalid profile. self.assertRaises(KeyError, GhAPIClient, self.error_mock_profile) def test_get_permanent_link(self) -> None: diff --git a/tests/slsa_analyzer/mock_git_utils.py b/tests/slsa_analyzer/mock_git_utils.py index 9aa879d45..2795480d0 100644 --- a/tests/slsa_analyzer/mock_git_utils.py +++ b/tests/slsa_analyzer/mock_git_utils.py @@ -95,7 +95,7 @@ def prepare_repo_for_testing( """ git_repo = initiate_repo(repo_path) - # Commit untracked files + # Commit untracked files. if git_repo.repo.untracked_files: commit_files(git_repo, git_repo.repo.untracked_files)