From a7e3897989c80776f0d5e9d6b71e57b2e8e88768 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Wed, 4 Aug 2021 09:10:06 +0200 Subject: [PATCH] Add support for Windows containers (#181) * Use newer version of container libraries Signed-off-by: Philippe Ombredanne * Use new container-inspector structures Signed-off-by: Philippe Ombredanne * Add minimal support for Windows containers Signed-off-by: Philippe Ombredanne * Update Windows package getter * The windows_helper module from scancode is not available on pypi Signed-off-by: Jono Yang * Use newer version of container libraries Signed-off-by: Philippe Ombredanne * Update call to windows_helper to win_reg Signed-off-by: Jono Yang * Create new pipeline for Windows Docker images * Create Windows specific tag_uninteresting_windows_codebase_resources function Signed-off-by: Jono Yang * Add function to find packages at well-known paths * Update tests Signed-off-by: Jono Yang * Add step to tag known software in pipeline * Change name of Docker step from "find_images_linux_distro" to "find_images_os_and_distro" Signed-off-by: Jono Yang * Get version from path in tag_known_software #238 * Update docstrings * Pin fetchcode dep Signed-off-by: Jono Yang * Troubleshoot regex patterns #238 Signed-off-by: Jono Yang * Report Program File contents as packages #238 Signed-off-by: Jono Yang * Update Windows-specific regex * Add more file names and file extensions to be ignored * Update expected test results Signed-off-by: Jono Yang * Do not ignore .mui files #238 Signed-off-by: Jono Yang * Filter using extension field rather than path #238 Signed-off-by: Jono Yang * Update scanpipe/pipes/docker.py Create issue to track extraction issue See https://github.com/nexB/scancode.io/issues/251 Signed-off-by: Philippe Ombredanne * Fix scancode-toolkit pinned version in base.txt #238 Signed-off-by: Jono Yang * Create pipeline step to tag ignorable files #252 Signed-off-by: Jono Yang * Update formatting #238 Signed-off-by: Jono Yang * 
Generalize regex expressions #238 * Modify regex used for Windows container analysis so it can be used outside the context of a Windows Docker image * Update tests Signed-off-by: Jono Yang * Create new pipes for ignoring files #238 * Create pipes that ignore media files and data files with no clues * Update test results Signed-off-by: Jono Yang * Add more file extensions to ignore #238 Signed-off-by: Jono Yang * Bump dep versions #238 Signed-off-by: Jono Yang * Update docstring #238 * Use InstalledWindowsProgram object instead of Package Signed-off-by: Jono Yang * Improve regex used in tag_known_software #238 * Update tests with more paths to test regex patterns Signed-off-by: Jono Yang * Adjust code for consistency across the codebase #181 Signed-off-by: Thomas Druez * Address PR comments #238 * Use re.match instead of re.split * Rename WindowsDocker pipeline to DockerWindows * Set the default value of the q_objects argument for tag_installed_package_files to be a tuple Signed-off-by: Jono Yang * Add is_media field to CodebaseResource #238 * Update test results Signed-off-by: Jono Yang * Simplify tag_media_files_as_unintersting() #238 * Update test Signed-off-by: Jono Yang * Refine windows pipes #238 Signed-off-by: Thomas Druez Co-authored-by: Jono Yang Co-authored-by: Thomas Druez --- .../0011_codebaseresource_is_media.py | 18 + scanpipe/models.py | 1 + scanpipe/pipelines/docker.py | 6 +- scanpipe/pipelines/docker_windows.py | 81 ++ scanpipe/pipes/docker.py | 1 - scanpipe/pipes/rootfs.py | 53 +- scanpipe/pipes/scancode.py | 1 - scanpipe/pipes/windows.py | 276 +++++++ .../tests/data/is-npm-1.0.0_scancode.json | 13 +- scanpipe/tests/data/is-npm-1.0.0_summary.json | 710 +++++++++--------- scanpipe/tests/test_api.py | 2 +- scanpipe/tests/test_commands.py | 2 +- scanpipe/tests/test_pipes.py | 317 ++++++++ setup.py | 1 + 14 files changed, 1107 insertions(+), 375 deletions(-) create mode 100644 scanpipe/migrations/0011_codebaseresource_is_media.py create mode 100644 
scanpipe/pipelines/docker_windows.py create mode 100644 scanpipe/pipes/windows.py diff --git a/scanpipe/migrations/0011_codebaseresource_is_media.py b/scanpipe/migrations/0011_codebaseresource_is_media.py new file mode 100644 index 000000000..f52ef769d --- /dev/null +++ b/scanpipe/migrations/0011_codebaseresource_is_media.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.6 on 2021-08-03 18:27 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('scanpipe', '0010_codebaseresource_is_key_file'), + ] + + operations = [ + migrations.AddField( + model_name='codebaseresource', + name='is_media', + field=models.BooleanField(default=False), + ), + ] diff --git a/scanpipe/models.py b/scanpipe/models.py index 9d5a4a205..2994776a2 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -1036,6 +1036,7 @@ class Type(models.TextChoices): is_text = models.BooleanField(default=False) is_archive = models.BooleanField(default=False) is_key_file = models.BooleanField(default=False) + is_media = models.BooleanField(default=False) class Compliance(models.TextChoices): OK = "ok" diff --git a/scanpipe/pipelines/docker.py b/scanpipe/pipelines/docker.py index ac6d63287..c919e0013 100644 --- a/scanpipe/pipelines/docker.py +++ b/scanpipe/pipelines/docker.py @@ -35,7 +35,7 @@ def steps(cls): return ( cls.extract_images, cls.extract_layers, - cls.find_images_linux_distro, + cls.find_images_os_and_distro, cls.collect_images_information, cls.collect_and_create_codebase_resources, cls.collect_and_create_system_packages, @@ -63,9 +63,9 @@ def extract_layers(self): if errors: self.add_error("\n".join(errors)) - def find_images_linux_distro(self): + def find_images_os_and_distro(self): """ - Finds the linux distro of input images. + Finds the operating system and distro of input images. 
""" for image in self.images: image.get_and_set_distro() diff --git a/scanpipe/pipelines/docker_windows.py b/scanpipe/pipelines/docker_windows.py new file mode 100644 index 000000000..b0cc49248 --- /dev/null +++ b/scanpipe/pipelines/docker_windows.py @@ -0,0 +1,81 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# http://nexb.com and https://github.com/nexB/scancode.io +# The ScanCode.io software is licensed under the Apache License version 2.0. +# Data generated with ScanCode.io is provided as-is without warranties. +# ScanCode is a trademark of nexB Inc. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. +# +# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, either express or implied. No content created from +# ScanCode.io should be considered or used as legal advice. Consult an Attorney +# for any legal advice. +# +# ScanCode.io is a free software code scanning tool from nexB Inc. and others. +# Visit https://github.com/nexB/scancode.io for support and download. + +from scanpipe.pipelines.docker import Docker +from scanpipe.pipes import docker +from scanpipe.pipes import rootfs +from scanpipe.pipes import windows + + +class DockerWindows(Docker): + """ + A pipeline to analyze Windows Docker images. 
+ """ + + @classmethod + def steps(cls): + return ( + cls.extract_images, + cls.extract_layers, + cls.find_images_os_and_distro, + cls.collect_images_information, + cls.collect_and_create_codebase_resources, + cls.collect_and_create_system_packages, + cls.tag_known_software_packages, + cls.tag_uninteresting_codebase_resources, + cls.tag_program_files_dirs_as_packages, + cls.tag_empty_files, + cls.scan_for_application_packages, + cls.scan_for_files, + cls.analyze_scanned_files, + cls.tag_data_files_with_no_clues, + cls.tag_not_analyzed_codebase_resources, + ) + + def tag_known_software_packages(self): + """ + Flag files from well-known software packages by checking common install paths. + """ + windows.tag_known_software(self.project) + + def tag_uninteresting_codebase_resources(self): + """ + Flag files that are known to be uninteresting. + """ + docker.tag_whiteout_codebase_resources(self.project) + windows.tag_uninteresting_windows_codebase_resources(self.project) + rootfs.tag_ignorable_codebase_resources(self.project) + rootfs.tag_media_files_as_uninteresting(self.project) + + def tag_program_files_dirs_as_packages(self): + """ + Report the immediate subdirectories of `Program Files` and `Program + Files (x86)` as packages. + """ + windows.tag_program_files(self.project) + + def tag_data_files_with_no_clues(self): + """ + If a file is a data file and has no clues towards its origin, mark as + uninteresting. + """ + rootfs.tag_data_files_with_no_clues(self.project) diff --git a/scanpipe/pipes/docker.py b/scanpipe/pipes/docker.py index b82d5f800..d0599fc59 100644 --- a/scanpipe/pipes/docker.py +++ b/scanpipe/pipes/docker.py @@ -61,7 +61,6 @@ def extract_layers_from_images(project, images): Returns the `errors` that may happen during the extraction. 
""" errors = [] - for image in images: image_dirname = Path(image.extracted_location).name target_path = project.codebase_path / image_dirname diff --git a/scanpipe/pipes/rootfs.py b/scanpipe/pipes/rootfs.py index be4be83ff..8e31b24a2 100644 --- a/scanpipe/pipes/rootfs.py +++ b/scanpipe/pipes/rootfs.py @@ -20,6 +20,7 @@ # ScanCode.io is a free software code scanning tool from nexB Inc. and others. # Visit https://github.com/nexB/scancode.io for support and download. +import fnmatch import logging import os from functools import partial @@ -28,12 +29,14 @@ from django.db.models import Q import attr +from commoncode.ignore import default_ignores from container_inspector.distro import Distro from scanpipe import pipes from scanpipe.pipes import alpine from scanpipe.pipes import debian from scanpipe.pipes import rpm +from scanpipe.pipes import windows logger = logging.getLogger(__name__) @@ -48,6 +51,7 @@ "opensuse": rpm.package_getter, "opensuse-tumbleweed": rpm.package_getter, "photon": rpm.package_getter, + "windows": windows.package_getter, } @@ -188,7 +192,7 @@ def has_hash_diff(install_file, codebase_resource): def scan_rootfs_for_system_packages(project, rootfs, detect_licenses=True): """ - Given a `project` Project and an `rootfs` RootFs, scan the `rootfs` for + Given a `project` Project and a `rootfs` RootFs, scan the `rootfs` for installed system packages, and create a DiscoveredPackage for each. Then for each installed DiscoveredPackage file, check if it exists @@ -336,3 +340,50 @@ def tag_uninteresting_codebase_resources(project): qs = project.codebaseresources.no_status() qs.filter(lookups).update(status="ignored-not-interesting") + + +def tag_ignorable_codebase_resources(project): + """ + Using the glob patterns from commoncode.ignore of ignorable files/directories, + tag codebase resources from `project` if their paths match an ignorable pattern. 
+ """ + lookups = Q() + for pattern in default_ignores.keys(): + # Translate glob pattern to regex + translated_pattern = fnmatch.translate(pattern) + # PostgreSQL does not like parts of Python regex + if translated_pattern.startswith("(?s"): + translated_pattern = translated_pattern.replace("(?s", "(?") + lookups |= Q(rootfs_path__icontains=pattern) + lookups |= Q(rootfs_path__iregex=translated_pattern) + + qs = project.codebaseresources.no_status() + qs.filter(lookups).update(status="ignored-default-ignores") + + +def tag_data_files_with_no_clues(project): + """ + Tags CodebaseResources that have a file type of `data` and no detected clues + to be uninteresting. + """ + lookup = Q( + file_type="data", + copyrights=[], + holders=[], + authors=[], + licenses=[], + license_expressions=[], + emails=[], + urls=[], + ) + + qs = project.codebaseresources + qs.filter(lookup).update(status="ignored-data-file-no-clues") + + +def tag_media_files_as_uninteresting(project): + """ + Tags CodebaseResources that are media files to be uninteresting. + """ + qs = project.codebaseresources.no_status() + qs.filter(is_media=True).update(status="ignored-media-file") diff --git a/scanpipe/pipes/scancode.py b/scanpipe/pipes/scancode.py index 75cde0331..e2b9ec585 100644 --- a/scanpipe/pipes/scancode.py +++ b/scanpipe/pipes/scancode.py @@ -104,7 +104,6 @@ def get_resource_info(location): # Missing fields on CodebaseResource model returned by `get_file_info`. unsupported_fields = [ - "is_media", "is_source", "is_script", "date", diff --git a/scanpipe/pipes/windows.py b/scanpipe/pipes/windows.py new file mode 100644 index 000000000..af9eb1363 --- /dev/null +++ b/scanpipe/pipes/windows.py @@ -0,0 +1,276 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# http://nexb.com and https://github.com/nexB/scancode.io +# The ScanCode.io software is licensed under the Apache License version 2.0. +# Data generated with ScanCode.io is provided as-is without warranties. 
+# ScanCode is a trademark of nexB Inc. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. +# +# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, either express or implied. No content created from +# ScanCode.io should be considered or used as legal advice. Consult an Attorney +# for any legal advice. +# +# ScanCode.io is a free software code scanning tool from nexB Inc. and others. +# Visit https://github.com/nexB/scancode.io for support and download. + +import re + +from django.db.models import Q + +from packagedcode import win_reg + +from scanpipe import pipes + + +def package_getter(root_dir, **kwargs): + """ + Yield installed package objects. 
+ """ + packages = win_reg.get_installed_packages(root_dir) + for package in packages: + yield package.purl, package + + +def tag_uninteresting_windows_codebase_resources(project): + """ + Tag known uninteresting files as uninteresting + """ + uninteresting_files = ( + "DefaultUser_Delta", + "Sam_Delta", + "Security_Delta", + "Software_Delta", + "System_Delta", + "NTUSER.DAT", + "desktop.ini", + "BBI", + "BCD-Template", + "DEFAULT", + "DRIVERS", + "ELAM", + "SAM", + "SECURITY", + "SOFTWARE", + "SYSTEM", + "system.ini", + ) + + uninteresting_file_extensions = ( + ".lnk", + ".library-ms", + ".LOG", + ".inf_loc", + ".NLS", + ".dat", + ".pem", + ".xrm-ms", + ".sql", + ".mof", + ".mfl", + ".manifest", + ".inf", + ".cat", + ".efi", + ".evtx", + ".cat", + ".pnf", + ) + + lookups = Q() + for file_name in uninteresting_files: + lookups |= Q(rootfs_path__iendswith=file_name) + for file_extension in uninteresting_file_extensions: + lookups |= Q(extension__icontains=file_extension) + + qs = project.codebaseresources.no_status() + qs.filter(lookups).update(status="ignored-not-interesting") + + +def tag_installed_package_files(project, root_dir_pattern, package, q_objects=None): + """ + For all CodebaseResources from `project` whose `rootfs_path` starts with + `root_dir_pattern`, add `package` to the discovered_packages of each + CodebaseResource and set the status. + """ + qs = project.codebaseresources.no_status() + lookup = Q(rootfs_path__startswith=root_dir_pattern) + + # If there are Q() objects in `q_objects`, then those Q() objects are chained + # to the initial query `lookup` using AND to allow a more specific query for + # package files. + for q_object in q_objects or []: + lookup &= q_object + + installed_package_files = qs.filter(lookup) + # If we find files whose names start with `root_dir_pattern`, we consider + # these files to be part of the Package `package` and tag these files as such. 
+ if installed_package_files: + created_package = pipes.update_or_create_package(project, package.to_dict()) + for installed_package_file in installed_package_files: + installed_package_file.discovered_packages.add(created_package) + installed_package_file.status = "installed-package" + installed_package_file.save() + created_package.save() + + +def _tag_python_software(project): + qs = project.codebaseresources.no_status() + python_root_pattern = r"(?P<root_path>^/(Files/)?Python(?P<version>\d+)?)/.*$" + python_root_pattern_compiled = re.compile(python_root_pattern) + python_resources = qs.filter(rootfs_path__regex=r"(^/(Files/)?Python(\d+)?)/.*$") + + python_versions_by_path = {} + for python_resource in python_resources: + match = python_root_pattern_compiled.match(python_resource.rootfs_path) + if not match: + continue + + python_root_path = match.group("root_path") + if python_root_path in python_versions_by_path: + continue + + version = match.group("version") + if not version: + version = "nv" + if version != "nv": + version = ".".join(digit for digit in version) + + python_versions_by_path[python_root_path] = version + + # We do not want to tag the files in the `site-packages` directory as being + # from Python proper.
The packages found here are oftentimes third-party + # packages from outside the Python foundation + q_objects = [~Q(rootfs_path__icontains="site-packages")] + + for python_path, python_version in python_versions_by_path.items(): + python_package = win_reg.InstalledWindowsProgram( + name="Python", + version=python_version, + license_expression="python", + copyright="Copyright (c) Python Software Foundation", + homepage_url="https://www.python.org/", + ) + tag_installed_package_files( + project=project, + root_dir_pattern=python_path, + package=python_package, + q_objects=q_objects, + ) + + +def _tag_openjdk_software(project): + qs = project.codebaseresources.no_status() + openjdk_root_pattern = ( + r"^(?P<root_path>/(Files/)?(open)?jdk(-(?P<version>(\d*)(\.\d+)*))*)/.*$" + ) + openjdk_root_pattern_compiled = re.compile(openjdk_root_pattern) + openjdk_resources = qs.filter( + rootfs_path__regex=r"^(/(Files/)?(open)?jdk(-((\d*)(\.\d+)*))*)/.*$" + ) + + openjdk_versions_by_path = {} + for openjdk_codebase_resource in openjdk_resources: + match = openjdk_root_pattern_compiled.match( + openjdk_codebase_resource.rootfs_path + ) + if not match: + continue + + openjdk_root_path = match.group("root_path") + if openjdk_root_path in openjdk_versions_by_path: + continue + + openjdk_version = match.group("version") + if not openjdk_version: + openjdk_version = "nv" + + openjdk_versions_by_path[openjdk_root_path] = openjdk_version + + for openjdk_path, openjdk_version in openjdk_versions_by_path.items(): + openjdk_package = win_reg.InstalledWindowsProgram( + name="OpenJDK", + version=openjdk_version, + license_expression="gpl-2.0 WITH oracle-openjdk-classpath-exception-2.0", + copyright="Copyright (c) Oracle and/or its affiliates", + homepage_url="http://openjdk.java.net/", + ) + tag_installed_package_files( + project=project, + root_dir_pattern=openjdk_path, + package=openjdk_package, + ) + + +def tag_known_software(project): + """ + Find Windows software in `project` by checking `project`s
CodebaseResources + to see if their rootfs_path is under a known software root directory. If + there are CodebaseResources that are under a known software root directory, + a DiscoveredPackage is created for that software package and all files under + that software package's root directory are considered installed files for + that package. + + Currently, we are only checking for Python and openjdk in Windows Docker + image layers. + + If a version number cannot be determined for an installed software Package, + then a version number of "nv" will be set. + """ + _tag_python_software(project) + _tag_openjdk_software(project) + + +PROGRAM_FILES_DIRS_TO_IGNORE = ( + "Common Files", + "Microsoft", +) + + +def tag_program_files(project): + """ + Report all subdirectories of Program Files and Program Files (x86) as Packages. + + If a Package is detected in this manner, then we will attempt to determine + the version from the path. If a version cannot be determined, a version of + `nv` will be set for the Package. + """ + qs = project.codebaseresources.no_status() + # Get all files from Program Files and Program Files (x86) + program_files_subdir_pattern = ( + r"(?P<program_files_subdir>^.*Program Files( \(x86\))?/(?P<dirname>[^/]+))" + ) + program_files_subdir_pattern_compiled = re.compile(program_files_subdir_pattern) + program_files_resources = qs.filter( + rootfs_path__regex=r"^.*/Program Files( \(x86\))?"
+ ) + + program_files_dirname_by_path = {} + for program_file in program_files_resources: + match = program_files_subdir_pattern_compiled.match(program_file.rootfs_path) + if not match: + continue + + program_files_subdir = match.group("program_files_subdir") + dirname = match.group("dirname") + if ( + program_files_subdir in program_files_dirname_by_path + or dirname.lower() in map(str.lower, PROGRAM_FILES_DIRS_TO_IGNORE) + ): + continue + + program_files_dirname_by_path[program_files_subdir] = dirname + + for root_dir, root_dir_name in program_files_dirname_by_path.items(): + package = win_reg.InstalledWindowsProgram(name=root_dir_name, version="nv") + tag_installed_package_files( + project=project, + root_dir_pattern=root_dir, + package=package, + ) diff --git a/scanpipe/tests/data/is-npm-1.0.0_scancode.json b/scanpipe/tests/data/is-npm-1.0.0_scancode.json index 4ff86abaf..1ce03638c 100644 --- a/scanpipe/tests/data/is-npm-1.0.0_scancode.json +++ b/scanpipe/tests/data/is-npm-1.0.0_scancode.json @@ -2,10 +2,10 @@ "headers": [ { "tool_name": "scancode-toolkit", - "tool_version": "21.6.7", + "tool_version": "21.7.30", "options": { "input": [ - "/Volumes/nexB/var/projects/wdsdadsadasd-bc9f8ade/codebase" + "/tmp/tmpkzp1f_pw/projects/analysis-03087122/codebase" ], "--classify": true, "--consolidate": true, @@ -13,20 +13,19 @@ "--email": true, "--info": true, "--is-license-text": true, - "--json-pp": "/Volumes/nexB/var/projects/wdsdadsadasd-bc9f8ade/output/scancode-2021-07-07-11-18-49.json", + "--json-pp": "/tmp/tmpkzp1f_pw/projects/analysis-03087122/output/scancode-2021-08-03-19-09-26.json", "--license": true, "--license-clarity-score": true, "--license-text": true, "--package": true, - "--processes": "4", "--summary": true, "--summary-key-files": true, "--url": true }, "notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. 
No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.", - "start_timestamp": "2021-07-07T111850.232385", - "end_timestamp": "2021-07-07T111856.279366", - "duration": 6.047006845474243, + "start_timestamp": "2021-08-03T190927.407466", + "end_timestamp": "2021-08-03T190932.182255", + "duration": 4.774801015853882, "message": null, "errors": [], "extra_data": { diff --git a/scanpipe/tests/data/is-npm-1.0.0_summary.json b/scanpipe/tests/data/is-npm-1.0.0_summary.json index 93ad1d6f6..2ca0337bf 100644 --- a/scanpipe/tests/data/is-npm-1.0.0_summary.json +++ b/scanpipe/tests/data/is-npm-1.0.0_summary.json @@ -1,385 +1,375 @@ { - "license_expressions": [ - { - "value": "mit", - "count": 2 - }, - { - "value": null, - "count": 1 - } - ], - "copyrights": [ - { - "value": null, - "count": 2 - }, - { - "value": "(c) Sindre Sorhus (http://sindresorhus.com)", - "count": 1 - } - ], - "holders": [ - { - "value": null, - "count": 2 - }, - { - "value": "Sindre Sorhus", - "count": 1 - } - ], - "authors": [ - { - "value": null, - "count": 3 - } - ], - "programming_language": [ - { - "value": null, - "count": 2 - }, - { - "value": "JavaScript", - "count": 1 - } - ], - "packages": [ - { - "type": "npm", - "namespace": null, - "name": "is-npm", - "version": "1.0.0", - "qualifiers": {}, - "subpath": null, - "primary_language": "JavaScript", - "description": "Check if your code is running as an npm script", - "release_date": null, - "parties": [ + "license_expressions": [ { - "type": "person", - "role": "author", - "name": "Sindre Sorhus", - "email": "sindresorhus@gmail.com", - "url": "http://sindresorhus.com" - } - ], - "keywords": [ - "npm", - "is", - "check", - "detect", - "env", - "environment" - ], - "homepage_url": null, - "download_url": 
"https://registry.npmjs.org/is-npm/-/is-npm-1.0.0.tgz", - "size": null, - "sha1": null, - "md5": null, - "sha256": null, - "sha512": null, - "bug_tracking_url": null, - "code_view_url": null, - "vcs_url": "https://github.com/sindresorhus/is-npm", - "copyright": null, - "license_expression": "mit", - "declared_license": [ - "MIT" - ], - "notice_text": null, - "root_path": "codebase/package", - "dependencies": [ + "value": "mit", + "count": 2 + }, { - "purl": "pkg:npm/ava", - "requirement": "0.0.3", - "scope": "devDependencies", - "is_runtime": false, - "is_optional": true, - "is_resolved": false + "value": null, + "count": 1 } - ], - "contains_source_code": null, - "source_packages": [], - "extra_data": {}, - "purl": "pkg:npm/is-npm@1.0.0", - "repository_homepage_url": "https://www.npmjs.com/package/is-npm", - "repository_download_url": "https://registry.npmjs.org/is-npm/-/is-npm-1.0.0.tgz", - "api_data_url": "https://registry.npmjs.org/is-npm/1.0.0", - "files": [ + ], + "copyrights": [ { - "path": "codebase/package/package.json", - "type": "file" - } - ] - } - ], - "license_clarity_score": { - "score": 30, - "declared": true, - "discovered": 0, - "consistency": false, - "spdx": false, - "license_texts": false - }, - "license_matches": { - "mit": [ - { - "path": "package/package.json", - "matched_text": " \"license\": \"MIT\"," - }, - { - "path": "package/readme.md", - "matched_text": "## License\n\nMIT \u00a9 [Sindre Sorhus](http://sindresorhus.com)" - } - ] - }, - "key_files": [ - { - "for_packages": [ - "pkg:npm/is-npm@1.0.0" - ], - "compliance_alert": "ok", - "path": "package/package.json", - "size": 563, - "sha1": "943efb8abda9b9cdaa4ea73f5709563b948af50b", - "md5": "c843e88ecb274d5d573c71be330bff8b", - "extra_data": {}, - "copyrights": [], - "holders": [], - "authors": [], - "licenses": [ + "value": null, + "count": 2 + }, { - "key": "mit", - "name": "MIT License", - "owner": "MIT", - "score": 100.0, - "policy": { - "icon": "far fa-thumbs-up", - "label": 
"Recommended License", - "color_code": "#002fff", - "license_key": "mit", - "compliance_alert": "" - }, - "category": "Permissive", - "end_line": 5, - "spdx_url": "https://spdx.org/licenses/MIT", - "text_url": "http://opensource.org/licenses/mit-license.php", - "short_name": "MIT License", - "start_line": 5, - "homepage_url": "http://opensource.org/licenses/mit-license.php", - "is_exception": false, - "matched_rule": { - "matcher": "2-aho", - "licenses": [ - "mit" - ], - "identifier": "mit_30.RULE", - "rule_length": 2, - "is_license_tag": true, - "match_coverage": 100.0, - "matched_length": 2, - "rule_relevance": 100, - "is_license_text": false, - "is_license_intro": false, - "is_license_notice": false, - "license_expression": "mit", - "is_license_reference": false - }, - "matched_text": " \"license\": \"MIT\",", - "reference_url": "https://scancode-licensedb.aboutcode.org/mit", - "spdx_license_key": "MIT", - "scancode_data_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/mit.yml", - "scancode_text_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/mit.LICENSE" + "value": "(c) Sindre Sorhus (http://sindresorhus.com)", + "count": 1 } - ], - "license_expressions": [ - "mit" - ], - "emails": [ + ], + "holders": [ { - "email": "sindresorhus@gmail.com", - "end_line": 9, - "start_line": 9 - } - ], - "urls": [ + "value": null, + "count": 2 + }, { - "url": "http://sindresorhus.com/", - "end_line": 10, - "start_line": 10 + "value": "Sindre Sorhus", + "count": 1 } - ], - "status": "application-package", - "type": "file", - "name": "package.json", - "extension": ".json", - "programming_language": "", - "mime_type": "application/json", - "file_type": "JSON data", - "is_binary": false, - "is_text": true, - "is_archive": false, - "is_key_file": true, - "content": "{\n \"name\": \"is-npm\",\n \"version\": \"1.0.0\",\n \"description\": \"Check if your code is running as an npm script\",\n 
\"license\": \"MIT\",\n \"repository\": \"sindresorhus/is-npm\",\n \"author\": {\n \"name\": \"Sindre Sorhus\",\n \"email\": \"sindresorhus@gmail.com\",\n \"url\": \"http://sindresorhus.com\"\n },\n \"engines\": {\n \"node\": \">=0.10.0\"\n },\n \"scripts\": {\n \"test\": \"node test.js\"\n },\n \"files\": [\n \"index.js\"\n ],\n \"keywords\": [\n \"npm\",\n \"is\",\n \"check\",\n \"detect\",\n \"env\",\n \"environment\"\n ],\n \"devDependencies\": {\n \"ava\": \"0.0.3\"\n }\n}\n" - }, - { - "for_packages": [], - "compliance_alert": "ok", - "path": "package/readme.md", - "size": 477, - "sha1": "b77832e53cb8239edd733c2ca02b1fc01ca3c2b6", - "md5": "a743e0abf08c28a37ecc4bef4dc02f8c", - "extra_data": {}, - "copyrights": [ + ], + "authors": [ { - "value": "(c) Sindre Sorhus (http://sindresorhus.com)", - "end_line": 30, - "start_line": 30 + "value": null, + "count": 3 } - ], - "holders": [ + ], + "programming_language": [ + { + "value": null, + "count": 2 + }, { - "value": "Sindre Sorhus", - "end_line": 30, - "start_line": 30 + "value": "JavaScript", + "count": 1 } - ], - "authors": [], - "licenses": [ + ], + "packages": [ { - "key": "mit", - "name": "MIT License", - "owner": "MIT", - "score": 100.0, - "policy": { - "icon": "far fa-thumbs-up", - "label": "Recommended License", - "color_code": "#002fff", - "license_key": "mit", - "compliance_alert": "" - }, - "category": "Permissive", - "end_line": 30, - "spdx_url": "https://spdx.org/licenses/MIT", - "text_url": "http://opensource.org/licenses/mit-license.php", - "short_name": "MIT License", - "start_line": 28, - "homepage_url": "http://opensource.org/licenses/mit-license.php", - "is_exception": false, - "matched_rule": { - "matcher": "2-aho", - "licenses": [ - "mit" + "type": "npm", + "namespace": null, + "name": "is-npm", + "version": "1.0.0", + "qualifiers": {}, + "subpath": null, + "primary_language": "JavaScript", + "description": "Check if your code is running as an npm script", + "release_date": null, + "parties": 
[ + { + "type": "person", + "role": "author", + "name": "Sindre Sorhus", + "email": "sindresorhus@gmail.com", + "url": "http://sindresorhus.com" + } + ], + "keywords": [ + "npm", + "is", + "check", + "detect", + "env", + "environment" ], - "identifier": "mit_30.RULE", - "rule_length": 2, - "is_license_tag": true, - "match_coverage": 100.0, - "matched_length": 2, - "rule_relevance": 100, - "is_license_text": false, - "is_license_intro": false, - "is_license_notice": false, + "homepage_url": null, + "download_url": "https://registry.npmjs.org/is-npm/-/is-npm-1.0.0.tgz", + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": "https://github.com/sindresorhus/is-npm", + "copyright": null, "license_expression": "mit", - "is_license_reference": false - }, - "matched_text": "## License\n\nMIT \u00a9 [Sindre Sorhus](http://sindresorhus.com)", - "reference_url": "https://scancode-licensedb.aboutcode.org/mit", - "spdx_license_key": "MIT", - "scancode_data_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/mit.yml", - "scancode_text_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/mit.LICENSE" + "declared_license": [ + "MIT" + ], + "notice_text": null, + "root_path": "codebase/package", + "dependencies": [ + { + "purl": "pkg:npm/ava", + "requirement": "0.0.3", + "scope": "devDependencies", + "is_runtime": false, + "is_optional": true, + "is_resolved": false + } + ], + "contains_source_code": null, + "source_packages": [], + "extra_data": {}, + "purl": "pkg:npm/is-npm@1.0.0", + "repository_homepage_url": "https://www.npmjs.com/package/is-npm", + "repository_download_url": "https://registry.npmjs.org/is-npm/-/is-npm-1.0.0.tgz", + "api_data_url": "https://registry.npmjs.org/is-npm/1.0.0", + "files": [ + { + "path": "codebase/package/package.json", + "type": "file" + } + ] } - ], - "license_expressions": [ 
- "mit" - ], - "emails": [], - "urls": [ - { - "url": "https://travis-ci.org/sindresorhus/is-npm.svg?branch=master", - "end_line": 1, - "start_line": 1 - }, - { - "url": "https://travis-ci.org/sindresorhus/is-npm", - "end_line": 1, - "start_line": 1 - }, + ], + "license_clarity_score": { + "score": 30, + "declared": true, + "discovered": 0, + "consistency": false, + "spdx": false, + "license_texts": false + }, + "license_matches": { + "mit": [ + { + "path": "package/package.json", + "matched_text": " \"license\": \"MIT\"," + }, + { + "path": "package/readme.md", + "matched_text": "## License\n\nMIT \u00a9 [Sindre Sorhus](http://sindresorhus.com)" + } + ] + }, + "key_files": [ { - "url": "https://www.npmjs.org/doc/misc/npm-scripts.html", - "end_line": 3, - "start_line": 3 + "for_packages": [ + "pkg:npm/is-npm@1.0.0" + ], + "compliance_alert": "missing", + "path": "package/package.json", + "size": 563, + "sha1": "943efb8abda9b9cdaa4ea73f5709563b948af50b", + "md5": "c843e88ecb274d5d573c71be330bff8b", + "extra_data": {}, + "copyrights": [], + "holders": [], + "authors": [], + "licenses": [ + { + "key": "mit", + "name": "MIT License", + "owner": "MIT", + "score": 100.0, + "policy": null, + "category": "Permissive", + "end_line": 5, + "spdx_url": "https://spdx.org/licenses/MIT", + "text_url": "http://opensource.org/licenses/mit-license.php", + "short_name": "MIT License", + "start_line": 5, + "homepage_url": "http://opensource.org/licenses/mit-license.php", + "is_exception": false, + "matched_rule": { + "matcher": "2-aho", + "licenses": [ + "mit" + ], + "identifier": "mit_30.RULE", + "rule_length": 2, + "is_license_tag": true, + "match_coverage": 100.0, + "matched_length": 2, + "rule_relevance": 100, + "is_license_text": false, + "is_license_intro": false, + "is_license_notice": false, + "license_expression": "mit", + "is_license_reference": false + }, + "matched_text": " \"license\": \"MIT\",", + "reference_url": "https://scancode-licensedb.aboutcode.org/mit", + 
"spdx_license_key": "MIT", + "scancode_data_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/mit.yml", + "scancode_text_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/mit.LICENSE" + } + ], + "license_expressions": [ + "mit" + ], + "emails": [ + { + "email": "sindresorhus@gmail.com", + "end_line": 9, + "start_line": 9 + } + ], + "urls": [ + { + "url": "http://sindresorhus.com/", + "end_line": 10, + "start_line": 10 + } + ], + "status": "application-package", + "type": "file", + "name": "package.json", + "extension": ".json", + "programming_language": "", + "mime_type": "application/json", + "file_type": "JSON data", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_key_file": true, + "is_media": false, + "content": "{\n \"name\": \"is-npm\",\n \"version\": \"1.0.0\",\n \"description\": \"Check if your code is running as an npm script\",\n \"license\": \"MIT\",\n \"repository\": \"sindresorhus/is-npm\",\n \"author\": {\n \"name\": \"Sindre Sorhus\",\n \"email\": \"sindresorhus@gmail.com\",\n \"url\": \"http://sindresorhus.com\"\n },\n \"engines\": {\n \"node\": \">=0.10.0\"\n },\n \"scripts\": {\n \"test\": \"node test.js\"\n },\n \"files\": [\n \"index.js\"\n ],\n \"keywords\": [\n \"npm\",\n \"is\",\n \"check\",\n \"detect\",\n \"env\",\n \"environment\"\n ],\n \"devDependencies\": {\n \"ava\": \"0.0.3\"\n }\n}\n" }, { - "url": "http://sindresorhus.com/", - "end_line": 30, - "start_line": 30 + "for_packages": [], + "compliance_alert": "missing", + "path": "package/readme.md", + "size": 477, + "sha1": "b77832e53cb8239edd733c2ca02b1fc01ca3c2b6", + "md5": "a743e0abf08c28a37ecc4bef4dc02f8c", + "extra_data": {}, + "copyrights": [ + { + "value": "(c) Sindre Sorhus (http://sindresorhus.com)", + "end_line": 30, + "start_line": 30 + } + ], + "holders": [ + { + "value": "Sindre Sorhus", + "end_line": 30, + "start_line": 30 + } + ], + "authors": [], + "licenses": [ + { 
+ "key": "mit", + "name": "MIT License", + "owner": "MIT", + "score": 100.0, + "policy": null, + "category": "Permissive", + "end_line": 30, + "spdx_url": "https://spdx.org/licenses/MIT", + "text_url": "http://opensource.org/licenses/mit-license.php", + "short_name": "MIT License", + "start_line": 28, + "homepage_url": "http://opensource.org/licenses/mit-license.php", + "is_exception": false, + "matched_rule": { + "matcher": "2-aho", + "licenses": [ + "mit" + ], + "identifier": "mit_30.RULE", + "rule_length": 2, + "is_license_tag": true, + "match_coverage": 100.0, + "matched_length": 2, + "rule_relevance": 100, + "is_license_text": false, + "is_license_intro": false, + "is_license_notice": false, + "license_expression": "mit", + "is_license_reference": false + }, + "matched_text": "## License\n\nMIT \u00a9 [Sindre Sorhus](http://sindresorhus.com)", + "reference_url": "https://scancode-licensedb.aboutcode.org/mit", + "spdx_license_key": "MIT", + "scancode_data_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/mit.yml", + "scancode_text_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/mit.LICENSE" + } + ], + "license_expressions": [ + "mit" + ], + "emails": [], + "urls": [ + { + "url": "https://travis-ci.org/sindresorhus/is-npm.svg?branch=master", + "end_line": 1, + "start_line": 1 + }, + { + "url": "https://travis-ci.org/sindresorhus/is-npm", + "end_line": 1, + "start_line": 1 + }, + { + "url": "https://www.npmjs.org/doc/misc/npm-scripts.html", + "end_line": 3, + "start_line": 3 + }, + { + "url": "http://sindresorhus.com/", + "end_line": 30, + "start_line": 30 + } + ], + "status": "", + "type": "file", + "name": "readme.md", + "extension": ".md", + "programming_language": "", + "mime_type": "text/plain", + "file_type": "UTF-8 Unicode text", + "is_binary": false, + "is_text": true, + "is_archive": false, + "is_key_file": true, + "is_media": false, + "content": "# is-npm [![Build 
Status](https://travis-ci.org/sindresorhus/is-npm.svg?branch=master)](https://travis-ci.org/sindresorhus/is-npm)\n\n> Check if your code is running as an [npm script](https://www.npmjs.org/doc/misc/npm-scripts.html)\n\n\n## Install\n\n```sh\n$ npm install --save is-npm\n```\n\n\n## Usage\n\n```js\nvar isNpm = require('is-npm');\nconsole.log(isNpm);\n```\n\n```sh\n$ node foo.js\n#=> false\n$ npm run foo\n#=> true\n```\n\n\n## License\n\nMIT \u00a9 [Sindre Sorhus](http://sindresorhus.com)\n" } - ], - "status": "", - "type": "file", - "name": "readme.md", - "extension": ".md", - "programming_language": "", - "mime_type": "text/plain", - "file_type": "UTF-8 Unicode text", - "is_binary": false, - "is_text": true, - "is_archive": false, - "is_key_file": true, - "content": "# is-npm [![Build Status](https://travis-ci.org/sindresorhus/is-npm.svg?branch=master)](https://travis-ci.org/sindresorhus/is-npm)\n\n> Check if your code is running as an [npm script](https://www.npmjs.org/doc/misc/npm-scripts.html)\n\n\n## Install\n\n```sh\n$ npm install --save is-npm\n```\n\n\n## Usage\n\n```js\nvar isNpm = require('is-npm');\nconsole.log(isNpm);\n```\n\n```sh\n$ node foo.js\n#=> false\n$ npm run foo\n#=> true\n```\n\n\n## License\n\nMIT \u00a9 [Sindre Sorhus](http://sindresorhus.com)\n" - } - ], - "key_files_packages": [ - { - "purl": "pkg:npm/is-npm@1.0.0", - "type": "npm", - "namespace": "", - "name": "is-npm", - "version": "1.0.0", - "qualifiers": "", - "subpath": "", - "primary_language": "JavaScript", - "description": "Check if your code is running as an npm script", - "release_date": null, - "homepage_url": "", - "download_url": "https://registry.npmjs.org/is-npm/-/is-npm-1.0.0.tgz", - "size": null, - "sha1": "", - "md5": "", - "bug_tracking_url": "", - "code_view_url": "", - "vcs_url": "https://github.com/sindresorhus/is-npm", - "copyright": "", - "license_expression": "mit", - "declared_license": "['MIT']", - "notice_text": "", - "manifest_path": "", - 
"contains_source_code": null, - "extra_data": {}, - "missing_resources": [], - "modified_resources": [], - "dependencies": [ + ], + "key_files_packages": [ { - "purl": "pkg:npm/ava", - "scope": "devDependencies", - "is_runtime": false, - "is_optional": true, - "is_resolved": false, - "requirement": "0.0.3" + "purl": "pkg:npm/is-npm@1.0.0", + "type": "npm", + "namespace": "", + "name": "is-npm", + "version": "1.0.0", + "qualifiers": "", + "subpath": "", + "primary_language": "JavaScript", + "description": "Check if your code is running as an npm script", + "release_date": null, + "homepage_url": "", + "download_url": "https://registry.npmjs.org/is-npm/-/is-npm-1.0.0.tgz", + "size": null, + "sha1": "", + "md5": "", + "bug_tracking_url": "", + "code_view_url": "", + "vcs_url": "https://github.com/sindresorhus/is-npm", + "copyright": "", + "license_expression": "mit", + "declared_license": "['MIT']", + "notice_text": "", + "manifest_path": "", + "contains_source_code": null, + "extra_data": {}, + "missing_resources": [], + "modified_resources": [], + "dependencies": [ + { + "purl": "pkg:npm/ava", + "scope": "devDependencies", + "is_runtime": false, + "is_optional": true, + "is_resolved": false, + "requirement": "0.0.3" + } + ], + "keywords": [ + "npm", + "is", + "check", + "detect", + "env", + "environment" + ], + "source_packages": [] } - ], - "keywords": [ - "npm", - "is", - "check", - "detect", - "env", - "environment" - ], - "source_packages": [] - } - ] + ] } \ No newline at end of file diff --git a/scanpipe/tests/test_api.py b/scanpipe/tests/test_api.py index be5d07a98..7b56046d0 100644 --- a/scanpipe/tests/test_api.py +++ b/scanpipe/tests/test_api.py @@ -391,7 +391,7 @@ def test_scanpipe_api_serializer_get_model_serializer(self): def test_scanpipe_api_serializer_get_serializer_fields(self): self.assertEqual(30, len(get_serializer_fields(DiscoveredPackage))) - self.assertEqual(25, len(get_serializer_fields(CodebaseResource))) + self.assertEqual(26, 
len(get_serializer_fields(CodebaseResource))) with self.assertRaises(LookupError): get_serializer_fields(None) diff --git a/scanpipe/tests/test_commands.py b/scanpipe/tests/test_commands.py index 2c2ed3adf..862cbf25f 100644 --- a/scanpipe/tests/test_commands.py +++ b/scanpipe/tests/test_commands.py @@ -78,7 +78,7 @@ def test_scanpipe_pipelines_pipeline_graph_output_dot(self): self.assertIn('"extract_images"[label=<extract_images>', output_dot) self.assertIn('"extract_layers"[label=<extract_layers>', output_dot) self.assertIn("extract_images -> extract_layers;", output_dot) - self.assertIn("extract_layers -> find_images_linux_distro;", output_dot) + self.assertIn("extract_layers -> find_images_os_and_distro;", output_dot) def test_scanpipe_management_command_create_project_base(self): out = StringIO() diff --git a/scanpipe/tests/test_pipes.py b/scanpipe/tests/test_pipes.py index 0e5dfb89d..46d14c9ef 100644 --- a/scanpipe/tests/test_pipes.py +++ b/scanpipe/tests/test_pipes.py @@ -48,6 +48,7 @@ from scanpipe.pipes import scancode from scanpipe.pipes import strip_root from scanpipe.pipes import tag_not_analyzed_codebase_resources +from scanpipe.pipes import windows from scanpipe.pipes.input import copy_inputs from scanpipe.tests import license_policies_index from scanpipe.tests import mocked_now @@ -756,6 +757,322 @@ def test_scanpipe_pipes_rootfs_has_hash_diff(self): codebase_resource = CodebaseResource(sha256="sha256", md5="md5") self.assertFalse(rootfs.has_hash_diff(install_file, codebase_resource)) + def test_scanpipe_pipes_windows_tag_uninteresting_windows_codebase_resources(self): + p1 = Project.objects.create(name="Analysis") + resource1 = CodebaseResource.objects.create( + project=p1, + path="root/Files/example.lnk", + rootfs_path="/Files/example.lnk", + extension=".lnk", + ) + resource2 = CodebaseResource.objects.create( + project=p1, + path="root/Hives/Software_Delta", + rootfs_path="/Hives/Software_Delta", + ) + resource3 = CodebaseResource.objects.create( + 
project=p1, + path="root/Files/example.dat", + rootfs_path="/Files/example.dat", + extension=".dat", + ) + resource4 = CodebaseResource.objects.create( + project=p1, + path="root/Files/should-not-be-ignored.txt", + rootfs_path="/Files/should-not-be-ignored.txt", + extension=".txt", + ) + + windows.tag_uninteresting_windows_codebase_resources(p1) + resource1.refresh_from_db() + resource2.refresh_from_db() + resource3.refresh_from_db() + resource4.refresh_from_db() + self.assertEqual("ignored-not-interesting", resource1.status) + self.assertEqual("ignored-not-interesting", resource2.status) + self.assertEqual("ignored-not-interesting", resource3.status) + self.assertEqual("", resource4.status) + + def test_scanpipe_pipes_windows_tag_known_software(self): + p1 = Project.objects.create(name="Analysis") + resource1 = CodebaseResource.objects.create( + project=p1, + path="root/Files/Python/py.exe", + rootfs_path="/Files/Python/py.exe", + ) + resource2 = CodebaseResource.objects.create( + project=p1, + path="root/Files/Python27/python2.exe", + rootfs_path="/Files/Python27/python2.exe", + ) + resource3 = CodebaseResource.objects.create( + project=p1, + path="root/Files/Python3/python3.exe", + rootfs_path="/Files/Python3/python3.exe", + ) + resource4 = CodebaseResource.objects.create( + project=p1, + path="root/Files/Python39/python3.9", + rootfs_path="/Files/Python39/python3.9.exe", + ) + resource5 = CodebaseResource.objects.create( + project=p1, + path="root/Files/Python39/Lib/site-packages/pip-21.1.3.dist-info/WHEEL", + rootfs_path="/Files/Python39/Lib/site-packages/pip-21.1.3.dist-info/WHEEL", + ) + resource6 = CodebaseResource.objects.create( + project=p1, + path="root/Files/jdk-11.0.1/readme.txt", + rootfs_path="/Files/jdk-11.0.1/readme.txt", + ) + resource7 = CodebaseResource.objects.create( + project=p1, + path="root/Files/openjdk-11.0.1/readme.txt", + rootfs_path="/Files/openjdk-11.0.1/readme.txt", + ) + resource8 = CodebaseResource.objects.create( + project=p1, + 
path="root/Files/jdk/readme.txt", + rootfs_path="/Files/jdk/readme.txt", + ) + resource9 = CodebaseResource.objects.create( + project=p1, + path="root/Files/openjdk/readme.txt", + rootfs_path="/Files/openjdk/readme.txt", + ) + resource10 = CodebaseResource.objects.create( + project=p1, + path="root/Files/Program Files/something-else/jdk/readme.txt", + rootfs_path="/Files/Program Files/something-else/jdk/readme.txt", + ) + resource11 = CodebaseResource.objects.create( + project=p1, + path="root/Python/py.exe", + rootfs_path="/Python/py.exe", + ) + resource12 = CodebaseResource.objects.create( + project=p1, + path="root/Python27/python2.exe", + rootfs_path="/Python27/python2.exe", + ) + resource13 = CodebaseResource.objects.create( + project=p1, + path="root/Python3/python3.exe", + rootfs_path="/Python3/python3.exe", + ) + resource14 = CodebaseResource.objects.create( + project=p1, + path="root/Python39/python3.9", + rootfs_path="/Python39/python3.9.exe", + ) + resource15 = CodebaseResource.objects.create( + project=p1, + path="root/Python39/Lib/site-packages/pip-21.1.3.dist-info/WHEEL", + rootfs_path="/Python39/Lib/site-packages/pip-21.1.3.dist-info/WHEEL", + ) + resource16 = CodebaseResource.objects.create( + project=p1, + path="root/jdk-11.0.1/readme.txt", + rootfs_path="/jdk-11.0.1/readme.txt", + ) + resource17 = CodebaseResource.objects.create( + project=p1, + path="root/openjdk-11.0.1/readme.txt", + rootfs_path="/openjdk-11.0.1/readme.txt", + ) + resource18 = CodebaseResource.objects.create( + project=p1, + path="root/jdk/readme.txt", + rootfs_path="/jdk/readme.txt", + ) + resource19 = CodebaseResource.objects.create( + project=p1, + path="root/openjdk/readme.txt", + rootfs_path="/openjdk/readme.txt", + ) + resource20 = CodebaseResource.objects.create( + project=p1, + path="root/Program Files/something-else/jdk/readme.txt", + rootfs_path="/Program Files/something-else/jdk/readme.txt", + ) + + windows.tag_known_software(p1) + resource11.refresh_from_db() + 
resource12.refresh_from_db() + resource13.refresh_from_db() + resource14.refresh_from_db() + resource15.refresh_from_db() + resource16.refresh_from_db() + resource17.refresh_from_db() + resource18.refresh_from_db() + resource19.refresh_from_db() + resource20.refresh_from_db() + resource1.refresh_from_db() + resource2.refresh_from_db() + resource3.refresh_from_db() + resource4.refresh_from_db() + resource5.refresh_from_db() + resource6.refresh_from_db() + resource7.refresh_from_db() + resource8.refresh_from_db() + resource9.refresh_from_db() + resource10.refresh_from_db() + + self.assertEqual("installed-package", resource1.status) + self.assertEqual("installed-package", resource2.status) + self.assertEqual("installed-package", resource3.status) + self.assertEqual("installed-package", resource4.status) + self.assertEqual("", resource5.status) + self.assertEqual("installed-package", resource6.status) + self.assertEqual("installed-package", resource7.status) + self.assertEqual("installed-package", resource8.status) + self.assertEqual("installed-package", resource9.status) + self.assertEqual("", resource10.status) + self.assertEqual("installed-package", resource11.status) + self.assertEqual("installed-package", resource12.status) + self.assertEqual("installed-package", resource13.status) + self.assertEqual("installed-package", resource14.status) + self.assertEqual("", resource15.status) + self.assertEqual("installed-package", resource16.status) + self.assertEqual("installed-package", resource17.status) + self.assertEqual("installed-package", resource18.status) + self.assertEqual("installed-package", resource19.status) + self.assertEqual("", resource20.status) + + def test_scanpipe_pipes_windows_tag_program_files(self): + p1 = Project.objects.create(name="Analysis") + resource1 = CodebaseResource.objects.create( + project=p1, + path="root/Files/Program Files (x86)/Microsoft/example.exe", + rootfs_path="/Files/Program Files (x86)/Microsoft/example.exe", + ) + resource2 = 
CodebaseResource.objects.create( + project=p1, + path="root/Files/Program Files/Microsoft/example.exe", + rootfs_path="/Files/Program Files/Microsoft/example.exe", + ) + resource3 = CodebaseResource.objects.create( + project=p1, + path="root/Files/Program Files (x86)/7Zip/7z.exe", + rootfs_path="/Files/Program Files (x86)/7Zip/7z.exe", + ) + resource4 = CodebaseResource.objects.create( + project=p1, + path="root/Files/Program Files/7Zip/7z.exe", + rootfs_path="/Files/Program Files/7Zip/7z.exe", + ) + resource5 = CodebaseResource.objects.create( + project=p1, + path="root/Files/Program Files (x86)/common files/sample.dat", + rootfs_path="/Files/Program Files (x86)/common files/sample.dat", + ) + resource6 = CodebaseResource.objects.create( + project=p1, + path="root/Files/Program Files/common files/sample.dat", + rootfs_path="/Files/Program Files/common files/sample.dat", + ) + windows.tag_program_files(p1) + resource1.refresh_from_db() + resource2.refresh_from_db() + resource3.refresh_from_db() + resource4.refresh_from_db() + resource5.refresh_from_db() + resource6.refresh_from_db() + self.assertEqual("", resource1.status) + self.assertEqual("", resource2.status) + self.assertEqual("installed-package", resource3.status) + self.assertEqual("installed-package", resource4.status) + self.assertEqual("", resource5.status) + self.assertEqual("", resource6.status) + + def test_scanpipe_pipes_rootfs_tag_ignorable_codebase_resources(self): + p1 = Project.objects.create(name="Analysis") + resource1 = CodebaseResource.objects.create( + project=p1, + path="root/user/cmake_install.cmake", + rootfs_path="/user/cmake_install.cmake", + ) + resource2 = CodebaseResource.objects.create( + project=p1, path="root/user/example.pot", rootfs_path="/user/example.pot" + ) + resource3 = CodebaseResource.objects.create( + project=p1, + path="root/user/__pycache__/foo.pyc", + rootfs_path="/user/__pycache__/foo.pyc", + ) + resource4 = CodebaseResource.objects.create( + project=p1, 
path="root/user/foo.css.map", rootfs_path="/user/foo.css.map" + ) + resource5 = CodebaseResource.objects.create( + project=p1, + path="root/user/should-not-be-ignored.txt", + rootfs_path="/user/should-not-be-ignored.txt", + ) + rootfs.tag_ignorable_codebase_resources(p1) + resource1.refresh_from_db() + resource2.refresh_from_db() + resource3.refresh_from_db() + resource4.refresh_from_db() + resource5.refresh_from_db() + self.assertEqual("ignored-default-ignores", resource1.status) + self.assertEqual("ignored-default-ignores", resource2.status) + self.assertEqual("ignored-default-ignores", resource3.status) + self.assertEqual("ignored-default-ignores", resource4.status) + self.assertEqual("", resource5.status) + + def test_scanpipe_pipes_rootfs_tag_data_files_with_no_clues(self): + p1 = Project.objects.create(name="Analysis") + resource1 = CodebaseResource.objects.create( + project=p1, + path="root/user/foo.data", + rootfs_path="/user/foo.data", + file_type="data", + ) + resource2 = CodebaseResource.objects.create( + project=p1, + path="root/user/bar.data", + rootfs_path="/user/bar.data", + file_type="data", + license_expressions=["apache-2.0"], + ) + rootfs.tag_data_files_with_no_clues(p1) + resource1.refresh_from_db() + resource2.refresh_from_db() + self.assertEqual("ignored-data-file-no-clues", resource1.status) + self.assertEqual("", resource2.status) + + def test_scanpipe_pipes_rootfs_tag_media_files_as_uninteresting(self): + p1 = Project.objects.create(name="Analysis") + resource1 = CodebaseResource.objects.create( + project=p1, + path="root/user/foo.png", + rootfs_path="/user/foo.png", + mime_type="image/png", + file_type="image/png", + is_media=True, + ) + resource2 = CodebaseResource.objects.create( + project=p1, + path="root/user/bar.jpg", + rootfs_path="/user/bar.jpg", + mime_type="image/jpeg", + file_type="JPEG image data", + is_media=True, + ) + resource3 = CodebaseResource.objects.create( + project=p1, + path="root/user/baz.txt", + 
rootfs_path="/user/baz.txt", + is_media=False, + ) + rootfs.tag_media_files_as_uninteresting(p1) + resource1.refresh_from_db() + resource2.refresh_from_db() + resource3.refresh_from_db() + self.assertEqual("ignored-media-file", resource1.status) + self.assertEqual("ignored-media-file", resource2.status) + self.assertEqual("", resource3.status) + class ScanPipePipesTransactionTest(TransactionTestCase): """ diff --git a/setup.py b/setup.py index 43c904440..8076bb6f2 100755 --- a/setup.py +++ b/setup.py @@ -57,6 +57,7 @@ ], "scancodeio_pipelines": [ "docker = scanpipe.pipelines.docker:Docker", + "docker_windows = scanpipe.pipelines.docker_windows:DockerWindows", "load_inventory = scanpipe.pipelines.load_inventory:LoadInventory", "root_filesystems = scanpipe.pipelines.root_filesystems:RootFS", "scan_codebase = scanpipe.pipelines.scan_codebase:ScanCodebase",