diff --git a/etc/requirements/base.txt b/etc/requirements/base.txt
index 02a11cd5f..21e1e7391 100644
--- a/etc/requirements/base.txt
+++ b/etc/requirements/base.txt
@@ -33,7 +33,7 @@ extractcode[full]==21.6.2
 commoncode==21.6.11
 
 # FetchCode
-fetchcode-container>=1.2.3.210512; sys_platform == "linux"
+fetchcode-container==1.2.3.210512; sys_platform == "linux"
 
 # Utilities
 XlsxWriter==1.4.5
diff --git a/scanpipe/pipelines/docker.py b/scanpipe/pipelines/docker.py
index 545243029..580308b06 100644
--- a/scanpipe/pipelines/docker.py
+++ b/scanpipe/pipelines/docker.py
@@ -65,7 +65,7 @@ def extract_layers(self):
 
     def find_images_os_and_distro(self):
         """
-        Finds the linux distro of input images.
+        Find the operating system and distro of the images.
         """
         for image in self.images:
             image.get_and_set_distro()
diff --git a/scanpipe/pipelines/windows_docker.py b/scanpipe/pipelines/windows_docker.py
index 040be9745..863bf68fc 100644
--- a/scanpipe/pipelines/windows_docker.py
+++ b/scanpipe/pipelines/windows_docker.py
@@ -49,6 +49,10 @@ def steps(cls):
         )
 
     def tag_known_software_packages(self):
+        """
+        Flag files from well-known software packages by checking common install
+        paths.
+        """
         windows.tag_known_software(self.project)
 
     def tag_uninteresting_codebase_resources(self):
diff --git a/scanpipe/pipes/windows.py b/scanpipe/pipes/windows.py
index 6385b2145..15ff150cc 100644
--- a/scanpipe/pipes/windows.py
+++ b/scanpipe/pipes/windows.py
@@ -20,6 +20,8 @@
 # ScanCode.io is a free software code scanning tool from nexB Inc. and others.
 # Visit https://github.com/nexB/scancode.io for support and download.
 
+import re
+
 from django.db.models import Q
 
 from scanpipe import pipes
@@ -69,6 +71,11 @@ def tag_uninteresting_windows_codebase_resources(project):
 
 
 def tag_installed_package_files(project, root_dir_pattern, package):
+    """
+    For all CodebaseResources from `project` whose `rootfs_path` matches the
+    regex `root_dir_pattern`, add `package` to the discovered_packages of each
+    CodebaseResource and set the status.
+    """
     qs = project.codebaseresources.no_status()
     installed_package_files = qs.filter(rootfs_path__regex=root_dir_pattern)
     # If we find files whose names start with `root_dir_pattern`, we consider
@@ -84,31 +91,96 @@ def tag_installed_package_files(project, root_dir_pattern, package):
 
 
 def tag_known_software(project):
-    python_root_directory_name_pattern = r"/Files/Python\d?\d?"
-    python_package = Package(
-        type="windows-program",
-        name="Python",
-        # TODO: find version from path
-        version="nv",
-        license_expression="python",
-        copyright="Copyright (c) Python Software Foundation",
-        homepage_url="https://www.python.org/"
-    )
-    tag_installed_package_files(
-        project=project,
-        root_dir_pattern=python_root_directory_name_pattern,
-        package=python_package
-    )
-
-    openjdk_root_directory_name_pattern = r"/Files/jdk-\d\d?.?\d?.?\d?"
-    openjdk_package = Package(
-        name="OpenJDK",
-        license_expression="gpl-2.0 WITH oracle-openjdk-classpath-exception-2.0",
-        copyright="Copyright (c) Oracle and/or its affiliates",
-        homepage_url="http://openjdk.java.net/",
-    )
-    tag_installed_package_files(
-        project=project,
-        root_dir_pattern=openjdk_root_directory_name_pattern,
-        package=openjdk_package
-    )
+    """
+    Find Windows software in `project` by checking `project`'s CodebaseResources
+    to see if their rootfs_path is under a known software root directory. If
+    there are CodebaseResources that are under a known software root directory,
+    a DiscoveredPackage is created for that software package and all files under
+    that software package's root directory are considered installed files for
+    that package.
+
+    Currently, we are only checking for Python and openjdk in Windows Docker
+    image layers.
+    """
+    qs = project.codebaseresources.no_status()
+    # Capture every digit so that `Python310` is not truncated to `Python31`.
+    python_root_directory_name_pattern = r"/Files/Python(\d+)"
+    python_root_directory_name_pattern_compiled = re.compile(
+        python_root_directory_name_pattern
+    )
+    python_paths_by_versions = {}
+    for python_codebase_resource in qs.filter(
+        rootfs_path__regex=python_root_directory_name_pattern
+    ):
+        # `search` copes with paths where the pattern occurs more than once,
+        # unlike unpacking the fixed-size result of `re.split`.
+        match = python_root_directory_name_pattern_compiled.search(
+            python_codebase_resource.rootfs_path
+        )
+        if not match:
+            continue
+        version = match.group(1)
+        # The directory name concatenates major and minor: `39` is 3.9 and
+        # `310` is 3.10.
+        major, minor = version[0], version[1:]
+        version_with_dots = f"{major}.{minor}" if minor else major
+        if version_with_dots in python_paths_by_versions:
+            continue
+        # Forbid a trailing digit so `Python3` does not also claim the files
+        # under `Python39`.
+        python_path = f"/Files/Python{version}([^0-9]|$)"
+        python_paths_by_versions[version_with_dots] = python_path
+
+    for python_version, python_path in python_paths_by_versions.items():
+        python_package = Package(
+            type="windows-program",
+            name="Python",
+            version=python_version,
+            license_expression="python",
+            copyright="Copyright (c) Python Software Foundation",
+            homepage_url="https://www.python.org/",
+        )
+        tag_installed_package_files(
+            project=project,
+            root_dir_pattern=python_path,
+            package=python_package,
+        )
+
+    qs = project.codebaseresources.no_status()
+    openjdk_root_directory_name_pattern = r"/Files/(open)?jdk-(\d+(\.\d+)*)"
+    openjdk_root_directory_name_pattern_compiled = re.compile(
+        openjdk_root_directory_name_pattern
+    )
+    openjdk_versions_by_paths = {}
+    for openjdk_codebase_resource in qs.filter(
+        rootfs_path__regex=openjdk_root_directory_name_pattern
+    ):
+        match = openjdk_root_directory_name_pattern_compiled.search(
+            openjdk_codebase_resource.rootfs_path
+        )
+        if not match:
+            continue
+        # The optional `open` group is None for plain `jdk-*` directories;
+        # without the fallback the path would become `/Files/Nonejdk-...`.
+        open_prefix = match.group(1) or ""
+        openjdk_version = match.group(2)
+        # Escape the dots of the version and forbid a trailing digit or dot so
+        # `jdk-11` does not also claim the files under `jdk-11.0.2`.
+        escaped_version = re.escape(openjdk_version)
+        openjdk_path = f"/Files/{open_prefix}jdk-{escaped_version}([^0-9.]|$)"
+        openjdk_versions_by_paths[openjdk_path] = openjdk_version
+
+    for openjdk_path, openjdk_version in openjdk_versions_by_paths.items():
+        openjdk_package = Package(
+            type="windows-program",
+            name="OpenJDK",
+            version=openjdk_version,
+            license_expression="gpl-2.0 WITH oracle-openjdk-classpath-exception-2.0",
+            copyright="Copyright (c) Oracle and/or its affiliates",
+            homepage_url="http://openjdk.java.net/",
+        )
+        tag_installed_package_files(
+            project=project,
+            root_dir_pattern=openjdk_path,
+            package=openjdk_package,
+        )