Skip to content

Commit

Permalink
Get version from path in tag_known_software #238
Browse files Browse the repository at this point in the history
    * Update docstrings
    * Pin fetchcode dep

Signed-off-by: Jono Yang <[email protected]>
  • Loading branch information
JonoYang committed Jul 16, 2021
1 parent 99d7844 commit 00a1cdd
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 29 deletions.
2 changes: 1 addition & 1 deletion etc/requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ extractcode[full]==21.6.2
commoncode==21.6.11

# FetchCode
fetchcode-container>=1.2.3.210512; sys_platform == "linux"
fetchcode-container==1.2.3.210512; sys_platform == "linux"

# Utilities
XlsxWriter==1.4.3
Expand Down
2 changes: 1 addition & 1 deletion scanpipe/pipelines/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def extract_layers(self):

def find_images_os_and_distro(self):
"""
Find the linux distro of the images.
Find the operating system and distro of the images.
"""
for image in self.images:
image.get_and_set_distro()
Expand Down
4 changes: 4 additions & 0 deletions scanpipe/pipelines/windows_docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ def steps(cls):
)

def tag_known_software_packages(self):
    """
    Tag the files that belong to well-known software packages, recognized
    from the common install paths they live under.
    """
    project = self.project
    windows.tag_known_software(project)

def tag_uninteresting_codebase_resources(self):
Expand Down
102 changes: 75 additions & 27 deletions scanpipe/pipes/windows.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode.io for support and download.

import re

from django.db.models import Q

from scanpipe import pipes
Expand Down Expand Up @@ -69,6 +71,11 @@ def tag_uninteresting_windows_codebase_resources(project):


def tag_installed_package_files(project, root_dir_pattern, package):
"""
For all CodebaseResources from `project` whose `rootfs_path` starts with
`root_dir_pattern`, add `package` to the discovered_packages of each
CodebaseResource and set the status
"""
qs = project.codebaseresources.no_status()
installed_package_files = qs.filter(rootfs_path__regex=root_dir_pattern)
# If we find files whose names start with `root_dir_pattern`, we consider
Expand All @@ -84,31 +91,72 @@ def tag_installed_package_files(project, root_dir_pattern, package):


def tag_known_software(project):
    """
    Find Windows software in `project` by checking `project`'s CodebaseResources
    to see if their rootfs_path is under a known software root directory. If
    there are CodebaseResources that are under a known software root directory,
    a DiscoveredPackage is created for that software package and all files under
    that software package's root directory are considered installed files for
    that package.

    Currently, we are only checking for Python and openjdk in Windows Docker
    image layers.
    """
    _tag_known_python(project)
    _tag_known_openjdk(project)


def _tag_known_python(project):
    """
    Tag the files under every `/Files/Python<digits>` directory of `project` as
    installed files of a Python package whose version comes from the digits.
    """
    root_directory_name_pattern = r"/Files/Python(\d+)?"
    root_directory_name_regex = re.compile(root_directory_name_pattern)

    qs = project.codebaseresources.no_status()
    paths_by_versions = {}
    for resource in qs.filter(rootfs_path__regex=root_directory_name_pattern):
        # `search` only looks at the first occurrence, so a path containing the
        # pattern more than once cannot break a fixed-size tuple unpacking.
        match = root_directory_name_regex.search(resource.rootfs_path)
        digits = match.group(1) if match else None
        if not digits:
            # Plain `/Files/Python` directory: there is no version to report.
            continue
        # The directory name is the major digit followed by the minor digits:
        # `Python38` is 3.8 and `Python310` is 3.10 (not 3.1.0).
        major, minor = digits[0], digits[1:]
        version = f"{major}.{minor}" if minor else major
        # Keep the first root directory seen for a given version.
        paths_by_versions.setdefault(version, match.group(0))

    for version, root_path in paths_by_versions.items():
        python_package = Package(
            type="windows-program",
            name="Python",
            version=version,
            license_expression="python",
            copyright="Copyright (c) Python Software Foundation",
            homepage_url="https://www.python.org/",
        )
        tag_installed_package_files(
            project=project,
            root_dir_pattern=root_path,
            package=python_package,
        )


def _tag_known_openjdk(project):
    """
    Tag the files under every `/Files/jdk-<version>` or
    `/Files/openjdk-<version>` directory of `project` as installed files of an
    OpenJDK package of that version.
    """
    root_directory_name_pattern = r"/Files/(open)?jdk-(\d\d?\.?\d?\.?\d?)"
    root_directory_name_regex = re.compile(root_directory_name_pattern)

    qs = project.codebaseresources.no_status()
    paths_by_versions = {}
    for resource in qs.filter(rootfs_path__regex=root_directory_name_pattern):
        match = root_directory_name_regex.search(resource.rootfs_path)
        if not match:
            continue
        version = match.group(2)
        # The matched text is the root directory itself. Rebuilding it from the
        # optional `(open)` group would render a missing prefix as "None".
        paths_by_versions.setdefault(version, match.group(0))

    for version, root_path in paths_by_versions.items():
        openjdk_package = Package(
            type="windows-program",
            name="OpenJDK",
            version=version,
            license_expression="gpl-2.0 WITH oracle-openjdk-classpath-exception-2.0",
            copyright="Copyright (c) Oracle and/or its affiliates",
            homepage_url="http://openjdk.java.net/",
        )
        tag_installed_package_files(
            project=project,
            root_dir_pattern=root_path,
            package=openjdk_package,
        )

0 comments on commit 00a1cdd

Please sign in to comment.