Skip to content

Commit

Permalink
Generalize regex expressions #238
Browse files Browse the repository at this point in the history
    * Modify regex used for Windows container analysis so it can be used outside the context of a Windows Docker image
    * Update tests

Signed-off-by: Jono Yang <[email protected]>
  • Loading branch information
JonoYang committed Aug 2, 2021
1 parent b216c6e commit aaafc04
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 27 deletions.
59 changes: 32 additions & 27 deletions scanpipe/pipes/windows.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,21 @@ def tag_uninteresting_windows_codebase_resources(project):
qs.filter(lookups).update(status="ignored-not-interesting")


def tag_installed_package_files(project, root_dir_pattern, package):
def tag_installed_package_files(project, root_dir_pattern, package, q_objects=[]):
"""
For all CodebaseResources from `project` whose `rootfs_path` starts with
`root_dir_pattern`, add `package` to the discovered_packages of each
CodebaseResource and set the status
CodebaseResource and set the status.
If there are Q() objects in `q_objects`, then those Q() objects are chained
to the initial query (`lookup`) using AND to allow a more specific query for
package files.
"""
qs = project.codebaseresources.no_status()
installed_package_files = qs.filter(rootfs_path__startswith=root_dir_pattern)
lookup = Q(rootfs_path__startswith=root_dir_pattern)
for q_object in q_objects:
lookup &= q_object
installed_package_files = qs.filter(lookup)
# If we find files whose names start with `root_dir_pattern`, we consider
# these files to be part of the Package `package` and tag these files as
# such
Expand Down Expand Up @@ -115,15 +122,14 @@ def tag_known_software(project):
image layers.
"""
qs = project.codebaseresources.no_status()
python_root_directory_name_pattern = r"/Files/Python(\d*)"
python_root_directory_name_pattern = r"(^.*Python(\d*))/.*$"
python_root_directory_name_pattern_compiled = re.compile(
python_root_directory_name_pattern
)
python_paths_by_versions = {}
for python_codebase_resource in qs.filter(
rootfs_path__regex=python_root_directory_name_pattern
):
_, version, _ = re.split(
lookup = Q(rootfs_path__regex=python_root_directory_name_pattern)
for python_codebase_resource in qs.filter(lookup):
_, python_root_dir, version, _ = re.split(
python_root_directory_name_pattern_compiled,
python_codebase_resource.rootfs_path,
)
Expand All @@ -132,11 +138,13 @@ def tag_known_software(project):
if version in python_paths_by_versions:
continue
if version != "nv":
version_with_dots = ".".join(digit for digit in version)
python_paths_by_versions[version_with_dots] = f"/Files/Python{version}"
else:
python_paths_by_versions[version] = "/Files/Python"
version = ".".join(digit for digit in version)
python_paths_by_versions[version] = python_root_dir

# We do not want to tag the files in the `site-packages` directory as being
# from Python proper. The packages found here are oftentimes third-party
# packages from outside the Python foundation
q_objects = [~Q(rootfs_path__icontains='site-packages')]
for python_version, python_path in python_paths_by_versions.items():
python_package = Package(
type="windows-program",
Expand All @@ -147,19 +155,19 @@ def tag_known_software(project):
homepage_url="https://www.python.org/",
)
tag_installed_package_files(
project=project, root_dir_pattern=python_path, package=python_package
project=project, root_dir_pattern=python_path, package=python_package, q_objects=q_objects
)

qs = project.codebaseresources.no_status()
openjdk_root_directory_name_pattern = r"/Files/(open)?jdk(-((\d*)(\.\d+)*))*"
openjdk_root_directory_name_pattern = r"(^.*/(open)?jdk(-((\d*)(\.\d+)*))*)/.*$"
openjdk_root_directory_name_pattern_compiled = re.compile(
openjdk_root_directory_name_pattern
)
openjdk_paths_by_versions = {}
for openjdk_codebase_resource in qs.filter(
rootfs_path__regex=openjdk_root_directory_name_pattern
):
_, open_prefix, _, openjdk_version, _, _, _ = re.split(
_, openjdk_root_path, open_prefix, _, openjdk_version, _, _, _ = re.split(
openjdk_root_directory_name_pattern_compiled,
openjdk_codebase_resource.rootfs_path,
)
Expand All @@ -169,11 +177,7 @@ def tag_known_software(project):
open_prefix = ""
if openjdk_version in openjdk_paths_by_versions:
continue
if openjdk_version != "nv":
openjdk_path = f"/Files/{open_prefix}jdk-{openjdk_version}"
else:
openjdk_path = f"/Files/{open_prefix}jdk"
openjdk_paths_by_versions[openjdk_version] = openjdk_path
openjdk_paths_by_versions[openjdk_version] = openjdk_root_path

for openjdk_version, openjdk_path in openjdk_paths_by_versions.items():
openjdk_package = Package(
Expand All @@ -198,21 +202,22 @@ def tag_known_software(project):
def tag_program_files(project):
"""
Report all subdirectories of Program Files and Program Files (x86) as
packages
Packages
If a Package is detected in this manner, then we will attempt to determine
the version from the path. If a version cannot be determined, a version of
`nr` will be set for the Package.
"""
qs = project.codebaseresources.no_status()
# Get all files from Program_Files and Program_Files_(x86)
# Get all files from Program Files and Program Files (x86)
program_files_one_directory_below_pattern = (
r"(/Files/Program Files( \(x86\))?/([^/]+))"
r"(^.*Program Files( \(x86\))?/([^/]+))"
)
program_files_one_directory_below_pattern_compiled = re.compile(
program_files_one_directory_below_pattern
)
program_files_dirname_by_path = {}
lookup = Q(rootfs_path__startswith="/Files/Program Files") | Q(
rootfs_path__startswith="/Files/Program Files (x86)"
)
for program_file in qs.filter(lookup):
for program_file in qs.filter(rootfs_path__regex="^.*/Program Files( \(x86\))?"):
_, program_files_subdir, _, dirname, _ = re.split(
program_files_one_directory_below_pattern_compiled, program_file.rootfs_path
)
Expand Down
7 changes: 7 additions & 0 deletions scanpipe/tests/test_pipes.py
Original file line number Diff line number Diff line change
Expand Up @@ -815,16 +815,23 @@ def test_scanpipe_pipes_windows_tag_known_software(self):
path="root/Files/Python39/python3.9",
rootfs_path="/Files/Python39/python3.9.exe",
)
resource5 = CodebaseResource.objects.create(
project=p1,
path="root/Files/Python39/Lib/site-packages/pip-21.1.3.dist-info/WHEEL",
rootfs_path="/Files/Python39/Lib/site-packages/pip-21.1.3.dist-info/WHEEL",
)

windows.tag_known_software(p1)
resource1.refresh_from_db()
resource2.refresh_from_db()
resource3.refresh_from_db()
resource4.refresh_from_db()
resource5.refresh_from_db()
self.assertEqual("installed-package", resource1.status)
self.assertEqual("installed-package", resource2.status)
self.assertEqual("installed-package", resource3.status)
self.assertEqual("installed-package", resource4.status)
self.assertEqual("", resource5.status)

def test_scanpipe_pipes_windows_tag_program_files(self):
p1 = Project.objects.create(name="Analysis")
Expand Down

0 comments on commit aaafc04

Please sign in to comment.