Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
b36ab37
Add code to parse MSI files for metadata #2565
JonoYang Jun 18, 2021
fe2c9b2
Save all MSI data to extra_data #2565
JonoYang Jun 18, 2021
1e2fc67
Run msiinfo with commoncode.command.execute #2565
JonoYang Jun 19, 2021
4dad10e
Add test for MSI parser #2559
JonoYang Jun 19, 2021
1fea0df
Hook up MSI parsing #2559
JonoYang Jun 22, 2021
0630282
Report installed Windows programs #2565
JonoYang Jun 23, 2021
ec3189e
Add Package for installed Windows programs #2565
JonoYang Jun 24, 2021
9f947b5
Add tests for registry parsing #2565
JonoYang Jun 25, 2021
e6d1ba4
Create functions for use in scancode.io pipeline
JonoYang Jun 25, 2021
97cca61
Check multiple locations for registry files #2565
JonoYang Jun 26, 2021
2b791c3
Catch NoRegistrySubkeysException #2565
JonoYang Jun 26, 2021
76a58ad
Split windows_helper.py into two files #2565
JonoYang Jul 7, 2021
4d3acda
Refactor report_installed_programs #2565
JonoYang Jul 7, 2021
6ad8902
Make functions more conducive to testing #2559
JonoYang Jul 8, 2021
8c14fe6
Report other known program files #2565
JonoYang Jul 8, 2021
7dd1f3e
Add and update win_reg tests #2565
JonoYang Jul 8, 2021
d626f61
Add test for get_installed_files #2565
JonoYang Jul 9, 2021
31bc4cc
Parse Microsoft Update Manifest files #2565
JonoYang Jul 10, 2021
259d4a1
Add regipy and packagedcode-msitools as deps #2565
JonoYang Jul 12, 2021
20d2d25
Update test results #2565
JonoYang Jul 14, 2021
c760017
Use relative file paths for installed_files #2565
JonoYang Jul 15, 2021
a77c617
Merge branch 'develop' into 2565-detect-installed-windows-programs
JonoYang Jul 15, 2021
07027d9
Sort installed_files by path #2565
JonoYang Jul 15, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ chardet==4.0.0
click==8.0.1
colorama==0.4.4
commoncode==21.6.11
construct==2.10.67
cryptography==3.4.7
debian-inspector==21.5.25
dparse==0.5.1
Expand All @@ -24,17 +25,21 @@ gemfileparser==0.8.0
html5lib==1.1
idna==2.10
importlib-metadata==4.4.0
inflection==0.5.1
intbitset==2.4.1
isodate==0.6.0
jaraco.functools==3.3.0
javaproperties==0.8.0
Jinja2==3.0.1
jsonlines==2.0.0
jsonstreams==0.6.0
license-expression==21.6.14
Logbook==1.5.3
lxml==4.6.3
MarkupSafe==2.0.1
more-itertools==8.8.0
normality==2.2.2
packagedcode-msitools==0.101.210706
packageurl-python==0.9.4
packaging==20.9
patch==1.16
Expand All @@ -52,8 +57,10 @@ Pygments==2.9.0
pygmars==0.7.0b2
pymaven-patch==0.3.0
pyparsing==2.4.7
pytz==2021.1
PyYAML==5.4.1
rdflib==5.0.0
regipy==1.9.3
requests==2.25.1
rpm-inspector-rpm==4.16.1.3.210404
saneyaml==0.5.2
Expand All @@ -63,8 +70,10 @@ six==1.16.0
sortedcontainers==2.4.0
soupsieve==2.2.1
spdx-tools==0.6.1
tabulate==0.8.9
text-unidecode==1.3
toml==0.10.2
tqdm==4.61.2
typecode==21.6.1
typecode-libmagic==5.39.210531
typing==3.6.6
Expand Down
2 changes: 2 additions & 0 deletions setup-mini.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ full =
# linux-only package handling
packages =
rpm_inspector_rpm >= 4.16.1.3; platform_system == 'Linux'
regipy >= 1.9.3; platform_system == 'Linux'
packagedcode_msitools >= 0.101.210706; platform_system == 'Linux'

dev =
pytest
Expand Down
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,8 @@ full =
# linux-only package handling
packages =
rpm_inspector_rpm >= 4.16.1.3; platform_system == 'Linux'
regipy >= 1.9.3; platform_system == 'Linux'
packagedcode_msitools >= 0.101.210706; platform_system == 'Linux'

dev =
pytest
Expand Down
6 changes: 5 additions & 1 deletion src/packagedcode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from packagedcode import haxe
from packagedcode import maven
from packagedcode import models
from packagedcode import msi
from packagedcode import npm
from packagedcode import nuget
from packagedcode import opam
Expand All @@ -32,6 +33,8 @@
from packagedcode import rpm
from packagedcode import rubygems
from packagedcode import win_pe
from packagedcode import windows


# Note: the order matters: from the most to the least specific
# Package classes MUST be added to this list to be active
Expand Down Expand Up @@ -67,7 +70,6 @@
pypi.PythonPackage,
golang.GolangPackage,
models.CabPackage,
models.MsiInstallerPackage,
models.InstallShieldPackage,
models.NSISInstallerPackage,
nuget.NugetPackage,
Expand All @@ -83,6 +85,8 @@
win_pe.WindowsExecutable,
readme.ReadmePackage,
build.MetadataBzl,
msi.MsiInstallerPackage,
windows.MicrosoftUpdateManifestPackage,
pubspec.PubspecPackage,
]

Expand Down
8 changes: 0 additions & 8 deletions src/packagedcode/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -810,14 +810,6 @@ class CabPackage(Package):
default_type = 'cab'


@attr.s()
class MsiInstallerPackage(Package):
filetypes = ('msi installer',)
mimetypes = ('application/x-msi',)
extensions = ('.msi',)
default_type = 'msi'


@attr.s()
class InstallShieldPackage(Package):
filetypes = ('installshield',)
Expand Down
180 changes: 180 additions & 0 deletions src/packagedcode/msi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# ScanCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/scancode-toolkit for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import os
import re
import warnings
from shutil import which

import attr

from commoncode.command import execute
from commoncode.command import find_in_path
from commoncode.system import on_linux
from commoncode.version import VERSION_PATTERNS_REGEX
from packagedcode import models


MSIINFO_BIN_LOCATION = 'packagedcode_msitools.msiinfo'


def get_msiinfo_bin_location(_cache=[]):
    """
    Return the binary location for msiinfo from either:
    - a plugin-provided path,
    - the system PATH.
    Raise an Exception if no msiinfo command can be found.

    A successfully resolved location is memoized in the `_cache` default
    argument, which is intentionally a shared mutable list: later calls return
    the cached value without repeating the lookup. Callers are not expected to
    pass `_cache` themselves.
    """
    if _cache:
        return _cache[0]

    # imported here rather than at module level, as in the original code
    from plugincode.location_provider import get_location

    # try a plugin-provided path first
    cmd_loc = get_location(MSIINFO_BIN_LOCATION)

    # try the PATH
    if not cmd_loc:
        cmd = 'msiinfo'
        cmd_loc = find_in_path(cmd)

        if not cmd_loc:
            cmd_loc = which(cmd)

        if cmd_loc:
            # The plugin is named packagedcode-msitools (same name as in the
            # exception message below); the warning used to point users to a
            # differently named "plugincode-msitools" plugin.
            warnings.warn(
                'Using "msiinfo" command found in the PATH. '
                'Install instead a packagedcode-msitools plugin for best support.'
            )

    # Nothing is cached on failure, so a later call retries the lookup.
    if not cmd_loc or not os.path.isfile(cmd_loc):
        raise Exception(
            'CRITICAL: msiinfo not provided. '
            'Unable to continue: you need to install the plugin packagedcode-msitools'
        )
    _cache.append(cmd_loc)
    return cmd_loc


class MsiinfoException(Exception):
    """
    Raised when running `msiinfo` on an MSI file writes anything to stderr.
    """


def parse_msiinfo_suminfo_output(output_string):
    """
    Return a mapping of field name to value built from the text printed by
    `msiinfo suminfo`.

    Each line is cut at its first ":"; the text before it is the key (kept
    as-is) and the text after it, stripped of surrounding whitespace, is the
    value. Lines whose key part is empty (blank lines, lines starting with
    ":") are ignored. When a key occurs more than once, the last line wins.
    """
    # partition() only splits on the leftmost ":", so values that contain
    # colons themselves (such as timestamps) are kept whole.
    split_lines = (line.partition(':') for line in output_string.splitlines())
    return {
        field: raw_value.strip()
        for field, _separator, raw_value in split_lines
        if field
    }


def get_msi_info(location):
    """
    Return a dictionary of the summary information reported by
    `msiinfo suminfo` for the MSI file at `location`.

    The `msiinfo` command must be available, either from the
    `packagedcode-msitools` plugin or from an `msitools` installation found
    in the PATH.

    Raise an MsiinfoException carrying the stderr text if the command wrote
    anything to stderr.
    """
    msiinfo_args = ['suminfo', location]
    # the return code is not inspected: only stderr content signals a failure
    _rc, out, err = execute(
        cmd_loc=get_msiinfo_bin_location(),
        args=msiinfo_args,
    )
    if not err:
        return parse_msiinfo_suminfo_output(out)

    prefix = f'Error encountered when reading MSI information from {location}: '
    raise MsiinfoException(prefix + err)


def get_version_from_subject_line(subject_line):
    """
    Return a version string found in `subject_line`, or None if no known
    version pattern matches.

    `subject_line` is the value of the `Subject` field printed by
    `msiinfo suminfo <msi installer file>`, which sometimes embeds the version
    of the packaged software. The patterns from VERSION_PATTERNS_REGEX() are
    tried in order and the first one that matches is used. The returned text
    is prefixed with "v " unless it already starts with a "v" or "V".
    """
    candidates = (
        re.search(regex, subject_line)
        for regex in VERSION_PATTERNS_REGEX()
    )
    # lazily stop at the first pattern that matches
    first_match = next((found for found in candidates if found), None)
    if first_match is None:
        return None

    matched_text = first_match.group(0)
    already_prefixed = matched_text.lower().startswith('v')
    return matched_text if already_prefixed else f'v {matched_text}'


def create_package_from_msiinfo_results(msiinfo_results):
    """
    Return an MsiInstallerPackage built from `msiinfo_results`, a dictionary
    of `msiinfo suminfo` fields.

    Field mapping:
    - `Author` becomes a single party with the "author" role, if not empty,
    - `Subject` is used verbatim as the package name,
    - the version is whatever get_version_from_subject_line() extracts from
      `Subject`,
    - `Comments` is the description and `Keywords` the keywords,
    - the whole `msiinfo_results` dictionary is stored as extra_data.
    """
    author = msiinfo_results.get('Author', '')
    if author:
        parties = [models.Party(type=None, role='author', name=author)]
    else:
        parties = []

    # The Subject field holds the package name most of the time, so it is
    # used as-is for the name. Its layout varies from one MSI to another,
    # which is why the version is extracted by pattern matching.
    subject = msiinfo_results.get('Subject', '')

    return MsiInstallerPackage(
        name=subject,
        version=get_version_from_subject_line(subject),
        description=msiinfo_results.get('Comments', ''),
        parties=parties,
        keywords=msiinfo_results.get('Keywords', ''),
        extra_data=msiinfo_results,
    )


def msi_parse(location):
    """
    Return an MsiInstallerPackage for the MSI file at `location`, built from
    the summary information that get_msi_info() collects for it.
    """
    return create_package_from_msiinfo_results(get_msi_info(location))


@attr.s()
class MsiInstallerPackage(models.Package):
    """
    Package for Windows Installer (.msi) files.
    """
    # File type, mime type and extension values declared for this package
    # type (presumably consumed by the models.Package machinery -- confirm
    # there).
    filetypes = ('msi installer',)
    mimetypes = ('application/x-msi',)
    extensions = ('.msi',)
    default_type = 'msi'

    @classmethod
    def recognize(cls, location):
        """
        Yield an MsiInstallerPackage parsed from the file at `location`.
        Yield nothing when `on_linux` is false.
        """
        if not on_linux:
            return
        yield msi_parse(location)
14 changes: 7 additions & 7 deletions src/packagedcode/win_pe.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,10 +276,10 @@ def parse(location):
infos = pe_info(location)

version = get_first(
infos,
'Full Version',
'ProductVersion',
'FileVersion',
infos,
'Full Version',
'ProductVersion',
'FileVersion',
'Assembly Version',
)
release_date = get_first(infos, 'BuildDate')
Expand All @@ -289,9 +289,9 @@ def parse(location):
release_date = release_date.replace('/', '-')

name = get_first(
infos,
'ProductName',
'OriginalFilename',
infos,
'ProductName',
'OriginalFilename',
'InternalName',
)
copyr = get_first(infos, 'LegalCopyright')
Expand Down
Loading