Skip to content

Commit

Permalink
✨ Add extract_licences_from_wheel
Browse files Browse the repository at this point in the history
  • Loading branch information
ddelange committed Feb 9, 2021
1 parent d2f9395 commit 2144921
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 1 deletion.
17 changes: 17 additions & 0 deletions src/pipgrip/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from pipgrip.libs.mixology.failure import SolverFailure
from pipgrip.libs.mixology.package import Package
from pipgrip.libs.mixology.version_solver import VersionSolver
from pipgrip.licenses import get_licenses
from pipgrip.package_source import PackageSource
from pipgrip.pipper import install_packages, read_requirements

Expand Down Expand Up @@ -312,6 +313,11 @@ def render_lock(packages, include_dot=True, sort=False):
is_flag=True,
help="Include pre-release and development versions. By default, pip implicitly excludes pre-releases (unless specified otherwise by PEP 440).",
)
@click.option(
"--detect-licenses",
is_flag=True,
help="Detect and extract license info for each dependency. Completeness can not be guaranteed.",
)
@click.option(
"-v",
"--verbose",
Expand Down Expand Up @@ -339,6 +345,7 @@ def main(
index_url,
extra_index_url,
pre,
detect_licenses,
verbose,
):
if verbose == 0:
Expand Down Expand Up @@ -437,6 +444,16 @@ def main(
source, decision_packages
)

if detect_licenses:
licenses = {
package_key: get_licenses(
**source._packages_metadata[package_key][package_version]
)
for package_key, package_version in packages_flat.items()
}
# TODO add to the various output formats
logger.info("Licenses:\n%s", json.dumps(licenses))

if lock:
with io.open(
os.path.join(os.getcwd(), "pipgrip.lock"), mode="w", encoding="utf-8"
Expand Down
56 changes: 56 additions & 0 deletions src/pipgrip/licenses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from collections import OrderedDict
from zipfile import ZipFile


def extract_licences_from_wheel(wheel_fname):
    """Read every file in a wheel whose own filename contains 'license'.

    The match is case-insensitive and is applied to the last path component
    only, so a directory that merely has 'license' in its name does not pull
    in all of the files below it.

    :param wheel_fname: path to (or file object of) the wheel / zip archive
    :return: OrderedDict mapping archive member name to the raw bytes
        returned by ``ZipFile.read``, in archive order
    """
    # missing AUTHORS, COPYING and other legal files
    licenses = OrderedDict()

    # the context manager guarantees the archive handle is released again
    with ZipFile(wheel_fname, "r") as zfp:
        for name in zfp.namelist():
            # zip members use "/" as separator; normalise stray backslashes
            # so both separators are honoured when taking the basename
            basename = name.replace("\\", "/").rsplit("/", 1)[-1]
            if "license" in basename.lower():
                licenses[name] = zfp.read(name)

    return licenses


def get_licenses(wheel_fname, wheel_metadata, **kwargs):
    """Extract copyright related info using a wheel as input.

    :param wheel_fname: path to the wheel, passed on to
        ``extract_licences_from_wheel``
    :param wheel_metadata: mapping that is queried (via ``.get``) for the
        keys ``home_page``, ``project_urls``, ``author`` and ``author_email``
    :param kwargs: ignored; lets callers splat a larger metadata dict
    :return: OrderedDict with the keys ``author``, ``author_email``,
        ``project_urls`` (OrderedDict of label -> url) and ``licenses``
        (whatever ``extract_licences_from_wheel`` returns)
    """
    # parse all urls mentioned in wheel_metadata
    project_urls = OrderedDict()
    home_page = wheel_metadata.get("home_page", "")
    if home_page:
        project_urls["home_page"] = home_page
    # `or ()` tolerates the key being present but set to None
    for entry in wheel_metadata.get("project_urls") or ():
        # entries look like "Label, https://..."; cut at the first comma only
        # so a comma inside the url can not break the label/url pairing, and
        # an entry without any comma ends up as a label with an empty url
        label, _, url = entry.partition(",")
        project_urls[label.strip()] = url.strip()

    # first attempt at getting licenses based on filename
    # e.g. for pip this is incomplete as the pip wheel doesn't contain vendored licenses
    #   https://github.com/pypa/pip/tree/21.0.1/src/pip/_vendor
    #   try `pip download pip --no-deps --no-binary :all:` and it will start crashing hard
    #   https://github.com/pypa/pip/issues/1884
    # e.g. for matplotlib, wheels do not reproduce matplotlib's LICENSE
    #   https://github.com/matplotlib/matplotlib/tree/v3.3.4/LICENSE
    licenses = extract_licences_from_wheel(wheel_fname)

    # potential fallbacks (already incorrect as it's not found in the bdist_wheel used for installation):
    # - use sdist instead (additional downloads):
    #   - scan for sdist on project_urls or [warehouse json api](https://warehouse.readthedocs.io/api-reference/json.html)
    #   - download, unarchive and run scancode-toolkit
    # - existing databases:
    #   - https://libraries.io/pypi (detection method unverified)
    #   - https://clearlydefined.io/?type=pypi (uses scancode-toolkit)
    # - machine readable spdx classifiers [ref](https://softwareengineering.stackexchange.com/a/381907/346730)
    # - other license headers

    wheel_info = OrderedDict(
        (
            ("author", wheel_metadata.get("author", "")),
            ("author_email", wheel_metadata.get("author_email", "")),
            ("project_urls", project_urls),
            ("licenses", licenses),
        )
    )

    return wheel_info
2 changes: 1 addition & 1 deletion src/pipgrip/package_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def discover_and_add(self, package): # type: (str, str) -> None
deps=to_create["requires"],
)

# currently unused
# currently only used for licenses
if req.key not in self._packages_metadata:
self._packages_metadata[req.key] = {}
to_create["pip_string"] = req.__str__()
Expand Down

0 comments on commit 2144921

Please sign in to comment.