From 1610d0b75fb00290684506705cca2b3227976c6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= Date: Fri, 19 Jul 2024 14:00:11 +0200 Subject: [PATCH 1/9] Separate error listing from formatting in format_lines This allows to add more formaters based on the same error list. --- AUTHORS.rst | 1 + src/reuse/lint.py | 59 ++++++++++++++++++++++++++--------------------- 2 files changed, 34 insertions(+), 26 deletions(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index 6bb97ef2..466e9854 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -150,3 +150,4 @@ Contributors - rajivsunar07 <56905029+rajivsunar07@users.noreply.github.com> - Сергій - Mersho +- Michal Čihař diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 257d41b9..bef9a323 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -2,6 +2,7 @@ # SPDX-FileCopyrightText: 2022 Florian Snow # SPDX-FileCopyrightText: 2023 DB Systel GmbH # SPDX-FileCopyrightText: 2024 Nico Rikken +# SPDX-FileCopyrightText: 2024 Michal Čihař # # SPDX-License-Identifier: GPL-3.0-or-later @@ -9,6 +10,8 @@ the reports and printing some conclusions. """ +from __future__ import annotations + import json import sys from argparse import ArgumentParser, Namespace @@ -16,7 +19,7 @@ from io import StringIO from pathlib import Path from textwrap import TextWrapper -from typing import IO, Any, Optional +from typing import IO, Any, Iterable, Optional from . import __REUSE_version__ from .project import Project @@ -264,7 +267,9 @@ def custom_serializer(obj: Any) -> Any: ) -def format_lines(report: ProjectReport) -> str: +def get_errors( + report: ProjectReport, +) -> Iterable[tuple[Path | str | None, str]]: """Formats data dictionary as plaintext strings to be printed to sys.stdout Sorting of output is not guaranteed. Symbolic links can result in multiple entries per file. @@ -275,7 +280,6 @@ def format_lines(report: ProjectReport) -> str: Returns: String (in plaintext) that can be output to sys.stdout """ - output = StringIO() def license_path(lic: str) -> Optional[Path]: """Resolve a license identifier to a license path.""" @@ -285,55 +289,58 @@ def license_path(lic: str) -> Optional[Path]: # Bad licenses for lic, files in sorted(report.bad_licenses.items()): for path in sorted(files): - output.write( - _("{path}: bad license {lic}\n").format(path=path, lic=lic) - ) + yield (path, _("bad license {lic}").format(lic=lic)) # Deprecated licenses for lic in sorted(report.deprecated_licenses): lic_path = license_path(lic) - output.write( - _("{lic_path}: deprecated license\n").format(lic_path=lic_path) - ) + yield (lic_path, _("deprecated license")) # Licenses without extension for lic in sorted(report.licenses_without_extension): lic_path = license_path(lic) - output.write( - _("{lic_path}: license without file extension\n").format( - lic_path=lic_path - ) - ) + yield (lic_path, _("license without file extension")) # Unused licenses for lic in sorted(report.unused_licenses): lic_path = license_path(lic) - output.write( - _("{lic_path}: unused license\n").format(lic_path=lic_path) - ) + yield lic_path, _("unused license") # Missing licenses for lic, files in sorted(report.missing_licenses.items()): for path in sorted(files): - output.write( - _("{path}: missing license {lic}\n").format( - path=path, lic=lic - ) - ) + yield (path, _("missing license {lic}").format(lic=lic)) # Read errors for path in sorted(report.read_errors): - output.write(_("{path}: read error\n").format(path=path)) + yield (path, _("read error")) # Without licenses for path in report.files_without_licenses: - output.write(_("{path}: no license identifier\n").format(path=path)) + yield (path, _("no license identifier")) # Without copyright for path in report.files_without_copyright: - output.write(_("{path}: no copyright notice\n").format(path=path)) + yield (path, _("no copyright notice")) - return output.getvalue() + +def format_lines(report: ProjectReport) -> str: + """Formats data dictionary as plaintext strings to be printed to sys.stdout + Sorting of output is not guaranteed. + Symbolic links can result in multiple entries per file. + + Args: + report: ProjectReport data + + Returns: + String (in plaintext) that can be output to sys.stdout + """ + if not report.is_compliant: + return "".join( + f"{path}: {error}\n" for path, error in get_errors(report) + ) + + return "" def run(args: Namespace, project: Project, out: IO[str] = sys.stdout) -> int: From 350a3d3d3fd82de208a13752880559bf7f41aa3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= Date: Fri, 19 Jul 2024 14:12:39 +0200 Subject: [PATCH 2/9] Add GitHub workflow commands output This allows GitHub to parse it and display annotations on pull requests. --- changelog.d/added/github-format.md | 1 + docs/man/reuse-lint.rst | 4 +++ src/reuse/lint.py | 33 ++++++++++++++++++++-- tests/test_lint.py | 44 +++++++++++++++++++++++++++++- 4 files changed, 79 insertions(+), 3 deletions(-) create mode 100644 changelog.d/added/github-format.md diff --git a/changelog.d/added/github-format.md b/changelog.d/added/github-format.md new file mode 100644 index 00000000..1086e384 --- /dev/null +++ b/changelog.d/added/github-format.md @@ -0,0 +1 @@ +- Added `--github` output option for `lint`. diff --git a/docs/man/reuse-lint.rst b/docs/man/reuse-lint.rst index 75bfcf94..2a086a87 100644 --- a/docs/man/reuse-lint.rst +++ b/docs/man/reuse-lint.rst @@ -96,6 +96,10 @@ Options Output one line per error, prefixed by the file path. +.. option:: -g, --github + + Output one line per error in GitHub workflow command syntax. + .. option:: -h, --help Display help and exit. diff --git a/src/reuse/lint.py b/src/reuse/lint.py index bef9a323..126c0485 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -47,6 +47,12 @@ def add_arguments(parser: ArgumentParser) -> None: action="store_true", help=_("formats output as errors per line"), ) + mutex_group.add_argument( + "-g", + "--github", + action="store_true", + help=_("formats output as GitHub workflow commands per line"), + ) # pylint: disable=too-many-branches,too-many-statements,too-many-locals @@ -270,7 +276,7 @@ def custom_serializer(obj: Any) -> Any: def get_errors( report: ProjectReport, ) -> Iterable[tuple[Path | str | None, str]]: - """Formats data dictionary as plaintext strings to be printed to sys.stdout + """Returns data dictionary iterable of paths and errors. Sorting of output is not guaranteed. Symbolic links can result in multiple entries per file. @@ -278,7 +284,7 @@ def get_errors( report: ProjectReport data Returns: - String (in plaintext) that can be output to sys.stdout + Iterable of tuples containing path and error message. """ def license_path(lic: str) -> Optional[Path]: @@ -343,6 +349,27 @@ def format_lines(report: ProjectReport) -> str: return "" +def format_github(report: ProjectReport) -> str: + """Formats data dictionary as GitHub workflow commands + to be printed to sys.stdout + Sorting of output is not guaranteed. + Symbolic links can result in multiple entries per file. + + Args: + report: ProjectReport data + + Returns: + String (in plaintext) that can be output to sys.stdout + """ + if not report.is_compliant: + return "".join( + f"::error file={path}::{error}\n" + for path, error in get_errors(report) + ) + + return "" + + def run(args: Namespace, project: Project, out: IO[str] = sys.stdout) -> int: """List all non-compliant files.""" report = ProjectReport.generate( @@ -355,6 +382,8 @@ def run(args: Namespace, project: Project, out: IO[str] = sys.stdout) -> int: out.write(format_json(report)) elif args.lines: out.write(format_lines(report)) + elif args.github: + out.write(format_github(report)) else: out.write(format_plain(report)) diff --git a/tests/test_lint.py b/tests/test_lint.py index 6ca93f9f..5038b435 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: 2019 Free Software Foundation Europe e.V. # SPDX-FileCopyrightText: 2022 Florian Snow # SPDX-FileCopyrightText: 2024 Nico Rikken +# SPDX-FileCopyrightText: 2024 Michal Čihař # # SPDX-License-Identifier: GPL-3.0-or-later @@ -11,7 +12,7 @@ from conftest import cpython, posix -from reuse.lint import format_lines, format_plain +from reuse.lint import format_github, format_lines, format_plain from reuse.project import Project from reuse.report import ProjectReport @@ -271,4 +272,45 @@ def test_lint_lines_read_errors(fake_repository): assert "read error" in result +def test_lint_github_output(fake_repository): + """Complete test for lint with github output.""" + # Prepare a repository that includes all types of situations: + # missing_licenses, unused_licenses, bad_licenses, deprecated_licenses, + # licenses_without_extension, files_without_copyright, + # files_without_licenses, read_errors + (fake_repository / "invalid-license.py").write_text( + "SPDX-License-Identifier: invalid" + ) + (fake_repository / "no-license.py").write_text( + "SPDX-FileCopyrightText: Jane Doe" + ) + (fake_repository / "LICENSES" / "invalid-license-text").write_text( + "An invalid license text" + ) + (fake_repository / "LICENSES" / "Nokia-Qt-exception-1.1.txt").write_text( + "Deprecated" + ) + (fake_repository / "LICENSES" / "MIT").write_text("foo") + (fake_repository / "file with spaces.py").write_text("foo") + + project = Project.from_directory(fake_repository) + report = ProjectReport.generate(project) + + lines_result = format_github(report) + lines_result_lines = lines_result.splitlines() + + assert len(lines_result_lines) == 12 + + for line in lines_result_lines: + assert re.match("::error file=.+::[^:]+", line) + + assert lines_result.count("invalid-license.py") == 3 + assert lines_result.count("no-license.py") == 1 + assert lines_result.count("LICENSES") == 6 + assert lines_result.count("invalid-license-text") == 3 + assert lines_result.count("Nokia-Qt-exception-1.1.txt") == 2 + assert lines_result.count("MIT") == 2 + assert lines_result.count("file with spaces.py") == 2 + + # REUSE-IgnoreEnd From b104679857651f7183eba6c17b2e8d6474977335 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20=C4=8Ciha=C5=99?= Date: Fri, 19 Jul 2024 14:28:53 +0200 Subject: [PATCH 3/9] Add support to change lint output format from environment Use REUSE_OUTPUT_FORMAT to allow overriding lint output formats in certain environments (like CI). --- changelog.d/added/format-env.md | 2 ++ docs/man/reuse-lint.rst | 9 +++++++++ src/reuse/lint.py | 32 ++++++++++++++++++++++---------- 3 files changed, 33 insertions(+), 10 deletions(-) create mode 100644 changelog.d/added/format-env.md diff --git a/changelog.d/added/format-env.md b/changelog.d/added/format-env.md new file mode 100644 index 00000000..3ee98ce4 --- /dev/null +++ b/changelog.d/added/format-env.md @@ -0,0 +1,2 @@ +- Added `REUSE_OUTPUT_FORMAT` environment variable to configure output for + `lint`. diff --git a/docs/man/reuse-lint.rst b/docs/man/reuse-lint.rst index 2a086a87..a750185e 100644 --- a/docs/man/reuse-lint.rst +++ b/docs/man/reuse-lint.rst @@ -103,3 +103,12 @@ Options .. option:: -h, --help Display help and exit. + +Environment +----------- + +.. envvar:: REUSE_OUTPUT_FORMAT + + Specifies output format, one of ``plain``, ``lines``, ``github``, ``json`` + + It behaves same as corresponding command line options. diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 126c0485..2f314e23 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -13,6 +13,7 @@ from __future__ import annotations import json +import os import sys from argparse import ArgumentParser, Namespace from gettext import gettext as _ @@ -376,15 +377,26 @@ def run(args: Namespace, project: Project, out: IO[str] = sys.stdout) -> int: project, do_checksum=False, multiprocessing=not args.no_multiprocessing ) - if args.quiet: - pass - elif args.json: - out.write(format_json(report)) - elif args.lines: - out.write(format_lines(report)) - elif args.github: - out.write(format_github(report)) - else: - out.write(format_plain(report)) + formatters = { + "json": format_json, + "lines": format_lines, + "github": format_github, + "plain": format_plain, + } + + if not args.quiet: + output_format = os.environ.get("REUSE_OUTPUT_FORMAT") + + if output_format is not None and output_format in formatters: + formatter = formatters[output_format] + out.write(formatter(report)) + elif args.json: + out.write(format_json(report)) + elif args.lines: + out.write(format_lines(report)) + elif args.github: + out.write(format_github(report)) + else: + out.write(format_plain(report)) return 0 if report.is_compliant else 1 From b1fc58b149938b51efa867153f8a38bd208d782f Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Mon, 22 Jul 2024 11:25:50 +0200 Subject: [PATCH 4/9] fixup! Separate error listing from formatting in format_lines Signed-off-by: Carmen Bianca BAKKER --- src/reuse/lint.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 2f314e23..ac4ae15e 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -20,7 +20,7 @@ from io import StringIO from pathlib import Path from textwrap import TextWrapper -from typing import IO, Any, Iterable, Optional +from typing import IO, Any, Generator, Optional from . import __REUSE_version__ from .project import Project @@ -276,7 +276,7 @@ def custom_serializer(obj: Any) -> Any: def get_errors( report: ProjectReport, -) -> Iterable[tuple[Path | str | None, str]]: +) -> Generator[tuple[Path | str | None, str], None, None]: """Returns data dictionary iterable of paths and errors. Sorting of output is not guaranteed. Symbolic links can result in multiple entries per file. From 121dc8b2ae1fc9ac8c4140e4f98aa70ac77ad75d Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Tue, 23 Jul 2024 10:02:51 +0200 Subject: [PATCH 5/9] Always expect a license identifier to resolve to a path I added a rather silly fallback. Maybe an error should be raised, but I don't want to deal with catching this very obscure error. Signed-off-by: Carmen Bianca BAKKER --- src/reuse/lint.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index ac4ae15e..584508e0 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -13,6 +13,7 @@ from __future__ import annotations import json +import logging import os import sys from argparse import ArgumentParser, Namespace @@ -20,12 +21,14 @@ from io import StringIO from pathlib import Path from textwrap import TextWrapper -from typing import IO, Any, Generator, Optional +from typing import IO, Any, Generator from . import __REUSE_version__ from .project import Project from .report import ProjectReport +_LOGGER = logging.getLogger(__name__) + def add_arguments(parser: ArgumentParser) -> None: """Add arguments to parser.""" @@ -288,9 +291,20 @@ def get_errors( Iterable of tuples containing path and error message. """ - def license_path(lic: str) -> Optional[Path]: + def license_path(lic: str) -> Path: """Resolve a license identifier to a license path.""" - return report.licenses.get(lic) + result = report.licenses.get(lic) + # TODO: This should never happen. It basically only happens if the + # report or the project is malformed. There should be a better way to do + # this. + if result is None: + _LOGGER.error( + _( + "license {lic} has no known path; this should not happen" + ).format(lic=lic) + ) + result = Path(report.path) + return result if not report.is_compliant: # Bad licenses From 5efed3bcf4ff6550d62ddaf3901f1515705b653f Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Tue, 23 Jul 2024 10:06:08 +0200 Subject: [PATCH 6/9] fixup! Separate error listing from formatting in format_lines Signed-off-by: Carmen Bianca BAKKER --- src/reuse/lint.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 584508e0..062cf293 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -21,7 +21,7 @@ from io import StringIO from pathlib import Path from textwrap import TextWrapper -from typing import IO, Any, Generator +from typing import IO, Any, Generator, NamedTuple from . import __REUSE_version__ from .project import Project @@ -30,6 +30,13 @@ _LOGGER = logging.getLogger(__name__) +class PathError(NamedTuple): + """A simple container with a path and an error message.""" + + path: Path + error: str + + def add_arguments(parser: ArgumentParser) -> None: """Add arguments to parser.""" mutex_group = parser.add_mutually_exclusive_group() @@ -279,7 +286,7 @@ def custom_serializer(obj: Any) -> Any: def get_errors( report: ProjectReport, -) -> Generator[tuple[Path | str | None, str], None, None]: +) -> Generator[PathError, None, None]: """Returns data dictionary iterable of paths and errors. Sorting of output is not guaranteed. Symbolic links can result in multiple entries per file. @@ -310,39 +317,41 @@ def license_path(lic: str) -> Path: # Bad licenses for lic, files in sorted(report.bad_licenses.items()): for path in sorted(files): - yield (path, _("bad license {lic}").format(lic=lic)) + yield PathError(path, _("bad license {lic}").format(lic=lic)) # Deprecated licenses for lic in sorted(report.deprecated_licenses): lic_path = license_path(lic) - yield (lic_path, _("deprecated license")) + yield PathError(lic_path, _("deprecated license")) # Licenses without extension for lic in sorted(report.licenses_without_extension): lic_path = license_path(lic) - yield (lic_path, _("license without file extension")) + yield PathError(lic_path, _("license without file extension")) # Unused licenses for lic in sorted(report.unused_licenses): lic_path = license_path(lic) - yield lic_path, _("unused license") + yield PathError(lic_path, _("unused license")) # Missing licenses for lic, files in sorted(report.missing_licenses.items()): for path in sorted(files): - yield (path, _("missing license {lic}").format(lic=lic)) + yield PathError( + path, _("missing license {lic}").format(lic=lic) + ) # Read errors for path in sorted(report.read_errors): - yield (path, _("read error")) + yield PathError(path, _("read error")) # Without licenses for path in report.files_without_licenses: - yield (path, _("no license identifier")) + yield PathError(path, _("no license identifier")) # Without copyright for path in report.files_without_copyright: - yield (path, _("no copyright notice")) + yield PathError(path, _("no copyright notice")) def format_lines(report: ProjectReport) -> str: From bf4f902f0b6f21fed2a021ec19ae863243eb20f9 Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Tue, 23 Jul 2024 10:11:17 +0200 Subject: [PATCH 7/9] fixup! Add GitHub workflow commands output Signed-off-by: Carmen Bianca BAKKER --- src/reuse/lint.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 062cf293..59309a43 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -287,15 +287,15 @@ def custom_serializer(obj: Any) -> Any: def get_errors( report: ProjectReport, ) -> Generator[PathError, None, None]: - """Returns data dictionary iterable of paths and errors. + """Returns a generator of paths and errors from a report. Sorting of output is not guaranteed. Symbolic links can result in multiple entries per file. Args: - report: ProjectReport data + report: :class:`ProjectReport` data Returns: - Iterable of tuples containing path and error message. + Generator of :class:`PathError`s. """ def license_path(lic: str) -> Path: @@ -374,16 +374,17 @@ def format_lines(report: ProjectReport) -> str: def format_github(report: ProjectReport) -> str: - """Formats data dictionary as GitHub workflow commands - to be printed to sys.stdout - Sorting of output is not guaranteed. - Symbolic links can result in multiple entries per file. + """Formats report as GitHub workflow commands to be printed to sys.stdout. + The format is documented at + . + Sorting of output is not guaranteed. Symbolic links can result in multiple + entries per file. Args: - report: ProjectReport data + report: :class:`ProjectReport` data Returns: - String (in plaintext) that can be output to sys.stdout + String (in plaintext) that can be output to sys.stdout. """ if not report.is_compliant: return "".join( From 51e38f9e05bcc4898fce97432a57f887fd279aaa Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Tue, 23 Jul 2024 10:13:19 +0200 Subject: [PATCH 8/9] fixup! Separate error listing from formatting in format_lines Signed-off-by: Carmen Bianca BAKKER --- src/reuse/lint.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 59309a43..3dd86ede 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -287,9 +287,8 @@ def custom_serializer(obj: Any) -> Any: def get_errors( report: ProjectReport, ) -> Generator[PathError, None, None]: - """Returns a generator of paths and errors from a report. - Sorting of output is not guaranteed. - Symbolic links can result in multiple entries per file. + """Returns a generator of paths and errors from a report. Sorting of output + is not guaranteed. Symbolic links can result in multiple entries per file. Args: report: :class:`ProjectReport` data @@ -355,15 +354,15 @@ def license_path(lic: str) -> Path: def format_lines(report: ProjectReport) -> str: - """Formats data dictionary as plaintext strings to be printed to sys.stdout - Sorting of output is not guaranteed. - Symbolic links can result in multiple entries per file. + """Formats report as plaintext strings to be printed to sys.stdout. Sorting + of output is not guaranteed. Symbolic links can result in multiple entries + per file. Args: - report: ProjectReport data + report: :class:`ProjectReport` data Returns: - String (in plaintext) that can be output to sys.stdout + String (in plaintext) that can be output to sys.stdout. """ if not report.is_compliant: return "".join( From d496aebd8e851cb63503652c82bdbd353c97f3ae Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Tue, 23 Jul 2024 10:14:52 +0200 Subject: [PATCH 9/9] Correct errors in docstrings The first implementation of this consumed the `reuse lint --json` output. It was later patched up to consume the ProjectReport instead, but the docstrings were apparently never adjusted accordingly. Signed-off-by: Carmen Bianca BAKKER --- src/reuse/lint.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 3dd86ede..6bab352d 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -68,10 +68,10 @@ def add_arguments(parser: ArgumentParser) -> None: # pylint: disable=too-many-branches,too-many-statements,too-many-locals def format_plain(report: ProjectReport) -> str: - """Formats data dictionary as plaintext string to be printed to sys.stdout + """Formats report as plaintext string to be printed to sys.stdout. Args: - report: ProjectReport data + report: :class:`ProjectReport` data Returns: String (in plaintext) that can be output to sys.stdout @@ -253,13 +253,13 @@ def format_plain(report: ProjectReport) -> str: def format_json(report: ProjectReport) -> str: - """Formats data dictionary as JSON string ready to be printed to sys.stdout + """Formats report as JSON string ready to be printed to sys.stdout. Args: - report: Dictionary containing formatted ProjectReport data + report: :class:`ProjectReport` data Returns: - String (representing JSON) that can be output to sys.stdout + String (representing JSON) that can be output to sys.stdout. """ def custom_serializer(obj: Any) -> Any: