From 3cd4e002a07b570f81711a41d04e5bbae2e5c7eb Mon Sep 17 00:00:00 2001 From: Nico Rikken Date: Wed, 10 Apr 2024 20:29:41 +0200 Subject: [PATCH 01/22] feat: lint output per line Adds an '-line' or '-l' option to the 'lint' command. Prints a line for each error, starting with the file to which the error belongs. This output can be a starting point for some parsers, in particular ones that implement something similar to Vim errorformat parsing. Related to https://github.com/fsfe/reuse-tool/issues/925 Additional work needed: - [ ] Needs tests - [ ] Error messages might have to be improved - [ ] Not all errors are found, as some issues aren't in the FileReports. This requires additional investigation. Signed-off-by: Nico Rikken --- src/reuse/lint.py | 46 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index e966a206..958d688a 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -19,7 +19,7 @@ from . import __REUSE_version__ from .project import Project -from .report import ProjectReport +from .report import FileReport, ProjectReport def add_arguments(parser: ArgumentParser) -> None: @@ -37,6 +37,12 @@ def add_arguments(parser: ArgumentParser) -> None: action="store_true", help=_("formats output as plain text"), ) + mutex_group.add_argument( + "-l", + "--lines", + action="store_true", + help=_("formats output as errors per line"), + ) # pylint: disable=too-many-branches,too-many-statements,too-many-locals @@ -257,6 +263,42 @@ def custom_serializer(obj: Any) -> Any: ) +def _output_per_file_report(report: FileReport) -> str: + """Formats data dictionary as plaintext strings to be printed to sys.stdout + + Args: + report: FileReport data + + Returns: + String (in plaintext) that can be output to sys.stdout + """ + output = StringIO() + if report.bad_licenses: + output.write(f"{report.name}: ") + output.write(_("bad licenses:")) + output.write(f" {' '.join(report.bad_licenses)}\n") + if report.missing_licenses: + output.write(f"{report.name}: ") + output.write(_("missing licenses:")) + output.write(f" {' '.join(report.missing_licenses)}\n") + return output.getvalue() + + +def format_lines(report: ProjectReport) -> str: + """Formats data dictionary as plaintext strings to be printed to sys.stdout + + Args: + report: ProjectReport data + + Returns: + String (in plaintext) that can be output to sys.stdout + """ + output = StringIO() + for f_report in report.file_reports: + output.write(_output_per_file_report(f_report)) + return output.getvalue() + + def run(args: Namespace, project: Project, out: IO[str] = sys.stdout) -> int: """List all non-compliant files.""" report = ProjectReport.generate( @@ -267,6 +309,8 @@ def run(args: Namespace, project: Project, out: IO[str] = sys.stdout) -> int: pass elif args.json: out.write(format_json(report)) + elif args.lines: + out.write(format_lines(report)) else: out.write(format_plain(report)) From 21a72983c4413d8743bb67355d3d7e220c66ee12 Mon Sep 17 00:00:00 2001 From: Nico Rikken Date: Sat, 20 Apr 2024 08:32:24 +0200 Subject: [PATCH 02/22] feat: lines_output based on ProjectReport Update to the format_lines variant, now based on the ProjectReport to include errors that relate to the licenses. Add a property to the Project that resembles the LICENSES/ directory to prevent duplication of resolving that. No additional ordering is done as it would only slow things down and it can easily be done by the user. Signed-off-by: Nico Rikken --- src/reuse/lint.py | 68 +++++++++++++++++++++++++++++--------------- src/reuse/project.py | 3 +- 2 files changed, 47 insertions(+), 24 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 958d688a..312ce747 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -19,7 +19,7 @@ from . import __REUSE_version__ from .project import Project -from .report import FileReport, ProjectReport +from .report import ProjectReport def add_arguments(parser: ArgumentParser) -> None: @@ -263,39 +263,61 @@ def custom_serializer(obj: Any) -> Any: ) -def _output_per_file_report(report: FileReport) -> str: +def format_lines(report: ProjectReport, licenses_directory: Path) -> str: """Formats data dictionary as plaintext strings to be printed to sys.stdout Args: - report: FileReport data + report: ProjectReport data Returns: String (in plaintext) that can be output to sys.stdout """ + # TODO: modify so that license_directory isn't needed output = StringIO() - if report.bad_licenses: - output.write(f"{report.name}: ") - output.write(_("bad licenses:")) - output.write(f" {' '.join(report.bad_licenses)}\n") - if report.missing_licenses: - output.write(f"{report.name}: ") - output.write(_("missing licenses:")) - output.write(f" {' '.join(report.missing_licenses)}\n") - return output.getvalue() + # TODO: perhaps merge with format_plain + if not report.is_compliant: + # Bad licenses + for lic, files in sorted(report.bad_licenses.items()): + for file in sorted(files): + output.write(_(f"{file}: bad license: {lic}\n")) -def format_lines(report: ProjectReport) -> str: - """Formats data dictionary as plaintext strings to be printed to sys.stdout + # TODO: maintain the exact path rather than assuming it. Also: is this + # based on the analysis of the license file? + # Deprecated licenses + for lic in sorted(report.deprecated_licenses): + license_path = licenses_directory.joinpath(lic) + output.write(_(f"{license_path}: deprecated license\n")) - Args: - report: ProjectReport data + # TODO: maintain the exact path rather than assuming it + # Licenses without extension + for lic in sorted(report.licenses_without_extension): + license_path = licenses_directory.joinpath(lic) + output.write(_(f"{license_path}: license without file extension\n")) + + # Missing licenses + for lic, files in sorted(report.missing_licenses.items()): + for file in sorted(files): + output.write(_(f"{file}: missing license: {lic}\n")) + + # TODO: maintain the exact path rather than assuming it + # Unused licenses + for lic in sorted(report.unused_licenses): + license_path = licenses_directory.joinpath(lic) + output.write(_(f"{license_path}: unused license\n")) + + # Read errors + for path in sorted(report.read_errors): + output.write(_(f"{path}: read error\n")) + + # Without licenses + for file in report.files_without_licenses: + output.write(_(f"{file}: without license\n")) + + # Without copyright + for file in report.files_without_copyright: + output.write(_(f"{file}: without copyright\n")) - Returns: - String (in plaintext) that can be output to sys.stdout - """ - output = StringIO() - for f_report in report.file_reports: - output.write(_output_per_file_report(f_report)) return output.getvalue() @@ -310,7 +332,7 @@ def run(args: Namespace, project: Project, out: IO[str] = sys.stdout) -> int: elif args.json: out.write(format_json(report)) elif args.lines: - out.write(format_lines(report)) + out.write(format_lines(report, project.licenses_directory)) else: out.write(format_plain(report)) diff --git a/src/reuse/project.py b/src/reuse/project.py index 1dc6acdf..653af126 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -75,6 +75,7 @@ def __init__( include_meson_subprojects: bool = False, ): self.root = Path(root) + self.licenses_directory = Path(root).joinpath("LICENSES") if vcs_strategy is None: vcs_strategy = VCSStrategyNone @@ -424,7 +425,7 @@ def _find_licenses(self) -> Dict[str, Path]: # TODO: This method does more than one thing. We ought to simplify it. license_files: Dict[str, Path] = {} - directory = str(self.root / "LICENSES/**") + directory = str(self.licenses_directory / "**") for path_str in glob.iglob(directory, recursive=True): path = Path(path_str) # For some reason, LICENSES/** is resolved even though it From e9fbe0dbef1c191973daeac0cb56b3da36c62863 Mon Sep 17 00:00:00 2001 From: Nico Rikken Date: Sun, 21 Apr 2024 09:19:19 +0200 Subject: [PATCH 03/22] feat: licenses_directory relative to project root ENure a relative path, rather than an absolute path. Signed-off-by: Nico Rikken --- src/reuse/project.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/reuse/project.py b/src/reuse/project.py index 653af126..8bd43d0b 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -75,7 +75,9 @@ def __init__( include_meson_subprojects: bool = False, ): self.root = Path(root) - self.licenses_directory = Path(root).joinpath("LICENSES") + self.licenses_directory = self.root.joinpath("LICENSES").relative_to( + self.root + ) if vcs_strategy is None: vcs_strategy = VCSStrategyNone From f4bf49dbc931479693a8798442e8ed7481b16ab1 Mon Sep 17 00:00:00 2001 From: Nico Rikken Date: Sun, 21 Apr 2024 10:49:34 +0200 Subject: [PATCH 04/22] chore: generic function for resolving license path To prevent code duplication. Signed-off-by: Nico Rikken --- src/reuse/lint.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 312ce747..03f8f4e5 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -276,6 +276,9 @@ def format_lines(report: ProjectReport, licenses_directory: Path) -> str: output = StringIO() # TODO: perhaps merge with format_plain + def license_path(lic: str) -> Path: + return licenses_directory.joinpath(lic) + if not report.is_compliant: # Bad licenses for lic, files in sorted(report.bad_licenses.items()): @@ -286,14 +289,14 @@ def format_lines(report: ProjectReport, licenses_directory: Path) -> str: # based on the analysis of the license file? # Deprecated licenses for lic in sorted(report.deprecated_licenses): - license_path = licenses_directory.joinpath(lic) - output.write(_(f"{license_path}: deprecated license\n")) + lic_path = license_path(lic) + output.write(_(f"{lic_path}: deprecated license\n")) # TODO: maintain the exact path rather than assuming it # Licenses without extension for lic in sorted(report.licenses_without_extension): - license_path = licenses_directory.joinpath(lic) - output.write(_(f"{license_path}: license without file extension\n")) + lic_path = license_path(lic) + output.write(_(f"{lic_path}: license without file extension\n")) # Missing licenses for lic, files in sorted(report.missing_licenses.items()): @@ -303,8 +306,8 @@ def format_lines(report: ProjectReport, licenses_directory: Path) -> str: # TODO: maintain the exact path rather than assuming it # Unused licenses for lic in sorted(report.unused_licenses): - license_path = licenses_directory.joinpath(lic) - output.write(_(f"{license_path}: unused license\n")) + lic_path = license_path(lic) + output.write(_(f"{lic_path}: unused license\n")) # Read errors for path in sorted(report.read_errors): From 4dc6c8152b0ec255079d360b1f0832bbd902a0df Mon Sep 17 00:00:00 2001 From: Nico Rikken Date: Sun, 21 Apr 2024 10:50:23 +0200 Subject: [PATCH 05/22] chore: improve docs on new insights Signed-off-by: Nico Rikken --- src/reuse/lint.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 03f8f4e5..41cd9a67 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -265,6 +265,8 @@ def custom_serializer(obj: Any) -> Any: def format_lines(report: ProjectReport, licenses_directory: Path) -> str: """Formats data dictionary as plaintext strings to be printed to sys.stdout + Sorting of output is not guaranteed. + Symbolic links can result in multiple entries per file. Args: report: ProjectReport data @@ -274,7 +276,6 @@ def format_lines(report: ProjectReport, licenses_directory: Path) -> str: """ # TODO: modify so that license_directory isn't needed output = StringIO() - # TODO: perhaps merge with format_plain def license_path(lic: str) -> Path: return licenses_directory.joinpath(lic) @@ -283,6 +284,7 @@ def license_path(lic: str) -> Path: # Bad licenses for lic, files in sorted(report.bad_licenses.items()): for file in sorted(files): + # TODO: lic might be "invalid" output.write(_(f"{file}: bad license: {lic}\n")) # TODO: maintain the exact path rather than assuming it. Also: is this @@ -301,6 +303,7 @@ def license_path(lic: str) -> Path: # Missing licenses for lic, files in sorted(report.missing_licenses.items()): for file in sorted(files): + # TODO: lic might be "invalid" output.write(_(f"{file}: missing license: {lic}\n")) # TODO: maintain the exact path rather than assuming it From b0d3000a9a0a43442f37dc54462a8f75e2e78ea2 Mon Sep 17 00:00:00 2001 From: Nico Rikken Date: Sun, 21 Apr 2024 10:51:20 +0200 Subject: [PATCH 06/22] chore: test for lines output of lint module A complete integration test. Creates a repository that includes all the error types. Output is validated according to a regex. Validation written in a way that is isn't dependent on ordering as ordering isn't guaranteed. Signed-off-by: Nico Rikken --- tests/test_lint.py | 51 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/tests/test_lint.py b/tests/test_lint.py index 1a16672a..a375d479 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -5,11 +5,12 @@ """All tests for reuse.lint""" +import re import shutil from conftest import cpython, posix -from reuse.lint import format_plain +from reuse.lint import format_lines, format_plain from reuse.project import Project from reuse.report import ProjectReport @@ -212,4 +213,52 @@ def test_lint_json_output(fake_repository): ) +def test_lint_lines_output(fake_repository): + """Complete test for lint with lines output.""" + # Prepare a repository that includes all types of situations: + # missing_licenses, unused_licenses, bad_licenses, deprecated_licenses, + # licenses_without_extension, files_without_copyright, + # files_without_licenses, read_errors + (fake_repository / "invalid-license.py").write_text( + "SPDX-License-Identifier: invalid" + ) + (fake_repository / "no-license.py").write_text( + "SPDX-FileCopyrightText: Jane Doe" + ) + (fake_repository / "LICENSES" / "invalid-license-text").write_text( + "An invalid license text" + ) + (fake_repository / "LICENSES" / "Nokia-Qt-exception-1.1.txt").write_text( + "Deprecated" + ) + (fake_repository / "LICENSES" / "MIT").write_text("foo") + (fake_repository / "restricted.py").write_text("foo") + (fake_repository / "restricted.py").chmod(0o000) + # TODO: should potentially conflicting filenames be handled differently? + (fake_repository / "file:with:colons.py").write_text("foo") + (fake_repository / "file with spaces.py").write_text("foo") + + project = Project.from_directory(fake_repository) + report = ProjectReport.generate(project) + + lines_result = format_lines(report, project.licenses_directory) + lines_result_lines = lines_result.splitlines() + + assert len(lines_result_lines) == 15 + + for line in lines_result_lines: + assert re.match(".+: .+", line) + + assert lines_result.count("invalid-license.py") == 3 + assert lines_result.count("no-license.py") == 1 + assert lines_result.count("LICENSES") == 6 + assert lines_result.count("invalid-license-text") == 3 + # TODO: file extension of license isn't kept in the data + assert lines_result.count("Nokia-Qt-exception-1.1.txt") == 2 + assert lines_result.count("MIT") == 2 + assert lines_result.count("restricted.py") == 1 + assert lines_result.count("file:with:colons.py") == 2 + assert lines_result.count("file with spaces.py") == 2 + + # REUSE-IgnoreEnd From b67d9c4ab362ea798d42e062fd98fd54b4b22c92 Mon Sep 17 00:00:00 2001 From: Nico Rikken Date: Sun, 21 Apr 2024 11:00:24 +0200 Subject: [PATCH 07/22] chore: update line format comments to reflext insights Add notes in the line format output to reflect current thoughts. Signed-off-by: Nico Rikken --- src/reuse/lint.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 41cd9a67..36b8e822 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -282,19 +282,18 @@ def license_path(lic: str) -> Path: if not report.is_compliant: # Bad licenses + # TODO: include source code line number of the finding for lic, files in sorted(report.bad_licenses.items()): for file in sorted(files): - # TODO: lic might be "invalid" output.write(_(f"{file}: bad license: {lic}\n")) - # TODO: maintain the exact path rather than assuming it. Also: is this - # based on the analysis of the license file? + # TODO: maintain the exact path to include the file extension # Deprecated licenses for lic in sorted(report.deprecated_licenses): lic_path = license_path(lic) output.write(_(f"{lic_path}: deprecated license\n")) - # TODO: maintain the exact path rather than assuming it + # TODO: maintain the exact path to include the file extension # Licenses without extension for lic in sorted(report.licenses_without_extension): lic_path = license_path(lic) @@ -303,10 +302,9 @@ def license_path(lic: str) -> Path: # Missing licenses for lic, files in sorted(report.missing_licenses.items()): for file in sorted(files): - # TODO: lic might be "invalid" output.write(_(f"{file}: missing license: {lic}\n")) - # TODO: maintain the exact path rather than assuming it + # TODO: maintain the exact path to include the file extension # Unused licenses for lic in sorted(report.unused_licenses): lic_path = license_path(lic) From 1744ef8a642c1b85ba950556fc0bfa644d273d9b Mon Sep 17 00:00:00 2001 From: Nico Rikken Date: Mon, 22 Apr 2024 07:16:15 +0200 Subject: [PATCH 08/22] chore: resolve license paths Use report.licenses to resolve license paths. Signed-off-by: Nico Rikken --- src/reuse/lint.py | 13 +++++-------- tests/test_lint.py | 3 +-- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 36b8e822..b489f09f 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -263,7 +263,7 @@ def custom_serializer(obj: Any) -> Any: ) -def format_lines(report: ProjectReport, licenses_directory: Path) -> str: +def format_lines(report: ProjectReport) -> str: """Formats data dictionary as plaintext strings to be printed to sys.stdout Sorting of output is not guaranteed. Symbolic links can result in multiple entries per file. @@ -274,11 +274,11 @@ def format_lines(report: ProjectReport, licenses_directory: Path) -> str: Returns: String (in plaintext) that can be output to sys.stdout """ - # TODO: modify so that license_directory isn't needed output = StringIO() - def license_path(lic: str) -> Path: - return licenses_directory.joinpath(lic) + def license_path(lic: str) -> Path | None: + "Resolve a license identifier to a license path." + return report.licenses.get(lic) if not report.is_compliant: # Bad licenses @@ -287,13 +287,11 @@ def license_path(lic: str) -> Path: for file in sorted(files): output.write(_(f"{file}: bad license: {lic}\n")) - # TODO: maintain the exact path to include the file extension # Deprecated licenses for lic in sorted(report.deprecated_licenses): lic_path = license_path(lic) output.write(_(f"{lic_path}: deprecated license\n")) - # TODO: maintain the exact path to include the file extension # Licenses without extension for lic in sorted(report.licenses_without_extension): lic_path = license_path(lic) @@ -304,7 +302,6 @@ def license_path(lic: str) -> Path: for file in sorted(files): output.write(_(f"{file}: missing license: {lic}\n")) - # TODO: maintain the exact path to include the file extension # Unused licenses for lic in sorted(report.unused_licenses): lic_path = license_path(lic) @@ -336,7 +333,7 @@ def run(args: Namespace, project: Project, out: IO[str] = sys.stdout) -> int: elif args.json: out.write(format_json(report)) elif args.lines: - out.write(format_lines(report, project.licenses_directory)) + out.write(format_lines(report)) else: out.write(format_plain(report)) diff --git a/tests/test_lint.py b/tests/test_lint.py index a375d479..88f2fb37 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -241,7 +241,7 @@ def test_lint_lines_output(fake_repository): project = Project.from_directory(fake_repository) report = ProjectReport.generate(project) - lines_result = format_lines(report, project.licenses_directory) + lines_result = format_lines(report) lines_result_lines = lines_result.splitlines() assert len(lines_result_lines) == 15 @@ -253,7 +253,6 @@ def test_lint_lines_output(fake_repository): assert lines_result.count("no-license.py") == 1 assert lines_result.count("LICENSES") == 6 assert lines_result.count("invalid-license-text") == 3 - # TODO: file extension of license isn't kept in the data assert lines_result.count("Nokia-Qt-exception-1.1.txt") == 2 assert lines_result.count("MIT") == 2 assert lines_result.count("restricted.py") == 1 From ffdbfcacb977767220789ef5f4c0045a02b44211 Mon Sep 17 00:00:00 2001 From: Nico Rikken Date: Mon, 22 Apr 2024 07:19:34 +0200 Subject: [PATCH 09/22] chore: annotate copyright headers Signed-off-by: Nico Rikken --- src/reuse/lint.py | 1 + src/reuse/project.py | 1 + tests/test_lint.py | 1 + 3 files changed, 3 insertions(+) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index b489f09f..31ce7d83 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. # SPDX-FileCopyrightText: 2022 Florian Snow # SPDX-FileCopyrightText: 2023 DB Systel GmbH +# SPDX-FileCopyrightText: 2024 Nico Rikken # # SPDX-License-Identifier: GPL-3.0-or-later diff --git a/src/reuse/project.py b/src/reuse/project.py index 8bd43d0b..49c901a0 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -3,6 +3,7 @@ # SPDX-FileCopyrightText: 2023 Carmen Bianca BAKKER # SPDX-FileCopyrightText: 2023 Matthias Riße # SPDX-FileCopyrightText: 2023 DB Systel GmbH +# SPDX-FileCopyrightText: 2024 Nico Rikken # # SPDX-License-Identifier: GPL-3.0-or-later diff --git a/tests/test_lint.py b/tests/test_lint.py index 88f2fb37..5bc3d4b6 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -1,5 +1,6 @@ # SPDX-FileCopyrightText: 2019 Free Software Foundation Europe e.V. # SPDX-FileCopyrightText: 2022 Florian Snow +# SPDX-FileCopyrightText: 2024 Nico Rikken # # SPDX-License-Identifier: GPL-3.0-or-later From cd8fa4c24d6cc88fd8e9a51032e79178e1e3b7e3 Mon Sep 17 00:00:00 2001 From: Nico Rikken Date: Mon, 22 Apr 2024 07:22:24 +0200 Subject: [PATCH 10/22] chore: remove TODO's Signed-off-by: Nico Rikken --- src/reuse/lint.py | 1 - tests/test_lint.py | 1 - 2 files changed, 2 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 31ce7d83..a3c87918 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -283,7 +283,6 @@ def license_path(lic: str) -> Path | None: if not report.is_compliant: # Bad licenses - # TODO: include source code line number of the finding for lic, files in sorted(report.bad_licenses.items()): for file in sorted(files): output.write(_(f"{file}: bad license: {lic}\n")) diff --git a/tests/test_lint.py b/tests/test_lint.py index 5bc3d4b6..35b90646 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -235,7 +235,6 @@ def test_lint_lines_output(fake_repository): (fake_repository / "LICENSES" / "MIT").write_text("foo") (fake_repository / "restricted.py").write_text("foo") (fake_repository / "restricted.py").chmod(0o000) - # TODO: should potentially conflicting filenames be handled differently? (fake_repository / "file:with:colons.py").write_text("foo") (fake_repository / "file with spaces.py").write_text("foo") From c7a8014688dff407cffa936a2c651e3d1fe7eaa8 Mon Sep 17 00:00:00 2001 From: Nico Rikken Date: Mon, 22 Apr 2024 07:49:41 +0200 Subject: [PATCH 11/22] feat: remove colons from output To prevent parsing errors, remove colons from messages. Signed-off-by: Nico Rikken --- src/reuse/lint.py | 4 ++-- tests/test_lint.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index a3c87918..60df25d0 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -285,7 +285,7 @@ def license_path(lic: str) -> Path | None: # Bad licenses for lic, files in sorted(report.bad_licenses.items()): for file in sorted(files): - output.write(_(f"{file}: bad license: {lic}\n")) + output.write(_(f"{file}: bad license {lic}\n")) # Deprecated licenses for lic in sorted(report.deprecated_licenses): @@ -300,7 +300,7 @@ def license_path(lic: str) -> Path | None: # Missing licenses for lic, files in sorted(report.missing_licenses.items()): for file in sorted(files): - output.write(_(f"{file}: missing license: {lic}\n")) + output.write(_(f"{file}: missing license {lic}\n")) # Unused licenses for lic in sorted(report.unused_licenses): diff --git a/tests/test_lint.py b/tests/test_lint.py index 35b90646..a6c9ea9d 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -247,7 +247,7 @@ def test_lint_lines_output(fake_repository): assert len(lines_result_lines) == 15 for line in lines_result_lines: - assert re.match(".+: .+", line) + assert re.match(".+: [^:]+", line) assert lines_result.count("invalid-license.py") == 3 assert lines_result.count("no-license.py") == 1 From 80f850a6e8e771a7cc91177f20b3d70d637a348e Mon Sep 17 00:00:00 2001 From: Nico Rikken Date: Mon, 22 Apr 2024 20:30:38 +0200 Subject: [PATCH 12/22] fix: Old style optional type for older Python versions Used syntax was too new. Signed-off-by: Nico Rikken --- src/reuse/lint.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 60df25d0..2166fc49 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -16,7 +16,7 @@ from io import StringIO from pathlib import Path from textwrap import TextWrapper -from typing import IO, Any +from typing import IO, Any, Optional from . import __REUSE_version__ from .project import Project @@ -277,7 +277,7 @@ def format_lines(report: ProjectReport) -> str: """ output = StringIO() - def license_path(lic: str) -> Path | None: + def license_path(lic: str) -> Optional[Path]: "Resolve a license identifier to a license path." return report.licenses.get(lic) From fbef60840099388764ea694a85d9a2a67d969112 Mon Sep 17 00:00:00 2001 From: Nico Rikken Date: Mon, 22 Apr 2024 20:44:14 +0200 Subject: [PATCH 13/22] chore: restore license_directory in project As the approach in lint module no longer use the license_directory, the change can be reverted. Signed-off-by: Nico Rikken --- src/reuse/project.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/reuse/project.py b/src/reuse/project.py index 49c901a0..1dc6acdf 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -3,7 +3,6 @@ # SPDX-FileCopyrightText: 2023 Carmen Bianca BAKKER # SPDX-FileCopyrightText: 2023 Matthias Riße # SPDX-FileCopyrightText: 2023 DB Systel GmbH -# SPDX-FileCopyrightText: 2024 Nico Rikken # # SPDX-License-Identifier: GPL-3.0-or-later @@ -76,9 +75,6 @@ def __init__( include_meson_subprojects: bool = False, ): self.root = Path(root) - self.licenses_directory = self.root.joinpath("LICENSES").relative_to( - self.root - ) if vcs_strategy is None: vcs_strategy = VCSStrategyNone @@ -428,7 +424,7 @@ def _find_licenses(self) -> Dict[str, Path]: # TODO: This method does more than one thing. We ought to simplify it. license_files: Dict[str, Path] = {} - directory = str(self.licenses_directory / "**") + directory = str(self.root / "LICENSES/**") for path_str in glob.iglob(directory, recursive=True): path = Path(path_str) # For some reason, LICENSES/** is resolved even though it From 45b173512ca941cb6c44554a8ea0801fe879c0af Mon Sep 17 00:00:00 2001 From: Nico Rikken Date: Mon, 22 Apr 2024 21:03:02 +0200 Subject: [PATCH 14/22] chore: remove test filename with colons Remove test for filename with colons, as this is unrealistic for files on Windows and atypical for files on Unix systems even though it is allowed. Signed-off-by: Nico Rikken --- tests/test_lint.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_lint.py b/tests/test_lint.py index a6c9ea9d..3466339f 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -235,7 +235,6 @@ def test_lint_lines_output(fake_repository): (fake_repository / "LICENSES" / "MIT").write_text("foo") (fake_repository / "restricted.py").write_text("foo") (fake_repository / "restricted.py").chmod(0o000) - (fake_repository / "file:with:colons.py").write_text("foo") (fake_repository / "file with spaces.py").write_text("foo") project = Project.from_directory(fake_repository) @@ -244,7 +243,7 @@ def test_lint_lines_output(fake_repository): lines_result = format_lines(report) lines_result_lines = lines_result.splitlines() - assert len(lines_result_lines) == 15 + assert len(lines_result_lines) == 13 for line in lines_result_lines: assert re.match(".+: [^:]+", line) @@ -256,7 +255,6 @@ def test_lint_lines_output(fake_repository): assert lines_result.count("Nokia-Qt-exception-1.1.txt") == 2 assert lines_result.count("MIT") == 2 assert lines_result.count("restricted.py") == 1 - assert lines_result.count("file:with:colons.py") == 2 assert lines_result.count("file with spaces.py") == 2 From 6b505d4c134d0d4916c402c5a1c9220cd41d5120 Mon Sep 17 00:00:00 2001 From: Nico Rikken Date: Mon, 22 Apr 2024 21:14:57 +0200 Subject: [PATCH 15/22] chore: temporary debug output for windows test Somehow line numbers don't add up on Windows. Debug it. Signed-off-by: Nico Rikken --- tests/test_lint.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_lint.py b/tests/test_lint.py index 3466339f..a0c80dd5 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -243,7 +243,8 @@ def test_lint_lines_output(fake_repository): lines_result = format_lines(report) lines_result_lines = lines_result.splitlines() - assert len(lines_result_lines) == 13 + print(lines_result_lines) # TODO: remove this debug + assert len(lines_result_lines) == 12 for line in lines_result_lines: assert re.match(".+: [^:]+", line) From 53be4f2d1d14b378b665dd64d178ca53137c5163 Mon Sep 17 00:00:00 2001 From: Nico Rikken Date: Mon, 22 Apr 2024 21:45:42 +0200 Subject: [PATCH 16/22] fix: move read error test to posix-only As the permission error doesn't work on Windows, move it to posix only, just as the more general permission test. Signed-off-by: Nico Rikken --- tests/test_lint.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/tests/test_lint.py b/tests/test_lint.py index a0c80dd5..6ca93f9f 100644 --- a/tests/test_lint.py +++ b/tests/test_lint.py @@ -233,8 +233,6 @@ def test_lint_lines_output(fake_repository): "Deprecated" ) (fake_repository / "LICENSES" / "MIT").write_text("foo") - (fake_repository / "restricted.py").write_text("foo") - (fake_repository / "restricted.py").chmod(0o000) (fake_repository / "file with spaces.py").write_text("foo") project = Project.from_directory(fake_repository) @@ -243,7 +241,6 @@ def test_lint_lines_output(fake_repository): lines_result = format_lines(report) lines_result_lines = lines_result.splitlines() - print(lines_result_lines) # TODO: remove this debug assert len(lines_result_lines) == 12 for line in lines_result_lines: @@ -255,8 +252,23 @@ def test_lint_lines_output(fake_repository): assert lines_result.count("invalid-license-text") == 3 assert lines_result.count("Nokia-Qt-exception-1.1.txt") == 2 assert lines_result.count("MIT") == 2 - assert lines_result.count("restricted.py") == 1 assert lines_result.count("file with spaces.py") == 2 +@cpython +@posix +def test_lint_lines_read_errors(fake_repository): + """Check read error output""" + (fake_repository / "restricted.py").write_text("foo") + (fake_repository / "restricted.py").chmod(0o000) + project = Project.from_directory(fake_repository) + report = ProjectReport.generate(project) + result = format_lines(report) + print(result) + + assert len(result.splitlines()) == 1 + assert "restricted.py" in result + assert "read error" in result + + # REUSE-IgnoreEnd From e34f404ebec177ceded9e83b1c8e23cc7c72f01d Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Sat, 27 Apr 2024 11:08:03 +0200 Subject: [PATCH 17/22] Make a string a docstring Signed-off-by: Carmen Bianca BAKKER --- src/reuse/lint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 2166fc49..a5c8c62d 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -278,7 +278,7 @@ def format_lines(report: ProjectReport) -> str: output = StringIO() def license_path(lic: str) -> Optional[Path]: - "Resolve a license identifier to a license path." + """Resolve a license identifier to a license path.""" return report.licenses.get(lic) if not report.is_compliant: From 4cd9d80c64829f023bc5f021f0f0c7d3d082bcc1 Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Sat, 27 Apr 2024 11:16:12 +0200 Subject: [PATCH 18/22] Fix translation strings Signed-off-by: Carmen Bianca BAKKER --- src/reuse/lint.py | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index a5c8c62d..5d13bf75 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -284,40 +284,54 @@ def license_path(lic: str) -> Optional[Path]: if not report.is_compliant: # Bad licenses for lic, files in sorted(report.bad_licenses.items()): - for file in sorted(files): - output.write(_(f"{file}: bad license {lic}\n")) + for path in sorted(files): + output.write( + _("{path}: bad license {lic}\n").format(path=path, lic=lic) + ) # Deprecated licenses for lic in sorted(report.deprecated_licenses): lic_path = license_path(lic) - output.write(_(f"{lic_path}: deprecated license\n")) + output.write( + _("{lic_path}: deprecated license\n").format(lic_path=lic_path) + ) # Licenses without extension for lic in sorted(report.licenses_without_extension): lic_path = license_path(lic) - output.write(_(f"{lic_path}: license without file extension\n")) + output.write( + _("{lic_path}: license without file extension\n").format( + lic_path=lic_path + ) + ) # Missing licenses for lic, files in sorted(report.missing_licenses.items()): - for file in sorted(files): - output.write(_(f"{file}: missing license {lic}\n")) + for path in sorted(files): + output.write( + _("{path}: missing license {lic}\n").format( + path=path, lic=lic + ) + ) # Unused licenses for lic in sorted(report.unused_licenses): lic_path = license_path(lic) - output.write(_(f"{lic_path}: unused license\n")) + output.write( + _("{lic_path}: unused license\n").format(lic_path=lic_path) + ) # Read errors for path in sorted(report.read_errors): - output.write(_(f"{path}: read error\n")) + output.write(_("{path}: read error\n").format(path=path)) # Without licenses - for file in report.files_without_licenses: - output.write(_(f"{file}: without license\n")) + for path in report.files_without_licenses: + output.write(_("{path}: without license\n").format(path=path)) # Without copyright - for file in report.files_without_copyright: - output.write(_(f"{file}: without copyright\n")) + for path in report.files_without_copyright: + output.write(_("{path}: without copyright\n").format(path=path)) return output.getvalue() From 4370f2943267a485d811ca4172e20a70f2044355 Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Sat, 27 Apr 2024 11:24:51 +0200 Subject: [PATCH 19/22] Print missing licenses after unused licenses The general structure of this function is that all per-license lines go first, and all per-file lines go after. 'Missing licenses' is, deceptively, in the per-file category. Signed-off-by: Carmen Bianca BAKKER --- src/reuse/lint.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 5d13bf75..09f83568 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -305,6 +305,13 @@ def license_path(lic: str) -> Optional[Path]: ) ) + # Unused licenses + for lic in sorted(report.unused_licenses): + lic_path = license_path(lic) + output.write( + _("{lic_path}: unused license\n").format(lic_path=lic_path) + ) + # Missing licenses for lic, files in sorted(report.missing_licenses.items()): for path in sorted(files): @@ -314,13 +321,6 @@ def license_path(lic: str) -> Optional[Path]: ) ) - # Unused licenses - for lic in sorted(report.unused_licenses): - lic_path = license_path(lic) - output.write( - _("{lic_path}: unused license\n").format(lic_path=lic_path) - ) - # Read errors for path in sorted(report.read_errors): output.write(_("{path}: read error\n").format(path=path)) From 515d0b70603de56b2c1c61a26a8dfecb06257515 Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Sat, 27 Apr 2024 11:28:04 +0200 Subject: [PATCH 20/22] Better short description Signed-off-by: Carmen Bianca BAKKER --- src/reuse/lint.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/reuse/lint.py b/src/reuse/lint.py index 09f83568..257d41b9 100644 --- a/src/reuse/lint.py +++ b/src/reuse/lint.py @@ -327,11 +327,11 @@ def license_path(lic: str) -> Optional[Path]: # Without licenses for path in report.files_without_licenses: - output.write(_("{path}: without license\n").format(path=path)) + output.write(_("{path}: no license identifier\n").format(path=path)) # Without copyright for path in report.files_without_copyright: - output.write(_("{path}: without copyright\n").format(path=path)) + output.write(_("{path}: no copyright notice\n").format(path=path)) return output.getvalue() From b562d79130027209af5200595d47003b0b339f6b Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Sat, 27 Apr 2024 12:05:04 +0200 Subject: [PATCH 21/22] Add change log entry Signed-off-by: Carmen Bianca BAKKER --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 04f85a14..fdbaa589 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -58,6 +58,7 @@ CLI command and its behaviour. There are no guarantees of stability for the - npm `.npmrc` files (#985) - Added comment styles: - `man` for UNIX Man pages (`.man`) (#954) +- Added `--lines` output option for `lint`. (#956) ### Changed From da08a1c2645af86ffa3912c9589340160304f1c3 Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Tue, 28 May 2024 14:35:44 +0200 Subject: [PATCH 22/22] Add --lines to manpage Signed-off-by: Carmen Bianca BAKKER --- docs/man/reuse-lint.rst | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/man/reuse-lint.rst b/docs/man/reuse-lint.rst index db52f568..75bfcf94 100644 --- a/docs/man/reuse-lint.rst +++ b/docs/man/reuse-lint.rst @@ -76,22 +76,26 @@ associated with it, then the project is not compliant. Options ------- -.. option:: --quiet +.. option:: -q, --quiet Do not print anything to STDOUT. .. TODO: specify the JSON output. -.. option:: --json +.. option:: -j, --json Output the results of the lint as JSON. -.. option:: --plain +.. option:: -p, --plain Output the results of the lint as descriptive text. The text is valid Markdown. +.. option:: -l, --lines + + Output one line per error, prefixed by the file path. + .. option:: -h, --help Display help and exit.