From d1a636890e1fcbf18d09152d35d20fdb55721097 Mon Sep 17 00:00:00 2001 From: Nico Rikken Date: Wed, 19 Jul 2023 21:09:26 +0200 Subject: [PATCH 1/6] feat: no strict dep5 parsing Since version 0.1.47 of python-debian the dep5 parsing is strict. This is causing issues for certain users that have multiple Copyright statements in their Dep5 file. Disablng strictness removes the issue for those users. Closes #803 Signed-off-by: Nico Rikken --- src/reuse/project.py | 2 +- tests/conftest.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/reuse/project.py b/src/reuse/project.py index a986fdc73..3962f329e 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -306,7 +306,7 @@ def _copyright(self) -> Optional[Copyright]: copyright_path = self.root / ".reuse/dep5" try: with copyright_path.open(encoding="utf-8") as fp: - self._copyright_val = Copyright(fp) + self._copyright_val = Copyright(fp, strict=False) except OSError: _LOGGER.debug("no .reuse/dep5 file, or could not read it") except DebianError: diff --git a/tests/conftest.py b/tests/conftest.py index 601f5ecc4..37351d734 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -298,7 +298,7 @@ def dep5_copyright(): with (RESOURCES_DIRECTORY / "fake_repository/.reuse/dep5").open( encoding="utf-8" ) as fp: - return Copyright(fp) + return Copyright(fp, strict=False) @pytest.fixture() From 1edc26ed4d286bc82766b57699ceff83fca5f8f6 Mon Sep 17 00:00:00 2001 From: Nico Rikken Date: Thu, 20 Jul 2023 08:13:17 +0200 Subject: [PATCH 2/6] chore: tests for dep5 parsing Tests to ensure that all Copyright information from the .reuse/dep5 file is taken into account. It contains 2 testcases, for both strictly and non-strictly formatted dep5 files. The latter being present in the wild, based on comments in #803 Signed-off-by: Nico Rikken --- tests/test_report.py | 95 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/tests/test_report.py b/tests/test_report.py index 5fbc160b9..e7d19593f 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -10,6 +10,7 @@ import os import sys from importlib import import_module +from operator import itemgetter from textwrap import dedent import pytest @@ -267,6 +268,100 @@ def test_generate_file_report_to_dict_lint_source_information(fake_repository): assert expression["value"] == "MIT OR 0BSD" +def test_strict_dep5_in_file_report(fake_repository): + """All copyright information of a strictly formatted dep5 file should be taken + into account in the file report. Strictly formatted meaning that there is a + single Copyright entry with indentend values. + """ + (fake_repository / ".reuse/dep5").write_text( + dedent( + """ + Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ + Upstream-Name: Some project + Upstream-Contact: Jane Doe + Source: https://example.com/ + + Files: doc/* + Copyright: 2017 Jane Doe + 2018 John Doe + 2019 Joey Doe + License: CC0-1.0 + """ + ) + ) + project = Project(fake_repository) + report = FileReport.generate( + project, + "doc/index.rst", + ) + result = report.to_dict_lint() + sorted_copyrights = sorted(result["copyrights"], key=itemgetter("value")) + assert sorted_copyrights == [ + { + "source": ".reuse/dep5", + "source_type": "dep5", + "value": "2017 Jane Doe", + }, + { + "source": ".reuse/dep5", + "source_type": "dep5", + "value": "2018 John Doe", + }, + { + "source": ".reuse/dep5", + "source_type": "dep5", + "value": "2019 Joey Doe", + }, + ] + + +def test_non_strict_dep5_in_file_report(fake_repository): + """All copyright information of a non-strictly formatted dep5 file should be + taken into account in the file report. Non-strictly formatted meaning that + there are multiple Copyright entries. + """ + (fake_repository / ".reuse/dep5").write_text( + dedent( + """ + Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ + Upstream-Name: Some project + Upstream-Contact: Jane Doe + Source: https://example.com/ + + Files: doc/* + Copyright: 2017 Jane Doe + Copyright: 2018 John Doe + Copyright: 2019 Joey Doe + License: CC0-1.0 + """ + ) + ) + project = Project(fake_repository) + report = FileReport.generate( + project, + "doc/index.rst", + ) + result = report.to_dict_lint() + sorted_copyrights = sorted(result["copyrights"], key=itemgetter("value")) + assert sorted_copyrights == [ + { + "source": ".reuse/dep5", + "source_type": "dep5", + "value": "2017 Jane Doe", + }, + { + "source": ".reuse/dep5", + "source_type": "dep5", + "value": "2018 John Doe", + }, + { + "source": ".reuse/dep5", + "source_type": "dep5", + "value": "2019 Joey Doe", + }, + ] + + def test_generate_project_report_simple(fake_repository, multiprocessing): """Simple generate test, just to see if it sort of works.""" project = Project(fake_repository) From 6881f0d59b3bf507304e36911ea4379adaabafac Mon Sep 17 00:00:00 2001 From: Nico Rikken Date: Fri, 21 Jul 2023 07:39:11 +0200 Subject: [PATCH 3/6] chore: update non-strict dep5 test to reflect reality Generating a file-report with a non-strict dep5 containing multiple Copyright entries will leave part of the Copyright information to be ignored. The updated test reflects that behavior and now no longer fails. Signed-off-by: Nico Rikken --- tests/test_report.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/tests/test_report.py b/tests/test_report.py index e7d19593f..7c5b911f0 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -316,9 +316,13 @@ def test_strict_dep5_in_file_report(fake_repository): def test_non_strict_dep5_in_file_report(fake_repository): - """All copyright information of a non-strictly formatted dep5 file should be - taken into account in the file report. Non-strictly formatted meaning that - there are multiple Copyright entries. + """Copyright information of a non-strictly formatted dep5 file should be taken + into account in the file report. Non-strictly formatted meaning that there + are multiple Copyright entries. + + Note that in the non-strict mode only the first Copyright entry is taken + into account and the rest is silently ignored. This is undesireable but + reflects the current state of reuse-tool. """ (fake_repository / ".reuse/dep5").write_text( dedent( @@ -349,16 +353,6 @@ def test_non_strict_dep5_in_file_report(fake_repository): "source_type": "dep5", "value": "2017 Jane Doe", }, - { - "source": ".reuse/dep5", - "source_type": "dep5", - "value": "2018 John Doe", - }, - { - "source": ".reuse/dep5", - "source_type": "dep5", - "value": "2019 Joey Doe", - }, ] From 7ab28749505f724e00b359190f90872811dfaf0e Mon Sep 17 00:00:00 2001 From: Nico Rikken Date: Fri, 21 Jul 2023 19:44:03 +0200 Subject: [PATCH 4/6] chore: fix pylint errors Update line wrap length and disable pylint for the dep5 file content. Signed-off-by: Nico Rikken --- tests/test_report.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/test_report.py b/tests/test_report.py index 7c5b911f0..bc3728298 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -269,10 +269,11 @@ def test_generate_file_report_to_dict_lint_source_information(fake_repository): def test_strict_dep5_in_file_report(fake_repository): - """All copyright information of a strictly formatted dep5 file should be taken - into account in the file report. Strictly formatted meaning that there is a - single Copyright entry with indentend values. + """All copyright information of a strictly formatted dep5 file should be + taken into account in the file report. Strictly formatted meaning that there + is a single Copyright entry with indentend values. """ + # pylint: disable=line-too-long (fake_repository / ".reuse/dep5").write_text( dedent( """ @@ -316,14 +317,15 @@ def test_strict_dep5_in_file_report(fake_repository): def test_non_strict_dep5_in_file_report(fake_repository): - """Copyright information of a non-strictly formatted dep5 file should be taken - into account in the file report. Non-strictly formatted meaning that there - are multiple Copyright entries. + """Copyright information of a non-strictly formatted dep5 file should be + taken into account in the file report. Non-strictly formatted meaning that + there are multiple Copyright entries. Note that in the non-strict mode only the first Copyright entry is taken into account and the rest is silently ignored. This is undesireable but reflects the current state of reuse-tool. """ + # pylint: disable=line-too-long (fake_repository / ".reuse/dep5").write_text( dedent( """ From f25b3326cf672348464ba508a1916727f99829b3 Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Sat, 22 Jul 2023 19:14:13 +0200 Subject: [PATCH 5/6] Refactor test so that to_dict_lint isn't called --- tests/test_report.py | 72 +++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 38 deletions(-) diff --git a/tests/test_report.py b/tests/test_report.py index bc3728298..85e697c08 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -10,12 +10,12 @@ import os import sys from importlib import import_module -from operator import itemgetter from textwrap import dedent import pytest from reuse import SourceType +from reuse._util import _LICENSING from reuse.project import Project from reuse.report import FileReport, ProjectReport @@ -270,8 +270,9 @@ def test_generate_file_report_to_dict_lint_source_information(fake_repository): def test_strict_dep5_in_file_report(fake_repository): """All copyright information of a strictly formatted dep5 file should be - taken into account in the file report. Strictly formatted meaning that there - is a single Copyright entry with indentend values. + taken into account in the file report. 'Strictly formatted' here means no + syntax errors. Specifically, there shouldn't be any duplicate Copyright + tags/fields. """ # pylint: disable=line-too-long (fake_repository / ".reuse/dep5").write_text( @@ -295,35 +296,33 @@ def test_strict_dep5_in_file_report(fake_repository): project, "doc/index.rst", ) - result = report.to_dict_lint() - sorted_copyrights = sorted(result["copyrights"], key=itemgetter("value")) - assert sorted_copyrights == [ - { - "source": ".reuse/dep5", - "source_type": "dep5", - "value": "2017 Jane Doe", - }, - { - "source": ".reuse/dep5", - "source_type": "dep5", - "value": "2018 John Doe", - }, - { - "source": ".reuse/dep5", - "source_type": "dep5", - "value": "2019 Joey Doe", - }, - ] + assert len(report.reuse_infos) == 1 + reuse_info = report.reuse_infos[0] + assert reuse_info.spdx_expressions == {_LICENSING.parse("CC0-1.0")} + assert reuse_info.copyright_lines == { + "2017 Jane Doe", + "2018 John Doe", + "2019 Joey Doe", + } + assert reuse_info.source_path == ".reuse/dep5" + assert reuse_info.source_type == SourceType.DEP5 + +def test_dep5_duplicate_copyright_in_file_report(fake_repository): + """Duplicate 'Copyright:' fields (tags) in a DEP5 file is a syntax error. + Since python-debian 0.1.47, the parser raises an error on this class of + syntax error. -def test_non_strict_dep5_in_file_report(fake_repository): - """Copyright information of a non-strictly formatted dep5 file should be - taken into account in the file report. Non-strictly formatted meaning that - there are multiple Copyright entries. + We suppress that error (by parsing the file 'non-strictly'), because this + error is undocumented breaking behaviour. - Note that in the non-strict mode only the first Copyright entry is taken - into account and the rest is silently ignored. This is undesireable but - reflects the current state of reuse-tool. + Towards that end, verify that no error is raised. As a side effect of + parsing non-strictly, only the first Copyright tag is parsed. This is + undesireable, but reflects pre-0.1.47 behaviour. + + See also . + + TODO: Expect at least a user-facing warning when this happens. """ # pylint: disable=line-too-long (fake_repository / ".reuse/dep5").write_text( @@ -347,15 +346,12 @@ def test_non_strict_dep5_in_file_report(fake_repository): project, "doc/index.rst", ) - result = report.to_dict_lint() - sorted_copyrights = sorted(result["copyrights"], key=itemgetter("value")) - assert sorted_copyrights == [ - { - "source": ".reuse/dep5", - "source_type": "dep5", - "value": "2017 Jane Doe", - }, - ] + assert len(report.reuse_infos) == 1 + reuse_info = report.reuse_infos[0] + assert reuse_info.spdx_expressions == {_LICENSING.parse("CC0-1.0")} + assert reuse_info.copyright_lines == {"2017 Jane Doe"} + assert reuse_info.source_path == ".reuse/dep5" + assert reuse_info.source_type == SourceType.DEP5 def test_generate_project_report_simple(fake_repository, multiprocessing): From 123bcf56db258bcf7b84b2b42d5b1fb1dbfccb44 Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Sat, 22 Jul 2023 19:15:30 +0200 Subject: [PATCH 6/6] Adjust import location I found this while working on something else. Tiny commit; why not. --- src/reuse/report.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/reuse/report.py b/src/reuse/report.py index 38996f082..abb603af3 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -19,9 +19,9 @@ from typing import Any, Dict, Iterable, List, NamedTuple, Optional, Set, cast from uuid import uuid4 -from . import __REUSE_version__, __version__ +from . import ReuseInfo, __REUSE_version__, __version__ from ._util import _LICENSING, StrPath, _checksum -from .project import Project, ReuseInfo +from .project import Project _LOGGER = logging.getLogger(__name__)