From 1cfb0b210290b4eb7bfb9c5364b7050dd59cae8d Mon Sep 17 00:00:00 2001 From: Alberto Pianon Date: Tue, 15 Nov 2022 19:39:27 +0100 Subject: [PATCH] Add support for file comments in spdx Add initial support for comments on single files, allowed both by dep5 and spdx specs: - add 'comment' field to SpdxInfo - add support for conversion of dep5 Files:Comment fields into SPDX FileComment fields Signed-off-by: Alberto Pianon --- src/reuse/__init__.py | 6 +++++- src/reuse/_util.py | 5 +++-- src/reuse/header.py | 3 ++- src/reuse/project.py | 5 +++-- src/reuse/report.py | 11 ++++++++++- tests/test_header.py | 34 +++++++++++++++++----------------- tests/test_util.py | 2 +- 7 files changed, 41 insertions(+), 25 deletions(-) diff --git a/src/reuse/__init__.py b/src/reuse/__init__.py index d644505ef..c7de270b9 100644 --- a/src/reuse/__init__.py +++ b/src/reuse/__init__.py @@ -77,7 +77,11 @@ #: The two iterables MUST be sets. SpdxInfo = NamedTuple( "SpdxInfo", - [("spdx_expressions", Set[Expression]), ("copyright_lines", Set[str])], + [ + ("spdx_expressions", Set[Expression]), + ("copyright_lines", Set[str]), + ("comment", str) + ], ) diff --git a/src/reuse/_util.py b/src/reuse/_util.py index ffc74bb82..bda6c711b 100644 --- a/src/reuse/_util.py +++ b/src/reuse/_util.py @@ -204,11 +204,12 @@ def _copyright_from_dep5(path: PathLike, dep5_copyright: Copyright) -> SpdxInfo: result = dep5_copyright.find_files_paragraph(Path(path).as_posix()) if result is None: - return SpdxInfo(set(), set()) + return SpdxInfo(set(), set(), "") return SpdxInfo( set(map(_LICENSING.parse, [result.license.synopsis])), set(map(str.strip, result.copyright.splitlines())), + " ".join(result.comment.split()) if result.comment else "" ) @@ -302,7 +303,7 @@ def extract_spdx_info(text: str) -> SpdxInfo: copyright_matches.add(match.groupdict()["copyright"].strip()) break - return SpdxInfo(expressions, copyright_matches) + return SpdxInfo(expressions, copyright_matches, "") def find_license_identifiers(text: str) -> Iterator[str]: diff --git a/src/reuse/header.py b/src/reuse/header.py index 78e463d40..f673b19b7 100644 --- a/src/reuse/header.py +++ b/src/reuse/header.py @@ -177,6 +177,7 @@ def create_header( spdx_info = SpdxInfo( spdx_info.spdx_expressions.union(existing_spdx.spdx_expressions), spdx_copyrights, + "" ) new_header += _create_new_header( @@ -703,7 +704,7 @@ def run(args, project: Project, out=sys.stdout) -> int: else set() ) - spdx_info = SpdxInfo(expressions, copyright_lines) + spdx_info = SpdxInfo(expressions, copyright_lines, "") result = 0 for path in paths: diff --git a/src/reuse/project.py b/src/reuse/project.py index a357beca5..f2b2b55ed 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -142,8 +142,8 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo: path = _determine_license_path(path) _LOGGER.debug(f"searching '{path}' for SPDX information") - dep5_result = SpdxInfo(set(), set()) - file_result = SpdxInfo(set(), set()) + dep5_result = SpdxInfo(set(), set(), "") + file_result = SpdxInfo(set(), set(), "") # Search the .reuse/dep5 file for SPDX information. if self._copyright: @@ -172,6 +172,7 @@ def spdx_info_of(self, path: PathLike) -> SpdxInfo: return SpdxInfo( dep5_result.spdx_expressions.union(file_result.spdx_expressions), dep5_result.copyright_lines.union(file_result.copyright_lines), + dep5_result.comment ) def relative_from_root(self, path: Path) -> Path: diff --git a/src/reuse/report.py b/src/reuse/report.py index 458316d76..227b92d23 100644 --- a/src/reuse/report.py +++ b/src/reuse/report.py @@ -16,6 +16,7 @@ from pathlib import Path from typing import Iterable, List, NamedTuple, Optional, Set from uuid import uuid4 +from textwrap import fill from . import __version__ from ._util import _LICENSING, _checksum @@ -158,6 +159,12 @@ def bill_of_materials(self) -> str: ) else: out.write("FileCopyrightText: NONE\n") + if report.spdxfile.comment: + out.write( + f"FileComment: \n" + f"{fill(report.spdxfile.comment, width=72)}\n" + f"\n" + ) # Licenses for lic, path in sorted(self.licenses.items()): @@ -307,6 +314,7 @@ def __init__(self, name, spdx_id=None, chk_sum=None): self.chk_sum: str = chk_sum self.licenses_in_file: List[str] = [] self.copyright: str = None + self.comment: str = None class FileReport: @@ -378,8 +386,9 @@ def generate( # Add license to report. report.spdxfile.licenses_in_file.append(identifier) - # Copyright text + # Copyright text and comment report.spdxfile.copyright = "\n".join(sorted(spdx_info.copyright_lines)) + report.spdxfile.comment = spdx_info.comment return report diff --git a/tests/test_header.py b/tests/test_header.py index f6f6cdb0f..b54b8bce1 100644 --- a/tests/test_header.py +++ b/tests/test_header.py @@ -21,7 +21,7 @@ def test_create_header_simple(): """Create a super simple header.""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" ) expected = cleandoc( """ @@ -37,7 +37,7 @@ def test_create_header_simple(): def test_create_header_template_simple(template_simple): """Create a header with a simple template.""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" ) expected = cleandoc( """ @@ -57,7 +57,7 @@ def test_create_header_template_simple(template_simple): def test_create_header_template_no_spdx(template_no_spdx): """Create a header with a template that does not have all SPDX info.""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" ) with pytest.raises(MissingSpdxInfo): @@ -67,7 +67,7 @@ def test_create_header_template_no_spdx(template_no_spdx): def test_create_header_template_commented(template_commented): """Create a header with an already-commented template.""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" ) expected = cleandoc( """ @@ -93,7 +93,7 @@ def test_create_header_template_commented(template_commented): def test_create_header_already_contains_spdx(): """Create a new header from a header that already contains SPDX info.""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" ) existing = cleandoc( """ @@ -118,7 +118,7 @@ def test_create_header_already_contains_spdx(): def test_create_header_existing_is_wrong(): """If the existing header contains errors, raise a CommentCreateError.""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" ) existing = cleandoc( """ @@ -134,7 +134,7 @@ def test_create_header_existing_is_wrong(): def test_create_header_old_syntax(): """Old copyright syntax is preserved when creating a new header.""" - spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set()) + spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set(), "") existing = cleandoc( """ # Copyright John Doe @@ -153,7 +153,7 @@ def test_create_header_old_syntax(): def test_create_header_remove_fluff(): """Any stuff that isn't SPDX info is removed when using create_header.""" - spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set()) + spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set(), "") existing = cleandoc( """ # SPDX-FileCopyrightText: John Doe @@ -177,7 +177,7 @@ def test_create_header_remove_fluff(): def test_find_and_replace_no_header(): """Given text without header, add a header.""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" ) text = "pass" expected = cleandoc( @@ -195,7 +195,7 @@ def test_find_and_replace_no_header(): def test_find_and_replace_verbatim(): """Replace a header with itself.""" - spdx_info = SpdxInfo(set(), set()) + spdx_info = SpdxInfo(set(), set(), "") text = cleandoc( """ # SPDX-FileCopyrightText: Jane Doe @@ -214,7 +214,7 @@ def test_find_and_replace_newline_before_header(): preceding whitespace. """ spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"} + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}, "" ) text = cleandoc( """ @@ -241,7 +241,7 @@ def test_find_and_replace_newline_before_header(): def test_find_and_replace_preserve_preceding(): """When the SPDX header is in the middle of the file, keep it there.""" spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"} + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}, "" ) text = cleandoc( """ @@ -279,7 +279,7 @@ def test_find_and_replace_keep_shebang(): it. """ spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"} + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: John Doe"}, "" ) text = cleandoc( """ @@ -310,7 +310,7 @@ def test_find_and_replace_separate_shebang(): """When the shebang is part of the same comment as the SPDX comment, separate the two. """ - spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set()) + spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set(), "") text = cleandoc( """ #!/usr/bin/env python3 @@ -338,7 +338,7 @@ def test_find_and_replace_separate_shebang(): def test_find_and_replace_only_shebang(): """When the file only contains a shebang, keep it at the top of the file.""" - spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set()) + spdx_info = SpdxInfo({"GPL-3.0-or-later"}, set(), "") text = cleandoc( """ #!/usr/bin/env python3 @@ -368,7 +368,7 @@ def test_find_and_replace_keep_old_comment(): licensing information, preserve it below the REUSE header. """ spdx_info = SpdxInfo( - {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"} + {"GPL-3.0-or-later"}, {"SPDX-FileCopyrightText: Jane Doe"}, "" ) text = cleandoc( """ @@ -395,7 +395,7 @@ def test_find_and_replace_keep_old_comment(): def test_find_and_replace_preserve_newline(): """If the file content ends with a newline, don't remove it.""" - spdx_info = SpdxInfo(set(), set()) + spdx_info = SpdxInfo(set(), set(), "") text = ( cleandoc( """ diff --git a/tests/test_util.py b/tests/test_util.py index d26a596dd..90fde0d9c 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -78,7 +78,7 @@ def test_extract_no_info(): object. """ result = _util.extract_spdx_info("") - assert result == _util.SpdxInfo(set(), set()) + assert result == _util.SpdxInfo(set(), set(), "") def test_extract_tab():