Skip to content

Commit

Permalink
Merge pull request #787 from carmenbianca/info-correct-source
Browse files Browse the repository at this point in the history
Make sure the sources are correct when information is aggregated
  • Loading branch information
carmenbianca authored Jun 27, 2023
2 parents 3821ea5 + bfd0013 commit 2b134b8
Show file tree
Hide file tree
Showing 8 changed files with 273 additions and 160 deletions.
8 changes: 7 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ CLI command and its behaviour. There are no guarantees of stability for the
- Added `--contributor` option to `annotate`. (#669)
- Added `--json` flag to `lint` command (#654).
- `reuse.ReuseInfo` now has `copy` and `union` methods. (#759)
- `reuse.ReuseInfo` now stores information about the source from which the
information was gathered. (#654, #787)
- Added Ukrainian and Czech translations (#767)
- Added `--suppress-deprecation` to hide (verbose) deprecation warnings. (#778)

Expand All @@ -81,12 +83,16 @@ CLI command and its behaviour. There are no guarantees of stability for the
- Bumped SPDX license list to v3.20. (#692)
- `reuse.SpdxInfo` was renamed to `reuse.ReuseInfo`. It is now a (frozen)
dataclass instead of a namedtuple. This is only relevant if you're using reuse
as a library in Python. (#669)
as a library in Python. Other functions and methods were similarly renamed.
(#669)
- Sphinx documentation: Switched from RTD theme to Furo. (#673, #716)
- Removed dependency on setuptools' `pkg_resources` to determine the installed
version of reuse. (#724)
- Bumped SPDX license list to v3.21. (#763)
- Bumped REUSE Spec version to 3.1. (#768)
- `Project.reuse_info_of` now returns a list of `ReuseInfo` objects instead of a
single one. This is because the source information is now stored alongside the
REUSE information. (#787)

### Deprecated

Expand Down
19 changes: 16 additions & 3 deletions src/reuse/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. <https://fsfe.org>
# SPDX-FileCopyrightText: 2021 Alliander N.V.
# SPDX-FileCopyrightText: 2023 Carmen Bianca BAKKER <[email protected]>
#
# SPDX-License-Identifier: GPL-3.0-or-later

Expand Down Expand Up @@ -91,11 +92,11 @@ class SourceType(Enum):
"""

#: A .license file containing license information.
DOT_LICENSE_FILE = ".license file"
DOT_LICENSE = "dot-license"
#: A file header containing license information.
FILE_HEADER = "file header"
FILE_HEADER = "file-header"
#: A .reuse/dep5 file containing license information.
DEP5_FILE = ".reuse/dep5 file"
DEP5 = "dep5"


# TODO: In Python 3.10+, add kw_only=True
Expand All @@ -106,6 +107,7 @@ class ReuseInfo:
spdx_expressions: Set[Expression] = field(default_factory=set)
copyright_lines: Set[str] = field(default_factory=set)
contributor_lines: Set[str] = field(default_factory=set)
path: Optional[str] = None
source_path: Optional[str] = None
source_type: Optional[SourceType] = None

Expand Down Expand Up @@ -153,6 +155,17 @@ def contains_copyright_or_licensing(self) -> bool:
"""Either *spdx_expressions* or *copyright_lines* is non-empty."""
return bool(self.spdx_expressions or self.copyright_lines)

def contains_info(self) -> bool:
"""Any field except *path*, *source_path* and *source_type* is
non-empty.
"""
keys = {
key
for key in self.__dict__
if key not in ("path", "source_path", "source_type")
}
return any(self.__dict__[key] for key in keys)

def __bool__(self) -> bool:
return any(self.__dict__.values())

Expand Down
9 changes: 6 additions & 3 deletions src/reuse/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from hashlib import sha1
from itertools import chain
from os import PathLike
from pathlib import Path
from pathlib import Path, PurePath
from typing import IO, Any, BinaryIO, Dict, Iterator, List, Optional, Set, Union

from boolean.boolean import Expression, ParseError
Expand Down Expand Up @@ -225,7 +225,8 @@ def _determine_license_suffix_path(path: StrPath) -> Path:

def _copyright_from_dep5(path: StrPath, dep5_copyright: Copyright) -> ReuseInfo:
"""Find the reuse information of *path* in the dep5 Copyright object."""
result = dep5_copyright.find_files_paragraph(Path(path).as_posix())
path = PurePath(path).as_posix()
result = dep5_copyright.find_files_paragraph(path)

if result is None:
return ReuseInfo()
Expand All @@ -237,7 +238,9 @@ def _copyright_from_dep5(path: StrPath, dep5_copyright: Copyright) -> ReuseInfo:
copyright_lines=set(
map(str.strip, result.copyright.splitlines()) # type: ignore
),
source_type=SourceType.DEP5_FILE,
path=path,
source_type=SourceType.DEP5,
source_path=".reuse/dep5",
)


Expand Down
62 changes: 29 additions & 33 deletions src/reuse/project.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. <https://fsfe.org>
# SPDX-FileCopyrightText: 2022 Florian Snow <[email protected]>
# SPDX-FileCopyrightText: 2023 DB Systel GmbH
# SPDX-FileCopyrightText: 2023 Carmen Bianca BAKKER <[email protected]>
#
# SPDX-License-Identifier: GPL-3.0-or-later

Expand All @@ -13,7 +14,7 @@
import warnings
from gettext import gettext as _
from pathlib import Path
from typing import Dict, Iterator, Optional, Union, cast
from typing import Dict, Iterator, List, Optional, Union, cast

from boolean.boolean import ParseError
from debian.copyright import Copyright
Expand Down Expand Up @@ -148,28 +149,30 @@ def all_files(self, directory: Optional[StrPath] = None) -> Iterator[Path]:
_LOGGER.debug("yielding '%s'", the_file)
yield the_file

def reuse_info_of(self, path: StrPath) -> ReuseInfo:
def reuse_info_of(self, path: StrPath) -> List[ReuseInfo]:
"""Return REUSE info of *path*.
This function will return any REUSE information that it can find, both
from within the file, the .license file and from the .reuse/dep5 file.
This function will return any REUSE information that it can find: from
within the file, the .license file and/or from the .reuse/dep5 file.
It also returns a single primary source path of the license/copyright
information, where 'primary' means '.license file' > 'header' > 'dep5'
The presence of a .license file always means that the file itself will
not be parsed for REUSE information.
When the .reuse/dep5 file covers a file and there is also REUSE
information within that file (or within its .license file), then two
:class:`ReuseInfo` objects are returned in the list, each with its respective
discovered REUSE information and information about the source.
"""
original_path = path
path = _determine_license_path(path)
dep5_path: Optional[str] = None
source_path = ""
source_type = None

_LOGGER.debug(f"searching '{path}' for REUSE information")

# This means that only one 'source' of licensing/copyright information
# is captured in ReuseInfo
dep5_result = ReuseInfo()
file_result = ReuseInfo()
final_result = ReuseInfo()
result = []

# Search the .reuse/dep5 file for REUSE information.
if self._copyright:
Expand All @@ -180,8 +183,6 @@ def reuse_info_of(self, path: StrPath) -> ReuseInfo:
_LOGGER.info(
_("'{path}' covered by .reuse/dep5").format(path=path)
)
source_path = str(self.root / ".reuse/dep5")
dep5_path = source_path

# Search the file for REUSE information.
with path.open("rb") as fp:
Expand All @@ -199,12 +200,16 @@ def reuse_info_of(self, path: StrPath) -> ReuseInfo:
file_result = extract_reuse_info(
decoded_text_from_binary(fp, size=read_limit)
)
if file_result:
source_path = str(path)
if file_result.contains_copyright_or_licensing():
if path.suffix == ".license":
source_type = SourceType.DOT_LICENSE_FILE
source_type = SourceType.DOT_LICENSE
else:
source_type = SourceType.FILE_HEADER
file_result = file_result.copy(
path=self.relative_from_root(original_path).as_posix(),
source_path=self.relative_from_root(path).as_posix(),
source_type=source_type,
)

except (ExpressionError, ParseError):
_LOGGER.error(
Expand All @@ -215,11 +220,7 @@ def reuse_info_of(self, path: StrPath) -> ReuseInfo:
)

# There is information both in the .reuse/dep5 file and in the file header
if (
dep5_result.contains_copyright_or_licensing()
and file_result.contains_copyright_or_licensing()
):
final_result = file_result.union(dep5_result)
if dep5_result.contains_info() and file_result.contains_info():
warnings.warn(
_(
"Copyright and licensing information for"
Expand All @@ -233,22 +234,17 @@ def reuse_info_of(self, path: StrPath) -> ReuseInfo:
" need do nothing yet. Run with"
" `--suppress-deprecation` to hide this warning."
).format(
original_path=original_path, path=path, dep5_path=dep5_path
original_path=original_path,
path=path,
dep5_path=dep5_result.source_path,
),
PendingDeprecationWarning,
)
# Information is only found in a DEP5 file
elif (
dep5_result.contains_copyright_or_licensing()
and not file_result.contains_copyright_or_licensing()
):
final_result = dep5_result.copy(source_path=source_path)
# There is a file header or a .license file
else:
final_result = file_result.copy(
source_path=source_path, source_type=source_type
)
return final_result
if dep5_result.contains_info():
result.append(dep5_result)
if file_result.contains_info():
result.append(file_result)
return result

def relative_from_root(self, path: StrPath) -> Path:
"""If the project root is /tmp/project, and *path* is
Expand Down
Loading

0 comments on commit 2b134b8

Please sign in to comment.