From 875bba68aae86b93b76ca929411e1b1475f13010 Mon Sep 17 00:00:00 2001 From: Carmen Bianca BAKKER Date: Tue, 9 Jul 2024 19:13:23 +0200 Subject: [PATCH] Reduce calls to all_files Previously this function would be called three times on a lint: - once by NestedReuseTOML.find_reuse_tomls in Project.find_global_licensing - once by NestedReuseTOML.find_reuse_tomls in NestedReuseTOML.from_file - once to lint all the files I now no longer use NestedReuseTOML.from_file. It's still not ideal to go over all files twice, but it's better than thrice. Signed-off-by: Carmen Bianca BAKKER --- src/reuse/project.py | 60 +++++++++++++++++++++++++------------------ tests/test_project.py | 25 +++++++++++++----- 2 files changed, 54 insertions(+), 31 deletions(-) diff --git a/src/reuse/project.py b/src/reuse/project.py index c60c31ab..76e89a41 100644 --- a/src/reuse/project.py +++ b/src/reuse/project.py @@ -8,7 +8,6 @@ """Module that contains the central Project class.""" -import contextlib import errno import glob import logging @@ -45,6 +44,7 @@ NestedReuseTOML, PrecedenceType, ReuseDep5, + ReuseTOML, ) from .vcs import VCSStrategy, VCSStrategyNone, all_vcs_strategies @@ -145,11 +145,8 @@ def from_directory( vcs_strategy=vcs_strategy, ) if found: - global_licensing = found.cls.from_file( - found.path, - include_submodules=include_submodules, - include_meson_subprojects=include_meson_subprojects, - vcs_strategy=vcs_strategy, + global_licensing = cls._global_licensing_from_found( + found, str(root) ) project = cls( @@ -292,7 +289,7 @@ def find_global_licensing( include_submodules: bool = False, include_meson_subprojects: bool = False, vcs_strategy: Optional[VCSStrategy] = None, - ) -> Optional[GlobalLicensingFound]: + ) -> List[GlobalLicensingFound]: """Find the path and corresponding class of a project directory's :class:`GlobalLicensing`. @@ -300,7 +297,7 @@ def find_global_licensing( GlobalLicensingConflict: if more than one global licensing config file is present. """ - candidate: Optional[GlobalLicensingFound] = None + candidates: List[GlobalLicensingFound] = [] dep5_path = root / ".reuse/dep5" if (dep5_path).exists(): # Sneaky workaround to not print this warning. @@ -313,31 +310,44 @@ def find_global_licensing( ), PendingDeprecationWarning, ) - candidate = GlobalLicensingFound(dep5_path, ReuseDep5) - - # TODO: the performance of this isn't great. - toml_path = None - with contextlib.suppress(StopIteration): - toml_path = next( - NestedReuseTOML.find_reuse_tomls( - root, - include_submodules=include_submodules, - include_meson_subprojects=include_meson_subprojects, - vcs_strategy=vcs_strategy, - ) + candidates = [GlobalLicensingFound(dep5_path, ReuseDep5)] + + reuse_toml_candidates = [ + GlobalLicensingFound(path, ReuseTOML) + for path in NestedReuseTOML.find_reuse_tomls( + root, + include_submodules=include_submodules, + include_meson_subprojects=include_meson_subprojects, + vcs_strategy=vcs_strategy, ) - if toml_path is not None: - if candidate is not None: + ] + if reuse_toml_candidates: + if candidates: raise GlobalLicensingConflict( _( "Found both '{new_path}' and '{old_path}'. You" " cannot keep both files simultaneously; they are" " not intercompatible." - ).format(new_path=toml_path, old_path=dep5_path) + ).format( + new_path=reuse_toml_candidates[0].path, + old_path=dep5_path, + ) ) - candidate = GlobalLicensingFound(root, NestedReuseTOML) + candidates = reuse_toml_candidates + + return candidates - return candidate + @classmethod + def _global_licensing_from_found( + cls, found: List[GlobalLicensingFound], root: StrPath + ) -> GlobalLicensing: + if len(found) == 1 and found[0].cls == ReuseDep5: + return ReuseDep5.from_file(found[0].path) + # This is an impossible scenario at time of writing. + if not all(item.cls == ReuseTOML for item in found): + raise NotImplementedError() + tomls = [ReuseTOML.from_file(item.path) for item in found] + return NestedReuseTOML(reuse_tomls=tomls, source=str(root)) def _identifier_of_license(self, path: Path) -> str: """Figure out the SPDX License identifier of a license given its path. diff --git a/tests/test_project.py b/tests/test_project.py index 519577fb..9aa39429 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -23,8 +23,8 @@ from reuse.covered_files import all_files from reuse.global_licensing import ( GlobalLicensingParseError, - NestedReuseTOML, ReuseDep5, + ReuseTOML, ) from reuse.project import GlobalLicensingConflict, Project @@ -566,8 +566,10 @@ def test_find_global_licensing_dep5(fake_repository_dep5): """Find the dep5 file. Also output a PendingDeprecationWarning.""" with warnings.catch_warnings(record=True) as caught_warnings: result = Project.find_global_licensing(fake_repository_dep5) - assert result.path == fake_repository_dep5 / ".reuse/dep5" - assert result.cls == ReuseDep5 + assert len(result) == 1 + dep5 = result[0] + assert dep5.path == fake_repository_dep5 / ".reuse/dep5" + assert dep5.cls == ReuseDep5 assert len(caught_warnings) == 1 assert issubclass( @@ -578,14 +580,25 @@ def test_find_global_licensing_dep5(fake_repository_dep5): def test_find_global_licensing_reuse_toml(fake_repository_reuse_toml): """Find the REUSE.toml file.""" result = Project.find_global_licensing(fake_repository_reuse_toml) - assert result.path == fake_repository_reuse_toml / "." - assert result.cls == NestedReuseTOML + assert len(result) == 1 + toml = result[0] + assert toml.path == fake_repository_reuse_toml / "REUSE.toml" + assert toml.cls == ReuseTOML + + +def test_find_global_licensing_reuse_toml_multiple(fake_repository_reuse_toml): + """Find multiple REUSE.tomls.""" + (fake_repository_reuse_toml / "src/REUSE.toml").write_text("version = 1") + result = Project.find_global_licensing(fake_repository_reuse_toml) + assert len(result) == 2 + assert result[0].path == fake_repository_reuse_toml / "REUSE.toml" + assert result[1].path == fake_repository_reuse_toml / "src/REUSE.toml" def test_find_global_licensing_none(empty_directory): """Find no file.""" result = Project.find_global_licensing(empty_directory) - assert result is None + assert not result def test_find_global_licensing_conflict(fake_repository_dep5):