From 7707615cd8d5d5a77e162f577fcb318dda0c7eb7 Mon Sep 17 00:00:00 2001 From: ian-bowden-slalom <128502385+ian-bowden-slalom@users.noreply.github.com> Date: Tue, 18 Jun 2024 12:37:32 -0400 Subject: [PATCH] chore: refactored to use common git file repository class (#568) secureli-XXX This PR refactors GitRepo and RepoFilesRepository classes into a common GitRepo class which is an implementation of a new abstract class VersionControlRepoAbstraction. VersionControlRepoAbstraction replaces the RepoAbstraction class. ## Changes * ## Testing * ## Clean Code Checklist - [x] Meets acceptance criteria for issue - [ ] New logic is covered with automated tests - [ ] Appropriate exception handling added - [ ] Thoughtful logging included - [ ] Documentation is updated - [ ] Follow-up work is documented in TODOs - [ ] TODOs have a ticket associated with them - [ ] No commented-out code included --------- Co-authored-by: Ian Bowden --- secureli/actions/scan.py | 14 ++++---- secureli/container.py | 28 ++++++++------- .../custom_regex_scanner.py | 6 ++-- .../pii_scanner/pii_scanner.py | 6 ++-- .../language_analyzer/language_analyzer.py | 6 ++-- secureli/modules/shared/abstractions/repo.py | 21 ------------ .../abstractions/version_control_repo.py | 20 +++++++++++ .../{repo_files.py => git_repo.py} | 8 +++-- tests/actions/test_scan_action.py | 34 +++++++++++-------- .../modules/shared/abstractions/test_repo.py | 25 -------------- ...o_files_repository.py => test_git_repo.py} | 28 +++++++-------- 11 files changed, 96 insertions(+), 100 deletions(-) delete mode 100644 secureli/modules/shared/abstractions/repo.py create mode 100644 secureli/modules/shared/abstractions/version_control_repo.py rename secureli/repositories/{repo_files.py => git_repo.py} (94%) delete mode 100644 tests/modules/shared/abstractions/test_repo.py rename tests/repositories/{test_repo_files_repository.py => test_git_repo.py} (88%) diff --git a/secureli/actions/scan.py b/secureli/actions/scan.py index 0f6fdd45..76aa96fc 100644 --- a/secureli/actions/scan.py +++ b/secureli/actions/scan.py @@ -6,7 +6,9 @@ from secureli.modules.custom_scanners.custom_scans import CustomScannersService from secureli.actions import action -from secureli.modules.shared.abstractions.repo import GitRepo +from secureli.modules.shared.abstractions.version_control_repo import ( + VersionControlRepoAbstraction, +) from secureli.modules.shared.models.exit_codes import ExitCode from secureli.modules.shared.models import install from secureli.modules.shared.models.logging import LogAction @@ -36,12 +38,12 @@ def __init__( action_deps: action.ActionDependencies, hooks_scanner: HooksScannerService, custom_scanners: CustomScannersService, - git_repo: GitRepo, + file_repo: VersionControlRepoAbstraction, ): super().__init__(action_deps) self.hooks_scanner = hooks_scanner self.custom_scanners = custom_scanners - self.git_repo = git_repo + self.file_repo = file_repo def publish_results( self, @@ -91,7 +93,7 @@ def scan_repo( """ scan_files = [Path(file) for file in files or []] or self._get_commited_files( - scan_mode + scan_mode, folder_path ) verify_result = self.verify_install( folder_path, @@ -198,7 +200,7 @@ def _check_secureli_hook_updates(self, folder_path: Path) -> install.VerifyResul # Since we don't actually perform the updates here, return an outcome of UPDATE_CANCELLED return install.VerifyResult(outcome=install.VerifyOutcome.UPDATE_CANCELED) - def _get_commited_files(self, scan_mode: ScanMode) -> list[Path]: + def _get_commited_files(self, scan_mode: ScanMode, folder_path: Path) -> list[Path]: """ Attempts to build a list of commited files for use in language detection if the user is scanning staged files for an existing installation @@ -211,7 +213,7 @@ def _get_commited_files(self, scan_mode: ScanMode) -> list[Path]: if not installed or scan_mode != ScanMode.STAGED_ONLY: return None try: - committed_files = self.git_repo.get_commit_diff() + committed_files = self.file_repo.list_staged_files(folder_path) return [Path(file) for file in committed_files] except: return None diff --git a/secureli/container.py b/secureli/container.py index 6b434fc4..4e8c10ec 100644 --- a/secureli/container.py +++ b/secureli/container.py @@ -13,8 +13,10 @@ from secureli.actions.scan import ScanAction from secureli.actions.build import BuildAction from secureli.actions.update import UpdateAction -from secureli.modules.shared.abstractions.repo import GitRepo -from secureli.repositories.repo_files import RepoFilesRepository +from secureli.modules.shared.abstractions.version_control_repo import ( + VersionControlRepoAbstraction, +) +from secureli.repositories.git_repo import GitRepo from secureli.repositories.secureli_config import SecureliConfigRepository from secureli.repositories.repo_settings import SecureliRepository from secureli.modules.shared.resources import read_resource @@ -54,15 +56,20 @@ class Container(containers.DeclarativeContainer): ) # Repositories + """Abstraction for interacting with a version control file repo""" + version_control_file_repository = providers.Factory(VersionControlRepoAbstraction) - """Loads files from the repository folder, filtering out invisible files""" - repo_files_repository = providers.Factory( - RepoFilesRepository, + """Git implementation of version control file repo""" + git_files_repository = providers.Factory( + GitRepo, max_file_size=config.repo_files.max_file_size.as_int(), ignored_file_extensions=config.repo_files.ignored_file_extensions, ignored_file_patterns=combined_ignored_file_patterns, ) + """Override all injections of version control repo with the git implementation""" + version_control_file_repository.override(git_files_repository) + """ Loads and saves the seCureLI output configuration, which stores the outcomes of running init and other derived data. @@ -89,9 +96,6 @@ class Container(containers.DeclarativeContainer): echo=echo, ) - """Wraps the execution and management of git commands""" - git_repo = providers.Factory(GitRepo) - # Services """Analyzes a set of files to try to determine the most common languages""" @@ -124,7 +128,7 @@ class Container(containers.DeclarativeContainer): """Analyzes a given repo to try to identify the most common language""" language_analyzer_service = providers.Factory( language_analyzer.language_analyzer.LanguageAnalyzerService, - repo_files=repo_files_repository, + repo_files=version_control_file_repository, lexer_guesser=lexer_guesser, ) @@ -144,14 +148,14 @@ class Container(containers.DeclarativeContainer): """The service that scans the repository for potential PII""" pii_scanner_service = providers.Factory( PiiScannerService, - repo_files=repo_files_repository, + repo_files=version_control_file_repository, echo=echo, ignored_extensions=config.pii_scanner.ignored_extensions, ) custom_regex_scanner_service = providers.Factory( CustomRegexScannerService, - repo_files=repo_files_repository, + repo_files=version_control_file_repository, echo=echo, settings=settings_repository, ) @@ -203,7 +207,7 @@ class Container(containers.DeclarativeContainer): action_deps=action_deps, hooks_scanner=hooks_scanner_service, custom_scanners=custom_scanner_service, - git_repo=git_repo, + file_repo=version_control_file_repository, ) """Update Action, representing what happens when the update command is invoked""" diff --git a/secureli/modules/custom_scanners/custom_regex_scanner/custom_regex_scanner.py b/secureli/modules/custom_scanners/custom_regex_scanner/custom_regex_scanner.py index 80363324..87d30d14 100644 --- a/secureli/modules/custom_scanners/custom_regex_scanner/custom_regex_scanner.py +++ b/secureli/modules/custom_scanners/custom_regex_scanner/custom_regex_scanner.py @@ -11,7 +11,9 @@ import secureli.modules.shared.models.scan as scan from secureli.modules.shared.abstractions.echo import EchoAbstraction -from secureli.repositories.repo_files import RepoFilesRepository +from secureli.modules.shared.abstractions.version_control_repo import ( + VersionControlRepoAbstraction, +) from secureli.repositories.repo_settings import SecureliRepository @@ -31,7 +33,7 @@ class CustomRegexScannerService: def __init__( self, - repo_files: RepoFilesRepository, + repo_files: VersionControlRepoAbstraction, echo: EchoAbstraction, settings: SecureliRepository, ): diff --git a/secureli/modules/custom_scanners/pii_scanner/pii_scanner.py b/secureli/modules/custom_scanners/pii_scanner/pii_scanner.py index 522691a2..d95382eb 100644 --- a/secureli/modules/custom_scanners/pii_scanner/pii_scanner.py +++ b/secureli/modules/custom_scanners/pii_scanner/pii_scanner.py @@ -14,7 +14,9 @@ import secureli.modules.shared.models.scan as scan from secureli.modules.shared.abstractions.echo import EchoAbstraction -from secureli.repositories.repo_files import RepoFilesRepository +from secureli.modules.shared.abstractions.version_control_repo import ( + VersionControlRepoAbstraction, +) class PiiResult(pydantic.BaseModel): @@ -33,7 +35,7 @@ class PiiScannerService: def __init__( self, - repo_files: RepoFilesRepository, + repo_files: VersionControlRepoAbstraction, echo: EchoAbstraction, ignored_extensions: list[str], ): diff --git a/secureli/modules/language_analyzer/language_analyzer.py b/secureli/modules/language_analyzer/language_analyzer.py index 5fa67027..51e56f65 100644 --- a/secureli/modules/language_analyzer/language_analyzer.py +++ b/secureli/modules/language_analyzer/language_analyzer.py @@ -3,7 +3,9 @@ from secureli.modules.shared.abstractions.lexer_guesser import LexerGuesser from secureli.modules.shared.models.language import AnalyzeResult, SkippedFile -from secureli.repositories.repo_files import RepoFilesRepository +from secureli.modules.shared.abstractions.version_control_repo import ( + VersionControlRepoAbstraction, +) from secureli.modules.shared.consts.language import supported_languages @@ -14,7 +16,7 @@ class LanguageAnalyzerService: def __init__( self, - repo_files: RepoFilesRepository, + repo_files: VersionControlRepoAbstraction, lexer_guesser: LexerGuesser, ): self.repo_files = repo_files diff --git a/secureli/modules/shared/abstractions/repo.py b/secureli/modules/shared/abstractions/repo.py deleted file mode 100644 index 6f548447..00000000 --- a/secureli/modules/shared/abstractions/repo.py +++ /dev/null @@ -1,21 +0,0 @@ -from abc import ABC, abstractmethod -import git - - -class RepoAbstraction(ABC): - """ - Abstracts the configuring and execution of git repo features. - """ - - @abstractmethod - def get_commit_diff(self) -> list[str]: - pass - - -class GitRepo(RepoAbstraction): - """ - Implementation and wrapper around git repo features - """ - - def get_commit_diff(self) -> list[str]: - return git.Repo().head.commit.diff() diff --git a/secureli/modules/shared/abstractions/version_control_repo.py b/secureli/modules/shared/abstractions/version_control_repo.py new file mode 100644 index 00000000..8c5dc64b --- /dev/null +++ b/secureli/modules/shared/abstractions/version_control_repo.py @@ -0,0 +1,20 @@ +from abc import ABC, abstractmethod +from pathlib import Path + + +class VersionControlRepoAbstraction(ABC): + """ + Abstracts common version control repository functions + """ + + @abstractmethod + def list_repo_files(self, folder_path: Path) -> list[Path]: + pass + + @abstractmethod + def list_staged_files(self, folder_path: Path) -> list[Path]: + pass + + @abstractmethod + def load_file(self, file_path: Path) -> str: + pass diff --git a/secureli/repositories/repo_files.py b/secureli/repositories/git_repo.py similarity index 94% rename from secureli/repositories/repo_files.py rename to secureli/repositories/git_repo.py index 40efd546..0c81f20c 100644 --- a/secureli/repositories/repo_files.py +++ b/secureli/repositories/git_repo.py @@ -3,6 +3,9 @@ import subprocess import chardet +from secureli.modules.shared.abstractions.version_control_repo import ( + VersionControlRepoAbstraction, +) from secureli.modules.shared.utilities import combine_patterns @@ -16,9 +19,10 @@ def __init__(self, message): super().__init__(self.message) -class RepoFilesRepository: +class GitRepo(VersionControlRepoAbstraction): """ - Loads files in a given repository, or raises ValueError if the provided path is not a git repo + The Git implementation of a version control file repository. + Lists staged files and all files in repository. Reads a file by path to a utf-8 string """ def __init__( diff --git a/tests/actions/test_scan_action.py b/tests/actions/test_scan_action.py index c5e16560..6c12151f 100644 --- a/tests/actions/test_scan_action.py +++ b/tests/actions/test_scan_action.py @@ -73,7 +73,7 @@ def mock_updater() -> MagicMock: @pytest.fixture() -def mock_git_repo() -> MagicMock: +def mock_file_repo() -> MagicMock: return MagicMock() @@ -145,13 +145,13 @@ def action_deps( def scan_action( action_deps: ActionDependencies, mock_custom_scanners: MagicMock, - mock_git_repo: MagicMock, + mock_file_repo: MagicMock, ) -> ScanAction: return ScanAction( action_deps=action_deps, hooks_scanner=action_deps.hooks_scanner, custom_scanners=mock_custom_scanners, - git_repo=mock_git_repo, + file_repo=mock_file_repo, ) @@ -419,7 +419,7 @@ def test_publish_results_on_fail_and_action_not_successful( def test_verify_install_is_called_with_commted_files( scan_action: ScanAction, - mock_git_repo: MagicMock, + mock_file_repo: MagicMock, mock_secureli_config: MagicMock, mock_language_analyzer: MagicMock, ): @@ -429,7 +429,7 @@ def test_verify_install_is_called_with_commted_files( mock_files = ["file1.py", "file2.py"] - mock_git_repo.get_commit_diff.return_value = mock_files + mock_file_repo.list_staged_files.return_value = mock_files scan_action.scan_repo( folder_path=Path(""), scan_mode=ScanMode.STAGED_ONLY, @@ -446,7 +446,7 @@ def test_verify_install_is_called_with_commted_files( def test_verify_install_is_called_with_user_specified_files( scan_action: ScanAction, - mock_git_repo: MagicMock, + mock_file_repo: MagicMock, mock_secureli_config: MagicMock, mock_language_analyzer: MagicMock, ): @@ -456,7 +456,7 @@ def test_verify_install_is_called_with_user_specified_files( mock_files = ["file1.py", "file2.py"] - mock_git_repo.get_commit_diff.return_value = None + mock_file_repo.list_staged_files.return_value = None scan_action.scan_repo( folder_path=Path(""), scan_mode=ScanMode.STAGED_ONLY, @@ -473,7 +473,7 @@ def test_verify_install_is_called_with_user_specified_files( def test_verify_install_is_called_with_no_specified_files( scan_action: ScanAction, - mock_git_repo: MagicMock, + mock_file_repo: MagicMock, mock_secureli_config: MagicMock, mock_language_analyzer: MagicMock, ): @@ -481,7 +481,7 @@ def test_verify_install_is_called_with_no_specified_files( languages=["RadLang"], version_installed=1 ) - mock_git_repo.get_commit_diff.return_value = None + mock_file_repo.list_staged_files.return_value = None scan_action.scan_repo( folder_path=Path(""), scan_mode=ScanMode.STAGED_ONLY, @@ -496,15 +496,17 @@ def test_verify_install_is_called_with_no_specified_files( def test_get_commited_files_returns_commit_diff( scan_action: ScanAction, - mock_git_repo: MagicMock, + mock_file_repo: MagicMock, mock_secureli_config: MagicMock, ): mock_secureli_config.load.return_value = ConfigModels.SecureliConfig( languages=["RadLang"], version_installed=1 ) mock_files = [Path("file1.py"), Path("file2.py")] - mock_git_repo.get_commit_diff.return_value = mock_files - result = scan_action._get_commited_files(scan_mode=ScanMode.STAGED_ONLY) + mock_file_repo.list_staged_files.return_value = mock_files + result = scan_action._get_commited_files( + scan_mode=ScanMode.STAGED_ONLY, folder_path=test_folder_path + ) assert result == mock_files @@ -515,7 +517,9 @@ def test_get_commited_files_returns_none_when_not_installed( mock_secureli_config.load.return_value = ConfigModels.SecureliConfig( languages=[], version_installed=None ) - result = scan_action._get_commited_files(scan_mode=ScanMode.STAGED_ONLY) + result = scan_action._get_commited_files( + scan_mode=ScanMode.STAGED_ONLY, folder_path=test_folder_path + ) assert result is None @@ -526,5 +530,7 @@ def test_get_commited_files_returns_when_scan_mode_is_not_staged_only( mock_secureli_config.load.return_value = ConfigModels.SecureliConfig( languages=["RadLang"], version_installed=1 ) - result = scan_action._get_commited_files(scan_mode=ScanMode.ALL_FILES) + result = scan_action._get_commited_files( + scan_mode=ScanMode.ALL_FILES, folder_path=test_folder_path + ) assert result is None diff --git a/tests/modules/shared/abstractions/test_repo.py b/tests/modules/shared/abstractions/test_repo.py deleted file mode 100644 index 5a99462d..00000000 --- a/tests/modules/shared/abstractions/test_repo.py +++ /dev/null @@ -1,25 +0,0 @@ -from pathlib import Path -from unittest.mock import MagicMock -import pytest -from pytest_mock import MockerFixture - -from secureli.modules.shared.abstractions.repo import GitRepo - - -@pytest.fixture() -def git_repo() -> GitRepo: - return GitRepo() - - -@pytest.fixture() -def mock_repo(mocker: MockerFixture) -> MagicMock: - return mocker.patch("git.Repo", MagicMock()) - - -def test_that_get_commit_diff_returns_file_diff( - git_repo: GitRepo, mock_repo: MagicMock -): - mock_files = [Path("test_file.py"), Path("test_file2.py")] - mock_repo.return_value.head.commit.diff.return_value = mock_files - result = git_repo.get_commit_diff() - assert result is mock_files diff --git a/tests/repositories/test_repo_files_repository.py b/tests/repositories/test_git_repo.py similarity index 88% rename from tests/repositories/test_repo_files_repository.py rename to tests/repositories/test_git_repo.py index 4feee37d..dd4e5dfd 100644 --- a/tests/repositories/test_repo_files_repository.py +++ b/tests/repositories/test_git_repo.py @@ -5,13 +5,13 @@ from pytest_mock import MockerFixture from subprocess import CompletedProcess -from secureli.repositories.repo_files import RepoFilesRepository +from secureli.repositories.git_repo import GitRepo @pytest.fixture() def mock_subprocess(mocker: MockerFixture) -> MagicMock: mock_subprocess = MagicMock() - mocker.patch("secureli.repositories.repo_files.subprocess", mock_subprocess) + mocker.patch("secureli.repositories.git_repo.subprocess", mock_subprocess) return mock_subprocess @@ -137,9 +137,9 @@ def binary_file_with_size_file_path(mock_open_resource_with_binary_file) -> Magi @pytest.fixture() -def repo_files_repository() -> RepoFilesRepository: +def repo_files_repository() -> GitRepo: all_mov_files = "^(?:.+/)?[^/]*\\.mov(?:(?P/).*)?$" - return RepoFilesRepository( + return GitRepo( max_file_size=10000, ignored_file_extensions="", ignored_file_patterns=[all_mov_files], @@ -147,14 +147,14 @@ def repo_files_repository() -> RepoFilesRepository: def test_that_list_repo_files_raises_value_error_without_git_repo( - repo_files_repository: RepoFilesRepository, git_not_exists_folder_path: MagicMock + repo_files_repository: GitRepo, git_not_exists_folder_path: MagicMock ): with pytest.raises(ValueError): repo_files_repository.list_repo_files(git_not_exists_folder_path) def test_that_list_staged_files_returns_list_of_staged_files( - repo_files_repository: RepoFilesRepository, + repo_files_repository: GitRepo, mock_subprocess: MagicMock, ): fake_file_1 = "i/am/staged" @@ -170,7 +170,7 @@ def test_that_list_staged_files_returns_list_of_staged_files( def test_that_list_repo_files_raises_value_error_if_dot_git_is_a_file_somehow( - repo_files_repository: RepoFilesRepository, + repo_files_repository: GitRepo, git_a_file_for_some_reason_folder_path: MagicMock, ): with pytest.raises(ValueError): @@ -178,7 +178,7 @@ def test_that_list_repo_files_raises_value_error_if_dot_git_is_a_file_somehow( def test_that_list_repo_files_filters_out_invisible_files_and_folders( - repo_files_repository: RepoFilesRepository, good_folder_path: MagicMock + repo_files_repository: GitRepo, good_folder_path: MagicMock ): files = repo_files_repository.list_repo_files(good_folder_path) @@ -187,7 +187,7 @@ def test_that_list_repo_files_filters_out_invisible_files_and_folders( def test_that_load_file_loads_data( - repo_files_repository: RepoFilesRepository, good_file_path: MagicMock + repo_files_repository: GitRepo, good_file_path: MagicMock ): data = repo_files_repository.load_file(good_file_path) @@ -195,35 +195,35 @@ def test_that_load_file_loads_data( def test_that_load_file_raises_value_error_for_nonexistent_file( - repo_files_repository: RepoFilesRepository, nonexistent_file_path: MagicMock + repo_files_repository: GitRepo, nonexistent_file_path: MagicMock ): with pytest.raises(ValueError): repo_files_repository.load_file(nonexistent_file_path) def test_that_load_file_raises_value_error_for_file_that_is_too_big( - repo_files_repository: RepoFilesRepository, too_big_file_path: MagicMock + repo_files_repository: GitRepo, too_big_file_path: MagicMock ): with pytest.raises(ValueError): repo_files_repository.load_file(too_big_file_path) def test_that_load_file_raises_value_error_for_file_if_io_error_occurs( - repo_files_repository: RepoFilesRepository, io_error_occurs_file_path: MagicMock + repo_files_repository: GitRepo, io_error_occurs_file_path: MagicMock ): with pytest.raises(ValueError): repo_files_repository.load_file(io_error_occurs_file_path) def test_that_load_file_raises_value_error_for_file_if_value_error_occurs( - repo_files_repository: RepoFilesRepository, value_error_occurs_file_path: MagicMock + repo_files_repository: GitRepo, value_error_occurs_file_path: MagicMock ): with pytest.raises(ValueError): repo_files_repository.load_file(value_error_occurs_file_path) def test_that_load_file_raises_value_error_for_binary_file_with_size( - repo_files_repository: RepoFilesRepository, + repo_files_repository: GitRepo, binary_file_with_size_file_path: MagicMock, ): with pytest.raises(