Skip to content

Commit

Permalink
Feature/secureli 436 language detection (#481)
Browse files Browse the repository at this point in the history
secureli-436

<!-- Include general description here -->
closes #436 

Adds focused language detection during scan for specified or committed
files


## Changes
<!-- A detailed list of changes -->
* Limiting language detection to a specified set of files during scan
(unless scanning all files is requested)
* Adding the usage of git.Repo to check the commit file diff

## Testing
<!--
Mention updated tests and any manual testing performed.
Are aspects not yet tested or not easily testable?
Feel free to include screenshots if appropriate.
 -->
* Verify that language detection can be run on either specified or
committed files during scan

## Clean Code Checklist
<!-- This is here to support you. Some/most checkboxes may not apply to
your change -->
- [x] Meets acceptance criteria for issue
- [x] New logic is covered with automated tests
- [x] Appropriate exception handling added
- [x] Thoughtful logging included
- [x] Documentation is updated
- [x] Follow-up work is documented in TODOs
- [x] TODOs have a ticket associated with them
- [x] No commented-out code included


<!--
Github-flavored markdown reference:
https://docs.github.com/en/get-started/writing-on-github
-->

---------

Co-authored-by: Jordan Heffernan <[email protected]>
  • Loading branch information
kevin-orlando and JordoHeffernan authored Mar 22, 2024
1 parent 029a597 commit fbd983d
Show file tree
Hide file tree
Showing 11 changed files with 251 additions and 40 deletions.
2 changes: 1 addition & 1 deletion .secureli/.pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ repos:
- id: check-yaml
- id: check-added-large-files
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 24.2.0
rev: 24.3.0
hooks:
- id: black
- repo: https://github.com/yelp/detect-secrets
Expand Down
24 changes: 14 additions & 10 deletions secureli/actions/action.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,16 +47,24 @@ class Action(ABC):
def __init__(self, action_deps: ActionDependencies):
self.action_deps = action_deps

def get_secureli_config(self, reset: bool) -> secureli_config.SecureliConfig:
    """
    Provides the seCureLI configuration the action should operate on
    :param reset: If true, disregard any stored configuration and hand back a
    newly constructed SecureliConfig instead
    :return: A new SecureliConfig when reset is true, otherwise the result of
    loading through the secureli_config dependency
    """
    if reset:
        return secureli_config.SecureliConfig()

    return self.action_deps.secureli_config.load()

def verify_install(
self, folder_path: Path, reset: bool, always_yes: bool
self, folder_path: Path, reset: bool, always_yes: bool, files: list[Path]
) -> VerifyResult:
"""
Installs, upgrades or verifies the current seCureLI installation
:param folder_path: The folder path to initialize the repo for
:param reset: If true, disregard existing configuration and start fresh
:param always_yes: Assume "Yes" to all prompts
:param files: A List of files to scope the install to. This allows language
detection to run on only a selected list of files when scanning the repo.
"""

if (
self.action_deps.secureli_config.verify()
== secureli_config.VerifyConfigOutcome.OUT_OF_DATE
Expand Down Expand Up @@ -86,15 +94,11 @@ def verify_install(
)
return update_result

config = (
secureli_config.SecureliConfig()
if reset
else self.action_deps.secureli_config.load()
)
config = self.get_secureli_config(reset=reset)
languages = []

try:
languages = self._detect_languages(folder_path)
languages = self._detect_languages(folder_path, files)
except (ValueError, language.LanguageNotSupportedError) as e:
if config.languages and config.version_installed:
self.action_deps.echo.warning(
Expand Down Expand Up @@ -298,14 +302,14 @@ def _run_post_install_scan(
f"{format_sentence_list(config.languages)} does not support secrets detection, skipping"
)

def _detect_languages(self, folder_path: Path) -> list[str]:
def _detect_languages(self, folder_path: Path, files: list[Path]) -> list[str]:
"""
Detects programming languages present in the repository
:param folder_path: The folder path to initialize the repo for
:return: A list of all languages found in the repository
"""

analyze_result = self.action_deps.language_analyzer.analyze(folder_path)
analyze_result = self.action_deps.language_analyzer.analyze(folder_path, files)

if analyze_result.skipped_files:
self.action_deps.echo.warning(
Expand Down
2 changes: 1 addition & 1 deletion secureli/actions/initializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def initialize_repo(
:param reset: If true, disregard existing configuration and start fresh
:param always_yes: Assume "Yes" to all prompts
"""
verify_result = self.verify_install(folder_path, reset, always_yes)
verify_result = self.verify_install(folder_path, reset, always_yes, files=None)
if verify_result.outcome in ScanAction.halting_outcomes:
self.logging.failure(LogAction.init, verify_result.outcome)
else:
Expand Down
33 changes: 32 additions & 1 deletion secureli/actions/scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
from pathlib import Path
from time import time
from typing import Optional
from git import Repo

from secureli.modules.shared.abstractions.echo import EchoAbstraction
from secureli.actions import action
from secureli.modules.shared.abstractions.repo import GitRepo
from secureli.modules.shared.models.exit_codes import ExitCode
from secureli.modules.shared.models.install import VerifyOutcome, VerifyResult
from secureli.modules.shared.models.logging import LogAction
Expand Down Expand Up @@ -35,11 +37,13 @@ def __init__(
echo: EchoAbstraction,
logging: LoggingService,
scanner: ScannerService,
git_repo: GitRepo,
):
super().__init__(action_deps)
self.scanner = scanner
self.echo = echo
self.logging = logging
self.git_repo = git_repo

def _check_secureli_hook_updates(self, folder_path: Path) -> VerifyResult:
"""
Expand Down Expand Up @@ -93,6 +97,24 @@ def publish_results(
else:
self.logging.failure(LogAction.publish, result.result_message)

def get_commited_files(self, scan_mode: ScanMode) -> Optional[list[Path]]:
    """
    Attempts to build a list of committed files for use in language detection if
    the user is scanning staged files for an existing installation
    :param scan_mode: Determines which files are scanned in the repo (i.e. staged only or all)
    :returns: a list of Path objects for the committed files, or None when there
    is no existing installation, the scan is not limited to staged files, or the
    commit diff could not be read
    """
    config = self.get_secureli_config(reset=False)
    installed = bool(config.languages and config.version_installed)

    if not installed or scan_mode != ScanMode.STAGED_ONLY:
        return None

    try:
        committed_files = self.git_repo.get_commit_diff()
        return [Path(file) for file in committed_files]
    except Exception:
        # Best effort: failing to read the diff must not abort the scan, so the
        # caller gets None. Only Exception is caught so that KeyboardInterrupt
        # and SystemExit still propagate.
        return None

def scan_repo(
self,
folder_path: Path,
Expand All @@ -112,7 +134,16 @@ def scan_repo(
:param specific_test: If set, limits scanning to the single pre-commit hook.
Otherwise, scans with all hooks.
"""
verify_result = self.verify_install(folder_path, False, always_yes)

scan_files = [Path(file) for file in files or []] or self.get_commited_files(
scan_mode
)
verify_result = self.verify_install(
folder_path,
False,
always_yes,
scan_files,
)

# Check if pre-commit hooks are up-to-date
secureli_config = self.action_deps.secureli_config.load()
Expand Down
6 changes: 5 additions & 1 deletion secureli/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from secureli.actions.scan import ScanAction
from secureli.actions.build import BuildAction
from secureli.actions.update import UpdateAction
from secureli.modules.shared.abstractions.repo import GitRepo
from secureli.repositories.repo_files import RepoFilesRepository
from secureli.repositories.secureli_config import SecureliConfigRepository
from secureli.repositories.repo_settings import SecureliRepository
Expand Down Expand Up @@ -82,6 +83,9 @@ class Container(containers.DeclarativeContainer):
echo=echo,
)

"""Wraps the execution and management of git commands"""
git_repo = providers.Factory(GitRepo)

# Services

"""Analyzes a set of files to try to determine the most common languages"""
Expand Down Expand Up @@ -172,7 +176,7 @@ class Container(containers.DeclarativeContainer):
echo=echo,
logging=logging_service,
scanner=scanner_service,
# settings_repository=settings_repository,
git_repo=git_repo,
)

"""Update Action, representing what happens when the update command is invoked"""
Expand Down
4 changes: 2 additions & 2 deletions secureli/modules/language_analyzer/language_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def __init__(
self.repo_files = repo_files
self.lexer_guesser = lexer_guesser

def analyze(self, folder_path: Path) -> AnalyzeResult:
def analyze(self, folder_path: Path, files: list[Path]) -> AnalyzeResult:
"""
Analyzes the folder structure and lists languages found
:param folder_path: The path to the repository to analyze
Expand All @@ -29,7 +29,7 @@ def analyze(self, folder_path: Path) -> AnalyzeResult:
40% of the repo is JavaScript, the result will be a dictionary containing keys
"Python" and "JavaScript" with values 0.6 and 0.4 respectively
"""
file_paths = self.repo_files.list_repo_files(folder_path)
file_paths = files if files else self.repo_files.list_repo_files(folder_path)
results = defaultdict(int)

skipped_files = []
Expand Down
21 changes: 21 additions & 0 deletions secureli/modules/shared/abstractions/repo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from abc import ABC, abstractmethod
import git


class RepoAbstraction(ABC):
    """
    Interface for the git repo features used by the application, so that callers
    do not depend on a particular git implementation.
    """

    @abstractmethod
    def get_commit_diff(self) -> list[str]:
        """
        Retrieves the commit diff as a list of strings
        :return: The diff entries; how they are obtained is left to the
        implementing class
        """
        ...


class GitRepo(RepoAbstraction):
    """
    Implementation and wrapper around git repo features
    """

    def get_commit_diff(self) -> list[str]:
        """
        Lists the files reported by diffing the HEAD commit of the current repo
        :return: The path of every file in the diff, as plain strings
        """
        # Commit.diff() yields Diff objects, not strings. Extract the paths so
        # the result matches the declared list[str] and can be fed to Path().
        paths = []
        for diff in git.Repo().head.commit.diff():
            # Prefer the "b" side path; fall back to the "a" side if it is
            # unset. NOTE(review): confirm which side the callers expect for
            # renamed files.
            path = diff.b_path or diff.a_path
            if path:
                paths.append(path)
        return paths
Loading

0 comments on commit fbd983d

Please sign in to comment.