colorization

Samsung · Dec 11, 2024 · f88b001 · f88b001
1 parent 84a5ed8
commit f88b001
Show file tree

Hide file tree

Showing 9 changed files with 61 additions and 3 deletions.
diff --git a/credsweeper/__main__.py b/credsweeper/__main__.py
@@ -224,6 +224,7 @@ def get_arguments() -> Namespace:
                         const="output.xlsx",
                         dest="xlsx_filename",
                         metavar="PATH")
+    parser.add_argument("--color", "-C", help="print results with colorization", action="store_const", const=True)
     parser.add_argument("--hashed",
                         help="line, variable, value will be hashed in output",
                         action="store_const",
@@ -299,6 +300,7 @@ def scan(args: Namespace, content_provider: AbstractProvider, json_filename: Opt
                                   api_validation=args.api_validation,
                                   json_filename=json_filename,
                                   xlsx_filename=xlsx_filename,
+                                  color=args.color,
                                   hashed=args.hashed,
                                   subtext=args.subtext,
                                   sort_output=args.sort_output,

diff --git a/credsweeper/app.py b/credsweeper/app.py
@@ -5,6 +5,8 @@
 from typing import Any, List, Optional, Union, Dict, Sequence, Tuple
 
 import pandas as pd
+from colorama import Fore
+from colorama.ansi import AnsiStyle, Style
 
 # Directory of credsweeper sources MUST be placed before imports to avoid circular import error
 APP_PATH = Path(__file__).resolve().parent
@@ -42,6 +44,7 @@ def __init__(self,
                  api_validation: bool = False,
                  json_filename: Union[None, str, Path] = None,
                  xlsx_filename: Union[None, str, Path] = None,
+                 color: bool = False,
                  hashed: bool = False,
                  subtext: bool = False,
                  sort_output: bool = False,
@@ -73,6 +76,7 @@ def __init__(self,
                 to json
             xlsx_filename: optional string variable, path to save result
                 to xlsx
+            color: print results to stdout with colorization
             hashed: use hash of line, value and variable instead plain text
             subtext: use subtext of line near variable-value like it performed in ML
             use_filters: boolean variable, specifying the need of rule filters
@@ -112,6 +116,7 @@ def __init__(self,
         self.credential_manager = CredentialManager()
         self.json_filename: Union[None, str, Path] = json_filename
         self.xlsx_filename: Union[None, str, Path] = xlsx_filename
+        self.color = color
         self.hashed = hashed
         self.subtext = subtext
         self.sort_output = sort_output
@@ -427,6 +432,20 @@ def export_results(self) -> None:
             df = pd.DataFrame(data=data_list)
             df.to_excel(self.xlsx_filename, index=False)
 
+        if self.color:
+            is_exported = True
+            for credential in credentials:
+                for line_data in credential.line_data_list:
+                    print(Style.BRIGHT + credential.rule_name \
+                          + f"{line_data.info or line_data.path}:{line_data.line_num}"
+                          + Style.RESET_ALL)
+                    if self.hashed:
+                        print(Fore.LIGHTGREEN_EX \
+                              + line_data.get_hash_or_subtext(line_data.line, self.hashed) \
+                              + Style.RESET_ALL)
+                    else:
+                        print(f"{line_data.get_colored_line(self.subtext)}")
+
         if is_exported is False:
             for credential in credentials:
                 print(credential.to_str(hashed=self.hashed, subtext=self.subtext))
diff --git a/credsweeper/credentials/line_data.py b/credsweeper/credentials/line_data.py
@@ -5,6 +5,8 @@
 from functools import cached_property
 from typing import Any, Dict, Optional, Tuple
 
+from colorama import Fore, Style, Back
+
 from credsweeper.common.constants import MAX_LINE_LENGTH, UTF_8, StartEnd, ML_HUNK
 from credsweeper.config import Config
 from credsweeper.utils import Util
@@ -414,3 +416,35 @@ def to_json(self, hashed: bool, subtext: bool) -> Dict:
         }
         reported_output = {k: v for k, v in full_output.items() if k in self.config.line_data_output}
         return reported_output
+
+    def get_colored_line(self, subtext: bool = False) -> str:
+        """Return the line with value, separator and variable wrapped in ANSI color codes.
+
+        Args:
+            subtext: when True, the colored line is passed through Util.subtext and a style reset is appended
+
+        Return:
+            colored line; separator and variable are colored only when their offsets are consistent
+
+        """
+        # at least, value must present
+        spans = [(self.value_start, self.value_end, Fore.LIGHTYELLOW_EX)]
+        # separator may be missing
+        has_separator = 0 <= self.separator_start < self.separator_end <= self.value_start
+        if has_separator:
+            spans.append((self.separator_start, self.separator_end, Fore.LIGHTGREEN_EX))
+        # variable may be missing; skip it when it overlaps a colored separator
+        overlaps = has_separator and self.variable_start < self.separator_end \
+            and self.separator_start < self.variable_end
+        if 0 <= self.variable_start < self.variable_end <= self.value_start and not overlaps:
+            spans.append((self.variable_start, self.variable_end, Fore.LIGHTBLUE_EX))
+        line = self.line
+        # insert the codes from right to left so offsets of the spans still to be processed stay valid
+        for start, end, color in sorted(spans, reverse=True):
+            line = f"{line[:start]}{color}{line[start:end]}{Style.RESET_ALL}{line[end:]}"
+        if subtext:
+            # display part of the text, centered around the start of the value
+            line = Util.subtext(line, self.value_start + len(line) - len(self.line), ML_HUNK)
+            # put style reset at the end as a fallback
+            return f"{line}{Style.RESET_ALL}"
+        return line
diff --git a/docs/source/guide.rst b/docs/source/guide.rst
@@ -18,7 +18,7 @@ Get all argument list:
                              [--find-by-ext] [--depth POSITIVE_INT] [--no-filters] [--doc] [--ml_threshold FLOAT_OR_STR]
                              [--ml_batch_size POSITIVE_INT] [--ml_config PATH] [--ml_model PATH] [--ml_providers STR]
                              [--api_validation] [--jobs POSITIVE_INT] [--skip_ignored] [--save-json [PATH]]
-                             [--save-xlsx [PATH]] [--hashed] [--subtext] [--sort] [--log LOG_LEVEL]
+                             [--save-xlsx [PATH]] [--color] [--hashed] [--subtext] [--sort] [--log LOG_LEVEL]
                              [--size_limit SIZE_LIMIT]
                              [--banner] [--version]
     options:
@@ -54,6 +54,7 @@ Get all argument list:
       --skip_ignored        parse .gitignore files and skip credentials from ignored objects
       --save-json [PATH]    save result to json file (default: output.json)
       --save-xlsx [PATH]    save result to xlsx file (default: output.xlsx)
+      --color, -C           print results with colorization
       --hashed              line, variable, value will be hashed in output
       --subtext             line text will be stripped in 160 symbols but value and variable are kept
       --sort                enable output sorting

diff --git a/tests/samples/sample.ods b/tests/samples/sample.ods
diff --git a/tests/samples/sample.pptx b/tests/samples/sample.pptx
diff --git a/tests/samples/sample.xlsx b/tests/samples/sample.xlsx
diff --git a/tests/test_app.py b/tests/test_app.py
@@ -243,6 +243,7 @@ def test_it_works_n(self) -> None:
                    " [--skip_ignored]" \
                    " [--save-json [PATH]]" \
                    " [--save-xlsx [PATH]]" \
+                   " [--color]" \
                    " [--hashed]" \
                    " [--subtext]" \
                    " [--sort]" \

diff --git a/tests/test_main.py b/tests/test_main.py
@@ -163,6 +163,7 @@ def test_main_path_p(self, mock_get_arguments) -> None:
                              diff_path=[str(target_path)],
                              json_filename=os.path.join(tmp_dir, f"{__name__}.json"),
                              xlsx_filename=None,
+                             color=False,
                              subtext=False,
                              hashed=False,
                              rule_path=None,
@@ -433,7 +434,7 @@ def test_tar_n(self) -> None:
 
     def test_aws_multi_p(self) -> None:
         content_provider: AbstractProvider = FilesProvider([SAMPLES_PATH / "aws_multi.md"])
-        cred_sweeper = CredSweeper(ml_threshold=0)
+        cred_sweeper = CredSweeper(ml_threshold=0, color=True, hashed=True)
         cred_sweeper.run(content_provider=content_provider)
         for i in cred_sweeper.credential_manager.get_credentials():
             if "AWS Multi" == i.rule_name:
@@ -592,7 +593,7 @@ def test_yaml_n(self) -> None:
     def test_encoded_p(self) -> None:
         # test for finding credentials in ENCODED data
         content_provider: AbstractProvider = FilesProvider([SAMPLES_PATH / "encoded_data"])
-        cred_sweeper = CredSweeper(depth=5, ml_threshold=0)
+        cred_sweeper = CredSweeper(depth=5, ml_threshold=0, color=True)
         cred_sweeper.run(content_provider=content_provider)
         found_credentials = cred_sweeper.credential_manager.get_credentials()
         self.assertEqual(2, len(found_credentials))