From 4e339a3d6b07ba13ba4293fa88f73fe785b7e564 Mon Sep 17 00:00:00 2001 From: Dhanushkarthik29 Date: Thu, 22 Aug 2024 08:31:51 +0530 Subject: [PATCH 1/2] fixLineNumberissue for public key detections --- xgitguard/github-public/public_key_detections.py | 4 +++- xgitguard/utilities/file_utilities.py | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/xgitguard/github-public/public_key_detections.py b/xgitguard/github-public/public_key_detections.py index 0829b6e..3a763b6 100644 --- a/xgitguard/github-public/public_key_detections.py +++ b/xgitguard/github-public/public_key_detections.py @@ -79,7 +79,7 @@ from ml_training.model import xgg_train_model from utilities.common_utilities import mask_data from utilities.file_utilities import write_to_csv_file -from utilities.common_utilities import check_github_token_env +from utilities.common_utilities import check_github_token_env,findLineNumber file_prefix = "xgg_" @@ -208,6 +208,8 @@ def format_detection(pkeyword, skeyword, url, code_content, secrets, keyword_cou else: # Mask the current secret masked_secret = mask_data(code_line, secret) + linenumber=findLineNumber(code_content.split("\n"),code_line) + valid_secret_row.append(linenumber) valid_secret_row.append(masked_secret) valid_secret_row.append(datetime.now().strftime("%Y-%m-%d %H:%M:%S")) valid_secret_row.append(confidence_score[0]) diff --git a/xgitguard/utilities/file_utilities.py b/xgitguard/utilities/file_utilities.py index c99e0c2..99ed6d0 100644 --- a/xgitguard/utilities/file_utilities.py +++ b/xgitguard/utilities/file_utilities.py @@ -25,6 +25,12 @@ logger = logging.getLogger("xgg_logger") +def findLineNumber(code_content,code_line): + for index,line in enumerate(code_content): + if(code_line in line or code_line == line): + return index+1 + return -1 + def read_text_file(file_path): """ From b362e10d6aecbc424cfbb94a1caf721fc139aa8e Mon Sep 17 00:00:00 2001 From: Dhanushkarthik29 Date: Thu, 5 Sep 2024 08:29:26 +0530 Subject: [PATCH 2/2] fix the line number issue --- xgitguard/github-enterprise/enterprise_cred_detections.py | 4 +++- xgitguard/github-enterprise/enterprise_key_detections.py | 4 +++- xgitguard/github-public/public_cred_detections.py | 5 ++++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/xgitguard/github-enterprise/enterprise_cred_detections.py b/xgitguard/github-enterprise/enterprise_cred_detections.py index e45b2a8..c4795a4 100644 --- a/xgitguard/github-enterprise/enterprise_cred_detections.py +++ b/xgitguard/github-enterprise/enterprise_cred_detections.py @@ -78,7 +78,7 @@ from ml_training.model import xgg_train_model from utilities.common_utilities import mask_data from utilities.file_utilities import write_to_csv_file -from utilities.common_utilities import check_github_token_env +from utilities.common_utilities import check_github_token_env,findLineNumber file_prefix = "xgg_" @@ -224,6 +224,8 @@ def format_detection(skeyword, org_url, url, code_content, secrets, skeyword_cou else: # Mask the current secret masked_secret = mask_data(code_line, secret) + linenumber=findLineNumber(code_content.split("\n"),code_line) + valid_secret_row.append(linenumber) valid_secret_row.append(masked_secret) valid_secret_row.append(datetime.now().strftime("%Y-%m-%d %H:%M:%S")) diff --git a/xgitguard/github-enterprise/enterprise_key_detections.py b/xgitguard/github-enterprise/enterprise_key_detections.py index 95b27e4..3ab9e3b 100644 --- a/xgitguard/github-enterprise/enterprise_key_detections.py +++ b/xgitguard/github-enterprise/enterprise_key_detections.py @@ -79,7 +79,7 @@ from ml_training.model import xgg_train_model from utilities.common_utilities import mask_data from utilities.file_utilities import write_to_csv_file -from utilities.common_utilities import check_github_token_env +from utilities.common_utilities import check_github_token_env,findLineNumber file_prefix = "xgg_" @@ -209,6 +209,8 @@ def format_detection(skeyword, org_url, url, code_content, secrets, skeyword_cou else: # Mask the current secret masked_secret = mask_data(code_line, secret) + linenumber=findLineNumber(code_content.split("\n"),code_line) + valid_secret_row.append(linenumber) valid_secret_row.append(masked_secret) valid_secret_row.append(datetime.now().strftime("%Y-%m-%d %H:%M:%S")) diff --git a/xgitguard/github-public/public_cred_detections.py b/xgitguard/github-public/public_cred_detections.py index a78e444..19a036e 100644 --- a/xgitguard/github-public/public_cred_detections.py +++ b/xgitguard/github-public/public_cred_detections.py @@ -64,6 +64,7 @@ import pandas as pd from urlextract import URLExtract + MODULE_DIR = os.path.dirname(os.path.realpath(__file__)) parent_dir = os.path.dirname(MODULE_DIR) sys.path.insert(0, parent_dir) @@ -81,7 +82,7 @@ from ml_training.model import xgg_train_model from utilities.common_utilities import mask_data from utilities.file_utilities import write_to_csv_file -from utilities.common_utilities import check_github_token_env +from utilities.common_utilities import check_github_token_env,findLineNumber file_prefix = "xgg_" @@ -226,6 +227,8 @@ def format_detection(pkeyword, skeyword, url, code_content, secrets, keyword_cou else: # Mask the current secret masked_secret = mask_data(code_line, secret) + linenumber=findLineNumber(code_content.split("\n"),code_line) + valid_secret_row.append(linenumber) valid_secret_row.append(masked_secret) valid_secret_row.append(datetime.now().strftime("%Y-%m-%d %H:%M:%S")) valid_secret_row.append(confidence_score[0])