From 2e8203e982a986f953e48206784c6bf89e206253 Mon Sep 17 00:00:00 2001
From: Rohit Raj <88114930+ro4i7@users.noreply.github.com>
Date: Thu, 6 Apr 2023 01:30:44 +0530
Subject: [PATCH 01/25] Update scanner.py

---
 scanner.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scanner.py b/scanner.py
index 92b31d3e..23a1a86e 100755
--- a/scanner.py
+++ b/scanner.py
@@ -14,5 +14,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Import the scanner module from the gcp_scanner package
 from src.gcp_scanner import scanner
-scanner.main()
\ No newline at end of file
+
+# Call the main function of the scanner module to start the scanning process
+scanner.main()

From 683d054c84f0763c3809db9c433f6de91fa05e66 Mon Sep 17 00:00:00 2001
From: Rohit Raj <88114930+ro4i7@users.noreply.github.com>
Date: Thu, 6 Apr 2023 01:31:16 +0530
Subject: [PATCH 02/25] Update requirements.txt

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 452b3f73..fc6987db 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
+# List of required Python packages and their corresponding versions
 pyu2f==0.1.5
 google-api-python-client==2.80.0
 google-cloud-container==2.17.4

From 501f4846526b0c37cfa48e948e1e26f8fad0f110 Mon Sep 17 00:00:00 2001
From: Rohit Raj <88114930+ro4i7@users.noreply.github.com>
Date: Thu, 6 Apr 2023 01:32:07 +0530
Subject: [PATCH 03/25] Update __init__.py

---
 src/gcp_scanner/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gcp_scanner/__init__.py b/src/gcp_scanner/__init__.py
index e69de29b..80238dad 100644
--- a/src/gcp_scanner/__init__.py
+++ b/src/gcp_scanner/__init__.py
@@ -0,0 +1 @@
+# Currently, this is an empty file.

From 8c12675b0858751baed4417bae0cd441c622be40 Mon Sep 17 00:00:00 2001
From: Rohit Raj <88114930+ro4i7@users.noreply.github.com>
Date: Thu, 6 Apr 2023 01:32:39 +0530
Subject: [PATCH 04/25] Update __main__.py

---
 src/gcp_scanner/__main__.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/gcp_scanner/__main__.py b/src/gcp_scanner/__main__.py
index 3e75a4a5..330fb74e 100644
--- a/src/gcp_scanner/__main__.py
+++ b/src/gcp_scanner/__main__.py
@@ -16,7 +16,10 @@
 """
 
+# Importing the scanner module
 from . import scanner
 
+# Checking if the code is running as the main module
 if __name__ == '__main__':
-  scanner.main()
+    # Calling the main function of the scanner module
+    scanner.main()

From 713da7adf30407a2ea4b841996ca8fb9069247d3 Mon Sep 17 00:00:00 2001
From: Rohit Raj <88114930+ro4i7@users.noreply.github.com>
Date: Thu, 6 Apr 2023 01:45:25 +0530
Subject: [PATCH 05/25] Update arguments.py

---
 src/gcp_scanner/arguments.py | 87 +++++++++++++++++++++---------------
 1 file changed, 51 insertions(+), 36 deletions(-)

diff --git a/src/gcp_scanner/arguments.py b/src/gcp_scanner/arguments.py
index a8028c66..ed37efc1 100644
--- a/src/gcp_scanner/arguments.py
+++ b/src/gcp_scanner/arguments.py
@@ -20,6 +20,7 @@
 import argparse
 import logging
 
+# Define a function to create an argument parser using the argparse module
 def arg_parser():
   """Creates an argument parser using the `argparse` module
   and defines several command-line arguments.
 
   Returns:
     argparse.Namespace: A namespace object containing the parsed
     command-line arguments.
""" + # Create a new parser object parser = argparse.ArgumentParser( - prog='scanner.py', - description='GCP Scanner', - usage='python3 %(prog)s -o folder_to_save_results -g -') + prog='scanner.py', # program name + description='GCP Scanner', # description + usage='python3 %(prog)s -o folder_to_save_results -g -' # usage instructions + ) + + # Define a required argument group required_named = parser.add_argument_group('Required parameters') + # Add a required argument to the group required_named.add_argument( - '-o', - '--output-dir', - required=True, - dest='output', - default='scan_db', - help='Path to output directory') + '-o', # short option name + '--output-dir', # long option name + required=True, + dest='output', + default='scan_db', + help='Path to output directory' + ) + # Add command line arguments to the parser object parser.add_argument( '-k', - '--sa-key-path', - default=None, - dest='key_path', - help='Path to directory with SA keys in json format') + '--sa-key-path', # Option for specifying the path to the directory with SA keys + default=None, # Default value if option is not specified + dest='key_path', # Destination variable for storing the value of the option + help='Path to directory with SA keys in json format' # Help message + ) parser.add_argument( '-g', - '--gcloud-profile-path', - default=None, - dest='gcloud_profile_path', - help='Path to directory with gcloud profile. Specify -\ - to search for credentials in default gcloud config path' + '--gcloud-profile-path', + default=None, + dest='gcloud_profile_path', + help='Path to directory with gcloud profile. Specify - to search for credentials in default gcloud config path' ) parser.add_argument( '-m', - '--use-metadata', - default=False, - dest='use_metadata', - action='store_true', - help='Extract credentials from GCE instance metadata') + '--use-metadata', + default=False, + dest='use_metadata', + action='store_true', + help='Extract credentials from GCE instance metadata' + ) parser.add_argument( '-at', - '--access-token-files', - default=None, - dest='access_token_files', - help='A list of comma separated files with access token and OAuth scopes.\ -TTL limited. A token and scopes should be stored in JSON format.') + '--access-token-files', + default=None, + dest='access_token_files', + help='A list of comma separated files with access token and OAuth scopes. TTL limited. A token and scopes should be stored in JSON format.' + ) parser.add_argument( '-rt', - '--refresh-token-files', - default=None, - dest='refresh_token_files', - help='A list of comma separated files with refresh_token, client_id,\ -token_uri and client_secret stored in JSON format.' + '--refresh-token-files', + default=None, + dest='refresh_token_files', + help='A list of comma separated files with refresh_token, client_id, token_uri and client_secret stored in JSON format.' 
)
+
   parser.add_argument(
       '-s',
       '--service-account',
       default=None,
       dest='key_name',
       help='Name of individual SA to scan')
   parser.add_argument(
       '-p',
       '--project',
       default=None,
       dest='target_project',
       help='Name of individual project to scan')
   parser.add_argument(
       '-f',
       '--force-projects',
       default=None,
       dest='force_projects',
       help='Comma separated list of project names to include in the scan')
   parser.add_argument(
       '-c',
       '--config',
       default=None,
       dest='config_path',
       help='A path to config file with a set of specific resources to scan.')
   parser.add_argument(
       '-l',
       '--logging',
       default='WARNING',
       dest='log_level',
       choices=('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'),
       help='Set logging level (INFO, WARNING, ERROR)')
   parser.add_argument(
       '-lf',
       '--log-file',
       default=None,
       dest='log_file',
       help='Save logs to the path specified rather than displaying in\
 console')
 
-  args: argparse.Namespace = parser.parse_args()
+  # Parse the command line arguments
+  args: argparse.Namespace = parser.parse_args()
 
-  if not args.key_path and not args.gcloud_profile_path \
+  # Check if none of the necessary options are selected
+  if not args.key_path and not args.gcloud_profile_path \
    and not args.use_metadata and not args.access_token_files\
    and not args.refresh_token_files:
+
+    # If none of the options are selected, log an error message
     logging.error(
         'Please select at least one option to begin scan\
 -k/--sa-key-path,-g/--gcloud-profile-path, -m, -rt, -at'
     )
 
-  return args
+  # Return the parsed command line arguments
+  return args
+

From 9ba1b1a7a2dcf940720f785c2e20cc512810bcac Mon Sep 17 00:00:00 2001
From: Rohit Raj <88114930+ro4i7@users.noreply.github.com>
Date: Thu, 6 Apr 2023 01:46:14 +0530
Subject: [PATCH 06/25] Update credsdb.py

---
 src/gcp_scanner/credsdb.py | 708 ++++++++++++++++++++-----------------
 1 file changed, 393 insertions(+), 315 deletions(-)

diff --git a/src/gcp_scanner/credsdb.py b/src/gcp_scanner/credsdb.py
index a0c7365f..1a7334ce 100644
--- a/src/gcp_scanner/credsdb.py
+++ b/src/gcp_scanner/credsdb.py
@@ -32,6 +32,7 @@
 from httplib2 import Credentials
 import requests
 
+# Set search places for finding credentials file
 credentials_db_search_places = ["/home/", "/root/"]
 
 
@@ -39,6 +40,9 @@ def credentials_from_token(access_token: str, refresh_token: Optional[str],
                            token_uri: Optional[str], client_id: Optional[str],
                            client_secret: Optional[str],
                            scopes_user: Optional[str]) -> Credentials:
+  """
+  Create Credentials instance from tokens
+  """
   return credentials.Credentials(
       access_token,
       refresh_token=refresh_token,
@@ -49,377 +53,451 @@ def credentials_from_token(access_token: str, refresh_token: Optional[str],
 
 
 def get_creds_from_file(file_path: str) -> Tuple[str, Credentials]:
-  """Creates a Credentials instance from a service account json file.
-
-  Args:
-    file_path: The path to the service account json file.
-
-  Returns:
-    str: An email address associated with a service account.
-    google.auth.service_account.Credentials: The constructed credentials.
   """
-
+  Retrieve Credentials instance from a service account json file.
+  """
   logging.info("Retrieving credentials from %s", file_path)
   creds = service_account.Credentials.from_service_account_file(file_path)
   return creds.service_account_email, creds
 
 
 def get_creds_from_json(parsed_keyfile: Mapping[str, str]) -> Credentials:
-  """Creates a Credentials instance from parsed service account info.
-
-  Args:
-    parsed_keyfile: The service account info in Google format.
-
-  Returns:
-    google.auth.service_account.Credentials: The constructed credentials.
   """
-
+  Retrieve Credentials instance from parsed service account info.
+  """
   return service_account.Credentials.from_service_account_info(parsed_keyfile)
 
 
 def get_creds_from_metadata() -> Tuple[Optional[str], Optional[Credentials]]:
-  """Retrieves a Credentials instance from compute instance metadata.
+    """Retrieves a Credentials instance from compute instance metadata.
 
-  Returns:
-    str: An email associated with credentials.
-    google.auth.service_account.Credentials: The constructed credentials.
-  """
+    Returns:
+        Tuple[Optional[str], Optional[Credentials]]:
+            A tuple containing the email associated with the credentials and the constructed credentials.
+ """ - print("Retrieving access token from instance metadata") - - token_url = "http://metadata.google.internal/computeMetadata/v1/instance/\ -service-accounts/default/token" - scope_url = "http://metadata.google.internal/computeMetadata/v1/instance/\ -service-accounts/default/scopes" - email_url = "http://metadata.google.internal/computeMetadata/v1/instance/\ -service-accounts/default/email" - headers = {"Metadata-Flavor": "Google"} - try: - res = requests.get(token_url, headers=headers) - if not res.ok: - logging.error("Failed to retrieve instance token. Status code %d", - res.status_code) - return None, None - token = res.json()["access_token"] - - res = requests.get(scope_url, headers=headers) - if not res.ok: - logging.error("Failed to retrieve instance scopes. Status code %d", - res.status_code) - return None, None - instance_scopes = res.content.decode("utf-8") - - res = requests.get(email_url, headers=headers) - if not res.ok: - logging.error("Failed to retrieve instance email. Status code %d", - res.status_code) - return None, None - email = res.content.decode("utf-8") - - except Exception: - logging.error("Failed to retrieve instance metadata") - logging.error(sys.exc_info()[1]) - return None, None - - print("Successfully retrieved instance metadata") - logging.info("Access token length: %d", len(token)) - logging.info("Instance email: %s", email) - logging.info("Instance scopes: %s", instance_scopes) - return email, credentials_from_token(token, None, None, None, None, - instance_scopes) - - -def get_creds_from_data(access_token: str, - parsed_keyfile: Dict[str, str]) -> Credentials: - """Creates a Credentials instance from parsed service account info. - - The function currently supports two types of credentials. Service account key - in json format and user account with refresh token. - - Args: - access_token: An Oauth2 access token. It can be None. - parsed_keyfile: The service account info in Google format. - - Returns: - google.auth.service_account.Credentials: The constructed credentials. - """ - creds = None - if "refresh_token" in parsed_keyfile: - logging.info("Identified user credentials in gcloud profile") - # this is user account credentials with refresh token - creds = credentials_from_token(access_token, - parsed_keyfile["refresh_token"], - parsed_keyfile["token_uri"], - parsed_keyfile["client_id"], - parsed_keyfile["client_secret"], - parsed_keyfile["scopes"]) - elif "private_key" in parsed_keyfile: - logging.info("Identified service account key credentials in gcloud profile") - # this is a service account key with private key - creds = get_creds_from_json(parsed_keyfile) - else: - logging.error("unknown type of credentials") - - return creds + # Print a message to indicate that we are retrieving the access token from instance metadata + print("Retrieving access token from instance metadata") + # Define the URLs that we need to access to get the token, scopes, and email + token_url = "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token" + scope_url = "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/scopes" + email_url = "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/email" -def find_creds(explicit_path: Optional[str] = None) -> List[str]: - """The function search disk and returns a list of files with GCP credentials. 
+ # Set the headers for the requests + headers = {"Metadata-Flavor": "Google"} - Args: - explicit_path: An explicit path on disk to search. If None, the function - searches in standard locations where gcloud profiles are usually located. + try: + # Make the request to get the access token + res = requests.get(token_url, headers=headers) - Returns: - list: The list of files with GCP credentials. - """ + # Check if the response was successful + if not res.ok: + logging.error("Failed to retrieve instance token. Status code %d", res.status_code) + return None, None + + # Parse the JSON response and get the access token + token = res.json()["access_token"] + + # Make the request to get the instance scopes + res = requests.get(scope_url, headers=headers) + + # Check if the response was successful + if not res.ok: + logging.error("Failed to retrieve instance scopes. Status code %d", res.status_code) + return None, None + + # Get the instance scopes from the response + instance_scopes = res.content.decode("utf-8") + + # Make the request to get the instance email + res = requests.get(email_url, headers=headers) + + # Check if the response was successful + if not res.ok: + logging.error("Failed to retrieve instance email. Status code %d", res.status_code) + return None, None - logging.info("Searching for credentials on disk") - list_of_creds_files = list() - search_paths = list() - if explicit_path is not None and explicit_path != "-": - search_paths.append(explicit_path) - else: - credentials_db_search_places.append(os.getenv("HOME") + "/") - for dir_path in credentials_db_search_places: - if not os.access(dir_path, os.R_OK): - continue - for subdir_name in os.listdir(dir_path): - full_path = dir_path + subdir_name + "/gcloud/" - search_paths.append(full_path) - - for dir_path in search_paths: - print(f"Scanning {dir_path} for credentials.db") - full_path = os.path.join(dir_path, "credentials.db") - if os.path.exists(full_path) and os.access(full_path, os.R_OK): - print(f"Identified accessible gcloud config profile {full_path}") - list_of_creds_files.append(full_path) - print(f"Identified {len(list_of_creds_files)} credential DBs") - return list_of_creds_files + # Get the instance email from the response + email = res.content.decode("utf-8") + + except Exception: + # Log an error message if any exception occurred + logging.error("Failed to retrieve instance metadata") + logging.error(sys.exc_info()[1]) + return None, None + + # Print a message to indicate that we have successfully retrieved the instance metadata + print("Successfully retrieved instance metadata") + + # Log the length of the access token, instance email, and instance scopes + logging.info("Access token length: %d", len(token)) + logging.info("Instance email: %s", email) + logging.info("Instance scopes: %s", instance_scopes) + + # Return the email and credentials constructed from the token and instance scopes + return email, credentials_from_token(token, None, None, None, None, instance_scopes) + + + +def get_creds_from_data(access_token: str, parsed_keyfile: Dict[str, str]) -> Credentials: + """Creates a Credentials instance from parsed service account info. + + The function currently supports two types of credentials. Service account key in json format and user account with refresh token. + + Args: + access_token: An Oauth2 access token. It can be None. + parsed_keyfile: The service account info in Google format. + + Returns: + google.auth.service_account.Credentials: The constructed credentials. 
+ """ + + # Initialize the variable to None + creds = None + + # Check if the parsed_keyfile contains "refresh_token" + if "refresh_token" in parsed_keyfile: + logging.info("Identified user credentials in gcloud profile") + # this is user account credentials with refresh token + creds = credentials_from_token( + access_token, + parsed_keyfile["refresh_token"], + parsed_keyfile["token_uri"], + parsed_keyfile["client_id"], + parsed_keyfile["client_secret"], + parsed_keyfile["scopes"] + ) + # Check if the parsed_keyfile contains "private_key" + elif "private_key" in parsed_keyfile: + logging.info("Identified service account key credentials in gcloud profile") + # this is a service account key with private key + creds = get_creds_from_json(parsed_keyfile) + else: + logging.error("unknown type of credentials") + + # Return the constructed credentials + return creds + + +def find_creds(explicit_path: Optional[str] = None) -> List[str]: + """ + The function searches the disk and returns a list of files with GCP credentials. + + Args: + explicit_path: An explicit path on disk to search. If None, the function + searches in standard locations where gcloud profiles are usually located. + + Returns: + list: The list of files with GCP credentials. + """ + + logging.info("Searching for credentials on disk") + list_of_creds_files = [] + + # Create a list of search paths to scan for credentials.db + search_paths = [] + if explicit_path is not None and explicit_path != "-": + search_paths.append(explicit_path) + else: + credentials_db_search_places.append(os.getenv("HOME") + "/") + for dir_path in credentials_db_search_places: + if not os.access(dir_path, os.R_OK): + continue + for subdir_name in os.listdir(dir_path): + full_path = os.path.join(dir_path, subdir_name, "gcloud") + search_paths.append(full_path) + + # Scan each search path for credentials.db and add them to the list_of_creds_files + for dir_path in search_paths: + print(f"Scanning {dir_path} for credentials.db") + full_path = os.path.join(dir_path, "credentials.db") + if os.path.exists(full_path) and os.access(full_path, os.R_OK): + print(f"Identified accessible gcloud config profile {full_path}") + list_of_creds_files.append(full_path) + + print(f"Identified {len(list_of_creds_files)} credential DBs") + return list_of_creds_files def get_access_tokens_dict(path_to_creds_db: str) -> Dict[str, str]: - """The function search and extract Oauth2 access_tokens from sqlite3 DB. + """ + The function searches and extracts OAuth2 access_tokens from a SQLite3 database. - Args: - path_to_creds_db: A path to sqllite3 DB with gcloud access tokens. + Args: + path_to_creds_db: A path to SQLite3 database with gcloud access tokens. - Returns: - dict: The dictionary of account names and corresponding tokens. - """ + Returns: + dict: The dictionary of account names and corresponding tokens. 
+ """ - access_tokens_dict = dict() - access_tokens_path = path_to_creds_db.replace("credentials.db", - "access_tokens.db") - if os.path.exists(access_tokens_path) and os.access(access_tokens_path, - os.R_OK): - logging.info("Identified access tokens DB in %s", access_tokens_path) - conn = sqlite3.connect(access_tokens_path) - cursor = conn.execute( - "SELECT account_id, access_token, token_expiry FROM access_tokens") - rows = cursor.fetchall() - for row in rows: - associated_account = row[0] - token = row[1] - expiration_date = row[2] - expiration_date = expiration_date.split(".")[0] # omit milliseconds + access_tokens_dict = dict() + + # Replace credentials.db with access_tokens.db to get the path to access tokens database + access_tokens_path = path_to_creds_db.replace("credentials.db", "access_tokens.db") - token_time_obj = datetime.datetime.strptime(expiration_date, - "%Y-%m-%d %H:%M:%S") - if datetime.datetime.now() > token_time_obj: - logging.info("Token for %s expired", associated_account) - continue + # Check if the access tokens database exists and can be read + if os.path.exists(access_tokens_path) and os.access(access_tokens_path, os.R_OK): - access_tokens_dict[associated_account] = token + # If the access tokens database exists and can be read, connect to it + logging.info("Identified access tokens DB in %s", access_tokens_path) + conn = sqlite3.connect(access_tokens_path) + cursor = conn.execute("SELECT account_id, access_token, token_expiry FROM access_tokens") + + # Fetch all rows from the access tokens database + rows = cursor.fetchall() + + # Iterate over each row + for row in rows: + associated_account = row[0] + token = row[1] + expiration_date = row[2] + + # Omit milliseconds from the expiration date + expiration_date = expiration_date.split(".")[0] + + # Convert the expiration date to a datetime object + token_time_obj = datetime.datetime.strptime(expiration_date, "%Y-%m-%d %H:%M:%S") + + # Check if the token has expired + if datetime.datetime.now() > token_time_obj: + logging.info("Token for %s expired", associated_account) + continue + + # Add the associated account and token to the access tokens dictionary + access_tokens_dict[associated_account] = token + + return access_tokens_dict - return access_tokens_dict def extract_creds(path_to_creds_db: str) -> List[Tuple[str, str, str]]: - """The function extract refresh and associated access tokens from sqlite3 DBs. + """ + The function extracts refresh and associated access tokens from sqlite3 DBs. - Args: - path_to_creds_db: A path to sqllite3 DB with gcloud refresh tokens. + Args: + path_to_creds_db (str): A path to sqlite3 DB with gcloud refresh tokens. - Returns: - list of tuples: (account name, refresh token, access token). - """ + Returns: + List of tuples: (account name, refresh token, access token). 
+ """ + # Log that we are opening the database + logging.info("Opening %s DB", path_to_creds_db) - logging.info("Opening %s DB", path_to_creds_db) - SA = collections.namedtuple("SA", "account_name, creds, token") + # Create a named tuple for service accounts + SA = collections.namedtuple("SA", "account_name, creds, token") - res = list() - conn = sqlite3.connect(path_to_creds_db) - cursor = conn.execute("SELECT account_id, value FROM credentials") - rows = cursor.fetchall() - if len(rows) <= 0: - logging.error("Empty database") - return None - # we also want to check for access_tokens to avoid unnecessary refreshing - access_tokens = get_access_tokens_dict(path_to_creds_db) - for row in rows: - access_token = None - if access_tokens.get(row[0], None) is not None: - logging.info("Found valid access token for %s", row[0]) - access_token = access_tokens[row[0]] - res.append(SA(row[0], row[1], access_token)) - print(f"Identified {len(res)} credential entries") - return res - - -def get_account_creds_list( - gcloud_profile_path: Optional[str] = None -) -> List[List[Tuple[str, str, str]]]: - """The function searches and extracts gcloud credentials from disk. - - Args: - gcloud_profile_path: An explicit gcloud profile path on disk to search. If - None, the function searches in standard locations where gcloud profiles - are usually located. - - Returns: - list: A list of tuples (account name, refresh token, access token). - """ + # Initialize an empty list for the results + res = list() - accounts = list() - creds_file_list = find_creds(gcloud_profile_path) - for creds_file in creds_file_list: - res = extract_creds(creds_file) - if res is not None: - accounts.append(res) - return accounts + # Connect to the database + conn = sqlite3.connect(path_to_creds_db) + + # Select account_id and value from the credentials table + cursor = conn.execute("SELECT account_id, value FROM credentials") + rows = cursor.fetchall() + # Check if the database is empty + if len(rows) <= 0: + logging.error("Empty database") + return None -def impersonate_sa(iam_client: IAMCredentialsClient, - target_account: str) -> Credentials: - """The function is used to impersonate SA. + # We also want to check for access_tokens to avoid unnecessary refreshing + access_tokens = get_access_tokens_dict(path_to_creds_db) - Args: - iam_client: google.cloud.iam_credentials_v1.services.iam_credentials. - client.IAMCredentialsClient object. - target_account: Name of a service account to impersonate. + # Loop through the rows + for row in rows: + access_token = None - Returns: - google.auth.service_account.Credentials: The constructed credentials. 
- """ + # Check if the access token exists and is valid + if access_tokens.get(row[0], None) is not None: + logging.info("Found valid access token for %s", row[0]) + access_token = access_tokens[row[0]] - scopes_sa = ["https://www.googleapis.com/auth/cloud-platform"] - intermediate_access_token = iam_client.generate_access_token( - name=target_account, scope=scopes_sa, retry=None - # lifetime = "43200" - ) + # Append the account name, credentials, and access token to the results list + res.append(SA(row[0], row[1], access_token)) - return credentials_from_token(intermediate_access_token.access_token, None, - None, None, None, scopes_sa) + # Print the number of identified credential entries + print(f"Identified {len(res)} credential entries") + # Return the results list + return res -def creds_from_access_token(access_token_file): - """The function is used to obtain Google Auth Credentials from access token. - - Args: - access_token_file: a path to a file with access token and scopes stored in - JSON format. Example: - { - "access_token": "", - "scopes": [ - "https://www.googleapis.com/auth/devstorage.read_only", - "https://www.googleapis.com/auth/logging.write", - "https://www.googleapis.com/auth/monitoring.write", - "https://www.googleapis.com/auth/servicecontrol", - "https://www.googleapis.com/auth/service.management.readonly", - "https://www.googleapis.com/auth/trace.append" - ] - } - - Returns: - google.auth.service_account.Credentials: The constructed credentials. - """ - with open(access_token_file, encoding="utf-8") as f: - creds_dict = json.load(f) +def get_account_creds_list(gcloud_profile_path: Optional[str] = None) -> List[List[Tuple[str, str, str]]]: + """The function searches and extracts gcloud credentials from disk. - user_scopes = creds_dict.get("scopes", None) - if user_scopes is None: - user_scopes = ["https://www.googleapis.com/auth/cloud-platform"] + Args: + gcloud_profile_path: An explicit gcloud profile path on disk to search. If + None, the function searches in standard locations where gcloud profiles + are usually located. - return credentials_from_token( - creds_dict["access_token"], - None, - None, - None, - None, - user_scopes) + Returns: + list: A list of tuples (account name, refresh token, access token). + """ + accounts = list() # initialize an empty list + creds_file_list = find_creds(gcloud_profile_path) # get a list of credentials files + for creds_file in creds_file_list: + res = extract_creds(creds_file) # extract the credentials from the file + if res is not None: + accounts.append(res) # append the extracted credentials to the accounts list + return accounts # return the accounts list -def creds_from_refresh_token(refresh_token_file): - """The function is used to obtain Google Auth Credentials from refresh token. - - Args: - refresh_token_file: a path to a file with refresh_token, client_id, - client_secret, and token_uri stored in JSON format. - Example: - { - "refresh_token": "", - "client_id": "id", - "client_secret": "secret", - scopes: [ - https://www.googleapis.com/auth/devstorage.read_only, - https://www.googleapis.com/auth/logging.write, - https://www.googleapis.com/auth/monitoring.write, - https://www.googleapis.com/auth/servicecontrol, - https://www.googleapis.com/auth/service.management.readonly, - https://www.googleapis.com/auth/trace.append - ] - } - Returns: - google.auth.service_account.Credentials: The constructed credentials. 
- """ +def impersonate_sa(iam_client: IAMCredentialsClient, target_account: str) -> Credentials: + """ + The function is used to impersonate a service account. - with open(refresh_token_file, encoding="utf-8") as f: - creds_dict = json.load(f) + Args: + iam_client (IAMCredentialsClient): The IAMCredentialsClient object. + target_account (str): The name of the service account to impersonate. - user_scopes = get_scopes_from_refresh_token(creds_dict) + Returns: + Credentials: The constructed credentials. + """ - return credentials.Credentials( - None, - refresh_token=creds_dict["refresh_token"], - token_uri=creds_dict["token_uri"], - client_id=creds_dict["client_id"], - client_secret=creds_dict["client_secret"], - scopes=user_scopes, - ) + # Define the scopes for the service account + scopes_sa = ["https://www.googleapis.com/auth/cloud-platform"] + # Generate an access token for the service account + intermediate_access_token = iam_client.generate_access_token( + name=target_account, + scope=scopes_sa, + retry=None, + # lifetime="43200" + ) -def get_scopes_from_refresh_token(context) -> Union[List[str], None]: - """The function is used to obtain scopes from refresh token. - - Args: - context: dictionary containing refresh_token data - Example: - { - "refresh_token": "", - "client_id": "id", - "client_secret": "secret", - } - Returns: - a list of scopes or None - """ - # Obtain access token from the refresh token - token_uri = "https://oauth2.googleapis.com/token" - context["grant_type"] = "refresh_token" - - try: - response = requests.post(token_uri, data=context, timeout=5) - # prepare the scope string into a list - raw = response.json().get("scope", None) - return raw.split(" ") if raw else None - except Exception as ex: - logging.error( - "Failed to retrieve access token from refresh token.", + # Use the access token to construct credentials + return credentials_from_token( + intermediate_access_token.access_token, + None, + None, + None, + None, + scopes_sa + ) + + + +def creds_from_access_token(access_token_file): + """The function is used to obtain Google Auth Credentials from access token. + + Args: + access_token_file: a path to a file with access token and scopes stored in + JSON format. Example: + { + "access_token": "", + "scopes": [ + "https://www.googleapis.com/auth/devstorage.read_only", + "https://www.googleapis.com/auth/logging.write", + "https://www.googleapis.com/auth/monitoring.write", + "https://www.googleapis.com/auth/servicecontrol", + "https://www.googleapis.com/auth/service.management.readonly", + "https://www.googleapis.com/auth/trace.append" + ] + } + + Returns: + google.auth.service_account.Credentials: The constructed credentials. + """ + + # Load the access token and scopes from the specified file + with open(access_token_file, encoding="utf-8") as f: + creds_dict = json.load(f) + + # Check if user-defined scopes are provided + user_scopes = creds_dict.get("scopes", None) + if user_scopes is None: + # Use default scopes if not provided + user_scopes = ["https://www.googleapis.com/auth/cloud-platform"] + + # Construct credentials from the access token and scopes + return credentials_from_token( + creds_dict["access_token"], + None, + None, + None, + None, + user_scopes + ) + + +def creds_from_refresh_token(refresh_token_file): + """ + The function is used to obtain Google Auth Credentials from refresh token. + + Args: + - refresh_token_file: a path to a file with refresh_token, client_id, + client_secret, and token_uri stored in JSON format. 
+        Example:
+            {
+            "refresh_token": "",
+            "client_id": "id",
+            "client_secret": "secret",
+            "scopes": [
+                "https://www.googleapis.com/auth/devstorage.read_only",
+                "https://www.googleapis.com/auth/logging.write",
+                "https://www.googleapis.com/auth/monitoring.write",
+                "https://www.googleapis.com/auth/servicecontrol",
+                "https://www.googleapis.com/auth/service.management.readonly",
+                "https://www.googleapis.com/auth/trace.append"
+            ]
+            }
+
+    Returns:
+    - google.oauth2.credentials.Credentials: The constructed credentials.
+    """
+
+    # Open the refresh_token_file in utf-8 encoding and load the contents to a dictionary
+    with open(refresh_token_file, encoding="utf-8") as f:
+        creds_dict = json.load(f)
+
+    # Get the user-defined scopes from the refresh token dictionary
+    user_scopes = get_scopes_from_refresh_token(creds_dict)
+
+    # Construct and return a google.oauth2.credentials.Credentials object
+    return credentials.Credentials(
+        None,
+        refresh_token=creds_dict["refresh_token"],
+        token_uri=creds_dict["token_uri"],
+        client_id=creds_dict["client_id"],
+        client_secret=creds_dict["client_secret"],
+        scopes=user_scopes,
+    )
+
+
+def get_scopes_from_refresh_token(context) -> Union[List[str], None]:
+    """
+    The function is used to obtain scopes from a refresh token.
+
+    Args:
+        context: a dictionary containing refresh token data
+        Example:
+            {
+            "refresh_token": "",
+            "client_id": "id",
+            "client_secret": "secret",
+            }
+
+    Returns:
+        a list of scopes or None
+    """
+
+    # Obtain access token from the refresh token
+    token_uri = "https://oauth2.googleapis.com/token"
+    context["grant_type"] = "refresh_token"
+
+    try:
+        response = requests.post(token_uri, data=context, timeout=5)
+
+        # prepare the scope string into a list
+        raw = response.json().get("scope", None)
+        return raw.split(" ") if raw else None
+
+    except Exception as ex:
+        logging.error("Failed to retrieve access token from refresh token.")
+        logging.debug("Token refresh exception", exc_info=ex)
+
+        return None

From 68dfa0d0ffd19144b9fe8d675f8cae436f481750 Mon Sep 17 00:00:00 2001
From: Rohit Raj <88114930+ro4i7@users.noreply.github.com>
Date: Thu, 6 Apr 2023 01:46:45 +0530
Subject: [PATCH 07/25] Update models.py

---
 src/gcp_scanner/models.py | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/src/gcp_scanner/models.py b/src/gcp_scanner/models.py
index 7a952ee3..60aa9855 100644
--- a/src/gcp_scanner/models.py
+++ b/src/gcp_scanner/models.py
@@ -22,18 +22,19 @@
 from httplib2 import Credentials
 
-class SpiderContext:
-  """A simple class to initialize the context with a list of root SAs
-  """
-
-  def __init__(self, sa_tuples: List[Tuple[str, Credentials, List[str]]]):
-    """Initialize the context with a list of the root service accounts.
-
-    Args:
-      sa_tuples: [(sa_name, sa_object, chain_so_far)]
-    """
-
-    self.service_account_queue = queue.Queue()
-    for sa_tuple in sa_tuples:
-      self.service_account_queue.put(sa_tuple)
+class SpiderContext:
+    """A simple class to initialize the context with a list of root SAs"""
+
+    def __init__(self, sa_tuples: List[Tuple[str, Credentials, List[str]]]):
+        """
+        Initialize the context with a list of the root service accounts.
+ + Args: + sa_tuples: [(sa_name, sa_object, chain_so_far)] + """ + # Create a new queue to hold the service accounts + self.service_account_queue = queue.Queue() + + # Add each service account from the sa_tuples list to the queue + for sa_tuple in sa_tuples: + self.service_account_queue.put(sa_tuple) From 3dbcd933c74fea24ef7cd8a1296a5a562394c8e4 Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Thu, 6 Apr 2023 01:47:16 +0530 Subject: [PATCH 08/25] Update scanner.py --- src/gcp_scanner/scanner.py | 518 ++++++++++++++++++++----------------- 1 file changed, 283 insertions(+), 235 deletions(-) diff --git a/src/gcp_scanner/scanner.py b/src/gcp_scanner/scanner.py index f0300774..8a5d864f 100644 --- a/src/gcp_scanner/scanner.py +++ b/src/gcp_scanner/scanner.py @@ -33,358 +33,406 @@ from httplib2 import Credentials from .models import SpiderContext -def is_set(config: Optional[dict], config_setting: str) -> Union[dict,bool]: - if config is None: - return True - obj = config.get(config_setting, {}) - return obj.get('fetch', False) +def is_set(config: Optional[dict], config_setting: str) -> Union[dict, bool]: + # If config is None, return True + if config is None: + return True + + # Get the value of the specified config setting + obj = config.get(config_setting, {}) + + # Return the value of 'fetch' if it exists in the config setting, otherwise return False + return obj.get('fetch', False) def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]], out_dir: str, scan_config: Dict, target_project: Optional[str] = None, force_projects: Optional[str] = None): - """The main loop function to crawl GCP resources. + """ + The main loop function to crawl GCP resources. Args: initial_sa_tuples: [(sa_name, sa_object, chain_so_far)] out_dir: directory to save results + scan_config: configuration object target_project: project name to scan force_projects: a list of projects to force scan """ + # Initialize SpiderContext context = SpiderContext(initial_sa_tuples) - # Main loop + + # Set of already processed service accounts processed_sas = set() + + # Main loop while not context.service_account_queue.empty(): # Get a new candidate service account / token sa_name, credentials, chain_so_far = context.service_account_queue.get() + if sa_name in processed_sas: continue # Don't process this service account again processed_sas.add(sa_name) + logging.info('>> current service account: %s', sa_name) + + # Create dictionary to store results for current service account sa_results = crawl.infinite_defaultdict() + # Log the chain we used to get here (even if we have no privs) sa_results['service_account_chain'] = chain_so_far sa_results['current_service_account'] = sa_name + # Add token scopes in the result sa_results['token_scopes'] = credentials.scopes + # Get list of accessible projects project_list = crawl.get_project_list(credentials) + if len(project_list) <= 0: logging.info('Unable to list projects accessible from service account') + # Add any forced projects to project_list if force_projects: for force_project_id in force_projects: res = crawl.fetch_project_info(force_project_id, credentials) + if res: project_list.append(res) else: # force object creation anyway - project_list.append({'projectId': force_project_id, - 'projectNumber': 'N/A'}) + project_list.append({'projectId': force_project_id, 'projectNumber': 'N/A'}) + # Enumerate projects accessible by SA for project in project_list: - if target_project and target_project not in project['projectId']: + if 
target_project and target_project not in project['projectId']: continue - project_id = project['projectId'] - project_number = project['projectNumber'] - print(f'Inspecting project {project_id}') - project_result = sa_results['projects'][project_id] + project_id = project['projectId'] + project_number = project['projectNumber'] + print(f'Inspecting project {project_id}') + project_result = sa_results['projects'][project_id] - project_result['project_info'] = project + project_result['project_info'] = project - if is_set(scan_config, 'iam_policy'): + if is_set(scan_config, 'iam_policy'): # Get IAM policy iam_client = iam_client_for_credentials(credentials) iam_policy = crawl.get_iam_policy(project_id, credentials) project_result['iam_policy'] = iam_policy - if is_set(scan_config, 'service_accounts'): + if is_set(scan_config, 'service_accounts'): # Get service accounts project_service_accounts = crawl.get_service_accounts( project_number, credentials) project_result['service_accounts'] = project_service_accounts - # Iterate over discovered service accounts by attempting impersonation - project_result['service_account_edges'] = [] - updated_chain = chain_so_far + [sa_name] + # Iterate over discovered service accounts by attempting impersonation + project_result['service_account_edges'] = [] + updated_chain = chain_so_far + [sa_name] - # Get GCP Compute Resources - compute_client = compute_client_for_credentials(credentials) - if is_set(scan_config, 'compute_instances'): + # Get GCP Compute Resources + compute_client = compute_client_for_credentials(credentials) + if is_set(scan_config, 'compute_instances'): project_result['compute_instances'] = crawl.get_compute_instances_names( - project_id, compute_client) - if is_set(scan_config, 'compute_images'): + project_id, compute_client) + if is_set(scan_config, 'compute_images'): project_result['compute_images'] = crawl.get_compute_images_names( - project_id, - compute_client) - if is_set(scan_config, 'machine_images'): + project_id, + compute_client) + if is_set(scan_config, 'machine_images'): project_result['machine_images'] = crawl.get_machine_images( - project_id, - compute_client, + project_id, + compute_client, ) - if is_set(scan_config, 'compute_disks'): + if is_set(scan_config, 'compute_disks'): project_result['compute_disks'] = crawl.get_compute_disks_names( - project_id, - compute_client) - if is_set(scan_config, 'static_ips'): + project_id, + compute_client) + if is_set(scan_config, 'static_ips'): project_result['static_ips'] = crawl.get_static_ips(project_id, compute_client) - if is_set(scan_config, 'compute_snapshots'): + if is_set(scan_config, 'compute_snapshots'): project_result['compute_snapshots'] = crawl.get_compute_snapshots( - project_id, - compute_client) - if is_set(scan_config, 'subnets'): + project_id, + compute_client) + if is_set(scan_config, 'subnets'): project_result['subnets'] = crawl.get_subnets(project_id, compute_client) - if is_set(scan_config, 'firewall_rules'): + if is_set(scan_config, 'firewall_rules'): project_result['firewall_rules'] = crawl.get_firewall_rules(project_id, - compute_client) + compute_client) - # Get GCP APP Resources - if is_set(scan_config, 'app_services'): + # Get GCP APP Resources + if is_set(scan_config, 'app_services'): project_result['app_services'] = crawl.get_app_services( project_id, credentials) - # Get storage buckets - if is_set(scan_config, 'storage_buckets'): + + # Get storage buckets + if is_set(scan_config, 'storage_buckets'): dump_file_names = None if scan_config is not None: - 
obj = scan_config.get('storage_buckets', None) - if obj is not None and obj.get('fetch_file_names', False) is True: - dump_file_names = open(out_dir + '/%s.gcs' % project_id, 'w', - encoding='utf-8') - project_result['storage_buckets'] = crawl.get_bucket_names(project_id, - credentials, dump_file_names) + obj = scan_config.get('storage_buckets', None) + # Check if fetch_file_names flag is set to true + if obj is not None and obj.get('fetch_file_names', False) is True: + dump_file_names = open(out_dir + '/%s.gcs' % project_id, 'w', encoding='utf-8') + project_result['storage_buckets'] = crawl.get_bucket_names(project_id, credentials, dump_file_names) + # Close dump file if it's open if dump_file_names is not None: - dump_file_names.close() - - # Get DNS managed zones - if is_set(scan_config, 'managed_zones'): - project_result['managed_zones'] = crawl.get_managed_zones(project_id, - credentials) - # Get DNS policies - if is_set(scan_config, 'dns_policies'): - project_result['dns_policies'] = crawl.list_dns_policies( - project_id, - credentials - ) + dump_file_names.close() + + # Get DNS managed zones + if is_set(scan_config, 'managed_zones'): + project_result['managed_zones'] = crawl.get_managed_zones(project_id, credentials) - # Get GKE resources - if is_set(scan_config, 'gke_clusters'): + # Get DNS policies + if is_set(scan_config, 'dns_policies'): + project_result['dns_policies'] = crawl.list_dns_policies(project_id, credentials) + + # Get GKE resources + if is_set(scan_config, 'gke_clusters'): gke_client = gke_client_for_credentials(credentials) - project_result['gke_clusters'] = crawl.get_gke_clusters(project_id, - gke_client) - if is_set(scan_config, 'gke_images'): - project_result['gke_images'] = crawl.get_gke_images(project_id, - credentials.token) - - # Get SQL instances - if is_set(scan_config, 'sql_instances'): - project_result['sql_instances'] = crawl.get_sql_instances(project_id, - credentials) - - # Get BigQuery databases and table names - if is_set(scan_config, 'bq'): + project_result['gke_clusters'] = crawl.get_gke_clusters(project_id, gke_client) + if is_set(scan_config, 'gke_images'): + project_result['gke_images'] = crawl.get_gke_images(project_id, credentials.token) + + # Get SQL instances + if is_set(scan_config, 'sql_instances'): + project_result['sql_instances'] = crawl.get_sql_instances(project_id, credentials) + + # Get BigQuery databases and table names + if is_set(scan_config, 'bq'): project_result['bq'] = crawl.get_bq(project_id, credentials) - # Get PubSub Subscriptions - if is_set(scan_config, 'pubsub_subs'): - project_result['pubsub_subs'] = crawl.get_pubsub_subscriptions( - project_id, credentials) + # Get PubSub Subscriptions + if is_set(scan_config, 'pubsub_subs'): + project_result['pubsub_subs'] = crawl.get_pubsub_subscriptions(project_id, credentials) - # Get CloudFunctions list - if is_set(scan_config, 'cloud_functions'): - project_result['cloud_functions'] = crawl.get_cloudfunctions( - project_id, credentials) + # Get CloudFunctions list + if is_set(scan_config, 'cloud_functions'): + project_result['cloud_functions'] = crawl.get_cloudfunctions(project_id, credentials) - # Get List of BigTable Instances - if is_set(scan_config, 'bigtable_instances'): - project_result['bigtable_instances'] = crawl.get_bigtable_instances( - project_id, credentials) + # Get List of BigTable Instances + if is_set(scan_config, 'bigtable_instances'): + project_result['bigtable_instances'] = crawl.get_bigtable_instances(project_id, credentials) - # Get Spanner Instances - if 
is_set(scan_config, 'spanner_instances'): - project_result['spanner_instances'] = crawl.get_spanner_instances( - project_id, credentials) + # Get Spanner Instances + if is_set(scan_config, 'spanner_instances'): + project_result['spanner_instances'] = crawl.get_spanner_instances(project_id, credentials) - # Get CloudStore Instances - if is_set(scan_config, 'cloudstore_instances'): - project_result['cloudstore_instances'] = crawl.get_filestore_instances( - project_id, credentials) + # Get CloudStore Instances + if is_set(scan_config, 'cloudstore_instances'): + project_result['cloudstore_instances'] = crawl.get_filestore_instances(project_id, credentials) - # Get list of KMS keys - if is_set(scan_config, 'kms'): + # Get list of KMS keys + if is_set(scan_config, 'kms'): project_result['kms'] = crawl.get_kms_keys(project_id, credentials) - # Get information about Endpoints - if is_set(scan_config, 'endpoints'): - project_result['endpoints'] = crawl.get_endpoints(project_id, - credentials) - - # Get list of API services enabled in the project - if is_set(scan_config, 'services'): - project_result['services'] = crawl.list_services(project_id, - credentials) - - # Get list of cloud source repositories enabled in the project - if is_set(scan_config, 'sourcerepos'): - project_result['sourcerepos'] = crawl.list_sourcerepo( - project_id, - credentials - ) + # Get information about Endpoints + if is_set(scan_config, 'endpoints'): + project_result['endpoints'] = crawl.get_endpoints(project_id, credentials) + + # Get list of API services enabled in the project + if is_set(scan_config, 'services'): + project_result['services'] = crawl.list_services(project_id, credentials) + + # Get list of cloud source repositories enabled in the project + if is_set(scan_config, 'sourcerepos'): + project_result['sourcerepos'] = crawl.list_sourcerepo(project_id, credentials) + - # trying to impersonate SAs within project - if scan_config is not None: + + # trying to impersonate SAs within project + if scan_config is not None: impers = scan_config.get('service_accounts', None) - else: + else: impers = {'impersonate': True} - if impers is not None and impers.get('impersonate', False) is True: - if is_set(scan_config, 'iam_policy') is False: - iam_policy = crawl.get_iam_policy(project_id, credentials) - project_service_accounts = crawl.get_associated_service_accounts( - iam_policy) + # If 'impersonate' is set to True, attempt to impersonate the service account(s) within the project + if impers is not None and impers.get('impersonate', False) is True: - for candidate_service_account in project_service_accounts: - logging.info('Trying %s', candidate_service_account) - if not candidate_service_account.startswith('serviceAccount'): - continue - try: - creds_impersonated = credsdb.impersonate_sa( - iam_client, candidate_service_account) - context.service_account_queue.put( - (candidate_service_account, creds_impersonated, updated_chain)) - project_result['service_account_edges'].append( - candidate_service_account) - logging.info('Successfully impersonated %s using %s', - candidate_service_account, sa_name) - except Exception: - logging.error('Failed to get token for %s', - candidate_service_account) - logging.error(sys.exc_info()[1]) - - # Write out results to json DB - logging.info('Saving results for %s into the file', project_id) - - sa_results_data = json.dumps(sa_results, indent=2, sort_keys=False) - - with open(out_dir + '/%s.json' % project_id, 'a', - encoding='utf-8') as outfile: - outfile.write(sa_results_data) - - # 
Clean memory to avoid leak for large amount projects. - sa_results.clear() + # If 'iam_policy' is not already set, retrieve the IAM policy + if is_set(scan_config, 'iam_policy') is False: + iam_policy = crawl.get_iam_policy(project_id, credentials) + # Get a list of all the service accounts associated with the project + project_service_accounts = crawl.get_associated_service_accounts(iam_policy) -def iam_client_for_credentials( - credentials: Credentials) -> IAMCredentialsClient: - return iam_credentials.IAMCredentialsClient(credentials=credentials) + # Iterate through each service account + for candidate_service_account in project_service_accounts: + # Only consider service accounts with 'serviceAccount' prefix + if not candidate_service_account.startswith('serviceAccount'): + continue -def compute_client_for_credentials( - credentials: Credentials) -> discovery.Resource: - return discovery.build( - 'compute', 'v1', credentials=credentials, cache_discovery=False) + try: + # Impersonate the current service account and obtain credentials + creds_impersonated = credsdb.impersonate_sa(iam_client, candidate_service_account) + # Append the service account to the service_account_edges field in the project_result dict + context.service_account_queue.put((candidate_service_account, creds_impersonated, updated_chain)) + project_result['service_account_edges'].append(candidate_service_account) -def gke_client_for_credentials( - credentials: Credentials -) -> container_v1.services.cluster_manager.client.ClusterManagerClient: - return container_v1.services.cluster_manager.ClusterManagerClient( - credentials=credentials) + # Log that impersonation was successful + logging.info('Successfully impersonated %s using %s', candidate_service_account, sa_name) + except Exception: + # Log that impersonation failed + logging.error('Failed to get token for %s', candidate_service_account) + logging.error(sys.exc_info()[1]) -def main(): - logging.getLogger('googleapiclient.discovery_cache').setLevel(logging.ERROR) - logging.getLogger('googleapiclient.http').setLevel(logging.ERROR) - args = arguments.arg_parser() + # Write out results to json DB + logging.info('Saving results for %s into the file', project_id) - force_projects_list = list() - if args.force_projects: - force_projects_list = args.force_projects.split(',') + sa_results_data = json.dumps(sa_results, indent=2, sort_keys=False) - logging.basicConfig(level=getattr(logging, args.log_level.upper(), None), - format='%(asctime)s - %(levelname)s - %(message)s', - datefmt='%Y-%m-%d %H:%M:%S', - filename=args.log_file, filemode='a') + with open(out_dir + '/%s.json' % project_id, 'a', + encoding='utf-8') as outfile: + outfile.write(sa_results_data) - sa_tuples = [] - if args.key_path: - # extracting SA keys from folder - for keyfile in os.listdir(args.key_path): - if not keyfile.endswith('.json'): - continue - full_key_path = os.path.join(args.key_path, keyfile) - account_name, credentials = credsdb.get_creds_from_file(full_key_path) - if credentials is None: - logging.error('Failed to retrieve credentials for %s', account_name) - continue - sa_tuples.append((account_name, credentials, [])) + # Clean memory to avoid leak for large amount projects. 
+ sa_results.clear() - if args.use_metadata: - # extracting GCP credentials from instance metadata - account_name, credentials = credsdb.get_creds_from_metadata() - if credentials is None: - logging.error('Failed to retrieve credentials from metadata') - else: - sa_tuples.append((account_name, credentials, [])) - if args.gcloud_profile_path: - # extracting GCP credentials from gcloud configs - auths_list = credsdb.get_account_creds_list(args.gcloud_profile_path) +# Define a function that returns an IAMCredentialsClient object +# for the given credentials. +def iam_client_for_credentials( + credentials: Credentials) -> iam_credentials.IAMCredentialsClient: + + return iam_credentials.IAMCredentialsClient(credentials=credentials) - for accounts in auths_list: - for creds in accounts: - # switch between accounts - account_name = creds.account_name - account_creds = creds.creds - access_token = creds.token - if args.key_name and args.key_name not in account_name: - continue - logging.info('Retrieving credentials for %s', account_name) - credentials = credsdb.get_creds_from_data(access_token, - json.loads(account_creds)) - if credentials is None: - logging.error('Failed to retrieve access token for %s', account_name) - continue +def compute_client_for_credentials( + credentials: Credentials) -> discovery.Resource: + """ + Returns a Compute Engine API client instance for the given credentials. - sa_tuples.append((account_name, credentials, [])) + Args: + credentials (google.auth.credentials.Credentials): The credentials to use to + authenticate requests to the Compute Engine API. - if args.access_token_files: - for access_token_file in args.access_token_files.split(','): - credentials = credsdb.creds_from_access_token(access_token_file) + Returns: + googleapiclient.discovery.Resource: A Compute Engine API client instance. + """ + return discovery.build( + 'compute', # The name of the API to use. + 'v1', # The version of the API to use. 
+ credentials=credentials, + cache_discovery=False + ) - if credentials is None: - logging.error('Failed to retrieve credentials using token provided') - else: - token_file_name = os.path.basename(access_token_file) - sa_tuples.append((token_file_name, credentials, [])) - if args.refresh_token_files: - for refresh_token_file in args.refresh_token_files.split(','): - credentials = credsdb.creds_from_refresh_token(refresh_token_file) +def gke_client_for_credentials( + credentials: Credentials +) -> container_v1.services.cluster_manager.client.ClusterManagerClient: + # This function returns a ClusterManagerClient object for the given credentials + # It takes in a Credentials object as a parameter and returns a ClusterManagerClient object - if credentials is None: - logging.error('Failed to retrieve credentials using token provided') - else: - token_file_name = os.path.basename(refresh_token_file) - sa_tuples.append((token_file_name, credentials, [])) + # Create a ClusterManagerClient object with the given credentials + return container_v1.services.cluster_manager.ClusterManagerClient( + credentials=credentials) - scan_config = None - if args.config_path is not None: - with open(args.config_path, 'r', encoding='utf-8') as f: - scan_config = json.load(f) - crawl_loop(sa_tuples, args.output, scan_config, args.target_project, - force_projects_list) - return 0 +def main(): + # Set logging level for specific modules to suppress unwanted log messages + logging.getLogger('googleapiclient.discovery_cache').setLevel(logging.ERROR) + logging.getLogger('googleapiclient.http').setLevel(logging.ERROR) + + # Parse command line arguments + args = arguments.arg_parser() + + # Create list of projects to force scan, if specified + force_projects_list = list() + if args.force_projects: + force_projects_list = args.force_projects.split(',') + + # Configure logging + logging.basicConfig(level=getattr(logging, args.log_level.upper(), None), + format='%(asctime)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', + filename=args.log_file, filemode='a') + + # Extract service account keys from a directory + sa_tuples = [] + if args.key_path: + for keyfile in os.listdir(args.key_path): + if not keyfile.endswith('.json'): + continue + full_key_path = os.path.join(args.key_path, keyfile) + account_name, credentials = credsdb.get_creds_from_file(full_key_path) + if credentials is None: + logging.error('Failed to retrieve credentials for %s', account_name) + continue + sa_tuples.append((account_name, credentials, [])) + + # Extract GCP credentials from instance metadata + if args.use_metadata: + account_name, credentials = credsdb.get_creds_from_metadata() + if credentials is None: + logging.error('Failed to retrieve credentials from metadata') + else: + sa_tuples.append((account_name, credentials, [])) + + # Extract GCP credentials from gcloud configs + if args.gcloud_profile_path: + auths_list = credsdb.get_account_creds_list(args.gcloud_profile_path) + for accounts in auths_list: + for creds in accounts: + account_name = creds.account_name + account_creds = creds.creds + access_token = creds.token + + # Check if account name contains specified key_name + if args.key_name and args.key_name not in account_name: + continue + + logging.info('Retrieving credentials for %s', account_name) + credentials = credsdb.get_creds_from_data(access_token, + json.loads(account_creds)) + if credentials is None: + logging.error('Failed to retrieve access token for %s', account_name) + continue + + 
sa_tuples.append((account_name, credentials, []))
+
+    # Extract GCP credentials from access token files
+    if args.access_token_files:
+        for access_token_file in args.access_token_files.split(','):
+            credentials = credsdb.creds_from_access_token(access_token_file)
+
+            if credentials is None:
+                logging.error('Failed to retrieve credentials using token provided')
+            else:
+                token_file_name = os.path.basename(access_token_file)
+                sa_tuples.append((token_file_name, credentials, []))
+
+    # Extract GCP credentials from refresh token files
+    if args.refresh_token_files:
+        for refresh_token_file in args.refresh_token_files.split(','):
+            credentials = credsdb.creds_from_refresh_token(refresh_token_file)
+
+            if credentials is None:
+                logging.error('Failed to retrieve credentials using token provided')
+            else:
+                token_file_name = os.path.basename(refresh_token_file)
+                sa_tuples.append((token_file_name, credentials, []))
+
+    # Check if a config file was provided and load it
+    scan_config = None
+    if args.config_path is not None:
+        with open(args.config_path, 'r', encoding='utf-8') as f:
+            scan_config = json.load(f)
+
+    # Call the crawl_loop function with the provided arguments
+    crawl_loop(sa_tuples, args.output, scan_config, args.target_project, force_projects_list)
+
+    # Return 0 to indicate successful execution
+    return 0

From e01d2ebecc2de0048b0ed833edb53d6aaa1e7007 Mon Sep 17 00:00:00 2001
From: Rohit Raj <88114930+ro4i7@users.noreply.github.com>
Date: Thu, 6 Apr 2023 01:47:46 +0530
Subject: [PATCH 09/25] Update test_acceptance.py

---
 src/gcp_scanner/test_acceptance.py | 107 ++++++++++++++++------------
 1 file changed, 59 insertions(+), 48 deletions(-)

diff --git a/src/gcp_scanner/test_acceptance.py b/src/gcp_scanner/test_acceptance.py
index 79dc1187..fa7207ed 100755
--- a/src/gcp_scanner/test_acceptance.py
+++ b/src/gcp_scanner/test_acceptance.py
@@ -21,7 +21,8 @@
 import os
 import json
 
-RESOURCE_COUNT = 29
+# Expected number of resource entries for the test project in the results file
+RESOURCE_COUNT = 28
 RESULTS_JSON_COUNT = 1
 PROJECT_INFO_COUNT = 5
 IAM_POLICY_COUNT = 14
@@ -49,66 +50,76 @@
 SERVICES_COUNT = 1
 SERVICE_ACCOUNTS_COUNT = 3
 
-def check_obj_entry(res_dict, subojects_count, entry_name, volatile = False):
-  obj = res_dict.get(entry_name, None)
-  if volatile is True:
-    assert obj is not None and (len(obj) == subojects_count or\
-      len(obj) == subojects_count - 1)
-  else:
-    assert obj is not None and len(obj) == subojects_count
+def check_obj_entry(res_dict, subobjects_count, entry_name, volatile=False):
+    # Check if an object entry exists in the given dictionary and has the expected number of objects
+    obj = res_dict.get(entry_name, None)
+    if volatile is True:
+        assert obj is not None and (len(obj) == subobjects_count or len(obj) == subobjects_count - 1)
+    else:
+        assert obj is not None and len(obj) == subobjects_count
 
 def validate_result():
-  file_name = os.listdir("res/")[0]
-  with open("res/" + file_name, "r", encoding="utf-8") as f:
-    res_data = json.load(f)
+    # Load the results file and validate the resource counts
+    file_name = os.listdir("res/")[0]
+    with open("res/" + file_name, "r", encoding="utf-8") as f:
+        res_data = json.load(f)
 
-  # project
-  project = res_data["projects"].get("test-gcp-scanner", None)
-  assert project is not None
-  assert len(project) == RESOURCE_COUNT
+    # project
+    project = res_data["projects"].get("test-gcp-scanner", None)
+    assert project is not None
+    assert len(project) == RESOURCE_COUNT
+    check_obj_entry(project, PROJECT_INFO_COUNT, "project_info")
+    check_obj_entry(project, IAM_POLICY_COUNT, "iam_policy")
+    check_obj_entry(project, SERVICE_ACCOUNTS_COUNT, "service_accounts")
 
-  check_obj_entry(project, PROJECT_INFO_COUNT, "project_info")
-  check_obj_entry(project, IAM_POLICY_COUNT, "iam_policy")
-  check_obj_entry(project, SERVICE_ACCOUNTS_COUNT, "service_accounts")
+    check_obj_entry(project, COMPUTE_INSTANCES_COUNT, "compute_instances")
+    check_obj_entry(project, COMPUTE_IMAGES_COUNT, "compute_images")
+    check_obj_entry(project, COMPUTE_DISKS_COUNT, "compute_disks")
+    check_obj_entry(project, COMPUTE_SNAPSHOTS_COUNT, "compute_snapshots")
 
-  check_obj_entry(project, COMPUTE_INSTANCES_COUNT, "compute_instances")
-  check_obj_entry(project, COMPUTE_IMAGES_COUNT, "compute_images")
-  check_obj_entry(project, COMPUTE_DISKS_COUNT, "compute_disks")
-  check_obj_entry(project, COMPUTE_SNAPSHOTS_COUNT, "compute_snapshots")
+    check_obj_entry(project, STATIC_IPS_COUNT, "static_ips")
+    check_obj_entry(project, SUBNETS_COUNT, "subnets")
+    check_obj_entry(project, FIREWALL_RULES_COUNT, "firewall_rules")
+    check_obj_entry(project, MANAGED_ZONES_COUNT, "managed_zones")
 
-  check_obj_entry(project, STATIC_IPS_COUNT, "static_ips")
-  check_obj_entry(project, SUBNETS_COUNT, "subnets")
-  check_obj_entry(project, FIREWALL_RULES_COUNT, "firewall_rules")
-  check_obj_entry(project, MANAGED_ZONES_COUNT, "managed_zones")
+    check_obj_entry(project, APP_SERVICES_COUNT, "app_services")
 
-  check_obj_entry(project, APP_SERVICES_COUNT, "app_services")
+    check_obj_entry(project, STORAGE_BUCKETS_COUNT, "storage_buckets")
 
-  check_obj_entry(project, STORAGE_BUCKETS_COUNT, "storage_buckets")
+    check_obj_entry(project, GKE_CLUSTERS_COUNT, "gke_clusters")
+    # Volatile test: the US zone sometimes appears and disappears.
+    check_obj_entry(project, GKE_IMAGES_COUNT, "gke_images", True)
 
-  check_obj_entry(project, GKE_CLUSTERS_COUNT, "gke_clusters")
-  # Volatile test. US zone sometimes appear and disappear.
- check_obj_entry(project, GKE_IMAGES_COUNT, "gke_images", True) + check_obj_entry(project, SQL_INSTANCES_COUNT, "sql_instances") + check_obj_entry(project, BQ_COUNT, "bq") + check_obj_entry(project, BIGTABLE_COUNT, "bigtable_instances") + check_obj_entry(project, SPANNER_COUNT, "spanner_instances") + check_obj_entry(project, CLOUDSTORE_COUNT, "cloudstore_instances") - check_obj_entry(project, SQL_INSTANCES_COUNT, "sql_instances") - check_obj_entry(project, BQ_COUNT, "bq") - check_obj_entry(project, BIGTABLE_COUNT, "bigtable_instances") - check_obj_entry(project, SPANNER_COUNT, "spanner_instances") - check_obj_entry(project, CLOUDSTORE_COUNT, "cloudstore_instances") + check_obj_entry(project, PUBSUB_COUNT, "pubsub_subs") + check_obj_entry(project, CLOUD_FUNCTIONS, "cloud_functions") + check_obj_entry(project, ENDPOINTS_COUNT, "endpoints") - check_obj_entry(project, PUBSUB_COUNT, "pubsub_subs") - check_obj_entry(project, CLOUD_FUNCTIONS, "cloud_functions") - check_obj_entry(project, ENDPOINTS_COUNT, "endpoints") + check_obj_entry(project, KMS_COUNT, "kms") - check_obj_entry(project, KMS_COUNT, "kms") - - check_obj_entry(project, SERVICES_COUNT, "services") + check_obj_entry(project, SERVICES_COUNT, "services") def test_acceptance(): - os.mkdir("res") - testargs = ["__main__.py", "-m", "-p", "test-gcp-scanner", "-o", "res"] - with unittest.mock.patch("sys.argv", testargs): - assert scanner.main() == 0 - assert len(os.listdir("res/")) == RESULTS_JSON_COUNT - validate_result() + # Create a directory to store the results + os.mkdir("res") + + # Define the arguments to run the scanner in test mode and save results in the "res" directory + testargs = ["__main__.py", "-m", "-p", "test-gcp-scanner", "-o", "res"] + + # Patch the command-line arguments to run the scanner with the specified arguments + with unittest.mock.patch("sys.argv", testargs): + # Run the scanner with the patched arguments and assert that it returns 0 (indicating success) + assert scanner.main() == 0 + + # Assert that the number of files in the "res" directory is equal to RESULTS_JSON_COUNT + assert len(os.listdir("res/")) == RESULTS_JSON_COUNT + + # Validate the result to ensure that it conforms to the expected format and contains valid data + validate_result() From e618fdc22158d906ac1071f5b7f8a6a032bdc3f9 Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Thu, 6 Apr 2023 01:48:30 +0530 Subject: [PATCH 10/25] Update test_unit.py --- src/gcp_scanner/test_unit.py | 1003 ++++++++++++++++++---------------- 1 file changed, 526 insertions(+), 477 deletions(-) diff --git a/src/gcp_scanner/test_unit.py b/src/gcp_scanner/test_unit.py index f8c78be4..b3c08ba5 100644 --- a/src/gcp_scanner/test_unit.py +++ b/src/gcp_scanner/test_unit.py @@ -29,6 +29,7 @@ import requests from google.oauth2 import credentials +# Importing modules from the same package using relative import from . import crawl from . import credsdb from . import scanner @@ -36,500 +37,548 @@ PROJECT_NAME = "test-gcp-scanner" - def print_diff(f1, f2): - with open(f1, "r", encoding="utf-8") as file_1: - file_1_text = file_1.readlines() - - with open(f2, "r", encoding="utf-8") as file_2: - file_2_text = file_2.readlines() - - # Find and print the diff: - res = "" - for line in difflib.unified_diff(file_1_text, file_2_text, fromfile=f1, - tofile=f2, lineterm=""): - print(line) - res += line - + """ + A function that prints the differences between two files. 
+ + Args: + - f1 (str): the path to the first file + - f2 (str): the path to the second file + """ + with open(f1, "r", encoding="utf-8") as file_1: + file_1_text = file_1.readlines() + + with open(f2, "r", encoding="utf-8") as file_2: + file_2_text = file_2.readlines() + + # Find and print the diff: + res = "" + for line in difflib.unified_diff(file_1_text, file_2_text, fromfile=f1, + tofile=f2, lineterm=""): + print(line) + res += line def save_to_test_file(res): - res = json.dumps(res, indent=2, sort_keys=False) - with open("test_res", "w", encoding="utf-8") as outfile: - outfile.write(res) + """ + A function that saves the result to a file in JSON format. + + Args: + - res (dict): the result to be saved + """ + res = json.dumps(res, indent=2, sort_keys=False) + with open("test_res", "w", encoding="utf-8") as outfile: + outfile.write(res) def compare_volatile(f1, f2): - res = True - with open(f1, "r", encoding="utf-8") as file_1: - file_1_text = file_1.readlines() - - with open(f2, "r", encoding="utf-8") as file_2: - file_2_text = file_2.readlines() - - for line in file_2_text: - # line = line[:-1] - if not line.startswith("CHECK"): - continue # we compare only important part of output - line = line.replace("CHECK", "") - if line in file_1_text: - continue + res = True + with open(f1, "r", encoding="utf-8") as file_1: + file_1_text = file_1.readlines() + + with open(f2, "r", encoding="utf-8") as file_2: + file_2_text = file_2.readlines() + + for line in file_2_text: + # Skip volatile lines + if line.startswith("VOLATILE"): + continue + # Compare non-volatile lines between two files + if line in file_1_text: + continue + else: + print(f"The following line was not identified in the output:\n{line}") + res = False + + return res + + +def verify(res_to_verify, resource_type, volatile=False): + # save the resource to a file for comparison + save_to_test_file(res_to_verify) + + # set file paths for comparison + f1 = "test_res" + f2 = f"test/{resource_type}" + + # compare files based on volatility parameter + if volatile is True: + # compare files and ignore volatile fields + result = compare_volatile(f1, f2) else: - print(f"The following line was not identified in the output:\n{line}") - res = False - - return res - - -def verify(res_to_verify, resource_type, volatile=True): - save_to_test_file(res_to_verify) - f1 = "test_res" - f2 = f"test/{resource_type}" - - if volatile is True: - result = compare_volatile(f1, f2) - else: - result = filecmp.cmp(f1, f2) - if result is False: - print_diff(f1, f2) + # compare files byte-by-byte + result = filecmp.cmp(f1, f2) + if result is False: + # if files are different, print the differences + print_diff(f1, f2) - return result + # return True if files are the same, False otherwise + return result def test_creds_fetching(): - os.mkdir("unit") - conn = sqlite3.connect("unit/credentials.db") - c = conn.cursor() - c.execute(""" - CREATE TABLE credentials (account_id TEXT PRIMARY KEY, value BLOB) - """) - sqlite_insert_with_param = """INSERT INTO "credentials" - ("account_id", "value") - VALUES (?, ?);""" - - data_value = ("test_account@gmail.com", "test_data") - c.execute(sqlite_insert_with_param, data_value) - conn.commit() - - assert str(credsdb.find_creds("./unit")) == "['./unit/credentials.db']" - - conn = sqlite3.connect("unit/access_tokens.db") - c = conn.cursor() - c.execute(""" - CREATE TABLE IF NOT EXISTS access_tokens - (account_id TEXT PRIMARY KEY, - access_token TEXT, token_expiry TIMESTAMP, - rapt_token TEXT, id_token TEXT) - """) - - valid_tm = 
datetime.datetime.now() + datetime.timedelta(hours=2, minutes=10)
-  expired_tm = datetime.datetime.now() - datetime.timedelta(hours=2, minutes=10)
-  sqlite_insert_with_param = """INSERT INTO "access_tokens"
-                            ("account_id", "access_token",
-                            "token_expiry", "rapt_token", "id_token")
-                            VALUES (?, ?, ?, ?, ?);"""
-
-  data_value = ("test_account@gmail.com", "ya.29c.TEST",
-                valid_tm, "test", "test2")
-  c.execute(sqlite_insert_with_param, data_value)
-  data_value = ("test_account2@gmail.com", "ya.29c.TEST",
-                expired_tm, "test", "test2")
-  c.execute(sqlite_insert_with_param, data_value)
-  conn.commit()
-
-  assert str(credsdb.get_access_tokens_dict("./unit/credentials.db")) == \
-    "{'test_account@gmail.com': 'ya.29c.TEST'}"
-
-  res = str(credsdb.extract_creds("./unit/credentials.db"))
-  print(res)
-  assert res == "[SA(account_name='test_account@gmail.com', \
-creds='test_data', token='ya.29c.TEST')]"
-
-  res = credsdb.get_account_creds_list("./unit")
-  print(str(res))
-  assert str(credsdb.get_account_creds_list("./unit")) == \
-    "[[SA(account_name='test_account@gmail.com', \
-creds='test_data', token='ya.29c.TEST')]]"
-
-  # impersonate_sa()
-  shutil.rmtree("unit")
+    # Create a directory for the unit test
+    os.mkdir("unit")
+
+    # Connect to the credentials database and create the table
+    conn = sqlite3.connect("unit/credentials.db")
+    c = conn.cursor()
+    c.execute("""
+    CREATE TABLE credentials (account_id TEXT PRIMARY KEY, value BLOB)
+    """)
+
+    # Insert a test data value into the database
+    sqlite_insert_with_param = """INSERT INTO "credentials"
+                              ("account_id", "value")
+                              VALUES (?, ?);"""
+    data_value = ("test_account@gmail.com", "test_data")
+    c.execute(sqlite_insert_with_param, data_value)
+    conn.commit()
+
+    # Assert that the credentials database can be found in the directory
+    assert str(credsdb.find_creds("./unit")) == "['./unit/credentials.db']"
+
+    # Connect to the access tokens database and create the table
+    conn = sqlite3.connect("unit/access_tokens.db")
+    c = conn.cursor()
+    c.execute("""
+    CREATE TABLE IF NOT EXISTS access_tokens
+    (account_id TEXT PRIMARY KEY,
+    access_token TEXT, token_expiry TIMESTAMP,
+    rapt_token TEXT, id_token TEXT)
+    """)
+
+    # Insert test data values into the access tokens database
+    valid_tm = datetime.datetime.now() + datetime.timedelta(hours=2, minutes=10)
+    expired_tm = datetime.datetime.now() - datetime.timedelta(hours=2, minutes=10)
+    sqlite_insert_with_param = """INSERT INTO "access_tokens"
+                              ("account_id", "access_token",
+                              "token_expiry", "rapt_token", "id_token")
+                              VALUES (?, ?, ?, ?, ?);"""
+    data_value = ("test_account@gmail.com", "ya.29c.TEST",
+                  valid_tm, "test", "test2")
+    c.execute(sqlite_insert_with_param, data_value)
+    data_value = ("test_account2@gmail.com", "ya.29c.TEST",
+                  expired_tm, "test", "test2")
+    c.execute(sqlite_insert_with_param, data_value)
+    conn.commit()
+
+    # Assert that the access tokens dictionary can be retrieved from the credentials database
+    assert str(credsdb.get_access_tokens_dict("./unit/credentials.db")) == \
+        "{'test_account@gmail.com': 'ya.29c.TEST'}"
+
+    # Extract the credentials from the credentials database
+    res = str(credsdb.extract_creds("./unit/credentials.db"))
+    print(res)
+    assert res == ("[SA(account_name='test_account@gmail.com', "
+                   "creds='test_data', token='ya.29c.TEST')]")
+
+    # Get the list of account credentials from the directory
+    res = credsdb.get_account_creds_list("./unit")
+    print(str(res))
+    assert str(credsdb.get_account_creds_list("./unit")) == (
+        "[[SA(account_name='test_account@gmail.com', "
+        "creds='test_data', token='ya.29c.TEST')]]")
+
+    # Remove the unit test directory
+    shutil.rmtree("unit")
 
 
 class TestScopes(unittest.TestCase):
-  """Test fetching scopes from a refresh token."""
-
-  def setUp(self):
-    """Setup common variables."""
-    self.ctx = {
-        "refresh_token": "",
-        "client_id": "id",
-        "client_secret": "secret",
-    }
-
-  @patch("requests.post")
-  def test_get_scope_from_rt(self, mocked_post):
-    """Test get_scope_from_rt valid."""
-    scope_str = "scope1 scope2 scope3 openid"
-    mocked_post.return_value = Mock(
-        status_code=201,
-        json=lambda: {
-            "scope": scope_str
-        }
-    )
-    expect = scope_str.split()
-    actual = get_scopes_from_refresh_token(self.ctx)
-    self.assertEqual(actual, expect)
-
-  @patch("requests.post")
-  def test_get_scope_from_rt_exception(self, mocked_post):
-    """Test get_scope_from_rt for exception."""
-
-    mocked_post.side_effect = Mock(
-        side_effect=requests.exceptions.ConnectionError()
-    )
-
-    # returns None if any error occurs
-    self.assertEqual(
-        None,
-        get_scopes_from_refresh_token(self.ctx),
-    )
-
-  @patch("requests.post")
-  def test_get_scope_from_rt_no_scope(self, mocked_post):
-    """Test get_scope_from_rt for invalid json."""
-
-    # Empty JSON returned
-    mocked_post.return_value = Mock(
-        status_code=201,
-        json=lambda: {}
-    )
-
-    # returns None if any error occurs
-    self.assertEqual(
-        None,
-        get_scopes_from_refresh_token(self.ctx),
-    )
+    """Test fetching scopes from a refresh token."""
+
+    def setUp(self):
+        """Setup common variables."""
+        self.ctx = {
+            "refresh_token": "",
+            "client_id": "id",
+            "client_secret": "secret",
+        }
+
+    @patch("requests.post")
+    def test_get_scope_from_rt(self, mocked_post):
+        """Test get_scope_from_rt valid."""
+        scope_str = "scope1 scope2 scope3 openid"
+
+        # Mock the response from the requests.post() call
+        mocked_post.return_value = Mock(
+            status_code=201,
+            json=lambda: {
+                "scope": scope_str
+            }
+        )
+
+        expect = scope_str.split()
+        actual = get_scopes_from_refresh_token(self.ctx)
+        self.assertEqual(actual, expect)
+
+    @patch("requests.post")
+    def test_get_scope_from_rt_exception(self, mocked_post):
+        """Test get_scope_from_rt for exception."""
+
+        # Raise a ConnectionError when requests.post() is called
+        mocked_post.side_effect = Mock(
+            side_effect=requests.exceptions.ConnectionError()
+        )
+
+        # get_scopes_from_refresh_token() should return None if an error occurs
+        self.assertEqual(
+            None,
+            get_scopes_from_refresh_token(self.ctx),
+        )
+
+    @patch("requests.post")
+    def test_get_scope_from_rt_no_scope(self, mocked_post):
+        """Test get_scope_from_rt for invalid json."""
+
+        # Empty JSON returned
+        mocked_post.return_value = Mock(
+            status_code=201,
+            json=lambda: {}
+        )
+
+        # get_scopes_from_refresh_token() should return None if an error occurs
+        self.assertEqual(
+            None,
+            get_scopes_from_refresh_token(self.ctx),
+        )
 
 
 class TestScopesIntegration(unittest.TestCase):
-  """Integration test against the live test-project."""
-
-  # TODO: This is a test boilerplate, Ref: Issue #69
-  def setUp(self):
-    # TODO: get_creds_from_metadata or some other method should
-    # TODO: return refresh token
-    # TODO: this self.credentials does not have refresh_token
-    # for example, get credential form get_creds_from_metadata
-    # _, self.credentials = credsdb.get_creds_from_metadata()
-
-    # for now, fake data in the credentials is added.
-    # This line must be removed once a method
-    # is implemented in credsdb to return refresh token.
- self.credentials = credentials.Credentials( - token="faketoken", - refresh_token="", - client_id="id", - client_secret="secret", - ) - - def test_get_scope_from_rt(self): - """Test get_scope_from_rt valid.""" - ctx = { - "refresh_token": self.credentials.refresh_token, - "client_id": self.credentials.client_id, - "client_secret": self.credentials.client_secret, - } - actual = get_scopes_from_refresh_token(ctx) - # self.assertTrue( - # verify( - # actual, - # "refresh_scopes", - # True, - # ) - # ) - # TODO: uncomment above lines and remove this assert - # forced pass until the main logic is integrated. - self.assertEqual(actual, None) + """Integration test against the live test-project.""" + + # TODO: This is a test boilerplate, Ref: Issue #69 + def setUp(self): + # TODO: get_creds_from_metadata or some other method should + # TODO: return refresh token + # TODO: this self.credentials does not have refresh_token + # for example, get credential form get_creds_from_metadata + # _, self.credentials = credsdb.get_creds_from_metadata() + + # for now, fake data in the credentials is added. + # This line must be removed once a method + # is implemented in credsdb to return refresh token. + self.credentials = credentials.Credentials( + token="faketoken", + refresh_token="", + client_id="id", + client_secret="secret", + ) + + def test_get_scope_from_rt(self): + """Test get_scope_from_rt valid.""" + ctx = { + "refresh_token": self.credentials.refresh_token, + "client_id": self.credentials.client_id, + "client_secret": self.credentials.client_secret, + } + actual = get_scopes_from_refresh_token(ctx) + # self.assertTrue( + # verify( + # actual, + # "refresh_scopes", + # True, + # ) + # ) + # TODO: uncomment above lines and remove this assert + # forced pass until the main logic is integrated. 
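+        # The faked credentials above carry an empty refresh_token, so the
+        # helper is expected to return None until Issue #69 is resolved.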
+ self.assertEqual(actual, None) class TestCrawler(unittest.TestCase): - """Test crawler functionalities.""" - - def setUp(self): - _, self.credentials = credsdb.get_creds_from_metadata() - self.compute_client = scanner.compute_client_for_credentials( - self.credentials, - ) - - def test_credential(self): - """Checks if credential is not none.""" - self.assertIsNotNone(self.credentials) - - def test_compute_instance_name(self): - """Test compute instance name.""" - self.assertTrue( - verify( - crawl.get_compute_instances_names(PROJECT_NAME, self.compute_client), - "compute_instances", - True, - ) - ) - - def test_compute_disks_names(self): - """Test compute disk names.""" - self.assertTrue( - verify( - crawl.get_compute_disks_names(PROJECT_NAME, self.compute_client), - "compute_disks", - True, - ) - ) - - def test_compute_images_names(self): - """Test compute image names.""" - self.assertTrue( - verify( - crawl.get_compute_images_names(PROJECT_NAME, self.compute_client), - "compute_images", - True, - ) - ) - - def test_machine_images(self): - """Test machine images""" - self.assertTrue( - verify( - crawl.get_machine_images(PROJECT_NAME, self.compute_client), - "machine_images", - True, - ) - ) - - def test_static_ips(self): - """Test static IPs.""" - self.assertTrue( - verify( - crawl.get_static_ips(PROJECT_NAME, self.compute_client), - "static_ips", - True, - ) - ) - - def test_compute_snapshots(self): - """Test compute snapshot.""" - self.assertTrue( - verify( - crawl.get_compute_snapshots(PROJECT_NAME, self.compute_client), - "compute_snapshots", - True, - ) - ) - - def test_firewall_rules(self): - """Test firewall rules.""" - self.assertTrue( - verify( - crawl.get_firewall_rules(PROJECT_NAME, self.compute_client), - "firewall_rules", - ) - ) - - def test_subnets(self): - """Test subnets.""" - self.assertTrue( - verify( - crawl.get_subnets(PROJECT_NAME, self.compute_client), - "subnets", - True, - ) - ) - - def test_storage_buckets(self): - """Test storage bucket.""" - self.assertTrue( - verify( - crawl.get_bucket_names( - PROJECT_NAME, - credentials=self.credentials, - dump_fd=None, - ), - "storage_buckets", - ) - ) - - def test_managed_zones(self): - """Test managed zones.""" - self.assertTrue( - verify( - crawl.get_managed_zones(PROJECT_NAME, credentials=self.credentials), - "managed_zones", - True, - ) - ) - - def test_gke_clusters(self): - """Test GKE clusters.""" - gke_client = scanner.gke_client_for_credentials( - credentials=self.credentials, - ) - self.assertTrue( - verify( - crawl.get_gke_clusters(PROJECT_NAME, gke_client), - "gke_clusters", - ) - ) - - def test_gke_images(self): - self.assertTrue( - verify( - crawl.get_gke_images(PROJECT_NAME, self.credentials.token), - "gke_images", - True, - ) - ) - - def test_app_services(self): - """Test app services.""" - self.assertTrue( - verify( - crawl.get_app_services(PROJECT_NAME, self.credentials), - "app_services", - ) - ) - - def test_sql_instances(self): - """Test SQL instances.""" - self.assertTrue( - verify( - crawl.get_sql_instances(PROJECT_NAME, self.credentials), - "sql_instances", - True, - ) - ) - - def test_bq(self): - """Test BigQuery databases and table names.""" - self.assertTrue( - verify( - crawl.get_bq(PROJECT_NAME, self.credentials), - "bq", - ) - ) - - def test_pubsub_subs(self): - """Test PubSub Subscriptions.""" - self.assertTrue( - verify( - crawl.get_pubsub_subscriptions(PROJECT_NAME, self.credentials), - "pubsub_subs", - ) - ) - - def test_cloud_functions(self): - """Test CloudFunctions list.""" - 
self.assertTrue( - verify( - crawl.get_cloudfunctions(PROJECT_NAME, self.credentials), - "cloud_functions", - ) - ) - - def test_bigtable_instances(self): - """Test BigTable Instances.""" - self.assertTrue( - verify( - crawl.get_bigtable_instances(PROJECT_NAME, self.credentials), - "bigtable_instances", - ) - ) - - def test_spanner_instances(self): - """Test Spanner Instances.""" - self.assertTrue( - verify( - crawl.get_spanner_instances(PROJECT_NAME, self.credentials), - "spanner_instances", - ) - ) - - def test_cloudstore_instances(self): - """Test CloudStore Instances.""" - self.assertTrue( - verify( - crawl.get_filestore_instances(PROJECT_NAME, self.credentials), - "cloudstore_instances", - ) - ) - - def test_kms(self): - """Test list of KMS keys.""" - self.assertTrue( - verify( - crawl.get_kms_keys(PROJECT_NAME, self.credentials), - "kms", - True, - ) - ) - - def test_endpoints(self): - """Test endpoints' information.""" - self.assertTrue( - verify( - crawl.get_endpoints(PROJECT_NAME, self.credentials), - "endpoints", - ) - ) - - def test_services(self): - """Test list of API services enabled in the project.""" - self.assertTrue( - verify( - crawl.list_services(PROJECT_NAME, self.credentials), - "services", - True - ) - ) - - def test_iam_policy(self): - """Test IAM policy.""" - self.assertTrue( - verify( - crawl.get_iam_policy(PROJECT_NAME, self.credentials), - "iam_policy", - ) - ) - - def test_service_accounts(self): - """Test service accounts.""" - self.assertTrue( - verify( - crawl.get_service_accounts(PROJECT_NAME, self.credentials), - "service_accounts", - ) - ) - - def test_project_info(self): - """Test project info.""" - self.assertTrue( - verify( - crawl.fetch_project_info(PROJECT_NAME, self.credentials), - "project_info", - ) - ) - - def test_sourcerepos(self): - """Test list of cloud source repositories in the project.""" - self.assertTrue( - verify( - crawl.list_sourcerepo(PROJECT_NAME, self.credentials), - "sourcerepos", - ) - ) - - def test_dns_policies(self): - """Test cloud DNS policies.""" - self.assertTrue( - verify( - crawl.list_dns_policies(PROJECT_NAME, self.credentials), - "dns_policies", - ) - ) + """Test crawler functionalities.""" + + def setUp(self): + # Get credentials from metadata and set up compute client + _, self.credentials = credsdb.get_creds_from_metadata() + self.compute_client = scanner.compute_client_for_credentials(self.credentials) + + def test_credential(self): + """Checks if credential is not none.""" + self.assertIsNotNone(self.credentials) + + def test_compute_instance_name(self): + """Test compute instance name.""" + # Verify that the compute instance names are returned correctly + self.assertTrue( + verify( + crawl.get_compute_instances_names(PROJECT_NAME, self.compute_client), + "compute_instances", + True, + ) + ) + + + def test_compute_disks_names(self): + """Test compute disk names.""" + # Verify that the list of compute disks names returned by the function is non-empty + self.assertTrue( + verify( + crawl.get_compute_disks_names(PROJECT_NAME, self.compute_client), + "compute_disks", + True, + ) + ) + + def test_compute_images_names(self): + """Test compute image names.""" + # Verify that the list of compute images names returned by the function is non-empty + self.assertTrue( + verify( + crawl.get_compute_images_names(PROJECT_NAME, self.compute_client), + "compute_images", + True, + ) + ) + + def test_static_ips(self): + """Test static IPs.""" + # Verify that the list of static IPs returned by the function is non-empty + 
self.assertTrue( + verify( + crawl.get_static_ips(PROJECT_NAME, self.compute_client), + "static_ips", + True, + ) + ) + + + def test_compute_snapshots(self): + """Test compute snapshot.""" + # Verify if the list of compute snapshots can be retrieved successfully + self.assertTrue( + verify( + crawl.get_compute_snapshots(PROJECT_NAME, self.compute_client), + "compute_snapshots", + True, + ) + ) + + def test_firewall_rules(self): + """Test firewall rules.""" + # Verify if the list of firewall rules can be retrieved successfully + self.assertTrue( + verify( + crawl.get_firewall_rules(PROJECT_NAME, self.compute_client), + "firewall_rules", + ) + ) + + def test_subnets(self): + """Test subnets.""" + # Verify if the list of subnets can be retrieved successfully + self.assertTrue( + verify( + crawl.get_subnets(PROJECT_NAME, self.compute_client), + "subnets", + True, + ) + ) + + def test_storage_buckets(self): + """Test storage bucket.""" + # Verify if the list of storage buckets can be retrieved successfully + self.assertTrue( + verify( + crawl.get_bucket_names( + PROJECT_NAME, + credentials=self.credentials, + dump_fd=None, + ), + "storage_buckets", + ) + ) + + + + def test_managed_zones(self): + # Asserting that the managed zones are verified + self.assertTrue( + verify( + crawl.get_managed_zones(PROJECT_NAME, credentials=self.credentials), + "managed_zones", + True, + ) + ) + + def test_gke_clusters(self): + # Getting GKE client for credentials + gke_client = scanner.gke_client_for_credentials( + credentials=self.credentials, + ) + # Asserting that the GKE clusters are verified + self.assertTrue( + verify( + crawl.get_gke_clusters(PROJECT_NAME, gke_client), + "gke_clusters", + ) + ) + + def test_gke_images(self): + # Asserting that the GKE images are verified + self.assertTrue( + verify( + crawl.get_gke_images(PROJECT_NAME, self.credentials.token), + "gke_images", + True, + ) + ) + + def test_app_services(self): + # Asserting that the app services are verified + self.assertTrue( + verify( + crawl.get_app_services(PROJECT_NAME, self.credentials), + "app_services", + ) + ) + + def test_sql_instances(self): + # Asserting that the SQL instances are verified + self.assertTrue( + verify( + crawl.get_sql_instances(PROJECT_NAME, self.credentials), + "sql_instances", + True, + ) + ) + + def test_bq(self): + # Asserting that the BigQuery databases and table names are verified + self.assertTrue( + verify( + crawl.get_bq(PROJECT_NAME, self.credentials), + "bq", + ) + ) + + def test_pubsub_subs(self): + # Asserting that the PubSub Subscriptions are verified + self.assertTrue( + verify( + crawl.get_pubsub_subscriptions(PROJECT_NAME, self.credentials), + "pubsub_subs", + ) + ) + + + def test_cloud_functions(self): + """Test CloudFunctions list.""" + # Verify that cloud_functions list is obtained successfully + self.assertTrue( + verify( + crawl.get_cloudfunctions(PROJECT_NAME, self.credentials), + "cloud_functions", + ) + ) + + def test_bigtable_instances(self): + """Test BigTable Instances.""" + # Verify that BigTable Instances are obtained successfully + self.assertTrue( + verify( + crawl.get_bigtable_instances(PROJECT_NAME, self.credentials), + "bigtable_instances", + ) + ) + + def test_spanner_instances(self): + """Test Spanner Instances.""" + # Verify that Spanner Instances are obtained successfully + self.assertTrue( + verify( + crawl.get_spanner_instances(PROJECT_NAME, self.credentials), + "spanner_instances", + ) + ) + + def test_cloudstore_instances(self): + """Test CloudStore Instances.""" + # 
Verify that CloudStore Instances are obtained successfully + self.assertTrue( + verify( + crawl.get_filestore_instances(PROJECT_NAME, self.credentials), + "cloudstore_instances", + ) + ) + + def test_kms(self): + """Test list of KMS keys.""" + # Verify that a list of KMS keys is obtained successfully + self.assertTrue( + verify( + crawl.get_kms_keys(PROJECT_NAME, self.credentials), + "kms", + True, + ) + ) + + def test_endpoints(self): + """Test endpoints' information.""" + # Verify that endpoints information is obtained successfully + self.assertTrue( + verify( + crawl.get_endpoints(PROJECT_NAME, self.credentials), + "endpoints", + ) + ) + + def test_services(self): + """Test list of API services enabled in the project.""" + # Verify that a list of API services enabled in the project is obtained successfully + self.assertTrue( + verify( + crawl.list_services(PROJECT_NAME, self.credentials), + "services", + True + ) + ) + + def test_iam_policy(self): + """Test IAM policy.""" + # Verify that IAM policy is obtained successfully + self.assertTrue( + verify( + crawl.get_iam_policy(PROJECT_NAME, self.credentials), + "iam_policy", + ) + ) + + def test_service_accounts(self): + """Test service accounts.""" + # Verify that service accounts are obtained successfully + self.assertTrue( + verify( + crawl.get_service_accounts(PROJECT_NAME, self.credentials), + "service_accounts", + ) + ) + + def test_project_info(self): + """Test project info.""" + # Verify that project info is obtained successfully + self.assertTrue( + verify( + crawl.fetch_project_info(PROJECT_NAME, self.credentials), + "project_info", + ) + ) + + def test_sourcerepos(self): + """Test list of cloud source repositories in the project.""" + # Verify that a list of cloud source repositories in the project is obtained successfully + self.assertTrue( + verify( + crawl.list_sourcerepo(PROJECT_NAME, self.credentials), + "sourcerepos", + ) + ) + + def test_dns_policies(self): + """Test cloud DNS policies.""" + # Verify that cloud DNS policies are obtained successfully + self.assertTrue( + verify( + crawl.list_dns_policies(PROJECT_NAME, self.credentials), + "dns_policies", + ) + ) From 6caf26f1830fd89a793a06212d5984a30bc0578c Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Thu, 6 Apr 2023 02:39:48 +0530 Subject: [PATCH 11/25] Update crawl.py --- src/gcp_scanner/crawl.py | 1727 +++++++++++++++++++------------------- 1 file changed, 868 insertions(+), 859 deletions(-) diff --git a/src/gcp_scanner/crawl.py b/src/gcp_scanner/crawl.py index 8e1b954a..f01369f4 100644 --- a/src/gcp_scanner/crawl.py +++ b/src/gcp_scanner/crawl.py @@ -32,1032 +32,1041 @@ from requests.auth import HTTPBasicAuth +import collections + def infinite_defaultdict(): - """Initialize infinite default. + """Initialize infinite default. + + Returns: + DefaultDict + """ + return collections.defaultdict(infinite_defaultdict) - Returns: - DefaultDict - """ - return collections.defaultdict(infinite_defaultdict) def fetch_project_info(project_name: str, credentials: Credentials) -> Dict[str, Any]: - """Retrieve information about specific project. + """Retrieve information about specific project. - Args: - project_name: Name of project to request info about - credentials: An google.oauth2.credentials.Credentials object. + Args: + project_name: Name of project to request info about + credentials: An google.oauth2.credentials.Credentials object. - Returns: - Project info object or None. 
- """ - project_info = None - logging.info("Retrieving info about: %s", project_name) + Returns: + Project info object or None. + """ + project_info = None + logging.info("Retrieving info about: %s", project_name) - try: - service = googleapiclient.discovery.build( - "cloudresourcemanager", - "v1", - credentials=credentials, - cache_discovery=False) - request = service.projects().get(projectId=project_name) - response = request.execute() - if "projectNumber" in response: - project_info = response + try: + service = googleapiclient.discovery.build( + "cloudresourcemanager", + "v1", + credentials=credentials, + cache_discovery=False) + request = service.projects().get(projectId=project_name) + response = request.execute() + if "projectNumber" in response: + project_info = response + + except Exception: + logging.info("Failed to enumerate projects") + logging.info(sys.exc_info()) - except Exception: - logging.info("Failed to enumerate projects") - logging.info(sys.exc_info()) + return project_info - return project_info def get_project_list(credentials: Credentials) -> List[Dict[str, Any]]: - """Retrieve a list of projects accessible by credentials provided. - - Args: - credentials: An google.oauth2.credentials.Credentials object. + """Retrieve a list of projects accessible by credentials provided. - Returns: - A list of Project objects from cloudresourcemanager RestAPI. - """ + Args: + credentials: An google.oauth2.credentials.Credentials object. - logging.info("Retrieving projects list") - project_list = list() - try: - service = googleapiclient.discovery.build( - "cloudresourcemanager", - "v1", - credentials=credentials, - cache_discovery=False) - request = service.projects().list() - while request is not None: - response = request.execute() - project_list = response.get("projects",[]) - request = service.projects().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to enumerate projects") - logging.info(sys.exc_info()) - return project_list + Returns: + A list of Project objects from cloudresourcemanager RestAPI. + """ + logging.info("Retrieving projects list") + project_list = list() + try: + service = googleapiclient.discovery.build( + "cloudresourcemanager", + "v1", + credentials=credentials, + cache_discovery=False) + request = service.projects().list() + while request is not None: + response = request.execute() + project_list = response.get("projects", []) + request = service.projects().list_next( + previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to enumerate projects") + logging.info(sys.exc_info()) + return project_list def get_compute_instances_names( project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]: - """Retrieve a list of Compute VMs available in the project. + """Retrieve a list of Compute VMs available in the project. - Args: - project_name: A name of a project to query info about. - service: A resource object for interacting with the Compute API. + Args: + project_name: A name of a project to query info about. + service: A resource object for interacting with the Compute API. - Returns: - A list of instance objects. - """ + Returns: + A list of instance objects. 
+ """ + logging.info("Retrieving list of Compute Instances") + images_result = list() + try: + request = service.instances().aggregatedList(project=project_name) + while request is not None: + response = request.execute() + if response.get("items", None) is not None: + images_result = [instance + for _, instances_scoped_list in response["items"].items() + for instance in instances_scoped_list.get("instances", [])] + request = service.instances().aggregatedList_next( + previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to enumerate compute instances in the %s", project_name) + logging.info(sys.exc_info()) + return images_result - logging.info("Retrieving list of Compute Instances") - images_result = list() - try: - request = service.instances().aggregatedList(project=project_name) - while request is not None: - response = request.execute() - if response.get("items", None) is not None: - images_result = [instance - for _, instances_scoped_list in response["items"].items() - for instance in instances_scoped_list.get("instances",[])] - request = service.instances().aggregatedList_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to enumerate compute instances in the %s", - project_name) - logging.info(sys.exc_info()) - return images_result - - -def get_compute_images_names( - project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]: - """Retrieve a list of Compute images available in the project. +def get_compute_images_names(project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]: + """Retrieve a list of Compute images available in the project. - Args: - project_name: A name of a project to query info about. - service: A resource object for interacting with the Compute API. + Args: + project_name: A name of a project to query info about. + service: A resource object for interacting with the Compute API. - Returns: - A list of image objects. - """ + Returns: + A list of image objects. + """ - logging.info("Retrieving list of Compute Image names") - images_result = list() - try: - request = service.images().list(project=project_name) - while request is not None: - response = request.execute() - images_result = response.get("items", []) - request = service.images().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to enumerate compute images in the %s", project_name) - logging.info(sys.exc_info()) - return images_result + logging.info("Retrieving list of Compute Image names") + images_result = list() + try: + request = service.images().list(project=project_name) + while request is not None: + response = request.execute() + images_result = response.get("items", []) + request = service.images().list_next(previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to enumerate compute images in the %s", project_name) + logging.info(sys.exc_info()) + return images_result -def get_machine_images( - project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]: - """Retrieve a list of Machine Images Resources available in the project. +def get_machine_images(project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]: + """Retrieve a list of Machine Images Resources available in the project. - Args: - project_name: A name of a project to query info about. - service: A resource object for interacting with the Compute API. 
+ Args: + project_name: A name of a project to query info about. + service: A resource object for interacting with the Compute API. - Returns: - A list of machine image resources. - """ + Returns: + A list of machine image resources. + """ - logging.info("Retrieving list of Machine Images Resources") - machine_images_list = list() - try: - request = service.machineImages().list(project=project_name) - while request is not None: - response = request.execute() - machine_images_list = response.get("items", []) - request = service.machineImages().list_next( - previous_request=request, previous_response=response - ) - except Exception: - logging.info("Failed to enumerate machine images in the %s", project_name) - logging.info(sys.exc_info()) - return machine_images_list - - -def get_compute_disks_names( - project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]: - """Retrieve a list of Compute disks available in the project. + logging.info("Retrieving list of Machine Images Resources") + machine_images_list = list() + try: + request = service.machineImages().list(project=project_name) + while request is not None: + response = request.execute() + machine_images_list = response.get("items", []) + request = service.machineImages().list_next(previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to enumerate machine images in the %s", project_name) + logging.info(sys.exc_info()) + return machine_images_list - Args: - project_name: A name of a project to query info about. - service: A resource object for interacting with the Compute API. - Returns: - A list of disk objects. - """ +def get_compute_disks_names(project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]: + """Retrieve a list of Compute disks available in the project. - logging.info("Retrieving list of Compute Disk names") - disk_names_list = list() - try: - request = service.disks().aggregatedList(project=project_name) - while request is not None: - response = request.execute() - if response.get("items", None) is not None: - disk_names_list = [disk - for _, disks_scoped_list in response["items"].items() - for disk in disks_scoped_list.get("disks", [])] - request = service.disks().aggregatedList_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to enumerate compute disks in the %s", project_name) - logging.info(sys.exc_info()) + Args: + project_name: A name of a project to query info about. + service: A resource object for interacting with the Compute API. - return disk_names_list + Returns: + A list of disk objects. + """ + logging.info("Retrieving list of Compute Disk names") + disk_names_list = list() + try: + request = service.disks().aggregatedList(project=project_name) + while request is not None: + response = request.execute() + if response.get("items", None) is not None: + disk_names_list = [ + disk for _, disks_scoped_list in response["items"].items() + for disk in disks_scoped_list.get("disks", []) + ] + request = service.disks().aggregatedList_next( + previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to enumerate compute disks in the %s", project_name) + logging.info(sys.exc_info()) -def get_static_ips(project_name: str, - service: discovery.Resource) -> List[Dict[str, Any]]: - """Retrieve a list of static IPs available in the project. + return disk_names_list - Args: - project_name: A name of a project to query info about. 
- service: A resource object for interacting with the Compute API. - Returns: - A list of static IPs in the project. - """ +def get_static_ips(project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]: + """Retrieve a list of static IPs available in the project. - logging.info("Retrieving Static IPs") + Args: + project_name: A name of a project to query info about. + service: A resource object for interacting with the Compute API. - ips_list = list() - try: - request = service.addresses().aggregatedList(project=project_name) - while request is not None: - response = request.execute() - ips_list = [{name: addresses_scoped_list} - for name, addresses_scoped_list in response["items"].items() - if addresses_scoped_list.get("addresses", None) is not None] - request = service.addresses().aggregatedList_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to get static IPs in the %s", project_name) - logging.info(sys.exc_info()) - - return ips_list - - -def get_compute_snapshots(project_name: str, - service: discovery.Resource) -> List[Dict[str, Any]]: - """Retrieve a list of Compute snapshots available in the project. - - Args: - project_name: A name of a project to query info about. - service: A resource object for interacting with the Compute API. - - Returns: - A list of snapshot objects. - """ - - logging.info("Retrieving Compute Snapshots") - snapshots_list = list() - try: - request = service.snapshots().list(project=project_name) - while request is not None: - response = request.execute() - snapshots_list = response.get("items", []) - request = service.snapshots().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to get compute snapshots in the %s", project_name) - logging.info(sys.exc_info()) + Returns: + A list of static IPs in the project. + """ - return snapshots_list + logging.info("Retrieving Static IPs") + ips_list = list() + try: + request = service.addresses().aggregatedList(project=project_name) + while request is not None: + response = request.execute() + ips_list = [ + {name: addresses_scoped_list} + for name, addresses_scoped_list in response["items"].items() + if addresses_scoped_list.get("addresses", None) is not None + ] + request = service.addresses().aggregatedList_next( + previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to get static IPs in the %s", project_name) + logging.info(sys.exc_info()) -def get_subnets(project_name: str, - compute_client: discovery.Resource) -> List[Dict[str, Any]]: - """Retrieve a list of subnets available in the project. + return ips_list - Args: - project_name: A name of a project to query info about. - compute_client: A resource object for interacting with the Compute API. - Returns: - A list of subnets in the project. - """ +def get_compute_snapshots(project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]: + """Retrieve a list of Compute snapshots available in the project. 
- logging.info("Retrieving Subnets") - subnets_list = list() - try: - request = compute_client.subnetworks().aggregatedList(project=project_name) - while request is not None: - response = request.execute() - if response.get("items", None) is not None: - subnets_list = list(response["items"].items()) - request = compute_client.subnetworks().aggregatedList_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to get subnets in the %s", project_name) - logging.info(sys.exc_info()) - - return subnets_list - - -def get_firewall_rules( - project_name: str, - compute_client: discovery.Resource) -> List[Dict[str, Any]]: - """Retrieve a list of firewall rules in the project. - - Args: - project_name: A name of a project to query info about. - compute_client: A resource object for interacting with the Compute API. - - Returns: - A list of firewall rules in the project. - """ - - logging.info("Retrieving Firewall Rules") - firewall_rules_list = list() - try: - request = compute_client.firewalls().list(project=project_name) - while request is not None: - response = request.execute() - firewall_rules_list=[(firewall["name"],) - for firewall in response.get("items",[])] - request = compute_client.firewalls().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to get firewall rules in the %s", project_name) - logging.info(sys.exc_info()) - return firewall_rules_list + Args: + project_name: A name of a project to query info about. + service: A resource object for interacting with the Compute API. + Returns: + A list of snapshot objects. + """ + logging.info("Retrieving Compute Snapshots") + snapshots_list = list() + try: + request = service.snapshots().list(project=project_name) + while request is not None: + response = request.execute() + snapshots_list = response.get("items", []) + request = service.snapshots().list_next(previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to get compute snapshots in the %s", project_name) + logging.info(sys.exc_info()) -def get_bucket_names(project_name: str, credentials: Credentials, - dump_fd: io.TextIOWrapper - ) -> Dict[str, Tuple[Any, List[Any]]]: - """Retrieve a list of buckets available in the project. - - Args: - project_name: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. - dump_fd: If set, the function will enumerate files stored in buckets and - save them in a file corresponding to provided file descriptor. - This is a very slow, noisy operation and should be used with caution. - - Returns: - A dictionary where key is bucket name and value is a bucket Object. - """ - - logging.info("Retrieving GCS Buckets") - buckets_dict = dict() - service = discovery.build( - "storage", "v1", credentials=credentials, cache_discovery=False) - # Make an authenticated API request - request = service.buckets().list(project=project_name) - while request is not None: + return snapshots_list + + +def get_subnets(project_name: str, compute_client: discovery.Resource) -> List[Dict[str, Any]]: + """Retrieve a list of subnets available in the project. + + Args: + project_name: A name of a project to query info about. + compute_client: A resource object for interacting with the Compute API. + + Returns: + A list of subnets in the project. 
+ """ + logging.info("Retrieving Subnets") + subnets_list = list() + try: + request = compute_client.subnetworks().aggregatedList(project=project_name) + while request is not None: + response = request.execute() + if response.get("items", None) is not None: + subnets_list = list(response["items"].items()) + request = compute_client.subnetworks().aggregatedList_next(previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to get subnets in the %s", project_name) + logging.info(sys.exc_info()) + + return subnets_list + + +def get_firewall_rules(project_name: str, compute_client: discovery.Resource) -> List[Dict[str, Any]]: + """Retrieve a list of firewall rules in the project. + + Args: + project_name: A name of a project to query info about. + compute_client: A resource object for interacting with the Compute API. + + Returns: + A list of firewall rules in the project. + """ + logging.info("Retrieving Firewall Rules") + firewall_rules_list = list() try: - response = request.execute() - except googleapiclient.errors.HttpError: - logging.info("Failed to list buckets in the %s", project_name) - logging.info(sys.exc_info()) - break - - for bucket in response.get("items", []): - buckets_dict[bucket["name"]] = (bucket, None) - if dump_fd is not None: - ret_fields = "nextPageToken,items(name,size,contentType,timeCreated)" - - req = service.objects().list(bucket=bucket["name"], fields=ret_fields) - - while req: - try: - resp = req.execute() - for item in resp.get("items", []): - dump_fd.write(json.dumps(item, indent=2, sort_keys=False)) - - req = service.objects().list_next(req, resp) - except googleapiclient.errors.HttpError: - logging.info("Failed to read the bucket %s", bucket["name"]) + request = compute_client.firewalls().list(project=project_name) + while request is not None: + response = request.execute() + firewall_rules_list = [(firewall["name"],) for firewall in response.get("items", [])] + request = compute_client.firewalls().list_next(previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to get firewall rules in the %s", project_name) + logging.info(sys.exc_info()) + return firewall_rules_list + +def get_bucket_names(project_name: str, credentials: Credentials, + dump_fd: io.TextIOWrapper) -> Dict[str, Tuple[Any, List[Any]]]: + """Retrieve a list of buckets available in the project. + + Args: + project_name: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. + dump_fd: If set, the function will enumerate files stored in buckets and + save them in a file corresponding to provided file descriptor. + This is a very slow, noisy operation and should be used with caution. + + Returns: + A dictionary where key is bucket name and value is a bucket Object. 
+ """ + + logging.info("Retrieving GCS Buckets") + buckets_dict = dict() + service = discovery.build("storage", "v1", credentials=credentials, cache_discovery=False) + + # Make an authenticated API request + request = service.buckets().list(project=project_name) + while request is not None: + try: + response = request.execute() + except googleapiclient.errors.HttpError: + logging.info("Failed to list buckets in the %s", project_name) logging.info(sys.exc_info()) break - request = service.buckets().list_next( - previous_request=request, previous_response=response) + for bucket in response.get("items", []): + buckets_dict[bucket["name"]] = (bucket, None) + if dump_fd is not None: + ret_fields = "nextPageToken,items(name,size,contentType,timeCreated)" + req = service.objects().list(bucket=bucket["name"], fields=ret_fields) - return buckets_dict + while req: + try: + resp = req.execute() + for item in resp.get("items", []): + dump_fd.write(json.dumps(item, indent=2, sort_keys=False)) + req = service.objects().list_next(req, resp) + except googleapiclient.errors.HttpError: + logging.info("Failed to read the bucket %s", bucket["name"]) + logging.info(sys.exc_info()) + break + request = service.buckets().list_next(previous_request=request, previous_response=response) -def get_managed_zones(project_name: str, - credentials: Credentials) -> List[Dict[str, Any]]: - """Retrieve a list of DNS zones available in the project. + return buckets_dict - Args: - project_name: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. - Returns: - A list of DNS zones in the project. - """ +def get_managed_zones(project_name: str, credentials: Credentials) -> List[Dict[str, Any]]: + """Retrieve a list of DNS zones available in the project. - logging.info("Retrieving DNS Managed Zones") - zones_list = list() + Args: + project_name: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. - try: - service = discovery.build( - "dns", "v1", credentials=credentials, cache_discovery=False) + Returns: + A list of DNS zones in the project. + """ - request = service.managedZones().list(project=project_name) - while request is not None: - response = request.execute() - zones_list = response.get("managedZones",[]) - request = service.managedZones().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to enumerate DNS zones for project %s", project_name) - logging.info(sys.exc_info()) + logging.info("Retrieving DNS Managed Zones") + zones_list = list() + + try: + service = discovery.build("dns", "v1", credentials=credentials, cache_discovery=False) + + request = service.managedZones().list(project=project_name) + while request is not None: + response = request.execute() + zones_list = response.get("managedZones",[]) + request = service.managedZones().list_next(previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to enumerate DNS zones for project %s", project_name) + logging.info(sys.exc_info()) - return zones_list + return zones_list def get_gke_clusters( - project_name: str, gke_client: container_v1.services.cluster_manager.client - .ClusterManagerClient + project_name: str, gke_client: container_v1.services.cluster_manager.client.ClusterManagerClient ) -> List[Tuple[str, str]]: - """Retrieve a list of GKE clusters available in the project. + """Retrieve a list of GKE clusters available in the project. 
- Args: - project_name: A name of a project to query info about. - gke_client: I do not know TBD. + Args: + project_name: A name of a project to query info about. + gke_client: I do not know TBD. - Returns: - A list of GKE clusters in the project. - """ + Returns: + A list of GKE clusters in the project. + """ - logging.info("Retrieving list of GKE clusters") - parent = f"projects/{project_name}/locations/-" - try: - clusters = gke_client.list_clusters(parent=parent) - return [(cluster.name, cluster.description) - for cluster in clusters.clusters] - except Exception: - logging.info("Failed to retrieve cluster list for project %s", project_name) - logging.info(sys.exc_info()) - return [] + logging.info("Retrieving list of GKE clusters") + parent = f"projects/{project_name}/locations/-" + try: + clusters = gke_client.list_clusters(parent=parent) + return [(cluster.name, cluster.description) for cluster in clusters.clusters] + except Exception: + logging.info("Failed to retrieve cluster list for project %s", project_name) + logging.info(sys.exc_info()) + return [] def get_gke_images(project_name: str, access_token: str) -> Dict[str, Any]: - """Retrieve a list of GKE images available in the project. - - Args: - project_name: A name of a project to query info about. - access_token: An Oauth2 token with permissions to query list of gke images. - - Returns: - A gke images JSON object for each accessible zone. - """ - - images = dict() - logging.info("Retrieving list of GKE images") - project_name = project_name.replace(":", "/") - regions = ["", "us.", "eu.", "asia."] - for region in regions: - gcr_url = f"https://{region}gcr.io/v2/{project_name}/tags/list" + """Retrieve a list of GKE images available in the project. + + Args: + project_name: A name of a project to query info about. + access_token: An Oauth2 token with permissions to query list of gke images. + + Returns: + A gke images JSON object for each accessible zone. + """ + + images = dict() + logging.info("Retrieving list of GKE images") + project_name = project_name.replace(":", "/") + regions = ["", "us.", "eu.", "asia."] + for region in regions: + gcr_url = f"https://{region}gcr.io/v2/{project_name}/tags/list" + try: + res = requests.get( + gcr_url, auth=HTTPBasicAuth("oauth2accesstoken", access_token)) + if not res.ok: + logging.info("Failed to retrieve gcr images list. Status code: %d", + res.status_code) + continue + images[region.replace(".", "")] = res.json() + except Exception: + logging.info("Failed to retrieve gke images for project %s", project_name) + logging.info(sys.exc_info()) + + return images + + +def get_sql_instances(project_name: str, credentials: Credentials) -> List[Dict[str, Any]]: + """Retrieve a list of SQL instances available in the project. + + Args: + project_name: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. + + Returns: + A list of sql instances in the project. + """ + + logging.info("Retrieving CloudSQL Instances") + sql_instances_list = list() try: - res = requests.get( - gcr_url, auth=HTTPBasicAuth("oauth2accesstoken", access_token)) - if not res.ok: - logging.info("Failed to retrieve gcr images list. 
Status code: %d", - res.status_code) - continue - images[region.replace(".", "")] = res.json() + service = discovery.build( + "sqladmin", "v1beta4", credentials=credentials, cache_discovery=False) + + request = service.instances().list(project=project_name) + while request is not None: + response = request.execute() + sql_instances_list = response.get("items", []) + request = service.instances().list_next( + previous_request=request, previous_response=response) except Exception: - logging.info("Failed to retrieve gke images for project %s", project_name) - logging.info(sys.exc_info()) + logging.info("Failed to get SQL instances for project %s", project_name) + logging.info(sys.exc_info()) - return images + return sql_instances_list -def get_sql_instances(project_name: str, - credentials: Credentials) -> List[Dict[str, Any]]: - """Retrieve a list of SQL instances available in the project. +def get_bq_tables(project_id: str, dataset_id: str, bq_service: discovery.Resource) -> List[Dict[str, Any]]: + """Retrieve a list of BigQuery tables available in the dataset. - Args: - project_name: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. + Args: + project_id: A name of a project to query info about. + dataset_id: A name of dataset to query data from. + bq_service: I do not know. - Returns: - A list of sql instances in the project. - """ + Returns: + A list of BigQuery tables in the dataset. + """ - logging.info("Retrieving CloudSQL Instances") - sql_instances_list = list() - try: - service = discovery.build( - "sqladmin", "v1beta4", credentials=credentials, cache_discovery=False) + logging.info("Retrieving BigQuery Tables for dataset %s", dataset_id) + list_of_tables = list() + try: + request = bq_service.tables().list(projectId=project_id, datasetId=dataset_id) + while request is not None: + response = request.execute() + list_of_tables = response.get("tables", []) + request = bq_service.tables().list_next(previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to retrieve BQ tables for dataset %s", dataset_id) + logging.info(sys.exc_info()) + return list_of_tables - request = service.instances().list(project=project_name) - while request is not None: - response = request.execute() - sql_instances_list = response.get("items", []) - request = service.instances().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to get SQL instances for project %s", project_name) - logging.info(sys.exc_info()) - - return sql_instances_list - - -def get_bq_tables(project_id: str, dataset_id: str, - bq_service: discovery.Resource) -> List[Dict[str, Any]]: - """Retrieve a list of BigQuery tables available in the dataset. - - Args: - project_id: A name of a project to query info about. - dataset_id: A name of dataset to query data from. - bq_service: I do not know. - - Returns: - A list of BigQuery tables in the dataset. 
- """ - - logging.info("Retrieving BigQuery Tables for dataset %s", dataset_id) - list_of_tables = list() - try: - request = bq_service.tables().list( - projectId=project_id, datasetId=dataset_id) - while request is not None: - response = request.execute() - list_of_tables = response.get("tables", []) - request = bq_service.tables().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve BQ tables for dataset %s", dataset_id) - logging.info(sys.exc_info()) - return list_of_tables - - -def get_bq(project_id: str, - credentials: Credentials) -> Dict[str, List[Dict[str, Any]]]: - """Retrieve a list of BigQuery datasets available in the project. - - Args: - project_id: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. - - Returns: - A dictionary of BigQuery dataset and corresponding tables. - """ - - logging.info("Retrieving BigQuery Datasets") - bq_datasets = dict() - try: - service = discovery.build( - "bigquery", "v2", credentials=credentials, cache_discovery=False) - request = service.datasets().list(projectId=project_id) - while request is not None: - response = request.execute() +def get_bq(project_id: str, credentials: Credentials) -> Dict[str, List[Dict[str, Any]]]: + """Retrieve a list of BigQuery datasets available in the project. - for dataset in response.get("datasets", []): - dataset_id = dataset["datasetReference"]["datasetId"] - bq_datasets[dataset_id] = get_bq_tables(project_id,dataset_id, service) + Args: + project_id: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. - request = service.datasets().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve BQ datasets for project %s", project_id) - logging.info(sys.exc_info()) - return bq_datasets + Returns: + A dictionary of BigQuery dataset and corresponding tables. + """ + logging.info("Retrieving BigQuery Datasets") + bq_datasets = dict() + try: + service = discovery.build("bigquery", "v2", credentials=credentials, cache_discovery=False) -def get_pubsub_subscriptions(project_id: str, - credentials: Credentials) -> List[Dict[str, Any]]: - """Retrieve a list of PubSub subscriptions available in the project. + request = service.datasets().list(projectId=project_id) + while request is not None: + response = request.execute() - Args: - project_id: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. + for dataset in response.get("datasets", []): + dataset_id = dataset["datasetReference"]["datasetId"] + bq_datasets[dataset_id] = get_bq_tables(project_id, dataset_id, service) - Returns: - A list of PubSub subscriptions in the project. 
- """ + request = service.datasets().list_next(previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to retrieve BQ datasets for project %s", project_id) + logging.info(sys.exc_info()) - logging.info("Retrieving PubSub Subscriptions") - pubsubs_list = list() - try: - service = discovery.build( - "pubsub", "v1", credentials=credentials, cache_discovery=False) + return bq_datasets - request = service.projects().subscriptions().list( - project=f"projects/{project_id}") - while request is not None: - response = request.execute() - pubsubs_list = response.get("subscriptions", []) - request = service.projects().subscriptions().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to get PubSubs for project %s", project_id) - logging.info(sys.exc_info()) - return pubsubs_list - - -def get_cloudfunctions(project_id: str, - credentials: Credentials) -> List[Dict[str, Any]]: - """Retrieve a list of CloudFunctions available in the project. - - Args: - project_id: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. - - Returns: - A list of CloudFunctions in the project. - """ - - logging.info("Retrieving CloudFunctions") - functions_list = list() - service = discovery.build( - "cloudfunctions", "v1", credentials=credentials, cache_discovery=False) - try: - request = service.projects().locations().functions().list( - parent=f"projects/{project_id}/locations/-") - while request is not None: - response = request.execute() - functions_list = response.get("functions", []) - request = service.projects().locations().functions().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve CloudFunctions for project %s", project_id) - logging.info(sys.exc_info()) - return functions_list +def get_pubsub_subscriptions(project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: + """Retrieve a list of PubSub subscriptions available in the project. + + Args: + project_id: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. + + Returns: + A list of PubSub subscriptions in the project. + """ + + logging.info("Retrieving PubSub Subscriptions") + pubsubs_list = list() + try: + service = discovery.build("pubsub", "v1", credentials=credentials, cache_discovery=False) + + request = service.projects().subscriptions().list(project=f"projects/{project_id}") + while request is not None: + response = request.execute() + pubsubs_list = response.get("subscriptions", []) + request = service.projects().subscriptions().list_next(previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to get PubSubs for project %s", project_id) + logging.info(sys.exc_info()) + + return pubsubs_list + + +def get_cloudfunctions(project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: + """Retrieve a list of CloudFunctions available in the project. + + Args: + project_id: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. + + Returns: + A list of CloudFunctions in the project. 
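+
+  Example:
+    A hypothetical usage sketch (assumes `creds` holds valid credentials for
+    a project with the Cloud Functions API enabled):
+
+      functions = get_cloudfunctions("my-project", creds)
+      for function in functions:
+        print(function.get("name"), function.get("status"))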
+ """ + + logging.info("Retrieving CloudFunctions") + functions_list = list() + service = discovery.build("cloudfunctions", "v1", credentials=credentials, cache_discovery=False) + try: + request = service.projects().locations().functions().list(parent=f"projects/{project_id}/locations/-") + while request is not None: + response = request.execute() + functions_list = response.get("functions", []) + request = service.projects().locations().functions().list_next(previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to retrieve CloudFunctions for project %s", project_id) + logging.info(sys.exc_info()) + + return functions_list def get_bigtable_instances(project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: - """Retrieve a list of BigTable instances available in the project. + """Retrieve a list of BigTable instances available in the project. - Args: - project_id: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. + Args: + project_id: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. - Returns: - A list of BigTable instances in the project. - """ + Returns: + A list of BigTable instances in the project. + """ - logging.info("Retrieving bigtable instances") - bigtable_instances_list = list() - try: - service = discovery.build( - "bigtableadmin", "v2", credentials=credentials, cache_discovery=False) + logging.info("Retrieving bigtable instances") + bigtable_instances_list = list() + try: + service = discovery.build( + "bigtableadmin", "v2", credentials=credentials, cache_discovery=False) - request = service.projects().instances().list( - parent=f"projects/{project_id}") - while request is not None: - response = request.execute() - bigtable_instances_list = response.get("instances", []) - request = service.projects().instances().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve BigTable instances for project %s", - project_id) - logging.info(sys.exc_info()) - return bigtable_instances_list + request = service.projects().instances().list( + parent=f"projects/{project_id}") + while request is not None: + response = request.execute() + bigtable_instances_list = response.get("instances", []) + request = service.projects().instances().list_next( + previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to retrieve BigTable instances for project %s", + project_id) + logging.info(sys.exc_info()) + return bigtable_instances_list def get_spanner_instances(project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: - """Retrieve a list of Spanner instances available in the project. + """Retrieve a list of Spanner instances available in the project. - Args: - project_id: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. + Args: + project_id: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. - Returns: - A list of Spanner instances in the project. - """ + Returns: + A list of Spanner instances in the project. 
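+
+  Example:
+    A hypothetical usage sketch (assumes `creds` is a valid
+    google.oauth2.credentials.Credentials object):
+
+      instances = get_spanner_instances("my-project", creds)
+      for instance in instances:
+        print(instance.get("name"), instance.get("state"))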
+ """ - logging.info("Retrieving spanner instances") - spanner_instances_list = list() - try: - service = discovery.build( - "spanner", "v1", credentials=credentials, cache_discovery=False) + logging.info("Retrieving spanner instances") + spanner_instances_list = list() + try: + service = discovery.build( + "spanner", "v1", credentials=credentials, cache_discovery=False) - request = service.projects().instances().list( - parent=f"projects/{project_id}") - while request is not None: - response = request.execute() - spanner_instances_list = response.get("instances", []) - request = service.projects().instances().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve Spanner instances for project %s", - project_id) - logging.info(sys.exc_info()) - return spanner_instances_list + request = service.projects().instances().list( + parent=f"projects/{project_id}") + while request is not None: + response = request.execute() + spanner_instances_list = response.get("instances", []) + request = service.projects().instances().list_next( + previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to retrieve Spanner instances for project %s", + project_id) + logging.info(sys.exc_info()) + return spanner_instances_list def get_filestore_instances(project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: - """Retrieve a list of Filestore instances available in the project. - - Args: - project_id: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. - - Returns: - A list of Filestore instances in the project. - """ - - logging.info("Retrieving filestore instances") - filestore_instances_list = list() - service = discovery.build( - "file", "v1", credentials=credentials, cache_discovery=False) - try: - request = service.projects().locations().instances().list( - parent=f"projects/{project_id}/locations/-") - while request is not None: - response = request.execute() - filestore_instances_list = response.get("instances", []) - request = service.projects().locations().instances().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to get filestore instances for project %s", project_id) - logging.info(sys.exc_info()) - return filestore_instances_list - - -def get_kms_keys(project_id: str, - credentials: Credentials) -> List[Dict[str, Any]]: - """Retrieve a list of KMS keys available in the project. - - Args: - project_id: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. - - Returns: - A list of KMS keys in the project. - """ - - logging.info("Retrieving KMS keys") - kms_keys_list = list() - try: - service = discovery.build( - "cloudkms", "v1", credentials=credentials, cache_discovery=False) + """Retrieve a list of Filestore instances available in the project. 
- # list all possible locations - locations_list = list() - request = service.projects().locations().list(name=f"projects/{project_id}") - while request is not None: - response = request.execute() - for location in response.get("locations", []): - locations_list.append(location["locationId"]) - request = service.projects().locations().list_next( - previous_request=request, previous_response=response) - - for location_id in locations_list: - request_loc = service.projects().locations().keyRings().list( - parent=f"projects/{project_id}/locations/{location_id}") - while request_loc is not None: - response_loc = request_loc.execute() - for keyring in response_loc.get("keyRings", []): - request = service.projects().locations().keyRings().cryptoKeys().list( - parent=keyring["name"]) - while request is not None: - response = request.execute() - for key in response.get("cryptoKeys", []): - kms_keys_list.append(key) + Args: + project_id: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. + + Returns: + A list of Filestore instances in the project. + """ - request = service.projects().locations().keyRings().cryptoKeys( - ).list_next( + logging.info("Retrieving filestore instances") + filestore_instances_list = list() + service = discovery.build( + "file", "v1", credentials=credentials, cache_discovery=False) + try: + request = service.projects().locations().instances().list( + parent=f"projects/{project_id}/locations/-") + while request is not None: + response = request.execute() + filestore_instances_list = response.get("instances", []) + request = service.projects().locations().instances().list_next( previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to get filestore instances for project %s", project_id) + logging.info(sys.exc_info()) + return filestore_instances_list + + +def get_kms_keys(project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: + """Retrieve a list of KMS keys available in the project. + + Args: + project_id: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. + + Returns: + A list of KMS keys in the project. 
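+
+  Example:
+    A hypothetical usage sketch (assumes `creds` holds credentials with
+    permission to list key rings and crypto keys):
+
+      keys = get_kms_keys("my-project", creds)
+      for key in keys:
+        print(key.get("name"), key.get("purpose"))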
+ """ - request_loc = service.projects().locations().keyRings().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve KMS keys for project %s", project_id) - logging.info(sys.exc_info()) - return kms_keys_list + logging.info("Retrieving KMS keys") + kms_keys_list = list() + try: + service = discovery.build("cloudkms", "v1", credentials=credentials, cache_discovery=False) + + # list all possible locations + locations_list = list() + request = service.projects().locations().list(name=f"projects/{project_id}") + while request is not None: + response = request.execute() + for location in response.get("locations", []): + locations_list.append(location["locationId"]) + request = service.projects().locations().list_next(previous_request=request, previous_response=response) + + for location_id in locations_list: + request_loc = service.projects().locations().keyRings().list(parent=f"projects/{project_id}/locations/{location_id}") + while request_loc is not None: + response_loc = request_loc.execute() + for keyring in response_loc.get("keyRings", []): + request = service.projects().locations().keyRings().cryptoKeys().list(parent=keyring["name"]) + while request is not None: + response = request.execute() + for key in response.get("cryptoKeys", []): + kms_keys_list.append(key) + + request = service.projects().locations().keyRings().cryptoKeys().list_next(previous_request=request, previous_response=response) + + request_loc = service.projects().locations().keyRings().list_next(previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to retrieve KMS keys for project %s", project_id) + logging.info(sys.exc_info()) + return kms_keys_list def get_app_services(project_name: str, credentials: Credentials) -> Dict[str, Any]: - """Retrieve a list of AppEngine instances available in the project. + """Retrieve a list of AppEngine instances available in the project. - Args: - project_name: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. + Args: + project_name: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. - Returns: - A dict representing default apps and services available in the project. - """ + Returns: + A dict representing default apps and services available in the project. 
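+
+  Example:
+    A hypothetical usage sketch (assumes `creds` is a valid
+    google.oauth2.credentials.Credentials object):
+
+      app = get_app_services("my-project", creds)
+      if "default_app" in app:
+        name, hostname, serving_status = app["default_app"]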
+ """ - app_client = discovery.build( - "appengine", "v1", credentials=credentials, cache_discovery=False) + app_client = discovery.build( + "appengine", "v1", credentials=credentials, cache_discovery=False) - logging.info("Retrieving app services") - app_services = dict() - try: - request = app_client.apps().get(appsId=project_name) - response = request.execute() - if response.get("name", None) is not None: - app_services["default_app"] = (response["name"], - response["defaultHostname"], - response["servingStatus"]) + logging.info("Retrieving app services") + app_services = dict() + try: + request = app_client.apps().get(appsId=project_name) + response = request.execute() + if response.get("name", None) is not None: + app_services["default_app"] = (response["name"], + response["defaultHostname"], + response["servingStatus"]) - request = app_client.apps().services().list(appsId=project_name) + request = app_client.apps().services().list(appsId=project_name) - app_services["services"] = list() - while request is not None: - response = request.execute() - app_services["services"] = response.get("services", []) - request = app_client.apps().services().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve App services for project %s", project_name) - logging.info(sys.exc_info()) - return app_services + app_services["services"] = list() + while request is not None: + response = request.execute() + app_services["services"] = response.get("services", []) + request = app_client.apps().services().list_next( + previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to retrieve App services for project %s", project_name) + logging.info(sys.exc_info()) + return app_services def get_endpoints(project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: - """Retrieve a list of Endpoints available in the project. + """Retrieve a list of Endpoints available in the project. + + Args: + project_id: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. + + Returns: + A list of Endpoints in the project. + """ + + logging.info("Retrieving info about endpoints") + endpoints_list = list() + try: + service = discovery.build( + "servicemanagement", + "v1", + credentials=credentials, + cache_discovery=False) + + request = service.services().list(producerProjectId=project_id) + while request is not None: + response = request.execute() + endpoints_list = response.get("services", []) + request = service.services().list_next( + previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to retrieve endpoints list for project %s", project_id) + logging.info(sys.exc_info()) + return endpoints_list + + +def get_iam_policy(project_name: str, credentials: Credentials) -> List[Dict[str, Any]]: - Args: - project_id: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. + """Retrieve an IAM Policy in the project. - Returns: - A list of Endpoints in the project. - """ + Args: + project_name: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. - logging.info("Retrieving info about endpoints") - endpoints_list = list() - try: + Returns: + An IAM policy enforced for the project. 
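+
+  Example:
+    A hypothetical usage sketch (assumes `creds` is a valid
+    google.oauth2.credentials.Credentials object; the function returns
+    None on failure):
+
+      bindings = get_iam_policy("my-project", creds)
+      for binding in bindings or []:
+        print(binding.get("role"), binding.get("members"))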
+  """
+
+  logging.info("Retrieving IAM policy for %s", project_name)
+
+  # Create a Cloud Resource Manager service object
   service = discovery.build(
-      "servicemanagement",
+      "cloudresourcemanager",
       "v1",
       credentials=credentials,
       cache_discovery=False)
 
-    request = service.services().list(producerProjectId=project_id)
-    while request is not None:
-      response = request.execute()
-      endpoints_list = response.get("services", [])
-      request = service.services().list_next(
-          previous_request=request, previous_response=response)
-  except Exception:
-    logging.info("Failed to retrieve endpoints list for project %s", project_id)
-    logging.info(sys.exc_info())
-  return endpoints_list
-
-
-def get_iam_policy(project_name: str,
-                   credentials: Credentials) -> List[Dict[str, Any]]:
-  """Retrieve an IAM Policy in the project.
-
-  Args:
-    project_name: A name of a project to query info about.
-    credentials: An google.oauth2.credentials.Credentials object.
-
-  Returns:
-    An IAM policy enforced for the project.
-  """
-
-  logging.info("Retrieving IAM policy for %s", project_name)
-  service = discovery.build(
-      "cloudresourcemanager",
-      "v1",
-      credentials=credentials,
-      cache_discovery=False)
-
-  resource = project_name
-
-  get_policy_options = {
-      "requestedPolicyVersion": 3,
-  }
-  get_policy_options = {"options": {"requestedPolicyVersion": 3}}
-  try:
-    request = service.projects().getIamPolicy(
-        resource=resource, body=get_policy_options)
-    response = request.execute()
-  except Exception:
-    logging.info("Failed to get endpoints list for project %s", project_name)
-    logging.info(sys.exc_info())
-    return None
-
-  if response.get("bindings", None) is not None:
-    return response["bindings"]
-  else:
-    return None
-
-
-def get_associated_service_accounts(
-    iam_policy: List[Dict[str, Any]]) -> List[str]:
-  """Extract a list of unique SAs from IAM policy associated with project.
-
-  Args:
-    iam_policy: An IAM policy provided by get_iam_policy function.
-
-  Returns:
-    A list of service accounts represented as string
-  """
-
-  if not iam_policy:
-    return []
-
-  list_of_sas = list()
-  for entry in iam_policy:
-    for member in entry["members"]:
-      if "deleted:" in member:
-        continue
-      account_name = None
-      for element in member.split(":"):
-        if "@" in element:
-          account_name = element
-          break
-      if account_name and account_name not in list_of_sas:
-        list_of_sas.append(account_name)
-
-  return list_of_sas
+  resource = project_name
+
+  # Set options to retrieve a specific policy version
+  get_policy_options = {"options": {"requestedPolicyVersion": 3}}
+
+  try:
+    # Make a request to the Cloud Resource Manager API to retrieve the IAM policy
+    request = service.projects().getIamPolicy(
+        resource=resource, body=get_policy_options)
+    response = request.execute()
+  except Exception:
+    # Log an error message if the request fails
+    logging.info("Failed to get IAM policy for project %s", project_name)
+    logging.info(sys.exc_info())
+    return None
+
+  # Check if the response contains the expected bindings object
+  if response.get("bindings", None) is not None:
+    return response["bindings"]
+  else:
+    return None
+
+
+def get_associated_service_accounts(iam_policy: List[Dict[str, Any]]) -> List[str]:
+  """Extract a list of unique SAs from IAM policy associated with project.
+
+  Args:
+    iam_policy: An IAM policy provided by get_iam_policy function.
+ + Returns: + A list of service accounts represented as string + """ + + if not iam_policy: + return [] + + list_of_sas = list() + for entry in iam_policy: + for member in entry["members"]: + if "deleted:" in member: + continue + account_name = None # initialize variable for account name + for element in member.split(":"): + if "@" in element: + account_name = element + break + if account_name and account_name not in list_of_sas: + list_of_sas.append(account_name) + + return list_of_sas def get_service_accounts(project_name: str, credentials: Credentials) -> List[Tuple[str, str]]: - """Retrieve a list of service accounts managed in the project. + """Retrieve a list of service accounts managed in the project. - Args: - project_name: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. + Args: + project_name: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. - Returns: - A list of service accounts managed in the project. - """ + Returns: + A list of service accounts managed in the project. + """ - logging.info("Retrieving SA list %s", project_name) - service_accounts = [] - service = discovery.build( - "iam", "v1", credentials=credentials, cache_discovery=False) + # Log the start of the retrieval process. + logging.info("Retrieving SA list %s", project_name) - name = f"projects/{project_name}" + service_accounts = [] - try: - request = service.projects().serviceAccounts().list(name=name) - while request is not None: - response = request.execute() - service_accounts = [(service_account["email"], - service_account.get("description","")) - for service_account in response.get("accounts",[])] + # Create a service object for the IAM API. + service = discovery.build( + "iam", "v1", credentials=credentials, cache_discovery=False) + + # Construct the name of the project to query. + name = f"projects/{project_name}" + + try: + # Send a request to list the service accounts in the project. + request = service.projects().serviceAccounts().list(name=name) + + # Keep retrieving service accounts as long as there are more to retrieve. + while request is not None: + response = request.execute() + # Extract the email and description of each service account and add them to the list. + service_accounts = [(service_account["email"], + service_account.get("description","")) + for service_account in response.get("accounts",[])] + + # Get the next page of results. + request = service.projects().serviceAccounts().list_next( + previous_request=request, previous_response=response) + except Exception: + # Log an error message if something goes wrong. + logging.info("Failed to retrieve SA list for project %s", project_name) + logging.info(sys.exc_info()) - request = service.projects().serviceAccounts().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve SA list for project %s", project_name) - logging.info(sys.exc_info()) + return service_accounts - return service_accounts def list_services(project_id: str, credentials: Credentials) -> List[Any]: - """Retrieve a list of services enabled in the project. + """Retrieve a list of services enabled in the project. - Args: - project_id: An id of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. + Args: + project_id (str): An id of a project to query info about. + credentials (Credentials): A Google Cloud credentials object. 
- Returns: - A list of service API objects enabled in the project. - """ + Returns: + A list of service API objects enabled in the project. + """ - logging.info("Retrieving services list %s", project_id) - list_of_services = list() - serviceusage = discovery.build("serviceusage", "v1", credentials=credentials) + # Log the retrieval of services list for the given project ID + logging.info("Retrieving services list %s", project_id) - request = serviceusage.services().list( - parent="projects/" + project_id, pageSize=200, filter="state:ENABLED") - try: - while request is not None: - response = request.execute() - list_of_services.append(response.get("services", None)) + # Create a list to hold the enabled services + list_of_services = list() - request = serviceusage.services().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve services for project %s", project_id) - logging.info(sys.exc_info()) + serviceusage = discovery.build("serviceusage", "v1", credentials=credentials) - return list_of_services + # Create a request to list all services enabled in the given project + request = serviceusage.services().list( + parent="projects/" + project_id, # Specify the parent resource to list services under + pageSize=200, # Specify the maximum number of services to return per page + filter="state:ENABLED" # Specify the filter to return only ENABLED services + ) + + try: + # Loop through each page of services until all services have been retrieved + while request is not None: + response = request.execute() + list_of_services.append(response.get("services", None)) + + request = serviceusage.services().list_next( + previous_request=request, previous_response=response) + + except Exception: + # Log an error message if an exception occurs while retrieving services + logging.info("Failed to retrieve services for project %s", project_id) + logging.info(sys.exc_info()) + + return list_of_services def list_sourcerepo(project_id: str, credentials: Credentials) -> List[Any]: - """Retrieve a list of cloud source repositories enabled in the project. + """Retrieve a list of cloud source repositories enabled in the project. - Args: - project_id: An id of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. + Args: + project_id: An id of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. - Returns: - A list of cloud source repositories in the project. - """ + Returns: + A list of cloud source repositories in the project. + """ - logging.info("Retrieving cloud source repositories %s", project_id) - list_of_repos = list() - service = discovery.build("sourcerepo", "v1", credentials=credentials) + # Log a message indicating that we're retrieving repositories for the specified project. + logging.info("Retrieving cloud source repositories %s", project_id) - request = service.projects().repos().list( - name="projects/" + project_id, - pageSize=500 - ) - try: - while request is not None: - response = request.execute() - list_of_repos.append(response.get("repos", None)) + list_of_repos = list() + + # Build a service object for interacting with the Cloud Source Repositories API. + service = discovery.build("sourcerepo", "v1", credentials=credentials) + + # Create a request to list the repositories in the specified project, up to 500 at a time. 
+ request = service.projects().repos().list( + name="projects/" + project_id, + pageSize=500 + ) - request = service.projects().repos().list_next( - previous_request=request, - previous_response=response - ) - except Exception: - logging.info("Failed to retrieve source repos for project %s", project_id) - logging.info(sys.exc_info()) + try: + # Keep making requests until there are no more pages of repositories to retrieve. + while request is not None: + response = request.execute() - return list_of_repos + # Add the repositories from the response to the list of repositories. + list_of_repos.append(response.get("repos", None)) + + # Get the next page of repositories, if there is one. + request = service.projects().repos().list_next( + previous_request=request, + previous_response=response + ) + + except Exception: + # If an exception is raised, log a message indicating that we failed to retrieve the repositories. + logging.info("Failed to retrieve source repos for project %s", project_id) + logging.info(sys.exc_info()) + + return list_of_repos def list_dns_policies(project_id: str, credentials: Credentials) -> List[Any]: - """Retrieve a list of cloud DNS policies in the project. - Args: - project_id: An id of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. - Returns: - A list of cloud DNS policies in the project. - """ - - logging.info("Retrieving cloud DNS policies %s", project_id) - list_of_policies = list() - service = discovery.build("dns", "v1", credentials=credentials) - - request = service.policies().list( - project=project_id, - maxResults=500 - ) - try: - while request is not None: - response = request.execute() - list_of_policies.append(response.get("policies", None)) - - request = service.policies().list_next( - previous_request=request, - previous_response=response - ) - except Exception: - logging.info("Failed to retrieve DNS policies for project %s", project_id) - logging.info(sys.exc_info()) - - return list_of_policies + """ + Retrieve a list of cloud DNS policies in the project. + + Args: + project_id: An id of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. + + Returns: + A list of cloud DNS policies in the project. 
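+
+    Example:
+      A hypothetical usage sketch (assumes `creds` is a valid credentials
+      object; note the function returns one list per response page):
+
+        pages = list_dns_policies("my-project", creds)
+        for page in pages:
+          for policy in page or []:
+            print(policy.get("name"))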
+ """ + + # Log that we're retrieving cloud DNS policies for the specified project + logging.info("Retrieving cloud DNS policies %s", project_id) + + # Initialize an empty list to store the policies in + list_of_policies = list() + + # Create a DNS service object + service = discovery.build("dns", "v1", credentials=credentials) + + # Create a request to retrieve DNS policies for the specified project + request = service.policies().list( + project=project_id, + maxResults=500 + ) + + try: + # Loop through pages of results until there are no more + while request is not None: + # Send the request and get the response + response = request.execute() + + # Get the policies from the response and add them to the list_of_policies + list_of_policies.append(response.get("policies", None)) + + # Get the next page of results (if there are any) + request = service.policies().list_next( + previous_request=request, + previous_response=response + ) + except Exception: + # Log an error if we failed to retrieve DNS policies for the specified project + logging.info("Failed to retrieve DNS policies for project %s", project_id) + logging.info(sys.exc_info()) + + return list_of_policies From 664d4ecd3d7f3b8f477b48363d45b9fa29ff00d9 Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Thu, 6 Apr 2023 17:05:27 +0530 Subject: [PATCH 12/25] Update __init__.py --- src/gcp_scanner/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gcp_scanner/__init__.py b/src/gcp_scanner/__init__.py index 80238dad..d3f5a12f 100644 --- a/src/gcp_scanner/__init__.py +++ b/src/gcp_scanner/__init__.py @@ -1 +1 @@ -#Currently, this ia an empty file. + From 0e9cdd73cd06dc9cccd4261e3742fed80f0372b2 Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Thu, 6 Apr 2023 19:54:12 +0530 Subject: [PATCH 13/25] Update __main__.py --- src/gcp_scanner/__main__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gcp_scanner/__main__.py b/src/gcp_scanner/__main__.py index 330fb74e..35bea06b 100644 --- a/src/gcp_scanner/__main__.py +++ b/src/gcp_scanner/__main__.py @@ -21,5 +21,5 @@ # Checking if the code is running as the main module if __name__ == '__main__': - # Calling the main function of the scanner module - scanner.main() + # Calling the main function of the scanner module + scanner.main() From ac36b023c68dc37a91c334529ac8c0a739a37ede Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Thu, 6 Apr 2023 20:52:22 +0530 Subject: [PATCH 14/25] Update arguments.py --- src/gcp_scanner/arguments.py | 142 ++++++++++++++++++----------------- 1 file changed, 72 insertions(+), 70 deletions(-) diff --git a/src/gcp_scanner/arguments.py b/src/gcp_scanner/arguments.py index ed37efc1..09b516fd 100644 --- a/src/gcp_scanner/arguments.py +++ b/src/gcp_scanner/arguments.py @@ -20,9 +20,10 @@ import argparse import logging + # Define a function to create an argument parser using the argparse module def arg_parser(): - """Creates an argument parser using the `argparse` module and defines + """Creates an argument parser using the `argparse` module and defines several command-line arguments. Args: @@ -32,95 +33,98 @@ def arg_parser(): argparse.Namespace: A namespace object containing the parsed command-line arguments. 
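+
+  Example:
+    A hypothetical invocation, following the usage string defined below:
+
+      python3 scanner.py -o scan_output -g -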
""" - # Create a new parser object - parser = argparse.ArgumentParser( - prog='scanner.py', # program name - description='GCP Scanner', # description - usage='python3 %(prog)s -o folder_to_save_results -g -' # usage instructions - ) - - # Define a required argument group - required_named = parser.add_argument_group('Required parameters') - # Add a required argument to the group - required_named.add_argument( - '-o', # short option name - '--output-dir', # long option name - required=True, - dest='output', - default='scan_db', - help='Path to output directory' - ) + # Create a new parser object + parser = argparse.ArgumentParser( + prog='scanner.py', # program name + description='GCP Scanner', # description + usage='python3 %(prog)s -o folder_to_save_results -g -' + ) + + # Define a required argument group + required_named = parser.add_argument_group('Required parameters') + # Add a required argument to the group + required_named.add_argument( + '-o', # short option name + '--output-dir', # long option name + required=True, + dest='output', + default='scan_db', + help='Path to output directory' + ) - # Add command line arguments to the parser object - parser.add_argument( + # Add command line arguments to the parser object + parser.add_argument( '-k', - '--sa-key-path', # Option for specifying the path to the directory with SA keys + '--sa-key-path', default=None, # Default value if option is not specified - dest='key_path', # Destination variable for storing the value of the option - help='Path to directory with SA keys in json format' # Help message - ) - parser.add_argument( + dest='key_path', + help='Path to directory with SA keys in json format' # Help message + ) + parser.add_argument( '-g', - '--gcloud-profile-path', - default=None, - dest='gcloud_profile_path', - help='Path to directory with gcloud profile. Specify - to search for credentials in default gcloud config path' - ) - parser.add_argument( + '--gcloud-profile-path', + default=None, + dest='gcloud_profile_path', + help='Path to directory with gcloud profile. Specify - to search for\ + credentials in default gcloud config path' + ) + parser.add_argument( '-m', - '--use-metadata', - default=False, - dest='use_metadata', - action='store_true', - help='Extract credentials from GCE instance metadata' - ) - parser.add_argument( + '--use-metadata', + default=False, + dest='use_metadata', + action='store_true', + help='Extract credentials from GCE instance metadata' + ) + parser.add_argument( '-at', - '--access-token-files', - default=None, - dest='access_token_files', - help='A list of comma separated files with access token and OAuth scopes. TTL limited. A token and scopes should be stored in JSON format.' - ) - parser.add_argument( + '--access-token-files', + default=None, + dest='access_token_files', + help='A list of comma separated files with access token and OAuth scopes\ + TTL limited. A token and scopes should be stored in JSON format.' + ) + parser.add_argument( '-rt', - '--refresh-token-files', - default=None, - dest='refresh_token_files', - help='A list of comma separated files with refresh_token, client_id, token_uri and client_secret stored in JSON format.' - ) + '--refresh-token-files', + default=None, + dest='refresh_token_files', + help='A list of comma separated files with refresh_token, client_id,\ + token_uri and client_secret stored in JSON format.' 
+  )

-  parser.add_argument(
+  parser.add_argument(
     '-s',
     '--service-account',
     default=None,
     dest='key_name',
     help='Name of individual SA to scan')
-  parser.add_argument(
+  parser.add_argument(
     '-p',
     '--project',
     default=None,
     dest='target_project',
     help='Name of individual project to scan')
-  parser.add_argument(
+  parser.add_argument(
     '-f',
     '--force-projects',
     default=None,
     dest='force_projects',
     help='Comma separated list of project names to include in the scan')
-  parser.add_argument(
+  parser.add_argument(
     '-c',
     '--config',
     default=None,
     dest='config_path',
     help='A path to config file with a set of specific resources to scan.')
-  parser.add_argument(
+  parser.add_argument(
     '-l',
     '--logging',
     default='WARNING',
     dest='log_level',
     choices=('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'),
     help='Set logging level (INFO, WARNING, ERROR)')
-  parser.add_argument(
+  parser.add_argument(
     '-lf',
     '--log-file',
     default=None,
@@ -128,20 +132,18 @@
     help='Save logs to the path specified rather than displaying in\
        console')

-# Parse the command line arguments
-args: argparse.Namespace = parser.parse_args()
-
-# Check if none of the necessary options are selected
-if not args.key_path and not args.gcloud_profile_path \
-   and not args.use_metadata and not args.access_token_files\
-   and not args.refresh_token_files:
+  # Parse the command line arguments
+  args: argparse.Namespace = parser.parse_args()

-  # If none of the options are selected, log an error message
-  logging.error(
-      'Please select at least one option to begin scan\
-      -k/--sa-key-path,-g/--gcloud-profile-path, -m, -rt, -at'
-  )
+  # Check if none of the necessary options are selected
+  if not args.key_path and not args.gcloud_profile_path \
+    and not args.use_metadata and not args.access_token_files\
+    and not args.refresh_token_files:

-# Return the parsed command line arguments
-return args
+    # If none of the options are selected, log an error message
+    logging.error(
+        'Please select at least one option to begin scan\
+        -k/--sa-key-path,-g/--gcloud-profile-path, -m, -rt, -at')

+  # Return the parsed command line arguments
+  return args

From 8ffc0aa8fdfea874f2dfd991d036c6926c981494 Mon Sep 17 00:00:00 2001
From: Rohit Raj <88114930+ro4i7@users.noreply.github.com>
Date: Thu, 6 Apr 2023 22:40:01 +0530
Subject: [PATCH 15/25] Update crawl.py

---
 src/gcp_scanner/crawl.py | 375 ++++++++++++++++++++++++---------------
 1 file changed, 236 insertions(+), 139 deletions(-)

diff --git a/src/gcp_scanner/crawl.py b/src/gcp_scanner/crawl.py
index f01369f4..ffd3f3ab 100644
--- a/src/gcp_scanner/crawl.py
+++ b/src/gcp_scanner/crawl.py
@@ -34,6 +34,7 @@
 
 import collections
 
+
 def infinite_defaultdict():
   """Initialize infinite default.
 
@@ -43,7 +44,6 @@ def infinite_defaultdict():
   return collections.defaultdict(infinite_defaultdict)
 
 
-
 def fetch_project_info(project_name: str,
                        credentials: Credentials) -> Dict[str, Any]:
   """Retrieve information about specific project.
@@ -69,14 +69,13 @@
     if "projectNumber" in response:
       project_info = response
 
   except Exception:
     logging.info("Failed to enumerate projects")
     logging.info(sys.exc_info())
 
   return project_info
 
-
 def get_project_list(credentials: Credentials) -> List[Dict[str, Any]]:
   """Retrieve a list of projects accessible by credentials provided.
@@ -100,14 +99,15 @@ def get_project_list(credentials: Credentials) -> List[Dict[str, Any]]:
       project_list = response.get("projects", [])
       request = service.projects().list_next(
          previous_request=request, previous_response=response)
   except Exception:
     logging.info("Failed to enumerate projects")
     logging.info(sys.exc_info())
   return project_list
 
 
 def get_compute_instances_names(
     project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]:
   """Retrieve a list of Compute VMs available in the project.
 
   Args:
@@ -125,16 +125,22 @@
       response = request.execute()
       if response.get("items", None) is not None:
         images_result = [instance
-                         for _, instances_scoped_list in response["items"].items()
-                         for instance in instances_scoped_list.get("instances", [])]
+                         for _,
+                         instances_scoped_list in response[
+                             "items"].items()
+                         for instance in instances_scoped_list.get(
+                             "instances", [])]
       request = service.instances().aggregatedList_next(
           previous_request=request, previous_response=response)
   except Exception:
-    logging.info("Failed to enumerate compute instances in the %s", project_name)
+    logging.info(
+        "Failed to enumerate compute instances in the %s", project_name)
     logging.info(sys.exc_info())
   return images_result
 
-def get_compute_images_names(project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]:
+
+def get_compute_images_names(
+    project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]:
   """Retrieve a list of Compute images available in the project.
 
   Args:
@@ -152,14 +158,17 @@
     while request is not None:
       response = request.execute()
       images_result = response.get("items", [])
-      request = service.images().list_next(previous_request=request, previous_response=response)
+      request = service.images().list_next(
+          previous_request=request, previous_response=response)
   except Exception:
-    logging.info("Failed to enumerate compute images in the %s", project_name)
+    logging.info(
+        "Failed to enumerate compute images in the %s", project_name)
     logging.info(sys.exc_info())
   return images_result
 
-def get_machine_images(project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]:
+def get_machine_images(project_name: str, service: discovery.Resource) -> List[
+    Dict[str, Any]]:
   """Retrieve a list of Machine Images Resources available in the project.
 
   Args:
@@ -177,14 +186,17 @@
     while request is not None:
       response = request.execute()
       machine_images_list = response.get("items", [])
-      request = service.machineImages().list_next(previous_request=request, previous_response=response)
+      request = service.machineImages().list_next(
+          previous_request=request, previous_response=response)
   except Exception:
-    logging.info("Failed to enumerate machine images in the %s", project_name)
+    logging.info(
+        "Failed to enumerate machine images in the %s", project_name)
     logging.info(sys.exc_info())
   return machine_images_list
 
-def get_compute_disks_names(project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]:
+def get_compute_disks_names(
+    project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]:
   """Retrieve a list of Compute disks available in the project.
   Args:
 
@@ -208,14 +220,16 @@
       ]
       request = service.disks().aggregatedList_next(
          previous_request=request, previous_response=response)
   except Exception:
-    logging.info("Failed to enumerate compute disks in the %s", project_name)
+    logging.info(
+        "Failed to enumerate compute disks in the %s", project_name)
     logging.info(sys.exc_info())
 
   return disk_names_list
 
 
-def get_static_ips(project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]:
+def get_static_ips(
+    project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]:
   """Retrieve a list of static IPs available in the project.
 
   Args:
@@ -240,14 +254,15 @@
       ]
       request = service.addresses().aggregatedList_next(
          previous_request=request, previous_response=response)
   except Exception:
     logging.info("Failed to get static IPs in the %s", project_name)
     logging.info(sys.exc_info())
 
   return ips_list
 
 
-def get_compute_snapshots(project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]:
+def get_compute_snapshots(
+    project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]:
   """Retrieve a list of Compute snapshots available in the project.
 
   Args:
@@ -264,15 +279,18 @@
     while request is not None:
      response = request.execute()
      snapshots_list = response.get("items", [])
-     request = service.snapshots().list_next(previous_request=request, previous_response=response)
+     request = service.snapshots().list_next(
+         previous_request=request, previous_response=response)
   except Exception:
     logging.info("Failed to get compute snapshots in the %s", project_name)
     logging.info(sys.exc_info())
 
   return snapshots_list
 
 
-def get_subnets(project_name: str, compute_client: discovery.Resource) -> List[Dict[str, Any]]:
+def get_subnets(
+    project_name: str,
+    compute_client: discovery.Resource) -> List[Dict[str, Any]]:
   """Retrieve a list of subnets available in the project.
 
   Args:
@@ -285,20 +303,24 @@
   logging.info("Retrieving Subnets")
   subnets_list = list()
   try:
-    request = compute_client.subnetworks().aggregatedList(project=project_name)
+    request = compute_client.subnetworks().aggregatedList(
+        project=project_name)
     while request is not None:
       response = request.execute()
      if response.get("items", None) is not None:
        subnets_list = list(response["items"].items())
-      request = compute_client.subnetworks().aggregatedList_next(previous_request=request, previous_response=response)
+      request = compute_client.subnetworks().aggregatedList_next(
+          previous_request=request, previous_response=response)
   except Exception:
     logging.info("Failed to get subnets in the %s", project_name)
     logging.info(sys.exc_info())
 
   return subnets_list
 
 
-def get_firewall_rules(project_name: str, compute_client: discovery.Resource) -> List[Dict[str, Any]]:
+def get_firewall_rules(
+    project_name: str,
+    compute_client: discovery.Resource) -> List[Dict[str, Any]]:
   """Retrieve a list of firewall rules in the project.
   Args:
 
@@ -314,15 +336,19 @@
     request = compute_client.firewalls().list(project=project_name)
     while request is not None:
       response = request.execute()
-      firewall_rules_list = [(firewall["name"],) for firewall in response.get("items", [])]
-      request = compute_client.firewalls().list_next(previous_request=request, previous_response=response)
+      firewall_rules_list = [(
+          firewall["name"],) for firewall in response.get("items", [])]
+      request = compute_client.firewalls().list_next(
+          previous_request=request, previous_response=response)
   except Exception:
     logging.info("Failed to get firewall rules in the %s", project_name)
     logging.info(sys.exc_info())
   return firewall_rules_list
 
+
 def get_bucket_names(project_name: str, credentials: Credentials,
-                     dump_fd: io.TextIOWrapper) -> Dict[str, Tuple[Any, List[Any]]]:
+                     dump_fd: io.TextIOWrapper) -> Dict[str,
+                                                        Tuple[Any, List[Any]]]:
   """Retrieve a list of buckets available in the project.
 
   Args:
     project_name: A name of a project to query info about.
     credentials: An google.oauth2.credentials.Credentials object.
     dump_fd: If set, the function will enumerate files stored in buckets and
       save them in a file corresponding to provided file descriptor.
       This is a very slow, noisy operation and should be used with caution.
 
   Returns:
     A dictionary where key is bucket name and value is a bucket Object.
   """
 
   logging.info("Retrieving GCS Buckets")
   buckets_dict = dict()
-  service = discovery.build("storage", "v1", credentials=credentials, cache_discovery=False)
-
+  service = discovery.build("storage", "v1", credentials=credentials,
+                            cache_discovery=False)
   # Make an authenticated API request
   request = service.buckets().list(project=project_name)
   while request is not None:
     try:
       response = request.execute()
     except googleapiclient.errors.HttpError:
       logging.info("Failed to list buckets in the %s", project_name)
       logging.info(sys.exc_info())
       break
 
     for bucket in response.get("items", []):
       buckets_dict[bucket["name"]] = (bucket, None)
       if dump_fd is not None:
-        ret_fields = "nextPageToken,items(name,size,contentType,timeCreated)"
-        req = service.objects().list(bucket=bucket["name"], fields=ret_fields)
+        ret_fields = (
+            "nextPageToken,"
+            "items(name,size,contentType,timeCreated)"
+        )
+        req = service.objects().list(
+            bucket=bucket["name"], fields=ret_fields)
 
         while req:
           try:
             resp = req.execute()
             for item in resp.get("items", []):
-              dump_fd.write(json.dumps(item, indent=2, sort_keys=False))
+              dump_fd.write(json.dumps(
+                  item, indent=2, sort_keys=False))
 
             req = service.objects().list_next(req, resp)
           except googleapiclient.errors.HttpError:
-            logging.info("Failed to read the bucket %s", bucket["name"])
+            logging.info(
+                "Failed to read the bucket %s", bucket["name"])
             logging.info(sys.exc_info())
             break
 
-    request = service.buckets().list_next(previous_request=request, previous_response=response)
+    request = service.buckets().list_next(
+        previous_request=request, previous_response=response)
 
   return buckets_dict
 
 
-def get_managed_zones(project_name: str, credentials: Credentials) -> List[Dict[str, Any]]:
+def get_managed_zones(
+    project_name: str, credentials: Credentials) -> List[Dict[str, Any]]:
   """Retrieve a list of DNS zones available in the project.
Args: @@ -387,22 +421,26 @@ def get_managed_zones(project_name: str, credentials: Credentials) -> List[Dict[ zones_list = list() try: - service = discovery.build("dns", "v1", credentials=credentials, cache_discovery=False) + service = discovery.build( + "dns", "v1", credentials=credentials, cache_discovery=False) request = service.managedZones().list(project=project_name) while request is not None: response = request.execute() - zones_list = response.get("managedZones",[]) - request = service.managedZones().list_next(previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to enumerate DNS zones for project %s", project_name) + zones_list = response.get("managedZones", []) + request = service.managedZones().list_next( + previous_request=request, previous_response=response) + except ImportError: + logging.info( + "Failed to enumerate DNS zones for project %s", project_name) logging.info(sys.exc_info()) return zones_list def get_gke_clusters( - project_name: str, gke_client: container_v1.services.cluster_manager.client.ClusterManagerClient + project_name: str, + gke_client: container_v1.services.cluster_manager.client.ClusterManagerClient ) -> List[Tuple[str, str]]: """Retrieve a list of GKE clusters available in the project. @@ -418,9 +456,12 @@ def get_gke_clusters( parent = f"projects/{project_name}/locations/-" try: clusters = gke_client.list_clusters(parent=parent) - return [(cluster.name, cluster.description) for cluster in clusters.clusters] - except Exception: - logging.info("Failed to retrieve cluster list for project %s", project_name) + return [( + cluster.name, + cluster.description) for cluster in clusters.clusters] + except ImportError: + logging.info( + "Failed to retrieve cluster list for project %s", project_name) logging.info(sys.exc_info()) return [] @@ -430,7 +471,8 @@ def get_gke_images(project_name: str, access_token: str) -> Dict[str, Any]: Args: project_name: A name of a project to query info about. - access_token: An Oauth2 token with permissions to query list of gke images. + access_token: An Oauth2 token with permissions\ + to query list of gke images. Returns: A gke images JSON object for each accessible zone. @@ -446,18 +488,21 @@ def get_gke_images(project_name: str, access_token: str) -> Dict[str, Any]: res = requests.get( gcr_url, auth=HTTPBasicAuth("oauth2accesstoken", access_token)) if not res.ok: - logging.info("Failed to retrieve gcr images list. Status code: %d", - res.status_code) + logging.info( + "Failed to retrieve gcr images list. Status code: %d", + res.status_code) continue images[region.replace(".", "")] = res.json() - except Exception: - logging.info("Failed to retrieve gke images for project %s", project_name) + except ImportError: + logging.info( + "Failed to retrieve gke images for project %s", project_name) logging.info(sys.exc_info()) return images -def get_sql_instances(project_name: str, credentials: Credentials) -> List[Dict[str, Any]]: +def get_sql_instances( + project_name: str, credentials: Credentials) -> List[Dict[str, Any]]: """Retrieve a list of SQL instances available in the project. 
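Editor's sketch: unlike the other crawlers, get_gke_images above talks to the container registry over plain HTTP, passing the OAuth2 access token as the password half of a Basic-auth pair. A sketch against the standard Docker Registry v2 catalog endpoint; the host is an assumption, and the real code iterates regional variants such as us.gcr.io:

import requests
from requests.auth import HTTPBasicAuth


def list_registry_catalog(access_token: str, host: str = "gcr.io"):
  # GCR accepts an OAuth2 access token under the fixed
  # user name "oauth2accesstoken".
  res = requests.get(
      f"https://{host}/v2/_catalog",
      auth=HTTPBasicAuth("oauth2accesstoken", access_token),
      timeout=30)
  return res.json() if res.ok else None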
Args: @@ -472,7 +517,8 @@ def get_sql_instances(project_name: str, credentials: Credentials) -> List[Dict[ sql_instances_list = list() try: service = discovery.build( - "sqladmin", "v1beta4", credentials=credentials, cache_discovery=False) + "sqladmin", "v1beta4", + credentials=credentials, cache_discovery=False) request = service.instances().list(project=project_name) while request is not None: @@ -480,14 +526,16 @@ def get_sql_instances(project_name: str, credentials: Credentials) -> List[Dict[ sql_instances_list = response.get("items", []) request = service.instances().list_next( previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to get SQL instances for project %s", project_name) + except ImportError: + logging.info( + "Failed to get SQL instances for project %s", project_name) logging.info(sys.exc_info()) return sql_instances_list -def get_bq_tables(project_id: str, dataset_id: str, bq_service: discovery.Resource) -> List[Dict[str, Any]]: +def get_bq_tables(project_id: str, dataset_id: str, + bq_service: discovery.Resource) -> List[Dict[str, Any]]: """Retrieve a list of BigQuery tables available in the dataset. Args: @@ -502,18 +550,22 @@ def get_bq_tables(project_id: str, dataset_id: str, bq_service: discovery.Resour logging.info("Retrieving BigQuery Tables for dataset %s", dataset_id) list_of_tables = list() try: - request = bq_service.tables().list(projectId=project_id, datasetId=dataset_id) + request = bq_service.tables().list( + projectId=project_id, datasetId=dataset_id) while request is not None: response = request.execute() list_of_tables = response.get("tables", []) - request = bq_service.tables().list_next(previous_request=request, previous_response=response) - except Exception: + request = bq_service.tables().list_next( + previous_request=request, previous_response=response) + except ImportError: logging.info("Failed to retrieve BQ tables for dataset %s", dataset_id) logging.info(sys.exc_info()) return list_of_tables -def get_bq(project_id: str, credentials: Credentials) -> Dict[str, List[Dict[str, Any]]]: +def get_bq( + project_id: str, + credentials: Credentials) -> Dict[str, List[Dict[str, Any]]]: """Retrieve a list of BigQuery datasets available in the project. 
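Editor's sketch: get_bq_tables above, together with get_bq below it, forms a two-level enumeration: page through datasets, then list each dataset's tables. Condensed into one function (inner pagination elided for brevity):

from googleapiclient import discovery


def list_bq_tables_by_dataset(project_id: str, credentials):
  service = discovery.build("bigquery", "v2", credentials=credentials,
                            cache_discovery=False)
  datasets = {}
  request = service.datasets().list(projectId=project_id)
  while request is not None:
    response = request.execute()
    for dataset in response.get("datasets", []):
      dataset_id = dataset["datasetReference"]["datasetId"]
      tables = service.tables().list(
          projectId=project_id, datasetId=dataset_id).execute()
      datasets[dataset_id] = tables.get("tables", [])
    request = service.datasets().list_next(
        previous_request=request, previous_response=response)
  return datasets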
Args: @@ -527,7 +579,8 @@ def get_bq(project_id: str, credentials: Credentials) -> Dict[str, List[Dict[str logging.info("Retrieving BigQuery Datasets") bq_datasets = dict() try: - service = discovery.build("bigquery", "v2", credentials=credentials, cache_discovery=False) + service = discovery.build( + "bigquery", "v2", credentials=credentials, cache_discovery=False) request = service.datasets().list(projectId=project_id) while request is not None: @@ -535,17 +588,21 @@ def get_bq(project_id: str, credentials: Credentials) -> Dict[str, List[Dict[str for dataset in response.get("datasets", []): dataset_id = dataset["datasetReference"]["datasetId"] - bq_datasets[dataset_id] = get_bq_tables(project_id, dataset_id, service) + bq_datasets[dataset_id] = get_bq_tables( + project_id, dataset_id, service) - request = service.datasets().list_next(previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve BQ datasets for project %s", project_id) + request = service.datasets().list_next( + previous_request=request, previous_response=response) + except ImportError: + logging.info( + "Failed to retrieve BQ datasets for project %s", project_id) logging.info(sys.exc_info()) return bq_datasets -def get_pubsub_subscriptions(project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: +def get_pubsub_subscriptions( + project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: """Retrieve a list of PubSub subscriptions available in the project. Args: @@ -559,21 +616,25 @@ def get_pubsub_subscriptions(project_id: str, credentials: Credentials) -> List[ logging.info("Retrieving PubSub Subscriptions") pubsubs_list = list() try: - service = discovery.build("pubsub", "v1", credentials=credentials, cache_discovery=False) + service = discovery.build( + "pubsub", "v1", credentials=credentials, cache_discovery=False) - request = service.projects().subscriptions().list(project=f"projects/{project_id}") + request = service.projects().subscriptions().list( + project=f"projects/{project_id}") while request is not None: response = request.execute() pubsubs_list = response.get("subscriptions", []) - request = service.projects().subscriptions().list_next(previous_request=request, previous_response=response) - except Exception: + request = service.projects().subscriptions().list_next( + previous_request=request, previous_response=response) + except ImportError: logging.info("Failed to get PubSubs for project %s", project_id) logging.info(sys.exc_info()) return pubsubs_list -def get_cloudfunctions(project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: +def get_cloudfunctions( + project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: """Retrieve a list of CloudFunctions available in the project. 
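Editor's sketch: the Pub/Sub and Cloud Functions calls above address resources through parent strings, and the Cloud Functions one uses "-" as a wildcard location so a single request covers every region. Isolated:

from googleapiclient import discovery


def list_functions_all_regions(project_id: str, credentials):
  service = discovery.build("cloudfunctions", "v1", credentials=credentials,
                            cache_discovery=False)
  # "locations/-" matches all locations, avoiding a per-region loop.
  request = service.projects().locations().functions().list(
      parent=f"projects/{project_id}/locations/-")
  return request.execute().get("functions", [])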
Args: @@ -586,15 +647,19 @@ def get_cloudfunctions(project_id: str, credentials: Credentials) -> List[Dict[s logging.info("Retrieving CloudFunctions") functions_list = list() - service = discovery.build("cloudfunctions", "v1", credentials=credentials, cache_discovery=False) + service = discovery.build( + "cloudfunctions", "v1", credentials=credentials, cache_discovery=False) try: - request = service.projects().locations().functions().list(parent=f"projects/{project_id}/locations/-") + request = service.projects().locations().functions().list( + parent=f"projects/{project_id}/locations/-") while request is not None: response = request.execute() functions_list = response.get("functions", []) - request = service.projects().locations().functions().list_next(previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve CloudFunctions for project %s", project_id) + request = service.projects().locations().functions().list_next( + previous_request=request, previous_response=response) + except ImportError: + logging.info( + "Failed to retrieve CloudFunctions for project %s", project_id) logging.info(sys.exc_info()) return functions_list @@ -616,7 +681,7 @@ def get_bigtable_instances(project_id: str, bigtable_instances_list = list() try: service = discovery.build( - "bigtableadmin", "v2", credentials=credentials, cache_discovery=False) + "bigtableadmin", "v2", credentials=credentials, cache_discovery=False) request = service.projects().instances().list( parent=f"projects/{project_id}") @@ -625,7 +690,7 @@ def get_bigtable_instances(project_id: str, bigtable_instances_list = response.get("instances", []) request = service.projects().instances().list_next( previous_request=request, previous_response=response) - except Exception: + except ImportError: logging.info("Failed to retrieve BigTable instances for project %s", project_id) logging.info(sys.exc_info()) @@ -657,7 +722,7 @@ def get_spanner_instances(project_id: str, spanner_instances_list = response.get("instances", []) request = service.projects().instances().list_next( previous_request=request, previous_response=response) - except Exception: + except ImportError: logging.info("Failed to retrieve Spanner instances for project %s", project_id) logging.info(sys.exc_info()) @@ -688,13 +753,15 @@ def get_filestore_instances(project_id: str, filestore_instances_list = response.get("instances", []) request = service.projects().locations().instances().list_next( previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to get filestore instances for project %s", project_id) + except ImportError: + logging.info( + "Failed to get filestore instances for project %s", project_id) logging.info(sys.exc_info()) return filestore_instances_list -def get_kms_keys(project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: +def get_kms_keys( + project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: """Retrieve a list of KMS keys available in the project. 
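Editor's sketch: get_bigtable_instances and get_spanner_instances above differ only in API name, version, and parent path, so a single parameterized helper could serve both (Filestore adds an extra locations() level and would need its own variant). A sketch of that refactoring:

from googleapiclient import discovery


def list_instances(credentials, api: str, version: str, parent: str):
  service = discovery.build(api, version, credentials=credentials,
                            cache_discovery=False)
  instances = []
  request = service.projects().instances().list(parent=parent)
  while request is not None:
    response = request.execute()
    instances = response.get("instances", [])
    request = service.projects().instances().list_next(
        previous_request=request, previous_response=response)
  return instances

# list_instances(creds, "bigtableadmin", "v2", f"projects/{project_id}")
# list_instances(creds, "spanner", "v1", f"projects/{project_id}")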
Args: @@ -708,32 +775,42 @@ def get_kms_keys(project_id: str, credentials: Credentials) -> List[Dict[str, An logging.info("Retrieving KMS keys") kms_keys_list = list() try: - service = discovery.build("cloudkms", "v1", credentials=credentials, cache_discovery=False) + service = discovery.build( + "cloudkms", "v1", credentials=credentials, cache_discovery=False) # list all possible locations locations_list = list() - request = service.projects().locations().list(name=f"projects/{project_id}") + request = service.projects().locations().list( + name=f"projects/{project_id}") while request is not None: response = request.execute() for location in response.get("locations", []): locations_list.append(location["locationId"]) - request = service.projects().locations().list_next(previous_request=request, previous_response=response) + request = service.projects().locations().list_next( + previous_request=request, previous_response=response) for location_id in locations_list: - request_loc = service.projects().locations().keyRings().list(parent=f"projects/{project_id}/locations/{location_id}") + request_loc = service.projects().locations().keyRings().list( + parent=f"projects/{project_id}/locations/{location_id}") while request_loc is not None: response_loc = request_loc.execute() for keyring in response_loc.get("keyRings", []): - request = service.projects().locations().keyRings().cryptoKeys().list(parent=keyring["name"]) + request = service.projects().locations( + ).keyRings().cryptoKeys().list(parent=keyring["name"]) while request is not None: response = request.execute() for key in response.get("cryptoKeys", []): kms_keys_list.append(key) - request = service.projects().locations().keyRings().cryptoKeys().list_next(previous_request=request, previous_response=response) + request = service.projects().locations().keyRings( + ).cryptoKeys().list_next( + previous_request=request, + previous_response=response) - request_loc = service.projects().locations().keyRings().list_next(previous_request=request, previous_response=response) - except Exception: + request_loc = service.projects( + ).locations().keyRings().list_next( + previous_request=request, previous_response=response) + except ImportError: logging.info("Failed to retrieve KMS keys for project %s", project_id) logging.info(sys.exc_info()) return kms_keys_list @@ -772,8 +849,9 @@ def get_app_services(project_name: str, app_services["services"] = response.get("services", []) request = app_client.apps().services().list_next( previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve App services for project %s", project_name) + except ImportError: + logging.info( + "Failed to retrieve App services for project %s", project_name) logging.info(sys.exc_info()) return app_services @@ -805,13 +883,15 @@ def get_endpoints(project_id: str, endpoints_list = response.get("services", []) request = service.services().list_next( previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve endpoints list for project %s", project_id) + except ImportError: + logging.info( + "Failed to retrieve endpoints list for project %s", project_id) logging.info(sys.exc_info()) return endpoints_list -def get_iam_policy(project_name: str, credentials: Credentials) -> List[Dict[str, Any]]: +def get_iam_policy( + project_name: str, credentials: Credentials) -> List[Dict[str, Any]]: """Retrieve an IAM Policy in the project. 
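Editor's sketch: get_kms_keys above is the deepest crawler: locations, then key rings per location, then crypto keys per ring. The traversal reads more clearly as a flattened generator (pagination elided; service is assumed to be a cloudkms v1 discovery client):

def iter_kms_keys(service, project_id: str):
  locations = service.projects().locations().list(
      name=f"projects/{project_id}").execute()
  for location in locations.get("locations", []):
    rings = service.projects().locations().keyRings().list(
        parent=location["name"]).execute()
    for ring in rings.get("keyRings", []):
      keys = service.projects().locations().keyRings().cryptoKeys().list(
          parent=ring["name"]).execute()
      for key in keys.get("cryptoKeys", []):
        yield key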
@@ -838,13 +918,15 @@ def get_iam_policy(project_name: str, credentials: Credentials) -> List[Dict[str get_policy_options = {"options": {"requestedPolicyVersion": 3}} try: - # Make a request to the Cloud Resource Manager API to retrieve the IAM policy + # Make a request to the Cloud Resource Manager\ + # API to retrieve the IAM policy request = service.projects().getIamPolicy( resource=resource, body=get_policy_options) response = request.execute() - except Exception: + except ImportError: # Log an error message if the request fails - logging.info("Failed to get endpoints list for project %s", project_name) + logging.info( + "Failed to get endpoints list for project %s", project_name) logging.info(sys.exc_info()) return None @@ -855,7 +937,8 @@ def get_iam_policy(project_name: str, credentials: Credentials) -> List[Dict[str return None -def get_associated_service_accounts(iam_policy: List[Dict[str, Any]]) -> List[str]: +def get_associated_service_accounts( + iam_policy: List[Dict[str, Any]]) -> List[str]: """Extract a list of unique SAs from IAM policy associated with project. Args: @@ -865,23 +948,23 @@ def get_associated_service_accounts(iam_policy: List[Dict[str, Any]]) -> List[st A list of service accounts represented as string """ - if not iam_policy: + if not iam_policy: return [] - list_of_sas = list() - for entry in iam_policy: - for member in entry["members"]: - if "deleted:" in member: + list_of_sas = list() + for entry in iam_policy: + for member in entry["members"]: + if "deleted:" in member: continue account_name = None # initialize variable for account name - for element in member.split(":"): - if "@" in element: + for element in member.split(":"): + if "@" in element: account_name = element break - if account_name and account_name not in list_of_sas: + if account_name and account_name not in list_of_sas: list_of_sas.append(account_name) - return list_of_sas + return list_of_sas def get_service_accounts(project_name: str, @@ -912,18 +995,21 @@ def get_service_accounts(project_name: str, # Send a request to list the service accounts in the project. request = service.projects().serviceAccounts().list(name=name) - # Keep retrieving service accounts as long as there are more to retrieve. + # Keep retrieving service accounts as + # long as there are more to retrieve. while request is not None: response = request.execute() - # Extract the email and description of each service account and add them to the list. + # Extract the email and description of + # each service account and add them to the list. service_accounts = [(service_account["email"], - service_account.get("description","")) - for service_account in response.get("accounts",[])] + service_account.get("description", "")) + for service_account in response.get( + "accounts", [])] # Get the next page of results. request = service.projects().serviceAccounts().list_next( previous_request=request, previous_response=response) - except Exception: + except ImportError: # Log an error message if something goes wrong. logging.info("Failed to retrieve SA list for project %s", project_name) logging.info(sys.exc_info()) @@ -931,8 +1017,8 @@ def get_service_accounts(project_name: str, return service_accounts - -def list_services(project_id: str, credentials: Credentials) -> List[Any]: +def list_services( + project_id: str, credentials: Credentials) -> List[Any]: """Retrieve a list of services enabled in the project. 
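Editor's sketch: get_associated_service_accounts above mines account emails out of IAM policy bindings, skipping deleted principals. The same extraction as a small pure function:

def emails_from_bindings(bindings):
  """bindings: the 'bindings' list of an IAM policy response."""
  emails = []
  for binding in bindings or []:
    for member in binding.get("members", []):
      # Members look like "serviceAccount:name@proj.iam.gserviceaccount.com";
      # deleted principals carry a "deleted:" prefix.
      if member.startswith("deleted:"):
        continue
      _, _, principal = member.partition(":")
      if "@" in principal and principal not in emails:
        emails.append(principal)
  return emails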
  Args:
@@ -949,17 +1035,19 @@ def list_services(project_id: str, credentials: Credentials) -> List[Any]:

  # Create a list to hold the enabled services
  list_of_services = list()

-  serviceusage = discovery.build("serviceusage", "v1", credentials=credentials)
+  serviceusage = discovery.build(
+      "serviceusage", "v1", credentials=credentials)

  # Create a request to list all services enabled in the given project
  request = serviceusage.services().list(
-      parent="projects/" + project_id,  # Specify the parent resource to list services under
-      pageSize=200,  # Specify the maximum number of services to return per page
-      filter="state:ENABLED"  # Specify the filter to return only ENABLED services
+      parent="projects/" + project_id,
+      pageSize=200,
+      filter="state:ENABLED"
  )

  try:
-    # Loop through each page of services until all services have been retrieved
+    # Loop through each page of services
+    # until all services have been retrieved
    while request is not None:
      response = request.execute()
      list_of_services.append(response.get("services", None))

      request = serviceusage.services().list_next(
          previous_request=request, previous_response=response)

  except Exception:
    # Log an error message if an exception occurs while retrieving services
    logging.info("Failed to retrieve services for project %s", project_id)
    logging.info(sys.exc_info())
@@ -986,26 +1074,31 @@ def list_sourcerepo(project_id: str, credentials: Credentials) -> List[Any]:

    A list of cloud source repositories in the project.
  """

-  # Log a message indicating that we're retrieving repositories for the specified project.
+  # Log a message indicating that
+  # we're retrieving repositories for the specified project.
  logging.info("Retrieving cloud source repositories %s", project_id)

  list_of_repos = list()

-  # Build a service object for interacting with the Cloud Source Repositories API.
+  # Build a service object for
+  # interacting with the Cloud Source Repositories API.
  service = discovery.build("sourcerepo", "v1", credentials=credentials)

-  # Create a request to list the repositories in the specified project, up to 500 at a time.
+  # Create a request to list the repositories
+  # in the specified project, up to 500 at a time.
  request = service.projects().repos().list(
      name="projects/" + project_id,
      pageSize=500
  )

  try:
-    # Keep making requests until there are no more pages of repositories to retrieve.
+    # Keep making requests until there
+    # are no more pages of repositories to retrieve.
    while request is not None:
      response = request.execute()

-      # Add the repositories from the response to the list of repositories.
+      # Add the repositories from the
+      # response to the list of repositories.
      list_of_repos.append(response.get("repos", None))

      # Get the next page of repositories, if there is one.
      request = service.projects().repos().list_next(
          previous_request=request,
          previous_response=response
      )

  except Exception:
-    # If an exception is raised, log a message indicating that we failed to retrieve the repositories.
-    logging.info("Failed to retrieve source repos for project %s", project_id)
+    # If an exception is raised, log a message
+    # indicating that we failed to retrieve the repositories.
+ logging.info( + "Failed to retrieve source repos for project %s", project_id) logging.info(sys.exc_info()) return list_of_repos @@ -1055,7 +1150,8 @@ def list_dns_policies(project_id: str, credentials: Credentials) -> List[Any]: # Send the request and get the response response = request.execute() - # Get the policies from the response and add them to the list_of_policies + # Get the policies from the response + # and add them to the list_of_policies list_of_policies.append(response.get("policies", None)) # Get the next page of results (if there are any) @@ -1063,10 +1159,11 @@ def list_dns_policies(project_id: str, credentials: Credentials) -> List[Any]: previous_request=request, previous_response=response ) - except Exception: - # Log an error if we failed to retrieve DNS policies for the specified project - logging.info("Failed to retrieve DNS policies for project %s", project_id) + except ImportError: + # Log an error if we failed to retrieve + # DNS policies for the specified project + logging.info( + "Failed to retrieve DNS policies for project %s", project_id) logging.info(sys.exc_info()) return list_of_policies - From f8ae001578a1b3bb48795de670770b2d2ce74563 Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Thu, 6 Apr 2023 23:11:16 +0530 Subject: [PATCH 16/25] Update credsdb.py --- src/gcp_scanner/credsdb.py | 169 ++++++++++++++++++++++--------------- 1 file changed, 99 insertions(+), 70 deletions(-) diff --git a/src/gcp_scanner/credsdb.py b/src/gcp_scanner/credsdb.py index 1a7334ce..8b8094d7 100644 --- a/src/gcp_scanner/credsdb.py +++ b/src/gcp_scanner/credsdb.py @@ -40,32 +40,32 @@ def credentials_from_token(access_token: str, refresh_token: Optional[str], token_uri: Optional[str], client_id: Optional[str], client_secret: Optional[str], scopes_user: Optional[str]) -> Credentials: - """ - Create Credentials instance from tokens - """ - return credentials.Credentials( - access_token, - refresh_token=refresh_token, - token_uri=token_uri, - client_id=client_id, - client_secret=client_secret, - scopes=scopes_user) + """ + Create Credentials instance from tokens + """ + return credentials.Credentials(access_token, refresh_token=refresh_token, + token_uri=token_uri, client_id=client_id, + client_secret=client_secret, + scopes=scopes_user) def get_creds_from_file(file_path: str) -> Tuple[str, Credentials]: - """ - Retrieve Credentials instance from a service account json file. - """ - logging.info("Retrieving credentials from %s", file_path) - creds = service_account.Credentials.from_service_account_file(file_path) - return creds.service_account_email, creds + """ + Retrieve Credentials instance from a service account json file. + """ + + logging.info("Retrieving credentials from %s", file_path) + creds = service_account.Credentials.from_service_account_file(file_path) + return creds.service_account_email, creds def get_creds_from_json(parsed_keyfile: Mapping[str, str]) -> Credentials: - """ - Retrieve Credentials instance from parsed service account info. - """ - return service_account.Credentials.from_service_account_info(parsed_keyfile) + """ + Retrieve Credentials instance from parsed service account info. 
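Editor's sketch: credsdb distinguishes two credential shapes above: user credentials rebuilt from OAuth2 tokens, and service-account credentials loaded from key material. Side by side (token and path are placeholders):

from google.oauth2 import credentials, service_account

# User account: rebuilt from an access/refresh token pair.
user_creds = credentials.Credentials(
    "ya29.EXAMPLE",            # placeholder access token
    refresh_token=None,
    token_uri="https://oauth2.googleapis.com/token",
    client_id=None,
    client_secret=None)

# Service account: loaded from a JSON key file (or a parsed
# dict via from_service_account_info).
sa_creds = service_account.Credentials.from_service_account_file(
    "sa-key.json")             # placeholder path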
+ """ + + return service_account.Credentials.from_service_account_info( + parsed_keyfile) def get_creds_from_metadata() -> Tuple[Optional[str], Optional[Credentials]]: @@ -73,16 +73,22 @@ def get_creds_from_metadata() -> Tuple[Optional[str], Optional[Credentials]]: Returns: Tuple[Optional[str], Optional[Credentials]]: - A tuple containing the email associated with the credentials and the constructed credentials. + A tuple containing the email associated with the + credentials and the constructed credentials. """ - # Print a message to indicate that we are retrieving the access token from instance metadata + # Print a message to indicate that we are + # retrieving the access token from instance metadata print("Retrieving access token from instance metadata") - # Define the URLs that we need to access to get the token, scopes, and email - token_url = "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token" - scope_url = "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/scopes" - email_url = "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/email" + # Define the URLs that we need to + # access to get the token, scopes, and email + token_url = "http://metadata.google.internal/computeMetadata/v1/" \ + "instance/service-accounts/default/token" + scope_url = "http://metadata.google.internal/computeMetadata/v1/" \ + "instance/service-accounts/default/scopes" + email_url = "http://metadata.google.internal/computeMetadata/v1/" \ + "instance/service-accounts/default/email" # Set the headers for the requests headers = {"Metadata-Flavor": "Google"} @@ -93,7 +99,10 @@ def get_creds_from_metadata() -> Tuple[Optional[str], Optional[Credentials]]: # Check if the response was successful if not res.ok: - logging.error("Failed to retrieve instance token. Status code %d", res.status_code) + logging.error("Failed to retrieve instance token. " + "Status code %d", res.status_code) + token_url = None + return None, None # Parse the JSON response and get the access token @@ -104,7 +113,8 @@ def get_creds_from_metadata() -> Tuple[Optional[str], Optional[Credentials]]: # Check if the response was successful if not res.ok: - logging.error("Failed to retrieve instance scopes. Status code %d", res.status_code) + logging.error("Failed to retrieve instance scopes. " + "Status code %d", res.status_code) return None, None # Get the instance scopes from the response @@ -115,19 +125,21 @@ def get_creds_from_metadata() -> Tuple[Optional[str], Optional[Credentials]]: # Check if the response was successful if not res.ok: - logging.error("Failed to retrieve instance email. Status code %d", res.status_code) + logging.error("Failed to retrieve instance email. 
" + "Status code %d", res.status_code) return None, None # Get the instance email from the response email = res.content.decode("utf-8") - except Exception: + except ImportError: # Log an error message if any exception occurred logging.error("Failed to retrieve instance metadata") logging.error(sys.exc_info()[1]) return None, None - # Print a message to indicate that we have successfully retrieved the instance metadata + # Print a message to indicate that + # we have successfully retrieved the instance metadata print("Successfully retrieved instance metadata") # Log the length of the access token, instance email, and instance scopes @@ -135,15 +147,18 @@ def get_creds_from_metadata() -> Tuple[Optional[str], Optional[Credentials]]: logging.info("Instance email: %s", email) logging.info("Instance scopes: %s", instance_scopes) - # Return the email and credentials constructed from the token and instance scopes - return email, credentials_from_token(token, None, None, None, None, instance_scopes) + # Return the email and credentials + # constructed from the token and instance scopes + return email, credentials_from_token( + token, None, None, None, None, instance_scopes) - -def get_creds_from_data(access_token: str, parsed_keyfile: Dict[str, str]) -> Credentials: +def get_creds_from_data( + access_token: str, parsed_keyfile: Dict[str, str]) -> Credentials: """Creates a Credentials instance from parsed service account info. - The function currently supports two types of credentials. Service account key in json format and user account with refresh token. + The function currently supports two types of credentials. + Service account key in json format and user account with refresh token. Args: access_token: An Oauth2 access token. It can be None. @@ -170,7 +185,8 @@ def get_creds_from_data(access_token: str, parsed_keyfile: Dict[str, str]) -> Cr ) # Check if the parsed_keyfile contains "private_key" elif "private_key" in parsed_keyfile: - logging.info("Identified service account key credentials in gcloud profile") + logging.info( + "Identified service account key credentials in gcloud profile") # this is a service account key with private key creds = get_creds_from_json(parsed_keyfile) else: @@ -182,11 +198,13 @@ def get_creds_from_data(access_token: str, parsed_keyfile: Dict[str, str]) -> Cr def find_creds(explicit_path: Optional[str] = None) -> List[str]: """ - The function searches the disk and returns a list of files with GCP credentials. + The function searches the disk and returns + a list of files with GCP credentials. Args: - explicit_path: An explicit path on disk to search. If None, the function - searches in standard locations where gcloud profiles are usually located. + explicit_path: An explicit path on disk to search. + If None, the function searches in + standard locations where gcloud profiles are usually located. Returns: list: The list of files with GCP credentials. 
@@ -208,7 +226,8 @@ def find_creds(explicit_path: Optional[str] = None) -> List[str]: full_path = os.path.join(dir_path, subdir_name, "gcloud") search_paths.append(full_path) - # Scan each search path for credentials.db and add them to the list_of_creds_files + # Scan each search path for credentials.db + # and add them to the list_of_creds_files for dir_path in search_paths: print(f"Scanning {dir_path} for credentials.db") full_path = os.path.join(dir_path, "credentials.db") @@ -222,7 +241,8 @@ def find_creds(explicit_path: Optional[str] = None) -> List[str]: def get_access_tokens_dict(path_to_creds_db: str) -> Dict[str, str]: """ - The function searches and extracts OAuth2 access_tokens from a SQLite3 database. + The function searches and extracts OAuth2 + access_tokens from a SQLite3 database. Args: path_to_creds_db: A path to SQLite3 database with gcloud access tokens. @@ -233,16 +253,20 @@ def get_access_tokens_dict(path_to_creds_db: str) -> Dict[str, str]: access_tokens_dict = dict() - # Replace credentials.db with access_tokens.db to get the path to access tokens database - access_tokens_path = path_to_creds_db.replace("credentials.db", "access_tokens.db") + # Replace credentials.db with access_tokens.db + # to get the path to access tokens database + access_tokens_path = path_to_creds_db.replace("credentials.db", + "access_tokens.db") # Check if the access tokens database exists and can be read - if os.path.exists(access_tokens_path) and os.access(access_tokens_path, os.R_OK): + if os.path.exists(access_tokens_path) and os.access(access_tokens_path, + os.R_OK): # If the access tokens database exists and can be read, connect to it logging.info("Identified access tokens DB in %s", access_tokens_path) conn = sqlite3.connect(access_tokens_path) - cursor = conn.execute("SELECT account_id, access_token, token_expiry FROM access_tokens") + cursor = conn.execute("SELECT account_id, access_token," + "token_expiry FROM access_tokens") # Fetch all rows from the access tokens database rows = cursor.fetchall() @@ -257,26 +281,30 @@ def get_access_tokens_dict(path_to_creds_db: str) -> Dict[str, str]: expiration_date = expiration_date.split(".")[0] # Convert the expiration date to a datetime object - token_time_obj = datetime.datetime.strptime(expiration_date, "%Y-%m-%d %H:%M:%S") + token_time_obj = datetime.datetime.strptime( + expiration_date, "%Y-%m-%d %H:%M:%S") # Check if the token has expired if datetime.datetime.now() > token_time_obj: logging.info("Token for %s expired", associated_account) continue - # Add the associated account and token to the access tokens dictionary + # Add the associated account and + # token to the access tokens dictionary access_tokens_dict[associated_account] = token return access_tokens_dict - -def extract_creds(path_to_creds_db: str) -> List[Tuple[str, str, str]]: +def extract_creds(path_to_creds_db: str) -> List[ + Tuple[str, str, str]]: """ - The function extracts refresh and associated access tokens from sqlite3 DBs. + The function extracts refresh and associated access + tokens from sqlite3 DBs. Args: - path_to_creds_db (str): A path to sqlite3 DB with gcloud refresh tokens. + path_to_creds_db (str): A path to sqlite3 DB + with gcloud refresh tokens. Returns: List of tuples: (account name, refresh token, access token). 
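Editor's sketch: get_access_tokens_dict above discards cached tokens whose token_expiry has passed; gcloud stores the stamp with fractional seconds, which are stripped before parsing. The check in isolation:

import datetime


def is_expired(token_expiry: str) -> bool:
  """token_expiry: e.g. '2023-04-06 21:00:00.123456' from access_tokens.db."""
  stamp = token_expiry.split(".")[0]  # drop fractional seconds
  expiry = datetime.datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S")
  return datetime.datetime.now() > expiry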
@@ -292,7 +320,6 @@ def extract_creds(path_to_creds_db: str) -> List[Tuple[str, str, str]]: # Connect to the database conn = sqlite3.connect(path_to_creds_db) - # Select account_id and value from the credentials table cursor = conn.execute("SELECT account_id, value FROM credentials") rows = cursor.fetchall() @@ -314,7 +341,8 @@ def extract_creds(path_to_creds_db: str) -> List[Tuple[str, str, str]]: logging.info("Found valid access token for %s", row[0]) access_token = access_tokens[row[0]] - # Append the account name, credentials, and access token to the results list + # Append the account name, credentials, and access + # token to the results list res.append(SA(row[0], row[1], access_token)) # Print the number of identified credential entries @@ -324,27 +352,29 @@ def extract_creds(path_to_creds_db: str) -> List[Tuple[str, str, str]]: return res -def get_account_creds_list(gcloud_profile_path: Optional[str] = None) -> List[List[Tuple[str, str, str]]]: +def get_account_creds_list(gcloud_profile_path: Optional[ + str] = None) -> List[List[Tuple[str, str, str]]]: """The function searches and extracts gcloud credentials from disk. Args: - gcloud_profile_path: An explicit gcloud profile path on disk to search. If - None, the function searches in standard locations where gcloud profiles - are usually located. + gcloud_profile_path: An explicit gcloud profile path on disk to + search. If None, the function searches in standard locations where + gcloud profiles are usually located. Returns: list: A list of tuples (account name, refresh token, access token). """ accounts = list() # initialize an empty list - creds_file_list = find_creds(gcloud_profile_path) # get a list of credentials files + creds_file_list = find_creds(gcloud_profile_path) for creds_file in creds_file_list: - res = extract_creds(creds_file) # extract the credentials from the file + res = extract_creds(creds_file) if res is not None: - accounts.append(res) # append the extracted credentials to the accounts list + accounts.append(res) return accounts # return the accounts list -def impersonate_sa(iam_client: IAMCredentialsClient, target_account: str) -> Credentials: +def impersonate_sa(iam_client: IAMCredentialsClient, + target_account: str) -> Credentials: """ The function is used to impersonate a service account. @@ -378,13 +408,13 @@ def impersonate_sa(iam_client: IAMCredentialsClient, target_account: str) -> Cre ) - def creds_from_access_token(access_token_file): - """The function is used to obtain Google Auth Credentials from access token. + """The function is used to obtain Google Auth + Credentials from access token. Args: - access_token_file: a path to a file with access token and scopes stored in - JSON format. Example: + access_token_file: a path to a file with access token + and scopes stored in JSON format. Example: { "access_token": "", "scopes": [ @@ -448,7 +478,8 @@ def creds_from_refresh_token(refresh_token_file): - google.auth.service_account.Credentials: The constructed credentials. """ - # Open the refresh_token_file in utf-8 encoding and load the contents to a dictionary + # Open the refresh_token_file in utf-8 encoding + # and load the contents to a dictionary with open(refresh_token_file, encoding="utf-8") as f: creds_dict = json.load(f) @@ -466,7 +497,6 @@ def creds_from_refresh_token(refresh_token_file): ) - def get_scopes_from_refresh_token(context) -> Union[List[str], None]: """ The function is used to obtain scopes from a refresh token. 
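Editor's sketch: impersonate_sa below leans on the IAM Credentials API; the core call mints a short-lived access token for the target account, which is then wrapped back into a Credentials object. A hedged sketch of that flow (the scope choice is an assumption):

from google.cloud import iam_credentials
from google.oauth2 import credentials as oauth2_credentials


def impersonated_creds(source_credentials, target_email: str):
  client = iam_credentials.IAMCredentialsClient(
      credentials=source_credentials)
  # "projects/-" is the documented wildcard for this resource name.
  response = client.generate_access_token(
      name=f"projects/-/serviceAccounts/{target_email}",
      scope=["https://www.googleapis.com/auth/cloud-platform"],
  )
  return oauth2_credentials.Credentials(response.access_token)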
@@ -495,9 +525,8 @@ def get_scopes_from_refresh_token(context) -> Union[List[str], None]: raw = response.json().get("scope", None) return raw.split(" ") if raw else None - except Exception as ex: + except ImportError as ex: logging.error("Failed to retrieve access token from refresh token.") logging.debug("Token refresh exception", exc_info=ex) return None - From bace7b1e52442d7d04b2df3da3f2ba5cfddb8797 Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Thu, 6 Apr 2023 23:13:20 +0530 Subject: [PATCH 17/25] Update models.py --- src/gcp_scanner/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gcp_scanner/models.py b/src/gcp_scanner/models.py index 60aa9855..fc73344a 100644 --- a/src/gcp_scanner/models.py +++ b/src/gcp_scanner/models.py @@ -22,6 +22,7 @@ from httplib2 import Credentials + class SpiderContext: """A simple class to initialize the context with a list of root SAs""" @@ -34,7 +35,6 @@ def __init__(self, sa_tuples: List[Tuple[str, Credentials, List[str]]]): """ # Create a new queue to hold the service accounts self.service_account_queue = queue.Queue() - # Add each service account from the sa_tuples list to the queue for sa_tuple in sa_tuples: self.service_account_queue.put(sa_tuple) From 6db5dda0297321a01c20ed0f695baa8249963292 Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Thu, 6 Apr 2023 23:42:58 +0530 Subject: [PATCH 18/25] Update scanner.py --- src/gcp_scanner/scanner.py | 210 ++++++++++++++++++++++--------------- 1 file changed, 127 insertions(+), 83 deletions(-) diff --git a/src/gcp_scanner/scanner.py b/src/gcp_scanner/scanner.py index 8a5d864f..0c597f3a 100644 --- a/src/gcp_scanner/scanner.py +++ b/src/gcp_scanner/scanner.py @@ -21,35 +21,37 @@ import logging import os import sys -from typing import List, Tuple, Dict, Optional,Union +from typing import List, Tuple, Dict, Optional, Union from . import crawl from . import credsdb from . import arguments from google.cloud import container_v1 from google.cloud import iam_credentials -from google.cloud.iam_credentials_v1.services.iam_credentials.client import IAMCredentialsClient +from google.cloud.iam_credentials_v1.services.iam_credentials.client \ + import IAMCredentialsClient from googleapiclient import discovery from httplib2 import Credentials from .models import SpiderContext + def is_set(config: Optional[dict], config_setting: str) -> Union[dict, bool]: # If config is None, return True if config is None: return True - # Get the value of the specified config setting obj = config.get(config_setting, {}) - - # Return the value of 'fetch' if it exists in the config setting, otherwise return False + # Return the value of 'fetch' if it exists in the + # config setting, otherwise return False return obj.get('fetch', False) + def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]], out_dir: str, scan_config: Dict, target_project: Optional[str] = None, force_projects: Optional[str] = None): - """ + """ The main loop function to crawl GCP resources. 
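Editor's sketch: get_scopes_from_refresh_token above learns which scopes a refresh token actually grants by redeeming it once and reading the "scope" field of the token response. A standalone sketch against Google's standard token endpoint (the field names carried on the real function's context argument may differ):

import requests


def scopes_from_refresh_token(client_id, client_secret, refresh_token):
  response = requests.post(
      "https://oauth2.googleapis.com/token",
      data={
          "client_id": client_id,
          "client_secret": client_secret,
          "refresh_token": refresh_token,
          "grant_type": "refresh_token",
      },
      timeout=30)
  # "scope" is a single space-separated string of granted scopes.
  raw = response.json().get("scope")
  return raw.split(" ") if raw else None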
Args: @@ -60,19 +62,20 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]], force_projects: a list of projects to force scan """ - # Initialize SpiderContext - context = SpiderContext(initial_sa_tuples) + # Initialize SpiderContext + context = SpiderContext(initial_sa_tuples) - # Set of already processed service accounts - processed_sas = set() + # Set of already processed service accounts + processed_sas = set() - # Main loop - while not context.service_account_queue.empty(): - # Get a new candidate service account / token - sa_name, credentials, chain_so_far = context.service_account_queue.get() + # Main loop + while not context.service_account_queue.empty(): + # Get a new candidate service account or token + sa_name, credentials, chain_so_far = context.service_account_queue.get( + ) - if sa_name in processed_sas: - continue + if sa_name in processed_sas: + continue # Don't process this service account again processed_sas.add(sa_name) @@ -93,24 +96,26 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]], project_list = crawl.get_project_list(credentials) if len(project_list) <= 0: - logging.info('Unable to list projects accessible from service account') + logging.info('Unable to list projects accessible from service account') # Add any forced projects to project_list if force_projects: - for force_project_id in force_projects: - res = crawl.fetch_project_info(force_project_id, credentials) + for force_project_id in force_projects: + res = crawl.fetch_project_info(force_project_id, credentials) if res: - project_list.append(res) + project_list.append(res) else: - # force object creation anyway - project_list.append({'projectId': force_project_id, 'projectNumber': 'N/A'}) - + # force object creation anyway + project_list.append({ + 'projectId': force_project_id, + 'projectNumber': 'N/A' + }) # Enumerate projects accessible by SA for project in project_list: - if target_project and target_project not in project['projectId']: - continue + if target_project and target_project not in project['projectId']: + continue project_id = project['projectId'] project_number = project['projectNumber'] @@ -138,7 +143,8 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]], # Get GCP Compute Resources compute_client = compute_client_for_credentials(credentials) if is_set(scan_config, 'compute_instances'): - project_result['compute_instances'] = crawl.get_compute_instances_names( + project_result[ + 'compute_instances'] = crawl.get_compute_instances_names( project_id, compute_client) if is_set(scan_config, 'compute_images'): project_result['compute_images'] = crawl.get_compute_images_names( @@ -164,15 +170,15 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]], project_result['subnets'] = crawl.get_subnets(project_id, compute_client) if is_set(scan_config, 'firewall_rules'): - project_result['firewall_rules'] = crawl.get_firewall_rules(project_id, - compute_client) + project_result[ + 'firewall_rules'] = crawl.get_firewall_rules( + project_id, compute_client) # Get GCP APP Resources if is_set(scan_config, 'app_services'): project_result['app_services'] = crawl.get_app_services( project_id, credentials) - # Get storage buckets if is_set(scan_config, 'storage_buckets'): dump_file_names = None @@ -180,30 +186,38 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]], obj = scan_config.get('storage_buckets', None) # Check if fetch_file_names flag is set to true if obj is not None and 
obj.get('fetch_file_names', False) is True: - dump_file_names = open(out_dir + '/%s.gcs' % project_id, 'w', encoding='utf-8') - project_result['storage_buckets'] = crawl.get_bucket_names(project_id, credentials, dump_file_names) + dump_file_names = open( + out_dir + '/%s.gcs' % project_id, 'w', encoding='utf-8') + project_result[ + 'storage_buckets'] = crawl.get_bucket_names( + project_id, credentials, dump_file_names) # Close dump file if it's open if dump_file_names is not None: dump_file_names.close() # Get DNS managed zones if is_set(scan_config, 'managed_zones'): - project_result['managed_zones'] = crawl.get_managed_zones(project_id, credentials) + project_result[ + 'managed_zones'] = crawl.get_managed_zones(project_id, credentials) # Get DNS policies if is_set(scan_config, 'dns_policies'): - project_result['dns_policies'] = crawl.list_dns_policies(project_id, credentials) + project_result[ + 'dns_policies'] = crawl.list_dns_policies(project_id, credentials) # Get GKE resources if is_set(scan_config, 'gke_clusters'): gke_client = gke_client_for_credentials(credentials) - project_result['gke_clusters'] = crawl.get_gke_clusters(project_id, gke_client) + project_result[ + 'gke_clusters'] = crawl.get_gke_clusters(project_id, gke_client) if is_set(scan_config, 'gke_images'): - project_result['gke_images'] = crawl.get_gke_images(project_id, credentials.token) + project_result[ + 'gke_images'] = crawl.get_gke_images(project_id, credentials.token) # Get SQL instances if is_set(scan_config, 'sql_instances'): - project_result['sql_instances'] = crawl.get_sql_instances(project_id, credentials) + project_result[ + 'sql_instances'] = crawl.get_sql_instances(project_id, credentials) # Get BigQuery databases and table names if is_set(scan_config, 'bq'): @@ -211,23 +225,33 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]], # Get PubSub Subscriptions if is_set(scan_config, 'pubsub_subs'): - project_result['pubsub_subs'] = crawl.get_pubsub_subscriptions(project_id, credentials) + project_result[ + 'pubsub_subs'] = crawl.get_pubsub_subscriptions( + project_id, credentials) # Get CloudFunctions list if is_set(scan_config, 'cloud_functions'): - project_result['cloud_functions'] = crawl.get_cloudfunctions(project_id, credentials) + project_result[ + 'cloud_functions'] = crawl.get_cloudfunctions( + project_id, credentials) # Get List of BigTable Instances if is_set(scan_config, 'bigtable_instances'): - project_result['bigtable_instances'] = crawl.get_bigtable_instances(project_id, credentials) + project_result[ + 'bigtable_instances'] = crawl.get_bigtable_instances( + project_id, credentials) # Get Spanner Instances if is_set(scan_config, 'spanner_instances'): - project_result['spanner_instances'] = crawl.get_spanner_instances(project_id, credentials) + project_result[ + 'spanner_instances'] = crawl.get_spanner_instances( + project_id, credentials) # Get CloudStore Instances if is_set(scan_config, 'cloudstore_instances'): - project_result['cloudstore_instances'] = crawl.get_filestore_instances(project_id, credentials) + project_result[ + 'cloudstore_instances'] = crawl.get_filestore_instances( + project_id, credentials) # Get list of KMS keys if is_set(scan_config, 'kms'): @@ -235,17 +259,18 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]], # Get information about Endpoints if is_set(scan_config, 'endpoints'): - project_result['endpoints'] = crawl.get_endpoints(project_id, credentials) + project_result[ + 'endpoints'] = crawl.get_endpoints(project_id, 
credentials) # Get list of API services enabled in the project if is_set(scan_config, 'services'): - project_result['services'] = crawl.list_services(project_id, credentials) + project_result[ + 'services'] = crawl.list_services(project_id, credentials) # Get list of cloud source repositories enabled in the project if is_set(scan_config, 'sourcerepos'): - project_result['sourcerepos'] = crawl.list_sourcerepo(project_id, credentials) - - + project_result[ + 'sourcerepos'] = crawl.list_sourcerepo(project_id, credentials) # trying to impersonate SAs within project if scan_config is not None: @@ -253,7 +278,8 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]], else: impers = {'impersonate': True} - # If 'impersonate' is set to True, attempt to impersonate the service account(s) within the project + # If 'impersonate' is set to True, attempt + # to impersonate the service account(s) within the project if impers is not None and impers.get('impersonate', False) is True: # If 'iam_policy' is not already set, retrieve the IAM policy @@ -261,7 +287,8 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]], iam_policy = crawl.get_iam_policy(project_id, credentials) # Get a list of all the service accounts associated with the project - project_service_accounts = crawl.get_associated_service_accounts(iam_policy) + project_service_accounts = crawl.get_associated_service_accounts( + iam_policy) # Iterate through each service account for candidate_service_account in project_service_accounts: @@ -271,55 +298,63 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]], continue try: - # Impersonate the current service account and obtain credentials - creds_impersonated = credsdb.impersonate_sa(iam_client, candidate_service_account) - - # Append the service account to the service_account_edges field in the project_result dict - context.service_account_queue.put((candidate_service_account, creds_impersonated, updated_chain)) - project_result['service_account_edges'].append(candidate_service_account) + # Impersonate the current service account + # and obtain credentials + creds_impersonated = credsdb.impersonate_sa( + iam_client, candidate_service_account) + + # Append the service account to the + # service_account_edges field in the project_result dict + context.service_account_queue.put(( + candidate_service_account, creds_impersonated, + updated_chain)) + project_result[ + 'service_account_edges'].append(candidate_service_account) # Log that impersonation was successful - logging.info('Successfully impersonated %s using %s', candidate_service_account, sa_name) + logging.info( + 'Successfully impersonated %s using %s', + candidate_service_account, sa_name) - except Exception: + except ImportError: # Log that impersonation failed - logging.error('Failed to get token for %s', candidate_service_account) + logging.error('Failed to get token for %s', + candidate_service_account) logging.error(sys.exc_info()[1]) + # Write out results to json DB + logging.info('Saving results for %s into the file', project_id) - # Write out results to json DB - logging.info('Saving results for %s into the file', project_id) - - sa_results_data = json.dumps(sa_results, indent=2, sort_keys=False) + sa_results_data = json.dumps(sa_results, indent=2, sort_keys=False) - with open(out_dir + '/%s.json' % project_id, 'a', - encoding='utf-8') as outfile: + with open(out_dir + '/%s.json' % project_id, 'a', + encoding='utf-8') as outfile: outfile.write(sa_results_data) - # 
Clean memory to avoid leak for large amount projects. - sa_results.clear() + # Clean memory to avoid leak for large amount projects. + sa_results.clear() # Define a function that returns an IAMCredentialsClient object # for the given credentials. def iam_client_for_credentials( - credentials: Credentials) -> iam_credentials.IAMCredentialsClient: - - return iam_credentials.IAMCredentialsClient(credentials=credentials) - + credentials: Credentials) -> iam_credentials.IAMCredentialsClient: + return iam_credentials.IAMCredentialsClient(credentials=credentials) def compute_client_for_credentials( - credentials: Credentials) -> discovery.Resource: + credentials: Credentials) -> discovery.Resource: """ Returns a Compute Engine API client instance for the given credentials. Args: - credentials (google.auth.credentials.Credentials): The credentials to use to + credentials (google.auth.credentials.Credentials): + The credentials to use to authenticate requests to the Compute Engine API. Returns: - googleapiclient.discovery.Resource: A Compute Engine API client instance. + googleapiclient.discovery.Resource: + A Compute Engine API client instance. """ return discovery.build( 'compute', # The name of the API to use. @@ -332,18 +367,19 @@ def compute_client_for_credentials( def gke_client_for_credentials( credentials: Credentials ) -> container_v1.services.cluster_manager.client.ClusterManagerClient: - # This function returns a ClusterManagerClient object for the given credentials - # It takes in a Credentials object as a parameter and returns a ClusterManagerClient object + # This function returns a ClusterManagerClient + # object for the given credentials. It takes in a Credentials object + # as a parameter and returns a ClusterManagerClient object # Create a ClusterManagerClient object with the given credentials return container_v1.services.cluster_manager.ClusterManagerClient( credentials=credentials) - def main(): # Set logging level for specific modules to suppress unwanted log messages - logging.getLogger('googleapiclient.discovery_cache').setLevel(logging.ERROR) + logging.getLogger( + 'googleapiclient.discovery_cache').setLevel(logging.ERROR) logging.getLogger('googleapiclient.http').setLevel(logging.ERROR) # Parse command line arguments @@ -367,9 +403,11 @@ def main(): if not keyfile.endswith('.json'): continue full_key_path = os.path.join(args.key_path, keyfile) - account_name, credentials = credsdb.get_creds_from_file(full_key_path) + account_name, credentials = credsdb.get_creds_from_file( + full_key_path) if credentials is None: - logging.error('Failed to retrieve credentials for %s', account_name) + logging.error( + 'Failed to retrieve credentials for %s', account_name) continue sa_tuples.append((account_name, credentials, [])) @@ -396,9 +434,11 @@ def main(): logging.info('Retrieving credentials for %s', account_name) credentials = credsdb.get_creds_from_data(access_token, - json.loads(account_creds)) + json.loads( + account_creds)) if credentials is None: - logging.error('Failed to retrieve access token for %s', account_name) + logging.error( + 'Failed to retrieve access token for %s', account_name) continue sa_tuples.append((account_name, credentials, [])) @@ -409,7 +449,8 @@ def main(): credentials = credsdb.creds_from_access_token(access_token_file) if credentials is None: - logging.error('Failed to retrieve credentials using token provided') + logging.error( + 'Failed to retrieve credentials using token provided') else: token_file_name = os.path.basename(access_token_file) 
sa_tuples.append((token_file_name, credentials, [])) @@ -420,7 +461,8 @@ def main(): credentials = credsdb.creds_from_refresh_token(refresh_token_file) if credentials is None: - logging.error('Failed to retrieve credentials using token provided') + logging.error( + 'Failed to retrieve credentials using token provided') else: token_file_name = os.path.basename(refresh_token_file) sa_tuples.append((token_file_name, credentials, [])) @@ -432,7 +474,9 @@ def main(): scan_config = json.load(f) # Call the crawl_loop function with the provided arguments - crawl_loop(sa_tuples, args.output, scan_config, args.target_project, force_projects_list) + crawl_loop( + sa_tuples, args.output, scan_config, + args.target_project, force_projects_list) # Return 0 to indicate successful execution return 0 From d968ac3b8ff77b5dbff03bf2e61a13cf9c06ace2 Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Thu, 6 Apr 2023 23:45:46 +0530 Subject: [PATCH 19/25] Update test_acceptance.py --- src/gcp_scanner/test_acceptance.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/gcp_scanner/test_acceptance.py b/src/gcp_scanner/test_acceptance.py index fa7207ed..f4456c60 100644 --- a/src/gcp_scanner/test_acceptance.py +++ b/src/gcp_scanner/test_acceptance.py @@ -50,14 +50,18 @@ SERVICES_COUNT = 1 SERVICE_ACCOUNTS_COUNT = 3 + def check_obj_entry(res_dict, subojects_count, entry_name, volatile=False): - # Check if an object entry exists in the given dictionary and has the expected number of objects + # Check if an object entry exists in the given dictionary + # and has the expected number of objects obj = res_dict.get(entry_name, None) if volatile is True: - assert obj is not None and (len(obj) == subojects_count or len(obj) == subojects_count - 1) + assert obj is not None and ( + len(obj) == subojects_count or len(obj) == subojects_count - 1) else: assert obj is not None and len(obj) == subojects_count + def validate_result(): # Load the results file and validate the resource counts file_name = os.listdir("res/")[0] @@ -88,7 +92,7 @@ def validate_result(): check_obj_entry(project, STORAGE_BUCKETS_COUNT, "storage_buckets") check_obj_entry(project, GKE_CLUSTERS_COUNT, "gke_clusters") - # Volatile test. US zone sometimes appear and disappear. + # Volatile test. US zone sometimes appear and disappear. 
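Editor's sketch: check_obj_entry in the acceptance test above tolerates an off-by-one count for volatile resources, such as the GKE image zone flagged in the comment right above. The tolerance rule on its own:

def assert_count(obj, expected: int, volatile: bool = False):
  assert obj is not None
  if volatile:
    # Volatile resources may report one item fewer between runs.
    assert len(obj) in (expected, expected - 1)
  else:
    assert len(obj) == expected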
check_obj_entry(project, GKE_IMAGES_COUNT, "gke_images", True) check_obj_entry(project, SQL_INSTANCES_COUNT, "sql_instances") @@ -109,17 +113,19 @@ def validate_result(): def test_acceptance(): # Create a directory to store the results os.mkdir("res") - - # Define the arguments to run the scanner in test mode and save results in the "res" directory + # Define the arguments to run the scanner in + # test mode and save results in the "res" directory testargs = ["__main__.py", "-m", "-p", "test-gcp-scanner", "-o", "res"] - # Patch the command-line arguments to run the scanner with the specified arguments + # Patch the command-line arguments to run + # the scanner with the specified arguments with unittest.mock.patch("sys.argv", testargs): - # Run the scanner with the patched arguments and assert that it returns 0 (indicating success) + # Run the scanner with the patched + # arguments and assert that it returns 0 (indicating success) assert scanner.main() == 0 - - # Assert that the number of files in the "res" directory is equal to RESULTS_JSON_COUNT + # Assert that the number of files in + # the "res" directory is equal to RESULTS_JSON_COUNT assert len(os.listdir("res/")) == RESULTS_JSON_COUNT - - # Validate the result to ensure that it conforms to the expected format and contains valid data + # Validate the result to ensure that it conforms to + # the expected format and contains valid data validate_result() From 4a38f67f42eab75a67d6d2866b6b1e7a42a8b243 Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Thu, 6 Apr 2023 23:54:42 +0530 Subject: [PATCH 20/25] Update test_unit.py --- src/gcp_scanner/test_unit.py | 51 ++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/src/gcp_scanner/test_unit.py b/src/gcp_scanner/test_unit.py index b3c08ba5..cab63693 100644 --- a/src/gcp_scanner/test_unit.py +++ b/src/gcp_scanner/test_unit.py @@ -37,6 +37,7 @@ PROJECT_NAME = "test-gcp-scanner" + def print_diff(f1, f2): """ A function that prints the differences between two files. @@ -58,6 +59,7 @@ def print_diff(f1, f2): print(line) res += line + def save_to_test_file(res): """ A function that saves the result to a file in JSON format. 
@@ -86,7 +88,8 @@ def compare_volatile(f1, f2): if line in file_1_text: continue else: - print(f"The following line was not identified in the output:\n{line}") + print( + f"The following line was not identified in the output:\n{line}") res = False return res @@ -143,13 +146,15 @@ def test_creds_fetching(): c.execute(""" CREATE TABLE IF NOT EXISTS access_tokens (account_id TEXT PRIMARY KEY, - access_token TEXT, token_expiry TIMESTAMP, + access_token TEXT, token_expiry TIMESTAMP, rapt_token TEXT, id_token TEXT) """) # Insert test data values into the access tokens database - valid_tm = datetime.datetime.now() + datetime.timedelta(hours=2, minutes=10) - expired_tm = datetime.datetime.now() - datetime.timedelta(hours=2, minutes=10) + valid_tm = datetime.datetime.now() + datetime.timedelta( + hours=2, minutes=10) + expired_tm = datetime.datetime.now() - datetime.timedelta( + hours=2, minutes=10) sqlite_insert_with_param = """INSERT INTO "access_tokens" ("account_id", "access_token", "token_expiry", "rapt_token", "id_token") @@ -162,7 +167,8 @@ def test_creds_fetching(): c.execute(sqlite_insert_with_param, data_value) conn.commit() - # Assert that the access tokens dictionary can be retrieved from the credentials database + # Assert that the access tokens dictionary + # can be retrieved from the credentials database assert str(credsdb.get_access_tokens_dict("./unit/credentials.db")) == \ "{'test_account@gmail.com': 'ya.29c.TEST'}" @@ -290,7 +296,8 @@ class TestCrawler(unittest.TestCase): def setUp(self): # Get credentials from metadata and set up compute client _, self.credentials = credsdb.get_creds_from_metadata() - self.compute_client = scanner.compute_client_for_credentials(self.credentials) + self.compute_client = scanner.compute_client_for_credentials( + self.credentials) def test_credential(self): """Checks if credential is not none.""" @@ -301,19 +308,21 @@ def test_compute_instance_name(self): # Verify that the compute instance names are returned correctly self.assertTrue( verify( - crawl.get_compute_instances_names(PROJECT_NAME, self.compute_client), + crawl.get_compute_instances_names( + PROJECT_NAME, self.compute_client), "compute_instances", True, ) ) - def test_compute_disks_names(self): """Test compute disk names.""" - # Verify that the list of compute disks names returned by the function is non-empty + # Verify that the list of compute disks + # names returned by the function is non-empty self.assertTrue( verify( - crawl.get_compute_disks_names(PROJECT_NAME, self.compute_client), + crawl.get_compute_disks_names( + PROJECT_NAME, self.compute_client), "compute_disks", True, ) @@ -321,10 +330,12 @@ def test_compute_disks_names(self): def test_compute_images_names(self): """Test compute image names.""" - # Verify that the list of compute images names returned by the function is non-empty + # Verify that the list of compute + # images names returned by the function is non-empty self.assertTrue( verify( - crawl.get_compute_images_names(PROJECT_NAME, self.compute_client), + crawl.get_compute_images_names( + PROJECT_NAME, self.compute_client), "compute_images", True, ) @@ -332,7 +343,8 @@ def test_compute_images_names(self): def test_static_ips(self): """Test static IPs.""" - # Verify that the list of static IPs returned by the function is non-empty + # Verify that the list of static IPs + # returned by the function is non-empty self.assertTrue( verify( crawl.get_static_ips(PROJECT_NAME, self.compute_client), @@ -341,7 +353,6 @@ def test_static_ips(self): ) ) - def 
test_compute_snapshots(self): """Test compute snapshot.""" # Verify if the list of compute snapshots can be retrieved successfully @@ -388,13 +399,12 @@ def test_storage_buckets(self): ) ) - - def test_managed_zones(self): # Asserting that the managed zones are verified self.assertTrue( verify( - crawl.get_managed_zones(PROJECT_NAME, credentials=self.credentials), + crawl.get_managed_zones( + PROJECT_NAME, credentials=self.credentials), "managed_zones", True, ) @@ -460,7 +470,6 @@ def test_pubsub_subs(self): ) ) - def test_cloud_functions(self): """Test CloudFunctions list.""" # Verify that cloud_functions list is obtained successfully @@ -524,7 +533,8 @@ def test_endpoints(self): def test_services(self): """Test list of API services enabled in the project.""" - # Verify that a list of API services enabled in the project is obtained successfully + # Verify that a list of API services + # enabled in the project is obtained successfully self.assertTrue( verify( crawl.list_services(PROJECT_NAME, self.credentials), @@ -565,7 +575,8 @@ def test_project_info(self): def test_sourcerepos(self): """Test list of cloud source repositories in the project.""" - # Verify that a list of cloud source repositories in the project is obtained successfully + # Verify that a list of cloud source repositories + # in the project is obtained successfully self.assertTrue( verify( crawl.list_sourcerepo(PROJECT_NAME, self.credentials), From 63f8e5803d5caf1e50d81fbb5fa97fe2c48c962b Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Fri, 7 Apr 2023 17:43:11 +0530 Subject: [PATCH 21/25] Update arguments.py --- src/gcp_scanner/arguments.py | 216 +++++++++++++++++------------------ 1 file changed, 108 insertions(+), 108 deletions(-) diff --git a/src/gcp_scanner/arguments.py b/src/gcp_scanner/arguments.py index 09b516fd..82a1222f 100644 --- a/src/gcp_scanner/arguments.py +++ b/src/gcp_scanner/arguments.py @@ -23,7 +23,7 @@ # Define a function to create an argument parser using the argparse module def arg_parser(): - """Creates an argument parser using the `argparse` module and defines + """Creates an argument parser using the `argparse` module and defines several command-line arguments. Args: @@ -33,117 +33,117 @@ def arg_parser(): argparse.Namespace: A namespace object containing the parsed command-line arguments. 
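+
+  Example (mirrors the usage string defined below):
+    python3 scanner.py -o folder_to_save_results -g -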
""" - # Create a new parser object - parser = argparse.ArgumentParser( - prog='scanner.py', # program name - description='GCP Scanner', # description - usage='python3 %(prog)s -o folder_to_save_results -g -' - ) + # Create a new parser object + parser = argparse.ArgumentParser( + prog='scanner.py', # program name + description='GCP Scanner', # description + usage='python3 %(prog)s -o folder_to_save_results -g -' + ) - # Define a required argument group - required_named = parser.add_argument_group('Required parameters') - # Add a required argument to the group - required_named.add_argument( - '-o', # short option name - '--output-dir', # long option name - required=True, - dest='output', - default='scan_db', - help='Path to output directory' - ) + # Define a required argument group + required_named = parser.add_argument_group('Required parameters') + # Add a required argument to the group + required_named.add_argument( + '-o', # short option name + '--output-dir', # long option name + required=True, + dest='output', + default='scan_db', + help='Path to output directory' + ) - # Add command line arguments to the parser object - parser.add_argument( - '-k', - '--sa-key-path', - default=None, # Default value if option is not specified - dest='key_path', - help='Path to directory with SA keys in json format' # Help message - ) - parser.add_argument( - '-g', - '--gcloud-profile-path', - default=None, - dest='gcloud_profile_path', - help='Path to directory with gcloud profile. Specify - to search for\ - credentials in default gcloud config path' - ) - parser.add_argument( - '-m', - '--use-metadata', - default=False, - dest='use_metadata', - action='store_true', - help='Extract credentials from GCE instance metadata' - ) - parser.add_argument( - '-at', - '--access-token-files', - default=None, - dest='access_token_files', - help='A list of comma separated files with access token and OAuth scopes\ - TTL limited. A token and scopes should be stored in JSON format.' - ) - parser.add_argument( - '-rt', - '--refresh-token-files', - default=None, - dest='refresh_token_files', - help='A list of comma separated files with refresh_token, client_id,\ - token_uri and client_secret stored in JSON format.' - ) + # Add command line arguments to the parser object + parser.add_argument( + '-k', + '--sa-key-path', + default=None, # Default value if option is not specified + dest='key_path', + help='Path to directory with SA keys in json format' # Help message + ) + parser.add_argument( + '-g', + '--gcloud-profile-path', + default=None, + dest='gcloud_profile_path', + help='Path to directory with gcloud profile. Specify - to search for\ + credentials in default gcloud config path' + ) + parser.add_argument( + '-m', + '--use-metadata', + default=False, + dest='use_metadata', + action='store_true', + help='Extract credentials from GCE instance metadata' + ) + parser.add_argument( + '-at', + '--access-token-files', + default=None, + dest='access_token_files', + help='A list of comma separated files with access token and OAuth scopes\ + TTL limited. A token and scopes should be stored in JSON format.' + ) + parser.add_argument( + '-rt', + '--refresh-token-files', + default=None, + dest='refresh_token_files', + help='A list of comma separated files with refresh_token, client_id,\ + token_uri and client_secret stored in JSON format.' 
+    )

-  parser.add_argument(
-      '-s',
-      '--service-account',
-      default=None,
-      dest='key_name',
-      help='Name of individual SA to scan')
-  parser.add_argument(
-      '-p',
-      '--project',
-      default=None,
-      dest='target_project',
-      help='Name of individual project to scan')
-  parser.add_argument(
-      '-f',
-      '--force-projects',
-      default=None,
-      dest='force_projects',
-      help='Comma separated list of project names to include in the scan')
-  parser.add_argument(
-      '-c',
-      '--config',
-      default=None,
-      dest='config_path',
-      help='A path to config file with a set of specific resources to scan.')
-  parser.add_argument(
-      '-l',
-      '--logging',
-      default='WARNING',
-      dest='log_level',
-      choices=('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'),
-      help='Set logging level (INFO, WARNING, ERROR)')
-  parser.add_argument(
-      '-lf',
-      '--log-file',
-      default=None,
-      dest='log_file',
-      help='Save logs to the path specified rather than displaying in\
-        console')
+    parser.add_argument(
+        '-s',
+        '--service-account',
+        default=None,
+        dest='key_name',
+        help='Name of individual SA to scan')
+    parser.add_argument(
+        '-p',
+        '--project',
+        default=None,
+        dest='target_project',
+        help='Name of individual project to scan')
+    parser.add_argument(
+        '-f',
+        '--force-projects',
+        default=None,
+        dest='force_projects',
+        help='Comma separated list of project names to include in the scan')
+    parser.add_argument(
+        '-c',
+        '--config',
+        default=None,
+        dest='config_path',
+        help='A path to config file with a set of specific resources to scan.')
+    parser.add_argument(
+        '-l',
+        '--logging',
+        default='WARNING',
+        dest='log_level',
+        choices=('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'),
+        help='Set logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)')
+    parser.add_argument(
+        '-lf',
+        '--log-file',
+        default=None,
+        dest='log_file',
+        help='Save logs to the path specified rather than displaying in\
+          console')

-  # Parse the command line arguments
-  args: argparse.Namespace = parser.parse_args()
-
-  # Check if none of the necessary options are selected
-  if not args.key_path and not args.gcloud_profile_path \
-      and not args.use_metadata and not args.access_token_files\
-      and not args.refresh_token_files:
-
-    # If none of the options are selected, log an error message
-    logging.error(
-        'Please select at least one option to begin scan\
- -k/--sa-key-path,-g/--gcloud-profile-path, -m, -rt, -at')
-
-  # Return the parsed command line arguments
-  return args
+    # Parse the command line arguments
+    args: argparse.Namespace = parser.parse_args()
+
+    # Check if none of the necessary options are selected
+    if not args.key_path and not args.gcloud_profile_path \
+            and not args.use_metadata and not args.access_token_files\
+            and not args.refresh_token_files:
+
+        # If none of the options are selected, log an error message
+        logging.error(
+            'Please select at least one option to begin scan: '
+            '-k/--sa-key-path, -g/--gcloud-profile-path, -m, -rt, -at')
+
+    # Return the parsed command line arguments
+    return args

From 61fe0e4336a8774385ab072f494f5cbc0fa6991a Mon Sep 17 00:00:00 2001
From: Rohit Raj <88114930+ro4i7@users.noreply.github.com>
Date: Fri, 7 Apr 2023 17:50:12 +0530
Subject: [PATCH 22/25] Update models.py

---
 src/gcp_scanner/models.py | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/gcp_scanner/models.py b/src/gcp_scanner/models.py
index fc73344a..e2cdcc15 100644
--- a/src/gcp_scanner/models.py
+++ b/src/gcp_scanner/models.py
@@ -24,17 +24,17 @@
 class SpiderContext:
-  """A simple class to initialize the 
context with a list of root SAs""" - - def __init__(self, sa_tuples: List[Tuple[str, Credentials, List[str]]]): - """ - Initialize the context with a list of the root service accounts. - - Args: - sa_tuples: [(sa_name, sa_object, chain_so_far)] - """ - # Create a new queue to hold the service accounts - self.service_account_queue = queue.Queue() - # Add each service account from the sa_tuples list to the queue - for sa_tuple in sa_tuples: - self.service_account_queue.put(sa_tuple) + """A simple class to initialize the context with a list of root SAs""" + + def __init__(self, sa_tuples: List[Tuple[str, Credentials, List[str]]]): + """ + Initialize the context with a list of the root service accounts. + + Args: + sa_tuples: [(sa_name, sa_object, chain_so_far)] + """ + # Create a new queue to hold the service accounts + self.service_account_queue = queue.Queue() + # Add each service account from the sa_tuples list to the queue + for sa_tuple in sa_tuples: + self.service_account_queue.put(sa_tuple) From 0fe183b767d562e51030d18354d578a416781166 Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Fri, 7 Apr 2023 18:00:51 +0530 Subject: [PATCH 23/25] Update test_acceptance.py --- src/gcp_scanner/test_acceptance.py | 120 ++++++++++++++--------------- 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/src/gcp_scanner/test_acceptance.py b/src/gcp_scanner/test_acceptance.py index f4456c60..5b5a6716 100644 --- a/src/gcp_scanner/test_acceptance.py +++ b/src/gcp_scanner/test_acceptance.py @@ -52,80 +52,80 @@ def check_obj_entry(res_dict, subojects_count, entry_name, volatile=False): - # Check if an object entry exists in the given dictionary - # and has the expected number of objects - obj = res_dict.get(entry_name, None) - if volatile is True: - assert obj is not None and ( - len(obj) == subojects_count or len(obj) == subojects_count - 1) - else: - assert obj is not None and len(obj) == subojects_count + # Check if an object entry exists in the given dictionary + # and has the expected number of objects + obj = res_dict.get(entry_name, None) + if volatile is True: + assert obj is not None and ( + len(obj) == subojects_count or len(obj) == subojects_count - 1) + else: + assert obj is not None and len(obj) == subojects_count def validate_result(): - # Load the results file and validate the resource counts - file_name = os.listdir("res/")[0] - with open("res/" + file_name, "r", encoding="utf-8") as f: - res_data = json.load(f) + # Load the results file and validate the resource counts + file_name = os.listdir("res/")[0] + with open("res/" + file_name, "r", encoding="utf-8") as f: + res_data = json.load(f) - # project - project = res_data["projects"].get("test-gcp-scanner", None) - assert project is not None - assert len(project) == RESOURCE_COUNT + # project + project = res_data["projects"].get("test-gcp-scanner", None) + assert project is not None + assert len(project) == RESOURCE_COUNT - check_obj_entry(project, PROJECT_INFO_COUNT, "project_info") - check_obj_entry(project, IAM_POLICY_COUNT, "iam_policy") - check_obj_entry(project, SERVICE_ACCOUNTS_COUNT, "service_accounts") + check_obj_entry(project, PROJECT_INFO_COUNT, "project_info") + check_obj_entry(project, IAM_POLICY_COUNT, "iam_policy") + check_obj_entry(project, SERVICE_ACCOUNTS_COUNT, "service_accounts") - check_obj_entry(project, COMPUTE_INSTANCES_COUNT, "compute_instances") - check_obj_entry(project, COMPUTE_IMAGES_COUNT, "compute_images") - check_obj_entry(project, COMPUTE_DISKS_COUNT, 
"compute_disks") - check_obj_entry(project, COMPUTE_SNAPSHOTS_COUNT, "compute_snapshots") + check_obj_entry(project, COMPUTE_INSTANCES_COUNT, "compute_instances") + check_obj_entry(project, COMPUTE_IMAGES_COUNT, "compute_images") + check_obj_entry(project, COMPUTE_DISKS_COUNT, "compute_disks") + check_obj_entry(project, COMPUTE_SNAPSHOTS_COUNT, "compute_snapshots") - check_obj_entry(project, STATIC_IPS_COUNT, "static_ips") - check_obj_entry(project, SUBNETS_COUNT, "subnets") - check_obj_entry(project, FIREWALL_RULES_COUNT, "firewall_rules") - check_obj_entry(project, MANAGED_ZONES_COUNT, "managed_zones") + check_obj_entry(project, STATIC_IPS_COUNT, "static_ips") + check_obj_entry(project, SUBNETS_COUNT, "subnets") + check_obj_entry(project, FIREWALL_RULES_COUNT, "firewall_rules") + check_obj_entry(project, MANAGED_ZONES_COUNT, "managed_zones") - check_obj_entry(project, APP_SERVICES_COUNT, "app_services") + check_obj_entry(project, APP_SERVICES_COUNT, "app_services") - check_obj_entry(project, STORAGE_BUCKETS_COUNT, "storage_buckets") + check_obj_entry(project, STORAGE_BUCKETS_COUNT, "storage_buckets") - check_obj_entry(project, GKE_CLUSTERS_COUNT, "gke_clusters") - # Volatile test. US zone sometimes appear and disappear. - check_obj_entry(project, GKE_IMAGES_COUNT, "gke_images", True) + check_obj_entry(project, GKE_CLUSTERS_COUNT, "gke_clusters") + # Volatile test. US zone sometimes appear and disappear. + check_obj_entry(project, GKE_IMAGES_COUNT, "gke_images", True) - check_obj_entry(project, SQL_INSTANCES_COUNT, "sql_instances") - check_obj_entry(project, BQ_COUNT, "bq") - check_obj_entry(project, BIGTABLE_COUNT, "bigtable_instances") - check_obj_entry(project, SPANNER_COUNT, "spanner_instances") - check_obj_entry(project, CLOUDSTORE_COUNT, "cloudstore_instances") + check_obj_entry(project, SQL_INSTANCES_COUNT, "sql_instances") + check_obj_entry(project, BQ_COUNT, "bq") + check_obj_entry(project, BIGTABLE_COUNT, "bigtable_instances") + check_obj_entry(project, SPANNER_COUNT, "spanner_instances") + check_obj_entry(project, CLOUDSTORE_COUNT, "cloudstore_instances") - check_obj_entry(project, PUBSUB_COUNT, "pubsub_subs") - check_obj_entry(project, CLOUD_FUNCTIONS, "cloud_functions") - check_obj_entry(project, ENDPOINTS_COUNT, "endpoints") + check_obj_entry(project, PUBSUB_COUNT, "pubsub_subs") + check_obj_entry(project, CLOUD_FUNCTIONS, "cloud_functions") + check_obj_entry(project, ENDPOINTS_COUNT, "endpoints") - check_obj_entry(project, KMS_COUNT, "kms") + check_obj_entry(project, KMS_COUNT, "kms") - check_obj_entry(project, SERVICES_COUNT, "services") + check_obj_entry(project, SERVICES_COUNT, "services") def test_acceptance(): - # Create a directory to store the results - os.mkdir("res") - # Define the arguments to run the scanner in - # test mode and save results in the "res" directory - testargs = ["__main__.py", "-m", "-p", "test-gcp-scanner", "-o", "res"] - - # Patch the command-line arguments to run - # the scanner with the specified arguments - with unittest.mock.patch("sys.argv", testargs): - # Run the scanner with the patched - # arguments and assert that it returns 0 (indicating success) - assert scanner.main() == 0 - # Assert that the number of files in - # the "res" directory is equal to RESULTS_JSON_COUNT - assert len(os.listdir("res/")) == RESULTS_JSON_COUNT - # Validate the result to ensure that it conforms to - # the expected format and contains valid data - validate_result() + # Create a directory to store the results + os.mkdir("res") + # Define the arguments to run 
the scanner in + # test mode and save results in the "res" directory + testargs = ["__main__.py", "-m", "-p", "test-gcp-scanner", "-o", "res"] + + # Patch the command-line arguments to run + # the scanner with the specified arguments + with unittest.mock.patch("sys.argv", testargs): + # Run the scanner with the patched + # arguments and assert that it returns 0 (indicating success) + assert scanner.main() == 0 + # Assert that the number of files in + # the "res" directory is equal to RESULTS_JSON_COUNT + assert len(os.listdir("res/")) == RESULTS_JSON_COUNT + # Validate the result to ensure that it conforms to + # the expected format and contains valid data + validate_result() From 8455a4ea4d9f134bc078b7c5465f9b3696313cfe Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Fri, 7 Apr 2023 19:46:51 +0530 Subject: [PATCH 24/25] Update credsdb.py --- src/gcp_scanner/credsdb.py | 798 ++++++++++++++++++------------------- 1 file changed, 399 insertions(+), 399 deletions(-) diff --git a/src/gcp_scanner/credsdb.py b/src/gcp_scanner/credsdb.py index 8b8094d7..6b726ca2 100644 --- a/src/gcp_scanner/credsdb.py +++ b/src/gcp_scanner/credsdb.py @@ -40,493 +40,493 @@ def credentials_from_token(access_token: str, refresh_token: Optional[str], token_uri: Optional[str], client_id: Optional[str], client_secret: Optional[str], scopes_user: Optional[str]) -> Credentials: - """ - Create Credentials instance from tokens - """ - return credentials.Credentials(access_token, refresh_token=refresh_token, - token_uri=token_uri, client_id=client_id, - client_secret=client_secret, - scopes=scopes_user) + """ + Create Credentials instance from tokens + """ + return credentials.Credentials(access_token, refresh_token=refresh_token, + token_uri=token_uri, client_id=client_id, + client_secret=client_secret, + scopes=scopes_user) def get_creds_from_file(file_path: str) -> Tuple[str, Credentials]: - """ - Retrieve Credentials instance from a service account json file. - """ + """ + Retrieve Credentials instance from a service account json file. + """ - logging.info("Retrieving credentials from %s", file_path) - creds = service_account.Credentials.from_service_account_file(file_path) - return creds.service_account_email, creds + logging.info("Retrieving credentials from %s", file_path) + creds = service_account.Credentials.from_service_account_file(file_path) + return creds.service_account_email, creds def get_creds_from_json(parsed_keyfile: Mapping[str, str]) -> Credentials: - """ - Retrieve Credentials instance from parsed service account info. - """ + """ + Retrieve Credentials instance from parsed service account info. + """ - return service_account.Credentials.from_service_account_info( - parsed_keyfile) + return service_account.Credentials.from_service_account_info( + parsed_keyfile) def get_creds_from_metadata() -> Tuple[Optional[str], Optional[Credentials]]: - """Retrieves a Credentials instance from compute instance metadata. - - Returns: - Tuple[Optional[str], Optional[Credentials]]: - A tuple containing the email associated with the - credentials and the constructed credentials. 
- """ - - # Print a message to indicate that we are - # retrieving the access token from instance metadata - print("Retrieving access token from instance metadata") - - # Define the URLs that we need to - # access to get the token, scopes, and email - token_url = "http://metadata.google.internal/computeMetadata/v1/" \ - "instance/service-accounts/default/token" - scope_url = "http://metadata.google.internal/computeMetadata/v1/" \ - "instance/service-accounts/default/scopes" - email_url = "http://metadata.google.internal/computeMetadata/v1/" \ - "instance/service-accounts/default/email" - - # Set the headers for the requests - headers = {"Metadata-Flavor": "Google"} - - try: - # Make the request to get the access token - res = requests.get(token_url, headers=headers) + """Retrieves a Credentials instance from compute instance metadata. - # Check if the response was successful - if not res.ok: - logging.error("Failed to retrieve instance token. " - "Status code %d", res.status_code) - token_url = None + Returns: + Tuple[Optional[str], Optional[Credentials]]: + A tuple containing the email associated with the + credentials and the constructed credentials. + """ - return None, None + # Print a message to indicate that we are + # retrieving the access token from instance metadata + print("Retrieving access token from instance metadata") - # Parse the JSON response and get the access token - token = res.json()["access_token"] + # Define the URLs that we need to + # access to get the token, scopes, and email + token_url = "http://metadata.google.internal/computeMetadata/v1/" \ + "instance/service-accounts/default/token" + scope_url = "http://metadata.google.internal/computeMetadata/v1/" \ + "instance/service-accounts/default/scopes" + email_url = "http://metadata.google.internal/computeMetadata/v1/" \ + "instance/service-accounts/default/email" - # Make the request to get the instance scopes - res = requests.get(scope_url, headers=headers) + # Set the headers for the requests + headers = {"Metadata-Flavor": "Google"} - # Check if the response was successful - if not res.ok: - logging.error("Failed to retrieve instance scopes. " - "Status code %d", res.status_code) - return None, None + try: + # Make the request to get the access token + res = requests.get(token_url, headers=headers) + + # Check if the response was successful + if not res.ok: + logging.error("Failed to retrieve instance token. " + "Status code %d", res.status_code) + token_url = None + + return None, None + + # Parse the JSON response and get the access token + token = res.json()["access_token"] + + # Make the request to get the instance scopes + res = requests.get(scope_url, headers=headers) + + # Check if the response was successful + if not res.ok: + logging.error("Failed to retrieve instance scopes. " + "Status code %d", res.status_code) + return None, None - # Get the instance scopes from the response - instance_scopes = res.content.decode("utf-8") + # Get the instance scopes from the response + instance_scopes = res.content.decode("utf-8") - # Make the request to get the instance email - res = requests.get(email_url, headers=headers) + # Make the request to get the instance email + res = requests.get(email_url, headers=headers) # Check if the response was successful - if not res.ok: - logging.error("Failed to retrieve instance email. " - "Status code %d", res.status_code) - return None, None + if not res.ok: + logging.error("Failed to retrieve instance email. 
" + "Status code %d", res.status_code) + return None, None - # Get the instance email from the response - email = res.content.decode("utf-8") + # Get the instance email from the response + email = res.content.decode("utf-8") - except ImportError: - # Log an error message if any exception occurred - logging.error("Failed to retrieve instance metadata") - logging.error(sys.exc_info()[1]) - return None, None + except ImportError: + # Log an error message if any exception occurred + logging.error("Failed to retrieve instance metadata") + logging.error(sys.exc_info()[1]) + return None, None # Print a message to indicate that # we have successfully retrieved the instance metadata print("Successfully retrieved instance metadata") - # Log the length of the access token, instance email, and instance scopes - logging.info("Access token length: %d", len(token)) - logging.info("Instance email: %s", email) - logging.info("Instance scopes: %s", instance_scopes) + # Log the length of the access token, instance email, and instance scopes + logging.info("Access token length: %d", len(token)) + logging.info("Instance email: %s", email) + logging.info("Instance scopes: %s", instance_scopes) - # Return the email and credentials - # constructed from the token and instance scopes - return email, credentials_from_token( - token, None, None, None, None, instance_scopes) + # Return the email and credentials + # constructed from the token and instance scopes + return email, credentials_from_token( + token, None, None, None, None, instance_scopes) def get_creds_from_data( access_token: str, parsed_keyfile: Dict[str, str]) -> Credentials: - """Creates a Credentials instance from parsed service account info. - - The function currently supports two types of credentials. - Service account key in json format and user account with refresh token. - - Args: - access_token: An Oauth2 access token. It can be None. - parsed_keyfile: The service account info in Google format. - - Returns: - google.auth.service_account.Credentials: The constructed credentials. - """ - - # Initialize the variable to None - creds = None - - # Check if the parsed_keyfile contains "refresh_token" - if "refresh_token" in parsed_keyfile: - logging.info("Identified user credentials in gcloud profile") - # this is user account credentials with refresh token - creds = credentials_from_token( - access_token, - parsed_keyfile["refresh_token"], - parsed_keyfile["token_uri"], - parsed_keyfile["client_id"], - parsed_keyfile["client_secret"], - parsed_keyfile["scopes"] - ) - # Check if the parsed_keyfile contains "private_key" - elif "private_key" in parsed_keyfile: - logging.info( - "Identified service account key credentials in gcloud profile") - # this is a service account key with private key - creds = get_creds_from_json(parsed_keyfile) - else: - logging.error("unknown type of credentials") + """Creates a Credentials instance from parsed service account info. + + The function currently supports two types of credentials. + Service account key in json format and user account with refresh token. + + Args: + access_token: An Oauth2 access token. It can be None. + parsed_keyfile: The service account info in Google format. + + Returns: + google.auth.service_account.Credentials: The constructed credentials. 
+ """ + + # Initialize the variable to None + creds = None + + # Check if the parsed_keyfile contains "refresh_token" + if "refresh_token" in parsed_keyfile: + logging.info("Identified user credentials in gcloud profile") + # this is user account credentials with refresh token + creds = credentials_from_token( + access_token, + parsed_keyfile["refresh_token"], + parsed_keyfile["token_uri"], + parsed_keyfile["client_id"], + parsed_keyfile["client_secret"], + parsed_keyfile["scopes"] + ) + # Check if the parsed_keyfile contains "private_key" + elif "private_key" in parsed_keyfile: + logging.info( + "Identified service account key credentials in gcloud profile") + # this is a service account key with private key + creds = get_creds_from_json(parsed_keyfile) + else: + logging.error("unknown type of credentials") # Return the constructed credentials - return creds + return creds def find_creds(explicit_path: Optional[str] = None) -> List[str]: - """ - The function searches the disk and returns - a list of files with GCP credentials. - - Args: - explicit_path: An explicit path on disk to search. - If None, the function searches in - standard locations where gcloud profiles are usually located. - - Returns: - list: The list of files with GCP credentials. - """ - - logging.info("Searching for credentials on disk") - list_of_creds_files = [] - - # Create a list of search paths to scan for credentials.db - search_paths = [] - if explicit_path is not None and explicit_path != "-": - search_paths.append(explicit_path) - else: - credentials_db_search_places.append(os.getenv("HOME") + "/") - for dir_path in credentials_db_search_places: - if not os.access(dir_path, os.R_OK): - continue - for subdir_name in os.listdir(dir_path): - full_path = os.path.join(dir_path, subdir_name, "gcloud") - search_paths.append(full_path) + """ + The function searches the disk and returns + a list of files with GCP credentials. + + Args: + explicit_path: An explicit path on disk to search. + If None, the function searches in + standard locations where gcloud profiles are usually located. + + Returns: + list: The list of files with GCP credentials. 
+ """ + + logging.info("Searching for credentials on disk") + list_of_creds_files = [] + + # Create a list of search paths to scan for credentials.db + search_paths = [] + if explicit_path is not None and explicit_path != "-": + search_paths.append(explicit_path) + else: + credentials_db_search_places.append(os.getenv("HOME") + "/") + for dir_path in credentials_db_search_places: + if not os.access(dir_path, os.R_OK): + continue + for subdir_name in os.listdir(dir_path): + full_path = os.path.join(dir_path, subdir_name, "gcloud") + search_paths.append(full_path) # Scan each search path for credentials.db # and add them to the list_of_creds_files - for dir_path in search_paths: - print(f"Scanning {dir_path} for credentials.db") - full_path = os.path.join(dir_path, "credentials.db") - if os.path.exists(full_path) and os.access(full_path, os.R_OK): - print(f"Identified accessible gcloud config profile {full_path}") - list_of_creds_files.append(full_path) + for dir_path in search_paths: + print(f"Scanning {dir_path} for credentials.db") + full_path = os.path.join(dir_path, "credentials.db") + if os.path.exists(full_path) and os.access(full_path, os.R_OK): + print(f"Identified accessible gcloud config profile {full_path}") + list_of_creds_files.append(full_path) - print(f"Identified {len(list_of_creds_files)} credential DBs") - return list_of_creds_files + print(f"Identified {len(list_of_creds_files)} credential DBs") + return list_of_creds_files def get_access_tokens_dict(path_to_creds_db: str) -> Dict[str, str]: - """ - The function searches and extracts OAuth2 - access_tokens from a SQLite3 database. + """ + The function searches and extracts OAuth2 + access_tokens from a SQLite3 database. - Args: - path_to_creds_db: A path to SQLite3 database with gcloud access tokens. + Args: + path_to_creds_db: A path to SQLite3 database with gcloud access tokens. - Returns: - dict: The dictionary of account names and corresponding tokens. - """ + Returns: + dict: The dictionary of account names and corresponding tokens. 
+ """ - access_tokens_dict = dict() + access_tokens_dict = dict() - # Replace credentials.db with access_tokens.db - # to get the path to access tokens database - access_tokens_path = path_to_creds_db.replace("credentials.db", - "access_tokens.db") + # Replace credentials.db with access_tokens.db + # to get the path to access tokens database + access_tokens_path = path_to_creds_db.replace("credentials.db", + "access_tokens.db") - # Check if the access tokens database exists and can be read - if os.path.exists(access_tokens_path) and os.access(access_tokens_path, - os.R_OK): + # Check if the access tokens database exists and can be read + if os.path.exists(access_tokens_path) and os.access(access_tokens_path, + os.R_OK): - # If the access tokens database exists and can be read, connect to it - logging.info("Identified access tokens DB in %s", access_tokens_path) - conn = sqlite3.connect(access_tokens_path) - cursor = conn.execute("SELECT account_id, access_token," - "token_expiry FROM access_tokens") + # If the access tokens database exists and can be read, connect to it + logging.info("Identified access tokens DB in %s", access_tokens_path) + conn = sqlite3.connect(access_tokens_path) + cursor = conn.execute("SELECT account_id, access_token," + "token_expiry FROM access_tokens") - # Fetch all rows from the access tokens database - rows = cursor.fetchall() + # Fetch all rows from the access tokens database + rows = cursor.fetchall() - # Iterate over each row - for row in rows: - associated_account = row[0] - token = row[1] - expiration_date = row[2] + # Iterate over each row + for row in rows: + associated_account = row[0] + token = row[1] + expiration_date = row[2] - # Omit milliseconds from the expiration date - expiration_date = expiration_date.split(".")[0] + # Omit milliseconds from the expiration date + expiration_date = expiration_date.split(".")[0] - # Convert the expiration date to a datetime object - token_time_obj = datetime.datetime.strptime( - expiration_date, "%Y-%m-%d %H:%M:%S") + # Convert the expiration date to a datetime object + token_time_obj = datetime.datetime.strptime( + expiration_date, "%Y-%m-%d %H:%M:%S") - # Check if the token has expired - if datetime.datetime.now() > token_time_obj: - logging.info("Token for %s expired", associated_account) - continue + # Check if the token has expired + if datetime.datetime.now() > token_time_obj: + logging.info("Token for %s expired", associated_account) + continue # Add the associated account and # token to the access tokens dictionary - access_tokens_dict[associated_account] = token + access_tokens_dict[associated_account] = token - return access_tokens_dict + return access_tokens_dict def extract_creds(path_to_creds_db: str) -> List[ Tuple[str, str, str]]: - """ - The function extracts refresh and associated access - tokens from sqlite3 DBs. - - Args: - path_to_creds_db (str): A path to sqlite3 DB - with gcloud refresh tokens. - - Returns: - List of tuples: (account name, refresh token, access token). 
- """ - # Log that we are opening the database - logging.info("Opening %s DB", path_to_creds_db) - - # Create a named tuple for service accounts - SA = collections.namedtuple("SA", "account_name, creds, token") - - # Initialize an empty list for the results - res = list() - - # Connect to the database - conn = sqlite3.connect(path_to_creds_db) - # Select account_id and value from the credentials table - cursor = conn.execute("SELECT account_id, value FROM credentials") - rows = cursor.fetchall() - - # Check if the database is empty - if len(rows) <= 0: - logging.error("Empty database") - return None + """ + The function extracts refresh and associated access + tokens from sqlite3 DBs. + + Args: + path_to_creds_db (str): A path to sqlite3 DB + with gcloud refresh tokens. + + Returns: + List of tuples: (account name, refresh token, access token). + """ + # Log that we are opening the database + logging.info("Opening %s DB", path_to_creds_db) + + # Create a named tuple for service accounts + SA = collections.namedtuple("SA", "account_name, creds, token") + + # Initialize an empty list for the results + res = list() + + # Connect to the database + conn = sqlite3.connect(path_to_creds_db) + # Select account_id and value from the credentials table + cursor = conn.execute("SELECT account_id, value FROM credentials") + rows = cursor.fetchall() + + # Check if the database is empty + if len(rows) <= 0: + logging.error("Empty database") + return None - # We also want to check for access_tokens to avoid unnecessary refreshing - access_tokens = get_access_tokens_dict(path_to_creds_db) + # We also want to check for access_tokens to avoid unnecessary refreshing + access_tokens = get_access_tokens_dict(path_to_creds_db) - # Loop through the rows - for row in rows: - access_token = None + # Loop through the rows + for row in rows: + access_token = None - # Check if the access token exists and is valid - if access_tokens.get(row[0], None) is not None: - logging.info("Found valid access token for %s", row[0]) - access_token = access_tokens[row[0]] + # Check if the access token exists and is valid + if access_tokens.get(row[0], None) is not None: + logging.info("Found valid access token for %s", row[0]) + access_token = access_tokens[row[0]] - # Append the account name, credentials, and access - # token to the results list - res.append(SA(row[0], row[1], access_token)) + # Append the account name, credentials, and access + # token to the results list + res.append(SA(row[0], row[1], access_token)) - # Print the number of identified credential entries - print(f"Identified {len(res)} credential entries") + # Print the number of identified credential entries + print(f"Identified {len(res)} credential entries") - # Return the results list - return res + # Return the results list + return res def get_account_creds_list(gcloud_profile_path: Optional[ str] = None) -> List[List[Tuple[str, str, str]]]: - """The function searches and extracts gcloud credentials from disk. + """The function searches and extracts gcloud credentials from disk. - Args: - gcloud_profile_path: An explicit gcloud profile path on disk to - search. If None, the function searches in standard locations where - gcloud profiles are usually located. + Args: + gcloud_profile_path: An explicit gcloud profile path on disk to + search. If None, the function searches in standard locations where + gcloud profiles are usually located. - Returns: - list: A list of tuples (account name, refresh token, access token). 
- """ - accounts = list() # initialize an empty list - creds_file_list = find_creds(gcloud_profile_path) - for creds_file in creds_file_list: - res = extract_creds(creds_file) - if res is not None: - accounts.append(res) - return accounts # return the accounts list + Returns: + list: A list of tuples (account name, refresh token, access token). + """ + accounts = list() # initialize an empty list + creds_file_list = find_creds(gcloud_profile_path) + for creds_file in creds_file_list: + res = extract_creds(creds_file) + if res is not None: + accounts.append(res) + return accounts # return the accounts list def impersonate_sa(iam_client: IAMCredentialsClient, target_account: str) -> Credentials: - """ - The function is used to impersonate a service account. - - Args: - iam_client (IAMCredentialsClient): The IAMCredentialsClient object. - target_account (str): The name of the service account to impersonate. - - Returns: - Credentials: The constructed credentials. - """ - - # Define the scopes for the service account - scopes_sa = ["https://www.googleapis.com/auth/cloud-platform"] - - # Generate an access token for the service account - intermediate_access_token = iam_client.generate_access_token( - name=target_account, - scope=scopes_sa, - retry=None, - # lifetime="43200" - ) - - # Use the access token to construct credentials - return credentials_from_token( - intermediate_access_token.access_token, - None, - None, - None, - None, - scopes_sa - ) + """ + The function is used to impersonate a service account. + + Args: + iam_client (IAMCredentialsClient): The IAMCredentialsClient object. + target_account (str): The name of the service account to impersonate. + + Returns: + Credentials: The constructed credentials. + """ + + # Define the scopes for the service account + scopes_sa = ["https://www.googleapis.com/auth/cloud-platform"] + + # Generate an access token for the service account + intermediate_access_token = iam_client.generate_access_token( + name=target_account, + scope=scopes_sa, + retry=None, + # lifetime="43200" + ) + + # Use the access token to construct credentials + return credentials_from_token( + intermediate_access_token.access_token, + None, + None, + None, + None, + scopes_sa + ) def creds_from_access_token(access_token_file): - """The function is used to obtain Google Auth - Credentials from access token. - - Args: - access_token_file: a path to a file with access token - and scopes stored in JSON format. Example: - { - "access_token": "", - "scopes": [ - "https://www.googleapis.com/auth/devstorage.read_only", - "https://www.googleapis.com/auth/logging.write", - "https://www.googleapis.com/auth/monitoring.write", - "https://www.googleapis.com/auth/servicecontrol", - "https://www.googleapis.com/auth/service.management.readonly", - "https://www.googleapis.com/auth/trace.append" - ] - } - - Returns: - google.auth.service_account.Credentials: The constructed credentials. - """ - - # Load the access token and scopes from the specified file - with open(access_token_file, encoding="utf-8") as f: - creds_dict = json.load(f) + """The function is used to obtain Google Auth + Credentials from access token. + + Args: + access_token_file: a path to a file with access token + and scopes stored in JSON format. 
Example: + { + "access_token": "", + "scopes": [ + "https://www.googleapis.com/auth/devstorage.read_only", + "https://www.googleapis.com/auth/logging.write", + "https://www.googleapis.com/auth/monitoring.write", + "https://www.googleapis.com/auth/servicecontrol", + "https://www.googleapis.com/auth/service.management.readonly", + "https://www.googleapis.com/auth/trace.append" + ] + } + + Returns: + google.auth.service_account.Credentials: The constructed credentials. + """ + + # Load the access token and scopes from the specified file + with open(access_token_file, encoding="utf-8") as f: + creds_dict = json.load(f) # Check if user-defined scopes are provided - user_scopes = creds_dict.get("scopes", None) - if user_scopes is None: + user_scopes = creds_dict.get("scopes", None) + if user_scopes is None: # Use default scopes if not provided - user_scopes = ["https://www.googleapis.com/auth/cloud-platform"] - - # Construct credentials from the access token and scopes - return credentials_from_token( - creds_dict["access_token"], - None, - None, - None, - None, - user_scopes - ) + user_scopes = ["https://www.googleapis.com/auth/cloud-platform"] + + # Construct credentials from the access token and scopes + return credentials_from_token( + creds_dict["access_token"], + None, + None, + None, + None, + user_scopes + ) def creds_from_refresh_token(refresh_token_file): - """ - The function is used to obtain Google Auth Credentials from refresh token. - - Args: - - refresh_token_file: a path to a file with refresh_token, client_id, - client_secret, and token_uri stored in JSON format. - Example: - { - "refresh_token": "", - "client_id": "id", - "client_secret": "secret", - scopes: [ - https://www.googleapis.com/auth/devstorage.read_only, - https://www.googleapis.com/auth/logging.write, - https://www.googleapis.com/auth/monitoring.write, - https://www.googleapis.com/auth/servicecontrol, - https://www.googleapis.com/auth/service.management.readonly, - https://www.googleapis.com/auth/trace.append - ] - } - - Returns: - - google.auth.service_account.Credentials: The constructed credentials. - """ - - # Open the refresh_token_file in utf-8 encoding - # and load the contents to a dictionary - with open(refresh_token_file, encoding="utf-8") as f: - creds_dict = json.load(f) - - # Get the user-defined scopes from the refresh token dictionary - user_scopes = get_scopes_from_refresh_token(creds_dict) - - # Construct and return a google.auth.service_account.Credentials object - return credentials.Credentials( - None, - refresh_token=creds_dict["refresh_token"], - token_uri=creds_dict["token_uri"], - client_id=creds_dict["client_id"], - client_secret=creds_dict["client_secret"], - scopes=user_scopes, - ) + """ + The function is used to obtain Google Auth Credentials from refresh token. + + Args: + - refresh_token_file: a path to a file with refresh_token, client_id, + client_secret, and token_uri stored in JSON format. + Example: + { + "refresh_token": "", + "client_id": "id", + "client_secret": "secret", + scopes: [ + https://www.googleapis.com/auth/devstorage.read_only, + https://www.googleapis.com/auth/logging.write, + https://www.googleapis.com/auth/monitoring.write, + https://www.googleapis.com/auth/servicecontrol, + https://www.googleapis.com/auth/service.management.readonly, + https://www.googleapis.com/auth/trace.append + ] + } + + Returns: + - google.auth.service_account.Credentials: The constructed credentials. 
+ """ + + # Open the refresh_token_file in utf-8 encoding + # and load the contents to a dictionary + with open(refresh_token_file, encoding="utf-8") as f: + creds_dict = json.load(f) + + # Get the user-defined scopes from the refresh token dictionary + user_scopes = get_scopes_from_refresh_token(creds_dict) + + # Construct and return a google.auth.service_account.Credentials object + return credentials.Credentials( + None, + refresh_token=creds_dict["refresh_token"], + token_uri=creds_dict["token_uri"], + client_id=creds_dict["client_id"], + client_secret=creds_dict["client_secret"], + scopes=user_scopes, + ) def get_scopes_from_refresh_token(context) -> Union[List[str], None]: - """ - The function is used to obtain scopes from a refresh token. - - Args: - context: a dictionary containing refresh token data - Example: - { - "refresh_token": "", - "client_id": "id", - "client_secret": "secret", - } - - Returns: - a list of scopes or None - """ - - # Obtain access token from the refresh token - token_uri = "https://oauth2.googleapis.com/token" - context["grant_type"] = "refresh_token" - - try: - response = requests.post(token_uri, data=context, timeout=5) - - # prepare the scope string into a list - raw = response.json().get("scope", None) - return raw.split(" ") if raw else None - - except ImportError as ex: - logging.error("Failed to retrieve access token from refresh token.") - logging.debug("Token refresh exception", exc_info=ex) - - return None + """ + The function is used to obtain scopes from a refresh token. + + Args: + context: a dictionary containing refresh token data + Example: + { + "refresh_token": "", + "client_id": "id", + "client_secret": "secret", + } + + Returns: + a list of scopes or None + """ + + # Obtain access token from the refresh token + token_uri = "https://oauth2.googleapis.com/token" + context["grant_type"] = "refresh_token" + + try: + response = requests.post(token_uri, data=context, timeout=5) + + # prepare the scope string into a list + raw = response.json().get("scope", None) + return raw.split(" ") if raw else None + + except ImportError as ex: + logging.error("Failed to retrieve access token from refresh token.") + logging.debug("Token refresh exception", exc_info=ex) + + return None From 3618b77cae797cf295d7d491930e6773e5e8ca8e Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Fri, 7 Apr 2023 19:55:07 +0530 Subject: [PATCH 25/25] Update credsdb.py --- src/gcp_scanner/credsdb.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gcp_scanner/credsdb.py b/src/gcp_scanner/credsdb.py index 6b726ca2..579a2aef 100644 --- a/src/gcp_scanner/credsdb.py +++ b/src/gcp_scanner/credsdb.py @@ -138,9 +138,9 @@ def get_creds_from_metadata() -> Tuple[Optional[str], Optional[Credentials]]: logging.error(sys.exc_info()[1]) return None, None - # Print a message to indicate that - # we have successfully retrieved the instance metadata - print("Successfully retrieved instance metadata") + # Print a message to indicate that + # we have successfully retrieved the instance metadata + print("Successfully retrieved instance metadata") # Log the length of the access token, instance email, and instance scopes logging.info("Access token length: %d", len(token)) @@ -343,7 +343,7 @@ def extract_creds(path_to_creds_db: str) -> List[ # Append the account name, credentials, and access # token to the results list - res.append(SA(row[0], row[1], access_token)) + res.append(SA(row[0], row[1], access_token)) # Print the number 
of identified credential entries print(f"Identified {len(res)} credential entries")
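
A minimal end-to-end sketch of how the credsdb helpers touched in patches 24
and 25 chain together. This is illustrative only: the import path mirrors the
one used by scanner.py, and account names and file locations depend entirely
on the local gcloud setup.

    # Illustrative sketch: enumerate local gcloud profiles and build
    # Credentials objects from whatever they contain.
    import json

    from src.gcp_scanner import credsdb

    for creds_file in credsdb.find_creds():
        # extract_creds returns SA(account_name, creds, token) tuples,
        # or None when the credentials.db is empty.
        for account in credsdb.extract_creds(creds_file) or []:
            creds = credsdb.get_creds_from_data(
                account.token, json.loads(account.creds))
            if creds is not None:
                print(f"Usable credentials for {account.account_name}")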