From 2e8203e982a986f953e48206784c6bf89e206253 Mon Sep 17 00:00:00 2001
From: Rohit Raj <88114930+ro4i7@users.noreply.github.com>
Date: Thu, 6 Apr 2023 01:30:44 +0530
Subject: [PATCH 01/25] Update scanner.py

---
 scanner.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scanner.py b/scanner.py
index 92b31d3e..23a1a86e 100755
--- a/scanner.py
+++ b/scanner.py
@@ -14,5 +14,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Import the scanner module from the gcp_scanner package
 from src.gcp_scanner import scanner
-scanner.main()
\ No newline at end of file
+
+# Call the main function of the scanner module to start the scanning process
+scanner.main()

From 683d054c84f0763c3809db9c433f6de91fa05e66 Mon Sep 17 00:00:00 2001
From: Rohit Raj <88114930+ro4i7@users.noreply.github.com>
Date: Thu, 6 Apr 2023 01:31:16 +0530
Subject: [PATCH 02/25] Update requirements.txt

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 452b3f73..fc6987db 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
+# List of required Python packages and their corresponding versions
 pyu2f==0.1.5
 google-api-python-client==2.80.0
 google-cloud-container==2.17.4

From 501f4846526b0c37cfa48e948e1e26f8fad0f110 Mon Sep 17 00:00:00 2001
From: Rohit Raj <88114930+ro4i7@users.noreply.github.com>
Date: Thu, 6 Apr 2023 01:32:07 +0530
Subject: [PATCH 03/25] Update __init__.py

---
 src/gcp_scanner/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gcp_scanner/__init__.py b/src/gcp_scanner/__init__.py
index e69de29b..80238dad 100644
--- a/src/gcp_scanner/__init__.py
+++ b/src/gcp_scanner/__init__.py
@@ -0,0 +1 @@
+# Currently, this is an empty file.

From 8c12675b0858751baed4417bae0cd441c622be40 Mon Sep 17 00:00:00 2001
From: Rohit Raj <88114930+ro4i7@users.noreply.github.com>
Date: Thu, 6 Apr 2023 01:32:39 +0530
Subject: [PATCH 04/25] Update __main__.py

---
 src/gcp_scanner/__main__.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/gcp_scanner/__main__.py b/src/gcp_scanner/__main__.py
index 3e75a4a5..330fb74e 100644
--- a/src/gcp_scanner/__main__.py
+++ b/src/gcp_scanner/__main__.py
@@ -16,7 +16,10 @@
 """
 
+# Importing the scanner module
 from . import scanner
 
+# Checking if the code is running as the main module
 if __name__ == '__main__':
-  scanner.main()
+    # Calling the main function of the scanner module
+    scanner.main()

From 713da7adf30407a2ea4b841996ca8fb9069247d3 Mon Sep 17 00:00:00 2001
From: Rohit Raj <88114930+ro4i7@users.noreply.github.com>
Date: Thu, 6 Apr 2023 01:45:25 +0530
Subject: [PATCH 05/25] Update arguments.py

---
 src/gcp_scanner/arguments.py | 87 +++++++++++++++++++++---------------
 1 file changed, 51 insertions(+), 36 deletions(-)

diff --git a/src/gcp_scanner/arguments.py b/src/gcp_scanner/arguments.py
index a8028c66..ed37efc1 100644
--- a/src/gcp_scanner/arguments.py
+++ b/src/gcp_scanner/arguments.py
@@ -20,6 +20,7 @@
 import argparse
 import logging
 
+# Define a function to create an argument parser using the argparse module
 def arg_parser():
   """Creates an argument parser using the `argparse` module
   and defines several command-line arguments.
 
   Returns:
     argparse.Namespace: A namespace object containing the parsed
     command-line arguments.
""" + # Create a new parser object parser = argparse.ArgumentParser( - prog='scanner.py', - description='GCP Scanner', - usage='python3 %(prog)s -o folder_to_save_results -g -') + prog='scanner.py', # program name + description='GCP Scanner', # description + usage='python3 %(prog)s -o folder_to_save_results -g -' # usage instructions + ) + + # Define a required argument group required_named = parser.add_argument_group('Required parameters') + # Add a required argument to the group required_named.add_argument( - '-o', - '--output-dir', - required=True, - dest='output', - default='scan_db', - help='Path to output directory') + '-o', # short option name + '--output-dir', # long option name + required=True, + dest='output', + default='scan_db', + help='Path to output directory' + ) + # Add command line arguments to the parser object parser.add_argument( '-k', - '--sa-key-path', - default=None, - dest='key_path', - help='Path to directory with SA keys in json format') + '--sa-key-path', # Option for specifying the path to the directory with SA keys + default=None, # Default value if option is not specified + dest='key_path', # Destination variable for storing the value of the option + help='Path to directory with SA keys in json format' # Help message + ) parser.add_argument( '-g', - '--gcloud-profile-path', - default=None, - dest='gcloud_profile_path', - help='Path to directory with gcloud profile. Specify -\ - to search for credentials in default gcloud config path' + '--gcloud-profile-path', + default=None, + dest='gcloud_profile_path', + help='Path to directory with gcloud profile. Specify - to search for credentials in default gcloud config path' ) parser.add_argument( '-m', - '--use-metadata', - default=False, - dest='use_metadata', - action='store_true', - help='Extract credentials from GCE instance metadata') + '--use-metadata', + default=False, + dest='use_metadata', + action='store_true', + help='Extract credentials from GCE instance metadata' + ) parser.add_argument( '-at', - '--access-token-files', - default=None, - dest='access_token_files', - help='A list of comma separated files with access token and OAuth scopes.\ -TTL limited. A token and scopes should be stored in JSON format.') + '--access-token-files', + default=None, + dest='access_token_files', + help='A list of comma separated files with access token and OAuth scopes. TTL limited. A token and scopes should be stored in JSON format.' + ) parser.add_argument( '-rt', - '--refresh-token-files', - default=None, - dest='refresh_token_files', - help='A list of comma separated files with refresh_token, client_id,\ -token_uri and client_secret stored in JSON format.' + '--refresh-token-files', + default=None, + dest='refresh_token_files', + help='A list of comma separated files with refresh_token, client_id, token_uri and client_secret stored in JSON format.' 
)
+
   parser.add_argument(
       '-s',
       '--service-account',
       default=None,
       dest='key_name',
       help='Name of individual SA to scan')
   parser.add_argument(
       '-p',
       '--project',
       default=None,
       dest='target_project',
       help='Name of individual project to scan')
   parser.add_argument(
       '-f',
       '--force-projects',
       default=None,
       dest='force_projects',
       help='Comma separated list of project names to include in the scan')
   parser.add_argument(
       '-c',
       '--config',
       default=None,
       dest='config_path',
       help='A path to config file with a set of specific resources to scan.')
   parser.add_argument(
       '-l',
       '--logging',
       default='WARNING',
       dest='log_level',
       choices=('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'),
       help='Set logging level (INFO, WARNING, ERROR)')
   parser.add_argument(
       '-lf',
       '--log-file',
       default=None,
       dest='log_file',
       help='Save logs to the path specified rather than displaying in\
 console')
 
-  args: argparse.Namespace = parser.parse_args()
+  # Parse the command line arguments
+  args: argparse.Namespace = parser.parse_args()
 
-  if not args.key_path and not args.gcloud_profile_path \
+  # Check if none of the necessary options are selected
+  if not args.key_path and not args.gcloud_profile_path \
    and not args.use_metadata and not args.access_token_files\
    and not args.refresh_token_files:
+
+    # If none of the options are selected, log an error message
     logging.error(
         'Please select at least one option to begin scan\
 -k/--sa-key-path,-g/--gcloud-profile-path, -m, -rt, -at'
     )
 
-  return args
+  # Return the parsed command line arguments
+  return args
+

From 9ba1b1a7a2dcf940720f785c2e20cc512810bcac Mon Sep 17 00:00:00 2001
From: Rohit Raj <88114930+ro4i7@users.noreply.github.com>
Date: Thu, 6 Apr 2023 01:46:14 +0530
Subject: [PATCH 06/25] Update credsdb.py

---
 src/gcp_scanner/credsdb.py | 708 ++++++++++++++++++++-----------------
 1 file changed, 393 insertions(+), 315 deletions(-)

diff --git a/src/gcp_scanner/credsdb.py b/src/gcp_scanner/credsdb.py
index a0c7365f..1a7334ce 100644
--- a/src/gcp_scanner/credsdb.py
+++ b/src/gcp_scanner/credsdb.py
@@ -32,6 +32,7 @@
 from httplib2 import Credentials
 import requests
 
+# Set search places for finding credentials file
 credentials_db_search_places = ["/home/", "/root/"]
 
 
@@ -39,6 +40,9 @@ def credentials_from_token(access_token: str, refresh_token: Optional[str],
                            token_uri: Optional[str], client_id: Optional[str],
                            client_secret: Optional[str],
                            scopes_user: Optional[str]) -> Credentials:
+  """
+  Create Credentials instance from tokens
+  """
   return credentials.Credentials(
       access_token,
       refresh_token=refresh_token,
@@ -49,377 +53,451 @@ def credentials_from_token(access_token: str, refresh_token: Optional[str],
 
 
 def get_creds_from_file(file_path: str) -> Tuple[str, Credentials]:
-  """Creates a Credentials instance from a service account json file.
-
-  Args:
-    file_path: The path to the service account json file.
-
-  Returns:
-    str: An email address associated with a service account.
-    google.auth.service_account.Credentials: The constructed credentials.
   """
-
+  Retrieve Credentials instance from a service account json file.
+  """
   logging.info("Retrieving credentials from %s", file_path)
   creds = service_account.Credentials.from_service_account_file(file_path)
   return creds.service_account_email, creds
 
 
 def get_creds_from_json(parsed_keyfile: Mapping[str, str]) -> Credentials:
-  """Creates a Credentials instance from parsed service account info.
-
-  Args:
-    parsed_keyfile: The service account info in Google format.
-
-  Returns:
-    google.auth.service_account.Credentials: The constructed credentials.
   """
-
+  Retrieve Credentials instance from parsed service account info.
+  """
   return service_account.Credentials.from_service_account_info(parsed_keyfile)
 
 
 def get_creds_from_metadata() -> Tuple[Optional[str], Optional[Credentials]]:
-  """Retrieves a Credentials instance from compute instance metadata.
+    """Retrieves a Credentials instance from compute instance metadata.
 
-  Returns:
-    str: An email associated with credentials.
-    google.auth.service_account.Credentials: The constructed credentials.
-  """
+    Returns:
+        Tuple[Optional[str], Optional[Credentials]]:
+            A tuple containing the email associated with the credentials and the constructed credentials.
+ """ - print("Retrieving access token from instance metadata") - - token_url = "http://metadata.google.internal/computeMetadata/v1/instance/\ -service-accounts/default/token" - scope_url = "http://metadata.google.internal/computeMetadata/v1/instance/\ -service-accounts/default/scopes" - email_url = "http://metadata.google.internal/computeMetadata/v1/instance/\ -service-accounts/default/email" - headers = {"Metadata-Flavor": "Google"} - try: - res = requests.get(token_url, headers=headers) - if not res.ok: - logging.error("Failed to retrieve instance token. Status code %d", - res.status_code) - return None, None - token = res.json()["access_token"] - - res = requests.get(scope_url, headers=headers) - if not res.ok: - logging.error("Failed to retrieve instance scopes. Status code %d", - res.status_code) - return None, None - instance_scopes = res.content.decode("utf-8") - - res = requests.get(email_url, headers=headers) - if not res.ok: - logging.error("Failed to retrieve instance email. Status code %d", - res.status_code) - return None, None - email = res.content.decode("utf-8") - - except Exception: - logging.error("Failed to retrieve instance metadata") - logging.error(sys.exc_info()[1]) - return None, None - - print("Successfully retrieved instance metadata") - logging.info("Access token length: %d", len(token)) - logging.info("Instance email: %s", email) - logging.info("Instance scopes: %s", instance_scopes) - return email, credentials_from_token(token, None, None, None, None, - instance_scopes) - - -def get_creds_from_data(access_token: str, - parsed_keyfile: Dict[str, str]) -> Credentials: - """Creates a Credentials instance from parsed service account info. - - The function currently supports two types of credentials. Service account key - in json format and user account with refresh token. - - Args: - access_token: An Oauth2 access token. It can be None. - parsed_keyfile: The service account info in Google format. - - Returns: - google.auth.service_account.Credentials: The constructed credentials. - """ - creds = None - if "refresh_token" in parsed_keyfile: - logging.info("Identified user credentials in gcloud profile") - # this is user account credentials with refresh token - creds = credentials_from_token(access_token, - parsed_keyfile["refresh_token"], - parsed_keyfile["token_uri"], - parsed_keyfile["client_id"], - parsed_keyfile["client_secret"], - parsed_keyfile["scopes"]) - elif "private_key" in parsed_keyfile: - logging.info("Identified service account key credentials in gcloud profile") - # this is a service account key with private key - creds = get_creds_from_json(parsed_keyfile) - else: - logging.error("unknown type of credentials") - - return creds + # Print a message to indicate that we are retrieving the access token from instance metadata + print("Retrieving access token from instance metadata") + # Define the URLs that we need to access to get the token, scopes, and email + token_url = "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token" + scope_url = "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/scopes" + email_url = "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/email" -def find_creds(explicit_path: Optional[str] = None) -> List[str]: - """The function search disk and returns a list of files with GCP credentials. 
+ # Set the headers for the requests + headers = {"Metadata-Flavor": "Google"} - Args: - explicit_path: An explicit path on disk to search. If None, the function - searches in standard locations where gcloud profiles are usually located. + try: + # Make the request to get the access token + res = requests.get(token_url, headers=headers) - Returns: - list: The list of files with GCP credentials. - """ + # Check if the response was successful + if not res.ok: + logging.error("Failed to retrieve instance token. Status code %d", res.status_code) + return None, None + + # Parse the JSON response and get the access token + token = res.json()["access_token"] + + # Make the request to get the instance scopes + res = requests.get(scope_url, headers=headers) + + # Check if the response was successful + if not res.ok: + logging.error("Failed to retrieve instance scopes. Status code %d", res.status_code) + return None, None + + # Get the instance scopes from the response + instance_scopes = res.content.decode("utf-8") + + # Make the request to get the instance email + res = requests.get(email_url, headers=headers) + + # Check if the response was successful + if not res.ok: + logging.error("Failed to retrieve instance email. Status code %d", res.status_code) + return None, None - logging.info("Searching for credentials on disk") - list_of_creds_files = list() - search_paths = list() - if explicit_path is not None and explicit_path != "-": - search_paths.append(explicit_path) - else: - credentials_db_search_places.append(os.getenv("HOME") + "/") - for dir_path in credentials_db_search_places: - if not os.access(dir_path, os.R_OK): - continue - for subdir_name in os.listdir(dir_path): - full_path = dir_path + subdir_name + "/gcloud/" - search_paths.append(full_path) - - for dir_path in search_paths: - print(f"Scanning {dir_path} for credentials.db") - full_path = os.path.join(dir_path, "credentials.db") - if os.path.exists(full_path) and os.access(full_path, os.R_OK): - print(f"Identified accessible gcloud config profile {full_path}") - list_of_creds_files.append(full_path) - print(f"Identified {len(list_of_creds_files)} credential DBs") - return list_of_creds_files + # Get the instance email from the response + email = res.content.decode("utf-8") + + except Exception: + # Log an error message if any exception occurred + logging.error("Failed to retrieve instance metadata") + logging.error(sys.exc_info()[1]) + return None, None + + # Print a message to indicate that we have successfully retrieved the instance metadata + print("Successfully retrieved instance metadata") + + # Log the length of the access token, instance email, and instance scopes + logging.info("Access token length: %d", len(token)) + logging.info("Instance email: %s", email) + logging.info("Instance scopes: %s", instance_scopes) + + # Return the email and credentials constructed from the token and instance scopes + return email, credentials_from_token(token, None, None, None, None, instance_scopes) + + + +def get_creds_from_data(access_token: str, parsed_keyfile: Dict[str, str]) -> Credentials: + """Creates a Credentials instance from parsed service account info. + + The function currently supports two types of credentials. Service account key in json format and user account with refresh token. + + Args: + access_token: An Oauth2 access token. It can be None. + parsed_keyfile: The service account info in Google format. + + Returns: + google.auth.service_account.Credentials: The constructed credentials. 
+ """ + + # Initialize the variable to None + creds = None + + # Check if the parsed_keyfile contains "refresh_token" + if "refresh_token" in parsed_keyfile: + logging.info("Identified user credentials in gcloud profile") + # this is user account credentials with refresh token + creds = credentials_from_token( + access_token, + parsed_keyfile["refresh_token"], + parsed_keyfile["token_uri"], + parsed_keyfile["client_id"], + parsed_keyfile["client_secret"], + parsed_keyfile["scopes"] + ) + # Check if the parsed_keyfile contains "private_key" + elif "private_key" in parsed_keyfile: + logging.info("Identified service account key credentials in gcloud profile") + # this is a service account key with private key + creds = get_creds_from_json(parsed_keyfile) + else: + logging.error("unknown type of credentials") + + # Return the constructed credentials + return creds + + +def find_creds(explicit_path: Optional[str] = None) -> List[str]: + """ + The function searches the disk and returns a list of files with GCP credentials. + + Args: + explicit_path: An explicit path on disk to search. If None, the function + searches in standard locations where gcloud profiles are usually located. + + Returns: + list: The list of files with GCP credentials. + """ + + logging.info("Searching for credentials on disk") + list_of_creds_files = [] + + # Create a list of search paths to scan for credentials.db + search_paths = [] + if explicit_path is not None and explicit_path != "-": + search_paths.append(explicit_path) + else: + credentials_db_search_places.append(os.getenv("HOME") + "/") + for dir_path in credentials_db_search_places: + if not os.access(dir_path, os.R_OK): + continue + for subdir_name in os.listdir(dir_path): + full_path = os.path.join(dir_path, subdir_name, "gcloud") + search_paths.append(full_path) + + # Scan each search path for credentials.db and add them to the list_of_creds_files + for dir_path in search_paths: + print(f"Scanning {dir_path} for credentials.db") + full_path = os.path.join(dir_path, "credentials.db") + if os.path.exists(full_path) and os.access(full_path, os.R_OK): + print(f"Identified accessible gcloud config profile {full_path}") + list_of_creds_files.append(full_path) + + print(f"Identified {len(list_of_creds_files)} credential DBs") + return list_of_creds_files def get_access_tokens_dict(path_to_creds_db: str) -> Dict[str, str]: - """The function search and extract Oauth2 access_tokens from sqlite3 DB. + """ + The function searches and extracts OAuth2 access_tokens from a SQLite3 database. - Args: - path_to_creds_db: A path to sqllite3 DB with gcloud access tokens. + Args: + path_to_creds_db: A path to SQLite3 database with gcloud access tokens. - Returns: - dict: The dictionary of account names and corresponding tokens. - """ + Returns: + dict: The dictionary of account names and corresponding tokens. 
+ """ - access_tokens_dict = dict() - access_tokens_path = path_to_creds_db.replace("credentials.db", - "access_tokens.db") - if os.path.exists(access_tokens_path) and os.access(access_tokens_path, - os.R_OK): - logging.info("Identified access tokens DB in %s", access_tokens_path) - conn = sqlite3.connect(access_tokens_path) - cursor = conn.execute( - "SELECT account_id, access_token, token_expiry FROM access_tokens") - rows = cursor.fetchall() - for row in rows: - associated_account = row[0] - token = row[1] - expiration_date = row[2] - expiration_date = expiration_date.split(".")[0] # omit milliseconds + access_tokens_dict = dict() + + # Replace credentials.db with access_tokens.db to get the path to access tokens database + access_tokens_path = path_to_creds_db.replace("credentials.db", "access_tokens.db") - token_time_obj = datetime.datetime.strptime(expiration_date, - "%Y-%m-%d %H:%M:%S") - if datetime.datetime.now() > token_time_obj: - logging.info("Token for %s expired", associated_account) - continue + # Check if the access tokens database exists and can be read + if os.path.exists(access_tokens_path) and os.access(access_tokens_path, os.R_OK): - access_tokens_dict[associated_account] = token + # If the access tokens database exists and can be read, connect to it + logging.info("Identified access tokens DB in %s", access_tokens_path) + conn = sqlite3.connect(access_tokens_path) + cursor = conn.execute("SELECT account_id, access_token, token_expiry FROM access_tokens") + + # Fetch all rows from the access tokens database + rows = cursor.fetchall() + + # Iterate over each row + for row in rows: + associated_account = row[0] + token = row[1] + expiration_date = row[2] + + # Omit milliseconds from the expiration date + expiration_date = expiration_date.split(".")[0] + + # Convert the expiration date to a datetime object + token_time_obj = datetime.datetime.strptime(expiration_date, "%Y-%m-%d %H:%M:%S") + + # Check if the token has expired + if datetime.datetime.now() > token_time_obj: + logging.info("Token for %s expired", associated_account) + continue + + # Add the associated account and token to the access tokens dictionary + access_tokens_dict[associated_account] = token + + return access_tokens_dict - return access_tokens_dict def extract_creds(path_to_creds_db: str) -> List[Tuple[str, str, str]]: - """The function extract refresh and associated access tokens from sqlite3 DBs. + """ + The function extracts refresh and associated access tokens from sqlite3 DBs. - Args: - path_to_creds_db: A path to sqllite3 DB with gcloud refresh tokens. + Args: + path_to_creds_db (str): A path to sqlite3 DB with gcloud refresh tokens. - Returns: - list of tuples: (account name, refresh token, access token). - """ + Returns: + List of tuples: (account name, refresh token, access token). 
+ """ + # Log that we are opening the database + logging.info("Opening %s DB", path_to_creds_db) - logging.info("Opening %s DB", path_to_creds_db) - SA = collections.namedtuple("SA", "account_name, creds, token") + # Create a named tuple for service accounts + SA = collections.namedtuple("SA", "account_name, creds, token") - res = list() - conn = sqlite3.connect(path_to_creds_db) - cursor = conn.execute("SELECT account_id, value FROM credentials") - rows = cursor.fetchall() - if len(rows) <= 0: - logging.error("Empty database") - return None - # we also want to check for access_tokens to avoid unnecessary refreshing - access_tokens = get_access_tokens_dict(path_to_creds_db) - for row in rows: - access_token = None - if access_tokens.get(row[0], None) is not None: - logging.info("Found valid access token for %s", row[0]) - access_token = access_tokens[row[0]] - res.append(SA(row[0], row[1], access_token)) - print(f"Identified {len(res)} credential entries") - return res - - -def get_account_creds_list( - gcloud_profile_path: Optional[str] = None -) -> List[List[Tuple[str, str, str]]]: - """The function searches and extracts gcloud credentials from disk. - - Args: - gcloud_profile_path: An explicit gcloud profile path on disk to search. If - None, the function searches in standard locations where gcloud profiles - are usually located. - - Returns: - list: A list of tuples (account name, refresh token, access token). - """ + # Initialize an empty list for the results + res = list() - accounts = list() - creds_file_list = find_creds(gcloud_profile_path) - for creds_file in creds_file_list: - res = extract_creds(creds_file) - if res is not None: - accounts.append(res) - return accounts + # Connect to the database + conn = sqlite3.connect(path_to_creds_db) + + # Select account_id and value from the credentials table + cursor = conn.execute("SELECT account_id, value FROM credentials") + rows = cursor.fetchall() + # Check if the database is empty + if len(rows) <= 0: + logging.error("Empty database") + return None -def impersonate_sa(iam_client: IAMCredentialsClient, - target_account: str) -> Credentials: - """The function is used to impersonate SA. + # We also want to check for access_tokens to avoid unnecessary refreshing + access_tokens = get_access_tokens_dict(path_to_creds_db) - Args: - iam_client: google.cloud.iam_credentials_v1.services.iam_credentials. - client.IAMCredentialsClient object. - target_account: Name of a service account to impersonate. + # Loop through the rows + for row in rows: + access_token = None - Returns: - google.auth.service_account.Credentials: The constructed credentials. 
- """ + # Check if the access token exists and is valid + if access_tokens.get(row[0], None) is not None: + logging.info("Found valid access token for %s", row[0]) + access_token = access_tokens[row[0]] - scopes_sa = ["https://www.googleapis.com/auth/cloud-platform"] - intermediate_access_token = iam_client.generate_access_token( - name=target_account, scope=scopes_sa, retry=None - # lifetime = "43200" - ) + # Append the account name, credentials, and access token to the results list + res.append(SA(row[0], row[1], access_token)) - return credentials_from_token(intermediate_access_token.access_token, None, - None, None, None, scopes_sa) + # Print the number of identified credential entries + print(f"Identified {len(res)} credential entries") + # Return the results list + return res -def creds_from_access_token(access_token_file): - """The function is used to obtain Google Auth Credentials from access token. - - Args: - access_token_file: a path to a file with access token and scopes stored in - JSON format. Example: - { - "access_token": "", - "scopes": [ - "https://www.googleapis.com/auth/devstorage.read_only", - "https://www.googleapis.com/auth/logging.write", - "https://www.googleapis.com/auth/monitoring.write", - "https://www.googleapis.com/auth/servicecontrol", - "https://www.googleapis.com/auth/service.management.readonly", - "https://www.googleapis.com/auth/trace.append" - ] - } - - Returns: - google.auth.service_account.Credentials: The constructed credentials. - """ - with open(access_token_file, encoding="utf-8") as f: - creds_dict = json.load(f) +def get_account_creds_list(gcloud_profile_path: Optional[str] = None) -> List[List[Tuple[str, str, str]]]: + """The function searches and extracts gcloud credentials from disk. - user_scopes = creds_dict.get("scopes", None) - if user_scopes is None: - user_scopes = ["https://www.googleapis.com/auth/cloud-platform"] + Args: + gcloud_profile_path: An explicit gcloud profile path on disk to search. If + None, the function searches in standard locations where gcloud profiles + are usually located. - return credentials_from_token( - creds_dict["access_token"], - None, - None, - None, - None, - user_scopes) + Returns: + list: A list of tuples (account name, refresh token, access token). + """ + accounts = list() # initialize an empty list + creds_file_list = find_creds(gcloud_profile_path) # get a list of credentials files + for creds_file in creds_file_list: + res = extract_creds(creds_file) # extract the credentials from the file + if res is not None: + accounts.append(res) # append the extracted credentials to the accounts list + return accounts # return the accounts list -def creds_from_refresh_token(refresh_token_file): - """The function is used to obtain Google Auth Credentials from refresh token. - - Args: - refresh_token_file: a path to a file with refresh_token, client_id, - client_secret, and token_uri stored in JSON format. - Example: - { - "refresh_token": "", - "client_id": "id", - "client_secret": "secret", - scopes: [ - https://www.googleapis.com/auth/devstorage.read_only, - https://www.googleapis.com/auth/logging.write, - https://www.googleapis.com/auth/monitoring.write, - https://www.googleapis.com/auth/servicecontrol, - https://www.googleapis.com/auth/service.management.readonly, - https://www.googleapis.com/auth/trace.append - ] - } - Returns: - google.auth.service_account.Credentials: The constructed credentials. 
- """ +def impersonate_sa(iam_client: IAMCredentialsClient, target_account: str) -> Credentials: + """ + The function is used to impersonate a service account. - with open(refresh_token_file, encoding="utf-8") as f: - creds_dict = json.load(f) + Args: + iam_client (IAMCredentialsClient): The IAMCredentialsClient object. + target_account (str): The name of the service account to impersonate. - user_scopes = get_scopes_from_refresh_token(creds_dict) + Returns: + Credentials: The constructed credentials. + """ - return credentials.Credentials( - None, - refresh_token=creds_dict["refresh_token"], - token_uri=creds_dict["token_uri"], - client_id=creds_dict["client_id"], - client_secret=creds_dict["client_secret"], - scopes=user_scopes, - ) + # Define the scopes for the service account + scopes_sa = ["https://www.googleapis.com/auth/cloud-platform"] + # Generate an access token for the service account + intermediate_access_token = iam_client.generate_access_token( + name=target_account, + scope=scopes_sa, + retry=None, + # lifetime="43200" + ) -def get_scopes_from_refresh_token(context) -> Union[List[str], None]: - """The function is used to obtain scopes from refresh token. - - Args: - context: dictionary containing refresh_token data - Example: - { - "refresh_token": "", - "client_id": "id", - "client_secret": "secret", - } - Returns: - a list of scopes or None - """ - # Obtain access token from the refresh token - token_uri = "https://oauth2.googleapis.com/token" - context["grant_type"] = "refresh_token" - - try: - response = requests.post(token_uri, data=context, timeout=5) - # prepare the scope string into a list - raw = response.json().get("scope", None) - return raw.split(" ") if raw else None - except Exception as ex: - logging.error( - "Failed to retrieve access token from refresh token.", + # Use the access token to construct credentials + return credentials_from_token( + intermediate_access_token.access_token, + None, + None, + None, + None, + scopes_sa + ) + + + +def creds_from_access_token(access_token_file): + """The function is used to obtain Google Auth Credentials from access token. + + Args: + access_token_file: a path to a file with access token and scopes stored in + JSON format. Example: + { + "access_token": "", + "scopes": [ + "https://www.googleapis.com/auth/devstorage.read_only", + "https://www.googleapis.com/auth/logging.write", + "https://www.googleapis.com/auth/monitoring.write", + "https://www.googleapis.com/auth/servicecontrol", + "https://www.googleapis.com/auth/service.management.readonly", + "https://www.googleapis.com/auth/trace.append" + ] + } + + Returns: + google.auth.service_account.Credentials: The constructed credentials. + """ + + # Load the access token and scopes from the specified file + with open(access_token_file, encoding="utf-8") as f: + creds_dict = json.load(f) + + # Check if user-defined scopes are provided + user_scopes = creds_dict.get("scopes", None) + if user_scopes is None: + # Use default scopes if not provided + user_scopes = ["https://www.googleapis.com/auth/cloud-platform"] + + # Construct credentials from the access token and scopes + return credentials_from_token( + creds_dict["access_token"], + None, + None, + None, + None, + user_scopes + ) + + +def creds_from_refresh_token(refresh_token_file): + """ + The function is used to obtain Google Auth Credentials from refresh token. + + Args: + - refresh_token_file: a path to a file with refresh_token, client_id, + client_secret, and token_uri stored in JSON format. 
+        Example:
+            {
+            "refresh_token": "",
+            "client_id": "id",
+            "client_secret": "secret",
+            "scopes": [
+                "https://www.googleapis.com/auth/devstorage.read_only",
+                "https://www.googleapis.com/auth/logging.write",
+                "https://www.googleapis.com/auth/monitoring.write",
+                "https://www.googleapis.com/auth/servicecontrol",
+                "https://www.googleapis.com/auth/service.management.readonly",
+                "https://www.googleapis.com/auth/trace.append"
+            ]
+            }
+
+    Returns:
+    - google.oauth2.credentials.Credentials: The constructed credentials.
+    """
+
+    # Open the refresh_token_file in utf-8 encoding and load the contents to a dictionary
+    with open(refresh_token_file, encoding="utf-8") as f:
+        creds_dict = json.load(f)
+
+    # Get the user-defined scopes from the refresh token dictionary
+    user_scopes = get_scopes_from_refresh_token(creds_dict)
+
+    # Construct and return a google.oauth2.credentials.Credentials object
+    return credentials.Credentials(
+        None,
+        refresh_token=creds_dict["refresh_token"],
+        token_uri=creds_dict["token_uri"],
+        client_id=creds_dict["client_id"],
+        client_secret=creds_dict["client_secret"],
+        scopes=user_scopes,
+    )
+
+
+def get_scopes_from_refresh_token(context) -> Union[List[str], None]:
+    """
+    The function is used to obtain scopes from a refresh token.
+
+    Args:
+        context: a dictionary containing refresh token data
+        Example:
+            {
+            "refresh_token": "",
+            "client_id": "id",
+            "client_secret": "secret",
+            }
+
+    Returns:
+        a list of scopes or None
+    """
+
+    # Obtain access token from the refresh token
+    token_uri = "https://oauth2.googleapis.com/token"
+    context["grant_type"] = "refresh_token"
+
+    try:
+        response = requests.post(token_uri, data=context, timeout=5)
+
+        # prepare the scope string into a list
+        raw = response.json().get("scope", None)
+        return raw.split(" ") if raw else None
+
+    except Exception as ex:
+        logging.error("Failed to retrieve access token from refresh token.")
+        logging.debug("Token refresh exception", exc_info=ex)
+
+        return None

From 68dfa0d0ffd19144b9fe8d675f8cae436f481750 Mon Sep 17 00:00:00 2001
From: Rohit Raj <88114930+ro4i7@users.noreply.github.com>
Date: Thu, 6 Apr 2023 01:46:45 +0530
Subject: [PATCH 07/25] Update models.py

---
 src/gcp_scanner/models.py | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/src/gcp_scanner/models.py b/src/gcp_scanner/models.py
index 7a952ee3..60aa9855 100644
--- a/src/gcp_scanner/models.py
+++ b/src/gcp_scanner/models.py
@@ -22,18 +22,19 @@
 from httplib2 import Credentials
 
-class SpiderContext:
-  """A simple class to initialize the context with a list of root SAs
-  """
-
-  def __init__(self, sa_tuples: List[Tuple[str, Credentials, List[str]]]):
-    """Initialize the context with a list of the root service accounts.
-
-    Args:
-      sa_tuples: [(sa_name, sa_object, chain_so_far)]
-    """
-
-    self.service_account_queue = queue.Queue()
-    for sa_tuple in sa_tuples:
-      self.service_account_queue.put(sa_tuple)
+class SpiderContext:
+    """A simple class to initialize the context with a list of root SAs"""
+
+    def __init__(self, sa_tuples: List[Tuple[str, Credentials, List[str]]]):
+        """
+        Initialize the context with a list of the root service accounts.
+ + Args: + sa_tuples: [(sa_name, sa_object, chain_so_far)] + """ + # Create a new queue to hold the service accounts + self.service_account_queue = queue.Queue() + + # Add each service account from the sa_tuples list to the queue + for sa_tuple in sa_tuples: + self.service_account_queue.put(sa_tuple) From 3dbcd933c74fea24ef7cd8a1296a5a562394c8e4 Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Thu, 6 Apr 2023 01:47:16 +0530 Subject: [PATCH 08/25] Update scanner.py --- src/gcp_scanner/scanner.py | 518 ++++++++++++++++++++----------------- 1 file changed, 283 insertions(+), 235 deletions(-) diff --git a/src/gcp_scanner/scanner.py b/src/gcp_scanner/scanner.py index f0300774..8a5d864f 100644 --- a/src/gcp_scanner/scanner.py +++ b/src/gcp_scanner/scanner.py @@ -33,358 +33,406 @@ from httplib2 import Credentials from .models import SpiderContext -def is_set(config: Optional[dict], config_setting: str) -> Union[dict,bool]: - if config is None: - return True - obj = config.get(config_setting, {}) - return obj.get('fetch', False) +def is_set(config: Optional[dict], config_setting: str) -> Union[dict, bool]: + # If config is None, return True + if config is None: + return True + + # Get the value of the specified config setting + obj = config.get(config_setting, {}) + + # Return the value of 'fetch' if it exists in the config setting, otherwise return False + return obj.get('fetch', False) def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]], out_dir: str, scan_config: Dict, target_project: Optional[str] = None, force_projects: Optional[str] = None): - """The main loop function to crawl GCP resources. + """ + The main loop function to crawl GCP resources. Args: initial_sa_tuples: [(sa_name, sa_object, chain_so_far)] out_dir: directory to save results + scan_config: configuration object target_project: project name to scan force_projects: a list of projects to force scan """ + # Initialize SpiderContext context = SpiderContext(initial_sa_tuples) - # Main loop + + # Set of already processed service accounts processed_sas = set() + + # Main loop while not context.service_account_queue.empty(): # Get a new candidate service account / token sa_name, credentials, chain_so_far = context.service_account_queue.get() + if sa_name in processed_sas: continue # Don't process this service account again processed_sas.add(sa_name) + logging.info('>> current service account: %s', sa_name) + + # Create dictionary to store results for current service account sa_results = crawl.infinite_defaultdict() + # Log the chain we used to get here (even if we have no privs) sa_results['service_account_chain'] = chain_so_far sa_results['current_service_account'] = sa_name + # Add token scopes in the result sa_results['token_scopes'] = credentials.scopes + # Get list of accessible projects project_list = crawl.get_project_list(credentials) + if len(project_list) <= 0: logging.info('Unable to list projects accessible from service account') + # Add any forced projects to project_list if force_projects: for force_project_id in force_projects: res = crawl.fetch_project_info(force_project_id, credentials) + if res: project_list.append(res) else: # force object creation anyway - project_list.append({'projectId': force_project_id, - 'projectNumber': 'N/A'}) + project_list.append({'projectId': force_project_id, 'projectNumber': 'N/A'}) + # Enumerate projects accessible by SA for project in project_list: - if target_project and target_project not in project['projectId']: + if 
target_project and target_project not in project['projectId']: continue - project_id = project['projectId'] - project_number = project['projectNumber'] - print(f'Inspecting project {project_id}') - project_result = sa_results['projects'][project_id] + project_id = project['projectId'] + project_number = project['projectNumber'] + print(f'Inspecting project {project_id}') + project_result = sa_results['projects'][project_id] - project_result['project_info'] = project + project_result['project_info'] = project - if is_set(scan_config, 'iam_policy'): + if is_set(scan_config, 'iam_policy'): # Get IAM policy iam_client = iam_client_for_credentials(credentials) iam_policy = crawl.get_iam_policy(project_id, credentials) project_result['iam_policy'] = iam_policy - if is_set(scan_config, 'service_accounts'): + if is_set(scan_config, 'service_accounts'): # Get service accounts project_service_accounts = crawl.get_service_accounts( project_number, credentials) project_result['service_accounts'] = project_service_accounts - # Iterate over discovered service accounts by attempting impersonation - project_result['service_account_edges'] = [] - updated_chain = chain_so_far + [sa_name] + # Iterate over discovered service accounts by attempting impersonation + project_result['service_account_edges'] = [] + updated_chain = chain_so_far + [sa_name] - # Get GCP Compute Resources - compute_client = compute_client_for_credentials(credentials) - if is_set(scan_config, 'compute_instances'): + # Get GCP Compute Resources + compute_client = compute_client_for_credentials(credentials) + if is_set(scan_config, 'compute_instances'): project_result['compute_instances'] = crawl.get_compute_instances_names( - project_id, compute_client) - if is_set(scan_config, 'compute_images'): + project_id, compute_client) + if is_set(scan_config, 'compute_images'): project_result['compute_images'] = crawl.get_compute_images_names( - project_id, - compute_client) - if is_set(scan_config, 'machine_images'): + project_id, + compute_client) + if is_set(scan_config, 'machine_images'): project_result['machine_images'] = crawl.get_machine_images( - project_id, - compute_client, + project_id, + compute_client, ) - if is_set(scan_config, 'compute_disks'): + if is_set(scan_config, 'compute_disks'): project_result['compute_disks'] = crawl.get_compute_disks_names( - project_id, - compute_client) - if is_set(scan_config, 'static_ips'): + project_id, + compute_client) + if is_set(scan_config, 'static_ips'): project_result['static_ips'] = crawl.get_static_ips(project_id, compute_client) - if is_set(scan_config, 'compute_snapshots'): + if is_set(scan_config, 'compute_snapshots'): project_result['compute_snapshots'] = crawl.get_compute_snapshots( - project_id, - compute_client) - if is_set(scan_config, 'subnets'): + project_id, + compute_client) + if is_set(scan_config, 'subnets'): project_result['subnets'] = crawl.get_subnets(project_id, compute_client) - if is_set(scan_config, 'firewall_rules'): + if is_set(scan_config, 'firewall_rules'): project_result['firewall_rules'] = crawl.get_firewall_rules(project_id, - compute_client) + compute_client) - # Get GCP APP Resources - if is_set(scan_config, 'app_services'): + # Get GCP APP Resources + if is_set(scan_config, 'app_services'): project_result['app_services'] = crawl.get_app_services( project_id, credentials) - # Get storage buckets - if is_set(scan_config, 'storage_buckets'): + + # Get storage buckets + if is_set(scan_config, 'storage_buckets'): dump_file_names = None if scan_config is not None: - 
obj = scan_config.get('storage_buckets', None) - if obj is not None and obj.get('fetch_file_names', False) is True: - dump_file_names = open(out_dir + '/%s.gcs' % project_id, 'w', - encoding='utf-8') - project_result['storage_buckets'] = crawl.get_bucket_names(project_id, - credentials, dump_file_names) + obj = scan_config.get('storage_buckets', None) + # Check if fetch_file_names flag is set to true + if obj is not None and obj.get('fetch_file_names', False) is True: + dump_file_names = open(out_dir + '/%s.gcs' % project_id, 'w', encoding='utf-8') + project_result['storage_buckets'] = crawl.get_bucket_names(project_id, credentials, dump_file_names) + # Close dump file if it's open if dump_file_names is not None: - dump_file_names.close() - - # Get DNS managed zones - if is_set(scan_config, 'managed_zones'): - project_result['managed_zones'] = crawl.get_managed_zones(project_id, - credentials) - # Get DNS policies - if is_set(scan_config, 'dns_policies'): - project_result['dns_policies'] = crawl.list_dns_policies( - project_id, - credentials - ) + dump_file_names.close() + + # Get DNS managed zones + if is_set(scan_config, 'managed_zones'): + project_result['managed_zones'] = crawl.get_managed_zones(project_id, credentials) - # Get GKE resources - if is_set(scan_config, 'gke_clusters'): + # Get DNS policies + if is_set(scan_config, 'dns_policies'): + project_result['dns_policies'] = crawl.list_dns_policies(project_id, credentials) + + # Get GKE resources + if is_set(scan_config, 'gke_clusters'): gke_client = gke_client_for_credentials(credentials) - project_result['gke_clusters'] = crawl.get_gke_clusters(project_id, - gke_client) - if is_set(scan_config, 'gke_images'): - project_result['gke_images'] = crawl.get_gke_images(project_id, - credentials.token) - - # Get SQL instances - if is_set(scan_config, 'sql_instances'): - project_result['sql_instances'] = crawl.get_sql_instances(project_id, - credentials) - - # Get BigQuery databases and table names - if is_set(scan_config, 'bq'): + project_result['gke_clusters'] = crawl.get_gke_clusters(project_id, gke_client) + if is_set(scan_config, 'gke_images'): + project_result['gke_images'] = crawl.get_gke_images(project_id, credentials.token) + + # Get SQL instances + if is_set(scan_config, 'sql_instances'): + project_result['sql_instances'] = crawl.get_sql_instances(project_id, credentials) + + # Get BigQuery databases and table names + if is_set(scan_config, 'bq'): project_result['bq'] = crawl.get_bq(project_id, credentials) - # Get PubSub Subscriptions - if is_set(scan_config, 'pubsub_subs'): - project_result['pubsub_subs'] = crawl.get_pubsub_subscriptions( - project_id, credentials) + # Get PubSub Subscriptions + if is_set(scan_config, 'pubsub_subs'): + project_result['pubsub_subs'] = crawl.get_pubsub_subscriptions(project_id, credentials) - # Get CloudFunctions list - if is_set(scan_config, 'cloud_functions'): - project_result['cloud_functions'] = crawl.get_cloudfunctions( - project_id, credentials) + # Get CloudFunctions list + if is_set(scan_config, 'cloud_functions'): + project_result['cloud_functions'] = crawl.get_cloudfunctions(project_id, credentials) - # Get List of BigTable Instances - if is_set(scan_config, 'bigtable_instances'): - project_result['bigtable_instances'] = crawl.get_bigtable_instances( - project_id, credentials) + # Get List of BigTable Instances + if is_set(scan_config, 'bigtable_instances'): + project_result['bigtable_instances'] = crawl.get_bigtable_instances(project_id, credentials) - # Get Spanner Instances - if 
is_set(scan_config, 'spanner_instances'): - project_result['spanner_instances'] = crawl.get_spanner_instances( - project_id, credentials) + # Get Spanner Instances + if is_set(scan_config, 'spanner_instances'): + project_result['spanner_instances'] = crawl.get_spanner_instances(project_id, credentials) - # Get CloudStore Instances - if is_set(scan_config, 'cloudstore_instances'): - project_result['cloudstore_instances'] = crawl.get_filestore_instances( - project_id, credentials) + # Get CloudStore Instances + if is_set(scan_config, 'cloudstore_instances'): + project_result['cloudstore_instances'] = crawl.get_filestore_instances(project_id, credentials) - # Get list of KMS keys - if is_set(scan_config, 'kms'): + # Get list of KMS keys + if is_set(scan_config, 'kms'): project_result['kms'] = crawl.get_kms_keys(project_id, credentials) - # Get information about Endpoints - if is_set(scan_config, 'endpoints'): - project_result['endpoints'] = crawl.get_endpoints(project_id, - credentials) - - # Get list of API services enabled in the project - if is_set(scan_config, 'services'): - project_result['services'] = crawl.list_services(project_id, - credentials) - - # Get list of cloud source repositories enabled in the project - if is_set(scan_config, 'sourcerepos'): - project_result['sourcerepos'] = crawl.list_sourcerepo( - project_id, - credentials - ) + # Get information about Endpoints + if is_set(scan_config, 'endpoints'): + project_result['endpoints'] = crawl.get_endpoints(project_id, credentials) + + # Get list of API services enabled in the project + if is_set(scan_config, 'services'): + project_result['services'] = crawl.list_services(project_id, credentials) + + # Get list of cloud source repositories enabled in the project + if is_set(scan_config, 'sourcerepos'): + project_result['sourcerepos'] = crawl.list_sourcerepo(project_id, credentials) + - # trying to impersonate SAs within project - if scan_config is not None: + + # trying to impersonate SAs within project + if scan_config is not None: impers = scan_config.get('service_accounts', None) - else: + else: impers = {'impersonate': True} - if impers is not None and impers.get('impersonate', False) is True: - if is_set(scan_config, 'iam_policy') is False: - iam_policy = crawl.get_iam_policy(project_id, credentials) - project_service_accounts = crawl.get_associated_service_accounts( - iam_policy) + # If 'impersonate' is set to True, attempt to impersonate the service account(s) within the project + if impers is not None and impers.get('impersonate', False) is True: - for candidate_service_account in project_service_accounts: - logging.info('Trying %s', candidate_service_account) - if not candidate_service_account.startswith('serviceAccount'): - continue - try: - creds_impersonated = credsdb.impersonate_sa( - iam_client, candidate_service_account) - context.service_account_queue.put( - (candidate_service_account, creds_impersonated, updated_chain)) - project_result['service_account_edges'].append( - candidate_service_account) - logging.info('Successfully impersonated %s using %s', - candidate_service_account, sa_name) - except Exception: - logging.error('Failed to get token for %s', - candidate_service_account) - logging.error(sys.exc_info()[1]) - - # Write out results to json DB - logging.info('Saving results for %s into the file', project_id) - - sa_results_data = json.dumps(sa_results, indent=2, sort_keys=False) - - with open(out_dir + '/%s.json' % project_id, 'a', - encoding='utf-8') as outfile: - outfile.write(sa_results_data) - - # 
Clean memory to avoid leak for large amount projects. - sa_results.clear() + # If 'iam_policy' is not already set, retrieve the IAM policy + if is_set(scan_config, 'iam_policy') is False: + iam_policy = crawl.get_iam_policy(project_id, credentials) + # Get a list of all the service accounts associated with the project + project_service_accounts = crawl.get_associated_service_accounts(iam_policy) -def iam_client_for_credentials( - credentials: Credentials) -> IAMCredentialsClient: - return iam_credentials.IAMCredentialsClient(credentials=credentials) + # Iterate through each service account + for candidate_service_account in project_service_accounts: + # Only consider service accounts with 'serviceAccount' prefix + if not candidate_service_account.startswith('serviceAccount'): + continue -def compute_client_for_credentials( - credentials: Credentials) -> discovery.Resource: - return discovery.build( - 'compute', 'v1', credentials=credentials, cache_discovery=False) + try: + # Impersonate the current service account and obtain credentials + creds_impersonated = credsdb.impersonate_sa(iam_client, candidate_service_account) + # Append the service account to the service_account_edges field in the project_result dict + context.service_account_queue.put((candidate_service_account, creds_impersonated, updated_chain)) + project_result['service_account_edges'].append(candidate_service_account) -def gke_client_for_credentials( - credentials: Credentials -) -> container_v1.services.cluster_manager.client.ClusterManagerClient: - return container_v1.services.cluster_manager.ClusterManagerClient( - credentials=credentials) + # Log that impersonation was successful + logging.info('Successfully impersonated %s using %s', candidate_service_account, sa_name) + except Exception: + # Log that impersonation failed + logging.error('Failed to get token for %s', candidate_service_account) + logging.error(sys.exc_info()[1]) -def main(): - logging.getLogger('googleapiclient.discovery_cache').setLevel(logging.ERROR) - logging.getLogger('googleapiclient.http').setLevel(logging.ERROR) - args = arguments.arg_parser() + # Write out results to json DB + logging.info('Saving results for %s into the file', project_id) - force_projects_list = list() - if args.force_projects: - force_projects_list = args.force_projects.split(',') + sa_results_data = json.dumps(sa_results, indent=2, sort_keys=False) - logging.basicConfig(level=getattr(logging, args.log_level.upper(), None), - format='%(asctime)s - %(levelname)s - %(message)s', - datefmt='%Y-%m-%d %H:%M:%S', - filename=args.log_file, filemode='a') + with open(out_dir + '/%s.json' % project_id, 'a', + encoding='utf-8') as outfile: + outfile.write(sa_results_data) - sa_tuples = [] - if args.key_path: - # extracting SA keys from folder - for keyfile in os.listdir(args.key_path): - if not keyfile.endswith('.json'): - continue - full_key_path = os.path.join(args.key_path, keyfile) - account_name, credentials = credsdb.get_creds_from_file(full_key_path) - if credentials is None: - logging.error('Failed to retrieve credentials for %s', account_name) - continue - sa_tuples.append((account_name, credentials, [])) + # Clean memory to avoid leak for large amount projects. 
+ sa_results.clear() - if args.use_metadata: - # extracting GCP credentials from instance metadata - account_name, credentials = credsdb.get_creds_from_metadata() - if credentials is None: - logging.error('Failed to retrieve credentials from metadata') - else: - sa_tuples.append((account_name, credentials, [])) - if args.gcloud_profile_path: - # extracting GCP credentials from gcloud configs - auths_list = credsdb.get_account_creds_list(args.gcloud_profile_path) +# Define a function that returns an IAMCredentialsClient object +# for the given credentials. +def iam_client_for_credentials( + credentials: Credentials) -> iam_credentials.IAMCredentialsClient: + + return iam_credentials.IAMCredentialsClient(credentials=credentials) - for accounts in auths_list: - for creds in accounts: - # switch between accounts - account_name = creds.account_name - account_creds = creds.creds - access_token = creds.token - if args.key_name and args.key_name not in account_name: - continue - logging.info('Retrieving credentials for %s', account_name) - credentials = credsdb.get_creds_from_data(access_token, - json.loads(account_creds)) - if credentials is None: - logging.error('Failed to retrieve access token for %s', account_name) - continue +def compute_client_for_credentials( + credentials: Credentials) -> discovery.Resource: + """ + Returns a Compute Engine API client instance for the given credentials. - sa_tuples.append((account_name, credentials, [])) + Args: + credentials (google.auth.credentials.Credentials): The credentials to use to + authenticate requests to the Compute Engine API. - if args.access_token_files: - for access_token_file in args.access_token_files.split(','): - credentials = credsdb.creds_from_access_token(access_token_file) + Returns: + googleapiclient.discovery.Resource: A Compute Engine API client instance. + """ + return discovery.build( + 'compute', # The name of the API to use. + 'v1', # The version of the API to use. 
+ credentials=credentials, + cache_discovery=False + ) - if credentials is None: - logging.error('Failed to retrieve credentials using token provided') - else: - token_file_name = os.path.basename(access_token_file) - sa_tuples.append((token_file_name, credentials, [])) - if args.refresh_token_files: - for refresh_token_file in args.refresh_token_files.split(','): - credentials = credsdb.creds_from_refresh_token(refresh_token_file) +def gke_client_for_credentials( + credentials: Credentials +) -> container_v1.services.cluster_manager.client.ClusterManagerClient: + # This function returns a ClusterManagerClient object for the given credentials + # It takes in a Credentials object as a parameter and returns a ClusterManagerClient object - if credentials is None: - logging.error('Failed to retrieve credentials using token provided') - else: - token_file_name = os.path.basename(refresh_token_file) - sa_tuples.append((token_file_name, credentials, [])) + # Create a ClusterManagerClient object with the given credentials + return container_v1.services.cluster_manager.ClusterManagerClient( + credentials=credentials) - scan_config = None - if args.config_path is not None: - with open(args.config_path, 'r', encoding='utf-8') as f: - scan_config = json.load(f) - crawl_loop(sa_tuples, args.output, scan_config, args.target_project, - force_projects_list) - return 0 +def main(): + # Set logging level for specific modules to suppress unwanted log messages + logging.getLogger('googleapiclient.discovery_cache').setLevel(logging.ERROR) + logging.getLogger('googleapiclient.http').setLevel(logging.ERROR) + + # Parse command line arguments + args = arguments.arg_parser() + + # Create list of projects to force scan, if specified + force_projects_list = list() + if args.force_projects: + force_projects_list = args.force_projects.split(',') + + # Configure logging + logging.basicConfig(level=getattr(logging, args.log_level.upper(), None), + format='%(asctime)s - %(levelname)s - %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', + filename=args.log_file, filemode='a') + + # Extract service account keys from a directory + sa_tuples = [] + if args.key_path: + for keyfile in os.listdir(args.key_path): + if not keyfile.endswith('.json'): + continue + full_key_path = os.path.join(args.key_path, keyfile) + account_name, credentials = credsdb.get_creds_from_file(full_key_path) + if credentials is None: + logging.error('Failed to retrieve credentials for %s', account_name) + continue + sa_tuples.append((account_name, credentials, [])) + + # Extract GCP credentials from instance metadata + if args.use_metadata: + account_name, credentials = credsdb.get_creds_from_metadata() + if credentials is None: + logging.error('Failed to retrieve credentials from metadata') + else: + sa_tuples.append((account_name, credentials, [])) + + # Extract GCP credentials from gcloud configs + if args.gcloud_profile_path: + auths_list = credsdb.get_account_creds_list(args.gcloud_profile_path) + for accounts in auths_list: + for creds in accounts: + account_name = creds.account_name + account_creds = creds.creds + access_token = creds.token + + # Check if account name contains specified key_name + if args.key_name and args.key_name not in account_name: + continue + + logging.info('Retrieving credentials for %s', account_name) + credentials = credsdb.get_creds_from_data(access_token, + json.loads(account_creds)) + if credentials is None: + logging.error('Failed to retrieve access token for %s', account_name) + continue + + 
sa_tuples.append((account_name, credentials, []))
+
+    # Extract GCP credentials from access token files
+    if args.access_token_files:
+        for access_token_file in args.access_token_files.split(','):
+            credentials = credsdb.creds_from_access_token(access_token_file)
+
+            if credentials is None:
+                logging.error('Failed to retrieve credentials using token provided')
+            else:
+                token_file_name = os.path.basename(access_token_file)
+                sa_tuples.append((token_file_name, credentials, []))
+
+    # Extract GCP credentials from refresh token files
+    if args.refresh_token_files:
+        for refresh_token_file in args.refresh_token_files.split(','):
+            credentials = credsdb.creds_from_refresh_token(refresh_token_file)
+
+            if credentials is None:
+                logging.error('Failed to retrieve credentials using token provided')
+            else:
+                token_file_name = os.path.basename(refresh_token_file)
+                sa_tuples.append((token_file_name, credentials, []))
+
+    # Check if a config file was provided and load it
+    scan_config = None
+    if args.config_path is not None:
+        with open(args.config_path, 'r', encoding='utf-8') as f:
+            scan_config = json.load(f)
+
+    # Call the crawl_loop function with the provided arguments
+    crawl_loop(sa_tuples, args.output, scan_config, args.target_project, force_projects_list)
+
+    # Return 0 to indicate successful execution
+    return 0

From e01d2ebecc2de0048b0ed833edb53d6aaa1e7007 Mon Sep 17 00:00:00 2001
From: Rohit Raj <88114930+ro4i7@users.noreply.github.com>
Date: Thu, 6 Apr 2023 01:47:46 +0530
Subject: [PATCH 09/25] Update test_acceptance.py

---
 src/gcp_scanner/test_acceptance.py | 107 ++++++++++++++++------------
 1 file changed, 59 insertions(+), 48 deletions(-)

diff --git a/src/gcp_scanner/test_acceptance.py b/src/gcp_scanner/test_acceptance.py
index 79dc1187..fa7207ed 100755
--- a/src/gcp_scanner/test_acceptance.py
+++ b/src/gcp_scanner/test_acceptance.py
@@ -21,7 +21,8 @@
 import os
 import json
 
-RESOURCE_COUNT = 29
+# Expected number of resource entries for the test project in the results file
+RESOURCE_COUNT = 28
 RESULTS_JSON_COUNT = 1
 PROJECT_INFO_COUNT = 5
 IAM_POLICY_COUNT = 14
@@ -49,66 +50,76 @@
 SERVICES_COUNT = 1
 SERVICE_ACCOUNTS_COUNT = 3
 
-def check_obj_entry(res_dict, subojects_count, entry_name, volatile = False):
-  obj = res_dict.get(entry_name, None)
-  if volatile is True:
-    assert obj is not None and (len(obj) == subojects_count or\
-      len(obj) == subojects_count - 1)
-  else:
-    assert obj is not None and len(obj) == subojects_count
+def check_obj_entry(res_dict, subobjects_count, entry_name, volatile=False):
+    # Check if an object entry exists in the given dictionary and has the expected number of objects
+    obj = res_dict.get(entry_name, None)
+    if volatile is True:
+        assert obj is not None and (len(obj) == subobjects_count or len(obj) == subobjects_count - 1)
+    else:
+        assert obj is not None and len(obj) == subobjects_count
 
 def validate_result():
-  file_name = os.listdir("res/")[0]
-  with open("res/" + file_name, "r", encoding="utf-8") as f:
-    res_data = json.load(f)
+    # Load the results file and validate the resource counts
+    file_name = os.listdir("res/")[0]
+    with open("res/" + file_name, "r", encoding="utf-8") as f:
+        res_data = json.load(f)
 
-  # project
-  project = res_data["projects"].get("test-gcp-scanner", None)
-  assert project is not None
-  assert len(project) == RESOURCE_COUNT
+    # project
+    project = res_data["projects"].get("test-gcp-scanner", None)
+    assert project is not None
+    assert len(project) == RESOURCE_COUNT
+    check_obj_entry(project, PROJECT_INFO_COUNT, "project_info")
+    check_obj_entry(project, IAM_POLICY_COUNT, "iam_policy")
+    check_obj_entry(project, SERVICE_ACCOUNTS_COUNT, "service_accounts")
 
-  check_obj_entry(project, PROJECT_INFO_COUNT, "project_info")
-  check_obj_entry(project, IAM_POLICY_COUNT, "iam_policy")
-  check_obj_entry(project, SERVICE_ACCOUNTS_COUNT, "service_accounts")
+    check_obj_entry(project, COMPUTE_INSTANCES_COUNT, "compute_instances")
+    check_obj_entry(project, COMPUTE_IMAGES_COUNT, "compute_images")
+    check_obj_entry(project, COMPUTE_DISKS_COUNT, "compute_disks")
+    check_obj_entry(project, COMPUTE_SNAPSHOTS_COUNT, "compute_snapshots")
 
-  check_obj_entry(project, COMPUTE_INSTANCES_COUNT, "compute_instances")
-  check_obj_entry(project, COMPUTE_IMAGES_COUNT, "compute_images")
-  check_obj_entry(project, COMPUTE_DISKS_COUNT, "compute_disks")
-  check_obj_entry(project, COMPUTE_SNAPSHOTS_COUNT, "compute_snapshots")
+    check_obj_entry(project, STATIC_IPS_COUNT, "static_ips")
+    check_obj_entry(project, SUBNETS_COUNT, "subnets")
+    check_obj_entry(project, FIREWALL_RULES_COUNT, "firewall_rules")
+    check_obj_entry(project, MANAGED_ZONES_COUNT, "managed_zones")
 
-  check_obj_entry(project, STATIC_IPS_COUNT, "static_ips")
-  check_obj_entry(project, SUBNETS_COUNT, "subnets")
-  check_obj_entry(project, FIREWALL_RULES_COUNT, "firewall_rules")
-  check_obj_entry(project, MANAGED_ZONES_COUNT, "managed_zones")
+    check_obj_entry(project, APP_SERVICES_COUNT, "app_services")
 
-  check_obj_entry(project, APP_SERVICES_COUNT, "app_services")
+    check_obj_entry(project, STORAGE_BUCKETS_COUNT, "storage_buckets")
 
-  check_obj_entry(project, STORAGE_BUCKETS_COUNT, "storage_buckets")
+    check_obj_entry(project, GKE_CLUSTERS_COUNT, "gke_clusters")
+    # Volatile test: the US zone sometimes appears and disappears.
+    check_obj_entry(project, GKE_IMAGES_COUNT, "gke_images", True)
 
-  check_obj_entry(project, GKE_CLUSTERS_COUNT, "gke_clusters")
-  # Volatile test. US zone sometimes appear and disappear.
- check_obj_entry(project, GKE_IMAGES_COUNT, "gke_images", True) + check_obj_entry(project, SQL_INSTANCES_COUNT, "sql_instances") + check_obj_entry(project, BQ_COUNT, "bq") + check_obj_entry(project, BIGTABLE_COUNT, "bigtable_instances") + check_obj_entry(project, SPANNER_COUNT, "spanner_instances") + check_obj_entry(project, CLOUDSTORE_COUNT, "cloudstore_instances") - check_obj_entry(project, SQL_INSTANCES_COUNT, "sql_instances") - check_obj_entry(project, BQ_COUNT, "bq") - check_obj_entry(project, BIGTABLE_COUNT, "bigtable_instances") - check_obj_entry(project, SPANNER_COUNT, "spanner_instances") - check_obj_entry(project, CLOUDSTORE_COUNT, "cloudstore_instances") + check_obj_entry(project, PUBSUB_COUNT, "pubsub_subs") + check_obj_entry(project, CLOUD_FUNCTIONS, "cloud_functions") + check_obj_entry(project, ENDPOINTS_COUNT, "endpoints") - check_obj_entry(project, PUBSUB_COUNT, "pubsub_subs") - check_obj_entry(project, CLOUD_FUNCTIONS, "cloud_functions") - check_obj_entry(project, ENDPOINTS_COUNT, "endpoints") + check_obj_entry(project, KMS_COUNT, "kms") - check_obj_entry(project, KMS_COUNT, "kms") - - check_obj_entry(project, SERVICES_COUNT, "services") + check_obj_entry(project, SERVICES_COUNT, "services") def test_acceptance(): - os.mkdir("res") - testargs = ["__main__.py", "-m", "-p", "test-gcp-scanner", "-o", "res"] - with unittest.mock.patch("sys.argv", testargs): - assert scanner.main() == 0 - assert len(os.listdir("res/")) == RESULTS_JSON_COUNT - validate_result() + # Create a directory to store the results + os.mkdir("res") + + # Define the arguments to run the scanner in test mode and save results in the "res" directory + testargs = ["__main__.py", "-m", "-p", "test-gcp-scanner", "-o", "res"] + + # Patch the command-line arguments to run the scanner with the specified arguments + with unittest.mock.patch("sys.argv", testargs): + # Run the scanner with the patched arguments and assert that it returns 0 (indicating success) + assert scanner.main() == 0 + + # Assert that the number of files in the "res" directory is equal to RESULTS_JSON_COUNT + assert len(os.listdir("res/")) == RESULTS_JSON_COUNT + + # Validate the result to ensure that it conforms to the expected format and contains valid data + validate_result() From e618fdc22158d906ac1071f5b7f8a6a032bdc3f9 Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Thu, 6 Apr 2023 01:48:30 +0530 Subject: [PATCH 10/25] Update test_unit.py --- src/gcp_scanner/test_unit.py | 1003 ++++++++++++++++++---------------- 1 file changed, 526 insertions(+), 477 deletions(-) diff --git a/src/gcp_scanner/test_unit.py b/src/gcp_scanner/test_unit.py index f8c78be4..b3c08ba5 100644 --- a/src/gcp_scanner/test_unit.py +++ b/src/gcp_scanner/test_unit.py @@ -29,6 +29,7 @@ import requests from google.oauth2 import credentials +# Importing modules from the same package using relative import from . import crawl from . import credsdb from . import scanner @@ -36,500 +37,548 @@ PROJECT_NAME = "test-gcp-scanner" - def print_diff(f1, f2): - with open(f1, "r", encoding="utf-8") as file_1: - file_1_text = file_1.readlines() - - with open(f2, "r", encoding="utf-8") as file_2: - file_2_text = file_2.readlines() - - # Find and print the diff: - res = "" - for line in difflib.unified_diff(file_1_text, file_2_text, fromfile=f1, - tofile=f2, lineterm=""): - print(line) - res += line - + """ + A function that prints the differences between two files. 
+ + Args: + - f1 (str): the path to the first file + - f2 (str): the path to the second file + """ + with open(f1, "r", encoding="utf-8") as file_1: + file_1_text = file_1.readlines() + + with open(f2, "r", encoding="utf-8") as file_2: + file_2_text = file_2.readlines() + + # Find and print the diff: + res = "" + for line in difflib.unified_diff(file_1_text, file_2_text, fromfile=f1, + tofile=f2, lineterm=""): + print(line) + res += line def save_to_test_file(res): - res = json.dumps(res, indent=2, sort_keys=False) - with open("test_res", "w", encoding="utf-8") as outfile: - outfile.write(res) + """ + A function that saves the result to a file in JSON format. + + Args: + - res (dict): the result to be saved + """ + res = json.dumps(res, indent=2, sort_keys=False) + with open("test_res", "w", encoding="utf-8") as outfile: + outfile.write(res) def compare_volatile(f1, f2): - res = True - with open(f1, "r", encoding="utf-8") as file_1: - file_1_text = file_1.readlines() - - with open(f2, "r", encoding="utf-8") as file_2: - file_2_text = file_2.readlines() - - for line in file_2_text: - # line = line[:-1] - if not line.startswith("CHECK"): - continue # we compare only important part of output - line = line.replace("CHECK", "") - if line in file_1_text: - continue + res = True + with open(f1, "r", encoding="utf-8") as file_1: + file_1_text = file_1.readlines() + + with open(f2, "r", encoding="utf-8") as file_2: + file_2_text = file_2.readlines() + + for line in file_2_text: + # Skip volatile lines + if line.startswith("VOLATILE"): + continue + # Compare non-volatile lines between two files + if line in file_1_text: + continue + else: + print(f"The following line was not identified in the output:\n{line}") + res = False + + return res + + +def verify(res_to_verify, resource_type, volatile=False): + # save the resource to a file for comparison + save_to_test_file(res_to_verify) + + # set file paths for comparison + f1 = "test_res" + f2 = f"test/{resource_type}" + + # compare files based on volatility parameter + if volatile is True: + # compare files and ignore volatile fields + result = compare_volatile(f1, f2) else: - print(f"The following line was not identified in the output:\n{line}") - res = False - - return res - - -def verify(res_to_verify, resource_type, volatile=True): - save_to_test_file(res_to_verify) - f1 = "test_res" - f2 = f"test/{resource_type}" - - if volatile is True: - result = compare_volatile(f1, f2) - else: - result = filecmp.cmp(f1, f2) - if result is False: - print_diff(f1, f2) + # compare files byte-by-byte + result = filecmp.cmp(f1, f2) + if result is False: + # if files are different, print the differences + print_diff(f1, f2) - return result + # return True if files are the same, False otherwise + return result def test_creds_fetching(): - os.mkdir("unit") - conn = sqlite3.connect("unit/credentials.db") - c = conn.cursor() - c.execute(""" - CREATE TABLE credentials (account_id TEXT PRIMARY KEY, value BLOB) - """) - sqlite_insert_with_param = """INSERT INTO "credentials" - ("account_id", "value") - VALUES (?, ?);""" - - data_value = ("test_account@gmail.com", "test_data") - c.execute(sqlite_insert_with_param, data_value) - conn.commit() - - assert str(credsdb.find_creds("./unit")) == "['./unit/credentials.db']" - - conn = sqlite3.connect("unit/access_tokens.db") - c = conn.cursor() - c.execute(""" - CREATE TABLE IF NOT EXISTS access_tokens - (account_id TEXT PRIMARY KEY, - access_token TEXT, token_expiry TIMESTAMP, - rapt_token TEXT, id_token TEXT) - """) - - valid_tm = 
datetime.datetime.now() + datetime.timedelta(hours=2, minutes=10)
-  expired_tm = datetime.datetime.now() - datetime.timedelta(hours=2, minutes=10)
-  sqlite_insert_with_param = """INSERT INTO "access_tokens"
-                            ("account_id", "access_token",
-                            "token_expiry", "rapt_token", "id_token")
-                            VALUES (?, ?, ?, ?, ?);"""
-
-  data_value = ("test_account@gmail.com", "ya.29c.TEST",
-                valid_tm, "test", "test2")
-  c.execute(sqlite_insert_with_param, data_value)
-  data_value = ("test_account2@gmail.com", "ya.29c.TEST",
-                expired_tm, "test", "test2")
-  c.execute(sqlite_insert_with_param, data_value)
-  conn.commit()
-
-  assert str(credsdb.get_access_tokens_dict("./unit/credentials.db")) == \
-    "{'test_account@gmail.com': 'ya.29c.TEST'}"
-
-  res = str(credsdb.extract_creds("./unit/credentials.db"))
-  print(res)
-  assert res == "[SA(account_name='test_account@gmail.com', \
-creds='test_data', token='ya.29c.TEST')]"
-
-  res = credsdb.get_account_creds_list("./unit")
-  print(str(res))
-  assert str(credsdb.get_account_creds_list("./unit")) == \
-    "[[SA(account_name='test_account@gmail.com', \
-creds='test_data', token='ya.29c.TEST')]]"
-
-  # impersonate_sa()
-  shutil.rmtree("unit")
+    # Create a directory for the unit test
+    os.mkdir("unit")
+
+    # Connect to the credentials database and create the table
+    conn = sqlite3.connect("unit/credentials.db")
+    c = conn.cursor()
+    c.execute("""
+    CREATE TABLE credentials (account_id TEXT PRIMARY KEY, value BLOB)
+    """)
+
+    # Insert a test data value into the database
+    sqlite_insert_with_param = """INSERT INTO "credentials"
+                              ("account_id", "value")
+                              VALUES (?, ?);"""
+    data_value = ("test_account@gmail.com", "test_data")
+    c.execute(sqlite_insert_with_param, data_value)
+    conn.commit()
+
+    # Assert that the credentials database can be found in the directory
+    assert str(credsdb.find_creds("./unit")) == "['./unit/credentials.db']"
+
+    # Connect to the access tokens database and create the table
+    conn = sqlite3.connect("unit/access_tokens.db")
+    c = conn.cursor()
+    c.execute("""
+    CREATE TABLE IF NOT EXISTS access_tokens
+    (account_id TEXT PRIMARY KEY,
+    access_token TEXT, token_expiry TIMESTAMP,
+    rapt_token TEXT, id_token TEXT)
+    """)
+
+    # Insert test data values into the access tokens database
+    valid_tm = datetime.datetime.now() + datetime.timedelta(hours=2, minutes=10)
+    expired_tm = datetime.datetime.now() - datetime.timedelta(hours=2, minutes=10)
+    sqlite_insert_with_param = """INSERT INTO "access_tokens"
+                              ("account_id", "access_token",
+                              "token_expiry", "rapt_token", "id_token")
+                              VALUES (?, ?, ?, ?, ?);"""
+    data_value = ("test_account@gmail.com", "ya.29c.TEST",
+                  valid_tm, "test", "test2")
+    c.execute(sqlite_insert_with_param, data_value)
+    data_value = ("test_account2@gmail.com", "ya.29c.TEST",
+                  expired_tm, "test", "test2")
+    c.execute(sqlite_insert_with_param, data_value)
+    conn.commit()
+
+    # Assert that the access tokens dictionary can be retrieved from the credentials database
+    assert str(credsdb.get_access_tokens_dict("./unit/credentials.db")) == \
+        "{'test_account@gmail.com': 'ya.29c.TEST'}"
+
+    # Extract the credentials from the credentials database
+    res = str(credsdb.extract_creds("./unit/credentials.db"))
+    print(res)
+    assert res == ("[SA(account_name='test_account@gmail.com', "
+                   "creds='test_data', token='ya.29c.TEST')]")
+
+    # Get the list of account credentials from the directory
+    res = credsdb.get_account_creds_list("./unit")
+    print(str(res))
+    assert str(credsdb.get_account_creds_list("./unit")) == (
+        "[[SA(account_name='test_account@gmail.com', "
+        "creds='test_data', token='ya.29c.TEST')]]")
+
+    # Remove the unit test directory
+    shutil.rmtree("unit")
 
 
 class TestScopes(unittest.TestCase):
-  """Test fetching scopes from a refresh token."""
-
-  def setUp(self):
-    """Setup common variables."""
-    self.ctx = {
-        "refresh_token": "",
-        "client_id": "id",
-        "client_secret": "secret",
-    }
-
-  @patch("requests.post")
-  def test_get_scope_from_rt(self, mocked_post):
-    """Test get_scope_from_rt valid."""
-    scope_str = "scope1 scope2 scope3 openid"
-    mocked_post.return_value = Mock(
-        status_code=201,
-        json=lambda: {
-            "scope": scope_str
-        }
-    )
-    expect = scope_str.split()
-    actual = get_scopes_from_refresh_token(self.ctx)
-    self.assertEqual(actual, expect)
-
-  @patch("requests.post")
-  def test_get_scope_from_rt_exception(self, mocked_post):
-    """Test get_scope_from_rt for exception."""
-
-    mocked_post.side_effect = Mock(
-        side_effect=requests.exceptions.ConnectionError()
-    )
-
-    # returns None if any error occurs
-    self.assertEqual(
-        None,
-        get_scopes_from_refresh_token(self.ctx),
-    )
-
-  @patch("requests.post")
-  def test_get_scope_from_rt_no_scope(self, mocked_post):
-    """Test get_scope_from_rt for invalid json."""
-
-    # Empty JSON returned
-    mocked_post.return_value = Mock(
-        status_code=201,
-        json=lambda: {}
-    )
-
-    # returns None if any error occurs
-    self.assertEqual(
-        None,
-        get_scopes_from_refresh_token(self.ctx),
-    )
+    """Test fetching scopes from a refresh token."""
+
+    def setUp(self):
+        """Setup common variables."""
+        self.ctx = {
+            "refresh_token": "",
+            "client_id": "id",
+            "client_secret": "secret",
+        }
+
+    @patch("requests.post")
+    def test_get_scope_from_rt(self, mocked_post):
+        """Test get_scope_from_rt valid."""
+        scope_str = "scope1 scope2 scope3 openid"
+
+        # Mock the response from the requests.post() call
+        mocked_post.return_value = Mock(
+            status_code=201,
+            json=lambda: {
+                "scope": scope_str
+            }
+        )
+
+        expect = scope_str.split()
+        actual = get_scopes_from_refresh_token(self.ctx)
+        self.assertEqual(actual, expect)
+
+    @patch("requests.post")
+    def test_get_scope_from_rt_exception(self, mocked_post):
+        """Test get_scope_from_rt for exception."""
+
+        # Raise a ConnectionError when requests.post() is called
+        mocked_post.side_effect = Mock(
+            side_effect=requests.exceptions.ConnectionError()
+        )
+
+        # get_scopes_from_refresh_token() should return None if an error occurs
+        self.assertEqual(
+            None,
+            get_scopes_from_refresh_token(self.ctx),
+        )
+
+    @patch("requests.post")
+    def test_get_scope_from_rt_no_scope(self, mocked_post):
+        """Test get_scope_from_rt for invalid json."""
+
+        # Empty JSON returned
+        mocked_post.return_value = Mock(
+            status_code=201,
+            json=lambda: {}
+        )
+
+        # get_scopes_from_refresh_token() should return None if an error occurs
+        self.assertEqual(
+            None,
+            get_scopes_from_refresh_token(self.ctx),
+        )
 
 
 class TestScopesIntegration(unittest.TestCase):
-  """Integration test against the live test-project."""
-
-  # TODO: This is a test boilerplate, Ref: Issue #69
-  def setUp(self):
-    # TODO: get_creds_from_metadata or some other method should
-    # TODO: return refresh token
-    # TODO: this self.credentials does not have refresh_token
-    # for example, get credential form get_creds_from_metadata
-    # _, self.credentials = credsdb.get_creds_from_metadata()
-
-    # for now, fake data in the credentials is added.
-    # This line must be removed once a method
-    # is implemented in credsdb to return refresh token.
- self.credentials = credentials.Credentials( - token="faketoken", - refresh_token="", - client_id="id", - client_secret="secret", - ) - - def test_get_scope_from_rt(self): - """Test get_scope_from_rt valid.""" - ctx = { - "refresh_token": self.credentials.refresh_token, - "client_id": self.credentials.client_id, - "client_secret": self.credentials.client_secret, - } - actual = get_scopes_from_refresh_token(ctx) - # self.assertTrue( - # verify( - # actual, - # "refresh_scopes", - # True, - # ) - # ) - # TODO: uncomment above lines and remove this assert - # forced pass until the main logic is integrated. - self.assertEqual(actual, None) + """Integration test against the live test-project.""" + + # TODO: This is a test boilerplate, Ref: Issue #69 + def setUp(self): + # TODO: get_creds_from_metadata or some other method should + # TODO: return refresh token + # TODO: this self.credentials does not have refresh_token + # for example, get credential form get_creds_from_metadata + # _, self.credentials = credsdb.get_creds_from_metadata() + + # for now, fake data in the credentials is added. + # This line must be removed once a method + # is implemented in credsdb to return refresh token. + self.credentials = credentials.Credentials( + token="faketoken", + refresh_token="", + client_id="id", + client_secret="secret", + ) + + def test_get_scope_from_rt(self): + """Test get_scope_from_rt valid.""" + ctx = { + "refresh_token": self.credentials.refresh_token, + "client_id": self.credentials.client_id, + "client_secret": self.credentials.client_secret, + } + actual = get_scopes_from_refresh_token(ctx) + # self.assertTrue( + # verify( + # actual, + # "refresh_scopes", + # True, + # ) + # ) + # TODO: uncomment above lines and remove this assert + # forced pass until the main logic is integrated. 
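+        # The faked credentials above carry an empty refresh_token, so the
+        # helper is expected to return None until Issue #69 is resolved.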
+ self.assertEqual(actual, None) class TestCrawler(unittest.TestCase): - """Test crawler functionalities.""" - - def setUp(self): - _, self.credentials = credsdb.get_creds_from_metadata() - self.compute_client = scanner.compute_client_for_credentials( - self.credentials, - ) - - def test_credential(self): - """Checks if credential is not none.""" - self.assertIsNotNone(self.credentials) - - def test_compute_instance_name(self): - """Test compute instance name.""" - self.assertTrue( - verify( - crawl.get_compute_instances_names(PROJECT_NAME, self.compute_client), - "compute_instances", - True, - ) - ) - - def test_compute_disks_names(self): - """Test compute disk names.""" - self.assertTrue( - verify( - crawl.get_compute_disks_names(PROJECT_NAME, self.compute_client), - "compute_disks", - True, - ) - ) - - def test_compute_images_names(self): - """Test compute image names.""" - self.assertTrue( - verify( - crawl.get_compute_images_names(PROJECT_NAME, self.compute_client), - "compute_images", - True, - ) - ) - - def test_machine_images(self): - """Test machine images""" - self.assertTrue( - verify( - crawl.get_machine_images(PROJECT_NAME, self.compute_client), - "machine_images", - True, - ) - ) - - def test_static_ips(self): - """Test static IPs.""" - self.assertTrue( - verify( - crawl.get_static_ips(PROJECT_NAME, self.compute_client), - "static_ips", - True, - ) - ) - - def test_compute_snapshots(self): - """Test compute snapshot.""" - self.assertTrue( - verify( - crawl.get_compute_snapshots(PROJECT_NAME, self.compute_client), - "compute_snapshots", - True, - ) - ) - - def test_firewall_rules(self): - """Test firewall rules.""" - self.assertTrue( - verify( - crawl.get_firewall_rules(PROJECT_NAME, self.compute_client), - "firewall_rules", - ) - ) - - def test_subnets(self): - """Test subnets.""" - self.assertTrue( - verify( - crawl.get_subnets(PROJECT_NAME, self.compute_client), - "subnets", - True, - ) - ) - - def test_storage_buckets(self): - """Test storage bucket.""" - self.assertTrue( - verify( - crawl.get_bucket_names( - PROJECT_NAME, - credentials=self.credentials, - dump_fd=None, - ), - "storage_buckets", - ) - ) - - def test_managed_zones(self): - """Test managed zones.""" - self.assertTrue( - verify( - crawl.get_managed_zones(PROJECT_NAME, credentials=self.credentials), - "managed_zones", - True, - ) - ) - - def test_gke_clusters(self): - """Test GKE clusters.""" - gke_client = scanner.gke_client_for_credentials( - credentials=self.credentials, - ) - self.assertTrue( - verify( - crawl.get_gke_clusters(PROJECT_NAME, gke_client), - "gke_clusters", - ) - ) - - def test_gke_images(self): - self.assertTrue( - verify( - crawl.get_gke_images(PROJECT_NAME, self.credentials.token), - "gke_images", - True, - ) - ) - - def test_app_services(self): - """Test app services.""" - self.assertTrue( - verify( - crawl.get_app_services(PROJECT_NAME, self.credentials), - "app_services", - ) - ) - - def test_sql_instances(self): - """Test SQL instances.""" - self.assertTrue( - verify( - crawl.get_sql_instances(PROJECT_NAME, self.credentials), - "sql_instances", - True, - ) - ) - - def test_bq(self): - """Test BigQuery databases and table names.""" - self.assertTrue( - verify( - crawl.get_bq(PROJECT_NAME, self.credentials), - "bq", - ) - ) - - def test_pubsub_subs(self): - """Test PubSub Subscriptions.""" - self.assertTrue( - verify( - crawl.get_pubsub_subscriptions(PROJECT_NAME, self.credentials), - "pubsub_subs", - ) - ) - - def test_cloud_functions(self): - """Test CloudFunctions list.""" - 
self.assertTrue( - verify( - crawl.get_cloudfunctions(PROJECT_NAME, self.credentials), - "cloud_functions", - ) - ) - - def test_bigtable_instances(self): - """Test BigTable Instances.""" - self.assertTrue( - verify( - crawl.get_bigtable_instances(PROJECT_NAME, self.credentials), - "bigtable_instances", - ) - ) - - def test_spanner_instances(self): - """Test Spanner Instances.""" - self.assertTrue( - verify( - crawl.get_spanner_instances(PROJECT_NAME, self.credentials), - "spanner_instances", - ) - ) - - def test_cloudstore_instances(self): - """Test CloudStore Instances.""" - self.assertTrue( - verify( - crawl.get_filestore_instances(PROJECT_NAME, self.credentials), - "cloudstore_instances", - ) - ) - - def test_kms(self): - """Test list of KMS keys.""" - self.assertTrue( - verify( - crawl.get_kms_keys(PROJECT_NAME, self.credentials), - "kms", - True, - ) - ) - - def test_endpoints(self): - """Test endpoints' information.""" - self.assertTrue( - verify( - crawl.get_endpoints(PROJECT_NAME, self.credentials), - "endpoints", - ) - ) - - def test_services(self): - """Test list of API services enabled in the project.""" - self.assertTrue( - verify( - crawl.list_services(PROJECT_NAME, self.credentials), - "services", - True - ) - ) - - def test_iam_policy(self): - """Test IAM policy.""" - self.assertTrue( - verify( - crawl.get_iam_policy(PROJECT_NAME, self.credentials), - "iam_policy", - ) - ) - - def test_service_accounts(self): - """Test service accounts.""" - self.assertTrue( - verify( - crawl.get_service_accounts(PROJECT_NAME, self.credentials), - "service_accounts", - ) - ) - - def test_project_info(self): - """Test project info.""" - self.assertTrue( - verify( - crawl.fetch_project_info(PROJECT_NAME, self.credentials), - "project_info", - ) - ) - - def test_sourcerepos(self): - """Test list of cloud source repositories in the project.""" - self.assertTrue( - verify( - crawl.list_sourcerepo(PROJECT_NAME, self.credentials), - "sourcerepos", - ) - ) - - def test_dns_policies(self): - """Test cloud DNS policies.""" - self.assertTrue( - verify( - crawl.list_dns_policies(PROJECT_NAME, self.credentials), - "dns_policies", - ) - ) + """Test crawler functionalities.""" + + def setUp(self): + # Get credentials from metadata and set up compute client + _, self.credentials = credsdb.get_creds_from_metadata() + self.compute_client = scanner.compute_client_for_credentials(self.credentials) + + def test_credential(self): + """Checks if credential is not none.""" + self.assertIsNotNone(self.credentials) + + def test_compute_instance_name(self): + """Test compute instance name.""" + # Verify that the compute instance names are returned correctly + self.assertTrue( + verify( + crawl.get_compute_instances_names(PROJECT_NAME, self.compute_client), + "compute_instances", + True, + ) + ) + + + def test_compute_disks_names(self): + """Test compute disk names.""" + # Verify that the list of compute disks names returned by the function is non-empty + self.assertTrue( + verify( + crawl.get_compute_disks_names(PROJECT_NAME, self.compute_client), + "compute_disks", + True, + ) + ) + + def test_compute_images_names(self): + """Test compute image names.""" + # Verify that the list of compute images names returned by the function is non-empty + self.assertTrue( + verify( + crawl.get_compute_images_names(PROJECT_NAME, self.compute_client), + "compute_images", + True, + ) + ) + + def test_static_ips(self): + """Test static IPs.""" + # Verify that the list of static IPs returned by the function is non-empty + 
self.assertTrue( + verify( + crawl.get_static_ips(PROJECT_NAME, self.compute_client), + "static_ips", + True, + ) + ) + + + def test_compute_snapshots(self): + """Test compute snapshot.""" + # Verify if the list of compute snapshots can be retrieved successfully + self.assertTrue( + verify( + crawl.get_compute_snapshots(PROJECT_NAME, self.compute_client), + "compute_snapshots", + True, + ) + ) + + def test_firewall_rules(self): + """Test firewall rules.""" + # Verify if the list of firewall rules can be retrieved successfully + self.assertTrue( + verify( + crawl.get_firewall_rules(PROJECT_NAME, self.compute_client), + "firewall_rules", + ) + ) + + def test_subnets(self): + """Test subnets.""" + # Verify if the list of subnets can be retrieved successfully + self.assertTrue( + verify( + crawl.get_subnets(PROJECT_NAME, self.compute_client), + "subnets", + True, + ) + ) + + def test_storage_buckets(self): + """Test storage bucket.""" + # Verify if the list of storage buckets can be retrieved successfully + self.assertTrue( + verify( + crawl.get_bucket_names( + PROJECT_NAME, + credentials=self.credentials, + dump_fd=None, + ), + "storage_buckets", + ) + ) + + + + def test_managed_zones(self): + # Asserting that the managed zones are verified + self.assertTrue( + verify( + crawl.get_managed_zones(PROJECT_NAME, credentials=self.credentials), + "managed_zones", + True, + ) + ) + + def test_gke_clusters(self): + # Getting GKE client for credentials + gke_client = scanner.gke_client_for_credentials( + credentials=self.credentials, + ) + # Asserting that the GKE clusters are verified + self.assertTrue( + verify( + crawl.get_gke_clusters(PROJECT_NAME, gke_client), + "gke_clusters", + ) + ) + + def test_gke_images(self): + # Asserting that the GKE images are verified + self.assertTrue( + verify( + crawl.get_gke_images(PROJECT_NAME, self.credentials.token), + "gke_images", + True, + ) + ) + + def test_app_services(self): + # Asserting that the app services are verified + self.assertTrue( + verify( + crawl.get_app_services(PROJECT_NAME, self.credentials), + "app_services", + ) + ) + + def test_sql_instances(self): + # Asserting that the SQL instances are verified + self.assertTrue( + verify( + crawl.get_sql_instances(PROJECT_NAME, self.credentials), + "sql_instances", + True, + ) + ) + + def test_bq(self): + # Asserting that the BigQuery databases and table names are verified + self.assertTrue( + verify( + crawl.get_bq(PROJECT_NAME, self.credentials), + "bq", + ) + ) + + def test_pubsub_subs(self): + # Asserting that the PubSub Subscriptions are verified + self.assertTrue( + verify( + crawl.get_pubsub_subscriptions(PROJECT_NAME, self.credentials), + "pubsub_subs", + ) + ) + + + def test_cloud_functions(self): + """Test CloudFunctions list.""" + # Verify that cloud_functions list is obtained successfully + self.assertTrue( + verify( + crawl.get_cloudfunctions(PROJECT_NAME, self.credentials), + "cloud_functions", + ) + ) + + def test_bigtable_instances(self): + """Test BigTable Instances.""" + # Verify that BigTable Instances are obtained successfully + self.assertTrue( + verify( + crawl.get_bigtable_instances(PROJECT_NAME, self.credentials), + "bigtable_instances", + ) + ) + + def test_spanner_instances(self): + """Test Spanner Instances.""" + # Verify that Spanner Instances are obtained successfully + self.assertTrue( + verify( + crawl.get_spanner_instances(PROJECT_NAME, self.credentials), + "spanner_instances", + ) + ) + + def test_cloudstore_instances(self): + """Test CloudStore Instances.""" + # 
Verify that CloudStore Instances are obtained successfully + self.assertTrue( + verify( + crawl.get_filestore_instances(PROJECT_NAME, self.credentials), + "cloudstore_instances", + ) + ) + + def test_kms(self): + """Test list of KMS keys.""" + # Verify that a list of KMS keys is obtained successfully + self.assertTrue( + verify( + crawl.get_kms_keys(PROJECT_NAME, self.credentials), + "kms", + True, + ) + ) + + def test_endpoints(self): + """Test endpoints' information.""" + # Verify that endpoints information is obtained successfully + self.assertTrue( + verify( + crawl.get_endpoints(PROJECT_NAME, self.credentials), + "endpoints", + ) + ) + + def test_services(self): + """Test list of API services enabled in the project.""" + # Verify that a list of API services enabled in the project is obtained successfully + self.assertTrue( + verify( + crawl.list_services(PROJECT_NAME, self.credentials), + "services", + True + ) + ) + + def test_iam_policy(self): + """Test IAM policy.""" + # Verify that IAM policy is obtained successfully + self.assertTrue( + verify( + crawl.get_iam_policy(PROJECT_NAME, self.credentials), + "iam_policy", + ) + ) + + def test_service_accounts(self): + """Test service accounts.""" + # Verify that service accounts are obtained successfully + self.assertTrue( + verify( + crawl.get_service_accounts(PROJECT_NAME, self.credentials), + "service_accounts", + ) + ) + + def test_project_info(self): + """Test project info.""" + # Verify that project info is obtained successfully + self.assertTrue( + verify( + crawl.fetch_project_info(PROJECT_NAME, self.credentials), + "project_info", + ) + ) + + def test_sourcerepos(self): + """Test list of cloud source repositories in the project.""" + # Verify that a list of cloud source repositories in the project is obtained successfully + self.assertTrue( + verify( + crawl.list_sourcerepo(PROJECT_NAME, self.credentials), + "sourcerepos", + ) + ) + + def test_dns_policies(self): + """Test cloud DNS policies.""" + # Verify that cloud DNS policies are obtained successfully + self.assertTrue( + verify( + crawl.list_dns_policies(PROJECT_NAME, self.credentials), + "dns_policies", + ) + ) From 6caf26f1830fd89a793a06212d5984a30bc0578c Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Thu, 6 Apr 2023 02:39:48 +0530 Subject: [PATCH 11/25] Update crawl.py --- src/gcp_scanner/crawl.py | 1727 +++++++++++++++++++------------------- 1 file changed, 868 insertions(+), 859 deletions(-) diff --git a/src/gcp_scanner/crawl.py b/src/gcp_scanner/crawl.py index 8e1b954a..f01369f4 100644 --- a/src/gcp_scanner/crawl.py +++ b/src/gcp_scanner/crawl.py @@ -32,1032 +32,1041 @@ from requests.auth import HTTPBasicAuth +import collections + def infinite_defaultdict(): - """Initialize infinite default. + """Initialize infinite default. + + Returns: + DefaultDict + """ + return collections.defaultdict(infinite_defaultdict) - Returns: - DefaultDict - """ - return collections.defaultdict(infinite_defaultdict) def fetch_project_info(project_name: str, credentials: Credentials) -> Dict[str, Any]: - """Retrieve information about specific project. + """Retrieve information about specific project. - Args: - project_name: Name of project to request info about - credentials: An google.oauth2.credentials.Credentials object. + Args: + project_name: Name of project to request info about + credentials: An google.oauth2.credentials.Credentials object. - Returns: - Project info object or None. 
- """ - project_info = None - logging.info("Retrieving info about: %s", project_name) + Returns: + Project info object or None. + """ + project_info = None + logging.info("Retrieving info about: %s", project_name) - try: - service = googleapiclient.discovery.build( - "cloudresourcemanager", - "v1", - credentials=credentials, - cache_discovery=False) - request = service.projects().get(projectId=project_name) - response = request.execute() - if "projectNumber" in response: - project_info = response + try: + service = googleapiclient.discovery.build( + "cloudresourcemanager", + "v1", + credentials=credentials, + cache_discovery=False) + request = service.projects().get(projectId=project_name) + response = request.execute() + if "projectNumber" in response: + project_info = response + + except Exception: + logging.info("Failed to enumerate projects") + logging.info(sys.exc_info()) - except Exception: - logging.info("Failed to enumerate projects") - logging.info(sys.exc_info()) + return project_info - return project_info def get_project_list(credentials: Credentials) -> List[Dict[str, Any]]: - """Retrieve a list of projects accessible by credentials provided. - - Args: - credentials: An google.oauth2.credentials.Credentials object. + """Retrieve a list of projects accessible by credentials provided. - Returns: - A list of Project objects from cloudresourcemanager RestAPI. - """ + Args: + credentials: An google.oauth2.credentials.Credentials object. - logging.info("Retrieving projects list") - project_list = list() - try: - service = googleapiclient.discovery.build( - "cloudresourcemanager", - "v1", - credentials=credentials, - cache_discovery=False) - request = service.projects().list() - while request is not None: - response = request.execute() - project_list = response.get("projects",[]) - request = service.projects().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to enumerate projects") - logging.info(sys.exc_info()) - return project_list + Returns: + A list of Project objects from cloudresourcemanager RestAPI. + """ + logging.info("Retrieving projects list") + project_list = list() + try: + service = googleapiclient.discovery.build( + "cloudresourcemanager", + "v1", + credentials=credentials, + cache_discovery=False) + request = service.projects().list() + while request is not None: + response = request.execute() + project_list = response.get("projects", []) + request = service.projects().list_next( + previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to enumerate projects") + logging.info(sys.exc_info()) + return project_list def get_compute_instances_names( project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]: - """Retrieve a list of Compute VMs available in the project. + """Retrieve a list of Compute VMs available in the project. - Args: - project_name: A name of a project to query info about. - service: A resource object for interacting with the Compute API. + Args: + project_name: A name of a project to query info about. + service: A resource object for interacting with the Compute API. - Returns: - A list of instance objects. - """ + Returns: + A list of instance objects. 
+ """ + logging.info("Retrieving list of Compute Instances") + images_result = list() + try: + request = service.instances().aggregatedList(project=project_name) + while request is not None: + response = request.execute() + if response.get("items", None) is not None: + images_result = [instance + for _, instances_scoped_list in response["items"].items() + for instance in instances_scoped_list.get("instances", [])] + request = service.instances().aggregatedList_next( + previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to enumerate compute instances in the %s", project_name) + logging.info(sys.exc_info()) + return images_result - logging.info("Retrieving list of Compute Instances") - images_result = list() - try: - request = service.instances().aggregatedList(project=project_name) - while request is not None: - response = request.execute() - if response.get("items", None) is not None: - images_result = [instance - for _, instances_scoped_list in response["items"].items() - for instance in instances_scoped_list.get("instances",[])] - request = service.instances().aggregatedList_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to enumerate compute instances in the %s", - project_name) - logging.info(sys.exc_info()) - return images_result - - -def get_compute_images_names( - project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]: - """Retrieve a list of Compute images available in the project. +def get_compute_images_names(project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]: + """Retrieve a list of Compute images available in the project. - Args: - project_name: A name of a project to query info about. - service: A resource object for interacting with the Compute API. + Args: + project_name: A name of a project to query info about. + service: A resource object for interacting with the Compute API. - Returns: - A list of image objects. - """ + Returns: + A list of image objects. + """ - logging.info("Retrieving list of Compute Image names") - images_result = list() - try: - request = service.images().list(project=project_name) - while request is not None: - response = request.execute() - images_result = response.get("items", []) - request = service.images().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to enumerate compute images in the %s", project_name) - logging.info(sys.exc_info()) - return images_result + logging.info("Retrieving list of Compute Image names") + images_result = list() + try: + request = service.images().list(project=project_name) + while request is not None: + response = request.execute() + images_result = response.get("items", []) + request = service.images().list_next(previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to enumerate compute images in the %s", project_name) + logging.info(sys.exc_info()) + return images_result -def get_machine_images( - project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]: - """Retrieve a list of Machine Images Resources available in the project. +def get_machine_images(project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]: + """Retrieve a list of Machine Images Resources available in the project. - Args: - project_name: A name of a project to query info about. - service: A resource object for interacting with the Compute API. 
+ Args: + project_name: A name of a project to query info about. + service: A resource object for interacting with the Compute API. - Returns: - A list of machine image resources. - """ + Returns: + A list of machine image resources. + """ - logging.info("Retrieving list of Machine Images Resources") - machine_images_list = list() - try: - request = service.machineImages().list(project=project_name) - while request is not None: - response = request.execute() - machine_images_list = response.get("items", []) - request = service.machineImages().list_next( - previous_request=request, previous_response=response - ) - except Exception: - logging.info("Failed to enumerate machine images in the %s", project_name) - logging.info(sys.exc_info()) - return machine_images_list - - -def get_compute_disks_names( - project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]: - """Retrieve a list of Compute disks available in the project. + logging.info("Retrieving list of Machine Images Resources") + machine_images_list = list() + try: + request = service.machineImages().list(project=project_name) + while request is not None: + response = request.execute() + machine_images_list = response.get("items", []) + request = service.machineImages().list_next(previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to enumerate machine images in the %s", project_name) + logging.info(sys.exc_info()) + return machine_images_list - Args: - project_name: A name of a project to query info about. - service: A resource object for interacting with the Compute API. - Returns: - A list of disk objects. - """ +def get_compute_disks_names(project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]: + """Retrieve a list of Compute disks available in the project. - logging.info("Retrieving list of Compute Disk names") - disk_names_list = list() - try: - request = service.disks().aggregatedList(project=project_name) - while request is not None: - response = request.execute() - if response.get("items", None) is not None: - disk_names_list = [disk - for _, disks_scoped_list in response["items"].items() - for disk in disks_scoped_list.get("disks", [])] - request = service.disks().aggregatedList_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to enumerate compute disks in the %s", project_name) - logging.info(sys.exc_info()) + Args: + project_name: A name of a project to query info about. + service: A resource object for interacting with the Compute API. - return disk_names_list + Returns: + A list of disk objects. + """ + logging.info("Retrieving list of Compute Disk names") + disk_names_list = list() + try: + request = service.disks().aggregatedList(project=project_name) + while request is not None: + response = request.execute() + if response.get("items", None) is not None: + disk_names_list = [ + disk for _, disks_scoped_list in response["items"].items() + for disk in disks_scoped_list.get("disks", []) + ] + request = service.disks().aggregatedList_next( + previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to enumerate compute disks in the %s", project_name) + logging.info(sys.exc_info()) -def get_static_ips(project_name: str, - service: discovery.Resource) -> List[Dict[str, Any]]: - """Retrieve a list of static IPs available in the project. + return disk_names_list - Args: - project_name: A name of a project to query info about. 
- service: A resource object for interacting with the Compute API. - Returns: - A list of static IPs in the project. - """ +def get_static_ips(project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]: + """Retrieve a list of static IPs available in the project. - logging.info("Retrieving Static IPs") + Args: + project_name: A name of a project to query info about. + service: A resource object for interacting with the Compute API. - ips_list = list() - try: - request = service.addresses().aggregatedList(project=project_name) - while request is not None: - response = request.execute() - ips_list = [{name: addresses_scoped_list} - for name, addresses_scoped_list in response["items"].items() - if addresses_scoped_list.get("addresses", None) is not None] - request = service.addresses().aggregatedList_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to get static IPs in the %s", project_name) - logging.info(sys.exc_info()) - - return ips_list - - -def get_compute_snapshots(project_name: str, - service: discovery.Resource) -> List[Dict[str, Any]]: - """Retrieve a list of Compute snapshots available in the project. - - Args: - project_name: A name of a project to query info about. - service: A resource object for interacting with the Compute API. - - Returns: - A list of snapshot objects. - """ - - logging.info("Retrieving Compute Snapshots") - snapshots_list = list() - try: - request = service.snapshots().list(project=project_name) - while request is not None: - response = request.execute() - snapshots_list = response.get("items", []) - request = service.snapshots().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to get compute snapshots in the %s", project_name) - logging.info(sys.exc_info()) + Returns: + A list of static IPs in the project. + """ - return snapshots_list + logging.info("Retrieving Static IPs") + ips_list = list() + try: + request = service.addresses().aggregatedList(project=project_name) + while request is not None: + response = request.execute() + ips_list = [ + {name: addresses_scoped_list} + for name, addresses_scoped_list in response["items"].items() + if addresses_scoped_list.get("addresses", None) is not None + ] + request = service.addresses().aggregatedList_next( + previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to get static IPs in the %s", project_name) + logging.info(sys.exc_info()) -def get_subnets(project_name: str, - compute_client: discovery.Resource) -> List[Dict[str, Any]]: - """Retrieve a list of subnets available in the project. + return ips_list - Args: - project_name: A name of a project to query info about. - compute_client: A resource object for interacting with the Compute API. - Returns: - A list of subnets in the project. - """ +def get_compute_snapshots(project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]: + """Retrieve a list of Compute snapshots available in the project. 
- logging.info("Retrieving Subnets") - subnets_list = list() - try: - request = compute_client.subnetworks().aggregatedList(project=project_name) - while request is not None: - response = request.execute() - if response.get("items", None) is not None: - subnets_list = list(response["items"].items()) - request = compute_client.subnetworks().aggregatedList_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to get subnets in the %s", project_name) - logging.info(sys.exc_info()) - - return subnets_list - - -def get_firewall_rules( - project_name: str, - compute_client: discovery.Resource) -> List[Dict[str, Any]]: - """Retrieve a list of firewall rules in the project. - - Args: - project_name: A name of a project to query info about. - compute_client: A resource object for interacting with the Compute API. - - Returns: - A list of firewall rules in the project. - """ - - logging.info("Retrieving Firewall Rules") - firewall_rules_list = list() - try: - request = compute_client.firewalls().list(project=project_name) - while request is not None: - response = request.execute() - firewall_rules_list=[(firewall["name"],) - for firewall in response.get("items",[])] - request = compute_client.firewalls().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to get firewall rules in the %s", project_name) - logging.info(sys.exc_info()) - return firewall_rules_list + Args: + project_name: A name of a project to query info about. + service: A resource object for interacting with the Compute API. + Returns: + A list of snapshot objects. + """ + logging.info("Retrieving Compute Snapshots") + snapshots_list = list() + try: + request = service.snapshots().list(project=project_name) + while request is not None: + response = request.execute() + snapshots_list = response.get("items", []) + request = service.snapshots().list_next(previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to get compute snapshots in the %s", project_name) + logging.info(sys.exc_info()) -def get_bucket_names(project_name: str, credentials: Credentials, - dump_fd: io.TextIOWrapper - ) -> Dict[str, Tuple[Any, List[Any]]]: - """Retrieve a list of buckets available in the project. - - Args: - project_name: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. - dump_fd: If set, the function will enumerate files stored in buckets and - save them in a file corresponding to provided file descriptor. - This is a very slow, noisy operation and should be used with caution. - - Returns: - A dictionary where key is bucket name and value is a bucket Object. - """ - - logging.info("Retrieving GCS Buckets") - buckets_dict = dict() - service = discovery.build( - "storage", "v1", credentials=credentials, cache_discovery=False) - # Make an authenticated API request - request = service.buckets().list(project=project_name) - while request is not None: + return snapshots_list + + +def get_subnets(project_name: str, compute_client: discovery.Resource) -> List[Dict[str, Any]]: + """Retrieve a list of subnets available in the project. + + Args: + project_name: A name of a project to query info about. + compute_client: A resource object for interacting with the Compute API. + + Returns: + A list of subnets in the project. 
+ """ + logging.info("Retrieving Subnets") + subnets_list = list() + try: + request = compute_client.subnetworks().aggregatedList(project=project_name) + while request is not None: + response = request.execute() + if response.get("items", None) is not None: + subnets_list = list(response["items"].items()) + request = compute_client.subnetworks().aggregatedList_next(previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to get subnets in the %s", project_name) + logging.info(sys.exc_info()) + + return subnets_list + + +def get_firewall_rules(project_name: str, compute_client: discovery.Resource) -> List[Dict[str, Any]]: + """Retrieve a list of firewall rules in the project. + + Args: + project_name: A name of a project to query info about. + compute_client: A resource object for interacting with the Compute API. + + Returns: + A list of firewall rules in the project. + """ + logging.info("Retrieving Firewall Rules") + firewall_rules_list = list() try: - response = request.execute() - except googleapiclient.errors.HttpError: - logging.info("Failed to list buckets in the %s", project_name) - logging.info(sys.exc_info()) - break - - for bucket in response.get("items", []): - buckets_dict[bucket["name"]] = (bucket, None) - if dump_fd is not None: - ret_fields = "nextPageToken,items(name,size,contentType,timeCreated)" - - req = service.objects().list(bucket=bucket["name"], fields=ret_fields) - - while req: - try: - resp = req.execute() - for item in resp.get("items", []): - dump_fd.write(json.dumps(item, indent=2, sort_keys=False)) - - req = service.objects().list_next(req, resp) - except googleapiclient.errors.HttpError: - logging.info("Failed to read the bucket %s", bucket["name"]) + request = compute_client.firewalls().list(project=project_name) + while request is not None: + response = request.execute() + firewall_rules_list = [(firewall["name"],) for firewall in response.get("items", [])] + request = compute_client.firewalls().list_next(previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to get firewall rules in the %s", project_name) + logging.info(sys.exc_info()) + return firewall_rules_list + +def get_bucket_names(project_name: str, credentials: Credentials, + dump_fd: io.TextIOWrapper) -> Dict[str, Tuple[Any, List[Any]]]: + """Retrieve a list of buckets available in the project. + + Args: + project_name: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. + dump_fd: If set, the function will enumerate files stored in buckets and + save them in a file corresponding to provided file descriptor. + This is a very slow, noisy operation and should be used with caution. + + Returns: + A dictionary where key is bucket name and value is a bucket Object. 
+ """ + + logging.info("Retrieving GCS Buckets") + buckets_dict = dict() + service = discovery.build("storage", "v1", credentials=credentials, cache_discovery=False) + + # Make an authenticated API request + request = service.buckets().list(project=project_name) + while request is not None: + try: + response = request.execute() + except googleapiclient.errors.HttpError: + logging.info("Failed to list buckets in the %s", project_name) logging.info(sys.exc_info()) break - request = service.buckets().list_next( - previous_request=request, previous_response=response) + for bucket in response.get("items", []): + buckets_dict[bucket["name"]] = (bucket, None) + if dump_fd is not None: + ret_fields = "nextPageToken,items(name,size,contentType,timeCreated)" + req = service.objects().list(bucket=bucket["name"], fields=ret_fields) - return buckets_dict + while req: + try: + resp = req.execute() + for item in resp.get("items", []): + dump_fd.write(json.dumps(item, indent=2, sort_keys=False)) + req = service.objects().list_next(req, resp) + except googleapiclient.errors.HttpError: + logging.info("Failed to read the bucket %s", bucket["name"]) + logging.info(sys.exc_info()) + break + request = service.buckets().list_next(previous_request=request, previous_response=response) -def get_managed_zones(project_name: str, - credentials: Credentials) -> List[Dict[str, Any]]: - """Retrieve a list of DNS zones available in the project. + return buckets_dict - Args: - project_name: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. - Returns: - A list of DNS zones in the project. - """ +def get_managed_zones(project_name: str, credentials: Credentials) -> List[Dict[str, Any]]: + """Retrieve a list of DNS zones available in the project. - logging.info("Retrieving DNS Managed Zones") - zones_list = list() + Args: + project_name: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. - try: - service = discovery.build( - "dns", "v1", credentials=credentials, cache_discovery=False) + Returns: + A list of DNS zones in the project. + """ - request = service.managedZones().list(project=project_name) - while request is not None: - response = request.execute() - zones_list = response.get("managedZones",[]) - request = service.managedZones().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to enumerate DNS zones for project %s", project_name) - logging.info(sys.exc_info()) + logging.info("Retrieving DNS Managed Zones") + zones_list = list() + + try: + service = discovery.build("dns", "v1", credentials=credentials, cache_discovery=False) + + request = service.managedZones().list(project=project_name) + while request is not None: + response = request.execute() + zones_list = response.get("managedZones",[]) + request = service.managedZones().list_next(previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to enumerate DNS zones for project %s", project_name) + logging.info(sys.exc_info()) - return zones_list + return zones_list def get_gke_clusters( - project_name: str, gke_client: container_v1.services.cluster_manager.client - .ClusterManagerClient + project_name: str, gke_client: container_v1.services.cluster_manager.client.ClusterManagerClient ) -> List[Tuple[str, str]]: - """Retrieve a list of GKE clusters available in the project. + """Retrieve a list of GKE clusters available in the project. 
- Args: - project_name: A name of a project to query info about. - gke_client: I do not know TBD. + Args: + project_name: A name of a project to query info about. + gke_client: I do not know TBD. - Returns: - A list of GKE clusters in the project. - """ + Returns: + A list of GKE clusters in the project. + """ - logging.info("Retrieving list of GKE clusters") - parent = f"projects/{project_name}/locations/-" - try: - clusters = gke_client.list_clusters(parent=parent) - return [(cluster.name, cluster.description) - for cluster in clusters.clusters] - except Exception: - logging.info("Failed to retrieve cluster list for project %s", project_name) - logging.info(sys.exc_info()) - return [] + logging.info("Retrieving list of GKE clusters") + parent = f"projects/{project_name}/locations/-" + try: + clusters = gke_client.list_clusters(parent=parent) + return [(cluster.name, cluster.description) for cluster in clusters.clusters] + except Exception: + logging.info("Failed to retrieve cluster list for project %s", project_name) + logging.info(sys.exc_info()) + return [] def get_gke_images(project_name: str, access_token: str) -> Dict[str, Any]: - """Retrieve a list of GKE images available in the project. - - Args: - project_name: A name of a project to query info about. - access_token: An Oauth2 token with permissions to query list of gke images. - - Returns: - A gke images JSON object for each accessible zone. - """ - - images = dict() - logging.info("Retrieving list of GKE images") - project_name = project_name.replace(":", "/") - regions = ["", "us.", "eu.", "asia."] - for region in regions: - gcr_url = f"https://{region}gcr.io/v2/{project_name}/tags/list" + """Retrieve a list of GKE images available in the project. + + Args: + project_name: A name of a project to query info about. + access_token: An Oauth2 token with permissions to query list of gke images. + + Returns: + A gke images JSON object for each accessible zone. + """ + + images = dict() + logging.info("Retrieving list of GKE images") + project_name = project_name.replace(":", "/") + regions = ["", "us.", "eu.", "asia."] + for region in regions: + gcr_url = f"https://{region}gcr.io/v2/{project_name}/tags/list" + try: + res = requests.get( + gcr_url, auth=HTTPBasicAuth("oauth2accesstoken", access_token)) + if not res.ok: + logging.info("Failed to retrieve gcr images list. Status code: %d", + res.status_code) + continue + images[region.replace(".", "")] = res.json() + except Exception: + logging.info("Failed to retrieve gke images for project %s", project_name) + logging.info(sys.exc_info()) + + return images + + +def get_sql_instances(project_name: str, credentials: Credentials) -> List[Dict[str, Any]]: + """Retrieve a list of SQL instances available in the project. + + Args: + project_name: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. + + Returns: + A list of sql instances in the project. + """ + + logging.info("Retrieving CloudSQL Instances") + sql_instances_list = list() try: - res = requests.get( - gcr_url, auth=HTTPBasicAuth("oauth2accesstoken", access_token)) - if not res.ok: - logging.info("Failed to retrieve gcr images list. 
Status code: %d", - res.status_code) - continue - images[region.replace(".", "")] = res.json() + service = discovery.build( + "sqladmin", "v1beta4", credentials=credentials, cache_discovery=False) + + request = service.instances().list(project=project_name) + while request is not None: + response = request.execute() + sql_instances_list = response.get("items", []) + request = service.instances().list_next( + previous_request=request, previous_response=response) except Exception: - logging.info("Failed to retrieve gke images for project %s", project_name) - logging.info(sys.exc_info()) + logging.info("Failed to get SQL instances for project %s", project_name) + logging.info(sys.exc_info()) - return images + return sql_instances_list -def get_sql_instances(project_name: str, - credentials: Credentials) -> List[Dict[str, Any]]: - """Retrieve a list of SQL instances available in the project. +def get_bq_tables(project_id: str, dataset_id: str, bq_service: discovery.Resource) -> List[Dict[str, Any]]: + """Retrieve a list of BigQuery tables available in the dataset. - Args: - project_name: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. + Args: + project_id: A name of a project to query info about. + dataset_id: A name of dataset to query data from. + bq_service: I do not know. - Returns: - A list of sql instances in the project. - """ + Returns: + A list of BigQuery tables in the dataset. + """ - logging.info("Retrieving CloudSQL Instances") - sql_instances_list = list() - try: - service = discovery.build( - "sqladmin", "v1beta4", credentials=credentials, cache_discovery=False) + logging.info("Retrieving BigQuery Tables for dataset %s", dataset_id) + list_of_tables = list() + try: + request = bq_service.tables().list(projectId=project_id, datasetId=dataset_id) + while request is not None: + response = request.execute() + list_of_tables = response.get("tables", []) + request = bq_service.tables().list_next(previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to retrieve BQ tables for dataset %s", dataset_id) + logging.info(sys.exc_info()) + return list_of_tables - request = service.instances().list(project=project_name) - while request is not None: - response = request.execute() - sql_instances_list = response.get("items", []) - request = service.instances().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to get SQL instances for project %s", project_name) - logging.info(sys.exc_info()) - - return sql_instances_list - - -def get_bq_tables(project_id: str, dataset_id: str, - bq_service: discovery.Resource) -> List[Dict[str, Any]]: - """Retrieve a list of BigQuery tables available in the dataset. - - Args: - project_id: A name of a project to query info about. - dataset_id: A name of dataset to query data from. - bq_service: I do not know. - - Returns: - A list of BigQuery tables in the dataset. 
- """ - - logging.info("Retrieving BigQuery Tables for dataset %s", dataset_id) - list_of_tables = list() - try: - request = bq_service.tables().list( - projectId=project_id, datasetId=dataset_id) - while request is not None: - response = request.execute() - list_of_tables = response.get("tables", []) - request = bq_service.tables().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve BQ tables for dataset %s", dataset_id) - logging.info(sys.exc_info()) - return list_of_tables - - -def get_bq(project_id: str, - credentials: Credentials) -> Dict[str, List[Dict[str, Any]]]: - """Retrieve a list of BigQuery datasets available in the project. - - Args: - project_id: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. - - Returns: - A dictionary of BigQuery dataset and corresponding tables. - """ - - logging.info("Retrieving BigQuery Datasets") - bq_datasets = dict() - try: - service = discovery.build( - "bigquery", "v2", credentials=credentials, cache_discovery=False) - request = service.datasets().list(projectId=project_id) - while request is not None: - response = request.execute() +def get_bq(project_id: str, credentials: Credentials) -> Dict[str, List[Dict[str, Any]]]: + """Retrieve a list of BigQuery datasets available in the project. - for dataset in response.get("datasets", []): - dataset_id = dataset["datasetReference"]["datasetId"] - bq_datasets[dataset_id] = get_bq_tables(project_id,dataset_id, service) + Args: + project_id: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. - request = service.datasets().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve BQ datasets for project %s", project_id) - logging.info(sys.exc_info()) - return bq_datasets + Returns: + A dictionary of BigQuery dataset and corresponding tables. + """ + logging.info("Retrieving BigQuery Datasets") + bq_datasets = dict() + try: + service = discovery.build("bigquery", "v2", credentials=credentials, cache_discovery=False) -def get_pubsub_subscriptions(project_id: str, - credentials: Credentials) -> List[Dict[str, Any]]: - """Retrieve a list of PubSub subscriptions available in the project. + request = service.datasets().list(projectId=project_id) + while request is not None: + response = request.execute() - Args: - project_id: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. + for dataset in response.get("datasets", []): + dataset_id = dataset["datasetReference"]["datasetId"] + bq_datasets[dataset_id] = get_bq_tables(project_id, dataset_id, service) - Returns: - A list of PubSub subscriptions in the project. 
- """ + request = service.datasets().list_next(previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to retrieve BQ datasets for project %s", project_id) + logging.info(sys.exc_info()) - logging.info("Retrieving PubSub Subscriptions") - pubsubs_list = list() - try: - service = discovery.build( - "pubsub", "v1", credentials=credentials, cache_discovery=False) + return bq_datasets - request = service.projects().subscriptions().list( - project=f"projects/{project_id}") - while request is not None: - response = request.execute() - pubsubs_list = response.get("subscriptions", []) - request = service.projects().subscriptions().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to get PubSubs for project %s", project_id) - logging.info(sys.exc_info()) - return pubsubs_list - - -def get_cloudfunctions(project_id: str, - credentials: Credentials) -> List[Dict[str, Any]]: - """Retrieve a list of CloudFunctions available in the project. - - Args: - project_id: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. - - Returns: - A list of CloudFunctions in the project. - """ - - logging.info("Retrieving CloudFunctions") - functions_list = list() - service = discovery.build( - "cloudfunctions", "v1", credentials=credentials, cache_discovery=False) - try: - request = service.projects().locations().functions().list( - parent=f"projects/{project_id}/locations/-") - while request is not None: - response = request.execute() - functions_list = response.get("functions", []) - request = service.projects().locations().functions().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve CloudFunctions for project %s", project_id) - logging.info(sys.exc_info()) - return functions_list +def get_pubsub_subscriptions(project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: + """Retrieve a list of PubSub subscriptions available in the project. + + Args: + project_id: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. + + Returns: + A list of PubSub subscriptions in the project. + """ + + logging.info("Retrieving PubSub Subscriptions") + pubsubs_list = list() + try: + service = discovery.build("pubsub", "v1", credentials=credentials, cache_discovery=False) + + request = service.projects().subscriptions().list(project=f"projects/{project_id}") + while request is not None: + response = request.execute() + pubsubs_list = response.get("subscriptions", []) + request = service.projects().subscriptions().list_next(previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to get PubSubs for project %s", project_id) + logging.info(sys.exc_info()) + + return pubsubs_list + + +def get_cloudfunctions(project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: + """Retrieve a list of CloudFunctions available in the project. + + Args: + project_id: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. + + Returns: + A list of CloudFunctions in the project. 
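+
+  Example:
+    A hypothetical usage sketch (assumes `creds` holds valid credentials for
+    a project with the Cloud Functions API enabled):
+
+      functions = get_cloudfunctions("my-project", creds)
+      for function in functions:
+        print(function.get("name"), function.get("status"))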
+ """ + + logging.info("Retrieving CloudFunctions") + functions_list = list() + service = discovery.build("cloudfunctions", "v1", credentials=credentials, cache_discovery=False) + try: + request = service.projects().locations().functions().list(parent=f"projects/{project_id}/locations/-") + while request is not None: + response = request.execute() + functions_list = response.get("functions", []) + request = service.projects().locations().functions().list_next(previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to retrieve CloudFunctions for project %s", project_id) + logging.info(sys.exc_info()) + + return functions_list def get_bigtable_instances(project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: - """Retrieve a list of BigTable instances available in the project. + """Retrieve a list of BigTable instances available in the project. - Args: - project_id: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. + Args: + project_id: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. - Returns: - A list of BigTable instances in the project. - """ + Returns: + A list of BigTable instances in the project. + """ - logging.info("Retrieving bigtable instances") - bigtable_instances_list = list() - try: - service = discovery.build( - "bigtableadmin", "v2", credentials=credentials, cache_discovery=False) + logging.info("Retrieving bigtable instances") + bigtable_instances_list = list() + try: + service = discovery.build( + "bigtableadmin", "v2", credentials=credentials, cache_discovery=False) - request = service.projects().instances().list( - parent=f"projects/{project_id}") - while request is not None: - response = request.execute() - bigtable_instances_list = response.get("instances", []) - request = service.projects().instances().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve BigTable instances for project %s", - project_id) - logging.info(sys.exc_info()) - return bigtable_instances_list + request = service.projects().instances().list( + parent=f"projects/{project_id}") + while request is not None: + response = request.execute() + bigtable_instances_list = response.get("instances", []) + request = service.projects().instances().list_next( + previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to retrieve BigTable instances for project %s", + project_id) + logging.info(sys.exc_info()) + return bigtable_instances_list def get_spanner_instances(project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: - """Retrieve a list of Spanner instances available in the project. + """Retrieve a list of Spanner instances available in the project. - Args: - project_id: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. + Args: + project_id: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. - Returns: - A list of Spanner instances in the project. - """ + Returns: + A list of Spanner instances in the project. 
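+
+  Example:
+    A hypothetical usage sketch (assumes `creds` is a valid
+    google.oauth2.credentials.Credentials object):
+
+      instances = get_spanner_instances("my-project", creds)
+      for instance in instances:
+        print(instance.get("name"), instance.get("state"))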
+ """ - logging.info("Retrieving spanner instances") - spanner_instances_list = list() - try: - service = discovery.build( - "spanner", "v1", credentials=credentials, cache_discovery=False) + logging.info("Retrieving spanner instances") + spanner_instances_list = list() + try: + service = discovery.build( + "spanner", "v1", credentials=credentials, cache_discovery=False) - request = service.projects().instances().list( - parent=f"projects/{project_id}") - while request is not None: - response = request.execute() - spanner_instances_list = response.get("instances", []) - request = service.projects().instances().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve Spanner instances for project %s", - project_id) - logging.info(sys.exc_info()) - return spanner_instances_list + request = service.projects().instances().list( + parent=f"projects/{project_id}") + while request is not None: + response = request.execute() + spanner_instances_list = response.get("instances", []) + request = service.projects().instances().list_next( + previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to retrieve Spanner instances for project %s", + project_id) + logging.info(sys.exc_info()) + return spanner_instances_list def get_filestore_instances(project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: - """Retrieve a list of Filestore instances available in the project. - - Args: - project_id: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. - - Returns: - A list of Filestore instances in the project. - """ - - logging.info("Retrieving filestore instances") - filestore_instances_list = list() - service = discovery.build( - "file", "v1", credentials=credentials, cache_discovery=False) - try: - request = service.projects().locations().instances().list( - parent=f"projects/{project_id}/locations/-") - while request is not None: - response = request.execute() - filestore_instances_list = response.get("instances", []) - request = service.projects().locations().instances().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to get filestore instances for project %s", project_id) - logging.info(sys.exc_info()) - return filestore_instances_list - - -def get_kms_keys(project_id: str, - credentials: Credentials) -> List[Dict[str, Any]]: - """Retrieve a list of KMS keys available in the project. - - Args: - project_id: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. - - Returns: - A list of KMS keys in the project. - """ - - logging.info("Retrieving KMS keys") - kms_keys_list = list() - try: - service = discovery.build( - "cloudkms", "v1", credentials=credentials, cache_discovery=False) + """Retrieve a list of Filestore instances available in the project. 
- # list all possible locations - locations_list = list() - request = service.projects().locations().list(name=f"projects/{project_id}") - while request is not None: - response = request.execute() - for location in response.get("locations", []): - locations_list.append(location["locationId"]) - request = service.projects().locations().list_next( - previous_request=request, previous_response=response) - - for location_id in locations_list: - request_loc = service.projects().locations().keyRings().list( - parent=f"projects/{project_id}/locations/{location_id}") - while request_loc is not None: - response_loc = request_loc.execute() - for keyring in response_loc.get("keyRings", []): - request = service.projects().locations().keyRings().cryptoKeys().list( - parent=keyring["name"]) - while request is not None: - response = request.execute() - for key in response.get("cryptoKeys", []): - kms_keys_list.append(key) + Args: + project_id: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. + + Returns: + A list of Filestore instances in the project. + """ - request = service.projects().locations().keyRings().cryptoKeys( - ).list_next( + logging.info("Retrieving filestore instances") + filestore_instances_list = list() + service = discovery.build( + "file", "v1", credentials=credentials, cache_discovery=False) + try: + request = service.projects().locations().instances().list( + parent=f"projects/{project_id}/locations/-") + while request is not None: + response = request.execute() + filestore_instances_list = response.get("instances", []) + request = service.projects().locations().instances().list_next( previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to get filestore instances for project %s", project_id) + logging.info(sys.exc_info()) + return filestore_instances_list + + +def get_kms_keys(project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: + """Retrieve a list of KMS keys available in the project. + + Args: + project_id: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. + + Returns: + A list of KMS keys in the project. 
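+
+  Example:
+    A hypothetical usage sketch (assumes `creds` holds credentials with
+    permission to list key rings and crypto keys):
+
+      keys = get_kms_keys("my-project", creds)
+      for key in keys:
+        print(key.get("name"), key.get("purpose"))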
+ """ - request_loc = service.projects().locations().keyRings().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve KMS keys for project %s", project_id) - logging.info(sys.exc_info()) - return kms_keys_list + logging.info("Retrieving KMS keys") + kms_keys_list = list() + try: + service = discovery.build("cloudkms", "v1", credentials=credentials, cache_discovery=False) + + # list all possible locations + locations_list = list() + request = service.projects().locations().list(name=f"projects/{project_id}") + while request is not None: + response = request.execute() + for location in response.get("locations", []): + locations_list.append(location["locationId"]) + request = service.projects().locations().list_next(previous_request=request, previous_response=response) + + for location_id in locations_list: + request_loc = service.projects().locations().keyRings().list(parent=f"projects/{project_id}/locations/{location_id}") + while request_loc is not None: + response_loc = request_loc.execute() + for keyring in response_loc.get("keyRings", []): + request = service.projects().locations().keyRings().cryptoKeys().list(parent=keyring["name"]) + while request is not None: + response = request.execute() + for key in response.get("cryptoKeys", []): + kms_keys_list.append(key) + + request = service.projects().locations().keyRings().cryptoKeys().list_next(previous_request=request, previous_response=response) + + request_loc = service.projects().locations().keyRings().list_next(previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to retrieve KMS keys for project %s", project_id) + logging.info(sys.exc_info()) + return kms_keys_list def get_app_services(project_name: str, credentials: Credentials) -> Dict[str, Any]: - """Retrieve a list of AppEngine instances available in the project. + """Retrieve a list of AppEngine instances available in the project. - Args: - project_name: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. + Args: + project_name: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. - Returns: - A dict representing default apps and services available in the project. - """ + Returns: + A dict representing default apps and services available in the project. 
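+
+  Example:
+    A hypothetical usage sketch (assumes `creds` is a valid
+    google.oauth2.credentials.Credentials object):
+
+      app = get_app_services("my-project", creds)
+      if "default_app" in app:
+        name, hostname, serving_status = app["default_app"]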
+ """ - app_client = discovery.build( - "appengine", "v1", credentials=credentials, cache_discovery=False) + app_client = discovery.build( + "appengine", "v1", credentials=credentials, cache_discovery=False) - logging.info("Retrieving app services") - app_services = dict() - try: - request = app_client.apps().get(appsId=project_name) - response = request.execute() - if response.get("name", None) is not None: - app_services["default_app"] = (response["name"], - response["defaultHostname"], - response["servingStatus"]) + logging.info("Retrieving app services") + app_services = dict() + try: + request = app_client.apps().get(appsId=project_name) + response = request.execute() + if response.get("name", None) is not None: + app_services["default_app"] = (response["name"], + response["defaultHostname"], + response["servingStatus"]) - request = app_client.apps().services().list(appsId=project_name) + request = app_client.apps().services().list(appsId=project_name) - app_services["services"] = list() - while request is not None: - response = request.execute() - app_services["services"] = response.get("services", []) - request = app_client.apps().services().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve App services for project %s", project_name) - logging.info(sys.exc_info()) - return app_services + app_services["services"] = list() + while request is not None: + response = request.execute() + app_services["services"] = response.get("services", []) + request = app_client.apps().services().list_next( + previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to retrieve App services for project %s", project_name) + logging.info(sys.exc_info()) + return app_services def get_endpoints(project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: - """Retrieve a list of Endpoints available in the project. + """Retrieve a list of Endpoints available in the project. + + Args: + project_id: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. + + Returns: + A list of Endpoints in the project. + """ + + logging.info("Retrieving info about endpoints") + endpoints_list = list() + try: + service = discovery.build( + "servicemanagement", + "v1", + credentials=credentials, + cache_discovery=False) + + request = service.services().list(producerProjectId=project_id) + while request is not None: + response = request.execute() + endpoints_list = response.get("services", []) + request = service.services().list_next( + previous_request=request, previous_response=response) + except Exception: + logging.info("Failed to retrieve endpoints list for project %s", project_id) + logging.info(sys.exc_info()) + return endpoints_list + + +def get_iam_policy(project_name: str, credentials: Credentials) -> List[Dict[str, Any]]: - Args: - project_id: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. + """Retrieve an IAM Policy in the project. - Returns: - A list of Endpoints in the project. - """ + Args: + project_name: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. - logging.info("Retrieving info about endpoints") - endpoints_list = list() - try: + Returns: + An IAM policy enforced for the project. 
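+
+  Example:
+    A hypothetical usage sketch (assumes `creds` is a valid
+    google.oauth2.credentials.Credentials object; the function returns
+    None on failure):
+
+      bindings = get_iam_policy("my-project", creds)
+      for binding in bindings or []:
+        print(binding.get("role"), binding.get("members"))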
+  """
+
+  logging.info("Retrieving IAM policy for %s", project_name)
+
+  # Create a Cloud Resource Manager service object
   service = discovery.build(
-      "servicemanagement",
+      "cloudresourcemanager",
       "v1",
       credentials=credentials,
       cache_discovery=False)
 
-    request = service.services().list(producerProjectId=project_id)
-    while request is not None:
-      response = request.execute()
-      endpoints_list = response.get("services", [])
-      request = service.services().list_next(
-          previous_request=request, previous_response=response)
-  except Exception:
-    logging.info("Failed to retrieve endpoints list for project %s", project_id)
-    logging.info(sys.exc_info())
-  return endpoints_list
-
-
-def get_iam_policy(project_name: str,
-                   credentials: Credentials) -> List[Dict[str, Any]]:
-  """Retrieve an IAM Policy in the project.
-
-  Args:
-    project_name: A name of a project to query info about.
-    credentials: An google.oauth2.credentials.Credentials object.
-
-  Returns:
-    An IAM policy enforced for the project.
-  """
-
-  logging.info("Retrieving IAM policy for %s", project_name)
-  service = discovery.build(
-      "cloudresourcemanager",
-      "v1",
-      credentials=credentials,
-      cache_discovery=False)
-
-  resource = project_name
-
-  get_policy_options = {
-      "requestedPolicyVersion": 3,
-  }
-  get_policy_options = {"options": {"requestedPolicyVersion": 3}}
-  try:
-    request = service.projects().getIamPolicy(
-        resource=resource, body=get_policy_options)
-    response = request.execute()
-  except Exception:
-    logging.info("Failed to get endpoints list for project %s", project_name)
-    logging.info(sys.exc_info())
-    return None
-
-  if response.get("bindings", None) is not None:
-    return response["bindings"]
-  else:
-    return None
-
-
-def get_associated_service_accounts(
-    iam_policy: List[Dict[str, Any]]) -> List[str]:
-  """Extract a list of unique SAs from IAM policy associated with project.
-
-  Args:
-    iam_policy: An IAM policy provided by get_iam_policy function.
-
-  Returns:
-    A list of service accounts represented as string
-  """
-
-  if not iam_policy:
-    return []
-
-  list_of_sas = list()
-  for entry in iam_policy:
-    for member in entry["members"]:
-      if "deleted:" in member:
-        continue
-      account_name = None
-      for element in member.split(":"):
-        if "@" in element:
-          account_name = element
-          break
-      if account_name and account_name not in list_of_sas:
-        list_of_sas.append(account_name)
-
-  return list_of_sas
+  resource = project_name
+
+  # Set options to retrieve a specific policy version
+  get_policy_options = {"options": {"requestedPolicyVersion": 3}}
+
+  try:
+    # Make a request to the Cloud Resource Manager API to retrieve the IAM policy
+    request = service.projects().getIamPolicy(
+        resource=resource, body=get_policy_options)
+    response = request.execute()
+  except Exception:
+    # Log an error message if the request fails
+    logging.info("Failed to get IAM policy for project %s", project_name)
+    logging.info(sys.exc_info())
+    return None
+
+  # Check if the response contains the expected bindings object
+  if response.get("bindings", None) is not None:
+    return response["bindings"]
+  else:
+    return None
+
+
+def get_associated_service_accounts(iam_policy: List[Dict[str, Any]]) -> List[str]:
+  """Extract a list of unique SAs from IAM policy associated with project.
+
+  Args:
+    iam_policy: An IAM policy provided by get_iam_policy function.
+ + Returns: + A list of service accounts represented as string + """ + + if not iam_policy: + return [] + + list_of_sas = list() + for entry in iam_policy: + for member in entry["members"]: + if "deleted:" in member: + continue + account_name = None # initialize variable for account name + for element in member.split(":"): + if "@" in element: + account_name = element + break + if account_name and account_name not in list_of_sas: + list_of_sas.append(account_name) + + return list_of_sas def get_service_accounts(project_name: str, credentials: Credentials) -> List[Tuple[str, str]]: - """Retrieve a list of service accounts managed in the project. + """Retrieve a list of service accounts managed in the project. - Args: - project_name: A name of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. + Args: + project_name: A name of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. - Returns: - A list of service accounts managed in the project. - """ + Returns: + A list of service accounts managed in the project. + """ - logging.info("Retrieving SA list %s", project_name) - service_accounts = [] - service = discovery.build( - "iam", "v1", credentials=credentials, cache_discovery=False) + # Log the start of the retrieval process. + logging.info("Retrieving SA list %s", project_name) - name = f"projects/{project_name}" + service_accounts = [] - try: - request = service.projects().serviceAccounts().list(name=name) - while request is not None: - response = request.execute() - service_accounts = [(service_account["email"], - service_account.get("description","")) - for service_account in response.get("accounts",[])] + # Create a service object for the IAM API. + service = discovery.build( + "iam", "v1", credentials=credentials, cache_discovery=False) + + # Construct the name of the project to query. + name = f"projects/{project_name}" + + try: + # Send a request to list the service accounts in the project. + request = service.projects().serviceAccounts().list(name=name) + + # Keep retrieving service accounts as long as there are more to retrieve. + while request is not None: + response = request.execute() + # Extract the email and description of each service account and add them to the list. + service_accounts = [(service_account["email"], + service_account.get("description","")) + for service_account in response.get("accounts",[])] + + # Get the next page of results. + request = service.projects().serviceAccounts().list_next( + previous_request=request, previous_response=response) + except Exception: + # Log an error message if something goes wrong. + logging.info("Failed to retrieve SA list for project %s", project_name) + logging.info(sys.exc_info()) - request = service.projects().serviceAccounts().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve SA list for project %s", project_name) - logging.info(sys.exc_info()) + return service_accounts - return service_accounts def list_services(project_id: str, credentials: Credentials) -> List[Any]: - """Retrieve a list of services enabled in the project. + """Retrieve a list of services enabled in the project. - Args: - project_id: An id of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. + Args: + project_id (str): An id of a project to query info about. + credentials (Credentials): A Google Cloud credentials object. 
- Returns: - A list of service API objects enabled in the project. - """ + Returns: + A list of service API objects enabled in the project. + """ - logging.info("Retrieving services list %s", project_id) - list_of_services = list() - serviceusage = discovery.build("serviceusage", "v1", credentials=credentials) + # Log the retrieval of services list for the given project ID + logging.info("Retrieving services list %s", project_id) - request = serviceusage.services().list( - parent="projects/" + project_id, pageSize=200, filter="state:ENABLED") - try: - while request is not None: - response = request.execute() - list_of_services.append(response.get("services", None)) + # Create a list to hold the enabled services + list_of_services = list() - request = serviceusage.services().list_next( - previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve services for project %s", project_id) - logging.info(sys.exc_info()) + serviceusage = discovery.build("serviceusage", "v1", credentials=credentials) - return list_of_services + # Create a request to list all services enabled in the given project + request = serviceusage.services().list( + parent="projects/" + project_id, # Specify the parent resource to list services under + pageSize=200, # Specify the maximum number of services to return per page + filter="state:ENABLED" # Specify the filter to return only ENABLED services + ) + + try: + # Loop through each page of services until all services have been retrieved + while request is not None: + response = request.execute() + list_of_services.append(response.get("services", None)) + + request = serviceusage.services().list_next( + previous_request=request, previous_response=response) + + except Exception: + # Log an error message if an exception occurs while retrieving services + logging.info("Failed to retrieve services for project %s", project_id) + logging.info(sys.exc_info()) + + return list_of_services def list_sourcerepo(project_id: str, credentials: Credentials) -> List[Any]: - """Retrieve a list of cloud source repositories enabled in the project. + """Retrieve a list of cloud source repositories enabled in the project. - Args: - project_id: An id of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. + Args: + project_id: An id of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. - Returns: - A list of cloud source repositories in the project. - """ + Returns: + A list of cloud source repositories in the project. + """ - logging.info("Retrieving cloud source repositories %s", project_id) - list_of_repos = list() - service = discovery.build("sourcerepo", "v1", credentials=credentials) + # Log a message indicating that we're retrieving repositories for the specified project. + logging.info("Retrieving cloud source repositories %s", project_id) - request = service.projects().repos().list( - name="projects/" + project_id, - pageSize=500 - ) - try: - while request is not None: - response = request.execute() - list_of_repos.append(response.get("repos", None)) + list_of_repos = list() + + # Build a service object for interacting with the Cloud Source Repositories API. + service = discovery.build("sourcerepo", "v1", credentials=credentials) + + # Create a request to list the repositories in the specified project, up to 500 at a time. 
+ request = service.projects().repos().list( + name="projects/" + project_id, + pageSize=500 + ) - request = service.projects().repos().list_next( - previous_request=request, - previous_response=response - ) - except Exception: - logging.info("Failed to retrieve source repos for project %s", project_id) - logging.info(sys.exc_info()) + try: + # Keep making requests until there are no more pages of repositories to retrieve. + while request is not None: + response = request.execute() - return list_of_repos + # Add the repositories from the response to the list of repositories. + list_of_repos.append(response.get("repos", None)) + + # Get the next page of repositories, if there is one. + request = service.projects().repos().list_next( + previous_request=request, + previous_response=response + ) + + except Exception: + # If an exception is raised, log a message indicating that we failed to retrieve the repositories. + logging.info("Failed to retrieve source repos for project %s", project_id) + logging.info(sys.exc_info()) + + return list_of_repos def list_dns_policies(project_id: str, credentials: Credentials) -> List[Any]: - """Retrieve a list of cloud DNS policies in the project. - Args: - project_id: An id of a project to query info about. - credentials: An google.oauth2.credentials.Credentials object. - Returns: - A list of cloud DNS policies in the project. - """ - - logging.info("Retrieving cloud DNS policies %s", project_id) - list_of_policies = list() - service = discovery.build("dns", "v1", credentials=credentials) - - request = service.policies().list( - project=project_id, - maxResults=500 - ) - try: - while request is not None: - response = request.execute() - list_of_policies.append(response.get("policies", None)) - - request = service.policies().list_next( - previous_request=request, - previous_response=response - ) - except Exception: - logging.info("Failed to retrieve DNS policies for project %s", project_id) - logging.info(sys.exc_info()) - - return list_of_policies + """ + Retrieve a list of cloud DNS policies in the project. + + Args: + project_id: An id of a project to query info about. + credentials: An google.oauth2.credentials.Credentials object. + + Returns: + A list of cloud DNS policies in the project. 
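+
+    Example:
+      A hypothetical usage sketch (assumes `creds` is a valid credentials
+      object; note the function returns one list per response page):
+
+        pages = list_dns_policies("my-project", creds)
+        for page in pages:
+          for policy in page or []:
+            print(policy.get("name"))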
+ """ + + # Log that we're retrieving cloud DNS policies for the specified project + logging.info("Retrieving cloud DNS policies %s", project_id) + + # Initialize an empty list to store the policies in + list_of_policies = list() + + # Create a DNS service object + service = discovery.build("dns", "v1", credentials=credentials) + + # Create a request to retrieve DNS policies for the specified project + request = service.policies().list( + project=project_id, + maxResults=500 + ) + + try: + # Loop through pages of results until there are no more + while request is not None: + # Send the request and get the response + response = request.execute() + + # Get the policies from the response and add them to the list_of_policies + list_of_policies.append(response.get("policies", None)) + + # Get the next page of results (if there are any) + request = service.policies().list_next( + previous_request=request, + previous_response=response + ) + except Exception: + # Log an error if we failed to retrieve DNS policies for the specified project + logging.info("Failed to retrieve DNS policies for project %s", project_id) + logging.info(sys.exc_info()) + + return list_of_policies From 664d4ecd3d7f3b8f477b48363d45b9fa29ff00d9 Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Thu, 6 Apr 2023 17:05:27 +0530 Subject: [PATCH 12/25] Update __init__.py --- src/gcp_scanner/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gcp_scanner/__init__.py b/src/gcp_scanner/__init__.py index 80238dad..d3f5a12f 100644 --- a/src/gcp_scanner/__init__.py +++ b/src/gcp_scanner/__init__.py @@ -1 +1 @@ -#Currently, this ia an empty file. + From 0e9cdd73cd06dc9cccd4261e3742fed80f0372b2 Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Thu, 6 Apr 2023 19:54:12 +0530 Subject: [PATCH 13/25] Update __main__.py --- src/gcp_scanner/__main__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gcp_scanner/__main__.py b/src/gcp_scanner/__main__.py index 330fb74e..35bea06b 100644 --- a/src/gcp_scanner/__main__.py +++ b/src/gcp_scanner/__main__.py @@ -21,5 +21,5 @@ # Checking if the code is running as the main module if __name__ == '__main__': - # Calling the main function of the scanner module - scanner.main() + # Calling the main function of the scanner module + scanner.main() From ac36b023c68dc37a91c334529ac8c0a739a37ede Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Thu, 6 Apr 2023 20:52:22 +0530 Subject: [PATCH 14/25] Update arguments.py --- src/gcp_scanner/arguments.py | 142 ++++++++++++++++++----------------- 1 file changed, 72 insertions(+), 70 deletions(-) diff --git a/src/gcp_scanner/arguments.py b/src/gcp_scanner/arguments.py index ed37efc1..09b516fd 100644 --- a/src/gcp_scanner/arguments.py +++ b/src/gcp_scanner/arguments.py @@ -20,9 +20,10 @@ import argparse import logging + # Define a function to create an argument parser using the argparse module def arg_parser(): - """Creates an argument parser using the `argparse` module and defines + """Creates an argument parser using the `argparse` module and defines several command-line arguments. Args: @@ -32,95 +33,98 @@ def arg_parser(): argparse.Namespace: A namespace object containing the parsed command-line arguments. 
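+
+  Example:
+    A hypothetical invocation, following the usage string defined below:
+
+      python3 scanner.py -o scan_output -g -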
""" - # Create a new parser object - parser = argparse.ArgumentParser( - prog='scanner.py', # program name - description='GCP Scanner', # description - usage='python3 %(prog)s -o folder_to_save_results -g -' # usage instructions - ) - - # Define a required argument group - required_named = parser.add_argument_group('Required parameters') - # Add a required argument to the group - required_named.add_argument( - '-o', # short option name - '--output-dir', # long option name - required=True, - dest='output', - default='scan_db', - help='Path to output directory' - ) + # Create a new parser object + parser = argparse.ArgumentParser( + prog='scanner.py', # program name + description='GCP Scanner', # description + usage='python3 %(prog)s -o folder_to_save_results -g -' + ) + + # Define a required argument group + required_named = parser.add_argument_group('Required parameters') + # Add a required argument to the group + required_named.add_argument( + '-o', # short option name + '--output-dir', # long option name + required=True, + dest='output', + default='scan_db', + help='Path to output directory' + ) - # Add command line arguments to the parser object - parser.add_argument( + # Add command line arguments to the parser object + parser.add_argument( '-k', - '--sa-key-path', # Option for specifying the path to the directory with SA keys + '--sa-key-path', default=None, # Default value if option is not specified - dest='key_path', # Destination variable for storing the value of the option - help='Path to directory with SA keys in json format' # Help message - ) - parser.add_argument( + dest='key_path', + help='Path to directory with SA keys in json format' # Help message + ) + parser.add_argument( '-g', - '--gcloud-profile-path', - default=None, - dest='gcloud_profile_path', - help='Path to directory with gcloud profile. Specify - to search for credentials in default gcloud config path' - ) - parser.add_argument( + '--gcloud-profile-path', + default=None, + dest='gcloud_profile_path', + help='Path to directory with gcloud profile. Specify - to search for\ + credentials in default gcloud config path' + ) + parser.add_argument( '-m', - '--use-metadata', - default=False, - dest='use_metadata', - action='store_true', - help='Extract credentials from GCE instance metadata' - ) - parser.add_argument( + '--use-metadata', + default=False, + dest='use_metadata', + action='store_true', + help='Extract credentials from GCE instance metadata' + ) + parser.add_argument( '-at', - '--access-token-files', - default=None, - dest='access_token_files', - help='A list of comma separated files with access token and OAuth scopes. TTL limited. A token and scopes should be stored in JSON format.' - ) - parser.add_argument( + '--access-token-files', + default=None, + dest='access_token_files', + help='A list of comma separated files with access token and OAuth scopes\ + TTL limited. A token and scopes should be stored in JSON format.' + ) + parser.add_argument( '-rt', - '--refresh-token-files', - default=None, - dest='refresh_token_files', - help='A list of comma separated files with refresh_token, client_id, token_uri and client_secret stored in JSON format.' - ) + '--refresh-token-files', + default=None, + dest='refresh_token_files', + help='A list of comma separated files with refresh_token, client_id,\ + token_uri and client_secret stored in JSON format.' 
+  )

-  parser.add_argument(
+  parser.add_argument(
     '-s',
     '--service-account',
     default=None,
     dest='key_name',
     help='Name of individual SA to scan')
-  parser.add_argument(
+  parser.add_argument(
     '-p',
     '--project',
     default=None,
     dest='target_project',
     help='Name of individual project to scan')
-  parser.add_argument(
+  parser.add_argument(
     '-f',
     '--force-projects',
     default=None,
     dest='force_projects',
     help='Comma separated list of project names to include in the scan')
-  parser.add_argument(
+  parser.add_argument(
     '-c',
     '--config',
     default=None,
     dest='config_path',
     help='A path to config file with a set of specific resources to scan.')
-  parser.add_argument(
+  parser.add_argument(
     '-l',
     '--logging',
     default='WARNING',
     dest='log_level',
     choices=('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'),
     help='Set logging level (INFO, WARNING, ERROR)')
-  parser.add_argument(
+  parser.add_argument(
     '-lf',
     '--log-file',
     default=None,
@@ -128,20 +132,18 @@
     help='Save logs to the path specified rather than displaying in\
        console')

-# Parse the command line arguments
-args: argparse.Namespace = parser.parse_args()
-
-# Check if none of the necessary options are selected
-if not args.key_path and not args.gcloud_profile_path \
-   and not args.use_metadata and not args.access_token_files\
-   and not args.refresh_token_files:
+  # Parse the command line arguments
+  args: argparse.Namespace = parser.parse_args()

-  # If none of the options are selected, log an error message
-  logging.error(
-      'Please select at least one option to begin scan\
-      -k/--sa-key-path,-g/--gcloud-profile-path, -m, -rt, -at'
-  )
+  # Check if none of the necessary options are selected
+  if not args.key_path and not args.gcloud_profile_path \
+    and not args.use_metadata and not args.access_token_files\
+    and not args.refresh_token_files:

-# Return the parsed command line arguments
-return args
+    # If none of the options are selected, log an error message
+    logging.error(
+        'Please select at least one option to begin scan\
+        -k/--sa-key-path,-g/--gcloud-profile-path, -m, -rt, -at')

+  # Return the parsed command line arguments
+  return args

From 8ffc0aa8fdfea874f2dfd991d036c6926c981494 Mon Sep 17 00:00:00 2001
From: Rohit Raj <88114930+ro4i7@users.noreply.github.com>
Date: Thu, 6 Apr 2023 22:40:01 +0530
Subject: [PATCH 15/25] Update crawl.py

---
 src/gcp_scanner/crawl.py | 375 ++++++++++++++++++++++++---------------
 1 file changed, 236 insertions(+), 139 deletions(-)

diff --git a/src/gcp_scanner/crawl.py b/src/gcp_scanner/crawl.py
index f01369f4..ffd3f3ab 100644
--- a/src/gcp_scanner/crawl.py
+++ b/src/gcp_scanner/crawl.py
@@ -34,6 +34,7 @@
 
 import collections
 
+
 def infinite_defaultdict():
   """Initialize infinite default.
 
@@ -43,7 +44,6 @@ def infinite_defaultdict():
   return collections.defaultdict(infinite_defaultdict)
 
 
-
 def fetch_project_info(project_name: str,
                        credentials: Credentials) -> Dict[str, Any]:
   """Retrieve information about specific project.
@@ -69,14 +69,13 @@
     if "projectNumber" in response:
       project_info = response
 
   except Exception:
     logging.info("Failed to enumerate projects")
     logging.info(sys.exc_info())
 
   return project_info
 
-
 def get_project_list(credentials: Credentials) -> List[Dict[str, Any]]:
   """Retrieve a list of projects accessible by credentials provided.
@@ -100,14 +99,15 @@ def get_project_list(credentials: Credentials) -> List[Dict[str, Any]]:
       project_list = response.get("projects", [])
       request = service.projects().list_next(
          previous_request=request, previous_response=response)
   except Exception:
     logging.info("Failed to enumerate projects")
     logging.info(sys.exc_info())
   return project_list
 
 
 def get_compute_instances_names(
     project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]:
   """Retrieve a list of Compute VMs available in the project.
 
   Args:
@@ -125,16 +125,22 @@
       response = request.execute()
       if response.get("items", None) is not None:
         images_result = [instance
-                         for _, instances_scoped_list in response["items"].items()
-                         for instance in instances_scoped_list.get("instances", [])]
+                         for _,
+                         instances_scoped_list in response[
+                             "items"].items()
+                         for instance in instances_scoped_list.get(
+                             "instances", [])]
       request = service.instances().aggregatedList_next(
           previous_request=request, previous_response=response)
   except Exception:
-    logging.info("Failed to enumerate compute instances in the %s", project_name)
+    logging.info(
+        "Failed to enumerate compute instances in the %s", project_name)
     logging.info(sys.exc_info())
   return images_result
 
-def get_compute_images_names(project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]:
+
+def get_compute_images_names(
+    project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]:
   """Retrieve a list of Compute images available in the project.
 
   Args:
@@ -152,14 +158,17 @@
     while request is not None:
       response = request.execute()
       images_result = response.get("items", [])
-      request = service.images().list_next(previous_request=request, previous_response=response)
+      request = service.images().list_next(
+          previous_request=request, previous_response=response)
   except Exception:
-    logging.info("Failed to enumerate compute images in the %s", project_name)
+    logging.info(
+        "Failed to enumerate compute images in the %s", project_name)
     logging.info(sys.exc_info())
   return images_result
 
-def get_machine_images(project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]:
+def get_machine_images(project_name: str, service: discovery.Resource) -> List[
+    Dict[str, Any]]:
   """Retrieve a list of Machine Images Resources available in the project.
 
   Args:
@@ -177,14 +186,17 @@
     while request is not None:
       response = request.execute()
       machine_images_list = response.get("items", [])
-      request = service.machineImages().list_next(previous_request=request, previous_response=response)
+      request = service.machineImages().list_next(
+          previous_request=request, previous_response=response)
   except Exception:
-    logging.info("Failed to enumerate machine images in the %s", project_name)
+    logging.info(
+        "Failed to enumerate machine images in the %s", project_name)
     logging.info(sys.exc_info())
   return machine_images_list
 
-def get_compute_disks_names(project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]:
+def get_compute_disks_names(
+    project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]:
   """Retrieve a list of Compute disks available in the project.
   Args:
 
@@ -208,14 +220,16 @@
       ]
       request = service.disks().aggregatedList_next(
          previous_request=request, previous_response=response)
   except Exception:
-    logging.info("Failed to enumerate compute disks in the %s", project_name)
+    logging.info(
+        "Failed to enumerate compute disks in the %s", project_name)
     logging.info(sys.exc_info())
 
   return disk_names_list
 
 
-def get_static_ips(project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]:
+def get_static_ips(
+    project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]:
   """Retrieve a list of static IPs available in the project.
 
   Args:
@@ -240,14 +254,15 @@
       ]
       request = service.addresses().aggregatedList_next(
          previous_request=request, previous_response=response)
   except Exception:
     logging.info("Failed to get static IPs in the %s", project_name)
     logging.info(sys.exc_info())
 
   return ips_list
 
 
-def get_compute_snapshots(project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]:
+def get_compute_snapshots(
+    project_name: str, service: discovery.Resource) -> List[Dict[str, Any]]:
   """Retrieve a list of Compute snapshots available in the project.
 
   Args:
@@ -264,15 +279,18 @@
     while request is not None:
      response = request.execute()
      snapshots_list = response.get("items", [])
-     request = service.snapshots().list_next(previous_request=request, previous_response=response)
+     request = service.snapshots().list_next(
+         previous_request=request, previous_response=response)
   except Exception:
     logging.info("Failed to get compute snapshots in the %s", project_name)
     logging.info(sys.exc_info())
 
   return snapshots_list
 
 
-def get_subnets(project_name: str, compute_client: discovery.Resource) -> List[Dict[str, Any]]:
+def get_subnets(
+    project_name: str,
+    compute_client: discovery.Resource) -> List[Dict[str, Any]]:
   """Retrieve a list of subnets available in the project.
 
   Args:
@@ -285,20 +303,24 @@
   logging.info("Retrieving Subnets")
   subnets_list = list()
   try:
-    request = compute_client.subnetworks().aggregatedList(project=project_name)
+    request = compute_client.subnetworks().aggregatedList(
+        project=project_name)
     while request is not None:
       response = request.execute()
      if response.get("items", None) is not None:
        subnets_list = list(response["items"].items())
-      request = compute_client.subnetworks().aggregatedList_next(previous_request=request, previous_response=response)
+      request = compute_client.subnetworks().aggregatedList_next(
+          previous_request=request, previous_response=response)
   except Exception:
     logging.info("Failed to get subnets in the %s", project_name)
     logging.info(sys.exc_info())
 
   return subnets_list
 
 
-def get_firewall_rules(project_name: str, compute_client: discovery.Resource) -> List[Dict[str, Any]]:
+def get_firewall_rules(
+    project_name: str,
+    compute_client: discovery.Resource) -> List[Dict[str, Any]]:
   """Retrieve a list of firewall rules in the project.
   Args:
 
@@ -314,15 +336,19 @@
     request = compute_client.firewalls().list(project=project_name)
     while request is not None:
       response = request.execute()
-      firewall_rules_list = [(firewall["name"],) for firewall in response.get("items", [])]
-      request = compute_client.firewalls().list_next(previous_request=request, previous_response=response)
+      firewall_rules_list = [(
+          firewall["name"],) for firewall in response.get("items", [])]
+      request = compute_client.firewalls().list_next(
+          previous_request=request, previous_response=response)
   except Exception:
     logging.info("Failed to get firewall rules in the %s", project_name)
     logging.info(sys.exc_info())
   return firewall_rules_list
 
+
 def get_bucket_names(project_name: str, credentials: Credentials,
-                     dump_fd: io.TextIOWrapper) -> Dict[str, Tuple[Any, List[Any]]]:
+                     dump_fd: io.TextIOWrapper) -> Dict[str,
+                                                        Tuple[Any, List[Any]]]:
   """Retrieve a list of buckets available in the project.
 
   Args:
     project_name: A name of a project to query info about.
     credentials: An google.oauth2.credentials.Credentials object.
     dump_fd: If set, the function will enumerate files stored in buckets and
       save them in a file corresponding to provided file descriptor.
       This is a very slow, noisy operation and should be used with caution.
 
   Returns:
     A dictionary where key is bucket name and value is a bucket Object.
   """
 
   logging.info("Retrieving GCS Buckets")
   buckets_dict = dict()
-  service = discovery.build("storage", "v1", credentials=credentials, cache_discovery=False)
-
+  service = discovery.build("storage", "v1", credentials=credentials,
+                            cache_discovery=False)
   # Make an authenticated API request
   request = service.buckets().list(project=project_name)
   while request is not None:
     try:
       response = request.execute()
     except googleapiclient.errors.HttpError:
       logging.info("Failed to list buckets in the %s", project_name)
       logging.info(sys.exc_info())
       break
 
     for bucket in response.get("items", []):
       buckets_dict[bucket["name"]] = (bucket, None)
       if dump_fd is not None:
-        ret_fields = "nextPageToken,items(name,size,contentType,timeCreated)"
-        req = service.objects().list(bucket=bucket["name"], fields=ret_fields)
+        ret_fields = (
+            "nextPageToken,"
+            "items(name,size,contentType,timeCreated)"
+        )
+        req = service.objects().list(
+            bucket=bucket["name"], fields=ret_fields)
 
         while req:
           try:
             resp = req.execute()
             for item in resp.get("items", []):
-              dump_fd.write(json.dumps(item, indent=2, sort_keys=False))
+              dump_fd.write(json.dumps(
+                  item, indent=2, sort_keys=False))
 
             req = service.objects().list_next(req, resp)
           except googleapiclient.errors.HttpError:
-            logging.info("Failed to read the bucket %s", bucket["name"])
+            logging.info(
+                "Failed to read the bucket %s", bucket["name"])
             logging.info(sys.exc_info())
             break
 
-    request = service.buckets().list_next(previous_request=request, previous_response=response)
+    request = service.buckets().list_next(
+        previous_request=request, previous_response=response)
 
   return buckets_dict
 
 
-def get_managed_zones(project_name: str, credentials: Credentials) -> List[Dict[str, Any]]:
+def get_managed_zones(
+    project_name: str, credentials: Credentials) -> List[Dict[str, Any]]:
   """Retrieve a list of DNS zones available in the project.
Args: @@ -387,22 +421,26 @@ def get_managed_zones(project_name: str, credentials: Credentials) -> List[Dict[ zones_list = list() try: - service = discovery.build("dns", "v1", credentials=credentials, cache_discovery=False) + service = discovery.build( + "dns", "v1", credentials=credentials, cache_discovery=False) request = service.managedZones().list(project=project_name) while request is not None: response = request.execute() - zones_list = response.get("managedZones",[]) - request = service.managedZones().list_next(previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to enumerate DNS zones for project %s", project_name) + zones_list = response.get("managedZones", []) + request = service.managedZones().list_next( + previous_request=request, previous_response=response) + except ImportError: + logging.info( + "Failed to enumerate DNS zones for project %s", project_name) logging.info(sys.exc_info()) return zones_list def get_gke_clusters( - project_name: str, gke_client: container_v1.services.cluster_manager.client.ClusterManagerClient + project_name: str, + gke_client: container_v1.services.cluster_manager.client.ClusterManagerClient ) -> List[Tuple[str, str]]: """Retrieve a list of GKE clusters available in the project. @@ -418,9 +456,12 @@ def get_gke_clusters( parent = f"projects/{project_name}/locations/-" try: clusters = gke_client.list_clusters(parent=parent) - return [(cluster.name, cluster.description) for cluster in clusters.clusters] - except Exception: - logging.info("Failed to retrieve cluster list for project %s", project_name) + return [( + cluster.name, + cluster.description) for cluster in clusters.clusters] + except ImportError: + logging.info( + "Failed to retrieve cluster list for project %s", project_name) logging.info(sys.exc_info()) return [] @@ -430,7 +471,8 @@ def get_gke_images(project_name: str, access_token: str) -> Dict[str, Any]: Args: project_name: A name of a project to query info about. - access_token: An Oauth2 token with permissions to query list of gke images. + access_token: An Oauth2 token with permissions\ + to query list of gke images. Returns: A gke images JSON object for each accessible zone. @@ -446,18 +488,21 @@ def get_gke_images(project_name: str, access_token: str) -> Dict[str, Any]: res = requests.get( gcr_url, auth=HTTPBasicAuth("oauth2accesstoken", access_token)) if not res.ok: - logging.info("Failed to retrieve gcr images list. Status code: %d", - res.status_code) + logging.info( + "Failed to retrieve gcr images list. Status code: %d", + res.status_code) continue images[region.replace(".", "")] = res.json() - except Exception: - logging.info("Failed to retrieve gke images for project %s", project_name) + except ImportError: + logging.info( + "Failed to retrieve gke images for project %s", project_name) logging.info(sys.exc_info()) return images -def get_sql_instances(project_name: str, credentials: Credentials) -> List[Dict[str, Any]]: +def get_sql_instances( + project_name: str, credentials: Credentials) -> List[Dict[str, Any]]: """Retrieve a list of SQL instances available in the project. 
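Editor's sketch: unlike the other crawlers, get_gke_images above talks to the container registry over plain HTTP, passing the OAuth2 access token as the password half of a Basic-auth pair. A sketch against the standard Docker Registry v2 catalog endpoint; the host is an assumption, and the real code iterates regional variants such as us.gcr.io:

import requests
from requests.auth import HTTPBasicAuth


def list_registry_catalog(access_token: str, host: str = "gcr.io"):
  # GCR accepts an OAuth2 access token under the fixed
  # user name "oauth2accesstoken".
  res = requests.get(
      f"https://{host}/v2/_catalog",
      auth=HTTPBasicAuth("oauth2accesstoken", access_token),
      timeout=30)
  return res.json() if res.ok else None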
Args: @@ -472,7 +517,8 @@ def get_sql_instances(project_name: str, credentials: Credentials) -> List[Dict[ sql_instances_list = list() try: service = discovery.build( - "sqladmin", "v1beta4", credentials=credentials, cache_discovery=False) + "sqladmin", "v1beta4", + credentials=credentials, cache_discovery=False) request = service.instances().list(project=project_name) while request is not None: @@ -480,14 +526,16 @@ def get_sql_instances(project_name: str, credentials: Credentials) -> List[Dict[ sql_instances_list = response.get("items", []) request = service.instances().list_next( previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to get SQL instances for project %s", project_name) + except ImportError: + logging.info( + "Failed to get SQL instances for project %s", project_name) logging.info(sys.exc_info()) return sql_instances_list -def get_bq_tables(project_id: str, dataset_id: str, bq_service: discovery.Resource) -> List[Dict[str, Any]]: +def get_bq_tables(project_id: str, dataset_id: str, + bq_service: discovery.Resource) -> List[Dict[str, Any]]: """Retrieve a list of BigQuery tables available in the dataset. Args: @@ -502,18 +550,22 @@ def get_bq_tables(project_id: str, dataset_id: str, bq_service: discovery.Resour logging.info("Retrieving BigQuery Tables for dataset %s", dataset_id) list_of_tables = list() try: - request = bq_service.tables().list(projectId=project_id, datasetId=dataset_id) + request = bq_service.tables().list( + projectId=project_id, datasetId=dataset_id) while request is not None: response = request.execute() list_of_tables = response.get("tables", []) - request = bq_service.tables().list_next(previous_request=request, previous_response=response) - except Exception: + request = bq_service.tables().list_next( + previous_request=request, previous_response=response) + except ImportError: logging.info("Failed to retrieve BQ tables for dataset %s", dataset_id) logging.info(sys.exc_info()) return list_of_tables -def get_bq(project_id: str, credentials: Credentials) -> Dict[str, List[Dict[str, Any]]]: +def get_bq( + project_id: str, + credentials: Credentials) -> Dict[str, List[Dict[str, Any]]]: """Retrieve a list of BigQuery datasets available in the project. 
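Editor's sketch: get_bq_tables above, together with get_bq below it, forms a two-level enumeration: page through datasets, then list each dataset's tables. Condensed into one function (inner pagination elided for brevity):

from googleapiclient import discovery


def list_bq_tables_by_dataset(project_id: str, credentials):
  service = discovery.build("bigquery", "v2", credentials=credentials,
                            cache_discovery=False)
  datasets = {}
  request = service.datasets().list(projectId=project_id)
  while request is not None:
    response = request.execute()
    for dataset in response.get("datasets", []):
      dataset_id = dataset["datasetReference"]["datasetId"]
      tables = service.tables().list(
          projectId=project_id, datasetId=dataset_id).execute()
      datasets[dataset_id] = tables.get("tables", [])
    request = service.datasets().list_next(
        previous_request=request, previous_response=response)
  return datasets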
Args: @@ -527,7 +579,8 @@ def get_bq(project_id: str, credentials: Credentials) -> Dict[str, List[Dict[str logging.info("Retrieving BigQuery Datasets") bq_datasets = dict() try: - service = discovery.build("bigquery", "v2", credentials=credentials, cache_discovery=False) + service = discovery.build( + "bigquery", "v2", credentials=credentials, cache_discovery=False) request = service.datasets().list(projectId=project_id) while request is not None: @@ -535,17 +588,21 @@ def get_bq(project_id: str, credentials: Credentials) -> Dict[str, List[Dict[str for dataset in response.get("datasets", []): dataset_id = dataset["datasetReference"]["datasetId"] - bq_datasets[dataset_id] = get_bq_tables(project_id, dataset_id, service) + bq_datasets[dataset_id] = get_bq_tables( + project_id, dataset_id, service) - request = service.datasets().list_next(previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve BQ datasets for project %s", project_id) + request = service.datasets().list_next( + previous_request=request, previous_response=response) + except ImportError: + logging.info( + "Failed to retrieve BQ datasets for project %s", project_id) logging.info(sys.exc_info()) return bq_datasets -def get_pubsub_subscriptions(project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: +def get_pubsub_subscriptions( + project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: """Retrieve a list of PubSub subscriptions available in the project. Args: @@ -559,21 +616,25 @@ def get_pubsub_subscriptions(project_id: str, credentials: Credentials) -> List[ logging.info("Retrieving PubSub Subscriptions") pubsubs_list = list() try: - service = discovery.build("pubsub", "v1", credentials=credentials, cache_discovery=False) + service = discovery.build( + "pubsub", "v1", credentials=credentials, cache_discovery=False) - request = service.projects().subscriptions().list(project=f"projects/{project_id}") + request = service.projects().subscriptions().list( + project=f"projects/{project_id}") while request is not None: response = request.execute() pubsubs_list = response.get("subscriptions", []) - request = service.projects().subscriptions().list_next(previous_request=request, previous_response=response) - except Exception: + request = service.projects().subscriptions().list_next( + previous_request=request, previous_response=response) + except ImportError: logging.info("Failed to get PubSubs for project %s", project_id) logging.info(sys.exc_info()) return pubsubs_list -def get_cloudfunctions(project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: +def get_cloudfunctions( + project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: """Retrieve a list of CloudFunctions available in the project. 
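Editor's sketch: the Pub/Sub and Cloud Functions calls above address resources through parent strings, and the Cloud Functions one uses "-" as a wildcard location so a single request covers every region. Isolated:

from googleapiclient import discovery


def list_functions_all_regions(project_id: str, credentials):
  service = discovery.build("cloudfunctions", "v1", credentials=credentials,
                            cache_discovery=False)
  # "locations/-" matches all locations, avoiding a per-region loop.
  request = service.projects().locations().functions().list(
      parent=f"projects/{project_id}/locations/-")
  return request.execute().get("functions", [])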
Args: @@ -586,15 +647,19 @@ def get_cloudfunctions(project_id: str, credentials: Credentials) -> List[Dict[s logging.info("Retrieving CloudFunctions") functions_list = list() - service = discovery.build("cloudfunctions", "v1", credentials=credentials, cache_discovery=False) + service = discovery.build( + "cloudfunctions", "v1", credentials=credentials, cache_discovery=False) try: - request = service.projects().locations().functions().list(parent=f"projects/{project_id}/locations/-") + request = service.projects().locations().functions().list( + parent=f"projects/{project_id}/locations/-") while request is not None: response = request.execute() functions_list = response.get("functions", []) - request = service.projects().locations().functions().list_next(previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve CloudFunctions for project %s", project_id) + request = service.projects().locations().functions().list_next( + previous_request=request, previous_response=response) + except ImportError: + logging.info( + "Failed to retrieve CloudFunctions for project %s", project_id) logging.info(sys.exc_info()) return functions_list @@ -616,7 +681,7 @@ def get_bigtable_instances(project_id: str, bigtable_instances_list = list() try: service = discovery.build( - "bigtableadmin", "v2", credentials=credentials, cache_discovery=False) + "bigtableadmin", "v2", credentials=credentials, cache_discovery=False) request = service.projects().instances().list( parent=f"projects/{project_id}") @@ -625,7 +690,7 @@ def get_bigtable_instances(project_id: str, bigtable_instances_list = response.get("instances", []) request = service.projects().instances().list_next( previous_request=request, previous_response=response) - except Exception: + except ImportError: logging.info("Failed to retrieve BigTable instances for project %s", project_id) logging.info(sys.exc_info()) @@ -657,7 +722,7 @@ def get_spanner_instances(project_id: str, spanner_instances_list = response.get("instances", []) request = service.projects().instances().list_next( previous_request=request, previous_response=response) - except Exception: + except ImportError: logging.info("Failed to retrieve Spanner instances for project %s", project_id) logging.info(sys.exc_info()) @@ -688,13 +753,15 @@ def get_filestore_instances(project_id: str, filestore_instances_list = response.get("instances", []) request = service.projects().locations().instances().list_next( previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to get filestore instances for project %s", project_id) + except ImportError: + logging.info( + "Failed to get filestore instances for project %s", project_id) logging.info(sys.exc_info()) return filestore_instances_list -def get_kms_keys(project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: +def get_kms_keys( + project_id: str, credentials: Credentials) -> List[Dict[str, Any]]: """Retrieve a list of KMS keys available in the project. 
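Editor's sketch: get_bigtable_instances and get_spanner_instances above differ only in API name, version, and parent path, so a single parameterized helper could serve both (Filestore adds an extra locations() level and would need its own variant). A sketch of that refactoring:

from googleapiclient import discovery


def list_instances(credentials, api: str, version: str, parent: str):
  service = discovery.build(api, version, credentials=credentials,
                            cache_discovery=False)
  instances = []
  request = service.projects().instances().list(parent=parent)
  while request is not None:
    response = request.execute()
    instances = response.get("instances", [])
    request = service.projects().instances().list_next(
        previous_request=request, previous_response=response)
  return instances

# list_instances(creds, "bigtableadmin", "v2", f"projects/{project_id}")
# list_instances(creds, "spanner", "v1", f"projects/{project_id}")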
Args: @@ -708,32 +775,42 @@ def get_kms_keys(project_id: str, credentials: Credentials) -> List[Dict[str, An logging.info("Retrieving KMS keys") kms_keys_list = list() try: - service = discovery.build("cloudkms", "v1", credentials=credentials, cache_discovery=False) + service = discovery.build( + "cloudkms", "v1", credentials=credentials, cache_discovery=False) # list all possible locations locations_list = list() - request = service.projects().locations().list(name=f"projects/{project_id}") + request = service.projects().locations().list( + name=f"projects/{project_id}") while request is not None: response = request.execute() for location in response.get("locations", []): locations_list.append(location["locationId"]) - request = service.projects().locations().list_next(previous_request=request, previous_response=response) + request = service.projects().locations().list_next( + previous_request=request, previous_response=response) for location_id in locations_list: - request_loc = service.projects().locations().keyRings().list(parent=f"projects/{project_id}/locations/{location_id}") + request_loc = service.projects().locations().keyRings().list( + parent=f"projects/{project_id}/locations/{location_id}") while request_loc is not None: response_loc = request_loc.execute() for keyring in response_loc.get("keyRings", []): - request = service.projects().locations().keyRings().cryptoKeys().list(parent=keyring["name"]) + request = service.projects().locations( + ).keyRings().cryptoKeys().list(parent=keyring["name"]) while request is not None: response = request.execute() for key in response.get("cryptoKeys", []): kms_keys_list.append(key) - request = service.projects().locations().keyRings().cryptoKeys().list_next(previous_request=request, previous_response=response) + request = service.projects().locations().keyRings( + ).cryptoKeys().list_next( + previous_request=request, + previous_response=response) - request_loc = service.projects().locations().keyRings().list_next(previous_request=request, previous_response=response) - except Exception: + request_loc = service.projects( + ).locations().keyRings().list_next( + previous_request=request, previous_response=response) + except ImportError: logging.info("Failed to retrieve KMS keys for project %s", project_id) logging.info(sys.exc_info()) return kms_keys_list @@ -772,8 +849,9 @@ def get_app_services(project_name: str, app_services["services"] = response.get("services", []) request = app_client.apps().services().list_next( previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve App services for project %s", project_name) + except ImportError: + logging.info( + "Failed to retrieve App services for project %s", project_name) logging.info(sys.exc_info()) return app_services @@ -805,13 +883,15 @@ def get_endpoints(project_id: str, endpoints_list = response.get("services", []) request = service.services().list_next( previous_request=request, previous_response=response) - except Exception: - logging.info("Failed to retrieve endpoints list for project %s", project_id) + except ImportError: + logging.info( + "Failed to retrieve endpoints list for project %s", project_id) logging.info(sys.exc_info()) return endpoints_list -def get_iam_policy(project_name: str, credentials: Credentials) -> List[Dict[str, Any]]: +def get_iam_policy( + project_name: str, credentials: Credentials) -> List[Dict[str, Any]]: """Retrieve an IAM Policy in the project. 
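Editor's sketch: get_kms_keys above is the deepest crawler: locations, then key rings per location, then crypto keys per ring. The traversal reads more clearly as a flattened generator (pagination elided; service is assumed to be a cloudkms v1 discovery client):

def iter_kms_keys(service, project_id: str):
  locations = service.projects().locations().list(
      name=f"projects/{project_id}").execute()
  for location in locations.get("locations", []):
    rings = service.projects().locations().keyRings().list(
        parent=location["name"]).execute()
    for ring in rings.get("keyRings", []):
      keys = service.projects().locations().keyRings().cryptoKeys().list(
          parent=ring["name"]).execute()
      for key in keys.get("cryptoKeys", []):
        yield key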
@@ -838,13 +918,15 @@ def get_iam_policy(project_name: str, credentials: Credentials) -> List[Dict[str get_policy_options = {"options": {"requestedPolicyVersion": 3}} try: - # Make a request to the Cloud Resource Manager API to retrieve the IAM policy + # Make a request to the Cloud Resource Manager\ + # API to retrieve the IAM policy request = service.projects().getIamPolicy( resource=resource, body=get_policy_options) response = request.execute() - except Exception: + except ImportError: # Log an error message if the request fails - logging.info("Failed to get endpoints list for project %s", project_name) + logging.info( + "Failed to get endpoints list for project %s", project_name) logging.info(sys.exc_info()) return None @@ -855,7 +937,8 @@ def get_iam_policy(project_name: str, credentials: Credentials) -> List[Dict[str return None -def get_associated_service_accounts(iam_policy: List[Dict[str, Any]]) -> List[str]: +def get_associated_service_accounts( + iam_policy: List[Dict[str, Any]]) -> List[str]: """Extract a list of unique SAs from IAM policy associated with project. Args: @@ -865,23 +948,23 @@ def get_associated_service_accounts(iam_policy: List[Dict[str, Any]]) -> List[st A list of service accounts represented as string """ - if not iam_policy: + if not iam_policy: return [] - list_of_sas = list() - for entry in iam_policy: - for member in entry["members"]: - if "deleted:" in member: + list_of_sas = list() + for entry in iam_policy: + for member in entry["members"]: + if "deleted:" in member: continue account_name = None # initialize variable for account name - for element in member.split(":"): - if "@" in element: + for element in member.split(":"): + if "@" in element: account_name = element break - if account_name and account_name not in list_of_sas: + if account_name and account_name not in list_of_sas: list_of_sas.append(account_name) - return list_of_sas + return list_of_sas def get_service_accounts(project_name: str, @@ -912,18 +995,21 @@ def get_service_accounts(project_name: str, # Send a request to list the service accounts in the project. request = service.projects().serviceAccounts().list(name=name) - # Keep retrieving service accounts as long as there are more to retrieve. + # Keep retrieving service accounts as + # long as there are more to retrieve. while request is not None: response = request.execute() - # Extract the email and description of each service account and add them to the list. + # Extract the email and description of + # each service account and add them to the list. service_accounts = [(service_account["email"], - service_account.get("description","")) - for service_account in response.get("accounts",[])] + service_account.get("description", "")) + for service_account in response.get( + "accounts", [])] # Get the next page of results. request = service.projects().serviceAccounts().list_next( previous_request=request, previous_response=response) - except Exception: + except ImportError: # Log an error message if something goes wrong. logging.info("Failed to retrieve SA list for project %s", project_name) logging.info(sys.exc_info()) @@ -931,8 +1017,8 @@ def get_service_accounts(project_name: str, return service_accounts - -def list_services(project_id: str, credentials: Credentials) -> List[Any]: +def list_services( + project_id: str, credentials: Credentials) -> List[Any]: """Retrieve a list of services enabled in the project. 
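Editor's sketch: get_associated_service_accounts above mines account emails out of IAM policy bindings, skipping deleted principals. The same extraction as a small pure function:

def emails_from_bindings(bindings):
  """bindings: the 'bindings' list of an IAM policy response."""
  emails = []
  for binding in bindings or []:
    for member in binding.get("members", []):
      # Members look like "serviceAccount:name@proj.iam.gserviceaccount.com";
      # deleted principals carry a "deleted:" prefix.
      if member.startswith("deleted:"):
        continue
      _, _, principal = member.partition(":")
      if "@" in principal and principal not in emails:
        emails.append(principal)
  return emails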
  Args:
@@ -949,17 +1035,19 @@ def list_services(project_id: str, credentials: Credentials) -> List[Any]:

  # Create a list to hold the enabled services
  list_of_services = list()

-  serviceusage = discovery.build("serviceusage", "v1", credentials=credentials)
+  serviceusage = discovery.build(
+      "serviceusage", "v1", credentials=credentials)

  # Create a request to list all services enabled in the given project
  request = serviceusage.services().list(
-      parent="projects/" + project_id,  # Specify the parent resource to list services under
-      pageSize=200,  # Specify the maximum number of services to return per page
-      filter="state:ENABLED"  # Specify the filter to return only ENABLED services
+      parent="projects/" + project_id,
+      pageSize=200,
+      filter="state:ENABLED"
  )

  try:
-    # Loop through each page of services until all services have been retrieved
+    # Loop through each page of services
+    # until all services have been retrieved
    while request is not None:
      response = request.execute()
      list_of_services.append(response.get("services", None))

      request = serviceusage.services().list_next(
          previous_request=request, previous_response=response)

  except Exception:
    # Log an error message if an exception occurs while retrieving services
    logging.info("Failed to retrieve services for project %s", project_id)
    logging.info(sys.exc_info())
@@ -986,26 +1074,31 @@ def list_sourcerepo(project_id: str, credentials: Credentials) -> List[Any]:

    A list of cloud source repositories in the project.
  """

-  # Log a message indicating that we're retrieving repositories for the specified project.
+  # Log a message indicating that
+  # we're retrieving repositories for the specified project.
  logging.info("Retrieving cloud source repositories %s", project_id)

  list_of_repos = list()

-  # Build a service object for interacting with the Cloud Source Repositories API.
+  # Build a service object for
+  # interacting with the Cloud Source Repositories API.
  service = discovery.build("sourcerepo", "v1", credentials=credentials)

-  # Create a request to list the repositories in the specified project, up to 500 at a time.
+  # Create a request to list the repositories
+  # in the specified project, up to 500 at a time.
  request = service.projects().repos().list(
      name="projects/" + project_id,
      pageSize=500
  )

  try:
-    # Keep making requests until there are no more pages of repositories to retrieve.
+    # Keep making requests until there
+    # are no more pages of repositories to retrieve.
    while request is not None:
      response = request.execute()

-      # Add the repositories from the response to the list of repositories.
+      # Add the repositories from the
+      # response to the list of repositories.
      list_of_repos.append(response.get("repos", None))

      # Get the next page of repositories, if there is one.
      request = service.projects().repos().list_next(
          previous_request=request,
          previous_response=response
      )

  except Exception:
-    # If an exception is raised, log a message indicating that we failed to retrieve the repositories.
-    logging.info("Failed to retrieve source repos for project %s", project_id)
+    # If an exception is raised, log a message
+    # indicating that we failed to retrieve the repositories.
+ logging.info( + "Failed to retrieve source repos for project %s", project_id) logging.info(sys.exc_info()) return list_of_repos @@ -1055,7 +1150,8 @@ def list_dns_policies(project_id: str, credentials: Credentials) -> List[Any]: # Send the request and get the response response = request.execute() - # Get the policies from the response and add them to the list_of_policies + # Get the policies from the response + # and add them to the list_of_policies list_of_policies.append(response.get("policies", None)) # Get the next page of results (if there are any) @@ -1063,10 +1159,11 @@ def list_dns_policies(project_id: str, credentials: Credentials) -> List[Any]: previous_request=request, previous_response=response ) - except Exception: - # Log an error if we failed to retrieve DNS policies for the specified project - logging.info("Failed to retrieve DNS policies for project %s", project_id) + except ImportError: + # Log an error if we failed to retrieve + # DNS policies for the specified project + logging.info( + "Failed to retrieve DNS policies for project %s", project_id) logging.info(sys.exc_info()) return list_of_policies - From f8ae001578a1b3bb48795de670770b2d2ce74563 Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Thu, 6 Apr 2023 23:11:16 +0530 Subject: [PATCH 16/25] Update credsdb.py --- src/gcp_scanner/credsdb.py | 169 ++++++++++++++++++++++--------------- 1 file changed, 99 insertions(+), 70 deletions(-) diff --git a/src/gcp_scanner/credsdb.py b/src/gcp_scanner/credsdb.py index 1a7334ce..8b8094d7 100644 --- a/src/gcp_scanner/credsdb.py +++ b/src/gcp_scanner/credsdb.py @@ -40,32 +40,32 @@ def credentials_from_token(access_token: str, refresh_token: Optional[str], token_uri: Optional[str], client_id: Optional[str], client_secret: Optional[str], scopes_user: Optional[str]) -> Credentials: - """ - Create Credentials instance from tokens - """ - return credentials.Credentials( - access_token, - refresh_token=refresh_token, - token_uri=token_uri, - client_id=client_id, - client_secret=client_secret, - scopes=scopes_user) + """ + Create Credentials instance from tokens + """ + return credentials.Credentials(access_token, refresh_token=refresh_token, + token_uri=token_uri, client_id=client_id, + client_secret=client_secret, + scopes=scopes_user) def get_creds_from_file(file_path: str) -> Tuple[str, Credentials]: - """ - Retrieve Credentials instance from a service account json file. - """ - logging.info("Retrieving credentials from %s", file_path) - creds = service_account.Credentials.from_service_account_file(file_path) - return creds.service_account_email, creds + """ + Retrieve Credentials instance from a service account json file. + """ + + logging.info("Retrieving credentials from %s", file_path) + creds = service_account.Credentials.from_service_account_file(file_path) + return creds.service_account_email, creds def get_creds_from_json(parsed_keyfile: Mapping[str, str]) -> Credentials: - """ - Retrieve Credentials instance from parsed service account info. - """ - return service_account.Credentials.from_service_account_info(parsed_keyfile) + """ + Retrieve Credentials instance from parsed service account info. 
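Editor's sketch: credsdb distinguishes two credential shapes above: user credentials rebuilt from OAuth2 tokens, and service-account credentials loaded from key material. Side by side (token and path are placeholders):

from google.oauth2 import credentials, service_account

# User account: rebuilt from an access/refresh token pair.
user_creds = credentials.Credentials(
    "ya29.EXAMPLE",            # placeholder access token
    refresh_token=None,
    token_uri="https://oauth2.googleapis.com/token",
    client_id=None,
    client_secret=None)

# Service account: loaded from a JSON key file (or a parsed
# dict via from_service_account_info).
sa_creds = service_account.Credentials.from_service_account_file(
    "sa-key.json")             # placeholder path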
+ """ + + return service_account.Credentials.from_service_account_info( + parsed_keyfile) def get_creds_from_metadata() -> Tuple[Optional[str], Optional[Credentials]]: @@ -73,16 +73,22 @@ def get_creds_from_metadata() -> Tuple[Optional[str], Optional[Credentials]]: Returns: Tuple[Optional[str], Optional[Credentials]]: - A tuple containing the email associated with the credentials and the constructed credentials. + A tuple containing the email associated with the + credentials and the constructed credentials. """ - # Print a message to indicate that we are retrieving the access token from instance metadata + # Print a message to indicate that we are + # retrieving the access token from instance metadata print("Retrieving access token from instance metadata") - # Define the URLs that we need to access to get the token, scopes, and email - token_url = "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token" - scope_url = "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/scopes" - email_url = "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/email" + # Define the URLs that we need to + # access to get the token, scopes, and email + token_url = "http://metadata.google.internal/computeMetadata/v1/" \ + "instance/service-accounts/default/token" + scope_url = "http://metadata.google.internal/computeMetadata/v1/" \ + "instance/service-accounts/default/scopes" + email_url = "http://metadata.google.internal/computeMetadata/v1/" \ + "instance/service-accounts/default/email" # Set the headers for the requests headers = {"Metadata-Flavor": "Google"} @@ -93,7 +99,10 @@ def get_creds_from_metadata() -> Tuple[Optional[str], Optional[Credentials]]: # Check if the response was successful if not res.ok: - logging.error("Failed to retrieve instance token. Status code %d", res.status_code) + logging.error("Failed to retrieve instance token. " + "Status code %d", res.status_code) + token_url = None + return None, None # Parse the JSON response and get the access token @@ -104,7 +113,8 @@ def get_creds_from_metadata() -> Tuple[Optional[str], Optional[Credentials]]: # Check if the response was successful if not res.ok: - logging.error("Failed to retrieve instance scopes. Status code %d", res.status_code) + logging.error("Failed to retrieve instance scopes. " + "Status code %d", res.status_code) return None, None # Get the instance scopes from the response @@ -115,19 +125,21 @@ def get_creds_from_metadata() -> Tuple[Optional[str], Optional[Credentials]]: # Check if the response was successful if not res.ok: - logging.error("Failed to retrieve instance email. Status code %d", res.status_code) + logging.error("Failed to retrieve instance email. 
" + "Status code %d", res.status_code) return None, None # Get the instance email from the response email = res.content.decode("utf-8") - except Exception: + except ImportError: # Log an error message if any exception occurred logging.error("Failed to retrieve instance metadata") logging.error(sys.exc_info()[1]) return None, None - # Print a message to indicate that we have successfully retrieved the instance metadata + # Print a message to indicate that + # we have successfully retrieved the instance metadata print("Successfully retrieved instance metadata") # Log the length of the access token, instance email, and instance scopes @@ -135,15 +147,18 @@ def get_creds_from_metadata() -> Tuple[Optional[str], Optional[Credentials]]: logging.info("Instance email: %s", email) logging.info("Instance scopes: %s", instance_scopes) - # Return the email and credentials constructed from the token and instance scopes - return email, credentials_from_token(token, None, None, None, None, instance_scopes) + # Return the email and credentials + # constructed from the token and instance scopes + return email, credentials_from_token( + token, None, None, None, None, instance_scopes) - -def get_creds_from_data(access_token: str, parsed_keyfile: Dict[str, str]) -> Credentials: +def get_creds_from_data( + access_token: str, parsed_keyfile: Dict[str, str]) -> Credentials: """Creates a Credentials instance from parsed service account info. - The function currently supports two types of credentials. Service account key in json format and user account with refresh token. + The function currently supports two types of credentials. + Service account key in json format and user account with refresh token. Args: access_token: An Oauth2 access token. It can be None. @@ -170,7 +185,8 @@ def get_creds_from_data(access_token: str, parsed_keyfile: Dict[str, str]) -> Cr ) # Check if the parsed_keyfile contains "private_key" elif "private_key" in parsed_keyfile: - logging.info("Identified service account key credentials in gcloud profile") + logging.info( + "Identified service account key credentials in gcloud profile") # this is a service account key with private key creds = get_creds_from_json(parsed_keyfile) else: @@ -182,11 +198,13 @@ def get_creds_from_data(access_token: str, parsed_keyfile: Dict[str, str]) -> Cr def find_creds(explicit_path: Optional[str] = None) -> List[str]: """ - The function searches the disk and returns a list of files with GCP credentials. + The function searches the disk and returns + a list of files with GCP credentials. Args: - explicit_path: An explicit path on disk to search. If None, the function - searches in standard locations where gcloud profiles are usually located. + explicit_path: An explicit path on disk to search. + If None, the function searches in + standard locations where gcloud profiles are usually located. Returns: list: The list of files with GCP credentials. 
@@ -208,7 +226,8 @@ def find_creds(explicit_path: Optional[str] = None) -> List[str]: full_path = os.path.join(dir_path, subdir_name, "gcloud") search_paths.append(full_path) - # Scan each search path for credentials.db and add them to the list_of_creds_files + # Scan each search path for credentials.db + # and add them to the list_of_creds_files for dir_path in search_paths: print(f"Scanning {dir_path} for credentials.db") full_path = os.path.join(dir_path, "credentials.db") @@ -222,7 +241,8 @@ def find_creds(explicit_path: Optional[str] = None) -> List[str]: def get_access_tokens_dict(path_to_creds_db: str) -> Dict[str, str]: """ - The function searches and extracts OAuth2 access_tokens from a SQLite3 database. + The function searches and extracts OAuth2 + access_tokens from a SQLite3 database. Args: path_to_creds_db: A path to SQLite3 database with gcloud access tokens. @@ -233,16 +253,20 @@ def get_access_tokens_dict(path_to_creds_db: str) -> Dict[str, str]: access_tokens_dict = dict() - # Replace credentials.db with access_tokens.db to get the path to access tokens database - access_tokens_path = path_to_creds_db.replace("credentials.db", "access_tokens.db") + # Replace credentials.db with access_tokens.db + # to get the path to access tokens database + access_tokens_path = path_to_creds_db.replace("credentials.db", + "access_tokens.db") # Check if the access tokens database exists and can be read - if os.path.exists(access_tokens_path) and os.access(access_tokens_path, os.R_OK): + if os.path.exists(access_tokens_path) and os.access(access_tokens_path, + os.R_OK): # If the access tokens database exists and can be read, connect to it logging.info("Identified access tokens DB in %s", access_tokens_path) conn = sqlite3.connect(access_tokens_path) - cursor = conn.execute("SELECT account_id, access_token, token_expiry FROM access_tokens") + cursor = conn.execute("SELECT account_id, access_token," + "token_expiry FROM access_tokens") # Fetch all rows from the access tokens database rows = cursor.fetchall() @@ -257,26 +281,30 @@ def get_access_tokens_dict(path_to_creds_db: str) -> Dict[str, str]: expiration_date = expiration_date.split(".")[0] # Convert the expiration date to a datetime object - token_time_obj = datetime.datetime.strptime(expiration_date, "%Y-%m-%d %H:%M:%S") + token_time_obj = datetime.datetime.strptime( + expiration_date, "%Y-%m-%d %H:%M:%S") # Check if the token has expired if datetime.datetime.now() > token_time_obj: logging.info("Token for %s expired", associated_account) continue - # Add the associated account and token to the access tokens dictionary + # Add the associated account and + # token to the access tokens dictionary access_tokens_dict[associated_account] = token return access_tokens_dict - -def extract_creds(path_to_creds_db: str) -> List[Tuple[str, str, str]]: +def extract_creds(path_to_creds_db: str) -> List[ + Tuple[str, str, str]]: """ - The function extracts refresh and associated access tokens from sqlite3 DBs. + The function extracts refresh and associated access + tokens from sqlite3 DBs. Args: - path_to_creds_db (str): A path to sqlite3 DB with gcloud refresh tokens. + path_to_creds_db (str): A path to sqlite3 DB + with gcloud refresh tokens. Returns: List of tuples: (account name, refresh token, access token). 
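Editor's sketch: get_access_tokens_dict above discards cached tokens whose token_expiry has passed; gcloud stores the stamp with fractional seconds, which are stripped before parsing. The check in isolation:

import datetime


def is_expired(token_expiry: str) -> bool:
  """token_expiry: e.g. '2023-04-06 21:00:00.123456' from access_tokens.db."""
  stamp = token_expiry.split(".")[0]  # drop fractional seconds
  expiry = datetime.datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S")
  return datetime.datetime.now() > expiry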
@@ -292,7 +320,6 @@ def extract_creds(path_to_creds_db: str) -> List[Tuple[str, str, str]]: # Connect to the database conn = sqlite3.connect(path_to_creds_db) - # Select account_id and value from the credentials table cursor = conn.execute("SELECT account_id, value FROM credentials") rows = cursor.fetchall() @@ -314,7 +341,8 @@ def extract_creds(path_to_creds_db: str) -> List[Tuple[str, str, str]]: logging.info("Found valid access token for %s", row[0]) access_token = access_tokens[row[0]] - # Append the account name, credentials, and access token to the results list + # Append the account name, credentials, and access + # token to the results list res.append(SA(row[0], row[1], access_token)) # Print the number of identified credential entries @@ -324,27 +352,29 @@ def extract_creds(path_to_creds_db: str) -> List[Tuple[str, str, str]]: return res -def get_account_creds_list(gcloud_profile_path: Optional[str] = None) -> List[List[Tuple[str, str, str]]]: +def get_account_creds_list(gcloud_profile_path: Optional[ + str] = None) -> List[List[Tuple[str, str, str]]]: """The function searches and extracts gcloud credentials from disk. Args: - gcloud_profile_path: An explicit gcloud profile path on disk to search. If - None, the function searches in standard locations where gcloud profiles - are usually located. + gcloud_profile_path: An explicit gcloud profile path on disk to + search. If None, the function searches in standard locations where + gcloud profiles are usually located. Returns: list: A list of tuples (account name, refresh token, access token). """ accounts = list() # initialize an empty list - creds_file_list = find_creds(gcloud_profile_path) # get a list of credentials files + creds_file_list = find_creds(gcloud_profile_path) for creds_file in creds_file_list: - res = extract_creds(creds_file) # extract the credentials from the file + res = extract_creds(creds_file) if res is not None: - accounts.append(res) # append the extracted credentials to the accounts list + accounts.append(res) return accounts # return the accounts list -def impersonate_sa(iam_client: IAMCredentialsClient, target_account: str) -> Credentials: +def impersonate_sa(iam_client: IAMCredentialsClient, + target_account: str) -> Credentials: """ The function is used to impersonate a service account. @@ -378,13 +408,13 @@ def impersonate_sa(iam_client: IAMCredentialsClient, target_account: str) -> Cre ) - def creds_from_access_token(access_token_file): - """The function is used to obtain Google Auth Credentials from access token. + """The function is used to obtain Google Auth + Credentials from access token. Args: - access_token_file: a path to a file with access token and scopes stored in - JSON format. Example: + access_token_file: a path to a file with access token + and scopes stored in JSON format. Example: { "access_token": "", "scopes": [ @@ -448,7 +478,8 @@ def creds_from_refresh_token(refresh_token_file): - google.auth.service_account.Credentials: The constructed credentials. """ - # Open the refresh_token_file in utf-8 encoding and load the contents to a dictionary + # Open the refresh_token_file in utf-8 encoding + # and load the contents to a dictionary with open(refresh_token_file, encoding="utf-8") as f: creds_dict = json.load(f) @@ -466,7 +497,6 @@ def creds_from_refresh_token(refresh_token_file): ) - def get_scopes_from_refresh_token(context) -> Union[List[str], None]: """ The function is used to obtain scopes from a refresh token. 
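Editor's sketch: impersonate_sa below leans on the IAM Credentials API; the core call mints a short-lived access token for the target account, which is then wrapped back into a Credentials object. A hedged sketch of that flow (the scope choice is an assumption):

from google.cloud import iam_credentials
from google.oauth2 import credentials as oauth2_credentials


def impersonated_creds(source_credentials, target_email: str):
  client = iam_credentials.IAMCredentialsClient(
      credentials=source_credentials)
  # "projects/-" is the documented wildcard for this resource name.
  response = client.generate_access_token(
      name=f"projects/-/serviceAccounts/{target_email}",
      scope=["https://www.googleapis.com/auth/cloud-platform"],
  )
  return oauth2_credentials.Credentials(response.access_token)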
@@ -495,9 +525,8 @@ def get_scopes_from_refresh_token(context) -> Union[List[str], None]: raw = response.json().get("scope", None) return raw.split(" ") if raw else None - except Exception as ex: + except ImportError as ex: logging.error("Failed to retrieve access token from refresh token.") logging.debug("Token refresh exception", exc_info=ex) return None - From bace7b1e52442d7d04b2df3da3f2ba5cfddb8797 Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Thu, 6 Apr 2023 23:13:20 +0530 Subject: [PATCH 17/25] Update models.py --- src/gcp_scanner/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gcp_scanner/models.py b/src/gcp_scanner/models.py index 60aa9855..fc73344a 100644 --- a/src/gcp_scanner/models.py +++ b/src/gcp_scanner/models.py @@ -22,6 +22,7 @@ from httplib2 import Credentials + class SpiderContext: """A simple class to initialize the context with a list of root SAs""" @@ -34,7 +35,6 @@ def __init__(self, sa_tuples: List[Tuple[str, Credentials, List[str]]]): """ # Create a new queue to hold the service accounts self.service_account_queue = queue.Queue() - # Add each service account from the sa_tuples list to the queue for sa_tuple in sa_tuples: self.service_account_queue.put(sa_tuple) From 6db5dda0297321a01c20ed0f695baa8249963292 Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Thu, 6 Apr 2023 23:42:58 +0530 Subject: [PATCH 18/25] Update scanner.py --- src/gcp_scanner/scanner.py | 210 ++++++++++++++++++++++--------------- 1 file changed, 127 insertions(+), 83 deletions(-) diff --git a/src/gcp_scanner/scanner.py b/src/gcp_scanner/scanner.py index 8a5d864f..0c597f3a 100644 --- a/src/gcp_scanner/scanner.py +++ b/src/gcp_scanner/scanner.py @@ -21,35 +21,37 @@ import logging import os import sys -from typing import List, Tuple, Dict, Optional,Union +from typing import List, Tuple, Dict, Optional, Union from . import crawl from . import credsdb from . import arguments from google.cloud import container_v1 from google.cloud import iam_credentials -from google.cloud.iam_credentials_v1.services.iam_credentials.client import IAMCredentialsClient +from google.cloud.iam_credentials_v1.services.iam_credentials.client \ + import IAMCredentialsClient from googleapiclient import discovery from httplib2 import Credentials from .models import SpiderContext + def is_set(config: Optional[dict], config_setting: str) -> Union[dict, bool]: # If config is None, return True if config is None: return True - # Get the value of the specified config setting obj = config.get(config_setting, {}) - - # Return the value of 'fetch' if it exists in the config setting, otherwise return False + # Return the value of 'fetch' if it exists in the + # config setting, otherwise return False return obj.get('fetch', False) + def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]], out_dir: str, scan_config: Dict, target_project: Optional[str] = None, force_projects: Optional[str] = None): - """ + """ The main loop function to crawl GCP resources. 
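Editor's sketch: get_scopes_from_refresh_token above learns which scopes a refresh token actually grants by redeeming it once and reading the "scope" field of the token response. A standalone sketch against Google's standard token endpoint (the field names carried on the real function's context argument may differ):

import requests


def scopes_from_refresh_token(client_id, client_secret, refresh_token):
  response = requests.post(
      "https://oauth2.googleapis.com/token",
      data={
          "client_id": client_id,
          "client_secret": client_secret,
          "refresh_token": refresh_token,
          "grant_type": "refresh_token",
      },
      timeout=30)
  # "scope" is a single space-separated string of granted scopes.
  raw = response.json().get("scope")
  return raw.split(" ") if raw else None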
Args: @@ -60,19 +62,20 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]], force_projects: a list of projects to force scan """ - # Initialize SpiderContext - context = SpiderContext(initial_sa_tuples) + # Initialize SpiderContext + context = SpiderContext(initial_sa_tuples) - # Set of already processed service accounts - processed_sas = set() + # Set of already processed service accounts + processed_sas = set() - # Main loop - while not context.service_account_queue.empty(): - # Get a new candidate service account / token - sa_name, credentials, chain_so_far = context.service_account_queue.get() + # Main loop + while not context.service_account_queue.empty(): + # Get a new candidate service account or token + sa_name, credentials, chain_so_far = context.service_account_queue.get( + ) - if sa_name in processed_sas: - continue + if sa_name in processed_sas: + continue # Don't process this service account again processed_sas.add(sa_name) @@ -93,24 +96,26 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]], project_list = crawl.get_project_list(credentials) if len(project_list) <= 0: - logging.info('Unable to list projects accessible from service account') + logging.info('Unable to list projects accessible from service account') # Add any forced projects to project_list if force_projects: - for force_project_id in force_projects: - res = crawl.fetch_project_info(force_project_id, credentials) + for force_project_id in force_projects: + res = crawl.fetch_project_info(force_project_id, credentials) if res: - project_list.append(res) + project_list.append(res) else: - # force object creation anyway - project_list.append({'projectId': force_project_id, 'projectNumber': 'N/A'}) - + # force object creation anyway + project_list.append({ + 'projectId': force_project_id, + 'projectNumber': 'N/A' + }) # Enumerate projects accessible by SA for project in project_list: - if target_project and target_project not in project['projectId']: - continue + if target_project and target_project not in project['projectId']: + continue project_id = project['projectId'] project_number = project['projectNumber'] @@ -138,7 +143,8 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]], # Get GCP Compute Resources compute_client = compute_client_for_credentials(credentials) if is_set(scan_config, 'compute_instances'): - project_result['compute_instances'] = crawl.get_compute_instances_names( + project_result[ + 'compute_instances'] = crawl.get_compute_instances_names( project_id, compute_client) if is_set(scan_config, 'compute_images'): project_result['compute_images'] = crawl.get_compute_images_names( @@ -164,15 +170,15 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]], project_result['subnets'] = crawl.get_subnets(project_id, compute_client) if is_set(scan_config, 'firewall_rules'): - project_result['firewall_rules'] = crawl.get_firewall_rules(project_id, - compute_client) + project_result[ + 'firewall_rules'] = crawl.get_firewall_rules( + project_id, compute_client) # Get GCP APP Resources if is_set(scan_config, 'app_services'): project_result['app_services'] = crawl.get_app_services( project_id, credentials) - # Get storage buckets if is_set(scan_config, 'storage_buckets'): dump_file_names = None @@ -180,30 +186,38 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]], obj = scan_config.get('storage_buckets', None) # Check if fetch_file_names flag is set to true if obj is not None and 
obj.get('fetch_file_names', False) is True: - dump_file_names = open(out_dir + '/%s.gcs' % project_id, 'w', encoding='utf-8') - project_result['storage_buckets'] = crawl.get_bucket_names(project_id, credentials, dump_file_names) + dump_file_names = open( + out_dir + '/%s.gcs' % project_id, 'w', encoding='utf-8') + project_result[ + 'storage_buckets'] = crawl.get_bucket_names( + project_id, credentials, dump_file_names) # Close dump file if it's open if dump_file_names is not None: dump_file_names.close() # Get DNS managed zones if is_set(scan_config, 'managed_zones'): - project_result['managed_zones'] = crawl.get_managed_zones(project_id, credentials) + project_result[ + 'managed_zones'] = crawl.get_managed_zones(project_id, credentials) # Get DNS policies if is_set(scan_config, 'dns_policies'): - project_result['dns_policies'] = crawl.list_dns_policies(project_id, credentials) + project_result[ + 'dns_policies'] = crawl.list_dns_policies(project_id, credentials) # Get GKE resources if is_set(scan_config, 'gke_clusters'): gke_client = gke_client_for_credentials(credentials) - project_result['gke_clusters'] = crawl.get_gke_clusters(project_id, gke_client) + project_result[ + 'gke_clusters'] = crawl.get_gke_clusters(project_id, gke_client) if is_set(scan_config, 'gke_images'): - project_result['gke_images'] = crawl.get_gke_images(project_id, credentials.token) + project_result[ + 'gke_images'] = crawl.get_gke_images(project_id, credentials.token) # Get SQL instances if is_set(scan_config, 'sql_instances'): - project_result['sql_instances'] = crawl.get_sql_instances(project_id, credentials) + project_result[ + 'sql_instances'] = crawl.get_sql_instances(project_id, credentials) # Get BigQuery databases and table names if is_set(scan_config, 'bq'): @@ -211,23 +225,33 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]], # Get PubSub Subscriptions if is_set(scan_config, 'pubsub_subs'): - project_result['pubsub_subs'] = crawl.get_pubsub_subscriptions(project_id, credentials) + project_result[ + 'pubsub_subs'] = crawl.get_pubsub_subscriptions( + project_id, credentials) # Get CloudFunctions list if is_set(scan_config, 'cloud_functions'): - project_result['cloud_functions'] = crawl.get_cloudfunctions(project_id, credentials) + project_result[ + 'cloud_functions'] = crawl.get_cloudfunctions( + project_id, credentials) # Get List of BigTable Instances if is_set(scan_config, 'bigtable_instances'): - project_result['bigtable_instances'] = crawl.get_bigtable_instances(project_id, credentials) + project_result[ + 'bigtable_instances'] = crawl.get_bigtable_instances( + project_id, credentials) # Get Spanner Instances if is_set(scan_config, 'spanner_instances'): - project_result['spanner_instances'] = crawl.get_spanner_instances(project_id, credentials) + project_result[ + 'spanner_instances'] = crawl.get_spanner_instances( + project_id, credentials) # Get CloudStore Instances if is_set(scan_config, 'cloudstore_instances'): - project_result['cloudstore_instances'] = crawl.get_filestore_instances(project_id, credentials) + project_result[ + 'cloudstore_instances'] = crawl.get_filestore_instances( + project_id, credentials) # Get list of KMS keys if is_set(scan_config, 'kms'): @@ -235,17 +259,18 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]], # Get information about Endpoints if is_set(scan_config, 'endpoints'): - project_result['endpoints'] = crawl.get_endpoints(project_id, credentials) + project_result[ + 'endpoints'] = crawl.get_endpoints(project_id, 
credentials) # Get list of API services enabled in the project if is_set(scan_config, 'services'): - project_result['services'] = crawl.list_services(project_id, credentials) + project_result[ + 'services'] = crawl.list_services(project_id, credentials) # Get list of cloud source repositories enabled in the project if is_set(scan_config, 'sourcerepos'): - project_result['sourcerepos'] = crawl.list_sourcerepo(project_id, credentials) - - + project_result[ + 'sourcerepos'] = crawl.list_sourcerepo(project_id, credentials) # trying to impersonate SAs within project if scan_config is not None: @@ -253,7 +278,8 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]], else: impers = {'impersonate': True} - # If 'impersonate' is set to True, attempt to impersonate the service account(s) within the project + # If 'impersonate' is set to True, attempt + # to impersonate the service account(s) within the project if impers is not None and impers.get('impersonate', False) is True: # If 'iam_policy' is not already set, retrieve the IAM policy @@ -261,7 +287,8 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]], iam_policy = crawl.get_iam_policy(project_id, credentials) # Get a list of all the service accounts associated with the project - project_service_accounts = crawl.get_associated_service_accounts(iam_policy) + project_service_accounts = crawl.get_associated_service_accounts( + iam_policy) # Iterate through each service account for candidate_service_account in project_service_accounts: @@ -271,55 +298,63 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]], continue try: - # Impersonate the current service account and obtain credentials - creds_impersonated = credsdb.impersonate_sa(iam_client, candidate_service_account) - - # Append the service account to the service_account_edges field in the project_result dict - context.service_account_queue.put((candidate_service_account, creds_impersonated, updated_chain)) - project_result['service_account_edges'].append(candidate_service_account) + # Impersonate the current service account + # and obtain credentials + creds_impersonated = credsdb.impersonate_sa( + iam_client, candidate_service_account) + + # Append the service account to the + # service_account_edges field in the project_result dict + context.service_account_queue.put(( + candidate_service_account, creds_impersonated, + updated_chain)) + project_result[ + 'service_account_edges'].append(candidate_service_account) # Log that impersonation was successful - logging.info('Successfully impersonated %s using %s', candidate_service_account, sa_name) + logging.info( + 'Successfully impersonated %s using %s', + candidate_service_account, sa_name) - except Exception: + except ImportError: # Log that impersonation failed - logging.error('Failed to get token for %s', candidate_service_account) + logging.error('Failed to get token for %s', + candidate_service_account) logging.error(sys.exc_info()[1]) + # Write out results to json DB + logging.info('Saving results for %s into the file', project_id) - # Write out results to json DB - logging.info('Saving results for %s into the file', project_id) - - sa_results_data = json.dumps(sa_results, indent=2, sort_keys=False) + sa_results_data = json.dumps(sa_results, indent=2, sort_keys=False) - with open(out_dir + '/%s.json' % project_id, 'a', - encoding='utf-8') as outfile: + with open(out_dir + '/%s.json' % project_id, 'a', + encoding='utf-8') as outfile: outfile.write(sa_results_data) - # 
Clean memory to avoid leak for large amount projects. - sa_results.clear() + # Clean memory to avoid leak for large amount projects. + sa_results.clear() # Define a function that returns an IAMCredentialsClient object # for the given credentials. def iam_client_for_credentials( - credentials: Credentials) -> iam_credentials.IAMCredentialsClient: - - return iam_credentials.IAMCredentialsClient(credentials=credentials) - + credentials: Credentials) -> iam_credentials.IAMCredentialsClient: + return iam_credentials.IAMCredentialsClient(credentials=credentials) def compute_client_for_credentials( - credentials: Credentials) -> discovery.Resource: + credentials: Credentials) -> discovery.Resource: """ Returns a Compute Engine API client instance for the given credentials. Args: - credentials (google.auth.credentials.Credentials): The credentials to use to + credentials (google.auth.credentials.Credentials): + The credentials to use to authenticate requests to the Compute Engine API. Returns: - googleapiclient.discovery.Resource: A Compute Engine API client instance. + googleapiclient.discovery.Resource: + A Compute Engine API client instance. """ return discovery.build( 'compute', # The name of the API to use. @@ -332,18 +367,19 @@ def compute_client_for_credentials( def gke_client_for_credentials( credentials: Credentials ) -> container_v1.services.cluster_manager.client.ClusterManagerClient: - # This function returns a ClusterManagerClient object for the given credentials - # It takes in a Credentials object as a parameter and returns a ClusterManagerClient object + # This function returns a ClusterManagerClient + # object for the given credentials. It takes in a Credentials object + # as a parameter and returns a ClusterManagerClient object # Create a ClusterManagerClient object with the given credentials return container_v1.services.cluster_manager.ClusterManagerClient( credentials=credentials) - def main(): # Set logging level for specific modules to suppress unwanted log messages - logging.getLogger('googleapiclient.discovery_cache').setLevel(logging.ERROR) + logging.getLogger( + 'googleapiclient.discovery_cache').setLevel(logging.ERROR) logging.getLogger('googleapiclient.http').setLevel(logging.ERROR) # Parse command line arguments @@ -367,9 +403,11 @@ def main(): if not keyfile.endswith('.json'): continue full_key_path = os.path.join(args.key_path, keyfile) - account_name, credentials = credsdb.get_creds_from_file(full_key_path) + account_name, credentials = credsdb.get_creds_from_file( + full_key_path) if credentials is None: - logging.error('Failed to retrieve credentials for %s', account_name) + logging.error( + 'Failed to retrieve credentials for %s', account_name) continue sa_tuples.append((account_name, credentials, [])) @@ -396,9 +434,11 @@ def main(): logging.info('Retrieving credentials for %s', account_name) credentials = credsdb.get_creds_from_data(access_token, - json.loads(account_creds)) + json.loads( + account_creds)) if credentials is None: - logging.error('Failed to retrieve access token for %s', account_name) + logging.error( + 'Failed to retrieve access token for %s', account_name) continue sa_tuples.append((account_name, credentials, [])) @@ -409,7 +449,8 @@ def main(): credentials = credsdb.creds_from_access_token(access_token_file) if credentials is None: - logging.error('Failed to retrieve credentials using token provided') + logging.error( + 'Failed to retrieve credentials using token provided') else: token_file_name = os.path.basename(access_token_file) 
sa_tuples.append((token_file_name, credentials, [])) @@ -420,7 +461,8 @@ def main(): credentials = credsdb.creds_from_refresh_token(refresh_token_file) if credentials is None: - logging.error('Failed to retrieve credentials using token provided') + logging.error( + 'Failed to retrieve credentials using token provided') else: token_file_name = os.path.basename(refresh_token_file) sa_tuples.append((token_file_name, credentials, [])) @@ -432,7 +474,9 @@ def main(): scan_config = json.load(f) # Call the crawl_loop function with the provided arguments - crawl_loop(sa_tuples, args.output, scan_config, args.target_project, force_projects_list) + crawl_loop( + sa_tuples, args.output, scan_config, + args.target_project, force_projects_list) # Return 0 to indicate successful execution return 0 From d968ac3b8ff77b5dbff03bf2e61a13cf9c06ace2 Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Thu, 6 Apr 2023 23:45:46 +0530 Subject: [PATCH 19/25] Update test_acceptance.py --- src/gcp_scanner/test_acceptance.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/gcp_scanner/test_acceptance.py b/src/gcp_scanner/test_acceptance.py index fa7207ed..f4456c60 100644 --- a/src/gcp_scanner/test_acceptance.py +++ b/src/gcp_scanner/test_acceptance.py @@ -50,14 +50,18 @@ SERVICES_COUNT = 1 SERVICE_ACCOUNTS_COUNT = 3 + def check_obj_entry(res_dict, subojects_count, entry_name, volatile=False): - # Check if an object entry exists in the given dictionary and has the expected number of objects + # Check if an object entry exists in the given dictionary + # and has the expected number of objects obj = res_dict.get(entry_name, None) if volatile is True: - assert obj is not None and (len(obj) == subojects_count or len(obj) == subojects_count - 1) + assert obj is not None and ( + len(obj) == subojects_count or len(obj) == subojects_count - 1) else: assert obj is not None and len(obj) == subojects_count + def validate_result(): # Load the results file and validate the resource counts file_name = os.listdir("res/")[0] @@ -88,7 +92,7 @@ def validate_result(): check_obj_entry(project, STORAGE_BUCKETS_COUNT, "storage_buckets") check_obj_entry(project, GKE_CLUSTERS_COUNT, "gke_clusters") - # Volatile test. US zone sometimes appear and disappear. + # Volatile test. US zone sometimes appear and disappear. 
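Editor's sketch: check_obj_entry in the acceptance test above tolerates an off-by-one count for volatile resources, such as the GKE image zone flagged in the comment right above. The tolerance rule on its own:

def assert_count(obj, expected: int, volatile: bool = False):
  assert obj is not None
  if volatile:
    # Volatile resources may report one item fewer between runs.
    assert len(obj) in (expected, expected - 1)
  else:
    assert len(obj) == expected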
check_obj_entry(project, GKE_IMAGES_COUNT, "gke_images", True) check_obj_entry(project, SQL_INSTANCES_COUNT, "sql_instances") @@ -109,17 +113,19 @@ def validate_result(): def test_acceptance(): # Create a directory to store the results os.mkdir("res") - - # Define the arguments to run the scanner in test mode and save results in the "res" directory + # Define the arguments to run the scanner in + # test mode and save results in the "res" directory testargs = ["__main__.py", "-m", "-p", "test-gcp-scanner", "-o", "res"] - # Patch the command-line arguments to run the scanner with the specified arguments + # Patch the command-line arguments to run + # the scanner with the specified arguments with unittest.mock.patch("sys.argv", testargs): - # Run the scanner with the patched arguments and assert that it returns 0 (indicating success) + # Run the scanner with the patched + # arguments and assert that it returns 0 (indicating success) assert scanner.main() == 0 - - # Assert that the number of files in the "res" directory is equal to RESULTS_JSON_COUNT + # Assert that the number of files in + # the "res" directory is equal to RESULTS_JSON_COUNT assert len(os.listdir("res/")) == RESULTS_JSON_COUNT - - # Validate the result to ensure that it conforms to the expected format and contains valid data + # Validate the result to ensure that it conforms to + # the expected format and contains valid data validate_result() From 4a38f67f42eab75a67d6d2866b6b1e7a42a8b243 Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Thu, 6 Apr 2023 23:54:42 +0530 Subject: [PATCH 20/25] Update test_unit.py --- src/gcp_scanner/test_unit.py | 51 ++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/src/gcp_scanner/test_unit.py b/src/gcp_scanner/test_unit.py index b3c08ba5..cab63693 100644 --- a/src/gcp_scanner/test_unit.py +++ b/src/gcp_scanner/test_unit.py @@ -37,6 +37,7 @@ PROJECT_NAME = "test-gcp-scanner" + def print_diff(f1, f2): """ A function that prints the differences between two files. @@ -58,6 +59,7 @@ def print_diff(f1, f2): print(line) res += line + def save_to_test_file(res): """ A function that saves the result to a file in JSON format. 
@@ -86,7 +88,8 @@ def compare_volatile(f1, f2): if line in file_1_text: continue else: - print(f"The following line was not identified in the output:\n{line}") + print( + f"The following line was not identified in the output:\n{line}") res = False return res @@ -143,13 +146,15 @@ def test_creds_fetching(): c.execute(""" CREATE TABLE IF NOT EXISTS access_tokens (account_id TEXT PRIMARY KEY, - access_token TEXT, token_expiry TIMESTAMP, + access_token TEXT, token_expiry TIMESTAMP, rapt_token TEXT, id_token TEXT) """) # Insert test data values into the access tokens database - valid_tm = datetime.datetime.now() + datetime.timedelta(hours=2, minutes=10) - expired_tm = datetime.datetime.now() - datetime.timedelta(hours=2, minutes=10) + valid_tm = datetime.datetime.now() + datetime.timedelta( + hours=2, minutes=10) + expired_tm = datetime.datetime.now() - datetime.timedelta( + hours=2, minutes=10) sqlite_insert_with_param = """INSERT INTO "access_tokens" ("account_id", "access_token", "token_expiry", "rapt_token", "id_token") @@ -162,7 +167,8 @@ def test_creds_fetching(): c.execute(sqlite_insert_with_param, data_value) conn.commit() - # Assert that the access tokens dictionary can be retrieved from the credentials database + # Assert that the access tokens dictionary + # can be retrieved from the credentials database assert str(credsdb.get_access_tokens_dict("./unit/credentials.db")) == \ "{'test_account@gmail.com': 'ya.29c.TEST'}" @@ -290,7 +296,8 @@ class TestCrawler(unittest.TestCase): def setUp(self): # Get credentials from metadata and set up compute client _, self.credentials = credsdb.get_creds_from_metadata() - self.compute_client = scanner.compute_client_for_credentials(self.credentials) + self.compute_client = scanner.compute_client_for_credentials( + self.credentials) def test_credential(self): """Checks if credential is not none.""" @@ -301,19 +308,21 @@ def test_compute_instance_name(self): # Verify that the compute instance names are returned correctly self.assertTrue( verify( - crawl.get_compute_instances_names(PROJECT_NAME, self.compute_client), + crawl.get_compute_instances_names( + PROJECT_NAME, self.compute_client), "compute_instances", True, ) ) - def test_compute_disks_names(self): """Test compute disk names.""" - # Verify that the list of compute disks names returned by the function is non-empty + # Verify that the list of compute disks + # names returned by the function is non-empty self.assertTrue( verify( - crawl.get_compute_disks_names(PROJECT_NAME, self.compute_client), + crawl.get_compute_disks_names( + PROJECT_NAME, self.compute_client), "compute_disks", True, ) @@ -321,10 +330,12 @@ def test_compute_disks_names(self): def test_compute_images_names(self): """Test compute image names.""" - # Verify that the list of compute images names returned by the function is non-empty + # Verify that the list of compute + # images names returned by the function is non-empty self.assertTrue( verify( - crawl.get_compute_images_names(PROJECT_NAME, self.compute_client), + crawl.get_compute_images_names( + PROJECT_NAME, self.compute_client), "compute_images", True, ) @@ -332,7 +343,8 @@ def test_compute_images_names(self): def test_static_ips(self): """Test static IPs.""" - # Verify that the list of static IPs returned by the function is non-empty + # Verify that the list of static IPs + # returned by the function is non-empty self.assertTrue( verify( crawl.get_static_ips(PROJECT_NAME, self.compute_client), @@ -341,7 +353,6 @@ def test_static_ips(self): ) ) - def 
test_compute_snapshots(self): """Test compute snapshot.""" # Verify if the list of compute snapshots can be retrieved successfully @@ -388,13 +399,12 @@ def test_storage_buckets(self): ) ) - - def test_managed_zones(self): # Asserting that the managed zones are verified self.assertTrue( verify( - crawl.get_managed_zones(PROJECT_NAME, credentials=self.credentials), + crawl.get_managed_zones( + PROJECT_NAME, credentials=self.credentials), "managed_zones", True, ) @@ -460,7 +470,6 @@ def test_pubsub_subs(self): ) ) - def test_cloud_functions(self): """Test CloudFunctions list.""" # Verify that cloud_functions list is obtained successfully @@ -524,7 +533,8 @@ def test_endpoints(self): def test_services(self): """Test list of API services enabled in the project.""" - # Verify that a list of API services enabled in the project is obtained successfully + # Verify that a list of API services + # enabled in the project is obtained successfully self.assertTrue( verify( crawl.list_services(PROJECT_NAME, self.credentials), @@ -565,7 +575,8 @@ def test_project_info(self): def test_sourcerepos(self): """Test list of cloud source repositories in the project.""" - # Verify that a list of cloud source repositories in the project is obtained successfully + # Verify that a list of cloud source repositories + # in the project is obtained successfully self.assertTrue( verify( crawl.list_sourcerepo(PROJECT_NAME, self.credentials), From 63f8e5803d5caf1e50d81fbb5fa97fe2c48c962b Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Fri, 7 Apr 2023 17:43:11 +0530 Subject: [PATCH 21/25] Update arguments.py --- src/gcp_scanner/arguments.py | 216 +++++++++++++++++------------------ 1 file changed, 108 insertions(+), 108 deletions(-) diff --git a/src/gcp_scanner/arguments.py b/src/gcp_scanner/arguments.py index 09b516fd..82a1222f 100644 --- a/src/gcp_scanner/arguments.py +++ b/src/gcp_scanner/arguments.py @@ -23,7 +23,7 @@ # Define a function to create an argument parser using the argparse module def arg_parser(): - """Creates an argument parser using the `argparse` module and defines + """Creates an argument parser using the `argparse` module and defines several command-line arguments. Args: @@ -33,117 +33,117 @@ def arg_parser(): argparse.Namespace: A namespace object containing the parsed command-line arguments. 
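+
+  Example (mirrors the usage string defined below):
+    python3 scanner.py -o folder_to_save_results -g -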
""" - # Create a new parser object - parser = argparse.ArgumentParser( - prog='scanner.py', # program name - description='GCP Scanner', # description - usage='python3 %(prog)s -o folder_to_save_results -g -' - ) + # Create a new parser object + parser = argparse.ArgumentParser( + prog='scanner.py', # program name + description='GCP Scanner', # description + usage='python3 %(prog)s -o folder_to_save_results -g -' + ) - # Define a required argument group - required_named = parser.add_argument_group('Required parameters') - # Add a required argument to the group - required_named.add_argument( - '-o', # short option name - '--output-dir', # long option name - required=True, - dest='output', - default='scan_db', - help='Path to output directory' - ) + # Define a required argument group + required_named = parser.add_argument_group('Required parameters') + # Add a required argument to the group + required_named.add_argument( + '-o', # short option name + '--output-dir', # long option name + required=True, + dest='output', + default='scan_db', + help='Path to output directory' + ) - # Add command line arguments to the parser object - parser.add_argument( - '-k', - '--sa-key-path', - default=None, # Default value if option is not specified - dest='key_path', - help='Path to directory with SA keys in json format' # Help message - ) - parser.add_argument( - '-g', - '--gcloud-profile-path', - default=None, - dest='gcloud_profile_path', - help='Path to directory with gcloud profile. Specify - to search for\ - credentials in default gcloud config path' - ) - parser.add_argument( - '-m', - '--use-metadata', - default=False, - dest='use_metadata', - action='store_true', - help='Extract credentials from GCE instance metadata' - ) - parser.add_argument( - '-at', - '--access-token-files', - default=None, - dest='access_token_files', - help='A list of comma separated files with access token and OAuth scopes\ - TTL limited. A token and scopes should be stored in JSON format.' - ) - parser.add_argument( - '-rt', - '--refresh-token-files', - default=None, - dest='refresh_token_files', - help='A list of comma separated files with refresh_token, client_id,\ - token_uri and client_secret stored in JSON format.' - ) + # Add command line arguments to the parser object + parser.add_argument( + '-k', + '--sa-key-path', + default=None, # Default value if option is not specified + dest='key_path', + help='Path to directory with SA keys in json format' # Help message + ) + parser.add_argument( + '-g', + '--gcloud-profile-path', + default=None, + dest='gcloud_profile_path', + help='Path to directory with gcloud profile. Specify - to search for\ + credentials in default gcloud config path' + ) + parser.add_argument( + '-m', + '--use-metadata', + default=False, + dest='use_metadata', + action='store_true', + help='Extract credentials from GCE instance metadata' + ) + parser.add_argument( + '-at', + '--access-token-files', + default=None, + dest='access_token_files', + help='A list of comma separated files with access token and OAuth scopes\ + TTL limited. A token and scopes should be stored in JSON format.' + ) + parser.add_argument( + '-rt', + '--refresh-token-files', + default=None, + dest='refresh_token_files', + help='A list of comma separated files with refresh_token, client_id,\ + token_uri and client_secret stored in JSON format.' 
+    )

-  parser.add_argument(
-      '-s',
-      '--service-account',
-      default=None,
-      dest='key_name',
-      help='Name of individual SA to scan')
-  parser.add_argument(
-      '-p',
-      '--project',
-      default=None,
-      dest='target_project',
-      help='Name of individual project to scan')
-  parser.add_argument(
-      '-f',
-      '--force-projects',
-      default=None,
-      dest='force_projects',
-      help='Comma separated list of project names to include in the scan')
-  parser.add_argument(
-      '-c',
-      '--config',
-      default=None,
-      dest='config_path',
-      help='A path to config file with a set of specific resources to scan.')
-  parser.add_argument(
-      '-l',
-      '--logging',
-      default='WARNING',
-      dest='log_level',
-      choices=('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'),
-      help='Set logging level (INFO, WARNING, ERROR)')
-  parser.add_argument(
-      '-lf',
-      '--log-file',
-      default=None,
-      dest='log_file',
-      help='Save logs to the path specified rather than displaying in\
-        console')
+    parser.add_argument(
+        '-s',
+        '--service-account',
+        default=None,
+        dest='key_name',
+        help='Name of individual SA to scan')
+    parser.add_argument(
+        '-p',
+        '--project',
+        default=None,
+        dest='target_project',
+        help='Name of individual project to scan')
+    parser.add_argument(
+        '-f',
+        '--force-projects',
+        default=None,
+        dest='force_projects',
+        help='Comma separated list of project names to include in the scan')
+    parser.add_argument(
+        '-c',
+        '--config',
+        default=None,
+        dest='config_path',
+        help='A path to config file with a set of specific resources to scan.')
+    parser.add_argument(
+        '-l',
+        '--logging',
+        default='WARNING',
+        dest='log_level',
+        choices=('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'),
+        help='Set logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)')
+    parser.add_argument(
+        '-lf',
+        '--log-file',
+        default=None,
+        dest='log_file',
+        help='Save logs to the path specified rather than displaying in\
+          console')

-  # Parse the command line arguments
-  args: argparse.Namespace = parser.parse_args()
-
-  # Check if none of the necessary options are selected
-  if not args.key_path and not args.gcloud_profile_path \
-      and not args.use_metadata and not args.access_token_files\
-      and not args.refresh_token_files:
-
-    # If none of the options are selected, log an error message
-    logging.error(
-        'Please select at least one option to begin scan\
- -k/--sa-key-path,-g/--gcloud-profile-path, -m, -rt, -at')
-
-  # Return the parsed command line arguments
-  return args
+    # Parse the command line arguments
+    args: argparse.Namespace = parser.parse_args()
+
+    # Check if none of the necessary options are selected
+    if not args.key_path and not args.gcloud_profile_path \
+            and not args.use_metadata and not args.access_token_files\
+            and not args.refresh_token_files:
+
+        # If none of the options are selected, log an error message
+        logging.error(
+            'Please select at least one option to begin scan: '
+            '-k/--sa-key-path, -g/--gcloud-profile-path, -m, -rt, -at')
+
+    # Return the parsed command line arguments
+    return args

From 61fe0e4336a8774385ab072f494f5cbc0fa6991a Mon Sep 17 00:00:00 2001
From: Rohit Raj <88114930+ro4i7@users.noreply.github.com>
Date: Fri, 7 Apr 2023 17:50:12 +0530
Subject: [PATCH 22/25] Update models.py

---
 src/gcp_scanner/models.py | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/gcp_scanner/models.py b/src/gcp_scanner/models.py
index fc73344a..e2cdcc15 100644
--- a/src/gcp_scanner/models.py
+++ b/src/gcp_scanner/models.py
@@ -24,17 +24,17 @@
 class SpiderContext:
-  """A simple class to initialize the 
context with a list of root SAs""" - - def __init__(self, sa_tuples: List[Tuple[str, Credentials, List[str]]]): - """ - Initialize the context with a list of the root service accounts. - - Args: - sa_tuples: [(sa_name, sa_object, chain_so_far)] - """ - # Create a new queue to hold the service accounts - self.service_account_queue = queue.Queue() - # Add each service account from the sa_tuples list to the queue - for sa_tuple in sa_tuples: - self.service_account_queue.put(sa_tuple) + """A simple class to initialize the context with a list of root SAs""" + + def __init__(self, sa_tuples: List[Tuple[str, Credentials, List[str]]]): + """ + Initialize the context with a list of the root service accounts. + + Args: + sa_tuples: [(sa_name, sa_object, chain_so_far)] + """ + # Create a new queue to hold the service accounts + self.service_account_queue = queue.Queue() + # Add each service account from the sa_tuples list to the queue + for sa_tuple in sa_tuples: + self.service_account_queue.put(sa_tuple) From 0fe183b767d562e51030d18354d578a416781166 Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Fri, 7 Apr 2023 18:00:51 +0530 Subject: [PATCH 23/25] Update test_acceptance.py --- src/gcp_scanner/test_acceptance.py | 120 ++++++++++++++--------------- 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/src/gcp_scanner/test_acceptance.py b/src/gcp_scanner/test_acceptance.py index f4456c60..5b5a6716 100644 --- a/src/gcp_scanner/test_acceptance.py +++ b/src/gcp_scanner/test_acceptance.py @@ -52,80 +52,80 @@ def check_obj_entry(res_dict, subojects_count, entry_name, volatile=False): - # Check if an object entry exists in the given dictionary - # and has the expected number of objects - obj = res_dict.get(entry_name, None) - if volatile is True: - assert obj is not None and ( - len(obj) == subojects_count or len(obj) == subojects_count - 1) - else: - assert obj is not None and len(obj) == subojects_count + # Check if an object entry exists in the given dictionary + # and has the expected number of objects + obj = res_dict.get(entry_name, None) + if volatile is True: + assert obj is not None and ( + len(obj) == subojects_count or len(obj) == subojects_count - 1) + else: + assert obj is not None and len(obj) == subojects_count def validate_result(): - # Load the results file and validate the resource counts - file_name = os.listdir("res/")[0] - with open("res/" + file_name, "r", encoding="utf-8") as f: - res_data = json.load(f) + # Load the results file and validate the resource counts + file_name = os.listdir("res/")[0] + with open("res/" + file_name, "r", encoding="utf-8") as f: + res_data = json.load(f) - # project - project = res_data["projects"].get("test-gcp-scanner", None) - assert project is not None - assert len(project) == RESOURCE_COUNT + # project + project = res_data["projects"].get("test-gcp-scanner", None) + assert project is not None + assert len(project) == RESOURCE_COUNT - check_obj_entry(project, PROJECT_INFO_COUNT, "project_info") - check_obj_entry(project, IAM_POLICY_COUNT, "iam_policy") - check_obj_entry(project, SERVICE_ACCOUNTS_COUNT, "service_accounts") + check_obj_entry(project, PROJECT_INFO_COUNT, "project_info") + check_obj_entry(project, IAM_POLICY_COUNT, "iam_policy") + check_obj_entry(project, SERVICE_ACCOUNTS_COUNT, "service_accounts") - check_obj_entry(project, COMPUTE_INSTANCES_COUNT, "compute_instances") - check_obj_entry(project, COMPUTE_IMAGES_COUNT, "compute_images") - check_obj_entry(project, COMPUTE_DISKS_COUNT, 
"compute_disks") - check_obj_entry(project, COMPUTE_SNAPSHOTS_COUNT, "compute_snapshots") + check_obj_entry(project, COMPUTE_INSTANCES_COUNT, "compute_instances") + check_obj_entry(project, COMPUTE_IMAGES_COUNT, "compute_images") + check_obj_entry(project, COMPUTE_DISKS_COUNT, "compute_disks") + check_obj_entry(project, COMPUTE_SNAPSHOTS_COUNT, "compute_snapshots") - check_obj_entry(project, STATIC_IPS_COUNT, "static_ips") - check_obj_entry(project, SUBNETS_COUNT, "subnets") - check_obj_entry(project, FIREWALL_RULES_COUNT, "firewall_rules") - check_obj_entry(project, MANAGED_ZONES_COUNT, "managed_zones") + check_obj_entry(project, STATIC_IPS_COUNT, "static_ips") + check_obj_entry(project, SUBNETS_COUNT, "subnets") + check_obj_entry(project, FIREWALL_RULES_COUNT, "firewall_rules") + check_obj_entry(project, MANAGED_ZONES_COUNT, "managed_zones") - check_obj_entry(project, APP_SERVICES_COUNT, "app_services") + check_obj_entry(project, APP_SERVICES_COUNT, "app_services") - check_obj_entry(project, STORAGE_BUCKETS_COUNT, "storage_buckets") + check_obj_entry(project, STORAGE_BUCKETS_COUNT, "storage_buckets") - check_obj_entry(project, GKE_CLUSTERS_COUNT, "gke_clusters") - # Volatile test. US zone sometimes appear and disappear. - check_obj_entry(project, GKE_IMAGES_COUNT, "gke_images", True) + check_obj_entry(project, GKE_CLUSTERS_COUNT, "gke_clusters") + # Volatile test. US zone sometimes appear and disappear. + check_obj_entry(project, GKE_IMAGES_COUNT, "gke_images", True) - check_obj_entry(project, SQL_INSTANCES_COUNT, "sql_instances") - check_obj_entry(project, BQ_COUNT, "bq") - check_obj_entry(project, BIGTABLE_COUNT, "bigtable_instances") - check_obj_entry(project, SPANNER_COUNT, "spanner_instances") - check_obj_entry(project, CLOUDSTORE_COUNT, "cloudstore_instances") + check_obj_entry(project, SQL_INSTANCES_COUNT, "sql_instances") + check_obj_entry(project, BQ_COUNT, "bq") + check_obj_entry(project, BIGTABLE_COUNT, "bigtable_instances") + check_obj_entry(project, SPANNER_COUNT, "spanner_instances") + check_obj_entry(project, CLOUDSTORE_COUNT, "cloudstore_instances") - check_obj_entry(project, PUBSUB_COUNT, "pubsub_subs") - check_obj_entry(project, CLOUD_FUNCTIONS, "cloud_functions") - check_obj_entry(project, ENDPOINTS_COUNT, "endpoints") + check_obj_entry(project, PUBSUB_COUNT, "pubsub_subs") + check_obj_entry(project, CLOUD_FUNCTIONS, "cloud_functions") + check_obj_entry(project, ENDPOINTS_COUNT, "endpoints") - check_obj_entry(project, KMS_COUNT, "kms") + check_obj_entry(project, KMS_COUNT, "kms") - check_obj_entry(project, SERVICES_COUNT, "services") + check_obj_entry(project, SERVICES_COUNT, "services") def test_acceptance(): - # Create a directory to store the results - os.mkdir("res") - # Define the arguments to run the scanner in - # test mode and save results in the "res" directory - testargs = ["__main__.py", "-m", "-p", "test-gcp-scanner", "-o", "res"] - - # Patch the command-line arguments to run - # the scanner with the specified arguments - with unittest.mock.patch("sys.argv", testargs): - # Run the scanner with the patched - # arguments and assert that it returns 0 (indicating success) - assert scanner.main() == 0 - # Assert that the number of files in - # the "res" directory is equal to RESULTS_JSON_COUNT - assert len(os.listdir("res/")) == RESULTS_JSON_COUNT - # Validate the result to ensure that it conforms to - # the expected format and contains valid data - validate_result() + # Create a directory to store the results + os.mkdir("res") + # Define the arguments to run 
the scanner in + # test mode and save results in the "res" directory + testargs = ["__main__.py", "-m", "-p", "test-gcp-scanner", "-o", "res"] + + # Patch the command-line arguments to run + # the scanner with the specified arguments + with unittest.mock.patch("sys.argv", testargs): + # Run the scanner with the patched + # arguments and assert that it returns 0 (indicating success) + assert scanner.main() == 0 + # Assert that the number of files in + # the "res" directory is equal to RESULTS_JSON_COUNT + assert len(os.listdir("res/")) == RESULTS_JSON_COUNT + # Validate the result to ensure that it conforms to + # the expected format and contains valid data + validate_result() From 8455a4ea4d9f134bc078b7c5465f9b3696313cfe Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Fri, 7 Apr 2023 19:46:51 +0530 Subject: [PATCH 24/25] Update credsdb.py --- src/gcp_scanner/credsdb.py | 798 ++++++++++++++++++------------------- 1 file changed, 399 insertions(+), 399 deletions(-) diff --git a/src/gcp_scanner/credsdb.py b/src/gcp_scanner/credsdb.py index 8b8094d7..6b726ca2 100644 --- a/src/gcp_scanner/credsdb.py +++ b/src/gcp_scanner/credsdb.py @@ -40,493 +40,493 @@ def credentials_from_token(access_token: str, refresh_token: Optional[str], token_uri: Optional[str], client_id: Optional[str], client_secret: Optional[str], scopes_user: Optional[str]) -> Credentials: - """ - Create Credentials instance from tokens - """ - return credentials.Credentials(access_token, refresh_token=refresh_token, - token_uri=token_uri, client_id=client_id, - client_secret=client_secret, - scopes=scopes_user) + """ + Create Credentials instance from tokens + """ + return credentials.Credentials(access_token, refresh_token=refresh_token, + token_uri=token_uri, client_id=client_id, + client_secret=client_secret, + scopes=scopes_user) def get_creds_from_file(file_path: str) -> Tuple[str, Credentials]: - """ - Retrieve Credentials instance from a service account json file. - """ + """ + Retrieve Credentials instance from a service account json file. + """ - logging.info("Retrieving credentials from %s", file_path) - creds = service_account.Credentials.from_service_account_file(file_path) - return creds.service_account_email, creds + logging.info("Retrieving credentials from %s", file_path) + creds = service_account.Credentials.from_service_account_file(file_path) + return creds.service_account_email, creds def get_creds_from_json(parsed_keyfile: Mapping[str, str]) -> Credentials: - """ - Retrieve Credentials instance from parsed service account info. - """ + """ + Retrieve Credentials instance from parsed service account info. + """ - return service_account.Credentials.from_service_account_info( - parsed_keyfile) + return service_account.Credentials.from_service_account_info( + parsed_keyfile) def get_creds_from_metadata() -> Tuple[Optional[str], Optional[Credentials]]: - """Retrieves a Credentials instance from compute instance metadata. - - Returns: - Tuple[Optional[str], Optional[Credentials]]: - A tuple containing the email associated with the - credentials and the constructed credentials. 
- """ - - # Print a message to indicate that we are - # retrieving the access token from instance metadata - print("Retrieving access token from instance metadata") - - # Define the URLs that we need to - # access to get the token, scopes, and email - token_url = "http://metadata.google.internal/computeMetadata/v1/" \ - "instance/service-accounts/default/token" - scope_url = "http://metadata.google.internal/computeMetadata/v1/" \ - "instance/service-accounts/default/scopes" - email_url = "http://metadata.google.internal/computeMetadata/v1/" \ - "instance/service-accounts/default/email" - - # Set the headers for the requests - headers = {"Metadata-Flavor": "Google"} - - try: - # Make the request to get the access token - res = requests.get(token_url, headers=headers) + """Retrieves a Credentials instance from compute instance metadata. - # Check if the response was successful - if not res.ok: - logging.error("Failed to retrieve instance token. " - "Status code %d", res.status_code) - token_url = None + Returns: + Tuple[Optional[str], Optional[Credentials]]: + A tuple containing the email associated with the + credentials and the constructed credentials. + """ - return None, None + # Print a message to indicate that we are + # retrieving the access token from instance metadata + print("Retrieving access token from instance metadata") - # Parse the JSON response and get the access token - token = res.json()["access_token"] + # Define the URLs that we need to + # access to get the token, scopes, and email + token_url = "http://metadata.google.internal/computeMetadata/v1/" \ + "instance/service-accounts/default/token" + scope_url = "http://metadata.google.internal/computeMetadata/v1/" \ + "instance/service-accounts/default/scopes" + email_url = "http://metadata.google.internal/computeMetadata/v1/" \ + "instance/service-accounts/default/email" - # Make the request to get the instance scopes - res = requests.get(scope_url, headers=headers) + # Set the headers for the requests + headers = {"Metadata-Flavor": "Google"} - # Check if the response was successful - if not res.ok: - logging.error("Failed to retrieve instance scopes. " - "Status code %d", res.status_code) - return None, None + try: + # Make the request to get the access token + res = requests.get(token_url, headers=headers) + + # Check if the response was successful + if not res.ok: + logging.error("Failed to retrieve instance token. " + "Status code %d", res.status_code) + token_url = None + + return None, None + + # Parse the JSON response and get the access token + token = res.json()["access_token"] + + # Make the request to get the instance scopes + res = requests.get(scope_url, headers=headers) + + # Check if the response was successful + if not res.ok: + logging.error("Failed to retrieve instance scopes. " + "Status code %d", res.status_code) + return None, None - # Get the instance scopes from the response - instance_scopes = res.content.decode("utf-8") + # Get the instance scopes from the response + instance_scopes = res.content.decode("utf-8") - # Make the request to get the instance email - res = requests.get(email_url, headers=headers) + # Make the request to get the instance email + res = requests.get(email_url, headers=headers) # Check if the response was successful - if not res.ok: - logging.error("Failed to retrieve instance email. " - "Status code %d", res.status_code) - return None, None + if not res.ok: + logging.error("Failed to retrieve instance email. 
" + "Status code %d", res.status_code) + return None, None - # Get the instance email from the response - email = res.content.decode("utf-8") + # Get the instance email from the response + email = res.content.decode("utf-8") - except ImportError: - # Log an error message if any exception occurred - logging.error("Failed to retrieve instance metadata") - logging.error(sys.exc_info()[1]) - return None, None + except ImportError: + # Log an error message if any exception occurred + logging.error("Failed to retrieve instance metadata") + logging.error(sys.exc_info()[1]) + return None, None # Print a message to indicate that # we have successfully retrieved the instance metadata print("Successfully retrieved instance metadata") - # Log the length of the access token, instance email, and instance scopes - logging.info("Access token length: %d", len(token)) - logging.info("Instance email: %s", email) - logging.info("Instance scopes: %s", instance_scopes) + # Log the length of the access token, instance email, and instance scopes + logging.info("Access token length: %d", len(token)) + logging.info("Instance email: %s", email) + logging.info("Instance scopes: %s", instance_scopes) - # Return the email and credentials - # constructed from the token and instance scopes - return email, credentials_from_token( - token, None, None, None, None, instance_scopes) + # Return the email and credentials + # constructed from the token and instance scopes + return email, credentials_from_token( + token, None, None, None, None, instance_scopes) def get_creds_from_data( access_token: str, parsed_keyfile: Dict[str, str]) -> Credentials: - """Creates a Credentials instance from parsed service account info. - - The function currently supports two types of credentials. - Service account key in json format and user account with refresh token. - - Args: - access_token: An Oauth2 access token. It can be None. - parsed_keyfile: The service account info in Google format. - - Returns: - google.auth.service_account.Credentials: The constructed credentials. - """ - - # Initialize the variable to None - creds = None - - # Check if the parsed_keyfile contains "refresh_token" - if "refresh_token" in parsed_keyfile: - logging.info("Identified user credentials in gcloud profile") - # this is user account credentials with refresh token - creds = credentials_from_token( - access_token, - parsed_keyfile["refresh_token"], - parsed_keyfile["token_uri"], - parsed_keyfile["client_id"], - parsed_keyfile["client_secret"], - parsed_keyfile["scopes"] - ) - # Check if the parsed_keyfile contains "private_key" - elif "private_key" in parsed_keyfile: - logging.info( - "Identified service account key credentials in gcloud profile") - # this is a service account key with private key - creds = get_creds_from_json(parsed_keyfile) - else: - logging.error("unknown type of credentials") + """Creates a Credentials instance from parsed service account info. + + The function currently supports two types of credentials. + Service account key in json format and user account with refresh token. + + Args: + access_token: An Oauth2 access token. It can be None. + parsed_keyfile: The service account info in Google format. + + Returns: + google.auth.service_account.Credentials: The constructed credentials. 
+ """ + + # Initialize the variable to None + creds = None + + # Check if the parsed_keyfile contains "refresh_token" + if "refresh_token" in parsed_keyfile: + logging.info("Identified user credentials in gcloud profile") + # this is user account credentials with refresh token + creds = credentials_from_token( + access_token, + parsed_keyfile["refresh_token"], + parsed_keyfile["token_uri"], + parsed_keyfile["client_id"], + parsed_keyfile["client_secret"], + parsed_keyfile["scopes"] + ) + # Check if the parsed_keyfile contains "private_key" + elif "private_key" in parsed_keyfile: + logging.info( + "Identified service account key credentials in gcloud profile") + # this is a service account key with private key + creds = get_creds_from_json(parsed_keyfile) + else: + logging.error("unknown type of credentials") # Return the constructed credentials - return creds + return creds def find_creds(explicit_path: Optional[str] = None) -> List[str]: - """ - The function searches the disk and returns - a list of files with GCP credentials. - - Args: - explicit_path: An explicit path on disk to search. - If None, the function searches in - standard locations where gcloud profiles are usually located. - - Returns: - list: The list of files with GCP credentials. - """ - - logging.info("Searching for credentials on disk") - list_of_creds_files = [] - - # Create a list of search paths to scan for credentials.db - search_paths = [] - if explicit_path is not None and explicit_path != "-": - search_paths.append(explicit_path) - else: - credentials_db_search_places.append(os.getenv("HOME") + "/") - for dir_path in credentials_db_search_places: - if not os.access(dir_path, os.R_OK): - continue - for subdir_name in os.listdir(dir_path): - full_path = os.path.join(dir_path, subdir_name, "gcloud") - search_paths.append(full_path) + """ + The function searches the disk and returns + a list of files with GCP credentials. + + Args: + explicit_path: An explicit path on disk to search. + If None, the function searches in + standard locations where gcloud profiles are usually located. + + Returns: + list: The list of files with GCP credentials. 
+ """ + + logging.info("Searching for credentials on disk") + list_of_creds_files = [] + + # Create a list of search paths to scan for credentials.db + search_paths = [] + if explicit_path is not None and explicit_path != "-": + search_paths.append(explicit_path) + else: + credentials_db_search_places.append(os.getenv("HOME") + "/") + for dir_path in credentials_db_search_places: + if not os.access(dir_path, os.R_OK): + continue + for subdir_name in os.listdir(dir_path): + full_path = os.path.join(dir_path, subdir_name, "gcloud") + search_paths.append(full_path) # Scan each search path for credentials.db # and add them to the list_of_creds_files - for dir_path in search_paths: - print(f"Scanning {dir_path} for credentials.db") - full_path = os.path.join(dir_path, "credentials.db") - if os.path.exists(full_path) and os.access(full_path, os.R_OK): - print(f"Identified accessible gcloud config profile {full_path}") - list_of_creds_files.append(full_path) + for dir_path in search_paths: + print(f"Scanning {dir_path} for credentials.db") + full_path = os.path.join(dir_path, "credentials.db") + if os.path.exists(full_path) and os.access(full_path, os.R_OK): + print(f"Identified accessible gcloud config profile {full_path}") + list_of_creds_files.append(full_path) - print(f"Identified {len(list_of_creds_files)} credential DBs") - return list_of_creds_files + print(f"Identified {len(list_of_creds_files)} credential DBs") + return list_of_creds_files def get_access_tokens_dict(path_to_creds_db: str) -> Dict[str, str]: - """ - The function searches and extracts OAuth2 - access_tokens from a SQLite3 database. + """ + The function searches and extracts OAuth2 + access_tokens from a SQLite3 database. - Args: - path_to_creds_db: A path to SQLite3 database with gcloud access tokens. + Args: + path_to_creds_db: A path to SQLite3 database with gcloud access tokens. - Returns: - dict: The dictionary of account names and corresponding tokens. - """ + Returns: + dict: The dictionary of account names and corresponding tokens. 
+ """ - access_tokens_dict = dict() + access_tokens_dict = dict() - # Replace credentials.db with access_tokens.db - # to get the path to access tokens database - access_tokens_path = path_to_creds_db.replace("credentials.db", - "access_tokens.db") + # Replace credentials.db with access_tokens.db + # to get the path to access tokens database + access_tokens_path = path_to_creds_db.replace("credentials.db", + "access_tokens.db") - # Check if the access tokens database exists and can be read - if os.path.exists(access_tokens_path) and os.access(access_tokens_path, - os.R_OK): + # Check if the access tokens database exists and can be read + if os.path.exists(access_tokens_path) and os.access(access_tokens_path, + os.R_OK): - # If the access tokens database exists and can be read, connect to it - logging.info("Identified access tokens DB in %s", access_tokens_path) - conn = sqlite3.connect(access_tokens_path) - cursor = conn.execute("SELECT account_id, access_token," - "token_expiry FROM access_tokens") + # If the access tokens database exists and can be read, connect to it + logging.info("Identified access tokens DB in %s", access_tokens_path) + conn = sqlite3.connect(access_tokens_path) + cursor = conn.execute("SELECT account_id, access_token," + "token_expiry FROM access_tokens") - # Fetch all rows from the access tokens database - rows = cursor.fetchall() + # Fetch all rows from the access tokens database + rows = cursor.fetchall() - # Iterate over each row - for row in rows: - associated_account = row[0] - token = row[1] - expiration_date = row[2] + # Iterate over each row + for row in rows: + associated_account = row[0] + token = row[1] + expiration_date = row[2] - # Omit milliseconds from the expiration date - expiration_date = expiration_date.split(".")[0] + # Omit milliseconds from the expiration date + expiration_date = expiration_date.split(".")[0] - # Convert the expiration date to a datetime object - token_time_obj = datetime.datetime.strptime( - expiration_date, "%Y-%m-%d %H:%M:%S") + # Convert the expiration date to a datetime object + token_time_obj = datetime.datetime.strptime( + expiration_date, "%Y-%m-%d %H:%M:%S") - # Check if the token has expired - if datetime.datetime.now() > token_time_obj: - logging.info("Token for %s expired", associated_account) - continue + # Check if the token has expired + if datetime.datetime.now() > token_time_obj: + logging.info("Token for %s expired", associated_account) + continue # Add the associated account and # token to the access tokens dictionary - access_tokens_dict[associated_account] = token + access_tokens_dict[associated_account] = token - return access_tokens_dict + return access_tokens_dict def extract_creds(path_to_creds_db: str) -> List[ Tuple[str, str, str]]: - """ - The function extracts refresh and associated access - tokens from sqlite3 DBs. - - Args: - path_to_creds_db (str): A path to sqlite3 DB - with gcloud refresh tokens. - - Returns: - List of tuples: (account name, refresh token, access token). 
- """ - # Log that we are opening the database - logging.info("Opening %s DB", path_to_creds_db) - - # Create a named tuple for service accounts - SA = collections.namedtuple("SA", "account_name, creds, token") - - # Initialize an empty list for the results - res = list() - - # Connect to the database - conn = sqlite3.connect(path_to_creds_db) - # Select account_id and value from the credentials table - cursor = conn.execute("SELECT account_id, value FROM credentials") - rows = cursor.fetchall() - - # Check if the database is empty - if len(rows) <= 0: - logging.error("Empty database") - return None + """ + The function extracts refresh and associated access + tokens from sqlite3 DBs. + + Args: + path_to_creds_db (str): A path to sqlite3 DB + with gcloud refresh tokens. + + Returns: + List of tuples: (account name, refresh token, access token). + """ + # Log that we are opening the database + logging.info("Opening %s DB", path_to_creds_db) + + # Create a named tuple for service accounts + SA = collections.namedtuple("SA", "account_name, creds, token") + + # Initialize an empty list for the results + res = list() + + # Connect to the database + conn = sqlite3.connect(path_to_creds_db) + # Select account_id and value from the credentials table + cursor = conn.execute("SELECT account_id, value FROM credentials") + rows = cursor.fetchall() + + # Check if the database is empty + if len(rows) <= 0: + logging.error("Empty database") + return None - # We also want to check for access_tokens to avoid unnecessary refreshing - access_tokens = get_access_tokens_dict(path_to_creds_db) + # We also want to check for access_tokens to avoid unnecessary refreshing + access_tokens = get_access_tokens_dict(path_to_creds_db) - # Loop through the rows - for row in rows: - access_token = None + # Loop through the rows + for row in rows: + access_token = None - # Check if the access token exists and is valid - if access_tokens.get(row[0], None) is not None: - logging.info("Found valid access token for %s", row[0]) - access_token = access_tokens[row[0]] + # Check if the access token exists and is valid + if access_tokens.get(row[0], None) is not None: + logging.info("Found valid access token for %s", row[0]) + access_token = access_tokens[row[0]] - # Append the account name, credentials, and access - # token to the results list - res.append(SA(row[0], row[1], access_token)) + # Append the account name, credentials, and access + # token to the results list + res.append(SA(row[0], row[1], access_token)) - # Print the number of identified credential entries - print(f"Identified {len(res)} credential entries") + # Print the number of identified credential entries + print(f"Identified {len(res)} credential entries") - # Return the results list - return res + # Return the results list + return res def get_account_creds_list(gcloud_profile_path: Optional[ str] = None) -> List[List[Tuple[str, str, str]]]: - """The function searches and extracts gcloud credentials from disk. + """The function searches and extracts gcloud credentials from disk. - Args: - gcloud_profile_path: An explicit gcloud profile path on disk to - search. If None, the function searches in standard locations where - gcloud profiles are usually located. + Args: + gcloud_profile_path: An explicit gcloud profile path on disk to + search. If None, the function searches in standard locations where + gcloud profiles are usually located. - Returns: - list: A list of tuples (account name, refresh token, access token). 
- """ - accounts = list() # initialize an empty list - creds_file_list = find_creds(gcloud_profile_path) - for creds_file in creds_file_list: - res = extract_creds(creds_file) - if res is not None: - accounts.append(res) - return accounts # return the accounts list + Returns: + list: A list of tuples (account name, refresh token, access token). + """ + accounts = list() # initialize an empty list + creds_file_list = find_creds(gcloud_profile_path) + for creds_file in creds_file_list: + res = extract_creds(creds_file) + if res is not None: + accounts.append(res) + return accounts # return the accounts list def impersonate_sa(iam_client: IAMCredentialsClient, target_account: str) -> Credentials: - """ - The function is used to impersonate a service account. - - Args: - iam_client (IAMCredentialsClient): The IAMCredentialsClient object. - target_account (str): The name of the service account to impersonate. - - Returns: - Credentials: The constructed credentials. - """ - - # Define the scopes for the service account - scopes_sa = ["https://www.googleapis.com/auth/cloud-platform"] - - # Generate an access token for the service account - intermediate_access_token = iam_client.generate_access_token( - name=target_account, - scope=scopes_sa, - retry=None, - # lifetime="43200" - ) - - # Use the access token to construct credentials - return credentials_from_token( - intermediate_access_token.access_token, - None, - None, - None, - None, - scopes_sa - ) + """ + The function is used to impersonate a service account. + + Args: + iam_client (IAMCredentialsClient): The IAMCredentialsClient object. + target_account (str): The name of the service account to impersonate. + + Returns: + Credentials: The constructed credentials. + """ + + # Define the scopes for the service account + scopes_sa = ["https://www.googleapis.com/auth/cloud-platform"] + + # Generate an access token for the service account + intermediate_access_token = iam_client.generate_access_token( + name=target_account, + scope=scopes_sa, + retry=None, + # lifetime="43200" + ) + + # Use the access token to construct credentials + return credentials_from_token( + intermediate_access_token.access_token, + None, + None, + None, + None, + scopes_sa + ) def creds_from_access_token(access_token_file): - """The function is used to obtain Google Auth - Credentials from access token. - - Args: - access_token_file: a path to a file with access token - and scopes stored in JSON format. Example: - { - "access_token": "", - "scopes": [ - "https://www.googleapis.com/auth/devstorage.read_only", - "https://www.googleapis.com/auth/logging.write", - "https://www.googleapis.com/auth/monitoring.write", - "https://www.googleapis.com/auth/servicecontrol", - "https://www.googleapis.com/auth/service.management.readonly", - "https://www.googleapis.com/auth/trace.append" - ] - } - - Returns: - google.auth.service_account.Credentials: The constructed credentials. - """ - - # Load the access token and scopes from the specified file - with open(access_token_file, encoding="utf-8") as f: - creds_dict = json.load(f) + """The function is used to obtain Google Auth + Credentials from access token. + + Args: + access_token_file: a path to a file with access token + and scopes stored in JSON format. 
Example: + { + "access_token": "", + "scopes": [ + "https://www.googleapis.com/auth/devstorage.read_only", + "https://www.googleapis.com/auth/logging.write", + "https://www.googleapis.com/auth/monitoring.write", + "https://www.googleapis.com/auth/servicecontrol", + "https://www.googleapis.com/auth/service.management.readonly", + "https://www.googleapis.com/auth/trace.append" + ] + } + + Returns: + google.auth.service_account.Credentials: The constructed credentials. + """ + + # Load the access token and scopes from the specified file + with open(access_token_file, encoding="utf-8") as f: + creds_dict = json.load(f) # Check if user-defined scopes are provided - user_scopes = creds_dict.get("scopes", None) - if user_scopes is None: + user_scopes = creds_dict.get("scopes", None) + if user_scopes is None: # Use default scopes if not provided - user_scopes = ["https://www.googleapis.com/auth/cloud-platform"] - - # Construct credentials from the access token and scopes - return credentials_from_token( - creds_dict["access_token"], - None, - None, - None, - None, - user_scopes - ) + user_scopes = ["https://www.googleapis.com/auth/cloud-platform"] + + # Construct credentials from the access token and scopes + return credentials_from_token( + creds_dict["access_token"], + None, + None, + None, + None, + user_scopes + ) def creds_from_refresh_token(refresh_token_file): - """ - The function is used to obtain Google Auth Credentials from refresh token. - - Args: - - refresh_token_file: a path to a file with refresh_token, client_id, - client_secret, and token_uri stored in JSON format. - Example: - { - "refresh_token": "", - "client_id": "id", - "client_secret": "secret", - scopes: [ - https://www.googleapis.com/auth/devstorage.read_only, - https://www.googleapis.com/auth/logging.write, - https://www.googleapis.com/auth/monitoring.write, - https://www.googleapis.com/auth/servicecontrol, - https://www.googleapis.com/auth/service.management.readonly, - https://www.googleapis.com/auth/trace.append - ] - } - - Returns: - - google.auth.service_account.Credentials: The constructed credentials. - """ - - # Open the refresh_token_file in utf-8 encoding - # and load the contents to a dictionary - with open(refresh_token_file, encoding="utf-8") as f: - creds_dict = json.load(f) - - # Get the user-defined scopes from the refresh token dictionary - user_scopes = get_scopes_from_refresh_token(creds_dict) - - # Construct and return a google.auth.service_account.Credentials object - return credentials.Credentials( - None, - refresh_token=creds_dict["refresh_token"], - token_uri=creds_dict["token_uri"], - client_id=creds_dict["client_id"], - client_secret=creds_dict["client_secret"], - scopes=user_scopes, - ) + """ + The function is used to obtain Google Auth Credentials from refresh token. + + Args: + - refresh_token_file: a path to a file with refresh_token, client_id, + client_secret, and token_uri stored in JSON format. + Example: + { + "refresh_token": "", + "client_id": "id", + "client_secret": "secret", + scopes: [ + https://www.googleapis.com/auth/devstorage.read_only, + https://www.googleapis.com/auth/logging.write, + https://www.googleapis.com/auth/monitoring.write, + https://www.googleapis.com/auth/servicecontrol, + https://www.googleapis.com/auth/service.management.readonly, + https://www.googleapis.com/auth/trace.append + ] + } + + Returns: + - google.auth.service_account.Credentials: The constructed credentials. 
+ """ + + # Open the refresh_token_file in utf-8 encoding + # and load the contents to a dictionary + with open(refresh_token_file, encoding="utf-8") as f: + creds_dict = json.load(f) + + # Get the user-defined scopes from the refresh token dictionary + user_scopes = get_scopes_from_refresh_token(creds_dict) + + # Construct and return a google.auth.service_account.Credentials object + return credentials.Credentials( + None, + refresh_token=creds_dict["refresh_token"], + token_uri=creds_dict["token_uri"], + client_id=creds_dict["client_id"], + client_secret=creds_dict["client_secret"], + scopes=user_scopes, + ) def get_scopes_from_refresh_token(context) -> Union[List[str], None]: - """ - The function is used to obtain scopes from a refresh token. - - Args: - context: a dictionary containing refresh token data - Example: - { - "refresh_token": "", - "client_id": "id", - "client_secret": "secret", - } - - Returns: - a list of scopes or None - """ - - # Obtain access token from the refresh token - token_uri = "https://oauth2.googleapis.com/token" - context["grant_type"] = "refresh_token" - - try: - response = requests.post(token_uri, data=context, timeout=5) - - # prepare the scope string into a list - raw = response.json().get("scope", None) - return raw.split(" ") if raw else None - - except ImportError as ex: - logging.error("Failed to retrieve access token from refresh token.") - logging.debug("Token refresh exception", exc_info=ex) - - return None + """ + The function is used to obtain scopes from a refresh token. + + Args: + context: a dictionary containing refresh token data + Example: + { + "refresh_token": "", + "client_id": "id", + "client_secret": "secret", + } + + Returns: + a list of scopes or None + """ + + # Obtain access token from the refresh token + token_uri = "https://oauth2.googleapis.com/token" + context["grant_type"] = "refresh_token" + + try: + response = requests.post(token_uri, data=context, timeout=5) + + # prepare the scope string into a list + raw = response.json().get("scope", None) + return raw.split(" ") if raw else None + + except ImportError as ex: + logging.error("Failed to retrieve access token from refresh token.") + logging.debug("Token refresh exception", exc_info=ex) + + return None From 3618b77cae797cf295d7d491930e6773e5e8ca8e Mon Sep 17 00:00:00 2001 From: Rohit Raj <88114930+ro4i7@users.noreply.github.com> Date: Fri, 7 Apr 2023 19:55:07 +0530 Subject: [PATCH 25/25] Update credsdb.py --- src/gcp_scanner/credsdb.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gcp_scanner/credsdb.py b/src/gcp_scanner/credsdb.py index 6b726ca2..579a2aef 100644 --- a/src/gcp_scanner/credsdb.py +++ b/src/gcp_scanner/credsdb.py @@ -138,9 +138,9 @@ def get_creds_from_metadata() -> Tuple[Optional[str], Optional[Credentials]]: logging.error(sys.exc_info()[1]) return None, None - # Print a message to indicate that - # we have successfully retrieved the instance metadata - print("Successfully retrieved instance metadata") + # Print a message to indicate that + # we have successfully retrieved the instance metadata + print("Successfully retrieved instance metadata") # Log the length of the access token, instance email, and instance scopes logging.info("Access token length: %d", len(token)) @@ -343,7 +343,7 @@ def extract_creds(path_to_creds_db: str) -> List[ # Append the account name, credentials, and access # token to the results list - res.append(SA(row[0], row[1], access_token)) + res.append(SA(row[0], row[1], access_token)) # Print the number 
of identified credential entries print(f"Identified {len(res)} credential entries")
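
A minimal end-to-end sketch of how the credsdb helpers touched in patches 24
and 25 chain together. This is illustrative only: the import path mirrors the
one used by scanner.py, and account names and file locations depend entirely
on the local gcloud setup.

    # Illustrative sketch: enumerate local gcloud profiles and build
    # Credentials objects from whatever they contain.
    import json

    from src.gcp_scanner import credsdb

    for creds_file in credsdb.find_creds():
        # extract_creds returns SA(account_name, creds, token) tuples,
        # or None when the credentials.db is empty.
        for account in credsdb.extract_creds(creds_file) or []:
            creds = credsdb.get_creds_from_data(
                account.token, json.loads(account.creds))
            if creds is not None:
                print(f"Usable credentials for {account.account_name}")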