feat: Enable lightweight scanning option
We use a predefined schema to select the most interesting fields
for printing. Users can enable it with the -ls flag.

Additionally:
* Requests now time out after 120 seconds
* Impersonation is now disabled by default
* Unwrapped several unnecessary lists in responses
* Fixed extra null output in GCS scanning results

Related to #135
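
For illustration, a minimal sketch of the schema-based filtering idea (the schema and resource data below are simplified placeholders, not the exact values used by the scanner):

schema = {'compute_instances': ['name', 'zone', 'status']}  # hypothetical

scan_result = {
  'compute_instances': [
    {'name': 'vm-1', 'zone': 'us-central1-a', 'status': 'RUNNING',
     'machineType': 'e2-micro', 'labels': {'env': 'dev'}},
  ],
}

# Keep only the schema-listed fields; everything else is dropped.
light = {}
for resource, fields in schema.items():
  light[resource] = [{key: item.get(key) for key in fields}
                     for item in scan_result.get(resource, [])]

print(light)
# {'compute_instances': [{'name': 'vm-1', 'zone': 'us-central1-a',
#                         'status': 'RUNNING'}]}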
mshudrak committed Apr 7, 2023
1 parent ac11c81 commit c2210a0
Showing 5 changed files with 83 additions and 40 deletions.
2 changes: 1 addition & 1 deletion example_config
@@ -81,7 +81,7 @@
"service_accounts": {
"fetch": true,
"comment": "Fetch list of available service accounts",
"impersonate": true
"impersonate": false
},
"dns_policies": {
"fetch": true
8 changes: 7 additions & 1 deletion src/gcp_scanner/arguments.py
@@ -43,7 +43,13 @@ def arg_parser():
      dest='output',
      default='scan_db',
      help='Path to output directory')

  parser.add_argument(
      '-ls',
      '--light-scan',
      default=False,
      dest='light_scan',
      action='store_true',
      help='Return only the most important GCP resource fields in the output.')
  parser.add_argument(
      '-k',
      '--sa-key-path',
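As a quick, standalone check of how the new flag parses (not part of the commit): with action='store_true' the option takes no value and simply flips light_scan to True.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-ls', '--light-scan', default=False,
                    dest='light_scan', action='store_true')

assert parser.parse_args([]).light_scan is False        # flag omitted
assert parser.parse_args(['-ls']).light_scan is True    # short form
assert parser.parse_args(['--light-scan']).light_scan is True
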
28 changes: 12 additions & 16 deletions src/gcp_scanner/crawl.py
Expand Up @@ -368,7 +368,7 @@ def get_bucket_names(project_name: str, credentials: Credentials,
      break

    for bucket in response.get("items", []):
      buckets_dict[bucket["name"]] = (bucket, None)
      buckets_dict[bucket["name"]] = bucket
      if dump_fd is not None:
        ret_fields = "nextPageToken,items(name,size,contentType,timeCreated)"

@@ -469,7 +469,8 @@ def get_gke_images(project_name: str, access_token: str) -> Dict[str, Any]:
    gcr_url = f"https://{region}gcr.io/v2/{project_name}/tags/list"
    try:
      res = requests.get(
          gcr_url, auth=HTTPBasicAuth("oauth2accesstoken", access_token))
          gcr_url, auth=HTTPBasicAuth("oauth2accesstoken", access_token),
          timeout=120)
      if not res.ok:
        logging.info("Failed to retrieve gcr images list. Status code: %d",
                     res.status_code)
@@ -897,7 +898,7 @@ def get_iam_policy(project_name: str,
  return None


def get_associated_service_accounts(
def get_sas_for_impersonation(
    iam_policy: List[Dict[str, Any]]) -> List[str]:
"""Extract a list of unique SAs from IAM policy associated with project.
@@ -913,16 +914,11 @@

  list_of_sas = list()
  for entry in iam_policy:
    for member in entry["members"]:
      if "deleted:" in member:
        continue
      account_name = None
      for element in member.split(":"):
        if "@" in element:
          account_name = element
          break
      if account_name and account_name not in list_of_sas:
        list_of_sas.append(account_name)
    for sa_name in entry.get("members", []):
      if sa_name.startswith("serviceAccount") and "@" in sa_name:
        account_name = sa_name.split(":")[1]
        if account_name not in list_of_sas:
          list_of_sas.append(account_name)

  return list_of_sas
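
For reference, IAM policy members arrive as 'type:identifier' strings, and the rewritten helper keeps only serviceAccount members. A hedged sketch with made-up bindings, assuming the function above:

iam_policy = [
  {'role': 'roles/editor',
   'members': ['serviceAccount:sa-1@demo.iam.gserviceaccount.com',
               'user:alice@example.com',
               'deleted:serviceAccount:old@demo.iam.gserviceaccount.com']},
]

print(get_sas_for_impersonation(iam_policy))
# ['sa-1@demo.iam.gserviceaccount.com'] -- user: and deleted: members skipped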

@@ -983,7 +979,7 @@ def list_services(project_id: str, credentials: Credentials) -> List[Any]:
  try:
    while request is not None:
      response = request.execute()
      list_of_services.append(response.get("services", None))
      list_of_services.extend(response.get("services", []))

      request = serviceusage.services().list_next(
          previous_request=request, previous_response=response)
@@ -1016,7 +1012,7 @@ def list_sourcerepo(project_id: str, credentials: Credentials) -> List[Any]:
  try:
    while request is not None:
      response = request.execute()
      list_of_repos.append(response.get("repos", None))
      list_of_repos.extend(response.get("repos", []))

      request = service.projects().repos().list_next(
          previous_request=request,
@@ -1049,7 +1045,7 @@ def list_dns_policies(project_id: str, credentials: Credentials) -> List[Any]:
  try:
    while request is not None:
      response = request.execute()
      list_of_policies.append(response.get("policies", None))
      list_of_policies.extend(response.get("policies", []))

      request = service.policies().list_next(
          previous_request=request,
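The append-to-extend changes above are the "unwrapped unnecessary lists" from the commit message: append() nests each page's list (or a stray None) inside the result, while extend() flattens the pages into one list. A standalone sketch; note that extend() needs an iterable default such as [], since extend(None) raises TypeError:

page_1 = {'services': [{'name': 'svc-a'}, {'name': 'svc-b'}]}
page_2 = {}  # a page without the 'services' key

nested, flat = [], []
for response in (page_1, page_2):
  nested.append(response.get('services', None))  # old behaviour
  flat.extend(response.get('services', []))      # new behaviour

print(nested)  # [[{'name': 'svc-a'}, {'name': 'svc-b'}], None]
print(flat)    # [{'name': 'svc-a'}, {'name': 'svc-b'}]
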
6 changes: 3 additions & 3 deletions src/gcp_scanner/credsdb.py
@@ -95,21 +95,21 @@ def get_creds_from_metadata() -> Tuple[Optional[str], Optional[Credentials]]:
service-accounts/default/email"
  headers = {"Metadata-Flavor": "Google"}
  try:
    res = requests.get(token_url, headers=headers)
    res = requests.get(token_url, headers=headers, timeout=120)
    if not res.ok:
      logging.error("Failed to retrieve instance token. Status code %d",
                    res.status_code)
      return None, None
    token = res.json()["access_token"]

    res = requests.get(scope_url, headers=headers)
    res = requests.get(scope_url, headers=headers, timeout=120)
    if not res.ok:
      logging.error("Failed to retrieve instance scopes. Status code %d",
                    res.status_code)
      return None, None
    instance_scopes = res.content.decode("utf-8")

    res = requests.get(email_url, headers=headers)
    res = requests.get(email_url, headers=headers, timeout=120)
    if not res.ok:
      logging.error("Failed to retrieve instance email. Status code %d",
                    res.status_code)
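Without a timeout, requests.get() against an unreachable metadata endpoint can block indefinitely; with timeout=120 it raises requests.exceptions.Timeout after 120 seconds, which the caller can turn into a clean failure. A standalone sketch of the pattern:

import requests

token_url = ('http://metadata.google.internal/computeMetadata/'
             'v1/instance/service-accounts/default/token')

try:
  res = requests.get(token_url, headers={'Metadata-Flavor': 'Google'},
                     timeout=120)  # seconds, applies to connect and read
  token = res.json().get('access_token') if res.ok else None
except requests.exceptions.Timeout:
  token = None  # no response within the deadline
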
79 changes: 60 additions & 19 deletions src/gcp_scanner/scanner.py
@@ -35,15 +35,65 @@
from httplib2 import Credentials
from .models import SpiderContext

# We define the schema statically to make it easier for the user and avoid extra
# config files.
light_version_scan_schema = {
  'compute_instances': ['name', 'zone', 'machineType', 'networkInterfaces',
                        'status'],
  'compute_images': ['name', 'status', 'diskSizeGb', 'sourceDisk'],
  'machine_images': ['name', 'description', 'status', 'sourceInstance',
                     'totalStorageBytes', 'savedDisks'],
  'compute_disks': ['name', 'sizeGb', 'zone', 'status', 'sourceImage', 'users'],
  'compute_snapshots': ['name', 'status', 'sourceDisk', 'downloadBytes'],
  'managed_zones': ['name', 'dnsName', 'description', 'nameServers'],
  'sql_instances': ['name', 'region', 'ipAddresses', 'databaseVersion',
                    'state'],
  'cloud_functions': ['name', 'eventTrigger', 'status', 'entryPoint',
                      'serviceAccountEmail'],
  'kms': ['name', 'primary', 'purpose', 'createTime'],
  'services': ['name'],
}

def is_set(config: Optional[dict], config_setting: str) -> Union[dict, bool]:
  if config is None:
    return True
  obj = config.get(config_setting, {})
  return obj.get('fetch', False)

def save_results(res_data: Dict, res_path: str, is_light: bool):
  """Saves scan results to disk in JSON format.

  Args:
    res_data: scan results as a dictionary of entries
    res_path: full path of the file to save the data in
    is_light: save only the most interesting fields
  """

  if is_light is True:
    # Produce the light version of the scan based on the predefined schema.
    for gcp_resource, schema in light_version_scan_schema.items():
      projects = res_data.get('projects', {})
      for project_name, project_data in projects.items():
        scan_results = project_data.get(gcp_resource, {})
        light_results = list()
        for scan_result in scan_results:
          light_results.append({key: scan_result.get(key) for key in schema})

        project_data.update({gcp_resource: light_results})
        projects.update({project_name: project_data})
      res_data.update({'projects': projects})

  # Write out results to json DB
  sa_results_data = json.dumps(res_data, indent=2, sort_keys=False)

  with open(res_path, 'a', encoding='utf-8') as outfile:
    outfile.write(sa_results_data)


def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]],
               out_dir: str,
               scan_config: Dict,
               light_scan: bool,
               target_project: Optional[str] = None,
               force_projects: Optional[str] = None):
"""The main loop function to crawl GCP resources.
@@ -108,7 +158,7 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]],
    output_path = Path(out_dir, output_file_name)

    try:
      with open(output_path, 'x', encoding='utf-8') as outfile:
      with open(output_path, 'x', encoding='utf-8'):
        pass

    except FileExistsError:
@@ -117,7 +167,6 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]],

    if is_set(scan_config, 'iam_policy'):
      # Get IAM policy
      iam_client = iam_client_for_credentials(credentials)
      iam_policy = crawl.get_iam_policy(project_id, credentials)
      project_result['iam_policy'] = iam_policy

@@ -256,23 +305,21 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]],
          credentials
      )

    # trying to impersonate SAs within project
    if scan_config is not None:
      impers = scan_config.get('service_accounts', None)
    else:
      impers = {'impersonate': True}
      impers = {'impersonate': False}  # do not impersonate by default

    # trying to impersonate SAs within project
    if impers is not None and impers.get('impersonate', False) is True:
      iam_client = iam_client_for_credentials(credentials)
      if is_set(scan_config, 'iam_policy') is False:
        iam_policy = crawl.get_iam_policy(project_id, credentials)

      project_service_accounts = crawl.get_associated_service_accounts(
          iam_policy)

      project_service_accounts = crawl.get_sas_for_impersonation(iam_policy)
      for candidate_service_account in project_service_accounts:
        logging.info('Trying %s', candidate_service_account)
        if not candidate_service_account.startswith('serviceAccount'):
          continue
        try:
          logging.info('Trying %s', candidate_service_account)
          creds_impersonated = credsdb.impersonate_sa(
              iam_client, candidate_service_account)
          context.service_account_queue.put(
@@ -286,14 +333,9 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]],
                        candidate_service_account)
          logging.error(sys.exc_info()[1])

      # Write out results to json DB
      logging.info('Saving results for %s into the file', project_id)

      sa_results_data = json.dumps(sa_results, indent=2, sort_keys=False)

      with open(output_path, 'a', encoding='utf-8') as outfile:
        outfile.write(sa_results_data)

      save_results(sa_results, output_path, light_scan)
      # Clear results to avoid memory buildup when scanning many projects.
      sa_results.clear()

@@ -400,7 +442,6 @@ def main():
  with open(args.config_path, 'r', encoding='utf-8') as f:
    scan_config = json.load(f)

  crawl_loop(sa_tuples, args.output, scan_config, args.target_project,
             force_projects_list)
  crawl_loop(sa_tuples, args.output, scan_config, args.light_scan,
             args.target_project, force_projects_list)
  return 0
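
To close, a hedged usage sketch of the new save_results() helper with made-up scan data. Note that with is_light=True every resource type named in light_version_scan_schema gets rewritten, so types absent from a project come back as empty lists:

results = {
  'projects': {
    'demo-project': {  # hypothetical project entry
      'services': [{'name': 'compute.googleapis.com', 'state': 'ENABLED'}],
    },
  },
}

save_results(results, '/tmp/demo-project.json', is_light=True)
# The saved 'services' entries keep only 'name'; the other schema keys
# (compute_instances, kms, ...) are written as empty lists.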
