feat: Enable lightweight scanning option (#136)
* feat: Enable lightweight scanning option

We use a predefined schema to select the most interesting fields
for printing. Users can enable it with the -ls flag.

Additionally:
* Requests now time out after 120 seconds
* Impersonation is now disabled by default
* Unwrapped several unnecessary lists in responses
* Fixed extra null output in GCS scanning results
* [tests] Relaxed and updated unit tests
* [tests] Print the error file on failure

Related to #135
mshudrak authored Apr 7, 2023
1 parent ac11c81 commit a229393
Showing 11 changed files with 2,489 additions and 2,465 deletions.
2 changes: 1 addition & 1 deletion example_config
@@ -81,7 +81,7 @@
   "service_accounts": {
     "fetch": true,
     "comment": "Fetch list of available service accounts",
-    "impersonate": true
+    "impersonate": false
   },
   "dns_policies": {
     "fetch": true
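For context, a minimal sketch (not part of this commit) of how the scanner consumes this setting, mirroring the crawl_loop logic further down in this diff:

    import json

    # Read the impersonation switch the same way crawl_loop does;
    # 'example_config' is the file shown above.
    with open('example_config', encoding='utf-8') as f:
      scan_config = json.load(f)

    impers = scan_config.get('service_accounts', None)
    if impers is not None and impers.get('impersonate', False) is True:
      print('will attempt service-account impersonation')
    else:
      print('impersonation disabled')  # the new default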
8 changes: 7 additions & 1 deletion src/gcp_scanner/arguments.py
@@ -43,7 +43,13 @@ def arg_parser():
       dest='output',
       default='scan_db',
       help='Path to output directory')
-
+  parser.add_argument(
+      '-ls',
+      '--light-scan',
+      default=False,
+      dest='light_scan',
+      action='store_true',
+      help='Return only the most important GCP resource fields in the output.')
   parser.add_argument(
       '-k',
       '--sa-key-path',
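A short, self-contained sketch of the new flag's behavior (argparse only; the real arg_parser defines many more options, and the program name here is hypothetical):

    import argparse

    parser = argparse.ArgumentParser(prog='gcp-scanner')
    parser.add_argument('-ls', '--light-scan', default=False, dest='light_scan',
                        action='store_true',
                        help='Return only the most important GCP resource fields.')

    print(parser.parse_args([]).light_scan)       # False
    print(parser.parse_args(['-ls']).light_scan)  # True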
28 changes: 12 additions & 16 deletions src/gcp_scanner/crawl.py
@@ -368,7 +368,7 @@ def get_bucket_names(project_name: str, credentials: Credentials,
       break

     for bucket in response.get("items", []):
-      buckets_dict[bucket["name"]] = (bucket, None)
+      buckets_dict[bucket["name"]] = bucket
       if dump_fd is not None:
         ret_fields = "nextPageToken,items(name,size,contentType,timeCreated)"

@@ -469,7 +469,8 @@ def get_gke_images(project_name: str, access_token: str) -> Dict[str, Any]:
     gcr_url = f"https://{region}gcr.io/v2/{project_name}/tags/list"
     try:
       res = requests.get(
-          gcr_url, auth=HTTPBasicAuth("oauth2accesstoken", access_token))
+          gcr_url, auth=HTTPBasicAuth("oauth2accesstoken", access_token),
+          timeout=120)
       if not res.ok:
         logging.info("Failed to retrieve gcr images list. Status code: %d",
                      res.status_code)
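The timeout matters because requests.get has no default timeout and can block indefinitely on an unresponsive registry. A hedged sketch of the pattern, with a hypothetical helper name:

    import requests
    from requests.auth import HTTPBasicAuth

    def fetch_gcr_tags(gcr_url: str, access_token: str):
      # Without timeout=, a stalled connection hangs the whole scan;
      # 120 seconds bounds both the connect and the read phase.
      try:
        res = requests.get(gcr_url,
                           auth=HTTPBasicAuth("oauth2accesstoken", access_token),
                           timeout=120)
      except requests.exceptions.Timeout:
        return None  # treat a slow registry as "no images"
      return res.json() if res.ok else None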
@@ -897,7 +898,7 @@ def get_iam_policy(project_name: str,
   return None


-def get_associated_service_accounts(
+def get_sas_for_impersonation(
     iam_policy: List[Dict[str, Any]]) -> List[str]:
   """Extract a list of unique SAs from IAM policy associated with project.
@@ -913,16 +914,11 @@

   list_of_sas = list()
   for entry in iam_policy:
-    for member in entry["members"]:
-      if "deleted:" in member:
-        continue
-      account_name = None
-      for element in member.split(":"):
-        if "@" in element:
-          account_name = element
-          break
-      if account_name and account_name not in list_of_sas:
-        list_of_sas.append(account_name)
+    for sa_name in entry.get("members", []):
+      if sa_name.startswith("serviceAccount") and "@" in sa_name:
+        account_name = sa_name.split(":")[1]
+        if account_name not in list_of_sas:
+          list_of_sas.append(account_name)

   return list_of_sas
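A worked example of the rewritten helper on a hypothetical IAM binding, assuming the function above is importable. Deleted members and non-service-account principals are skipped, and the email is taken from the part after the "serviceAccount:" prefix:

    policy = [{
        "role": "roles/editor",
        "members": [
            "serviceAccount:app-sa@test-project.iam.gserviceaccount.com",
            "user:alice@example.com",
            "deleted:serviceAccount:old-sa@test-project.iam.gserviceaccount.com",
        ],
    }]

    print(get_sas_for_impersonation(policy))
    # ['app-sa@test-project.iam.gserviceaccount.com']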

@@ -983,7 +979,7 @@ def list_services(project_id: str, credentials: Credentials) -> List[Any]:
   try:
     while request is not None:
       response = request.execute()
-      list_of_services.append(response.get("services", None))
+      list_of_services.extend(response.get("services", []))

       request = serviceusage.services().list_next(
           previous_request=request, previous_response=response)
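The append-to-extend change is what removes the extra nesting (and the null entries) from the output: append stores each response page as one nested element, while extend flattens the page's items into the accumulator. A minimal illustration with made-up pages:

    page1 = {"services": [{"name": "svc-a"}]}
    page2 = {"services": [{"name": "svc-b"}]}
    empty = {}  # a page with no "services" key

    nested, flat = [], []
    for page in (page1, page2, empty):
      nested.append(page.get("services", None))  # [[...], [...], None] -- old behavior
      flat.extend(page.get("services", []))      # [{...}, {...}]       -- new behavior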
@@ -1016,7 +1012,7 @@ def list_sourcerepo(project_id: str, credentials: Credentials) -> List[Any]:
   try:
     while request is not None:
       response = request.execute()
-      list_of_repos.append(response.get("repos", None))
+      list_of_repos.extend(response.get("repos", []))

       request = service.projects().repos().list_next(
           previous_request=request,
@@ -1049,7 +1045,7 @@ def list_dns_policies(project_id: str, credentials: Credentials) -> List[Any]:
   try:
     while request is not None:
       response = request.execute()
-      list_of_policies.append(response.get("policies", None))
+      list_of_policies.extend(response.get("policies", []))

       request = service.policies().list_next(
           previous_request=request,
6 changes: 3 additions & 3 deletions src/gcp_scanner/credsdb.py
@@ -95,21 +95,21 @@ def get_creds_from_metadata() -> Tuple[Optional[str], Optional[Credentials]]:
       service-accounts/default/email"
   headers = {"Metadata-Flavor": "Google"}
   try:
-    res = requests.get(token_url, headers=headers)
+    res = requests.get(token_url, headers=headers, timeout=120)
     if not res.ok:
       logging.error("Failed to retrieve instance token. Status code %d",
                     res.status_code)
       return None, None
     token = res.json()["access_token"]

-    res = requests.get(scope_url, headers=headers)
+    res = requests.get(scope_url, headers=headers, timeout=120)
     if not res.ok:
       logging.error("Failed to retrieve instance scopes. Status code %d",
                     res.status_code)
       return None, None
     instance_scopes = res.content.decode("utf-8")

-    res = requests.get(email_url, headers=headers)
+    res = requests.get(email_url, headers=headers, timeout=120)
     if not res.ok:
       logging.error("Failed to retrieve instance email. Status code %d",
                     res.status_code)
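The three metadata calls follow the standard GCE metadata-server pattern; a condensed sketch with a hypothetical helper name (the base URL and header are the documented GCE conventions):

    import requests

    METADATA_BASE = "http://metadata.google.internal/computeMetadata/v1/instance/"
    HEADERS = {"Metadata-Flavor": "Google"}

    def metadata_get(path: str):
      # The 120-second cap keeps the scanner from hanging forever when it
      # runs outside GCE and the metadata host is unreachable.
      try:
        res = requests.get(METADATA_BASE + path, headers=HEADERS, timeout=120)
      except requests.exceptions.RequestException:
        return None
      return res.text if res.ok else None

    email = metadata_get("service-accounts/default/email")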
79 changes: 60 additions & 19 deletions src/gcp_scanner/scanner.py
@@ -35,15 +35,65 @@
 from httplib2 import Credentials
 from .models import SpiderContext

+# We define the schema statically to make it easier for the user and avoid
+# extra config files.
+light_version_scan_schema = {
+    'compute_instances': ['name', 'zone', 'machineType', 'networkInterfaces',
+                          'status'],
+    'compute_images': ['name', 'status', 'diskSizeGb', 'sourceDisk'],
+    'machine_images': ['name', 'description', 'status', 'sourceInstance',
+                       'totalStorageBytes', 'savedDisks'],
+    'compute_disks': ['name', 'sizeGb', 'zone', 'status', 'sourceImage', 'users'],
+    'compute_snapshots': ['name', 'status', 'sourceDisk', 'downloadBytes'],
+    'managed_zones': ['name', 'dnsName', 'description', 'nameServers'],
+    'sql_instances': ['name', 'region', 'ipAddresses', 'databaseVersion',
+                      'state'],
+    'cloud_functions': ['name', 'eventTrigger', 'status', 'entryPoint',
+                        'serviceAccountEmail'],
+    'kms': ['name', 'primary', 'purpose', 'createTime'],
+    'services': ['name'],
+}

 def is_set(config: Optional[dict], config_setting: str) -> Union[dict,bool]:
   if config is None:
     return True
   obj = config.get(config_setting, {})
   return obj.get('fetch', False)

+def save_results(res_data: Dict, res_path: str, is_light: bool):
+  """Save scan results to disk in JSON format.
+
+  Args:
+    res_data: scan results as a dictionary of entries
+    res_path: full path of the output file
+    is_light: save only the most important fields per resource
+  """
+
+  if is_light is True:
+    # Keep only the light version of the scan based on the predefined schema.
+    for gcp_resource, schema in light_version_scan_schema.items():
+      projects = res_data.get('projects', {})
+      for project_name, project_data in projects.items():
+        scan_results = project_data.get(gcp_resource, {})
+        light_results = list()
+        for scan_result in scan_results:
+          light_results.append({key: scan_result.get(key) for key in schema})
+
+        project_data.update({gcp_resource: light_results})
+        projects.update({project_name: project_data})
+      res_data.update({'projects': projects})
+
+  # Write out results to JSON DB
+  sa_results_data = json.dumps(res_data, indent=2, sort_keys=False)
+
+  with open(res_path, 'a', encoding='utf-8') as outfile:
+    outfile.write(sa_results_data)


 def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]],
                out_dir: str,
                scan_config: Dict,
+               light_scan: bool,
                target_project: Optional[str] = None,
                force_projects: Optional[str] = None):
   """The main loop function to crawl GCP resources.
@@ -108,7 +158,7 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]],
     output_path = Path(out_dir, output_file_name)

     try:
-      with open(output_path, 'x', encoding='utf-8') as outfile:
+      with open(output_path, 'x', encoding='utf-8'):
         pass

     except FileExistsError:
@@ -117,7 +167,6 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]],

     if is_set(scan_config, 'iam_policy'):
       # Get IAM policy
-      iam_client = iam_client_for_credentials(credentials)
       iam_policy = crawl.get_iam_policy(project_id, credentials)
       project_result['iam_policy'] = iam_policy

@@ -256,23 +305,21 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]],
           credentials
       )

-    # trying to impersonate SAs within project
     if scan_config is not None:
       impers = scan_config.get('service_accounts', None)
     else:
-      impers = {'impersonate': True}
+      impers = {'impersonate': False}  # do not impersonate by default

+    # trying to impersonate SAs within project
     if impers is not None and impers.get('impersonate', False) is True:
+      iam_client = iam_client_for_credentials(credentials)
       if is_set(scan_config, 'iam_policy') is False:
         iam_policy = crawl.get_iam_policy(project_id, credentials)

-      project_service_accounts = crawl.get_associated_service_accounts(
-          iam_policy)
-
+      project_service_accounts = crawl.get_sas_for_impersonation(iam_policy)
       for candidate_service_account in project_service_accounts:
-        logging.info('Trying %s', candidate_service_account)
-        if not candidate_service_account.startswith('serviceAccount'):
-          continue
         try:
+          logging.info('Trying %s', candidate_service_account)
           creds_impersonated = credsdb.impersonate_sa(
               iam_client, candidate_service_account)
           context.service_account_queue.put(
@@ -286,14 +333,9 @@ def crawl_loop(initial_sa_tuples: List[Tuple[str, Credentials, List[str]]],
               candidate_service_account)
           logging.error(sys.exc_info()[1])

-    # Write out results to json DB
     logging.info('Saving results for %s into the file', project_id)
-
-    sa_results_data = json.dumps(sa_results, indent=2, sort_keys=False)
-
-    with open(output_path, 'a', encoding='utf-8') as outfile:
-      outfile.write(sa_results_data)
-
+    save_results(sa_results, output_path, light_scan)
     # Clean memory to avoid leak for large amount projects.
     sa_results.clear()
@@ -400,7 +442,6 @@ def main():
   with open(args.config_path, 'r', encoding='utf-8') as f:
     scan_config = json.load(f)

-
-  crawl_loop(sa_tuples, args.output, scan_config, args.target_project,
-             force_projects_list)
+  crawl_loop(sa_tuples, args.output, scan_config, args.light_scan,
+             args.target_project, force_projects_list)
   return 0
2 changes: 1 addition & 1 deletion src/gcp_scanner/test_acceptance.py
@@ -46,7 +46,7 @@
 CLOUD_FUNCTIONS = 1
 ENDPOINTS_COUNT = 0
 KMS_COUNT = 1
-SERVICES_COUNT = 1
+SERVICES_COUNT = 37
 SERVICE_ACCOUNTS_COUNT = 3

 def check_obj_entry(res_dict, subojects_count, entry_name, volatile = False):
6 changes: 4 additions & 2 deletions src/gcp_scanner/test_unit.py
@@ -61,22 +61,24 @@ def save_to_test_file(res):
 def compare_volatile(f1, f2):
   res = True
   with open(f1, "r", encoding="utf-8") as file_1:
-    file_1_text = file_1.readlines()
+    file_1_text = file_1.read()

   with open(f2, "r", encoding="utf-8") as file_2:
     file_2_text = file_2.readlines()

   for line in file_2_text:
-    # line = line[:-1]
     if not line.startswith("CHECK"):
       continue  # we compare only important part of output
     line = line.replace("CHECK", "")
+    line = line.strip()
     if line in file_1_text:
       continue
     else:
       print(f"The following line was not identified in the output:\n{line}")
       res = False

+  if res is False:
+    print(file_1_text)
   return res
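A usage sketch of the relaxed comparison, with hypothetical temp files: only CHECK-prefixed lines in the expected file must appear, as substrings, anywhere in the actual output, and the whole output is printed when a check fails:

    with open('/tmp/actual', 'w', encoding='utf-8') as f:
      f.write('{\n"name": "test-policy",\n"enableLogging": false\n}\n')

    with open('/tmp/expected', 'w', encoding='utf-8') as f:
      f.write('CHECK "name": "test-policy",\nthis line is ignored\n')

    print(compare_volatile('/tmp/actual', '/tmp/expected'))  # True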
46 changes: 22 additions & 24 deletions test/dns_policies
@@ -1,29 +1,27 @@
 [
-  [
-    {
-      "id": "1199893578059967130",
-CHECK "name": "test-policy",
-CHECK "enableInboundForwarding": true,
-CHECK "description": "A test policy",
-CHECK "networks": [
-      {
-CHECK "networkUrl": "https://www.googleapis.com/compute/v1/projects/test-gcp-scanner/global/networks/test-vpc",
-CHECK "kind": "dns#policyNetwork"
-      }
-    ],
-CHECK "alternativeNameServerConfig": {
-CHECK "targetNameServers": [
-      {
-CHECK "ipv4Address": "8.8.8.8",
-CHECK "forwardingPath": "private",
-CHECK "ipv6Address": "",
-CHECK "kind": "dns#policyAlternativeNameServerConfigTargetNameServer"
-      }
-    ],
-CHECK "kind": "dns#policyAlternativeNameServerConfig"
-    },
-    "enableLogging": false,
-CHECK "kind": "dns#policy"
-    }
-  ]
+  {
+    "id": "1199893578059967130",
+CHECK "name": "test-policy",
+CHECK "enableInboundForwarding": true,
+CHECK "description": "A test policy",
+CHECK "networks": [
+    {
+CHECK "networkUrl": "https://www.googleapis.com/compute/v1/projects/test-gcp-scanner/global/networks/test-vpc",
+CHECK "kind": "dns#policyNetwork"
+    }
+  ],
+CHECK "alternativeNameServerConfig": {
+CHECK "targetNameServers": [
+    {
+CHECK "ipv4Address": "8.8.8.8",
+CHECK "forwardingPath": "private",
+CHECK "ipv6Address": "",
+CHECK "kind": "dns#policyAlternativeNameServerConfigTargetNameServer"
+    }
+  ],
+CHECK "kind": "dns#policyAlternativeNameServerConfig"
+  },
+  "enableLogging": false,
+CHECK "kind": "dns#policy"
+  }
 ]