-
Notifications
You must be signed in to change notification settings - Fork 547
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[UX] Remove warning for gcp check #3647
Changes from 2 commits
e86934c
aa494a1
49d422d
4c45fee
c11094d
8d13688
ad37f34
8c60bac
9cc675e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -100,19 +100,20 @@ def _generate_node_name(cluster_name: str, node_suffix: str, | |
return node_name | ||
|
||
|
||
def _log_errors(errors: List[Dict[str, str]], e: Any, | ||
zone: Optional[str]) -> None: | ||
def _format_message_from_errors(errors: List[Dict[str, str]], e: Any, | ||
zone: Optional[str]) -> str: | ||
"""Format errors into a string.""" | ||
Michaelvll marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if errors: | ||
plural = 's' if len(errors) > 1 else '' | ||
codes = ', '.join(repr(e.get('code', 'N/A')) for e in errors) | ||
messages = '; '.join( | ||
repr(e.get('message', 'N/A').strip('.')) for e in errors) | ||
zone_str = f' in {zone}' if zone else '' | ||
logger.warning(f'Got return code{plural} {codes}' | ||
f'{zone_str}: {messages}') | ||
msg = f'Got return code{plural} {codes}{zone_str}: {messages}' | ||
else: | ||
logger.warning(f'create_instances: Failed with reason: {e}') | ||
msg = f'create_instances: Failed with reason: {e}' | ||
logger.warning(msg) | ||
return msg | ||
|
||
|
||
def selflink_to_name(selflink: str) -> str: | ||
|
@@ -441,8 +442,9 @@ def call_operation(fn, timeout: int): | |
logger.debug( | ||
'wait_operations: Failed to create instances. Reason: ' | ||
f'{errors}') | ||
_log_errors(errors, result, zone) | ||
msg = _format_message_from_errors(errors, result, zone) | ||
error = common.ProvisionerError('Operation failed') | ||
setattr(error, 'detailed_reason', msg) | ||
error.errors = errors | ||
raise error | ||
return | ||
|
@@ -462,8 +464,9 @@ def call_operation(fn, timeout: int): | |
'message': f'Timeout waiting for operation {operation["name"]}', | ||
'domain': 'wait_for_operation' | ||
}] | ||
_log_errors(errors, None, zone) | ||
msg = _format_message_from_errors(errors, None, zone) | ||
error = common.ProvisionerError('Operation timed out') | ||
setattr(error, 'detailed_reason', msg) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we add an attribute for the class and assign here, instead of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is only used for the usage collection, which uses There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh ic. LGTM! |
||
error.errors = errors | ||
raise error | ||
|
||
|
@@ -819,7 +822,7 @@ def _handle_http_error(e): | |
}) | ||
logger.debug( | ||
f'create_instances: googleapiclient.errors.HttpError: {e}') | ||
_log_errors(errors, e, zone) | ||
_format_message_from_errors(errors, e, zone) | ||
return errors | ||
|
||
# Allow Google Compute Engine instance templates. | ||
|
@@ -849,7 +852,7 @@ def _handle_http_error(e): | |
if errors: | ||
logger.debug('create_instances: Failed to create instances. ' | ||
f'Reason: {errors}') | ||
_log_errors(errors, operations, zone) | ||
_format_message_from_errors(errors, operations, zone) | ||
return errors | ||
|
||
logger.debug('Waiting GCP instances to be ready ...') | ||
|
@@ -1257,7 +1260,7 @@ def create_instances( | |
'domain': 'create_instances', | ||
'message': error_details, | ||
}) | ||
_log_errors(errors, e, zone) | ||
_format_message_from_errors(errors, e, zone) | ||
return errors, names | ||
for detail in error_details: | ||
# To be consistent with error messages returned by operation | ||
|
@@ -1276,7 +1279,7 @@ def create_instances( | |
'domain': violation.get('subject'), | ||
'message': violation.get('description'), | ||
}) | ||
_log_errors(errors, e, zone) | ||
_format_message_from_errors(errors, e, zone) | ||
return errors, names | ||
errors = [] | ||
for operation in operations: | ||
|
@@ -1294,7 +1297,7 @@ def create_instances( | |
if errors: | ||
logger.debug('create_instances: Failed to create instances. ' | ||
f'Reason: {errors}') | ||
_log_errors(errors, operations, zone) | ||
_format_message_from_errors(errors, operations, zone) | ||
return errors, names | ||
|
||
logger.debug('Waiting GCP instances to be ready ...') | ||
|
@@ -1336,7 +1339,7 @@ def create_instances( | |
'message': 'Timeout waiting for creation operation', | ||
'domain': 'create_instances' | ||
}] | ||
_log_errors(errors, None, zone) | ||
_format_message_from_errors(errors, None, zone) | ||
return errors, names | ||
|
||
# NOTE: Error example: | ||
|
@@ -1353,7 +1356,7 @@ def create_instances( | |
logger.debug( | ||
'create_instances: Failed to create instances. Reason: ' | ||
f'{errors}') | ||
_log_errors(errors, results, zone) | ||
_format_message_from_errors(errors, results, zone) | ||
return errors, names | ||
assert all(success), ( | ||
'Failed to create instances, but there is no error. ' | ||
|
@@ -1475,7 +1478,7 @@ def create_tpu_node(project_id: str, zone: str, tpu_node_config: Dict[str, str], | |
'https://console.cloud.google.com/iam-admin/quotas ' | ||
'for more information.' | ||
}] | ||
_log_errors(provisioner_err.errors, e, zone) | ||
_format_message_from_errors(provisioner_err.errors, e, zone) | ||
raise provisioner_err from e | ||
|
||
if 'PERMISSION_DENIED' in stderr: | ||
|
@@ -1484,7 +1487,7 @@ def create_tpu_node(project_id: str, zone: str, tpu_node_config: Dict[str, str], | |
'domain': 'tpu', | ||
'message': 'TPUs are not available in this zone.' | ||
}] | ||
_log_errors(provisioner_err.errors, e, zone) | ||
_format_message_from_errors(provisioner_err.errors, e, zone) | ||
raise provisioner_err from e | ||
|
||
if 'no more capacity in the zone' in stderr: | ||
|
@@ -1493,7 +1496,7 @@ def create_tpu_node(project_id: str, zone: str, tpu_node_config: Dict[str, str], | |
'domain': 'tpu', | ||
'message': 'No more capacity in this zone.' | ||
}] | ||
_log_errors(provisioner_err.errors, e, zone) | ||
_format_message_from_errors(provisioner_err.errors, e, zone) | ||
raise provisioner_err from e | ||
|
||
if 'CloudTpu received an invalid AcceleratorType' in stderr: | ||
|
@@ -1506,7 +1509,7 @@ def create_tpu_node(project_id: str, zone: str, tpu_node_config: Dict[str, str], | |
'message': (f'TPU type {tpu_type} is not available in this ' | ||
f'zone {zone}.') | ||
}] | ||
_log_errors(provisioner_err.errors, e, zone) | ||
_format_message_from_errors(provisioner_err.errors, e, zone) | ||
raise provisioner_err from e | ||
|
||
# TODO(zhwu): Add more error code handling, if needed. | ||
|
@@ -1515,7 +1518,7 @@ def create_tpu_node(project_id: str, zone: str, tpu_node_config: Dict[str, str], | |
'domain': 'tpu', | ||
'message': stderr | ||
}] | ||
_log_errors(provisioner_err.errors, e, zone) | ||
_format_message_from_errors(provisioner_err.errors, e, zone) | ||
raise provisioner_err from e | ||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we benchmark the performance degrade and make sure it is tolerable? I think it should be marginal though
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, the performance is similar. Tested with:
python test.py
with True
Average time to build GCP service: 0.431246 seconds
with False
Average time to build GCP service: 0.431241 seconds
Also, tested with lru_cache for
gcp.build
and it seems not affecting the time much.