From e99418109a33ea177db43e28e31d1587a1d2b5df Mon Sep 17 00:00:00 2001
From: Chester Li
Date: Thu, 12 Dec 2024 11:15:24 +0800
Subject: [PATCH] [k8s] Add validation for pod_config #4206

Check pod_config when run 'sky check k8s' by using k8s api
---
 sky/provision/kubernetes/utils.py | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/sky/provision/kubernetes/utils.py b/sky/provision/kubernetes/utils.py
index 7442c9be7a6b..0e725f6f3d87 100644
--- a/sky/provision/kubernetes/utils.py
+++ b/sky/provision/kubernetes/utils.py
@@ -866,6 +866,15 @@ def check_credentials(context: Optional[str],
 
     _, exec_msg = is_kubeconfig_exec_auth(context)
 
+    # Check whether pod_config is valid
+    pod_config = skypilot_config.get_nested(('kubernetes', 'pod_config'),
+                                            default_value={},
+                                            override_configs={})
+    if pod_config:
+        _, pod_msg = _check_pod_config(context, pod_config)
+        if pod_msg:
+            return False, pod_msg
+
     # We now check if GPUs are available and labels are set correctly on the
     # cluster, and if not we return hints that may help debug any issues.
     # This early check avoids later surprises for user when they try to run
@@ -891,6 +900,52 @@ def check_credentials(context: Optional[str],
     else:
         return True, None
 
+
+def _check_pod_config(
+    context: Optional[str] = None, pod_config: Optional[Any] = None) \
+    -> Tuple[bool, Optional[str]]:
+    """Check if the pod_config is a valid pod config.
+
+    Submits pod_config to the create_namespaced_pod API with dry_run='All'
+    and field_validation='Strict' to check whether the pod_config is valid
+    or not.
+
+    Args:
+        context: Kubernetes context used to look up the namespace and the
+            core API client.
+        pod_config: The pod config to validate; sent as the request body.
+
+    Returns:
+        bool: True if pod_config is valid.
+        str: Error message about why the pod_config is invalid, None otherwise.
+    """
+    try:
+        namespace = get_kube_config_context_namespace(context)
+        kubernetes.core_api(context).create_namespaced_pod(
+            namespace,
+            body=pod_config,
+            dry_run='All',
+            field_validation='Strict',
+            _request_timeout=kubernetes.API_TIMEOUT)
+    except kubernetes.api_exception() as e:
+        # Default to the exception text; prefer the detailed message from the
+        # response body when one can be extracted.
+        error_msg = str(e)
+        if e.body:
+            try:
+                exception_body = json.loads(e.body)
+            except (TypeError, ValueError):
+                # The body is not valid JSON. An exception raised inside this
+                # handler would not be caught by the clause below, so keep
+                # the default message instead.
+                exception_body = None
+            if isinstance(exception_body, dict):
+                error_msg = exception_body.get('message') or error_msg
+        return False, f'Invalid pod_config: {error_msg}'
+    except Exception as e:
+        return False, str(e)
+    return True, None
+
 
 def is_kubeconfig_exec_auth(
         context: Optional[str] = None) -> Tuple[bool, Optional[str]]: