Skip to content

Commit

Permalink
[k8s] Add validation for pod_config #4206
Browse files Browse the repository at this point in the history
Check pod_config when running 'sky check k8s' by using the Kubernetes API
  • Loading branch information
chesterli29 committed Dec 12, 2024
1 parent e036888 commit e994181
Showing 1 changed file with 42 additions and 0 deletions.
42 changes: 42 additions & 0 deletions sky/provision/kubernetes/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -866,6 +866,15 @@ def check_credentials(context: Optional[str],

_, exec_msg = is_kubeconfig_exec_auth(context)

# Check whether pod_config is valid
pod_config = skypilot_config.get_nested(('kubernetes', 'pod_config'),
default_value={},
override_configs={})
if pod_config:
_, pod_msg = _check_pod_config(context, pod_config)
if pod_msg:
return False, pod_msg

# We now check if GPUs are available and labels are set correctly on the
# cluster, and if not we return hints that may help debug any issues.
# This early check avoids later surprises for user when they try to run
Expand All @@ -891,6 +900,39 @@ def check_credentials(context: Optional[str],
else:
return True, None

def _check_pod_config(
    context: Optional[str] = None, pod_config: Optional[Any] = None) \
        -> Tuple[bool, Optional[str]]:
    """Check whether pod_config is accepted by the Kubernetes API server.

    Submits pod_config to the create_namespaced_pod API with dry_run='All'
    and field_validation='Strict', in the namespace resolved for `context`,
    and reports whether the API server accepted the request.

    Args:
        context: Kubernetes context to validate against.
        pod_config: The pod config to send as the request body.

    Returns:
        bool: True if pod_config is valid.
        str: Error message about why the pod_config is invalid, None
            otherwise.
    """
    try:
        namespace = get_kube_config_context_namespace(context)
        kubernetes.core_api(context).create_namespaced_pod(
            namespace,
            body=pod_config,
            dry_run='All',
            field_validation='Strict',
            _request_timeout=kubernetes.API_TIMEOUT)
    except kubernetes.api_exception() as e:
        error_msg = None
        if e.body:
            # Prefer the detailed 'message' field of the API error body.
            # The body is not guaranteed to be a JSON object (e.g. an error
            # page returned by a proxy in front of the API server). Parsing
            # must not raise here: an exception raised inside this handler
            # would bypass the generic handler below and escape to the
            # caller.
            try:
                exception_body = json.loads(e.body)
            except (TypeError, ValueError):
                exception_body = None
            if isinstance(exception_body, dict):
                error_msg = exception_body.get('message')
        if not error_msg:
            # No usable detail in the body; fall back to the exception text.
            error_msg = str(e)
        return False, f'Invalid pod_config: {error_msg}'
    except Exception as e:  # pylint: disable=broad-except
        # Best-effort check: report any other failure (e.g. while resolving
        # the namespace or building the client) as a message, not a crash.
        return False, str(e)
    return True, None


def is_kubeconfig_exec_auth(
context: Optional[str] = None) -> Tuple[bool, Optional[str]]:
Expand Down

0 comments on commit e994181

Please sign in to comment.