#######################################
# Report whether a node has a usable NVIDIA GPU.
# The probe is executed on the node through run_remote (defined elsewhere
# in this script; presumably an ssh wrapper -- confirm against its
# definition) and produces no output of its own.
# Arguments:
#   $1 - IP address (or hostname) of the node to probe
# Returns:
#   0 if nvidia-smi exists on the node and its GPU query succeeds,
#   1 otherwise (nvidia-smi missing, query failed, or run_remote failed).
#######################################
check_gpu() {
    local NODE_IP=$1
    # The probe runs under whatever shell the remote side provides, so it
    # sticks to POSIX redirections: the bash-only '&>' is parsed by plain sh
    # (e.g. dash) as "run in background, then redirect", which makes the
    # probe exit 0 even when no GPU is present.
    local gpu_probe='command -v nvidia-smi >/dev/null 2>&1 && nvidia-smi --query-gpu=gpu_name --format=csv,noheader >/dev/null 2>&1'

    if run_remote "$NODE_IP" "$gpu_probe"; then
        return 0 # GPU detected
    fi
    # Collapse every failure status into 1 so callers see a plain yes/no.
    return 1 # No GPU detected
}

# Pre-flight checks