diff --git a/cloudbuild.yaml b/cloudbuild.yaml index 88b3e6f37..f45985202 100644 --- a/cloudbuild.yaml +++ b/cloudbuild.yaml @@ -99,7 +99,7 @@ steps: echo "pass" > /workspace/user_result.txt # Make sure pods are running - kubectl wait --all pods -n ml-$SHORT_SHA-$_BUILD_ID --for=condition=Ready --timeout=300s + kubectl wait --all pods -n ml-$SHORT_SHA-$_BUILD_ID --for=condition=Ready --timeout=1200s kubectl port-forward -n ml-$SHORT_SHA-$_BUILD_ID service/ray-cluster-kuberay-head-svc 8265:8265 & # Wait port-forwarding to take its place sleep 5s @@ -156,7 +156,7 @@ steps: -auto-approve -no-color -lock=false echo "pass" > /workspace/jupyterhub_tf_result.txt - kubectl wait --all pods -n ml-$SHORT_SHA-$_BUILD_ID --for=condition=Ready --timeout=300s + kubectl wait --all pods -n ml-$SHORT_SHA-$_BUILD_ID --for=condition=Ready --timeout=1200s kubectl get services -n ml-$SHORT_SHA-$_BUILD_ID kubectl port-forward -n ml-$SHORT_SHA-$_BUILD_ID service/proxy-public 9443:80 & # Wait port-forwarding to take its place @@ -226,7 +226,7 @@ steps: echo "pass" > /workspace/rag_tf_result.txt # Validate Ray: Make sure pods are running - kubectl wait --all pods -n rag-$SHORT_SHA-$_BUILD_ID --for=condition=Ready --timeout=300s + kubectl wait --all pods -n rag-$SHORT_SHA-$_BUILD_ID --for=condition=Ready --timeout=1200s kubectl port-forward -n rag-$SHORT_SHA-$_BUILD_ID service/ray-cluster-kuberay-head-svc 8265:8265 & # Wait port-forwarding to take its place sleep 5s diff --git a/modules/kuberay-cluster/main.tf b/modules/kuberay-cluster/main.tf index 67559f8a0..36aaa49bf 100644 --- a/modules/kuberay-cluster/main.tf +++ b/modules/kuberay-cluster/main.tf @@ -34,9 +34,6 @@ resource "helm_release" "ray-cluster" { namespace = var.namespace create_namespace = true version = "1.0.0" - # Timeout is increased to guarantee sufficient scale-up time for Autopilot nodes. - timeout = 1200 - wait = true values = [ templatefile("${path.module}/values.yaml", {