diff --git a/applications/jupyter/main.tf b/applications/jupyter/main.tf index 05fa4700f..3a9dd89a1 100644 --- a/applications/jupyter/main.tf +++ b/applications/jupyter/main.tf @@ -69,12 +69,14 @@ module "infra" { subnetwork_name = "default" cpu_pools = var.cpu_pools enable_gpu = false + depends_on = [module.project-services] } data "google_container_cluster" "default" { - count = var.create_cluster ? 0 : 1 - name = var.cluster_name - location = var.cluster_location + count = var.create_cluster ? 0 : 1 + name = var.cluster_name + location = var.cluster_location + depends_on = [module.project-services] } locals { diff --git a/applications/rag/main.tf b/applications/rag/main.tf index 3b0eef8da..235ce6694 100644 --- a/applications/rag/main.tf +++ b/applications/rag/main.tf @@ -79,12 +79,14 @@ module "infra" { enable_gpu = true gpu_pools = var.gpu_pools kubernetes_version = var.kubernetes_version + depends_on = [module.project-services] } data "google_container_cluster" "default" { - count = var.create_cluster ? 0 : 1 - name = var.cluster_name - location = var.cluster_location + count = var.create_cluster ? 0 : 1 + name = var.cluster_name + location = var.cluster_location + depends_on = [module.project-services] } locals { diff --git a/applications/ray/main.tf b/applications/ray/main.tf index 72f7f1800..c25c411e6 100644 --- a/applications/ray/main.tf +++ b/applications/ray/main.tf @@ -73,12 +73,14 @@ module "infra" { cpu_pools = var.cpu_pools enable_gpu = var.enable_gpu gpu_pools = var.gpu_pools + depends_on = [module.project-services] } data "google_container_cluster" "default" { - count = var.create_cluster ? 0 : 1 - name = var.cluster_name - location = var.cluster_location + count = var.create_cluster ? 
0 : 1 + name = var.cluster_name + location = var.cluster_location + depends_on = [module.project-services] } locals { diff --git a/cloudbuild.yaml b/cloudbuild.yaml index 3b158d5f0..c7b6d27a6 100644 --- a/cloudbuild.yaml +++ b/cloudbuild.yaml @@ -59,6 +59,8 @@ steps: terraform apply \ -var-file=tfvars_tests/standard-gke-public.platform.tfvars \ -var=project_id=$PROJECT_ID \ + -var=network_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-$_AUTOPILOT_CLUSTER \ + -var=subnetwork_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-$_AUTOPILOT_CLUSTER \ -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-cluster \ -var=autopilot_cluster=$_AUTOPILOT_CLUSTER \ -var=cluster_location=$_REGION \ @@ -202,7 +204,7 @@ steps: cd /workspace/applications/rag/ terraform apply \ -var-file=workloads.tfvars \ - -var=network_name=default \ + -var=network_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-$_AUTOPILOT_CLUSTER \ -var=create_cluster=false \ -var=jupyter_add_auth=false \ -var=frontend_add_auth=false \ @@ -261,7 +263,7 @@ steps: cd /workspace/applications/rag/ terraform destroy \ -var-file=workloads.tfvars \ - -var=network_name=default \ + -var=network_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-$_AUTOPILOT_CLUSTER \ -var=create_cluster=false \ -var=jupyter_add_auth=false \ -var=frontend_add_auth=false \ @@ -289,6 +291,8 @@ steps: cd /workspace/infrastructure terraform destroy -var-file=tfvars_tests/standard-gke-public.platform.tfvars -var=project_id=$PROJECT_ID \ -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-cluster \ + -var=network_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-$_AUTOPILOT_CLUSTER \ + -var=subnetwork_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-$_AUTOPILOT_CLUSTER \ -var=autopilot_cluster=$_AUTOPILOT_CLUSTER \ -var=cluster_location=$_REGION -auto-approve -no-color diff --git a/infrastructure/tfvars_tests/standard-gke-public.platform.tfvars b/infrastructure/tfvars_tests/standard-gke-public.platform.tfvars index dbacd21d5..cf9393eed 100644 --- 
a/infrastructure/tfvars_tests/standard-gke-public.platform.tfvars +++ b/infrastructure/tfvars_tests/standard-gke-public.platform.tfvars @@ -25,7 +25,7 @@ project_id = "" # subnetwork_cidr = "10.100.0.0/16" # subnetwork_region = "us-central1" -create_network = false +create_network = true network_name = "default" subnetwork_name = "default" subnetwork_region = "us-central1" diff --git a/modules/kuberay-monitoring/main.tf b/modules/kuberay-monitoring/main.tf index 2ae393d07..83f5ee392 100644 --- a/modules/kuberay-monitoring/main.tf +++ b/modules/kuberay-monitoring/main.tf @@ -14,7 +14,8 @@ # Temporary workaround to ensure the GMP webhook is installed before applying PodMonitorings. resource "time_sleep" "wait_for_gmp_operator" { - count = var.autopilot_cluster ? 1 : 0 + ## Temporary workaround: count is disabled because it impacts the dependency graph for standard clusters. + #count = var.autopilot_cluster ? 1 : 0 create_duration = "30s" }