From 57abad055f6b89b03419e036344c9408309b4cdd Mon Sep 17 00:00:00 2001 From: Himanshu Sachdeva Date: Mon, 18 Mar 2024 11:13:37 +0100 Subject: [PATCH] replace suffix with build_id --- cloudbuild.yaml | 92 +++++++++---------- .../jupyter/tests/change_jupyter_config.py | 3 +- modules/kuberay-cluster/variables.tf | 2 +- 3 files changed, 48 insertions(+), 49 deletions(-) diff --git a/cloudbuild.yaml b/cloudbuild.yaml index de9a19059..918388b7a 100644 --- a/cloudbuild.yaml +++ b/cloudbuild.yaml @@ -59,7 +59,7 @@ steps: terraform apply \ -var-file=tfvars_tests/standard-gke-public.platform.tfvars \ -var=project_id=$PROJECT_ID \ - -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_SUFFIX-cluster \ + -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-cluster \ -var=autopilot_cluster=$_AUTOPILOT_CLUSTER \ -var=cluster_location=$_REGION \ -auto-approve -no-color -lock=false @@ -78,7 +78,7 @@ steps: # Get kube config gcloud container clusters get-credentials \ - ml-$SHORT_SHA-$_PR_NUMBER-$_SUFFIX-cluster \ + ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-cluster \ --location $_REGION \ --project $PROJECT_ID @@ -86,17 +86,17 @@ steps: terraform apply \ -var-file=workloads.tfvars \ -var=project_id=$PROJECT_ID \ - -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_SUFFIX-cluster \ + -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-cluster \ -var=cluster_location=$_REGION \ - -var=kubernetes_namespace=ml-$SHORT_SHA-$_SUFFIX \ - -var=workload_identity_service_account=ray-sa-$SHORT_SHA-$_SUFFIX \ - -var=gcs_bucket=gke-aieco-ray-$SHORT_SHA-$_SUFFIX \ + -var=kubernetes_namespace=ml-$SHORT_SHA-$_BUILD_ID \ + -var=workload_identity_service_account=ray-sa-$SHORT_SHA-$_BUILD_ID \ + -var=gcs_bucket=gke-aieco-ray-$SHORT_SHA-$_BUILD_ID \ -auto-approve -no-color -lock=false echo "pass" > /workspace/user_result.txt # Make sure pods are running - kubectl wait --all pods -n ml-$SHORT_SHA-$_SUFFIX --for=condition=Ready --timeout=300s - kubectl port-forward -n ml-$SHORT_SHA-$_SUFFIX service/example-cluster-kuberay-head-svc 8265:8265 & + kubectl wait --all pods -n ml-$SHORT_SHA-$_BUILD_ID --for=condition=Ready --timeout=300s + kubectl port-forward -n ml-$SHORT_SHA-$_BUILD_ID service/example-cluster-kuberay-head-svc 8265:8265 & # Wait port-forwarding to take its place sleep 5s @@ -118,11 +118,11 @@ steps: terraform destroy \ -var-file=workloads.tfvars \ -var=project_id=$PROJECT_ID \ - -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_SUFFIX-cluster \ + -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-cluster \ -var=cluster_location=$_REGION \ - -var=kubernetes_namespace=ml-$SHORT_SHA-$_SUFFIX \ - -var=workload_identity_service_account=ray-sa-$SHORT_SHA-$_SUFFIX \ - -var=gcs_bucket=gke-aieco-ray-$SHORT_SHA-$_SUFFIX \ + -var=kubernetes_namespace=ml-$SHORT_SHA-$_BUILD_ID \ + -var=workload_identity_service_account=ray-sa-$SHORT_SHA-$_BUILD_ID \ + -var=gcs_bucket=gke-aieco-ray-$SHORT_SHA-$_BUILD_ID \ -auto-approve -no-color -lock=false allowFailure: true @@ -143,16 +143,16 @@ steps: terraform apply \ -var-file=workloads-without-iap.example.tfvars \ -var=project_id=$PROJECT_ID \ - -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_SUFFIX-cluster \ - -var=kubernetes_namespace=ml-$SHORT_SHA-$_SUFFIX \ - -var=workload_identity_service_account=jupyter-sa-$SHORT_SHA-$_SUFFIX \ - -var=gcs_bucket=gke-aieco-jupyter-$SHORT_SHA-$_SUFFIX \ + -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-cluster \ + -var=kubernetes_namespace=ml-$SHORT_SHA-$_BUILD_ID \ + -var=workload_identity_service_account=jupyter-sa-$SHORT_SHA-$_BUILD_ID \ + -var=gcs_bucket=gke-aieco-jupyter-$SHORT_SHA-$_BUILD_ID \ -auto-approve -no-color -lock=false echo "pass" > /workspace/jupyterhub_tf_result.txt - kubectl wait --all pods -n ml-$SHORT_SHA-$_SUFFIX --for=condition=Ready --timeout=300s - kubectl get services -n ml-$SHORT_SHA-$_SUFFIX - kubectl get service proxy-public -n ml-$SHORT_SHA-$_SUFFIX --output jsonpath='{.status.loadBalancer.ingress[0].ip}' > /workspace/jupyterhub_host_url.txt + kubectl wait --all pods -n ml-$SHORT_SHA-$_BUILD_ID --for=condition=Ready --timeout=300s + kubectl get services -n ml-$SHORT_SHA-$_BUILD_ID + kubectl get service proxy-public -n ml-$SHORT_SHA-$_BUILD_ID --output jsonpath='{.status.loadBalancer.ingress[0].ip}' > /workspace/jupyterhub_host_url.txt echo "HOST URL is " $(cat /workspace/jupyterhub_host_url.txt) cd /workspace/modules/jupyter/tests @@ -172,10 +172,10 @@ steps: terraform destroy \ -var-file=workloads-without-iap.example.tfvars \ -var=project_id=$PROJECT_ID \ - -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_SUFFIX-cluster \ - -var=kubernetes_namespace=ml-$SHORT_SHA-$_SUFFIX \ - -var=workload_identity_service_account=jupyter-sa-$SHORT_SHA-$_SUFFIX \ - -var=gcs_bucket=gke-aieco-jupyter-$SHORT_SHA-$_SUFFIX \ + -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-cluster \ + -var=kubernetes_namespace=ml-$SHORT_SHA-$_BUILD_ID \ + -var=workload_identity_service_account=jupyter-sa-$SHORT_SHA-$_BUILD_ID \ + -var=gcs_bucket=gke-aieco-jupyter-$SHORT_SHA-$_BUILD_ID \ -auto-approve -no-color -lock=false allowFailure: true @@ -191,7 +191,7 @@ steps: # Get kube config gcloud container clusters get-credentials \ - ml-$SHORT_SHA-$_PR_NUMBER-$_SUFFIX-cluster \ + ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-cluster \ --location $_REGION \ --project $PROJECT_ID @@ -204,19 +204,19 @@ steps: -var=jupyter_add_auth=false \ -var=frontend_add_auth=false \ -var=project_id=$PROJECT_ID \ - -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_SUFFIX-cluster \ - -var=kubernetes_namespace=rag-$SHORT_SHA-$_SUFFIX \ - -var=gcs_bucket=gke-aieco-rag-$SHORT_SHA-$_SUFFIX \ - -var=ray_service_account=ray-sa-$SHORT_SHA-$_SUFFIX \ - -var=rag_service_account=rag-sa-$SHORT_SHA-$_SUFFIX \ - -var=jupyter_service_account=jupyter-sa-$SHORT_SHA-$_SUFFIX \ - -var=cloudsql_instance=pgvector-instance-$SHORT_SHA-$_SUFFIX \ + -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-cluster \ + -var=kubernetes_namespace=rag-$SHORT_SHA-$_BUILD_ID \ + -var=gcs_bucket=gke-aieco-rag-$SHORT_SHA-$_BUILD_ID \ + -var=ray_service_account=ray-sa-$SHORT_SHA-$_BUILD_ID \ + -var=rag_service_account=rag-sa-$SHORT_SHA-$_BUILD_ID \ + -var=jupyter_service_account=jupyter-sa-$SHORT_SHA-$_BUILD_ID \ + -var=cloudsql_instance=pgvector-instance-$SHORT_SHA-$_BUILD_ID \ -auto-approve -no-color -lock=false echo "pass" > /workspace/rag_tf_result.txt # Validate Ray: Make sure pods are running - kubectl wait --all pods -n rag-$SHORT_SHA-$_SUFFIX --for=condition=Ready --timeout=300s - kubectl port-forward -n rag-$SHORT_SHA-$_SUFFIX service/example-cluster-kuberay-head-svc 8265:8265 & + kubectl wait --all pods -n rag-$SHORT_SHA-$_BUILD_ID --for=condition=Ready --timeout=300s + kubectl port-forward -n rag-$SHORT_SHA-$_BUILD_ID service/example-cluster-kuberay-head-svc 8265:8265 & # Wait port-forwarding to take its place sleep 5s @@ -226,8 +226,8 @@ steps: echo "pass" > /workspace/rag_ray_dashboard_result.txt # Validate Jupyterhub: Get hub url - kubectl get services -n rag-$SHORT_SHA-$_SUFFIX - kubectl get service proxy-public -n rag-$SHORT_SHA-$_SUFFIX --output jsonpath='{.status.loadBalancer.ingress[0].ip}' > /workspace/rag_jupyterhub_host_url.txt + kubectl get services -n rag-$SHORT_SHA-$_BUILD_ID + kubectl get service proxy-public -n rag-$SHORT_SHA-$_BUILD_ID --output jsonpath='{.status.loadBalancer.ingress[0].ip}' > /workspace/rag_jupyterhub_host_url.txt echo "HOST URL is " $(cat /workspace/rag_jupyterhub_host_url.txt) # Validate Jupyterhub: Test Hub @@ -236,7 +236,7 @@ steps: echo "pass" > /workspace/rag_jupyterhub_test_result.txt # Validate RAG: Test rag frontend - kubectl port-forward -n rag-$SHORT_SHA-$_SUFFIX service/rag-frontend 8081:8080 & + kubectl port-forward -n rag-$SHORT_SHA-$_BUILD_ID service/rag-frontend 8081:8080 & # Wait port-forwarding to take its place sleep 5s @@ -258,13 +258,13 @@ steps: terraform destroy \ -var-file=workloads.tfvars \ -var=project_id=$PROJECT_ID \ - -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_SUFFIX-cluster \ - -var=kubernetes_namespace=rag-$SHORT_SHA-$_SUFFIX \ - -var=gcs_bucket=gke-aieco-rag-$SHORT_SHA-$_SUFFIX \ - -var=ray_service_account=ray-sa-$SHORT_SHA-$_SUFFIX \ - -var=rag_service_account=rag-sa-$SHORT_SHA-$_SUFFIX \ - -var=jupyter_service_account=jupyter-sa-$SHORT_SHA-$_SUFFIX \ - -var=cloudsql_instance=pgvector-instance-$SHORT_SHA-$_SUFFIX \ + -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-cluster \ + -var=kubernetes_namespace=rag-$SHORT_SHA-$_BUILD_ID \ + -var=gcs_bucket=gke-aieco-rag-$SHORT_SHA-$_BUILD_ID \ + -var=ray_service_account=ray-sa-$SHORT_SHA-$_BUILD_ID \ + -var=rag_service_account=rag-sa-$SHORT_SHA-$_BUILD_ID \ + -var=jupyter_service_account=jupyter-sa-$SHORT_SHA-$_BUILD_ID \ + -var=cloudsql_instance=pgvector-instance-$SHORT_SHA-$_BUILD_ID \ -auto-approve -no-color allowFailure: true @@ -280,7 +280,7 @@ steps: cd /workspace/infrastructure terraform destroy -var-file=tfvars_tests/standard-gke-public.platform.tfvars -var=project_id=$PROJECT_ID \ - -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_SUFFIX-cluster \ + -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-cluster \ -var=autopilot_cluster=$_AUTOPILOT_CLUSTER \ -var=cluster_location=$_REGION -auto-approve -no-color @@ -344,8 +344,8 @@ substitutions: _REGION: us-central1 _USER_NAME: github _AUTOPILOT_CLUSTER: "false" - _SUFFIX: std + _BUILD_ID: ${BUILD_ID:0:8} options: substitutionOption: 'ALLOW_LOOSE' machineType: 'E2_HIGHCPU_8' -timeout: 5400s +timeout: 5400s \ No newline at end of file diff --git a/modules/jupyter/tests/change_jupyter_config.py b/modules/jupyter/tests/change_jupyter_config.py index b84a30030..f404750b7 100644 --- a/modules/jupyter/tests/change_jupyter_config.py +++ b/modules/jupyter/tests/change_jupyter_config.py @@ -7,11 +7,10 @@ if autopilot: config_file = "../jupyter_config/config-selfauth-autopilot.yaml" - with open(config_file, "r") as yaml_file: data = yaml.safe_load(yaml_file) data["hub"]["config"]["DummyAuthenticator"]["password"] = "dummy" with open(config_file, 'w') as yaml_file: - yaml.dump(data, yaml_file) \ No newline at end of file + yaml.dump(data, yaml_file) diff --git a/modules/kuberay-cluster/variables.tf b/modules/kuberay-cluster/variables.tf index dbfa88073..29aad83f5 100644 --- a/modules/kuberay-cluster/variables.tf +++ b/modules/kuberay-cluster/variables.tf @@ -170,5 +170,5 @@ variable "db_secret_name" { variable "disable_network_policy" { description = "Set to true to remove network policy / firewalls from your Ray clusters. Not recommended." type = bool - default = false + default = true }