diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
new file mode 100644
index 000000000..3188efcec
--- /dev/null
+++ b/.github/workflows/ci.yaml
@@ -0,0 +1,35 @@
+name: Terraform CI
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+jobs:
+  Terraform-Lint-Check:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: hashicorp/setup-terraform@v3
+        with:
+          terraform_version: "1.5.7"
+
+      - name: Terraform fmt
+        id: fmt
+        run: terraform fmt -check -recursive
+
+      - name: Terraform Init
+        id: init
+        run: |
+          terraform -chdir=applications/rag init
+          terraform -chdir=applications/ray init
+          terraform -chdir=applications/jupyter init
+
+      - name: Terraform Validate
+        id: validate
+        run: |
+          terraform -chdir=applications/rag validate -no-color
+          terraform -chdir=applications/ray validate -no-color
+          terraform -chdir=applications/jupyter validate -no-color
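The same checks can be reproduced locally before pushing. A minimal sketch (`-backend=false` is an assumption so `init` works without backend credentials; it is not part of the workflow above):
```
# Sketch: mirror the CI lint/validate job locally.
terraform fmt -check -recursive
for app in rag ray jupyter; do
  terraform -chdir="applications/${app}" init -backend=false -no-color
  terraform -chdir="applications/${app}" validate -no-color
done
```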
diff --git a/applications/rag/README.md b/applications/rag/README.md
index e8eaa5885..d585535c1 100644
--- a/applications/rag/README.md
+++ b/applications/rag/README.md
@@ -32,7 +32,7 @@ CLUSTER_REGION=us-central1
 ```
 
-2. Use the following instructions to create a GKE cluster. We recommend using Autopilot for a simpler setup.
+2. Use the following instructions to create a GKE cluster. We recommend a Standard cluster with the preconfigured node pools.
 
-##### Autopilot (recommended)
+##### Autopilot
 
 RAG requires the latest Autopilot features, available on GKE cluster version `1.29.1-gke.1575000`+
 ```
@@ -46,23 +46,9 @@ gcloud container clusters create-auto ${CLUSTER_NAME:?} \
   --cluster-version ${CLUSTER_VERSION:?}
 ```
 
-##### Standard
+##### Standard (recommended)
 
-1. To create a GKE Standard cluster using Terraform, please follow the [instructions here](https://github.com/GoogleCloudPlatform/ai-on-gke/blob/main/infrastructure/README.md).
-
-TODO: Add GKE cluster requirements for a successful installation.
-
-2. The inference server requires L4 GPUs. Create an additional node pool:
-```
-gcloud container node-pools create g2-standard-24 --cluster ${CLUSTER_NAME:?} \
-  --accelerator type=nvidia-l4,count=2,gpu-driver-version=latest \
-  --machine-type g2-standard-24 \
-  --ephemeral-storage-local-ssd=count=2 \
-  --enable-image-streaming \
-  --num-nodes=1 --min-nodes=1 --max-nodes=2 \
-  --node-locations ${CLUSTER_REGION:?}-a,${CLUSTER_REGION:?}-b \
-  --location=${CLUSTER_REGION:?}
-```
+1. To create a GKE Standard cluster using Terraform, follow the [instructions here](https://github.com/GoogleCloudPlatform/ai-on-gke/blob/main/infrastructure/README.md). Use the preconfigured node pools in `/infrastructure/platform.tfvars`, as this solution requires T4s and L4s.
 
 #### Setup Components
 
@@ -105,7 +91,11 @@ gcloud container clusters get-credentials ${CLUSTER_NAME:?} --location ${CLUSTER
 1. Verify Kuberay is setup: run `kubectl get pods -n ${NAMESPACE:?}`. There should be a Ray head (and Ray worker pod on GKE Standard only) in `Running` state (prefixed by `example-cluster-kuberay-head-` and `example-cluster-kuberay-worker-workergroup-`).
 
 2. Verify Jupyterhub service is setup:
-    * Fetch the service IP: `kubectl get services proxy-public -n ${NAMESPACE:?} --output jsonpath='{.status.loadBalancer.ingress[0].ip}'`
+    * Fetch the service IP/Domain:
+        * IAP disabled: `kubectl get services proxy-public -n $NAMESPACE --output jsonpath='{.status.loadBalancer.ingress[0].ip}'`
+        * IAP enabled: read the terraform output `jupyter_uri`, or use the command: `kubectl get managedcertificates jupyter-managed-cert -n $NAMESPACE --output jsonpath='{.status.domainStatus[0].domain}'`
+        * Remember to log in to [Google Cloud Platform IAP](https://pantheon.corp.google.com/security/iap) and check that your user has the role `IAP-secured Web App User`.
+        * Wait for the domain status to be `Active`.
     * Go to the IP in a browser which should display the Jupyterlab login UI.
 
 3. Verify the instance `pgvector-instance` exists: `gcloud sql instances list | grep pgvector`
@@ -132,8 +122,16 @@
 EOF
 * At the end of the smoke test with the TGI server, stop port forwarding by using Ctrl-C on the original terminal.
 
 5. Verify the frontend chat interface is setup:
-  * Verify the service exists: `kubectl get services rag-frontend -n ${NAMESPACE:?}`
-  * Verify the deployment exists: `kubectl get deployments rag-frontend -n ${NAMESPACE:?}` and ensure the deployment is in `READY` state.
+   * Verify the service exists: `kubectl get services rag-frontend -n ${NAMESPACE:?}`
+   * Verify the deployment exists: `kubectl get deployments rag-frontend -n ${NAMESPACE:?}` and ensure the deployment is in `READY` state.
+   * Verify the managed certificate is `Active`:
+   ```
+   kubectl get managedcertificates frontend-managed-cert -n rag --output jsonpath='{.status.domainStatus[0].status}'
+   ```
+   * Verify IAP is enabled:
+   ```
+   gcloud compute backend-services list --format="table(name, backends, iap.enabled)"
+   ```
 
 ### Vector Embeddings for Dataset
 
@@ -141,8 +139,11 @@ This step generates the vector embeddings for your input dataset. Currently, the
 1. Create a CloudSQL user to access the database: `gcloud sql users create rag-user-notebook --password=<your password> --instance=pgvector-instance --host=%`
 
-2. Go to the Jupyterhub service endpoint in a browser: `kubectl get services proxy-public -n ${NAMESPACE:?} --output jsonpath='{.status.loadBalancer.ingress[0].ip}'`
-
+2. Go to the Jupyterhub service endpoint in a browser:
+    * IAP disabled: `kubectl get services proxy-public -n $NAMESPACE --output jsonpath='{.status.loadBalancer.ingress[0].ip}'`
+    * IAP enabled: read the terraform output `jupyter_uri`, or use the command: `kubectl get managedcertificates jupyter-managed-cert -n $NAMESPACE --output jsonpath='{.status.domainStatus[0].domain}'`
+    * Remember to log in to GCP and check that your user has the role `IAP-secured Web App User`.
+    * Wait for the domain status to be `Active`.
 3. Login with placeholder credentials [TBD: replace with instructions for IAP]:
    * username: user
    * password: use `terraform output jupyter_password` to fetch the password value
@@ -166,11 +167,35 @@ This step generates the vector embeddings for your input dataset. Currently, the
 
 ### Launch the Frontend Chat Interface
 
-1. Setup port forwarding for the frontend [TBD: Replace with IAP]: `kubectl port-forward service/rag-frontend -n ${NAMESPACE:?} 8080:8080 &`
+#### Accessing the Frontend with IAP Disabled
+1. Set up port forwarding for the frontend: `kubectl port-forward service/rag-frontend -n $NAMESPACE 8080:8080 &`
 
 2. Go to `localhost:8080` in a browser & start chatting! This will fetch context related to your prompt from the vector embeddings in the `pgvector-instance`, augment the original prompt with the context & query the inference model (`mistral-7b`) with the augmented prompt.
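With the port-forward from step 1 active, the same check can be scripted; a minimal sketch mirroring what `applications/rag/tests/test_frontend.py` (added in this PR) does:
```
# Sketch: scripted smoke test of the IAP-disabled path.
kubectl port-forward service/rag-frontend -n ${NAMESPACE:?} 8080:8080 &
sleep 5  # give port forwarding a moment to take effect
curl --fail --silent --output /dev/null http://127.0.0.1:8080 && echo "Rag frontend is up."
```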
-3. TODO: Add some example prompts for the dataset.
+#### Accessing the Frontend with IAP Enabled
+1. Verify IAP is Enabled
+
+   * Ensure that IAP is enabled on Google Cloud Platform (GCP) for your application. If you encounter any errors, try re-enabling IAP.
+
+2. Verify User Role
+
+   * Make sure you have the role `IAP-secured Web App User` assigned to your user account. This role is necessary to access the application through IAP.
+
+3. Verify the Domain is Active
+   * Make sure the domain is active, using the command:
+   `kubectl get managedcertificates frontend-managed-cert -n rag --output jsonpath='{.status.domainStatus[0].status}'`
+
+4. Retrieve the Domain
+
+   * Read the terraform output `frontend_uri`, or use the following command to find the domain created by IAP for accessing your service:
+   `kubectl get managedcertificates frontend-managed-cert -n $NAMESPACE --output jsonpath='{.status.domainStatus[0].domain}'`
+
+5. Access the Frontend
+
+   * Open your browser and navigate to the domain you retrieved in the previous step to start chatting!
+
+#### Example Prompts
+[TODO: Add some example prompts for the dataset]
 
 ### Cleanup
 
diff --git a/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb b/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb
index 849eaec20..72d3d5915 100644
--- a/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb
+++ b/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb
@@ -7,8 +7,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!pip install langchain ray==2.7.1 datasets sentence-transformers kaggle\n",
-    "!pip install \"cloud-sql-python-connector[pg8000]\" SQLAlchemy==2.0.7"
+    "!pip install langchain==0.1.9 ray==2.7.1 datasets==2.18.0 sentence-transformers==2.5.1 kaggle==1.6.6\n",
+    "!pip install \"cloud-sql-python-connector[pg8000]==1.7.0\" SQLAlchemy==2.0.7"
    ]
   },
  {
@@ -294,14 +294,14 @@
     "    runtime_env={\n",
     "        \"working_dir\": \"/home/jovyan/test\", # upload the local working directory to ray workers\n",
     "        \"pip\": [\n",
-    "            \"langchain\",\n",
+    "            \"langchain==0.1.9\",\n",
     "            \"transformers\",\n",
-    "            \"sentence-transformers\",\n",
+    "            \"sentence-transformers==2.5.1\",\n",
     "            \"pyarrow\",\n",
-    "            \"datasets\",\n",
+    "            \"datasets==2.18.0\",\n",
     "            \"torch==2.0.1\",\n",
-    "            \"cloud-sql-python-connector[pg8000]\",\n",
-    "            \"SQLAlchemy\",\n",
+    "            \"cloud-sql-python-connector[pg8000]==1.7.0\",\n",
+    "            \"SQLAlchemy==2.0.7\",\n",
     "            \"huggingface_hub\",\n",
     "        ]\n",
     "    }\n",
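Pinning identical versions in the notebook kernel and in the Ray `runtime_env` keeps the driver and workers consistent. A quick way to see what the kernel actually has installed, for comparison against the pins above (a sketch, not part of this change):
```
# Sketch: list the pinned packages installed in the notebook kernel so they
# can be compared against the versions declared in runtime_env above.
pip freeze | grep -iE '^(langchain|ray|datasets|sentence-transformers|sqlalchemy|cloud-sql-python-connector)=='
```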
diff --git a/applications/rag/main.tf b/applications/rag/main.tf
index 74b8abbcf..63eb89ac4 100644
--- a/applications/rag/main.tf
+++ b/applications/rag/main.tf
@@ -89,10 +89,10 @@ provider "helm" {
 }
 
 module "namespace" {
-  source     = "../../modules/kubernetes-namespace"
-  providers = { helm = helm.rag}
+  source           = "../../modules/kubernetes-namespace"
+  providers        = { helm = helm.rag }
   create_namespace = true
-  namespace = var.kubernetes_namespace
+  namespace        = var.kubernetes_namespace
 }
 
 module "kuberay-operator" {
@@ -115,12 +115,12 @@ module "gcs" {
 }
 
 module "cloudsql" {
-  source     = "../../modules/cloudsql"
-  providers  = { kubernetes = kubernetes.rag }
-  project_id = var.project_id
-  instance_name = var.cloudsql_instance
-  namespace  = var.kubernetes_namespace
-  depends_on = [module.namespace]
+  source        = "../../modules/cloudsql"
+  providers     = { kubernetes = kubernetes.rag }
+  project_id    = var.project_id
+  instance_name = var.cloudsql_instance
+  namespace     = var.kubernetes_namespace
+  depends_on    = [module.namespace]
 }
 
 module "jupyterhub" {
@@ -181,6 +181,7 @@ module "kuberay-monitoring" {
   create_namespace                = true
   enable_grafana_on_ray_dashboard = var.enable_grafana_on_ray_dashboard
   k8s_service_account             = var.ray_service_account
+  depends_on                      = [module.namespace]
 }
 
 module "inference-server" {
@@ -199,6 +200,7 @@ module "frontend" {
   google_service_account        = var.rag_service_account
   namespace                     = var.kubernetes_namespace
   inference_service_endpoint    = module.inference-server.inference_service_endpoint
+  cloudsql_instance             = module.cloudsql.instance
   db_secret_name                = module.cloudsql.db_secret_name
   db_secret_namespace           = module.cloudsql.db_secret_namespace
   dataset_embeddings_table_name = var.dataset_embeddings_table_name
@@ -218,5 +220,5 @@ module "frontend" {
   url_domain_addr    = var.frontend_url_domain_addr
   url_domain_name    = var.frontend_url_domain_name
   members_allowlist  = var.frontend_members_allowlist
-  depends_on = [ module.namespace ]
+  depends_on = [module.namespace]
 }
diff --git a/applications/rag/tests/test_frontend.py b/applications/rag/tests/test_frontend.py
new file mode 100644
index 000000000..c6e594c1c
--- /dev/null
+++ b/applications/rag/tests/test_frontend.py
@@ -0,0 +1,11 @@
+import sys
+import requests
+
+def test_frontend_up(rag_frontend_url):
+    r = requests.get(rag_frontend_url)
+    r.raise_for_status()
+    print("Rag frontend is up.")
+
+hub_url = "http://" + sys.argv[1]
+
+test_frontend_up(hub_url)
diff --git a/applications/rag/variables.tf b/applications/rag/variables.tf
index cd746e6e2..a5e77ff78 100644
--- a/applications/rag/variables.tf
+++ b/applications/rag/variables.tf
@@ -209,7 +209,7 @@ variable "jupyter_k8s_backend_service_name" {
 
 variable "jupyter_k8s_backend_service_port" {
   type        = number
-  description = "NName of the Backend Service Port"
+  description = "Name of the Backend Service Port"
   default     = 80
 }
 
@@ -265,9 +265,9 @@ variable "autopilot_cluster" {
 }
 
 variable "cloudsql_instance" {
-  type = string
+  type        = string
   description = "Name of the CloudSQL instance for RAG VectorDB"
-  default = "pgvector-instance"
+  default     = "pgvector-instance"
 }
 
 variable "cpu_pools" {
diff --git a/applications/rag/workloads.tfvars b/applications/rag/workloads.tfvars
index aba62feae..dca101637 100644
--- a/applications/rag/workloads.tfvars
+++ b/applications/rag/workloads.tfvars
@@ -38,7 +38,7 @@ rag_service_account = "rag-system-account"
 
 # Creates a google service account & k8s service account & configures workload identity with appropriate permissions.
 # Set to false & update the variable `jupyter_service_account` to use an existing IAM service account.
-jupyter_service_account = "jupyter-system-account"
+jupyter_service_account = "jupyter-system-account"
 
 ## Embeddings table name - change this to the TABLE_NAME used in the notebook.
 dataset_embeddings_table_name = "googlemaps_reviews_db"
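The new `test_frontend.py` can also be run by hand against a port-forwarded service, the same way the `test rag` Cloud Build step below invokes it (a sketch; assumes kubectl access to the cluster and a deployed `rag-frontend` service):
```
# Sketch: run the new frontend test manually, as the 'test rag' build step does.
kubectl port-forward -n ${NAMESPACE:?} service/rag-frontend 8081:8080 &
sleep 5  # give port forwarding a moment to take effect
python3 applications/rag/tests/test_frontend.py "127.0.0.1:8081"
```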
diff --git a/applications/ray/raytrain-examples/raytrain-with-gcsfusecsi/kuberaytf/user/modules/service_accounts/versions.tf b/applications/ray/raytrain-examples/raytrain-with-gcsfusecsi/kuberaytf/user/modules/service_accounts/versions.tf
index 436ce51c2..53d5c8e95 100644
--- a/applications/ray/raytrain-examples/raytrain-with-gcsfusecsi/kuberaytf/user/modules/service_accounts/versions.tf
+++ b/applications/ray/raytrain-examples/raytrain-with-gcsfusecsi/kuberaytf/user/modules/service_accounts/versions.tf
@@ -15,7 +15,7 @@
 terraform {
   required_providers {
     google = {
-      source = "hashicorp/google"
+      source = "hashicorp/google"
     }
     kubernetes = {
       source = "hashicorp/kubernetes"
diff --git a/applications/ray/versions.tf b/applications/ray/versions.tf
index a8a0a268a..b8e6f2c71 100644
--- a/applications/ray/versions.tf
+++ b/applications/ray/versions.tf
@@ -15,10 +15,10 @@
 terraform {
   required_providers {
     google = {
-      source = "hashicorp/google"
+      source = "hashicorp/google"
     }
     google-beta = {
-      source = "hashicorp/google-beta"
+      source = "hashicorp/google-beta"
     }
     helm = {
       source = "hashicorp/helm"
diff --git a/benchmarks/benchmark/tools/locust-load-inference/sample-terraform.tfvars b/benchmarks/benchmark/tools/locust-load-inference/sample-terraform.tfvars
index ff2e8cd05..dcd6739b4 100644
--- a/benchmarks/benchmark/tools/locust-load-inference/sample-terraform.tfvars
+++ b/benchmarks/benchmark/tools/locust-load-inference/sample-terraform.tfvars
@@ -21,5 +21,5 @@ tokenizer = "tiiuae/falcon-7b"
 
 # Benchmark configuration for triggering single test via Locust Runner
 test_duration = 60
 # Increase test_users to allow more parallelism (especially when testing HPA)
-test_users = 1
-test_rate = 5
+test_users = 1
+test_rate  = 5
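When raising `test_users` to push on HPA, it can help to watch the autoscaler react while the load test runs (a sketch; the namespace is a placeholder, not something defined in this repo):
```
# Sketch: watch HPA scaling while Locust generates load.
kubectl get hpa -n <inference-namespace> --watch
```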
diff --git a/cloudbuild.yaml b/cloudbuild.yaml
index 542e2fee9..f33a15a96 100644
--- a/cloudbuild.yaml
+++ b/cloudbuild.yaml
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC
+# Copyright 2024 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -27,7 +27,7 @@ steps:
       terraform init -no-color
       terraform validate -no-color
     dir: 'applications/ray/'
-    waitFor: ['-']
+    waitFor: ['validate platform']
 
   - id: 'validate jupyterhub'
     name: 'gcr.io/$PROJECT_ID/terraform'
     script: |
       terraform init -no-color
       terraform validate -no-color
     dir: 'applications/jupyter/'
-    waitFor: ['-']
+    waitFor: ['validate platform']
+
+  - id: 'validate rag'
+    name: 'gcr.io/$PROJECT_ID/terraform'
+    script: |
+      terraform init -no-color
+      terraform validate -no-color
+    dir: 'applications/rag/'
+    waitFor: ['validate platform']
+
   # Create cluster to test ray, jupyterhub
   - id: 'create gke cluster'
     name: 'gcr.io/$PROJECT_ID/terraform'
     env:
@@ -46,13 +55,16 @@
     - '-c'
     - |
       set -e
-      terraform apply -var-file=tfvars_tests/standard-gke-public.platform.tfvars -var=project_id=$PROJECT_ID \
+      terraform apply \
+      -var-file=tfvars_tests/standard-gke-public.platform.tfvars \
+      -var=project_id=$PROJECT_ID \
       -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-cluster \
-      -var=cluster_region=$_REGION -auto-approve -no-color
+      -var=cluster_region=$_REGION \
+      -auto-approve -no-color -lock=false
      echo "pass" > /workspace/gke_cluster_result.txt
     dir: 'infrastructure/'
     allowFailure: true
-    waitFor: ['validate platform', 'validate ray', 'validate jupyterhub']
+    waitFor: ['validate platform', 'validate ray', 'validate jupyterhub', 'validate rag']
 
   - id: 'test ray cluster'
     name: 'gcr.io/$PROJECT_ID/terraform'
@@ -68,7 +80,7 @@
       --location $_REGION \
       --project $PROJECT_ID
 
-      cd applications/ray/
+      cd /workspace/applications/ray/
       terraform apply \
       -var-file=workloads.tfvars \
       -var=project_id=$PROJECT_ID \
@@ -77,7 +89,7 @@
       -var=ray_namespace=ml-$SHORT_SHA \
       -var=gcp_service_account=ray-sa-$SHORT_SHA \
       -var=gcs_bucket=gke-aieco-ray-$SHORT_SHA \
-      -auto-approve -no-color
+      -auto-approve -no-color -lock=false
       echo "pass" > /workspace/user_result.txt
 
       # Make sure pods are running
@@ -87,11 +99,33 @@
       sleep 5s
 
       ray job submit --working-dir ./example_ray_job_scripts \
-      --address=http://127.0.0.1:8265 -- python ray_job.py
+      --address=http://127.0.0.1:8265 -- python -c "import ray; ray.init(); print(ray.cluster_resources())"
       echo "pass" > /workspace/ray_result.txt
     allowFailure: true
     waitFor: ['create gke cluster']
 
+  - id: 'cleanup ray cluster'
+    name: 'gcr.io/$PROJECT_ID/terraform'
+    entrypoint: 'bash'
+    args:
+    - '-c'
+    - |
+      set -e
+
+      cd /workspace/applications/ray/
+      terraform destroy \
+      -var-file=workloads.tfvars \
+      -var=project_id=$PROJECT_ID \
+      -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-cluster \
+      -var=cluster_location=$_REGION \
+      -var=ray_namespace=ml-$SHORT_SHA \
+      -var=gcp_service_account=ray-sa-$SHORT_SHA \
+      -var=gcs_bucket=gke-aieco-ray-$SHORT_SHA \
+      -auto-approve -no-color
+
+    allowFailure: true
+    waitFor: ['test ray cluster']
+
   - id: 'test jupyterhub'
     name: 'gcr.io/$PROJECT_ID/terraform'
     entrypoint: 'bash'
@@ -111,7 +145,7 @@
       -var=namespace=ml-$SHORT_SHA \
       -var=workload_identity_service_account=jupyter-sa-$SHORT_SHA \
       -var=gcs_bucket=gke-aieco-jupyter-$SHORT_SHA \
-      -auto-approve -no-color
+      -auto-approve -no-color -lock=false
       echo "pass" > /workspace/jupyterhub_tf_result.txt
 
       kubectl wait --all pods -n ml-$SHORT_SHA --for=condition=Ready --timeout=300s
@@ -123,9 +157,94 @@
       python3 test_hub.py $(cat /workspace/jupyterhub_host_url.txt)
       echo "pass" > /workspace/jupyterhub_test_result.txt
     allowFailure: true
-    waitFor: ['test ray cluster']
+    # waitFor: ['cleanup ray cluster']
 
-  - id: 'clean gke cluster'
+  - id: 'cleanup jupyterhub'
+    name: 'gcr.io/$PROJECT_ID/terraform'
+    entrypoint: 'bash'
+    args:
+    - '-c'
+    - |
+      set -e
+
+      cd /workspace/applications/jupyter/
+      terraform destroy \
+      -var-file=workloads-without-iap.example.tfvars \
+      -var=project_id=$PROJECT_ID \
+      -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-cluster \
+      -var=namespace=ml-$SHORT_SHA \
+      -var=workload_identity_service_account=jupyter-sa-$SHORT_SHA \
+      -var=gcs_bucket=gke-aieco-jupyter-$SHORT_SHA \
+      -auto-approve -no-color
+
+    allowFailure: true
+    waitFor: ['test jupyterhub']
+
+  - id: 'test rag'
+    name: 'gcr.io/$PROJECT_ID/terraform'
+    entrypoint: 'sh'
+    args:
+    - '-c'
+    - |
+      set -e
+
+      # Get kube config
+      gcloud container clusters get-credentials \
+      ml-$SHORT_SHA-$_PR_NUMBER-cluster \
+      --location $_REGION \
+      --project $PROJECT_ID
+
+      cd /workspace/modules/jupyter/tests
+      python3 change_jupyter_config.py
+
+      cd /workspace/applications/rag/
+      terraform apply \
+      -var-file=workloads.tfvars \
+      -var=jupyter_add_auth=false \
+      -var=frontend_add_auth=false \
+      -var=project_id=$PROJECT_ID \
+      -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-cluster \
+      -var=kubernetes_namespace=rag-$SHORT_SHA \
+      -var=gcs_bucket=gke-aieco-rag-$SHORT_SHA \
+      -var=ray_service_account=ray-sa-$SHORT_SHA \
+      -var=rag_service_account=rag-sa-$SHORT_SHA \
+      -var=jupyter_service_account=jupyter-sa-$SHORT_SHA \
+      -var=cloudsql_instance=pgvector-instance-$SHORT_SHA \
+      -auto-approve -no-color -lock=false
+      echo "pass" > /workspace/rag_tf_result.txt
+
+      # Validate Ray: Make sure pods are running
+      kubectl wait --all pods -n rag-$SHORT_SHA --for=condition=Ready --timeout=300s
+      kubectl port-forward -n rag-$SHORT_SHA service/example-cluster-kuberay-head-svc 8265:8265 &
+      # Wait for port forwarding to take effect
+      sleep 5s
+
+      # Validate Ray: Check dashboard
+      ray job submit --working-dir ./tests \
+      --address=http://127.0.0.1:8265 -- python -c "import ray; ray.init(); print(ray.cluster_resources())"
+      echo "pass" > /workspace/rag_ray_dashboard_result.txt
+
+      # Validate Jupyterhub: Get hub url
+      kubectl get services -n rag-$SHORT_SHA
+      kubectl get service proxy-public -n rag-$SHORT_SHA --output jsonpath='{.status.loadBalancer.ingress[0].ip}' > /workspace/rag_jupyterhub_host_url.txt
+      echo "HOST URL is " $(cat /workspace/rag_jupyterhub_host_url.txt)
+
+      # Validate Jupyterhub: Test Hub
+      cd /workspace/modules/jupyter/tests
+      python3 test_hub.py $(cat /workspace/rag_jupyterhub_host_url.txt)
+      echo "pass" > /workspace/rag_jupyterhub_test_result.txt
+
+      # Validate RAG: Test rag frontend
+      kubectl port-forward -n rag-$SHORT_SHA service/rag-frontend 8081:8080 &
+      # Wait for port forwarding to take effect
+      sleep 5s
+
+      cd /workspace/applications/rag/tests
+      python3 test_frontend.py "127.0.0.1:8081"
+      echo "pass" > /workspace/rag_frontend_result.txt
+    allowFailure: true
+
+  - id: 'cleanup rag'
     name: 'gcr.io/$PROJECT_ID/terraform'
     entrypoint: 'bash'
     args:
     - '-c'
     - |
@@ -154,12 +273,37 @@
       -var=gcs_bucket=gke-aieco-ray-$SHORT_SHA \
       -auto-approve -no-color
 
+      cd /workspace/applications/rag/
+      terraform destroy \
+      -var-file=workloads.tfvars \
+      -var=project_id=$PROJECT_ID \
+      -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-cluster \
+      -var=kubernetes_namespace=rag-$SHORT_SHA \
+      -var=gcs_bucket=gke-aieco-rag-$SHORT_SHA \
+      -var=ray_service_account=ray-sa-$SHORT_SHA \
+      -var=rag_service_account=rag-sa-$SHORT_SHA \
+      -var=jupyter_service_account=jupyter-sa-$SHORT_SHA \
+      -var=cloudsql_instance=pgvector-instance-$SHORT_SHA \
+      -auto-approve -no-color
+
+    allowFailure: true
+    waitFor: ['test rag']
+
+  - id: 'cleanup gke cluster'
+    name: 'gcr.io/$PROJECT_ID/terraform'
+    entrypoint: 'bash'
+    args:
+    - '-c'
+    - |
+      set -e
+
       cd /workspace/infrastructure
       terraform destroy -var-file=tfvars_tests/standard-gke-public.platform.tfvars -var=project_id=$PROJECT_ID \
       -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-cluster \
       -var=cluster_region=$_REGION -auto-approve -no-color
+
     allowFailure: true
-    waitFor: ['test jupyterhub']
+    waitFor: ['cleanup rag']
 
   - id: 'check result'
     name: 'gcr.io/$PROJECT_ID/terraform'
     entrypoint: 'bash'
     args:
@@ -191,8 +335,32 @@
         echo "jupyterhub test failed"
         exit 1
       fi
-    waitFor: ['clean gke cluster']
+
+      if [[ $(cat /workspace/rag_tf_result.txt) != "pass" ]]; then
+        echo "rag tf failed"
+        exit 1
+      fi
+
+      if [[ $(cat /workspace/rag_ray_dashboard_result.txt) != "pass" ]]; then
+        echo "rag ray dashboard test failed"
+        exit 1
+      fi
+
+      if [[ $(cat /workspace/rag_jupyterhub_test_result.txt) != "pass" ]]; then
+        echo "rag jupyterhub test failed"
+        exit 1
+      fi
+
+      if [[ $(cat /workspace/rag_frontend_result.txt) != "pass" ]]; then
+        echo "rag frontend test failed"
+        exit 1
+      fi
+
+    waitFor: ['cleanup gke cluster']
 
 substitutions:
   _REGION: us-central1
   _USER_NAME: github
+options:
+  substitutionOption: 'ALLOW_LOOSE'
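Since `substitutionOption: 'ALLOW_LOOSE'` tolerates unbound substitutions, the pipeline can also be submitted by hand; a sketch with placeholder values (supplying `SHORT_SHA` and `_PR_NUMBER` manually is an assumption about manual invocation, and values here are placeholders):
```
# Sketch: manual submission of the CI pipeline with placeholder substitutions.
gcloud builds submit --config cloudbuild.yaml \
  --substitutions=_REGION=us-central1,_USER_NAME=github,_PR_NUMBER=123,SHORT_SHA=abc1234
```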
diff --git a/infrastructure/README.md b/infrastructure/README.md
index d7954f97c..9876791b3 100644
--- a/infrastructure/README.md
+++ b/infrastructure/README.md
@@ -2,7 +2,9 @@
 
 Platform module (to be renamed to Infra), creates the GKE cluster & other related resources for the AI applications / workloads to be deployed on them.
 
-Update the ```platform.tfvars``` file with the required configuration. Kindly refer to ```tfvars_examples``` for sample configuration.
+1) Update the ```platform.tfvars``` file with the required configuration. Refer to ```tfvars_examples``` for a sample configuration.
+
+2) Run `terraform init` and `terraform apply --var-file=platform.tfvars`
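Put together, a non-interactive run might look like this (a sketch; `-auto-approve` is an assumption, and the cluster name and region are taken from the `platform.tfvars` below):
```
cd infrastructure/
terraform init -no-color
terraform apply --var-file=platform.tfvars -auto-approve
# Point kubectl at the new cluster (name and region from platform.tfvars).
gcloud container clusters get-credentials ml-cluster --location us-central1
```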
 
 ## Prerequisites
 
diff --git a/infrastructure/platform.tfvars b/infrastructure/platform.tfvars
index ede37167a..5d704bcd5 100644
--- a/infrastructure/platform.tfvars
+++ b/infrastructure/platform.tfvars
@@ -29,7 +29,7 @@ private_cluster = false ## true = private cluster, false = public cluster
 autopilot_cluster = false ## true = autopilot cluster, false = standard cluster
 cluster_name      = "ml-cluster"
 cluster_region    = "us-central1"
-cluster_zones     = ["us-central1-a", "us-central1-b", "us-central1-f"]
+cluster_zones     = ["us-central1-a", "us-central1-b", "us-central1-c"]
 
 cpu_pools = [{
   name = "cpu-pool"
@@ -47,7 +47,7 @@ enable_gpu = true
 gpu_pools = [{
   name           = "gpu-pool"
   machine_type   = "n1-standard-16"
-  node_locations = "us-central1-b,us-central1-c"
+  node_locations = "us-central1-a"
   autoscaling    = true
   min_count      = 1
   max_count      = 3
@@ -60,7 +60,7 @@ gpu_pools = [{
   {
     name           = "gpu-pool-l4"
     machine_type   = "g2-standard-24"
-    node_locations = "us-central1-b,us-central1-c"
+    node_locations = "us-central1-a"
     autoscaling    = true
     min_count      = 1
     max_count      = 3
diff --git a/infrastructure/tfvars_tests/standard-gke-public.platform.tfvars b/infrastructure/tfvars_tests/standard-gke-public.platform.tfvars
index 0c3680cfc..86d951569 100644
--- a/infrastructure/tfvars_tests/standard-gke-public.platform.tfvars
+++ b/infrastructure/tfvars_tests/standard-gke-public.platform.tfvars
@@ -49,3 +49,36 @@ cpu_pools = [{
   disk_size_gb = 100
   disk_type    = "pd-standard"
 }]
+
+## make sure required gpu quotas are available in the corresponding region
+enable_gpu = true
+gpu_pools = [{
+  name               = "gpu-pool-t4"
+  machine_type       = "n1-standard-16"
+  node_locations     = "us-central1-b,us-central1-c"
+  autoscaling        = true
+  min_count          = 1
+  max_count          = 3
+  disk_size_gb       = 100
+  enable_gcfs        = true
+  logging_variant    = "DEFAULT"
+  disk_type          = "pd-balanced"
+  accelerator_count  = 2
+  accelerator_type   = "nvidia-tesla-t4"
+  gpu_driver_version = "LATEST"
+  },
+  {
+    name               = "gpu-pool-l4"
+    machine_type       = "g2-standard-24"
+    node_locations     = "us-central1-a"
+    autoscaling        = true
+    min_count          = 2
+    max_count          = 3
+    accelerator_count  = 2
+    disk_size_gb       = 100
+    enable_gcfs        = true
+    logging_variant    = "DEFAULT"
+    disk_type          = "pd-balanced"
+    accelerator_type   = "nvidia-l4"
+    gpu_driver_version = "LATEST"
+}]
\ No newline at end of file
diff --git a/modules/cloudsql/outputs.tf b/modules/cloudsql/outputs.tf
index f4010b142..cd8e2d1fb 100644
--- a/modules/cloudsql/outputs.tf
+++ b/modules/cloudsql/outputs.tf
@@ -20,4 +20,9 @@ output "db_secret_name" {
 output "db_secret_namespace" {
   description = "Cloud SQL DB secret namespace"
   value       = kubernetes_secret.secret.metadata[0].namespace
-}
\ No newline at end of file
+}
+
+output "instance" {
+  description = "Cloud SQL Instance name"
+  value       = google_sql_database_instance.main.name
+}
diff --git a/modules/iap/iap.tf b/modules/iap/iap.tf
index 097a1f387..c9344ae18 100644
--- a/modules/iap/iap.tf
+++ b/modules/iap/iap.tf
@@ -36,7 +36,7 @@ resource "helm_release" "iap_jupyter" {
   name             = "iap-jupyter"
   chart            = "${path.module}/charts/iap/"
   namespace        = var.namespace
-  create_namespace = true
+  create_namespace = true
   # timeout increased to support autopilot scaling resources, and give enough time to complete the deployment
   timeout = 1200
   set {
@@ -108,7 +108,7 @@ resource "helm_release" "iap_frontend" {
   name             = "iap-frontend"
   chart            = "${path.module}/charts/iap/"
   namespace        = var.namespace
-  create_namespace = true
+  create_namespace = true
   # timeout increased to support autopilot scaling resources, and give enough time to complete the deployment
   timeout = 1200
   set {
diff --git a/modules/iap/variables.tf b/modules/iap/variables.tf
index 613fefcb9..af09d87c2 100644
--- a/modules/iap/variables.tf
+++ b/modules/iap/variables.tf
@@ -137,7 +137,7 @@ variable "jupyter_k8s_backend_service_name" {
 
 variable "jupyter_k8s_backend_service_port" {
   type        = number
-  description = "NName of the Backend Service Port"
+  description = "Name of the Backend Service Port"
   default     = 80
 }
 
diff --git a/modules/kuberay-monitoring/main.tf b/modules/kuberay-monitoring/main.tf
index 46e627058..8a320ec81 100644
--- a/modules/kuberay-monitoring/main.tf
+++ b/modules/kuberay-monitoring/main.tf
@@ -47,7 +47,7 @@ resource "helm_release" "grafana" {
 }
 
 data "kubernetes_service" "example" {
-  count = var.enable_grafana_on_ray_dashboard ? 1 : 0
+  count = var.enable_grafana_on_ray_dashboard ? 1 : 0
   metadata {
     name      = "grafana"
     namespace = var.namespace
diff --git a/tutorials/hf-tgi/outputs.tf b/tutorials/hf-tgi/outputs.tf
index 3816613c4..7078bac0d 100644
--- a/tutorials/hf-tgi/outputs.tf
+++ b/tutorials/hf-tgi/outputs.tf
@@ -24,5 +24,5 @@ output "inference_service_namespace" {
 
 output "inference_service_endpoint" {
   description = "Endpoint of model inference service"
-  value = kubernetes_service.inference_service.status != null ? (kubernetes_service.inference_service.status[0].load_balancer != null ? "${kubernetes_service.inference_service.status[0].load_balancer[0].ingress[0].ip}" : "") : ""
+  value       = kubernetes_service.inference_service.status != null ? (kubernetes_service.inference_service.status[0].load_balancer != null ? "${kubernetes_service.inference_service.status[0].load_balancer[0].ingress[0].ip}" : "") : ""
 }
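The `inference_service_endpoint` output above points at a text-generation-inference server, so a quick check of the deployed endpoint could look like this (a sketch; the payload shape follows TGI's `/generate` API, and port 80 is an assumption):
```
# Sketch: probe the TGI server behind the endpoint output above.
ENDPOINT=$(terraform output -raw inference_service_endpoint)
curl -s "http://${ENDPOINT}/generate" \
  -H 'Content-Type: application/json' \
  -d '{"inputs": "What is Kubernetes?", "parameters": {"max_new_tokens": 32}}'
```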