From 7093da37c616a21f2e0a7ea799b480418be40445 Mon Sep 17 00:00:00 2001
From: imreddy13 <132504814+imreddy13@users.noreply.github.com>
Date: Tue, 5 Mar 2024 17:21:35 -0800
Subject: [PATCH 1/8] Fix kuberay_monitoring module dependency for RAG (#290)

Fix `kuberay_monitoring` module dependency for RAG
The 'kuberay_monitoring' module should depend on the 'namespace' module
---
 applications/rag/main.tf | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/applications/rag/main.tf b/applications/rag/main.tf
index 74b8abbcf..654151aaa 100644
--- a/applications/rag/main.tf
+++ b/applications/rag/main.tf
@@ -181,6 +181,7 @@ module "kuberay-monitoring" {
   create_namespace = true
   enable_grafana_on_ray_dashboard = var.enable_grafana_on_ray_dashboard
   k8s_service_account = var.ray_service_account
+  depends_on = [module.namespace]
 }

 module "inference-server" {
@@ -218,5 +219,5 @@ module "frontend" {
   url_domain_addr = var.frontend_url_domain_addr
   url_domain_name = var.frontend_url_domain_name
   members_allowlist = var.frontend_members_allowlist
-  depends_on = [ module.namespace ]
+  depends_on = [ module.namespace ]
 }

From 33265704d38e8942fba32c47c41acaa709c59150 Mon Sep 17 00:00:00 2001
From: imreddy13 <132504814+imreddy13@users.noreply.github.com>
Date: Tue, 5 Mar 2024 20:14:04 -0800
Subject: [PATCH 2/8] Pin pip dependency versions for notebook (#291)

Co-authored-by: Umesh Kumhar
---
 .../rag-kaggle-ray-sql-latest.ipynb | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb b/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb
index 849eaec20..72d3d5915 100644
--- a/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb
+++ b/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb
@@ -7,8 +7,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!pip install langchain ray==2.7.1 datasets sentence-transformers kaggle\n",
-    "!pip install \"cloud-sql-python-connector[pg8000]\" SQLAlchemy==2.0.7"
+    "!pip install langchain==0.1.9 ray==2.7.1 datasets==2.18.0 sentence-transformers==2.5.1 kaggle==1.6.6\n",
+    "!pip install \"cloud-sql-python-connector[pg8000]==1.7.0\" SQLAlchemy==2.0.7"
    ]
   },
  {
@@ -294,14 +294,14 @@
     " runtime_env={\n",
     "   \"working_dir\": \"/home/jovyan/test\", # upload the local working directory to ray workers\n",
     "   \"pip\": [\n",
-    "     \"langchain\",\n",
+    "     \"langchain==0.1.9\",\n",
     "     \"transformers\",\n",
-    "     \"sentence-transformers\",\n",
+    "     \"sentence-transformers==2.5.1\",\n",
     "     \"pyarrow\",\n",
-    "     \"datasets\",\n",
+    "     \"datasets==2.18.0\",\n",
     "     \"torch==2.0.1\",\n",
-    "     \"cloud-sql-python-connector[pg8000]\",\n",
-    "     \"SQLAlchemy\",\n",
+    "     \"cloud-sql-python-connector[pg8000]==1.7.0\",\n",
+    "     \"SQLAlchemy==2.0.7\",\n",
     "     \"huggingface_hub\",\n",
     "   ]\n",
     " }\n",

From a218defd398c680e9375583b1e654bf2863f296f Mon Sep 17 00:00:00 2001
From: imreddy13 <132504814+imreddy13@users.noreply.github.com>
Date: Tue, 5 Mar 2024 21:31:32 -0800
Subject: [PATCH 3/8] Update READMEs with better cluster create instructions
 (#292)

Co-authored-by: Umesh Kumhar
---
 applications/rag/README.md | 20 +++----------------
 infrastructure/README.md | 4 +++-
 2 files changed, 6 insertions(+), 18 deletions(-)

diff --git a/applications/rag/README.md b/applications/rag/README.md
index e8eaa5885..9876b9159 100644
--- a/applications/rag/README.md
+++ b/applications/rag/README.md
@@ -32,7 +32,7 @@ CLUSTER_REGION=us-central1
 ```
 2. Use the following instructions to create a GKE cluster. 
We recommend using Autopilot for a simpler setup. -##### Autopilot (recommended) +##### Autopilot RAG requires the latest Autopilot features, available on GKE cluster version `1.29.1-gke.1575000`+ ``` @@ -46,23 +46,9 @@ gcloud container clusters create-auto ${CLUSTER_NAME:?} \ --cluster-version ${CLUSTER_VERSION:?} ``` -##### Standard +##### Standard (recommended) -1. To create a GKE Standard cluster using Terraform, please follow the [instructions here](https://github.com/GoogleCloudPlatform/ai-on-gke/blob/main/infrastructure/README.md). - -TODO: Add GKE cluster requirements for a successful installation. - -2. The inference server requires L4 GPUs. Create an additional node pool: -``` -gcloud container node-pools create g2-standard-24 --cluster ${CLUSTER_NAME:?} \ - --accelerator type=nvidia-l4,count=2,gpu-driver-version=latest \ - --machine-type g2-standard-24 \ - --ephemeral-storage-local-ssd=count=2 \ - --enable-image-streaming \ - --num-nodes=1 --min-nodes=1 --max-nodes=2 \ - --node-locations ${CLUSTER_REGION:?}-a,${CLUSTER_REGION:?}-b \ - --location=${CLUSTER_REGION:?} -``` +1. To create a GKE Standard cluster using Terraform, follow the [instructions here](https://github.com/GoogleCloudPlatform/ai-on-gke/blob/main/infrastructure/README.md). Use the preconfigured node pools in `/infrastructure/platform.tfvars` as this solution requires T4s and L4s. #### Setup Components diff --git a/infrastructure/README.md b/infrastructure/README.md index d7954f97c..9876791b3 100644 --- a/infrastructure/README.md +++ b/infrastructure/README.md @@ -2,7 +2,9 @@ Platform module (to be renamed to Infra), creates the GKE cluster & other related resources for the AI applications / workloads to be deployed on them. -Update the ```platform.tfvars``` file with the required configuration. Kindly refer to ```tfvars_examples``` for sample configuration. +1) Update the ```platform.tfvars``` file with the required configuration. Kindly refer to ```tfvars_examples``` for sample configuration. 
+
+2) Run `terraform init` and `terraform apply --var-file=platform.tfvars`
 
 ## Prerequisites
 

From 620167ff5509773c968a4385846305a0d0af422d Mon Sep 17 00:00:00 2001
From: imreddy13 <132504814+imreddy13@users.noreply.github.com>
Date: Tue, 5 Mar 2024 21:46:28 -0800
Subject: [PATCH 4/8] Update default zones for L4/T4 nodes (#293)

---
 infrastructure/platform.tfvars | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/infrastructure/platform.tfvars b/infrastructure/platform.tfvars
index ede37167a..5d704bcd5 100644
--- a/infrastructure/platform.tfvars
+++ b/infrastructure/platform.tfvars
@@ -29,7 +29,7 @@ private_cluster = false ## true = private cluster, false = public cluster
 autopilot_cluster = false ## true = autopilot cluster, false = standard cluster
 cluster_name = "ml-cluster"
 cluster_region = "us-central1"
-cluster_zones = ["us-central1-a", "us-central1-b", "us-central1-f"]
+cluster_zones = ["us-central1-a", "us-central1-b", "us-central1-c"]
 
 cpu_pools = [{
   name = "cpu-pool"
@@ -47,7 +47,7 @@ enable_gpu = true
 gpu_pools = [{
   name = "gpu-pool"
   machine_type = "n1-standard-16"
-  node_locations = "us-central1-b,us-central1-c"
+  node_locations = "us-central1-a"
   autoscaling = true
   min_count = 1
   max_count = 3
@@ -60,7 +60,7 @@ gpu_pools = [{
 {
   name = "gpu-pool-l4"
   machine_type = "g2-standard-24"
-  node_locations = "us-central1-b,us-central1-c"
+  node_locations = "us-central1-a"
   autoscaling = true
   min_count = 1
   max_count = 3

From 39db88653f86a6ba349f63a7f9485e3f6fcbe996 Mon Sep 17 00:00:00 2001
From: Himanshu Sachdeva
Date: Wed, 6 Mar 2024 08:12:21 +0100
Subject: [PATCH 5/8] Add cloudbuild tests for rag application (#261)

* test rag application setup

* update rag tests

* fix check result step

* fix as per review comments

* fix variables for applications/jupyter

* fix bugs for jupyter & rag app

* CI fixes & revert jupyterhub module changes

* correcting merge conflict miss

* fix system account SA names

* add SHA suffix to cloudsql instance
---
 .../workloads-without-iap.example.tfvars | 8 +-
 applications/rag/main.tf | 1 +
 applications/rag/tests/test_frontend.py | 11 +
 applications/rag/variables.tf | 2 +-
 cloudbuild.yaml | 196 ++++++++++++++++--
 .../standard-gke-public.platform.tfvars | 33 +++
 modules/iap/variables.tf | 4 +-
 7 files changed, 234 insertions(+), 21 deletions(-)
 create mode 100644 applications/rag/tests/test_frontend.py

diff --git a/applications/jupyter/workloads-without-iap.example.tfvars b/applications/jupyter/workloads-without-iap.example.tfvars
index e048ac674..ccf033cf2 100644
--- a/applications/jupyter/workloads-without-iap.example.tfvars
+++ b/applications/jupyter/workloads-without-iap.example.tfvars
@@ -26,10 +26,10 @@ cluster_membership_id = "" # required only for private clusters, default: cluste
 #######################################################
 ## JupyterHub variables
 
-namespace = "jupyter"
-gcs_bucket = ""
-create_gcs_bucket = true
-workload_identity_service_account = "jupyter-service-account"
+namespace = "jupyter"
+gcs_bucket = ""
+create_gcs_bucket = true
+workload_identity_service_account = "jupyter-service-account"
 
 # Jupyterhub without IAP
 add_auth = false

diff --git a/applications/rag/main.tf b/applications/rag/main.tf
index 654151aaa..017f20a26 100644
--- a/applications/rag/main.tf
+++ b/applications/rag/main.tf
@@ -200,6 +200,7 @@ module "frontend" {
   google_service_account = var.rag_service_account
   namespace = var.kubernetes_namespace
   inference_service_endpoint = module.inference-server.inference_service_endpoint
+  cloudsql_instance = 
var.cloudsql_instance
   db_secret_name = module.cloudsql.db_secret_name
   db_secret_namespace = module.cloudsql.db_secret_namespace
   dataset_embeddings_table_name = var.dataset_embeddings_table_name
diff --git a/applications/rag/tests/test_frontend.py b/applications/rag/tests/test_frontend.py
new file mode 100644
index 000000000..c6e594c1c
--- /dev/null
+++ b/applications/rag/tests/test_frontend.py
@@ -0,0 +1,11 @@
+import sys
+import requests
+
+def test_frontend_up(rag_frontend_url):
+    r = requests.get(rag_frontend_url)
+    r.raise_for_status()
+    print("Rag frontend is up.")
+
+hub_url = "http://" + sys.argv[1]
+
+test_frontend_up(hub_url)
diff --git a/applications/rag/variables.tf b/applications/rag/variables.tf
index cd746e6e2..ca86bcf04 100644
--- a/applications/rag/variables.tf
+++ b/applications/rag/variables.tf
@@ -209,7 +209,7 @@ variable "jupyter_k8s_backend_service_name" {
 
 variable "jupyter_k8s_backend_service_port" {
   type = number
-  description = "NName of the Backend Service Port"
+  description = "Name of the Backend Service Port"
   default = 80
 }
 
diff --git a/cloudbuild.yaml b/cloudbuild.yaml
index 542e2fee9..f33a15a96 100644
--- a/cloudbuild.yaml
+++ b/cloudbuild.yaml
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC
+# Copyright 2024 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -27,7 +27,7 @@ steps:
       terraform init -no-color
       terraform validate -no-color
     dir: 'applications/ray/'
-    waitFor: ['-']
+    waitFor: ['validate platform']
 
   - id: 'validate jupyterhub'
     name: 'gcr.io/$PROJECT_ID/terraform'
     script: |
       terraform init -no-color
       terraform validate -no-color
     dir: 'applications/jupyter/'
-    waitFor: ['-']
+    waitFor: ['validate platform']
+
+  - id: 'validate rag'
+    name: 'gcr.io/$PROJECT_ID/terraform'
+    script: |
+      terraform init -no-color
+      terraform validate -no-color
+    dir: 'applications/rag/'
+    waitFor: ['validate platform']
+
+  # Create cluster to test ray, jupyterhub
   - id: 'create gke cluster'
     name: 'gcr.io/$PROJECT_ID/terraform'
     env:
     - '-c'
     - |
       set -e
       terraform apply \
       -var-file=tfvars_tests/standard-gke-public.platform.tfvars \
       -var=project_id=$PROJECT_ID \
       -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-cluster \
       -var=cluster_region=$_REGION \
       -auto-approve -no-color -lock=false
       echo "pass" > /workspace/gke_cluster_result.txt
     dir: 'infrastructure/'
     allowFailure: true
     waitFor: ['validate platform', 'validate ray', 'validate jupyterhub', 'validate rag']
 
   - id: 'test ray cluster'
     name: 'gcr.io/$PROJECT_ID/terraform'
     entrypoint: 'sh'
     args:
     - '-c'
     - |
       set -e
 
       # Get kube config
       gcloud container clusters get-credentials \
       ml-$SHORT_SHA-$_PR_NUMBER-cluster \
       --location $_REGION \
       --project $PROJECT_ID
 
       cd /workspace/applications/ray/
       terraform apply \
       -var-file=workloads.tfvars \
       -var=project_id=$PROJECT_ID \
       -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-cluster \
       -var=cluster_location=$_REGION \
       -var=ray_namespace=ml-$SHORT_SHA \
       -var=gcp_service_account=ray-sa-$SHORT_SHA \
       -var=gcs_bucket=gke-aieco-ray-$SHORT_SHA \
       -auto-approve -no-color -lock=false
       echo "pass" > /workspace/user_result.txt
 
       # Make sure pods are running
       kubectl wait --all pods -n ml-$SHORT_SHA --for=condition=Ready --timeout=300s
       kubectl port-forward -n ml-$SHORT_SHA service/example-cluster-kuberay-head-svc 8265:8265 &
       sleep 5s
 
       ray job submit --working-dir ./example_ray_job_scripts \
       --address=http://127.0.0.1:8265 -- python -c "import ray; ray.init(); 
print(ray.cluster_resources())" echo "pass" > /workspace/ray_result.txt allowFailure: true waitFor: ['create gke cluster'] + - id: 'cleanup ray cluster' + name: 'gcr.io/$PROJECT_ID/terraform' + entrypoint: 'bash' + args: + - '-c' + - | + set -e + + cd /workspace/applications/ray/ + terraform destroy \ + -var-file=workloads.tfvars \ + -var=project_id=$PROJECT_ID \ + -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-cluster \ + -var=cluster_location=$_REGION \ + -var=ray_namespace=ml-$SHORT_SHA \ + -var=gcp_service_account=ray-sa-$SHORT_SHA \ + -var=gcs_bucket=gke-aieco-ray-$SHORT_SHA \ + -auto-approve -no-color + + allowFailure: true + waitFor: ['test ray cluster'] + - id: 'test jupyterhub' name: 'gcr.io/$PROJECT_ID/terraform' entrypoint: 'bash' @@ -111,7 +145,7 @@ steps: -var=namespace=ml-$SHORT_SHA \ -var=workload_identity_service_account=jupyter-sa-$SHORT_SHA \ -var=gcs_bucket=gke-aieco-jupyter-$SHORT_SHA \ - -auto-approve -no-color + -auto-approve -no-color -lock=false echo "pass" > /workspace/jupyterhub_tf_result.txt kubectl wait --all pods -n ml-$SHORT_SHA --for=condition=Ready --timeout=300s @@ -123,9 +157,94 @@ steps: python3 test_hub.py $(cat /workspace/jupyterhub_host_url.txt) echo "pass" > /workspace/jupyterhub_test_result.txt allowFailure: true - waitFor: ['test ray cluster'] + # waitFor: ['cleanup ray cluster'] - - id: 'clean gke cluster' + - id: 'cleanup jupyterhub' + name: 'gcr.io/$PROJECT_ID/terraform' + entrypoint: 'bash' + args: + - '-c' + - | + set -e + + cd /workspace/applications/jupyter/ + terraform destroy \ + -var-file=workloads-without-iap.example.tfvars \ + -var=project_id=$PROJECT_ID \ + -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-cluster \ + -var=namespace=ml-$SHORT_SHA \ + -var=workload_identity_service_account=jupyter-sa-$SHORT_SHA \ + -var=gcs_bucket=gke-aieco-jupyter-$SHORT_SHA \ + -auto-approve -no-color + + allowFailure: true + waitFor: ['test jupyterhub'] + + - id: 'test rag' + name: 'gcr.io/$PROJECT_ID/terraform' + entrypoint: 'sh' + args: + - '-c' + - | + set -e + + # Get kube config + gcloud container clusters get-credentials \ + ml-$SHORT_SHA-$_PR_NUMBER-cluster \ + --location $_REGION \ + --project $PROJECT_ID + + cd /workspace/modules/jupyter/tests + python3 change_jupyter_config.py + + cd /workspace/applications/rag/ + terraform apply \ + -var-file=workloads.tfvars \ + -var=jupyter_add_auth=false \ + -var=frontend_add_auth=false \ + -var=project_id=$PROJECT_ID \ + -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-cluster \ + -var=kubernetes_namespace=rag-$SHORT_SHA \ + -var=gcs_bucket=gke-aieco-rag-$SHORT_SHA \ + -var=ray_service_account=ray-sa-$SHORT_SHA \ + -var=rag_service_account=rag-sa-$SHORT_SHA \ + -var=jupyter_service_account=jupyter-sa-$SHORT_SHA \ + -var=cloudsql_instance=pgvector-instance-$SHORT_SHA \ + -auto-approve -no-color -lock=false + echo "pass" > /workspace/rag_tf_result.txt + + # Validate Ray: Make sure pods are running + kubectl wait --all pods -n rag-$SHORT_SHA --for=condition=Ready --timeout=300s + kubectl port-forward -n rag-$SHORT_SHA service/example-cluster-kuberay-head-svc 8265:8265 & + # Wait port-forwarding to take its place + sleep 5s + + # Validate Ray: Check dashboard + ray job submit --working-dir ./tests \ + --address=http://127.0.0.1:8265 -- python -c "import ray; ray.init(); print(ray.cluster_resources())" + echo "pass" > /workspace/rag_ray_dashboard_result.txt + + # Validate Jupyterhub: Get hub url + kubectl get services -n rag-$SHORT_SHA + kubectl get service proxy-public -n rag-$SHORT_SHA --output 
jsonpath='{.status.loadBalancer.ingress[0].ip}' > /workspace/rag_jupyterhub_host_url.txt + echo "HOST URL is " $(cat /workspace/rag_jupyterhub_host_url.txt) + + # Validate Jupyterhub: Test Hub + cd /workspace/modules/jupyter/tests + python3 test_hub.py $(cat /workspace/rag_jupyterhub_host_url.txt) + echo "pass" > /workspace/rag_jupyterhub_test_result.txt + + # Validate RAG: Test rag frontend + kubectl port-forward -n rag-$SHORT_SHA service/rag-frontend 8081:8080 & + # Wait port-forwarding to take its place + sleep 5s + + cd /workspace/applications/rag/tests + python3 test_frontend.py "127.0.0.1:8081" + echo "pass" > /workspace/rag_frontend_result.txt + allowFailure: true + + - id: 'cleanup rag' name: 'gcr.io/$PROJECT_ID/terraform' entrypoint: 'bash' args: @@ -154,12 +273,37 @@ steps: -var=gcs_bucket=gke-aieco-ray-$SHORT_SHA \ -auto-approve -no-color + cd /workspace/applications/rag/ + terraform destroy \ + -var-file=workloads.tfvars \ + -var=project_id=$PROJECT_ID \ + -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-cluster \ + -var=kubernetes_namespace=rag-$SHORT_SHA \ + -var=gcs_bucket=gke-aieco-rag-$SHORT_SHA \ + -var=ray_service_account=ray-sa-$SHORT_SHA \ + -var=rag_service_account=rag-sa-$SHORT_SHA \ + -var=jupyter_service_account=jupyter-sa-$SHORT_SHA \ + -var=cloudsql_instance=pgvector-instance-$SHORT_SHA \ + -auto-approve -no-color + + allowFailure: true + waitFor: ['test rag'] + + - id: 'cleanup gke cluster' + name: 'gcr.io/$PROJECT_ID/terraform' + entrypoint: 'bash' + args: + - '-c' + - | + set -e + cd /workspace/infrastructure terraform destroy -var-file=tfvars_tests/standard-gke-public.platform.tfvars -var=project_id=$PROJECT_ID \ -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-cluster \ -var=cluster_region=$_REGION -auto-approve -no-color + allowFailure: true - waitFor: ['test jupyterhub'] + waitFor: ['cleanup rag'] - id: 'check result' name: 'gcr.io/$PROJECT_ID/terraform' @@ -191,8 +335,32 @@ steps: echo "jupyterhub test failed" exit 1 fi - waitFor: ['clean gke cluster'] + + if [[ $(cat /workspace/rag_tf_result.txt) != "pass" ]]; then + echo "rag tf failed" + exit 1 + fi + + if [[ $(cat /workspace/rag_ray_dashboard_result.txt) != "pass" ]]; then + echo "rag ray dashboard test failed" + exit 1 + fi + + if [[ $(cat /workspace/rag_jupyterhub_test_result.txt) != "pass" ]]; then + echo "rag jupyterhub test failed" + exit 1 + fi + + if [[ $(cat /workspace/rag_frontend_result.txt) != "pass" ]]; then + echo "rag frontend test failed" + exit 1 + fi + + waitFor: ['cleanup gke cluster'] substitutions: _REGION: us-central1 _USER_NAME: github +options: + substitutionOption: 'ALLOW_LOOSE' + diff --git a/infrastructure/tfvars_tests/standard-gke-public.platform.tfvars b/infrastructure/tfvars_tests/standard-gke-public.platform.tfvars index 0c3680cfc..4046f4362 100644 --- a/infrastructure/tfvars_tests/standard-gke-public.platform.tfvars +++ b/infrastructure/tfvars_tests/standard-gke-public.platform.tfvars @@ -49,3 +49,36 @@ cpu_pools = [{ disk_size_gb = 100 disk_type = "pd-standard" }] + +## make sure required gpu quotas are available in the corresponding region +enable_gpu = true +gpu_pools = [{ + name = "gpu-pool-t4" + machine_type = "n1-standard-16" + node_locations = "us-central1-b,us-central1-c" + autoscaling = true + min_count = 1 + max_count = 3 + disk_size_gb = 100 + enable_gcfs = true + logging_variant = "DEFAULT" + disk_type = "pd-balanced" + accelerator_count = 2 + accelerator_type = "nvidia-tesla-t4" + gpu_driver_version = "LATEST" +}, +{ + name = "gpu-pool-l4" + machine_type = 
"g2-standard-24" + node_locations = "us-central1-a" + autoscaling = true + min_count = 2 + max_count = 3 + accelerator_count = 2 + disk_size_gb = 100 + enable_gcfs = true + logging_variant = "DEFAULT" + disk_type = "pd-balanced" + accelerator_type = "nvidia-l4" + gpu_driver_version = "LATEST" +}] \ No newline at end of file diff --git a/modules/iap/variables.tf b/modules/iap/variables.tf index 613fefcb9..b12c761a1 100644 --- a/modules/iap/variables.tf +++ b/modules/iap/variables.tf @@ -137,8 +137,8 @@ variable "jupyter_k8s_backend_service_name" { variable "jupyter_k8s_backend_service_port" { type = number - description = "NName of the Backend Service Port" - default = 80 + description = "Name of the Backend Service Port" + default = 80 } variable "jupyter_url_domain_addr" { From d8713479e2fb551dd93c7325590a2b2727ec56f2 Mon Sep 17 00:00:00 2001 From: zlq Date: Wed, 6 Mar 2024 01:21:20 -0800 Subject: [PATCH 6/8] Update instruction of IAP (#285) Update README --- applications/rag/README.md | 53 +++++++++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 7 deletions(-) diff --git a/applications/rag/README.md b/applications/rag/README.md index 9876b9159..d585535c1 100644 --- a/applications/rag/README.md +++ b/applications/rag/README.md @@ -91,7 +91,11 @@ gcloud container clusters get-credentials ${CLUSTER_NAME:?} --location ${CLUSTER 1. Verify Kuberay is setup: run `kubectl get pods -n ${NAMESPACE:?}`. There should be a Ray head (and Ray worker pod on GKE Standard only) in `Running` state (prefixed by `example-cluster-kuberay-head-` and `example-cluster-kuberay-worker-workergroup-`). 2. Verify Jupyterhub service is setup: - * Fetch the service IP: `kubectl get services proxy-public -n ${NAMESPACE:?} --output jsonpath='{.status.loadBalancer.ingress[0].ip}'` + * Fetch the service IP/Domain: + * IAP disabled: `kubectl get services proxy-public -n $NAMESPACE --output jsonpath='{.status.loadBalancer.ingress[0].ip}'` + * IAP enabled: Read terraform output `jupyter_uri` or use command: `kubectl get managedcertificates jupyter-managed-cert -n $NAMESPACE --output jsonpath='{.status.domainStatus[0].domain}'` + * Remember login [Google Cloud Platform IAP](https://pantheon.corp.google.com/security/iap) to check if user has role `IAP-secured Web App User` + * Wait for domain status to be `Active` * Go to the IP in a browser which should display the Jupyterlab login UI. 3. Verify the instance `pgvector-instance` exists: `gcloud sql instances list | grep pgvector` @@ -118,8 +122,16 @@ EOF * At the end of the smoke test with the TGI server, stop port forwarding by using Ctrl-C on the original terminal. 5. Verify the frontend chat interface is setup: - * Verify the service exists: `kubectl get services rag-frontend -n ${NAMESPACE:?}` - * Verify the deployment exists: `kubectl get deployments rag-frontend -n ${NAMESPACE:?}` and ensure the deployment is in `READY` state. + * Verify the service exists: `kubectl get services rag-frontend -n ${NAMESPACE:?}` + * Verify the deployment exists: `kubectl get deployments rag-frontend -n ${NAMESPACE:?}` and ensure the deployment is in `READY` state. 
+   * Verify the managed certificate is `Active`:
+   ```
+   kubectl get managedcertificates frontend-managed-cert -n rag --output jsonpath='{.status.domainStatus[0].status}'
+   ```
+   * Verify IAP is enabled:
+   ```
+   gcloud compute backend-services list --format="table(name, backends, iap.enabled)"
+   ```
 
 ### Vector Embeddings for Dataset
 
This step generates the vector embeddings for your input dataset. Currently, the
 
 1. Create a CloudSQL user to access the database: `gcloud sql users create rag-user-notebook --password= --instance=pgvector-instance --host=%`
 
-2. Go to the Jupyterhub service endpoint in a browser: `kubectl get services proxy-public -n ${NAMESPACE:?} --output jsonpath='{.status.loadBalancer.ingress[0].ip}'`
-
+2. Go to the Jupyterhub service endpoint in a browser:
+   * IAP disabled: `kubectl get services proxy-public -n $NAMESPACE --output jsonpath='{.status.loadBalancer.ingress[0].ip}'`
+   * IAP enabled: Read terraform output `jupyter_uri` or use the command: `kubectl get managedcertificates jupyter-managed-cert -n $NAMESPACE --output jsonpath='{.status.domainStatus[0].domain}'`
+   * Remember to log in to GCP and check that your user has the `IAP-secured Web App User` role
+   * Wait for the domain status to be `Active`
 3. Login with placeholder credentials [TBD: replace with instructions for IAP]:
    * username: user
    * password: use `terraform output jupyter_password` to fetch the password value
 
 ### Launch the Frontend Chat Interface
 
-1. Setup port forwarding for the frontend [TBD: Replace with IAP]: `kubectl port-forward service/rag-frontend -n ${NAMESPACE:?} 8080:8080 &`
+#### Accessing the Frontend with IAP Disabled
+1. Set up port forwarding for the frontend: `kubectl port-forward service/rag-frontend -n $NAMESPACE 8080:8080 &`
 
 2. Go to `localhost:8080` in a browser & start chatting! This will fetch context related to your prompt from the vector embeddings in the `pgvector-instance`, augment the original prompt with the context & query the inference model (`mistral-7b`) with the augmented prompt.
 
-3. TODO: Add some example prompts for the dataset.
+#### Accessing the Frontend with IAP Enabled
+1. Verify IAP is Enabled
+
+   * Ensure that IAP is enabled on Google Cloud Platform (GCP) for your application. If you encounter any errors, try re-enabling IAP.
+
+2. Verify User Role
+
+   * Make sure you have the role `IAP-secured Web App User` assigned to your user account. This role is necessary to access the application through IAP.
+
+3. Verify the Domain is Active
+   * Make sure the domain is active using the command:
+   `kubectl get managedcertificates frontend-managed-cert -n rag --output jsonpath='{.status.domainStatus[0].status}'`
+
+4. Retrieve the Domain
+
+   * Read terraform output `frontend_uri` or use the following command to find the domain created by IAP for accessing your service:
+   `kubectl get managedcertificates frontend-managed-cert -n $NAMESPACE --output jsonpath='{.status.domainStatus[0].domain}'`
+
+5. Access the Frontend
+
+   * Open your browser and navigate to the domain you retrieved in the previous step to start chatting!
+
+#### Prompts Example
+[TODO: Add some example prompts for the dataset].
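+
+Until dataset-specific example prompts land here, you can at least confirm the frontend answers requests end to end. A minimal sketch, reusing this PR's own `applications/rag/tests/test_frontend.py` helper and assuming the IAP-disabled port-forward from the steps above is still running:
+
+```
+# Assumes `kubectl port-forward service/rag-frontend -n $NAMESPACE 8080:8080 &` is active.
+# test_frontend.py prepends "http://" to its argument and raises on a 4xx/5xx response.
+python3 applications/rag/tests/test_frontend.py "127.0.0.1:8080"
+```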
### Cleanup From 13be33c63a2ff371c5f374bc50d9d338a40ef35d Mon Sep 17 00:00:00 2001 From: Umesh Kumhar Date: Wed, 6 Mar 2024 16:55:45 +0530 Subject: [PATCH 7/8] Update frontend dependency with cloudsql (#302) update frontend dependency with cloudsql --- applications/rag/main.tf | 22 +++++++++++----------- modules/cloudsql/outputs.tf | 7 ++++++- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/applications/rag/main.tf b/applications/rag/main.tf index 017f20a26..63eb89ac4 100644 --- a/applications/rag/main.tf +++ b/applications/rag/main.tf @@ -89,10 +89,10 @@ provider "helm" { } module "namespace" { - source = "../../modules/kubernetes-namespace" - providers = { helm = helm.rag} + source = "../../modules/kubernetes-namespace" + providers = { helm = helm.rag } create_namespace = true - namespace = var.kubernetes_namespace + namespace = var.kubernetes_namespace } module "kuberay-operator" { @@ -115,12 +115,12 @@ module "gcs" { } module "cloudsql" { - source = "../../modules/cloudsql" - providers = { kubernetes = kubernetes.rag } - project_id = var.project_id - instance_name = var.cloudsql_instance - namespace = var.kubernetes_namespace - depends_on = [module.namespace] + source = "../../modules/cloudsql" + providers = { kubernetes = kubernetes.rag } + project_id = var.project_id + instance_name = var.cloudsql_instance + namespace = var.kubernetes_namespace + depends_on = [module.namespace] } module "jupyterhub" { @@ -200,7 +200,7 @@ module "frontend" { google_service_account = var.rag_service_account namespace = var.kubernetes_namespace inference_service_endpoint = module.inference-server.inference_service_endpoint - cloudsql_instance = var.cloudsql_instance + cloudsql_instance = module.cloudsql.instance db_secret_name = module.cloudsql.db_secret_name db_secret_namespace = module.cloudsql.db_secret_namespace dataset_embeddings_table_name = var.dataset_embeddings_table_name @@ -220,5 +220,5 @@ module "frontend" { url_domain_addr = var.frontend_url_domain_addr url_domain_name = var.frontend_url_domain_name members_allowlist = var.frontend_members_allowlist - depends_on = [ module.namespace ] + depends_on = [module.namespace] } diff --git a/modules/cloudsql/outputs.tf b/modules/cloudsql/outputs.tf index f4010b142..cd8e2d1fb 100644 --- a/modules/cloudsql/outputs.tf +++ b/modules/cloudsql/outputs.tf @@ -20,4 +20,9 @@ output "db_secret_name" { output "db_secret_namespace" { description = "Cloud SQL DB secret namespace" value = kubernetes_secret.secret.metadata[0].namespace -} \ No newline at end of file +} + +output "instance" { + description = "Cloud SQL Instance name" + value = google_sql_database_instance.main.name +} From ffd5a195796823c2ce9afc45894ef81a916940f5 Mon Sep 17 00:00:00 2001 From: Umesh Kumhar Date: Wed, 6 Mar 2024 18:14:48 +0530 Subject: [PATCH 8/8] Add github actions for tf lint check (#296) Add github actions for tf lint check --- .github/workflows/ci.yaml | 35 +++++++++++++++++++ .../workloads-without-iap.example.tfvars | 8 ++--- applications/rag/variables.tf | 4 +-- applications/rag/workloads.tfvars | 2 +- .../user/modules/service_accounts/versions.tf | 2 +- applications/ray/versions.tf | 4 +-- .../sample-terraform.tfvars | 4 +-- .../standard-gke-public.platform.tfvars | 4 +-- modules/iap/iap.tf | 4 +-- modules/iap/variables.tf | 2 +- modules/kuberay-monitoring/main.tf | 2 +- tutorials/hf-tgi/outputs.tf | 2 +- 12 files changed, 54 insertions(+), 19 deletions(-) create mode 100644 .github/workflows/ci.yaml diff --git a/.github/workflows/ci.yaml 
b/.github/workflows/ci.yaml new file mode 100644 index 000000000..3188efcec --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,35 @@ +name: Terraform CI +on: + push: + branches: + - main + pull_request: + branches: + - main +jobs: + Terraform-Lint-Check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: hashicorp/setup-terraform@v3 + with: + terraform_version: "1.5.7" + + - name: Terraform fmt + id: fmt + run: terraform fmt -check -recursive + + - name: Terraform Init + id: init + run: | + terraform -chdir=applications/rag init + terraform -chdir=applications/ray init + terraform -chdir=applications/jupyter init + + - name: Terraform Validate + id: validate + run: | + terraform -chdir=applications/rag validate -no-color + terraform -chdir=applications/ray validate -no-color + terraform -chdir=applications/jupyter validate -no-color + diff --git a/applications/jupyter/workloads-without-iap.example.tfvars b/applications/jupyter/workloads-without-iap.example.tfvars index ccf033cf2..e048ac674 100644 --- a/applications/jupyter/workloads-without-iap.example.tfvars +++ b/applications/jupyter/workloads-without-iap.example.tfvars @@ -26,10 +26,10 @@ cluster_membership_id = "" # required only for private clusters, default: cluste ####################################################### ## JupyterHub variables -namespace = "jupyter" -gcs_bucket = "" -create_gcs_bucket = true -workload_identity_service_account = "jupyter-service-account" +namespace = "jupyter" +gcs_bucket = "" +create_gcs_bucket = true +workload_identity_service_account = "jupyter-service-account" # Jupyterhub without IAP add_auth = false diff --git a/applications/rag/variables.tf b/applications/rag/variables.tf index ca86bcf04..a5e77ff78 100644 --- a/applications/rag/variables.tf +++ b/applications/rag/variables.tf @@ -265,9 +265,9 @@ variable "autopilot_cluster" { } variable "cloudsql_instance" { - type = string + type = string description = "Name of the CloudSQL instance for RAG VectorDB" - default = "pgvector-instance" + default = "pgvector-instance" } variable "cpu_pools" { diff --git a/applications/rag/workloads.tfvars b/applications/rag/workloads.tfvars index aba62feae..dca101637 100644 --- a/applications/rag/workloads.tfvars +++ b/applications/rag/workloads.tfvars @@ -38,7 +38,7 @@ rag_service_account = "rag-system-account" # Creates a google service account & k8s service account & configures workload identity with appropriate permissions. # Set to false & update the variable `jupyter_service_account` to use an existing IAM service account. -jupyter_service_account = "jupyter-system-account" +jupyter_service_account = "jupyter-system-account" ## Embeddings table name - change this to the TABLE_NAME used in the notebook. 
dataset_embeddings_table_name = "googlemaps_reviews_db" diff --git a/applications/ray/raytrain-examples/raytrain-with-gcsfusecsi/kuberaytf/user/modules/service_accounts/versions.tf b/applications/ray/raytrain-examples/raytrain-with-gcsfusecsi/kuberaytf/user/modules/service_accounts/versions.tf index 436ce51c2..53d5c8e95 100644 --- a/applications/ray/raytrain-examples/raytrain-with-gcsfusecsi/kuberaytf/user/modules/service_accounts/versions.tf +++ b/applications/ray/raytrain-examples/raytrain-with-gcsfusecsi/kuberaytf/user/modules/service_accounts/versions.tf @@ -15,7 +15,7 @@ terraform { required_providers { google = { - source = "hashicorp/google" + source = "hashicorp/google" } kubernetes = { source = "hashicorp/kubernetes" diff --git a/applications/ray/versions.tf b/applications/ray/versions.tf index a8a0a268a..b8e6f2c71 100644 --- a/applications/ray/versions.tf +++ b/applications/ray/versions.tf @@ -15,10 +15,10 @@ terraform { required_providers { google = { - source = "hashicorp/google" + source = "hashicorp/google" } google-beta = { - source = "hashicorp/google-beta" + source = "hashicorp/google-beta" } helm = { source = "hashicorp/helm" diff --git a/benchmarks/benchmark/tools/locust-load-inference/sample-terraform.tfvars b/benchmarks/benchmark/tools/locust-load-inference/sample-terraform.tfvars index ff2e8cd05..dcd6739b4 100644 --- a/benchmarks/benchmark/tools/locust-load-inference/sample-terraform.tfvars +++ b/benchmarks/benchmark/tools/locust-load-inference/sample-terraform.tfvars @@ -21,5 +21,5 @@ tokenizer = "tiiuae/falcon-7b" # Benchmark configuration for triggering single test via Locust Runner test_duration = 60 # Increase test_users to allow more parallelism (especially when testing HPA) -test_users = 1 -test_rate = 5 +test_users = 1 +test_rate = 5 diff --git a/infrastructure/tfvars_tests/standard-gke-public.platform.tfvars b/infrastructure/tfvars_tests/standard-gke-public.platform.tfvars index 4046f4362..86d951569 100644 --- a/infrastructure/tfvars_tests/standard-gke-public.platform.tfvars +++ b/infrastructure/tfvars_tests/standard-gke-public.platform.tfvars @@ -66,8 +66,8 @@ gpu_pools = [{ accelerator_count = 2 accelerator_type = "nvidia-tesla-t4" gpu_driver_version = "LATEST" -}, -{ + }, + { name = "gpu-pool-l4" machine_type = "g2-standard-24" node_locations = "us-central1-a" diff --git a/modules/iap/iap.tf b/modules/iap/iap.tf index 097a1f387..c9344ae18 100644 --- a/modules/iap/iap.tf +++ b/modules/iap/iap.tf @@ -36,7 +36,7 @@ resource "helm_release" "iap_jupyter" { name = "iap-jupyter" chart = "${path.module}/charts/iap/" namespace = var.namespace - create_namespace = true + create_namespace = true # timeout increased to support autopilot scaling resources, and give enough time to complete the deployment timeout = 1200 set { @@ -108,7 +108,7 @@ resource "helm_release" "iap_frontend" { name = "iap-frontend" chart = "${path.module}/charts/iap/" namespace = var.namespace - create_namespace = true + create_namespace = true # timeout increased to support autopilot scaling resources, and give enough time to complete the deployment timeout = 1200 set { diff --git a/modules/iap/variables.tf b/modules/iap/variables.tf index b12c761a1..af09d87c2 100644 --- a/modules/iap/variables.tf +++ b/modules/iap/variables.tf @@ -138,7 +138,7 @@ variable "jupyter_k8s_backend_service_name" { variable "jupyter_k8s_backend_service_port" { type = number description = "Name of the Backend Service Port" - default = 80 + default = 80 } variable "jupyter_url_domain_addr" { diff --git 
a/modules/kuberay-monitoring/main.tf b/modules/kuberay-monitoring/main.tf index 46e627058..8a320ec81 100644 --- a/modules/kuberay-monitoring/main.tf +++ b/modules/kuberay-monitoring/main.tf @@ -47,7 +47,7 @@ resource "helm_release" "grafana" { } data "kubernetes_service" "example" { - count = var.enable_grafana_on_ray_dashboard ? 1 : 0 + count = var.enable_grafana_on_ray_dashboard ? 1 : 0 metadata { name = "grafana" namespace = var.namespace diff --git a/tutorials/hf-tgi/outputs.tf b/tutorials/hf-tgi/outputs.tf index 3816613c4..7078bac0d 100644 --- a/tutorials/hf-tgi/outputs.tf +++ b/tutorials/hf-tgi/outputs.tf @@ -24,5 +24,5 @@ output "inference_service_namespace" { output "inference_service_endpoint" { description = "Endpoint of model inference service" - value = kubernetes_service.inference_service.status != null ? (kubernetes_service.inference_service.status[0].load_balancer != null ? "${kubernetes_service.inference_service.status[0].load_balancer[0].ingress[0].ip}" : "") : "" + value = kubernetes_service.inference_service.status != null ? (kubernetes_service.inference_service.status[0].load_balancer != null ? "${kubernetes_service.inference_service.status[0].load_balancer[0].ingress[0].ip}" : "") : "" }
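
The whitespace-only hunks above are exactly what `terraform fmt` rewrites. A minimal sketch for reproducing this workflow's gate locally before pushing, assuming Terraform 1.5.7 (the version pinned in `.github/workflows/ci.yaml`) is on your PATH:

```
# Mirror the CI steps: recursive fmt check, then init + validate per app.
terraform fmt -check -recursive
for app in rag ray jupyter; do
  terraform -chdir=applications/$app init
  terraform -chdir=applications/$app validate -no-color
done
```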