From 7093da37c616a21f2e0a7ea799b480418be40445 Mon Sep 17 00:00:00 2001
From: imreddy13 <132504814+imreddy13@users.noreply.github.com>
Date: Tue, 5 Mar 2024 17:21:35 -0800
Subject: [PATCH 1/8] Fix kuberay_monitoring module dependency for RAG (#290)

Fix `kuberay_monitoring` module dependency for RAG
The 'kuberay_monitoring' module should depend on the 'namespace' module
---
 applications/rag/main.tf | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/applications/rag/main.tf b/applications/rag/main.tf
index 74b8abbcf..654151aaa 100644
--- a/applications/rag/main.tf
+++ b/applications/rag/main.tf
@@ -181,6 +181,7 @@ module "kuberay-monitoring" {
   create_namespace = true
   enable_grafana_on_ray_dashboard = var.enable_grafana_on_ray_dashboard
   k8s_service_account = var.ray_service_account
+  depends_on = [module.namespace]
 }

 module "inference-server" {
@@ -218,5 +219,5 @@ module "frontend" {
   url_domain_addr = var.frontend_url_domain_addr
   url_domain_name = var.frontend_url_domain_name
   members_allowlist = var.frontend_members_allowlist
-  depends_on = [ module.namespace ]
+  depends_on = [ module.namespace ]
 }

From 33265704d38e8942fba32c47c41acaa709c59150 Mon Sep 17 00:00:00 2001
From: imreddy13 <132504814+imreddy13@users.noreply.github.com>
Date: Tue, 5 Mar 2024 20:14:04 -0800
Subject: [PATCH 2/8] Pin pip dependency versions for notebook (#291)

Co-authored-by: Umesh Kumhar
---
 .../rag-kaggle-ray-sql-latest.ipynb | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb b/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb
index 849eaec20..72d3d5915 100644
--- a/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb
+++ b/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb
@@ -7,8 +7,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!pip install langchain ray==2.7.1 datasets sentence-transformers kaggle\n",
-    "!pip install \"cloud-sql-python-connector[pg8000]\" SQLAlchemy==2.0.7"
+    "!pip install langchain==0.1.9 ray==2.7.1 datasets==2.18.0 sentence-transformers==2.5.1 kaggle==1.6.6\n",
+    "!pip install \"cloud-sql-python-connector[pg8000]==1.7.0\" SQLAlchemy==2.0.7"
    ]
   },
  {
@@ -294,14 +294,14 @@
     " runtime_env={\n",
     "   \"working_dir\": \"/home/jovyan/test\", # upload the local working directory to ray workers\n",
     "   \"pip\": [\n",
-    "     \"langchain\",\n",
+    "     \"langchain==0.1.9\",\n",
     "     \"transformers\",\n",
-    "     \"sentence-transformers\",\n",
+    "     \"sentence-transformers==2.5.1\",\n",
     "     \"pyarrow\",\n",
-    "     \"datasets\",\n",
+    "     \"datasets==2.18.0\",\n",
     "     \"torch==2.0.1\",\n",
-    "     \"cloud-sql-python-connector[pg8000]\",\n",
-    "     \"SQLAlchemy\",\n",
+    "     \"cloud-sql-python-connector[pg8000]==1.7.0\",\n",
+    "     \"SQLAlchemy==2.0.7\",\n",
     "     \"huggingface_hub\",\n",
     "   ]\n",
     " }\n",

From a218defd398c680e9375583b1e654bf2863f296f Mon Sep 17 00:00:00 2001
From: imreddy13 <132504814+imreddy13@users.noreply.github.com>
Date: Tue, 5 Mar 2024 21:31:32 -0800
Subject: [PATCH 3/8] Update READMEs with better cluster create instructions
 (#292)

Co-authored-by: Umesh Kumhar
---
 applications/rag/README.md | 20 +++----------------
 infrastructure/README.md | 4 +++-
 2 files changed, 6 insertions(+), 18 deletions(-)

diff --git a/applications/rag/README.md b/applications/rag/README.md
index e8eaa5885..9876b9159 100644
--- a/applications/rag/README.md
+++ b/applications/rag/README.md
@@ -32,7 +32,7 @@ CLUSTER_REGION=us-central1
 ```
 2. Use the following instructions to create a GKE cluster. 
We recommend using Autopilot for a simpler setup. -##### Autopilot (recommended) +##### Autopilot RAG requires the latest Autopilot features, available on GKE cluster version `1.29.1-gke.1575000`+ ``` @@ -46,23 +46,9 @@ gcloud container clusters create-auto ${CLUSTER_NAME:?} \ --cluster-version ${CLUSTER_VERSION:?} ``` -##### Standard +##### Standard (recommended) -1. To create a GKE Standard cluster using Terraform, please follow the [instructions here](https://github.com/GoogleCloudPlatform/ai-on-gke/blob/main/infrastructure/README.md). - -TODO: Add GKE cluster requirements for a successful installation. - -2. The inference server requires L4 GPUs. Create an additional node pool: -``` -gcloud container node-pools create g2-standard-24 --cluster ${CLUSTER_NAME:?} \ - --accelerator type=nvidia-l4,count=2,gpu-driver-version=latest \ - --machine-type g2-standard-24 \ - --ephemeral-storage-local-ssd=count=2 \ - --enable-image-streaming \ - --num-nodes=1 --min-nodes=1 --max-nodes=2 \ - --node-locations ${CLUSTER_REGION:?}-a,${CLUSTER_REGION:?}-b \ - --location=${CLUSTER_REGION:?} -``` +1. To create a GKE Standard cluster using Terraform, follow the [instructions here](https://github.com/GoogleCloudPlatform/ai-on-gke/blob/main/infrastructure/README.md). Use the preconfigured node pools in `/infrastructure/platform.tfvars` as this solution requires T4s and L4s. #### Setup Components diff --git a/infrastructure/README.md b/infrastructure/README.md index d7954f97c..9876791b3 100644 --- a/infrastructure/README.md +++ b/infrastructure/README.md @@ -2,7 +2,9 @@ Platform module (to be renamed to Infra), creates the GKE cluster & other related resources for the AI applications / workloads to be deployed on them. -Update the ```platform.tfvars``` file with the required configuration. Kindly refer to ```tfvars_examples``` for sample configuration. +1) Update the ```platform.tfvars``` file with the required configuration. Kindly refer to ```tfvars_examples``` for sample configuration. 
+
+2) Run `terraform init` and `terraform apply --var-file=platform.tfvars`
 
 ## Prerequisites
 

From 620167ff5509773c968a4385846305a0d0af422d Mon Sep 17 00:00:00 2001
From: imreddy13 <132504814+imreddy13@users.noreply.github.com>
Date: Tue, 5 Mar 2024 21:46:28 -0800
Subject: [PATCH 4/8] Update default zones for L4/T4 nodes (#293)

---
 infrastructure/platform.tfvars | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/infrastructure/platform.tfvars b/infrastructure/platform.tfvars
index ede37167a..5d704bcd5 100644
--- a/infrastructure/platform.tfvars
+++ b/infrastructure/platform.tfvars
@@ -29,7 +29,7 @@ private_cluster = false ## true = private cluster, false = public cluster
 autopilot_cluster = false ## true = autopilot cluster, false = standard cluster
 cluster_name = "ml-cluster"
 cluster_region = "us-central1"
-cluster_zones = ["us-central1-a", "us-central1-b", "us-central1-f"]
+cluster_zones = ["us-central1-a", "us-central1-b", "us-central1-c"]
 
 cpu_pools = [{
   name = "cpu-pool"
@@ -47,7 +47,7 @@ enable_gpu = true
 gpu_pools = [{
   name = "gpu-pool"
   machine_type = "n1-standard-16"
-  node_locations = "us-central1-b,us-central1-c"
+  node_locations = "us-central1-a"
   autoscaling = true
   min_count = 1
   max_count = 3
@@ -60,7 +60,7 @@ gpu_pools = [{
 {
   name = "gpu-pool-l4"
   machine_type = "g2-standard-24"
-  node_locations = "us-central1-b,us-central1-c"
+  node_locations = "us-central1-a"
   autoscaling = true
   min_count = 1
   max_count = 3

From 39db88653f86a6ba349f63a7f9485e3f6fcbe996 Mon Sep 17 00:00:00 2001
From: Himanshu Sachdeva
Date: Wed, 6 Mar 2024 08:12:21 +0100
Subject: [PATCH 5/8] Add cloudbuild tests for rag application (#261)

* test rag application setup

* update rag tests

* fix check result step

* fix as per review comments

* fix variables for applications/jupyter

* fix bugs for jupyter & rag app

* CI fixes & revert jupyterhub module changes

* correcting merge conflict miss

* fix system account SA names

* add SHA suffix to cloudsql instance
---
 .../workloads-without-iap.example.tfvars | 8 +-
 applications/rag/main.tf | 1 +
 applications/rag/tests/test_frontend.py | 11 +
 applications/rag/variables.tf | 2 +-
 cloudbuild.yaml | 196 ++++++++++++++++--
 .../standard-gke-public.platform.tfvars | 33 +++
 modules/iap/variables.tf | 4 +-
 7 files changed, 234 insertions(+), 21 deletions(-)
 create mode 100644 applications/rag/tests/test_frontend.py

diff --git a/applications/jupyter/workloads-without-iap.example.tfvars b/applications/jupyter/workloads-without-iap.example.tfvars
index e048ac674..ccf033cf2 100644
--- a/applications/jupyter/workloads-without-iap.example.tfvars
+++ b/applications/jupyter/workloads-without-iap.example.tfvars
@@ -26,10 +26,10 @@ cluster_membership_id = "" # required only for private clusters, default: cluste
 #######################################################
 ## JupyterHub variables
 
-namespace = "jupyter"
-gcs_bucket = ""
-create_gcs_bucket = true
-workload_identity_service_account = "jupyter-service-account"
+namespace = "jupyter"
+gcs_bucket = ""
+create_gcs_bucket = true
+workload_identity_service_account = "jupyter-service-account"
 
 # Jupyterhub without IAP
 add_auth = false

diff --git a/applications/rag/main.tf b/applications/rag/main.tf
index 654151aaa..017f20a26 100644
--- a/applications/rag/main.tf
+++ b/applications/rag/main.tf
@@ -200,6 +200,7 @@ module "frontend" {
   google_service_account = var.rag_service_account
   namespace = var.kubernetes_namespace
   inference_service_endpoint = module.inference-server.inference_service_endpoint
+  cloudsql_instance = 
var.cloudsql_instance
   db_secret_name = module.cloudsql.db_secret_name
   db_secret_namespace = module.cloudsql.db_secret_namespace
   dataset_embeddings_table_name = var.dataset_embeddings_table_name
diff --git a/applications/rag/tests/test_frontend.py b/applications/rag/tests/test_frontend.py
new file mode 100644
index 000000000..c6e594c1c
--- /dev/null
+++ b/applications/rag/tests/test_frontend.py
@@ -0,0 +1,11 @@
+import sys
+import requests
+
+def test_frontend_up(rag_frontend_url):
+    r = requests.get(rag_frontend_url)
+    r.raise_for_status()
+    print("Rag frontend is up.")
+
+hub_url = "http://" + sys.argv[1]
+
+test_frontend_up(hub_url)
diff --git a/applications/rag/variables.tf b/applications/rag/variables.tf
index cd746e6e2..ca86bcf04 100644
--- a/applications/rag/variables.tf
+++ b/applications/rag/variables.tf
@@ -209,7 +209,7 @@ variable "jupyter_k8s_backend_service_name" {
 
 variable "jupyter_k8s_backend_service_port" {
   type = number
-  description = "NName of the Backend Service Port"
+  description = "Name of the Backend Service Port"
   default = 80
 }
 
diff --git a/cloudbuild.yaml b/cloudbuild.yaml
index 542e2fee9..f33a15a96 100644
--- a/cloudbuild.yaml
+++ b/cloudbuild.yaml
@@ -1,4 +1,4 @@
-# Copyright 2023 Google LLC
+# Copyright 2024 Google LLC
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -27,7 +27,7 @@ steps:
       terraform init -no-color
       terraform validate -no-color
     dir: 'applications/ray/'
-    waitFor: ['-']
+    waitFor: ['validate platform']
 
   - id: 'validate jupyterhub'
     name: 'gcr.io/$PROJECT_ID/terraform'
     script: |
       terraform init -no-color
       terraform validate -no-color
     dir: 'applications/jupyter/'
-    waitFor: ['-']
+    waitFor: ['validate platform']
+
+  - id: 'validate rag'
+    name: 'gcr.io/$PROJECT_ID/terraform'
+    script: |
+      terraform init -no-color
+      terraform validate -no-color
+    dir: 'applications/rag/'
+    waitFor: ['validate platform']
+
+  # Create cluster to test ray, jupyterhub
   - id: 'create gke cluster'
     name: 'gcr.io/$PROJECT_ID/terraform'
     env:
     - '-c'
     - |
       set -e
       terraform apply \
       -var-file=tfvars_tests/standard-gke-public.platform.tfvars \
       -var=project_id=$PROJECT_ID \
       -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-cluster \
       -var=cluster_region=$_REGION \
       -auto-approve -no-color -lock=false
       echo "pass" > /workspace/gke_cluster_result.txt
     dir: 'infrastructure/'
     allowFailure: true
     waitFor: ['validate platform', 'validate ray', 'validate jupyterhub', 'validate rag']
 
   - id: 'test ray cluster'
     name: 'gcr.io/$PROJECT_ID/terraform'
     entrypoint: 'sh'
     args:
     - '-c'
     - |
       set -e
 
       # Get kube config
       gcloud container clusters get-credentials \
       ml-$SHORT_SHA-$_PR_NUMBER-cluster \
       --location $_REGION \
       --project $PROJECT_ID
 
       cd /workspace/applications/ray/
       terraform apply \
       -var-file=workloads.tfvars \
       -var=project_id=$PROJECT_ID \
       -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-cluster \
       -var=cluster_location=$_REGION \
       -var=ray_namespace=ml-$SHORT_SHA \
       -var=gcp_service_account=ray-sa-$SHORT_SHA \
       -var=gcs_bucket=gke-aieco-ray-$SHORT_SHA \
       -auto-approve -no-color -lock=false
       echo "pass" > /workspace/user_result.txt
 
       # Make sure pods are running
       kubectl wait --all pods -n ml-$SHORT_SHA --for=condition=Ready --timeout=300s
       kubectl port-forward -n ml-$SHORT_SHA service/example-cluster-kuberay-head-svc 8265:8265 &
       sleep 5s
 
       ray job submit --working-dir ./example_ray_job_scripts \
       --address=http://127.0.0.1:8265 -- python -c "import ray; ray.init(); 
print(ray.cluster_resources())" echo "pass" > /workspace/ray_result.txt allowFailure: true waitFor: ['create gke cluster'] + - id: 'cleanup ray cluster' + name: 'gcr.io/$PROJECT_ID/terraform' + entrypoint: 'bash' + args: + - '-c' + - | + set -e + + cd /workspace/applications/ray/ + terraform destroy \ + -var-file=workloads.tfvars \ + -var=project_id=$PROJECT_ID \ + -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-cluster \ + -var=cluster_location=$_REGION \ + -var=ray_namespace=ml-$SHORT_SHA \ + -var=gcp_service_account=ray-sa-$SHORT_SHA \ + -var=gcs_bucket=gke-aieco-ray-$SHORT_SHA \ + -auto-approve -no-color + + allowFailure: true + waitFor: ['test ray cluster'] + - id: 'test jupyterhub' name: 'gcr.io/$PROJECT_ID/terraform' entrypoint: 'bash' @@ -111,7 +145,7 @@ steps: -var=namespace=ml-$SHORT_SHA \ -var=workload_identity_service_account=jupyter-sa-$SHORT_SHA \ -var=gcs_bucket=gke-aieco-jupyter-$SHORT_SHA \ - -auto-approve -no-color + -auto-approve -no-color -lock=false echo "pass" > /workspace/jupyterhub_tf_result.txt kubectl wait --all pods -n ml-$SHORT_SHA --for=condition=Ready --timeout=300s @@ -123,9 +157,94 @@ steps: python3 test_hub.py $(cat /workspace/jupyterhub_host_url.txt) echo "pass" > /workspace/jupyterhub_test_result.txt allowFailure: true - waitFor: ['test ray cluster'] + # waitFor: ['cleanup ray cluster'] - - id: 'clean gke cluster' + - id: 'cleanup jupyterhub' + name: 'gcr.io/$PROJECT_ID/terraform' + entrypoint: 'bash' + args: + - '-c' + - | + set -e + + cd /workspace/applications/jupyter/ + terraform destroy \ + -var-file=workloads-without-iap.example.tfvars \ + -var=project_id=$PROJECT_ID \ + -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-cluster \ + -var=namespace=ml-$SHORT_SHA \ + -var=workload_identity_service_account=jupyter-sa-$SHORT_SHA \ + -var=gcs_bucket=gke-aieco-jupyter-$SHORT_SHA \ + -auto-approve -no-color + + allowFailure: true + waitFor: ['test jupyterhub'] + + - id: 'test rag' + name: 'gcr.io/$PROJECT_ID/terraform' + entrypoint: 'sh' + args: + - '-c' + - | + set -e + + # Get kube config + gcloud container clusters get-credentials \ + ml-$SHORT_SHA-$_PR_NUMBER-cluster \ + --location $_REGION \ + --project $PROJECT_ID + + cd /workspace/modules/jupyter/tests + python3 change_jupyter_config.py + + cd /workspace/applications/rag/ + terraform apply \ + -var-file=workloads.tfvars \ + -var=jupyter_add_auth=false \ + -var=frontend_add_auth=false \ + -var=project_id=$PROJECT_ID \ + -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-cluster \ + -var=kubernetes_namespace=rag-$SHORT_SHA \ + -var=gcs_bucket=gke-aieco-rag-$SHORT_SHA \ + -var=ray_service_account=ray-sa-$SHORT_SHA \ + -var=rag_service_account=rag-sa-$SHORT_SHA \ + -var=jupyter_service_account=jupyter-sa-$SHORT_SHA \ + -var=cloudsql_instance=pgvector-instance-$SHORT_SHA \ + -auto-approve -no-color -lock=false + echo "pass" > /workspace/rag_tf_result.txt + + # Validate Ray: Make sure pods are running + kubectl wait --all pods -n rag-$SHORT_SHA --for=condition=Ready --timeout=300s + kubectl port-forward -n rag-$SHORT_SHA service/example-cluster-kuberay-head-svc 8265:8265 & + # Wait port-forwarding to take its place + sleep 5s + + # Validate Ray: Check dashboard + ray job submit --working-dir ./tests \ + --address=http://127.0.0.1:8265 -- python -c "import ray; ray.init(); print(ray.cluster_resources())" + echo "pass" > /workspace/rag_ray_dashboard_result.txt + + # Validate Jupyterhub: Get hub url + kubectl get services -n rag-$SHORT_SHA + kubectl get service proxy-public -n rag-$SHORT_SHA --output 
jsonpath='{.status.loadBalancer.ingress[0].ip}' > /workspace/rag_jupyterhub_host_url.txt + echo "HOST URL is " $(cat /workspace/rag_jupyterhub_host_url.txt) + + # Validate Jupyterhub: Test Hub + cd /workspace/modules/jupyter/tests + python3 test_hub.py $(cat /workspace/rag_jupyterhub_host_url.txt) + echo "pass" > /workspace/rag_jupyterhub_test_result.txt + + # Validate RAG: Test rag frontend + kubectl port-forward -n rag-$SHORT_SHA service/rag-frontend 8081:8080 & + # Wait port-forwarding to take its place + sleep 5s + + cd /workspace/applications/rag/tests + python3 test_frontend.py "127.0.0.1:8081" + echo "pass" > /workspace/rag_frontend_result.txt + allowFailure: true + + - id: 'cleanup rag' name: 'gcr.io/$PROJECT_ID/terraform' entrypoint: 'bash' args: @@ -154,12 +273,37 @@ steps: -var=gcs_bucket=gke-aieco-ray-$SHORT_SHA \ -auto-approve -no-color + cd /workspace/applications/rag/ + terraform destroy \ + -var-file=workloads.tfvars \ + -var=project_id=$PROJECT_ID \ + -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-cluster \ + -var=kubernetes_namespace=rag-$SHORT_SHA \ + -var=gcs_bucket=gke-aieco-rag-$SHORT_SHA \ + -var=ray_service_account=ray-sa-$SHORT_SHA \ + -var=rag_service_account=rag-sa-$SHORT_SHA \ + -var=jupyter_service_account=jupyter-sa-$SHORT_SHA \ + -var=cloudsql_instance=pgvector-instance-$SHORT_SHA \ + -auto-approve -no-color + + allowFailure: true + waitFor: ['test rag'] + + - id: 'cleanup gke cluster' + name: 'gcr.io/$PROJECT_ID/terraform' + entrypoint: 'bash' + args: + - '-c' + - | + set -e + cd /workspace/infrastructure terraform destroy -var-file=tfvars_tests/standard-gke-public.platform.tfvars -var=project_id=$PROJECT_ID \ -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-cluster \ -var=cluster_region=$_REGION -auto-approve -no-color + allowFailure: true - waitFor: ['test jupyterhub'] + waitFor: ['cleanup rag'] - id: 'check result' name: 'gcr.io/$PROJECT_ID/terraform' @@ -191,8 +335,32 @@ steps: echo "jupyterhub test failed" exit 1 fi - waitFor: ['clean gke cluster'] + + if [[ $(cat /workspace/rag_tf_result.txt) != "pass" ]]; then + echo "rag tf failed" + exit 1 + fi + + if [[ $(cat /workspace/rag_ray_dashboard_result.txt) != "pass" ]]; then + echo "rag ray dashboard test failed" + exit 1 + fi + + if [[ $(cat /workspace/rag_jupyterhub_test_result.txt) != "pass" ]]; then + echo "rag jupyterhub test failed" + exit 1 + fi + + if [[ $(cat /workspace/rag_frontend_result.txt) != "pass" ]]; then + echo "rag frontend test failed" + exit 1 + fi + + waitFor: ['cleanup gke cluster'] substitutions: _REGION: us-central1 _USER_NAME: github +options: + substitutionOption: 'ALLOW_LOOSE' + diff --git a/infrastructure/tfvars_tests/standard-gke-public.platform.tfvars b/infrastructure/tfvars_tests/standard-gke-public.platform.tfvars index 0c3680cfc..4046f4362 100644 --- a/infrastructure/tfvars_tests/standard-gke-public.platform.tfvars +++ b/infrastructure/tfvars_tests/standard-gke-public.platform.tfvars @@ -49,3 +49,36 @@ cpu_pools = [{ disk_size_gb = 100 disk_type = "pd-standard" }] + +## make sure required gpu quotas are available in the corresponding region +enable_gpu = true +gpu_pools = [{ + name = "gpu-pool-t4" + machine_type = "n1-standard-16" + node_locations = "us-central1-b,us-central1-c" + autoscaling = true + min_count = 1 + max_count = 3 + disk_size_gb = 100 + enable_gcfs = true + logging_variant = "DEFAULT" + disk_type = "pd-balanced" + accelerator_count = 2 + accelerator_type = "nvidia-tesla-t4" + gpu_driver_version = "LATEST" +}, +{ + name = "gpu-pool-l4" + machine_type = 
"g2-standard-24" + node_locations = "us-central1-a" + autoscaling = true + min_count = 2 + max_count = 3 + accelerator_count = 2 + disk_size_gb = 100 + enable_gcfs = true + logging_variant = "DEFAULT" + disk_type = "pd-balanced" + accelerator_type = "nvidia-l4" + gpu_driver_version = "LATEST" +}] \ No newline at end of file diff --git a/modules/iap/variables.tf b/modules/iap/variables.tf index 613fefcb9..b12c761a1 100644 --- a/modules/iap/variables.tf +++ b/modules/iap/variables.tf @@ -137,8 +137,8 @@ variable "jupyter_k8s_backend_service_name" { variable "jupyter_k8s_backend_service_port" { type = number - description = "NName of the Backend Service Port" - default = 80 + description = "Name of the Backend Service Port" + default = 80 } variable "jupyter_url_domain_addr" { From d8713479e2fb551dd93c7325590a2b2727ec56f2 Mon Sep 17 00:00:00 2001 From: zlq Date: Wed, 6 Mar 2024 01:21:20 -0800 Subject: [PATCH 6/8] Update instruction of IAP (#285) Update README --- applications/rag/README.md | 53 +++++++++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 7 deletions(-) diff --git a/applications/rag/README.md b/applications/rag/README.md index 9876b9159..d585535c1 100644 --- a/applications/rag/README.md +++ b/applications/rag/README.md @@ -91,7 +91,11 @@ gcloud container clusters get-credentials ${CLUSTER_NAME:?} --location ${CLUSTER 1. Verify Kuberay is setup: run `kubectl get pods -n ${NAMESPACE:?}`. There should be a Ray head (and Ray worker pod on GKE Standard only) in `Running` state (prefixed by `example-cluster-kuberay-head-` and `example-cluster-kuberay-worker-workergroup-`). 2. Verify Jupyterhub service is setup: - * Fetch the service IP: `kubectl get services proxy-public -n ${NAMESPACE:?} --output jsonpath='{.status.loadBalancer.ingress[0].ip}'` + * Fetch the service IP/Domain: + * IAP disabled: `kubectl get services proxy-public -n $NAMESPACE --output jsonpath='{.status.loadBalancer.ingress[0].ip}'` + * IAP enabled: Read terraform output `jupyter_uri` or use command: `kubectl get managedcertificates jupyter-managed-cert -n $NAMESPACE --output jsonpath='{.status.domainStatus[0].domain}'` + * Remember login [Google Cloud Platform IAP](https://pantheon.corp.google.com/security/iap) to check if user has role `IAP-secured Web App User` + * Wait for domain status to be `Active` * Go to the IP in a browser which should display the Jupyterlab login UI. 3. Verify the instance `pgvector-instance` exists: `gcloud sql instances list | grep pgvector` @@ -118,8 +122,16 @@ EOF * At the end of the smoke test with the TGI server, stop port forwarding by using Ctrl-C on the original terminal. 5. Verify the frontend chat interface is setup: - * Verify the service exists: `kubectl get services rag-frontend -n ${NAMESPACE:?}` - * Verify the deployment exists: `kubectl get deployments rag-frontend -n ${NAMESPACE:?}` and ensure the deployment is in `READY` state. + * Verify the service exists: `kubectl get services rag-frontend -n ${NAMESPACE:?}` + * Verify the deployment exists: `kubectl get deployments rag-frontend -n ${NAMESPACE:?}` and ensure the deployment is in `READY` state. 
+   * Verify the managed certificate is `Active`:
+   ```
+   kubectl get managedcertificates frontend-managed-cert -n rag --output jsonpath='{.status.domainStatus[0].status}'
+   ```
+   * Verify IAP is enabled:
+   ```
+   gcloud compute backend-services list --format="table(name, backends, iap.enabled)"
+   ```
 
 ### Vector Embeddings for Dataset
 
This step generates the vector embeddings for your input dataset. Currently, the
 
 1. Create a CloudSQL user to access the database: `gcloud sql users create rag-user-notebook --password= --instance=pgvector-instance --host=%`
 
-2. Go to the Jupyterhub service endpoint in a browser: `kubectl get services proxy-public -n ${NAMESPACE:?} --output jsonpath='{.status.loadBalancer.ingress[0].ip}'`
-
+2. Go to the Jupyterhub service endpoint in a browser:
+   * IAP disabled: `kubectl get services proxy-public -n $NAMESPACE --output jsonpath='{.status.loadBalancer.ingress[0].ip}'`
+   * IAP enabled: Read terraform output `jupyter_uri` or use the command: `kubectl get managedcertificates jupyter-managed-cert -n $NAMESPACE --output jsonpath='{.status.domainStatus[0].domain}'`
+   * Remember to log in to GCP and check that your user has the `IAP-secured Web App User` role
+   * Wait for the domain status to be `Active`
 3. Login with placeholder credentials [TBD: replace with instructions for IAP]:
    * username: user
    * password: use `terraform output jupyter_password` to fetch the password value
 
 ### Launch the Frontend Chat Interface
 
-1. Setup port forwarding for the frontend [TBD: Replace with IAP]: `kubectl port-forward service/rag-frontend -n ${NAMESPACE:?} 8080:8080 &`
+#### Accessing the Frontend with IAP Disabled
+1. Set up port forwarding for the frontend: `kubectl port-forward service/rag-frontend -n $NAMESPACE 8080:8080 &`
 
 2. Go to `localhost:8080` in a browser & start chatting! This will fetch context related to your prompt from the vector embeddings in the `pgvector-instance`, augment the original prompt with the context & query the inference model (`mistral-7b`) with the augmented prompt.
 
-3. TODO: Add some example prompts for the dataset.
+#### Accessing the Frontend with IAP Enabled
+1. Verify IAP is Enabled
+
+   * Ensure that IAP is enabled on Google Cloud Platform (GCP) for your application. If you encounter any errors, try re-enabling IAP.
+
+2. Verify User Role
+
+   * Make sure you have the role `IAP-secured Web App User` assigned to your user account. This role is necessary to access the application through IAP.
+
+3. Verify the Domain is Active
+   * Make sure the domain is active using the command:
+   `kubectl get managedcertificates frontend-managed-cert -n rag --output jsonpath='{.status.domainStatus[0].status}'`
+
+4. Retrieve the Domain
+
+   * Read terraform output `frontend_uri` or use the following command to find the domain created by IAP for accessing your service:
+   `kubectl get managedcertificates frontend-managed-cert -n $NAMESPACE --output jsonpath='{.status.domainStatus[0].domain}'`
+
+5. Access the Frontend
+
+   * Open your browser and navigate to the domain you retrieved in the previous step to start chatting!
+
+#### Prompts Example
+[TODO: Add some example prompts for the dataset].
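+
+Until dataset-specific example prompts land here, you can at least confirm the frontend answers requests end to end. A minimal sketch, reusing this PR's own `applications/rag/tests/test_frontend.py` helper and assuming the IAP-disabled port-forward from the steps above is still running:
+
+```
+# Assumes `kubectl port-forward service/rag-frontend -n $NAMESPACE 8080:8080 &` is active.
+# test_frontend.py prepends "http://" to its argument and raises on a 4xx/5xx response.
+python3 applications/rag/tests/test_frontend.py "127.0.0.1:8080"
+```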
### Cleanup From 13be33c63a2ff371c5f374bc50d9d338a40ef35d Mon Sep 17 00:00:00 2001 From: Umesh Kumhar Date: Wed, 6 Mar 2024 16:55:45 +0530 Subject: [PATCH 7/8] Update frontend dependency with cloudsql (#302) update frontend dependency with cloudsql --- applications/rag/main.tf | 22 +++++++++++----------- modules/cloudsql/outputs.tf | 7 ++++++- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/applications/rag/main.tf b/applications/rag/main.tf index 017f20a26..63eb89ac4 100644 --- a/applications/rag/main.tf +++ b/applications/rag/main.tf @@ -89,10 +89,10 @@ provider "helm" { } module "namespace" { - source = "../../modules/kubernetes-namespace" - providers = { helm = helm.rag} + source = "../../modules/kubernetes-namespace" + providers = { helm = helm.rag } create_namespace = true - namespace = var.kubernetes_namespace + namespace = var.kubernetes_namespace } module "kuberay-operator" { @@ -115,12 +115,12 @@ module "gcs" { } module "cloudsql" { - source = "../../modules/cloudsql" - providers = { kubernetes = kubernetes.rag } - project_id = var.project_id - instance_name = var.cloudsql_instance - namespace = var.kubernetes_namespace - depends_on = [module.namespace] + source = "../../modules/cloudsql" + providers = { kubernetes = kubernetes.rag } + project_id = var.project_id + instance_name = var.cloudsql_instance + namespace = var.kubernetes_namespace + depends_on = [module.namespace] } module "jupyterhub" { @@ -200,7 +200,7 @@ module "frontend" { google_service_account = var.rag_service_account namespace = var.kubernetes_namespace inference_service_endpoint = module.inference-server.inference_service_endpoint - cloudsql_instance = var.cloudsql_instance + cloudsql_instance = module.cloudsql.instance db_secret_name = module.cloudsql.db_secret_name db_secret_namespace = module.cloudsql.db_secret_namespace dataset_embeddings_table_name = var.dataset_embeddings_table_name @@ -220,5 +220,5 @@ module "frontend" { url_domain_addr = var.frontend_url_domain_addr url_domain_name = var.frontend_url_domain_name members_allowlist = var.frontend_members_allowlist - depends_on = [ module.namespace ] + depends_on = [module.namespace] } diff --git a/modules/cloudsql/outputs.tf b/modules/cloudsql/outputs.tf index f4010b142..cd8e2d1fb 100644 --- a/modules/cloudsql/outputs.tf +++ b/modules/cloudsql/outputs.tf @@ -20,4 +20,9 @@ output "db_secret_name" { output "db_secret_namespace" { description = "Cloud SQL DB secret namespace" value = kubernetes_secret.secret.metadata[0].namespace -} \ No newline at end of file +} + +output "instance" { + description = "Cloud SQL Instance name" + value = google_sql_database_instance.main.name +} From ffd5a195796823c2ce9afc45894ef81a916940f5 Mon Sep 17 00:00:00 2001 From: Umesh Kumhar Date: Wed, 6 Mar 2024 18:14:48 +0530 Subject: [PATCH 8/8] Add github actions for tf lint check (#296) Add github actions for tf lint check --- .github/workflows/ci.yaml | 35 +++++++++++++++++++ .../workloads-without-iap.example.tfvars | 8 ++--- applications/rag/variables.tf | 4 +-- applications/rag/workloads.tfvars | 2 +- .../user/modules/service_accounts/versions.tf | 2 +- applications/ray/versions.tf | 4 +-- .../sample-terraform.tfvars | 4 +-- .../standard-gke-public.platform.tfvars | 4 +-- modules/iap/iap.tf | 4 +-- modules/iap/variables.tf | 2 +- modules/kuberay-monitoring/main.tf | 2 +- tutorials/hf-tgi/outputs.tf | 2 +- 12 files changed, 54 insertions(+), 19 deletions(-) create mode 100644 .github/workflows/ci.yaml diff --git a/.github/workflows/ci.yaml 
b/.github/workflows/ci.yaml new file mode 100644 index 000000000..3188efcec --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,35 @@ +name: Terraform CI +on: + push: + branches: + - main + pull_request: + branches: + - main +jobs: + Terraform-Lint-Check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: hashicorp/setup-terraform@v3 + with: + terraform_version: "1.5.7" + + - name: Terraform fmt + id: fmt + run: terraform fmt -check -recursive + + - name: Terraform Init + id: init + run: | + terraform -chdir=applications/rag init + terraform -chdir=applications/ray init + terraform -chdir=applications/jupyter init + + - name: Terraform Validate + id: validate + run: | + terraform -chdir=applications/rag validate -no-color + terraform -chdir=applications/ray validate -no-color + terraform -chdir=applications/jupyter validate -no-color + diff --git a/applications/jupyter/workloads-without-iap.example.tfvars b/applications/jupyter/workloads-without-iap.example.tfvars index ccf033cf2..e048ac674 100644 --- a/applications/jupyter/workloads-without-iap.example.tfvars +++ b/applications/jupyter/workloads-without-iap.example.tfvars @@ -26,10 +26,10 @@ cluster_membership_id = "" # required only for private clusters, default: cluste ####################################################### ## JupyterHub variables -namespace = "jupyter" -gcs_bucket = "" -create_gcs_bucket = true -workload_identity_service_account = "jupyter-service-account" +namespace = "jupyter" +gcs_bucket = "" +create_gcs_bucket = true +workload_identity_service_account = "jupyter-service-account" # Jupyterhub without IAP add_auth = false diff --git a/applications/rag/variables.tf b/applications/rag/variables.tf index ca86bcf04..a5e77ff78 100644 --- a/applications/rag/variables.tf +++ b/applications/rag/variables.tf @@ -265,9 +265,9 @@ variable "autopilot_cluster" { } variable "cloudsql_instance" { - type = string + type = string description = "Name of the CloudSQL instance for RAG VectorDB" - default = "pgvector-instance" + default = "pgvector-instance" } variable "cpu_pools" { diff --git a/applications/rag/workloads.tfvars b/applications/rag/workloads.tfvars index aba62feae..dca101637 100644 --- a/applications/rag/workloads.tfvars +++ b/applications/rag/workloads.tfvars @@ -38,7 +38,7 @@ rag_service_account = "rag-system-account" # Creates a google service account & k8s service account & configures workload identity with appropriate permissions. # Set to false & update the variable `jupyter_service_account` to use an existing IAM service account. -jupyter_service_account = "jupyter-system-account" +jupyter_service_account = "jupyter-system-account" ## Embeddings table name - change this to the TABLE_NAME used in the notebook. 
dataset_embeddings_table_name = "googlemaps_reviews_db" diff --git a/applications/ray/raytrain-examples/raytrain-with-gcsfusecsi/kuberaytf/user/modules/service_accounts/versions.tf b/applications/ray/raytrain-examples/raytrain-with-gcsfusecsi/kuberaytf/user/modules/service_accounts/versions.tf index 436ce51c2..53d5c8e95 100644 --- a/applications/ray/raytrain-examples/raytrain-with-gcsfusecsi/kuberaytf/user/modules/service_accounts/versions.tf +++ b/applications/ray/raytrain-examples/raytrain-with-gcsfusecsi/kuberaytf/user/modules/service_accounts/versions.tf @@ -15,7 +15,7 @@ terraform { required_providers { google = { - source = "hashicorp/google" + source = "hashicorp/google" } kubernetes = { source = "hashicorp/kubernetes" diff --git a/applications/ray/versions.tf b/applications/ray/versions.tf index a8a0a268a..b8e6f2c71 100644 --- a/applications/ray/versions.tf +++ b/applications/ray/versions.tf @@ -15,10 +15,10 @@ terraform { required_providers { google = { - source = "hashicorp/google" + source = "hashicorp/google" } google-beta = { - source = "hashicorp/google-beta" + source = "hashicorp/google-beta" } helm = { source = "hashicorp/helm" diff --git a/benchmarks/benchmark/tools/locust-load-inference/sample-terraform.tfvars b/benchmarks/benchmark/tools/locust-load-inference/sample-terraform.tfvars index ff2e8cd05..dcd6739b4 100644 --- a/benchmarks/benchmark/tools/locust-load-inference/sample-terraform.tfvars +++ b/benchmarks/benchmark/tools/locust-load-inference/sample-terraform.tfvars @@ -21,5 +21,5 @@ tokenizer = "tiiuae/falcon-7b" # Benchmark configuration for triggering single test via Locust Runner test_duration = 60 # Increase test_users to allow more parallelism (especially when testing HPA) -test_users = 1 -test_rate = 5 +test_users = 1 +test_rate = 5 diff --git a/infrastructure/tfvars_tests/standard-gke-public.platform.tfvars b/infrastructure/tfvars_tests/standard-gke-public.platform.tfvars index 4046f4362..86d951569 100644 --- a/infrastructure/tfvars_tests/standard-gke-public.platform.tfvars +++ b/infrastructure/tfvars_tests/standard-gke-public.platform.tfvars @@ -66,8 +66,8 @@ gpu_pools = [{ accelerator_count = 2 accelerator_type = "nvidia-tesla-t4" gpu_driver_version = "LATEST" -}, -{ + }, + { name = "gpu-pool-l4" machine_type = "g2-standard-24" node_locations = "us-central1-a" diff --git a/modules/iap/iap.tf b/modules/iap/iap.tf index 097a1f387..c9344ae18 100644 --- a/modules/iap/iap.tf +++ b/modules/iap/iap.tf @@ -36,7 +36,7 @@ resource "helm_release" "iap_jupyter" { name = "iap-jupyter" chart = "${path.module}/charts/iap/" namespace = var.namespace - create_namespace = true + create_namespace = true # timeout increased to support autopilot scaling resources, and give enough time to complete the deployment timeout = 1200 set { @@ -108,7 +108,7 @@ resource "helm_release" "iap_frontend" { name = "iap-frontend" chart = "${path.module}/charts/iap/" namespace = var.namespace - create_namespace = true + create_namespace = true # timeout increased to support autopilot scaling resources, and give enough time to complete the deployment timeout = 1200 set { diff --git a/modules/iap/variables.tf b/modules/iap/variables.tf index b12c761a1..af09d87c2 100644 --- a/modules/iap/variables.tf +++ b/modules/iap/variables.tf @@ -138,7 +138,7 @@ variable "jupyter_k8s_backend_service_name" { variable "jupyter_k8s_backend_service_port" { type = number description = "Name of the Backend Service Port" - default = 80 + default = 80 } variable "jupyter_url_domain_addr" { diff --git 
a/modules/kuberay-monitoring/main.tf b/modules/kuberay-monitoring/main.tf index 46e627058..8a320ec81 100644 --- a/modules/kuberay-monitoring/main.tf +++ b/modules/kuberay-monitoring/main.tf @@ -47,7 +47,7 @@ resource "helm_release" "grafana" { } data "kubernetes_service" "example" { - count = var.enable_grafana_on_ray_dashboard ? 1 : 0 + count = var.enable_grafana_on_ray_dashboard ? 1 : 0 metadata { name = "grafana" namespace = var.namespace diff --git a/tutorials/hf-tgi/outputs.tf b/tutorials/hf-tgi/outputs.tf index 3816613c4..7078bac0d 100644 --- a/tutorials/hf-tgi/outputs.tf +++ b/tutorials/hf-tgi/outputs.tf @@ -24,5 +24,5 @@ output "inference_service_namespace" { output "inference_service_endpoint" { description = "Endpoint of model inference service" - value = kubernetes_service.inference_service.status != null ? (kubernetes_service.inference_service.status[0].load_balancer != null ? "${kubernetes_service.inference_service.status[0].load_balancer[0].ingress[0].ip}" : "") : "" + value = kubernetes_service.inference_service.status != null ? (kubernetes_service.inference_service.status[0].load_balancer != null ? "${kubernetes_service.inference_service.status[0].load_balancer[0].ingress[0].ip}" : "") : "" }
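
The whitespace-only hunks above are exactly what `terraform fmt` rewrites. A minimal sketch for reproducing this workflow's gate locally before pushing, assuming Terraform 1.5.7 (the version pinned in `.github/workflows/ci.yaml`) is on your PATH:

```
# Mirror the CI steps: recursive fmt check, then init + validate per app.
terraform fmt -check -recursive
for app in rag ray jupyter; do
  terraform -chdir=applications/$app init
  terraform -chdir=applications/$app validate -no-color
done
```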