From 775407b7b7bda316494a43bd9d3aa25028847615 Mon Sep 17 00:00:00 2001
From: imreddy13 <132504814+imreddy13@users.noreply.github.com>
Date: Thu, 7 Mar 2024 13:13:17 -0800
Subject: [PATCH] Cleanup Cloud SQL DB creds for notebook and frontend (#295)

Cleanup Cloud SQL DB creds
1) Use a k8s secret for DB creds
2) Mount k8s secret in volumes for Ray & frontend pods
3) Read the mounted secret volume from the Ray job script in the notebook and from the frontend container
4) Remove unnecessary DB cred variables from the notebook
5) Fix import bug with NLP/DLP
---
 applications/rag/README.md | 50 +++++++++----------
 .../rag-kaggle-ray-sql-latest.ipynb | 42 ++++++++++------
 applications/rag/frontend/container/main.py | 10 +++-
 .../rag/frontend/container/rai/dlp_filter.py | 2 +-
 .../rag/frontend/container/rai/nlp_filter.py | 2 +-
 applications/rag/frontend/main.tf | 45 ++++++-----------
 applications/rag/frontend/variables.tf | 8 +--
 applications/rag/main.tf | 4 +-
 applications/rag/variables.tf | 6 +++
 applications/rag/workloads.tfvars | 3 +-
 .../kuberay-autopilot-values.yaml | 41 ++++++++++++---
 .../kuberay-cluster/kuberay-gpu-values.yaml | 33 ++++++++++--
 .../kuberay-cluster/kuberay-tpu-values.yaml | 41 ++++++++++++---
 modules/kuberay-cluster/kuberay-values.yaml | 26 ++++++++--
 modules/kuberay-cluster/kuberay.tf | 12 +++++
 modules/kuberay-cluster/variables.tf | 12 +++++
 16 files changed, 234 insertions(+), 103 deletions(-)

diff --git a/applications/rag/README.md b/applications/rag/README.md
index 8c904a76f..975babe68 100644
--- a/applications/rag/README.md
+++ b/applications/rag/README.md
@@ -93,8 +93,8 @@ gcloud container clusters get-credentials ${CLUSTER_NAME:?} --location ${CLUSTER
 2. Verify Jupyterhub service is setup:
    * Fetch the service IP/Domain:
      * IAP disabled: `kubectl get services proxy-public -n $NAMESPACE --output jsonpath='{.status.loadBalancer.ingress[0].ip}'`
-     * IAP enabled: Read terraform output `jupyter_uri` or use command: `kubectl get managedcertificates jupyter-managed-cert -n $NAMESPACE --output jsonpath='{.status.domainStatus[0].domain}'`
-     * Remember login [Google Cloud Platform IAP](https://pantheon.corp.google.com/security/iap) to check if user has role `IAP-secured Web App User`
+     * IAP enabled: Read terraform output: `terraform output jupyter_uri` or use command: `kubectl get managedcertificates jupyter-managed-cert -n $NAMESPACE --output jsonpath='{.status.domainStatus[0].domain}'`
+     * From [Google Cloud Platform IAP](https://pantheon.corp.google.com/security/iap), check if the allowlisted user has role `IAP-secured Web App User`
    * Wait for domain status to be `Active`
    * Go to the IP in a browser which should display the Jupyterlab login UI.
@@ -137,37 +137,35 @@ EOF
 ### Vector Embeddings for Dataset
-Choose a password for your CloudSQL user:
-```
-SQL_PASSWORD=
-```
-
 This step generates the vector embeddings for your input dataset. Currently, the default dataset is [Google Maps Restaurant Reviews](https://www.kaggle.com/datasets/denizbilginn/google-maps-restaurant-reviews). We will use a Jupyter notebook to run a Ray job that generates the embeddings & populates them into the instance `pgvector-instance` created above.
-1. Create a CloudSQL user to access the database: `gcloud sql users create rag-user-notebook --password=${SQL_PASSWORD:?} --instance=pgvector-instance --host=%`
-
-2. Go to the Jupyterhub service endpoint in a browser:
+1. Fetch the Jupyterhub service endpoint & navigate to it in a browser. This should display the JupyterLab login UI:
    * IAP disabled: `kubectl get services proxy-public -n $NAMESPACE --output jsonpath='{.status.loadBalancer.ingress[0].ip}'`
-   * IAP enabled: Read terraform output `jupyter_uri` or use command: `kubectl get managedcertificates jupyter-managed-cert -n $NAMESPACE --output jsonpath='{.status.domainStatus[0].domain}'`
-   * Open Google Cloud Console IAM to verify that the user has role `IAP-secured Web App User`
+   * IAP enabled: Read terraform output: `terraform output jupyter_uri` or use command: `kubectl get managedcertificates jupyter-managed-cert -n $NAMESPACE --output jsonpath='{.status.domainStatus[0].domain}'`
+   * From [Google Cloud Platform IAP](https://pantheon.corp.google.com/security/iap), check if the allowlisted user has role `IAP-secured Web App User`.
    * Wait for the domain status to be `Active`
-3. Login with placeholder credentials [TBD: replace with instructions for IAP]:
-   * username: user
-   * password: use `terraform output jupyter_password` to fetch the password value
-4. Once logged in, choose the `CPU` preset. Go to File -> Open From URL & upload the notebook `rag-kaggle-ray-sql.ipynb` from `https://raw.githubusercontent.com/GoogleCloudPlatform/ai-on-gke/main/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb`. This path can also be found by going to the [notebook location](https://github.com/GoogleCloudPlatform/ai-on-gke/blob/main/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb) and selecting `Raw`.
+2. Log in to Jupyterhub:
+   * IAP disabled: Use placeholder credentials:
+      * username: user
+      * password: use `terraform output jupyter_password` to fetch the password value
+   * IAP enabled: Log in with your Google credentials.
+
+3. Once logged in, choose the `CPU` preset. Go to File -> Open From URL & upload the notebook `rag-kaggle-ray-sql.ipynb` from `https://raw.githubusercontent.com/GoogleCloudPlatform/ai-on-gke/main/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb`. This path can also be found by going to the [notebook location](https://github.com/GoogleCloudPlatform/ai-on-gke/blob/main/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb) and selecting `Raw`.
+
+4. Create a Kaggle account and navigate to https://www.kaggle.com/settings/account and generate an API token. See https://www.kaggle.com/docs/api#authentication for how to create one from https://kaggle.com/settings. This token is used in the notebook to access the [Google Maps Restaurant Reviews dataset](https://www.kaggle.com/datasets/denizbilginn/google-maps-restaurant-reviews).
-5. Replace the variables in the 3rd cell with the following to access the database:
-   * `INSTANCE_CONNECTION_NAME`: `::pgvector-instance`
-   * `DB_USER`: `rag-user-notebook`
-   * `DB_PASS`: password from step 1
+5. Replace the variables in the 1st cell with your Kaggle credentials (these can be found in the `kaggle.json` file created by Step 4):
+   * `KAGGLE_USERNAME`
+   * `KAGGLE_KEY`
-6. Create a Kaggle account and navigate to https://www.kaggle.com/settings/account and generate an API token. See https://www.kaggle.com/docs/api#authentication how to create one from https://kaggle.com/settings. This token is used in the notebook to access the [Google Maps Restaurant Reviews dataset](https://www.kaggle.com/datasets/denizbilginn/google-maps-restaurant-reviews)
+6. Run all the cells in the notebook. This generates vector embeddings for the input dataset (`denizbilginn/google-maps-restaurant-reviews`) and stores them in the `pgvector-instance` via a Ray job.
+   * When the last cell says the job has succeeded (e.g. `Job 'raysubmit_APungAw6TyB55qxk' succeeded`), the vector embeddings have been generated and we can launch the frontend chat interface.
+   * Ray may take several minutes to create the runtime environment. During this time, the job will appear to be missing (e.g. `Status message: Job has not started yet`).
-8. Replace the kaggle username and api token in 2nd cell with your credentials (can be found in the `kaggle.json` file created by Step 6):
-   * `os.environ['KAGGLE_USERNAME']`
-   * `os.environ['KAGGLE_KEY']`
+### Launch the Frontend Chat Interface
+1. Set up port forwarding for the frontend [TBD: Replace with IAP]: `kubectl port-forward service/rag-frontend -n ${NAMESPACE:?} 8080:8080 &`
 9. Run all the cells in the notebook. This will generate vector embeddings for the input dataset (`denizbilginn/google-maps-restaurant-reviews`) and store them in the `pgvector-instance` via a Ray job.
    * If the Ray job has FAILED, re-run the cell.
    * When the Ray job has SUCCEEDED, we are ready to launch the frontend chat interface.
@@ -181,10 +179,10 @@ This step generates the vector embeddings for your input dataset. Currently, the
 #### With IAP Enabled
 1. Verify that IAP is enabled on Google Cloud Platform (GCP) for your application. If you encounter any errors, try re-enabling IAP.
-2. Verify that you have the role `IAP-secured Web App User` assigned to your user account. This role is necessary to access the application through IAP.
+2. From [Google Cloud Platform IAP](https://pantheon.corp.google.com/security/iap), check if the allowlisted user has role `IAP-secured Web App User`. This role is necessary to access the application through IAP.
 3. Verify the domain is active using command: `kubectl get managedcertificates frontend-managed-cert -n rag --output jsonpath='{.status.domainStatus[0].status}'`
-3. Read terraform output `frontend_uri` or use the following command to find the domain created by IAP for accessing your service:
+3. Read terraform output: `terraform output frontend_uri` or use the following command to find the domain created by IAP for accessing your service:
 `kubectl get managedcertificates frontend-managed-cert -n $NAMESPACE --output jsonpath='{.status.domainStatus[0].domain}'`
 4. Open your browser and navigate to the domain you retrieved in the previous step to start chatting!
diff --git a/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb b/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb
index 3570e7db3..f4e7ce9fd 100644
--- a/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb
+++ b/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb
@@ -2,7 +2,20 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
+   "id": "00b1aff4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Replace these with your settings\n",
+    "# Navigate to https://www.kaggle.com/settings/account and generate an API token to be used to set up the env variable.
See https://www.kaggle.com/docs/api#authentication how to create one.\n", + "KAGGLE_USERNAME = \"\"\n", + "KAGGLE_KEY = \"\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "a814e91b-3afe-4c28-a3d6-fe087c7af552", "metadata": {}, "outputs": [], @@ -13,15 +26,14 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "1e26faef-9e2e-4793-b8af-0e18470b482d", "metadata": {}, "outputs": [], "source": [ "import os\n", - "# navigate to https://www.kaggle.com/settings/account and generate an API token to be used to setup the env variable. See https://www.kaggle.com/docs/api#authentication how to create one.\n", - "os.environ['KAGGLE_USERNAME'] = \"\"\n", - "os.environ['KAGGLE_KEY'] = \"\"\n", + "os.environ['KAGGLE_USERNAME'] = KAGGLE_USERNAME\n", + "os.environ['KAGGLE_KEY'] = KAGGLE_KEY\n", "\n", "# Download the zip file to local storage and then extract the desired contents directly to the GKE GCS CSI mounted bucket. The bucket is mounted at the \"/persist-data\" path in the jupyter pod.\n", "!kaggle datasets download -d denizbilginn/google-maps-restaurant-reviews -p ~/data --force\n", @@ -64,12 +76,18 @@ "import sqlalchemy\n", "\n", "# initialize parameters\n", - "INSTANCE_CONNECTION_NAME = \"::pgvector-instance\" # Modify the project and region based on your setting\n", + "INSTANCE_CONNECTION_NAME = \"{project}:{region}:pgvector-instance\".format(project=os.environ[\"PROJECT_ID\"], region=os.environ[\"DB_REGION\"])\n", "print(f\"Your instance connection name is: {INSTANCE_CONNECTION_NAME}\")\n", - "DB_USER = \"rag-user-notebook\" # Modify this based on your setting\n", - "DB_PASS = \"\" # Modify this based on your setting\n", "DB_NAME = \"pgvector-database\"\n", "\n", + "db_username_file = open(\"/etc/secret-volume/username\", \"r\")\n", + "DB_USER = db_username_file.read()\n", + "db_username_file.close()\n", + "\n", + "db_password_file = open(\"/etc/secret-volume/password\", \"r\")\n", + "DB_PASS = db_password_file.read()\n", + "db_password_file.close()\n", + "\n", "# initialize Connector object\n", "connector = Connector()\n", "\n", @@ -189,12 +207,6 @@ " # commit transaction (SQLAlchemy v2.X.X is commit as you go)\n", " db_conn.commit()\n", " print(\"Created table=\", TABLE_NAME)\n", - "\n", - " # TODO: Fix workaround access grant for the frontend to access the table.\n", - " grant_access_stmt = \"GRANT SELECT on \" + TABLE_NAME + \" to \\\"rag-user\\\";\"\n", - " db_conn.execute(\n", - " sqlalchemy.text(grant_access_stmt)\n", - " )\n", " \n", " query_text = \"INSERT INTO \" + TABLE_NAME + \" (id, text, text_embedding) VALUES (:id, :text, :text_embedding)\"\n", " insert_stmt = sqlalchemy.text(query_text)\n", @@ -268,7 +280,7 @@ " \"cloud-sql-python-connector[pg8000]==1.7.0\",\n", " \"SQLAlchemy==2.0.7\",\n", " \"huggingface_hub\",\n", - " ]\n", + " ],\n", " }\n", ")\n", "\n", diff --git a/applications/rag/frontend/container/main.py b/applications/rag/frontend/container/main.py index f62d5fc3a..7f433b841 100644 --- a/applications/rag/frontend/container/main.py +++ b/applications/rag/frontend/container/main.py @@ -41,8 +41,14 @@ # initialize parameters INFERENCE_ENDPOINT=os.environ.get('INFERENCE_ENDPOINT', '127.0.0.1:8081') INSTANCE_CONNECTION_NAME = os.environ.get('INSTANCE_CONNECTION_NAME', '') -DB_USER = os.environ.get('DB_USER', '') -DB_PASS = os.environ.get('DB_PASSWORD', '') + +db_username_file = open("/etc/secret-volume/username", "r") +DB_USER = db_username_file.read() +db_username_file.close() + +db_password_file = 
open("/etc/secret-volume/password", "r") +DB_PASS = db_password_file.read() +db_password_file.close() db = None filter_names = ['DlpFilter', 'WebRiskFilter'] diff --git a/applications/rag/frontend/container/rai/dlp_filter.py b/applications/rag/frontend/container/rai/dlp_filter.py index c9c69409a..c7e24d206 100644 --- a/applications/rag/frontend/container/rai/dlp_filter.py +++ b/applications/rag/frontend/container/rai/dlp_filter.py @@ -14,7 +14,7 @@ import os import google.cloud.dlp -from .retry import retry +from . import retry # Convert the project id into a full resource id. parent = os.environ.get('PROJECT_ID', 'NULL') diff --git a/applications/rag/frontend/container/rai/nlp_filter.py b/applications/rag/frontend/container/rai/nlp_filter.py index 4109c8b45..2092293a2 100644 --- a/applications/rag/frontend/container/rai/nlp_filter.py +++ b/applications/rag/frontend/container/rai/nlp_filter.py @@ -14,7 +14,7 @@ import os import google.cloud.language as language -from .retry import retry +from . import retry # Convert the project id into a full resource id. parent = os.environ.get('PROJECT_ID', 'NULL') diff --git a/applications/rag/frontend/main.tf b/applications/rag/frontend/main.tf index e82cc1f76..302c45684 100644 --- a/applications/rag/frontend/main.tf +++ b/applications/rag/frontend/main.tf @@ -99,13 +99,19 @@ resource "kubernetes_deployment" "rag_frontend_deployment" { spec { service_account_name = var.google_service_account container { - image = "us-central1-docker.pkg.dev/ai-on-gke/rag-on-gke/frontend@sha256:e2dd85e92f42e3684455a316dee5f98f61f1f3fba80b9368bd6f48d5e2e3475e" + image = "us-central1-docker.pkg.dev/ai-on-gke/rag-on-gke/frontend@sha256:3d3b03e4bc6c8fe218105bd69cc6f9cfafb18fc4b1bbb81f5c46f2598b5d5f10" name = "rag-frontend" port { container_port = 8080 } + volume_mount { + name = "secret-volume" + mount_path = "/etc/secret-volume" + read_only = true + } + env { name = "PROJECT_ID" value = "projects/${var.project_id}" @@ -126,36 +132,6 @@ resource "kubernetes_deployment" "rag_frontend_deployment" { value = var.dataset_embeddings_table_name } - env { - name = "DB_USER" - value_from { - secret_key_ref { - name = var.db_secret_name - key = "username" - } - } - } - - env { - name = "DB_PASSWORD" - value_from { - secret_key_ref { - name = var.db_secret_name - key = "password" - } - } - } - - env { - name = "DB_NAME" - value_from { - secret_key_ref { - name = var.db_secret_name - key = "database" - } - } - } - resources { limits = { cpu = "3" @@ -170,6 +146,13 @@ resource "kubernetes_deployment" "rag_frontend_deployment" { } } + volume { + secret { + secret_name = var.db_secret_name + } + name = "secret-volume" + } + container { image = "gcr.io/cloud-sql-connectors/cloud-sql-proxy:2.8.0" name = "cloud-sql-proxy" diff --git a/applications/rag/frontend/variables.tf b/applications/rag/frontend/variables.tf index 89dbb6729..0fa73dc47 100644 --- a/applications/rag/frontend/variables.tf +++ b/applications/rag/frontend/variables.tf @@ -37,13 +37,7 @@ variable "cloudsql_instance" { variable "db_secret_name" { type = string - description = "CloudSQL user" -} - -variable "db_secret_namespace" { - type = string - description = "CloudSQL password" - default = "rag" + description = "CloudSQL user credentials" } variable "dataset_embeddings_table_name" { diff --git a/applications/rag/main.tf b/applications/rag/main.tf index ff6ab1f60..514a5a2a6 100644 --- a/applications/rag/main.tf +++ b/applications/rag/main.tf @@ -120,6 +120,7 @@ module "cloudsql" { project_id = var.project_id instance_name = 
var.cloudsql_instance namespace = var.kubernetes_namespace + region = var.cloudsql_instance_region depends_on = [module.namespace] } @@ -179,6 +180,8 @@ module "kuberay-cluster" { enable_tpu = local.enable_tpu autopilot_cluster = local.enable_autopilot google_service_account = var.ray_service_account + db_secret_name = module.cloudsql.db_secret_name + db_region = var.cloudsql_instance_region grafana_host = module.kuberay-monitoring.grafana_uri depends_on = [module.kuberay-operator] } @@ -212,7 +215,6 @@ module "frontend" { inference_service_endpoint = module.inference-server.inference_service_endpoint cloudsql_instance = module.cloudsql.instance db_secret_name = module.cloudsql.db_secret_name - db_secret_namespace = module.cloudsql.db_secret_namespace dataset_embeddings_table_name = var.dataset_embeddings_table_name # IAP Auth parameters diff --git a/applications/rag/variables.tf b/applications/rag/variables.tf index db4389322..72a925a58 100644 --- a/applications/rag/variables.tf +++ b/applications/rag/variables.tf @@ -270,6 +270,12 @@ variable "cloudsql_instance" { default = "pgvector-instance" } +variable "cloudsql_instance_region" { + type = string + description = "GCP region for CloudSQL instance" + default = "us-central1" +} + variable "cpu_pools" { type = list(object({ name = string diff --git a/applications/rag/workloads.tfvars b/applications/rag/workloads.tfvars index dca101637..1b57f5ddd 100644 --- a/applications/rag/workloads.tfvars +++ b/applications/rag/workloads.tfvars @@ -24,7 +24,8 @@ kubernetes_namespace = "rag" create_gcs_bucket = true gcs_bucket = "rag-data-xyzu" # Choose a globally unique bucket name. -cloudsql_instance = "pgvector-instance" +cloudsql_instance = "pgvector-instance" +cloudsql_instance_region = "us-central1" ## Service accounts # Creates a google service account & k8s service account & configures workload identity with appropriate permissions. # Set to false & update the variable `ray_service_account` to use an existing IAM service account. diff --git a/modules/kuberay-cluster/kuberay-autopilot-values.yaml b/modules/kuberay-cluster/kuberay-autopilot-values.yaml index 1f2e5e83c..b2df8a24d 100644 --- a/modules/kuberay-cluster/kuberay-autopilot-values.yaml +++ b/modules/kuberay-cluster/kuberay-autopilot-values.yaml @@ -72,6 +72,10 @@ head: value: http://grafana:80 - name: RAY_PROMETHEUS_HOST value: http://frontend:9090 + - name: PROJECT_ID + value: ${project_id} + - name: DB_REGION + value: ${db_region} envFrom: [] # - secretRef: # name: my-env-secret @@ -123,12 +127,19 @@ head: volumeAttributes: bucketName: ${gcs_bucket} mountOptions: "implicit-dirs,uid=1000,gid=100" + - name: secret-volume + secret: + secretName: ${secret_name} + optional: true # Ray writes logs to /tmp/ray/session_latests/logs volumeMounts: - mountPath: /tmp/ray name: ray-logs - name: gcs-fuse-csi-ephemeral mountPath: /data + - name: secret-volume + mountPath: /etc/secret-volume + readOnly: true # sidecarContainers specifies additional containers to attach to the Ray pod. # Follows standard K8s container spec. sidecarContainers: @@ -170,9 +181,11 @@ additionalWorkerGroups: block: 'true' # containerEnv specifies environment variables for the Ray container, # Follows standard K8s container env schema. 
- containerEnv: [] - # - name: EXAMPLE_ENV - # value: "1" + containerEnv: + - name: PROJECT_ID + value: ${project_id} + - name: DB_REGION + value: ${db_region} envFrom: [] # - secretRef: # name: my-env-secret @@ -223,6 +236,10 @@ additionalWorkerGroups: volumeAttributes: bucketName: ${gcs_bucket} mountOptions: "implicit-dirs,uid=1000,gid=100" + - name: secret-volume + secret: + secretName: ${secret_name} + optional: true # Ray writes logs to /tmp/ray/session_latests/logs volumeMounts: @@ -230,6 +247,9 @@ additionalWorkerGroups: name: ray-logs - name: gcs-fuse-csi-ephemeral mountPath: /data + - name: secret-volume + mountPath: /etc/secret-volume + readOnly: true # sidecarContainers specifies additional containers to attach to the Ray pod. # Follows standard K8s container spec. sidecarContainers: @@ -264,9 +284,11 @@ additionalWorkerGroups: block: 'true' # containerEnv specifies environment variables for the Ray container, # Follows standard K8s container env schema. - containerEnv: [] - # - name: EXAMPLE_ENV - # value: "1" + containerEnv: + - name: PROJECT_ID + value: ${project_id} + - name: DB_REGION + value: ${db_region} envFrom: [] # - secretRef: # name: my-env-secret @@ -319,12 +341,19 @@ additionalWorkerGroups: volumeAttributes: bucketName: ${gcs_bucket} mountOptions: "implicit-dirs,uid=1000,gid=100" + - name: secret-volume + secret: + secretName: ${secret_name} + optional: true # Ray writes logs to /tmp/ray/session_latests/logs volumeMounts: - mountPath: /tmp/ray name: ray-logs - name: gcs-fuse-csi-ephemeral mountPath: /data + - name: secret-volume + mountPath: /etc/secret-volume + readOnly: true # sidecarContainers specifies additional containers to attach to the Ray pod. # Follows standard K8s container spec. diff --git a/modules/kuberay-cluster/kuberay-gpu-values.yaml b/modules/kuberay-cluster/kuberay-gpu-values.yaml index 57e96225a..4c2eec213 100644 --- a/modules/kuberay-cluster/kuberay-gpu-values.yaml +++ b/modules/kuberay-cluster/kuberay-gpu-values.yaml @@ -68,6 +68,10 @@ head: value: http://grafana:80 - name: RAY_PROMETHEUS_HOST value: http://frontend:9090 + - name: PROJECT_ID + value: ${project_id} + - name: DB_REGION + value: ${db_region} envFrom: [] # - secretRef: # name: my-env-secret @@ -119,12 +123,19 @@ head: volumeAttributes: bucketName: ${gcs_bucket} mountOptions: "implicit-dirs,uid=1000,gid=100" + - name: secret-volume + secret: + secretName: ${secret_name} + optional: true # Ray writes logs to /tmp/ray/session_latests/logs volumeMounts: - mountPath: /tmp/ray name: ray-logs - name: gcs-fuse-csi-ephemeral mountPath: /data + - name: secret-volume + mountPath: /etc/secret-volume + readOnly: true # sidecarContainers specifies additional containers to attach to the Ray pod. # Follows standard K8s container spec. sidecarContainers: @@ -163,9 +174,11 @@ worker: initContainerSecurityContext: {} # containerEnv specifies environment variables for the Ray container, # Follows standard K8s container env schema. 
- containerEnv: [] - # - name: EXAMPLE_ENV - # value: "1" + containerEnv: + - name: PROJECT_ID + value: ${project_id} + - name: DB_REGION + value: ${db_region} envFrom: [] # - secretRef: # name: my-env-secret @@ -216,12 +229,19 @@ worker: volumeAttributes: bucketName: ${gcs_bucket} mountOptions: "implicit-dirs,uid=1000,gid=100" + - name: secret-volume + secret: + secretName: ${secret_name} + optional: true # Ray writes logs to /tmp/ray/session_latests/logs volumeMounts: - mountPath: /tmp/ray name: ray-logs - name: gcs-fuse-csi-ephemeral mountPath: /data + - name: secret-volume + mountPath: /etc/secret-volume + readOnly: true # sidecarContainers specifies additional containers to attach to the Ray pod. # Follows standard K8s container spec. sidecarContainers: @@ -298,10 +318,17 @@ additionalWorkerGroups: - name: fluentbit-config configMap: name: fluentbit-config + - name: secret-volume + secret: + secretName: ${secret_name} + optional: true # Ray writes logs to /tmp/ray/session_latests/logs volumeMounts: - mountPath: /tmp/ray name: ray-logs + - name: secret-volume + mountPath: /etc/secret-volume + readOnly: true # sidecarContainers specifies additional containers to attach to the Ray pod. # Follows standard K8s container spec. sidecarContainers: diff --git a/modules/kuberay-cluster/kuberay-tpu-values.yaml b/modules/kuberay-cluster/kuberay-tpu-values.yaml index 61c614ecd..a52130938 100644 --- a/modules/kuberay-cluster/kuberay-tpu-values.yaml +++ b/modules/kuberay-cluster/kuberay-tpu-values.yaml @@ -67,6 +67,10 @@ head: value: http://grafana:80 - name: RAY_PROMETHEUS_HOST value: http://frontend:9090 + - name: PROJECT_ID + value: ${project_id} + - name: DB_REGION + value: ${db_region} ports: - containerPort: 6379 name: gcs @@ -110,10 +114,17 @@ head: - name: fluentbit-config configMap: name: fluentbit-config + - name: secret-volume + secret: + secretName: ${secret_name} + optional: true # Ray writes logs to /tmp/ray/session_latests/logs volumeMounts: - mountPath: /tmp/ray name: ray-logs + - name: secret-volume + mountPath: /etc/secret-volume + readOnly: true # sidecarContainers specifies additional containers to attach to the Ray pod. # Follows standard K8s container spec. sidecarContainers: @@ -153,9 +164,11 @@ worker: initContainerSecurityContext: {} # containerEnv specifies environment variables for the Ray container, # Follows standard K8s container env schema. - containerEnv: [] - # - name: EXAMPLE_ENV - # value: "1" + containerEnv: + - name: PROJECT_ID + value: ${project_id} + - name: DB_REGION + value: ${db_region} envFrom: [] # - secretRef: # name: my-env-secret @@ -197,10 +210,17 @@ worker: - name: fluentbit-config configMap: name: fluentbit-config + - name: secret-volume + secret: + secretName: ${secret_name} + optional: true # Ray writes logs to /tmp/ray/session_latests/logs volumeMounts: - mountPath: /tmp/ray name: ray-logs + - name: secret-volume + mountPath: /etc/secret-volume + readOnly: true # sidecarContainers specifies additional containers to attach to the Ray pod. # Follows standard K8s container spec. sidecarContainers: @@ -240,9 +260,11 @@ additionalWorkerGroups: initContainerSecurityContext: {} # containerEnv specifies environment variables for the Ray container, # Follows standard K8s container env schema. 
- containerEnv: [] - # - name: EXAMPLE_ENV - # value: "1" + containerEnv: + - name: PROJECT_ID + value: ${project_id} + - name: DB_REGION + value: ${db_region} envFrom: [] # - secretRef: # name: my-env-secret @@ -273,10 +295,17 @@ additionalWorkerGroups: volumes: - name: log-volume emptyDir: {} + - name: secret-volume + secret: + secretName: ${secret_name} + optional: true # Ray writes logs to /tmp/ray/session_latests/logs volumeMounts: - mountPath: /tmp/ray name: log-volume + - name: secret-volume + mountPath: /etc/secret-volume + readOnly: true sidecarContainers: [] service: type: ClusterIP diff --git a/modules/kuberay-cluster/kuberay-values.yaml b/modules/kuberay-cluster/kuberay-values.yaml index d3b1a34e2..5c1742682 100644 --- a/modules/kuberay-cluster/kuberay-values.yaml +++ b/modules/kuberay-cluster/kuberay-values.yaml @@ -68,6 +68,10 @@ head: value: http://grafana:80 - name: RAY_PROMETHEUS_HOST value: http://frontend:9090 + - name: PROJECT_ID + value: ${project_id} + - name: DB_REGION + value: ${db_region} envFrom: [] # - secretRef: # name: my-env-secret @@ -116,12 +120,19 @@ head: volumeAttributes: bucketName: ${gcs_bucket} mountOptions: "implicit-dirs,uid=1000,gid=100" + - name: secret-volume + secret: + secretName: ${secret_name} + optional: true # Ray writes logs to /tmp/ray/session_latests/logs volumeMounts: - mountPath: /tmp/ray name: ray-logs - name: gcs-fuse-csi-ephemeral mountPath: /data + - name: secret-volume + mountPath: /etc/secret-volume + readOnly: true # sidecarContainers specifies additional containers to attach to the Ray pod. # Follows standard K8s container spec. sidecarContainers: @@ -160,9 +171,11 @@ worker: initContainerSecurityContext: {} # containerEnv specifies environment variables for the Ray container, # Follows standard K8s container env schema. - containerEnv: [] - # - name: EXAMPLE_ENV - # value: "1" + containerEnv: + - name: PROJECT_ID + value: ${project_id} + - name: DB_REGION + value: ${db_region} envFrom: [] # - secretRef: # name: my-env-secret @@ -210,12 +223,19 @@ worker: volumeAttributes: bucketName: ${gcs_bucket} mountOptions: "implicit-dirs,uid=1000,gid=100" + - name: secret-volume + secret: + secretName: ${secret_name} + optional: true # Ray writes logs to /tmp/ray/session_latests/logs volumeMounts: - mountPath: /tmp/ray name: ray-logs - name: gcs-fuse-csi-ephemeral mountPath: /data + - name: secret-volume + mountPath: /etc/secret-volume + readOnly: true # sidecarContainers specifies additional containers to attach to the Ray pod. # Follows standard K8s container spec. sidecarContainers: diff --git a/modules/kuberay-cluster/kuberay.tf b/modules/kuberay-cluster/kuberay.tf index 48df85235..5ba6344d7 100644 --- a/modules/kuberay-cluster/kuberay.tf +++ b/modules/kuberay-cluster/kuberay.tf @@ -30,18 +30,30 @@ resource "helm_release" "ray-cluster" { gcs_bucket = var.gcs_bucket k8s_service_account = var.google_service_account grafana_host = var.grafana_host + secret_name = var.db_secret_name + project_id = var.project_id + db_region = var.db_region }) : var.enable_tpu ? templatefile("${path.module}/kuberay-tpu-values.yaml", { gcs_bucket = var.gcs_bucket k8s_service_account = var.google_service_account grafana_host = var.grafana_host + secret_name = var.db_secret_name + project_id = var.project_id + db_region = var.db_region }) : var.enable_gpu ? 
templatefile("${path.module}/kuberay-gpu-values.yaml", { gcs_bucket = var.gcs_bucket k8s_service_account = var.google_service_account grafana_host = var.grafana_host + secret_name = var.db_secret_name + project_id = var.project_id + db_region = var.db_region }) : templatefile("${path.module}/kuberay-values.yaml", { gcs_bucket = var.gcs_bucket k8s_service_account = var.google_service_account grafana_host = var.grafana_host + secret_name = var.db_secret_name + project_id = var.project_id + db_region = var.db_region }) ] } diff --git a/modules/kuberay-cluster/variables.tf b/modules/kuberay-cluster/variables.tf index f96c44402..edbd8a5be 100644 --- a/modules/kuberay-cluster/variables.tf +++ b/modules/kuberay-cluster/variables.tf @@ -17,6 +17,12 @@ variable "project_id" { description = "GCP project id" } +variable "db_region" { + type = string + description = "Cloud SQL instance region" + default = "us-central1" +} + variable "namespace" { type = string description = "Kubernetes namespace where resources are deployed" @@ -57,3 +63,9 @@ variable "gcs_bucket" { variable "grafana_host" { type = string } + +variable "db_secret_name" { + type = string + description = "CloudSQL user credentials" + default = "empty-secret" +} \ No newline at end of file
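Note: both the notebook's Ray job script and the frontend container read the Cloud SQL credentials from the files that the Kubernetes secret exposes under the mounted volume at `/etc/secret-volume` (keys `username` and `password`, per the volume mounts above). A minimal sketch of that read pattern, assuming those paths and key names; the `read_secret` helper is illustrative and not part of this change:

```python
import os

SECRET_DIR = "/etc/secret-volume"  # mount_path used by the frontend and the Ray pods


def read_secret(key: str) -> str:
    # Each secret key appears as a file named after the key inside the mounted volume.
    with open(os.path.join(SECRET_DIR, key), "r") as f:
        return f.read().strip()  # strip() guards against a trailing newline in the secret value


DB_USER = read_secret("username")
DB_PASS = read_secret("password")
```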