diff --git a/applications/jupyter/main.tf b/applications/jupyter/main.tf index 0cdf30cf9..f4cb91abd 100644 --- a/applications/jupyter/main.tf +++ b/applications/jupyter/main.tf @@ -77,6 +77,16 @@ module "namespace" { create_namespace = true } +# IAP Section: Enabled the IAP service +resource "google_project_service" "project_service" { + count = var.add_auth ? 1 : 0 + project = var.project_id + service = "iap.googleapis.com" + + disable_dependent_services = false + disable_on_destroy = false +} + # Creates jupyterhub module "jupyterhub" { source = "../../modules/jupyter" diff --git a/applications/rag/README.md b/applications/rag/README.md index e68d1f327..8c904a76f 100644 --- a/applications/rag/README.md +++ b/applications/rag/README.md @@ -1,6 +1,6 @@ # RAG-on-GKE Application -**NOTE:** This solution is in beta/a work in progress - please expect friction while using it. +**NOTE:** This solution is in beta. Please expect friction while using it. This is a sample to deploy a RAG application on GKE. Retrieval Augmented Generation (RAG) is a popular approach for boosting the accuracy of LLM responses, particularly for domain specific or private data sets. The basic idea is to have a semantically searchable knowledge base (often using vector search), which is used to retrieve relevant snippets for a given prompt to provide additional context to the LLM. Augmenting the knowledge base with additional data is typically cheaper than fine tuning and is more scalable when incorporating current events and other rapidly changing data spaces. @@ -32,7 +32,7 @@ CLUSTER_REGION=us-central1 ``` 2. Use the following instructions to create a GKE cluster. We recommend using Autopilot for a simpler setup. 
-##### Autopilot +##### Autopilot (recommended) RAG requires the latest Autopilot features, available on GKE cluster version `1.29.1-gke.1575000`+ ``` @@ -46,7 +46,7 @@ gcloud container clusters create-auto ${CLUSTER_NAME:?} \ --cluster-version ${CLUSTER_VERSION:?} ``` -##### Standard (recommended) +##### Standard 1. To create a GKE Standard cluster using Terraform, follow the [instructions here](https://github.com/GoogleCloudPlatform/ai-on-gke/blob/main/infrastructure/README.md). Use the preconfigured node pools in `/infrastructure/platform.tfvars` as this solution requires T4s and L4s. @@ -105,6 +105,7 @@ gcloud container clusters get-credentials ${CLUSTER_NAME:?} --location ${CLUSTER ``` kubectl port-forward -n ${NAMESPACE:?} deployment/mistral-7b-instruct 8080:8080 ``` + * In a new terminal, try a few prompts: ``` export USER_PROMPT="How to deploy a container on K8s?" @@ -119,6 +120,7 @@ curl 127.0.0.1:8080/generate -X POST \ } EOF ``` + * At the end of the smoke test with the TGI server, stop port forwarding by using Ctrl-C on the original terminal. 5. Verify the frontend chat interface is setup: @@ -145,10 +147,10 @@ This step generates the vector embeddings for your input dataset. Currently, the 1. Create a CloudSQL user to access the database: `gcloud sql users create rag-user-notebook --password=${SQL_PASSWORD:?} --instance=pgvector-instance --host=%` 2. 
Go to the Jupyterhub service endpoint in a browser: - * IAP disable: `kubectl get services proxy-public -n $NAMESPACE --output jsonpath='{.status.loadBalancer.ingress[0].ip}'` - * IAP enabled: Read terraform output `jupyter_uri` or use commend: `kubectl get managedcertificates jupyter-managed-cert -n $NAMESPACE --output jsonpath='{.status.domainStatus[0].domain}'` - * Remeber login GCP to check if user has role `IAP-secured Web App User` - * Waiting for domain status to be `Active` + * IAP disabled: `kubectl get services proxy-public -n $NAMESPACE --output jsonpath='{.status.loadBalancer.ingress[0].ip}'` + * IAP enabled: Read terraform output `jupyter_uri` or use command: `kubectl get managedcertificates jupyter-managed-cert -n $NAMESPACE --output jsonpath='{.status.domainStatus[0].domain}'` + * Open Google Cloud Console IAM to verify that the user has role `IAP-secured Web App User` + * Wait for the domain status to be `Active` 3. Login with placeholder credentials [TBD: replace with instructions for IAP]: * username: user * password: use `terraform output jupyter_password` to fetch the password value @@ -167,40 +169,28 @@ This step generates the vector embeddings for your input dataset. Currently, the * `os.environ['KAGGLE_KEY']` 9. Run all the cells in the notebook. This will generate vector embeddings for the input dataset (`denizbilginn/google-maps-restaurant-reviews`) and store them in the `pgvector-instance` via a Ray job. - * Once submitted, Ray will take several minutes to create the runtime environment and optionally scale up Ray worker nodes. During this time, the job status will remain PENDING. - * When the job status is SUCCEEDED, the vector embeddings have been generated and we are ready to launch the frontend chat interface. + * If the Ray job has FAILED, re-run the cell. + * When the Ray job has SUCCEEDED, we are ready to launch the frontend chat interface. 
-### Launch the Frontend Chat Interface +### Access the Frontend Chat Interface -#### Accessing the Frontend with IAP Disabled +#### With IAP Disabled 1. Setup port forwarding for the frontend: `kubectl port-forward service/rag-frontend -n $NAMESPACE 8080:8080 &` 2. Go to `localhost:8080` in a browser & start chatting! This will fetch context related to your prompt from the vector embeddings in the `pgvector-instance`, augment the original prompt with the context & query the inference model (`mistral-7b`) with the augmented prompt. -#### Accessing the Frontend with IAP Enabled -1. Verify IAP is Enabled - - * Ensure that IAP is enabled on Google Cloud Platform (GCP) for your application. If you encounter any errors, try re-enabling IAP. - -2. Verify User Role - - * Make sure you have the role `IAP-secured Web App User` assigned to your user account. This role is necessary to access the application through IAP. - -3. Verify Domain is Active - * Make sure the domain is active using commend: - `kubectl get managedcertificates frontend-managed-cert -n rag --output jsonpath='{.status.domainStatus[0].status}'` - -3. Retrieve the Domain - - * Read terraform output `frontend_uri` or use the following command to find the domain created by IAP for accessing your service: - `kubectl get managedcertificates frontend-managed-cert -n $NAMESPACE --output jsonpath='{.status.domainStatus[0].domain}'` - -4. Access the Frontend +#### With IAP Enabled +1. Verify that IAP is enabled on Google Cloud Platform (GCP) for your application. If you encounter any errors, try re-enabling IAP. +2. Verify that you have the role `IAP-secured Web App User` assigned to your user account. This role is necessary to access the application through IAP. +3. Verify the domain is active using command: + `kubectl get managedcertificates frontend-managed-cert -n $NAMESPACE --output jsonpath='{.status.domainStatus[0].status}'` +4. 
Read terraform output `frontend_uri` or use the following command to find the domain created by IAP for accessing your service: + `kubectl get managedcertificates frontend-managed-cert -n $NAMESPACE --output jsonpath='{.status.domainStatus[0].domain}'` +5. Open your browser and navigate to the domain you retrieved in the previous step to start chatting! - * Open your browser and navigate to the domain you retrieved in the previous step to start chatting! +#### Prompt Examples -#### Prompts Example -3. [TODO: Add some example prompts for the dataset]. +*TODO:* Add some example prompts for the dataset. ### Cleanup diff --git a/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb b/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb index b0a769af1..3570e7db3 100644 --- a/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb +++ b/applications/rag/example_notebooks/rag-kaggle-ray-sql-latest.ipynb @@ -252,7 +252,7 @@ "id": "7ba6c3ff-a25a-4f4d-b58e-68f7fe7d33df", "metadata": {}, "outputs": [], - "source": [ + "source": [ "job_id = client.submit_job(\n", " entrypoint=\"python test.py\",\n", " # Path to the local directory that contains the entrypoint file.\n", @@ -278,10 +278,9 @@ " status = client.get_job_status(job_id)\n", " if status != prev_status:\n", " print(\"Job status:\", status)\n", + " print(\"Job info:\", client.get_job_info(job_id).message)\n", " prev_status = status\n", " if status.is_terminal():\n", - " if status == 'FAILED':\n", - " print(\"Job info:\", client.get_job_info(job_id))\n", " break\n", " time.sleep(5)\n" ] diff --git a/applications/rag/frontend/main.tf b/applications/rag/frontend/main.tf index 2e402cfab..2efbe2c8a 100644 --- a/applications/rag/frontend/main.tf +++ b/applications/rag/frontend/main.tf @@ -15,48 +15,30 @@ data "google_project" "project" { project_id = var.project_id } - locals { instance_connection_name = format("%s:%s:%s", var.project_id, var.region, var.cloudsql_instance) } -# IAP 
Section: Enabled the IAP service -resource "google_project_service" "project_service" { - count = var.add_auth ? 1 : 0 - project = var.project_id - service = "iap.googleapis.com" - - disable_dependent_services = false - disable_on_destroy = false -} - -# IAP Section: Creates the OAuth client used in IAP -resource "google_iap_client" "iap_oauth_client" { - count = var.add_auth && var.client_id == "" ? 1 : 0 - display_name = "Frontend-Client" - brand = var.brand == "" ? "projects/${data.google_project.project.number}/brands/${data.google_project.project.number}" : var.brand -} - # IAP Section: Creates the GKE components module "iap_auth" { count = var.add_auth ? 1 : 0 source = "../../../modules/iap" - project_id = var.project_id - namespace = var.namespace - frontend_add_auth = var.add_auth - frontend_k8s_ingress_name = var.k8s_ingress_name - frontend_k8s_managed_cert_name = var.k8s_managed_cert_name - frontend_k8s_iap_secret_name = var.k8s_iap_secret_name - frontend_k8s_backend_config_name = var.k8s_backend_config_name - frontend_k8s_backend_service_name = var.k8s_backend_service_name - frontend_k8s_backend_service_port = var.k8s_backend_service_port - frontend_client_id = var.client_id != "" ? var.client_id : google_iap_client.iap_oauth_client[0].client_id - frontend_client_secret = var.client_id != "" ? 
var.client_secret : google_iap_client.iap_oauth_client[0].secret - frontend_url_domain_addr = var.url_domain_addr - frontend_url_domain_name = var.url_domain_name + project_id = var.project_id + namespace = var.namespace + app_name = "frontend" + brand = var.brand + k8s_ingress_name = var.k8s_ingress_name + k8s_managed_cert_name = var.k8s_managed_cert_name + k8s_iap_secret_name = var.k8s_iap_secret_name + k8s_backend_config_name = var.k8s_backend_config_name + k8s_backend_service_name = var.k8s_backend_service_name + k8s_backend_service_port = var.k8s_backend_service_port + client_id = var.client_id + client_secret = var.client_secret + url_domain_addr = var.url_domain_addr + url_domain_name = var.url_domain_name depends_on = [ - google_project_service.project_service, kubernetes_service.rag_frontend_service ] } diff --git a/applications/rag/frontend/outputs.tf b/applications/rag/frontend/outputs.tf index 84c2239ef..f808f71e5 100644 --- a/applications/rag/frontend/outputs.tf +++ b/applications/rag/frontend/outputs.tf @@ -13,5 +13,5 @@ # limitations under the License. output "frontend_uri" { - value = var.add_auth ? module.iap_auth[0].frontend_domain : (data.kubernetes_service.frontend-ingress.status != null ? (data.kubernetes_service.frontend-ingress.status[0].load_balancer != null ? "${data.kubernetes_service.frontend-ingress.status[0].load_balancer[0].ingress[0].ip}" : "") : "") + value = var.add_auth ? module.iap_auth[0].domain : (data.kubernetes_service.frontend-ingress.status != null ? (data.kubernetes_service.frontend-ingress.status[0].load_balancer != null ? 
"${data.kubernetes_service.frontend-ingress.status[0].load_balancer[0].ingress[0].ip}" : "") : "") } \ No newline at end of file diff --git a/applications/rag/main.tf b/applications/rag/main.tf index 63eb89ac4..c3fc34335 100644 --- a/applications/rag/main.tf +++ b/applications/rag/main.tf @@ -123,6 +123,16 @@ module "cloudsql" { depends_on = [module.namespace] } +# IAP Section: Enabled the IAP service +resource "google_project_service" "project_service" { + count = var.frontend_add_auth || var.jupyter_add_auth ? 1 : 0 + project = var.project_id + service = "iap.googleapis.com" + + disable_dependent_services = false + disable_on_destroy = false +} + module "jupyterhub" { source = "../../modules/jupyter" providers = { helm = helm.rag, kubernetes = kubernetes.rag } 
diff --git a/modules/iap/iap.tf b/modules/iap/iap.tf index c9344ae18..9ec76170a 100644 --- a/modules/iap/iap.tf +++ b/modules/iap/iap.tf @@ -12,100 +12,46 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# Used to generate ip address -resource "random_string" "random" { - length = 4 - special = false - upper = false +# Looks up project metadata (e.g. the project number) for var.project_id +data "google_project" "project" { + project_id = var.project_id } -# TODO refactor Jupyter and Frontend to be one -# Jupyter IAP -resource "google_compute_global_address" "jupyter_ip_address" { - count = var.jupyter_add_auth && var.jupyter_url_domain_addr == "" ? 1 : 0 - provider = google-beta - project = var.project_id - name = "jupyter-address-${random_string.random.result}" - address_type = "EXTERNAL" - ip_version = "IPV4" +# Creates a "Brand", equivalent to the OAuth consent screen on Cloud console +resource "google_iap_brand" "project_brand" { + count = var.brand == "" ? 1 : 0 + support_email = var.support_email + application_title = "${var.app_name}-Application" + project = var.project_id } -# Helm Chart IAP -resource "helm_release" "iap_jupyter" { - count = var.jupyter_add_auth ? 1 : 0 - name = "iap-jupyter" - chart = "${path.module}/charts/iap/" - namespace = var.namespace - create_namespace = true - # timeout increased to support autopilot scaling resources, and give enough time to complete the deployment - timeout = 1200 - set { - name = "iap.backendConfig.name" - value = var.jupyter_k8s_backend_config_name - } - - set { - name = "iap.secret.name" - value = var.jupyter_k8s_iap_secret_name - } - - set { - name = "iap.secret.client_id" - value = base64encode(var.jupyter_client_id) - } - - set { - name = "iap.secret.client_secret" - value = base64encode(var.jupyter_client_secret) - } - - set { - name = "iap.managedCertificate.name" - value = var.jupyter_k8s_managed_cert_name - } - - set { - name = "iap.managedCertificate.domain" - value = var.jupyter_url_domain_addr != "" ? var.jupyter_url_domain_addr : "${google_compute_global_address.jupyter_ip_address[0].address}.nip.io" - } - - set { - name = "iap.ingress.staticIpName" - value = var.jupyter_url_domain_addr != "" ? 
var.jupyter_url_domain_name : "${google_compute_global_address.jupyter_ip_address[0].name}" - } - - set { - name = "iap.ingress.name" - value = var.jupyter_k8s_ingress_name - } - - set { - name = "iap.ingress.backendServiceName" - value = var.jupyter_k8s_backend_service_name - } - - set { - name = "iap.ingress.backendServicePort" - value = var.jupyter_k8s_backend_service_port - } +# IAP Section: Creates the OAuth client used in IAP (attached to the brand created above, or to var.brand when one is provided) +resource "google_iap_client" "iap_oauth_client" { + count = var.client_id == "" ? 1 : 0 + display_name = "${var.app_name}-Client" + brand = var.brand == "" ? google_iap_brand.project_brand[0].name : var.brand } -# TODO set the member allowlist +# Used to generate ip address +resource "random_string" "random" { + length = 4 + special = false + upper = false +} -# Frontend IAP -resource "google_compute_global_address" "frontend_ip_address" { - count = var.frontend_add_auth && var.frontend_url_domain_addr == "" ? 1 : 0 +# Reserves a global static IP for the ingress when no domain is provided +resource "google_compute_global_address" "ip_address" { + count = var.url_domain_addr == "" ? 1 : 0 provider = google-beta project = var.project_id - name = "frontend-address-${random_string.random.result}" + name = "${var.app_name}-address-${random_string.random.result}" address_type = "EXTERNAL" ip_version = "IPV4" } # Helm Chart IAP resource "helm_release" "iap_frontend" { - count = var.frontend_add_auth ? 
1 : 0 - name = "iap-frontend" + name = "${var.app_name}-iap" chart = "${path.module}/charts/iap/" namespace = var.namespace create_namespace = true @@ -113,51 +59,51 @@ resource "helm_release" "iap_frontend" { timeout = 1200 set { name = "iap.backendConfig.name" - value = var.frontend_k8s_backend_config_name + value = var.k8s_backend_config_name } set { name = "iap.secret.name" - value = var.frontend_k8s_iap_secret_name + value = var.k8s_iap_secret_name } set { name = "iap.secret.client_id" - value = base64encode(var.frontend_client_id) + value = base64encode(var.client_id != "" ? var.client_id : google_iap_client.iap_oauth_client[0].client_id) } set { name = "iap.secret.client_secret" - value = base64encode(var.frontend_client_secret) + value = base64encode(var.client_id != "" ? var.client_secret : google_iap_client.iap_oauth_client[0].secret) } set { name = "iap.managedCertificate.name" - value = var.frontend_k8s_managed_cert_name + value = var.k8s_managed_cert_name } set { name = "iap.managedCertificate.domain" - value = var.frontend_url_domain_addr != "" ? var.frontend_url_domain_addr : "${google_compute_global_address.frontend_ip_address[0].address}.nip.io" + value = var.url_domain_addr != "" ? var.url_domain_addr : "${google_compute_global_address.ip_address[0].address}.nip.io" } set { name = "iap.ingress.staticIpName" - value = var.frontend_url_domain_addr != "" ? var.frontend_url_domain_name : "${google_compute_global_address.frontend_ip_address[0].name}" + value = var.url_domain_addr != "" ? 
var.url_domain_name : "${google_compute_global_address.ip_address[0].name}" } set { name = "iap.ingress.name" - value = var.frontend_k8s_ingress_name + value = var.k8s_ingress_name } set { name = "iap.ingress.backendServiceName" - value = var.frontend_k8s_backend_service_name + value = var.k8s_backend_service_name } set { name = "iap.ingress.backendServicePort" - value = var.frontend_k8s_backend_service_port + value = var.k8s_backend_service_port } } \ No newline at end of file diff --git a/modules/iap/outputs.tf b/modules/iap/outputs.tf index 8ec76c552..900d25a37 100644 --- a/modules/iap/outputs.tf +++ b/modules/iap/outputs.tf @@ -12,10 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -output "jupyter_domain" { - value = var.jupyter_add_auth && var.jupyter_url_domain_addr == "" ? "${google_compute_global_address.jupyter_ip_address[0].address}.nip.io" : var.jupyter_url_domain_addr -} - -output "frontend_domain" { - value = var.frontend_add_auth && var.frontend_url_domain_addr == "" ? "${google_compute_global_address.frontend_ip_address[0].address}.nip.io" : var.frontend_url_domain_addr +output "domain" { + value = var.url_domain_addr == "" ? 
"${google_compute_global_address.ip_address[0].address}.nip.io" : var.url_domain_addr } \ No newline at end of file diff --git a/modules/iap/variables.tf b/modules/iap/variables.tf index af09d87c2..4a442c8dc 100644 --- a/modules/iap/variables.tf +++ b/modules/iap/variables.tf @@ -22,156 +22,78 @@ variable "namespace" { description = "Kubernetes namespace where resources are deployed" } -# Frontend IAP settings -variable "frontend_add_auth" { - type = bool - description = "Enable iap authentication on frontend" - default = false -} - -variable "frontend_k8s_ingress_name" { - type = string - default = "frontend-ingress" -} - -variable "frontend_k8s_managed_cert_name" { - type = string - description = "Name for frontend managed certificate" - default = "frontend-managed-cert" -} - -variable "frontend_k8s_iap_secret_name" { - type = string - description = "Name for frontend iap secret" - default = "frontend-iap-secret" -} - -variable "frontend_k8s_backend_config_name" { - type = string - description = "Name of the Kubernetes Backend Config" - default = "frontend-iap-config" -} - -variable "frontend_k8s_backend_service_name" { - type = string - description = "Name of the Backend Service" - default = "rag-frontend" -} - -variable "frontend_k8s_backend_service_port" { - type = number - description = "Name of the Backend Service Port" - default = 8080 -} - -variable "frontend_url_domain_addr" { - type = string - description = "Domain provided by the user. If it's empty, we will create one for you." - default = "" -} - -variable "frontend_url_domain_name" { +variable "app_name" { type = string - description = "Name of the domain provided by the user. 
This var will only be used if url_domain_addr is not empty" - default = "" + description = "App Name" } -variable "frontend_support_email" { +# IAP settings +variable "brand" { type = string - description = "Email for users to contact with questions about their consent" - default = "" + description = "Brand" } -variable "frontend_client_id" { +variable "k8s_ingress_name" { type = string - description = "Client ID used for enabling IAP" - default = "" -} - -variable "frontend_client_secret" { - type = string - description = "Client secret used for enabling IAP" - default = "" -} - -variable "frontend_members_allowlist" { - type = list(string) - default = [] -} - -# Jupyter IAP settings -variable "jupyter_add_auth" { - type = bool - description = "Enable iap authentication on jupyterhub" - default = false -} - -variable "jupyter_k8s_ingress_name" { - type = string - default = "jupyter-ingress" + description = "Name for k8s Ingress" } -variable "jupyter_k8s_managed_cert_name" { +variable "k8s_managed_cert_name" { type = string - description = "Name for frontend managed certificate" - default = "frontend-managed-cert" + description = "Name for k8s managed certificate" } -variable "jupyter_k8s_iap_secret_name" { +variable "k8s_iap_secret_name" { type = string - description = "Name for jupyter iap secret" - default = "jupyter-iap-secret" + description = "Name for k8s iap secret" } -variable "jupyter_k8s_backend_config_name" { +variable "k8s_backend_config_name" { type = string description = "Name of the Kubernetes Backend Config" - default = "jupyter-iap-config" } -variable "jupyter_k8s_backend_service_name" { +variable "k8s_backend_service_name" { type = string description = "Name of the Backend Service" - default = "proxy-public" } -variable "jupyter_k8s_backend_service_port" { +variable "k8s_backend_service_port" { type = number description = "Name of the Backend Service Port" - default = 80 } -variable "jupyter_url_domain_addr" { +variable "url_domain_addr" { type = 
string description = "Domain provided by the user. If it's empty, we will create one for you." default = "" } -variable "jupyter_url_domain_name" { +variable "url_domain_name" { type = string description = "Name of the domain provided by the user. This var will only be used if url_domain_addr is not empty" default = "" } -variable "jupyter_support_email" { +variable "support_email" { type = string description = "Email for users to contact with questions about their consent" default = "" } -variable "jupyter_client_id" { +variable "client_id" { type = string description = "Client ID used for enabling IAP" default = "" } -variable "jupyter_client_secret" { +variable "client_secret" { type = string description = "Client secret used for enabling IAP" default = "" } -variable "jupyter_members_allowlist" { +variable "members_allowlist" { type = list(string) default = [] } \ No newline at end of file diff --git a/modules/jupyter/main.tf b/modules/jupyter/main.tf index 46c857ffb..d7707d39c 100644 --- a/modules/jupyter/main.tf +++ b/modules/jupyter/main.tf @@ -16,51 +16,26 @@ data "google_project" "project" { project_id = var.project_id } -# Creates a "Brand", equivalent to the OAuth consent screen on Cloud console -resource "google_iap_brand" "project_brand" { - count = var.add_auth && var.brand == "" ? 1 : 0 - support_email = var.support_email - application_title = "Application" - project = var.project_id -} - -# IAP Section: Enabled the IAP service -resource "google_project_service" "project_service" { - count = var.add_auth ? 1 : 0 - project = var.project_id - service = "iap.googleapis.com" - - disable_dependent_services = false - disable_on_destroy = false -} - -# IAP Section: Creates the OAuth client used in IAP -resource "google_iap_client" "iap_oauth_client" { - count = var.add_auth && var.client_id == "" ? 1 : 0 - display_name = "Jupyter-Client" - brand = var.brand == "" ? 
"projects/${data.google_project.project.number}/brands/${data.google_project.project.number}" : var.brand -} - # IAP Section: Creates the GKE components module "iap_auth" { count = var.add_auth ? 1 : 0 source = "../../modules/iap" - project_id = var.project_id - namespace = var.namespace - jupyter_add_auth = var.add_auth - jupyter_k8s_ingress_name = var.k8s_ingress_name - jupyter_k8s_managed_cert_name = var.k8s_managed_cert_name - jupyter_k8s_iap_secret_name = var.k8s_iap_secret_name - jupyter_k8s_backend_config_name = var.k8s_backend_config_name - jupyter_k8s_backend_service_name = var.k8s_backend_service_name - jupyter_k8s_backend_service_port = var.k8s_backend_service_port - jupyter_client_id = var.client_id != "" ? var.client_id : google_iap_client.iap_oauth_client[0].client_id - jupyter_client_secret = var.client_id != "" ? var.client_secret : google_iap_client.iap_oauth_client[0].secret - jupyter_url_domain_addr = var.url_domain_addr - jupyter_url_domain_name = var.url_domain_name + project_id = var.project_id + namespace = var.namespace + app_name = "jupyter" + brand = var.brand + k8s_ingress_name = var.k8s_ingress_name + k8s_managed_cert_name = var.k8s_managed_cert_name + k8s_iap_secret_name = var.k8s_iap_secret_name + k8s_backend_config_name = var.k8s_backend_config_name + k8s_backend_service_name = var.k8s_backend_service_name + k8s_backend_service_port = var.k8s_backend_service_port + client_id = var.client_id + client_secret = var.client_secret + url_domain_addr = var.url_domain_addr + url_domain_name = var.url_domain_name depends_on = [ - google_project_service.project_service, helm_release.jupyterhub ] } @@ -155,9 +130,6 @@ resource "helm_release" "jupyterhub" { password = var.add_auth ? "dummy" : random_password.generated_password[0].result project_id = var.project_id project_number = data.google_project.project.number - - # Support legacy image. - service_id = "" # TODO(umeshkumhar): var.add_auth ? 
(data.google_compute_backend_service.jupyter-ingress[0].generated_id != null ? data.google_compute_backend_service.jupyter-ingress[0].generated_id : "no-id-yet") : "no-id-yet" namespace = var.namespace backend_config = var.k8s_backend_config_name service_name = var.k8s_backend_service_name diff --git a/modules/jupyter/outputs.tf b/modules/jupyter/outputs.tf index f1e36a12d..a6a4ef30f 100644 --- a/modules/jupyter/outputs.tf +++ b/modules/jupyter/outputs.tf @@ -13,7 +13,7 @@ # limitations under the License. output "jupyterhub_uri" { - value = var.add_auth ? module.iap_auth[0].jupyter_domain : (data.kubernetes_service.jupyter-ingress.status != null ? (data.kubernetes_service.jupyter-ingress.status[0].load_balancer != null ? "${data.kubernetes_service.jupyter-ingress.status[0].load_balancer[0].ingress[0].ip}" : "") : "") + value = var.add_auth ? module.iap_auth[0].domain : (data.kubernetes_service.jupyter-ingress.status != null ? (data.kubernetes_service.jupyter-ingress.status[0].load_balancer != null ? "${data.kubernetes_service.jupyter-ingress.status[0].load_balancer[0].ingress[0].ip}" : "") : "") } output "jupyterhub_password" { diff --git a/modules/kuberay-cluster/kuberay-autopilot-values.yaml b/modules/kuberay-cluster/kuberay-autopilot-values.yaml index 11a20a90e..1f2e5e83c 100644 --- a/modules/kuberay-cluster/kuberay-autopilot-values.yaml +++ b/modules/kuberay-cluster/kuberay-autopilot-values.yaml @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -22,7 +22,7 @@ image: # Replace this with your own image if needed. repository: rayproject/ray - tag: 2.6.1-py310-gpu + tag: 2.9.3-py310-gpu pullPolicy: IfNotPresent nameOverride: "kuberay" @@ -64,8 +64,6 @@ head: # containerEnv specifies environment variables for the Ray container, # Follows standard K8s container env schema. 
containerEnv: - # - name: EXAMPLE_ENV - # value: "1" - name: RAY_memory_monitor_refresh_ms value: "0" - name: RAY_GRAFANA_IFRAME_HOST @@ -90,18 +88,18 @@ head: # for further guidance. resources: limits: - cpu: "8" + cpu: "1" # To avoid out-of-memory issues, never allocate less than 2G memory for the Ray head. - memory: "20G" + memory: "8G" ephemeral-storage: 20Gi requests: - cpu: "8" - memory: "20G" + cpu: "1" + memory: "8G" ephemeral-storage: 20Gi annotations: gke-gcsfuse/volumes: "true" - gke-gcsfuse/cpu-limit: "2" - gke-gcsfuse/memory-limit: 20Gi + gke-gcsfuse/cpu-limit: "1" + gke-gcsfuse/memory-limit: 4Gi gke-gcsfuse/ephemeral-storage-limit: 20Gi nodeSelector: cloud.google.com/compute-class: "Performance" @@ -158,8 +156,6 @@ worker: disabled: true # The map's key is used as the groupName. -# For example, key:small-group in the map below -# will be used as the groupName additionalWorkerGroups: cpuGroup: # Disabled by default @@ -194,16 +190,16 @@ additionalWorkerGroups: resources: limits: cpu: 4 - memory: "20G" + memory: "16G" ephemeral-storage: 20Gi requests: cpu: 4 - memory: "20G" + memory: "16G" ephemeral-storage: 20Gi annotations: gke-gcsfuse/volumes: "true" gke-gcsfuse/cpu-limit: "2" - gke-gcsfuse/memory-limit: 20Gi + gke-gcsfuse/memory-limit: 8Gi gke-gcsfuse/ephemeral-storage-limit: 20Gi nodeSelector: cloud.google.com/compute-class: "Performance" @@ -287,19 +283,19 @@ additionalWorkerGroups: # for further guidance. resources: limits: - cpu: "8" + cpu: "4" nvidia.com/gpu: "2" - memory: "40G" + memory: "16G" ephemeral-storage: 20Gi requests: - cpu: "8" + cpu: "4" nvidia.com/gpu: "2" - memory: "40G" + memory: "16G" ephemeral-storage: 20Gi annotations: gke-gcsfuse/volumes: "true" gke-gcsfuse/cpu-limit: "2" - gke-gcsfuse/memory-limit: 20Gi + gke-gcsfuse/memory-limit: 8Gi gke-gcsfuse/ephemeral-storage-limit: 20Gi nodeSelector: cloud.google.com/compute-class: "Accelerator"