diff --git a/applications/jupyter/main.tf b/applications/jupyter/main.tf index 7d8e08614..0cdf30cf9 100644 --- a/applications/jupyter/main.tf +++ b/applications/jupyter/main.tf @@ -70,6 +70,13 @@ module "gcs" { bucket_name = var.gcs_bucket } +# create namespace +module "namespace" { + source = "../../modules/kubernetes-namespace" + namespace = var.namespace + create_namespace = true +} + # Creates jupyterhub module "jupyterhub" { source = "../../modules/jupyter" @@ -94,5 +101,5 @@ module "jupyterhub" { url_domain_addr = var.url_domain_addr url_domain_name = var.url_domain_name members_allowlist = var.members_allowlist - depends_on = [module.gcs] + depends_on = [module.gcs, module.namespace] } diff --git a/applications/rag/frontend/main.tf b/applications/rag/frontend/main.tf index 38b938738..2e402cfab 100644 --- a/applications/rag/frontend/main.tf +++ b/applications/rag/frontend/main.tf @@ -16,20 +16,6 @@ data "google_project" "project" { } -data "kubernetes_service" "inference_service" { - metadata { - name = var.inference_service_name - namespace = var.inference_service_namespace - } -} - -data "kubernetes_secret" "db_secret" { - metadata { - name = var.db_secret_name - namespace = var.db_secret_namespace - } -} - locals { instance_connection_name = format("%s:%s:%s", var.project_id, var.region, var.cloudsql_instance) } @@ -149,7 +135,7 @@ resource "kubernetes_deployment" "rag_frontend_deployment" { env { name = "INFERENCE_ENDPOINT" - value = data.kubernetes_service.inference_service.status.0.load_balancer.0.ingress.0.ip + value = var.inference_service_endpoint } env { @@ -161,7 +147,7 @@ resource "kubernetes_deployment" "rag_frontend_deployment" { name = "DB_USER" value_from { secret_key_ref { - name = data.kubernetes_secret.db_secret.metadata.0.name + name = var.db_secret_name key = "username" } } @@ -171,7 +157,7 @@ resource "kubernetes_deployment" "rag_frontend_deployment" { name = "DB_PASSWORD" value_from { secret_key_ref { - name = data.kubernetes_secret.db_secret.metadata.0.name + name = var.db_secret_name key = "password" } } @@ -181,7 +167,7 @@ resource "kubernetes_deployment" "rag_frontend_deployment" { name = "DB_NAME" value_from { secret_key_ref { - name = data.kubernetes_secret.db_secret.metadata.0.name + name = var.db_secret_name key = "database" } } diff --git a/applications/rag/frontend/variables.tf b/applications/rag/frontend/variables.tf index 1ef7d41d6..89dbb6729 100644 --- a/applications/rag/frontend/variables.tf +++ b/applications/rag/frontend/variables.tf @@ -51,15 +51,9 @@ variable "dataset_embeddings_table_name" { description = "Name of the table that stores vector embeddings for input dataset" } -variable "inference_service_name" { - type = string - description = "Model inference k8s service name" -} - -variable "inference_service_namespace" { +variable "inference_service_endpoint" { type = string description = "Model inference k8s service endpoint" - default = "rag" } variable "create_service_account" { diff --git a/applications/rag/frontend/versions.tf b/applications/rag/frontend/versions.tf index 1ae1daebc..5bdbf99bc 100644 --- a/applications/rag/frontend/versions.tf +++ b/applications/rag/frontend/versions.tf @@ -20,5 +20,8 @@ terraform { kubernetes = { source = "hashicorp/kubernetes" } + helm = { + source = "hashicorp/helm" + } } } \ No newline at end of file diff --git a/applications/rag/main.tf b/applications/rag/main.tf index 2b30fc17e..74b8abbcf 100644 --- a/applications/rag/main.tf +++ b/applications/rag/main.tf @@ -88,6 +88,12 @@ provider "helm" { } } +module "namespace" { + source = "../../modules/kubernetes-namespace" + providers = { helm = helm.rag} + create_namespace = true + namespace = var.kubernetes_namespace +} module "kuberay-operator" { source = "../../modules/kuberay-operator" @@ -114,7 +120,7 @@ module "cloudsql" { project_id = var.project_id instance_name = var.cloudsql_instance namespace = var.kubernetes_namespace - depends_on = [module.kuberay-operator] + depends_on = [module.namespace] } module "jupyterhub" { @@ -142,14 +148,14 @@ module "jupyterhub" { url_domain_name = var.jupyter_url_domain_name members_allowlist = var.jupyter_members_allowlist - depends_on = [module.kuberay-operator, module.gcs] + depends_on = [module.namespace, module.gcs] } module "kuberay-logging" { source = "../../modules/kuberay-logging" providers = { kubernetes = kubernetes.rag } namespace = var.kubernetes_namespace - depends_on = [module.kuberay-operator] + depends_on = [module.namespace] } module "kuberay-cluster" { @@ -164,7 +170,7 @@ module "kuberay-cluster" { autopilot_cluster = local.enable_autopilot google_service_account = var.ray_service_account grafana_host = module.kuberay-monitoring.grafana_uri - depends_on = [module.kuberay-operator, module.kuberay-monitoring] + depends_on = [module.kuberay-operator] } module "kuberay-monitoring" { @@ -175,7 +181,6 @@ module "kuberay-monitoring" { create_namespace = true enable_grafana_on_ray_dashboard = var.enable_grafana_on_ray_dashboard k8s_service_account = var.ray_service_account - depends_on = [module.kuberay-operator] } module "inference-server" { @@ -183,7 +188,7 @@ module "inference-server" { providers = { kubernetes = kubernetes.rag } namespace = var.kubernetes_namespace autopilot_cluster = local.enable_autopilot - depends_on = [module.kuberay-operator] + depends_on = [module.namespace] } module "frontend" { @@ -193,8 +198,7 @@ module "frontend" { create_service_account = var.create_rag_service_account google_service_account = var.rag_service_account namespace = var.kubernetes_namespace - inference_service_name = module.inference-server.inference_service_name - inference_service_namespace = module.inference-server.inference_service_namespace + inference_service_endpoint = module.inference-server.inference_service_endpoint db_secret_name = module.cloudsql.db_secret_name db_secret_namespace = module.cloudsql.db_secret_namespace dataset_embeddings_table_name = var.dataset_embeddings_table_name @@ -214,5 +218,5 @@ module "frontend" { url_domain_addr = var.frontend_url_domain_addr url_domain_name = var.frontend_url_domain_name members_allowlist = var.frontend_members_allowlist - depends_on = [ module.gcs ] + depends_on = [ module.namespace ] } diff --git a/applications/rag/variables.tf b/applications/rag/variables.tf index 66c511d4b..cd746e6e2 100644 --- a/applications/rag/variables.tf +++ b/applications/rag/variables.tf @@ -347,7 +347,7 @@ variable "gpu_pools" { { name = "gpu-pool-l4" machine_type = "g2-standard-24" - node_locations = "us-central1-a,us-central1-b" + node_locations = "us-central1-a" autoscaling = true min_count = 1 max_count = 3 diff --git a/applications/rag/workloads.tfvars b/applications/rag/workloads.tfvars index 341ab60ba..aba62feae 100644 --- a/applications/rag/workloads.tfvars +++ b/applications/rag/workloads.tfvars @@ -24,6 +24,7 @@ kubernetes_namespace = "rag" create_gcs_bucket = true gcs_bucket = "rag-data-xyzu" # Choose a globally unique bucket name. +cloudsql_instance = "pgvector-instance" ## Service accounts # Creates a google service account & k8s service account & configures workload identity with appropriate permissions. # Set to false & update the variable `ray_service_account` to use an existing IAM service account. diff --git a/modules/jupyter/main.tf b/modules/jupyter/main.tf index 60bf665e0..46c857ffb 100644 --- a/modules/jupyter/main.tf +++ b/modules/jupyter/main.tf @@ -16,13 +16,6 @@ data "google_project" "project" { project_id = var.project_id } -# create namespace -module "namespace" { - source = "../../modules/kubernetes-namespace" - namespace = var.namespace - create_namespace = true -} - # Creates a "Brand", equivalent to the OAuth consent screen on Cloud console resource "google_iap_brand" "project_brand" { count = var.add_auth && var.brand == "" ? 1 : 0 diff --git a/modules/kuberay-monitoring/main.tf b/modules/kuberay-monitoring/main.tf index 28eec5bde..46e627058 100644 --- a/modules/kuberay-monitoring/main.tf +++ b/modules/kuberay-monitoring/main.tf @@ -47,6 +47,7 @@ resource "helm_release" "grafana" { } data "kubernetes_service" "example" { + count = var.enable_grafana_on_ray_dashboard ? 1 : 0 metadata { name = "grafana" namespace = var.namespace diff --git a/modules/kuberay-monitoring/outputs.tf b/modules/kuberay-monitoring/outputs.tf index e23424827..0244d8392 100644 --- a/modules/kuberay-monitoring/outputs.tf +++ b/modules/kuberay-monitoring/outputs.tf @@ -13,6 +13,6 @@ # limitations under the License. output "grafana_uri" { - value = data.kubernetes_service.example.status != null ? (data.kubernetes_service.example.status[0].load_balancer != null ? "${data.kubernetes_service.example.status[0].load_balancer[0].ingress[0].ip}" : "") : "" + value = var.enable_grafana_on_ray_dashboard ? (data.kubernetes_service.example[0].status != null ? (data.kubernetes_service.example[0].status[0].load_balancer != null ? "${data.kubernetes_service.example[0].status[0].load_balancer[0].ingress[0].ip}" : "") : "") : "" } diff --git a/modules/kubernetes-namespace/main.tf b/modules/kubernetes-namespace/main.tf index a4a9ab45b..9238e1982 100644 --- a/modules/kubernetes-namespace/main.tf +++ b/modules/kubernetes-namespace/main.tf @@ -13,8 +13,8 @@ # limitations under the License. # Helm Chart -resource "helm_release" "namespace" { - name = "namespace" +resource "helm_release" "app-namespace" { + name = "app-namespace" chart = "${path.module}/charts/namespace/" namespace = var.namespace create_namespace = var.create_namespace diff --git a/tutorials/hf-tgi/outputs.tf b/tutorials/hf-tgi/outputs.tf index e9aca67f9..3816613c4 100644 --- a/tutorials/hf-tgi/outputs.tf +++ b/tutorials/hf-tgi/outputs.tf @@ -20,4 +20,9 @@ output "inference_service_name" { output "inference_service_namespace" { description = "Namespace of model inference service" value = kubernetes_service.inference_service.metadata[0].namespace -} \ No newline at end of file +} + +output "inference_service_endpoint" { + description = "Endpoint of model inference service" + value = kubernetes_service.inference_service.status != null ? (kubernetes_service.inference_service.status[0].load_balancer != null ? "${kubernetes_service.inference_service.status[0].load_balancer[0].ingress[0].ip}" : "") : "" +}