Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into hpa_custommetrics
Browse files Browse the repository at this point in the history
  • Loading branch information
rsgowman committed Mar 5, 2024
2 parents fdb506a + 9799818 commit e45392a
Show file tree
Hide file tree
Showing 12 changed files with 41 additions and 47 deletions.
9 changes: 8 additions & 1 deletion applications/jupyter/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,13 @@ module "gcs" {
bucket_name = var.gcs_bucket
}

# Instantiate the shared kubernetes-namespace module for var.namespace.
# The jupyterhub module in this file lists this module in its depends_on.
module "namespace" {
  source = "../../modules/kubernetes-namespace"

  # Forwarded to the module's create_namespace input.
  create_namespace = true
  namespace        = var.namespace
}

# Creates jupyterhub
module "jupyterhub" {
source = "../../modules/jupyter"
Expand All @@ -94,5 +101,5 @@ module "jupyterhub" {
url_domain_addr = var.url_domain_addr
url_domain_name = var.url_domain_name
members_allowlist = var.members_allowlist
depends_on = [module.gcs]
depends_on = [module.gcs, module.namespace]
}
22 changes: 4 additions & 18 deletions applications/rag/frontend/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,6 @@ data "google_project" "project" {
}


data "kubernetes_service" "inference_service" {
metadata {
name = var.inference_service_name
namespace = var.inference_service_namespace
}
}

data "kubernetes_secret" "db_secret" {
metadata {
name = var.db_secret_name
namespace = var.db_secret_namespace
}
}

locals {
instance_connection_name = format("%s:%s:%s", var.project_id, var.region, var.cloudsql_instance)
}
Expand Down Expand Up @@ -149,7 +135,7 @@ resource "kubernetes_deployment" "rag_frontend_deployment" {

env {
name = "INFERENCE_ENDPOINT"
value = data.kubernetes_service.inference_service.status.0.load_balancer.0.ingress.0.ip
value = var.inference_service_endpoint
}

env {
Expand All @@ -161,7 +147,7 @@ resource "kubernetes_deployment" "rag_frontend_deployment" {
name = "DB_USER"
value_from {
secret_key_ref {
name = data.kubernetes_secret.db_secret.metadata.0.name
name = var.db_secret_name
key = "username"
}
}
Expand All @@ -171,7 +157,7 @@ resource "kubernetes_deployment" "rag_frontend_deployment" {
name = "DB_PASSWORD"
value_from {
secret_key_ref {
name = data.kubernetes_secret.db_secret.metadata.0.name
name = var.db_secret_name
key = "password"
}
}
Expand All @@ -181,7 +167,7 @@ resource "kubernetes_deployment" "rag_frontend_deployment" {
name = "DB_NAME"
value_from {
secret_key_ref {
name = data.kubernetes_secret.db_secret.metadata.0.name
name = var.db_secret_name
key = "database"
}
}
Expand Down
8 changes: 1 addition & 7 deletions applications/rag/frontend/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,9 @@ variable "dataset_embeddings_table_name" {
description = "Name of the table that stores vector embeddings for input dataset"
}

variable "inference_service_name" {
type = string
description = "Model inference k8s service name"
}

variable "inference_service_namespace" {
variable "inference_service_endpoint" {
type = string
description = "Model inference k8s service endpoint"
default = "rag"
}

variable "create_service_account" {
Expand Down
3 changes: 3 additions & 0 deletions applications/rag/frontend/versions.tf
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,8 @@ terraform {
kubernetes = {
source = "hashicorp/kubernetes"
}
helm = {
source = "hashicorp/helm"
}
}
}
22 changes: 13 additions & 9 deletions applications/rag/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,12 @@ provider "helm" {
}
}

# Instantiate the shared kubernetes-namespace module for
# var.kubernetes_namespace, using the aliased helm.rag provider.
module "namespace" {
  source = "../../modules/kubernetes-namespace"

  providers = {
    helm = helm.rag
  }

  namespace        = var.kubernetes_namespace
  create_namespace = true
}

module "kuberay-operator" {
source = "../../modules/kuberay-operator"
Expand All @@ -114,7 +120,7 @@ module "cloudsql" {
project_id = var.project_id
instance_name = var.cloudsql_instance
namespace = var.kubernetes_namespace
depends_on = [module.kuberay-operator]
depends_on = [module.namespace]
}

module "jupyterhub" {
Expand Down Expand Up @@ -142,14 +148,14 @@ module "jupyterhub" {
url_domain_name = var.jupyter_url_domain_name
members_allowlist = var.jupyter_members_allowlist

depends_on = [module.kuberay-operator, module.gcs]
depends_on = [module.namespace, module.gcs]
}

module "kuberay-logging" {
source = "../../modules/kuberay-logging"
providers = { kubernetes = kubernetes.rag }
namespace = var.kubernetes_namespace
depends_on = [module.kuberay-operator]
depends_on = [module.namespace]
}

module "kuberay-cluster" {
Expand All @@ -164,7 +170,7 @@ module "kuberay-cluster" {
autopilot_cluster = local.enable_autopilot
google_service_account = var.ray_service_account
grafana_host = module.kuberay-monitoring.grafana_uri
depends_on = [module.kuberay-operator, module.kuberay-monitoring]
depends_on = [module.kuberay-operator]
}

module "kuberay-monitoring" {
Expand All @@ -175,15 +181,14 @@ module "kuberay-monitoring" {
create_namespace = true
enable_grafana_on_ray_dashboard = var.enable_grafana_on_ray_dashboard
k8s_service_account = var.ray_service_account
depends_on = [module.kuberay-operator]
}

module "inference-server" {
source = "../../tutorials/hf-tgi"
providers = { kubernetes = kubernetes.rag }
namespace = var.kubernetes_namespace
autopilot_cluster = local.enable_autopilot
depends_on = [module.kuberay-operator]
depends_on = [module.namespace]
}

module "frontend" {
Expand All @@ -193,8 +198,7 @@ module "frontend" {
create_service_account = var.create_rag_service_account
google_service_account = var.rag_service_account
namespace = var.kubernetes_namespace
inference_service_name = module.inference-server.inference_service_name
inference_service_namespace = module.inference-server.inference_service_namespace
inference_service_endpoint = module.inference-server.inference_service_endpoint
db_secret_name = module.cloudsql.db_secret_name
db_secret_namespace = module.cloudsql.db_secret_namespace
dataset_embeddings_table_name = var.dataset_embeddings_table_name
Expand All @@ -214,5 +218,5 @@ module "frontend" {
url_domain_addr = var.frontend_url_domain_addr
url_domain_name = var.frontend_url_domain_name
members_allowlist = var.frontend_members_allowlist
depends_on = [ module.gcs ]
depends_on = [ module.namespace ]
}
2 changes: 1 addition & 1 deletion applications/rag/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ variable "gpu_pools" {
{
name = "gpu-pool-l4"
machine_type = "g2-standard-24"
node_locations = "us-central1-a,us-central1-b"
node_locations = "us-central1-a"
autoscaling = true
min_count = 1
max_count = 3
Expand Down
1 change: 1 addition & 0 deletions applications/rag/workloads.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ kubernetes_namespace = "rag"
create_gcs_bucket = true
gcs_bucket = "rag-data-xyzu" # Choose a globally unique bucket name.

cloudsql_instance = "pgvector-instance"
## Service accounts
# Creates a google service account & k8s service account & configures workload identity with appropriate permissions.
# Set to false & update the variable `ray_service_account` to use an existing IAM service account.
Expand Down
7 changes: 0 additions & 7 deletions modules/jupyter/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,6 @@ data "google_project" "project" {
project_id = var.project_id
}

# create namespace
module "namespace" {
source = "../../modules/kubernetes-namespace"
namespace = var.namespace
create_namespace = true
}

# Creates a "Brand", equivalent to the OAuth consent screen on Cloud console
resource "google_iap_brand" "project_brand" {
count = var.add_auth && var.brand == "" ? 1 : 0
Expand Down
1 change: 1 addition & 0 deletions modules/kuberay-monitoring/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ resource "helm_release" "grafana" {
}

data "kubernetes_service" "example" {
count = var.enable_grafana_on_ray_dashboard ? 1 : 0
metadata {
name = "grafana"
namespace = var.namespace
Expand Down
2 changes: 1 addition & 1 deletion modules/kuberay-monitoring/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
# limitations under the License.

output "grafana_uri" {
value = data.kubernetes_service.example.status != null ? (data.kubernetes_service.example.status[0].load_balancer != null ? "${data.kubernetes_service.example.status[0].load_balancer[0].ingress[0].ip}" : "") : ""
value = var.enable_grafana_on_ray_dashboard ? (data.kubernetes_service.example[0].status != null ? (data.kubernetes_service.example[0].status[0].load_balancer != null ? "${data.kubernetes_service.example[0].status[0].load_balancer[0].ingress[0].ip}" : "") : "") : ""
}

4 changes: 2 additions & 2 deletions modules/kubernetes-namespace/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
# limitations under the License.

# Helm Chart
resource "helm_release" "namespace" {
name = "namespace"
resource "helm_release" "app-namespace" {
name = "app-namespace"
chart = "${path.module}/charts/namespace/"
namespace = var.namespace
create_namespace = var.create_namespace
Expand Down
7 changes: 6 additions & 1 deletion tutorials/hf-tgi/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,9 @@ output "inference_service_name" {
output "inference_service_namespace" {
description = "Namespace of model inference service"
value = kubernetes_service.inference_service.metadata[0].namespace
}
}

# Exposes the IP of the first load-balancer ingress entry of the inference
# service, or "" when that value cannot be resolved.
output "inference_service_endpoint" {
  description = "Endpoint of model inference service"

  # try() returns the fallback whenever any step of the traversal fails:
  # status or load_balancer being null, or the ingress list being empty.
  # The previous nested null checks still indexed ingress[0] unconditionally,
  # which raises an invalid-index error when no ingress entry is present.
  value = try(kubernetes_service.inference_service.status[0].load_balancer[0].ingress[0].ip, "")
}

0 comments on commit e45392a

Please sign in to comment.