Skip to content

Commit

Permalink
Merge branch 'main' into bump-tgi
Browse files Browse the repository at this point in the history
  • Loading branch information
laoj2 authored Mar 6, 2024
2 parents 9f3effa + 6b0f717 commit afb8aed
Show file tree
Hide file tree
Showing 26 changed files with 79 additions and 53 deletions.
9 changes: 8 additions & 1 deletion applications/jupyter/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,13 @@ module "gcs" {
bucket_name = var.gcs_bucket
}

# Creates the Kubernetes namespace named by var.namespace via the shared
# kubernetes-namespace module. The jupyterhub module below lists this module in
# its depends_on.
module "namespace" {
source = "../../modules/kubernetes-namespace"
namespace = var.namespace
create_namespace = true
}

# Creates jupyterhub
module "jupyterhub" {
source = "../../modules/jupyter"
Expand All @@ -94,5 +101,5 @@ module "jupyterhub" {
url_domain_addr = var.url_domain_addr
url_domain_name = var.url_domain_name
members_allowlist = var.members_allowlist
depends_on = [module.gcs]
depends_on = [module.gcs, module.namespace]
}
22 changes: 4 additions & 18 deletions applications/rag/frontend/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,6 @@ data "google_project" "project" {
}


data "kubernetes_service" "inference_service" {
metadata {
name = var.inference_service_name
namespace = var.inference_service_namespace
}
}

data "kubernetes_secret" "db_secret" {
metadata {
name = var.db_secret_name
namespace = var.db_secret_namespace
}
}

locals {
instance_connection_name = format("%s:%s:%s", var.project_id, var.region, var.cloudsql_instance)
}
Expand Down Expand Up @@ -149,7 +135,7 @@ resource "kubernetes_deployment" "rag_frontend_deployment" {

env {
name = "INFERENCE_ENDPOINT"
value = data.kubernetes_service.inference_service.status.0.load_balancer.0.ingress.0.ip
value = var.inference_service_endpoint
}

env {
Expand All @@ -161,7 +147,7 @@ resource "kubernetes_deployment" "rag_frontend_deployment" {
name = "DB_USER"
value_from {
secret_key_ref {
name = data.kubernetes_secret.db_secret.metadata.0.name
name = var.db_secret_name
key = "username"
}
}
Expand All @@ -171,7 +157,7 @@ resource "kubernetes_deployment" "rag_frontend_deployment" {
name = "DB_PASSWORD"
value_from {
secret_key_ref {
name = data.kubernetes_secret.db_secret.metadata.0.name
name = var.db_secret_name
key = "password"
}
}
Expand All @@ -181,7 +167,7 @@ resource "kubernetes_deployment" "rag_frontend_deployment" {
name = "DB_NAME"
value_from {
secret_key_ref {
name = data.kubernetes_secret.db_secret.metadata.0.name
name = var.db_secret_name
key = "database"
}
}
Expand Down
8 changes: 1 addition & 7 deletions applications/rag/frontend/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,9 @@ variable "dataset_embeddings_table_name" {
description = "Name of the table that stores vector embeddings for input dataset"
}

variable "inference_service_name" {
type = string
description = "Model inference k8s service name"
}

variable "inference_service_namespace" {
variable "inference_service_endpoint" {
type = string
description = "Model inference k8s service endpoint"
default = "rag"
}

variable "create_service_account" {
Expand Down
3 changes: 3 additions & 0 deletions applications/rag/frontend/versions.tf
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,8 @@ terraform {
kubernetes = {
source = "hashicorp/kubernetes"
}
helm = {
source = "hashicorp/helm"
}
}
}
22 changes: 13 additions & 9 deletions applications/rag/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,12 @@ provider "helm" {
}
}

# Creates the Kubernetes namespace named by var.kubernetes_namespace via the
# shared kubernetes-namespace module. Several workload modules in this file
# list this module in their depends_on.
module "namespace" {
source = "../../modules/kubernetes-namespace"
# The module is handed the aliased helm provider (helm.rag) declared in this file.
providers = { helm = helm.rag}
create_namespace = true
namespace = var.kubernetes_namespace
}

module "kuberay-operator" {
source = "../../modules/kuberay-operator"
Expand All @@ -114,7 +120,7 @@ module "cloudsql" {
project_id = var.project_id
instance_name = var.cloudsql_instance
namespace = var.kubernetes_namespace
depends_on = [module.kuberay-operator]
depends_on = [module.namespace]
}

module "jupyterhub" {
Expand Down Expand Up @@ -142,14 +148,14 @@ module "jupyterhub" {
url_domain_name = var.jupyter_url_domain_name
members_allowlist = var.jupyter_members_allowlist

depends_on = [module.kuberay-operator, module.gcs]
depends_on = [module.namespace, module.gcs]
}

module "kuberay-logging" {
source = "../../modules/kuberay-logging"
providers = { kubernetes = kubernetes.rag }
namespace = var.kubernetes_namespace
depends_on = [module.kuberay-operator]
depends_on = [module.namespace]
}

module "kuberay-cluster" {
Expand All @@ -164,7 +170,7 @@ module "kuberay-cluster" {
autopilot_cluster = local.enable_autopilot
google_service_account = var.ray_service_account
grafana_host = module.kuberay-monitoring.grafana_uri
depends_on = [module.kuberay-operator, module.kuberay-monitoring]
depends_on = [module.kuberay-operator]
}

module "kuberay-monitoring" {
Expand All @@ -175,15 +181,14 @@ module "kuberay-monitoring" {
create_namespace = true
enable_grafana_on_ray_dashboard = var.enable_grafana_on_ray_dashboard
k8s_service_account = var.ray_service_account
depends_on = [module.kuberay-operator]
}

module "inference-server" {
source = "../../tutorials/hf-tgi"
providers = { kubernetes = kubernetes.rag }
namespace = var.kubernetes_namespace
autopilot_cluster = local.enable_autopilot
depends_on = [module.kuberay-operator]
depends_on = [module.namespace]
}

module "frontend" {
Expand All @@ -193,8 +198,7 @@ module "frontend" {
create_service_account = var.create_rag_service_account
google_service_account = var.rag_service_account
namespace = var.kubernetes_namespace
inference_service_name = module.inference-server.inference_service_name
inference_service_namespace = module.inference-server.inference_service_namespace
inference_service_endpoint = module.inference-server.inference_service_endpoint
db_secret_name = module.cloudsql.db_secret_name
db_secret_namespace = module.cloudsql.db_secret_namespace
dataset_embeddings_table_name = var.dataset_embeddings_table_name
Expand All @@ -214,5 +218,5 @@ module "frontend" {
url_domain_addr = var.frontend_url_domain_addr
url_domain_name = var.frontend_url_domain_name
members_allowlist = var.frontend_members_allowlist
depends_on = [ module.gcs ]
depends_on = [ module.namespace ]
}
2 changes: 1 addition & 1 deletion applications/rag/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ variable "gpu_pools" {
{
name = "gpu-pool-l4"
machine_type = "g2-standard-24"
node_locations = "us-central1-a,us-central1-b"
node_locations = "us-central1-a"
autoscaling = true
min_count = 1
max_count = 3
Expand Down
1 change: 1 addition & 0 deletions applications/rag/workloads.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ kubernetes_namespace = "rag"
create_gcs_bucket = true
gcs_bucket = "rag-data-xyzu" # Choose a globally unique bucket name.

cloudsql_instance = "pgvector-instance"
## Service accounts
# Creates a google service account & k8s service account & configures workload identity with appropriate permissions.
# Set to false & update the variable `ray_service_account` to use an existing IAM service account.
Expand Down
4 changes: 4 additions & 0 deletions infrastructure/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ module "public-gke-standard-cluster" {
cluster_name = var.cluster_name
cluster_labels = var.cluster_labels
kubernetes_version = var.kubernetes_version
release_channel = var.release_channel
cluster_region = var.cluster_region
cluster_zones = var.cluster_zones
ip_range_pods = var.ip_range_pods
Expand Down Expand Up @@ -93,6 +94,7 @@ module "public-gke-autopilot-cluster" {
cluster_name = var.cluster_name
cluster_labels = var.cluster_labels
kubernetes_version = var.kubernetes_version
release_channel = var.release_channel
cluster_region = var.cluster_region
cluster_zones = var.cluster_zones
ip_range_pods = var.ip_range_pods
Expand All @@ -117,6 +119,7 @@ module "private-gke-standard-cluster" {
cluster_name = var.cluster_name
cluster_labels = var.cluster_labels
kubernetes_version = var.kubernetes_version
release_channel = var.release_channel
cluster_region = var.cluster_region
cluster_zones = var.cluster_zones
ip_range_pods = var.ip_range_pods
Expand Down Expand Up @@ -154,6 +157,7 @@ module "private-gke-autopilot-cluster" {
cluster_name = var.cluster_name
cluster_labels = var.cluster_labels
kubernetes_version = var.kubernetes_version
release_channel = var.release_channel
cluster_region = var.cluster_region
cluster_zones = var.cluster_zones
ip_range_pods = var.ip_range_pods
Expand Down
5 changes: 5 additions & 0 deletions infrastructure/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,11 @@ variable "kubernetes_version" {
default = "latest"
}

# Release channel forwarded unchanged to whichever GKE cluster module
# (standard/autopilot, public/private) is instantiated in main.tf.
variable "release_channel" {
  type        = string
  description = "GKE release channel to enroll the cluster in, for example RAPID, REGULAR or STABLE."
  default     = "RAPID"
}

variable "cluster_region" {
type = string
}
Expand Down
1 change: 1 addition & 0 deletions modules/gke-autopilot-private-cluster/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
| <a name="input_network_name"></a> [network\_name](#input\_network\_name) | # network variables | `string` | n/a | yes |
| <a name="input_project_id"></a> [project\_id](#input\_project\_id) | GCP project id | `string` | n/a | yes |
| <a name="input_region"></a> [region](#input\_region) | GCP project region or zone | `string` | `"us-central1"` | no |
| <a name="input_release_channel"></a> [release\_channel](#input\_release\_channel) | n/a | `string` | n/a | yes |
| <a name="input_subnetwork_name"></a> [subnetwork\_name](#input\_subnetwork\_name) | n/a | `string` | n/a | yes |

## Outputs
Expand Down
1 change: 1 addition & 0 deletions modules/gke-autopilot-private-cluster/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ module "gke" {
name = var.cluster_name
cluster_resource_labels = var.cluster_labels
kubernetes_version = var.kubernetes_version
release_channel = var.release_channel
region = var.cluster_region
zones = var.cluster_zones
network = var.network_name
Expand Down
4 changes: 4 additions & 0 deletions modules/gke-autopilot-private-cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ variable "kubernetes_version" {
type = string
}

# Required input; passed straight through to the wrapped "gke" module's
# release_channel argument in this module's main.tf.
variable "release_channel" {
  type        = string
  description = "GKE release channel to enroll the cluster in, for example RAPID, REGULAR or STABLE."
}

variable "cluster_regional" {
type = bool
}
Expand Down
3 changes: 2 additions & 1 deletion modules/gke-autopilot-public-cluster/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@ No resources.
| <a name="input_network_name"></a> [network\_name](#input\_network\_name) | # network variables | `string` | n/a | yes |
| <a name="input_project_id"></a> [project\_id](#input\_project\_id) | GCP project id | `string` | n/a | yes |
| <a name="input_region"></a> [region](#input\_region) | GCP project region or zone | `string` | `"us-central1"` | no |
| <a name="input_release_channel"></a> [release\_channel](#input\_release\_channel) | n/a | `string` | n/a | yes |
| <a name="input_subnetwork_name"></a> [subnetwork\_name](#input\_subnetwork\_name) | n/a | `string` | n/a | yes |

## Outputs

| Name | Description |
|------|-------------|
| <a name="output_cluster"></a> [cluster](#output\_cluster) | n/a |
| <a name="output_cluster"></a> [cluster](#output\_cluster) | n/a |
2 changes: 1 addition & 1 deletion modules/gke-autopilot-public-cluster/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ module "gke" {
cluster_resource_labels = var.cluster_labels
region = var.cluster_region
kubernetes_version = var.kubernetes_version
release_channel = var.release_channel
zones = var.cluster_zones
network = var.network_name
subnetwork = var.subnetwork_name
ip_range_pods = var.ip_range_pods
ip_range_services = var.ip_range_services
master_authorized_networks = var.master_authorized_networks
deletion_protection = var.deletion_protection

}
6 changes: 5 additions & 1 deletion modules/gke-autopilot-public-cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ variable "kubernetes_version" {
type = string
}

# Required input; passed straight through to the wrapped "gke" module's
# release_channel argument in this module's main.tf.
variable "release_channel" {
  type        = string
  description = "GKE release channel to enroll the cluster in, for example RAPID, REGULAR or STABLE."
}

variable "cluster_region" {
type = string
}
Expand All @@ -74,4 +78,4 @@ variable "master_authorized_networks" {
variable "deletion_protection" {
type = bool
default = false
}
}
3 changes: 2 additions & 1 deletion modules/gke-standard-private-cluster/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,12 @@
| <a name="input_network_name"></a> [network\_name](#input\_network\_name) | # network variables | `string` | n/a | yes |
| <a name="input_project_id"></a> [project\_id](#input\_project\_id) | GCP project id | `string` | n/a | yes |
| <a name="input_region"></a> [region](#input\_region) | GCP project region or zone | `string` | `"us-central1"` | no |
| <a name="input_release_channel"></a> [release\_channel](#input\_release\_channel) | n/a | `string` | n/a | yes |
| <a name="input_subnetwork_name"></a> [subnetwork\_name](#input\_subnetwork\_name) | n/a | `string` | n/a | yes |
| <a name="input_tpu_pools"></a> [tpu\_pools](#input\_tpu\_pools) | n/a | `list(map(any))` | n/a | yes |

## Outputs

| Name | Description |
|------|-------------|
| <a name="output_cluster"></a> [cluster](#output\_cluster) | n/a |
| <a name="output_cluster"></a> [cluster](#output\_cluster) | n/a |
1 change: 1 addition & 0 deletions modules/gke-standard-private-cluster/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ module "gke" {
name = var.cluster_name
cluster_resource_labels = var.cluster_labels
kubernetes_version = var.kubernetes_version
release_channel = var.release_channel
region = var.cluster_region
zones = var.cluster_zones
network = var.network_name
Expand Down
4 changes: 4 additions & 0 deletions modules/gke-standard-private-cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ variable "kubernetes_version" {
type = string
}

# Required input; passed straight through to the wrapped "gke" module's
# release_channel argument in this module's main.tf.
variable "release_channel" {
  type        = string
  description = "GKE release channel to enroll the cluster in, for example RAPID, REGULAR or STABLE."
}

variable "cluster_regional" {
type = bool
}
Expand Down
3 changes: 2 additions & 1 deletion modules/gke-standard-public-cluster/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,12 @@ No resources.
| <a name="input_network_name"></a> [network\_name](#input\_network\_name) | # network variables | `string` | n/a | yes |
| <a name="input_project_id"></a> [project\_id](#input\_project\_id) | GCP project id | `string` | n/a | yes |
| <a name="input_region"></a> [region](#input\_region) | GCP project region or zone | `string` | `"us-central1"` | no |
| <a name="input_release_channel"></a> [release\_channel](#input\_release\_channel) | n/a | `string` | n/a | yes |
| <a name="input_subnetwork_name"></a> [subnetwork\_name](#input\_subnetwork\_name) | n/a | `string` | n/a | yes |
| <a name="input_tpu_pools"></a> [tpu\_pools](#input\_tpu\_pools) | n/a | `list(map(any))` | n/a | yes |

## Outputs

| Name | Description |
|------|-------------|
| <a name="output_cluster"></a> [cluster](#output\_cluster) | n/a |
| <a name="output_cluster"></a> [cluster](#output\_cluster) | n/a |
1 change: 1 addition & 0 deletions modules/gke-standard-public-cluster/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ module "gke" {
cluster_resource_labels = var.cluster_labels
region = var.cluster_region
kubernetes_version = var.kubernetes_version
release_channel = var.release_channel
zones = var.cluster_zones
network = var.network_name
subnetwork = var.subnetwork_name
Expand Down
6 changes: 5 additions & 1 deletion modules/gke-standard-public-cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ variable "kubernetes_version" {
type = string
}

# Required input; passed straight through to the wrapped "gke" module's
# release_channel argument in this module's main.tf.
variable "release_channel" {
  type        = string
  description = "GKE release channel to enroll the cluster in, for example RAPID, REGULAR or STABLE."
}

variable "cluster_region" {
type = string
}
Expand Down Expand Up @@ -117,4 +121,4 @@ variable "gpu_pools" {

variable "tpu_pools" {
type = list(map(any))
}
}
Loading

0 comments on commit afb8aed

Please sign in to comment.