Updating branch with main
german-grandas committed Sep 18, 2024
2 parents 015d3ff + 0f771a1 commit 8f48578
Showing 240 changed files with 163,421 additions and 1,592 deletions.
2 changes: 1 addition & 1 deletion applications/rag/frontend/container/Dockerfile
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-FROM python:3.12.2
+FROM python:3.12.4

ADD ./ /workspace/frontend
WORKDIR /workspace/frontend
4 changes: 2 additions & 2 deletions applications/rag/frontend/container/requirements.txt
@@ -14,7 +14,7 @@

Flask==3.0.0
gunicorn==22.0.0
-Werkzeug==3.0.1
+Werkzeug==3.0.3
langchain==0.1.9
sentence-transformers==2.5.1
text_generation==0.6.1
@@ -30,5 +30,5 @@
google==3.0.0
google-cloud==0.34.0
google-cloud-logging==3.9.0
google-api-python-client==2.114.0
-pymysql==1.1.0
+pymysql==1.1.1
cloud-sql-python-connector[pg8000]==1.7.0
2 changes: 1 addition & 1 deletion applications/rag/frontend/main.tf
@@ -109,7 +109,7 @@ resource "kubernetes_deployment" "rag_frontend_deployment" {
spec {
service_account_name = var.google_service_account
container {
-image = "us-central1-docker.pkg.dev/ai-on-gke/rag-on-gke/frontend@sha256:d65b538742ee29826ee629cfe05c0008e7c09ce5357ddc08ea2eaf3fd6cefe4b"
+image = "us-central1-docker.pkg.dev/ai-on-gke/rag-on-gke/frontend@sha256:ec0e7b1ce6d0f9570957dd7fb3dcf0a16259cba915570846b356a17d6e377c59"
name = "rag-frontend"

port {
109 changes: 59 additions & 50 deletions applications/rag/main.tf
@@ -64,22 +64,22 @@ module "infra" {
source = "../../infrastructure"
count = var.create_cluster ? 1 : 0

-project_id = var.project_id
-cluster_name = local.cluster_name
-cluster_location = var.cluster_location
-region = local.cluster_location_region
-autopilot_cluster = var.autopilot_cluster
-private_cluster = var.private_cluster
-create_network = var.create_network
-network_name = local.network_name
-subnetwork_name = local.network_name
-subnetwork_cidr = var.subnetwork_cidr
-subnetwork_region = local.cluster_location_region
-cpu_pools = var.cpu_pools
-enable_gpu = true
-gpu_pools = var.gpu_pools
-kubernetes_version = var.kubernetes_version
-depends_on = [module.project-services]
+project_id = var.project_id
+cluster_name = local.cluster_name
+cluster_location = var.cluster_location
+region = local.cluster_location_region
+autopilot_cluster = var.autopilot_cluster
+private_cluster = var.private_cluster
+create_network = var.create_network
+network_name = local.network_name
+subnetwork_name = local.network_name
+subnetwork_cidr = var.subnetwork_cidr
+subnetwork_region = local.cluster_location_region
+cpu_pools = var.cpu_pools
+enable_gpu = true
+gpu_pools = var.gpu_pools
+ray_addon_enabled = true
+depends_on = [module.project-services]
}

data "google_container_cluster" "default" {
@@ -152,18 +152,6 @@ module "namespace" {
namespace = local.kubernetes_namespace
}

-module "kuberay-operator" {
-source = "../../modules/kuberay-operator"
-providers = { helm = helm.rag, kubernetes = kubernetes.rag }
-name = "kuberay-operator"
-project_id = var.project_id
-create_namespace = true
-namespace = local.kubernetes_namespace
-google_service_account = local.ray_service_account
-create_service_account = var.create_ray_service_account
-autopilot_cluster = local.enable_autopilot
-}

module "gcs" {
source = "../../modules/gcs"
count = var.create_gcs_bucket ? 1 : 0
@@ -218,11 +206,29 @@ module "jupyterhub" {
depends_on = [module.namespace, module.gcs]
}

-module "kuberay-logging" {
-source = "../../modules/kuberay-logging"
-providers = { kubernetes = kubernetes.rag }
-namespace = local.kubernetes_namespace
-depends_on = [module.namespace]
+module "kuberay-workload-identity" {
+providers = { kubernetes = kubernetes.rag }
+source = "terraform-google-modules/kubernetes-engine/google//modules/workload-identity"
+version = "30.0.0" # Pinning to a previous version as the current version (30.1.0) showed inconsistent behaviour with workload identity service accounts
+use_existing_gcp_sa = !var.create_ray_service_account
+name = local.ray_service_account
+namespace = local.kubernetes_namespace
+project_id = var.project_id
+roles = ["roles/cloudsql.client", "roles/monitoring.viewer"]
+automount_service_account_token = true
+depends_on = [module.namespace]
+}

+module "kuberay-monitoring" {
+source = "../../modules/kuberay-monitoring"
+providers = { helm = helm.rag, kubernetes = kubernetes.rag }
+project_id = var.project_id
+autopilot_cluster = local.enable_autopilot
+namespace = local.kubernetes_namespace
+create_namespace = true
+enable_grafana_on_ray_dashboard = var.enable_grafana_on_ray_dashboard
+k8s_service_account = local.ray_service_account
+depends_on = [module.namespace, module.kuberay-workload-identity]
+}

@@ -233,16 +239,17 @@ module "kuberay-cluster" {
enable_gpu = true
gcs_bucket = var.gcs_bucket
autopilot_cluster = local.enable_autopilot
-db_secret_name = module.cloudsql.db_secret_name
cloudsql_instance_name = local.cloudsql_instance
db_region = local.cloudsql_instance_region
google_service_account = local.ray_service_account
-grafana_host = module.kuberay-monitoring.grafana_uri
disable_network_policy = var.disable_ray_cluster_network_policy
-depends_on = [module.kuberay-operator]
use_custom_image = true
additional_labels = var.additional_labels

+# Implicit dependency
+db_secret_name = module.cloudsql.db_secret_name
+grafana_host = module.kuberay-monitoring.grafana_uri

# IAP Auth parameters
add_auth = var.ray_dashboard_add_auth
create_brand = var.create_brand
@@ -256,23 +263,11 @@
k8s_backend_service_port = var.ray_dashboard_k8s_backend_service_port
domain = var.ray_dashboard_domain
members_allowlist = var.ray_dashboard_members_allowlist != "" ? split(",", var.ray_dashboard_members_allowlist) : []
}

-module "kuberay-monitoring" {
-source = "../../modules/kuberay-monitoring"
-providers = { helm = helm.rag, kubernetes = kubernetes.rag }
-project_id = var.project_id
-autopilot_cluster = local.enable_autopilot
-namespace = local.kubernetes_namespace
-create_namespace = true
-enable_grafana_on_ray_dashboard = var.enable_grafana_on_ray_dashboard
-k8s_service_account = local.ray_service_account
-# TODO(umeshkumhar): remove kuberay-operator depends, figure out service account dependency
-depends_on = [module.namespace, module.kuberay-operator]
+depends_on = [module.gcs, module.kuberay-workload-identity]
}

module "inference-server" {
-source = "../../tutorials-and-examples/hf-tgi"
+source = "../../modules/inference-service"
providers = { kubernetes = kubernetes.rag }
namespace = local.kubernetes_namespace
additional_labels = var.additional_labels
@@ -310,3 +305,17 @@ module "frontend" {
members_allowlist = var.frontend_members_allowlist != "" ? split(",", var.frontend_members_allowlist) : []
depends_on = [module.namespace]
}

+resource "helm_release" "gmp-apps" {
+name = "gmp-apps"
+provider = helm.rag
+chart = "../../charts/gmp-engine/"
+namespace = local.kubernetes_namespace
+# Timeout is increased to guarantee sufficient scale-up time for Autopilot nodes.
+timeout = 1200
+depends_on = [module.inference-server, module.frontend]
+values = [
+"${file("${path.module}/podmonitoring.yaml")}"
+]
+}
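A note on the applications/rag/main.tf changes above: the in-cluster kuberay-operator and kuberay-logging modules are dropped (the cluster is now created with ray_addon_enabled = true), and the Ray service account is instead bound to its Google service account through the terraform-google-modules workload-identity module. A minimal sketch of the annotated Kubernetes ServiceAccount that binding manages, with placeholder names (the real values come from local.ray_service_account, local.kubernetes_namespace, and var.project_id):

# Sketch only; names below are placeholders, not values taken from this repository.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: ray-sa                   # local.ray_service_account
  namespace: rag                 # local.kubernetes_namespace
  annotations:
    # Workload Identity link to the GCP service account that is granted
    # roles/cloudsql.client and roles/monitoring.viewer above.
    iam.gke.io/gcp-service-account: ray-sa@PROJECT_ID.iam.gserviceaccount.com
automountServiceAccountToken: true

Pods running as this service account can reach Cloud SQL and Cloud Monitoring without exported keys, which is why kuberay-monitoring and kuberay-cluster now depend on module.kuberay-workload-identity rather than module.kuberay-operator.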

11 changes: 11 additions & 0 deletions applications/rag/podmonitoring.yaml
@@ -0,0 +1,11 @@
+podMonitoring:
+- name: mistral-7b-instruct
+  selector:
+    app: mistral-7b-instruct
+  port: metrics
+  interval: 30s
+- name: rag-frontend
+  selector:
+    app: rag-frontend
+  port: metrics
+  interval: 30s
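The gmp-apps Helm release added in applications/rag/main.tf passes this file to the gmp-engine chart as values. Assuming the chart maps each podMonitoring entry one-to-one onto a Google Managed Prometheus PodMonitoring resource (the template lives in charts/gmp-engine and is not shown in this diff), the first entry would render roughly as:

# Rough sketch of the rendered object; the exact output depends on the chart templates.
apiVersion: monitoring.googleapis.com/v1
kind: PodMonitoring
metadata:
  name: mistral-7b-instruct
spec:
  selector:
    matchLabels:
      app: mistral-7b-instruct
  endpoints:
  - port: metrics
    interval: 30s

Managed Service for Prometheus would then scrape the metrics port of pods labeled app: mistral-7b-instruct every 30 seconds.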
2 changes: 1 addition & 1 deletion applications/rag/variables.tf
@@ -33,7 +33,7 @@ variable "cluster_location" {

variable "kubernetes_version" {
type = string
-default = "1.28"
+default = "1.30"
}

variable "kubernetes_namespace" {
34 changes: 14 additions & 20 deletions applications/ray/main.tf
@@ -73,6 +73,7 @@ module "infra" {
cpu_pools = var.cpu_pools
enable_gpu = var.enable_gpu
gpu_pools = var.gpu_pools
+ray_addon_enabled = true
depends_on = [module.project-services]
}

@@ -134,24 +135,17 @@ module "namespace" {
namespace = local.kubernetes_namespace
}

-module "kuberay-operator" {
-source = "../../modules/kuberay-operator"
-providers = { helm = helm.ray, kubernetes = kubernetes.ray }
-name = "kuberay-operator"
-create_namespace = true
-namespace = local.kubernetes_namespace
-project_id = var.project_id
-autopilot_cluster = local.enable_autopilot
-google_service_account = local.workload_identity_service_account
-create_service_account = var.create_service_account
-}

-module "kuberay-logging" {
-source = "../../modules/kuberay-logging"
-providers = { kubernetes = kubernetes.ray }
-namespace = local.kubernetes_namespace

-depends_on = [module.namespace]
+module "kuberay-workload-identity" {
+providers = { kubernetes = kubernetes.ray }
+source = "terraform-google-modules/kubernetes-engine/google//modules/workload-identity"
+version = "30.0.0" # Pinning to a previous version as the current version (30.1.0) showed inconsistent behaviour with workload identity service accounts
+use_existing_gcp_sa = !var.create_service_account
+name = local.workload_identity_service_account
+namespace = local.kubernetes_namespace
+project_id = var.project_id
+roles = ["roles/cloudsql.client", "roles/monitoring.viewer"]
+automount_service_account_token = true
+depends_on = [module.namespace]
+}

@@ -164,7 +158,7 @@ module "kuberay-monitoring" {
create_namespace = true
enable_grafana_on_ray_dashboard = var.enable_grafana_on_ray_dashboard
k8s_service_account = local.workload_identity_service_account
-depends_on = [module.kuberay-operator]
+depends_on = [module.kuberay-workload-identity]
}

module "gcs" {
@@ -204,7 +198,7 @@ module "kuberay-cluster" {
k8s_backend_service_port = var.ray_dashboard_k8s_backend_service_port
domain = var.ray_dashboard_domain
members_allowlist = var.ray_dashboard_members_allowlist != "" ? split(",", var.ray_dashboard_members_allowlist) : []
-depends_on = [module.gcs, module.kuberay-operator]
+depends_on = [module.gcs, module.kuberay-workload-identity]
}

