diff --git a/benchmarks/README.md b/benchmarks/README.md index 77c713818..341393709 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -34,7 +34,7 @@ cd infra/stage-1 # Copy the sample variables and update the project ID, cluster name and other parameters as needed in the `terraform.tfvars` file. -cp sample-terraform.tfvars terraform.tfvars +cp ./sample-tfvars/gpu-sample.tfvars terraform.tfvars # Initialize the Terraform modules. terraform init @@ -67,7 +67,7 @@ cd infra/stage-2 # and the project name and bucket name parameters as needed in the # `terraform.tfvars` file. You can specify a new bucket name in which case it # will be created. -cp sample-terraform.tfvars terraform.tfvars +cp ./sample-tfvars/gpu-sample.tfvars terraform.tfvars # Initialize the Terraform modules. terraform init @@ -88,7 +88,7 @@ cd inference-server/text-generation-inference # Copy the sample variables and update the project number and cluster name in # the fleet_host variable "https://connectgateway.googleapis.com/v1/projects//locations/global/gkeMemberships/" # in the `terraform.tfvars` file. -cp sample-terraform.tfvars terraform.tfvars +cp ./sample-tfvars/gpu-sample.tfvars terraform.tfvars # Initialize the Terraform modules. terraform init @@ -120,7 +120,7 @@ cd benchmark/tools/locust-load-inference # Copy the sample variables and update the project number and cluster name in # the fleet_host variable "https://connectgateway.googleapis.com/v1/projects//locations/global/gkeMemberships/" # in the `terraform.tfvars` file. -cp sample-terraform.tfvars terraform.tfvars +cp ./sample-tfvars/tgi-sample.tfvars terraform.tfvars # Initialize the Terraform modules. terraform init diff --git a/benchmarks/benchmark/tools/locust-load-inference/README.md b/benchmarks/benchmark/tools/locust-load-inference/README.md index cbc3f585c..cc3727b9f 100644 --- a/benchmarks/benchmark/tools/locust-load-inference/README.md +++ b/benchmarks/benchmark/tools/locust-load-inference/README.md @@ -37,6 +37,7 @@ The Locust benchmarking tool currently supports these frameworks: - tensorrt_llm_triton - text generation inference (tgi) - vllm +- jetstream ## Instructions @@ -49,7 +50,7 @@ This is my first prompt.\n This is my second prompt.\n ``` -Example prompt datasets are available in the "../../dataset" folder with python scripts and instructions on how to make the dataset available for consumption by this benchmark. The dataset used in the `sample-terraform.tfvars` is the "ShareGPT_v3_unflitered_cleaned_split". +Example prompt datasets are available in the "../../dataset" folder with python scripts and instructions on how to make the dataset available for consumption by this benchmark. The dataset used in the `./sample-tfvars/tgi-sample.tfvars` is the "ShareGPT_v3_unflitered_cleaned_split". You will set the `gcs_path` in your `terraform.tfvars` to this gcs path containing your prompts. @@ -100,10 +101,10 @@ gcloud artifacts repositories create ai-benchmark --location=us-central1 --repos ### Step 6: create and configure terraform.tfvars -Create a `terraform.tfvars` file. `sample-terraform.tfvars` is provided as an example file. You can copy the file as a starting point. Note that at a minimum you will have to change the existing `credentials_config`, `project_id`, and `artifact_registry`. +Create a `terraform.tfvars` file. `./sample-tfvars/tgi-sample.tfvars` is provided as an example file. You can copy the file as a starting point. 
Note that at a minimum you will have to change the existing `credentials_config`, `project_id`, and `artifact_registry`.

```bash
-cp sample-terraform.tfvars terraform.tfvars
+cp ./sample-tfvars/tgi-sample.tfvars terraform.tfvars
```

Fill out your `terraform.tfvars` with the desired model and server configuration, referring to the list of required and optional variables [here](#variables). The following variables are required:
@@ -265,5 +266,6 @@ To change the benchmark configuration, you will have to rerun terraform destroy
| [sax\_model](#input\_sax\_model) | Benchmark server configuration for sax model. Only required if framework is sax. | `string` | `""` | no |
| [tokenizer](#input\_tokenizer) | Benchmark server configuration for tokenizer. | `string` | `"tiiuae/falcon-7b"` | yes |
| [use\_beam\_search](#input\_use\_beam\_search) | Benchmark server configuration for use beam search. | `bool` | `false` | no |
- [huggingface_secret](#input\_huggingface_secret) | Name of the kubectl huggingface secret token | `string` | `huggingface-secret` | no |
+| [huggingface_secret](#input\_huggingface_secret) | Name of the secret holding the huggingface token. Stored in GCP Secret Manager. | `string` | `null` | no |
+| [k8s_hf_secret](#input\_k8s\_hf\_secret) | Name of the secret holding the huggingface token. Stored in K8s. The key is expected to be named `HF_TOKEN`. See [here](https://kubernetes.io/docs/tasks/configmap-secret/managing-secret-using-kubectl/#use-raw-data) for more. | `string` | `null` | no |
diff --git a/benchmarks/benchmark/tools/locust-load-inference/main.tf b/benchmarks/benchmark/tools/locust-load-inference/main.tf
index 5a8a3b03b..81115fd66 100644
--- a/benchmarks/benchmark/tools/locust-load-inference/main.tf
+++ b/benchmarks/benchmark/tools/locust-load-inference/main.tf
@@ -47,6 +47,7 @@ locals {
    tokenizer                      = var.tokenizer
    use_beam_search                = var.use_beam_search
    hugging_face_token_secret_list = local.hugging_face_token_secret == null ? [] : [local.hugging_face_token_secret]
+    k8s_hf_secret_list             = var.k8s_hf_secret == null ? 
[] : [var.k8s_hf_secret]
    stop_timeout                   = var.stop_timeout
    request_type                   = var.request_type
  })) : data]
diff --git a/benchmarks/benchmark/tools/locust-load-inference/manifest-templates/locust-worker-controller.yaml.tpl b/benchmarks/benchmark/tools/locust-load-inference/manifest-templates/locust-worker-controller.yaml.tpl
index b952e4f36..451fe80ae 100644
--- a/benchmarks/benchmark/tools/locust-load-inference/manifest-templates/locust-worker-controller.yaml.tpl
+++ b/benchmarks/benchmark/tools/locust-load-inference/manifest-templates/locust-worker-controller.yaml.tpl
@@ -48,6 +48,13 @@ spec:
          - name: USE_BEAM_SEARCH
            value: ${use_beam_search}
%{ for hugging_face_token_secret in hugging_face_token_secret_list ~}
+          - name: HUGGINGFACE_TOKEN
+            valueFrom:
+              secretKeyRef:
+                name: hf-key
+                key: HF_TOKEN
+%{ endfor ~}
+%{ for hf_token in k8s_hf_secret_list ~}
          - name: HUGGINGFACE_TOKEN
            valueFrom:
              secretKeyRef:
diff --git a/benchmarks/benchmark/tools/locust-load-inference/sample-tfvars/jetstream-sample.tfvars b/benchmarks/benchmark/tools/locust-load-inference/sample-tfvars/jetstream-sample.tfvars
new file mode 100644
index 000000000..d5b3c0dce
--- /dev/null
+++ b/benchmarks/benchmark/tools/locust-load-inference/sample-tfvars/jetstream-sample.tfvars
@@ -0,0 +1,29 @@
+credentials_config = {
+  fleet_host = "https://connectgateway.googleapis.com/v1/projects/PROJECT_NUMBER/locations/global/gkeMemberships/ai-benchmark"
+}
+
+project_id = "PROJECT_ID"
+
+namespace = "default"
+ksa = "benchmark-sa"
+request_type = "grpc"
+
+k8s_hf_secret = "hf-token"
+
+
+# Locust service configuration
+artifact_registry = "REGISTRY_LOCATION"
+inference_server_service = "jetstream-svc:9000"
+locust_runner_kubernetes_service_account = "sample-runner-sa"
+output_bucket = "${PROJECT_ID}-benchmark-output-bucket-01"
+gcs_path = "PATH_TO_PROMPT_BUCKET"
+
+# Benchmark configuration for Locust Docker accessing inference server
+inference_server_framework = "jetstream"
+tokenizer = "google/gemma-7b"
+
+# Benchmark configuration for triggering single test via Locust Runner
+test_duration = 60
+# Increase test_users to allow more parallelism (especially when testing HPA)
+test_users = 1
+test_rate = 5
diff --git a/benchmarks/benchmark/tools/locust-load-inference/sample-terraform.tfvars b/benchmarks/benchmark/tools/locust-load-inference/sample-tfvars/tgi-sample.tfvars
similarity index 100%
rename from benchmarks/benchmark/tools/locust-load-inference/sample-terraform.tfvars
rename to benchmarks/benchmark/tools/locust-load-inference/sample-tfvars/tgi-sample.tfvars
diff --git a/benchmarks/benchmark/tools/locust-load-inference/variables.tf b/benchmarks/benchmark/tools/locust-load-inference/variables.tf
index 4b1155d1d..4b3d3a030 100644
--- a/benchmarks/benchmark/tools/locust-load-inference/variables.tf
+++ b/benchmarks/benchmark/tools/locust-load-inference/variables.tf
@@ -197,8 +197,16 @@ variable "run_test_automatically" {
  default = false
}

+// TODO: add validation to make k8s_hf_secret & hugging_face_secret mutually exclusive once terraform is updated with: https://discuss.hashicorp.com/t/experiment-feedback-input-variable-validation-can-cross-reference-other-objects/66644
+variable "k8s_hf_secret" {
+  description = "Name of the Kubernetes secret holding the huggingface token; the key is expected to be named HF_TOKEN"
+  type        = string
+  nullable    = true
+  default     = null
+}
+
variable "hugging_face_secret" {
-  description = "name of the kubectl huggingface secret token"
+  description = "Name of the Secret Manager secret holding the huggingface token. 
Security considerations: https://kubernetes.io/docs/concepts/security/secrets-good-practices/"
  type        = string
  nullable    = true
  default     = null
diff --git a/benchmarks/inference-server/jetstream/README.md b/benchmarks/inference-server/jetstream/README.md
new file mode 100644
index 000000000..032febb62
--- /dev/null
+++ b/benchmarks/inference-server/jetstream/README.md
@@ -0,0 +1,151 @@
+# AI on GKE Benchmarking for JetStream
+
+Deploying and benchmarking JetStream on TPU shares a lot with the standard GPU path, but differs enough to warrant a separate README. If you are familiar with deploying on GPU, most of these steps will look familiar. For a more detailed explanation of each step, refer to our primary benchmarking [README](https://github.com/GoogleCloudPlatform/ai-on-gke/tree/main/benchmarks).
+
+## Prerequisites
+- [kaggle user/token](https://www.kaggle.com/docs/api)
+- [huggingface user/token](https://huggingface.co/docs/hub/en/security-tokens)
+
+### Creating K8s infra
+
+To create our TPU cluster, run:
+
+```
+# Stage 1 creates the cluster.
+cd infra/stage-1
+
+# Copy the sample variables and update the project ID, cluster name and other
+# parameters as needed in the `terraform.tfvars` file.
+cp sample-tfvars/jetstream-sample.tfvars terraform.tfvars
+
+# Initialize the Terraform modules.
+terraform init
+
+# Run plan to see the changes that will be made.
+terraform plan
+
+# Run apply if the changes look good by confirming the prompt.
+terraform apply
+```
+To verify that the cluster has been set up correctly, run:
+```
+# Get credentials using fleet membership
+gcloud container fleet memberships get-credentials CLUSTER_NAME
+
+# Run a kubectl command to verify
+kubectl get nodes
+```
+
+## Configure the cluster
+
+To configure the cluster to run inference workloads, we need to set up workload identity and GCS Fuse.
+```
+# Stage 2 configures the cluster for running inference workloads.
+cd infra/stage-2
+
+# Copy the sample variables and update the project number and cluster name in
+# the fleet_host variable "https://connectgateway.googleapis.com/v1/projects/PROJECT_NUMBER/locations/global/gkeMemberships/CLUSTER_NAME"
+# and the project name and bucket name parameters as needed in the
+# `terraform.tfvars` file. You can specify a new bucket name in which case it
+# will be created.
+cp sample-tfvars/jetstream-sample.tfvars terraform.tfvars
+
+# Initialize the Terraform modules.
+terraform init
+
+# Run plan to see the changes that will be made.
+terraform plan
+
+# Run apply if the changes look good by confirming the prompt.
+terraform apply
+```
+
+### Convert Gemma model weights to MaxText weights
+
+JetStream has [two engine implementations](https://github.com/google/JetStream?tab=readme-ov-file#jetstream-engine-implementation): a JAX variant (via MaxText) and a PyTorch variant. This guide uses the JAX backend.
+
+JetStream currently requires that models be converted to MaxText weights. This example deploys a Gemma-7b model. These steps largely follow [this guide](https://cloud.google.com/kubernetes-engine/docs/tutorials/serve-gemma-tpu-jetstream#convert-checkpoints).
+
+*SKIP IF ALREADY COMPLETED*
+
+Create the kaggle secret:
+```
+kubectl create secret generic kaggle-secret \
+    --from-file=kaggle.json
+```
+
+In `model-conversion/kaggle_converter.yaml`, replace `GEMMA_BUCKET_NAME` with the name of the bucket where you would like the model to be stored. 
+***NOTE:*** If you are using a different bucket than the one you created, give the service account Storage Admin permissions on that bucket. This can be done in the UI or by running:
+```
+gcloud projects add-iam-policy-binding PROJECT_ID \
+  --member "serviceAccount:SA_NAME@PROJECT_ID.iam.gserviceaccount.com" \
+  --role roles/storage.admin
+```
+
+Run:
+```
+kubectl apply -f model-conversion/kaggle_converter.yaml
+```
+
+This should take ~10 minutes to complete.
+
+### Deploy JetStream
+
+In `jetstream.yaml`, replace `GEMMA_BUCKET_NAME` with the same bucket name as above.
+
+Run:
+```
+kubectl apply -f jetstream.yaml
+```
+
+Verify the pod is running with:
+```
+kubectl get pods
+```
+
+Get the external IP with:
+
+```
+kubectl get services
+```
+
+You can then send a test prompt with:
+```
+curl --request POST \
+--header "Content-Type: application/json" \
+-s \
+JETSTREAM_EXTERNAL_IP:8000/generate \
+--data \
+'{
+    "prompt": "What is a TPU?",
+    "max_tokens": 200
+}'
+```
+
+### Deploy the benchmark
+
+To prepare the dataset for the Locust inference benchmark, view the README.md file in:
+```
+cd benchmark/dataset/ShareGPT_v3_unflitered_cleaned_split
+```
+
+To deploy the Locust inference benchmark with the above model, run:
+```
+cd benchmark/tools/locust-load-inference
+
+# Copy the sample variables and update the project number and cluster name in
+# the fleet_host variable "https://connectgateway.googleapis.com/v1/projects/PROJECT_NUMBER/locations/global/gkeMemberships/CLUSTER_NAME"
+# in the `terraform.tfvars` file.
+cp sample-tfvars/jetstream-sample.tfvars terraform.tfvars
+
+# Initialize the Terraform modules.
+terraform init
+
+# Run plan to see the changes that will be made.
+terraform plan
+
+# Run apply if the changes look good by confirming the prompt.
+terraform apply
+```
+
+To further interact with the Locust inference benchmark, view the README.md file in `benchmark/tools/locust-load-inference`.
diff --git a/benchmarks/inference-server/jetstream/jetstream.yaml b/benchmarks/inference-server/jetstream/jetstream.yaml
new file mode 100644
index 000000000..b8bb42f98
--- /dev/null
+++ b/benchmarks/inference-server/jetstream/jetstream.yaml
@@ -0,0 +1,63 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: maxengine-server
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: maxengine-server
+  template:
+    metadata:
+      labels:
+        app: maxengine-server
+    spec:
+      serviceAccountName: benchmark-sa
+      nodeSelector:
+        cloud.google.com/gke-tpu-topology: 2x2
+        cloud.google.com/gke-tpu-accelerator: tpu-v5-lite-podslice
+      containers:
+      - name: maxengine-server
+        image: us-docker.pkg.dev/cloud-tpu-images/inference/maxengine-server:v0.2.0
+        args:
+        - model_name=gemma-7b
+        - tokenizer_path=assets/tokenizer.gemma
+        - per_device_batch_size=4
+        - max_prefill_predict_length=1024
+        - max_target_length=2048
+        - async_checkpointing=false
+        - ici_fsdp_parallelism=1
+        - ici_autoregressive_parallelism=-1
+        - ici_tensor_parallelism=1
+        - scan_layers=false
+        - weight_dtype=bfloat16
+        - load_parameters_path=gs://GEMMA_BUCKET_NAME/final/unscanned/gemma_7b-it/0/checkpoints/0/items
+        ports:
+        - containerPort: 9000
+        resources:
+          requests:
+            google.com/tpu: 4
+          limits:
+            google.com/tpu: 4
+      - name: jetstream-http
+        image: us-docker.pkg.dev/cloud-tpu-images/inference/jetstream-http:v0.2.0
+        ports:
+        - containerPort: 8000
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: jetstream-svc
+spec:
+  selector:
+    app: maxengine-server
+  ports:
+  - protocol: TCP
+    name: http
+    port: 8000
+    targetPort: 8000
+  - protocol: TCP
+    name: grpc
port: 9000 + targetPort: 9000 + type: LoadBalancer \ No newline at end of file diff --git a/benchmarks/inference-server/jetstream/model-conversion/kaggle_converter.yaml b/benchmarks/inference-server/jetstream/model-conversion/kaggle_converter.yaml new file mode 100644 index 000000000..2d0ec2d23 --- /dev/null +++ b/benchmarks/inference-server/jetstream/model-conversion/kaggle_converter.yaml @@ -0,0 +1,33 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: data-loader-7b +spec: + ttlSecondsAfterFinished: 30 + template: + spec: + serviceAccountName: benchmark-sa + restartPolicy: Never + containers: + - name: inference-checkpoint + image: us-docker.pkg.dev/cloud-tpu-images/inference/inference-checkpoint:v0.2.0 + args: + - -b=GEMMA_BUCKET_NAME + - -m=google/gemma/maxtext/7b-it/2 + volumeMounts: + - mountPath: "/kaggle/" + name: kaggle-credentials + readOnly: true + resources: + requests: + google.com/tpu: 4 + limits: + google.com/tpu: 4 + nodeSelector: + cloud.google.com/gke-tpu-topology: 2x2 + cloud.google.com/gke-tpu-accelerator: tpu-v5-lite-podslice + volumes: + - name: kaggle-credentials + secret: + defaultMode: 0400 + secretName: kaggle-secret \ No newline at end of file diff --git a/benchmarks/infra/README.md b/benchmarks/infra/README.md index 75aa91628..87ec02b18 100644 --- a/benchmarks/infra/README.md +++ b/benchmarks/infra/README.md @@ -14,7 +14,7 @@ At a high level you will run the following: ``` cd infra/stage-1 -cp sample-terraform.tfvars terraform.tfvars +cp ./sample-tfvars/gpu-sample.tfvars terraform.tfvars terraform init @@ -31,7 +31,7 @@ You can find more details in the stage-2/README.md. At a high level you will run ``` cd infra/stage-2 -cp sample-terraform.tfvars terraform.tfvars +cp ./sample-tfvars/gpu-sample.tfvars terraform.tfvars terraform init diff --git a/benchmarks/infra/stage-1/README.md b/benchmarks/infra/stage-1/README.md index 32260df5a..8f56588a8 100644 --- a/benchmarks/infra/stage-1/README.md +++ b/benchmarks/infra/stage-1/README.md @@ -27,10 +27,10 @@ In particular, stage-1 provisions: ### Step 1: create and configure terraform.tfvars -Create a `terraform.tfvars` file. `sample-terraform.tfvars` is provided as an example file. You can copy the file as a starting point. Note that you will have to change the existing `project_id`. +Create a `terraform.tfvars` file. `./sample-tfvars/gpu-sample.tfvars` is provided as an example file. You can copy the file as a starting point. Note that you will have to change the existing `project_id`. ```bash -cp sample-terraform.tfvars terraform.tfvars +cp ./sample-tfvars/gpu-sample.tfvars terraform.tfvars ``` Fill out your `terraform.tfvars` with the desired project and cluster configuration, referring to the list of required and optional variables [here](#variables). Variables `cluster_name` and `project_id` are required. 
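For orientation, that step can be scripted end to end. A minimal sketch, assuming the GPU sample's `change-me` placeholder (visible in the `gpu-sample.tfvars` hunk below) and GNU `sed`; `my-gcp-project` is an illustrative value, not a real project:

```bash
cd infra/stage-1
cp ./sample-tfvars/gpu-sample.tfvars terraform.tfvars
# The sample ships with project_id = "change-me"; swap in a real project ID.
sed -i 's/change-me/my-gcp-project/' terraform.tfvars
# Confirm the two required variables are set before terraform init/plan/apply.
grep -E '^(project_id|cluster_name)' terraform.tfvars
```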
diff --git a/benchmarks/infra/stage-1/modules/gke-infra/cluster.tf b/benchmarks/infra/stage-1/modules/gke-infra/cluster.tf index 375c64b03..69bb6b33e 100644 --- a/benchmarks/infra/stage-1/modules/gke-infra/cluster.tf +++ b/benchmarks/infra/stage-1/modules/gke-infra/cluster.tf @@ -187,6 +187,7 @@ module "cluster-nodepool" { node_config = { machine_type = each.value.machine_type + spot = each.value.spot shielded_instance_config = { enable_integrity_monitoring = true enable_secure_boot = true diff --git a/benchmarks/infra/stage-1/modules/gke-infra/variables.tf b/benchmarks/infra/stage-1/modules/gke-infra/variables.tf index 4bf79563d..c45bff1fe 100644 --- a/benchmarks/infra/stage-1/modules/gke-infra/variables.tf +++ b/benchmarks/infra/stage-1/modules/gke-infra/variables.tf @@ -143,6 +143,7 @@ variable "nodepools" { gke_version = optional(string), max_node_count = optional(number, 10), min_node_count = optional(number, 1), + spot = optional(bool, false) guest_accelerator = optional(object({ type = optional(string), diff --git a/benchmarks/infra/stage-1/sample-terraform.tfvars b/benchmarks/infra/stage-1/sample-tfvars/gpu-sample.tfvars similarity index 88% rename from benchmarks/infra/stage-1/sample-terraform.tfvars rename to benchmarks/infra/stage-1/sample-tfvars/gpu-sample.tfvars index 1d5a4d045..2557f9c6f 100644 --- a/benchmarks/infra/stage-1/sample-terraform.tfvars +++ b/benchmarks/infra/stage-1/sample-tfvars/gpu-sample.tfvars @@ -1,4 +1,5 @@ -project_id = "change-me" +project_id = "change-me" +// TODO: change all instances of clusterName to be ai-gpu-benchmark. cluster_name = "ai-benchmark" region = "us-central1" gke_location = "us-central1-a" diff --git a/benchmarks/infra/stage-1/sample-tfvars/jetstream-sample.tfvars b/benchmarks/infra/stage-1/sample-tfvars/jetstream-sample.tfvars new file mode 100644 index 000000000..28dd61827 --- /dev/null +++ b/benchmarks/infra/stage-1/sample-tfvars/jetstream-sample.tfvars @@ -0,0 +1,27 @@ +project_id = "PROJECT_ID" +cluster_name = "ai-benchmark" +region = "us-east1" +gke_location = "us-east1-c" +prefix = "ai-benchmark" +spot_vms = true + +vpc_create = { + name = "ai-benchmark" + enable_cloud_nat = true +} + +cluster_options = { + enable_gcs_fuse_csi_driver = false + enable_gcp_filestore_csi_driver = false + enable_gce_persistent_disk_csi_driver = false +} + +nodepools = { + nodepool-tpu = { + machine_type = "ct5lp-hightpu-4t", + spot = true, + }, + nodepool-cpu = { + machine_type = "n2-standard-2", + }, +} diff --git a/benchmarks/infra/stage-2/README.md b/benchmarks/infra/stage-2/README.md index edd3ec61c..7d02cf6c3 100644 --- a/benchmarks/infra/stage-2/README.md +++ b/benchmarks/infra/stage-2/README.md @@ -29,10 +29,10 @@ In particular, stage-2 provisions: ### Step 1: create and configure terraform.tfvars -Create a `terraform.tfvars` file. `sample-terraform.tfvars` is provided as an example file. You can copy the file as a starting point. Note that you will have to change the existing `project_id`. +Create a `terraform.tfvars` file. `./sample-tfvars/gpu-sample.tfvars` is provided as an example file. You can copy the file as a starting point. Note that you will have to change the existing `project_id`. ```bash -cp sample-terraform.tfvars terraform.tfvars +cp ./sample-tfvars/gpu-sample.tfvars terraform.tfvars ``` Fill out your `terraform.tfvars` with the desired project and cluster configuration, referring to the list of required and optional variables [here](#variables). Variables `credentials_config` and `project_id` are required. 
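Both required values can be read back from the stage-1 outputs rather than typed by hand; the jetstream sample tfvars below records the same commands in its comments. A sketch, assuming stage-1 has already been applied and `jq` is installed:

```bash
# Print the fleet_host and project_id to paste into stage-2's terraform.tfvars.
cd infra/stage-1
terraform output -json | jq -r '."fleet_host".value'
terraform output -json | jq -r '."project_id".value'
```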
@@ -104,6 +104,8 @@ kubectl get nodes
| [secret_location](variables.tf#L105) | Location of secret | string | | null |
| [secret_name](variables.tf#L98) | Secret name | string | | null |
| [workload_identity_create](variables.tf#L54) | Setup Workload Identity configuration for newly created GKE cluster. Set to false to skip. | bool | | true |
+| [nvidia_dcgm_create](variables.tf#L136) | Determines if DCGM resources should be added to the cluster. Used in capturing GPU metrics. | bool | | true |
+| [gcs_fuse_create](variables.tf#L142) | Gives the model server service account Storage Admin access to the model store bucket | bool | | true |

## Outputs

diff --git a/benchmarks/infra/stage-2/main.tf b/benchmarks/infra/stage-2/main.tf
index 6d53463f0..28c073fb1 100644
--- a/benchmarks/infra/stage-2/main.tf
+++ b/benchmarks/infra/stage-2/main.tf
@@ -32,4 +32,6 @@ module "gke-setup" {
  secret_create      = var.secret_name == null ? false : true
  secret_name        = var.secret_name
  secret_location    = var.secret_location
+  nvidia_dcgm_create = var.nvidia_dcgm_create
+  gcs_fuse_create    = var.gcs_fuse_create
}
diff --git a/benchmarks/infra/stage-2/modules/gke-setup/main.tf b/benchmarks/infra/stage-2/modules/gke-setup/main.tf
index 28b1a0b8a..98fffb1df 100644
--- a/benchmarks/infra/stage-2/modules/gke-setup/main.tf
+++ b/benchmarks/infra/stage-2/modules/gke-setup/main.tf
@@ -32,7 +32,7 @@ module "gcs-fuse" {
  project_id             = var.project_id
  bucket_name            = var.bucket_name
  bucket_location        = var.bucket_location
-  google_service_account = var.google_service_account
+  google_service_account = module.workload-identity.0.created_resources.gsa_email
  depends_on             = [module.workload-identity]
}
diff --git a/benchmarks/infra/stage-2/modules/gke-setup/modules/gcs-fuse/main.tf b/benchmarks/infra/stage-2/modules/gke-setup/modules/gcs-fuse/main.tf
index 37d8079df..405e7c3cf 100644
--- a/benchmarks/infra/stage-2/modules/gke-setup/modules/gcs-fuse/main.tf
+++ b/benchmarks/infra/stage-2/modules/gke-setup/modules/gcs-fuse/main.tf
@@ -42,6 +42,6 @@ module "gcs-fuse-bucket" {

resource "google_storage_bucket_iam_member" "bucket-iam" {
  bucket = local.bucket_name
-  role   = "roles/storage.objectAdmin"
-  member = data.google_service_account.gsa.member
+  role   = "roles/storage.admin"
+  member = "serviceAccount:${var.google_service_account}"
}
diff --git a/benchmarks/infra/stage-2/sample-terraform.tfvars b/benchmarks/infra/stage-2/sample-tfvars/gpu-sample.tfvars
similarity index 100%
rename from benchmarks/infra/stage-2/sample-terraform.tfvars
rename to benchmarks/infra/stage-2/sample-tfvars/gpu-sample.tfvars
diff --git a/benchmarks/infra/stage-2/sample-tfvars/jetstream-sample.tfvars b/benchmarks/infra/stage-2/sample-tfvars/jetstream-sample.tfvars
new file mode 100644
index 000000000..c9c884f2c
--- /dev/null
+++ b/benchmarks/infra/stage-2/sample-tfvars/jetstream-sample.tfvars
@@ -0,0 +1,27 @@
+# can be obtained from stage-1 by running:
+# terraform output -json | jq '."fleet_host".value'
+credentials_config = {
+  fleet_host = "https://connectgateway.googleapis.com/v1/projects/$PROJECT_NUMBER/locations/global/gkeMemberships/ai-benchmark"
+}
+
+# can be obtained from stage-1 by running:
+# terraform output -json | jq '."project_id".value'
+project_id = "PROJECT_ID"
+
+bucket_name = "${PROJECT_ID}-model-repo-bucket-01"
+bucket_location = "US"
+
+output_bucket_name = "${PROJECT_ID}-benchmark-output-bucket-01"
+output_bucket_location = "US"
+
+google_service_account = "benchmark-sa-01"
+kubernetes_service_account = "benchmark-sa"
+
+benchmark_runner_google_service_account = "sample-runner-sa-01"
+benchmark_runner_kubernetes_service_account = "sample-runner-sa"
+
+nvidia_dcgm_create = false
+namespace = "default"
+namespace_create = false
+gcs_fuse_create = true
+
diff --git a/benchmarks/infra/stage-2/variables.tf b/benchmarks/infra/stage-2/variables.tf
index f6085a639..e68ace4b6 100644
--- a/benchmarks/infra/stage-2/variables.tf
+++ b/benchmarks/infra/stage-2/variables.tf
@@ -132,3 +132,15 @@ variable "secret_location" {
  default  = null
  nullable = true
}
+
+variable "nvidia_dcgm_create" {
+  description = "Whether to create NVIDIA DCGM resources; used for capturing GPU metrics on GPU VMs"
+  type        = bool
+  default     = true
+}
+
+variable "gcs_fuse_create" {
+  description = "Give the model server service account Storage Admin access to the model store bucket"
+  type        = bool
+  default     = true
+}
\ No newline at end of file
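A closing usage note on the two new stage-2 toggles: DCGM only captures GPU metrics, so TPU-only deployments like the jetstream sample above disable it, while `gcs_fuse_create` controls the Storage Admin grant on the model store bucket. Both can also be overridden ad hoc without editing `terraform.tfvars`; a sketch, assuming stage-2 is otherwise configured:

```bash
cd infra/stage-2
# Skip the GPU-metrics (DCGM) resources on a TPU cluster while keeping the
# GCS Fuse IAM grant; -var overrides take precedence over variable defaults.
terraform plan -var='nvidia_dcgm_create=false' -var='gcs_fuse_create=true'
```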