diff --git a/benchmarks/README.md b/benchmarks/README.md index 77c713818..341393709 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -34,7 +34,7 @@ cd infra/stage-1 # Copy the sample variables and update the project ID, cluster name and other parameters as needed in the `terraform.tfvars` file. -cp sample-terraform.tfvars terraform.tfvars +cp ./sample-tfvars/gpu-sample.tfvars terraform.tfvars # Initialize the Terraform modules. terraform init @@ -67,7 +67,7 @@ cd infra/stage-2 # and the project name and bucket name parameters as needed in the # `terraform.tfvars` file. You can specify a new bucket name in which case it # will be created. -cp sample-terraform.tfvars terraform.tfvars +cp ./sample-tfvars/gpu-sample.tfvars terraform.tfvars # Initialize the Terraform modules. terraform init @@ -88,7 +88,7 @@ cd inference-server/text-generation-inference # Copy the sample variables and update the project number and cluster name in # the fleet_host variable "https://connectgateway.googleapis.com/v1/projects//locations/global/gkeMemberships/" # in the `terraform.tfvars` file. -cp sample-terraform.tfvars terraform.tfvars +cp ./sample-tfvars/gpu-sample.tfvars terraform.tfvars # Initialize the Terraform modules. terraform init @@ -120,7 +120,7 @@ cd benchmark/tools/locust-load-inference # Copy the sample variables and update the project number and cluster name in # the fleet_host variable "https://connectgateway.googleapis.com/v1/projects//locations/global/gkeMemberships/" # in the `terraform.tfvars` file. -cp sample-terraform.tfvars terraform.tfvars +cp ./sample-tfvars/tgi-sample.tfvars terraform.tfvars # Initialize the Terraform modules. terraform init diff --git a/benchmarks/benchmark/tools/locust-load-inference/README.md b/benchmarks/benchmark/tools/locust-load-inference/README.md index cbc3f585c..cc3727b9f 100644 --- a/benchmarks/benchmark/tools/locust-load-inference/README.md +++ b/benchmarks/benchmark/tools/locust-load-inference/README.md @@ -37,6 +37,7 @@ The Locust benchmarking tool currently supports these frameworks: - tensorrt_llm_triton - text generation inference (tgi) - vllm +- jetstream ## Instructions @@ -49,7 +50,7 @@ This is my first prompt.\n This is my second prompt.\n ``` -Example prompt datasets are available in the "../../dataset" folder with python scripts and instructions on how to make the dataset available for consumption by this benchmark. The dataset used in the `sample-terraform.tfvars` is the "ShareGPT_v3_unflitered_cleaned_split". +Example prompt datasets are available in the "../../dataset" folder with python scripts and instructions on how to make the dataset available for consumption by this benchmark. The dataset used in the `./sample-tfvars/tgi-sample.tfvars` is the "ShareGPT_v3_unflitered_cleaned_split". You will set the `gcs_path` in your `terraform.tfvars` to this gcs path containing your prompts. @@ -100,10 +101,10 @@ gcloud artifacts repositories create ai-benchmark --location=us-central1 --repos ### Step 6: create and configure terraform.tfvars -Create a `terraform.tfvars` file. `sample-terraform.tfvars` is provided as an example file. You can copy the file as a starting point. Note that at a minimum you will have to change the existing `credentials_config`, `project_id`, and `artifact_registry`. +Create a `terraform.tfvars` file. `./sample-tfvars/tgi-sample.tfvars` is provided as an example file. You can copy the file as a starting point. 
Note that at a minimum you will have to change the existing `credentials_config`, `project_id`, and `artifact_registry`.

```bash
-cp sample-terraform.tfvars terraform.tfvars
+cp ./sample-tfvars/tgi-sample.tfvars terraform.tfvars
```

Fill out your `terraform.tfvars` with the desired model and server configuration, referring to the list of required and optional variables [here](#variables). The following variables are required:
@@ -265,5 +266,6 @@ To change the benchmark configuration, you will have to rerun terraform destroy
| [sax\_model](#input\_sax\_model) | Benchmark server configuration for sax model. Only required if framework is sax. | `string` | `""` | no |
| [tokenizer](#input\_tokenizer) | Benchmark server configuration for tokenizer. | `string` | `"tiiuae/falcon-7b"` | yes |
| [use\_beam\_search](#input\_use\_beam\_search) | Benchmark server configuration for use beam search. | `bool` | `false` | no |
- [huggingface_secret](#input\_huggingface_secret) | Name of the kubectl huggingface secret token | `string` | `huggingface-secret` | no |
+| [huggingface_secret](#input\_huggingface_secret) | Name of the secret holding the huggingface token. Stored in GCP Secret Manager. | `string` | `null` | no |
+| [k8s_hf_secret](#input\_k8s\_hf\_secret) | Name of the secret holding the huggingface token. Stored in K8s. The key is expected to be named `HF_TOKEN`. See [here](https://kubernetes.io/docs/tasks/configmap-secret/managing-secret-using-kubectl/#use-raw-data) for more. | `string` | `null` | no |
diff --git a/benchmarks/benchmark/tools/locust-load-inference/main.tf b/benchmarks/benchmark/tools/locust-load-inference/main.tf
index 5a8a3b03b..81115fd66 100644
--- a/benchmarks/benchmark/tools/locust-load-inference/main.tf
+++ b/benchmarks/benchmark/tools/locust-load-inference/main.tf
@@ -47,6 +47,7 @@ locals {
    tokenizer                      = var.tokenizer
    use_beam_search                = var.use_beam_search
    hugging_face_token_secret_list = local.hugging_face_token_secret == null ? [] : [local.hugging_face_token_secret]
+    k8s_hf_secret_list             = var.k8s_hf_secret == null ? 
[] : [var.k8s_hf_secret]
    stop_timeout                   = var.stop_timeout
    request_type                   = var.request_type
  })) : data]
diff --git a/benchmarks/benchmark/tools/locust-load-inference/manifest-templates/locust-worker-controller.yaml.tpl b/benchmarks/benchmark/tools/locust-load-inference/manifest-templates/locust-worker-controller.yaml.tpl
index b952e4f36..451fe80ae 100644
--- a/benchmarks/benchmark/tools/locust-load-inference/manifest-templates/locust-worker-controller.yaml.tpl
+++ b/benchmarks/benchmark/tools/locust-load-inference/manifest-templates/locust-worker-controller.yaml.tpl
@@ -48,6 +48,13 @@ spec:
          - name: USE_BEAM_SEARCH
            value: ${use_beam_search}
%{ for hugging_face_token_secret in hugging_face_token_secret_list ~}
+          - name: HUGGINGFACE_TOKEN
+            valueFrom:
+              secretKeyRef:
+                name: hf-key
+                key: HF_TOKEN
+%{ endfor ~}
+%{ for hf_token in k8s_hf_secret_list ~}
          - name: HUGGINGFACE_TOKEN
            valueFrom:
              secretKeyRef:
diff --git a/benchmarks/benchmark/tools/locust-load-inference/sample-tfvars/jetstream-sample.tfvars b/benchmarks/benchmark/tools/locust-load-inference/sample-tfvars/jetstream-sample.tfvars
new file mode 100644
index 000000000..d5b3c0dce
--- /dev/null
+++ b/benchmarks/benchmark/tools/locust-load-inference/sample-tfvars/jetstream-sample.tfvars
@@ -0,0 +1,29 @@
+credentials_config = {
+  fleet_host = "https://connectgateway.googleapis.com/v1/projects/PROJECT_NUMBER/locations/global/gkeMemberships/ai-benchmark"
+}
+
+project_id = "PROJECT_ID"
+
+namespace = "default"
+ksa = "benchmark-sa"
+request_type = "grpc"
+
+k8s_hf_secret = "hf-token"
+
+
+# Locust service configuration
+artifact_registry = "REGISTRY_LOCATION"
+inference_server_service = "jetstream-svc:9000"
+locust_runner_kubernetes_service_account = "sample-runner-sa"
+output_bucket = "${PROJECT_ID}-benchmark-output-bucket-01"
+gcs_path = "PATH_TO_PROMPT_BUCKET"
+
+# Benchmark configuration for Locust Docker accessing inference server
+inference_server_framework = "jetstream"
+tokenizer = "google/gemma-7b"
+
+# Benchmark configuration for triggering single test via Locust Runner
+test_duration = 60
+# Increase test_users to allow more parallelism (especially when testing HPA)
+test_users = 1
+test_rate = 5
diff --git a/benchmarks/benchmark/tools/locust-load-inference/sample-terraform.tfvars b/benchmarks/benchmark/tools/locust-load-inference/sample-tfvars/tgi-sample.tfvars
similarity index 100%
rename from benchmarks/benchmark/tools/locust-load-inference/sample-terraform.tfvars
rename to benchmarks/benchmark/tools/locust-load-inference/sample-tfvars/tgi-sample.tfvars
diff --git a/benchmarks/benchmark/tools/locust-load-inference/variables.tf b/benchmarks/benchmark/tools/locust-load-inference/variables.tf
index 4b1155d1d..4b3d3a030 100644
--- a/benchmarks/benchmark/tools/locust-load-inference/variables.tf
+++ b/benchmarks/benchmark/tools/locust-load-inference/variables.tf
@@ -197,8 +197,16 @@ variable "run_test_automatically" {
  default = false
}

+// TODO: add validation to make k8s_hf_secret & hugging_face_secret mutually exclusive once terraform is updated with: https://discuss.hashicorp.com/t/experiment-feedback-input-variable-validation-can-cross-reference-other-objects/66644
+variable "k8s_hf_secret" {
+  description = "Name of the Kubernetes secret holding the huggingface token; the key is expected to be named HF_TOKEN"
+  type        = string
+  nullable    = true
+  default     = null
+}
+
variable "hugging_face_secret" {
-  description = "name of the kubectl huggingface secret token"
+  description = "Name of the Secret Manager secret holding the huggingface token. 
Security considerations: https://kubernetes.io/docs/concepts/security/secrets-good-practices/"
  type        = string
  nullable    = true
  default     = null
diff --git a/benchmarks/inference-server/jetstream/README.md b/benchmarks/inference-server/jetstream/README.md
new file mode 100644
index 000000000..032febb62
--- /dev/null
+++ b/benchmarks/inference-server/jetstream/README.md
@@ -0,0 +1,151 @@
+# AI on GKE Benchmarking for JetStream
+
+Deploying and benchmarking JetStream on TPU shares a lot with the standard GPU path, but differs enough to warrant a separate README. If you are familiar with deploying on GPU, most of these steps will look familiar. For a more detailed explanation of each step, refer to our primary benchmarking [README](https://github.com/GoogleCloudPlatform/ai-on-gke/tree/main/benchmarks).
+
+## Prerequisites
+- [kaggle user/token](https://www.kaggle.com/docs/api)
+- [huggingface user/token](https://huggingface.co/docs/hub/en/security-tokens)
+
+### Creating K8s infra
+
+To create our TPU cluster, run:
+
+```
+# Stage 1 creates the cluster.
+cd infra/stage-1
+
+# Copy the sample variables and update the project ID, cluster name and other
+# parameters as needed in the `terraform.tfvars` file.
+cp sample-tfvars/jetstream-sample.tfvars terraform.tfvars
+
+# Initialize the Terraform modules.
+terraform init
+
+# Run plan to see the changes that will be made.
+terraform plan
+
+# Run apply if the changes look good by confirming the prompt.
+terraform apply
+```
+To verify that the cluster has been set up correctly, run:
+```
+# Get credentials using fleet membership
+gcloud container fleet memberships get-credentials CLUSTER_NAME
+
+# Run a kubectl command to verify
+kubectl get nodes
+```
+
+## Configure the cluster
+
+To configure the cluster to run inference workloads, we need to set up workload identity and GCS Fuse.
+```
+# Stage 2 configures the cluster for running inference workloads.
+cd infra/stage-2
+
+# Copy the sample variables and update the project number and cluster name in
+# the fleet_host variable "https://connectgateway.googleapis.com/v1/projects/PROJECT_NUMBER/locations/global/gkeMemberships/CLUSTER_NAME"
+# and the project name and bucket name parameters as needed in the
+# `terraform.tfvars` file. You can specify a new bucket name in which case it
+# will be created.
+cp sample-tfvars/jetstream-sample.tfvars terraform.tfvars
+
+# Initialize the Terraform modules.
+terraform init
+
+# Run plan to see the changes that will be made.
+terraform plan
+
+# Run apply if the changes look good by confirming the prompt.
+terraform apply
+```
+
+### Convert Gemma model weights to MaxText weights
+
+JetStream has [two engine implementations](https://github.com/google/JetStream?tab=readme-ov-file#jetstream-engine-implementation): a JAX variant (via MaxText) and a PyTorch variant. This guide uses the JAX backend.
+
+JetStream currently requires that models be converted to MaxText weights. This example deploys a Gemma-7b model. These steps largely follow [this guide](https://cloud.google.com/kubernetes-engine/docs/tutorials/serve-gemma-tpu-jetstream#convert-checkpoints).
+
+*SKIP IF ALREADY COMPLETED*
+
+Create the kaggle secret:
+```
+kubectl create secret generic kaggle-secret \
+    --from-file=kaggle.json
+```
+
+In `model-conversion/kaggle_converter.yaml`, replace `GEMMA_BUCKET_NAME` with the name of the bucket where you would like the model to be stored. 
+***NOTE:*** If you are using a different bucket than the one you created, give the service account Storage Admin permissions on that bucket. This can be done in the UI or by running:
+```
+gcloud projects add-iam-policy-binding PROJECT_ID \
+  --member "serviceAccount:SA_NAME@PROJECT_ID.iam.gserviceaccount.com" \
+  --role roles/storage.admin
+```
+
+Run:
+```
+kubectl apply -f model-conversion/kaggle_converter.yaml
+```
+
+This should take ~10 minutes to complete.
+
+### Deploy JetStream
+
+In `jetstream.yaml`, replace `GEMMA_BUCKET_NAME` with the same bucket name as above.
+
+Run:
+```
+kubectl apply -f jetstream.yaml
+```
+
+Verify the pod is running with:
+```
+kubectl get pods
+```
+
+Get the external IP with:
+
+```
+kubectl get services
+```
+
+You can then send a test prompt with:
+```
+curl --request POST \
+--header "Content-Type: application/json" \
+-s \
+JETSTREAM_EXTERNAL_IP:8000/generate \
+--data \
+'{
+    "prompt": "What is a TPU?",
+    "max_tokens": 200
+}'
+```
+
+### Deploy the benchmark
+
+To prepare the dataset for the Locust inference benchmark, view the README.md file in:
+```
+cd benchmark/dataset/ShareGPT_v3_unflitered_cleaned_split
+```
+
+To deploy the Locust inference benchmark with the above model, run:
+```
+cd benchmark/tools/locust-load-inference
+
+# Copy the sample variables and update the project number and cluster name in
+# the fleet_host variable "https://connectgateway.googleapis.com/v1/projects/PROJECT_NUMBER/locations/global/gkeMemberships/CLUSTER_NAME"
+# in the `terraform.tfvars` file.
+cp sample-tfvars/jetstream-sample.tfvars terraform.tfvars
+
+# Initialize the Terraform modules.
+terraform init
+
+# Run plan to see the changes that will be made.
+terraform plan
+
+# Run apply if the changes look good by confirming the prompt.
+terraform apply
+```
+
+To further interact with the Locust inference benchmark, view the README.md file in `benchmark/tools/locust-load-inference`.
diff --git a/benchmarks/inference-server/jetstream/jetstream.yaml b/benchmarks/inference-server/jetstream/jetstream.yaml
new file mode 100644
index 000000000..b8bb42f98
--- /dev/null
+++ b/benchmarks/inference-server/jetstream/jetstream.yaml
@@ -0,0 +1,63 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: maxengine-server
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: maxengine-server
+  template:
+    metadata:
+      labels:
+        app: maxengine-server
+    spec:
+      serviceAccountName: benchmark-sa
+      nodeSelector:
+        cloud.google.com/gke-tpu-topology: 2x2
+        cloud.google.com/gke-tpu-accelerator: tpu-v5-lite-podslice
+      containers:
+      - name: maxengine-server
+        image: us-docker.pkg.dev/cloud-tpu-images/inference/maxengine-server:v0.2.0
+        args:
+        - model_name=gemma-7b
+        - tokenizer_path=assets/tokenizer.gemma
+        - per_device_batch_size=4
+        - max_prefill_predict_length=1024
+        - max_target_length=2048
+        - async_checkpointing=false
+        - ici_fsdp_parallelism=1
+        - ici_autoregressive_parallelism=-1
+        - ici_tensor_parallelism=1
+        - scan_layers=false
+        - weight_dtype=bfloat16
+        - load_parameters_path=gs://GEMMA_BUCKET_NAME/final/unscanned/gemma_7b-it/0/checkpoints/0/items
+        ports:
+        - containerPort: 9000
+        resources:
+          requests:
+            google.com/tpu: 4
+          limits:
+            google.com/tpu: 4
+      - name: jetstream-http
+        image: us-docker.pkg.dev/cloud-tpu-images/inference/jetstream-http:v0.2.0
+        ports:
+        - containerPort: 8000
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: jetstream-svc
+spec:
+  selector:
+    app: maxengine-server
+  ports:
+  - protocol: TCP
+    name: http
+    port: 8000
+    targetPort: 8000
+  - protocol: TCP
+    name: grpc
port: 9000 + targetPort: 9000 + type: LoadBalancer \ No newline at end of file diff --git a/benchmarks/inference-server/jetstream/model-conversion/kaggle_converter.yaml b/benchmarks/inference-server/jetstream/model-conversion/kaggle_converter.yaml new file mode 100644 index 000000000..2d0ec2d23 --- /dev/null +++ b/benchmarks/inference-server/jetstream/model-conversion/kaggle_converter.yaml @@ -0,0 +1,33 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: data-loader-7b +spec: + ttlSecondsAfterFinished: 30 + template: + spec: + serviceAccountName: benchmark-sa + restartPolicy: Never + containers: + - name: inference-checkpoint + image: us-docker.pkg.dev/cloud-tpu-images/inference/inference-checkpoint:v0.2.0 + args: + - -b=GEMMA_BUCKET_NAME + - -m=google/gemma/maxtext/7b-it/2 + volumeMounts: + - mountPath: "/kaggle/" + name: kaggle-credentials + readOnly: true + resources: + requests: + google.com/tpu: 4 + limits: + google.com/tpu: 4 + nodeSelector: + cloud.google.com/gke-tpu-topology: 2x2 + cloud.google.com/gke-tpu-accelerator: tpu-v5-lite-podslice + volumes: + - name: kaggle-credentials + secret: + defaultMode: 0400 + secretName: kaggle-secret \ No newline at end of file diff --git a/benchmarks/infra/README.md b/benchmarks/infra/README.md index 75aa91628..87ec02b18 100644 --- a/benchmarks/infra/README.md +++ b/benchmarks/infra/README.md @@ -14,7 +14,7 @@ At a high level you will run the following: ``` cd infra/stage-1 -cp sample-terraform.tfvars terraform.tfvars +cp ./sample-tfvars/gpu-sample.tfvars terraform.tfvars terraform init @@ -31,7 +31,7 @@ You can find more details in the stage-2/README.md. At a high level you will run ``` cd infra/stage-2 -cp sample-terraform.tfvars terraform.tfvars +cp ./sample-tfvars/gpu-sample.tfvars terraform.tfvars terraform init diff --git a/benchmarks/infra/stage-1/README.md b/benchmarks/infra/stage-1/README.md index 32260df5a..8f56588a8 100644 --- a/benchmarks/infra/stage-1/README.md +++ b/benchmarks/infra/stage-1/README.md @@ -27,10 +27,10 @@ In particular, stage-1 provisions: ### Step 1: create and configure terraform.tfvars -Create a `terraform.tfvars` file. `sample-terraform.tfvars` is provided as an example file. You can copy the file as a starting point. Note that you will have to change the existing `project_id`. +Create a `terraform.tfvars` file. `./sample-tfvars/gpu-sample.tfvars` is provided as an example file. You can copy the file as a starting point. Note that you will have to change the existing `project_id`. ```bash -cp sample-terraform.tfvars terraform.tfvars +cp ./sample-tfvars/gpu-sample.tfvars terraform.tfvars ``` Fill out your `terraform.tfvars` with the desired project and cluster configuration, referring to the list of required and optional variables [here](#variables). Variables `cluster_name` and `project_id` are required. 
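For orientation, that step can be scripted end to end. A minimal sketch, assuming the GPU sample's `change-me` placeholder (visible in the `gpu-sample.tfvars` hunk below) and GNU `sed`; `my-gcp-project` is an illustrative value, not a real project:

```bash
cd infra/stage-1
cp ./sample-tfvars/gpu-sample.tfvars terraform.tfvars
# The sample ships with project_id = "change-me"; swap in a real project ID.
sed -i 's/change-me/my-gcp-project/' terraform.tfvars
# Confirm the two required variables are set before terraform init/plan/apply.
grep -E '^(project_id|cluster_name)' terraform.tfvars
```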
diff --git a/benchmarks/infra/stage-1/modules/gke-infra/cluster.tf b/benchmarks/infra/stage-1/modules/gke-infra/cluster.tf index 375c64b03..69bb6b33e 100644 --- a/benchmarks/infra/stage-1/modules/gke-infra/cluster.tf +++ b/benchmarks/infra/stage-1/modules/gke-infra/cluster.tf @@ -187,6 +187,7 @@ module "cluster-nodepool" { node_config = { machine_type = each.value.machine_type + spot = each.value.spot shielded_instance_config = { enable_integrity_monitoring = true enable_secure_boot = true diff --git a/benchmarks/infra/stage-1/modules/gke-infra/variables.tf b/benchmarks/infra/stage-1/modules/gke-infra/variables.tf index 4bf79563d..c45bff1fe 100644 --- a/benchmarks/infra/stage-1/modules/gke-infra/variables.tf +++ b/benchmarks/infra/stage-1/modules/gke-infra/variables.tf @@ -143,6 +143,7 @@ variable "nodepools" { gke_version = optional(string), max_node_count = optional(number, 10), min_node_count = optional(number, 1), + spot = optional(bool, false) guest_accelerator = optional(object({ type = optional(string), diff --git a/benchmarks/infra/stage-1/sample-terraform.tfvars b/benchmarks/infra/stage-1/sample-tfvars/gpu-sample.tfvars similarity index 88% rename from benchmarks/infra/stage-1/sample-terraform.tfvars rename to benchmarks/infra/stage-1/sample-tfvars/gpu-sample.tfvars index 1d5a4d045..2557f9c6f 100644 --- a/benchmarks/infra/stage-1/sample-terraform.tfvars +++ b/benchmarks/infra/stage-1/sample-tfvars/gpu-sample.tfvars @@ -1,4 +1,5 @@ -project_id = "change-me" +project_id = "change-me" +// TODO: change all instances of clusterName to be ai-gpu-benchmark. cluster_name = "ai-benchmark" region = "us-central1" gke_location = "us-central1-a" diff --git a/benchmarks/infra/stage-1/sample-tfvars/jetstream-sample.tfvars b/benchmarks/infra/stage-1/sample-tfvars/jetstream-sample.tfvars new file mode 100644 index 000000000..28dd61827 --- /dev/null +++ b/benchmarks/infra/stage-1/sample-tfvars/jetstream-sample.tfvars @@ -0,0 +1,27 @@ +project_id = "PROJECT_ID" +cluster_name = "ai-benchmark" +region = "us-east1" +gke_location = "us-east1-c" +prefix = "ai-benchmark" +spot_vms = true + +vpc_create = { + name = "ai-benchmark" + enable_cloud_nat = true +} + +cluster_options = { + enable_gcs_fuse_csi_driver = false + enable_gcp_filestore_csi_driver = false + enable_gce_persistent_disk_csi_driver = false +} + +nodepools = { + nodepool-tpu = { + machine_type = "ct5lp-hightpu-4t", + spot = true, + }, + nodepool-cpu = { + machine_type = "n2-standard-2", + }, +} diff --git a/benchmarks/infra/stage-2/README.md b/benchmarks/infra/stage-2/README.md index edd3ec61c..7d02cf6c3 100644 --- a/benchmarks/infra/stage-2/README.md +++ b/benchmarks/infra/stage-2/README.md @@ -29,10 +29,10 @@ In particular, stage-2 provisions: ### Step 1: create and configure terraform.tfvars -Create a `terraform.tfvars` file. `sample-terraform.tfvars` is provided as an example file. You can copy the file as a starting point. Note that you will have to change the existing `project_id`. +Create a `terraform.tfvars` file. `./sample-tfvars/gpu-sample.tfvars` is provided as an example file. You can copy the file as a starting point. Note that you will have to change the existing `project_id`. ```bash -cp sample-terraform.tfvars terraform.tfvars +cp ./sample-tfvars/gpu-sample.tfvars terraform.tfvars ``` Fill out your `terraform.tfvars` with the desired project and cluster configuration, referring to the list of required and optional variables [here](#variables). Variables `credentials_config` and `project_id` are required. 
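Both required values can be read back from the stage-1 outputs rather than typed by hand; the jetstream sample tfvars below records the same commands in its comments. A sketch, assuming stage-1 has already been applied and `jq` is installed:

```bash
# Print the fleet_host and project_id to paste into stage-2's terraform.tfvars.
cd infra/stage-1
terraform output -json | jq -r '."fleet_host".value'
terraform output -json | jq -r '."project_id".value'
```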
@@ -104,6 +104,8 @@ kubectl get nodes
| [secret_location](variables.tf#L105) | Location of secret | string | | null |
| [secret_name](variables.tf#L98) | Secret name | string | | null |
| [workload_identity_create](variables.tf#L54) | Setup Workload Identity configuration for newly created GKE cluster. Set to false to skip. | bool | | true |
+| [nvidia_dcgm_create](variables.tf#L136) | Determines if DCGM resources should be added to the cluster. Used in capturing GPU metrics. | bool | | true |
+| [gcs_fuse_create](variables.tf#L142) | Gives the model server service account Storage Admin access to the model store bucket | bool | | true |

## Outputs

diff --git a/benchmarks/infra/stage-2/main.tf b/benchmarks/infra/stage-2/main.tf
index 6d53463f0..28c073fb1 100644
--- a/benchmarks/infra/stage-2/main.tf
+++ b/benchmarks/infra/stage-2/main.tf
@@ -32,4 +32,6 @@ module "gke-setup" {
  secret_create      = var.secret_name == null ? false : true
  secret_name        = var.secret_name
  secret_location    = var.secret_location
+  nvidia_dcgm_create = var.nvidia_dcgm_create
+  gcs_fuse_create    = var.gcs_fuse_create
}
diff --git a/benchmarks/infra/stage-2/modules/gke-setup/main.tf b/benchmarks/infra/stage-2/modules/gke-setup/main.tf
index 28b1a0b8a..98fffb1df 100644
--- a/benchmarks/infra/stage-2/modules/gke-setup/main.tf
+++ b/benchmarks/infra/stage-2/modules/gke-setup/main.tf
@@ -32,7 +32,7 @@ module "gcs-fuse" {
  project_id             = var.project_id
  bucket_name            = var.bucket_name
  bucket_location        = var.bucket_location
-  google_service_account = var.google_service_account
+  google_service_account = module.workload-identity.0.created_resources.gsa_email
  depends_on             = [module.workload-identity]
}
diff --git a/benchmarks/infra/stage-2/modules/gke-setup/modules/gcs-fuse/main.tf b/benchmarks/infra/stage-2/modules/gke-setup/modules/gcs-fuse/main.tf
index 37d8079df..405e7c3cf 100644
--- a/benchmarks/infra/stage-2/modules/gke-setup/modules/gcs-fuse/main.tf
+++ b/benchmarks/infra/stage-2/modules/gke-setup/modules/gcs-fuse/main.tf
@@ -42,6 +42,6 @@ module "gcs-fuse-bucket" {

resource "google_storage_bucket_iam_member" "bucket-iam" {
  bucket = local.bucket_name
-  role   = "roles/storage.objectAdmin"
-  member = data.google_service_account.gsa.member
+  role   = "roles/storage.admin"
+  member = "serviceAccount:${var.google_service_account}"
}
diff --git a/benchmarks/infra/stage-2/sample-terraform.tfvars b/benchmarks/infra/stage-2/sample-tfvars/gpu-sample.tfvars
similarity index 100%
rename from benchmarks/infra/stage-2/sample-terraform.tfvars
rename to benchmarks/infra/stage-2/sample-tfvars/gpu-sample.tfvars
diff --git a/benchmarks/infra/stage-2/sample-tfvars/jetstream-sample.tfvars b/benchmarks/infra/stage-2/sample-tfvars/jetstream-sample.tfvars
new file mode 100644
index 000000000..c9c884f2c
--- /dev/null
+++ b/benchmarks/infra/stage-2/sample-tfvars/jetstream-sample.tfvars
@@ -0,0 +1,27 @@
+# can be obtained from stage-1 by running:
+# terraform output -json | jq '."fleet_host".value'
+credentials_config = {
+  fleet_host = "https://connectgateway.googleapis.com/v1/projects/$PROJECT_NUMBER/locations/global/gkeMemberships/ai-benchmark"
+}
+
+# can be obtained from stage-1 by running:
+# terraform output -json | jq '."project_id".value'
+project_id = "PROJECT_ID"
+
+bucket_name = "${PROJECT_ID}-model-repo-bucket-01"
+bucket_location = "US"
+
+output_bucket_name = "${PROJECT_ID}-benchmark-output-bucket-01"
+output_bucket_location = "US"
+
+google_service_account = "benchmark-sa-01"
+kubernetes_service_account = "benchmark-sa"
+
+benchmark_runner_google_service_account = "sample-runner-sa-01"
+benchmark_runner_kubernetes_service_account = "sample-runner-sa"
+
+nvidia_dcgm_create = false
+namespace = "default"
+namespace_create = false
+gcs_fuse_create = true
+
diff --git a/benchmarks/infra/stage-2/variables.tf b/benchmarks/infra/stage-2/variables.tf
index f6085a639..e68ace4b6 100644
--- a/benchmarks/infra/stage-2/variables.tf
+++ b/benchmarks/infra/stage-2/variables.tf
@@ -132,3 +132,15 @@ variable "secret_location" {
  default  = null
  nullable = true
}
+
+variable "nvidia_dcgm_create" {
+  description = "Whether to create NVIDIA DCGM resources; used for capturing GPU metrics on GPU VMs"
+  type        = bool
+  default     = true
+}
+
+variable "gcs_fuse_create" {
+  description = "Give the model server service account Storage Admin access to the model store bucket"
+  type        = bool
+  default     = true
+}
\ No newline at end of file
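A closing usage note on the two new stage-2 toggles: DCGM only captures GPU metrics, so TPU-only deployments like the jetstream sample above disable it, while `gcs_fuse_create` controls the Storage Admin grant on the model store bucket. Both can also be overridden ad hoc without editing `terraform.tfvars`; a sketch, assuming stage-2 is otherwise configured:

```bash
cd infra/stage-2
# Skip the GPU-metrics (DCGM) resources on a TPU cluster while keeping the
# GCS Fuse IAM grant; -var overrides take precedence over variable defaults.
terraform plan -var='nvidia_dcgm_create=false' -var='gcs_fuse_create=true'
```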