GoogleCloudPlatform · genlu2011 · Aug 29, 2024 · Aug 20, 2024
diff --git a/tutorials-and-examples/hf-tgi/README.md b/tutorials-and-examples/hf-tgi/README.md
@@ -25,7 +25,7 @@ gcloud container node-pools create g2-standard-24 --cluster l4-demo \
  --num-nodes=1 --min-nodes=1 --max-nodes=2 \
  --node-locations $REGION-a,$REGION-b --region $REGION
  ```
-4. Provision the job and enable gathering metrics: `terrafrom apply`
+4. Set the project_id in workloads.tfvars and create the application: `terraform apply -var-file=workloads.tfvars`
 5. Make sure app started ok: `kubectl logs -l app=mistral-7b-instruct`
 6. Set up port forward
 ```

diff --git a/tutorials-and-examples/hf-tgi/cloudbuild.yaml b/tutorials-and-examples/hf-tgi/cloudbuild.yaml
@@ -0,0 +1,162 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+steps:
+  - id: 'validate platform'  # init + validate the Terraform under infrastructure/
+    name: 'gcr.io/$PROJECT_ID/terraform'
+    script: |
+      terraform init -no-color
+      terraform validate -no-color
+    dir: 'infrastructure/'
+    waitFor: ['-']  # '-' = no dependencies; the step starts as soon as the build begins
+
+  - id: 'validate hf'  # init + validate the hf-tgi example's Terraform
+    name: 'gcr.io/$PROJECT_ID/terraform'
+    script: |
+      terraform init -no-color
+      terraform validate -no-color
+    dir: 'tutorials-and-examples/hf-tgi/'
+    waitFor: ['validate platform']  # runs only after the platform validation step
+
+  - id: 'create gke cluster'  # applies infrastructure/ and records "pass" in a marker file on success
+    name: 'gcr.io/$PROJECT_ID/terraform'
+    env:
+      - "KUBE_LOAD_CONFIG_FILE=false"  # NOTE(review): presumably keeps the Terraform kubernetes provider from reading a local kubeconfig - confirm
+    entrypoint: 'sh'
+    args:
+      - '-c'
+      - |
+        set -e
+
+        terraform apply \
+        -var-file=tfvars_tests/standard-gke-public.platform.tfvars \
+        -var=project_id=$PROJECT_ID \
+        -var=network_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-$_AUTOPILOT_CLUSTER  \
+        -var=subnetwork_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-$_AUTOPILOT_CLUSTER  \
+        -var=subnetwork_region=$_REGION \
+        -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-cluster \
+        -var=autopilot_cluster=$_AUTOPILOT_CLUSTER \
+        -var=cluster_location=$_REGION \
+        -var='cpu_pools=[{initial_node_count=2,name="cpu-pool",machine_type="n1-standard-16",autoscaling=true,min_count=1,max_count=3,disk_size_gb=100,disk_type="pd-standard",}]' \
+        -var='gpu_pools=[{initial_node_count=2,name="gpu-pool",machine_type="g2-standard-24",autoscaling=true,min_count=1,max_count=3,disk_size_gb=100,disk_type="pd-balanced",accelerator_count=2,accelerator_type="nvidia-l4",gpu_driver_version="DEFAULT",}]' \
+        -auto-approve -no-color 
+        echo "pass" > /workspace/gke_cluster_result.txt
+    dir: 'infrastructure/'
+    allowFailure: true  # keep the build going so the cleanup steps run; 'check result' reads the marker file
+    waitFor: ['validate platform', 'validate hf']  # both validation steps must finish first
+
+  - id: 'test hg-tgi'  # deploys the hf-tgi workload, sends one /generate request, records "pass" on success
+    name: 'gcr.io/$PROJECT_ID/terraform'
+    entrypoint: 'sh'
+    args:
+      - '-c'
+      - |
+        set -e
+
+        # Fetch kubeconfig credentials for the cluster created by 'create gke cluster'
+        gcloud container clusters get-credentials \
+        ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-cluster \
+        --location $_REGION \
+        --project $PROJECT_ID
+
+        terraform apply \
+        -var-file=workloads.tfvars \
+        -var=project_id=$PROJECT_ID \
+        -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-cluster \
+        -var=location=$_REGION \
+        -var=namespace=ml-$SHORT_SHA-$_BUILD_ID-hf \
+        -var=autopilot_cluster=$_AUTOPILOT_CLUSTER \
+        -auto-approve -no-color
+
+        # Make sure pods are running
+        kubectl wait --all pods -n ml-$SHORT_SHA-$_BUILD_ID-hf --for=condition=Ready --timeout=1200s
+        kubectl port-forward -n ml-$SHORT_SHA-$_BUILD_ID-hf service/mistral-7b-instruct-service 8080:80 &
+        # Give the background port-forward time to start listening
+        sleep 10s
+        # --fail: exit non-zero on an HTTP error so `set -e` stops before "pass" is recorded
+        curl --fail 127.0.0.1:8080/generate -X POST \
+            -H 'Content-Type: application/json' \
+            --data '{"inputs": "[INST]Hello world![/INST]","parameters": {"max_new_tokens": 400}}'
+        echo "pass" > /workspace/hf_result.txt
+    allowFailure: true  # cleanup steps must still run; 'check result' inspects hf_result.txt
+    dir: 'tutorials-and-examples/hf-tgi/'
+    waitFor: ['create gke cluster']
+
+  - id: 'cleanup hf-tgi'  # destroys the workload with the same variables the test step applied
+    name: 'gcr.io/$PROJECT_ID/terraform'
+    entrypoint: 'bash'
+    args:
+      - '-c'
+      - |
+        set -e
+
+        cd /workspace/tutorials-and-examples/hf-tgi/
+
+        terraform destroy \
+        -var-file=workloads.tfvars \
+        -var=project_id=$PROJECT_ID \
+        -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-cluster \
+        -var=location=$_REGION \
+        -var=namespace=ml-$SHORT_SHA-$_BUILD_ID-hf \
+        -var=autopilot_cluster=$_AUTOPILOT_CLUSTER \
+        -auto-approve -no-color 
+    allowFailure: true  # a failed destroy must not stop the cluster cleanup that follows
+    waitFor: ['test hg-tgi']  # the test step has allowFailure, so this runs whether it passed or not
+
+  - id: 'cleanup gke cluster'  # destroys the cluster and network created by 'create gke cluster'
+    name: 'gcr.io/$PROJECT_ID/terraform'
+    entrypoint: 'bash'
+    args:
+      - '-c'
+      - |
+        set -e
+
+        cd /workspace/infrastructure
+
+        terraform destroy -var-file=tfvars_tests/standard-gke-public.platform.tfvars -var=project_id=$PROJECT_ID \
+        -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-cluster \
+        -var=network_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-$_AUTOPILOT_CLUSTER  \
+        -var=subnetwork_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-$_AUTOPILOT_CLUSTER  \
+        -var=autopilot_cluster=$_AUTOPILOT_CLUSTER \
+        -var=cluster_location=$_REGION -auto-approve -no-color
+
+    allowFailure: true  # let 'check result' still run and report the test verdict
+    waitFor: ['cleanup hf-tgi']  # tear down the workload before the cluster
+
+  - id: 'check result'  # fails the build when either marker file is missing or does not contain "pass"
+    name: 'gcr.io/$PROJECT_ID/terraform'
+    entrypoint: 'bash'  # the script uses [[ ]], which needs bash
+    args:
+      - '-c'
+      - |
+        if [[ $(cat /workspace/gke_cluster_result.txt) != "pass" ]]; then
+          echo "gke cluster creation failed"
+          exit 1
+        fi
+
+        if [[ $(cat /workspace/hf_result.txt) != "pass" ]]; then
+          echo "hf-tgi test failed"
+          exit 1
+        fi
+    waitFor: ['cleanup gke cluster']  # evaluate the results only after all cleanup has run
+
+substitutions:
+  _REGION: us-east4  # used as cluster_location, subnetwork_region and the workload location
+  _USER_NAME: github  # NOTE(review): not referenced by any step in this file
+  _AUTOPILOT_CLUSTER: "false"  # passed to Terraform as autopilot_cluster and embedded in the network names
+  _BUILD_ID: ${BUILD_ID:0:8}  # first 8 characters of the build ID, keeps resource names short
+options:
+  substitutionOption: 'ALLOW_LOOSE'  # tolerate substitutions not defined here ($_PR_NUMBER is presumably supplied by the trigger)
+  machineType: 'E2_HIGHCPU_8'
+timeout: 5400s  # 90 minutes for the whole build
diff --git a/tutorials-and-examples/hf-tgi/main.tf b/tutorials-and-examples/hf-tgi/main.tf
@@ -52,11 +52,18 @@ provider "helm" {
   }
 }
 
+module "namespace" {
+  source           = "../../modules/kubernetes-namespace"
+  create_namespace = true # NOTE(review): presumably makes the module create var.namespace - confirm in the module
+  namespace        = var.namespace
+}
+
 module "inference-server" {
   source            = "../../modules/inference-service"
   namespace         = var.namespace
   additional_labels = var.additional_labels
   autopilot_cluster = var.autopilot_cluster
+  depends_on        = [module.namespace] # apply only after module.namespace has been applied
 }
 
 resource "helm_release" "gmp-engine" {
@@ -68,4 +75,5 @@ resource "helm_release" "gmp-engine" {
   values = [
     "${file("${path.module}/podmonitoring.yaml")}"
   ]
+  depends_on = [module.namespace]
 }
diff --git a/tutorials-and-examples/hf-tgi/workloads.tfvars b/tutorials-and-examples/hf-tgi/workloads.tfvars
@@ -0,0 +1,6 @@
+project_id   = "<your project ID>" # placeholder: replace with your GCP project ID before applying
+cluster_name = "l4-demo" # same name as the cluster the README's gcloud commands use (--cluster l4-demo)
+location     = "us-central1"
+
+namespace         = "l4-demo" # main.tf passes this to the namespace and inference-service modules
+autopilot_cluster = false