diff --git a/tutorials-and-examples/hf-tgi/README.md b/tutorials-and-examples/hf-tgi/README.md
index a51f0ab69..440e007dc 100644
--- a/tutorials-and-examples/hf-tgi/README.md
+++ b/tutorials-and-examples/hf-tgi/README.md
@@ -25,7 +25,7 @@ gcloud container node-pools create g2-standard-24 --cluster l4-demo \
   --num-nodes=1 --min-nodes=1 --max-nodes=2 \
   --node-locations $REGION-a,$REGION-b --region $REGION
 ```
-4. Provision the job and enable gathering metrics: `terrafrom apply`
+4. Set the project_id in workloads.tfvars and create the application: `terraform apply -var-file=workloads.tfvars`
 5. Make sure app started ok: `kubectl logs -l app=mistral-7b-instruct`
 6. Set up port forward
 ```
diff --git a/tutorials-and-examples/hf-tgi/cloudbuild.yaml b/tutorials-and-examples/hf-tgi/cloudbuild.yaml
new file mode 100644
index 000000000..c3c2c98e6
--- /dev/null
+++ b/tutorials-and-examples/hf-tgi/cloudbuild.yaml
@@ -0,0 +1,162 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+steps:
+  - id: 'validate platform'  # terraform init + validate for the shared infrastructure/ module
+    name: 'gcr.io/$PROJECT_ID/terraform'
+    script: |
+      terraform init -no-color
+      terraform validate -no-color
+    dir: 'infrastructure/'
+    waitFor: ['-']
+
+  - id: 'validate hf'  # terraform init + validate for the hf-tgi example itself
+    name: 'gcr.io/$PROJECT_ID/terraform'
+    script: |
+      terraform init -no-color
+      terraform validate -no-color
+    dir: 'tutorials-and-examples/hf-tgi/'
+    waitFor: ['validate platform']
+
+  - id: 'create gke cluster'  # provisions the test cluster; writes "pass" to /workspace on success
+    name: 'gcr.io/$PROJECT_ID/terraform'
+    env:
+      - "KUBE_LOAD_CONFIG_FILE=false"
+    entrypoint: 'sh'
+    args:
+      - '-c'
+      - |
+        set -e
+
+        terraform apply \
+          -var-file=tfvars_tests/standard-gke-public.platform.tfvars \
+          -var=project_id=$PROJECT_ID \
+          -var=network_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-$_AUTOPILOT_CLUSTER \
+          -var=subnetwork_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-$_AUTOPILOT_CLUSTER \
+          -var=subnetwork_region=$_REGION \
+          -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-cluster \
+          -var=autopilot_cluster=$_AUTOPILOT_CLUSTER \
+          -var=cluster_location=$_REGION \
+          -var='cpu_pools=[{initial_node_count=2,name="cpu-pool",machine_type="n1-standard-16",autoscaling=true,min_count=1,max_count=3,disk_size_gb=100,disk_type="pd-standard",}]' \
+          -var='gpu_pools=[{initial_node_count=2,name="gpu-pool",machine_type="g2-standard-24",autoscaling=true,min_count=1,max_count=3,disk_size_gb=100,disk_type="pd-balanced",accelerator_count=2,accelerator_type="nvidia-l4",gpu_driver_version="DEFAULT",}]' \
+          -auto-approve -no-color
+        echo "pass" > /workspace/gke_cluster_result.txt
+    dir: 'infrastructure/'
+    allowFailure: true  # a failure here must not skip the cleanup steps; 'check result' reports it
+    waitFor: ['validate platform', 'validate hf']
+
+  - id: 'test hf-tgi'  # deploys the example, waits for pods, then sends one /generate request
+    name: 'gcr.io/$PROJECT_ID/terraform'
+    entrypoint: 'sh'
+    args:
+      - '-c'
+      - |
+        set -e
+
+        # Get kube config
+        gcloud container clusters get-credentials \
+          ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-cluster \
+          --location $_REGION \
+          --project $PROJECT_ID
+
+        terraform apply \
+          -var-file=workloads.tfvars \
+          -var=project_id=$PROJECT_ID \
+          -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-cluster \
+          -var=location=$_REGION \
+          -var=namespace=ml-$SHORT_SHA-$_BUILD_ID-hf \
+          -var=autopilot_cluster=$_AUTOPILOT_CLUSTER \
+          -auto-approve -no-color
+
+        # Make sure pods are running
+        kubectl wait --all pods -n ml-$SHORT_SHA-$_BUILD_ID-hf --for=condition=Ready --timeout=1200s
+        kubectl port-forward -n ml-$SHORT_SHA-$_BUILD_ID-hf service/mistral-7b-instruct-service 8080:80 &
+        # Give the background port-forward time to start listening before calling it
+        sleep 10s
+
+        curl --fail 127.0.0.1:8080/generate -X POST \
+          -H 'Content-Type: application/json' \
+          --data '{"inputs": "[INST]Hello world![/INST]","parameters": {"max_new_tokens": 400}}'
+        echo "pass" > /workspace/hf_result.txt  # reached only if every command above succeeded (set -e), including curl --fail
+    allowFailure: true  # a failure here must not skip the cleanup steps; 'check result' reports it
+    dir: 'tutorials-and-examples/hf-tgi/'
+    waitFor: ['create gke cluster']
+
+  - id: 'cleanup hf-tgi'  # tears down the workload with the same variables used to create it
+    name: 'gcr.io/$PROJECT_ID/terraform'
+    entrypoint: 'bash'
+    args:
+      - '-c'
+      - |
+        set -e
+
+        cd /workspace/tutorials-and-examples/hf-tgi/
+
+        terraform destroy \
+          -var-file=workloads.tfvars \
+          -var=project_id=$PROJECT_ID \
+          -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-cluster \
+          -var=location=$_REGION \
+          -var=namespace=ml-$SHORT_SHA-$_BUILD_ID-hf \
+          -var=autopilot_cluster=$_AUTOPILOT_CLUSTER \
+          -auto-approve -no-color
+    allowFailure: true  # still attempt the cluster cleanup if the workload teardown fails
+    waitFor: ['test hf-tgi']
+
+  - id: 'cleanup gke cluster'  # tears down the test cluster and its network
+    name: 'gcr.io/$PROJECT_ID/terraform'
+    entrypoint: 'bash'
+    args:
+      - '-c'
+      - |
+        set -e
+
+        cd /workspace/infrastructure
+
+        terraform destroy -var-file=tfvars_tests/standard-gke-public.platform.tfvars -var=project_id=$PROJECT_ID \
+          -var=cluster_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-cluster \
+          -var=network_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-$_AUTOPILOT_CLUSTER \
+          -var=subnetwork_name=ml-$SHORT_SHA-$_PR_NUMBER-$_BUILD_ID-$_AUTOPILOT_CLUSTER \
+          -var=autopilot_cluster=$_AUTOPILOT_CLUSTER \
+          -var=cluster_location=$_REGION -auto-approve -no-color
+
+    allowFailure: true  # 'check result' must still run to report the test outcome
+    waitFor: ['cleanup hf-tgi']
+
+  - id: 'check result'  # fails the build unless both earlier steps wrote their "pass" marker
+    name: 'gcr.io/$PROJECT_ID/terraform'
+    entrypoint: 'bash'
+    args:
+      - '-c'
+      - |
+        if [[ $(cat /workspace/gke_cluster_result.txt) != "pass" ]]; then
+          echo "gke cluster creation failed"
+          exit 1
+        fi
+
+        if [[ $(cat /workspace/hf_result.txt) != "pass" ]]; then
+          echo "hf-tgi test failed"
+          exit 1
+        fi
+    waitFor: ['cleanup gke cluster']
+
+substitutions:
+  _REGION: us-east4
+  _USER_NAME: github
+  _AUTOPILOT_CLUSTER: "false"
+  _BUILD_ID: ${BUILD_ID:0:8}  # NOTE(review): bash-style expansion relies on dynamic substitutions; confirm they are enabled for manual (non-trigger) builds
+options:
+  substitutionOption: 'ALLOW_LOOSE'
+  machineType: 'E2_HIGHCPU_8'
+timeout: 5400s
diff --git a/tutorials-and-examples/hf-tgi/main.tf b/tutorials-and-examples/hf-tgi/main.tf
index 55e0757d9..faa55128a 100644
--- a/tutorials-and-examples/hf-tgi/main.tf
+++ b/tutorials-and-examples/hf-tgi/main.tf
@@ -52,11 +52,18 @@ provider "helm" {
   }
 }
 
+module "namespace" {
+  source = "../../modules/kubernetes-namespace"
+  create_namespace = true
+  namespace = var.namespace
+}
+
 module "inference-server" {
   source = "../../modules/inference-service"
   namespace = var.namespace
   additional_labels = var.additional_labels
   autopilot_cluster = var.autopilot_cluster
+  depends_on = [module.namespace]  # the namespace module must be applied before this one
 }
 
 resource "helm_release" "gmp-engine" {
@@ -68,4 +75,5 @@ resource "helm_release" "gmp-engine" {
   values = [
     "${file("${path.module}/podmonitoring.yaml")}"
   ]
+  depends_on = [module.namespace]  # the namespace module must be applied before this release
 }
diff --git a/tutorials-and-examples/hf-tgi/workloads.tfvars b/tutorials-and-examples/hf-tgi/workloads.tfvars
new file mode 100644
index 000000000..c21772f3b
--- /dev/null
+++ b/tutorials-and-examples/hf-tgi/workloads.tfvars
@@ -0,0 +1,6 @@
+project_id = ""  # placeholder: set to your GCP project ID before running terraform apply
+cluster_name = "l4-demo"
+location = "us-central1"
+
+namespace = "l4-demo"
+autopilot_cluster = false