
Commit

fix readme
kaushikmitr committed Mar 4, 2024
1 parent 75bf52b commit 9108fba
Showing 2 changed files with 6 additions and 12 deletions.
5 changes: 2 additions & 3 deletions benchmarks/inference-server/triton/README.md
@@ -68,11 +68,10 @@ This guide provides instructions for deploying and benchmarking a TensorRT Large
 ```
 Replace `your_registry` with your actual Docker registry path.
 
-***Method 2: Upload Model repository and the relevant scripts to gcs***
+***Method 2: Upload Model repository to gcs***
 
-In this method we can directly upload the model engine and scripts to gcs and use the base image provided by Nvidia:
+In this method we can directly upload the model engine to gcs, use the base image provided by Nvidia, and specify the command to launch the Triton server via the deployment yaml file:
 ```
-gsutil cp -r your_script_folder gs://your_model_repo/scripts/
 gsutil cp -r your_model_folder gs://your_model_repo/all_models/
 ```
 Replace `your_model_repo` with your actual gcs repo path.
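The Method 2 upload step from the revised README can be sketched as a small shell script. This is a hedged sketch: the `upload` helper and `DRY_RUN` flag are illustrative additions, and the bucket and folder names are placeholders exactly as in the README; a real run needs `gsutil` installed and authenticated.

```shell
#!/bin/sh
# Sketch of the Method 2 upload step. DRY_RUN=1 (the default here)
# prints the gsutil command instead of executing it; the names
# your_model_folder and your_model_repo are placeholders from the README.
DRY_RUN="${DRY_RUN:-1}"
MODEL_REPO="gs://your_model_repo"

upload() {
  # $1 = local folder, $2 = destination GCS path
  if [ "$DRY_RUN" = "1" ]; then
    echo "gsutil cp -r $1 $2"
  else
    gsutil cp -r "$1" "$2"
  fi
}

# Copy the built model engine into the bucket; the deployment's
# initContainer later pulls it from gs://your_model_repo/all_models.
upload your_model_folder "${MODEL_REPO}/all_models/"
```

With `DRY_RUN=0` the same call executes the real `gsutil cp -r`, so the script doubles as a preview of exactly what will be copied.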
@@ -37,26 +37,23 @@ spec:
           sizeLimit: 1Gi
       - name: all-models-volume
         emptyDir: {}
-      - name: scripts-volume
-        emptyDir: {}
       initContainers:
       - name: init-gcs-download
         image: google/cloud-sdk
         serviceAccountName: ${ksa}
         command: ["/bin/sh", "-c"]
         args:
-        - gsutil cp -r gs://your_gcs_repo/all_models ./ && gsutil cp -r gs://your_gcs_repo/scripts ./;
+        - gsutil cp -r gs://your_gcs_repo/all_models ./;
         volumeMounts:
         - name: all-models-volume
           mountPath: /all_models
-        - name: scripts-volume
-          mountPath: /scripts
       containers:
       - name: triton-inference
         image: "${image_path}" # Replace ${image_path} with the actual image path
         workingDir: /opt/tritonserver
+        #command: ["/bin/sleep", "3600"]
         command: ["/bin/bash", "-c"]
-        args: ["pip install sentencepiece protobuf && huggingface-cli login --token $HUGGINGFACE_TOKEN && python /scripts/launch_triton_server.py --model_repo /all_models/inflight_batcher_llm --world_size 1"]
+        args: ["pip install sentencepiece protobuf && huggingface-cli login --token $HUGGINGFACE_TOKEN && mpirun --allow-run-as-root -n 1 /opt/tritonserver/bin/tritonserver --model-repository=/all_models/inflight_batcher_llm --disable-auto-complete-config --backend-config=python,shm-region-prefix-name=prefix0_ :"]
         ports:
         - containerPort: 8000
           name: http-triton
@@ -76,6 +73,4 @@ spec:
         serviceAccountName: ${ksa}
         volumeMounts:
         - name: all-models-volume
-          mountPath: /all_models
-        - name: scripts-volume
-          mountPath: /scripts
+          mountPath: /all_models
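For readability, the single-line `args` string from the updated deployment can be unpacked into an equivalent multi-line form. This is only an annotated sketch of what the container runs; it executes inside the Triton image (which provides `tritonserver`) and assumes `HUGGINGFACE_TOKEN` is set in the environment, so it is not runnable standalone.

```shell
# The container entrypoint from the updated deployment, unpacked.
# Requires the Triton image and a valid HUGGINGFACE_TOKEN.
pip install sentencepiece protobuf
huggingface-cli login --token "$HUGGINGFACE_TOKEN"
# Launch tritonserver directly under mpirun (single rank, -n 1)
# instead of the old /scripts/launch_triton_server.py wrapper; the
# trailing ":" is mpirun's separator between per-rank app contexts.
mpirun --allow-run-as-root -n 1 /opt/tritonserver/bin/tritonserver \
  --model-repository=/all_models/inflight_batcher_llm \
  --disable-auto-complete-config \
  --backend-config=python,shm-region-prefix-name=prefix0_ :
```

Launching `tritonserver` directly is what removes the need for the `scripts-volume` mount and the second `gsutil cp` in the initContainer.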
