Skip to content

Commit

Permalink
[CI] Basic Integration Test For TPU (vllm-project#9968)
Browse files Browse the repository at this point in the history
Signed-off-by: Robert Shaw <[email protected]>
Signed-off-by: Richard Liu <[email protected]>
  • Loading branch information
robertgshaw2-neuralmagic authored and richardsliu committed Nov 4, 2024
1 parent db45c39 commit af148c8
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .buildkite/run-tpu-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ remove_docker_container
# For HF_TOKEN.
source /etc/environment
# Run a simple end-to-end example.
docker run --privileged --net host --shm-size=16G -it -e HF_TOKEN=$HF_TOKEN --name tpu-test vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git && python3 -m pip install pytest && pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py && python3 /workspace/vllm/tests/tpu/test_compilation.py && python3 /workspace/vllm/examples/offline_inference_tpu.py"
docker run --privileged --net host --shm-size=16G -it -e HF_TOKEN=$HF_TOKEN --name tpu-test vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git && python3 -m pip install pytest && python3 -m pip install lm_eval[api]==0.4.4 && pytest -v -s /workspace/vllm/tests/entrypoints/openai/test_accuracy.py && pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py && python3 /workspace/vllm/tests/tpu/test_compilation.py && python3 /workspace/vllm/examples/offline_inference_tpu.py"
17 changes: 15 additions & 2 deletions tests/entrypoints/openai/test_accuracy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import lm_eval
import pytest

from vllm.platforms import current_platform

from ...utils import RemoteOpenAIServer

MODEL_NAME = "Qwen/Qwen2-1.5B-Instruct"
Expand All @@ -18,12 +20,21 @@
FILTER = "exact_match,strict-match"
RTOL = 0.03
EXPECTED_VALUE = 0.58
DEFAULT_ARGS = ["--max-model-len", "4096", "--disable-log-requests"]
DEFAULT_ARGS = ["--max-model-len", "2048", "--disable-log-requests"]
MORE_ARGS_LIST = [
[], # Default
["--enable-chunked-prefill"], # Chunked
["--num-scheduler-steps", "8"], # MS
["--num-scheduler-steps", "8", "--multi-step-stream-outputs"] # MS+Stream
]
MAX_WAIT_SECONDS = None

if current_platform.is_tpu():
MORE_ARGS_LIST = [
[], # Default
# ["--num-scheduler-steps", "8"], # Multi-step << currently fails
]
MAX_WAIT_SECONDS = 600


@pytest.mark.parametrize("more_args", MORE_ARGS_LIST)
Expand All @@ -33,7 +44,9 @@ def test_lm_eval_accuracy(more_args):

print(f"Running with: {args}")

with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
with RemoteOpenAIServer(
MODEL_NAME, args,
max_wait_seconds=MAX_WAIT_SECONDS) as remote_server:
url = f"{remote_server.url_for('v1')}/completions"

model_args = (
Expand Down

0 comments on commit af148c8

Please sign in to comment.