From 40c27a7cbb496ede63da9d636c07a1f315fd36e1 Mon Sep 17 00:00:00 2001
From: Simon Mo
Date: Tue, 30 Jul 2024 14:59:48 -0700
Subject: [PATCH] [Build] Temporarily Disable Kernels and LoRA tests (#6961)

---
 .buildkite/test-pipeline.yaml | 40 +++++++++++++++++------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index be8807df0b098..91418e5ec1752 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -155,12 +155,12 @@ steps:
   - pytest -v -s test_inputs.py
   - pytest -v -s multimodal
 
-- label: Kernels Test %N
-  #mirror_hardwares: [amd]
-  commands:
-  - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.8/flashinfer-0.0.8+cu121torch2.3-cp310-cp310-linux_x86_64.whl
-  - pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
-  parallelism: 4
+# - label: Kernels Test %N
+#   #mirror_hardwares: [amd]
+#   commands:
+#   - pip install https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.8/flashinfer-0.0.8+cu121torch2.3-cp310-cp310-linux_x86_64.whl
+#   - pytest -v -s kernels --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT
+#   parallelism: 4
 
 - label: Models Test
   #mirror_hardwares: [amd]
@@ -202,20 +202,20 @@ steps:
   - export VLLM_ATTENTION_BACKEND=XFORMERS
   - pytest -v -s spec_decode
 
-- label: LoRA Test %N
-  #mirror_hardwares: [amd]
-  command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_long_context.py
-  parallelism: 4
-
-- label: LoRA Long Context (Distributed)
-  #mirror_hardwares: [amd]
-  num_gpus: 4
-  # This test runs llama 13B, so it is required to run on 4 GPUs.
-  commands:
-  # FIXIT: find out which code initialize cuda before running the test
-  # before the fix, we need to use spawn to test it
-  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
-  - pytest -v -s -x lora/test_long_context.py
+# - label: LoRA Test %N
+#   #mirror_hardwares: [amd]
+#   command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_long_context.py
+#   parallelism: 4
+
+# - label: LoRA Long Context (Distributed)
+#   #mirror_hardwares: [amd]
+#   num_gpus: 4
+#   # This test runs llama 13B, so it is required to run on 4 GPUs.
+#   commands:
+#   # FIXIT: find out which code initialize cuda before running the test
+#   # before the fix, we need to use spawn to test it
+#   - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+#   - pytest -v -s -x lora/test_long_context.py
 
 - label: Tensorizer Test
   #mirror_hardwares: [amd]
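
Note (not part of the patch): the disabled steps use Buildkite's parallel-job
sharding. "parallelism: 4" fans a step out into four concurrent jobs, "%N" in
the label is expanded per parallel job, and the doubled "$$" escapes variable
interpolation at pipeline-upload time so each job's agent resolves its own
BUILDKITE_PARALLEL_JOB / BUILDKITE_PARALLEL_JOB_COUNT at run time. The
--shard-id/--num-shards pytest flags are assumed here to come from the
pytest-shard plugin installed in the test image. A minimal sketch of such a
sharded step, using a hypothetical "example" test directory:

    - label: Example Test %N
      parallelism: 4
      commands:
      # Each of the four jobs picks a distinct shard; $$ keeps the variables
      # from being interpolated when the pipeline is uploaded, so they are
      # resolved on the agent running the job.
      - pytest -v -s example --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT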