Skip to content

Commit

Permalink
[ci] set timeout for test_oot_registration.py (vllm-project#7082)
Browse files: browse the repository at this point in the history
  • Loading branch information
youkaichao authored Aug 2, 2024
1 parent c16eaac commit 8069495
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 2 deletions.
4 changes: 4 additions & 0 deletions tests/entrypoints/openai/test_oot_registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,12 @@ def test_oot_registration_for_api_server():
ctx = torch.multiprocessing.get_context()
server = ctx.Process(target=server_function, args=(port, ))
server.start()
MAX_SERVER_START_WAIT_S = 60
client = OpenAI(
base_url=f"http://localhost:{port}/v1",
api_key="token-abc123",
)
now = time.time()
while True:
try:
completion = client.chat.completions.create(
Expand All @@ -57,6 +59,8 @@ def test_oot_registration_for_api_server():
except OpenAIError as e:
if "Connection error" in str(e):
time.sleep(3)
if time.time() - now > MAX_SERVER_START_WAIT_S:
raise RuntimeError("Server did not start in time") from e
else:
raise e
server.kill()
Expand Down
4 changes: 3 additions & 1 deletion vllm/worker/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,9 @@ def determine_num_available_blocks(self) -> Tuple[int, int]:
# GPU did not change their memory usage during the profiling.
peak_memory = self.init_gpu_memory - free_gpu_memory
assert peak_memory > 0, (
- "Error in memory profiling. This happens when the GPU memory was "
+ "Error in memory profiling. "
+ f"Initial free memory {self.init_gpu_memory}, current free memory"
+ f" {free_gpu_memory}. This happens when the GPU memory was "
"not properly cleaned up before initializing the vLLM instance.")

cache_block_size = self.get_cache_block_size_bytes()
Expand Down
4 changes: 3 additions & 1 deletion vllm/worker/xpu_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,9 @@ def determine_num_available_blocks(self) -> Tuple[int, int]:
# GPU did not change their memory usage during the profiling.
peak_memory = self.init_gpu_memory - free_gpu_memory
assert peak_memory > 0, (
- "Error in memory profiling. This happens when the GPU memory was "
+ "Error in memory profiling. "
+ f"Initial free memory {self.init_gpu_memory}, current free memory"
+ f" {free_gpu_memory}. This happens when the GPU memory was "
"not properly cleaned up before initializing the vLLM instance.")

cache_block_size = self.get_cache_block_size_bytes()
Expand Down

0 comments on commit 8069495

Please sign in to comment.