Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

api: better startup failure UX #881

Merged
merged 1 commit into main from startup-ux
Dec 12, 2024
Merged

api: better startup failure UX #881

merged 1 commit into main from startup-ux
Dec 12, 2024

Conversation

AlpinDale
Copy link
Member

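Before (the RPC server process's real error is followed by a secondary TimeoutError and a chained RuntimeError traceback from the API server):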

Process SpawnProcess-1:
Traceback (most recent call last):
  File "/home/ubuntu/aphrodite-engine/conda/envs/aphrodite-runtime/lib/python3.11/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/home/ubuntu/aphrodite-engine/conda/envs/aphrodite-runtime/lib/python3.11/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/ubuntu/aphrodite-engine/aphrodite/endpoints/openai/rpc/server.py", line 198, in run_rpc_server
    server = AsyncEngineRPCServer(async_engine_args, rpc_path)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/ubuntu/aphrodite-engine/aphrodite/endpoints/openai/rpc/server.py", line 35, in __init__
    self.engine = AsyncAphrodite.from_engine_args(async_engine_args)
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/ubuntu/aphrodite-engine/aphrodite/engine/async_aphrodite.py", line 716, in from_engine_args
    engine = cls(
             ^^^^
  File "/home/ubuntu/aphrodite-engine/aphrodite/engine/async_aphrodite.py", line 625, in __init__
    self.engine = self._init_engine(*args, **kwargs)
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/ubuntu/aphrodite-engine/aphrodite/engine/async_aphrodite.py", line 814, in _init_engine
    return engine_class(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/ubuntu/aphrodite-engine/aphrodite/engine/async_aphrodite.py", line 271, in __init__
    super().__init__(*args, **kwargs)
  File "/home/ubuntu/aphrodite-engine/aphrodite/engine/aphrodite_engine.py", line 262, in __init__
    self.model_executor = executor_class(
                          ^^^^^^^^^^^^^^^
  File "/home/ubuntu/aphrodite-engine/aphrodite/executor/executor_base.py", line 45, in __init__
    self._init_executor()
  File "/home/ubuntu/aphrodite-engine/aphrodite/executor/gpu_executor.py", line 34, in _init_executor
    assert self.parallel_config.world_size == 1, (
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: GPUExecutor only supports single GPU.
Traceback (most recent call last):
  File "/home/ubuntu/aphrodite-engine/aphrodite/endpoints/openai/api_server.py", line 169, in build_async_engine_client
    await async_engine_client.setup()
  File "/home/ubuntu/aphrodite-engine/aphrodite/endpoints/openai/rpc/client.py", line 127, in setup
    await self._wait_for_server_rpc()
  File "/home/ubuntu/aphrodite-engine/aphrodite/endpoints/openai/rpc/client.py", line 232, in _wait_for_server_rpc
    await self._send_one_way_rpc_request(
  File "/home/ubuntu/aphrodite-engine/aphrodite/endpoints/openai/rpc/client.py", line 208, in _send_one_way_rpc_request
    response = await do_rpc_call(socket, request, timeout)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/ubuntu/aphrodite-engine/aphrodite/endpoints/openai/rpc/client.py", line 202, in do_rpc_call
    raise TimeoutError(f"Server didn't reply within {timeout} ms")
TimeoutError: Server didn't reply within 1000 ms

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home/ubuntu/aphrodite-engine/conda/envs/aphrodite-runtime/bin/aphrodite", line 8, in <module>
    sys.exit(main())
             ^^^^^^
  File "/home/ubuntu/aphrodite-engine/aphrodite/endpoints/cli.py", line 205, in main
    args.dispatch_function(args)
  File "/home/ubuntu/aphrodite-engine/aphrodite/endpoints/cli.py", line 31, in serve
    asyncio.run(run_server(args))
  File "/home/ubuntu/aphrodite-engine/conda/envs/aphrodite-runtime/lib/python3.11/asyncio/runners.py", line 190, in run
    return runner.run(main)
           ^^^^^^^^^^^^^^^^
  File "/home/ubuntu/aphrodite-engine/conda/envs/aphrodite-runtime/lib/python3.11/asyncio/runners.py", line 118, in run
    return self._loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/ubuntu/aphrodite-engine/conda/envs/aphrodite-runtime/lib/python3.11/asyncio/base_events.py", line 654, in run_until_complete
    return future.result()
           ^^^^^^^^^^^^^^^
  File "/home/ubuntu/aphrodite-engine/aphrodite/endpoints/openai/api_server.py", line 761, in run_server
    async with build_async_engine_client(args) as async_engine_client:
  File "/home/ubuntu/aphrodite-engine/conda/envs/aphrodite-runtime/lib/python3.11/contextlib.py", line 210, in __aenter__
    return await anext(self.gen)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/home/ubuntu/aphrodite-engine/aphrodite/endpoints/openai/api_server.py", line 173, in build_async_engine_client
    raise RuntimeError(
RuntimeError: The server process died before responding to the readiness probe

After (the RPC server process's real error is followed by a single concise error line):

Process SpawnProcess-1:
Traceback (most recent call last):
  File "/home/ubuntu/aphrodite-engine/conda/envs/aphrodite-runtime/lib/python3.11/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/home/ubuntu/aphrodite-engine/conda/envs/aphrodite-runtime/lib/python3.11/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/ubuntu/aphrodite-engine/aphrodite/endpoints/openai/rpc/server.py", line 198, in run_rpc_server
    server = AsyncEngineRPCServer(async_engine_args, rpc_path)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/ubuntu/aphrodite-engine/aphrodite/endpoints/openai/rpc/server.py", line 35, in __init__
    self.engine = AsyncAphrodite.from_engine_args(async_engine_args)
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/ubuntu/aphrodite-engine/aphrodite/engine/async_aphrodite.py", line 716, in from_engine_args
    engine = cls(
             ^^^^
  File "/home/ubuntu/aphrodite-engine/aphrodite/engine/async_aphrodite.py", line 625, in __init__
    self.engine = self._init_engine(*args, **kwargs)
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/ubuntu/aphrodite-engine/aphrodite/engine/async_aphrodite.py", line 814, in _init_engine
    return engine_class(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/ubuntu/aphrodite-engine/aphrodite/engine/async_aphrodite.py", line 271, in __init__
    super().__init__(*args, **kwargs)
  File "/home/ubuntu/aphrodite-engine/aphrodite/engine/aphrodite_engine.py", line 262, in __init__
    self.model_executor = executor_class(
                          ^^^^^^^^^^^^^^^
  File "/home/ubuntu/aphrodite-engine/aphrodite/executor/executor_base.py", line 45, in __init__
    self._init_executor()
  File "/home/ubuntu/aphrodite-engine/aphrodite/executor/gpu_executor.py", line 34, in _init_executor
    assert self.parallel_config.world_size == 1, (
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: GPUExecutor only supports single GPU.
ERROR:    RPCServer process died before responding to readiness probe

@AlpinDale AlpinDale merged commit ce6e3d6 into main Dec 12, 2024
5 checks passed
@AlpinDale AlpinDale deleted the startup-ux branch December 12, 2024 04:35
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

1 participant