[Doc] Use intersphinx and update entrypoints docs (vllm-project#5125)
DarkLight1337 authored May 30, 2024
1 parent d79d9ea commit a9bcc7a
Showing 4 changed files with 32 additions and 13 deletions.
docs/source/conf.py (12 additions, 1 deletion)

@@ -80,7 +80,7 @@ def setup(app):
     generate_examples()
 
 
-# Mock out external dependencies here.
+# Mock out external dependencies here, otherwise the autodoc pages may be blank.
 autodoc_mock_imports = [
     "cpuinfo",
     "torch",
@@ -115,4 +115,15 @@ def add_line(self, line: str, source: str, *lineno: int) -> None:
 
 autodoc.ClassDocumenter = MockedClassDocumenter
 
+intersphinx_mapping = {
+    'python': ('https://docs.python.org/3', None),
+    'typing_extensions':
+    ('https://typing-extensions.readthedocs.io/en/latest', None),
+    'numpy': ('https://numpy.org/doc/stable', None),
+    'torch': ('https://pytorch.org/docs/stable', None),
+    'psutil': ('https://psutil.readthedocs.io/en/stable', None),
+}
+
+autodoc_preserve_defaults = True
+
 navigation_with_keys = False
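
With this mapping in place, cross-reference roles in vLLM docstrings can resolve against the external projects' object inventories instead of rendering as dead links, and autodoc_preserve_defaults keeps default argument values in rendered signatures as they appear in the source rather than as their evaluated repr. A minimal sketch of the effect (the function below is hypothetical, not part of this commit):

def to_numpy(tensor):
    """Convert a :class:`torch.Tensor` to a :class:`numpy.ndarray`.

    When rendered by autodoc, intersphinx resolves ``torch.Tensor`` against
    the PyTorch inventory and ``numpy.ndarray`` against the NumPy inventory,
    so both references become links to the external documentation.
    """
    return tensor.detach().cpu().numpy()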
vllm/engine/async_llm_engine.py (0 additions, 2 deletions)

@@ -307,8 +307,6 @@ class AsyncLLMEngine:
     generate method when there are requests in the waiting queue. The generate
     method yields the outputs from the :class:`LLMEngine` to the caller.
 
-    NOTE: For the comprehensive list of arguments, see :class:`LLMEngine`.
-
     Args:
         worker_use_ray: Whether to use Ray for model workers. Required for
             distributed execution. Should be the same as
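
For context, the docstring above describes the online-serving wrapper: a background loop pulls requests from the waiting queue and streams outputs back to callers. A rough usage sketch, assuming the top-level exports of this vLLM version (model name and request id are illustrative):

import asyncio

from vllm import AsyncEngineArgs, AsyncLLMEngine, SamplingParams

engine = AsyncLLMEngine.from_engine_args(
    AsyncEngineArgs(model="facebook/opt-125m"))

async def stream_one() -> None:
    params = SamplingParams(temperature=0.8, max_tokens=32)
    # generate() is an async generator: it yields incremental RequestOutputs
    # as the background loop produces tokens for this request.
    async for output in engine.generate("The capital of France is", params,
                                        "request-0"):
        print(output.outputs[0].text)

asyncio.run(stream_one())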
vllm/engine/llm_engine.py (2 additions, 2 deletions)

@@ -70,8 +70,8 @@ class LLMEngine:
     The :class:`~vllm.LLM` class wraps this class for offline batched inference
     and the :class:`AsyncLLMEngine` class wraps this class for online serving.
 
-    NOTE: The config arguments are derived from the :class:`~vllm.EngineArgs`
-    class. For the comprehensive list of arguments, see :ref:`engine_args`.
+    The config arguments are derived from :class:`~vllm.EngineArgs`. (See
+    :ref:`engine_args`)
 
     Args:
         model_config: The configuration related to the LLM model.
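
A brief sketch of the relationship the new wording points to: :class:`~vllm.EngineArgs` holds the flat keyword configuration, and the engine derives its per-component config objects from it (the model name and setting below are illustrative):

from vllm import EngineArgs, LLMEngine

# EngineArgs collects the flat configuration; from_engine_args() derives the
# model/cache/parallel/scheduler configs that the engine needs.
engine_args = EngineArgs(model="facebook/opt-125m", gpu_memory_utilization=0.9)
engine = LLMEngine.from_engine_args(engine_args)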
vllm/entrypoints/llm.py (18 additions, 8 deletions)

@@ -30,12 +30,6 @@ class LLM:
     this class generates texts from the model, using an intelligent batching
     mechanism and efficient memory management.
 
-    NOTE: This class is intended to be used for offline inference. For online
-    serving, use the :class:`~vllm.AsyncLLMEngine` class instead.
-
-    NOTE: For the comprehensive list of arguments, see
-    :class:`~vllm.EngineArgs`.
-
     Args:
         model: The name or path of a HuggingFace Transformers model.
         tokenizer: The name or path of a HuggingFace Transformers tokenizer.
@@ -84,6 +78,12 @@ class LLM:
             When a sequence has context length larger than this, we fall back
             to eager mode.
         disable_custom_all_reduce: See ParallelConfig
+        **kwargs: Arguments for :class:`~vllm.EngineArgs`. (See
+            :ref:`engine_args`)
+
+    Note:
+        This class is intended to be used for offline inference. For online
+        serving, use the :class:`~vllm.AsyncLLMEngine` class instead.
     """
 
     DEPRECATE_LEGACY: ClassVar[bool] = False
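
The relocated Note and the new **kwargs entry describe the usual offline pattern: extra keyword arguments to LLM() are forwarded to :class:`~vllm.EngineArgs`. A minimal sketch (model name and sampling settings are illustrative):

from vllm import LLM, SamplingParams

# Keyword arguments not consumed by LLM itself (e.g. gpu_memory_utilization)
# are passed through to EngineArgs.
llm = LLM(model="facebook/opt-125m", gpu_memory_utilization=0.9)

sampling_params = SamplingParams(temperature=0.8, max_tokens=32)
outputs = llm.generate(["Hello, my name is", "The future of AI is"],
                       sampling_params)
for output in outputs:
    print(output.prompt, "->", output.outputs[0].text)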
@@ -253,7 +253,7 @@ def generate(
     ) -> List[RequestOutput]:
         """Generates the completions for the input prompts.
 
-        NOTE: This class automatically batches the given prompts, considering
+        This class automatically batches the given prompts, considering
         the memory constraint. For the best performance, put all of your prompts
         into a single list and pass it to this method.
@@ -270,6 +270,11 @@
         Returns:
             A list of `RequestOutput` objects containing the
             generated completions in the same order as the input prompts.
+
+        Note:
+            Using ``prompts`` and ``prompt_token_ids`` as keyword parameters is
+            considered legacy and may be deprecated in the future. You should
+            instead pass them via the ``inputs`` parameter.
         """
         if prompt_token_ids is not None or multi_modal_data is not None:
             inputs = self._convert_v1_inputs(
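
The added Note points at the preferred calling convention for generate(). A hedged sketch of the difference, reusing the llm object from the sketch above:

# Preferred: pass the prompt(s) positionally (the ``inputs`` parameter).
outputs = llm.generate("San Francisco is a", sampling_params)

# Legacy: the ``prompts=`` / ``prompt_token_ids=`` keywords still work here,
# but the docstring now flags them as candidates for future deprecation.
outputs = llm.generate(prompts=["San Francisco is a"],
                       sampling_params=sampling_params)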
@@ -393,7 +398,7 @@ def encode(
     ) -> List[EmbeddingRequestOutput]:
         """Generates the completions for the input prompts.
 
-        NOTE: This class automatically batches the given prompts, considering
+        This class automatically batches the given prompts, considering
         the memory constraint. For the best performance, put all of your prompts
         into a single list and pass it to this method.
@@ -409,6 +414,11 @@
         Returns:
             A list of `EmbeddingRequestOutput` objects containing the
             generated embeddings in the same order as the input prompts.
+
+        Note:
+            Using ``prompts`` and ``prompt_token_ids`` as keyword parameters is
+            considered legacy and may be deprecated in the future. You should
+            instead pass them via the ``inputs`` parameter.
         """
         if prompt_token_ids is not None or multi_modal_data is not None:
             inputs = self._convert_v1_inputs(
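
The same Note is added for encode(), which serves embedding models offline. A sketch of the intended usage (the model name is illustrative, and the exact output attributes may differ between versions):

from vllm import LLM

# encode() returns one EmbeddingRequestOutput per prompt, in input order.
embedding_llm = LLM(model="intfloat/e5-mistral-7b-instruct")
outputs = embedding_llm.encode(["Hello, my name is", "The capital of France is"])
for output in outputs:
    # Assumes each result exposes its vector as outputs.embedding.
    print(len(output.outputs.embedding))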
