diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index f883595f6d9ad..e288f8f30159a 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -38,7 +38,7 @@ steps: - pip install -r requirements-docs.txt - SPHINXOPTS=\"-W\" make html # Check API reference (if it fails, you may have missing mock imports) - - grep \"sig sig-object py\" build/html/api/params.html + - grep \"sig sig-object py\" build/html/api/inference_params.html - label: Async Engine, Inputs, Utils, Worker Test # 24min fast_check: true diff --git a/docs/source/api/params.md b/docs/source/api/inference_params.md similarity index 79% rename from docs/source/api/params.md rename to docs/source/api/inference_params.md index a3b4d9cbb44ec..181c30cab9c4a 100644 --- a/docs/source/api/params.md +++ b/docs/source/api/inference_params.md @@ -1,6 +1,6 @@ -# Optional Parameters +# Inference Parameters -Optional parameters for vLLM APIs. +Inference parameters for vLLM APIs. (sampling-params)= @@ -19,4 +19,3 @@ Optional parameters for vLLM APIs. .. autoclass:: vllm.PoolingParams :members: ``` - diff --git a/docs/source/api/model/adapters.md b/docs/source/api/model/adapters.md new file mode 100644 index 0000000000000..e103a51d0070d --- /dev/null +++ b/docs/source/api/model/adapters.md @@ -0,0 +1,9 @@ +# Model Adapters + +## Module Contents + +```{eval-rst} +.. automodule:: vllm.model_executor.models.adapters + :members: + :member-order: bysource +``` diff --git a/docs/source/api/model/index.md b/docs/source/api/model/index.md new file mode 100644 index 0000000000000..b8437e3c3517a --- /dev/null +++ b/docs/source/api/model/index.md @@ -0,0 +1,12 @@ +# Model Development + +## Submodules + +```{toctree} +:maxdepth: 1 + +interfaces_base +interfaces +adapters +``` + diff --git a/docs/source/api/model/interfaces.md b/docs/source/api/model/interfaces.md new file mode 100644 index 0000000000000..55bee57f64faa --- /dev/null +++ b/docs/source/api/model/interfaces.md @@ -0,0 +1,9 @@ +# Optional Interfaces + +## Module Contents + +```{eval-rst} +.. automodule:: vllm.model_executor.models.interfaces + :members: + :member-order: bysource +``` diff --git a/docs/source/api/model/interfaces_base.md b/docs/source/api/model/interfaces_base.md new file mode 100644 index 0000000000000..75d58d34228e9 --- /dev/null +++ b/docs/source/api/model/interfaces_base.md @@ -0,0 +1,9 @@ +# Base Model Interfaces + +## Module Contents + +```{eval-rst} +.. automodule:: vllm.model_executor.models.interfaces_base + :members: + :member-order: bysource +``` diff --git a/docs/source/index.md b/docs/source/index.md index 6747a7fcce4fe..23e4304fe29d9 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -139,8 +139,9 @@ community/sponsors api/offline_inference/index api/engine/index +api/inference_params api/multimodal/index -api/params +api/model/index ``` % Design Documents: Details about vLLM internals diff --git a/vllm/model_executor/models/interfaces.py b/vllm/model_executor/models/interfaces.py index 6f26603046483..b51cba86ec1a4 100644 --- a/vllm/model_executor/models/interfaces.py +++ b/vllm/model_executor/models/interfaces.py @@ -38,13 +38,15 @@ def get_multimodal_embeddings(self, **kwargs) -> Optional[T]: to be merged with text embeddings. The output embeddings must be one of the following formats: + - A list or tuple of 2D tensors, where each tensor corresponds to - each input multimodal data item (e.g, image). + each input multimodal data item (e.g, image). - A single 3D tensor, with the batch dimension grouping the 2D tensors. - NOTE: The returned multimodal embeddings must be in the same order as - the appearances of their corresponding multimodal data item in the - input prompt. + Note: + The returned multimodal embeddings must be in the same order as + the appearances of their corresponding multimodal data item in the + input prompt. """ ... @@ -59,6 +61,7 @@ def get_input_embeddings( ) -> torch.Tensor: ... + @overload def get_input_embeddings( self, input_ids: torch.Tensor, diff --git a/vllm/model_executor/models/interfaces_base.py b/vllm/model_executor/models/interfaces_base.py index de733b6d49a53..4c353ae6ffc13 100644 --- a/vllm/model_executor/models/interfaces_base.py +++ b/vllm/model_executor/models/interfaces_base.py @@ -35,6 +35,7 @@ @runtime_checkable class VllmModel(Protocol[C_co, T_co]): + """The interface required for all models in vLLM.""" def __init__( self, @@ -97,6 +98,7 @@ def is_vllm_model( @runtime_checkable class VllmModelForTextGeneration(VllmModel[C_co, T], Protocol[C_co, T]): + """The interface required for all generative models in vLLM.""" def compute_logits( self, @@ -142,6 +144,7 @@ def is_text_generation_model( @runtime_checkable class VllmModelForPooling(VllmModel[C_co, T], Protocol[C_co, T]): + """The interface required for all pooling models in vLLM.""" def pooler( self,