diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 95417718b51fe..e7503b9655830 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -183,6 +183,16 @@ async def authentication(request: Request, call_next): served_model_names = [args.model] engine_args = AsyncEngineArgs.from_cli_args(args) + + # Enforce pixel values as image input type for vision language models + # when serving with API server + if engine_args.image_input_type is not None and \ + engine_args.image_input_type.upper() != "PIXEL_VALUES": + raise ValueError( + f"Invalid image_input_type: {engine_args.image_input_type}. " + "Only --image-input-type 'pixel_values' is supported for serving " + "vision language models with the vLLM API server.") + engine = AsyncLLMEngine.from_engine_args( engine_args, usage_context=UsageContext.OPENAI_API_SERVER)