Commit
remove disable arg
alexm-redhat committed Dec 2, 2024
1 parent 4e8dac4 commit f340a9d
Showing 6 changed files with 7 additions and 30 deletions.
1 change: 0 additions & 1 deletion examples/offline_inference_vision_language.py
@@ -26,7 +26,6 @@ def run_llava(question: str, modality: str):
 
     llm = LLM(model="llava-hf/llava-1.5-7b-hf",
               max_model_len=4096)
-              #mm_disable_frontend_processor=True)
 
    stop_token_ids = None
    return llm, prompt, stop_token_ids
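With the flag removed, the example builds the LLaVA model exactly as the context lines show; multi-modal preprocessing always runs in the frontend. A minimal usage sketch of this path (the image file, prompt wording, and sampling settings are illustrative assumptions, not part of the diff):

# Sketch only: there is nothing to disable when building the LLM anymore.
from PIL import Image
from vllm import LLM, SamplingParams

llm = LLM(model="llava-hf/llava-1.5-7b-hf", max_model_len=4096)

image = Image.open("example.jpg")  # hypothetical local image
outputs = llm.generate(
    {
        "prompt": "USER: <image>\nWhat is in this picture? ASSISTANT:",
        "multi_modal_data": {"image": image},
    },
    SamplingParams(max_tokens=64),
)
print(outputs[0].outputs[0].text)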
10 changes: 2 additions & 8 deletions vllm/config.py
@@ -125,8 +125,6 @@ class ModelConfig:
             HuggingFace config.
         mm_processor_kwargs: Arguments to be forwarded to the model's processor
             for multi-modal data, e.g., image processor.
-        mm_disable_frontend_processor: Disables multi-modal HF preprocessor/mapper
-            execution in the frontend process (not recommended)
         override_neuron_config: Initialize non default neuron config or
             override default neuron config that are specific to Neuron devices,
             this argument will be used to configure the neuron config that
@@ -165,7 +163,6 @@ def __init__(
         config_format: ConfigFormat = ConfigFormat.AUTO,
         hf_overrides: Optional[HfOverrides] = None,
         mm_processor_kwargs: Optional[Dict[str, Any]] = None,
-        mm_disable_frontend_processor: bool = False,
         override_neuron_config: Optional[Dict[str, Any]] = None,
         override_pooler_config: Optional["PoolerConfig"] = None) -> None:
         self.model = model
@@ -225,7 +222,6 @@ def __init__(
         self.dtype = _get_and_verify_dtype(self.hf_text_config, dtype)
         self.use_async_output_proc = use_async_output_proc
         self.mm_processor_kwargs = mm_processor_kwargs
-        self.mm_disable_frontend_processor = mm_disable_frontend_processor
 
         # Set enforce_eager to False if the value is unset.
         if self.enforce_eager is None:
@@ -2397,8 +2393,7 @@ def __str__(self):
                 "decoding_config=%r, observability_config=%r, "
                 "seed=%d, served_model_name=%s, "
                 "num_scheduler_steps=%d, enable_prefix_caching=%s, "
-                "use_async_output_proc=%s, mm_processor_kwargs=%s, "
-                "mm_disable_frontend_processor=%s") % \
+                "use_async_output_proc=%s, mm_processor_kwargs=%s") % \
             (self.model_config.model, self.speculative_config,
              self.model_config.tokenizer,
              self.model_config.skip_tokenizer_init,
@@ -2424,5 +2419,4 @@ def __str__(self):
              self.scheduler_config.num_scheduler_steps,
              self.cache_config.enable_prefix_caching,
              self.model_config.use_async_output_proc,
-             self.model_config.mm_processor_kwargs,
-             self.model_config.mm_disable_frontend_processor)
+             self.model_config.mm_processor_kwargs)
8 changes: 0 additions & 8 deletions vllm/engine/arg_utils.py
@@ -141,7 +141,6 @@ class EngineArgs:
     tokenizer_pool_extra_config: Optional[Dict[str, Any]] = None
     limit_mm_per_prompt: Optional[Mapping[str, int]] = None
     mm_processor_kwargs: Optional[Dict[str, Any]] = None
-    mm_disable_frontend_processor: bool = False
     enable_lora: bool = False
     enable_lora_bias: bool = False
     max_loras: int = 1
@@ -570,12 +569,6 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
             help=('Overrides for the multimodal input mapping/processing, '
                   'e.g., image processor. For example: {"num_crops": 4}.'))
 
-        parser.add_argument(
-            '--mm-disable-frontend-processor',
-            action='store_true',
-            default=EngineArgs.mm_disable_frontend_processor,
-            help="Disable multi-modal frontend processing (not recommended)")
-
         # LoRA related configs
         parser.add_argument('--enable-lora',
                             action='store_true',
@@ -941,7 +934,6 @@ def create_model_config(self) -> ModelConfig:
             use_async_output_proc=not self.disable_async_output_proc,
             config_format=self.config_format,
             mm_processor_kwargs=self.mm_processor_kwargs,
-            mm_disable_frontend_processor=self.mm_disable_frontend_processor,
             override_neuron_config=self.override_neuron_config,
             override_pooler_config=self.override_pooler_config,
         )
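On the CLI and EngineArgs side, only --mm-processor-kwargs remains for influencing frontend multi-modal processing. A small sketch of the resulting surface (the model name is illustrative, and num_crops is just the example value from the help text above, only meaningful for processors that accept it):

# Sketch: EngineArgs no longer defines mm_disable_frontend_processor.
from vllm.engine.arg_utils import EngineArgs

args = EngineArgs(
    model="llava-hf/llava-1.5-7b-hf",
    mm_processor_kwargs={"num_crops": 4},  # example value from the --mm-processor-kwargs help text
)

# Passing the removed field is now rejected at construction time:
#   EngineArgs(model="...", mm_disable_frontend_processor=True)  -> TypeError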
7 changes: 1 addition & 6 deletions vllm/entrypoints/llm.py
@@ -168,7 +168,6 @@ def __init__(
         disable_async_output_proc: bool = False,
         hf_overrides: Optional[HfOverrides] = None,
         mm_processor_kwargs: Optional[Dict[str, Any]] = None,
-        mm_disable_frontend_processor: bool = False,
         # After positional args are removed, move this right below `model`
         task: TaskOption = "auto",
         override_pooler_config: Optional[PoolerConfig] = None,
@@ -214,7 +213,6 @@ def __init__(
             disable_async_output_proc=disable_async_output_proc,
             hf_overrides=hf_overrides,
             mm_processor_kwargs=mm_processor_kwargs,
-            mm_disable_frontend_processor=mm_disable_frontend_processor,
             override_pooler_config=override_pooler_config,
             compilation_config=compilation_config_instance,
             **kwargs,
@@ -545,7 +543,6 @@ def chat(
         continue_final_message: bool = False,
         tools: Optional[List[Dict[str, Any]]] = None,
-        mm_processor_kwargs: Optional[Dict[str, Any]] = None,
-        mm_disable_frontend_processor: bool = False
+        mm_processor_kwargs: Optional[Dict[str, Any]] = None
     ) -> List[RequestOutput]:
         """
         Generate responses for a chat conversation.
@@ -587,9 +584,7 @@ def chat(
             ``True`` if ``add_generation_prompt`` is also ``True``.
             mm_processor_kwargs: Multimodal processor kwarg overrides for this
                 chat request. Only used for offline requests.
-            mm_disable_frontend_processor: Disable multi-modal frontend
-                processing (not recommended)
 
         Returns:
             A list of ``RequestOutput`` objects containing the generated
             responses in the same order as the input messages.
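LLM() and LLM.chat() lose the keyword as well; per-request processor overrides continue to go through mm_processor_kwargs. A minimal chat sketch, assuming an OpenAI-style message list (the message content is illustrative):

from vllm import LLM

llm = LLM(model="llava-hf/llava-1.5-7b-hf", max_model_len=4096)

messages = [{"role": "user", "content": "Summarize why preprocessing moved to the frontend."}]
outputs = llm.chat(
    messages,
    mm_processor_kwargs=None,  # still accepted for offline requests; the disable flag is gone
)
print(outputs[0].outputs[0].text)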
8 changes: 3 additions & 5 deletions vllm/v1/engine/core.py
@@ -98,11 +98,9 @@ def add_request(self, request: EngineCoreRequest):
         """Add request to the scheduler."""
         req = Request.from_engine_core_request(request)
 
-        # Apply multi-modal mapper (if necessary)
-        if req.mm_data:
-            assert req.mm_inputs is None or req.mm_inputs == []
-            req.mm_inputs = self.mm_input_mapper.process_inputs(
-                req.mm_data, req.mm_processor_kwargs)
+        # Sanity check to verify that the multi-modal preprocessor
+        # ran in the frontend P0 process
+        assert req.mm_data is None or req.mm_data == {}
 
         self.scheduler.add_request(req)
 
3 changes: 1 addition & 2 deletions vllm/v1/engine/processor.py
@@ -104,8 +104,7 @@ def process_inputs(
         # here in the frontend process (if enabled)
         mm_data = decoder_inputs.multi_modal_data
         mm_inputs = None
-        if (not self.model_config.mm_disable_frontend_processor
-                and mm_data is not None):
+        if mm_data is not None:
             mm_inputs = self.mm_input_mapper.process_inputs(
                 decoder_inputs.multi_modal_data,
                 decoder_inputs.mm_processor_kwargs)
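Together with the core.py change above, the division of labor is now fixed: the frontend (P0) processor always runs the multi-modal input mapper when raw data is present, and the engine core only checks that nothing raw slipped through. A simplified sketch of that hand-off (attribute and method names follow the diff; the surrounding plumbing is omitted):

# Simplified sketch of the P0 -> engine-core hand-off after this commit.
def frontend_process(decoder_inputs, mm_input_mapper):
    # Frontend (P0): unconditionally map raw multi-modal data to model inputs.
    mm_inputs = None
    if decoder_inputs.multi_modal_data is not None:
        mm_inputs = mm_input_mapper.process_inputs(
            decoder_inputs.multi_modal_data,
            decoder_inputs.mm_processor_kwargs)
    return mm_inputs

def core_add_request(req, scheduler):
    # Engine core: the frontend must already have consumed req.mm_data.
    assert req.mm_data is None or req.mm_data == {}
    scheduler.add_request(req)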
