You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
reacted with thumbs up emoji reacted with thumbs down emoji reacted with laugh emoji reacted with hooray emoji reacted with confused emoji reacted with heart emoji reacted with rocket emoji reacted with eyes emoji
-
`----> 3 llm = LLM(model="Qwen/QwQ-32B-AWQ",tensor_parallel_size=1, max_model_len=29500, max_num_seqs=1, enable_chunked_prefill=True, max_num_batched_tokens=512, kv_cache_dtype='fp8', gpu_memory_utilization=0.95, dtype='half', enforce_eager=True, quantization='awq_marlin', enable_reasoning=True, reasoning_parser='deepseek_r1')
File ~/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/utils.py:1022, in deprecate_args.&lt;locals&gt;.wrapper.&lt;locals&gt;.inner(*args, **kwargs)
1015 msg += f" {additional_message}"
1017 warnings.warn(
1018 DeprecationWarning(msg),
1019 stacklevel=3, # The inner function takes up one level
1020 )
-> 1022 return fn(*args, **kwargs)
File ~/miniconda3/envs/vllm/lib/python3.12/site-packages/vllm/entrypoints/llm.py:212, in LLM.__init__(self, model, tokenizer, tokenizer_mode, skip_tokenizer_init, trust_remote_code, allowed_local_media_path, tensor_parallel_size, dtype, quantization, revision, tokenizer_revision, seed, gpu_memory_utilization, swap_space, cpu_offload_gb, enforce_eager, max_seq_len_to_capture, disable_custom_all_reduce, disable_async_output_proc, hf_overrides, mm_processor_kwargs, task, override_pooler_config, compilation_config, **kwargs)
209 else:
210 compilation_config_instance = None
--> 212 engine_args = EngineArgs(
213 model=model,
214 task=task,
215 tokenizer=tokenizer,
216 tokenizer_mode=tokenizer_mode,
217 skip_tokenizer_init=skip_tokenizer_init,
218 trust_remote_code=trust_remote_code,
219 allowed_local_media_path=allowed_local_media_path,
220 tensor_parallel_size=tensor_parallel_size,
221 dtype=dtype,
222 quantization=quantization,
223 revision=revision,
224 tokenizer_revision=tokenizer_revision,
225 seed=seed,
226 gpu_memory_utilization=gpu_memory_utilization,
227 swap_space=swap_space,
228 cpu_offload_gb=cpu_offload_gb,
229 enforce_eager=enforce_eager,
230 max_seq_len_to_capture=max_seq_len_to_capture,
231 disable_custom_all_reduce=disable_custom_all_reduce,
232 disable_async_output_proc=disable_async_output_proc,
233 hf_overrides=hf_overrides,
234 mm_processor_kwargs=mm_processor_kwargs,
235 override_pooler_config=override_pooler_config,
236 compilation_config=compilation_config_instance,
237 **kwargs,
238 )
239 # Logic to switch between engines is done at runtime instead of import
240 # to avoid import order issues
241 self.engine_class = self.get_engine_class()
TypeError: EngineArgs.__init__() got an unexpected keyword argument 'enable_reasoning'`
Beta Was this translation helpful? Give feedback.
All reactions