diff --git a/fastchat/serve/vllm_worker.py b/fastchat/serve/vllm_worker.py
index b1dc54f3d..50b028afb 100644
--- a/fastchat/serve/vllm_worker.py
+++ b/fastchat/serve/vllm_worker.py
@@ -192,6 +192,7 @@ async def api_model_details(request: Request):
         "--controller-address", type=str, default="http://localhost:21001"
     )
     parser.add_argument("--model-path", type=str, default="lmsys/vicuna-7b-v1.3")
+    parser.add_argument("--quantization", type=str)
     parser.add_argument(
         "--model-names",
         type=lambda s: s.split(","),