diff --git a/docs/vllm_integration.md b/docs/vllm_integration.md
index 021fc3853..e371ef32f 100644
--- a/docs/vllm_integration.md
+++ b/docs/vllm_integration.md
@@ -19,7 +19,7 @@ See the supported models [here](https://vllm.readthedocs.io/en/latest/models/sup
 python3 -m fastchat.serve.vllm_worker --model-path lmsys/vicuna-7b-v1.3 --tokenizer hf-internal-testing/llama-tokenizer
 ```
 
-if you use a awq model, try
+If you use an AWQ quantized model, try
 '''
 python3 -m fastchat.serve.vllm_worker --model-path TheBloke/vicuna-7B-v1.5-AWQ --quantization awq
 '''
diff --git a/fastchat/model/model_registry.py b/fastchat/model/model_registry.py
index 19a513eaa..4a046b47f 100644
--- a/fastchat/model/model_registry.py
+++ b/fastchat/model/model_registry.py
@@ -306,3 +306,9 @@ def get_model_info(name: str) -> ModelInfo:
     "https://huggingface.co/bofenghuang/vigogne-2-7b-chat",
     "Vigogne-Chat is a French large language model (LLM) optimized for instruction-following and multi-turn dialogues, developed by Bofeng Huang",
 )
+register_model_info(
+    ["mistral-7b-instruct"],
+    "Mistral",
+    "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1",
+    "a large language model by Mistral AI team",
+)
diff --git a/fastchat/serve/vllm_worker.py b/fastchat/serve/vllm_worker.py
index 50b028afb..8e255b79c 100644
--- a/fastchat/serve/vllm_worker.py
+++ b/fastchat/serve/vllm_worker.py
@@ -192,7 +192,6 @@ async def api_model_details(request: Request):
         "--controller-address", type=str, default="http://localhost:21001"
     )
     parser.add_argument("--model-path", type=str, default="lmsys/vicuna-7b-v1.3")
-    parser.add_argument("--quantization", type=str)
     parser.add_argument(
         "--model-names",
         type=lambda s: s.split(","),
@@ -211,8 +210,6 @@ async def api_model_details(request: Request):
    args.model = args.model_path
    if args.num_gpus > 1:
        args.tensor_parallel_size = args.num_gpus
-   if args.quantization:
-       args.quantization = args.quantization
 
    engine_args = AsyncEngineArgs.from_cli_args(args)
    engine = AsyncLLMEngine.from_engine_args(engine_args)
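
For reference, the new registry entry can be exercised through `get_model_info` (whose signature appears in the hunk header above). A minimal sketch follows; it assumes the `ModelInfo` returned by `fastchat.model.model_registry` exposes `simple_name`, `link`, and `description` fields, which is an assumption beyond what this patch shows.

```python
# Minimal usage sketch for the newly registered Mistral entry.
# Assumption: ModelInfo exposes simple_name / link / description attributes.
from fastchat.model.model_registry import get_model_info

info = get_model_info("mistral-7b-instruct")
print(info.simple_name)  # expected: "Mistral"
print(info.link)         # expected: "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1"
print(info.description)  # expected: "a large language model by Mistral AI team"
```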