Skip to content

Commit

Permalink
Merge pull request #9 from shaleprotocol/merge_0226
Browse files Browse the repository at this point in the history
Merge 0226
  • Loading branch information
renning22 authored Feb 26, 2024
2 parents dac3317 + 57bd6a2 commit 63d3d2c
Show file tree
Hide file tree
Showing 8 changed files with 40 additions and 107 deletions.
13 changes: 13 additions & 0 deletions fastchat/conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1579,6 +1579,19 @@ def get_conv_template(name: str) -> Conversation:
)
)

# Gemma
# reference: https://huggingface.co/google/gemma-7b-it?text=%3Cstart_of_turn%3Euser%0AHow+does+the+brain+work%3F%3Cend_of_turn%3E%0A%3Cstart_of_turn%3Emodel
register_conv_template(
    Conversation(
        name="gemma",
        # system_message is the literal <bos> token string; presumably emitted
        # once before the first turn — TODO confirm against Conversation.get_prompt.
        system_message="<bos>",
        # Role tags carry their own trailing newline, matching the HF chat format
        # for gemma-it (<start_of_turn>user / <start_of_turn>model).
        roles=("<start_of_turn>user\n", "<start_of_turn>model\n"),
        # NO_COLON_SINGLE: no "role:" separator is inserted; the role strings
        # above already contain all required markup.
        sep_style=SeparatorStyle.NO_COLON_SINGLE,
        sep="<end_of_turn>\n",
        # Generation stops when the model emits the end-of-turn tag.
        stop_str="<end_of_turn>",
    )
)


if __name__ == "__main__":
from fastchat.conversation import get_conv_template
Expand Down
11 changes: 11 additions & 0 deletions fastchat/model/model_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2279,6 +2279,16 @@ def get_default_conv_template(self, model_path: str) -> Conversation:
return get_conv_template("yuan")


class GemmaAdapter(BaseModelAdapter):
    """Model adapter for Google's Gemma chat models (e.g. gemma-7b-it)."""

    def match(self, model_path: str):
        # Case-insensitive substring match on the model path.
        lowered = model_path.lower()
        return "gemma" in lowered

    def get_default_conv_template(self, model_path: str) -> Conversation:
        # Gemma models all share the "gemma" conversation template.
        return get_conv_template("gemma")


# Note: the registration order matters.
# The one registered earlier has a higher matching priority.
register_model_adapter(PeftModelAdapter)
Expand Down Expand Up @@ -2368,6 +2378,7 @@ def get_default_conv_template(self, model_path: str) -> Conversation:
register_model_adapter(SteerLMAdapter)
register_model_adapter(LlavaAdapter)
register_model_adapter(YuanAdapter)
register_model_adapter(GemmaAdapter)

# After all adapters, try the default base adapter.
register_model_adapter(BaseModelAdapter)
85 changes: 4 additions & 81 deletions fastchat/model/model_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,18 +379,6 @@ def get_model_info(name: str) -> ModelInfo:
"PaLM 2 for Chat (chat-bison@001) by Google",
)

register_model_info(
["openchat-3.5"],
"OpenChat 3.5",
"https://github.com/imoneoi/openchat",
"OpenChat 3.5 is a versatile, open-source language model fine-tuned using C-RLFT",
)
register_model_info(
["starling-lm-7b-alpha"],
"Starling-LM-7B-alpha",
"https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha",
"an open model trained using RLAIF by Berkeley",
)
register_model_info(
["llama-7b", "llama-13b"],
"LLaMA",
Expand Down Expand Up @@ -491,16 +479,6 @@ def get_model_info(name: str) -> ModelInfo:
"De-aligned versions of the airoboros models",
)

register_model_info(
[
"spicyboros-7b-2.2",
"spicyboros-13b-2.2",
"spicyboros-70b-2.2",
],
"spicyboros",
"https://huggingface.co/jondurbin/spicyboros-70b-2.2",
"de-aligned versions of the airoboros models",
)
register_model_info(
["Robin-7b-v2", "Robin-13b-v2", "Robin-33b-v2"],
"Robin-v2",
Expand Down Expand Up @@ -571,12 +549,6 @@ def get_model_info(name: str) -> ModelInfo:
"New extended Chinese vocabulary beyond Llama-2, open-sourcing the Chinese LLaMA-2 and Alpaca-2 LLMs.",
)

register_model_info(
["Chinese-Alpaca-2-7B", "Chinese-Alpaca-2-13B"],
"Chinese-Alpaca",
"https://huggingface.co/hfl/chinese-alpaca-2-13b",
"New extended Chinese vocabulary beyond Llama-2, open-sourcing the Chinese LLaMA-2 and Alpaca-2 LLMs.",
)
register_model_info(
["Vigogne-2-7B-Instruct", "Vigogne-2-13B-Instruct"],
"Vigogne-Instruct",
Expand Down Expand Up @@ -684,59 +656,10 @@ def get_model_info(name: str) -> ModelInfo:
"https://github.com/haotian-liu/LLaVA",
"an open large language and vision assistant",
)
register_model_info(
["stable-vicuna-13B-HF"],
"stable-vicuna",
"https://huggingface.co/TheBloke/stable-vicuna-13B-HF",
"StableVicuna is a Vicuna model fine-tuned using RLHF via PPO on various conversational and instructional datasets.",
)
register_model_info(
["deluxe-chat-v1", "deluxe-chat-v1.1"],
"DeluxeChat",
"",
"Deluxe Chat",
)
register_model_info(
[
"Xwin-LM-7B-V0.1",
"Xwin-LM-13B-V0.1",
"Xwin-LM-70B-V0.1",
"Xwin-LM-7B-V0.2",
"Xwin-LM-13B-V0.2",
],
"Xwin-LM",
"https://github.com/Xwin-LM/Xwin-LM",
"Chat models developed by Xwin-LM team",
)

register_model_info(
["lemur-70b-chat"],
"Lemur-Chat",
"https://huggingface.co/OpenLemur/lemur-70b-chat-v1",
"an openly accessible language model optimized for both natural language and coding capabilities ",
)

register_model_info(
["Mistral-7B-OpenOrca"],
"Open-Orca",
"https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca",
"A fine-tune of [Mistral 7B](https://huggingface.co/mistralai/Mistral-7B-v0.1) using [OpenOrca dataset](https://huggingface.co/datasets/Open-Orca/OpenOrca)",
)

register_model_info(
[
"AquilaChat-7B",
"AquilaChat2-7B",
"AquilaChat2-34B",
],
"Aquila-Chat",
"https://huggingface.co/BAAI/AquilaChat2-34B",
"Chat models developed by BAAI team",
)

register_model_info(
["Yi-34B-Chat", "Yi-6B-Chat"],
"Yi-Chat",
"https://huggingface.co/01-ai",
"A large language model by 01.AI.",
["gemma-7b-it", "gemma-2b-it"],
"Gemma",
"https://blog.google/technology/developers/gemma-open-models/",
"Gemma by Google",
)
20 changes: 0 additions & 20 deletions fastchat/serve/gradio_web_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,26 +573,6 @@ def bot_response(
width: auto;
float: left;
}
.image-container {
display: flex;
align-items: center;
padding: 1px;
}
.image-container img {
margin: 0 30px;
height: 20px;
max-height: 100%;
width: auto;
max-width: 20%;
}
.image-about img {
margin: 0 30px;
margin-top: 30px;
height: 60px;
max-height: 100%;
width: auto;
float: left;
}
"""


Expand Down
3 changes: 3 additions & 0 deletions fastchat/serve/gradio_web_server_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,9 @@ def build_demo(models, vl_models, elo_results_file, leaderboard_table_file):
with gr.Tab("About Us", id=5):
about = build_about()

with gr.Tab("About Us", id=5):
about = build_about()

url_params = gr.JSON(visible=False)

if args.model_list_mode not in ["once", "reload"]:
Expand Down
5 changes: 0 additions & 5 deletions fastchat/serve/openai_api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,11 +224,6 @@ def check_requests(request) -> Optional[JSONResponse]:
ErrorCode.PARAM_OUT_OF_RANGE,
f"{request.top_k} is out of Range. Either set top_k to -1 or >=1.",
)
if request.top_k is not None and (request.top_k > -1 and request.top_k < 1):
return create_error_response(
ErrorCode.PARAM_OUT_OF_RANGE,
f"{request.top_k} is out of Range. Either set top_k to -1 or >=1.",
)
if request.stop is not None and (
not isinstance(request.stop, str) and not isinstance(request.stop, list)
):
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ classifiers = [
]
dependencies = [
"accelerate>=0.21", "aiohttp", "einops", "fastapi", "gradio", "httpx", "markdown2[all]", "mysqlclient", "nh3", "numpy",
"peft", "prompt_toolkit>=3.0.0", "pydantic", "redis", "requests", "rich>=10.0.0", "sentencepiece",
"peft", "prompt_toolkit>=3.0.0", "pydantic", "psutil", "redis", "requests", "rich>=10.0.0", "sentencepiece",
"shortuuid", "SQLAlchemy", "slowapi", "tiktoken", "tokenizers>=0.12.1", "torch",
"transformers>=4.31.0", "uvicorn", "wandb"
]
Expand Down
8 changes: 8 additions & 0 deletions tests/launch_openai_api_test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,5 +49,13 @@ def launch_process(cmd):

launch_process(cmd)

if "llava" in model_path.lower():
cmd += f"--tokenizer-path llava-hf/llava-1.5-7b-hf"

if worker_name == "vllm_worker":
cmd += "--tokenizer hf-internal-testing/llama-tokenizer"

launch_process(cmd)

while True:
pass

0 comments on commit 63d3d2c

Please sign in to comment.