Skip to content

Commit

Permalink
feat(model): supports the deployment of multiple models through the A…
Browse files Browse the repository at this point in the history
…PI and add the corresponding command line interface (#570)

- Split `LLM_MODEL_CONFIG` into `LLM_MODEL_CONFIG` and
`EMBEDDING_MODEL_CONFIG`.
- New HTTP API to obtain the list of models and configuration parameters
supported by the current cluster.
  - New HTTP API to launch models on a specified machine.
  - The command line supports the above HTTP APIs.
  • Loading branch information
Aries-ckt authored Sep 11, 2023
2 parents f9e184a + 27d7f9f commit 68d30dd
Show file tree
Hide file tree
Showing 38 changed files with 1,077 additions and 345 deletions.
29 changes: 16 additions & 13 deletions pilot/configs/model_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,25 +41,12 @@ def get_device() -> str:
# (Llama2 based) see https://huggingface.co/lmsys/vicuna-13b-v1.5
"vicuna-13b-v1.5": os.path.join(MODEL_PATH, "vicuna-13b-v1.5"),
"vicuna-7b-v1.5": os.path.join(MODEL_PATH, "vicuna-7b-v1.5"),
"text2vec": os.path.join(MODEL_PATH, "text2vec-large-chinese"),
# https://huggingface.co/moka-ai/m3e-large
"m3e-base": os.path.join(MODEL_PATH, "m3e-base"),
# https://huggingface.co/moka-ai/m3e-base
"m3e-large": os.path.join(MODEL_PATH, "m3e-large"),
# https://huggingface.co/BAAI/bge-large-en
"bge-large-en": os.path.join(MODEL_PATH, "bge-large-en"),
"bge-base-en": os.path.join(MODEL_PATH, "bge-base-en"),
# https://huggingface.co/BAAI/bge-large-zh
"bge-large-zh": os.path.join(MODEL_PATH, "bge-large-zh"),
"bge-base-zh": os.path.join(MODEL_PATH, "bge-base-zh"),
"sentence-transforms": os.path.join(MODEL_PATH, "all-MiniLM-L6-v2"),
"codegen2-1b": os.path.join(MODEL_PATH, "codegen2-1B"),
"codet5p-2b": os.path.join(MODEL_PATH, "codet5p-2b"),
"chatglm-6b-int4": os.path.join(MODEL_PATH, "chatglm-6b-int4"),
"chatglm-6b": os.path.join(MODEL_PATH, "chatglm-6b"),
"chatglm2-6b": os.path.join(MODEL_PATH, "chatglm2-6b"),
"chatglm2-6b-int4": os.path.join(MODEL_PATH, "chatglm2-6b-int4"),
"text2vec-base": os.path.join(MODEL_PATH, "text2vec-base-chinese"),
"guanaco-33b-merged": os.path.join(MODEL_PATH, "guanaco-33b-merged"),
"falcon-40b": os.path.join(MODEL_PATH, "falcon-40b"),
"gorilla-7b": os.path.join(MODEL_PATH, "gorilla-7b"),
Expand All @@ -84,6 +71,22 @@ def get_device() -> str:
"llama-cpp": os.path.join(MODEL_PATH, "ggml-model-q4_0.bin"),
}

# Mapping from embedding-model short name (as used in configuration) to the
# local filesystem path of its weights under MODEL_PATH.
EMBEDDING_MODEL_CONFIG = {
    "text2vec": os.path.join(MODEL_PATH, "text2vec-large-chinese"),
    "text2vec-base": os.path.join(MODEL_PATH, "text2vec-base-chinese"),
    # https://huggingface.co/moka-ai/m3e-base
    "m3e-base": os.path.join(MODEL_PATH, "m3e-base"),
    # https://huggingface.co/moka-ai/m3e-large
    "m3e-large": os.path.join(MODEL_PATH, "m3e-large"),
    # https://huggingface.co/BAAI/bge-large-en
    "bge-large-en": os.path.join(MODEL_PATH, "bge-large-en"),
    "bge-base-en": os.path.join(MODEL_PATH, "bge-base-en"),
    # https://huggingface.co/BAAI/bge-large-zh
    "bge-large-zh": os.path.join(MODEL_PATH, "bge-large-zh"),
    "bge-base-zh": os.path.join(MODEL_PATH, "bge-base-zh"),
    # NOTE(review): key is spelled "sentence-transforms" (not "transformers");
    # kept as-is because callers look models up by this key.
    "sentence-transforms": os.path.join(MODEL_PATH, "all-MiniLM-L6-v2"),
}

# Load model config
ISDEBUG = False

Expand Down
11 changes: 11 additions & 0 deletions pilot/model/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,17 @@ def _dynamic_model_parser() -> Callable[[None], List[Type]]:
return [param_class]


def _parse_model_param_class(model_name: str, model_path: str) -> Type[ModelParameters]:
    """Resolve the launch-parameter class for a model via its LLM adapter.

    Args:
        model_name: Short name of the model (a key in the model config).
        model_path: Filesystem path of the model weights.

    Returns:
        The parameter *class* (not an instance) declared by the model's
        adapter. Falls back to the generic ``ModelParameters`` class when the
        adapter cannot be resolved (e.g. unknown model name).
    """
    try:
        llm_adapter = get_llm_model_adapter(model_name, model_path)
        return llm_adapter.model_param_class()
    except Exception as e:
        # Best-effort fallback: log and return the generic parameter class.
        # logger.warn is deprecated; logger.warning is the supported API.
        logger.warning(
            f"Parse model parameters with model name {model_name} and model {model_path} failed {str(e)}, return `ModelParameters`"
        )
        return ModelParameters


# TODO support cpu? for practise we support gpt4all or chatglm-6b-int4?


Expand Down
38 changes: 37 additions & 1 deletion pilot/model/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
# -*- coding: utf-8 -*-

from enum import Enum
from typing import TypedDict, Optional, Dict
from typing import TypedDict, Optional, Dict, List
from dataclasses import dataclass
from datetime import datetime
from pilot.utils.parameter_utils import ParameterDescription


class Message(TypedDict):
Expand Down Expand Up @@ -46,5 +47,40 @@ class ModelOutput:
@dataclass
class WorkerApplyOutput:
    # Human-readable message describing the outcome of the apply action.
    message: str
    # Whether the apply action succeeded; defaults to True.
    success: Optional[bool] = True
    # The seconds cost to apply some action to worker instances
    timecost: Optional[int] = -1


@dataclass
class SupportedModel:
    """Metadata describing one model that a worker can launch."""

    # Short model name (a key in the model config).
    model: str
    # Filesystem path of the model weights.
    path: str
    # Type of worker that serves this model — presumably "llm"/"text2vec"; confirm against callers.
    worker_type: str
    # Whether ``path`` exists on the worker's machine.
    path_exist: bool
    # Whether this is a proxy model — TODO confirm semantics against callers.
    proxy: bool
    # Whether the model is enabled.
    enabled: bool
    # Descriptions of the launch parameters the model accepts.
    params: List[ParameterDescription]

    @classmethod
    def from_dict(cls, model_data: Dict) -> "SupportedModel":
        """Build a ``SupportedModel`` from a plain dict.

        Dict entries under ``"params"`` are converted into
        ``ParameterDescription`` instances. The input dict is not mutated.
        """
        # Shallow copy so the caller's dict is not modified in place.
        data = dict(model_data)
        params = data.get("params", [])
        if params:
            params = [ParameterDescription(**param) for param in params]
        data["params"] = params
        return cls(**data)


@dataclass
class WorkerSupportedModel:
    """The set of models supported by a single worker host."""

    # Hostname or IP address of the worker.
    host: str
    # Port the worker listens on.
    port: int
    # Models the worker can launch.
    models: List[SupportedModel]

    @classmethod
    def from_dict(cls, worker_data: Dict) -> "WorkerSupportedModel":
        """Build a ``WorkerSupportedModel`` from a plain dict.

        Each entry of ``worker_data["models"]`` is converted via
        ``SupportedModel.from_dict``. The input dict is not mutated.
        """
        # Shallow copy so the caller's dict is not modified in place.
        data = dict(worker_data)
        data["models"] = [
            SupportedModel.from_dict(model_data) for model_data in data["models"]
        ]
        return cls(**data)
Loading

0 comments on commit 68d30dd

Please sign in to comment.