Skip to content

Commit

Permalink
feat(model): supports the deployment of multiple models through the A…
Browse files Browse the repository at this point in the history
…PI and add the corresponding command line interface (#570)

- Split `LLM_MODEL_CONFIG` into `LLM_MODEL_CONFIG` and
`EMBEDDING_MODEL_CONFIG`.
- New HTTP API to obtain the list of models and configuration parameters
supported by the current cluster.
  - New HTTP API to launch models on a specified machine.
  - The command line supports the above HTTP APIs.
  • Loading branch information
Aries-ckt authored Sep 11, 2023
2 parents f9e184a + 27d7f9f commit 68d30dd
Show file tree
Hide file tree
Showing 38 changed files with 1,077 additions and 345 deletions.
29 changes: 16 additions & 13 deletions pilot/configs/model_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,25 +41,12 @@ def get_device() -> str:
# (Llama2 based) see https://huggingface.co/lmsys/vicuna-13b-v1.5
"vicuna-13b-v1.5": os.path.join(MODEL_PATH, "vicuna-13b-v1.5"),
"vicuna-7b-v1.5": os.path.join(MODEL_PATH, "vicuna-7b-v1.5"),
"text2vec": os.path.join(MODEL_PATH, "text2vec-large-chinese"),
# https://huggingface.co/moka-ai/m3e-large
"m3e-base": os.path.join(MODEL_PATH, "m3e-base"),
# https://huggingface.co/moka-ai/m3e-base
"m3e-large": os.path.join(MODEL_PATH, "m3e-large"),
# https://huggingface.co/BAAI/bge-large-en
"bge-large-en": os.path.join(MODEL_PATH, "bge-large-en"),
"bge-base-en": os.path.join(MODEL_PATH, "bge-base-en"),
# https://huggingface.co/BAAI/bge-large-zh
"bge-large-zh": os.path.join(MODEL_PATH, "bge-large-zh"),
"bge-base-zh": os.path.join(MODEL_PATH, "bge-base-zh"),
"sentence-transforms": os.path.join(MODEL_PATH, "all-MiniLM-L6-v2"),
"codegen2-1b": os.path.join(MODEL_PATH, "codegen2-1B"),
"codet5p-2b": os.path.join(MODEL_PATH, "codet5p-2b"),
"chatglm-6b-int4": os.path.join(MODEL_PATH, "chatglm-6b-int4"),
"chatglm-6b": os.path.join(MODEL_PATH, "chatglm-6b"),
"chatglm2-6b": os.path.join(MODEL_PATH, "chatglm2-6b"),
"chatglm2-6b-int4": os.path.join(MODEL_PATH, "chatglm2-6b-int4"),
"text2vec-base": os.path.join(MODEL_PATH, "text2vec-base-chinese"),
"guanaco-33b-merged": os.path.join(MODEL_PATH, "guanaco-33b-merged"),
"falcon-40b": os.path.join(MODEL_PATH, "falcon-40b"),
"gorilla-7b": os.path.join(MODEL_PATH, "gorilla-7b"),
Expand All @@ -84,6 +71,22 @@ def get_device() -> str:
"llama-cpp": os.path.join(MODEL_PATH, "ggml-model-q4_0.bin"),
}

# Mapping from embedding-model short name (as used in configuration) to the
# local filesystem path of its weights under MODEL_PATH.
EMBEDDING_MODEL_CONFIG = {
    "text2vec": os.path.join(MODEL_PATH, "text2vec-large-chinese"),
    "text2vec-base": os.path.join(MODEL_PATH, "text2vec-base-chinese"),
    # https://huggingface.co/moka-ai/m3e-base
    "m3e-base": os.path.join(MODEL_PATH, "m3e-base"),
    # https://huggingface.co/moka-ai/m3e-large
    "m3e-large": os.path.join(MODEL_PATH, "m3e-large"),
    # https://huggingface.co/BAAI/bge-large-en
    "bge-large-en": os.path.join(MODEL_PATH, "bge-large-en"),
    "bge-base-en": os.path.join(MODEL_PATH, "bge-base-en"),
    # https://huggingface.co/BAAI/bge-large-zh
    "bge-large-zh": os.path.join(MODEL_PATH, "bge-large-zh"),
    "bge-base-zh": os.path.join(MODEL_PATH, "bge-base-zh"),
    # NOTE(review): key is spelled "sentence-transforms" (not "transformers");
    # kept as-is because callers look models up by this key.
    "sentence-transforms": os.path.join(MODEL_PATH, "all-MiniLM-L6-v2"),
}

# Load model config
ISDEBUG = False

Expand Down
11 changes: 11 additions & 0 deletions pilot/model/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,17 @@ def _dynamic_model_parser() -> Callable[[None], List[Type]]:
return [param_class]


def _parse_model_param_class(model_name: str, model_path: str) -> Type[ModelParameters]:
    """Resolve the launch-parameter class for a model via its LLM adapter.

    Args:
        model_name: Short name of the model (a key in the model config).
        model_path: Filesystem path of the model weights.

    Returns:
        The parameter *class* (not an instance) declared by the model's
        adapter. Falls back to the generic ``ModelParameters`` class when the
        adapter cannot be resolved (e.g. unknown model name).
    """
    try:
        llm_adapter = get_llm_model_adapter(model_name, model_path)
        return llm_adapter.model_param_class()
    except Exception as e:
        # Best-effort fallback: log and return the generic parameter class.
        # logger.warn is deprecated; logger.warning is the supported API.
        logger.warning(
            f"Parse model parameters with model name {model_name} and model {model_path} failed {str(e)}, return `ModelParameters`"
        )
        return ModelParameters


# TODO support cpu? for practise we support gpt4all or chatglm-6b-int4?


Expand Down
38 changes: 37 additions & 1 deletion pilot/model/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
# -*- coding: utf-8 -*-

from enum import Enum
from typing import TypedDict, Optional, Dict
from typing import TypedDict, Optional, Dict, List
from dataclasses import dataclass
from datetime import datetime
from pilot.utils.parameter_utils import ParameterDescription


class Message(TypedDict):
Expand Down Expand Up @@ -46,5 +47,40 @@ class ModelOutput:
@dataclass
class WorkerApplyOutput:
    # Human-readable message describing the outcome of the apply action.
    message: str
    # Whether the apply action succeeded; defaults to True.
    success: Optional[bool] = True
    # The seconds cost to apply some action to worker instances
    timecost: Optional[int] = -1


@dataclass
class SupportedModel:
    """Metadata describing one model that a worker can launch."""

    # Short model name (a key in the model config).
    model: str
    # Filesystem path of the model weights.
    path: str
    # Type of worker that serves this model — presumably "llm"/"text2vec"; confirm against callers.
    worker_type: str
    # Whether ``path`` exists on the worker's machine.
    path_exist: bool
    # Whether this is a proxy model — TODO confirm semantics against callers.
    proxy: bool
    # Whether the model is enabled.
    enabled: bool
    # Descriptions of the launch parameters the model accepts.
    params: List[ParameterDescription]

    @classmethod
    def from_dict(cls, model_data: Dict) -> "SupportedModel":
        """Build a ``SupportedModel`` from a plain dict.

        Dict entries under ``"params"`` are converted into
        ``ParameterDescription`` instances. The input dict is not mutated.
        """
        # Shallow copy so the caller's dict is not modified in place.
        data = dict(model_data)
        params = data.get("params", [])
        if params:
            params = [ParameterDescription(**param) for param in params]
        data["params"] = params
        return cls(**data)


@dataclass
class WorkerSupportedModel:
    """The set of models supported by a single worker host."""

    # Hostname or IP address of the worker.
    host: str
    # Port the worker listens on.
    port: int
    # Models the worker can launch.
    models: List[SupportedModel]

    @classmethod
    def from_dict(cls, worker_data: Dict) -> "WorkerSupportedModel":
        """Build a ``WorkerSupportedModel`` from a plain dict.

        Each entry of ``worker_data["models"]`` is converted via
        ``SupportedModel.from_dict``. The input dict is not mutated.
        """
        # Shallow copy so the caller's dict is not modified in place.
        data = dict(worker_data)
        data["models"] = [
            SupportedModel.from_dict(model_data) for model_data in data["models"]
        ]
        return cls(**data)
Loading

0 comments on commit 68d30dd

Please sign in to comment.