update server.cfg
yucl80 committed May 4, 2024
1 parent 7a3a0b1 commit 7309f7d
Showing 3 changed files with 55 additions and 26 deletions.
5 changes: 3 additions & 2 deletions __main__.py
@@ -42,13 +42,14 @@
 def main():
     description = "🦙 Llama.cpp python server. Host your own LLMs!🚀"
     parser = argparse.ArgumentParser(description=description)
-
+    current_file_path = __file__
+    current_directory = os.path.dirname(current_file_path)
     add_args_from_model(parser, Settings)
     parser.add_argument(
         "--config_file",
         type=str,
         help="Path to a config file to load.",
-        default="/home/test/api_server.cfg",
+        default= current_directory + "/server.cfg",
     )
     server_settings: ServerSettings | None = None
     model_settings: list[ModelSettings] = []
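
The effect of this hunk is that --config_file now defaults to a server.cfg sitting next to the module itself rather than the hard-coded /home/test/api_server.cfg. A minimal sketch of the same idea, assuming only the standard library (the commit concatenates the strings directly; os.path.join is used here purely for illustration):

    import os

    # Resolve the directory that contains this module, then look for server.cfg beside it.
    current_directory = os.path.dirname(os.path.abspath(__file__))
    default_config = os.path.join(current_directory, "server.cfg")
    # e.g. /path/to/package/server.cfg, independent of the current working directory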
26 changes: 12 additions & 14 deletions model.py
@@ -129,24 +129,12 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
 
     kwargs = {}
 
-    if settings.hf_model_repo_id is not None:
-        create_fn = functools.partial(
-            llama_cpp.Llama.from_pretrained,
-            repo_id=settings.hf_model_repo_id,
-            filename=settings.model,
-        )
-    elif settings.chat_format == "chatglm":
-        create_fn = chatglm_cpp.Pipeline
-        kwargs["model_path"] = settings.model
-    else:
-        create_fn = llama_cpp.Llama
-        kwargs["model_path"] = settings.model
 
-    if settings.chat_format == "chatglm3":
+    if settings.chat_format == "chatglm3" or settings.chat_format == "chatglm":
         _model = chatglm_cpp.Pipeline(settings.model)
         _model.create_chat_completion = chatglm.create_chat_completion
 
-    if settings.chat_format == "bge-onnx":
+    elif settings.chat_format == "bge-onnx":
         _model =extends.BgeOnnxModel(settings.model,settings.model_alias)
 
     elif settings.chat_format == "firefunction" :
@@ -189,6 +177,16 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
             n_threads_batch=settings.n_threads_batch,
         )
     else:
+        if settings.hf_model_repo_id is not None:
+            create_fn = functools.partial(
+                llama_cpp.Llama.from_pretrained,
+                repo_id=settings.hf_model_repo_id,
+                filename=settings.model,
+            )
+        else:
+            create_fn = llama_cpp.Llama
+            kwargs["model_path"] = settings.model
+
         _model = create_fn(
             **kwargs,
             # Model Params
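
Taken together, the model.py hunks move the create_fn selection out of the top of load_llama_from_model_settings and into the final else branch, so chatglm/chatglm3 and bge-onnx models are handled before any llama.cpp loader is constructed. A simplified sketch of the resulting dispatch order, assuming the repository's chatglm_cpp, chatglm, and extends helpers behave as shown in the diff (the function name is shortened here; the real one forwards many more llama.cpp parameters and handles further chat formats such as firefunction):

    def load_model(settings):
        # chatglm / chatglm3 -> chatglm_cpp pipeline with a patched chat-completion hook
        if settings.chat_format in ("chatglm3", "chatglm"):
            model = chatglm_cpp.Pipeline(settings.model)
            model.create_chat_completion = chatglm.create_chat_completion
        # bge-onnx -> ONNX embedding model
        elif settings.chat_format == "bge-onnx":
            model = extends.BgeOnnxModel(settings.model, settings.model_alias)
        # everything else -> llama.cpp, from the Hugging Face Hub or a local path
        elif settings.hf_model_repo_id is not None:
            model = llama_cpp.Llama.from_pretrained(
                repo_id=settings.hf_model_repo_id, filename=settings.model
            )
        else:
            model = llama_cpp.Llama(model_path=settings.model)
        return model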
50 changes: 40 additions & 10 deletions server.cfg
@@ -2,12 +2,46 @@
   "host": "0.0.0.0",
   "port": 8000,
   "models": [
+    {
+      "model": "/home/test/llm-models/chatglm3-ggml.bin",
+      "model_alias": "chatglm3",
+      "chat_format": "chatglm3",
+      "n_gpu_layers": 0,
+      "offload_kqv": true,
+      "embedding": false,
+      "n_threads": 12,
+      "n_batch": 512
+    },
+    {
+      "model": "/home/test/llm-models/bge-large-zh-v1.5-q4_k_m.gguf",
+      "model_alias": "bge-large-zh-v1.5",
+      "chat_format": "bert",
+      "n_gpu_layers": 0,
+      "offload_kqv": true,
+      "n_threads": 12,
+      "n_ctx": 8192,
+      "embedding": true,
+      "n_batch": 512,
+      "verbose": false
+    },
+    {
+      "model": "/home/test/.cache/huggingface/hub/models--BAAI--bge-m3/snapshots/babcf60cae0a1f438d7ade582983d4ba462303c2/onnx/",
+      "model_alias": "bge-m3",
+      "chat_format": "bge-onnx",
+      "embedding": true,
+      "n_gpu_layers": 0,
+      "n_ctx": 8192,
+      "offload_kqv": true,
+      "n_threads": 12,
+      "n_batch": 512
+    },
     {
       "model": "/home/test/llm-models/chatglm3-ggml-q8.bin",
-      "model_alias": "chatglm3",
+      "model_alias": "chatglm3-q8",
       "chat_format": "chatglm3",
       "n_gpu_layers": 0,
       "offload_kqv": true,
+      "embedding": false,
       "n_threads": 12,
       "n_batch": 512
     },
@@ -18,6 +52,7 @@
       "chat_format": "openfunctions",
       "n_gpu_layers": 0,
       "n_ctx":4096,
+      "embedding": false,
       "offload_kqv": true,
       "n_threads": 12,
       "n_batch": 512
@@ -31,20 +66,13 @@
       "offload_kqv": true,
       "n_threads": 12,
       "n_batch": 512,
+      "embedding": false,
       "n_ctx": 8192,
       "use_mmap":true
     },
-    {
-      "model": "/home/test/llm-models/bge-large-zh-v1.5-q4_k_m.gguf",
-      "model_alias": "bge-large-zh-v1.5",
-      "chat_format": "bert",
-      "n_gpu_layers": 0,
-      "offload_kqv": true,
-      "n_threads": 12,
-      "n_batch": 512
-    },
     {
       "model": "/home/test/llm-models/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
+      "hf_pretrained_model_name_or_path":"meta-llama/Meta-Llama-3-8B-Instruct",
       "model_alias": "llama-3-8b",
       "chat_format": "llama-3",
       "n_gpu_layers": 0,
@@ -60,6 +88,7 @@
       "chat_format": "gemma",
       "n_gpu_layers": 0,
       "offload_kqv": true,
+      "embedding": false,
       "n_threads": 12,
       "n_ctx": 8192,
       "n_batch": 512
@@ -71,6 +100,7 @@
       "clip_model_path": "/home/test/llm-models/mmproj-model-f16.gguf",
       "n_gpu_layers": 0,
       "offload_kqv": true,
+      "embedding": false,
       "n_threads": 12,
       "n_ctx": 4096,
       "n_batch": 512
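
With these entries, server.cfg now serves chat models (chatglm3, chatglm3-q8, llama-3-8b, gemma, and the openfunctions and clip-based entries) alongside two embedding models (bge-large-zh-v1.5 and the ONNX bge-m3), with "embedding": false set explicitly on the chat entries. A hypothetical client session, assuming the server keeps llama-cpp-python's OpenAI-compatible /v1 routes on port 8000 and that the aliases above are used as model names:

    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")

    # Chat against the chatglm3 alias defined in server.cfg
    chat = client.chat.completions.create(
        model="chatglm3",
        messages=[{"role": "user", "content": "Hello"}],
    )
    print(chat.choices[0].message.content)

    # Embeddings against the bge-large-zh-v1.5 alias
    emb = client.embeddings.create(model="bge-large-zh-v1.5", input=["你好，世界"])
    print(len(emb.data[0].embedding))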
