From d6c984797aa2fab40100fe30294319af89b38510 Mon Sep 17 00:00:00 2001 From: Chen Xin Date: Mon, 18 Mar 2024 20:06:21 +0800 Subject: [PATCH] Fix crash when api_server loads a turbomind model (#1304) * fix loading workspace model * fix lint * update * update --- lmdeploy/archs.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lmdeploy/archs.py b/lmdeploy/archs.py index 2b945ba399..e9b6852d5c 100644 --- a/lmdeploy/archs.py +++ b/lmdeploy/archs.py @@ -1,4 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. +import os from typing import Literal, Optional, Union from lmdeploy.serve.async_engine import AsyncEngine @@ -100,6 +101,9 @@ def check_vl_llm(config: dict) -> bool: def get_task(model_path: str): """get pipeline type and pipeline class from model config.""" + if os.path.exists(os.path.join(model_path, 'triton_models', 'weights')): + # workspace model + return 'llm', AsyncEngine config = get_hf_config_content(model_path) if check_vl_llm(config): return 'vlm', VLAsyncEngine