From 59f8e67416e90cc82f1dafea46f95886d1b2c157 Mon Sep 17 00:00:00 2001
From: AllentDan <41138331+AllentDan@users.noreply.github.com>
Date: Fri, 25 Aug 2023 12:45:16 +0800
Subject: [PATCH] Import turbomind in gradio server only when it is needed (#303)

---
 lmdeploy/serve/async_engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lmdeploy/serve/async_engine.py b/lmdeploy/serve/async_engine.py
index 8d7c8a027b..98350c9681 100644
--- a/lmdeploy/serve/async_engine.py
+++ b/lmdeploy/serve/async_engine.py
@@ -6,7 +6,6 @@
 from contextlib import contextmanager
 from typing import Literal, Optional
 
-from lmdeploy import turbomind as tm
 from lmdeploy.model import MODELS, BaseModel
 
 
@@ -30,6 +29,7 @@ class AsyncEngine:
     """
 
     def __init__(self, model_path, instance_num=32, tp=1) -> None:
+        from lmdeploy import turbomind as tm
         from lmdeploy.turbomind.tokenizer import Tokenizer
         tokenizer_model_path = osp.join(model_path, 'triton_models',
                                         'tokenizer')