diff --git a/lmdeploy/serve/async_engine.py b/lmdeploy/serve/async_engine.py index 8d7c8a027..98350c968 100644 --- a/lmdeploy/serve/async_engine.py +++ b/lmdeploy/serve/async_engine.py @@ -6,7 +6,6 @@ from contextlib import contextmanager from typing import Literal, Optional -from lmdeploy import turbomind as tm from lmdeploy.model import MODELS, BaseModel @@ -30,6 +29,7 @@ class AsyncEngine: """ def __init__(self, model_path, instance_num=32, tp=1) -> None: + from lmdeploy import turbomind as tm from lmdeploy.turbomind.tokenizer import Tokenizer tokenizer_model_path = osp.join(model_path, 'triton_models', 'tokenizer')