From 59f8e67416e90cc82f1dafea46f95886d1b2c157 Mon Sep 17 00:00:00 2001
From: AllentDan <41138331+AllentDan@users.noreply.github.com>
Date: Fri, 25 Aug 2023 12:45:16 +0800
Subject: [PATCH] Import turbomind in gradio server only when it is needed
 (#303)

---
 lmdeploy/serve/async_engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lmdeploy/serve/async_engine.py b/lmdeploy/serve/async_engine.py
index 8d7c8a027b..98350c9681 100644
--- a/lmdeploy/serve/async_engine.py
+++ b/lmdeploy/serve/async_engine.py
@@ -6,7 +6,6 @@
 from contextlib import contextmanager
 from typing import Literal, Optional
 
-from lmdeploy import turbomind as tm
 from lmdeploy.model import MODELS, BaseModel
 
 
@@ -30,6 +29,7 @@ class AsyncEngine:
     """
 
     def __init__(self, model_path, instance_num=32, tp=1) -> None:
+        from lmdeploy import turbomind as tm
         from lmdeploy.turbomind.tokenizer import Tokenizer
         tokenizer_model_path = osp.join(model_path, 'triton_models',
                                         'tokenizer')