diff --git a/benchmark/profile_generation.py b/benchmark/profile_generation.py
index ecfd0d3e4..50e4be008 100644
--- a/benchmark/profile_generation.py
+++ b/benchmark/profile_generation.py
@@ -18,7 +18,8 @@
                     nvmlInit, nvmlShutdown, nvmlSystemGetDriverVersion)
 from tqdm import tqdm
 
-from lmdeploy.turbomind import Tokenizer, TurboMind
+from lmdeploy.tokenizer import Tokenizer
+from lmdeploy.turbomind import TurboMind
 
 
 def infer(model, session_id: int, input_ids: str, output_seqlen: int,
diff --git a/benchmark/profile_restful_api.py b/benchmark/profile_restful_api.py
index ed922bfd7..d1f6ebf80 100644
--- a/benchmark/profile_restful_api.py
+++ b/benchmark/profile_restful_api.py
@@ -8,7 +8,7 @@
 import numpy as np
 import requests
 
-from lmdeploy.turbomind.tokenizer import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 from lmdeploy.utils import get_logger
 
 
diff --git a/benchmark/profile_serving.py b/benchmark/profile_serving.py
index c60e0799d..4580757ee 100644
--- a/benchmark/profile_serving.py
+++ b/benchmark/profile_serving.py
@@ -8,7 +8,7 @@
 import numpy as np
 
 from lmdeploy.serve.turbomind.chatbot import Chatbot
-from lmdeploy.turbomind.tokenizer import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 
 
 def infer(chatbot, session_id: int, req_que: mp.Queue, res_que: mp.Queue):
diff --git a/benchmark/profile_throughput.py b/benchmark/profile_throughput.py
index d8100113c..9d92b31fa 100644
--- a/benchmark/profile_throughput.py
+++ b/benchmark/profile_throughput.py
@@ -8,7 +8,8 @@
 
 import fire
 
-from lmdeploy.turbomind import Tokenizer, TurboMind
+from lmdeploy.tokenizer import Tokenizer
+from lmdeploy.turbomind import TurboMind
 
 
 def sample_requests(
diff --git a/lmdeploy/serve/async_engine.py b/lmdeploy/serve/async_engine.py
index e2c4b3684..9588b00da 100644
--- a/lmdeploy/serve/async_engine.py
+++ b/lmdeploy/serve/async_engine.py
@@ -30,7 +30,7 @@ class AsyncEngine:
 
     def __init__(self, model_path, instance_num=32, tp=1) -> None:
         from lmdeploy import turbomind as tm
-        from lmdeploy.turbomind.tokenizer import Tokenizer
+        from lmdeploy.tokenizer import Tokenizer
         tokenizer_model_path = osp.join(model_path, 'triton_models',
                                         'tokenizer')
         tokenizer = Tokenizer(tokenizer_model_path)
diff --git a/lmdeploy/serve/turbomind/deploy.py b/lmdeploy/serve/turbomind/deploy.py
index 992f2f57d..cc8db88f5 100644
--- a/lmdeploy/serve/turbomind/deploy.py
+++ b/lmdeploy/serve/turbomind/deploy.py
@@ -306,7 +306,7 @@ def deploy_llama(model_name: str, model_path: str, tokenizer_path: str,
         shutil.copy(tokenizer_path,
                     osp.join(triton_models_path, 'tokenizer/tokenizer.model'))
         with get_package_root_path() as root_path:
-            shutil.copy(osp.join(root_path, 'turbomind/tokenizer.py'),
+            shutil.copy(osp.join(root_path, 'tokenizer.py'),
                         osp.join(triton_models_path, 'tokenizer'))
     else:
         print(f'tokenizer model {tokenizer_path} does not exist')
@@ -435,7 +435,7 @@ def deploy_hf(model_name: str, model_path: str, tokenizer_path: str,
             shutil.copy(json_path,
                         osp.join(triton_models_path, 'tokenizer', _file))
         with get_package_root_path() as root_path:
-            shutil.copy(osp.join(root_path, 'turbomind/tokenizer.py'),
+            shutil.copy(osp.join(root_path, 'tokenizer.py'),
                         osp.join(triton_models_path, 'tokenizer'))
     else:
         print(f'tokenizer model {tokenizer_path} does not exist')
@@ -601,7 +601,7 @@ def deploy_awq(model_name: str, model_path: str, tokenizer_path: str,
             shutil.copy(json_path,
                         osp.join(triton_models_path, 'tokenizer', _file))
         with get_package_root_path() as root_path:
-            shutil.copy(osp.join(root_path, 'turbomind/tokenizer.py'),
+            shutil.copy(osp.join(root_path, 'tokenizer.py'),
                         osp.join(triton_models_path, 'tokenizer'))
     else:
         print(f'tokenizer model {tokenizer_path} does not exist')
@@ -831,7 +831,7 @@ def deploy_qwen(model_name: str, model_path: str, tokenizer_path: str,
             shutil.copy(json_path,
                         osp.join(triton_models_path, 'tokenizer', _file))
         with get_package_root_path() as root_path:
-            shutil.copy(osp.join(root_path, 'turbomind/tokenizer.py'),
+            shutil.copy(osp.join(root_path, 'tokenizer.py'),
                         osp.join(triton_models_path, 'tokenizer'))
     else:
         print(f'tokenizer model {tokenizer_path} does not exist')
diff --git a/lmdeploy/turbomind/tokenizer.py b/lmdeploy/tokenizer.py
similarity index 100%
rename from lmdeploy/turbomind/tokenizer.py
rename to lmdeploy/tokenizer.py
diff --git a/lmdeploy/turbomind/__init__.py b/lmdeploy/turbomind/__init__.py
index 02fb288f8..b2df77014 100644
--- a/lmdeploy/turbomind/__init__.py
+++ b/lmdeploy/turbomind/__init__.py
@@ -1,5 +1,4 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .tokenizer import Tokenizer
 from .turbomind import TurboMind
 
-__all__ = ['Tokenizer', 'TurboMind']
+__all__ = ['TurboMind']
diff --git a/lmdeploy/turbomind/chat.py b/lmdeploy/turbomind/chat.py
index 4648b7921..de31a5daa 100644
--- a/lmdeploy/turbomind/chat.py
+++ b/lmdeploy/turbomind/chat.py
@@ -8,7 +8,7 @@
 
 from lmdeploy import turbomind as tm
 from lmdeploy.model import MODELS
-from lmdeploy.turbomind.tokenizer import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 
 
 os.environ['TM_LOG_LEVEL'] = 'ERROR'
diff --git a/lmdeploy/turbomind/decode.py b/lmdeploy/turbomind/decode.py
index 32dd40ca2..daef35298 100644
--- a/lmdeploy/turbomind/decode.py
+++ b/lmdeploy/turbomind/decode.py
@@ -6,7 +6,7 @@
 import torch
 
 from lmdeploy import turbomind as tm
-from lmdeploy.turbomind.tokenizer import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 
 
 os.environ['TM_LOG_LEVEL'] = 'ERROR'
diff --git a/lmdeploy/turbomind/turbomind.py b/lmdeploy/turbomind/turbomind.py
index 2c0f8924b..b63f5dafe 100644
--- a/lmdeploy/turbomind/turbomind.py
+++ b/lmdeploy/turbomind/turbomind.py
@@ -14,7 +14,7 @@
 import lmdeploy
 from lmdeploy.model import MODELS
-from lmdeploy.turbomind import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 from lmdeploy.utils import get_logger
 
 # TODO: find another way
 import _turbomind
diff --git a/tests/python/test_tokenizer.py b/tests/python/test_tokenizer.py
deleted file mode 100644
index 411ca4156..000000000
--- a/tests/python/test_tokenizer.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from lmdeploy.turbomind.tokenizer import Tokenizer
-
-
-def main():
-    tokenizer = Tokenizer('huggyllama/llama-7b')
-
-    prompts = ['cest la vie', '上帝已死']
-    for prompt in prompts:
-        tokens = tokenizer.encode(prompt)
-        output = tokenizer.decode(tokens)
-        print(output)
-
-
-if __name__ == '__main__':
-    main()
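
A minimal usage sketch (not part of the patch) of the relocated import, assuming this diff is applied; it mirrors the deleted tests/python/test_tokenizer.py, so the model path and prompt are illustrative only:

# Sketch: Tokenizer now lives at the package root (lmdeploy.tokenizer)
# instead of lmdeploy.turbomind.tokenizer.
from lmdeploy.tokenizer import Tokenizer

# 'huggyllama/llama-7b' mirrors the deleted test; any tokenizer path works.
tokenizer = Tokenizer('huggyllama/llama-7b')
tokens = tokenizer.encode("c'est la vie")  # text -> token ids
print(tokenizer.decode(tokens))            # token ids -> text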