diff --git a/benchmark/profile_generation.py b/benchmark/profile_generation.py
index ecfd0d3e4..50e4be008 100644
--- a/benchmark/profile_generation.py
+++ b/benchmark/profile_generation.py
@@ -18,7 +18,8 @@
                     nvmlInit, nvmlShutdown, nvmlSystemGetDriverVersion)
 from tqdm import tqdm
 
-from lmdeploy.turbomind import Tokenizer, TurboMind
+from lmdeploy.tokenizer import Tokenizer
+from lmdeploy.turbomind import TurboMind
 
 
 def infer(model, session_id: int, input_ids: str, output_seqlen: int,
diff --git a/benchmark/profile_restful_api.py b/benchmark/profile_restful_api.py
index ed922bfd7..d1f6ebf80 100644
--- a/benchmark/profile_restful_api.py
+++ b/benchmark/profile_restful_api.py
@@ -8,7 +8,7 @@
 import numpy as np
 import requests
 
-from lmdeploy.turbomind.tokenizer import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 from lmdeploy.utils import get_logger
 
 
diff --git a/benchmark/profile_serving.py b/benchmark/profile_serving.py
index c60e0799d..4580757ee 100644
--- a/benchmark/profile_serving.py
+++ b/benchmark/profile_serving.py
@@ -8,7 +8,7 @@
 import numpy as np
 
 from lmdeploy.serve.turbomind.chatbot import Chatbot
-from lmdeploy.turbomind.tokenizer import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 
 
 def infer(chatbot, session_id: int, req_que: mp.Queue, res_que: mp.Queue):
diff --git a/benchmark/profile_throughput.py b/benchmark/profile_throughput.py
index d8100113c..9d92b31fa 100644
--- a/benchmark/profile_throughput.py
+++ b/benchmark/profile_throughput.py
@@ -8,7 +8,8 @@
 
 import fire
 
-from lmdeploy.turbomind import Tokenizer, TurboMind
+from lmdeploy.tokenizer import Tokenizer
+from lmdeploy.turbomind import TurboMind
 
 
 def sample_requests(
diff --git a/lmdeploy/serve/async_engine.py b/lmdeploy/serve/async_engine.py
index e2c4b3684..9588b00da 100644
--- a/lmdeploy/serve/async_engine.py
+++ b/lmdeploy/serve/async_engine.py
@@ -30,7 +30,7 @@ class AsyncEngine:
 
     def __init__(self, model_path, instance_num=32, tp=1) -> None:
         from lmdeploy import turbomind as tm
-        from lmdeploy.turbomind.tokenizer import Tokenizer
+        from lmdeploy.tokenizer import Tokenizer
         tokenizer_model_path = osp.join(model_path, 'triton_models',
                                         'tokenizer')
         tokenizer = Tokenizer(tokenizer_model_path)
diff --git a/lmdeploy/serve/turbomind/deploy.py b/lmdeploy/serve/turbomind/deploy.py
index 992f2f57d..cc8db88f5 100644
--- a/lmdeploy/serve/turbomind/deploy.py
+++ b/lmdeploy/serve/turbomind/deploy.py
@@ -306,7 +306,7 @@ def deploy_llama(model_name: str, model_path: str, tokenizer_path: str,
         shutil.copy(tokenizer_path,
                     osp.join(triton_models_path, 'tokenizer/tokenizer.model'))
         with get_package_root_path() as root_path:
-            shutil.copy(osp.join(root_path, 'turbomind/tokenizer.py'),
+            shutil.copy(osp.join(root_path, 'tokenizer.py'),
                         osp.join(triton_models_path, 'tokenizer'))
     else:
         print(f'tokenizer model {tokenizer_path} does not exist')
@@ -435,7 +435,7 @@ def deploy_hf(model_name: str, model_path: str, tokenizer_path: str,
             shutil.copy(json_path,
                         osp.join(triton_models_path, 'tokenizer', _file))
         with get_package_root_path() as root_path:
-            shutil.copy(osp.join(root_path, 'turbomind/tokenizer.py'),
+            shutil.copy(osp.join(root_path, 'tokenizer.py'),
                         osp.join(triton_models_path, 'tokenizer'))
     else:
         print(f'tokenizer model {tokenizer_path} does not exist')
@@ -601,7 +601,7 @@ def deploy_awq(model_name: str, model_path: str, tokenizer_path: str,
             shutil.copy(json_path,
                         osp.join(triton_models_path, 'tokenizer', _file))
         with get_package_root_path() as root_path:
-            shutil.copy(osp.join(root_path, 'turbomind/tokenizer.py'),
+            shutil.copy(osp.join(root_path, 'tokenizer.py'),
                         osp.join(triton_models_path, 'tokenizer'))
     else:
         print(f'tokenizer model {tokenizer_path} does not exist')
@@ -831,7 +831,7 @@ def deploy_qwen(model_name: str, model_path: str, tokenizer_path: str,
             shutil.copy(json_path,
                         osp.join(triton_models_path, 'tokenizer', _file))
         with get_package_root_path() as root_path:
-            shutil.copy(osp.join(root_path, 'turbomind/tokenizer.py'),
+            shutil.copy(osp.join(root_path, 'tokenizer.py'),
                         osp.join(triton_models_path, 'tokenizer'))
     else:
         print(f'tokenizer model {tokenizer_path} does not exist')
diff --git a/lmdeploy/turbomind/tokenizer.py b/lmdeploy/tokenizer.py
similarity index 100%
rename from lmdeploy/turbomind/tokenizer.py
rename to lmdeploy/tokenizer.py
diff --git a/lmdeploy/turbomind/__init__.py b/lmdeploy/turbomind/__init__.py
index 02fb288f8..b2df77014 100644
--- a/lmdeploy/turbomind/__init__.py
+++ b/lmdeploy/turbomind/__init__.py
@@ -1,5 +1,4 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .tokenizer import Tokenizer
 from .turbomind import TurboMind
 
-__all__ = ['Tokenizer', 'TurboMind']
+__all__ = ['TurboMind']
diff --git a/lmdeploy/turbomind/chat.py b/lmdeploy/turbomind/chat.py
index 4648b7921..de31a5daa 100644
--- a/lmdeploy/turbomind/chat.py
+++ b/lmdeploy/turbomind/chat.py
@@ -8,7 +8,7 @@
 
 from lmdeploy import turbomind as tm
 from lmdeploy.model import MODELS
-from lmdeploy.turbomind.tokenizer import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 
 
 os.environ['TM_LOG_LEVEL'] = 'ERROR'
diff --git a/lmdeploy/turbomind/decode.py b/lmdeploy/turbomind/decode.py
index 32dd40ca2..daef35298 100644
--- a/lmdeploy/turbomind/decode.py
+++ b/lmdeploy/turbomind/decode.py
@@ -6,7 +6,7 @@
 import torch
 
 from lmdeploy import turbomind as tm
-from lmdeploy.turbomind.tokenizer import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 
 
 os.environ['TM_LOG_LEVEL'] = 'ERROR'
diff --git a/lmdeploy/turbomind/turbomind.py b/lmdeploy/turbomind/turbomind.py
index 2c0f8924b..b63f5dafe 100644
--- a/lmdeploy/turbomind/turbomind.py
+++ b/lmdeploy/turbomind/turbomind.py
@@ -14,7 +14,7 @@
 import lmdeploy
 from lmdeploy.model import MODELS
-from lmdeploy.turbomind import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 from lmdeploy.utils import get_logger
 
 # TODO: find another way
 import _turbomind
diff --git a/tests/python/test_tokenizer.py b/tests/python/test_tokenizer.py
deleted file mode 100644
index 411ca4156..000000000
--- a/tests/python/test_tokenizer.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from lmdeploy.turbomind.tokenizer import Tokenizer
-
-
-def main():
-    tokenizer = Tokenizer('huggyllama/llama-7b')
-
-    prompts = ['cest la vie', '上帝已死']
-    for prompt in prompts:
-        tokens = tokenizer.encode(prompt)
-        output = tokenizer.decode(tokens)
-        print(output)
-
-
-if __name__ == '__main__':
-    main()
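
A minimal usage sketch (not part of the patch) of the relocated import, assuming this diff is applied; it mirrors the deleted tests/python/test_tokenizer.py, so the model path and prompt are illustrative only:

# Sketch: Tokenizer now lives at the package root (lmdeploy.tokenizer)
# instead of lmdeploy.turbomind.tokenizer.
from lmdeploy.tokenizer import Tokenizer

# 'huggyllama/llama-7b' mirrors the deleted test; any tokenizer path works.
tokenizer = Tokenizer('huggyllama/llama-7b')
tokens = tokenizer.encode("c'est la vie")  # text -> token ids
print(tokenizer.decode(tokens))            # token ids -> text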