diff --git a/python/pybackend_libs/requirements.txt b/python/pybackend_libs/requirements.txt index ea144c8..1fd09c1 100644 --- a/python/pybackend_libs/requirements.txt +++ b/python/pybackend_libs/requirements.txt @@ -19,7 +19,7 @@ transformers_stream_generator==0.0.4 SwissArmyTransformer==0.4.8 cpm_kernels==1.0.11 accelerate==0.24.1 -vllm==0.2.1 +vllm==0.2.1.post1 anyio==3.7.1 starlette==0.27.0 diff --git a/src/tests/regression/test_close_models.py b/src/tests/regression/test_close_models.py index 8c21e0b..c1bba69 100644 --- a/src/tests/regression/test_close_models.py +++ b/src/tests/regression/test_close_models.py @@ -79,7 +79,7 @@ def test_infer(name): def test_closed_model(): RT_EP = os.environ.get('RT_EP', '192.168.106.12:9001') - ep_prefix = f'http://{RT_EP}/v2' + ep_prefix = f'http://{RT_EP}' repo_ep = f'http://{RT_EP}/v2/repository' test_config = 'config/closed_models.json' @@ -97,7 +97,7 @@ def test_closed_model(): for name in model_names: load_ep = f'{repo_ep}/models/{name}/load' unload_ep = f'{repo_ep}/models/{name}/unload' - ready_ep = f'{ep_prefix}/models/{name}/ready' + ready_ep = f'{ep_prefix}/v2/models/{name}/ready' load_params = load_params_map.get(name) print(f'load model {name}...') diff --git a/src/tests/regression/test_load_unload.py b/src/tests/regression/test_load_unload.py index 0345f60..d58d5cc 100644 --- a/src/tests/regression/test_load_unload.py +++ b/src/tests/regression/test_load_unload.py @@ -37,7 +37,7 @@ def call_llm(model, ep): def test_load_unload(): RT_EP = os.environ.get('RT_EP', '192.168.106.12:9001') - ep_prefix = f'http://{RT_EP}/v2' + ep_prefix = f'http://{RT_EP}' repo_ep = f'http://{RT_EP}/v2/repository' test_config = 'config/load_unload.json' @@ -55,7 +55,7 @@ def test_load_unload(): for name in model_names: load_ep = f'{repo_ep}/models/{name}/load' unload_ep = f'{repo_ep}/models/{name}/unload' - ready_ep = f'{ep_prefix}/models/{name}/ready' + ready_ep = f'{ep_prefix}/v2/models/{name}/ready' load_params = load_params_map.get(name) infer_ep = f'{ep_prefix}/v2.1/models/{name}/infer'