Skip to content

Commit

Permalink
Fix crash when api_server loads a turbomind model (#1304)
Browse files: browse the repository at this point in the history
* fix loading workspace model

* fix lint

* update

* update
  • Loading branch information
irexyc authored Mar 18, 2024
1 parent 299d522 commit d6c9847
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions lmdeploy/archs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os
from typing import Literal, Optional, Union

from lmdeploy.serve.async_engine import AsyncEngine
Expand Down Expand Up
@@ -100,6 +101,9 @@ def check_vl_llm(config: dict) -> bool:

def get_task(model_path: str):
"""get pipeline type and pipeline class from model config."""
if os.path.exists(os.path.join(model_path, 'triton_models', 'weights')):
# workspace model
return 'llm', AsyncEngine
config = get_hf_config_content(model_path)
if check_vl_llm(config):
return 'vlm', VLAsyncEngine
Expand Down

0 comments on commit d6c9847

Please sign in to comment.