diff --git a/tests/distributed/test_pipeline_parallel.py b/tests/distributed/test_pipeline_parallel.py
index e54b8c909f359..83f621ee0f9af 100644
--- a/tests/distributed/test_pipeline_parallel.py
+++ b/tests/distributed/test_pipeline_parallel.py
@@ -156,11 +156,13 @@ def iter_params(self, model_name: str):
     # "internlm/internlm-chat-7b": PPTestSettings.fast(),
     "internlm/internlm2-chat-7b": PPTestSettings.fast(trust_remote_code=True),
     "inceptionai/jais-13b-chat": PPTestSettings.fast(),
+    "ai21labs/Jamba-tiny-dev": PPTestSettings.fast(),
     "meta-llama/Meta-Llama-3-8B": PPTestSettings.detailed(),
     "openbmb/MiniCPM-2B-sft-bf16": PPTestSettings.fast(trust_remote_code=True),
     "openbmb/MiniCPM3-4B": PPTestSettings.fast(trust_remote_code=True),
     # Uses Llama
     # "mistralai/Mistral-7B-Instruct-v0.1": PPTestSettings.fast(),
+    "state-spaces/mamba-130m-hf": PPTestSettings.fast(),
     "mistralai/Mixtral-8x7B-Instruct-v0.1": PPTestSettings.fast(tp_base=4),
     "mosaicml/mpt-7b": PPTestSettings.fast(),
     "nvidia/Minitron-8B-Base": PPTestSettings.fast(),
@@ -178,9 +180,7 @@ def iter_params(self, model_name: str):
     "Qwen/Qwen1.5-MoE-A2.7B-Chat": PPTestSettings.fast(),
     "stabilityai/stablelm-3b-4e1t": PPTestSettings.fast(),
     "bigcode/starcoder2-3b": PPTestSettings.fast(),
-    "upstage/solar-pro-preview-instruct": PPTestSettings.fast(tp_base=2),
-    "ai21labs/Jamba-tiny-dev": PPTestSettings.fast(),
-    "state-spaces/mamba-130m-hf": PPTestSettings.fast()
+    "upstage/solar-pro-preview-instruct": PPTestSettings.fast(tp_base=2)
     # FIXME: Cannot load tokenizer in latest transformers version.
     # Need to use tokenizer from `meta-llama/Llama-2-7b-chat-hf`
     # "xverse/XVERSE-7B-Chat": PPTestSettings.fast(trust_remote_code=True),
@@ -234,6 +234,8 @@ def iter_params(self, model_name: str):
     "OpenGVLab/InternVL2-1B",
     "microsoft/Phi-3-vision-128k-instruct",
     "fixie-ai/ultravox-v0_3",
+    # [LANGUAGE GENERATION - HYBRID ARCH]
+    "ai21labs/Jamba-tiny-dev",
 ]
 
 
diff --git a/vllm/config.py b/vllm/config.py
index 8b4d15a7055b9..5e419be4ec8aa 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -704,10 +704,8 @@ def get_num_layers_by_block_type(
         # Transformers supports layers_block_type @property
         layers_block_type_value = getattr(self.hf_config, "layers_block_type",
                                           [block_type.value] * (end - start))
-        return len([
-            t for t in layers_block_type_value[start:end]
-            if t == block_type.value
-        ])
+        return sum(t == block_type.value
+                   for t in layers_block_type_value[start:end])
 
     def get_multimodal_config(self) -> "MultiModalConfig":
         """
diff --git a/vllm/model_executor/models/jamba.py b/vllm/model_executor/models/jamba.py
index dc1dabe8e478e..5aebbed9f0ba4 100644
--- a/vllm/model_executor/models/jamba.py
+++ b/vllm/model_executor/models/jamba.py
@@ -286,7 +286,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         )
 
         def get_layer(prefix: str):
-            layer_idx = int(prefix.split(".")[-1])
+            layer_idx = int(prefix.rsplit(".", 1)[1])
             layer_class = ALL_DECODER_LAYER_TYPES[
                 config.layers_block_type[layer_idx]]
             return layer_class(
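
Context for the vllm/config.py hunk: the rewrite replaces len([...]) with
sum(...), counting matching layers without materializing an intermediate list
(bool is a subclass of int, so True values sum directly). Below is a minimal
standalone sketch of the idiom; the layer data and the simplified
LayerBlockType enum are made up for illustration, and only the
sum-over-generator pattern itself comes from the diff:

from enum import Enum


class LayerBlockType(Enum):
    attention = "attention"
    mamba = "mamba"


# Hypothetical per-layer block types; the real values come from
# hf_config.layers_block_type (or the getattr fallback in the hunk).
layers_block_type_value = ["attention", "mamba", "mamba", "attention", "mamba"]
start, end = 0, len(layers_block_type_value)
block_type = LayerBlockType.mamba

# sum() over a generator of booleans counts the True values directly,
# avoiding the temporary list that len([...]) would build.
num_layers = sum(t == block_type.value
                 for t in layers_block_type_value[start:end])
print(num_layers)  # 3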
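
Context for the jamba.py hunk: both spellings extract the trailing layer index
from a dotted module prefix; rsplit(".", 1) simply splits once from the right
instead of splitting at every dot. A quick check, using "model.layers.3" as an
illustrative prefix (the exact prefix string is an assumption, not taken from
the PR):

prefix = "model.layers.3"

assert prefix.split(".")[-1] == "3"      # old form: split everywhere, take last
assert prefix.rsplit(".", 1)[1] == "3"   # new form: single split from the right
layer_idx = int(prefix.rsplit(".", 1)[1])
print(layer_idx)  # 3

One behavioral difference worth noting: on a string containing no "." at all,
split(".")[-1] returns the whole string, while rsplit(".", 1)[1] raises
IndexError, so the new form implicitly assumes a dotted prefix.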