Modified auto_cls logic and fixed lint errors

Signed-off-by: Kevin-Yang <[email protected]>
vllm-project · Oct 26, 2024 · e50fd79
1 parent b1d1afc
commit e50fd79
Show file tree

Hide file tree

Showing 4 changed files with 10 additions and 19 deletions.
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -14,9 +14,8 @@
 import torch.nn.functional as F
 from huggingface_hub import snapshot_download
 from PIL import Image
-from transformers import (AutoModelForCausalLM,
-                          AutoModelForSequenceClassification, AutoTokenizer,
-                          AutoConfig, BatchEncoding, BatchFeature)
+from transformers import (AutoModelForCausalLM, AutoTokenizer, BatchEncoding,
+                          BatchFeature)
 from transformers.models.auto.auto_factory import _BaseAutoModelClass
 
 from tests.models.utils import (TokensTextLogprobs,
@@ -272,16 +271,6 @@ def __init__(
                 ).to(dtype=torch_dtype))
         else:
             model_kwargs = model_kwargs if model_kwargs is not None else {}
-            config = AutoConfig.from_pretrained(
-                model_name,
-                torch_dtype=torch_dtype,
-                trust_remote_code=True,
-            )
-            arch = config.architectures
-            if len(arch) > 0:
-                cls_type = arch[0].split("For")[-1]
-                auto_cls = eval(f"AutoModelFor{cls_type}")
-
             self.model = self.wrap_device(
                 auto_cls.from_pretrained(
                     model_name,

diff --git a/tests/models/decoder_only/language/test_cls_models.py b/tests/models/decoder_only/language/test_cls_models.py
@@ -7,10 +7,9 @@
 """
 import pytest
 import torch
+from transformers import AutoModelForSequenceClassification
 
-CLASSIFICATION_MODELS = [
-    "jason9693/Qwen2.5-1.5B-apeach"
-]
+CLASSIFICATION_MODELS = ["jason9693/Qwen2.5-1.5B-apeach"]
 
 
 @pytest.mark.parametrize("model", CLASSIFICATION_MODELS)
@@ -22,7 +21,9 @@ def test_classification_models(
     model: str,
     dtype: str,
 ) -> None:
-    with hf_runner(model, dtype=dtype) as hf_model:
+    with hf_runner(model,
+                   dtype=dtype,
+                   auto_cls=AutoModelForSequenceClassification) as hf_model:
         hf_outputs = hf_model.classify(example_prompts)
 
     with vllm_runner(model, dtype=dtype) as vllm_model:

diff --git a/vllm/attention/backends/flash_attn.py b/vllm/attention/backends/flash_attn.py
@@ -349,7 +349,7 @@ def _add_seq_group(
                 else:
                     block_table = block_tables[seq_id][
                         -curr_sliding_window_block:]
-            
+
             print(f"prefix cache hit: {prefix_cache_hit}")
             print(f"chunked prefill enabled: {chunked_prefill_enabled}")
             print(f"prompt: {is_prompt}")

diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py
@@ -92,7 +92,8 @@
     "Gemma2Model": ("gemma2", "Gemma2EmbeddingModel"),
     "MistralModel": ("llama", "LlamaEmbeddingModel"),
     "Qwen2ForRewardModel": ("qwen2_rm", "Qwen2ForRewardModel"),
-    "Qwen2ForSequenceClassification": ("qwen2_cls", "Qwen2ForSequenceClassification"),
+    "Qwen2ForSequenceClassification": (
+        "qwen2_cls", "Qwen2ForSequenceClassification"),
     # [Multimodal]
     "Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
 }