From 4ea1f9678dd93f02424ab3de2149f83a490e6c6f Mon Sep 17 00:00:00 2001
From: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com>
Date: Sat, 27 Apr 2024 14:35:33 -0400
Subject: [PATCH] [BugFix] Resolved Issues For LinearMethod --> QuantConfig
 (#4418)

---
 vllm/model_executor/models/bloom.py       | 1 -
 vllm/model_executor/models/falcon.py      | 1 -
 vllm/model_executor/models/gpt2.py        | 1 -
 vllm/model_executor/models/gpt_bigcode.py | 1 -
 vllm/model_executor/models/gpt_j.py       | 1 -
 vllm/model_executor/models/gpt_neox.py    | 1 -
 vllm/model_executor/models/mpt.py         | 1 -
 vllm/model_executor/models/opt.py         | 1 -
 vllm/model_executor/models/phi.py         | 1 -
 vllm/model_executor/models/starcoder2.py  | 1 -
 10 files changed, 10 deletions(-)

diff --git a/vllm/model_executor/models/bloom.py b/vllm/model_executor/models/bloom.py
index b425af4863c36..1d7e5d2517c72 100644
--- a/vllm/model_executor/models/bloom.py
+++ b/vllm/model_executor/models/bloom.py
@@ -139,7 +139,6 @@ def __init__(
             4 * hidden_size,
             quant_config=quant_config,
         )
-        quant_config = getattr(quant_config, "quant_config", None)
         self.gelu_impl = get_act_fn("gelu", quant_config, 4 * hidden_size)
         self.dense_4h_to_h = RowParallelLinear(
             4 * hidden_size,
diff --git a/vllm/model_executor/models/falcon.py b/vllm/model_executor/models/falcon.py
index 4be1f064cdd3e..08dd69923dc6d 100644
--- a/vllm/model_executor/models/falcon.py
+++ b/vllm/model_executor/models/falcon.py
@@ -203,7 +203,6 @@ def __init__(
                                                   bias=config.bias,
                                                   skip_bias_add=True,
                                                   quant_config=quant_config)
-        quant_config = getattr(quant_config, "quant_config", None)
         self.act = get_act_fn("gelu", quant_config, 4 * hidden_size)
         self.reduce_row_parallel_results = not (config.new_decoder_architecture
                                                 or config.parallel_attn)
diff --git a/vllm/model_executor/models/gpt2.py b/vllm/model_executor/models/gpt2.py
index ac1dce6dec8a6..75eaebf0dbd15 100644
--- a/vllm/model_executor/models/gpt2.py
+++ b/vllm/model_executor/models/gpt2.py
@@ -107,7 +107,6 @@ def __init__(
             bias=True,
             quant_config=quant_config,
         )
-        quant_config = getattr(quant_config, "quant_config", None)
         self.act = get_act_fn(config.activation_function, quant_config,
                               intermediate_size)
 
diff --git a/vllm/model_executor/models/gpt_bigcode.py b/vllm/model_executor/models/gpt_bigcode.py
index e52ac679f5d03..d057fd928fdb5 100644
--- a/vllm/model_executor/models/gpt_bigcode.py
+++ b/vllm/model_executor/models/gpt_bigcode.py
@@ -128,7 +128,6 @@ def __init__(
             bias=True,
             quant_config=quant_config,
         )
-        quant_config = getattr(quant_config, "quant_config", None)
         self.act = get_act_fn(config.activation_function, quant_config,
                               intermediate_size)
 
diff --git a/vllm/model_executor/models/gpt_j.py b/vllm/model_executor/models/gpt_j.py
index 287f4186f7469..8d7fe8a5beef7 100644
--- a/vllm/model_executor/models/gpt_j.py
+++ b/vllm/model_executor/models/gpt_j.py
@@ -120,7 +120,6 @@ def __init__(
             hidden_size,
             quant_config=quant_config,
         )
-        quant_config = getattr(quant_config, "quant_config", None)
         self.act = get_act_fn(config.activation_function, quant_config,
                               intermediate_size)
 
diff --git a/vllm/model_executor/models/gpt_neox.py b/vllm/model_executor/models/gpt_neox.py
index cbc5115bd377b..bab563b9c5a39 100644
--- a/vllm/model_executor/models/gpt_neox.py
+++ b/vllm/model_executor/models/gpt_neox.py
@@ -119,7 +119,6 @@ def __init__(
             config.hidden_size,
             quant_config=quant_config,
         )
-        quant_config = getattr(quant_config, "quant_config", None)
         self.act = get_act_fn(config.hidden_act, quant_config,
                               config.intermediate_size)
 
diff --git a/vllm/model_executor/models/mpt.py b/vllm/model_executor/models/mpt.py
index 8c5e7e77c9306..6fa5c5bd3014a 100644
--- a/vllm/model_executor/models/mpt.py
+++ b/vllm/model_executor/models/mpt.py
@@ -146,7 +146,6 @@ def __init__(
             bias=not config.no_bias,
             quant_config=quant_config,
         )
-        quant_config = getattr(quant_config, "quant_config", None)
         self.act = get_act_fn("gelu", quant_config, intermediate_size)
         self.down_proj = RowParallelLinear(
             intermediate_size,
diff --git a/vllm/model_executor/models/opt.py b/vllm/model_executor/models/opt.py
index 838a2f0adc4d1..336f765ababaa 100644
--- a/vllm/model_executor/models/opt.py
+++ b/vllm/model_executor/models/opt.py
@@ -130,7 +130,6 @@ def __init__(
             bias=config.enable_bias,
             quant_config=quant_config,
         )
-        quant_config = getattr(quant_config, "quant_config", None)
         self.activation_fn = get_act_fn(config.activation_function,
                                         quant_config, config.ffn_dim)
         self.fc2 = RowParallelLinear(
diff --git a/vllm/model_executor/models/phi.py b/vllm/model_executor/models/phi.py
index 7a9b8dcd6a509..4a45879201af3 100644
--- a/vllm/model_executor/models/phi.py
+++ b/vllm/model_executor/models/phi.py
@@ -142,7 +142,6 @@ def __init__(self,
             config.hidden_size,
             quant_config=quant_config,
         )
-        quant_config = getattr(quant_config, "quant_config", None)
         self.act = get_act_fn(config.hidden_act, quant_config, n_inner)
 
     def forward(self, hidden_states):
diff --git a/vllm/model_executor/models/starcoder2.py b/vllm/model_executor/models/starcoder2.py
index 29d887b21032b..33998e2aad5c5 100644
--- a/vllm/model_executor/models/starcoder2.py
+++ b/vllm/model_executor/models/starcoder2.py
@@ -136,7 +136,6 @@ def __init__(self,
             bias=config.use_bias,
             quant_config=quant_config,
         )
-        quant_config = getattr(quant_config, "quant_config", None)
         self.act = get_act_fn(config.hidden_act, quant_config,
                               config.intermediate_size)
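
The pattern removed in every hunk above is identical: after the LinearMethod --> QuantConfig
refactor, each MLP constructor already receives the QuantizationConfig (or None) directly, so the
leftover getattr(quant_config, "quant_config", None) unwrapped an attribute that presumably no
longer exists and handed None on to get_act_fn. A minimal sketch of that failure mode, using a
hypothetical stand-in class rather than the real vLLM QuantizationConfig:

    # Illustration only: FakeQuantConfig is a hypothetical stand-in, not vLLM code.
    class FakeQuantConfig:
        """Mimics a quantization config object with no nested .quant_config attribute."""
        name = "awq"

    quant_config = FakeQuantConfig()

    # Old pattern (deleted in every hunk above): the attribute lookup falls back
    # to None, so the activation function would be built without any
    # quantization information.
    unwrapped = getattr(quant_config, "quant_config", None)
    assert unwrapped is None

    # New pattern: the config is passed straight through, e.g.
    #   self.act = get_act_fn("gelu", quant_config, 4 * hidden_size)
    assert quant_config is not None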