From 062a1d0fab111723ab768f94bdd48a6adc054007 Mon Sep 17 00:00:00 2001
From: QQSong
Date: Thu, 25 Jul 2024 19:24:58 -0700
Subject: [PATCH] Fix ReplicatedLinear weight loading (#6793)

---
 vllm/model_executor/layers/linear.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/linear.py b/vllm/model_executor/layers/linear.py
index 0e0a2b72f93d..b6e280ae6504 100644
--- a/vllm/model_executor/layers/linear.py
+++ b/vllm/model_executor/layers/linear.py
@@ -199,12 +199,16 @@ def __init__(self,
                                          self.input_size,
                                          self.output_size,
                                          self.params_dtype,
+                                         weight_loader=self.weight_loader,
                                          prefix=prefix)
 
         if bias:
             self.bias = Parameter(
                 torch.empty(self.output_size, dtype=self.params_dtype))
-            set_weight_attrs(self.bias, {"output_dim": 0})
+            set_weight_attrs(self.bias, {
+                "output_dim": 0,
+                "weight_loader": self.weight_loader,
+            })
         else:
             self.register_parameter("bias", None)
 