
Commit e33a5aa: Fixed bias

tgaddair committed Mar 13, 2024
1 parent a23556d

Showing 1 changed file with 2 additions and 2 deletions.
@@ -113,7 +113,7 @@ def load_attention_multi(config, prefix, weights):
         prefixes=[f"{prefix}.q_proj", f"{prefix}.k_proj", f"{prefix}.v_proj"],
         dim=0,
         weights=weights,
-        bias=False,
+        bias=True,
     )

@@ -139,7 +139,7 @@ def _load_gqa(config, prefix: str, weights):
     ], f"{list(weight.shape)} != {[(num_heads + 2 * config.num_key_value_heads) * head_size, config.hidden_size]}"

     return TensorParallelColumnLinear(
-        get_linear(weight, bias=None, quantize=config.quantize)
+        get_linear(weight, bias=True, quantize=config.quantize)
     )
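Context for the change (not part of the commit itself): some architectures attach bias terms to their q/k/v attention projections (Qwen-style models are a common example), so loading the fused projection with bias=False or bias=None silently drops those checkpoint tensors. The sketch below illustrates what the flag toggles; it assumes a weights.get_tensor(name) accessor like the one these loaders use, and the helper name load_fused_qkv is hypothetical, not the repository's actual implementation.

import torch

def load_fused_qkv(prefixes, weights, bias: bool):
    # Hypothetical helper: fuse q/k/v projections into one linear layer.
    # Concatenate the per-projection weight matrices along the output
    # dimension, mirroring the dim=0 argument in the diff above.
    w = torch.cat([weights.get_tensor(f"{p}.weight") for p in prefixes], dim=0)
    if bias:
        # bias=True: the matching bias vectors are loaded and fused as well.
        b = torch.cat([weights.get_tensor(f"{p}.bias") for p in prefixes], dim=0)
    else:
        # bias=False/None: the bias tensors are never read, so checkpoints
        # that ship them would have those values silently ignored.
        b = None
    layer = torch.nn.Linear(w.shape[1], w.shape[0], bias=bias)
    with torch.no_grad():
        layer.weight.copy_(w)
        if bias:
            layer.bias.copy_(b)
    return layer

Fusing the bias at weight-load time keeps the forward pass a single matmul plus bias add, which is why the flag lives in the loader rather than in the attention code.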
