fix: ShardedStateLoader with fp8 quant (#900)

PygmalionAI · Dec 16, 2024 · 64c05b9
1 parent 132aa2a
commit 64c05b9
Showing 1 changed file with 4 additions and 0 deletions.
diff --git a/aphrodite/modeling/model_loader/loader.py b/aphrodite/modeling/model_loader/loader.py
@@ -585,6 +585,10 @@ def load_model(self, *, model_config: ModelConfig,
             with torch.device(device_config.device):
                 model = _initialize_model(model_config, self.load_config,
                                           lora_config, cache_config)
+                for _, module in model.named_modules():
+                    quant_method = getattr(module, "quant_method", None)
+                    if quant_method is not None:
+                        quant_method.process_weights_after_loading(module)
             rank = get_tensor_model_parallel_rank()
             pattern = os.path.join(
                 local_model_path,