Skip to content

Commit 8f6d656

Browse files
committed
Fix
1 parent 7d724f6 commit 8f6d656

File tree

1 file changed

+8
-4
lines changed

1 file changed

+8
-4
lines changed

src/brevitas_examples/llm/llm_quant/equalize.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,12 @@ def activation_equalization_iter(curr_layer, inps, outs, cached_values, alpha):
2828
return outs
2929

3030

31+
def trace_and_standardize(model, ref_kwargs):
    """Trace ``model`` into a graph module and normalize it for equalization.

    Runs value tracing with ``ref_kwargs`` as example inputs, converts
    functional calls to module form, and duplicates stateless modules that
    are shared across the graph so each use can be rewritten independently.

    :param model: the model to trace.
    :param ref_kwargs: keyword arguments passed as tracing example inputs.
    :return: the traced and standardized graph module.
    """
    graph_model = value_trace(model, value_args=ref_kwargs)
    graph_model = TorchFunctionalToModule().apply(graph_model)
    graph_model = DuplicateSharedStatelessModule().apply(graph_model)
    # BUG FIX: the original version fell off the end and implicitly returned
    # None, so callers doing `graph_model = trace_and_standardize(...)`
    # received None instead of the traced graph.
    return graph_model
3137
@torch.no_grad()
3238
def apply_act_equalization(
3339
model,
@@ -51,9 +57,7 @@ def apply_act_equalization(
5157
# We can't do fp16 tracing on CPU as many kernels are not implemented
5258
# So we have to cast to fp32 first, trace, apply equalization, and then cast back
5359
with cast_to_float32(model, dtype):
54-
graph_model = value_trace(model, value_args=ref_kwargs)
55-
graph_model = TorchFunctionalToModule().apply(graph_model)
56-
graph_model = DuplicateSharedStatelessModule().apply(graph_model)
60+
graph_model = trace_and_standardize(model, ref_kwargs=ref_kwargs)
5761
# TODO this is currently running on CPU. We need Accelerate or a TorchDispatchMode
5862
# or an FX interpreter to run it on GPU
5963
warnings.warn(
@@ -74,5 +78,5 @@ def apply_weight_equalization(model, dtype, ref_kwargs, scale_computation_type='
7478
# We can't do fp16 tracing on CPU as many kernels are not implemented
7579
# So we have to cast to fp32 first, trace, apply equalization, and then cast back
7680
with cast_to_float32(model, dtype):
77-
graph_model = value_trace(model, value_args=ref_kwargs)
81+
graph_model = trace_and_standardize(model, ref_kwargs=ref_kwargs)
7882
EqualizeGraph(scale_computation_type=scale_computation_type).apply(graph_model)

0 commit comments

Comments
 (0)