Commit 71bbb1b
Update derived_zero initialization to use device aligned with other i… (#14978)
Ran into issues when trying to use the GPU to quantize the model. Fix the device type for the zero point. The log looked like:

```
input.device.type cuda
scales.device.type cuda
zero_points.device.type cpu
```
1 parent d382f6b commit 71bbb1b

File tree

1 file changed (+3, −1)

backends/qualcomm/quantizer/qconfig.py

Lines changed: 3 additions & 1 deletion
```diff
@@ -52,7 +52,9 @@ def _derive_bias_qparams_fn(
         act_scale, weight_scale
     )
     derived_scale = (broadcast_act_scale * broadcast_weight_scale).to(torch.float32)
-    derived_zero = torch.zeros(derived_scale.size()).to(torch.int32)
+    derived_zero = torch.zeros(derived_scale.size(), device=weight_zp.device).to(
+        torch.int32
+    )
     if isinstance(weight_obs_or_fq, PerBlockParamObserver):
         # keep maximum scale of each channel for bias
         derived_scale = (
```
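The bug and the fix can be sketched in isolation: `torch.zeros` without an explicit `device` allocates on the CPU default device, so the derived zero point ends up on a different device than the scales when quantizing on GPU. Passing `device=` from an existing tensor keeps everything co-located. The tensor names below (`scales`, `zp`) are hypothetical stand-ins for `derived_scale` and `weight_zp` in the patched function:

```python
import torch

# Stand-ins for the tensors in _derive_bias_qparams_fn; on a GPU run
# these would live on "cuda", here they live on the default CPU device.
scales = torch.ones(4)                   # stand-in for derived_scale
zp = torch.zeros(4, dtype=torch.int32)   # stand-in for weight_zp

# Before the fix: no device argument, so the result is always on the
# default device regardless of where zp lives.
derived_zero_old = torch.zeros(scales.size()).to(torch.int32)

# After the fix: inherit the device from an existing tensor.
derived_zero = torch.zeros(scales.size(), device=zp.device).to(torch.int32)

print(derived_zero.device.type)  # matches zp; "cuda" when zp is on a GPU
```

The same effect could also be achieved with `torch.zeros_like(zp)` when the shapes match, but the committed change only adds the `device=` keyword, which is the minimal fix for the mismatch in the log above.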
