Force W_q onto self.device after the BitBLAS weight transform

mobiusml · Aug 27, 2024 · 33608a4
1 parent 1e620a9
commit 33608a4
Showing 1 changed file with 1 addition and 1 deletion.
diff --git a/hqq/backends/bitblas.py b/hqq/backends/bitblas.py
@@ -112,7 +112,7 @@ def __init__(self, hqq_layer):
 
         self.matmul_eng = HQQLinearBitBlas.ENG_CACHE[self.eng_tag]
 
-        self.W_q = self.matmul_eng.transform_weight(self.W_q.reshape(self.shape))
+        self.W_q = self.matmul_eng.transform_weight(self.W_q.reshape(self.shape)).to(self.device)
         self.zero = self.zero.view(self.meta_shape_bitblas)
         self.scale = self.scale.view(self.meta_shape_bitblas)
         torch.cuda.empty_cache()