Fix (gptq): stabilize Cholesky decomposition
i-colbert committed Dec 4, 2024
1 parent af1eef4 commit 2f24dff
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions src/brevitas/graph/gptq.py
@@ -121,7 +121,7 @@ def update_batch(self, module, input, current_layer):
             current_layer.forward_count = 0
             raise StopFwdException
 
-    def single_layer_update(self, percdamp=.01):
+    def single_layer_update(self, percdamp=.01, c=1e3):
         assert not self.layer.weight_quant.requires_quant_input, "Error: GPTQ does not support weight quantizers that require quantized inputs."
         if hasattr(self.layer, 'allocate_params'):
            self.layer.allocate_params(self.layer)
@@ -174,7 +174,8 @@ def single_layer_update(self, percdamp=.01):
                 self.H[i, diag, diag] += damp
                 self.H[i, :, :] = torch.linalg.cholesky(self.H[i, :, :])
                 self.H[i, :, :] = torch.cholesky_inverse(self.H[i, :, :])
-                self.H[i, :, :] = torch.linalg.cholesky(self.H[i, :, :], upper=True)
+                # stabilizing the Cholesky decomposition with a fairly large constant, c
+                self.H[i, :, :] = torch.linalg.cholesky(self.H[i, :, :] * (c ** 2), upper=True) / c
             h_inv = self.H
         except LinAlgError as e:
             warnings.warn(
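The change relies on the scaling property of the Cholesky factorization: for a symmetric positive-definite matrix H and a scalar c > 0, cholesky(c^2 * H) = c * cholesky(H), so dividing the factor of the scaled matrix by c recovers the original factor, while the factorization itself runs on a better-scaled matrix that is less likely to trip PyTorch's positive-definiteness check on a nearly singular inverse Hessian. A minimal standalone sketch of that identity (plain PyTorch, not the Brevitas code; the matrix H and the constant c below are illustrative):

import torch

# Build a symmetric positive-definite matrix that is poorly scaled,
# loosely mimicking the per-group inverse Hessians GPTQ factorizes.
torch.manual_seed(0)
A = torch.randn(64, 64, dtype=torch.float64)
H = (A @ A.T + 1e-9 * torch.eye(64, dtype=torch.float64)) * 1e-8

c = 1e3  # fairly large stabilizing constant, as in the commit

# Upper Cholesky factor computed directly vs. scaled-then-rescaled.
U_direct = torch.linalg.cholesky(H, upper=True)
U_scaled = torch.linalg.cholesky(H * (c ** 2), upper=True) / c

# cholesky(c^2 * H) == c * cholesky(H), so the rescaled factor matches the direct one.
print(torch.allclose(U_direct, U_scaled, rtol=1e-6, atol=1e-12))

Because the rescaled factor equals the original one up to floating-point error, the upper-triangular factor consumed by the rest of the GPTQ weight update is unchanged; only the numerical robustness of the decomposition step improves.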
