From 1de2296ae39378271cdb7e0a4c7eea2e8ebe2233 Mon Sep 17 00:00:00 2001
From: Chang Sun
Date: Mon, 8 Jan 2024 22:06:52 -0800
Subject: [PATCH] better bits yield computation

---
 src/HGQ/quantizer/quantizer.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/HGQ/quantizer/quantizer.py b/src/HGQ/quantizer/quantizer.py
index 1d26df4..547b343 100644
--- a/src/HGQ/quantizer/quantizer.py
+++ b/src/HGQ/quantizer/quantizer.py
@@ -253,7 +253,9 @@ def get_bits_exact(self, ref=None, pos_only=False):
             int_bits = np.floor(np.log2(_ref)) + 1
             kn = np.zeros_like(self._max)
         else:
-            int_bits = np.floor(np.log2(np.maximum(np.abs(self._max), np.abs(self._min)))) + 1  # type:ignore
+            int_bits1 = np.ceil(np.log2(np.abs(self._min)))  # type:ignore
+            int_bits2 = np.ceil(np.log2(np.abs(self._max) + 2**-fp_bits))  # type:ignore
+            int_bits = np.maximum(int_bits1, int_bits2)
             kn = (self._min.numpy() < 0)  # type:ignore
         int_bits = np.clip(int_bits, -fp_bits - kn, 32)
         return kn.astype(np.int8), int_bits.astype(np.int8), fp_bits.astype(np.int8)