Commit 52daf86 (parent 84f4225)
2 files changed: +3 −10 lines

@@ -193,6 +193,7 @@ def quantize_model(
     # Modify the weight quantizer based on the arguments passed in
     weight_quant = weight_quant.let(
         **{
+            'bit_width': weight_bit_width,
             'narrow_range': False,
             'block_size': weight_group_size,
             'quantize_zero_point': quantize_weight_zero_point},
@@ -311,15 +312,8 @@ def quantize_model(
             'group_dim': 1, 'group_size': input_group_size})
 
     quant_linear_kwargs = {
-        'input_quant': linear_2d_input_quant,
-        'weight_quant': weight_quant,
-        'weight_bit_width': weight_bit_width,
-        'dtype': dtype}
-    quant_conv_kwargs = {
-        'input_quant': input_quant,
-        'weight_quant': weight_quant,
-        'weight_bit_width': weight_bit_width,
-        'dtype': dtype}
+        'input_quant': linear_2d_input_quant, 'weight_quant': weight_quant, 'dtype': dtype}
+    quant_conv_kwargs = {'input_quant': input_quant, 'weight_quant': weight_quant, 'dtype': dtype}
 
     quant_mha_kwargs = {
         'in_proj_input_quant': input_quant,
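The change in the first file folds the weight bit width into the quantizer itself (via the injector-style .let() override) instead of passing weight_bit_width separately to every quantized layer, which is why the quant_linear_kwargs and quant_conv_kwargs dicts shrink. A minimal sketch of that pattern, assuming a Brevitas-style setup; Int8WeightPerTensorFloat and the layer sizes below are illustrative choices, not the ones used by this script:

import brevitas.nn as qnn
from brevitas.quant import Int8WeightPerTensorFloat

weight_bit_width = 4

# Before: bit width handed to each layer alongside the quantizer.
linear_before = qnn.QuantLinear(
    64, 64, weight_quant=Int8WeightPerTensorFloat, weight_bit_width=weight_bit_width)

# After: bit width overridden once on the quantizer via .let(),
# so the per-layer kwargs no longer need a 'weight_bit_width' entry.
weight_quant = Int8WeightPerTensorFloat.let(bit_width=weight_bit_width)
linear_after = qnn.QuantLinear(64, 64, weight_quant=weight_quant)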

@@ -304,7 +304,6 @@ def main():
         seqlen=args.seqlen)
     # Tie back first/last layer weights in case they got untied
     model.tie_weights()
-    print(model)
     print("Model quantization applied.")
 
     if args.act_calibration: