diff --git a/src/brevitas_examples/imagenet_classification/ptq/benchmark/ptq_benchmark_torchvision.py b/src/brevitas_examples/imagenet_classification/ptq/benchmark/ptq_benchmark_torchvision.py index 58af032ef..99eb76f5d 100644 --- a/src/brevitas_examples/imagenet_classification/ptq/benchmark/ptq_benchmark_torchvision.py +++ b/src/brevitas_examples/imagenet_classification/ptq/benchmark/ptq_benchmark_torchvision.py @@ -65,7 +65,7 @@ 'gpfq': [False, True], # Enable/Disable GPFQ 'gpfq_p': [0.25, 0.75], # GPFQ P 'act_quant_percentile': [99.9, 99.99, 99.999], # Activation Quantization Percentile -} + 'uint_sym_act_for_unsigned_values': [False],} OPTIONS_DEFAULT = { 'target_backend': ['fx'], # Target backend @@ -87,7 +87,7 @@ 'gpfq_p': [0.25], # GPFQ P 'gptq_act_order': [False], # Use act_order euristics for GPTQ 'act_quant_percentile': [99.999], # Activation Quantization Percentile -} + 'uint_sym_act_for_unsigned_values': [False],} parser = argparse.ArgumentParser(description='PyTorch ImageNet PTQ Validation') parser.add_argument('idx', type=int) @@ -228,7 +228,8 @@ def ptq_torchvision_models(args): weight_quant_granularity=config_namespace.weight_quant_granularity, act_quant_percentile=config_namespace.act_quant_percentile, act_quant_type=config_namespace.act_quant_type, - scale_factor_type=config_namespace.scale_factor_type) + scale_factor_type=config_namespace.scale_factor_type, + uint_sym_act_for_unsigned_values=config_namespace.uint_sym_act_for_unsigned_values) # If available, use the selected GPU if args.gpu is not None: diff --git a/src/brevitas_examples/imagenet_classification/ptq/ptq_common.py b/src/brevitas_examples/imagenet_classification/ptq/ptq_common.py index 42e976134..aa1214d5e 100644 --- a/src/brevitas_examples/imagenet_classification/ptq/ptq_common.py +++ b/src/brevitas_examples/imagenet_classification/ptq/ptq_common.py @@ -106,6 +106,7 @@ def quantize_model( act_param_method='stats', weight_quant_type='sym', act_quant_granularity='per_tensor', + uint_sym_act_for_unsigned_values=True, dtype=torch.float32): # Define what quantize function to use and, based on the given configuration, its arguments quantize_fn = QUANTIZE_MAP[backend] @@ -127,6 +128,7 @@ def bit_width_fn(module, other_bit_width): act_bit_width_or_lambda = act_bit_width if backend != 'layerwise' else lambda module: bit_width_fn( module, act_bit_width) quant_layer_map, quant_layerwise_layer_map, quant_act_map, quant_identity_map = create_quant_maps(dtype=dtype, + uint_sym_act_for_unsigned_values=uint_sym_act_for_unsigned_values, bias_bit_width=bias_bit_width, weight_bit_width=weight_bit_width_or_lambda, weight_param_method=weight_param_method, @@ -164,6 +166,7 @@ def create_quant_maps( weight_quant_type, weight_quant_granularity, weight_narrow_range, + uint_sym_act_for_unsigned_values=True, act_bit_width=None, act_scale_type=None, act_param_method=None, @@ -235,7 +238,7 @@ def kwargs_prefix(prefix, weight_kwargs): 'softmax_input_quant': None, 'attn_output_weights_quant': sym_act_quant, 'attn_output_weights_bit_width': act_bit_width, - 'attn_output_weights_signed': False, + 'attn_output_weights_signed': not uint_sym_act_for_unsigned_values, 'q_scaled_quant': sym_act_quant, 'q_scaled_bit_width': act_bit_width, 'k_transposed_quant': sym_act_quant, @@ -273,17 +276,21 @@ def kwargs_prefix(prefix, weight_kwargs): act_quant_and_bit_width = {'act_quant': act_quant, 'bit_width': act_bit_width} quant_act_kwargs = {**act_quant_and_bit_width, 'return_quant_tensor': True} quant_act_map = { - torch.nn.ReLU: (qnn.QuantReLU, { - **quant_act_kwargs, 'signed': False}), - torch.nn.ReLU6: (qnn.QuantReLU, { - **quant_act_kwargs, 'signed': False}), - torch.nn.Sigmoid: (qnn.QuantSigmoid, { - **quant_act_kwargs, 'signed': False}),} + torch.nn.ReLU: + (qnn.QuantReLU, { + **quant_act_kwargs, 'signed': not uint_sym_act_for_unsigned_values}), + torch.nn.ReLU6: + (qnn.QuantReLU, { + **quant_act_kwargs, 'signed': not uint_sym_act_for_unsigned_values}), + torch.nn.Sigmoid: ( + qnn.QuantSigmoid, { + **quant_act_kwargs, 'signed': not uint_sym_act_for_unsigned_values}),} quant_identity_map = { 'signed': (qnn.QuantIdentity, { **quant_act_kwargs}), - 'unsigned': (qnn.QuantIdentity, { - **quant_act_kwargs, 'signed': False}),} + 'unsigned': ( + qnn.QuantIdentity, { + **quant_act_kwargs, 'signed': not uint_sym_act_for_unsigned_values}),} quant_layerwise_layer_map = { torch.nn.Linear: (qnn.QuantLinear, layerwise_quant_wbiol_kwargs), torch.nn.MultiheadAttention: (qnn.QuantMultiheadAttention, layerwise_quant_mha_kwargs),