Feat (ptq): flag to disable/enable signed activations
Giuseppe5 committed Oct 2, 2023
1 parent f1e52ba commit 5f2790a
Showing 2 changed files with 20 additions and 12 deletions.
@@ -65,7 +65,7 @@
'gpfq': [False, True], # Enable/Disable GPFQ
'gpfq_p': [0.25, 0.75], # GPFQ P
'act_quant_percentile': [99.9, 99.99, 99.999], # Activation Quantization Percentile
-}
+'uint_sym_act_for_unsigned_values': [False],}

OPTIONS_DEFAULT = {
'target_backend': ['fx'], # Target backend
@@ -87,7 +87,7 @@
'gpfq_p': [0.25], # GPFQ P
'gptq_act_order': [False], # Use act_order euristics for GPTQ
'act_quant_percentile': [99.999], # Activation Quantization Percentile
-}
+'uint_sym_act_for_unsigned_values': [False],}

parser = argparse.ArgumentParser(description='PyTorch ImageNet PTQ Validation')
parser.add_argument('idx', type=int)
@@ -228,7 +228,8 @@ def ptq_torchvision_models(args):
weight_quant_granularity=config_namespace.weight_quant_granularity,
act_quant_percentile=config_namespace.act_quant_percentile,
act_quant_type=config_namespace.act_quant_type,
-scale_factor_type=config_namespace.scale_factor_type)
+scale_factor_type=config_namespace.scale_factor_type,
+uint_sym_act_for_unsigned_values=config_namespace.uint_sym_act_for_unsigned_values)

# If available, use the selected GPU
if args.gpu is not None:
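As a point of reference, below is a minimal sketch, not the benchmark script's actual code, of how an options grid like OPTIONS_DEFAULT can be expanded into per-run configurations so that the new 'uint_sym_act_for_unsigned_values' entry ends up on the config_namespace forwarded to quantize_model above. The itertools.product/SimpleNamespace mechanism and the trimmed option dict are illustrative assumptions.

from itertools import product
from types import SimpleNamespace

# Trimmed-down grid: only two of the benchmark's option lists are reproduced here.
options = {
    'act_quant_percentile': [99.9, 99.99, 99.999],
    'uint_sym_act_for_unsigned_values': [False],
}

# Expand the grid into one namespace per combination (assumed mechanism).
configs = [
    SimpleNamespace(**dict(zip(options.keys(), values)))
    for values in product(*options.values())
]

# Every namespace now carries the flag, matching the call shown in the diff:
#   quantize_model(..., uint_sym_act_for_unsigned_values=config_namespace.uint_sym_act_for_unsigned_values)
print(configs[0].uint_sym_act_for_unsigned_values)  # False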
25 changes: 16 additions & 9 deletions src/brevitas_examples/imagenet_classification/ptq/ptq_common.py
@@ -106,6 +106,7 @@ def quantize_model(
act_param_method='stats',
weight_quant_type='sym',
act_quant_granularity='per_tensor',
+uint_sym_act_for_unsigned_values=True,
dtype=torch.float32):
# Define what quantize function to use and, based on the given configuration, its arguments
quantize_fn = QUANTIZE_MAP[backend]
@@ -127,6 +128,7 @@ def bit_width_fn(module, other_bit_width):
act_bit_width_or_lambda = act_bit_width if backend != 'layerwise' else lambda module: bit_width_fn(
module, act_bit_width)
quant_layer_map, quant_layerwise_layer_map, quant_act_map, quant_identity_map = create_quant_maps(dtype=dtype,
+uint_sym_act_for_unsigned_values=uint_sym_act_for_unsigned_values,
bias_bit_width=bias_bit_width,
weight_bit_width=weight_bit_width_or_lambda,
weight_param_method=weight_param_method,
@@ -164,6 +166,7 @@ def create_quant_maps(
weight_quant_type,
weight_quant_granularity,
weight_narrow_range,
+uint_sym_act_for_unsigned_values=True,
act_bit_width=None,
act_scale_type=None,
act_param_method=None,
@@ -235,7 +238,7 @@ def kwargs_prefix(prefix, weight_kwargs):
'softmax_input_quant': None,
'attn_output_weights_quant': sym_act_quant,
'attn_output_weights_bit_width': act_bit_width,
-'attn_output_weights_signed': False,
+'attn_output_weights_signed': not uint_sym_act_for_unsigned_values,
'q_scaled_quant': sym_act_quant,
'q_scaled_bit_width': act_bit_width,
'k_transposed_quant': sym_act_quant,
@@ -273,17 +276,21 @@ def kwargs_prefix(prefix, weight_kwargs):
act_quant_and_bit_width = {'act_quant': act_quant, 'bit_width': act_bit_width}
quant_act_kwargs = {**act_quant_and_bit_width, 'return_quant_tensor': True}
quant_act_map = {
-torch.nn.ReLU: (qnn.QuantReLU, {
-    **quant_act_kwargs, 'signed': False}),
-torch.nn.ReLU6: (qnn.QuantReLU, {
-    **quant_act_kwargs, 'signed': False}),
-torch.nn.Sigmoid: (qnn.QuantSigmoid, {
-    **quant_act_kwargs, 'signed': False}),}
+torch.nn.ReLU:
+    (qnn.QuantReLU, {
+        **quant_act_kwargs, 'signed': not uint_sym_act_for_unsigned_values}),
+torch.nn.ReLU6:
+    (qnn.QuantReLU, {
+        **quant_act_kwargs, 'signed': not uint_sym_act_for_unsigned_values}),
+torch.nn.Sigmoid: (
+    qnn.QuantSigmoid, {
+        **quant_act_kwargs, 'signed': not uint_sym_act_for_unsigned_values}),}
quant_identity_map = {
'signed': (qnn.QuantIdentity, {
**quant_act_kwargs}),
-'unsigned': (qnn.QuantIdentity, {
-    **quant_act_kwargs, 'signed': False}),}
+'unsigned': (
+    qnn.QuantIdentity, {
+        **quant_act_kwargs, 'signed': not uint_sym_act_for_unsigned_values}),}
quant_layerwise_layer_map = {
torch.nn.Linear: (qnn.QuantLinear, layerwise_quant_wbiol_kwargs),
torch.nn.MultiheadAttention: (qnn.QuantMultiheadAttention, layerwise_quant_mha_kwargs),
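To illustrate what the flag controls, here is a small sketch, assuming Brevitas' default activation quantizer accepts the 'signed' override the same way the quant_act_map above passes it: with the default setting (True) a ReLU keeps an unsigned quantizer for its non-negative outputs, while False forces a signed symmetric quantizer. This example is not part of the commit, only an illustration.

import torch
import brevitas.nn as qnn

uint_sym_act_for_unsigned_values = False  # the new knob, disabled here

# Mirrors the quant_act_map entry: 'signed' is the negation of the flag.
relu_forced_signed = qnn.QuantReLU(
    signed=not uint_sym_act_for_unsigned_values,  # True -> signed quantizer
    return_quant_tensor=True)

# Default behaviour (flag left at True): unsigned quantization of ReLU outputs.
relu_unsigned = qnn.QuantReLU(signed=False, return_quant_tensor=True)

x = torch.randn(4, 8)
print(relu_forced_signed(x).signed)  # expected: True
print(relu_unsigned(x).signed)       # expected: False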
