Feat (ptq): add flag for uint sym activations (#714)
Giuseppe5 authored Oct 2, 2023
1 parent f1e52ba · commit 9d24ace
Showing 2 changed files with 19 additions and 6 deletions.
@@ -65,6 +65,7 @@
'gpfq': [False, True], # Enable/Disable GPFQ
'gpfq_p': [0.25, 0.75], # GPFQ P
'act_quant_percentile': [99.9, 99.99, 99.999], # Activation Quantization Percentile
'uint_sym_act_for_unsigned_values': [True], # Whether to use unsigned act quant when possible
}

OPTIONS_DEFAULT = {
@@ -87,6 +88,7 @@
'gpfq_p': [0.25], # GPFQ P
'gptq_act_order': [False], # Use act_order euristics for GPTQ
'act_quant_percentile': [99.999], # Activation Quantization Percentile
'uint_sym_act_for_unsigned_values': [True], # Whether to use unsigned act quant when possible
}

parser = argparse.ArgumentParser(description='PyTorch ImageNet PTQ Validation')
@@ -228,7 +230,8 @@ def ptq_torchvision_models(args):
weight_quant_granularity=config_namespace.weight_quant_granularity,
act_quant_percentile=config_namespace.act_quant_percentile,
act_quant_type=config_namespace.act_quant_type,
scale_factor_type=config_namespace.scale_factor_type)
scale_factor_type=config_namespace.scale_factor_type,
uint_sym_act_for_unsigned_values=config_namespace.uint_sym_act_for_unsigned_values)

# If available, use the selected GPU
if args.gpu is not None:
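The new 'uint_sym_act_for_unsigned_values' entry is single-valued, so every configuration in the benchmark sweep keeps unsigned symmetric activation quantization enabled, and the default options pin it for single-run evaluation as well. As a minimal sketch of how such an option grid turns into individual run configurations (assuming a plain itertools.product expansion for illustration, not the repository's actual benchmark driver):

```python
from itertools import product

# Option grid as in OPTIONS above (abridged); each key maps to the values swept.
OPTIONS = {
    'gpfq': [False, True],                          # Enable/Disable GPFQ
    'gpfq_p': [0.25, 0.75],                         # GPFQ P
    'act_quant_percentile': [99.9, 99.99, 99.999],  # Activation quant percentile
    'uint_sym_act_for_unsigned_values': [True],     # Single value: always enabled
}

# Cartesian product over the value lists yields one config dict per run.
configs = [
    dict(zip(OPTIONS.keys(), values)) for values in product(*OPTIONS.values())]
print(len(configs))  # 2 * 2 * 3 * 1 = 12 runs, all with the uint flag enabled
```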
20 changes: 15 additions & 5 deletions src/brevitas_examples/imagenet_classification/ptq/ptq_common.py
@@ -106,6 +106,7 @@ def quantize_model(
act_param_method='stats',
weight_quant_type='sym',
act_quant_granularity='per_tensor',
uint_sym_act_for_unsigned_values=True,
dtype=torch.float32):
# Define what quantize function to use and, based on the given configuration, its arguments
quantize_fn = QUANTIZE_MAP[backend]
@@ -127,6 +128,7 @@ def bit_width_fn(module, other_bit_width):
act_bit_width_or_lambda = act_bit_width if backend != 'layerwise' else lambda module: bit_width_fn(
module, act_bit_width)
quant_layer_map, quant_layerwise_layer_map, quant_act_map, quant_identity_map = create_quant_maps(dtype=dtype,
uint_sym_act_for_unsigned_values=uint_sym_act_for_unsigned_values,
bias_bit_width=bias_bit_width,
weight_bit_width=weight_bit_width_or_lambda,
weight_param_method=weight_param_method,
@@ -164,6 +166,7 @@ def create_quant_maps(
weight_quant_type,
weight_quant_granularity,
weight_narrow_range,
uint_sym_act_for_unsigned_values=True,
act_bit_width=None,
act_scale_type=None,
act_param_method=None,
@@ -235,7 +238,6 @@ def kwargs_prefix(prefix, weight_kwargs):
'softmax_input_quant': None,
'attn_output_weights_quant': sym_act_quant,
'attn_output_weights_bit_width': act_bit_width,
'attn_output_weights_signed': False,
'q_scaled_quant': sym_act_quant,
'q_scaled_bit_width': act_bit_width,
'k_transposed_quant': sym_act_quant,
@@ -272,18 +274,26 @@ def kwargs_prefix(prefix, weight_kwargs):

act_quant_and_bit_width = {'act_quant': act_quant, 'bit_width': act_bit_width}
quant_act_kwargs = {**act_quant_and_bit_width, 'return_quant_tensor': True}

# For potentially unsigned activations, we create a separate dict
unsigned_quant_act_kwargs = quant_act_kwargs.copy()
if uint_sym_act_for_unsigned_values:
# In case we support unsigned activation, the output of softmax can be unsigned
quant_mha_kwargs['attn_output_weights_signed'] = False
unsigned_quant_act_kwargs['signed'] = False

quant_act_map = {
torch.nn.ReLU: (qnn.QuantReLU, {
**quant_act_kwargs, 'signed': False}),
**unsigned_quant_act_kwargs}),
torch.nn.ReLU6: (qnn.QuantReLU, {
**quant_act_kwargs, 'signed': False}),
**unsigned_quant_act_kwargs}),
torch.nn.Sigmoid: (qnn.QuantSigmoid, {
**quant_act_kwargs, 'signed': False}),}
**unsigned_quant_act_kwargs}),}
quant_identity_map = {
'signed': (qnn.QuantIdentity, {
**quant_act_kwargs}),
'unsigned': (qnn.QuantIdentity, {
**quant_act_kwargs, 'signed': False}),}
**unsigned_quant_act_kwargs}),}
quant_layerwise_layer_map = {
torch.nn.Linear: (qnn.QuantLinear, layerwise_quant_wbiol_kwargs),
torch.nn.MultiheadAttention: (qnn.QuantMultiheadAttention, layerwise_quant_mha_kwargs),
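Taken together, the change means that when uint_sym_act_for_unsigned_values is enabled, create_quant_maps instantiates the symmetric activation quantizer with signed=False wherever the output is known to be non-negative (ReLU, ReLU6, Sigmoid, and the softmax attention weights), so the full unsigned integer range is used instead of only the positive half of a signed range. A minimal sketch of the resulting layer-level behaviour, assuming Int8ActPerTensorFloat stands in for the symmetric act_quant that the other flags would select:

```python
import torch
import brevitas.nn as qnn
from brevitas.quant import Int8ActPerTensorFloat

# Signed symmetric activation quantizer: ReLU outputs only ever use the
# non-negative half of the int8 range.
signed_relu = qnn.QuantReLU(
    act_quant=Int8ActPerTensorFloat, bit_width=8, return_quant_tensor=True)

# With signed=False (what the new flag turns on for non-negative activations),
# the same symmetric quantizer becomes unsigned.
unsigned_relu = qnn.QuantReLU(
    act_quant=Int8ActPerTensorFloat, bit_width=8, signed=False,
    return_quant_tensor=True)

x = torch.randn(4, 8)
print(signed_relu(x).signed)    # True: signed symmetric int8
print(unsigned_relu(x).signed)  # False: unsigned symmetric (uint) quantizer
```

Leaving the flag at its default (True) therefore only affects layers whose outputs cannot be negative; signed activations elsewhere in the model are unchanged.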
