Skip to content

Commit

Permalink
updated config
Browse files Browse the repository at this point in the history
  • Loading branch information
mht-sharma committed Sep 20, 2023
1 parent 2c6c09b commit 6fc369b
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 6 deletions.
16 changes: 12 additions & 4 deletions optimum/onnxruntime/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,18 @@ class QuantizationConfig:
qdq_op_type_per_channel_support_to_axis (`Dict[str, int]`):
Set the channel axis for a specific operator type. Effective only when per channel quantization is
supported and `per_channel` is set to True.
smooth_quant (`bool`, defaults to `False`):
If enabled, the SmoothQuant algorithm will be applied before quantization to perform
fake input channel quantization.
smooth_quant_alpha (`float`, defaults to `0.5`):
It only works if SmoothQuant is True. It controls the difficulty of weight
and activation quantization. A larger alpha value could be used on models with more significant
activation outliers to migrate more quantization difficulty to weights.
smooth_quant_folding (`bool`, defaults to `True`):
It only works if SmoothQuant is True. If enabled, Mul ops inserted during
SmoothQuant will be folded into the previous op if the previous op is foldable.
smooth_quant_op_types (`List[str]`, defaults to `[]`):
The op types to be smooth quantized.
"""

is_static: bool
Expand Down Expand Up @@ -396,7 +408,6 @@ def arm64(
nodes_to_quantize: Optional[List[str]] = None,
nodes_to_exclude: Optional[List[str]] = None,
operators_to_quantize: Optional[List[str]] = None,
smooth_quant_op_types: Optional[List[str]] = None,
):
"""
Creates a [`~onnxruntime.QuantizationConfig`] fit for ARM64.
Expand Down Expand Up @@ -450,7 +461,6 @@ def avx2(
nodes_to_quantize: Optional[List[str]] = None,
nodes_to_exclude: Optional[List[str]] = None,
operators_to_quantize: Optional[List[str]] = None,
smooth_quant_op_types: Optional[List[str]] = None,
) -> QuantizationConfig:
"""
Creates a [`~onnxruntime.QuantizationConfig`] fit for CPU with AVX2 instruction set.
Expand Down Expand Up @@ -508,7 +518,6 @@ def avx512(
nodes_to_quantize: Optional[List[str]] = None,
nodes_to_exclude: Optional[List[str]] = None,
operators_to_quantize: Optional[List[str]] = None,
smooth_quant_op_types: Optional[List[str]] = None,
) -> QuantizationConfig:
"""
Creates a [`~onnxruntime.QuantizationConfig`] fit for CPU with AVX512 instruction set.
Expand Down Expand Up @@ -565,7 +574,6 @@ def avx512_vnni(
nodes_to_quantize: Optional[List[str]] = None,
nodes_to_exclude: Optional[List[str]] = None,
operators_to_quantize: Optional[List[str]] = None,
smooth_quant_op_types: Optional[List[str]] = None,
) -> QuantizationConfig:
"""
Creates a [`~onnxruntime.QuantizationConfig`] fit for CPU with AVX512-VNNI instruction set.
Expand Down
7 changes: 5 additions & 2 deletions optimum/onnxruntime/quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,9 @@ def apply_smooth_quant(
importlib.import_module("neural_compressor.adaptor.ox_utils.smooth_quant")
except Exception as e:
logging.error(f"{e}.")
raise RuntimeError("neural-compressor is not correctly installed. Please check your environment.") from e
raise RuntimeError("Neural-compressor is required for SmoothQuant. Please install the library") from e

import copy

import onnx
from neural_compressor.adaptor.ox_utils.smooth_quant import ORTSmoothQuant
Expand All @@ -242,13 +244,14 @@ def apply_smooth_quant(
os.makedirs(save_dir, exist_ok=True)

def inc_dataloader():
calibration_data_reader = ORTCalibrationDataReader(dataset, batch_size)
calibration_data_reader = ORTCalibrationDataReader(copy.deepcopy(dataset), batch_size)
for data in calibration_data_reader:
yield data, None

orig_nodes = [i.name for i in model.graph.node]
dataloader = inc_dataloader()
sq = ORTSmoothQuant(self.onnx_model_path.as_posix(), dataloader, quantization_config.reduce_range)
del dataloader
model = sq.transform(
quantization_config.smooth_quant_alpha,
quantization_config.smooth_quant_folding,
Expand Down

0 comments on commit 6fc369b

Please sign in to comment.