From 3c3a12a2a004feb744600aacdd37b9ad8f1a2807 Mon Sep 17 00:00:00 2001
From: Ran chongzhi <57489288+ranchongzhi@users.noreply.github.com>
Date: Wed, 11 Oct 2023 11:11:21 +0800
Subject: [PATCH] [Fix] Modify quantized ops during MKLDNN int8 inference
 (#3514)

* modify quantized ops during mkldnn int8 inference

* modify
---
 deploy/slim/act/readme.md   | 18 ++++++++++++++++++
 deploy/slim/act/test_seg.py |  6 +++---
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/deploy/slim/act/readme.md b/deploy/slim/act/readme.md
index 8249a37153..34b6fabd93 100644
--- a/deploy/slim/act/readme.md
+++ b/deploy/slim/act/readme.md
@@ -331,3 +331,21 @@ Int8 inference results
 ### 7. NotImplementedError: delete weight dequant op pass is not supported for per channel quantization
 
 **A**: See https://github.com/PaddlePaddle/Paddle/issues/56619 and install TensorRT following the [TensorRT installation guide](../../../docs/deployment/installtrt.md).
+
+### 8. Severe accuracy drop during CPU inference
+
+**A**: An accuracy drop during CPU inference is usually caused by misconfiguring which ops are quantized at inference time. Make sure the ops quantized during inference match the ops quantized during training; only then will inference accuracy align with training accuracy. Take the `PP-LiteSeg` model in this document as an example:
+
+The quantization-aware training config file is `configs/ppliteseg/ppliteseg_qat.yaml`, in which the quantized ops are `conv2d` and `depthwise_conv2d`, so the same two ops must be quantized during inference. This can be set with the following call:
+```python
+# deploy/slim/act/test_seg.py:64
+pred_cfg.enable_mkldnn_int8({
+    "conv2d", "depthwise_conv2d"
+})
+```
+Moreover, it is best to quantize only these two ops; quantizing additional ops may degrade accuracy. The results of a simple experiment are shown below:
+
+|              | FP32 inference (original model) | FP32 + MKLDNN (original model) | Int8 inference (quantize conv2d, depthwise_conv2d) | Int8 inference (quantize conv2d, depthwise_conv2d, elementwise_mul) | Int8 inference (quantize conv2d, depthwise_conv2d, elementwise_mul, pool2d) |
+|:------------:|:------:|:------:|:------:|:------:|:------:|
+| mIoU         | 0.7704 | 0.7704 | 0.7658 | 0.7657 | 0.7372 |
+| Latency (ms) | 1216.8 | 1191.3 | 434.5  | 439.6  | 505.8  |
diff --git a/deploy/slim/act/test_seg.py b/deploy/slim/act/test_seg.py
index 69266479d0..8bf69e0b16 100644
--- a/deploy/slim/act/test_seg.py
+++ b/deploy/slim/act/test_seg.py
@@ -59,9 +59,9 @@ def load_predictor(args):
     if args.use_mkldnn:
         pred_cfg.enable_mkldnn()
         if args.precision == "int8":
-            pred_cfg.enable_mkldnn_int8({
-                "conv2d", "depthwise_conv2d", "pool2d", "elementwise_mul"
-            })
+            # Please ensure that the ops quantized during inference are the same as
+            # the ops set in the QAT training configuration file
+            pred_cfg.enable_mkldnn_int8({"conv2d", "depthwise_conv2d"})
     if args.use_trt:
         # To collect the dynamic shapes of inputs for TensorRT engine
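
For context beyond the patch itself, below is a minimal, self-contained sketch of how the MKLDNN int8 path configured above can be exercised end to end with the Paddle Inference Python API. The model paths, thread count, and input shape are illustrative assumptions, not values from this repository; the op set passed to `enable_mkldnn_int8` mirrors the one used in `test_seg.py` above.

```python
# Minimal sketch of CPU int8 inference with Paddle Inference.
# Assumptions: model paths, input shape, and thread count are illustrative.
import numpy as np
from paddle.inference import Config, create_predictor

# Hypothetical paths to an exported, ACT-quantized PP-LiteSeg model.
config = Config("output/model.pdmodel", "output/model.pdiparams")
config.enable_mkldnn()
# Quantize only the ops that were quantized during QAT
# (conv2d and depthwise_conv2d in ppliteseg_qat.yaml).
config.enable_mkldnn_int8({"conv2d", "depthwise_conv2d"})
config.set_cpu_math_library_num_threads(10)

predictor = create_predictor(config)

# Run one dummy NCHW image through the predictor; the shape is illustrative.
input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
data = np.random.rand(1, 3, 512, 1024).astype("float32")
input_handle.reshape(data.shape)
input_handle.copy_from_cpu(data)
predictor.run()

output_handle = predictor.get_output_handle(predictor.get_output_names()[0])
print(output_handle.copy_to_cpu().shape)
```

If accuracy still drops, shrinking the set passed to `enable_mkldnn_int8` op by op and comparing against the plain `enable_mkldnn()` FP32 output helps isolate which quantized op is responsible, in the same spirit as the experiment table in the readme change above.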