[GPU] activations scaling to resolve accuracy issues for infer precision of f16 (#27265)

### Details:
- When a model runs at an inference precision of f16, it might be unable to calculate correct results due to the limited range of f16.
- The purpose of this PR is to avoid situations where overflow occurs during calculation by scaling down the activations, thereby obtaining correct results when the infer precision is f16.
- A new config property "ACTIVATIONS_SCALE_FACTOR" is introduced, which holds a single floating-point value. For example, if it is 64, activations are divided by 64 before Convolution and MatMul. If it is smaller than 0, this feature is disabled.
- This property can also be set via the rt_info of a model, as shown below.

```xml
<rt_info>
    <runtime_options>
        <ACTIVATIONS_SCALE_FACTOR value="8.0" />
    </runtime_options>
</rt_info>
```

### Tickets:
- 147052

---------

Co-authored-by: Andrew Park <[email protected]>
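For context, a minimal sketch of how the new property might be passed at compile time. It assumes the typed `ov::hint::activations_scale_factor` key referenced by this PR; the generic string form `"ACTIVATIONS_SCALE_FACTOR"` should behave equivalently. Paths and values are placeholders, not code from this commit.

```cpp
// Sketch: enabling activations scaling when compiling a model for GPU in f16.
// Assumes the ov::hint::activations_scale_factor property key introduced by this PR;
// a non-positive value leaves the feature disabled.
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    std::shared_ptr<ov::Model> model = core.read_model("model.xml");

    // Scale activations down by 8 before Convolution/MatMul when running in f16.
    auto compiled = core.compile_model(model,
                                       "GPU",
                                       ov::hint::inference_precision(ov::element::f16),
                                       ov::hint::activations_scale_factor(8.0f));

    // The same value can also be passed as a generic string property:
    // core.compile_model(model, "GPU", ov::AnyMap{{"ACTIVATIONS_SCALE_FACTOR", "8.0"}});
    return 0;
}
```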
1 parent d63a9df, commit cc67ad1
Showing 16 changed files with 1,035 additions and 52 deletions.
104 changes: 104 additions & 0 deletions
...mmon/transformations/include/transformations/common_optimizations/activations_scaling.hpp
@@ -0,0 +1,104 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <memory>

#include "openvino/pass/matcher_pass.hpp"
#include "transformations_visibility.hpp"

namespace ov {
namespace pass {

class TRANSFORMATIONS_API ActivationsScaling;

namespace activations_scaling {

class TRANSFORMATIONS_API ScaleDownSingleLayer;
class TRANSFORMATIONS_API EliminateScalarMul;
class TRANSFORMATIONS_API MulConcatTransformation;
class TRANSFORMATIONS_API MulShareTransformation;
class TRANSFORMATIONS_API MoveDownScalarMul;

}  // namespace activations_scaling
}  // namespace pass
}  // namespace ov

// ActivationsScaling makes activation values smaller to prevent overflow due to the limited range of FP16.
// This feature is controlled by ov::hint::activations_scale_factor.
// For example, when this property is set to 16, activations are divided by 16.
// If ov::hint::activations_scale_factor is less than or equal to zero, it is disabled.

// Add scale_down and scale_up layers around Convolution and MatMul nodes
//   Conv/MatMul
//      ==>
//   Multiply(scale_down by scale_factor) --> Conv/MatMul --> Multiply(scale_up by scale_factor)
class ov::pass::activations_scaling::ScaleDownSingleLayer : public ov::pass::MatcherPass {
public:
    OPENVINO_MATCHER_PASS_RTTI("ScaleDownSingleLayer", "0");
    ScaleDownSingleLayer(float scale_factor, ov::element::Type scaled_prec);
};

// Normalization and ShapeOf have the following property.
//
//   Norm(input * const_a) = Norm(input)
//
// So, we can skip a Multiply that is connected to Normalization or ShapeOf.
//
//   input --> Multiply --> Normalization/ShapeOf
//      ==>
//   input --> Normalization/ShapeOf
class ov::pass::activations_scaling::EliminateScalarMul : public ov::pass::MatcherPass {
public:
    OPENVINO_MATCHER_PASS_RTTI("EliminateScalarMul", "0");
    EliminateScalarMul();
};

//   input_a   const_a   input_b   const_b   input_c   const_c
//       \      /            \      /            \      /
//      Multiply_a          Multiply_b          Multiply_c
//             \                 |                  /
//              \                |                 /
//               ---------- Concat ------------
//      ==>
//            (const_a             (const_b             (const_c
//   input_a  /const_c)   input_b  /const_c)   input_c  /const_c)
//       \      /             \      /             \      /
//      Multiply_a           Multiply_b           Multiply_c
//             \                 |                  /
//              \                |                 /
//               ---------- Concat ------------
//                               |    const_c
//                               |   /
//                            Multiply
class ov::pass::activations_scaling::MulConcatTransformation : public ov::pass::MatcherPass {
public:
    OPENVINO_MATCHER_PASS_RTTI("MulConcatTransformation", "0");
    MulConcatTransformation();
};

//        input           input
//       /     \             |
//    Norm     Mul   ==>    Mul (expect to be fused into the input layer)
//     |        |          /   \_
//    op_a     op_b      Norm   op_b
//                         |
//                        op_a
class ov::pass::activations_scaling::MulShareTransformation : public ov::pass::MatcherPass {
public:
    OPENVINO_MATCHER_PASS_RTTI("MulShareTransformation", "0");
    MulShareTransformation();
};

//            input_b   scalar         input_a   input_b
//                 \     /                  \     /
//   input_a       Mul_b       ==>          Mul_a'   scalar
//        \         /                           \     /
//          Mul_a                                Mul_b' (expect to be merged with Mul_a')
class ov::pass::activations_scaling::MoveDownScalarMul : public ov::pass::MatcherPass {
public:
    OPENVINO_MATCHER_PASS_RTTI("MoveDownScalarMul", "0");
    MoveDownScalarMul();
};