Skip to content

Commit

Permalink
disabled scaling for quantized models
Browse files Browse the repository at this point in the history
  • Loading branch information
e-ddykim committed Jan 13, 2025
1 parent 8122cde commit 0874b17
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 10 deletions.
4 changes: 2 additions & 2 deletions src/common/low_precision_transformations/src/add.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,11 +216,11 @@ bool AddTransformation::transform(TransformationContext& context, ov::pass::patt

auto output_type = scalingMode ? add->get_output_element_type(0) : element::f32;
newAddOrSubtract = std::make_shared<ov::op::TypeRelaxed<ov::opset1::Add>>(
std::vector<element::Type>{output_type, output_type}, std::vector<element::Type>{ output_type },
std::vector<element::Type>{output_type, output_type}, std::vector<element::Type>{output_type},
ov::op::TemporaryReplaceOutputType(inputs[0], output_type).get(),
ov::op::TemporaryReplaceOutputType(inputs[1], output_type).get());
newMultiply = std::make_shared<ov::op::TypeRelaxed<ov::opset1::Multiply>>(
std::vector<element::Type>{output_type, output_type}, std::vector<element::Type>{ add->get_output_element_type(0) },
std::vector<element::Type>{output_type, output_type}, std::vector<element::Type>{add->get_output_element_type(0)},
ov::op::TemporaryReplaceOutputType(newAddOrSubtract, output_type).get(),
ov::op::TemporaryReplaceOutputType(multiplyEmptyPathValues, output_type).get());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,17 +79,17 @@ bool MultiplyPartialTransformation::transform(TransformationContext& context, ov
auto constParent = multiply->input_value(multiplyBranch.first == 0 ? 1 : 0);
auto multiplyParentParent = multiplyParent.get_node_shared_ptr()->input_value(multiplyBranch.second);
auto multiplyParentConst = multiplyParent.get_node_shared_ptr()->input_value(multiplyBranch.second == 0 ? 1 : 0);
auto input_data_type = scalingMode ? multiply->get_output_element_type(0) : element::f32;
auto inputDataType = scalingMode ? multiply->get_output_element_type(0) : element::f32;

newMultiply = std::make_shared<ov::op::TypeRelaxed<ov::opset1::Multiply>>(
std::vector<ov::element::Type>{ input_data_type, input_data_type },
std::vector<ov::element::Type>{ inputDataType, inputDataType },
std::vector<ov::element::Type>{ multiply->get_output_element_type(0) },
ov::op::TemporaryReplaceOutputType(multiplyParentParent, input_data_type).get(),
ov::op::TemporaryReplaceOutputType(multiplyParentParent, inputDataType).get(),
ov::op::TemporaryReplaceOutputType(
fold<ov::opset1::Multiply>(
foldConvert(multiplyParentConst, input_data_type),
foldConvert(constParent, input_data_type)),
input_data_type).get());
foldConvert(multiplyParentConst, inputDataType),
foldConvert(constParent, inputDataType)),
inputDataType).get());

NetworkHelper::copyInfo(multiplyParent.get_node_shared_ptr(), newMultiply);
NetworkHelper::copyInfo(multiply, newMultiply);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -929,7 +929,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {

float activations_scale_factor = config.get_property(ov::hint::activations_scale_factor);

if (activations_scale_factor > 0.f && infer_precision == ov::element::f16) {
if (activations_scale_factor > 0.f && infer_precision == ov::element::f16 && !enableInt8) {
using namespace ov::pass::low_precision;

auto supportedPrecisions = std::vector<PrecisionsRestriction>({});
Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_gpu/src/runtime/execution_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,8 +275,8 @@ void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) {
// Forwards a fixed set of ov::hint properties, together with rt_info, to
// apply_rt_info_property(). What that helper does with each property is not
// visible here.
//
// NOTE(review): this text comes from a flattened commit-diff view whose file
// header reports "1 addition & 1 deletion". The two identical
// activations_scale_factor calls below are therefore presumably the removed and
// the added position of ONE call, not an intentional double application.
// Confirm against the real file before relying on either position.
void ExecutionConfig::apply_rt_info(const cldnn::device_info& info, const ov::RTMap& rt_info) {
// Calls inside this branch run only when the device does not report supports_immad.
if (!info.supports_immad) {
apply_rt_info_property(ov::hint::kv_cache_precision, rt_info);
// Guarded position of the activations_scale_factor call (presumably the pre-change line).
apply_rt_info_property(ov::hint::activations_scale_factor, rt_info);
}
// Unguarded position of the same call (presumably the post-change line):
// reached regardless of supports_immad.
apply_rt_info_property(ov::hint::activations_scale_factor, rt_info);
// Always forwarded, independent of supports_immad.
apply_rt_info_property(ov::hint::dynamic_quantization_group_size, rt_info);
}

Expand Down

0 comments on commit 0874b17

Please sign in to comment.