Skip to content

Commit

Permalink
[CPU]fix property test
Browse files Browse the repository at this point in the history
Signed-off-by: Zhang Yi3 <[email protected]>
  • Loading branch information
zhangYiIntel committed Dec 11, 2024
1 parent 685f263 commit e56639a
Show file tree
Hide file tree
Showing 10 changed files with 66 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
from openvino._pyopenvino.properties.hint import allow_auto_batching
from openvino._pyopenvino.properties.hint import dynamic_quantization_group_size
from openvino._pyopenvino.properties.hint import kv_cache_precision
from openvino._pyopenvino.properties.hint import key_cache_precision
from openvino._pyopenvino.properties.hint import value_cache_precision
from openvino._pyopenvino.properties.hint import key_cache_group_size
from openvino._pyopenvino.properties.hint import value_cache_group_size
from openvino._pyopenvino.properties.hint import activations_scale_factor
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ void regmodule_properties(py::module m) {
wrap_property_RW(m_hint, ov::hint::allow_auto_batching, "allow_auto_batching");
wrap_property_RW(m_hint, ov::hint::dynamic_quantization_group_size, "dynamic_quantization_group_size");
wrap_property_RW(m_hint, ov::hint::kv_cache_precision, "kv_cache_precision");
wrap_property_RW(m_hint, ov::hint::key_cache_precision, "key_cache_precision");
wrap_property_RW(m_hint, ov::hint::value_cache_precision, "value_cache_precision");
wrap_property_RW(m_hint, ov::hint::key_cache_group_size, "key_cache_group_size");
wrap_property_RW(m_hint, ov::hint::value_cache_group_size, "value_cache_group_size");
wrap_property_RW(m_hint, ov::hint::activations_scale_factor, "activations_scale_factor");
Expand Down
2 changes: 2 additions & 0 deletions src/bindings/python/tests/test_runtime/test_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,8 @@ def test_properties_ro(ov_property_ro, expected_value):
((64, 64),),
),
(hints.kv_cache_precision, "KV_CACHE_PRECISION", ((Type.f32, Type.f32),)),
(hints.key_cache_precision, "KEY_CACHE_PRECISION", ((Type.f32, Type.f32),)),
(hints.value_cache_precision, "VALUE_CACHE_PRECISION", ((Type.f32, Type.f32),)),
(
hints.activations_scale_factor,
"ACTIVATIONS_SCALE_FACTOR",
Expand Down
12 changes: 12 additions & 0 deletions src/inference/include/openvino/runtime/properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,18 @@ static constexpr Property<uint64_t, PropertyMutability::RW> dynamic_quantization
*/
static constexpr Property<element::Type, PropertyMutability::RW> kv_cache_precision{"KV_CACHE_PRECISION"};

/**
* @brief Hint for device to use specified precision for key cache compression
* @details Read-write property holding an element::Type, addressed by the string key "KEY_CACHE_PRECISION".
* This declaration does not restrict the value; checking which precisions are supported is left to the
* device plugin that consumes the hint.
* @ingroup ov_runtime_cpp_prop_api
*/
static constexpr Property<element::Type, PropertyMutability::RW> key_cache_precision{"KEY_CACHE_PRECISION"};

/**
* @brief Hint for device to use specified precision for value cache compression
* @details Read-write property holding an element::Type, addressed by the string key "VALUE_CACHE_PRECISION".
* This declaration does not restrict the value; checking which precisions are supported is left to the
* device plugin that consumes the hint.
* @ingroup ov_runtime_cpp_prop_api
*/
static constexpr Property<element::Type, PropertyMutability::RW> value_cache_precision{"VALUE_CACHE_PRECISION"};

/**
* @brief Hint for device to use group_size for key cache compression
* @ingroup ov_runtime_cpp_prop_api
Expand Down
6 changes: 6 additions & 0 deletions src/plugins/intel_cpu/src/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,8 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
RO_property(ov::intel_cpu::sparse_weights_decompression_rate.name()),
RO_property(ov::hint::dynamic_quantization_group_size.name()),
RO_property(ov::hint::kv_cache_precision.name()),
RO_property(ov::hint::key_cache_precision.name()),
RO_property(ov::hint::value_cache_precision.name()),
RO_property(ov::hint::key_cache_group_size.name()),
RO_property(ov::hint::value_cache_group_size.name()),
};
Expand Down Expand Up @@ -335,6 +337,10 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
config.fcDynamicQuantizationGroupSize);
} else if (name == ov::hint::kv_cache_precision) {
return decltype(ov::hint::kv_cache_precision)::value_type(config.kvCachePrecision);
} else if (name == ov::hint::key_cache_precision) {
return decltype(ov::hint::key_cache_precision)::value_type(config.keyCachePrecision);
} else if (name == ov::hint::value_cache_precision) {
return decltype(ov::hint::value_cache_precision)::value_type(config.valueCachePrecision);
} else if (name == ov::hint::key_cache_group_size) {
return decltype(ov::hint::key_cache_group_size)::value_type(config.keyCacheGroupSize);
} else if (name == ov::hint::value_cache_group_size) {
Expand Down
22 changes: 22 additions & 0 deletions src/plugins/intel_cpu/src/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,26 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
ov::hint::kv_cache_precision.name(),
". Supported values: u8, bf16, f16, f32");
}
} else if (key == ov::hint::key_cache_precision.name() || key == ov::hint::value_cache_precision.name()) {
    // Handles KEY_CACHE_PRECISION and VALUE_CACHE_PRECISION.
    // Key cache accepts f32, f16, bf16, u8; value cache additionally accepts u4 and i4.
    // A value that cannot be converted to ov::element::Type, or that is outside the accepted set,
    // is reported to the caller instead of being silently dropped.
    const bool isKeyCache = (key == ov::hint::key_cache_precision.name());
    try {
        const auto prec = val.as<ov::element::Type>();
        if (isKeyCache) {
            if (!one_of(prec, ov::element::f32, ov::element::f16, ov::element::bf16, ov::element::u8)) {
                OPENVINO_THROW("keyCachePrecision doesn't support value ", prec);
            }
            keyCachePrecision = prec;
        } else {
            if (!one_of(prec,
                        ov::element::f32,
                        ov::element::f16,
                        ov::element::bf16,
                        ov::element::u8,
                        ov::element::u4,
                        ov::element::i4)) {
                OPENVINO_THROW("valueCachePrecision doesn't support value ", prec);
            }
            valueCachePrecision = prec;
        }
        // Mark the precision as user-provided only once a value has actually been accepted.
        kvCachePrecisionSetExplicitly = true;
    } catch (ov::Exception&) {
        // Both conversion failures and the range checks above end up here; rethrow with a single
        // message in the same "Wrong value ... Supported values: ..." shape used for kv_cache_precision.
        OPENVINO_THROW("Wrong value ",
                       val.as<std::string>(),
                       " for property key ",
                       key,
                       ". Supported values: ",
                       isKeyCache ? "u8, bf16, f16, f32" : "u4, i4, u8, bf16, f16, f32");
    }
} else if (key == ov::hint::key_cache_group_size.name() || key == ov::hint::value_cache_group_size.name()) {
try {
auto const groupSize = val.as<uint64_t>();
Expand Down Expand Up @@ -432,6 +452,8 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
}
if (!kvCachePrecisionSetExplicitly) {
kvCachePrecision = ov::element::f32;
valueCachePrecision = ov::element::f32;
keyCachePrecision = ov::element::f32;
}
}

Expand Down
4 changes: 4 additions & 0 deletions src/plugins/intel_cpu/src/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,13 @@ struct Config {
#endif
#if defined(OPENVINO_ARCH_X86_64)
ov::element::Type kvCachePrecision = ov::element::u8;
ov::element::Type keyCachePrecision = ov::element::u8;
ov::element::Type valueCachePrecision = ov::element::u8;
size_t rtCacheCapacity = 5000ul;
#else
ov::element::Type kvCachePrecision = ov::element::f16;
ov::element::Type keyCachePrecision = ov::element::f16;
ov::element::Type valueCachePrecision = ov::element::f16;
// TODO: Executor cache may leads to incorrect behavior on oneDNN ACL primitives
size_t rtCacheCapacity = 0ul;
#endif
Expand Down
12 changes: 12 additions & 0 deletions src/plugins/intel_cpu/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,14 @@ ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options)
engConfig.fcDynamicQuantizationGroupSize);
} else if (name == ov::hint::kv_cache_precision) {
return decltype(ov::hint::kv_cache_precision)::value_type(engConfig.kvCachePrecision);
} else if (name == ov::hint::key_cache_precision) {
return decltype(ov::hint::key_cache_precision)::value_type(engConfig.keyCachePrecision);
} else if (name == ov::hint::value_cache_precision) {
return decltype(ov::hint::value_cache_precision)::value_type(engConfig.valueCachePrecision);
} else if (name == ov::hint::key_cache_group_size) {
return decltype(ov::hint::key_cache_group_size)::value_type(engConfig.keyCacheGroupSize);
} else if (name == ov::hint::value_cache_group_size) {
return decltype(ov::hint::value_cache_group_size)::value_type(engConfig.valueCacheGroupSize);
}
return get_ro_property(name, options);
}
Expand Down Expand Up @@ -433,6 +441,10 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& optio
RW_property(ov::intel_cpu::sparse_weights_decompression_rate.name()),
RW_property(ov::hint::dynamic_quantization_group_size.name()),
RW_property(ov::hint::kv_cache_precision.name()),
RW_property(ov::hint::key_cache_precision.name()),
RW_property(ov::hint::value_cache_precision.name()),
RW_property(ov::hint::key_cache_group_size.name()),
RW_property(ov::hint::value_cache_group_size.name()),
};

OPENVINO_SUPPRESS_DEPRECATED_START
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkSupportedPropertiesAreAvailable
RO_property(ov::intel_cpu::sparse_weights_decompression_rate.name()),
RO_property(ov::hint::dynamic_quantization_group_size.name()),
RO_property(ov::hint::kv_cache_precision.name()),
RO_property(ov::hint::key_cache_precision.name()),
RO_property(ov::hint::value_cache_precision.name()),
RO_property(ov::hint::key_cache_group_size.name()),
RO_property(ov::hint::value_cache_group_size.name()),
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginAllSupportedPropertiesAreAvailable) {
RW_property(ov::intel_cpu::sparse_weights_decompression_rate.name()),
RW_property(ov::hint::dynamic_quantization_group_size.name()),
RW_property(ov::hint::kv_cache_precision.name()),
RW_property(ov::hint::key_cache_precision.name()),
RW_property(ov::hint::value_cache_precision.name()),
RW_property(ov::hint::key_cache_group_size.name()),
RW_property(ov::hint::value_cache_group_size.name()),
};
Expand Down

0 comments on commit e56639a

Please sign in to comment.