From 6ca4f1b225b6568a2121bb4ce1c37b93b83d33e5 Mon Sep 17 00:00:00 2001 From: chenhu-wang Date: Thu, 9 Jan 2025 05:39:35 +0800 Subject: [PATCH] executor cache x --- cmake/features.cmake | 2 +- .../snippets/aarch64/cpu_generator.cpp | 13 +++-- .../snippets/aarch64/cpu_generator.hpp | 3 +- .../snippets/aarch64/jit_brgemm_emitter.cpp | 13 ++--- .../snippets/aarch64/jit_brgemm_emitter.hpp | 10 +--- .../aarch64/kernel_executors/brgemm.cpp | 54 +++++++++---------- .../aarch64/kernel_executors/brgemm.hpp | 33 +++++------- .../kernel_executors => }/brgemm_base.cpp | 7 ++- .../kernel_executors => }/brgemm_base.hpp | 0 .../snippets/x64/jit_brgemm_emitter.hpp | 2 +- .../snippets/x64/kernel_executors/brgemm.hpp | 2 +- .../x64/kernel_executors/brgemm_amx.hpp | 2 +- src/plugins/intel_cpu/src/emitters/utils.hpp | 23 ++++---- 13 files changed, 79 insertions(+), 85 deletions(-) rename src/plugins/intel_cpu/src/emitters/snippets/{x64/kernel_executors => }/brgemm_base.cpp (99%) rename src/plugins/intel_cpu/src/emitters/snippets/{x64/kernel_executors => }/brgemm_base.hpp (100%) diff --git a/cmake/features.cmake b/cmake/features.cmake index e5132be08707cb..19481391342895 100644 --- a/cmake/features.cmake +++ b/cmake/features.cmake @@ -52,7 +52,7 @@ ov_dependent_option (ENABLE_GPU_DEBUG_CAPS "enable GPU debug capabilities at run ov_dependent_option (ENABLE_CPU_DEBUG_CAPS "enable CPU debug capabilities at runtime" ON "ENABLE_DEBUG_CAPS;ENABLE_INTEL_CPU" OFF) ov_dependent_option (ENABLE_SNIPPETS_DEBUG_CAPS "enable Snippets debug capabilities at runtime" ON "ENABLE_DEBUG_CAPS" OFF) -ov_dependent_option (ENABLE_SNIPPETS_LIBXSMM_TPP "allow Snippets to use LIBXSMM Tensor Processing Primitives" OFF "ENABLE_INTEL_CPU AND X86_64" OFF) +ov_dependent_option (ENABLE_SNIPPETS_LIBXSMM_TPP "allow Snippets to use LIBXSMM Tensor Processing Primitives" OFF "ENABLE_INTEL_CPU" OFF) ov_option (ENABLE_PROFILING_ITT "Build with ITT tracing. Optionally configure pre-built ittnotify library though INTEL_VTUNE_DIR variable." OFF) diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.cpp b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.cpp index cdc768f5d4e1cc..27359974faf4ae 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -10,6 +10,7 @@ #include "emitters/snippets/aarch64/jit_kernel_emitter.hpp" #include "emitters/snippets/aarch64/jit_loop_emitters.hpp" #include "emitters/snippets/aarch64/jit_memory_emitters.hpp" +#include "emitters/snippets/aarch64/jit_brgemm_emitter.hpp" #include "emitters/snippets/cpu_runtime_configurator.hpp" #include "emitters/utils.hpp" #include "jit_snippets_emitters.hpp" @@ -23,13 +24,15 @@ #include "snippets/snippets_isa.hpp" #include "transformations/cpu_opset/common/op/swish_cpu.hpp" #include "transformations/snippets/common/op/fused_mul_add.hpp" +#include "transformations/tpp/x64/op/brgemm.hpp" +#include "emitters/snippets/cpu_kernel_executor_table.hpp" namespace ov { -#define CREATE_SNIPPETS_EMITTER(e_type) \ +#define CREATE_SNIPPETS_EMITTER(e_type, ...) \ { \ [this](const snippets::lowered::ExpressionPtr& expr) -> std::shared_ptr { \ - return std::make_shared(h.get(), isa, expr); \ + return std::make_shared(h.get(), isa, expr, ##__VA_ARGS__); \ }, \ [](const std::shared_ptr& n) -> std::set> { \ return e_type::get_supported_precisions(n); \ @@ -202,6 +205,10 @@ CPUTargetMachine::CPUTargetMachine(dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, jitters[ov::intel_cpu::SwishNode::get_type_info_static()] = CREATE_CPU_EMITTER(jit_swish_emitter); jitters[ov::op::v0::Tanh::get_type_info_static()] = CREATE_CPU_EMITTER(jit_tanh_emitter); + // brgemm + jitters[ov::intel_cpu::tpp::op::BrgemmTPP::get_type_info_static()] = + CREATE_SNIPPETS_EMITTER(jit_brgemm_emitter, configurator->get_kernel_executor_table(), compiled_kernel_cache); + // control flow jitters[snippets::op::KernelStatic::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_kernel_static_emitter); jitters[snippets::op::KernelDynamic::get_type_info_static()] = CREATE_SNIPPETS_EMITTER(jit_kernel_dynamic_emitter); diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.hpp b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.hpp index 90c2662e33d070..c319f6c3ec7b07 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.hpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/cpu_generator.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -8,6 +8,7 @@ #include "cpu/aarch64/jit_generator.hpp" #include "snippets/generator.hpp" #include "snippets/target_machine.hpp" +#include "cache/multi_cache.h" namespace ov { namespace intel_cpu { diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_brgemm_emitter.cpp b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_brgemm_emitter.cpp index 37a56b90745743..5d705a755ca492 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_brgemm_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_brgemm_emitter.cpp @@ -4,12 +4,7 @@ #include "jit_brgemm_emitter.hpp" -#include "emitters/plugin/x64/utils.hpp" -#include "emitters/snippets/x64/kernel_executors/brgemm.hpp" -#include "emitters/snippets/x64/kernel_executors/brgemm_amx.hpp" #include "snippets/utils/utils.hpp" -#include "transformations/snippets/x64/op/brgemm_cpu.hpp" -#include "transformations/snippets/x64/op/brgemm_utils.hpp" #include "transformations/tpp/x64/op/brgemm.hpp" using namespace Xbyak_aarch64; @@ -33,13 +28,12 @@ jit_brgemm_emitter::jit_brgemm_emitter(jit_generator* h, const auto& brg0Prc = brgemm_node->get_input_element_type(0); const auto& brg1Prc = brgemm_node->get_input_element_type(1); BrgemmKernelConfig kernel_config(brg0Prc, brg1Prc, isa); - m_kernel_executor = - kernel_table->register_kernel(expr, compiled_kernel_cache, kernel_config); + m_kernel_executor = kernel_table->register_kernel(expr, compiled_kernel_cache, kernel_config); } std::set> jit_brgemm_emitter::get_supported_precisions( const std::shared_ptr& node) { - // Note: Brgemm currently supports only fp32 + // Note: Brgemm currently supports only fp32 on arm return {{element::f32, element::f32}}; } @@ -48,7 +42,7 @@ void jit_brgemm_emitter::validate_arguments(const std::vector& in, const OV_CPU_JIT_EMITTER_ASSERT(out.size() == 1, "Expects 1 output reg, got" + std::to_string(out.size())); } -void jit_brgemm_emitter::emit_code(const std::vector &in, const std::vector &out) const { +void jit_brgemm_emitter::emit_code(const std::vector& in, const std::vector& out) const { validate_arguments(in, out); emit_impl(in, out); } @@ -60,7 +54,6 @@ void jit_brgemm_emitter::emit_impl(const std::vector& in, const std::vec Xbyak_aarch64::XReg func_reg(9); h->mov(func_reg, get_execute_function_ptr()); - Xbyak_aarch64::XReg x0(0); Xbyak_aarch64::XReg x1(1); Xbyak_aarch64::XReg x2(2); diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_brgemm_emitter.hpp b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_brgemm_emitter.hpp index 1191da4d301bb9..83e46631ac8030 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_brgemm_emitter.hpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_brgemm_emitter.hpp @@ -20,14 +20,13 @@ class jit_brgemm_emitter : public jit_emitter { const ov::intel_cpu::MultiCacheWeakPtr& compiled_kernel_cache); size_t get_inputs_count() const override { - return m_memory_offsets.size() - 1; + return 2; } static std::set> get_supported_precisions( const std::shared_ptr& node = nullptr); - void emit_code(const std::vector &in, - const std::vector &out) const; + void emit_code(const std::vector& in, const std::vector& out) const; private: void validate_arguments(const std::vector& in, const std::vector& out) const override; @@ -36,11 +35,6 @@ class jit_brgemm_emitter : public jit_emitter { const uintptr_t get_execute_function_ptr() const; const uintptr_t get_compiled_kernel_ptr() const; - // Note: offsets order: A, B, C (+ scratchpad, if needed). Values can be dynamic_value if offset is calculated in - // runtime - std::vector m_memory_offsets{}; - // Note: cluster ids order: A, B, C (+ scratchpad, if needed). Values can be dynamic_value if there is no buffer - std::vector m_buffer_ids{}; std::shared_ptr m_kernel_executor = nullptr; }; diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/kernel_executors/brgemm.cpp b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/kernel_executors/brgemm.cpp index 7e2a50e527d767..05d8fc4c5939ff 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/kernel_executors/brgemm.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/kernel_executors/brgemm.cpp @@ -3,13 +3,11 @@ // #include "brgemm.hpp" + #include "transformations/tpp/x64/op/brgemm.hpp" using namespace Xbyak; using namespace dnnl::impl; -using namespace dnnl::impl::cpu::x64; - -#define HASH(X) seed = hash_combine(seed, X) namespace ov { namespace intel_cpu { @@ -28,45 +26,37 @@ BrgemmKernelConfig::BrgemmKernelConfig(const element::Type& in0_dtype, const element::Type& in1_dtype, dnnl::impl::cpu::aarch64::cpu_isa_t primitive_isa) : BrgemmBaseKernelConfig(), - m_static_params( - std::make_shared(in0_dtype, in1_dtype, primitive_isa)) { + m_static_params(std::make_shared(in0_dtype, in1_dtype, primitive_isa)) { m_hash = compute_hash(); } BrgemmKernelConfig::StaticParams::StaticParams(const element::Type& in0_dtype, const element::Type& in1_dtype, dnnl::impl::cpu::aarch64::cpu_isa_t primitive_isa) - : StaticBaseParams(in0_dtype, in1_dtype, dnnl::impl::cpu::x64::cpu_isa_t::isa_undef, compute_hash(primitive_isa)), - m_prefetching_flags(false), - isa(primitive_isa) { - m_type_in0 = ov_to_xsmm_dtype(in0_dtype); - m_type_in1 = ov_to_xsmm_dtype(in1_dtype); - m_type_exec = LIBXSMM_DATATYPE_F32; - m_type_out0 = LIBXSMM_DATATYPE_F32; - m_compile_flags = LIBXSMM_GEMM_FLAGS('N', 'N'); - } + : StaticBaseParams(in0_dtype, in1_dtype, dnnl::impl::cpu::x64::cpu_isa_t::isa_undef, compute_hash(primitive_isa)) { + m_type_in0 = ov_to_xsmm_dtype(in0_dtype); + m_type_in1 = ov_to_xsmm_dtype(in1_dtype); + m_type_exec = LIBXSMM_DATATYPE_F32; + m_type_out0 = LIBXSMM_DATATYPE_F32; + m_compile_flags = LIBXSMM_GEMM_FLAGS('N', 'N'); + m_prefetching_flags = false; + isa = primitive_isa; +} size_t BrgemmKernelConfig::StaticParams::compute_hash(dnnl::impl::cpu::aarch64::cpu_isa_t aarch_isa) { return hash_combine(0, aarch_isa); } bool BrgemmKernelConfig::StaticParams::operator==(const StaticParams& rhs) const { - return StaticBaseParams::operator==(rhs) && - isa == rhs.isa && - m_type_in0 == rhs.m_type_in0 && - m_type_in1 == rhs.m_type_in1 && - m_type_exec == rhs.m_type_exec && - m_type_out0 == rhs.m_type_out0 && - m_compile_flags == rhs.m_compile_flags && - m_prefetching_flags == rhs.m_prefetching_flags; + return StaticBaseParams::operator==(rhs) && isa == rhs.isa && m_type_in0 == rhs.m_type_in0 && + m_type_in1 == rhs.m_type_in1 && m_type_exec == rhs.m_type_exec && m_type_out0 == rhs.m_type_out0 && + m_compile_flags == rhs.m_compile_flags && m_prefetching_flags == rhs.m_prefetching_flags; } -BrgemmKernelExecutor::BrgemmKernelExecutor(ov::intel_cpu::MultiCacheWeakPtr kernel_cache, - BrgemmKernelConfig config) +BrgemmKernelExecutor::BrgemmKernelExecutor(ov::intel_cpu::MultiCacheWeakPtr kernel_cache, BrgemmKernelConfig config) : CPUKernelExecutor(std::move(kernel_cache), std::move(config)) {} -std::shared_ptr BrgemmKernelExecutor::compile_kernel( - const BrgemmKernelConfig& config) const { +std::shared_ptr BrgemmKernelExecutor::compile_kernel(const BrgemmKernelConfig& config) const { std::shared_ptr compiled_kernel = std::make_shared(); // Brgemm is not executable - nothing to compile @@ -84,8 +74,8 @@ std::shared_ptr BrgemmKernelExecutor::compile_kernel( config.get_type_out0(), config.get_type_exec()); const auto& compile_flag = config.get_compile_flags(); - auto refreshed_compile_flag = config.get_beta() == 0 ? config.get_compile_flags() | LIBXSMM_GEMM_FLAG_BETA_0 : - compile_flag; + auto refreshed_compile_flag = + config.get_beta() == 0 ? config.get_compile_flags() | LIBXSMM_GEMM_FLAG_BETA_0 : compile_flag; compiled_kernel->brgemm_kernel = std::make_shared(COMPILE_BRGEMM_TPP_KERNEL( libxsmm_dispatch_gemm(m_shape, refreshed_compile_flag, config.get_prefetching_flags()))); @@ -119,7 +109,13 @@ void BrgemmKernelExecutor::update_config(const ov::snippets::lowered::Expression replace_full_dim(tpp_mod->get_output_stride(i), expr->get_output_port_descriptor(i)->get_shape().back()); } - config.update(config.get_M(), config.get_N(), config.get_K(), io_strides[0], io_strides[1], io_strides[2], config.get_beta()); + config.update(config.get_M(), + config.get_N(), + config.get_K(), + io_strides[0], + io_strides[1], + io_strides[2], + config.get_beta()); // update compile flag, which is depend on beta. should be part of hash. config.set_compile_flags(config.get_beta() == 0); } diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/kernel_executors/brgemm.hpp b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/kernel_executors/brgemm.hpp index 57af0225dd9e85..aa54e57cc178df 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/kernel_executors/brgemm.hpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/kernel_executors/brgemm.hpp @@ -4,18 +4,10 @@ #pragma once -#include -#include "libxsmm.h" - #include "cpu/aarch64/cpu_isa_traits.hpp" -#include "emitters/plugin/aarch64/jit_emitter.hpp" -#include "emitters/snippets/cpu_kernel_executor_table.hpp" -// #include "emitters/snippets/jit_snippets_call_args.hpp" -// #include "openvino/core/type/element_type.hpp" -// #include "snippets/lowered/loop_info.hpp" -// #include "snippets/lowered/loop_manager.hpp" +#include "emitters/snippets/brgemm_base.hpp" #include "emitters/utils.hpp" -#include "emitters/snippets/x64/kernel_executors/brgemm_base.hpp" +#include "libxsmm.h" namespace ov { namespace intel_cpu { @@ -23,9 +15,10 @@ namespace aarch64 { struct BrgemmKernelConfig : public BrgemmBaseKernelConfig { public: - BrgemmKernelConfig(const element::Type& in0_dtype, - const element::Type& in1_dtype, - dnnl::impl::cpu::aarch64::cpu_isa_t primitive_isa = dnnl::impl::cpu::aarch64::cpu_isa_t::isa_undef); + BrgemmKernelConfig( + const element::Type& in0_dtype, + const element::Type& in1_dtype, + dnnl::impl::cpu::aarch64::cpu_isa_t primitive_isa = dnnl::impl::cpu::aarch64::cpu_isa_t::isa_undef); BrgemmKernelConfig() = delete; std::unique_ptr get_clone_ptr() const override { @@ -71,7 +64,7 @@ struct BrgemmKernelConfig : public BrgemmBaseKernelConfig { } private: - struct StaticParams : public StaticBaseParams{ + struct StaticParams : public StaticBaseParams { StaticParams(const element::Type& in0_dtype, const element::Type& in1_dtype, dnnl::impl::cpu::aarch64::cpu_isa_t primitive_isa); @@ -83,19 +76,19 @@ struct BrgemmKernelConfig : public BrgemmBaseKernelConfig { } size_t compute_hash(dnnl::impl::cpu::aarch64::cpu_isa_t aarch_isa); - dnnl::impl::cpu::aarch64::cpu_isa_t isa{dnnl::impl::cpu::aarch64::isa_undef}; + dnnl::impl::cpu::aarch64::cpu_isa_t isa; libxsmm_datatype m_type_in0; libxsmm_datatype m_type_in1; libxsmm_datatype m_type_out0; libxsmm_datatype m_type_exec; - libxsmm_bitfield m_compile_flags {0}; - const bool m_prefetching_flags{false}; + libxsmm_bitfield m_compile_flags; + bool m_prefetching_flags; }; std::shared_ptr get_static_params() const override { return m_static_params; } - libxsmm_bitfield m_compile_flags {0}; + libxsmm_bitfield m_compile_flags{0}; std::shared_ptr m_static_params{nullptr}; }; @@ -113,11 +106,11 @@ class BrgemmKernelExecutor : public BrgemmBaseKernelExecutor, BrgemmKernelExecutor(ov::intel_cpu::MultiCacheWeakPtr kernel_cache, BrgemmKernelConfig config); virtual ~BrgemmKernelExecutor() = default; - /** Function that will be called in runtime to execute the kernel */ + // Function that will be called in runtime to execute the kernel static void execute(const BrgemmKernelExecutor* executor, void* in0, void* in1, void* out0); private: - std::shared_ptr compile_kernel(const BrgemmKernelConfig& c) const; + std::shared_ptr compile_kernel(const BrgemmKernelConfig& c) const override; void update_config(const ov::snippets::lowered::ExpressionPtr& expr, const ov::snippets::lowered::LinearIRCPtr& linear_ir, diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_base.cpp b/src/plugins/intel_cpu/src/emitters/snippets/brgemm_base.cpp similarity index 99% rename from src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_base.cpp rename to src/plugins/intel_cpu/src/emitters/snippets/brgemm_base.cpp index 008237780de3f6..47225756c24a60 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_base.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/brgemm_base.cpp @@ -252,6 +252,11 @@ void BrgemmBaseKernelExecutor::update_config(const ov::snippets::lowered::Expres beta = get_beta(loop_manager, static_cast(loop_ids.back()), current_expanded_loop_info); } +#ifndef OPENVINO_ARCH_X86_64 + config.update(DIM_CAST(M), DIM_CAST(N), DIM_CAST(K), 0, 0, 0, beta); + return; +#endif + const auto LDA = DIM_CAST(snippets::utils::get_dim_stride(expr->get_input_port(0))); const auto LDC = DIM_CAST(snippets::utils::get_dim_stride(expr->get_output_port(0))); auto LDB = DIM_CAST(snippets::utils::get_dim_stride(expr->get_input_port(1))); @@ -261,7 +266,6 @@ void BrgemmBaseKernelExecutor::update_config(const ov::snippets::lowered::Expres // In case of data repacking LDB is chosen in accordance with repacking buffer size if (with_repacking(brgemm_node->get_type())) LDB = DIM_CAST(brgemm_utils::repacking::compute_LDB(LDB, brgemm_node->get_input_element_type(1))); - config.update(DIM_CAST(M), DIM_CAST(N), DIM_CAST(K), LDA, LDB, LDC, beta); } @@ -327,6 +331,7 @@ void BrgemmBaseKernelExecutor::execute_brgemm_kernel( brgemm_p.do_post_ops = with_comp; brgemm_p.do_apply_comp = with_comp; brgemm_p.skip_accm = 0; + brgemm_p.BS = 1; // default value OV_CPU_JIT_EMITTER_ASSERT(kernel, "has nullptr Brgemm kernel"); (*kernel)(&brgemm_p); diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_base.hpp b/src/plugins/intel_cpu/src/emitters/snippets/brgemm_base.hpp similarity index 100% rename from src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_base.hpp rename to src/plugins/intel_cpu/src/emitters/snippets/brgemm_base.hpp diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_emitter.hpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_emitter.hpp index 9d072065c0fe52..f1a41adfb259d3 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_emitter.hpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_brgemm_emitter.hpp @@ -5,7 +5,7 @@ #pragma once #include "emitters/plugin/x64/jit_emitter.hpp" -#include "emitters/snippets/x64/kernel_executors/brgemm_base.hpp" +#include "emitters/snippets/brgemm_base.hpp" namespace ov { namespace intel_cpu { diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.hpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.hpp index 9cc17049c4d3ae..651a9704c47b05 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.hpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm.hpp @@ -4,7 +4,7 @@ #pragma once -#include "brgemm_base.hpp" +#include "emitters/snippets/brgemm_base.hpp" namespace ov { namespace intel_cpu { diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_amx.hpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_amx.hpp index 733295ec995583..7975b9de452e93 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_amx.hpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/kernel_executors/brgemm_amx.hpp @@ -7,7 +7,7 @@ #include #include -#include "brgemm_base.hpp" +#include "emitters/snippets/brgemm_base.hpp" #include "emitters/plugin/x64/jit_emitter.hpp" #include "emitters/snippets/cpu_kernel_executor_table.hpp" #include "emitters/snippets/jit_snippets_call_args.hpp" diff --git a/src/plugins/intel_cpu/src/emitters/utils.hpp b/src/plugins/intel_cpu/src/emitters/utils.hpp index 3d5b6471fad24d..e7ba602359c604 100644 --- a/src/plugins/intel_cpu/src/emitters/utils.hpp +++ b/src/plugins/intel_cpu/src/emitters/utils.hpp @@ -6,9 +6,9 @@ #include +#include "libxsmm.h" #include "openvino/core/except.hpp" #include "openvino/core/type/element_type.hpp" -#include "libxsmm.h" namespace ov { namespace intel_cpu { @@ -26,14 +26,19 @@ std::string jit_emitter_pretty_name(const std::string& pretty_func); inline libxsmm_datatype ov_to_xsmm_dtype(ov::element::Type_t elemet_type) { switch (elemet_type) { - case ov::element::Type_t::f32 : return LIBXSMM_DATATYPE_F32; - case ov::element::Type_t::bf16 : return LIBXSMM_DATATYPE_BF16; - case ov::element::Type_t::f16 : return LIBXSMM_DATATYPE_F16; - case ov::element::Type_t::i8 : return LIBXSMM_DATATYPE_I8; - case ov::element::Type_t::u8 : return LIBXSMM_DATATYPE_U8; - default: - OV_CPU_JIT_EMITTER_THROW("Attempt to convert unsupported ov data type"); - return LIBXSMM_DATATYPE_IMPLICIT; + case ov::element::Type_t::f32: + return LIBXSMM_DATATYPE_F32; + case ov::element::Type_t::bf16: + return LIBXSMM_DATATYPE_BF16; + case ov::element::Type_t::f16: + return LIBXSMM_DATATYPE_F16; + case ov::element::Type_t::i8: + return LIBXSMM_DATATYPE_I8; + case ov::element::Type_t::u8: + return LIBXSMM_DATATYPE_U8; + default: + OV_CPU_JIT_EMITTER_THROW("Attempt to convert unsupported ov data type"); + return LIBXSMM_DATATYPE_IMPLICIT; } }