Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#Issue 27715: Implement RegPrints Class for ARM64 SIMD Platforms #27829

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
amanmogal marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#ifndef DEBUG_CAPABILITIES_HPP
#define DEBUG_CAPABILITIES_HPP

#include <iostream>
#include <cstdint>
#include <arm_neon.h> // For SIMD support
#include "openvino/util/ov_string_utils.hpp" // For ov::util::join

class RegPrints {
public:
/// Emits JIT code that prints a general-purpose register value at runtime by
/// calling print_runtime_gpr(reg_name, value).
///
/// @param gen       Code generator the instructions are emitted into.
/// @param reg_value Value loaded as the callee's second argument.
/// @param reg_name  Label printed next to the value; its address is embedded
///                  in the generated code, so it must outlive that code.
///
/// NOTE(review): rdi/rsi and `call` are x86-64 names; per the review comment
/// below they do not exist on aarch64 (arguments presumably go in x0/x1 and the
/// call is made with `blr`) - confirm against the jit_generator in use.
static void print_gpr(jit_generator &gen, const uint64_t &reg_value, const char *reg_name) {
    // print_runtime_gpr is declared as (name, value), so the name has to be in
    // the first argument register and the value in the second one.
    gen.mov(gen.rdi, reinterpret_cast<uint64_t>(reg_name)); // 1st argument: register name
    gen.mov(gen.rsi, reg_value); // 2nd argument: value to print
    gen.call(reinterpret_cast<void(*)(const char*, uint64_t)>(print_runtime_gpr)); // Call runtime function
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The instruction call is missed on aarch64. Please take a look at the instruction blr. It calls a subroutine at an address in a register.

The registers rdi and rsi are also missed on aarch64. This naming is used on x64 platforms.

Also, as mentioned in the issue, the perfect example for this task is the implementation of jit_power_static_emitter which generates code to call std::powf in runtime. Please take a look at this impl.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By the way, could you please share the output of test of some Eltwise op with RegPrints using?
For example, you can launch tests for Add op: ./bin/[platform]/[build_type]/ov_cpu_func_tests --gtest_filter="*smoke*Eltwise*Add*"?

}

/// Emits JIT code that prints four float lanes at runtime by calling
/// print_runtime_simd(reg_name, &reg_value).
///
/// @param gen       Code generator the instructions are emitted into.
/// @param reg_value Vector whose ADDRESS (not contents) is embedded in the
///                  generated code and read when that code runs.
/// @param reg_name  Label printed next to the lanes; its address is embedded
///                  in the generated code as well.
///
/// NOTE(review): both embedded addresses must stay valid until the generated
/// code has executed - confirm the callers guarantee this.
/// NOTE(review): rdi/rsi and `call` are x86-64 names; aarch64 presumably needs
/// x0/x1 and `blr` - confirm against the jit_generator in use.
static void print_simd(jit_generator &gen, const float32x4_t &reg_value, const char *reg_name) {
    // print_runtime_simd is declared as (name, values), so the name has to be
    // in the first argument register and the lane pointer in the second one.
    gen.mov(gen.rdi, reinterpret_cast<uint64_t>(reg_name)); // 1st argument: register name
    gen.mov(gen.rsi, reinterpret_cast<uint64_t>(&reg_value)); // 2nd argument: address of the lanes
    gen.call(reinterpret_cast<void(*)(const char*, const float*)>(print_runtime_simd)); // Call runtime function
}

private:
// Runtime functions to print the registers
/// Prints "Register <name>: <value in hex>" to std::cout and flushes.
///
/// @param reg_name Label to print; a null pointer is shown as "<null>" because
///                 streaming a null C string would put std::cout into a failed
///                 state and silence all later output.
/// @param value    Register contents, printed in hexadecimal without a prefix.
static void print_runtime_gpr(const char *reg_name, uint64_t value) {
    // std::hex is sticky: without restoring the flags every later integer
    // written to std::cout by the host program would come out in hexadecimal.
    const std::ios_base::fmtflags saved_flags = std::cout.flags();
    std::cout << "Register " << (reg_name != nullptr ? reg_name : "<null>") << ": "
              << std::hex << value << std::endl; // flush so the line survives a later crash
    std::cout.flags(saved_flags);
}

/// Prints "SIMD Register <name>: [v0, v1, v2, v3]" to std::cout and flushes.
///
/// @param reg_name Label to print; a null pointer is shown as "<null>".
/// @param values   Pointer to four consecutive floats; a null pointer prints
///                 "[<null>]" instead of being dereferenced.
static void print_runtime_simd(const char *reg_name, const float *values) {
    std::cout << "SIMD Register " << (reg_name != nullptr ? reg_name : "<null>") << ": [";
    if (values != nullptr) {
        std::cout << values[0] << ", " << values[1] << ", " << values[2] << ", " << values[3];
    } else {
        // Called from generated code, so a bad pointer should be reported, not crash.
        std::cout << "<null>";
    }
    std::cout << "]" << std::endl; // flush so the line survives a later crash
}
};

#endif // DEBUG_CAPABILITIES_HPP
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,18 @@ void jit_uni_eltwise_generic<isa>::generate() {
if (jep.use_runtime_ptrs) {
for (size_t i = 0; i < jep.inputs_number; i++) {
ldr(start_to_offsets, ptr(reg_const_params, static_cast<int32_t>(offsetof(node::jit_eltwise_call_args_ptrs, src_offsets) + i * sizeof(size_t))));
RegPrints::print_gpr(start_to_offsets.getIdx(), "start_to_offsets");
ldr(get_src_reg(i), ptr(reg_const_params, static_cast<int32_t>(offsetof(node::jit_eltwise_call_args_ptrs, src_ptr[0]) + i * sizeof(size_t))));
RegPrints::print_gpr(get_src_reg(i).getIdx(), "src_ptr");
XReg offset_reg = get_aux_gpr(0); // X_TMP_0;
XReg index_reg = get_aux_gpr(1); // X_TMP_1;
for (int j = 0; j < offset_count; j++) {
ldr(offset_reg, ptr(start_to_offsets, static_cast<int32_t>(j * sizeof(size_t))));
RegPrints::print_gpr(offset_reg.getIdx(), "offset_reg");
ldr(index_reg, ptr(reg_indexes, static_cast<int32_t>(j * sizeof(size_t))));
RegPrints::print_gpr(index_reg.getIdx(), "index_reg");
madd(get_src_reg(i), offset_reg, index_reg, get_src_reg(i));
RegPrints::print_gpr(get_src_reg(i).getIdx(), "effective_address");
}
}

Expand Down
Loading