Parallelise Renderer::update_texture with OpenMP (reviewed revision).

Changes relative to the patch as originally posted:
  * CMake: find_package(OpenMP) is now REQUIRED. OpenMP::OpenMP_CXX is linked
    unconditionally, so a missing OpenMP previously surfaced only as an
    unknown-target error at generate time.
  * threshold_body: restored the template parameter list that
    `if constexpr (UnalignedFloats)` depends on, and the call sites now pass
    <true>/<false> explicitly, mirroring the old runtime m_unaligned_floats check.
  * The unaligned float loop is no longer an OpenMP loop: threshold_body<true>
    advances the loop counter and writes entries i..i+3, so its iterations
    are not independent.

Caveats: the header names on the #include lines were already missing from this
copy of the patch and are left as found; blank-line placement and indentation
are reconstructed; the index lines are carried over and no longer describe the
post-image.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9d8e769..6aa23e2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -69,11 +69,14 @@ find_package(glad CONFIG REQUIRED)
 # Dependency: nativefiledialog
 find_package(unofficial-nativefiledialog CONFIG REQUIRED)
 
+# OpenMP
+find_package(OpenMP REQUIRED)
+
 ### Add source tree
 add_subdirectory(src)
 
 target_link_libraries(${PROJECT_NAME} PRIVATE
-    glad::glad glfw imgui::imgui glm spdlog::spdlog imgui_glfw_gl3_backend stb_image util unofficial::nativefiledialog::nfd)
+    glad::glad glfw imgui::imgui glm spdlog::spdlog imgui_glfw_gl3_backend stb_image util unofficial::nativefiledialog::nfd OpenMP::OpenMP_CXX)
 
 ### Add externals directory for additional externals
 add_subdirectory(external)
diff --git a/src/renderer.cpp b/src/renderer.cpp
index 2bc0d56..6f9a6f5 100644
--- a/src/renderer.cpp
+++ b/src/renderer.cpp
@@ -6,6 +6,7 @@
 
 #include
 #include
+#include
 
 #include
 #include
@@ -195,6 +196,44 @@ void Renderer::change_scale(float scale_mult) {
     is_screen_quad_updated = false;
 }
 
+/// Colours the texture entry (or entries) for data index `i` using the first enabled
+/// range in `float_ranges` that contains `val`; values in [-0.0001, 0.0001] are skipped.
+///
+/// With `UnalignedFloats == true`, a match on the non-four-byte-stride path advances `i`
+/// by 3 so the caller's `++i` steps past the four entries just written. A loop that does
+/// this must stay serial. With `UnalignedFloats == false`, `i` is never modified.
+template <bool UnalignedFloats>
+void Renderer::threshold_body(s32& i, f32 val) {
+    if (val == 0.0)
+        return;
+
+    if (-0.0001 <= val && val <= 0.0001)
+        return;
+
+    if (four_byte_stride) {
+        for (const auto& range : float_ranges) {
+            if (range.enabled && range.start <= val && val <= range.end) {
+                m_texture_data[i / 4] = range.color;
+
+                break;
+            }
+        }
+    } else {
+        for (const auto& range : float_ranges) {
+            if (range.enabled && range.start <= val && val <= range.end) {
+                m_texture_data[i + 0] = range.color;
+                m_texture_data[i + 1] = range.color;
+                m_texture_data[i + 2] = range.color;
+                m_texture_data[i + 3] = range.color;
+
+                if constexpr (UnalignedFloats)
+                    i += 3; // and loop statement will increase i by 1 more
+                break;
+            }
+        }
+    }
+}
+
 void Renderer::update_texture() {
     auto data_size = m_data.size();
     auto tex_data_offset = m_texture_data_offset;
@@ -228,57 +267,28 @@ void Renderer::update_texture() {
                 }
             }
 
-            auto threshold_body = [&](s32& i, f32 val) {
-                if (val == 0.0)
-                    return;
-
-                u32 color{};
-
-                if (-0.0001 <= val && val <= 0.0001)
-                    return;
-
-                if (four_byte_stride) {
-                    for (const auto& range : float_ranges) {
-                        if (range.enabled && range.start <= val && val <= range.end) {
-                            m_texture_data[i / 4] = range.color;
-
-                            break;
-                        }
-                    }
-                } else {
-                    for (const auto& range : float_ranges) {
-                        if (range.enabled && range.start <= val && val <= range.end) {
-                            m_texture_data[i + 0] = range.color;
-                            m_texture_data[i + 1] = range.color;
-                            m_texture_data[i + 2] = range.color;
-                            m_texture_data[i + 3] = range.color;
-
-                            if (m_unaligned_floats)
-                                i += 3; // and loop statement will increase i by 1 more
-                            break;
-                        }
-                    }
-                }
-            };
-
             if (m_unaligned_floats) {
+                // Kept serial on purpose: threshold_body<true> advances i and writes
+                // entries i..i+3, so neighbouring iterations are not independent.
                 for (s32 i = 0; i < max_data_size; ++i) {
                     const size_t offset = m_texture_data_offset + i;
                     const f32 val = *((f32*)&m_data[offset]);
 
-                    threshold_body(i, val);
+                    threshold_body<true>(i, val);
                 }
             } else {
+#pragma omp parallel for
                 for (s32 i = 0; i < max_data_size; i += 4) {
                     const size_t offset = ((m_texture_data_offset + i) / 4) * 4; // Align to 4
                     const f32 val = *((f32*)&m_data[offset]);
 
-                    threshold_body(i, val);
+                    threshold_body<false>(i, val);
                 }
             }
             break;
         }
         case DrawMode::Paletted: {
+#pragma omp parallel for
             for (s32 i = 0; i < max_data_size; i++) {
                 const u8 val = m_data[size_t(m_texture_data_offset + i)];
                 m_texture_data[i] = palette_colors[val];
@@ -287,6 +297,7 @@ void Renderer::update_texture() {
             break;
         }
         case DrawMode::RGBA: {
+#pragma omp parallel for
             for (s32 i = 0; i < max_data_size; i += 4) {
                 const u32 val = *((u32*)&m_data[m_texture_data_offset + i]);
                 const u8 r = (val >> 0) & 0xFF;
diff --git a/src/renderer.hpp b/src/renderer.hpp
index b551599..8b956ca 100644
--- a/src/renderer.hpp
+++ b/src/renderer.hpp
@@ -39,6 +39,9 @@ class Renderer {
     void palette_rainbow();
     void palette_golden_angle();
 
+    template <bool UnalignedFloats>
+    void threshold_body(s32& i, f32 val);
+
     bool is_inited{};
 
     glm::ivec2 m_texture_size{};