From ad9e324a89901ba8a35c1c9af339fdca28851d09 Mon Sep 17 00:00:00 2001 From: Ilya Isaev Date: Wed, 21 Aug 2024 17:25:03 +0200 Subject: [PATCH] Make thread registration in local_wait_for_all (#1477) local_wait_for_all function seems to be a common spot for both external and worker threads to perform thread registration. So, basically whole implementation is to simply call register_thread only on first thread enter to local_wait_for_all and unregister_thread on last thread exit. Signed-off-by: Isaev, Ilya --- src/tbb/arena.cpp | 9 ++++----- src/tbb/task_dispatcher.h | 15 +++++++++++++-- src/tbb/tcm.h | 5 +++-- src/tbb/tcm_adaptor.cpp | 10 ++++++++-- src/tbb/thread_data.h | 5 ++++- 5 files changed, 32 insertions(+), 12 deletions(-) diff --git a/src/tbb/arena.cpp b/src/tbb/arena.cpp index a7985d826a..6ca062d02f 100644 --- a/src/tbb/arena.cpp +++ b/src/tbb/arena.cpp @@ -195,8 +195,6 @@ void arena::process(thread_data& tls) { return; } - my_tc_client.get_pm_client()->register_thread(); - __TBB_ASSERT( index >= my_num_reserved_slots, "Workers cannot occupy reserved slots" ); tls.attach_arena(*this, index); // worker thread enters the dispatch loop to look for a work @@ -236,8 +234,6 @@ void arena::process(thread_data& tls) { __TBB_ASSERT(tls.my_inbox.is_idle_state(true), nullptr); __TBB_ASSERT(is_alive(my_guard), nullptr); - my_tc_client.get_pm_client()->unregister_thread(); - // In contrast to earlier versions of TBB (before 3.0 U5) now it is possible // that arena may be temporarily left unpopulated by threads. See comments in // arena::on_thread_leaving() for more details. @@ -646,9 +642,11 @@ class nested_arena_context : no_copy { m_orig_arena = td.my_arena; m_orig_slot_index = td.my_arena_index; m_orig_last_observer = td.my_last_observer; + m_orig_is_thread_registered = td.my_is_registered; td.detach_task_dispatcher(); td.attach_arena(nested_arena, slot_index); + td.my_is_registered = false; if (td.my_inbox.is_idle_state(true)) td.my_inbox.set_is_idle(false); task_dispatcher& task_disp = td.my_arena_slot->default_task_dispatcher(); @@ -699,7 +697,7 @@ class nested_arena_context : no_copy { td.leave_task_dispatcher(); td.my_arena_slot->release(); td.my_arena->my_exit_monitors.notify_one(); // do not relax! - + td.my_is_registered = m_orig_is_thread_registered; td.attach_arena(*m_orig_arena, m_orig_slot_index); td.attach_task_dispatcher(*m_orig_execute_data_ext.task_disp); __TBB_ASSERT(td.my_inbox.is_idle_state(false), nullptr); @@ -715,6 +713,7 @@ class nested_arena_context : no_copy { unsigned m_orig_slot_index{}; bool m_orig_fifo_tasks_allowed{}; bool m_orig_critical_task_allowed{}; + bool m_orig_is_thread_registered{}; }; class delegated_task : public d1::task { diff --git a/src/tbb/task_dispatcher.h b/src/tbb/task_dispatcher.h index 20c7c731a7..c818934e5a 100644 --- a/src/tbb/task_dispatcher.h +++ b/src/tbb/task_dispatcher.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2020-2023 Intel Corporation + Copyright (c) 2020-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -249,15 +249,21 @@ d1::task* task_dispatcher::local_wait_for_all(d1::task* t, Waiter& waiter ) { task_dispatcher& task_disp; execution_data_ext old_execute_data_ext; properties old_properties; + bool is_initially_registered; ~dispatch_loop_guard() { task_disp.m_execute_data_ext = old_execute_data_ext; task_disp.m_properties = old_properties; + if (!is_initially_registered) { + task_disp.m_thread_data->my_arena->my_tc_client.get_pm_client()->unregister_thread(); + task_disp.m_thread_data->my_is_registered = false; + } + __TBB_ASSERT(task_disp.m_thread_data && governor::is_thread_data_set(task_disp.m_thread_data), nullptr); __TBB_ASSERT(task_disp.m_thread_data->my_task_dispatcher == &task_disp, nullptr); } - } dl_guard{ *this, m_execute_data_ext, m_properties }; + } dl_guard{ *this, m_execute_data_ext, m_properties, m_thread_data->my_is_registered }; // The context guard to track fp setting and itt tasks. context_guard_helper context_guard; @@ -282,6 +288,11 @@ d1::task* task_dispatcher::local_wait_for_all(d1::task* t, Waiter& waiter ) { m_properties.outermost = false; m_properties.fifo_tasks_allowed = false; + if (!dl_guard.is_initially_registered) { + m_thread_data->my_arena->my_tc_client.get_pm_client()->register_thread(); + m_thread_data->my_is_registered = true; + } + t = get_critical_task(t, ed, isolation, critical_allowed); if (t && m_thread_data->my_inbox.is_idle_state(true)) { // The thread has a work to do. Therefore, marking its inbox as not idle so that diff --git a/src/tbb/tcm.h b/src/tbb/tcm.h index 05fe0434eb..66ee18a2f0 100644 --- a/src/tbb/tcm.h +++ b/src/tbb/tcm.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2023 Intel Corporation + Copyright (c) 2023-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -50,7 +50,8 @@ typedef struct _tcm_permit_flags_t { uint32_t stale : 1; uint32_t rigid_concurrency : 1; uint32_t exclusive : 1; - uint32_t reserved : 29; + uint32_t request_as_inactive : 1; + uint32_t reserved : 28; } tcm_permit_flags_t; typedef struct _tcm_callback_flags_t { diff --git a/src/tbb/tcm_adaptor.cpp b/src/tbb/tcm_adaptor.cpp index e20ebb831d..85ca125b4e 100644 --- a/src/tbb/tcm_adaptor.cpp +++ b/src/tbb/tcm_adaptor.cpp @@ -170,7 +170,7 @@ class tcm_client : public pm_client { __TBB_ASSERT_EX(res == TCM_RESULT_SUCCESS, nullptr); } - void init(d1::constraints& constraints) { + void init(tcm_client_id_t client_id, d1::constraints& constraints) { __TBB_ASSERT(tcm_request_permit, nullptr); __TBB_ASSERT(tcm_deactivate_permit, nullptr); @@ -190,6 +190,12 @@ class tcm_client : public pm_client { my_permit_request.min_sw_threads = 0; my_permit_request.max_sw_threads = 0; + my_permit_request.flags.request_as_inactive = 1; + + tcm_result_t res = tcm_request_permit(client_id, my_permit_request, this, &my_permit_handle, nullptr); + __TBB_ASSERT_EX(res == TCM_RESULT_SUCCESS, nullptr); + + my_permit_request.flags.request_as_inactive = 0; } void register_thread() override { @@ -279,7 +285,7 @@ pm_client* tcm_adaptor::create_client(arena& a) { } void tcm_adaptor::register_client(pm_client* c, d1::constraints& constraints) { - static_cast(c)->init(constraints); + static_cast(c)->init(my_impl->client_id, constraints); } void tcm_adaptor::unregister_and_destroy_client(pm_client& c) { diff --git a/src/tbb/thread_data.h b/src/tbb/thread_data.h index 9dfa492a72..422ec694ec 100644 --- a/src/tbb/thread_data.h +++ b/src/tbb/thread_data.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2020-2023 Intel Corporation + Copyright (c) 2020-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -101,6 +101,7 @@ class thread_data : public ::rml::job thread_data(unsigned short index, bool is_worker) : my_arena_index{ index } , my_is_worker{ is_worker } + , my_is_registered { false } , my_task_dispatcher{ nullptr } , my_arena{ nullptr } , my_last_client{ nullptr } @@ -145,6 +146,8 @@ class thread_data : public ::rml::job //! Indicates if the thread is created by RML const bool my_is_worker; + bool my_is_registered; + //! The current task dipsatcher task_dispatcher* my_task_dispatcher;