diff --git a/libcoz/profiler.cpp b/libcoz/profiler.cpp
index 81c53a3..38d9fa5 100644
--- a/libcoz/profiler.cpp
+++ b/libcoz/profiler.cpp
@@ -81,7 +81,7 @@ void profiler::startup(const string& outfile,
 
   // Begin sampling in the main thread
   thread_state* state = add_thread();
-  REQUIRE(state) << "Failed to add thread state";
+  REQUIRE(state) << "Failed to add thread";
   begin_sampling(state);
 }
 
@@ -300,7 +300,12 @@ void profiler::shutdown() {
 }
 
 thread_state* profiler::add_thread() {
-  return _thread_states.insert(gettid());
+  // Count the thread only if its state was actually inserted
+  thread_state* inserted = _thread_states.insert(gettid());
+  if (inserted != nullptr) {
+    _num_threads_running += 1;
+  }
+  return inserted;
 }
 
 thread_state* profiler::get_thread_state() {
@@ -309,6 +314,7 @@ thread_state* profiler::get_thread_state() {
 
 void profiler::remove_thread() {
   _thread_states.remove(gettid());
+  _num_threads_running -= 1;
 }
 
 /**
@@ -318,7 +324,7 @@ void* profiler::start_thread(void* p) {
   thread_start_arg* arg = reinterpret_cast<thread_start_arg*>(p);
 
   thread_state* state = get_instance().add_thread();
-  REQUIRE(state) << "Failed to add thread state";
+  REQUIRE(state) << "Failed to add thread";
 
   state->local_delay = arg->_parent_delay_time;
 
diff --git a/libcoz/profiler.h b/libcoz/profiler.h
index 0998a22..d71dbd8 100644
--- a/libcoz/profiler.h
+++ b/libcoz/profiler.h
@@ -132,6 +132,10 @@ class profiler {
   /// Force threads to catch up on delays, and stop sampling before the thread exits
   void handle_pthread_exit(void* result) __attribute__((noreturn)) {
     end_sampling();
+    // If no more threads being sampled, shut down the profiler
+    if (_num_threads_running == 0) {
+      shutdown();
+    }
     real::pthread_exit(result);
     abort(); // Silence g++ warning about noreturn
   }
@@ -223,6 +227,7 @@ class profiler {
   spinlock _latency_points_lock; //< Spinlock that protects the latency points map
 
   static_map _thread_states; //< Map from thread IDs to thread-local state
+  std::atomic<uint64_t> _num_threads_running{0}; //< Number of threads that are currently being sampled (atomic: modified in add_thread/remove_thread, read in handle_pthread_exit)
 
   std::atomic _experiment_active; //< Is an experiment running?
   std::atomic _global_delay; //< The global delay time required