Skip to content

Commit

Permalink
Add a way to record only the main thread on macOS.
Browse files Browse the repository at this point in the history
I was seeing too much overhead with lots of threads in a case
where I was only interested in the main thread anyway.
  • Loading branch information
mstange committed Feb 14, 2024
1 parent 6c20010 commit 48fa241
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 15 deletions.
14 changes: 9 additions & 5 deletions samply/src/mac/sampler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use mach::port::mach_port_t;

use std::mem;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::thread;
use std::time::Duration;
use std::time::SystemTime;
Expand Down Expand Up @@ -33,8 +34,8 @@ pub struct TaskInit {
pub struct Sampler {
command_name: String,
task_receiver: Receiver<TaskInit>,
recording_props: RecordingProps,
conversion_props: ConversionProps,
recording_props: Arc<RecordingProps>,
conversion_props: Arc<ConversionProps>,
}

impl Sampler {
Expand All @@ -55,8 +56,8 @@ impl Sampler {
Sampler {
command_name,
task_receiver,
recording_props,
conversion_props,
recording_props: Arc::new(recording_props),
conversion_props: Arc::new(conversion_props),
}
}

Expand Down Expand Up @@ -100,6 +101,8 @@ impl Sampler {
&self.command_name,
&mut profile,
process_recycler.as_mut(),
self.recording_props.clone(),
self.conversion_props.clone(),
)
.expect("couldn't create root TaskProfiler");

Expand Down Expand Up @@ -129,6 +132,8 @@ impl Sampler {
&self.command_name,
&mut profile,
process_recycler.as_mut(),
self.recording_props.clone(),
self.conversion_props.clone(),
) {
live_tasks.push(new_task);
} else {
Expand Down Expand Up @@ -164,7 +169,6 @@ impl Sampler {
&mut profile,
&mut stack_scratch_buffer,
&mut unresolved_stacks,
self.conversion_props.fold_recursive_prefix,
)?;
if still_alive {
live_tasks.push(task);
Expand Down
31 changes: 21 additions & 10 deletions samply/src/mac/task_profiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use std::collections::{HashMap, HashSet};
use std::mem;
use std::ops::{Deref, Range};
use std::path::{Path, PathBuf};
use std::sync::Arc;

use crate::shared::jit_category_manager::JitCategoryManager;
use crate::shared::jit_function_recycler::JitFunctionRecycler;
Expand All @@ -32,6 +33,7 @@ use crate::shared::lib_mappings::{
use crate::shared::marker_file::get_markers;
use crate::shared::perf_map::try_load_perf_map;
use crate::shared::process_sample_data::{MarkerSpanOnThread, ProcessSampleData};
use crate::shared::recording_props::{ConversionProps, RecordingProps};
use crate::shared::recycling::{ProcessRecycler, ProcessRecyclingData, ThreadRecycler};
use crate::shared::timestamp_converter::TimestampConverter;
use crate::shared::unresolved_samples::{UnresolvedSamples, UnresolvedStacks};
Expand Down Expand Up @@ -109,6 +111,8 @@ pub struct TaskProfiler {
thread_recycler: Option<ThreadRecycler>,
jit_function_recycler: Option<JitFunctionRecycler>,
timestamp_converter: TimestampConverter,
recording_props: Arc<RecordingProps>,
conversion_props: Arc<ConversionProps>,
}

impl TaskProfiler {
Expand All @@ -118,6 +122,8 @@ impl TaskProfiler {
command_name: &str,
profile: &mut Profile,
mut process_recycler: Option<&mut ProcessRecycler>,
recording_props: Arc<RecordingProps>,
conversion_props: Arc<ConversionProps>,
) -> Result<Self, SamplingError> {
let TaskInit {
start_time_mono,
Expand All @@ -141,7 +147,7 @@ impl TaskProfiler {
})
.unwrap_or_else(|| command_name.to_string());

let thread_acts = get_thread_list(task)?;
let thread_acts = get_thread_list(task, recording_props.main_thread_only)?;
if thread_acts.is_empty() {
return Err(SamplingError::Ignorable(
"No threads",
Expand Down Expand Up @@ -255,14 +261,15 @@ impl TaskProfiler {
thread_recycler,
jit_function_recycler,
timestamp_converter,
recording_props,
conversion_props,
};

task_profiler.process_lib_modifications(start_time_mono, initial_lib_mods, profile);

Ok(task_profiler)
}

#[allow(clippy::too_many_arguments)]
pub fn sample(
&mut self,
now: Timestamp,
Expand All @@ -271,7 +278,6 @@ impl TaskProfiler {
profile: &mut Profile,
stack_scratch_buffer: &mut Vec<FrameAddress>,
unresolved_stacks: &mut UnresolvedStacks,
fold_recursive_prefix: bool,
) -> Result<bool, SamplingError> {
let result = self.sample_impl(
now,
Expand All @@ -280,7 +286,6 @@ impl TaskProfiler {
profile,
stack_scratch_buffer,
unresolved_stacks,
fold_recursive_prefix,
);
match result {
Ok(()) => Ok(true),
Expand All @@ -303,7 +308,6 @@ impl TaskProfiler {
}
}

#[allow(clippy::too_many_arguments)]
fn sample_impl(
&mut self,
now: Timestamp,
Expand All @@ -312,15 +316,14 @@ impl TaskProfiler {
profile: &mut Profile,
stack_scratch_buffer: &mut Vec<FrameAddress>,
unresolved_stacks: &mut UnresolvedStacks,
fold_recursive_prefix: bool,
) -> Result<(), SamplingError> {
// First, check for any newly-loaded libraries.
if let Ok(changes) = self.lib_info_manager.check_for_changes() {
self.process_lib_modifications(now_mono, changes, profile);
}

// Enumerate threads.
let thread_acts = get_thread_list(self.task)?;
let thread_acts = get_thread_list(self.task, self.recording_props.main_thread_only)?;
let previously_live_threads: HashSet<_> = self.live_threads.keys().cloned().collect();
let mut now_live_threads = HashSet::new();
for thread_act in thread_acts {
Expand Down Expand Up @@ -368,7 +371,7 @@ impl TaskProfiler {
stack_scratch_buffer,
unresolved_stacks,
&mut self.unresolved_samples,
fold_recursive_prefix,
self.conversion_props.fold_recursive_prefix,
)?;
if still_alive {
now_live_threads.insert(thread_act);
Expand Down Expand Up @@ -677,7 +680,10 @@ fn get_debug_frame(file_path: &str) -> Option<UnwindSectionBytes> {
}
}

fn get_thread_list(task: mach_port_t) -> Result<Vec<thread_act_t>, SamplingError> {
fn get_thread_list(
task: mach_port_t,
main_thread_only: bool,
) -> Result<Vec<thread_act_t>, SamplingError> {
let mut thread_list: thread_act_port_array_t = std::ptr::null_mut();
let mut thread_count: mach_msg_type_number_t = Default::default();
unsafe { task_threads(task, &mut thread_list, &mut thread_count) }
Expand All @@ -691,7 +697,7 @@ fn get_thread_list(task: mach_port_t) -> Result<Vec<thread_act_t>, SamplingError
err => SamplingError::Ignorable("task_threads in get_thread_list", err),
})?;

let thread_acts =
let mut thread_acts =
unsafe { std::slice::from_raw_parts(thread_list, thread_count as usize) }.to_owned();

unsafe {
Expand All @@ -704,6 +710,11 @@ fn get_thread_list(task: mach_port_t) -> Result<Vec<thread_act_t>, SamplingError
.into_result()
.map_err(|err| SamplingError::Fatal("mach_vm_deallocate in get_thread_list", err))?;

if main_thread_only {
// Keep only the main thread. It's always the first thread in the list.
thread_acts.truncate(1);
}

Ok(thread_acts)
}

Expand Down
6 changes: 6 additions & 0 deletions samply/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,11 @@ struct RecordArgs {
#[arg(long, default_value = "1")]
iteration_count: u32,

/// Reduce profiling overhead by only recording the main thread.
/// This option is only respected on macOS.
#[arg(long)]
main_thread_only: bool,

#[command(flatten)]
conversion_args: ConversionArgs,

Expand Down Expand Up @@ -242,6 +247,7 @@ impl RecordArgs {
output_file: self.output.clone(),
time_limit,
interval,
main_thread_only: self.main_thread_only,
}
}

Expand Down
1 change: 1 addition & 0 deletions samply/src/shared/recording_props.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ pub struct RecordingProps {
pub output_file: PathBuf,
pub time_limit: Option<Duration>,
pub interval: Duration,
pub main_thread_only: bool,
}

pub struct ConversionProps {
Expand Down

0 comments on commit 48fa241

Please sign in to comment.