diff --git a/samply/src/linux/profiler.rs b/samply/src/linux/profiler.rs index a9818405..de8ee4df 100644 --- a/samply/src/linux/profiler.rs +++ b/samply/src/linux/profiler.rs @@ -19,7 +19,9 @@ use super::perf_event::EventSource; use super::perf_group::{AttachMode, PerfGroup}; use super::proc_maps; use super::process::SuspendedLaunchedProcess; -use crate::linux_shared::{ConvertRegs, Converter, EventInterpretation, MmapRangeOrVec}; +use crate::linux_shared::{ + ConvertRegs, Converter, EventInterpretation, MmapRangeOrVec, OffCpuIndicator, +}; use crate::server::{start_server_main, ServerProps}; use crate::shared::recording_props::{ConversionProps, RecordingProps}; @@ -304,7 +306,7 @@ fn make_converter( main_event_attr_index: 0, main_event_name: "cycles".to_string(), sampling_is_time_based: Some(interval_nanos), - have_context_switches: true, + off_cpu_indicator: Some(OffCpuIndicator::ContextSwitches), sched_switch_attr_index: None, known_event_indices: HashMap::new(), event_names: vec!["cycles".to_string()], diff --git a/samply/src/linux_shared/converter.rs b/samply/src/linux_shared/converter.rs index 72a635f8..dd28d32a 100644 --- a/samply/src/linux_shared/converter.rs +++ b/samply/src/linux_shared/converter.rs @@ -30,7 +30,7 @@ use std::{ops::Range, path::Path}; use super::context_switch::{ContextSwitchHandler, OffCpuSampleGroup}; use super::convert_regs::ConvertRegs; -use super::event_interpretation::EventInterpretation; +use super::event_interpretation::{EventInterpretation, OffCpuIndicator}; use super::injected_jit_object::{correct_bad_perf_jit_so_file, jit_function_name}; use super::kernel_symbols::{kernel_module_build_id, KernelSymbols}; use super::mmap_range_or_vec::MmapRangeOrVec; @@ -74,7 +74,7 @@ where context_switch_handler: ContextSwitchHandler, unresolved_stacks: UnresolvedStacks, off_cpu_weight_per_sample: i32, - have_context_switches: bool, + off_cpu_indicator: Option, event_names: Vec, kernel_symbols: Option, @@ -149,7 +149,7 @@ where off_cpu_weight_per_sample, context_switch_handler: ContextSwitchHandler::new(off_cpu_sampling_interval_ns), unresolved_stacks: UnresolvedStacks::default(), - have_context_switches: interpretation.have_context_switches, + off_cpu_indicator: interpretation.off_cpu_indicator, event_names: interpretation.event_names, kernel_symbols, suspected_pe_mappings: BTreeMap::new(), @@ -230,7 +230,7 @@ where ); } - let cpu_delta = if self.have_context_switches { + let cpu_delta = if self.off_cpu_indicator.is_some() { CpuDelta::from_nanos( self.context_switch_handler .consume_cpu_delta(&mut thread.context_switch_data), @@ -282,6 +282,17 @@ where .convert_no_kernel(stack.iter().rev().cloned()); let thread = process.threads.get_thread_by_tid(tid, &mut self.profile); thread.off_cpu_stack = Some(stack_index); + + if self.off_cpu_indicator == Some(OffCpuIndicator::SchedSwitchAndSamples) { + // Treat this sched_switch sample as a switch-out. + // Sometimes we have sched_switch samples but no context switch records; for + // example when using `simpleperf record --trace-offcpu`. + let timestamp = e + .timestamp + .expect("Can't handle context switch without time"); + self.context_switch_handler + .handle_switch_out(timestamp, &mut thread.context_switch_data); + } } pub fn handle_rss_stat_sample>( diff --git a/samply/src/linux_shared/event_interpretation.rs b/samply/src/linux_shared/event_interpretation.rs index 10d3cb6f..5fd1f6f7 100644 --- a/samply/src/linux_shared/event_interpretation.rs +++ b/samply/src/linux_shared/event_interpretation.rs @@ -15,13 +15,22 @@ pub enum KnownEvent { PageFault, } +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum OffCpuIndicator { + /// We can see when threads go off-CPU and back with CONTEXT_SWITCH records. + ContextSwitches, + /// We can use sched_switch samples to see when threads go off-CPU, and + /// "main event" (e.g. cpu-cycles) samples to see when they come back on-CPU. + SchedSwitchAndSamples, +} + #[derive(Debug, Clone)] pub struct EventInterpretation { pub main_event_attr_index: usize, #[allow(unused)] pub main_event_name: String, pub sampling_is_time_based: Option, - pub have_context_switches: bool, + pub off_cpu_indicator: Option, pub sched_switch_attr_index: Option, pub known_event_indices: HashMap, pub event_names: Vec, @@ -59,6 +68,11 @@ impl EventInterpretation { let sched_switch_attr_index = attrs .iter() .position(|attr_desc| attr_desc.name.as_deref() == Some("sched:sched_switch")); + let off_cpu_indicator = match (have_context_switches, sched_switch_attr_index) { + (true, _) => Some(OffCpuIndicator::ContextSwitches), + (false, Some(_)) => Some(OffCpuIndicator::SchedSwitchAndSamples), + _ => None, + }; let mut known_event_indices = HashMap::new(); let known_events = [ @@ -93,7 +107,7 @@ impl EventInterpretation { main_event_attr_index, main_event_name, sampling_is_time_based, - have_context_switches, + off_cpu_indicator, sched_switch_attr_index, known_event_indices, event_names, diff --git a/samply/src/linux_shared/mod.rs b/samply/src/linux_shared/mod.rs index 009ec3c8..f79eb0d6 100644 --- a/samply/src/linux_shared/mod.rs +++ b/samply/src/linux_shared/mod.rs @@ -15,5 +15,6 @@ mod thread; pub use convert_regs::{ConvertRegs, ConvertRegsAarch64, ConvertRegsX86_64}; pub use converter::Converter; -pub use event_interpretation::{EventInterpretation, KnownEvent}; +#[allow(unused)] +pub use event_interpretation::{EventInterpretation, KnownEvent, OffCpuIndicator}; pub use mmap_range_or_vec::MmapRangeOrVec;