Skip to content

Commit ef23761

Browse files
authored
Fix profiling when python symbols aren't available. (#718)
Since Python 3.10, we haven't been able to profile Python interpreters that have been compiled without symbols. This is because CPython changed where the 'PyRuntime' global is stored in Python 3.10, moving it from the BSS section into its own named section in the binary. This especially affected profiling on Windows, where you'd have to install Python symbols to be able to use py-spy. Fix by reading in the address/size of the PyRuntime section from the ELF/Mach-O/PE binaries and using that to scan Python interpreters when symbols aren't available.
1 parent ad69f92 commit ef23761

File tree

2 files changed

+87
-24
lines changed

2 files changed

+87
-24
lines changed

src/binary_parser.rs

+72-24
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ pub struct BinaryInfo {
1212
pub symbols: HashMap<String, u64>,
1313
pub bss_addr: u64,
1414
pub bss_size: u64,
15+
pub pyruntime_addr: u64,
16+
pub pyruntime_size: u64,
1517
#[allow(dead_code)]
1618
pub addr: u64,
1719
#[allow(dead_code)]
@@ -65,11 +67,23 @@ pub fn parse_binary(filename: &Path, addr: u64, size: u64) -> Result<BinaryInfo,
6567
}
6668
};
6769

70+
let mut pyruntime_addr = 0;
71+
let mut pyruntime_size = 0;
6872
let mut bss_addr = 0;
6973
let mut bss_size = 0;
7074
for segment in mach.segments.iter() {
7175
for (section, _) in &segment.sections()? {
72-
if section.name()? == "__bss" {
76+
let name = section.name()?;
77+
if name == "PyRuntime" {
78+
if let Some(addr) = section.addr.checked_add(offset) {
79+
if addr.checked_add(section.size).is_some() {
80+
pyruntime_addr = addr;
81+
pyruntime_size = section.size;
82+
}
83+
}
84+
}
85+
86+
if name == "__bss" {
7387
if let Some(addr) = section.addr.checked_add(offset) {
7488
if addr.checked_add(section.size).is_some() {
7589
bss_addr = addr;
@@ -94,6 +108,8 @@ pub fn parse_binary(filename: &Path, addr: u64, size: u64) -> Result<BinaryInfo,
94108
symbols,
95109
bss_addr,
96110
bss_size,
111+
pyruntime_addr,
112+
pyruntime_size,
97113
addr,
98114
size,
99115
})
@@ -153,6 +169,21 @@ pub fn parse_binary(filename: &Path, addr: u64, size: u64) -> Result<BinaryInfo,
153169
bss_end = bss_header.sh_addr + bss_header.sh_size;
154170
}
155171

172+
let pyruntime_header = elf.section_headers.iter().find(|header| {
173+
strtab
174+
.get_at(header.sh_name)
175+
.map_or(false, |name| name == ".PyRuntime")
176+
});
177+
178+
let mut pyruntime_addr = 0;
179+
let mut pyruntime_size = 0;
180+
if let Some(header) = pyruntime_header {
181+
if let Some(addr) = header.sh_addr.checked_add(offset) {
182+
pyruntime_addr = addr;
183+
pyruntime_size = header.sh_size;
184+
}
185+
}
186+
156187
for sym in elf.syms.iter() {
157188
// Skip imported symbols
158189
if sym.is_import()
@@ -194,6 +225,8 @@ pub fn parse_binary(filename: &Path, addr: u64, size: u64) -> Result<BinaryInfo,
194225
symbols,
195226
bss_addr,
196227
bss_size,
228+
pyruntime_addr,
229+
pyruntime_size,
197230
addr,
198231
size,
199232
})
@@ -207,33 +240,48 @@ pub fn parse_binary(filename: &Path, addr: u64, size: u64) -> Result<BinaryInfo,
207240
}
208241
}
209242

210-
pe.sections
211-
.iter()
212-
.find(|section| section.name.starts_with(b".data"))
213-
.ok_or_else(|| {
214-
format_err!(
215-
"Failed to find .data section in PE binary of {}",
216-
filename.display()
217-
)
218-
})
219-
.map(|data_section| {
220-
let mut bss_addr = 0;
221-
let mut bss_size = 0;
222-
if let Some(addr) = offset.checked_add(data_section.virtual_address as u64) {
223-
if addr.checked_add(data_section.virtual_size as u64).is_some() {
243+
let mut bss_addr = 0;
244+
let mut bss_size = 0;
245+
let mut pyruntime_addr = 0;
246+
let mut pyruntime_size = 0;
247+
let mut found_data = false;
248+
for section in pe.sections.iter() {
249+
if section.name.starts_with(b".data") {
250+
found_data = true;
251+
if let Some(addr) = offset.checked_add(section.virtual_address as u64) {
252+
if addr.checked_add(section.virtual_size as u64).is_some() {
224253
bss_addr = addr;
225-
bss_size = u64::from(data_section.virtual_size);
254+
bss_size = u64::from(section.virtual_size);
226255
}
227256
}
228-
229-
BinaryInfo {
230-
symbols,
231-
bss_addr,
232-
bss_size,
233-
addr,
234-
size,
257+
} else if section.name.starts_with(b"PyRuntim") {
258+
// note that the name is only 8 chars here, so we don't check for
259+
// trailing 'e' in PyRuntime
260+
if let Some(addr) = offset.checked_add(section.virtual_address as u64) {
261+
if addr.checked_add(section.virtual_size as u64).is_some() {
262+
pyruntime_addr = addr;
263+
pyruntime_size = u64::from(section.virtual_size);
264+
}
235265
}
236-
})
266+
}
267+
}
268+
269+
if !found_data {
270+
return Err(format_err!(
271+
"Failed to find .data section in PE binary of {}",
272+
filename.display()
273+
));
274+
}
275+
276+
Ok(BinaryInfo {
277+
symbols,
278+
bss_addr,
279+
bss_size,
280+
pyruntime_size,
281+
pyruntime_addr,
282+
addr,
283+
size,
284+
})
237285
}
238286
_ => Err(format_err!("Unhandled binary type")),
239287
}

src/python_process_info.rs

+15
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,21 @@ fn get_interpreter_address_from_binary<P>(
451451
where
452452
P: ProcessMemory,
453453
{
454+
// First check the PyRuntime section, if it was found
455+
if binary.pyruntime_addr != 0 {
456+
let bss = process.copy(
457+
binary.pyruntime_addr as usize,
458+
binary.pyruntime_size as usize,
459+
)?;
460+
#[allow(clippy::cast_ptr_alignment)]
461+
let addrs = unsafe {
462+
slice::from_raw_parts(bss.as_ptr() as *const usize, bss.len() / size_of::<usize>())
463+
};
464+
if let Ok(addr) = check_interpreter_addresses(addrs, maps, process, version) {
465+
return Ok(addr);
466+
}
467+
}
468+
454469
// We're going to scan the BSS/data section for things, and try to narrowly scan things that
455470
// look like pointers to PyinterpreterState
456471
let bss = process.copy(binary.bss_addr as usize, binary.bss_size as usize)?;

0 commit comments

Comments
 (0)