From 1ba6b56c817777c5f504a5975591da6de68dd361 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 1 Aug 2024 14:18:16 -0700 Subject: [PATCH 1/6] std.debug.Info: extract to separate file --- lib/std/debug.zig | 1377 +-------------------------------------- lib/std/debug/Dwarf.zig | 6 +- lib/std/debug/Info.zig | 1377 +++++++++++++++++++++++++++++++++++++++ lib/std/pdb.zig | 6 +- 4 files changed, 1390 insertions(+), 1376 deletions(-) create mode 100644 lib/std/debug/Info.zig diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 22a7e551ec4c..6dac92188e7e 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -1,5 +1,5 @@ -const std = @import("std.zig"); const builtin = @import("builtin"); +const std = @import("std.zig"); const math = std.math; const mem = std.mem; const io = std.io; @@ -19,6 +19,7 @@ const native_os = builtin.os.tag; const native_endian = native_arch.endian(); pub const Dwarf = @import("debug/Dwarf.zig"); +pub const Info = @import("debug/Info.zig"); pub const runtime_safety = switch (builtin.mode) { .Debug, .ReleaseSafe => true, @@ -46,39 +47,6 @@ pub const sys_can_stack_trace = switch (builtin.cpu.arch) { else => true, }; -pub const LineInfo = struct { - line: u64, - column: u64, - file_name: []const u8, - - pub fn deinit(self: LineInfo, allocator: mem.Allocator) void { - allocator.free(self.file_name); - } -}; - -pub const SymbolInfo = struct { - symbol_name: []const u8 = "???", - compile_unit_name: []const u8 = "???", - line_info: ?LineInfo = null, - - pub fn deinit(self: SymbolInfo, allocator: mem.Allocator) void { - if (self.line_info) |li| { - li.deinit(allocator); - } - } -}; -const PdbOrDwarf = union(enum) { - pdb: pdb.Pdb, - dwarf: Dwarf, - - fn deinit(self: *PdbOrDwarf, allocator: mem.Allocator) void { - switch (self.*) { - .pdb => |*inner| inner.deinit(), - .dwarf => |*inner| inner.deinit(allocator), - } - } -}; - /// Allows the caller to freely write to stderr until `unlockStdErr` is called. 
/// /// During the lock, any `std.Progress` information is cleared from the terminal. @@ -110,7 +78,7 @@ pub fn getSelfDebugInfo() !*Info { if (self_debug_info) |*info| { return info; } else { - self_debug_info = try openSelfDebugInfo(getDebugInfoAllocator()); + self_debug_info = try Info.openSelf(getDebugInfoAllocator()); return &self_debug_info.?; } } @@ -957,51 +925,6 @@ pub fn writeStackTraceWindows( } } -fn machoSearchSymbols(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol { - var min: usize = 0; - var max: usize = symbols.len - 1; - while (min < max) { - const mid = min + (max - min) / 2; - const curr = &symbols[mid]; - const next = &symbols[mid + 1]; - if (address >= next.address()) { - min = mid + 1; - } else if (address < curr.address()) { - max = mid; - } else { - return curr; - } - } - - const max_sym = &symbols[symbols.len - 1]; - if (address >= max_sym.address()) - return max_sym; - - return null; -} - -test machoSearchSymbols { - const symbols = [_]MachoSymbol{ - .{ .addr = 100, .strx = undefined, .size = undefined, .ofile = undefined }, - .{ .addr = 200, .strx = undefined, .size = undefined, .ofile = undefined }, - .{ .addr = 300, .strx = undefined, .size = undefined, .ofile = undefined }, - }; - - try testing.expectEqual(null, machoSearchSymbols(&symbols, 0)); - try testing.expectEqual(null, machoSearchSymbols(&symbols, 99)); - try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 100).?); - try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 150).?); - try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 199).?); - - try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 200).?); - try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 250).?); - try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 299).?); - - try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 300).?); - try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 
301).?); - try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 5000).?); -} - fn printUnknownSource(debug_info: *Info, out_stream: anytype, address: usize, tty_config: io.tty.Config) !void { const module_name = debug_info.getModuleNameForAddress(address); return printLineInfo( @@ -1058,7 +981,7 @@ pub fn printSourceAtAddress(debug_info: *Info, out_stream: anytype, address: usi fn printLineInfo( out_stream: anytype, - line_info: ?LineInfo, + line_info: ?Info.SourceLocation, address: usize, symbol_name: []const u8, compile_unit_name: []const u8, @@ -1104,428 +1027,7 @@ fn printLineInfo( } } -pub const OpenSelfDebugInfoError = error{ - MissingDebugInfo, - UnsupportedOperatingSystem, -} || @typeInfo(@typeInfo(@TypeOf(Info.init)).Fn.return_type.?).ErrorUnion.error_set; - -pub fn openSelfDebugInfo(allocator: mem.Allocator) OpenSelfDebugInfoError!Info { - nosuspend { - if (builtin.strip_debug_info) - return error.MissingDebugInfo; - if (@hasDecl(root, "os") and @hasDecl(root.os, "debug") and @hasDecl(root.os.debug, "openSelfDebugInfo")) { - return root.os.debug.openSelfDebugInfo(allocator); - } - switch (native_os) { - .linux, - .freebsd, - .netbsd, - .dragonfly, - .openbsd, - .macos, - .solaris, - .illumos, - .windows, - => return try Info.init(allocator), - else => return error.UnsupportedOperatingSystem, - } - } -} - -fn readCoffDebugInfo(allocator: mem.Allocator, coff_obj: *coff.Coff) !ModuleDebugInfo { - nosuspend { - var di = ModuleDebugInfo{ - .base_address = undefined, - .coff_image_base = coff_obj.getImageBase(), - .coff_section_headers = undefined, - }; - - if (coff_obj.getSectionByName(".debug_info")) |_| { - // This coff file has embedded DWARF debug info - var sections: Dwarf.SectionArray = Dwarf.null_section_array; - errdefer for (sections) |section| if (section) |s| if (s.owned) allocator.free(s.data); - - inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |section, i| { - sections[i] = if (coff_obj.getSectionByName("." 
++ section.name)) |section_header| blk: { - break :blk .{ - .data = try coff_obj.getSectionDataAlloc(section_header, allocator), - .virtual_address = section_header.virtual_address, - .owned = true, - }; - } else null; - } - - var dwarf = Dwarf{ - .endian = native_endian, - .sections = sections, - .is_macho = false, - }; - - try Dwarf.open(&dwarf, allocator); - di.dwarf = dwarf; - } - - const raw_path = try coff_obj.getPdbPath() orelse return di; - const path = blk: { - if (fs.path.isAbsolute(raw_path)) { - break :blk raw_path; - } else { - const self_dir = try fs.selfExeDirPathAlloc(allocator); - defer allocator.free(self_dir); - break :blk try fs.path.join(allocator, &.{ self_dir, raw_path }); - } - }; - defer if (path.ptr != raw_path.ptr) allocator.free(path); - - di.pdb = pdb.Pdb.init(allocator, path) catch |err| switch (err) { - error.FileNotFound, error.IsDir => { - if (di.dwarf == null) return error.MissingDebugInfo; - return di; - }, - else => return err, - }; - try di.pdb.?.parseInfoStream(); - try di.pdb.?.parseDbiStream(); - - if (!mem.eql(u8, &coff_obj.guid, &di.pdb.?.guid) or coff_obj.age != di.pdb.?.age) - return error.InvalidDebugInfo; - - // Only used by the pdb path - di.coff_section_headers = try coff_obj.getSectionHeadersAlloc(allocator); - errdefer allocator.free(di.coff_section_headers); - - return di; - } -} - -fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 { - const start = math.cast(usize, offset) orelse return error.Overflow; - const end = start + (math.cast(usize, size) orelse return error.Overflow); - return ptr[start..end]; -} - -/// Reads debug info from an ELF file, or the current binary if none in specified. -/// If the required sections aren't present but a reference to external debug info is, -/// then this this function will recurse to attempt to load the debug sections from -/// an external file. 
-pub fn readElfDebugInfo( - allocator: mem.Allocator, - elf_filename: ?[]const u8, - build_id: ?[]const u8, - expected_crc: ?u32, - parent_sections: *Dwarf.SectionArray, - parent_mapped_mem: ?[]align(mem.page_size) const u8, -) !ModuleDebugInfo { - nosuspend { - const elf_file = (if (elf_filename) |filename| blk: { - break :blk fs.cwd().openFile(filename, .{}); - } else fs.openSelfExe(.{})) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - else => return err, - }; - - const mapped_mem = try mapWholeFile(elf_file); - if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo; - - const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]); - if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic; - if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion; - - const endian: std.builtin.Endian = switch (hdr.e_ident[elf.EI_DATA]) { - elf.ELFDATA2LSB => .little, - elf.ELFDATA2MSB => .big, - else => return error.InvalidElfEndian, - }; - assert(endian == native_endian); // this is our own debug info - - const shoff = hdr.e_shoff; - const str_section_off = shoff + @as(u64, hdr.e_shentsize) * @as(u64, hdr.e_shstrndx); - const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(&mapped_mem[math.cast(usize, str_section_off) orelse return error.Overflow])); - const header_strings = mapped_mem[str_shdr.sh_offset..][0..str_shdr.sh_size]; - const shdrs = @as( - [*]const elf.Shdr, - @ptrCast(@alignCast(&mapped_mem[shoff])), - )[0..hdr.e_shnum]; - - var sections: Dwarf.SectionArray = Dwarf.null_section_array; - - // Combine section list. This takes ownership over any owned sections from the parent scope. 
- for (parent_sections, &sections) |*parent, *section| { - if (parent.*) |*p| { - section.* = p.*; - p.owned = false; - } - } - errdefer for (sections) |section| if (section) |s| if (s.owned) allocator.free(s.data); - - var separate_debug_filename: ?[]const u8 = null; - var separate_debug_crc: ?u32 = null; - - for (shdrs) |*shdr| { - if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue; - const name = mem.sliceTo(header_strings[shdr.sh_name..], 0); - - if (mem.eql(u8, name, ".gnu_debuglink")) { - const gnu_debuglink = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); - const debug_filename = mem.sliceTo(@as([*:0]const u8, @ptrCast(gnu_debuglink.ptr)), 0); - const crc_offset = mem.alignForward(usize, @intFromPtr(&debug_filename[debug_filename.len]) + 1, 4) - @intFromPtr(gnu_debuglink.ptr); - const crc_bytes = gnu_debuglink[crc_offset..][0..4]; - separate_debug_crc = mem.readInt(u32, crc_bytes, native_endian); - separate_debug_filename = debug_filename; - continue; - } - - var section_index: ?usize = null; - inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |section, i| { - if (mem.eql(u8, "." ++ section.name, name)) section_index = i; - } - if (section_index == null) continue; - if (sections[section_index.?] != null) continue; - - const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); - sections[section_index.?]
= if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: { - var section_stream = io.fixedBufferStream(section_bytes); - var section_reader = section_stream.reader(); - const chdr = section_reader.readStruct(elf.Chdr) catch continue; - if (chdr.ch_type != .ZLIB) continue; - - var zlib_stream = std.compress.zlib.decompressor(section_stream.reader()); - - const decompressed_section = try allocator.alloc(u8, chdr.ch_size); - errdefer allocator.free(decompressed_section); - - const read = zlib_stream.reader().readAll(decompressed_section) catch continue; - assert(read == decompressed_section.len); - - break :blk .{ - .data = decompressed_section, - .virtual_address = shdr.sh_addr, - .owned = true, - }; - } else .{ - .data = section_bytes, - .virtual_address = shdr.sh_addr, - .owned = false, - }; - } - - const missing_debug_info = - sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; - - // Attempt to load debug info from an external file - // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html - if (missing_debug_info) { - - // Only allow one level of debug info nesting - if (parent_mapped_mem) |_| { - return error.MissingDebugInfo; - } - - const global_debug_directories = [_][]const u8{ - "/usr/lib/debug", - }; - - // /.build-id/<2-character id prefix>/.debug - if (build_id) |id| blk: { - if (id.len < 3) break :blk; - - // Either md5 (16 bytes) or sha1 (20 bytes) are used here in practice - const extension = ".debug"; - var id_prefix_buf: [2]u8 = undefined; - var filename_buf: [38 + extension.len]u8 = undefined; - - _ = std.fmt.bufPrint(&id_prefix_buf, "{s}", .{std.fmt.fmtSliceHexLower(id[0..1])}) catch unreachable; - const filename = std.fmt.bufPrint( - &filename_buf, - "{s}" ++ extension, - .{std.fmt.fmtSliceHexLower(id[1..])}, - ) catch break 
:blk; - - for (global_debug_directories) |global_directory| { - const path = try fs.path.join(allocator, &.{ global_directory, ".build-id", &id_prefix_buf, filename }); - defer allocator.free(path); - - return readElfDebugInfo(allocator, path, null, separate_debug_crc, §ions, mapped_mem) catch continue; - } - } - - // use the path from .gnu_debuglink, in the same search order as gdb - if (separate_debug_filename) |separate_filename| blk: { - if (elf_filename != null and mem.eql(u8, elf_filename.?, separate_filename)) return error.MissingDebugInfo; - - // / - if (readElfDebugInfo(allocator, separate_filename, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} - - // /.debug/ - { - const path = try fs.path.join(allocator, &.{ ".debug", separate_filename }); - defer allocator.free(path); - - if (readElfDebugInfo(allocator, path, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} - } - - var cwd_buf: [fs.max_path_bytes]u8 = undefined; - const cwd_path = posix.realpath(".", &cwd_buf) catch break :blk; - - // // - for (global_debug_directories) |global_directory| { - const path = try fs.path.join(allocator, &.{ global_directory, cwd_path, separate_filename }); - defer allocator.free(path); - if (readElfDebugInfo(allocator, path, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} - } - } - - return error.MissingDebugInfo; - } - - var di = Dwarf{ - .endian = endian, - .sections = sections, - .is_macho = false, - }; - - try Dwarf.open(&di, allocator); - - return ModuleDebugInfo{ - .base_address = undefined, - .dwarf = di, - .mapped_memory = parent_mapped_mem orelse mapped_mem, - .external_mapped_memory = if (parent_mapped_mem != null) mapped_mem else null, - }; - } -} - -/// This takes ownership of macho_file: users of this function should not close -/// it themselves, even on error. -/// TODO it's weird to take ownership even on error, rework this code. 
-fn readMachODebugInfo(allocator: mem.Allocator, macho_file: File) !ModuleDebugInfo { - const mapped_mem = try mapWholeFile(macho_file); - - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); - if (hdr.magic != macho.MH_MAGIC_64) - return error.InvalidDebugInfo; - - var it = macho.LoadCommandIterator{ - .ncmds = hdr.ncmds, - .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - const symtab = while (it.next()) |cmd| switch (cmd.cmd()) { - .SYMTAB => break cmd.cast(macho.symtab_command).?, - else => {}, - } else return error.MissingDebugInfo; - - const syms = @as( - [*]const macho.nlist_64, - @ptrCast(@alignCast(&mapped_mem[symtab.symoff])), - )[0..symtab.nsyms]; - const strings = mapped_mem[symtab.stroff..][0 .. symtab.strsize - 1 :0]; - - const symbols_buf = try allocator.alloc(MachoSymbol, syms.len); - - var ofile: u32 = undefined; - var last_sym: MachoSymbol = undefined; - var symbol_index: usize = 0; - var state: enum { - init, - oso_open, - oso_close, - bnsym, - fun_strx, - fun_size, - ensym, - } = .init; - - for (syms) |*sym| { - if (!sym.stab()) continue; - - // TODO handle globals N_GSYM, and statics N_STSYM - switch (sym.n_type) { - macho.N_OSO => { - switch (state) { - .init, .oso_close => { - state = .oso_open; - ofile = sym.n_strx; - }, - else => return error.InvalidDebugInfo, - } - }, - macho.N_BNSYM => { - switch (state) { - .oso_open, .ensym => { - state = .bnsym; - last_sym = .{ - .strx = 0, - .addr = sym.n_value, - .size = 0, - .ofile = ofile, - }; - }, - else => return error.InvalidDebugInfo, - } - }, - macho.N_FUN => { - switch (state) { - .bnsym => { - state = .fun_strx; - last_sym.strx = sym.n_strx; - }, - .fun_strx => { - state = .fun_size; - last_sym.size = @as(u32, @intCast(sym.n_value)); - }, - else => return error.InvalidDebugInfo, - } - }, - macho.N_ENSYM => { - switch (state) { - .fun_size => { - state = .ensym; - symbols_buf[symbol_index] = last_sym; - symbol_index += 1; - }, - else 
=> return error.InvalidDebugInfo, - } - }, - macho.N_SO => { - switch (state) { - .init, .oso_close => {}, - .oso_open, .ensym => { - state = .oso_close; - }, - else => return error.InvalidDebugInfo, - } - }, - else => {}, - } - } - - switch (state) { - .init => return error.MissingDebugInfo, - .oso_close => {}, - else => return error.InvalidDebugInfo, - } - - const symbols = try allocator.realloc(symbols_buf, symbol_index); - - // Even though lld emits symbols in ascending order, this debug code - // should work for programs linked in any valid way. - // This sort is so that we can binary search later. - mem.sort(MachoSymbol, symbols, {}, MachoSymbol.addressLessThan); - - return ModuleDebugInfo{ - .base_address = undefined, - .vmaddr_slide = undefined, - .mapped_memory = mapped_mem, - .ofiles = ModuleDebugInfo.OFileTable.init(allocator), - .symbols = symbols, - .strings = strings, - }; -} - -fn printLineFromFileAnyOs(out_stream: anytype, line_info: LineInfo) !void { +fn printLineFromFileAnyOs(out_stream: anytype, line_info: Info.SourceLocation) !void { // Need this to always block even in async I/O mode, because this could potentially // be called from e.g. the event loop code crashing. var f = try fs.cwd().openFile(line_info.file_name, .{}); @@ -1591,7 +1093,7 @@ test printLineFromFileAnyOs { var test_dir = std.testing.tmpDir(.{}); defer test_dir.cleanup(); - // Relies on testing.tmpDir internals which is not ideal, but LineInfo requires paths. + // Relies on testing.tmpDir internals which is not ideal, but Info.SourceLocation requires paths. const test_dir_path = try join(allocator, &.{ ".zig-cache", "tmp", test_dir.sub_path[0..] 
}); defer allocator.free(test_dir_path); @@ -1702,871 +1204,6 @@ test printLineFromFileAnyOs { } } -const MachoSymbol = struct { - strx: u32, - addr: u64, - size: u32, - ofile: u32, - - /// Returns the address from the macho file - fn address(self: MachoSymbol) u64 { - return self.addr; - } - - fn addressLessThan(context: void, lhs: MachoSymbol, rhs: MachoSymbol) bool { - _ = context; - return lhs.addr < rhs.addr; - } -}; - -/// Takes ownership of file, even on error. -/// TODO it's weird to take ownership even on error, rework this code. -fn mapWholeFile(file: File) ![]align(mem.page_size) const u8 { - nosuspend { - defer file.close(); - - const file_len = math.cast(usize, try file.getEndPos()) orelse math.maxInt(usize); - const mapped_mem = try posix.mmap( - null, - file_len, - posix.PROT.READ, - .{ .TYPE = .SHARED }, - file.handle, - 0, - ); - errdefer posix.munmap(mapped_mem); - - return mapped_mem; - } -} - -pub const WindowsModuleInfo = struct { - base_address: usize, - size: u32, - name: []const u8, - handle: windows.HMODULE, - - // Set when the image file needed to be mapped from disk - mapped_file: ?struct { - file: File, - section_handle: windows.HANDLE, - section_view: []const u8, - - pub fn deinit(self: @This()) void { - const process_handle = windows.GetCurrentProcess(); - assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(@ptrCast(self.section_view.ptr))) == .SUCCESS); - windows.CloseHandle(self.section_handle); - self.file.close(); - } - } = null, -}; - -pub const Info = struct { - allocator: mem.Allocator, - address_map: std.AutoHashMap(usize, *ModuleDebugInfo), - modules: if (native_os == .windows) std.ArrayListUnmanaged(WindowsModuleInfo) else void, - - pub fn init(allocator: mem.Allocator) !Info { - var debug_info = Info{ - .allocator = allocator, - .address_map = std.AutoHashMap(usize, *ModuleDebugInfo).init(allocator), - .modules = if (native_os == .windows) .{} else {}, - }; - - if (native_os == .windows) { - errdefer 
debug_info.modules.deinit(allocator); - - const handle = windows.kernel32.CreateToolhelp32Snapshot(windows.TH32CS_SNAPMODULE | windows.TH32CS_SNAPMODULE32, 0); - if (handle == windows.INVALID_HANDLE_VALUE) { - switch (windows.GetLastError()) { - else => |err| return windows.unexpectedError(err), - } - } - defer windows.CloseHandle(handle); - - var module_entry: windows.MODULEENTRY32 = undefined; - module_entry.dwSize = @sizeOf(windows.MODULEENTRY32); - if (windows.kernel32.Module32First(handle, &module_entry) == 0) { - return error.MissingDebugInfo; - } - - var module_valid = true; - while (module_valid) { - const module_info = try debug_info.modules.addOne(allocator); - const name = allocator.dupe(u8, mem.sliceTo(&module_entry.szModule, 0)) catch &.{}; - errdefer allocator.free(name); - - module_info.* = .{ - .base_address = @intFromPtr(module_entry.modBaseAddr), - .size = module_entry.modBaseSize, - .name = name, - .handle = module_entry.hModule, - }; - - module_valid = windows.kernel32.Module32Next(handle, &module_entry) == 1; - } - } - - return debug_info; - } - - pub fn deinit(self: *Info) void { - var it = self.address_map.iterator(); - while (it.next()) |entry| { - const mdi = entry.value_ptr.*; - mdi.deinit(self.allocator); - self.allocator.destroy(mdi); - } - self.address_map.deinit(); - if (native_os == .windows) { - for (self.modules.items) |module| { - self.allocator.free(module.name); - if (module.mapped_file) |mapped_file| mapped_file.deinit(); - } - self.modules.deinit(self.allocator); - } - } - - pub fn getModuleForAddress(self: *Info, address: usize) !*ModuleDebugInfo { - if (comptime builtin.target.isDarwin()) { - return self.lookupModuleDyld(address); - } else if (native_os == .windows) { - return self.lookupModuleWin32(address); - } else if (native_os == .haiku) { - return self.lookupModuleHaiku(address); - } else if (comptime builtin.target.isWasm()) { - return self.lookupModuleWasm(address); - } else { - return self.lookupModuleDl(address); - 
} - } - - // Returns the module name for a given address. - // This can be called when getModuleForAddress fails, so implementations should provide - // a path that doesn't rely on any side-effects of a prior successful module lookup. - pub fn getModuleNameForAddress(self: *Info, address: usize) ?[]const u8 { - if (comptime builtin.target.isDarwin()) { - return self.lookupModuleNameDyld(address); - } else if (native_os == .windows) { - return self.lookupModuleNameWin32(address); - } else if (native_os == .haiku) { - return null; - } else if (comptime builtin.target.isWasm()) { - return null; - } else { - return self.lookupModuleNameDl(address); - } - } - - fn lookupModuleDyld(self: *Info, address: usize) !*ModuleDebugInfo { - const image_count = std.c._dyld_image_count(); - - var i: u32 = 0; - while (i < image_count) : (i += 1) { - const header = std.c._dyld_get_image_header(i) orelse continue; - const base_address = @intFromPtr(header); - if (address < base_address) continue; - const vmaddr_slide = std.c._dyld_get_image_vmaddr_slide(i); - - var it = macho.LoadCommandIterator{ - .ncmds = header.ncmds, - .buffer = @alignCast(@as( - [*]u8, - @ptrFromInt(@intFromPtr(header) + @sizeOf(macho.mach_header_64)), - )[0..header.sizeofcmds]), - }; - - var unwind_info: ?[]const u8 = null; - var eh_frame: ?[]const u8 = null; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => { - const segment_cmd = cmd.cast(macho.segment_command_64).?; - if (!mem.eql(u8, "__TEXT", segment_cmd.segName())) continue; - - const seg_start = segment_cmd.vmaddr + vmaddr_slide; - const seg_end = seg_start + segment_cmd.vmsize; - if (address >= seg_start and address < seg_end) { - if (self.address_map.get(base_address)) |obj_di| { - return obj_di; - } - - for (cmd.getSections()) |sect| { - if (mem.eql(u8, "__unwind_info", sect.sectName())) { - unwind_info = @as([*]const u8, @ptrFromInt(sect.addr + vmaddr_slide))[0..sect.size]; - } else if (mem.eql(u8, "__eh_frame", sect.sectName())) { - 
eh_frame = @as([*]const u8, @ptrFromInt(sect.addr + vmaddr_slide))[0..sect.size]; - } - } - - const obj_di = try self.allocator.create(ModuleDebugInfo); - errdefer self.allocator.destroy(obj_di); - - const macho_path = mem.sliceTo(std.c._dyld_get_image_name(i), 0); - const macho_file = fs.cwd().openFile(macho_path, .{}) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - else => return err, - }; - obj_di.* = try readMachODebugInfo(self.allocator, macho_file); - obj_di.base_address = base_address; - obj_di.vmaddr_slide = vmaddr_slide; - obj_di.unwind_info = unwind_info; - obj_di.eh_frame = eh_frame; - - try self.address_map.putNoClobber(base_address, obj_di); - - return obj_di; - } - }, - else => {}, - }; - } - - return error.MissingDebugInfo; - } - - fn lookupModuleNameDyld(self: *Info, address: usize) ?[]const u8 { - _ = self; - const image_count = std.c._dyld_image_count(); - - var i: u32 = 0; - while (i < image_count) : (i += 1) { - const header = std.c._dyld_get_image_header(i) orelse continue; - const base_address = @intFromPtr(header); - if (address < base_address) continue; - const vmaddr_slide = std.c._dyld_get_image_vmaddr_slide(i); - - var it = macho.LoadCommandIterator{ - .ncmds = header.ncmds, - .buffer = @alignCast(@as( - [*]u8, - @ptrFromInt(@intFromPtr(header) + @sizeOf(macho.mach_header_64)), - )[0..header.sizeofcmds]), - }; - - while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => { - const segment_cmd = cmd.cast(macho.segment_command_64).?; - if (!mem.eql(u8, "__TEXT", segment_cmd.segName())) continue; - - const original_address = address - vmaddr_slide; - const seg_start = segment_cmd.vmaddr; - const seg_end = seg_start + segment_cmd.vmsize; - if (original_address >= seg_start and original_address < seg_end) { - return fs.path.basename(mem.sliceTo(std.c._dyld_get_image_name(i), 0)); - } - }, - else => {}, - }; - } - - return null; - } - - fn lookupModuleWin32(self: *Info, address: usize) !*ModuleDebugInfo { 
- for (self.modules.items) |*module| { - if (address >= module.base_address and address < module.base_address + module.size) { - if (self.address_map.get(module.base_address)) |obj_di| { - return obj_di; - } - - const obj_di = try self.allocator.create(ModuleDebugInfo); - errdefer self.allocator.destroy(obj_di); - - const mapped_module = @as([*]const u8, @ptrFromInt(module.base_address))[0..module.size]; - var coff_obj = try coff.Coff.init(mapped_module, true); - - // The string table is not mapped into memory by the loader, so if a section name is in the - // string table then we have to map the full image file from disk. This can happen when - // a binary is produced with -gdwarf, since the section names are longer than 8 bytes. - if (coff_obj.strtabRequired()) { - var name_buffer: [windows.PATH_MAX_WIDE + 4:0]u16 = undefined; - // openFileAbsoluteW requires the prefix to be present - @memcpy(name_buffer[0..4], &[_]u16{ '\\', '?', '?', '\\' }); - - const process_handle = windows.GetCurrentProcess(); - const len = windows.kernel32.GetModuleFileNameExW( - process_handle, - module.handle, - @ptrCast(&name_buffer[4]), - windows.PATH_MAX_WIDE, - ); - - if (len == 0) return error.MissingDebugInfo; - const coff_file = fs.openFileAbsoluteW(name_buffer[0 .. len + 4 :0], .{}) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - else => return err, - }; - errdefer coff_file.close(); - - var section_handle: windows.HANDLE = undefined; - const create_section_rc = windows.ntdll.NtCreateSection( - &section_handle, - windows.STANDARD_RIGHTS_REQUIRED | windows.SECTION_QUERY | windows.SECTION_MAP_READ, - null, - null, - windows.PAGE_READONLY, - // The documentation states that if no AllocationAttribute is specified, then SEC_COMMIT is the default. - // In practice, this isn't the case and specifying 0 will result in INVALID_PARAMETER_6. 
- windows.SEC_COMMIT, - coff_file.handle, - ); - if (create_section_rc != .SUCCESS) return error.MissingDebugInfo; - errdefer windows.CloseHandle(section_handle); - - var coff_len: usize = 0; - var base_ptr: usize = 0; - const map_section_rc = windows.ntdll.NtMapViewOfSection( - section_handle, - process_handle, - @ptrCast(&base_ptr), - null, - 0, - null, - &coff_len, - .ViewUnmap, - 0, - windows.PAGE_READONLY, - ); - if (map_section_rc != .SUCCESS) return error.MissingDebugInfo; - errdefer assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @ptrFromInt(base_ptr)) == .SUCCESS); - - const section_view = @as([*]const u8, @ptrFromInt(base_ptr))[0..coff_len]; - coff_obj = try coff.Coff.init(section_view, false); - - module.mapped_file = .{ - .file = coff_file, - .section_handle = section_handle, - .section_view = section_view, - }; - } - errdefer if (module.mapped_file) |mapped_file| mapped_file.deinit(); - - obj_di.* = try readCoffDebugInfo(self.allocator, &coff_obj); - obj_di.base_address = module.base_address; - - try self.address_map.putNoClobber(module.base_address, obj_di); - return obj_di; - } - } - - return error.MissingDebugInfo; - } - - fn lookupModuleNameWin32(self: *Info, address: usize) ?[]const u8 { - for (self.modules.items) |module| { - if (address >= module.base_address and address < module.base_address + module.size) { - return module.name; - } - } - return null; - } - - fn lookupModuleNameDl(self: *Info, address: usize) ?[]const u8 { - _ = self; - - var ctx: struct { - // Input - address: usize, - // Output - name: []const u8 = "", - } = .{ .address = address }; - const CtxTy = @TypeOf(ctx); - - if (posix.dl_iterate_phdr(&ctx, error{Found}, struct { - fn callback(info: *posix.dl_phdr_info, size: usize, context: *CtxTy) !void { - _ = size; - if (context.address < info.addr) return; - const phdrs = info.phdr[0..info.phnum]; - for (phdrs) |*phdr| { - if (phdr.p_type != elf.PT_LOAD) continue; - - const seg_start = info.addr +% phdr.p_vaddr; - const 
seg_end = seg_start + phdr.p_memsz; - if (context.address >= seg_start and context.address < seg_end) { - context.name = mem.sliceTo(info.name, 0) orelse ""; - break; - } - } else return; - - return error.Found; - } - }.callback)) { - return null; - } else |err| switch (err) { - error.Found => return fs.path.basename(ctx.name), - } - - return null; - } - - fn lookupModuleDl(self: *Info, address: usize) !*ModuleDebugInfo { - var ctx: struct { - // Input - address: usize, - // Output - base_address: usize = undefined, - name: []const u8 = undefined, - build_id: ?[]const u8 = null, - gnu_eh_frame: ?[]const u8 = null, - } = .{ .address = address }; - const CtxTy = @TypeOf(ctx); - - if (posix.dl_iterate_phdr(&ctx, error{Found}, struct { - fn callback(info: *posix.dl_phdr_info, size: usize, context: *CtxTy) !void { - _ = size; - // The base address is too high - if (context.address < info.addr) - return; - - const phdrs = info.phdr[0..info.phnum]; - for (phdrs) |*phdr| { - if (phdr.p_type != elf.PT_LOAD) continue; - - // Overflowing addition is used to handle the case of VSDOs having a p_vaddr = 0xffffffffff700000 - const seg_start = info.addr +% phdr.p_vaddr; - const seg_end = seg_start + phdr.p_memsz; - if (context.address >= seg_start and context.address < seg_end) { - // Android libc uses NULL instead of an empty string to mark the - // main program - context.name = mem.sliceTo(info.name, 0) orelse ""; - context.base_address = info.addr; - break; - } - } else return; - - for (info.phdr[0..info.phnum]) |phdr| { - switch (phdr.p_type) { - elf.PT_NOTE => { - // Look for .note.gnu.build-id - const note_bytes = @as([*]const u8, @ptrFromInt(info.addr + phdr.p_vaddr))[0..phdr.p_memsz]; - const name_size = mem.readInt(u32, note_bytes[0..4], native_endian); - if (name_size != 4) continue; - const desc_size = mem.readInt(u32, note_bytes[4..8], native_endian); - const note_type = mem.readInt(u32, note_bytes[8..12], native_endian); - if (note_type != elf.NT_GNU_BUILD_ID) 
continue; - if (!mem.eql(u8, "GNU\x00", note_bytes[12..16])) continue; - context.build_id = note_bytes[16..][0..desc_size]; - }, - elf.PT_GNU_EH_FRAME => { - context.gnu_eh_frame = @as([*]const u8, @ptrFromInt(info.addr + phdr.p_vaddr))[0..phdr.p_memsz]; - }, - else => {}, - } - } - - // Stop the iteration - return error.Found; - } - }.callback)) { - return error.MissingDebugInfo; - } else |err| switch (err) { - error.Found => {}, - } - - if (self.address_map.get(ctx.base_address)) |obj_di| { - return obj_di; - } - - const obj_di = try self.allocator.create(ModuleDebugInfo); - errdefer self.allocator.destroy(obj_di); - - var sections: Dwarf.SectionArray = Dwarf.null_section_array; - if (ctx.gnu_eh_frame) |eh_frame_hdr| { - // This is a special case - pointer offsets inside .eh_frame_hdr - // are encoded relative to its base address, so we must use the - // version that is already memory mapped, and not the one that - // will be mapped separately from the ELF file. - sections[@intFromEnum(Dwarf.Section.Id.eh_frame_hdr)] = .{ - .data = eh_frame_hdr, - .owned = false, - }; - } - - obj_di.* = try readElfDebugInfo(self.allocator, if (ctx.name.len > 0) ctx.name else null, ctx.build_id, null, &sections, null); - obj_di.base_address = ctx.base_address; - - // Missing unwind info isn't treated as a failure, as the unwinder will fall back to FP-based unwinding - obj_di.dwarf.scanAllUnwindInfo(self.allocator, ctx.base_address) catch {}; - - try self.address_map.putNoClobber(ctx.base_address, obj_di); - - return obj_di; - } - - fn lookupModuleHaiku(self: *Info, address: usize) !*ModuleDebugInfo { - _ = self; - _ = address; - @panic("TODO implement lookup module for Haiku"); - } - - fn lookupModuleWasm(self: *Info, address: usize) !*ModuleDebugInfo { - _ = self; - _ = address; - @panic("TODO implement lookup module for Wasm"); - } -}; - -pub const ModuleDebugInfo = switch (native_os) { - .macos, .ios, .watchos, .tvos, .visionos => struct { - base_address: usize, - vmaddr_slide: 
usize, - mapped_memory: []align(mem.page_size) const u8, - symbols: []const MachoSymbol, - strings: [:0]const u8, - ofiles: OFileTable, - - // Backed by the in-memory sections mapped by the loader - unwind_info: ?[]const u8 = null, - eh_frame: ?[]const u8 = null, - - const OFileTable = std.StringHashMap(OFileInfo); - const OFileInfo = struct { - di: Dwarf, - addr_table: std.StringHashMap(u64), - }; - - pub fn deinit(self: *@This(), allocator: mem.Allocator) void { - var it = self.ofiles.iterator(); - while (it.next()) |entry| { - const ofile = entry.value_ptr; - ofile.di.deinit(allocator); - ofile.addr_table.deinit(); - } - self.ofiles.deinit(); - allocator.free(self.symbols); - posix.munmap(self.mapped_memory); - } - - fn loadOFile(self: *@This(), allocator: mem.Allocator, o_file_path: []const u8) !*OFileInfo { - const o_file = try fs.cwd().openFile(o_file_path, .{}); - const mapped_mem = try mapWholeFile(o_file); - - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); - if (hdr.magic != std.macho.MH_MAGIC_64) - return error.InvalidDebugInfo; - - var segcmd: ?macho.LoadCommandIterator.LoadCommand = null; - var symtabcmd: ?macho.symtab_command = null; - var it = macho.LoadCommandIterator{ - .ncmds = hdr.ncmds, - .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => segcmd = cmd, - .SYMTAB => symtabcmd = cmd.cast(macho.symtab_command).?, - else => {}, - }; - - if (segcmd == null or symtabcmd == null) return error.MissingDebugInfo; - - // Parse symbols - const strtab = @as( - [*]const u8, - @ptrCast(&mapped_mem[symtabcmd.?.stroff]), - )[0 .. 
symtabcmd.?.strsize - 1 :0]; - const symtab = @as( - [*]const macho.nlist_64, - @ptrCast(@alignCast(&mapped_mem[symtabcmd.?.symoff])), - )[0..symtabcmd.?.nsyms]; - - // TODO handle tentative (common) symbols - var addr_table = std.StringHashMap(u64).init(allocator); - try addr_table.ensureTotalCapacity(@as(u32, @intCast(symtab.len))); - for (symtab) |sym| { - if (sym.n_strx == 0) continue; - if (sym.undf() or sym.tentative() or sym.abs()) continue; - const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); - // TODO is it possible to have a symbol collision? - addr_table.putAssumeCapacityNoClobber(sym_name, sym.n_value); - } - - var sections: Dwarf.SectionArray = Dwarf.null_section_array; - if (self.eh_frame) |eh_frame| sections[@intFromEnum(Dwarf.Section.Id.eh_frame)] = .{ - .data = eh_frame, - .owned = false, - }; - - for (segcmd.?.getSections()) |sect| { - if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; - - var section_index: ?usize = null; - inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |section, i| { - if (mem.eql(u8, "__" ++ section.name, sect.sectName())) section_index = i; - } - if (section_index == null) continue; - - const section_bytes = try chopSlice(mapped_mem, sect.offset, sect.size); - sections[section_index.?] 
= .{ - .data = section_bytes, - .virtual_address = sect.addr, - .owned = false, - }; - } - - const missing_debug_info = - sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; - if (missing_debug_info) return error.MissingDebugInfo; - - var di = Dwarf{ - .endian = .little, - .sections = sections, - .is_macho = true, - }; - - try Dwarf.open(&di, allocator); - const info = OFileInfo{ - .di = di, - .addr_table = addr_table, - }; - - // Add the debug info to the cache - const result = try self.ofiles.getOrPut(o_file_path); - assert(!result.found_existing); - result.value_ptr.* = info; - - return result.value_ptr; - } - - pub fn getSymbolAtAddress(self: *@This(), allocator: mem.Allocator, address: usize) !SymbolInfo { - nosuspend { - const result = try self.getOFileInfoForAddress(allocator, address); - if (result.symbol == null) return .{}; - - // Take the symbol name from the N_FUN STAB entry, we're going to - // use it if we fail to find the DWARF infos - const stab_symbol = mem.sliceTo(self.strings[result.symbol.?.strx..], 0); - if (result.o_file_info == null) return .{ .symbol_name = stab_symbol }; - - // Translate again the address, this time into an address inside the - // .o file - const relocated_address_o = result.o_file_info.?.addr_table.get(stab_symbol) orelse return .{ - .symbol_name = "???", - }; - - const addr_off = result.relocated_address - result.symbol.?.addr; - const o_file_di = &result.o_file_info.?.di; - if (o_file_di.findCompileUnit(relocated_address_o)) |compile_unit| { - return SymbolInfo{ - .symbol_name = o_file_di.getSymbolName(relocated_address_o) orelse "???", - .compile_unit_name = compile_unit.die.getAttrString( - o_file_di, - DW.AT.name, - o_file_di.section(.debug_str), - compile_unit.*, - ) catch |err| switch (err) { - 
error.MissingDebugInfo, error.InvalidDebugInfo => "???", - }, - .line_info = o_file_di.getLineNumberInfo( - allocator, - compile_unit.*, - relocated_address_o + addr_off, - ) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => null, - else => return err, - }, - }; - } else |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => { - return SymbolInfo{ .symbol_name = stab_symbol }; - }, - else => return err, - } - } - } - - pub fn getOFileInfoForAddress(self: *@This(), allocator: mem.Allocator, address: usize) !struct { - relocated_address: usize, - symbol: ?*const MachoSymbol = null, - o_file_info: ?*OFileInfo = null, - } { - nosuspend { - // Translate the VA into an address into this object - const relocated_address = address - self.vmaddr_slide; - - // Find the .o file where this symbol is defined - const symbol = machoSearchSymbols(self.symbols, relocated_address) orelse return .{ - .relocated_address = relocated_address, - }; - - // Check if its debug infos are already in the cache - const o_file_path = mem.sliceTo(self.strings[symbol.ofile..], 0); - const o_file_info = self.ofiles.getPtr(o_file_path) orelse - (self.loadOFile(allocator, o_file_path) catch |err| switch (err) { - error.FileNotFound, - error.MissingDebugInfo, - error.InvalidDebugInfo, - => return .{ - .relocated_address = relocated_address, - .symbol = symbol, - }, - else => return err, - }); - - return .{ - .relocated_address = relocated_address, - .symbol = symbol, - .o_file_info = o_file_info, - }; - } - } - - pub fn getDwarfInfoForAddress(self: *@This(), allocator: mem.Allocator, address: usize) !?*const Dwarf { - return if ((try self.getOFileInfoForAddress(allocator, address)).o_file_info) |o_file_info| &o_file_info.di else null; - } - }, - .uefi, .windows => struct { - base_address: usize, - pdb: ?pdb.Pdb = null, - dwarf: ?Dwarf = null, - coff_image_base: u64, - - /// Only used if pdb is non-null - coff_section_headers: []coff.SectionHeader, - - pub 
fn deinit(self: *@This(), allocator: mem.Allocator) void { - if (self.dwarf) |*dwarf| { - dwarf.deinit(allocator); - } - - if (self.pdb) |*p| { - p.deinit(); - allocator.free(self.coff_section_headers); - } - } - - fn getSymbolFromPdb(self: *@This(), relocated_address: usize) !?SymbolInfo { - var coff_section: *align(1) const coff.SectionHeader = undefined; - const mod_index = for (self.pdb.?.sect_contribs) |sect_contrib| { - if (sect_contrib.Section > self.coff_section_headers.len) continue; - // Remember that SectionContribEntry.Section is 1-based. - coff_section = &self.coff_section_headers[sect_contrib.Section - 1]; - - const vaddr_start = coff_section.virtual_address + sect_contrib.Offset; - const vaddr_end = vaddr_start + sect_contrib.Size; - if (relocated_address >= vaddr_start and relocated_address < vaddr_end) { - break sect_contrib.ModuleIndex; - } - } else { - // we have no information to add to the address - return null; - }; - - const module = (try self.pdb.?.getModule(mod_index)) orelse - return error.InvalidDebugInfo; - const obj_basename = fs.path.basename(module.obj_file_name); - - const symbol_name = self.pdb.?.getSymbolName( - module, - relocated_address - coff_section.virtual_address, - ) orelse "???"; - const opt_line_info = try self.pdb.?.getLineNumberInfo( - module, - relocated_address - coff_section.virtual_address, - ); - - return SymbolInfo{ - .symbol_name = symbol_name, - .compile_unit_name = obj_basename, - .line_info = opt_line_info, - }; - } - - pub fn getSymbolAtAddress(self: *@This(), allocator: mem.Allocator, address: usize) !SymbolInfo { - // Translate the VA into an address into this object - const relocated_address = address - self.base_address; - - if (self.pdb != null) { - if (try self.getSymbolFromPdb(relocated_address)) |symbol| return symbol; - } - - if (self.dwarf) |*dwarf| { - const dwarf_address = relocated_address + self.coff_image_base; - return getSymbolFromDwarf(allocator, dwarf_address, dwarf); - } - - return 
SymbolInfo{}; - } - - pub fn getDwarfInfoForAddress(self: *@This(), allocator: mem.Allocator, address: usize) !?*const Dwarf { - _ = allocator; - _ = address; - - return switch (self.debug_data) { - .dwarf => |*dwarf| dwarf, - else => null, - }; - } - }, - .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => struct { - base_address: usize, - dwarf: Dwarf, - mapped_memory: []align(mem.page_size) const u8, - external_mapped_memory: ?[]align(mem.page_size) const u8, - - pub fn deinit(self: *@This(), allocator: mem.Allocator) void { - self.dwarf.deinit(allocator); - posix.munmap(self.mapped_memory); - if (self.external_mapped_memory) |m| posix.munmap(m); - } - - pub fn getSymbolAtAddress(self: *@This(), allocator: mem.Allocator, address: usize) !SymbolInfo { - // Translate the VA into an address into this object - const relocated_address = address - self.base_address; - return getSymbolFromDwarf(allocator, relocated_address, &self.dwarf); - } - - pub fn getDwarfInfoForAddress(self: *@This(), allocator: mem.Allocator, address: usize) !?*const Dwarf { - _ = allocator; - _ = address; - return &self.dwarf; - } - }, - .wasi, .emscripten => struct { - pub fn deinit(self: *@This(), allocator: mem.Allocator) void { - _ = self; - _ = allocator; - } - - pub fn getSymbolAtAddress(self: *@This(), allocator: mem.Allocator, address: usize) !SymbolInfo { - _ = self; - _ = allocator; - _ = address; - return SymbolInfo{}; - } - - pub fn getDwarfInfoForAddress(self: *@This(), allocator: mem.Allocator, address: usize) !?*const Dwarf { - _ = self; - _ = allocator; - _ = address; - return null; - } - }, - else => Dwarf, -}; - -fn getSymbolFromDwarf(allocator: mem.Allocator, address: u64, di: *Dwarf) !SymbolInfo { - if (nosuspend di.findCompileUnit(address)) |compile_unit| { - return SymbolInfo{ - .symbol_name = nosuspend di.getSymbolName(address) orelse "???", - .compile_unit_name = compile_unit.die.getAttrString(di, DW.AT.name, di.section(.debug_str), 
compile_unit.*) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => "???", - }, - .line_info = nosuspend di.getLineNumberInfo(allocator, compile_unit.*, address) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => null, - else => return err, - }, - }; - } else |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => { - return SymbolInfo{}; - }, - else => return err, - } -} - /// TODO multithreaded awareness var debug_info_allocator: ?mem.Allocator = null; var debug_info_arena_allocator: std.heap.ArenaAllocator = undefined; @@ -2802,7 +1439,7 @@ test "manage resources correctly" { } const writer = std.io.null_writer; - var di = try openSelfDebugInfo(testing.allocator); + var di = try Info.openSelf(testing.allocator); defer di.deinit(); try printSourceAtAddress(&di, writer, showMyTrace(), io.tty.detectConfig(std.io.getStdErr())); } diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index f17fd737a1f1..353c097471ab 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -1353,7 +1353,7 @@ pub fn getLineNumberInfo( allocator: Allocator, compile_unit: CompileUnit, target_address: u64, -) !std.debug.LineInfo { +) !std.debug.Info.SourceLocation { const compile_unit_cwd = try compile_unit.die.getAttrString(di, AT.comp_dir, di.section(.debug_line_str), compile_unit); const line_info_offset = try compile_unit.die.getAttrSecOffset(AT.stmt_list); @@ -2084,7 +2084,7 @@ const LineNumberProgram = struct { self: *LineNumberProgram, allocator: Allocator, file_entries: []const FileEntry, - ) !?std.debug.LineInfo { + ) !?std.debug.Info.SourceLocation { if (self.prev_valid and self.target_address >= self.prev_address and self.target_address < self.address) @@ -2104,7 +2104,7 @@ const LineNumberProgram = struct { dir_name, file_entry.path, }); - return std.debug.LineInfo{ + return std.debug.Info.SourceLocation{ .line = if (self.prev_line >= 0) @as(u64, @intCast(self.prev_line)) else 0, 
.column = self.prev_column, .file_name = file_name, diff --git a/lib/std/debug/Info.zig b/lib/std/debug/Info.zig new file mode 100644 index 000000000000..e863cb91dee4 --- /dev/null +++ b/lib/std/debug/Info.zig @@ -0,0 +1,1377 @@ +//! Cross-platform abstraction for debug information. + +const builtin = @import("builtin"); +const native_os = builtin.os.tag; +const native_endian = native_arch.endian(); +const native_arch = builtin.cpu.arch; + +const std = @import("../std.zig"); +const mem = std.mem; +const Allocator = std.mem.Allocator; +const windows = std.os.windows; +const macho = std.macho; +const fs = std.fs; +const coff = std.coff; +const pdb = std.pdb; +const assert = std.debug.assert; +const posix = std.posix; +const elf = std.elf; +const Dwarf = std.debug.Dwarf; +const File = std.fs.File; +const math = std.math; +const testing = std.testing; + +const Info = @This(); + +const root = @import("root"); + +allocator: Allocator, +address_map: std.AutoHashMap(usize, *ModuleDebugInfo), +modules: if (native_os == .windows) std.ArrayListUnmanaged(WindowsModuleInfo) else void, + +pub const OpenSelfError = error{ + MissingDebugInfo, + UnsupportedOperatingSystem, +} || @typeInfo(@typeInfo(@TypeOf(Info.init)).Fn.return_type.?).ErrorUnion.error_set; + +pub fn openSelf(allocator: Allocator) OpenSelfError!Info { + nosuspend { + if (builtin.strip_debug_info) + return error.MissingDebugInfo; + if (@hasDecl(root, "os") and @hasDecl(root.os, "debug") and @hasDecl(root.os.debug, "openSelfDebugInfo")) { + return root.os.debug.openSelfDebugInfo(allocator); + } + switch (native_os) { + .linux, + .freebsd, + .netbsd, + .dragonfly, + .openbsd, + .macos, + .solaris, + .illumos, + .windows, + => return try Info.init(allocator), + else => return error.UnsupportedOperatingSystem, + } + } +} + +pub fn init(allocator: Allocator) !Info { + var debug_info = Info{ + .allocator = allocator, + .address_map = std.AutoHashMap(usize, *ModuleDebugInfo).init(allocator), + .modules = if (native_os == 
.windows) .{} else {}, + }; + + if (native_os == .windows) { + errdefer debug_info.modules.deinit(allocator); + + const handle = windows.kernel32.CreateToolhelp32Snapshot(windows.TH32CS_SNAPMODULE | windows.TH32CS_SNAPMODULE32, 0); + if (handle == windows.INVALID_HANDLE_VALUE) { + switch (windows.GetLastError()) { + else => |err| return windows.unexpectedError(err), + } + } + defer windows.CloseHandle(handle); + + var module_entry: windows.MODULEENTRY32 = undefined; + module_entry.dwSize = @sizeOf(windows.MODULEENTRY32); + if (windows.kernel32.Module32First(handle, &module_entry) == 0) { + return error.MissingDebugInfo; + } + + var module_valid = true; + while (module_valid) { + const module_info = try debug_info.modules.addOne(allocator); + const name = allocator.dupe(u8, mem.sliceTo(&module_entry.szModule, 0)) catch &.{}; + errdefer allocator.free(name); + + module_info.* = .{ + .base_address = @intFromPtr(module_entry.modBaseAddr), + .size = module_entry.modBaseSize, + .name = name, + .handle = module_entry.hModule, + }; + + module_valid = windows.kernel32.Module32Next(handle, &module_entry) == 1; + } + } + + return debug_info; +} + +pub fn deinit(self: *Info) void { + var it = self.address_map.iterator(); + while (it.next()) |entry| { + const mdi = entry.value_ptr.*; + mdi.deinit(self.allocator); + self.allocator.destroy(mdi); + } + self.address_map.deinit(); + if (native_os == .windows) { + for (self.modules.items) |module| { + self.allocator.free(module.name); + if (module.mapped_file) |mapped_file| mapped_file.deinit(); + } + self.modules.deinit(self.allocator); + } +} + +pub fn getModuleForAddress(self: *Info, address: usize) !*ModuleDebugInfo { + if (comptime builtin.target.isDarwin()) { + return self.lookupModuleDyld(address); + } else if (native_os == .windows) { + return self.lookupModuleWin32(address); + } else if (native_os == .haiku) { + return self.lookupModuleHaiku(address); + } else if (comptime builtin.target.isWasm()) { + return 
self.lookupModuleWasm(address); + } else { + return self.lookupModuleDl(address); + } +} + +// Returns the module name for a given address. +// This can be called when getModuleForAddress fails, so implementations should provide +// a path that doesn't rely on any side-effects of a prior successful module lookup. +pub fn getModuleNameForAddress(self: *Info, address: usize) ?[]const u8 { + if (comptime builtin.target.isDarwin()) { + return self.lookupModuleNameDyld(address); + } else if (native_os == .windows) { + return self.lookupModuleNameWin32(address); + } else if (native_os == .haiku) { + return null; + } else if (comptime builtin.target.isWasm()) { + return null; + } else { + return self.lookupModuleNameDl(address); + } +} + +fn lookupModuleDyld(self: *Info, address: usize) !*ModuleDebugInfo { + const image_count = std.c._dyld_image_count(); + + var i: u32 = 0; + while (i < image_count) : (i += 1) { + const header = std.c._dyld_get_image_header(i) orelse continue; + const base_address = @intFromPtr(header); + if (address < base_address) continue; + const vmaddr_slide = std.c._dyld_get_image_vmaddr_slide(i); + + var it = macho.LoadCommandIterator{ + .ncmds = header.ncmds, + .buffer = @alignCast(@as( + [*]u8, + @ptrFromInt(@intFromPtr(header) + @sizeOf(macho.mach_header_64)), + )[0..header.sizeofcmds]), + }; + + var unwind_info: ?[]const u8 = null; + var eh_frame: ?[]const u8 = null; + while (it.next()) |cmd| switch (cmd.cmd()) { + .SEGMENT_64 => { + const segment_cmd = cmd.cast(macho.segment_command_64).?; + if (!mem.eql(u8, "__TEXT", segment_cmd.segName())) continue; + + const seg_start = segment_cmd.vmaddr + vmaddr_slide; + const seg_end = seg_start + segment_cmd.vmsize; + if (address >= seg_start and address < seg_end) { + if (self.address_map.get(base_address)) |obj_di| { + return obj_di; + } + + for (cmd.getSections()) |sect| { + if (mem.eql(u8, "__unwind_info", sect.sectName())) { + unwind_info = @as([*]const u8, @ptrFromInt(sect.addr + 
vmaddr_slide))[0..sect.size]; + } else if (mem.eql(u8, "__eh_frame", sect.sectName())) { + eh_frame = @as([*]const u8, @ptrFromInt(sect.addr + vmaddr_slide))[0..sect.size]; + } + } + + const obj_di = try self.allocator.create(ModuleDebugInfo); + errdefer self.allocator.destroy(obj_di); + + const macho_path = mem.sliceTo(std.c._dyld_get_image_name(i), 0); + const macho_file = fs.cwd().openFile(macho_path, .{}) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + else => return err, + }; + obj_di.* = try readMachODebugInfo(self.allocator, macho_file); + obj_di.base_address = base_address; + obj_di.vmaddr_slide = vmaddr_slide; + obj_di.unwind_info = unwind_info; + obj_di.eh_frame = eh_frame; + + try self.address_map.putNoClobber(base_address, obj_di); + + return obj_di; + } + }, + else => {}, + }; + } + + return error.MissingDebugInfo; +} + +fn lookupModuleNameDyld(self: *Info, address: usize) ?[]const u8 { + _ = self; + const image_count = std.c._dyld_image_count(); + + var i: u32 = 0; + while (i < image_count) : (i += 1) { + const header = std.c._dyld_get_image_header(i) orelse continue; + const base_address = @intFromPtr(header); + if (address < base_address) continue; + const vmaddr_slide = std.c._dyld_get_image_vmaddr_slide(i); + + var it = macho.LoadCommandIterator{ + .ncmds = header.ncmds, + .buffer = @alignCast(@as( + [*]u8, + @ptrFromInt(@intFromPtr(header) + @sizeOf(macho.mach_header_64)), + )[0..header.sizeofcmds]), + }; + + while (it.next()) |cmd| switch (cmd.cmd()) { + .SEGMENT_64 => { + const segment_cmd = cmd.cast(macho.segment_command_64).?; + if (!mem.eql(u8, "__TEXT", segment_cmd.segName())) continue; + + const original_address = address - vmaddr_slide; + const seg_start = segment_cmd.vmaddr; + const seg_end = seg_start + segment_cmd.vmsize; + if (original_address >= seg_start and original_address < seg_end) { + return fs.path.basename(mem.sliceTo(std.c._dyld_get_image_name(i), 0)); + } + }, + else => {}, + }; + } + + 
return null; +} + +fn lookupModuleWin32(self: *Info, address: usize) !*ModuleDebugInfo { + for (self.modules.items) |*module| { + if (address >= module.base_address and address < module.base_address + module.size) { + if (self.address_map.get(module.base_address)) |obj_di| { + return obj_di; + } + + const obj_di = try self.allocator.create(ModuleDebugInfo); + errdefer self.allocator.destroy(obj_di); + + const mapped_module = @as([*]const u8, @ptrFromInt(module.base_address))[0..module.size]; + var coff_obj = try coff.Coff.init(mapped_module, true); + + // The string table is not mapped into memory by the loader, so if a section name is in the + // string table then we have to map the full image file from disk. This can happen when + // a binary is produced with -gdwarf, since the section names are longer than 8 bytes. + if (coff_obj.strtabRequired()) { + var name_buffer: [windows.PATH_MAX_WIDE + 4:0]u16 = undefined; + // openFileAbsoluteW requires the prefix to be present + @memcpy(name_buffer[0..4], &[_]u16{ '\\', '?', '?', '\\' }); + + const process_handle = windows.GetCurrentProcess(); + const len = windows.kernel32.GetModuleFileNameExW( + process_handle, + module.handle, + @ptrCast(&name_buffer[4]), + windows.PATH_MAX_WIDE, + ); + + if (len == 0) return error.MissingDebugInfo; + const coff_file = fs.openFileAbsoluteW(name_buffer[0 .. len + 4 :0], .{}) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + else => return err, + }; + errdefer coff_file.close(); + + var section_handle: windows.HANDLE = undefined; + const create_section_rc = windows.ntdll.NtCreateSection( + &section_handle, + windows.STANDARD_RIGHTS_REQUIRED | windows.SECTION_QUERY | windows.SECTION_MAP_READ, + null, + null, + windows.PAGE_READONLY, + // The documentation states that if no AllocationAttribute is specified, then SEC_COMMIT is the default. + // In practice, this isn't the case and specifying 0 will result in INVALID_PARAMETER_6. 
+ windows.SEC_COMMIT, + coff_file.handle, + ); + if (create_section_rc != .SUCCESS) return error.MissingDebugInfo; + errdefer windows.CloseHandle(section_handle); + + var coff_len: usize = 0; + var base_ptr: usize = 0; + const map_section_rc = windows.ntdll.NtMapViewOfSection( + section_handle, + process_handle, + @ptrCast(&base_ptr), + null, + 0, + null, + &coff_len, + .ViewUnmap, + 0, + windows.PAGE_READONLY, + ); + if (map_section_rc != .SUCCESS) return error.MissingDebugInfo; + errdefer assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @ptrFromInt(base_ptr)) == .SUCCESS); + + const section_view = @as([*]const u8, @ptrFromInt(base_ptr))[0..coff_len]; + coff_obj = try coff.Coff.init(section_view, false); + + module.mapped_file = .{ + .file = coff_file, + .section_handle = section_handle, + .section_view = section_view, + }; + } + errdefer if (module.mapped_file) |mapped_file| mapped_file.deinit(); + + obj_di.* = try readCoffDebugInfo(self.allocator, &coff_obj); + obj_di.base_address = module.base_address; + + try self.address_map.putNoClobber(module.base_address, obj_di); + return obj_di; + } + } + + return error.MissingDebugInfo; +} + +fn lookupModuleNameWin32(self: *Info, address: usize) ?[]const u8 { + for (self.modules.items) |module| { + if (address >= module.base_address and address < module.base_address + module.size) { + return module.name; + } + } + return null; +} + +fn lookupModuleNameDl(self: *Info, address: usize) ?[]const u8 { + _ = self; + + var ctx: struct { + // Input + address: usize, + // Output + name: []const u8 = "", + } = .{ .address = address }; + const CtxTy = @TypeOf(ctx); + + if (posix.dl_iterate_phdr(&ctx, error{Found}, struct { + fn callback(info: *posix.dl_phdr_info, size: usize, context: *CtxTy) !void { + _ = size; + if (context.address < info.addr) return; + const phdrs = info.phdr[0..info.phnum]; + for (phdrs) |*phdr| { + if (phdr.p_type != elf.PT_LOAD) continue; + + const seg_start = info.addr +% phdr.p_vaddr; + const 
seg_end = seg_start + phdr.p_memsz; + if (context.address >= seg_start and context.address < seg_end) { + context.name = mem.sliceTo(info.name, 0) orelse ""; + break; + } + } else return; + + return error.Found; + } + }.callback)) { + return null; + } else |err| switch (err) { + error.Found => return fs.path.basename(ctx.name), + } + + return null; +} + +fn lookupModuleDl(self: *Info, address: usize) !*ModuleDebugInfo { + var ctx: struct { + // Input + address: usize, + // Output + base_address: usize = undefined, + name: []const u8 = undefined, + build_id: ?[]const u8 = null, + gnu_eh_frame: ?[]const u8 = null, + } = .{ .address = address }; + const CtxTy = @TypeOf(ctx); + + if (posix.dl_iterate_phdr(&ctx, error{Found}, struct { + fn callback(info: *posix.dl_phdr_info, size: usize, context: *CtxTy) !void { + _ = size; + // The base address is too high + if (context.address < info.addr) + return; + + const phdrs = info.phdr[0..info.phnum]; + for (phdrs) |*phdr| { + if (phdr.p_type != elf.PT_LOAD) continue; + + // Overflowing addition is used to handle the case of VSDOs having a p_vaddr = 0xffffffffff700000 + const seg_start = info.addr +% phdr.p_vaddr; + const seg_end = seg_start + phdr.p_memsz; + if (context.address >= seg_start and context.address < seg_end) { + // Android libc uses NULL instead of an empty string to mark the + // main program + context.name = mem.sliceTo(info.name, 0) orelse ""; + context.base_address = info.addr; + break; + } + } else return; + + for (info.phdr[0..info.phnum]) |phdr| { + switch (phdr.p_type) { + elf.PT_NOTE => { + // Look for .note.gnu.build-id + const note_bytes = @as([*]const u8, @ptrFromInt(info.addr + phdr.p_vaddr))[0..phdr.p_memsz]; + const name_size = mem.readInt(u32, note_bytes[0..4], native_endian); + if (name_size != 4) continue; + const desc_size = mem.readInt(u32, note_bytes[4..8], native_endian); + const note_type = mem.readInt(u32, note_bytes[8..12], native_endian); + if (note_type != elf.NT_GNU_BUILD_ID) continue; 
+ if (!mem.eql(u8, "GNU\x00", note_bytes[12..16])) continue; + context.build_id = note_bytes[16..][0..desc_size]; + }, + elf.PT_GNU_EH_FRAME => { + context.gnu_eh_frame = @as([*]const u8, @ptrFromInt(info.addr + phdr.p_vaddr))[0..phdr.p_memsz]; + }, + else => {}, + } + } + + // Stop the iteration + return error.Found; + } + }.callback)) { + return error.MissingDebugInfo; + } else |err| switch (err) { + error.Found => {}, + } + + if (self.address_map.get(ctx.base_address)) |obj_di| { + return obj_di; + } + + const obj_di = try self.allocator.create(ModuleDebugInfo); + errdefer self.allocator.destroy(obj_di); + + var sections: Dwarf.SectionArray = Dwarf.null_section_array; + if (ctx.gnu_eh_frame) |eh_frame_hdr| { + // This is a special case - pointer offsets inside .eh_frame_hdr + // are encoded relative to its base address, so we must use the + // version that is already memory mapped, and not the one that + // will be mapped separately from the ELF file. + sections[@intFromEnum(Dwarf.Section.Id.eh_frame_hdr)] = .{ + .data = eh_frame_hdr, + .owned = false, + }; + } + + obj_di.* = try readElfDebugInfo(self.allocator, if (ctx.name.len > 0) ctx.name else null, ctx.build_id, null, &sections, null); + obj_di.base_address = ctx.base_address; + + // Missing unwind info isn't treated as a failure, as the unwinder will fall back to FP-based unwinding + obj_di.dwarf.scanAllUnwindInfo(self.allocator, ctx.base_address) catch {}; + + try self.address_map.putNoClobber(ctx.base_address, obj_di); + + return obj_di; +} + +fn lookupModuleHaiku(self: *Info, address: usize) !*ModuleDebugInfo { + _ = self; + _ = address; + @panic("TODO implement lookup module for Haiku"); +} + +fn lookupModuleWasm(self: *Info, address: usize) !*ModuleDebugInfo { + _ = self; + _ = address; + @panic("TODO implement lookup module for Wasm"); +} + +pub const ModuleDebugInfo = switch (native_os) { + .macos, .ios, .watchos, .tvos, .visionos => struct { + base_address: usize, + vmaddr_slide: usize, + 
mapped_memory: []align(mem.page_size) const u8, + symbols: []const MachoSymbol, + strings: [:0]const u8, + ofiles: OFileTable, + + // Backed by the in-memory sections mapped by the loader + unwind_info: ?[]const u8 = null, + eh_frame: ?[]const u8 = null, + + const OFileTable = std.StringHashMap(OFileInfo); + const OFileInfo = struct { + di: Dwarf, + addr_table: std.StringHashMap(u64), + }; + + pub fn deinit(self: *@This(), allocator: Allocator) void { + var it = self.ofiles.iterator(); + while (it.next()) |entry| { + const ofile = entry.value_ptr; + ofile.di.deinit(allocator); + ofile.addr_table.deinit(); + } + self.ofiles.deinit(); + allocator.free(self.symbols); + posix.munmap(self.mapped_memory); + } + + fn loadOFile(self: *@This(), allocator: Allocator, o_file_path: []const u8) !*OFileInfo { + const o_file = try fs.cwd().openFile(o_file_path, .{}); + const mapped_mem = try mapWholeFile(o_file); + + const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); + if (hdr.magic != std.macho.MH_MAGIC_64) + return error.InvalidDebugInfo; + + var segcmd: ?macho.LoadCommandIterator.LoadCommand = null; + var symtabcmd: ?macho.symtab_command = null; + var it = macho.LoadCommandIterator{ + .ncmds = hdr.ncmds, + .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], + }; + while (it.next()) |cmd| switch (cmd.cmd()) { + .SEGMENT_64 => segcmd = cmd, + .SYMTAB => symtabcmd = cmd.cast(macho.symtab_command).?, + else => {}, + }; + + if (segcmd == null or symtabcmd == null) return error.MissingDebugInfo; + + // Parse symbols + const strtab = @as( + [*]const u8, + @ptrCast(&mapped_mem[symtabcmd.?.stroff]), + )[0 .. 
symtabcmd.?.strsize - 1 :0]; + const symtab = @as( + [*]const macho.nlist_64, + @ptrCast(@alignCast(&mapped_mem[symtabcmd.?.symoff])), + )[0..symtabcmd.?.nsyms]; + + // TODO handle tentative (common) symbols + var addr_table = std.StringHashMap(u64).init(allocator); + try addr_table.ensureTotalCapacity(@as(u32, @intCast(symtab.len))); + for (symtab) |sym| { + if (sym.n_strx == 0) continue; + if (sym.undf() or sym.tentative() or sym.abs()) continue; + const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); + // TODO is it possible to have a symbol collision? + addr_table.putAssumeCapacityNoClobber(sym_name, sym.n_value); + } + + var sections: Dwarf.SectionArray = Dwarf.null_section_array; + if (self.eh_frame) |eh_frame| sections[@intFromEnum(Dwarf.Section.Id.eh_frame)] = .{ + .data = eh_frame, + .owned = false, + }; + + for (segcmd.?.getSections()) |sect| { + if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; + + var section_index: ?usize = null; + inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |section, i| { + if (mem.eql(u8, "__" ++ section.name, sect.sectName())) section_index = i; + } + if (section_index == null) continue; + + const section_bytes = try chopSlice(mapped_mem, sect.offset, sect.size); + sections[section_index.?] 
= .{ + .data = section_bytes, + .virtual_address = sect.addr, + .owned = false, + }; + } + + const missing_debug_info = + sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; + if (missing_debug_info) return error.MissingDebugInfo; + + var di = Dwarf{ + .endian = .little, + .sections = sections, + .is_macho = true, + }; + + try Dwarf.open(&di, allocator); + const info = OFileInfo{ + .di = di, + .addr_table = addr_table, + }; + + // Add the debug info to the cache + const result = try self.ofiles.getOrPut(o_file_path); + assert(!result.found_existing); + result.value_ptr.* = info; + + return result.value_ptr; + } + + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { + nosuspend { + const result = try self.getOFileInfoForAddress(allocator, address); + if (result.symbol == null) return .{}; + + // Take the symbol name from the N_FUN STAB entry, we're going to + // use it if we fail to find the DWARF infos + const stab_symbol = mem.sliceTo(self.strings[result.symbol.?.strx..], 0); + if (result.o_file_info == null) return .{ .symbol_name = stab_symbol }; + + // Translate again the address, this time into an address inside the + // .o file + const relocated_address_o = result.o_file_info.?.addr_table.get(stab_symbol) orelse return .{ + .symbol_name = "???", + }; + + const addr_off = result.relocated_address - result.symbol.?.addr; + const o_file_di = &result.o_file_info.?.di; + if (o_file_di.findCompileUnit(relocated_address_o)) |compile_unit| { + return SymbolInfo{ + .symbol_name = o_file_di.getSymbolName(relocated_address_o) orelse "???", + .compile_unit_name = compile_unit.die.getAttrString( + o_file_di, + std.dwarf.AT.name, + o_file_di.section(.debug_str), + compile_unit.*, + ) catch |err| switch (err) { + 
error.MissingDebugInfo, error.InvalidDebugInfo => "???", + }, + .line_info = o_file_di.getLineNumberInfo( + allocator, + compile_unit.*, + relocated_address_o + addr_off, + ) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => null, + else => return err, + }, + }; + } else |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => { + return SymbolInfo{ .symbol_name = stab_symbol }; + }, + else => return err, + } + } + } + + pub fn getOFileInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !struct { + relocated_address: usize, + symbol: ?*const MachoSymbol = null, + o_file_info: ?*OFileInfo = null, + } { + nosuspend { + // Translate the VA into an address into this object + const relocated_address = address - self.vmaddr_slide; + + // Find the .o file where this symbol is defined + const symbol = machoSearchSymbols(self.symbols, relocated_address) orelse return .{ + .relocated_address = relocated_address, + }; + + // Check if its debug infos are already in the cache + const o_file_path = mem.sliceTo(self.strings[symbol.ofile..], 0); + const o_file_info = self.ofiles.getPtr(o_file_path) orelse + (self.loadOFile(allocator, o_file_path) catch |err| switch (err) { + error.FileNotFound, + error.MissingDebugInfo, + error.InvalidDebugInfo, + => return .{ + .relocated_address = relocated_address, + .symbol = symbol, + }, + else => return err, + }); + + return .{ + .relocated_address = relocated_address, + .symbol = symbol, + .o_file_info = o_file_info, + }; + } + } + + pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { + return if ((try self.getOFileInfoForAddress(allocator, address)).o_file_info) |o_file_info| &o_file_info.di else null; + } + }, + .uefi, .windows => struct { + base_address: usize, + pdb: ?pdb.Pdb = null, + dwarf: ?Dwarf = null, + coff_image_base: u64, + + /// Only used if pdb is non-null + coff_section_headers: []coff.SectionHeader, + + pub fn 
deinit(self: *@This(), allocator: Allocator) void { + if (self.dwarf) |*dwarf| { + dwarf.deinit(allocator); + } + + if (self.pdb) |*p| { + p.deinit(); + allocator.free(self.coff_section_headers); + } + } + + fn getSymbolFromPdb(self: *@This(), relocated_address: usize) !?SymbolInfo { + var coff_section: *align(1) const coff.SectionHeader = undefined; + const mod_index = for (self.pdb.?.sect_contribs) |sect_contrib| { + if (sect_contrib.Section > self.coff_section_headers.len) continue; + // Remember that SectionContribEntry.Section is 1-based. + coff_section = &self.coff_section_headers[sect_contrib.Section - 1]; + + const vaddr_start = coff_section.virtual_address + sect_contrib.Offset; + const vaddr_end = vaddr_start + sect_contrib.Size; + if (relocated_address >= vaddr_start and relocated_address < vaddr_end) { + break sect_contrib.ModuleIndex; + } + } else { + // we have no information to add to the address + return null; + }; + + const module = (try self.pdb.?.getModule(mod_index)) orelse + return error.InvalidDebugInfo; + const obj_basename = fs.path.basename(module.obj_file_name); + + const symbol_name = self.pdb.?.getSymbolName( + module, + relocated_address - coff_section.virtual_address, + ) orelse "???"; + const opt_line_info = try self.pdb.?.getLineNumberInfo( + module, + relocated_address - coff_section.virtual_address, + ); + + return SymbolInfo{ + .symbol_name = symbol_name, + .compile_unit_name = obj_basename, + .line_info = opt_line_info, + }; + } + + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { + // Translate the VA into an address into this object + const relocated_address = address - self.base_address; + + if (self.pdb != null) { + if (try self.getSymbolFromPdb(relocated_address)) |symbol| return symbol; + } + + if (self.dwarf) |*dwarf| { + const dwarf_address = relocated_address + self.coff_image_base; + return getSymbolFromDwarf(allocator, dwarf_address, dwarf); + } + + return SymbolInfo{}; + } 
+ + pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { + _ = allocator; + _ = address; + + return switch (self.debug_data) { + .dwarf => |*dwarf| dwarf, + else => null, + }; + } + }, + .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => struct { + base_address: usize, + dwarf: Dwarf, + mapped_memory: []align(mem.page_size) const u8, + external_mapped_memory: ?[]align(mem.page_size) const u8, + + pub fn deinit(self: *@This(), allocator: Allocator) void { + self.dwarf.deinit(allocator); + posix.munmap(self.mapped_memory); + if (self.external_mapped_memory) |m| posix.munmap(m); + } + + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { + // Translate the VA into an address into this object + const relocated_address = address - self.base_address; + return getSymbolFromDwarf(allocator, relocated_address, &self.dwarf); + } + + pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { + _ = allocator; + _ = address; + return &self.dwarf; + } + }, + .wasi, .emscripten => struct { + pub fn deinit(self: *@This(), allocator: Allocator) void { + _ = self; + _ = allocator; + } + + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { + _ = self; + _ = allocator; + _ = address; + return SymbolInfo{}; + } + + pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { + _ = self; + _ = allocator; + _ = address; + return null; + } + }, + else => Dwarf, +}; + +pub const WindowsModuleInfo = struct { + base_address: usize, + size: u32, + name: []const u8, + handle: windows.HMODULE, + + // Set when the image file needed to be mapped from disk + mapped_file: ?struct { + file: File, + section_handle: windows.HANDLE, + section_view: []const u8, + + pub fn deinit(self: @This()) void { + const process_handle = windows.GetCurrentProcess(); + 
assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(@ptrCast(self.section_view.ptr))) == .SUCCESS); + windows.CloseHandle(self.section_handle); + self.file.close(); + } + } = null, +}; + +/// This takes ownership of macho_file: users of this function should not close +/// it themselves, even on error. +/// TODO it's weird to take ownership even on error, rework this code. +fn readMachODebugInfo(allocator: Allocator, macho_file: File) !ModuleDebugInfo { + const mapped_mem = try mapWholeFile(macho_file); + + const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); + if (hdr.magic != macho.MH_MAGIC_64) + return error.InvalidDebugInfo; + + var it = macho.LoadCommandIterator{ + .ncmds = hdr.ncmds, + .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], + }; + const symtab = while (it.next()) |cmd| switch (cmd.cmd()) { + .SYMTAB => break cmd.cast(macho.symtab_command).?, + else => {}, + } else return error.MissingDebugInfo; + + const syms = @as( + [*]const macho.nlist_64, + @ptrCast(@alignCast(&mapped_mem[symtab.symoff])), + )[0..symtab.nsyms]; + const strings = mapped_mem[symtab.stroff..][0 .. 
symtab.strsize - 1 :0]; + + const symbols_buf = try allocator.alloc(MachoSymbol, syms.len); + + var ofile: u32 = undefined; + var last_sym: MachoSymbol = undefined; + var symbol_index: usize = 0; + var state: enum { + init, + oso_open, + oso_close, + bnsym, + fun_strx, + fun_size, + ensym, + } = .init; + + for (syms) |*sym| { + if (!sym.stab()) continue; + + // TODO handle globals N_GSYM, and statics N_STSYM + switch (sym.n_type) { + macho.N_OSO => { + switch (state) { + .init, .oso_close => { + state = .oso_open; + ofile = sym.n_strx; + }, + else => return error.InvalidDebugInfo, + } + }, + macho.N_BNSYM => { + switch (state) { + .oso_open, .ensym => { + state = .bnsym; + last_sym = .{ + .strx = 0, + .addr = sym.n_value, + .size = 0, + .ofile = ofile, + }; + }, + else => return error.InvalidDebugInfo, + } + }, + macho.N_FUN => { + switch (state) { + .bnsym => { + state = .fun_strx; + last_sym.strx = sym.n_strx; + }, + .fun_strx => { + state = .fun_size; + last_sym.size = @as(u32, @intCast(sym.n_value)); + }, + else => return error.InvalidDebugInfo, + } + }, + macho.N_ENSYM => { + switch (state) { + .fun_size => { + state = .ensym; + symbols_buf[symbol_index] = last_sym; + symbol_index += 1; + }, + else => return error.InvalidDebugInfo, + } + }, + macho.N_SO => { + switch (state) { + .init, .oso_close => {}, + .oso_open, .ensym => { + state = .oso_close; + }, + else => return error.InvalidDebugInfo, + } + }, + else => {}, + } + } + + switch (state) { + .init => return error.MissingDebugInfo, + .oso_close => {}, + else => return error.InvalidDebugInfo, + } + + const symbols = try allocator.realloc(symbols_buf, symbol_index); + + // Even though lld emits symbols in ascending order, this debug code + // should work for programs linked in any valid way. + // This sort is so that we can binary search later. 
+ mem.sort(MachoSymbol, symbols, {}, MachoSymbol.addressLessThan); + + return ModuleDebugInfo{ + .base_address = undefined, + .vmaddr_slide = undefined, + .mapped_memory = mapped_mem, + .ofiles = ModuleDebugInfo.OFileTable.init(allocator), + .symbols = symbols, + .strings = strings, + }; +} + +fn readCoffDebugInfo(allocator: Allocator, coff_obj: *coff.Coff) !ModuleDebugInfo { + nosuspend { + var di = ModuleDebugInfo{ + .base_address = undefined, + .coff_image_base = coff_obj.getImageBase(), + .coff_section_headers = undefined, + }; + + if (coff_obj.getSectionByName(".debug_info")) |_| { + // This coff file has embedded DWARF debug info + var sections: Dwarf.SectionArray = Dwarf.null_section_array; + errdefer for (sections) |section| if (section) |s| if (s.owned) allocator.free(s.data); + + inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |section, i| { + sections[i] = if (coff_obj.getSectionByName("." ++ section.name)) |section_header| blk: { + break :blk .{ + .data = try coff_obj.getSectionDataAlloc(section_header, allocator), + .virtual_address = section_header.virtual_address, + .owned = true, + }; + } else null; + } + + var dwarf = Dwarf{ + .endian = native_endian, + .sections = sections, + .is_macho = false, + }; + + try Dwarf.open(&dwarf, allocator); + di.dwarf = dwarf; + } + + const raw_path = try coff_obj.getPdbPath() orelse return di; + const path = blk: { + if (fs.path.isAbsolute(raw_path)) { + break :blk raw_path; + } else { + const self_dir = try fs.selfExeDirPathAlloc(allocator); + defer allocator.free(self_dir); + break :blk try fs.path.join(allocator, &.{ self_dir, raw_path }); + } + }; + defer if (path.ptr != raw_path.ptr) allocator.free(path); + + di.pdb = pdb.Pdb.init(allocator, path) catch |err| switch (err) { + error.FileNotFound, error.IsDir => { + if (di.dwarf == null) return error.MissingDebugInfo; + return di; + }, + else => return err, + }; + try di.pdb.?.parseInfoStream(); + try di.pdb.?.parseDbiStream(); + + if (!mem.eql(u8, 
&coff_obj.guid, &di.pdb.?.guid) or coff_obj.age != di.pdb.?.age)
+        return error.InvalidDebugInfo;
+
+    // Only used by the pdb path
+    di.coff_section_headers = try coff_obj.getSectionHeadersAlloc(allocator);
+    errdefer allocator.free(di.coff_section_headers);
+
+    return di;
+    }
+}
+
+/// Reads debug info from an ELF file, or the current binary if none is specified.
+/// If the required sections aren't present but a reference to external debug info is,
+/// then this function will recurse to attempt to load the debug sections from
+/// an external file.
+pub fn readElfDebugInfo(
+    allocator: Allocator,
+    elf_filename: ?[]const u8,
+    build_id: ?[]const u8,
+    expected_crc: ?u32,
+    parent_sections: *Dwarf.SectionArray,
+    parent_mapped_mem: ?[]align(mem.page_size) const u8,
+) !ModuleDebugInfo {
+    nosuspend {
+        const elf_file = (if (elf_filename) |filename| blk: {
+            break :blk fs.cwd().openFile(filename, .{});
+        } else fs.openSelfExe(.{})) catch |err| switch (err) {
+            error.FileNotFound => return error.MissingDebugInfo,
+            else => return err,
+        };
+
+        const mapped_mem = try mapWholeFile(elf_file);
+        if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo;
+
+        const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]);
+        if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic;
+        if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion;
+
+        const endian: std.builtin.Endian = switch (hdr.e_ident[elf.EI_DATA]) {
+            elf.ELFDATA2LSB => .little,
+            elf.ELFDATA2MSB => .big,
+            else => return error.InvalidElfEndian,
+        };
+        assert(endian == native_endian); // this is our own debug info
+
+        const shoff = hdr.e_shoff;
+        const str_section_off = shoff + @as(u64, hdr.e_shentsize) * @as(u64, hdr.e_shstrndx);
+        const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(&mapped_mem[math.cast(usize, str_section_off) orelse return error.Overflow]));
+        const header_strings = 
mapped_mem[str_shdr.sh_offset..][0..str_shdr.sh_size]; + const shdrs = @as( + [*]const elf.Shdr, + @ptrCast(@alignCast(&mapped_mem[shoff])), + )[0..hdr.e_shnum]; + + var sections: Dwarf.SectionArray = Dwarf.null_section_array; + + // Combine section list. This takes ownership over any owned sections from the parent scope. + for (parent_sections, §ions) |*parent, *section| { + if (parent.*) |*p| { + section.* = p.*; + p.owned = false; + } + } + errdefer for (sections) |section| if (section) |s| if (s.owned) allocator.free(s.data); + + var separate_debug_filename: ?[]const u8 = null; + var separate_debug_crc: ?u32 = null; + + for (shdrs) |*shdr| { + if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue; + const name = mem.sliceTo(header_strings[shdr.sh_name..], 0); + + if (mem.eql(u8, name, ".gnu_debuglink")) { + const gnu_debuglink = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); + const debug_filename = mem.sliceTo(@as([*:0]const u8, @ptrCast(gnu_debuglink.ptr)), 0); + const crc_offset = mem.alignForward(usize, @intFromPtr(&debug_filename[debug_filename.len]) + 1, 4) - @intFromPtr(gnu_debuglink.ptr); + const crc_bytes = gnu_debuglink[crc_offset..][0..4]; + separate_debug_crc = mem.readInt(u32, crc_bytes, native_endian); + separate_debug_filename = debug_filename; + continue; + } + + var section_index: ?usize = null; + inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |section, i| { + if (mem.eql(u8, "." ++ section.name, name)) section_index = i; + } + if (section_index == null) continue; + if (sections[section_index.?] != null) continue; + + const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); + sections[section_index.?] 
= if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: { + var section_stream = std.io.fixedBufferStream(section_bytes); + var section_reader = section_stream.reader(); + const chdr = section_reader.readStruct(elf.Chdr) catch continue; + if (chdr.ch_type != .ZLIB) continue; + + var zlib_stream = std.compress.zlib.decompressor(section_stream.reader()); + + const decompressed_section = try allocator.alloc(u8, chdr.ch_size); + errdefer allocator.free(decompressed_section); + + const read = zlib_stream.reader().readAll(decompressed_section) catch continue; + assert(read == decompressed_section.len); + + break :blk .{ + .data = decompressed_section, + .virtual_address = shdr.sh_addr, + .owned = true, + }; + } else .{ + .data = section_bytes, + .virtual_address = shdr.sh_addr, + .owned = false, + }; + } + + const missing_debug_info = + sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; + + // Attempt to load debug info from an external file + // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html + if (missing_debug_info) { + + // Only allow one level of debug info nesting + if (parent_mapped_mem) |_| { + return error.MissingDebugInfo; + } + + const global_debug_directories = [_][]const u8{ + "/usr/lib/debug", + }; + + // /.build-id/<2-character id prefix>/.debug + if (build_id) |id| blk: { + if (id.len < 3) break :blk; + + // Either md5 (16 bytes) or sha1 (20 bytes) are used here in practice + const extension = ".debug"; + var id_prefix_buf: [2]u8 = undefined; + var filename_buf: [38 + extension.len]u8 = undefined; + + _ = std.fmt.bufPrint(&id_prefix_buf, "{s}", .{std.fmt.fmtSliceHexLower(id[0..1])}) catch unreachable; + const filename = std.fmt.bufPrint( + &filename_buf, + "{s}" ++ extension, + .{std.fmt.fmtSliceHexLower(id[1..])}, + ) catch break 
:blk; + + for (global_debug_directories) |global_directory| { + const path = try fs.path.join(allocator, &.{ global_directory, ".build-id", &id_prefix_buf, filename }); + defer allocator.free(path); + + return readElfDebugInfo(allocator, path, null, separate_debug_crc, §ions, mapped_mem) catch continue; + } + } + + // use the path from .gnu_debuglink, in the same search order as gdb + if (separate_debug_filename) |separate_filename| blk: { + if (elf_filename != null and mem.eql(u8, elf_filename.?, separate_filename)) return error.MissingDebugInfo; + + // / + if (readElfDebugInfo(allocator, separate_filename, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} + + // /.debug/ + { + const path = try fs.path.join(allocator, &.{ ".debug", separate_filename }); + defer allocator.free(path); + + if (readElfDebugInfo(allocator, path, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} + } + + var cwd_buf: [fs.max_path_bytes]u8 = undefined; + const cwd_path = posix.realpath(".", &cwd_buf) catch break :blk; + + // // + for (global_debug_directories) |global_directory| { + const path = try fs.path.join(allocator, &.{ global_directory, cwd_path, separate_filename }); + defer allocator.free(path); + if (readElfDebugInfo(allocator, path, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} + } + } + + return error.MissingDebugInfo; + } + + var di = Dwarf{ + .endian = endian, + .sections = sections, + .is_macho = false, + }; + + try Dwarf.open(&di, allocator); + + return ModuleDebugInfo{ + .base_address = undefined, + .dwarf = di, + .mapped_memory = parent_mapped_mem orelse mapped_mem, + .external_mapped_memory = if (parent_mapped_mem != null) mapped_mem else null, + }; + } +} + +const MachoSymbol = struct { + strx: u32, + addr: u64, + size: u32, + ofile: u32, + + /// Returns the address from the macho file + fn address(self: MachoSymbol) u64 { + return self.addr; + } + 
+ fn addressLessThan(context: void, lhs: MachoSymbol, rhs: MachoSymbol) bool { + _ = context; + return lhs.addr < rhs.addr; + } +}; + +/// Takes ownership of file, even on error. +/// TODO it's weird to take ownership even on error, rework this code. +fn mapWholeFile(file: File) ![]align(mem.page_size) const u8 { + nosuspend { + defer file.close(); + + const file_len = math.cast(usize, try file.getEndPos()) orelse math.maxInt(usize); + const mapped_mem = try posix.mmap( + null, + file_len, + posix.PROT.READ, + .{ .TYPE = .SHARED }, + file.handle, + 0, + ); + errdefer posix.munmap(mapped_mem); + + return mapped_mem; + } +} + +fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 { + const start = math.cast(usize, offset) orelse return error.Overflow; + const end = start + (math.cast(usize, size) orelse return error.Overflow); + return ptr[start..end]; +} + +pub const SymbolInfo = struct { + symbol_name: []const u8 = "???", + compile_unit_name: []const u8 = "???", + line_info: ?SourceLocation = null, + + pub fn deinit(self: SymbolInfo, allocator: Allocator) void { + if (self.line_info) |li| { + li.deinit(allocator); + } + } +}; + +pub const SourceLocation = struct { + line: u64, + column: u64, + file_name: []const u8, + + pub fn deinit(self: SourceLocation, allocator: Allocator) void { + allocator.free(self.file_name); + } +}; + +fn machoSearchSymbols(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol { + var min: usize = 0; + var max: usize = symbols.len - 1; + while (min < max) { + const mid = min + (max - min) / 2; + const curr = &symbols[mid]; + const next = &symbols[mid + 1]; + if (address >= next.address()) { + min = mid + 1; + } else if (address < curr.address()) { + max = mid; + } else { + return curr; + } + } + + const max_sym = &symbols[symbols.len - 1]; + if (address >= max_sym.address()) + return max_sym; + + return null; +} + +test machoSearchSymbols { + const symbols = [_]MachoSymbol{ + .{ .addr = 100, .strx = 
undefined, .size = undefined, .ofile = undefined }, + .{ .addr = 200, .strx = undefined, .size = undefined, .ofile = undefined }, + .{ .addr = 300, .strx = undefined, .size = undefined, .ofile = undefined }, + }; + + try testing.expectEqual(null, machoSearchSymbols(&symbols, 0)); + try testing.expectEqual(null, machoSearchSymbols(&symbols, 99)); + try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 100).?); + try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 150).?); + try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 199).?); + + try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 200).?); + try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 250).?); + try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 299).?); + + try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 300).?); + try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 301).?); + try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 5000).?); +} + +fn getSymbolFromDwarf(allocator: Allocator, address: u64, di: *Dwarf) !SymbolInfo { + if (nosuspend di.findCompileUnit(address)) |compile_unit| { + return SymbolInfo{ + .symbol_name = nosuspend di.getSymbolName(address) orelse "???", + .compile_unit_name = compile_unit.die.getAttrString(di, std.dwarf.AT.name, di.section(.debug_str), compile_unit.*) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => "???", + }, + .line_info = nosuspend di.getLineNumberInfo(allocator, compile_unit.*, address) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => null, + else => return err, + }, + }; + } else |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => { + return SymbolInfo{}; + }, + else => return err, + } +} diff --git a/lib/std/pdb.zig b/lib/std/pdb.zig index ece1cc63dc92..c96eb81fa9f5 100644 --- a/lib/std/pdb.zig +++ b/lib/std/pdb.zig @@ -706,7 +706,7 @@ pub const Pdb 
= struct { return null; } - pub fn getLineNumberInfo(self: *Pdb, module: *Module, address: u64) !debug.LineInfo { + pub fn getLineNumberInfo(self: *Pdb, module: *Module, address: u64) !debug.Info.SourceLocation { std.debug.assert(module.populated); const subsect_info = module.subsect_info; @@ -731,7 +731,7 @@ pub const Pdb = struct { if (address >= frag_vaddr_start and address < frag_vaddr_end) { // There is an unknown number of LineBlockFragmentHeaders (and their accompanying line and column records) - // from now on. We will iterate through them, and eventually find a LineInfo that we're interested in, + // from now on. We will iterate through them, and eventually find a SourceLocation that we're interested in, // breaking out to :subsections. If not, we will make sure to not read anything outside of this subsection. const subsection_end_index = sect_offset + subsect_hdr.Length; @@ -778,7 +778,7 @@ pub const Pdb = struct { const line_num_entry: *align(1) LineNumberEntry = @ptrCast(&subsect_info[found_line_index]); const flags: *align(1) LineNumberEntry.Flags = @ptrCast(&line_num_entry.Flags); - return debug.LineInfo{ + return debug.Info.SourceLocation{ .file_name = source_file_name, .line = flags.Start, .column = column, From ab0253f6620f5b87f06fdbddfc8876263c2a10a2 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 1 Aug 2024 15:39:27 -0700 Subject: [PATCH 2/6] std.debug.Info: rename ModuleDebugInfo to Module --- lib/std/debug/Info.zig | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/lib/std/debug/Info.zig b/lib/std/debug/Info.zig index e863cb91dee4..9d3074834bb9 100644 --- a/lib/std/debug/Info.zig +++ b/lib/std/debug/Info.zig @@ -26,7 +26,7 @@ const Info = @This(); const root = @import("root"); allocator: Allocator, -address_map: std.AutoHashMap(usize, *ModuleDebugInfo), +address_map: std.AutoHashMap(usize, *Module), modules: if (native_os == .windows) std.ArrayListUnmanaged(WindowsModuleInfo) 
else void, pub const OpenSelfError = error{ @@ -58,9 +58,9 @@ pub fn openSelf(allocator: Allocator) OpenSelfError!Info { } pub fn init(allocator: Allocator) !Info { - var debug_info = Info{ + var debug_info: Info = .{ .allocator = allocator, - .address_map = std.AutoHashMap(usize, *ModuleDebugInfo).init(allocator), + .address_map = std.AutoHashMap(usize, *Module).init(allocator), .modules = if (native_os == .windows) .{} else {}, }; @@ -118,7 +118,7 @@ pub fn deinit(self: *Info) void { } } -pub fn getModuleForAddress(self: *Info, address: usize) !*ModuleDebugInfo { +pub fn getModuleForAddress(self: *Info, address: usize) !*Module { if (comptime builtin.target.isDarwin()) { return self.lookupModuleDyld(address); } else if (native_os == .windows) { @@ -149,7 +149,7 @@ pub fn getModuleNameForAddress(self: *Info, address: usize) ?[]const u8 { } } -fn lookupModuleDyld(self: *Info, address: usize) !*ModuleDebugInfo { +fn lookupModuleDyld(self: *Info, address: usize) !*Module { const image_count = std.c._dyld_image_count(); var i: u32 = 0; @@ -189,7 +189,7 @@ fn lookupModuleDyld(self: *Info, address: usize) !*ModuleDebugInfo { } } - const obj_di = try self.allocator.create(ModuleDebugInfo); + const obj_di = try self.allocator.create(Module); errdefer self.allocator.destroy(obj_di); const macho_path = mem.sliceTo(std.c._dyld_get_image_name(i), 0); @@ -253,14 +253,14 @@ fn lookupModuleNameDyld(self: *Info, address: usize) ?[]const u8 { return null; } -fn lookupModuleWin32(self: *Info, address: usize) !*ModuleDebugInfo { +fn lookupModuleWin32(self: *Info, address: usize) !*Module { for (self.modules.items) |*module| { if (address >= module.base_address and address < module.base_address + module.size) { if (self.address_map.get(module.base_address)) |obj_di| { return obj_di; } - const obj_di = try self.allocator.create(ModuleDebugInfo); + const obj_di = try self.allocator.create(Module); errdefer self.allocator.destroy(obj_di); const mapped_module = @as([*]const u8, 
@ptrFromInt(module.base_address))[0..module.size]; @@ -390,7 +390,7 @@ fn lookupModuleNameDl(self: *Info, address: usize) ?[]const u8 { return null; } -fn lookupModuleDl(self: *Info, address: usize) !*ModuleDebugInfo { +fn lookupModuleDl(self: *Info, address: usize) !*Module { var ctx: struct { // Input address: usize, @@ -458,7 +458,7 @@ fn lookupModuleDl(self: *Info, address: usize) !*ModuleDebugInfo { return obj_di; } - const obj_di = try self.allocator.create(ModuleDebugInfo); + const obj_di = try self.allocator.create(Module); errdefer self.allocator.destroy(obj_di); var sections: Dwarf.SectionArray = Dwarf.null_section_array; @@ -484,19 +484,19 @@ fn lookupModuleDl(self: *Info, address: usize) !*ModuleDebugInfo { return obj_di; } -fn lookupModuleHaiku(self: *Info, address: usize) !*ModuleDebugInfo { +fn lookupModuleHaiku(self: *Info, address: usize) !*Module { _ = self; _ = address; @panic("TODO implement lookup module for Haiku"); } -fn lookupModuleWasm(self: *Info, address: usize) !*ModuleDebugInfo { +fn lookupModuleWasm(self: *Info, address: usize) !*Module { _ = self; _ = address; @panic("TODO implement lookup module for Wasm"); } -pub const ModuleDebugInfo = switch (native_os) { +pub const Module = switch (native_os) { .macos, .ios, .watchos, .tvos, .visionos => struct { base_address: usize, vmaddr_slide: usize, @@ -861,7 +861,7 @@ pub const WindowsModuleInfo = struct { /// This takes ownership of macho_file: users of this function should not close /// it themselves, even on error. /// TODO it's weird to take ownership even on error, rework this code. 
-fn readMachODebugInfo(allocator: Allocator, macho_file: File) !ModuleDebugInfo { +fn readMachODebugInfo(allocator: Allocator, macho_file: File) !Module { const mapped_mem = try mapWholeFile(macho_file); const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); @@ -975,19 +975,19 @@ fn readMachODebugInfo(allocator: Allocator, macho_file: File) !ModuleDebugInfo { // This sort is so that we can binary search later. mem.sort(MachoSymbol, symbols, {}, MachoSymbol.addressLessThan); - return ModuleDebugInfo{ + return .{ .base_address = undefined, .vmaddr_slide = undefined, .mapped_memory = mapped_mem, - .ofiles = ModuleDebugInfo.OFileTable.init(allocator), + .ofiles = Module.OFileTable.init(allocator), .symbols = symbols, .strings = strings, }; } -fn readCoffDebugInfo(allocator: Allocator, coff_obj: *coff.Coff) !ModuleDebugInfo { +fn readCoffDebugInfo(allocator: Allocator, coff_obj: *coff.Coff) !Module { nosuspend { - var di = ModuleDebugInfo{ + var di: Module = .{ .base_address = undefined, .coff_image_base = coff_obj.getImageBase(), .coff_section_headers = undefined, @@ -1062,7 +1062,7 @@ pub fn readElfDebugInfo( expected_crc: ?u32, parent_sections: *Dwarf.SectionArray, parent_mapped_mem: ?[]align(mem.page_size) const u8, -) !ModuleDebugInfo { +) !Module { nosuspend { const elf_file = (if (elf_filename) |filename| blk: { break :blk fs.cwd().openFile(filename, .{}); @@ -1236,7 +1236,7 @@ pub fn readElfDebugInfo( try Dwarf.open(&di, allocator); - return ModuleDebugInfo{ + return .{ .base_address = undefined, .dwarf = di, .mapped_memory = parent_mapped_mem orelse mapped_mem, From 290966c2497dc9d212bf9d4bd0fecee4988091a5 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 1 Aug 2024 16:31:03 -0700 Subject: [PATCH 3/6] std.debug: rename Info to SelfInfo This code has the hard-coded goal of supporting the executable's own debug information and makes design choices along that goal, such as memory-mapping the inputs, using dl_iterate_phdr, and doing 
conditional compilation on the host target. A more general-purpose implementation of debug information may be able to share code with this, but there are some fundamental incompatibilities. For example, the "SelfInfo" implementation wants to avoid bloating the binary with PDB on POSIX systems, and likewise DWARF on Windows systems, while a general-purpose implementation needs to support both PDB and DWARF from the same binary. It might, for example, inspect the debug information from a cross-compiled binary. `SourceLocation` now lives at `std.debug.SourceLocation` and is documented. Deprecate `std.debug.runtime_safety` because it returns the optimization mode of the standard library, when the caller probably wants to use the optimization mode of their own module. `std.pdb.Pdb` is moved to `std.debug.Pdb`, mirroring the recent extraction of `std.debug.Dwarf` from `std.dwarf`. I have no idea why we have both Module (with a Windows-specific definition) and WindowsModule. I left some passive aggressive doc comments to express my frustration. 
--- lib/std/debug.zig | 57 ++- lib/std/debug/Dwarf.zig | 6 +- lib/std/debug/Pdb.zig | 591 ++++++++++++++++++++++ lib/std/debug/{Info.zig => SelfInfo.zig} | 66 ++- lib/std/pdb.zig | 607 +---------------------- 5 files changed, 668 insertions(+), 659 deletions(-) create mode 100644 lib/std/debug/Pdb.zig rename lib/std/debug/{Info.zig => SelfInfo.zig} (96%) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 6dac92188e7e..2753a0f52f2e 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -6,11 +6,6 @@ const io = std.io; const posix = std.posix; const fs = std.fs; const testing = std.testing; -const elf = std.elf; -const DW = std.dwarf; -const macho = std.macho; -const coff = std.coff; -const pdb = std.pdb; const root = @import("root"); const File = std.fs.File; const windows = std.os.windows; @@ -19,8 +14,22 @@ const native_os = builtin.os.tag; const native_endian = native_arch.endian(); pub const Dwarf = @import("debug/Dwarf.zig"); -pub const Info = @import("debug/Info.zig"); +pub const Pdb = @import("debug/Pdb.zig"); +pub const SelfInfo = @import("debug/SelfInfo.zig"); + +/// Unresolved source locations can be represented with a single `usize` that +/// corresponds to a virtual memory address of the program counter. Combined +/// with debug information, those values can be converted into a resolved +/// source location, including file, line, and column. +pub const SourceLocation = struct { + line: u64, + column: u64, + file_name: []const u8, +}; +/// Deprecated because it returns the optimization mode of the standard +/// library, when the caller probably wants to use the optimization mode of +/// their own module. 
pub const runtime_safety = switch (builtin.mode) { .Debug, .ReleaseSafe => true, .ReleaseFast, .ReleaseSmall => false, @@ -72,13 +81,13 @@ pub fn getStderrMutex() *std.Thread.Mutex { } /// TODO multithreaded awareness -var self_debug_info: ?Info = null; +var self_debug_info: ?SelfInfo = null; -pub fn getSelfDebugInfo() !*Info { +pub fn getSelfDebugInfo() !*SelfInfo { if (self_debug_info) |*info| { return info; } else { - self_debug_info = try Info.openSelf(getDebugInfoAllocator()); + self_debug_info = try SelfInfo.openSelf(getDebugInfoAllocator()); return &self_debug_info.?; } } @@ -316,7 +325,7 @@ pub fn captureStackTrace(first_address: ?usize, stack_trace: *std.builtin.StackT stack_trace.index = slice.len; } else { // TODO: This should use the DWARF unwinder if .eh_frame_hdr is available (so that full debug info parsing isn't required). - // A new path for loading Info needs to be created which will only attempt to parse in-memory sections, because + // A new path for loading SelfInfo needs to be created which will only attempt to parse in-memory sections, because // stopping to load other debug info (ie. source line info) from disk here is not required for unwinding. var it = StackIterator.init(first_address, null); defer it.deinit(); @@ -494,7 +503,7 @@ pub fn writeStackTrace( stack_trace: std.builtin.StackTrace, out_stream: anytype, allocator: mem.Allocator, - debug_info: *Info, + debug_info: *SelfInfo, tty_config: io.tty.Config, ) !void { _ = allocator; @@ -531,11 +540,11 @@ pub const StackIterator = struct { fp: usize, ma: MemoryAccessor = MemoryAccessor.init, - // When Info and a register context is available, this iterator can unwind + // When SelfInfo and a register context is available, this iterator can unwind // stacks with frames that don't use a frame pointer (ie. -fomit-frame-pointer), // using DWARF and MachO unwind info. 
unwind_state: if (have_ucontext) ?struct { - debug_info: *Info, + debug_info: *SelfInfo, dwarf_context: Dwarf.UnwindContext, last_error: ?UnwindError = null, failed: bool = false, @@ -560,7 +569,7 @@ pub const StackIterator = struct { }; } - pub fn initWithContext(first_address: ?usize, debug_info: *Info, context: *const posix.ucontext_t) !StackIterator { + pub fn initWithContext(first_address: ?usize, debug_info: *SelfInfo, context: *const posix.ucontext_t) !StackIterator { // The implementation of DWARF unwinding on aarch64-macos is not complete. However, Apple mandates that // the frame pointer register is always used, so on this platform we can safely use the FP-based unwinder. if (comptime builtin.target.isDarwin() and native_arch == .aarch64) { @@ -820,7 +829,7 @@ const have_msync = switch (native_os) { pub fn writeCurrentStackTrace( out_stream: anytype, - debug_info: *Info, + debug_info: *SelfInfo, tty_config: io.tty.Config, start_addr: ?usize, ) !void { @@ -906,7 +915,7 @@ pub noinline fn walkStackWindows(addresses: []usize, existing_context: ?*const w pub fn writeStackTraceWindows( out_stream: anytype, - debug_info: *Info, + debug_info: *SelfInfo, tty_config: io.tty.Config, context: *const windows.CONTEXT, start_addr: ?usize, @@ -925,7 +934,7 @@ pub fn writeStackTraceWindows( } } -fn printUnknownSource(debug_info: *Info, out_stream: anytype, address: usize, tty_config: io.tty.Config) !void { +fn printUnknownSource(debug_info: *SelfInfo, out_stream: anytype, address: usize, tty_config: io.tty.Config) !void { const module_name = debug_info.getModuleNameForAddress(address); return printLineInfo( out_stream, @@ -938,14 +947,14 @@ fn printUnknownSource(debug_info: *Info, out_stream: anytype, address: usize, tt ); } -fn printLastUnwindError(it: *StackIterator, debug_info: *Info, out_stream: anytype, tty_config: io.tty.Config) void { +fn printLastUnwindError(it: *StackIterator, debug_info: *SelfInfo, out_stream: anytype, tty_config: io.tty.Config) void { if 
(!have_ucontext) return; if (it.getLastError()) |unwind_error| { printUnwindError(debug_info, out_stream, unwind_error.address, unwind_error.err, tty_config) catch {}; } } -fn printUnwindError(debug_info: *Info, out_stream: anytype, address: usize, err: UnwindError, tty_config: io.tty.Config) !void { +fn printUnwindError(debug_info: *SelfInfo, out_stream: anytype, address: usize, err: UnwindError, tty_config: io.tty.Config) !void { const module_name = debug_info.getModuleNameForAddress(address) orelse "???"; try tty_config.setColor(out_stream, .dim); if (err == error.MissingDebugInfo) { @@ -956,7 +965,7 @@ fn printUnwindError(debug_info: *Info, out_stream: anytype, address: usize, err: try tty_config.setColor(out_stream, .reset); } -pub fn printSourceAtAddress(debug_info: *Info, out_stream: anytype, address: usize, tty_config: io.tty.Config) !void { +pub fn printSourceAtAddress(debug_info: *SelfInfo, out_stream: anytype, address: usize, tty_config: io.tty.Config) !void { const module = debug_info.getModuleForAddress(address) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => return printUnknownSource(debug_info, out_stream, address, tty_config), else => return err, @@ -981,7 +990,7 @@ pub fn printSourceAtAddress(debug_info: *Info, out_stream: anytype, address: usi fn printLineInfo( out_stream: anytype, - line_info: ?Info.SourceLocation, + line_info: ?SourceLocation, address: usize, symbol_name: []const u8, compile_unit_name: []const u8, @@ -1027,7 +1036,7 @@ fn printLineInfo( } } -fn printLineFromFileAnyOs(out_stream: anytype, line_info: Info.SourceLocation) !void { +fn printLineFromFileAnyOs(out_stream: anytype, line_info: SourceLocation) !void { // Need this to always block even in async I/O mode, because this could potentially // be called from e.g. the event loop code crashing. 
var f = try fs.cwd().openFile(line_info.file_name, .{}); @@ -1093,7 +1102,7 @@ test printLineFromFileAnyOs { var test_dir = std.testing.tmpDir(.{}); defer test_dir.cleanup(); - // Relies on testing.tmpDir internals which is not ideal, but Info.SourceLocation requires paths. + // Relies on testing.tmpDir internals which is not ideal, but SourceLocation requires paths. const test_dir_path = try join(allocator, &.{ ".zig-cache", "tmp", test_dir.sub_path[0..] }); defer allocator.free(test_dir_path); @@ -1439,7 +1448,7 @@ test "manage resources correctly" { } const writer = std.io.null_writer; - var di = try Info.openSelf(testing.allocator); + var di = try SelfInfo.openSelf(testing.allocator); defer di.deinit(); try printSourceAtAddress(&di, writer, showMyTrace(), io.tty.detectConfig(std.io.getStdErr())); } diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 353c097471ab..4fff2562b243 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -1353,7 +1353,7 @@ pub fn getLineNumberInfo( allocator: Allocator, compile_unit: CompileUnit, target_address: u64, -) !std.debug.Info.SourceLocation { +) !std.debug.SourceLocation { const compile_unit_cwd = try compile_unit.die.getAttrString(di, AT.comp_dir, di.section(.debug_line_str), compile_unit); const line_info_offset = try compile_unit.die.getAttrSecOffset(AT.stmt_list); @@ -2084,7 +2084,7 @@ const LineNumberProgram = struct { self: *LineNumberProgram, allocator: Allocator, file_entries: []const FileEntry, - ) !?std.debug.Info.SourceLocation { + ) !?std.debug.SourceLocation { if (self.prev_valid and self.target_address >= self.prev_address and self.target_address < self.address) @@ -2104,7 +2104,7 @@ const LineNumberProgram = struct { dir_name, file_entry.path, }); - return std.debug.Info.SourceLocation{ + return std.debug.SourceLocation{ .line = if (self.prev_line >= 0) @as(u64, @intCast(self.prev_line)) else 0, .column = self.prev_column, .file_name = file_name, diff --git 
a/lib/std/debug/Pdb.zig b/lib/std/debug/Pdb.zig new file mode 100644 index 000000000000..bdcc108c1dc0 --- /dev/null +++ b/lib/std/debug/Pdb.zig @@ -0,0 +1,591 @@ +const std = @import("../std.zig"); +const File = std.fs.File; +const Allocator = std.mem.Allocator; +const pdb = std.pdb; + +const Pdb = @This(); + +in_file: File, +msf: Msf, +allocator: Allocator, +string_table: ?*MsfStream, +dbi: ?*MsfStream, +modules: []Module, +sect_contribs: []pdb.SectionContribEntry, +guid: [16]u8, +age: u32, + +pub const Module = struct { + mod_info: pdb.ModInfo, + module_name: []u8, + obj_file_name: []u8, + // The fields below are filled on demand. + populated: bool, + symbols: []u8, + subsect_info: []u8, + checksum_offset: ?usize, + + pub fn deinit(self: *Module, allocator: Allocator) void { + allocator.free(self.module_name); + allocator.free(self.obj_file_name); + if (self.populated) { + allocator.free(self.symbols); + allocator.free(self.subsect_info); + } + } +}; + +pub fn init(allocator: Allocator, path: []const u8) !Pdb { + const file = try std.fs.cwd().openFile(path, .{}); + errdefer file.close(); + + return .{ + .in_file = file, + .allocator = allocator, + .string_table = null, + .dbi = null, + .msf = try Msf.init(allocator, file), + .modules = &[_]Module{}, + .sect_contribs = &[_]pdb.SectionContribEntry{}, + .guid = undefined, + .age = undefined, + }; +} + +pub fn deinit(self: *Pdb) void { + self.in_file.close(); + self.msf.deinit(self.allocator); + for (self.modules) |*module| { + module.deinit(self.allocator); + } + self.allocator.free(self.modules); + self.allocator.free(self.sect_contribs); +} + +pub fn parseDbiStream(self: *Pdb) !void { + var stream = self.getStream(pdb.StreamType.Dbi) orelse + return error.InvalidDebugInfo; + const reader = stream.reader(); + + const header = try reader.readStruct(std.pdb.DbiStreamHeader); + if (header.VersionHeader != 19990903) // V70, only value observed by LLVM team + return error.UnknownPDBVersion; + // if (header.Age != age) + 
// return error.UnmatchingPDB; + + const mod_info_size = header.ModInfoSize; + const section_contrib_size = header.SectionContributionSize; + + var modules = std.ArrayList(Module).init(self.allocator); + errdefer modules.deinit(); + + // Module Info Substream + var mod_info_offset: usize = 0; + while (mod_info_offset != mod_info_size) { + const mod_info = try reader.readStruct(pdb.ModInfo); + var this_record_len: usize = @sizeOf(pdb.ModInfo); + + const module_name = try reader.readUntilDelimiterAlloc(self.allocator, 0, 1024); + errdefer self.allocator.free(module_name); + this_record_len += module_name.len + 1; + + const obj_file_name = try reader.readUntilDelimiterAlloc(self.allocator, 0, 1024); + errdefer self.allocator.free(obj_file_name); + this_record_len += obj_file_name.len + 1; + + if (this_record_len % 4 != 0) { + const round_to_next_4 = (this_record_len | 0x3) + 1; + const march_forward_bytes = round_to_next_4 - this_record_len; + try stream.seekBy(@as(isize, @intCast(march_forward_bytes))); + this_record_len += march_forward_bytes; + } + + try modules.append(Module{ + .mod_info = mod_info, + .module_name = module_name, + .obj_file_name = obj_file_name, + + .populated = false, + .symbols = undefined, + .subsect_info = undefined, + .checksum_offset = null, + }); + + mod_info_offset += this_record_len; + if (mod_info_offset > mod_info_size) + return error.InvalidDebugInfo; + } + + // Section Contribution Substream + var sect_contribs = std.ArrayList(pdb.SectionContribEntry).init(self.allocator); + errdefer sect_contribs.deinit(); + + var sect_cont_offset: usize = 0; + if (section_contrib_size != 0) { + const version = reader.readEnum(std.pdb.SectionContrSubstreamVersion, .little) catch |err| switch (err) { + error.InvalidValue => return error.InvalidDebugInfo, + else => |e| return e, + }; + _ = version; + sect_cont_offset += @sizeOf(u32); + } + while (sect_cont_offset != section_contrib_size) { + const entry = try sect_contribs.addOne(); + entry.* = try 
reader.readStruct(pdb.SectionContribEntry); + sect_cont_offset += @sizeOf(pdb.SectionContribEntry); + + if (sect_cont_offset > section_contrib_size) + return error.InvalidDebugInfo; + } + + self.modules = try modules.toOwnedSlice(); + self.sect_contribs = try sect_contribs.toOwnedSlice(); +} + +pub fn parseInfoStream(self: *Pdb) !void { + var stream = self.getStream(pdb.StreamType.Pdb) orelse + return error.InvalidDebugInfo; + const reader = stream.reader(); + + // Parse the InfoStreamHeader. + const version = try reader.readInt(u32, .little); + const signature = try reader.readInt(u32, .little); + _ = signature; + const age = try reader.readInt(u32, .little); + const guid = try reader.readBytesNoEof(16); + + if (version != 20000404) // VC70, only value observed by LLVM team + return error.UnknownPDBVersion; + + self.guid = guid; + self.age = age; + + // Find the string table. + const string_table_index = str_tab_index: { + const name_bytes_len = try reader.readInt(u32, .little); + const name_bytes = try self.allocator.alloc(u8, name_bytes_len); + defer self.allocator.free(name_bytes); + try reader.readNoEof(name_bytes); + + const HashTableHeader = extern struct { + Size: u32, + Capacity: u32, + + fn maxLoad(cap: u32) u32 { + return cap * 2 / 3 + 1; + } + }; + const hash_tbl_hdr = try reader.readStruct(HashTableHeader); + if (hash_tbl_hdr.Capacity == 0) + return error.InvalidDebugInfo; + + if (hash_tbl_hdr.Size > HashTableHeader.maxLoad(hash_tbl_hdr.Capacity)) + return error.InvalidDebugInfo; + + const present = try readSparseBitVector(&reader, self.allocator); + defer self.allocator.free(present); + if (present.len != hash_tbl_hdr.Size) + return error.InvalidDebugInfo; + const deleted = try readSparseBitVector(&reader, self.allocator); + defer self.allocator.free(deleted); + + for (present) |_| { + const name_offset = try reader.readInt(u32, .little); + const name_index = try reader.readInt(u32, .little); + if (name_offset > name_bytes.len) + return 
error.InvalidDebugInfo; + const name = std.mem.sliceTo(name_bytes[name_offset..], 0); + if (std.mem.eql(u8, name, "/names")) { + break :str_tab_index name_index; + } + } + return error.MissingDebugInfo; + }; + + self.string_table = self.getStreamById(string_table_index) orelse + return error.MissingDebugInfo; +} + +pub fn getSymbolName(self: *Pdb, module: *Module, address: u64) ?[]const u8 { + _ = self; + std.debug.assert(module.populated); + + var symbol_i: usize = 0; + while (symbol_i != module.symbols.len) { + const prefix = @as(*align(1) pdb.RecordPrefix, @ptrCast(&module.symbols[symbol_i])); + if (prefix.RecordLen < 2) + return null; + switch (prefix.RecordKind) { + .S_LPROC32, .S_GPROC32 => { + const proc_sym = @as(*align(1) pdb.ProcSym, @ptrCast(&module.symbols[symbol_i + @sizeOf(pdb.RecordPrefix)])); + if (address >= proc_sym.CodeOffset and address < proc_sym.CodeOffset + proc_sym.CodeSize) { + return std.mem.sliceTo(@as([*:0]u8, @ptrCast(&proc_sym.Name[0])), 0); + } + }, + else => {}, + } + symbol_i += prefix.RecordLen + @sizeOf(u16); + } + + return null; +} + +pub fn getLineNumberInfo(self: *Pdb, module: *Module, address: u64) !std.debug.SourceLocation { + std.debug.assert(module.populated); + const subsect_info = module.subsect_info; + + var sect_offset: usize = 0; + var skip_len: usize = undefined; + const checksum_offset = module.checksum_offset orelse return error.MissingDebugInfo; + while (sect_offset != subsect_info.len) : (sect_offset += skip_len) { + const subsect_hdr = @as(*align(1) pdb.DebugSubsectionHeader, @ptrCast(&subsect_info[sect_offset])); + skip_len = subsect_hdr.Length; + sect_offset += @sizeOf(pdb.DebugSubsectionHeader); + + switch (subsect_hdr.Kind) { + .Lines => { + var line_index = sect_offset; + + const line_hdr = @as(*align(1) pdb.LineFragmentHeader, @ptrCast(&subsect_info[line_index])); + if (line_hdr.RelocSegment == 0) + return error.MissingDebugInfo; + line_index += @sizeOf(pdb.LineFragmentHeader); + const frag_vaddr_start = 
line_hdr.RelocOffset; + const frag_vaddr_end = frag_vaddr_start + line_hdr.CodeSize; + + if (address >= frag_vaddr_start and address < frag_vaddr_end) { + // There is an unknown number of LineBlockFragmentHeaders (and their accompanying line and column records) + // from now on. We will iterate through them, and eventually find a SourceLocation that we're interested in, + // breaking out to :subsections. If not, we will make sure to not read anything outside of this subsection. + const subsection_end_index = sect_offset + subsect_hdr.Length; + + while (line_index < subsection_end_index) { + const block_hdr = @as(*align(1) pdb.LineBlockFragmentHeader, @ptrCast(&subsect_info[line_index])); + line_index += @sizeOf(pdb.LineBlockFragmentHeader); + const start_line_index = line_index; + + const has_column = line_hdr.Flags.LF_HaveColumns; + + // All line entries are stored inside their line block by ascending start address. + // Heuristic: we want to find the last line entry + // that has a vaddr_start <= address. + // This is done with a simple linear search. + var line_i: u32 = 0; + while (line_i < block_hdr.NumLines) : (line_i += 1) { + const line_num_entry = @as(*align(1) pdb.LineNumberEntry, @ptrCast(&subsect_info[line_index])); + line_index += @sizeOf(pdb.LineNumberEntry); + + const vaddr_start = frag_vaddr_start + line_num_entry.Offset; + if (address < vaddr_start) { + break; + } + } + + // line_i == 0 would mean that no matching pdb.LineNumberEntry was found. 
+ if (line_i > 0) { + const subsect_index = checksum_offset + block_hdr.NameIndex; + const chksum_hdr = @as(*align(1) pdb.FileChecksumEntryHeader, @ptrCast(&module.subsect_info[subsect_index])); + const strtab_offset = @sizeOf(pdb.StringTableHeader) + chksum_hdr.FileNameOffset; + try self.string_table.?.seekTo(strtab_offset); + const source_file_name = try self.string_table.?.reader().readUntilDelimiterAlloc(self.allocator, 0, 1024); + + const line_entry_idx = line_i - 1; + + const column = if (has_column) blk: { + const start_col_index = start_line_index + @sizeOf(pdb.LineNumberEntry) * block_hdr.NumLines; + const col_index = start_col_index + @sizeOf(pdb.ColumnNumberEntry) * line_entry_idx; + const col_num_entry = @as(*align(1) pdb.ColumnNumberEntry, @ptrCast(&subsect_info[col_index])); + break :blk col_num_entry.StartColumn; + } else 0; + + const found_line_index = start_line_index + line_entry_idx * @sizeOf(pdb.LineNumberEntry); + const line_num_entry: *align(1) pdb.LineNumberEntry = @ptrCast(&subsect_info[found_line_index]); + const flags: *align(1) pdb.LineNumberEntry.Flags = @ptrCast(&line_num_entry.Flags); + + return .{ + .file_name = source_file_name, + .line = flags.Start, + .column = column, + }; + } + } + + // Checking that we are not reading garbage after the (possibly) multiple block fragments. + if (line_index != subsection_end_index) { + return error.InvalidDebugInfo; + } + } + }, + else => {}, + } + + if (sect_offset > subsect_info.len) + return error.InvalidDebugInfo; + } + + return error.MissingDebugInfo; +} + +pub fn getModule(self: *Pdb, index: usize) !?*Module { + if (index >= self.modules.len) + return null; + + const mod = &self.modules[index]; + if (mod.populated) + return mod; + + // At most one can be non-zero. 
+ if (mod.mod_info.C11ByteSize != 0 and mod.mod_info.C13ByteSize != 0) + return error.InvalidDebugInfo; + if (mod.mod_info.C13ByteSize == 0) + return error.InvalidDebugInfo; + + const stream = self.getStreamById(mod.mod_info.ModuleSymStream) orelse + return error.MissingDebugInfo; + const reader = stream.reader(); + + const signature = try reader.readInt(u32, .little); + if (signature != 4) + return error.InvalidDebugInfo; + + mod.symbols = try self.allocator.alloc(u8, mod.mod_info.SymByteSize - 4); + errdefer self.allocator.free(mod.symbols); + try reader.readNoEof(mod.symbols); + + mod.subsect_info = try self.allocator.alloc(u8, mod.mod_info.C13ByteSize); + errdefer self.allocator.free(mod.subsect_info); + try reader.readNoEof(mod.subsect_info); + + var sect_offset: usize = 0; + var skip_len: usize = undefined; + while (sect_offset != mod.subsect_info.len) : (sect_offset += skip_len) { + const subsect_hdr = @as(*align(1) pdb.DebugSubsectionHeader, @ptrCast(&mod.subsect_info[sect_offset])); + skip_len = subsect_hdr.Length; + sect_offset += @sizeOf(pdb.DebugSubsectionHeader); + + switch (subsect_hdr.Kind) { + .FileChecksums => { + mod.checksum_offset = sect_offset; + break; + }, + else => {}, + } + + if (sect_offset > mod.subsect_info.len) + return error.InvalidDebugInfo; + } + + mod.populated = true; + return mod; +} + +pub fn getStreamById(self: *Pdb, id: u32) ?*MsfStream { + if (id >= self.msf.streams.len) + return null; + return &self.msf.streams[id]; +} + +pub fn getStream(self: *Pdb, stream: pdb.StreamType) ?*MsfStream { + const id = @intFromEnum(stream); + return self.getStreamById(id); +} + +/// https://llvm.org/docs/PDB/MsfFile.html +const Msf = struct { + directory: MsfStream, + streams: []MsfStream, + + fn init(allocator: Allocator, file: File) !Msf { + const in = file.reader(); + + const superblock = try in.readStruct(pdb.SuperBlock); + + // Sanity checks + if (!std.mem.eql(u8, &superblock.FileMagic, pdb.SuperBlock.file_magic)) + return 
error.InvalidDebugInfo; + if (superblock.FreeBlockMapBlock != 1 and superblock.FreeBlockMapBlock != 2) + return error.InvalidDebugInfo; + const file_len = try file.getEndPos(); + if (superblock.NumBlocks * superblock.BlockSize != file_len) + return error.InvalidDebugInfo; + switch (superblock.BlockSize) { + // llvm only supports 4096 but we can handle any of these values + 512, 1024, 2048, 4096 => {}, + else => return error.InvalidDebugInfo, + } + + const dir_block_count = blockCountFromSize(superblock.NumDirectoryBytes, superblock.BlockSize); + if (dir_block_count > superblock.BlockSize / @sizeOf(u32)) + return error.UnhandledBigDirectoryStream; // cf. BlockMapAddr comment. + + try file.seekTo(superblock.BlockSize * superblock.BlockMapAddr); + const dir_blocks = try allocator.alloc(u32, dir_block_count); + for (dir_blocks) |*b| { + b.* = try in.readInt(u32, .little); + } + var directory = MsfStream.init( + superblock.BlockSize, + file, + dir_blocks, + ); + + const begin = directory.pos; + const stream_count = try directory.reader().readInt(u32, .little); + const stream_sizes = try allocator.alloc(u32, stream_count); + defer allocator.free(stream_sizes); + + // Microsoft's implementation uses @as(u32, -1) for inexistent streams. + // These streams are not used, but still participate in the file + // and must be taken into account when resolving stream indices. + const Nil = 0xFFFFFFFF; + for (stream_sizes) |*s| { + const size = try directory.reader().readInt(u32, .little); + s.* = if (size == Nil) 0 else blockCountFromSize(size, superblock.BlockSize); + } + + const streams = try allocator.alloc(MsfStream, stream_count); + for (streams, 0..) 
|*stream, i| { + const size = stream_sizes[i]; + if (size == 0) { + stream.* = MsfStream{ + .blocks = &[_]u32{}, + }; + } else { + var blocks = try allocator.alloc(u32, size); + var j: u32 = 0; + while (j < size) : (j += 1) { + const block_id = try directory.reader().readInt(u32, .little); + const n = (block_id % superblock.BlockSize); + // 0 is for pdb.SuperBlock, 1 and 2 for FPMs. + if (block_id == 0 or n == 1 or n == 2 or block_id * superblock.BlockSize > file_len) + return error.InvalidBlockIndex; + blocks[j] = block_id; + } + + stream.* = MsfStream.init( + superblock.BlockSize, + file, + blocks, + ); + } + } + + const end = directory.pos; + if (end - begin != superblock.NumDirectoryBytes) + return error.InvalidStreamDirectory; + + return Msf{ + .directory = directory, + .streams = streams, + }; + } + + fn deinit(self: *Msf, allocator: Allocator) void { + allocator.free(self.directory.blocks); + for (self.streams) |*stream| { + allocator.free(stream.blocks); + } + allocator.free(self.streams); + } +}; + +const MsfStream = struct { + in_file: File = undefined, + pos: u64 = undefined, + blocks: []u32 = undefined, + block_size: u32 = undefined, + + pub const Error = @typeInfo(@typeInfo(@TypeOf(read)).Fn.return_type.?).ErrorUnion.error_set; + + fn init(block_size: u32, file: File, blocks: []u32) MsfStream { + const stream = MsfStream{ + .in_file = file, + .pos = 0, + .blocks = blocks, + .block_size = block_size, + }; + + return stream; + } + + fn read(self: *MsfStream, buffer: []u8) !usize { + var block_id = @as(usize, @intCast(self.pos / self.block_size)); + if (block_id >= self.blocks.len) return 0; // End of Stream + var block = self.blocks[block_id]; + var offset = self.pos % self.block_size; + + try self.in_file.seekTo(block * self.block_size + offset); + const in = self.in_file.reader(); + + var size: usize = 0; + var rem_buffer = buffer; + while (size < buffer.len) { + const size_to_read = @min(self.block_size - offset, rem_buffer.len); + size += try 
in.read(rem_buffer[0..size_to_read]); + rem_buffer = buffer[size..]; + offset += size_to_read; + + // If we're at the end of a block, go to the next one. + if (offset == self.block_size) { + offset = 0; + block_id += 1; + if (block_id >= self.blocks.len) break; // End of Stream + block = self.blocks[block_id]; + try self.in_file.seekTo(block * self.block_size); + } + } + + self.pos += buffer.len; + return buffer.len; + } + + pub fn seekBy(self: *MsfStream, len: i64) !void { + self.pos = @as(u64, @intCast(@as(i64, @intCast(self.pos)) + len)); + if (self.pos >= self.blocks.len * self.block_size) + return error.EOF; + } + + pub fn seekTo(self: *MsfStream, len: u64) !void { + self.pos = len; + if (self.pos >= self.blocks.len * self.block_size) + return error.EOF; + } + + fn getSize(self: *const MsfStream) u64 { + return self.blocks.len * self.block_size; + } + + fn getFilePos(self: MsfStream) u64 { + const block_id = self.pos / self.block_size; + const block = self.blocks[block_id]; + const offset = self.pos % self.block_size; + + return block * self.block_size + offset; + } + + pub fn reader(self: *MsfStream) std.io.Reader(*MsfStream, Error, read) { + return .{ .context = self }; + } +}; + +fn readSparseBitVector(stream: anytype, allocator: Allocator) ![]u32 { + const num_words = try stream.readInt(u32, .little); + var list = std.ArrayList(u32).init(allocator); + errdefer list.deinit(); + var word_i: u32 = 0; + while (word_i != num_words) : (word_i += 1) { + const word = try stream.readInt(u32, .little); + var bit_i: u5 = 0; + while (true) : (bit_i += 1) { + if (word & (@as(u32, 1) << bit_i) != 0) { + try list.append(word_i * 32 + bit_i); + } + if (bit_i == std.math.maxInt(u5)) break; + } + } + return try list.toOwnedSlice(); +} + +fn blockCountFromSize(size: u32, block_size: u32) u32 { + return (size + block_size - 1) / block_size; +} diff --git a/lib/std/debug/Info.zig b/lib/std/debug/SelfInfo.zig similarity index 96% rename from lib/std/debug/Info.zig rename to 
lib/std/debug/SelfInfo.zig index 9d3074834bb9..58fe4b23b263 100644 --- a/lib/std/debug/Info.zig +++ b/lib/std/debug/SelfInfo.zig @@ -1,4 +1,5 @@ -//! Cross-platform abstraction for debug information. +//! Cross-platform abstraction for this binary's own debug information, with a +//! goal of minimal code bloat and compilation speed penalty. const builtin = @import("builtin"); const native_os = builtin.os.tag; @@ -17,24 +18,25 @@ const assert = std.debug.assert; const posix = std.posix; const elf = std.elf; const Dwarf = std.debug.Dwarf; +const Pdb = std.debug.Pdb; const File = std.fs.File; const math = std.math; const testing = std.testing; -const Info = @This(); +const SelfInfo = @This(); const root = @import("root"); allocator: Allocator, address_map: std.AutoHashMap(usize, *Module), -modules: if (native_os == .windows) std.ArrayListUnmanaged(WindowsModuleInfo) else void, +modules: if (native_os == .windows) std.ArrayListUnmanaged(WindowsModule) else void, pub const OpenSelfError = error{ MissingDebugInfo, UnsupportedOperatingSystem, -} || @typeInfo(@typeInfo(@TypeOf(Info.init)).Fn.return_type.?).ErrorUnion.error_set; +} || @typeInfo(@typeInfo(@TypeOf(SelfInfo.init)).Fn.return_type.?).ErrorUnion.error_set; -pub fn openSelf(allocator: Allocator) OpenSelfError!Info { +pub fn openSelf(allocator: Allocator) OpenSelfError!SelfInfo { nosuspend { if (builtin.strip_debug_info) return error.MissingDebugInfo; @@ -51,14 +53,14 @@ pub fn openSelf(allocator: Allocator) OpenSelfError!Info { .solaris, .illumos, .windows, - => return try Info.init(allocator), + => return try SelfInfo.init(allocator), else => return error.UnsupportedOperatingSystem, } } } -pub fn init(allocator: Allocator) !Info { - var debug_info: Info = .{ +pub fn init(allocator: Allocator) !SelfInfo { + var debug_info: SelfInfo = .{ .allocator = allocator, .address_map = std.AutoHashMap(usize, *Module).init(allocator), .modules = if (native_os == .windows) .{} else {}, @@ -101,7 +103,7 @@ pub fn 
init(allocator: Allocator) !Info { return debug_info; } -pub fn deinit(self: *Info) void { +pub fn deinit(self: *SelfInfo) void { var it = self.address_map.iterator(); while (it.next()) |entry| { const mdi = entry.value_ptr.*; @@ -118,7 +120,7 @@ pub fn deinit(self: *Info) void { } } -pub fn getModuleForAddress(self: *Info, address: usize) !*Module { +pub fn getModuleForAddress(self: *SelfInfo, address: usize) !*Module { if (comptime builtin.target.isDarwin()) { return self.lookupModuleDyld(address); } else if (native_os == .windows) { @@ -135,7 +137,7 @@ pub fn getModuleForAddress(self: *Info, address: usize) !*Module { // Returns the module name for a given address. // This can be called when getModuleForAddress fails, so implementations should provide // a path that doesn't rely on any side-effects of a prior successful module lookup. -pub fn getModuleNameForAddress(self: *Info, address: usize) ?[]const u8 { +pub fn getModuleNameForAddress(self: *SelfInfo, address: usize) ?[]const u8 { if (comptime builtin.target.isDarwin()) { return self.lookupModuleNameDyld(address); } else if (native_os == .windows) { @@ -149,7 +151,7 @@ pub fn getModuleNameForAddress(self: *Info, address: usize) ?[]const u8 { } } -fn lookupModuleDyld(self: *Info, address: usize) !*Module { +fn lookupModuleDyld(self: *SelfInfo, address: usize) !*Module { const image_count = std.c._dyld_image_count(); var i: u32 = 0; @@ -215,7 +217,7 @@ fn lookupModuleDyld(self: *Info, address: usize) !*Module { return error.MissingDebugInfo; } -fn lookupModuleNameDyld(self: *Info, address: usize) ?[]const u8 { +fn lookupModuleNameDyld(self: *SelfInfo, address: usize) ?[]const u8 { _ = self; const image_count = std.c._dyld_image_count(); @@ -253,7 +255,7 @@ fn lookupModuleNameDyld(self: *Info, address: usize) ?[]const u8 { return null; } -fn lookupModuleWin32(self: *Info, address: usize) !*Module { +fn lookupModuleWin32(self: *SelfInfo, address: usize) !*Module { for (self.modules.items) |*module| { if 
(address >= module.base_address and address < module.base_address + module.size) { if (self.address_map.get(module.base_address)) |obj_di| { @@ -343,7 +345,7 @@ fn lookupModuleWin32(self: *Info, address: usize) !*Module { return error.MissingDebugInfo; } -fn lookupModuleNameWin32(self: *Info, address: usize) ?[]const u8 { +fn lookupModuleNameWin32(self: *SelfInfo, address: usize) ?[]const u8 { for (self.modules.items) |module| { if (address >= module.base_address and address < module.base_address + module.size) { return module.name; @@ -352,7 +354,7 @@ fn lookupModuleNameWin32(self: *Info, address: usize) ?[]const u8 { return null; } -fn lookupModuleNameDl(self: *Info, address: usize) ?[]const u8 { +fn lookupModuleNameDl(self: *SelfInfo, address: usize) ?[]const u8 { _ = self; var ctx: struct { @@ -390,7 +392,7 @@ fn lookupModuleNameDl(self: *Info, address: usize) ?[]const u8 { return null; } -fn lookupModuleDl(self: *Info, address: usize) !*Module { +fn lookupModuleDl(self: *SelfInfo, address: usize) !*Module { var ctx: struct { // Input address: usize, @@ -484,13 +486,13 @@ fn lookupModuleDl(self: *Info, address: usize) !*Module { return obj_di; } -fn lookupModuleHaiku(self: *Info, address: usize) !*Module { +fn lookupModuleHaiku(self: *SelfInfo, address: usize) !*Module { _ = self; _ = address; @panic("TODO implement lookup module for Haiku"); } -fn lookupModuleWasm(self: *Info, address: usize) !*Module { +fn lookupModuleWasm(self: *SelfInfo, address: usize) !*Module { _ = self; _ = address; @panic("TODO implement lookup module for Wasm"); @@ -709,7 +711,7 @@ pub const Module = switch (native_os) { }, .uefi, .windows => struct { base_address: usize, - pdb: ?pdb.Pdb = null, + pdb: ?Pdb = null, dwarf: ?Dwarf = null, coff_image_base: u64, @@ -837,7 +839,11 @@ pub const Module = switch (native_os) { else => Dwarf, }; -pub const WindowsModuleInfo = struct { +/// How is this different than `Module` when the host is Windows? 
+/// Why are both stored in the `SelfInfo` struct? +/// Boy, it sure would be nice if someone added documentation comments for this +/// struct explaining it. +pub const WindowsModule = struct { base_address: usize, size: u32, name: []const u8, @@ -1030,7 +1036,7 @@ fn readCoffDebugInfo(allocator: Allocator, coff_obj: *coff.Coff) !Module { }; defer if (path.ptr != raw_path.ptr) allocator.free(path); - di.pdb = pdb.Pdb.init(allocator, path) catch |err| switch (err) { + di.pdb = Pdb.init(allocator, path) catch |err| switch (err) { error.FileNotFound, error.IsDir => { if (di.dwarf == null) return error.MissingDebugInfo; return di; @@ -1292,22 +1298,10 @@ fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 pub const SymbolInfo = struct { symbol_name: []const u8 = "???", compile_unit_name: []const u8 = "???", - line_info: ?SourceLocation = null, + line_info: ?std.debug.SourceLocation = null, pub fn deinit(self: SymbolInfo, allocator: Allocator) void { - if (self.line_info) |li| { - li.deinit(allocator); - } - } -}; - -pub const SourceLocation = struct { - line: u64, - column: u64, - file_name: []const u8, - - pub fn deinit(self: SourceLocation, allocator: Allocator) void { - allocator.free(self.file_name); + if (self.line_info) |li| allocator.free(li.file_name); } }; diff --git a/lib/std/pdb.zig b/lib/std/pdb.zig index c96eb81fa9f5..31ad02e94564 100644 --- a/lib/std/pdb.zig +++ b/lib/std/pdb.zig @@ -1,3 +1,12 @@ +//! Program Data Base debugging information format. +//! +//! This namespace contains unopinionated types and data definitions only. For +//! an implementation of parsing and caching PDB information, see +//! `std.debug.Pdb`. +//! +//! Most of this is based on information gathered from LLVM source code, +//! documentation and/or contributors. 
+ const std = @import("std.zig"); const io = std.io; const math = std.math; @@ -9,10 +18,7 @@ const debug = std.debug; const ArrayList = std.ArrayList; -// Note: most of this is based on information gathered from LLVM source code, -// documentation and/or contributors. - -// https://llvm.org/docs/PDB/DbiStream.html#stream-header +/// https://llvm.org/docs/PDB/DbiStream.html#stream-header pub const DbiStreamHeader = extern struct { VersionSignature: i32, VersionHeader: u32, @@ -415,10 +421,8 @@ pub const ColumnNumberEntry = extern struct { pub const FileChecksumEntryHeader = extern struct { /// Byte offset of filename in global string table. FileNameOffset: u32, - /// Number of bytes of checksum. ChecksumSize: u8, - /// FileChecksumKind ChecksumKind: u8, }; @@ -451,525 +455,15 @@ pub const DebugSubsectionHeader = extern struct { Length: u32, }; -pub const PDBStringTableHeader = extern struct { +pub const StringTableHeader = extern struct { /// PDBStringTableSignature Signature: u32, - /// 1 or 2 HashVersion: u32, - /// Number of bytes of names buffer. ByteSize: u32, }; -fn readSparseBitVector(stream: anytype, allocator: mem.Allocator) ![]u32 { - const num_words = try stream.readInt(u32, .little); - var list = ArrayList(u32).init(allocator); - errdefer list.deinit(); - var word_i: u32 = 0; - while (word_i != num_words) : (word_i += 1) { - const word = try stream.readInt(u32, .little); - var bit_i: u5 = 0; - while (true) : (bit_i += 1) { - if (word & (@as(u32, 1) << bit_i) != 0) { - try list.append(word_i * 32 + bit_i); - } - if (bit_i == std.math.maxInt(u5)) break; - } - } - return try list.toOwnedSlice(); -} - -pub const Pdb = struct { - in_file: File, - msf: Msf, - allocator: mem.Allocator, - string_table: ?*MsfStream, - dbi: ?*MsfStream, - modules: []Module, - sect_contribs: []SectionContribEntry, - guid: [16]u8, - age: u32, - - pub const Module = struct { - mod_info: ModInfo, - module_name: []u8, - obj_file_name: []u8, - // The fields below are filled on demand. 
- populated: bool, - symbols: []u8, - subsect_info: []u8, - checksum_offset: ?usize, - - pub fn deinit(self: *Module, allocator: mem.Allocator) void { - allocator.free(self.module_name); - allocator.free(self.obj_file_name); - if (self.populated) { - allocator.free(self.symbols); - allocator.free(self.subsect_info); - } - } - }; - - pub fn init(allocator: mem.Allocator, path: []const u8) !Pdb { - const file = try fs.cwd().openFile(path, .{}); - errdefer file.close(); - - return Pdb{ - .in_file = file, - .allocator = allocator, - .string_table = null, - .dbi = null, - .msf = try Msf.init(allocator, file), - .modules = &[_]Module{}, - .sect_contribs = &[_]SectionContribEntry{}, - .guid = undefined, - .age = undefined, - }; - } - - pub fn deinit(self: *Pdb) void { - self.in_file.close(); - self.msf.deinit(self.allocator); - for (self.modules) |*module| { - module.deinit(self.allocator); - } - self.allocator.free(self.modules); - self.allocator.free(self.sect_contribs); - } - - pub fn parseDbiStream(self: *Pdb) !void { - var stream = self.getStream(StreamType.Dbi) orelse - return error.InvalidDebugInfo; - const reader = stream.reader(); - - const header = try reader.readStruct(DbiStreamHeader); - if (header.VersionHeader != 19990903) // V70, only value observed by LLVM team - return error.UnknownPDBVersion; - // if (header.Age != age) - // return error.UnmatchingPDB; - - const mod_info_size = header.ModInfoSize; - const section_contrib_size = header.SectionContributionSize; - - var modules = ArrayList(Module).init(self.allocator); - errdefer modules.deinit(); - - // Module Info Substream - var mod_info_offset: usize = 0; - while (mod_info_offset != mod_info_size) { - const mod_info = try reader.readStruct(ModInfo); - var this_record_len: usize = @sizeOf(ModInfo); - - const module_name = try reader.readUntilDelimiterAlloc(self.allocator, 0, 1024); - errdefer self.allocator.free(module_name); - this_record_len += module_name.len + 1; - - const obj_file_name = try 
reader.readUntilDelimiterAlloc(self.allocator, 0, 1024); - errdefer self.allocator.free(obj_file_name); - this_record_len += obj_file_name.len + 1; - - if (this_record_len % 4 != 0) { - const round_to_next_4 = (this_record_len | 0x3) + 1; - const march_forward_bytes = round_to_next_4 - this_record_len; - try stream.seekBy(@as(isize, @intCast(march_forward_bytes))); - this_record_len += march_forward_bytes; - } - - try modules.append(Module{ - .mod_info = mod_info, - .module_name = module_name, - .obj_file_name = obj_file_name, - - .populated = false, - .symbols = undefined, - .subsect_info = undefined, - .checksum_offset = null, - }); - - mod_info_offset += this_record_len; - if (mod_info_offset > mod_info_size) - return error.InvalidDebugInfo; - } - - // Section Contribution Substream - var sect_contribs = ArrayList(SectionContribEntry).init(self.allocator); - errdefer sect_contribs.deinit(); - - var sect_cont_offset: usize = 0; - if (section_contrib_size != 0) { - const version = reader.readEnum(SectionContrSubstreamVersion, .little) catch |err| switch (err) { - error.InvalidValue => return error.InvalidDebugInfo, - else => |e| return e, - }; - _ = version; - sect_cont_offset += @sizeOf(u32); - } - while (sect_cont_offset != section_contrib_size) { - const entry = try sect_contribs.addOne(); - entry.* = try reader.readStruct(SectionContribEntry); - sect_cont_offset += @sizeOf(SectionContribEntry); - - if (sect_cont_offset > section_contrib_size) - return error.InvalidDebugInfo; - } - - self.modules = try modules.toOwnedSlice(); - self.sect_contribs = try sect_contribs.toOwnedSlice(); - } - - pub fn parseInfoStream(self: *Pdb) !void { - var stream = self.getStream(StreamType.Pdb) orelse - return error.InvalidDebugInfo; - const reader = stream.reader(); - - // Parse the InfoStreamHeader. 
- const version = try reader.readInt(u32, .little); - const signature = try reader.readInt(u32, .little); - _ = signature; - const age = try reader.readInt(u32, .little); - const guid = try reader.readBytesNoEof(16); - - if (version != 20000404) // VC70, only value observed by LLVM team - return error.UnknownPDBVersion; - - self.guid = guid; - self.age = age; - - // Find the string table. - const string_table_index = str_tab_index: { - const name_bytes_len = try reader.readInt(u32, .little); - const name_bytes = try self.allocator.alloc(u8, name_bytes_len); - defer self.allocator.free(name_bytes); - try reader.readNoEof(name_bytes); - - const HashTableHeader = extern struct { - Size: u32, - Capacity: u32, - - fn maxLoad(cap: u32) u32 { - return cap * 2 / 3 + 1; - } - }; - const hash_tbl_hdr = try reader.readStruct(HashTableHeader); - if (hash_tbl_hdr.Capacity == 0) - return error.InvalidDebugInfo; - - if (hash_tbl_hdr.Size > HashTableHeader.maxLoad(hash_tbl_hdr.Capacity)) - return error.InvalidDebugInfo; - - const present = try readSparseBitVector(&reader, self.allocator); - defer self.allocator.free(present); - if (present.len != hash_tbl_hdr.Size) - return error.InvalidDebugInfo; - const deleted = try readSparseBitVector(&reader, self.allocator); - defer self.allocator.free(deleted); - - for (present) |_| { - const name_offset = try reader.readInt(u32, .little); - const name_index = try reader.readInt(u32, .little); - if (name_offset > name_bytes.len) - return error.InvalidDebugInfo; - const name = mem.sliceTo(name_bytes[name_offset..], 0); - if (mem.eql(u8, name, "/names")) { - break :str_tab_index name_index; - } - } - return error.MissingDebugInfo; - }; - - self.string_table = self.getStreamById(string_table_index) orelse - return error.MissingDebugInfo; - } - - pub fn getSymbolName(self: *Pdb, module: *Module, address: u64) ?[]const u8 { - _ = self; - std.debug.assert(module.populated); - - var symbol_i: usize = 0; - while (symbol_i != module.symbols.len) { - 
const prefix = @as(*align(1) RecordPrefix, @ptrCast(&module.symbols[symbol_i])); - if (prefix.RecordLen < 2) - return null; - switch (prefix.RecordKind) { - .S_LPROC32, .S_GPROC32 => { - const proc_sym = @as(*align(1) ProcSym, @ptrCast(&module.symbols[symbol_i + @sizeOf(RecordPrefix)])); - if (address >= proc_sym.CodeOffset and address < proc_sym.CodeOffset + proc_sym.CodeSize) { - return mem.sliceTo(@as([*:0]u8, @ptrCast(&proc_sym.Name[0])), 0); - } - }, - else => {}, - } - symbol_i += prefix.RecordLen + @sizeOf(u16); - } - - return null; - } - - pub fn getLineNumberInfo(self: *Pdb, module: *Module, address: u64) !debug.Info.SourceLocation { - std.debug.assert(module.populated); - const subsect_info = module.subsect_info; - - var sect_offset: usize = 0; - var skip_len: usize = undefined; - const checksum_offset = module.checksum_offset orelse return error.MissingDebugInfo; - while (sect_offset != subsect_info.len) : (sect_offset += skip_len) { - const subsect_hdr = @as(*align(1) DebugSubsectionHeader, @ptrCast(&subsect_info[sect_offset])); - skip_len = subsect_hdr.Length; - sect_offset += @sizeOf(DebugSubsectionHeader); - - switch (subsect_hdr.Kind) { - .Lines => { - var line_index = sect_offset; - - const line_hdr = @as(*align(1) LineFragmentHeader, @ptrCast(&subsect_info[line_index])); - if (line_hdr.RelocSegment == 0) - return error.MissingDebugInfo; - line_index += @sizeOf(LineFragmentHeader); - const frag_vaddr_start = line_hdr.RelocOffset; - const frag_vaddr_end = frag_vaddr_start + line_hdr.CodeSize; - - if (address >= frag_vaddr_start and address < frag_vaddr_end) { - // There is an unknown number of LineBlockFragmentHeaders (and their accompanying line and column records) - // from now on. We will iterate through them, and eventually find a SourceLocation that we're interested in, - // breaking out to :subsections. If not, we will make sure to not read anything outside of this subsection. 
- const subsection_end_index = sect_offset + subsect_hdr.Length; - - while (line_index < subsection_end_index) { - const block_hdr = @as(*align(1) LineBlockFragmentHeader, @ptrCast(&subsect_info[line_index])); - line_index += @sizeOf(LineBlockFragmentHeader); - const start_line_index = line_index; - - const has_column = line_hdr.Flags.LF_HaveColumns; - - // All line entries are stored inside their line block by ascending start address. - // Heuristic: we want to find the last line entry - // that has a vaddr_start <= address. - // This is done with a simple linear search. - var line_i: u32 = 0; - while (line_i < block_hdr.NumLines) : (line_i += 1) { - const line_num_entry = @as(*align(1) LineNumberEntry, @ptrCast(&subsect_info[line_index])); - line_index += @sizeOf(LineNumberEntry); - - const vaddr_start = frag_vaddr_start + line_num_entry.Offset; - if (address < vaddr_start) { - break; - } - } - - // line_i == 0 would mean that no matching LineNumberEntry was found. - if (line_i > 0) { - const subsect_index = checksum_offset + block_hdr.NameIndex; - const chksum_hdr = @as(*align(1) FileChecksumEntryHeader, @ptrCast(&module.subsect_info[subsect_index])); - const strtab_offset = @sizeOf(PDBStringTableHeader) + chksum_hdr.FileNameOffset; - try self.string_table.?.seekTo(strtab_offset); - const source_file_name = try self.string_table.?.reader().readUntilDelimiterAlloc(self.allocator, 0, 1024); - - const line_entry_idx = line_i - 1; - - const column = if (has_column) blk: { - const start_col_index = start_line_index + @sizeOf(LineNumberEntry) * block_hdr.NumLines; - const col_index = start_col_index + @sizeOf(ColumnNumberEntry) * line_entry_idx; - const col_num_entry = @as(*align(1) ColumnNumberEntry, @ptrCast(&subsect_info[col_index])); - break :blk col_num_entry.StartColumn; - } else 0; - - const found_line_index = start_line_index + line_entry_idx * @sizeOf(LineNumberEntry); - const line_num_entry: *align(1) LineNumberEntry = 
@ptrCast(&subsect_info[found_line_index]); - const flags: *align(1) LineNumberEntry.Flags = @ptrCast(&line_num_entry.Flags); - - return debug.Info.SourceLocation{ - .file_name = source_file_name, - .line = flags.Start, - .column = column, - }; - } - } - - // Checking that we are not reading garbage after the (possibly) multiple block fragments. - if (line_index != subsection_end_index) { - return error.InvalidDebugInfo; - } - } - }, - else => {}, - } - - if (sect_offset > subsect_info.len) - return error.InvalidDebugInfo; - } - - return error.MissingDebugInfo; - } - - pub fn getModule(self: *Pdb, index: usize) !?*Module { - if (index >= self.modules.len) - return null; - - const mod = &self.modules[index]; - if (mod.populated) - return mod; - - // At most one can be non-zero. - if (mod.mod_info.C11ByteSize != 0 and mod.mod_info.C13ByteSize != 0) - return error.InvalidDebugInfo; - if (mod.mod_info.C13ByteSize == 0) - return error.InvalidDebugInfo; - - const stream = self.getStreamById(mod.mod_info.ModuleSymStream) orelse - return error.MissingDebugInfo; - const reader = stream.reader(); - - const signature = try reader.readInt(u32, .little); - if (signature != 4) - return error.InvalidDebugInfo; - - mod.symbols = try self.allocator.alloc(u8, mod.mod_info.SymByteSize - 4); - errdefer self.allocator.free(mod.symbols); - try reader.readNoEof(mod.symbols); - - mod.subsect_info = try self.allocator.alloc(u8, mod.mod_info.C13ByteSize); - errdefer self.allocator.free(mod.subsect_info); - try reader.readNoEof(mod.subsect_info); - - var sect_offset: usize = 0; - var skip_len: usize = undefined; - while (sect_offset != mod.subsect_info.len) : (sect_offset += skip_len) { - const subsect_hdr = @as(*align(1) DebugSubsectionHeader, @ptrCast(&mod.subsect_info[sect_offset])); - skip_len = subsect_hdr.Length; - sect_offset += @sizeOf(DebugSubsectionHeader); - - switch (subsect_hdr.Kind) { - .FileChecksums => { - mod.checksum_offset = sect_offset; - break; - }, - else => {}, - } - - 
if (sect_offset > mod.subsect_info.len) - return error.InvalidDebugInfo; - } - - mod.populated = true; - return mod; - } - - pub fn getStreamById(self: *Pdb, id: u32) ?*MsfStream { - if (id >= self.msf.streams.len) - return null; - return &self.msf.streams[id]; - } - - pub fn getStream(self: *Pdb, stream: StreamType) ?*MsfStream { - const id = @intFromEnum(stream); - return self.getStreamById(id); - } -}; - -// see https://llvm.org/docs/PDB/MsfFile.html -const Msf = struct { - directory: MsfStream, - streams: []MsfStream, - - fn init(allocator: mem.Allocator, file: File) !Msf { - const in = file.reader(); - - const superblock = try in.readStruct(SuperBlock); - - // Sanity checks - if (!mem.eql(u8, &superblock.FileMagic, SuperBlock.file_magic)) - return error.InvalidDebugInfo; - if (superblock.FreeBlockMapBlock != 1 and superblock.FreeBlockMapBlock != 2) - return error.InvalidDebugInfo; - const file_len = try file.getEndPos(); - if (superblock.NumBlocks * superblock.BlockSize != file_len) - return error.InvalidDebugInfo; - switch (superblock.BlockSize) { - // llvm only supports 4096 but we can handle any of these values - 512, 1024, 2048, 4096 => {}, - else => return error.InvalidDebugInfo, - } - - const dir_block_count = blockCountFromSize(superblock.NumDirectoryBytes, superblock.BlockSize); - if (dir_block_count > superblock.BlockSize / @sizeOf(u32)) - return error.UnhandledBigDirectoryStream; // cf. BlockMapAddr comment. 
- - try file.seekTo(superblock.BlockSize * superblock.BlockMapAddr); - const dir_blocks = try allocator.alloc(u32, dir_block_count); - for (dir_blocks) |*b| { - b.* = try in.readInt(u32, .little); - } - var directory = MsfStream.init( - superblock.BlockSize, - file, - dir_blocks, - ); - - const begin = directory.pos; - const stream_count = try directory.reader().readInt(u32, .little); - const stream_sizes = try allocator.alloc(u32, stream_count); - defer allocator.free(stream_sizes); - - // Microsoft's implementation uses @as(u32, -1) for inexistent streams. - // These streams are not used, but still participate in the file - // and must be taken into account when resolving stream indices. - const Nil = 0xFFFFFFFF; - for (stream_sizes) |*s| { - const size = try directory.reader().readInt(u32, .little); - s.* = if (size == Nil) 0 else blockCountFromSize(size, superblock.BlockSize); - } - - const streams = try allocator.alloc(MsfStream, stream_count); - for (streams, 0..) |*stream, i| { - const size = stream_sizes[i]; - if (size == 0) { - stream.* = MsfStream{ - .blocks = &[_]u32{}, - }; - } else { - var blocks = try allocator.alloc(u32, size); - var j: u32 = 0; - while (j < size) : (j += 1) { - const block_id = try directory.reader().readInt(u32, .little); - const n = (block_id % superblock.BlockSize); - // 0 is for SuperBlock, 1 and 2 for FPMs. 
- if (block_id == 0 or n == 1 or n == 2 or block_id * superblock.BlockSize > file_len) - return error.InvalidBlockIndex; - blocks[j] = block_id; - } - - stream.* = MsfStream.init( - superblock.BlockSize, - file, - blocks, - ); - } - } - - const end = directory.pos; - if (end - begin != superblock.NumDirectoryBytes) - return error.InvalidStreamDirectory; - - return Msf{ - .directory = directory, - .streams = streams, - }; - } - - fn deinit(self: *Msf, allocator: mem.Allocator) void { - allocator.free(self.directory.blocks); - for (self.streams) |*stream| { - allocator.free(stream.blocks); - } - allocator.free(self.streams); - } -}; - -fn blockCountFromSize(size: u32, block_size: u32) u32 { - return (size + block_size - 1) / block_size; -} - // https://llvm.org/docs/PDB/MsfFile.html#the-superblock pub const SuperBlock = extern struct { /// The LLVM docs list a space between C / C++ but empirically this is not the case. @@ -1016,82 +510,3 @@ pub const SuperBlock = extern struct { // implement it so we're kind of safe making this assumption for now. 
BlockMapAddr: u32, }; - -const MsfStream = struct { - in_file: File = undefined, - pos: u64 = undefined, - blocks: []u32 = undefined, - block_size: u32 = undefined, - - pub const Error = @typeInfo(@typeInfo(@TypeOf(read)).Fn.return_type.?).ErrorUnion.error_set; - - fn init(block_size: u32, file: File, blocks: []u32) MsfStream { - const stream = MsfStream{ - .in_file = file, - .pos = 0, - .blocks = blocks, - .block_size = block_size, - }; - - return stream; - } - - fn read(self: *MsfStream, buffer: []u8) !usize { - var block_id = @as(usize, @intCast(self.pos / self.block_size)); - if (block_id >= self.blocks.len) return 0; // End of Stream - var block = self.blocks[block_id]; - var offset = self.pos % self.block_size; - - try self.in_file.seekTo(block * self.block_size + offset); - const in = self.in_file.reader(); - - var size: usize = 0; - var rem_buffer = buffer; - while (size < buffer.len) { - const size_to_read = @min(self.block_size - offset, rem_buffer.len); - size += try in.read(rem_buffer[0..size_to_read]); - rem_buffer = buffer[size..]; - offset += size_to_read; - - // If we're at the end of a block, go to the next one. 
- if (offset == self.block_size) { - offset = 0; - block_id += 1; - if (block_id >= self.blocks.len) break; // End of Stream - block = self.blocks[block_id]; - try self.in_file.seekTo(block * self.block_size); - } - } - - self.pos += buffer.len; - return buffer.len; - } - - pub fn seekBy(self: *MsfStream, len: i64) !void { - self.pos = @as(u64, @intCast(@as(i64, @intCast(self.pos)) + len)); - if (self.pos >= self.blocks.len * self.block_size) - return error.EOF; - } - - pub fn seekTo(self: *MsfStream, len: u64) !void { - self.pos = len; - if (self.pos >= self.blocks.len * self.block_size) - return error.EOF; - } - - fn getSize(self: *const MsfStream) u64 { - return self.blocks.len * self.block_size; - } - - fn getFilePos(self: MsfStream) u64 { - const block_id = self.pos / self.block_size; - const block = self.blocks[block_id]; - const offset = self.pos % self.block_size; - - return block * self.block_size + offset; - } - - pub fn reader(self: *MsfStream) std.io.Reader(*MsfStream, Error, read) { - return .{ .context = self }; - } -}; From 48d584e3a33a76ef4ea643905a11d311e9ed8bbf Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 1 Aug 2024 22:04:40 -0700 Subject: [PATCH 4/6] std.debug: reorg and clarify API goals After this commit: `std.debug.SelfInfo` is a cross-platform abstraction for the current executable's own debug information, with a goal of minimal code bloat and compilation speed penalty. `std.debug.Dwarf` does not assume the current executable is itself the thing being debugged, however, it does assume the debug info has the same CPU architecture and OS as the current executable. It is planned to remove this limitation. 
--- lib/std/debug.zig | 233 +++---- lib/std/debug/Dwarf.zig | 916 ++++-------------------- lib/std/debug/Dwarf/abi.zig | 127 +--- lib/std/debug/Dwarf/call_frame.zig | 388 ----------- lib/std/debug/Dwarf/expression.zig | 26 +- lib/std/debug/MemoryAccessor.zig | 128 ++++ lib/std/debug/SelfInfo.zig | 1033 ++++++++++++++++++++++++++++ src/crash_report.zig | 2 +- 8 files changed, 1434 insertions(+), 1419 deletions(-) create mode 100644 lib/std/debug/MemoryAccessor.zig diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 2753a0f52f2e..0f10bada7173 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -13,6 +13,7 @@ const native_arch = builtin.cpu.arch; const native_os = builtin.os.tag; const native_endian = native_arch.endian(); +pub const MemoryAccessor = @import("debug/MemoryAccessor.zig"); pub const Dwarf = @import("debug/Dwarf.zig"); pub const Pdb = @import("debug/Pdb.zig"); pub const SelfInfo = @import("debug/SelfInfo.zig"); @@ -243,7 +244,7 @@ pub inline fn getContext(context: *ThreadContext) bool { /// Tries to print the stack trace starting from the supplied base pointer to stderr, /// unbuffered, and ignores any error returned. /// TODO multithreaded awareness -pub fn dumpStackTraceFromBase(context: *const ThreadContext) void { +pub fn dumpStackTraceFromBase(context: *ThreadContext) void { nosuspend { if (comptime builtin.target.isWasm()) { if (native_os == .wasi) { @@ -545,7 +546,7 @@ pub const StackIterator = struct { // using DWARF and MachO unwind info. 
unwind_state: if (have_ucontext) ?struct { debug_info: *SelfInfo, - dwarf_context: Dwarf.UnwindContext, + dwarf_context: SelfInfo.UnwindContext, last_error: ?UnwindError = null, failed: bool = false, } else void = if (have_ucontext) null else {}, @@ -569,16 +570,16 @@ pub const StackIterator = struct { }; } - pub fn initWithContext(first_address: ?usize, debug_info: *SelfInfo, context: *const posix.ucontext_t) !StackIterator { + pub fn initWithContext(first_address: ?usize, debug_info: *SelfInfo, context: *posix.ucontext_t) !StackIterator { // The implementation of DWARF unwinding on aarch64-macos is not complete. However, Apple mandates that // the frame pointer register is always used, so on this platform we can safely use the FP-based unwinder. - if (comptime builtin.target.isDarwin() and native_arch == .aarch64) { + if (builtin.target.isDarwin() and native_arch == .aarch64) { return init(first_address, context.mcontext.ss.fp); } else { var iterator = init(first_address, null); iterator.unwind_state = .{ .debug_info = debug_info, - .dwarf_context = try Dwarf.UnwindContext.init(debug_info.allocator, context), + .dwarf_context = try SelfInfo.UnwindContext.init(debug_info.allocator, context), }; return iterator; @@ -644,116 +645,6 @@ pub const StackIterator = struct { return address; } - fn isValidMemory(address: usize) bool { - // We are unable to determine validity of memory for freestanding targets - if (native_os == .freestanding or native_os == .uefi) return true; - - const aligned_address = address & ~@as(usize, @intCast((mem.page_size - 1))); - if (aligned_address == 0) return false; - const aligned_memory = @as([*]align(mem.page_size) u8, @ptrFromInt(aligned_address))[0..mem.page_size]; - - if (native_os == .windows) { - var memory_info: windows.MEMORY_BASIC_INFORMATION = undefined; - - // The only error this function can throw is ERROR_INVALID_PARAMETER. - // supply an address that invalid i'll be thrown. 
- const rc = windows.VirtualQuery(aligned_memory, &memory_info, aligned_memory.len) catch { - return false; - }; - - // Result code has to be bigger than zero (number of bytes written) - if (rc == 0) { - return false; - } - - // Free pages cannot be read, they are unmapped - if (memory_info.State == windows.MEM_FREE) { - return false; - } - - return true; - } else if (have_msync) { - posix.msync(aligned_memory, posix.MSF.ASYNC) catch |err| { - switch (err) { - error.UnmappedMemory => return false, - else => unreachable, - } - }; - - return true; - } else { - // We are unable to determine validity of memory on this target. - return true; - } - } - - pub const MemoryAccessor = struct { - var cached_pid: posix.pid_t = -1; - - mem: switch (native_os) { - .linux => File, - else => void, - }, - - pub const init: MemoryAccessor = .{ - .mem = switch (native_os) { - .linux => .{ .handle = -1 }, - else => {}, - }, - }; - - fn read(ma: *MemoryAccessor, address: usize, buf: []u8) bool { - switch (native_os) { - .linux => while (true) switch (ma.mem.handle) { - -2 => break, - -1 => { - const linux = std.os.linux; - const pid = switch (@atomicLoad(posix.pid_t, &cached_pid, .monotonic)) { - -1 => pid: { - const pid = linux.getpid(); - @atomicStore(posix.pid_t, &cached_pid, pid, .monotonic); - break :pid pid; - }, - else => |pid| pid, - }; - const bytes_read = linux.process_vm_readv( - pid, - &.{.{ .base = buf.ptr, .len = buf.len }}, - &.{.{ .base = @ptrFromInt(address), .len = buf.len }}, - 0, - ); - switch (linux.E.init(bytes_read)) { - .SUCCESS => return bytes_read == buf.len, - .FAULT => return false, - .INVAL, .PERM, .SRCH => unreachable, // own pid is always valid - .NOMEM => {}, - .NOSYS => {}, // QEMU is known not to implement this syscall. 
- else => unreachable, // unexpected - } - var path_buf: [ - std.fmt.count("/proc/{d}/mem", .{math.minInt(posix.pid_t)}) - ]u8 = undefined; - const path = std.fmt.bufPrint(&path_buf, "/proc/{d}/mem", .{pid}) catch - unreachable; - ma.mem = std.fs.openFileAbsolute(path, .{}) catch { - ma.mem.handle = -2; - break; - }; - }, - else => return (ma.mem.pread(buf, address) catch return false) == buf.len, - }, - else => {}, - } - if (!isValidMemory(address)) return false; - @memcpy(buf, @as([*]const u8, @ptrFromInt(address))); - return true; - } - pub fn load(ma: *MemoryAccessor, comptime Type: type, address: usize) ?Type { - var result: Type = undefined; - return if (ma.read(address, std.mem.asBytes(&result))) result else null; - } - }; - fn next_unwind(it: *StackIterator) !usize { const unwind_state = &it.unwind_state.?; const module = try unwind_state.debug_info.getModuleForAddress(unwind_state.dwarf_context.pc); @@ -762,7 +653,13 @@ pub const StackIterator = struct { // __unwind_info is a requirement for unwinding on Darwin. It may fall back to DWARF, but unwinding // via DWARF before attempting to use the compact unwind info will produce incorrect results. 
if (module.unwind_info) |unwind_info| { - if (Dwarf.unwindFrameMachO(&unwind_state.dwarf_context, &it.ma, unwind_info, module.eh_frame, module.base_address)) |return_address| { + if (SelfInfo.unwindFrameMachO( + &unwind_state.dwarf_context, + &it.ma, + unwind_info, + module.eh_frame, + module.base_address, + )) |return_address| { return return_address; } else |err| { if (err != error.RequiresDWARFUnwind) return err; @@ -773,7 +670,7 @@ pub const StackIterator = struct { } if (try module.getDwarfInfoForAddress(unwind_state.debug_info.allocator, unwind_state.dwarf_context.pc)) |di| { - return di.unwindFrame(&unwind_state.dwarf_context, &it.ma, null); + return SelfInfo.unwindFrameDwarf(di, &unwind_state.dwarf_context, &it.ma, null); } else return error.MissingDebugInfo; } @@ -822,11 +719,6 @@ pub const StackIterator = struct { } }; -const have_msync = switch (native_os) { - .wasi, .emscripten, .windows => false, - else => true, -}; - pub fn writeCurrentStackTrace( out_stream: anytype, debug_info: *SelfInfo, @@ -1333,7 +1225,7 @@ fn handleSegfaultPosix(sig: i32, info: *const posix.siginfo_t, ctx_ptr: ?*anyopa posix.abort(); } -fn dumpSegfaultInfoPosix(sig: i32, code: i32, addr: usize, ctx_ptr: ?*const anyopaque) void { +fn dumpSegfaultInfoPosix(sig: i32, code: i32, addr: usize, ctx_ptr: ?*anyopaque) void { const stderr = io.getStdErr().writer(); _ = switch (sig) { posix.SIG.SEGV => if (native_arch == .x86_64 and native_os == .linux and code == 128) // SI_KERNEL @@ -1359,7 +1251,7 @@ fn dumpSegfaultInfoPosix(sig: i32, code: i32, addr: usize, ctx_ptr: ?*const anyo .arm, .aarch64, => { - const ctx: *const posix.ucontext_t = @ptrCast(@alignCast(ctx_ptr)); + const ctx: *posix.ucontext_t = @ptrCast(@alignCast(ctx_ptr)); dumpStackTraceFromBase(ctx); }, else => {}, @@ -1585,6 +1477,99 @@ pub const SafetyLock = struct { } }; +/// Deprecated. Don't use this, just read from your memory directly. 
+/// +/// This only exists because someone was too lazy to rework logic that used to +/// operate on an open file to operate on a memory buffer instead. +pub const DeprecatedFixedBufferReader = struct { + buf: []const u8, + pos: usize = 0, + endian: std.builtin.Endian, + + pub const Error = error{ EndOfBuffer, Overflow, InvalidBuffer }; + + pub fn seekTo(fbr: *DeprecatedFixedBufferReader, pos: u64) Error!void { + if (pos > fbr.buf.len) return error.EndOfBuffer; + fbr.pos = @intCast(pos); + } + + pub fn seekForward(fbr: *DeprecatedFixedBufferReader, amount: u64) Error!void { + if (fbr.buf.len - fbr.pos < amount) return error.EndOfBuffer; + fbr.pos += @intCast(amount); + } + + pub inline fn readByte(fbr: *DeprecatedFixedBufferReader) Error!u8 { + if (fbr.pos >= fbr.buf.len) return error.EndOfBuffer; + defer fbr.pos += 1; + return fbr.buf[fbr.pos]; + } + + pub fn readByteSigned(fbr: *DeprecatedFixedBufferReader) Error!i8 { + return @bitCast(try fbr.readByte()); + } + + pub fn readInt(fbr: *DeprecatedFixedBufferReader, comptime T: type) Error!T { + const size = @divExact(@typeInfo(T).Int.bits, 8); + if (fbr.buf.len - fbr.pos < size) return error.EndOfBuffer; + defer fbr.pos += size; + return std.mem.readInt(T, fbr.buf[fbr.pos..][0..size], fbr.endian); + } + + pub fn readIntChecked( + fbr: *DeprecatedFixedBufferReader, + comptime T: type, + ma: *MemoryAccessor, + ) Error!T { + if (ma.load(T, @intFromPtr(fbr.buf[fbr.pos..].ptr)) == null) + return error.InvalidBuffer; + + return fbr.readInt(T); + } + + pub fn readUleb128(fbr: *DeprecatedFixedBufferReader, comptime T: type) Error!T { + return std.leb.readUleb128(T, fbr); + } + + pub fn readIleb128(fbr: *DeprecatedFixedBufferReader, comptime T: type) Error!T { + return std.leb.readIleb128(T, fbr); + } + + pub fn readAddress(fbr: *DeprecatedFixedBufferReader, format: std.dwarf.Format) Error!u64 { + return switch (format) { + .@"32" => try fbr.readInt(u32), + .@"64" => try fbr.readInt(u64), + }; + } + + pub fn 
readAddressChecked( + fbr: *DeprecatedFixedBufferReader, + format: std.dwarf.Format, + ma: *MemoryAccessor, + ) Error!u64 { + return switch (format) { + .@"32" => try fbr.readIntChecked(u32, ma), + .@"64" => try fbr.readIntChecked(u64, ma), + }; + } + + pub fn readBytes(fbr: *DeprecatedFixedBufferReader, len: usize) Error![]const u8 { + if (fbr.buf.len - fbr.pos < len) return error.EndOfBuffer; + defer fbr.pos += len; + return fbr.buf[fbr.pos..][0..len]; + } + + pub fn readBytesTo(fbr: *DeprecatedFixedBufferReader, comptime sentinel: u8) Error![:sentinel]const u8 { + const end = @call(.always_inline, std.mem.indexOfScalarPos, .{ + u8, + fbr.buf, + fbr.pos, + sentinel, + }) orelse return error.EndOfBuffer; + defer fbr.pos = end + 1; + return fbr.buf[fbr.pos..end :sentinel]; + } +}; + /// Detect whether the program is being executed in the Valgrind virtual machine. /// /// When Valgrind integrations are disabled, this returns comptime-known false. diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 4fff2562b243..991c7315492c 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -1,23 +1,32 @@ //! Implements parsing, decoding, and caching of DWARF information. //! +//! This API does not assume the current executable is itself the thing being +//! debugged, however, it does assume the debug info has the same CPU +//! architecture and OS as the current executable. It is planned to remove this +//! limitation. +//! //! For unopinionated types and bits, see `std.dwarf`. 
const builtin = @import("builtin"); +const native_endian = builtin.cpu.arch.endian(); + const std = @import("../std.zig"); -const AT = DW.AT; const Allocator = std.mem.Allocator; const DW = std.dwarf; +const AT = DW.AT; const EH = DW.EH; const FORM = DW.FORM; const Format = DW.Format; const RLE = DW.RLE; -const StackIterator = std.debug.StackIterator; const UT = DW.UT; const assert = std.debug.assert; const cast = std.math.cast; const maxInt = std.math.maxInt; -const native_endian = builtin.cpu.arch.endian(); const readInt = std.mem.readInt; +const MemoryAccessor = std.debug.MemoryAccessor; + +/// Did I mention this is deprecated? +const DeprecatedFixedBufferReader = std.debug.DeprecatedFixedBufferReader; const Dwarf = @This(); @@ -153,7 +162,7 @@ pub const FormValue = union(enum) { .string => |s| return s, .strp => |off| return di.getString(off), .line_strp => |off| return di.getLineString(off), - else => return badDwarf(), + else => return bad(), } } @@ -162,8 +171,8 @@ pub const FormValue = union(enum) { inline .udata, .sdata, .sec_offset, - => |c| cast(U, c) orelse badDwarf(), - else => badDwarf(), + => |c| cast(U, c) orelse bad(), + else => bad(), }; } }; @@ -237,25 +246,25 @@ pub const Die = struct { .string => |value| return value, .strp => |offset| return di.getString(offset), .strx => |index| { - const debug_str_offsets = di.section(.debug_str_offsets) orelse return badDwarf(); - if (compile_unit.str_offsets_base == 0) return badDwarf(); + const debug_str_offsets = di.section(.debug_str_offsets) orelse return bad(); + if (compile_unit.str_offsets_base == 0) return bad(); switch (compile_unit.format) { .@"32" => { const byte_offset = compile_unit.str_offsets_base + 4 * index; - if (byte_offset + 4 > debug_str_offsets.len) return badDwarf(); + if (byte_offset + 4 > debug_str_offsets.len) return bad(); const offset = readInt(u32, debug_str_offsets[byte_offset..][0..4], di.endian); return getStringGeneric(opt_str, offset); }, .@"64" => { const byte_offset = 
compile_unit.str_offsets_base + 8 * index; - if (byte_offset + 8 > debug_str_offsets.len) return badDwarf(); + if (byte_offset + 8 > debug_str_offsets.len) return bad(); const offset = readInt(u64, debug_str_offsets[byte_offset..][0..8], di.endian); return getStringGeneric(opt_str, offset); }, } }, .line_strp => |offset| return di.getLineString(offset), - else => return badDwarf(), + else => return bad(), } } }; @@ -279,7 +288,7 @@ pub const ExceptionFrameHeader = struct { EH.PE.sdata8, => 16, // This is a binary search table, so all entries must be the same length - else => return badDwarf(), + else => return bad(), }; } @@ -287,7 +296,7 @@ pub const ExceptionFrameHeader = struct { self: ExceptionFrameHeader, comptime T: type, ptr: usize, - ma: *StackIterator.MemoryAccessor, + ma: *MemoryAccessor, eh_frame_len: ?usize, ) bool { if (eh_frame_len) |len| { @@ -304,7 +313,7 @@ pub const ExceptionFrameHeader = struct { /// If `eh_frame_len` is provided, then these checks can be skipped. pub fn findEntry( self: ExceptionFrameHeader, - ma: *StackIterator.MemoryAccessor, + ma: *MemoryAccessor, eh_frame_len: ?usize, eh_frame_hdr_ptr: usize, pc: usize, @@ -316,7 +325,7 @@ pub const ExceptionFrameHeader = struct { var left: usize = 0; var len: usize = self.fde_count; - var fbr: FixedBufferReader = .{ .buf = self.entries, .endian = native_endian }; + var fbr: DeprecatedFixedBufferReader = .{ .buf = self.entries, .endian = native_endian }; while (len > 1) { const mid = left + len / 2; @@ -326,7 +335,7 @@ pub const ExceptionFrameHeader = struct { .pc_rel_base = @intFromPtr(&self.entries[fbr.pos]), .follow_indirect = true, .data_rel_base = eh_frame_hdr_ptr, - }) orelse return badDwarf(); + }) orelse return bad(); if (pc < pc_begin) { len /= 2; @@ -337,7 +346,7 @@ pub const ExceptionFrameHeader = struct { } } - if (len == 0) return badDwarf(); + if (len == 0) return bad(); fbr.pos = left * entry_size; // Read past the pc_begin field of the entry @@ -345,36 +354,36 @@ pub const 
ExceptionFrameHeader = struct { .pc_rel_base = @intFromPtr(&self.entries[fbr.pos]), .follow_indirect = true, .data_rel_base = eh_frame_hdr_ptr, - }) orelse return badDwarf(); + }) orelse return bad(); const fde_ptr = cast(usize, try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{ .pc_rel_base = @intFromPtr(&self.entries[fbr.pos]), .follow_indirect = true, .data_rel_base = eh_frame_hdr_ptr, - }) orelse return badDwarf()) orelse return badDwarf(); + }) orelse return bad()) orelse return bad(); - if (fde_ptr < self.eh_frame_ptr) return badDwarf(); + if (fde_ptr < self.eh_frame_ptr) return bad(); // Even if eh_frame_len is not specified, all ranges accssed are checked via MemoryAccessor const eh_frame = @as([*]const u8, @ptrFromInt(self.eh_frame_ptr))[0 .. eh_frame_len orelse maxInt(u32)]; const fde_offset = fde_ptr - self.eh_frame_ptr; - var eh_frame_fbr: FixedBufferReader = .{ + var eh_frame_fbr: DeprecatedFixedBufferReader = .{ .buf = eh_frame, .pos = fde_offset, .endian = native_endian, }; const fde_entry_header = try EntryHeader.read(&eh_frame_fbr, if (eh_frame_len == null) ma else null, .eh_frame); - if (!self.isValidPtr(u8, @intFromPtr(&fde_entry_header.entry_bytes[fde_entry_header.entry_bytes.len - 1]), ma, eh_frame_len)) return badDwarf(); - if (fde_entry_header.type != .fde) return badDwarf(); + if (!self.isValidPtr(u8, @intFromPtr(&fde_entry_header.entry_bytes[fde_entry_header.entry_bytes.len - 1]), ma, eh_frame_len)) return bad(); + if (fde_entry_header.type != .fde) return bad(); // CIEs always come before FDEs (the offset is a subtraction), so we can assume this memory is readable const cie_offset = fde_entry_header.type.fde; try eh_frame_fbr.seekTo(cie_offset); const cie_entry_header = try EntryHeader.read(&eh_frame_fbr, if (eh_frame_len == null) ma else null, .eh_frame); - if (!self.isValidPtr(u8, @intFromPtr(&cie_entry_header.entry_bytes[cie_entry_header.entry_bytes.len - 1]), ma, eh_frame_len)) return badDwarf(); - if (cie_entry_header.type != 
.cie) return badDwarf(); + if (!self.isValidPtr(u8, @intFromPtr(&cie_entry_header.entry_bytes[cie_entry_header.entry_bytes.len - 1]), ma, eh_frame_len)) return bad(); + if (cie_entry_header.type != .cie) return bad(); cie.* = try CommonInformationEntry.parse( cie_entry_header.entry_bytes, @@ -417,17 +426,17 @@ pub const EntryHeader = struct { } /// Reads a header for either an FDE or a CIE, then advances the fbr to the position after the trailing structure. - /// `fbr` must be a FixedBufferReader backed by either the .eh_frame or .debug_frame sections. + /// `fbr` must be a DeprecatedFixedBufferReader backed by either the .eh_frame or .debug_frame sections. pub fn read( - fbr: *FixedBufferReader, - opt_ma: ?*StackIterator.MemoryAccessor, + fbr: *DeprecatedFixedBufferReader, + opt_ma: ?*MemoryAccessor, dwarf_section: Section.Id, ) !EntryHeader { assert(dwarf_section == .eh_frame or dwarf_section == .debug_frame); const length_offset = fbr.pos; const unit_header = try readUnitHeader(fbr, opt_ma); - const unit_length = cast(usize, unit_header.unit_length) orelse return badDwarf(); + const unit_length = cast(usize, unit_header.unit_length) orelse return bad(); if (unit_length == 0) return .{ .length_offset = length_offset, .format = unit_header.format, @@ -532,7 +541,7 @@ pub const CommonInformationEntry = struct { ) !CommonInformationEntry { if (addr_size_bytes > 8) return error.UnsupportedAddrSize; - var fbr: FixedBufferReader = .{ .buf = cie_bytes, .endian = endian }; + var fbr: DeprecatedFixedBufferReader = .{ .buf = cie_bytes, .endian = endian }; const version = try fbr.readByte(); switch (dwarf_section) { @@ -550,15 +559,15 @@ pub const CommonInformationEntry = struct { while (aug_byte != 0) : (aug_byte = try fbr.readByte()) { switch (aug_byte) { 'z' => { - if (aug_str_len != 0) return badDwarf(); + if (aug_str_len != 0) return bad(); has_aug_data = true; }, 'e' => { - if (has_aug_data or aug_str_len != 0) return badDwarf(); - if (try fbr.readByte() != 'h') 
return badDwarf(); + if (has_aug_data or aug_str_len != 0) return bad(); + if (try fbr.readByte() != 'h') return bad(); has_eh_data = true; }, - else => if (has_eh_data) return badDwarf(), + else => if (has_eh_data) return bad(), } aug_str_len += 1; @@ -604,7 +613,7 @@ pub const CommonInformationEntry = struct { fde_pointer_enc = try fbr.readByte(); }, 'S', 'B', 'G' => {}, - else => return badDwarf(), + else => return bad(), } } @@ -666,17 +675,17 @@ pub const FrameDescriptionEntry = struct { ) !FrameDescriptionEntry { if (addr_size_bytes > 8) return error.InvalidAddrSize; - var fbr: FixedBufferReader = .{ .buf = fde_bytes, .endian = endian }; + var fbr: DeprecatedFixedBufferReader = .{ .buf = fde_bytes, .endian = endian }; const pc_begin = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{ .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[fbr.pos]), pc_rel_offset), .follow_indirect = is_runtime, - }) orelse return badDwarf(); + }) orelse return bad(); const pc_range = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{ .pc_rel_base = 0, .follow_indirect = false, - }) orelse return badDwarf(); + }) orelse return bad(); var aug_data: []const u8 = &[_]u8{}; const lsda_pointer = if (cie.aug_str.len > 0) blk: { @@ -708,54 +717,6 @@ pub const FrameDescriptionEntry = struct { } }; -pub const UnwindContext = struct { - allocator: Allocator, - cfa: ?usize, - pc: usize, - thread_context: *std.debug.ThreadContext, - reg_context: abi.RegisterContext, - vm: call_frame.VirtualMachine, - stack_machine: expression.StackMachine(.{ .call_frame_context = true }), - - pub fn init( - allocator: Allocator, - thread_context: *const std.debug.ThreadContext, - ) !UnwindContext { - const pc = abi.stripInstructionPtrAuthCode( - (try abi.regValueNative( - usize, - thread_context, - abi.ipRegNum(), - null, - )).*, - ); - - const context_copy = try allocator.create(std.debug.ThreadContext); - std.debug.copyContext(thread_context, context_copy); - - return .{ - 
.allocator = allocator, - .cfa = null, - .pc = pc, - .thread_context = context_copy, - .reg_context = undefined, - .vm = .{}, - .stack_machine = .{}, - }; - } - - pub fn deinit(self: *UnwindContext) void { - self.vm.deinit(self.allocator); - self.stack_machine.deinit(self.allocator); - self.allocator.destroy(self.thread_context); - self.* = undefined; - } - - pub fn getFp(self: *const UnwindContext) !usize { - return (try abi.regValueNative(usize, self.thread_context, abi.fpRegNum(self.reg_context), self.reg_context)).*; - } -}; - const num_sections = std.enums.directEnumArrayLen(Section.Id, 0); pub const SectionArray = [num_sections]?Section; pub const null_section_array = [_]?Section{null} ** num_sections; @@ -817,7 +778,7 @@ pub fn getSymbolName(di: *Dwarf, address: u64) ?[]const u8 { } fn scanAllFunctions(di: *Dwarf, allocator: Allocator) !void { - var fbr: FixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; + var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; var this_unit_offset: u64 = 0; while (this_unit_offset < fbr.buf.len) { @@ -828,20 +789,20 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) !void { const next_offset = unit_header.header_length + unit_header.unit_length; const version = try fbr.readInt(u16); - if (version < 2 or version > 5) return badDwarf(); + if (version < 2 or version > 5) return bad(); var address_size: u8 = undefined; var debug_abbrev_offset: u64 = undefined; if (version >= 5) { const unit_type = try fbr.readInt(u8); - if (unit_type != DW.UT.compile) return badDwarf(); + if (unit_type != DW.UT.compile) return bad(); address_size = try fbr.readByte(); debug_abbrev_offset = try fbr.readAddress(unit_header.format); } else { debug_abbrev_offset = try fbr.readAddress(unit_header.format); address_size = try fbr.readByte(); } - if (address_size != @sizeOf(usize)) return badDwarf(); + if (address_size != @sizeOf(usize)) return bad(); const abbrev_table = try 
di.getAbbrevTable(allocator, debug_abbrev_offset); @@ -915,28 +876,28 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) !void { // Follow the DIE it points to and repeat const ref_offset = try this_die_obj.getAttrRef(AT.abstract_origin); - if (ref_offset > next_offset) return badDwarf(); + if (ref_offset > next_offset) return bad(); try fbr.seekTo(this_unit_offset + ref_offset); this_die_obj = (try parseDie( &fbr, attrs_bufs[2], abbrev_table, unit_header.format, - )) orelse return badDwarf(); + )) orelse return bad(); } else if (this_die_obj.getAttr(AT.specification)) |_| { const after_die_offset = fbr.pos; defer fbr.pos = after_die_offset; // Follow the DIE it points to and repeat const ref_offset = try this_die_obj.getAttrRef(AT.specification); - if (ref_offset > next_offset) return badDwarf(); + if (ref_offset > next_offset) return bad(); try fbr.seekTo(this_unit_offset + ref_offset); this_die_obj = (try parseDie( &fbr, attrs_bufs[2], abbrev_table, unit_header.format, - )) orelse return badDwarf(); + )) orelse return bad(); } else { break :x null; } @@ -950,7 +911,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) !void { const pc_end = switch (high_pc_value.*) { .addr => |value| value, .udata => |offset| low_pc + offset, - else => return badDwarf(), + else => return bad(), }; try di.func_list.append(allocator, .{ @@ -1004,7 +965,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) !void { } fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) !void { - var fbr: FixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; + var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; var this_unit_offset: u64 = 0; var attrs_buf = std.ArrayList(Die.Attr).init(allocator); @@ -1018,20 +979,20 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) !void { const next_offset = unit_header.header_length + unit_header.unit_length; const version = try fbr.readInt(u16); - if (version < 
2 or version > 5) return badDwarf(); + if (version < 2 or version > 5) return bad(); var address_size: u8 = undefined; var debug_abbrev_offset: u64 = undefined; if (version >= 5) { const unit_type = try fbr.readInt(u8); - if (unit_type != UT.compile) return badDwarf(); + if (unit_type != UT.compile) return bad(); address_size = try fbr.readByte(); debug_abbrev_offset = try fbr.readAddress(unit_header.format); } else { debug_abbrev_offset = try fbr.readAddress(unit_header.format); address_size = try fbr.readByte(); } - if (address_size != @sizeOf(usize)) return badDwarf(); + if (address_size != @sizeOf(usize)) return bad(); const abbrev_table = try di.getAbbrevTable(allocator, debug_abbrev_offset); @@ -1046,9 +1007,9 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) !void { attrs_buf.items, abbrev_table, unit_header.format, - )) orelse return badDwarf(); + )) orelse return bad(); - if (compile_unit_die.tag_id != DW.TAG.compile_unit) return badDwarf(); + if (compile_unit_die.tag_id != DW.TAG.compile_unit) return bad(); compile_unit_die.attrs = try allocator.dupe(Die.Attr, compile_unit_die.attrs); @@ -1070,7 +1031,7 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) !void { const pc_end = switch (high_pc_value.*) { .addr => |value| value, .udata => |offset| low_pc + offset, - else => return badDwarf(), + else => return bad(), }; break :x PcRange{ .start = low_pc, @@ -1096,7 +1057,7 @@ const DebugRangeIterator = struct { section_type: Section.Id, di: *const Dwarf, compile_unit: *const CompileUnit, - fbr: FixedBufferReader, + fbr: DeprecatedFixedBufferReader, pub fn init(ranges_value: *const FormValue, di: *const Dwarf, compile_unit: *const CompileUnit) !@This() { const section_type = if (compile_unit.version >= 5) Section.Id.debug_rnglists else Section.Id.debug_ranges; @@ -1108,19 +1069,19 @@ const DebugRangeIterator = struct { switch (compile_unit.format) { .@"32" => { const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 4 * idx)); - 
if (offset_loc + 4 > debug_ranges.len) return badDwarf(); + if (offset_loc + 4 > debug_ranges.len) return bad(); const offset = readInt(u32, debug_ranges[offset_loc..][0..4], di.endian); break :off compile_unit.rnglists_base + offset; }, .@"64" => { const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 8 * idx)); - if (offset_loc + 8 > debug_ranges.len) return badDwarf(); + if (offset_loc + 8 > debug_ranges.len) return bad(); const offset = readInt(u64, debug_ranges[offset_loc..][0..8], di.endian); break :off compile_unit.rnglists_base + offset; }, } }, - else => return badDwarf(), + else => return bad(), }; // All the addresses in the list are relative to the value @@ -1139,7 +1100,7 @@ const DebugRangeIterator = struct { .compile_unit = compile_unit, .fbr = .{ .buf = debug_ranges, - .pos = cast(usize, ranges_offset) orelse return badDwarf(), + .pos = cast(usize, ranges_offset) orelse return bad(), .endian = di.endian, }, }; @@ -1214,7 +1175,7 @@ const DebugRangeIterator = struct { .end_addr = end_addr, }; }, - else => return badDwarf(), + else => return bad(), } }, .debug_ranges => { @@ -1251,7 +1212,7 @@ pub fn findCompileUnit(di: *const Dwarf, target_address: u64) !*const CompileUni } } - return missingDwarf(); + return missing(); } /// Gets an already existing AbbrevTable given the abbrev_offset, or if not found, @@ -1270,9 +1231,9 @@ fn getAbbrevTable(di: *Dwarf, allocator: Allocator, abbrev_offset: u64) !*const } fn parseAbbrevTable(di: *Dwarf, allocator: Allocator, offset: u64) !Abbrev.Table { - var fbr: FixedBufferReader = .{ + var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_abbrev).?, - .pos = cast(usize, offset) orelse return badDwarf(), + .pos = cast(usize, offset) orelse return bad(), .endian = di.endian, }; @@ -1322,14 +1283,14 @@ fn parseAbbrevTable(di: *Dwarf, allocator: Allocator, offset: u64) !Abbrev.Table } fn parseDie( - fbr: *FixedBufferReader, + fbr: *DeprecatedFixedBufferReader, attrs_buf: []Die.Attr, 
abbrev_table: *const Abbrev.Table, format: Format, ) !?Die { const abbrev_code = try fbr.readUleb128(u64); if (abbrev_code == 0) return null; - const table_entry = abbrev_table.get(abbrev_code) orelse return badDwarf(); + const table_entry = abbrev_table.get(abbrev_code) orelse return bad(); const attrs = attrs_buf[0..table_entry.attrs.len]; for (attrs, table_entry.attrs) |*result_attr, attr| result_attr.* = Die.Attr{ @@ -1357,15 +1318,15 @@ pub fn getLineNumberInfo( const compile_unit_cwd = try compile_unit.die.getAttrString(di, AT.comp_dir, di.section(.debug_line_str), compile_unit); const line_info_offset = try compile_unit.die.getAttrSecOffset(AT.stmt_list); - var fbr: FixedBufferReader = .{ .buf = di.section(.debug_line).?, .endian = di.endian }; + var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_line).?, .endian = di.endian }; try fbr.seekTo(line_info_offset); const unit_header = try readUnitHeader(&fbr, null); - if (unit_header.unit_length == 0) return missingDwarf(); + if (unit_header.unit_length == 0) return missing(); const next_offset = unit_header.header_length + unit_header.unit_length; const version = try fbr.readInt(u16); - if (version < 2) return badDwarf(); + if (version < 2) return bad(); var addr_size: u8 = switch (unit_header.format) { .@"32" => 4, @@ -1381,7 +1342,7 @@ pub fn getLineNumberInfo( const prog_start_offset = fbr.pos + prologue_length; const minimum_instruction_length = try fbr.readByte(); - if (minimum_instruction_length == 0) return badDwarf(); + if (minimum_instruction_length == 0) return bad(); if (version >= 4) { // maximum_operations_per_instruction @@ -1392,7 +1353,7 @@ pub fn getLineNumberInfo( const line_base = try fbr.readByteSigned(); const line_range = try fbr.readByte(); - if (line_range == 0) return badDwarf(); + if (line_range == 0) return bad(); const opcode_base = try fbr.readByte(); @@ -1433,7 +1394,7 @@ pub fn getLineNumberInfo( { var dir_ent_fmt_buf: [10]FileEntFmt = undefined; const 
directory_entry_format_count = try fbr.readByte(); - if (directory_entry_format_count > dir_ent_fmt_buf.len) return badDwarf(); + if (directory_entry_format_count > dir_ent_fmt_buf.len) return bad(); for (dir_ent_fmt_buf[0..directory_entry_format_count]) |*ent_fmt| { ent_fmt.* = .{ .content_type_code = try fbr.readUleb128(u8), @@ -1461,7 +1422,7 @@ pub fn getLineNumberInfo( DW.LNCT.size => e.size = try form_value.getUInt(u64), DW.LNCT.MD5 => e.md5 = switch (form_value) { .data16 => |data16| data16.*, - else => return badDwarf(), + else => return bad(), }, else => continue, } @@ -1473,7 +1434,7 @@ pub fn getLineNumberInfo( var file_ent_fmt_buf: [10]FileEntFmt = undefined; const file_name_entry_format_count = try fbr.readByte(); - if (file_name_entry_format_count > file_ent_fmt_buf.len) return badDwarf(); + if (file_name_entry_format_count > file_ent_fmt_buf.len) return bad(); for (file_ent_fmt_buf[0..file_name_entry_format_count]) |*ent_fmt| { ent_fmt.* = .{ .content_type_code = try fbr.readUleb128(u8), @@ -1501,7 +1462,7 @@ pub fn getLineNumberInfo( DW.LNCT.size => e.size = try form_value.getUInt(u64), DW.LNCT.MD5 => e.md5 = switch (form_value) { .data16 => |data16| data16.*, - else => return badDwarf(), + else => return bad(), }, else => continue, } @@ -1527,7 +1488,7 @@ pub fn getLineNumberInfo( if (opcode == DW.LNS.extended_op) { const op_size = try fbr.readUleb128(u64); - if (op_size < 1) return badDwarf(); + if (op_size < 1) return bad(); const sub_op = try fbr.readByte(); switch (sub_op) { DW.LNE.end_sequence => { @@ -1600,14 +1561,14 @@ pub fn getLineNumberInfo( }, DW.LNS.set_prologue_end => {}, else => { - if (opcode - 1 >= standard_opcode_lengths.len) return badDwarf(); + if (opcode - 1 >= standard_opcode_lengths.len) return bad(); try fbr.seekForward(standard_opcode_lengths[opcode - 1]); }, } } } - return missingDwarf(); + return missing(); } fn getString(di: Dwarf, offset: u64) ![:0]const u8 { @@ -1619,28 +1580,28 @@ fn getLineString(di: Dwarf, offset: 
u64) ![:0]const u8 { } fn readDebugAddr(di: Dwarf, compile_unit: CompileUnit, index: u64) !u64 { - const debug_addr = di.section(.debug_addr) orelse return badDwarf(); + const debug_addr = di.section(.debug_addr) orelse return bad(); // addr_base points to the first item after the header, however we // need to read the header to know the size of each item. Empirically, // it may disagree with is_64 on the compile unit. // The header is 8 or 12 bytes depending on is_64. - if (compile_unit.addr_base < 8) return badDwarf(); + if (compile_unit.addr_base < 8) return bad(); const version = readInt(u16, debug_addr[compile_unit.addr_base - 4 ..][0..2], di.endian); - if (version != 5) return badDwarf(); + if (version != 5) return bad(); const addr_size = debug_addr[compile_unit.addr_base - 2]; const seg_size = debug_addr[compile_unit.addr_base - 1]; const byte_offset = @as(usize, @intCast(compile_unit.addr_base + (addr_size + seg_size) * index)); - if (byte_offset + addr_size > debug_addr.len) return badDwarf(); + if (byte_offset + addr_size > debug_addr.len) return bad(); return switch (addr_size) { 1 => debug_addr[byte_offset], 2 => readInt(u16, debug_addr[byte_offset..][0..2], di.endian), 4 => readInt(u32, debug_addr[byte_offset..][0..4], di.endian), 8 => readInt(u64, debug_addr[byte_offset..][0..8], di.endian), - else => badDwarf(), + else => bad(), }; } @@ -1650,7 +1611,7 @@ fn readDebugAddr(di: Dwarf, compile_unit: CompileUnit, index: u64) !u64 { /// of FDEs is built for binary searching during unwinding. 
pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) !void { if (di.section(.eh_frame_hdr)) |eh_frame_hdr| blk: { - var fbr: FixedBufferReader = .{ .buf = eh_frame_hdr, .endian = native_endian }; + var fbr: DeprecatedFixedBufferReader = .{ .buf = eh_frame_hdr, .endian = native_endian }; const version = try fbr.readByte(); if (version != 1) break :blk; @@ -1665,16 +1626,16 @@ pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) const eh_frame_ptr = cast(usize, try readEhPointer(&fbr, eh_frame_ptr_enc, @sizeOf(usize), .{ .pc_rel_base = @intFromPtr(&eh_frame_hdr[fbr.pos]), .follow_indirect = true, - }) orelse return badDwarf()) orelse return badDwarf(); + }) orelse return bad()) orelse return bad(); const fde_count = cast(usize, try readEhPointer(&fbr, fde_count_enc, @sizeOf(usize), .{ .pc_rel_base = @intFromPtr(&eh_frame_hdr[fbr.pos]), .follow_indirect = true, - }) orelse return badDwarf()) orelse return badDwarf(); + }) orelse return bad()) orelse return bad(); const entry_size = try ExceptionFrameHeader.entrySize(table_enc); const entries_len = fde_count * entry_size; - if (entries_len > eh_frame_hdr.len - fbr.pos) return badDwarf(); + if (entries_len > eh_frame_hdr.len - fbr.pos) return bad(); di.eh_frame_hdr = .{ .eh_frame_ptr = eh_frame_ptr, @@ -1690,7 +1651,7 @@ pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) const frame_sections = [2]Section.Id{ .eh_frame, .debug_frame }; for (frame_sections) |frame_section| { if (di.section(frame_section)) |section_data| { - var fbr: FixedBufferReader = .{ .buf = section_data, .endian = di.endian }; + var fbr: DeprecatedFixedBufferReader = .{ .buf = section_data, .endian = di.endian }; while (fbr.pos < fbr.buf.len) { const entry_header = try EntryHeader.read(&fbr, null, frame_section); switch (entry_header.type) { @@ -1708,7 +1669,7 @@ pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) try 
di.cie_map.put(allocator, entry_header.length_offset, cie); }, .fde => |cie_offset| { - const cie = di.cie_map.get(cie_offset) orelse return badDwarf(); + const cie = di.cie_map.get(cie_offset) orelse return bad(); const fde = try FrameDescriptionEntry.parse( entry_header.entry_bytes, di.sectionVirtualOffset(frame_section, base_address).?, @@ -1733,205 +1694,8 @@ pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) } } -/// Unwind a stack frame using DWARF unwinding info, updating the register context. -/// -/// If `.eh_frame_hdr` is available, it will be used to binary search for the FDE. -/// Otherwise, a linear scan of `.eh_frame` and `.debug_frame` is done to find the FDE. -/// -/// `explicit_fde_offset` is for cases where the FDE offset is known, such as when __unwind_info -/// defers unwinding to DWARF. This is an offset into the `.eh_frame` section. -pub fn unwindFrame(di: *const Dwarf, context: *UnwindContext, ma: *StackIterator.MemoryAccessor, explicit_fde_offset: ?usize) !usize { - if (!comptime abi.supportsUnwinding(builtin.target)) return error.UnsupportedCpuArchitecture; - if (context.pc == 0) return 0; - - // Find the FDE and CIE - var cie: CommonInformationEntry = undefined; - var fde: FrameDescriptionEntry = undefined; - - if (explicit_fde_offset) |fde_offset| { - const dwarf_section: Section.Id = .eh_frame; - const frame_section = di.section(dwarf_section) orelse return error.MissingFDE; - if (fde_offset >= frame_section.len) return error.MissingFDE; - - var fbr: FixedBufferReader = .{ - .buf = frame_section, - .pos = fde_offset, - .endian = di.endian, - }; - - const fde_entry_header = try EntryHeader.read(&fbr, null, dwarf_section); - if (fde_entry_header.type != .fde) return error.MissingFDE; - - const cie_offset = fde_entry_header.type.fde; - try fbr.seekTo(cie_offset); - - fbr.endian = native_endian; - const cie_entry_header = try EntryHeader.read(&fbr, null, dwarf_section); - if (cie_entry_header.type != .cie) return 
badDwarf(); - - cie = try CommonInformationEntry.parse( - cie_entry_header.entry_bytes, - 0, - true, - cie_entry_header.format, - dwarf_section, - cie_entry_header.length_offset, - @sizeOf(usize), - native_endian, - ); - - fde = try FrameDescriptionEntry.parse( - fde_entry_header.entry_bytes, - 0, - true, - cie, - @sizeOf(usize), - native_endian, - ); - } else if (di.eh_frame_hdr) |header| { - const eh_frame_len = if (di.section(.eh_frame)) |eh_frame| eh_frame.len else null; - try header.findEntry( - ma, - eh_frame_len, - @intFromPtr(di.section(.eh_frame_hdr).?.ptr), - context.pc, - &cie, - &fde, - ); - } else { - const index = std.sort.binarySearch(FrameDescriptionEntry, context.pc, di.fde_list.items, {}, struct { - pub fn compareFn(_: void, pc: usize, mid_item: FrameDescriptionEntry) std.math.Order { - if (pc < mid_item.pc_begin) return .lt; - - const range_end = mid_item.pc_begin + mid_item.pc_range; - if (pc < range_end) return .eq; - - return .gt; - } - }.compareFn); - - fde = if (index) |i| di.fde_list.items[i] else return error.MissingFDE; - cie = di.cie_map.get(fde.cie_length_offset) orelse return error.MissingCIE; - } - - var expression_context: expression.Context = .{ - .format = cie.format, - .memory_accessor = ma, - .compile_unit = di.findCompileUnit(fde.pc_begin) catch null, - .thread_context = context.thread_context, - .reg_context = context.reg_context, - .cfa = context.cfa, - }; - - context.vm.reset(); - context.reg_context.eh_frame = cie.version != 4; - context.reg_context.is_macho = di.is_macho; - - const row = try context.vm.runToNative(context.allocator, context.pc, cie, fde); - context.cfa = switch (row.cfa.rule) { - .val_offset => |offset| blk: { - const register = row.cfa.register orelse return error.InvalidCFARule; - const value = readInt(usize, (try abi.regBytes(context.thread_context, register, context.reg_context))[0..@sizeOf(usize)], native_endian); - break :blk try call_frame.applyOffset(value, offset); - }, - .expression => |expr| blk: 
{ - context.stack_machine.reset(); - const value = try context.stack_machine.run( - expr, - context.allocator, - expression_context, - context.cfa, - ); - - if (value) |v| { - if (v != .generic) return error.InvalidExpressionValue; - break :blk v.generic; - } else return error.NoExpressionValue; - }, - else => return error.InvalidCFARule, - }; - - if (ma.load(usize, context.cfa.?) == null) return error.InvalidCFA; - expression_context.cfa = context.cfa; - - // Buffering the modifications is done because copying the thread context is not portable, - // some implementations (ie. darwin) use internal pointers to the mcontext. - var arena = std.heap.ArenaAllocator.init(context.allocator); - defer arena.deinit(); - const update_allocator = arena.allocator(); - - const RegisterUpdate = struct { - // Backed by thread_context - dest: []u8, - // Backed by arena - src: []const u8, - prev: ?*@This(), - }; - - var update_tail: ?*RegisterUpdate = null; - var has_return_address = true; - for (context.vm.rowColumns(row)) |column| { - if (column.register) |register| { - if (register == cie.return_address_register) { - has_return_address = column.rule != .undefined; - } - - const dest = try abi.regBytes(context.thread_context, register, context.reg_context); - const src = try update_allocator.alloc(u8, dest.len); - - const prev = update_tail; - update_tail = try update_allocator.create(RegisterUpdate); - update_tail.?.* = .{ - .dest = dest, - .src = src, - .prev = prev, - }; - - try column.resolveValue( - context, - expression_context, - ma, - src, - ); - } - } - - // On all implemented architectures, the CFA is defined as being the previous frame's SP - (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(context.reg_context), context.reg_context)).* = context.cfa.?; - - while (update_tail) |tail| { - @memcpy(tail.dest, tail.src); - update_tail = tail.prev; - } - - if (has_return_address) { - context.pc = abi.stripInstructionPtrAuthCode(readInt(usize, (try 
abi.regBytes( - context.thread_context, - cie.return_address_register, - context.reg_context, - ))[0..@sizeOf(usize)], native_endian)); - } else { - context.pc = 0; - } - - (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), context.reg_context)).* = context.pc; - - // The call instruction will have pushed the address of the instruction that follows the call as the return address. - // This next instruction may be past the end of the function if the caller was `noreturn` (ie. the last instruction in - // the function was the call). If we were to look up an FDE entry using the return address directly, it could end up - // either not finding an FDE at all, or using the next FDE in the program, producing incorrect results. To prevent this, - // we subtract one so that the next lookup is guaranteed to land inside the - // - // The exception to this rule is signal frames, where we return execution would be returned to the instruction - // that triggered the handler. - const return_address = context.pc; - if (context.pc > 0 and !cie.isSignalFrame()) context.pc -= 1; - - return return_address; -} - fn parseFormValue( - fbr: *FixedBufferReader, + fbr: *DeprecatedFixedBufferReader, form_id: u64, format: Format, implicit_const: ?i64, @@ -1990,12 +1754,12 @@ fn parseFormValue( FORM.strx => .{ .strx = try fbr.readUleb128(usize) }, FORM.line_strp => .{ .line_strp = try fbr.readAddress(format) }, FORM.indirect => parseFormValue(fbr, try fbr.readUleb128(u64), format, implicit_const), - FORM.implicit_const => .{ .sdata = implicit_const orelse return badDwarf() }, + FORM.implicit_const => .{ .sdata = implicit_const orelse return bad() }, FORM.loclistx => .{ .loclistx = try fbr.readUleb128(u64) }, FORM.rnglistx => .{ .rnglistx = try fbr.readUleb128(u64) }, else => { //debug.print("unrecognized form id: {x}\n", .{form_id}); - return badDwarf(); + return bad(); }, }; } @@ -2090,14 +1854,14 @@ const LineNumberProgram = struct { self.target_address < self.address) { 
const file_index = if (self.version >= 5) self.prev_file else i: { - if (self.prev_file == 0) return missingDwarf(); + if (self.prev_file == 0) return missing(); break :i self.prev_file - 1; }; - if (file_index >= file_entries.len) return badDwarf(); + if (file_index >= file_entries.len) return bad(); const file_entry = &file_entries[file_index]; - if (file_entry.dir_index >= self.include_dirs.len) return badDwarf(); + if (file_entry.dir_index >= self.include_dirs.len) return bad(); const dir_name = self.include_dirs[file_entry.dir_index].path; const file_name = try std.fs.path.join(allocator, &[_][]const u8{ @@ -2128,14 +1892,14 @@ const UnitHeader = struct { header_length: u4, unit_length: u64, }; -fn readUnitHeader(fbr: *FixedBufferReader, opt_ma: ?*StackIterator.MemoryAccessor) !UnitHeader { +fn readUnitHeader(fbr: *DeprecatedFixedBufferReader, opt_ma: ?*MemoryAccessor) !UnitHeader { return switch (try if (opt_ma) |ma| fbr.readIntChecked(u32, ma) else fbr.readInt(u32)) { 0...0xfffffff0 - 1 => |unit_length| .{ .format = .@"32", .header_length = 4, .unit_length = unit_length, }, - 0xfffffff0...0xffffffff - 1 => badDwarf(), + 0xfffffff0...0xffffffff - 1 => bad(), 0xffffffff => .{ .format = .@"64", .header_length = 12, @@ -2145,7 +1909,7 @@ fn readUnitHeader(fbr: *FixedBufferReader, opt_ma: ?*StackIterator.MemoryAccesso } /// Returns the DWARF register number for an x86_64 register number found in compact unwind info -fn compactUnwindToDwarfRegNumber(unwind_reg_number: u3) !u8 { +pub fn compactUnwindToDwarfRegNumber(unwind_reg_number: u3) !u8 { return switch (unwind_reg_number) { 1 => 3, // RBX 2 => 12, // R12 @@ -2159,473 +1923,25 @@ fn compactUnwindToDwarfRegNumber(unwind_reg_number: u3) !u8 { /// This function is to make it handy to comment out the return and make it /// into a crash when working on this file. 
-fn badDwarf() error{InvalidDebugInfo} { - //if (true) @panic("badDwarf"); // can be handy to uncomment when working on this file +pub fn bad() error{InvalidDebugInfo} { + //if (true) @panic("bad dwarf"); // can be handy to uncomment when working on this file return error.InvalidDebugInfo; } -fn missingDwarf() error{MissingDebugInfo} { - //if (true) @panic("missingDwarf"); // can be handy to uncomment when working on this file +fn missing() error{MissingDebugInfo} { + //if (true) @panic("missing dwarf"); // can be handy to uncomment when working on this file return error.MissingDebugInfo; } fn getStringGeneric(opt_str: ?[]const u8, offset: u64) ![:0]const u8 { - const str = opt_str orelse return badDwarf(); - if (offset > str.len) return badDwarf(); - const casted_offset = cast(usize, offset) orelse return badDwarf(); + const str = opt_str orelse return bad(); + if (offset > str.len) return bad(); + const casted_offset = cast(usize, offset) orelse return bad(); // Valid strings always have a terminating zero byte - const last = std.mem.indexOfScalarPos(u8, str, casted_offset, 0) orelse return badDwarf(); + const last = std.mem.indexOfScalarPos(u8, str, casted_offset, 0) orelse return bad(); return str[casted_offset..last :0]; } -// Reading debug info needs to be fast, even when compiled in debug mode, -// so avoid using a `std.io.FixedBufferStream` which is too slow. 
-pub const FixedBufferReader = struct { - buf: []const u8, - pos: usize = 0, - endian: std.builtin.Endian, - - pub const Error = error{ EndOfBuffer, Overflow, InvalidBuffer }; - - fn seekTo(fbr: *FixedBufferReader, pos: u64) Error!void { - if (pos > fbr.buf.len) return error.EndOfBuffer; - fbr.pos = @intCast(pos); - } - - fn seekForward(fbr: *FixedBufferReader, amount: u64) Error!void { - if (fbr.buf.len - fbr.pos < amount) return error.EndOfBuffer; - fbr.pos += @intCast(amount); - } - - pub inline fn readByte(fbr: *FixedBufferReader) Error!u8 { - if (fbr.pos >= fbr.buf.len) return error.EndOfBuffer; - defer fbr.pos += 1; - return fbr.buf[fbr.pos]; - } - - fn readByteSigned(fbr: *FixedBufferReader) Error!i8 { - return @bitCast(try fbr.readByte()); - } - - fn readInt(fbr: *FixedBufferReader, comptime T: type) Error!T { - const size = @divExact(@typeInfo(T).Int.bits, 8); - if (fbr.buf.len - fbr.pos < size) return error.EndOfBuffer; - defer fbr.pos += size; - return std.mem.readInt(T, fbr.buf[fbr.pos..][0..size], fbr.endian); - } - - fn readIntChecked( - fbr: *FixedBufferReader, - comptime T: type, - ma: *std.debug.StackIterator.MemoryAccessor, - ) Error!T { - if (ma.load(T, @intFromPtr(fbr.buf[fbr.pos..].ptr)) == null) - return error.InvalidBuffer; - - return fbr.readInt(T); - } - - fn readUleb128(fbr: *FixedBufferReader, comptime T: type) Error!T { - return std.leb.readUleb128(T, fbr); - } - - fn readIleb128(fbr: *FixedBufferReader, comptime T: type) Error!T { - return std.leb.readIleb128(T, fbr); - } - - fn readAddress(fbr: *FixedBufferReader, format: Format) Error!u64 { - return switch (format) { - .@"32" => try fbr.readInt(u32), - .@"64" => try fbr.readInt(u64), - }; - } - - fn readAddressChecked( - fbr: *FixedBufferReader, - format: Format, - ma: *std.debug.StackIterator.MemoryAccessor, - ) Error!u64 { - return switch (format) { - .@"32" => try fbr.readIntChecked(u32, ma), - .@"64" => try fbr.readIntChecked(u64, ma), - }; - } - - fn readBytes(fbr: 
*FixedBufferReader, len: usize) Error![]const u8 { - if (fbr.buf.len - fbr.pos < len) return error.EndOfBuffer; - defer fbr.pos += len; - return fbr.buf[fbr.pos..][0..len]; - } - - fn readBytesTo(fbr: *FixedBufferReader, comptime sentinel: u8) Error![:sentinel]const u8 { - const end = @call(.always_inline, std.mem.indexOfScalarPos, .{ - u8, - fbr.buf, - fbr.pos, - sentinel, - }) orelse return error.EndOfBuffer; - defer fbr.pos = end + 1; - return fbr.buf[fbr.pos..end :sentinel]; - } -}; - -/// Unwind a frame using MachO compact unwind info (from __unwind_info). -/// If the compact encoding can't encode a way to unwind a frame, it will -/// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. -pub fn unwindFrameMachO( - context: *UnwindContext, - ma: *StackIterator.MemoryAccessor, - unwind_info: []const u8, - eh_frame: ?[]const u8, - module_base_address: usize, -) !usize { - const macho = std.macho; - - const header = std.mem.bytesAsValue( - macho.unwind_info_section_header, - unwind_info[0..@sizeOf(macho.unwind_info_section_header)], - ); - const indices = std.mem.bytesAsSlice( - macho.unwind_info_section_header_index_entry, - unwind_info[header.indexSectionOffset..][0 .. 
header.indexCount * @sizeOf(macho.unwind_info_section_header_index_entry)], - ); - if (indices.len == 0) return error.MissingUnwindInfo; - - const mapped_pc = context.pc - module_base_address; - const second_level_index = blk: { - var left: usize = 0; - var len: usize = indices.len; - - while (len > 1) { - const mid = left + len / 2; - const offset = indices[mid].functionOffset; - if (mapped_pc < offset) { - len /= 2; - } else { - left = mid; - if (mapped_pc == offset) break; - len -= len / 2; - } - } - - // Last index is a sentinel containing the highest address as its functionOffset - if (indices[left].secondLevelPagesSectionOffset == 0) return error.MissingUnwindInfo; - break :blk &indices[left]; - }; - - const common_encodings = std.mem.bytesAsSlice( - macho.compact_unwind_encoding_t, - unwind_info[header.commonEncodingsArraySectionOffset..][0 .. header.commonEncodingsArrayCount * @sizeOf(macho.compact_unwind_encoding_t)], - ); - - const start_offset = second_level_index.secondLevelPagesSectionOffset; - const kind = std.mem.bytesAsValue( - macho.UNWIND_SECOND_LEVEL, - unwind_info[start_offset..][0..@sizeOf(macho.UNWIND_SECOND_LEVEL)], - ); - - const entry: struct { - function_offset: usize, - raw_encoding: u32, - } = switch (kind.*) { - .REGULAR => blk: { - const page_header = std.mem.bytesAsValue( - macho.unwind_info_regular_second_level_page_header, - unwind_info[start_offset..][0..@sizeOf(macho.unwind_info_regular_second_level_page_header)], - ); - - const entries = std.mem.bytesAsSlice( - macho.unwind_info_regular_second_level_entry, - unwind_info[start_offset + page_header.entryPageOffset ..][0 .. 
page_header.entryCount * @sizeOf(macho.unwind_info_regular_second_level_entry)], - ); - if (entries.len == 0) return error.InvalidUnwindInfo; - - var left: usize = 0; - var len: usize = entries.len; - while (len > 1) { - const mid = left + len / 2; - const offset = entries[mid].functionOffset; - if (mapped_pc < offset) { - len /= 2; - } else { - left = mid; - if (mapped_pc == offset) break; - len -= len / 2; - } - } - - break :blk .{ - .function_offset = entries[left].functionOffset, - .raw_encoding = entries[left].encoding, - }; - }, - .COMPRESSED => blk: { - const page_header = std.mem.bytesAsValue( - macho.unwind_info_compressed_second_level_page_header, - unwind_info[start_offset..][0..@sizeOf(macho.unwind_info_compressed_second_level_page_header)], - ); - - const entries = std.mem.bytesAsSlice( - macho.UnwindInfoCompressedEntry, - unwind_info[start_offset + page_header.entryPageOffset ..][0 .. page_header.entryCount * @sizeOf(macho.UnwindInfoCompressedEntry)], - ); - if (entries.len == 0) return error.InvalidUnwindInfo; - - var left: usize = 0; - var len: usize = entries.len; - while (len > 1) { - const mid = left + len / 2; - const offset = second_level_index.functionOffset + entries[mid].funcOffset; - if (mapped_pc < offset) { - len /= 2; - } else { - left = mid; - if (mapped_pc == offset) break; - len -= len / 2; - } - } - - const entry = entries[left]; - const function_offset = second_level_index.functionOffset + entry.funcOffset; - if (entry.encodingIndex < header.commonEncodingsArrayCount) { - if (entry.encodingIndex >= common_encodings.len) return error.InvalidUnwindInfo; - break :blk .{ - .function_offset = function_offset, - .raw_encoding = common_encodings[entry.encodingIndex], - }; - } else { - const local_index = try std.math.sub( - u8, - entry.encodingIndex, - cast(u8, header.commonEncodingsArrayCount) orelse return error.InvalidUnwindInfo, - ); - const local_encodings = std.mem.bytesAsSlice( - macho.compact_unwind_encoding_t, - 
unwind_info[start_offset + page_header.encodingsPageOffset ..][0 .. page_header.encodingsCount * @sizeOf(macho.compact_unwind_encoding_t)], - ); - if (local_index >= local_encodings.len) return error.InvalidUnwindInfo; - break :blk .{ - .function_offset = function_offset, - .raw_encoding = local_encodings[local_index], - }; - } - }, - else => return error.InvalidUnwindInfo, - }; - - if (entry.raw_encoding == 0) return error.NoUnwindInfo; - const reg_context = abi.RegisterContext{ - .eh_frame = false, - .is_macho = true, - }; - - const encoding: macho.CompactUnwindEncoding = @bitCast(entry.raw_encoding); - const new_ip = switch (builtin.cpu.arch) { - .x86_64 => switch (encoding.mode.x86_64) { - .OLD => return error.UnimplementedUnwindEncoding, - .RBP_FRAME => blk: { - const regs: [5]u3 = .{ - encoding.value.x86_64.frame.reg0, - encoding.value.x86_64.frame.reg1, - encoding.value.x86_64.frame.reg2, - encoding.value.x86_64.frame.reg3, - encoding.value.x86_64.frame.reg4, - }; - - const frame_offset = encoding.value.x86_64.frame.frame_offset * @sizeOf(usize); - var max_reg: usize = 0; - inline for (regs, 0..) 
|reg, i| { - if (reg > 0) max_reg = i; - } - - const fp = (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).*; - const new_sp = fp + 2 * @sizeOf(usize); - - // Verify the stack range we're about to read register values from - if (ma.load(usize, new_sp) == null or ma.load(usize, fp - frame_offset + max_reg * @sizeOf(usize)) == null) return error.InvalidUnwindInfo; - - const ip_ptr = fp + @sizeOf(usize); - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_fp = @as(*const usize, @ptrFromInt(fp)).*; - - (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).* = new_fp; - (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp; - (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip; - - for (regs, 0..) |reg, i| { - if (reg == 0) continue; - const addr = fp - frame_offset + i * @sizeOf(usize); - const reg_number = try compactUnwindToDwarfRegNumber(reg); - (try abi.regValueNative(usize, context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(addr)).*; - } - - break :blk new_ip; - }, - .STACK_IMMD, - .STACK_IND, - => blk: { - const sp = (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).*; - const stack_size = if (encoding.mode.x86_64 == .STACK_IMMD) - @as(usize, encoding.value.x86_64.frameless.stack.direct.stack_size) * @sizeOf(usize) - else stack_size: { - // In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function. 
- const sub_offset_addr = - module_base_address + - entry.function_offset + - encoding.value.x86_64.frameless.stack.indirect.sub_offset; - if (ma.load(usize, sub_offset_addr) == null) return error.InvalidUnwindInfo; - - // `sub_offset_addr` points to the offset of the literal within the instruction - const sub_operand = @as(*align(1) const u32, @ptrFromInt(sub_offset_addr)).*; - break :stack_size sub_operand + @sizeOf(usize) * @as(usize, encoding.value.x86_64.frameless.stack.indirect.stack_adjust); - }; - - // Decode the Lehmer-coded sequence of registers. - // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h - - // Decode the variable-based permutation number into its digits. Each digit represents - // an index into the list of register numbers that weren't yet used in the sequence at - // the time the digit was added. - const reg_count = encoding.value.x86_64.frameless.stack_reg_count; - const ip_ptr = if (reg_count > 0) reg_blk: { - var digits: [6]u3 = undefined; - var accumulator: usize = encoding.value.x86_64.frameless.stack_reg_permutation; - var base: usize = 2; - for (0..reg_count) |i| { - const div = accumulator / base; - digits[digits.len - 1 - i] = @intCast(accumulator - base * div); - accumulator = div; - base += 1; - } - - const reg_numbers = [_]u3{ 1, 2, 3, 4, 5, 6 }; - var registers: [reg_numbers.len]u3 = undefined; - var used_indices = [_]bool{false} ** reg_numbers.len; - for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| { - var unused_count: u8 = 0; - const unused_index = for (used_indices, 0..) 
|used, index| { - if (!used) { - if (target_unused_index == unused_count) break index; - unused_count += 1; - } - } else unreachable; - - registers[i] = reg_numbers[unused_index]; - used_indices[unused_index] = true; - } - - var reg_addr = sp + stack_size - @sizeOf(usize) * @as(usize, reg_count + 1); - if (ma.load(usize, reg_addr) == null) return error.InvalidUnwindInfo; - for (0..reg_count) |i| { - const reg_number = try compactUnwindToDwarfRegNumber(registers[i]); - (try abi.regValueNative(usize, context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - } - - break :reg_blk reg_addr; - } else sp + stack_size - @sizeOf(usize); - - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_sp = ip_ptr + @sizeOf(usize); - if (ma.load(usize, new_sp) == null) return error.InvalidUnwindInfo; - - (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp; - (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip; - - break :blk new_ip; - }, - .DWARF => { - return unwindFrameMachODwarf(context, ma, eh_frame orelse return error.MissingEhFrame, @intCast(encoding.value.x86_64.dwarf)); - }, - }, - .aarch64 => switch (encoding.mode.arm64) { - .OLD => return error.UnimplementedUnwindEncoding, - .FRAMELESS => blk: { - const sp = (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).*; - const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16; - const new_ip = (try abi.regValueNative(usize, context.thread_context, 30, reg_context)).*; - if (ma.load(usize, new_sp) == null) return error.InvalidUnwindInfo; - (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp; - break :blk new_ip; - }, - .DWARF => { - return unwindFrameMachODwarf(context, ma, eh_frame orelse return error.MissingEhFrame, 
@intCast(encoding.value.arm64.dwarf)); - }, - .FRAME => blk: { - const fp = (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).*; - const new_sp = fp + 16; - const ip_ptr = fp + @sizeOf(usize); - - const num_restored_pairs: usize = - @popCount(@as(u5, @bitCast(encoding.value.arm64.frame.x_reg_pairs))) + - @popCount(@as(u4, @bitCast(encoding.value.arm64.frame.d_reg_pairs))); - const min_reg_addr = fp - num_restored_pairs * 2 * @sizeOf(usize); - - if (ma.load(usize, new_sp) == null or ma.load(usize, min_reg_addr) == null) return error.InvalidUnwindInfo; - - var reg_addr = fp - @sizeOf(usize); - inline for (@typeInfo(@TypeOf(encoding.value.arm64.frame.x_reg_pairs)).Struct.fields, 0..) |field, i| { - if (@field(encoding.value.arm64.frame.x_reg_pairs, field.name) != 0) { - (try abi.regValueNative(usize, context.thread_context, 19 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - (try abi.regValueNative(usize, context.thread_context, 20 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - } - } - - inline for (@typeInfo(@TypeOf(encoding.value.arm64.frame.d_reg_pairs)).Struct.fields, 0..) 
|field, i| { - if (@field(encoding.value.arm64.frame.d_reg_pairs, field.name) != 0) { - // Only the lower half of the 128-bit V registers are restored during unwinding - @memcpy( - try abi.regBytes(context.thread_context, 64 + 8 + i, context.reg_context), - std.mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))), - ); - reg_addr += @sizeOf(usize); - @memcpy( - try abi.regBytes(context.thread_context, 64 + 9 + i, context.reg_context), - std.mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))), - ); - reg_addr += @sizeOf(usize); - } - } - - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_fp = @as(*const usize, @ptrFromInt(fp)).*; - - (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).* = new_fp; - (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip; - - break :blk new_ip; - }, - }, - else => return error.UnimplementedArch, - }; - - context.pc = abi.stripInstructionPtrAuthCode(new_ip); - if (context.pc > 0) context.pc -= 1; - return new_ip; -} - -fn unwindFrameMachODwarf( - context: *UnwindContext, - ma: *std.debug.StackIterator.MemoryAccessor, - eh_frame: []const u8, - fde_offset: usize, -) !usize { - var di = Dwarf{ - .endian = native_endian, - .is_macho = true, - }; - defer di.deinit(context.allocator); - - di.sections[@intFromEnum(Section.Id.eh_frame)] = .{ - .data = eh_frame, - .owned = false, - }; - - return di.unwindFrame(context, ma, fde_offset); -} - const EhPointerContext = struct { // The address of the pointer field itself pc_rel_base: u64, @@ -2641,7 +1957,7 @@ const EhPointerContext = struct { text_rel_base: ?u64 = null, function_rel_base: ?u64 = null, }; -fn readEhPointer(fbr: *FixedBufferReader, enc: u8, addr_size_bytes: u8, ctx: EhPointerContext) !?u64 { +fn readEhPointer(fbr: *DeprecatedFixedBufferReader, enc: u8, addr_size_bytes: u8, ctx: EhPointerContext) !?u64 { if (enc == EH.PE.omit) return null; const value: union(enum) { @@ -2664,7 
+1980,7 @@ fn readEhPointer(fbr: *FixedBufferReader, enc: u8, addr_size_bytes: u8, ctx: EhP EH.PE.sdata2 => .{ .signed = try fbr.readInt(i16) }, EH.PE.sdata4 => .{ .signed = try fbr.readInt(i32) }, EH.PE.sdata8 => .{ .signed = try fbr.readInt(i64) }, - else => return badDwarf(), + else => return bad(), }; const base = switch (enc & EH.PE.rel_mask) { diff --git a/lib/std/debug/Dwarf/abi.zig b/lib/std/debug/Dwarf/abi.zig index 1a47625ae7df..e87f023d72f6 100644 --- a/lib/std/debug/Dwarf/abi.zig +++ b/lib/std/debug/Dwarf/abi.zig @@ -1,8 +1,9 @@ const builtin = @import("builtin"); + const std = @import("../../std.zig"); const mem = std.mem; -const native_os = builtin.os.tag; const posix = std.posix; +const Arch = std.Target.Cpu.Arch; pub fn supportsUnwinding(target: std.Target) bool { return switch (target.cpu.arch) { @@ -26,8 +27,8 @@ pub fn supportsUnwinding(target: std.Target) bool { }; } -pub fn ipRegNum() u8 { - return switch (builtin.cpu.arch) { +pub fn ipRegNum(arch: Arch) u8 { + return switch (arch) { .x86 => 8, .x86_64 => 16, .arm => 15, @@ -36,9 +37,10 @@ pub fn ipRegNum() u8 { }; } -pub fn fpRegNum(reg_context: RegisterContext) u8 { - return switch (builtin.cpu.arch) { - // GCC on OS X historically did the opposite of ELF for these registers (only in .eh_frame), and that is now the convention for MachO +pub fn fpRegNum(arch: Arch, reg_context: RegisterContext) u8 { + return switch (arch) { + // GCC on OS X historically did the opposite of ELF for these registers + // (only in .eh_frame), and that is now the convention for MachO .x86 => if (reg_context.eh_frame and reg_context.is_macho) 4 else 5, .x86_64 => 6, .arm => 11, @@ -47,8 +49,8 @@ pub fn fpRegNum(reg_context: RegisterContext) u8 { }; } -pub fn spRegNum(reg_context: RegisterContext) u8 { - return switch (builtin.cpu.arch) { +pub fn spRegNum(arch: Arch, reg_context: RegisterContext) u8 { + return switch (arch) { .x86 => if (reg_context.eh_frame and reg_context.is_macho) 5 else 4, .x86_64 => 7, .arm => 
13, @@ -57,33 +59,12 @@ pub fn spRegNum(reg_context: RegisterContext) u8 { }; } -/// Some platforms use pointer authentication - the upper bits of instruction pointers contain a signature. -/// This function clears these signature bits to make the pointer usable. -pub inline fn stripInstructionPtrAuthCode(ptr: usize) usize { - if (builtin.cpu.arch == .aarch64) { - // `hint 0x07` maps to `xpaclri` (or `nop` if the hardware doesn't support it) - // The save / restore is because `xpaclri` operates on x30 (LR) - return asm ( - \\mov x16, x30 - \\mov x30, x15 - \\hint 0x07 - \\mov x15, x30 - \\mov x30, x16 - : [ret] "={x15}" (-> usize), - : [ptr] "{x15}" (ptr), - : "x16" - ); - } - - return ptr; -} - pub const RegisterContext = struct { eh_frame: bool, is_macho: bool, }; -pub const AbiError = error{ +pub const RegBytesError = error{ InvalidRegister, UnimplementedArch, UnimplementedOs, @@ -91,55 +72,21 @@ pub const AbiError = error{ ThreadContextNotSupported, }; -fn RegValueReturnType(comptime ContextPtrType: type, comptime T: type) type { - const reg_bytes_type = comptime RegBytesReturnType(ContextPtrType); - const info = @typeInfo(reg_bytes_type).Pointer; - return @Type(.{ - .Pointer = .{ - .size = .One, - .is_const = info.is_const, - .is_volatile = info.is_volatile, - .is_allowzero = info.is_allowzero, - .alignment = info.alignment, - .address_space = info.address_space, - .child = T, - .sentinel = null, - }, - }); -} - -/// Returns a pointer to a register stored in a ThreadContext, preserving the pointer attributes of the context. 
-pub fn regValueNative( - comptime T: type, - thread_context_ptr: anytype, - reg_number: u8, - reg_context: ?RegisterContext, -) !RegValueReturnType(@TypeOf(thread_context_ptr), T) { - const reg_bytes = try regBytes(thread_context_ptr, reg_number, reg_context); - if (@sizeOf(T) != reg_bytes.len) return error.IncompatibleRegisterSize; - return mem.bytesAsValue(T, reg_bytes[0..@sizeOf(T)]); -} - -fn RegBytesReturnType(comptime ContextPtrType: type) type { - const info = @typeInfo(ContextPtrType); - if (info != .Pointer or info.Pointer.child != std.debug.ThreadContext) { - @compileError("Expected a pointer to std.debug.ThreadContext, got " ++ @typeName(@TypeOf(ContextPtrType))); - } - - return if (info.Pointer.is_const) return []const u8 else []u8; -} - /// Returns a slice containing the backing storage for `reg_number`. /// +/// This function assumes the Dwarf information corresponds not necessarily to +/// the current executable, but at least with a matching CPU architecture and +/// OS. It is planned to lift this limitation with a future enhancement. +/// /// `reg_context` describes in what context the register number is used, as it can have different /// meanings depending on the DWARF container. It is only required when getting the stack or /// frame pointer register on some architectures. 
pub fn regBytes( - thread_context_ptr: anytype, + thread_context_ptr: *std.debug.ThreadContext, reg_number: u8, reg_context: ?RegisterContext, -) AbiError!RegBytesReturnType(@TypeOf(thread_context_ptr)) { - if (native_os == .windows) { +) RegBytesError![]u8 { + if (builtin.os.tag == .windows) { return switch (builtin.cpu.arch) { .x86 => switch (reg_number) { 0 => mem.asBytes(&thread_context_ptr.Eax), @@ -194,7 +141,7 @@ pub fn regBytes( const ucontext_ptr = thread_context_ptr; return switch (builtin.cpu.arch) { - .x86 => switch (native_os) { + .x86 => switch (builtin.os.tag) { .linux, .netbsd, .solaris, .illumos => switch (reg_number) { 0 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EAX]), 1 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.ECX]), @@ -229,7 +176,7 @@ pub fn regBytes( }, else => error.UnimplementedOs, }, - .x86_64 => switch (native_os) { + .x86_64 => switch (builtin.os.tag) { .linux, .solaris, .illumos => switch (reg_number) { 0 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RAX]), 1 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RDX]), @@ -248,7 +195,7 @@ pub fn regBytes( 14 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R14]), 15 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R15]), 16 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RIP]), - 17...32 => |i| if (native_os.isSolarish()) + 17...32 => |i| if (builtin.os.tag.isSolarish()) mem.asBytes(&ucontext_ptr.mcontext.fpregs.chip_state.xmm[i - 17]) else mem.asBytes(&ucontext_ptr.mcontext.fpregs.xmm[i - 17]), @@ -318,7 +265,7 @@ pub fn regBytes( }, else => error.UnimplementedOs, }, - .arm => switch (native_os) { + .arm => switch (builtin.os.tag) { .linux => switch (reg_number) { 0 => mem.asBytes(&ucontext_ptr.mcontext.arm_r0), 1 => mem.asBytes(&ucontext_ptr.mcontext.arm_r1), @@ -341,7 +288,7 @@ pub fn regBytes( }, else => error.UnimplementedOs, }, - .aarch64 => switch (native_os) { + .aarch64 => switch (builtin.os.tag) { .macos, .ios => switch 
(reg_number) { 0...28 => mem.asBytes(&ucontext_ptr.mcontext.ss.regs[reg_number]), 29 => mem.asBytes(&ucontext_ptr.mcontext.ss.fp), @@ -389,22 +336,14 @@ pub fn regBytes( }; } -/// Returns the ABI-defined default value this register has in the unwinding table -/// before running any of the CIE instructions. The DWARF spec defines these as having -/// the .undefined rule by default, but allows ABI authors to override that. -pub fn getRegDefaultValue(reg_number: u8, context: *std.debug.Dwarf.UnwindContext, out: []u8) !void { - switch (builtin.cpu.arch) { - .aarch64 => { - // Callee-saved registers are initialized as if they had the .same_value rule - if (reg_number >= 19 and reg_number <= 28) { - const src = try regBytes(context.thread_context, reg_number, context.reg_context); - if (src.len != out.len) return error.RegisterSizeMismatch; - @memcpy(out, src); - return; - } - }, - else => {}, - } - - @memset(out, undefined); +/// Returns a pointer to a register stored in a ThreadContext, preserving the +/// pointer attributes of the context. +pub fn regValueNative( + thread_context_ptr: *std.debug.ThreadContext, + reg_number: u8, + reg_context: ?RegisterContext, +) !*align(1) usize { + const reg_bytes = try regBytes(thread_context_ptr, reg_number, reg_context); + if (@sizeOf(usize) != reg_bytes.len) return error.IncompatibleRegisterSize; + return mem.bytesAsValue(usize, reg_bytes[0..@sizeOf(usize)]); } diff --git a/lib/std/debug/Dwarf/call_frame.zig b/lib/std/debug/Dwarf/call_frame.zig index 73e00d309979..3e3d2585db32 100644 --- a/lib/std/debug/Dwarf/call_frame.zig +++ b/lib/std/debug/Dwarf/call_frame.zig @@ -297,391 +297,3 @@ pub const Instruction = union(Opcode) { } } }; - -/// Since register rules are applied (usually) during a panic, -/// checked addition / subtraction is used so that we can return -/// an error and fall back to FP-based unwinding. 
-pub fn applyOffset(base: usize, offset: i64) !usize { - return if (offset >= 0) - try std.math.add(usize, base, @as(usize, @intCast(offset))) - else - try std.math.sub(usize, base, @as(usize, @intCast(-offset))); -} - -/// This is a virtual machine that runs DWARF call frame instructions. -pub const VirtualMachine = struct { - /// See section 6.4.1 of the DWARF5 specification for details on each - const RegisterRule = union(enum) { - // The spec says that the default rule for each column is the undefined rule. - // However, it also allows ABI / compiler authors to specify alternate defaults, so - // there is a distinction made here. - default: void, - - undefined: void, - same_value: void, - - // offset(N) - offset: i64, - - // val_offset(N) - val_offset: i64, - - // register(R) - register: u8, - - // expression(E) - expression: []const u8, - - // val_expression(E) - val_expression: []const u8, - - // Augmenter-defined rule - architectural: void, - }; - - /// Each row contains unwinding rules for a set of registers. - pub const Row = struct { - /// Offset from `FrameDescriptionEntry.pc_begin` - offset: u64 = 0, - - /// Special-case column that defines the CFA (Canonical Frame Address) rule. - /// The register field of this column defines the register that CFA is derived from. - cfa: Column = .{}, - - /// The register fields in these columns define the register the rule applies to. - columns: ColumnRange = .{}, - - /// Indicates that the next write to any column in this row needs to copy - /// the backing column storage first, as it may be referenced by previous rows. 
- copy_on_write: bool = false, - }; - - pub const Column = struct { - register: ?u8 = null, - rule: RegisterRule = .{ .default = {} }, - - /// Resolves the register rule and places the result into `out` (see dwarf.abi.regBytes) - pub fn resolveValue( - self: Column, - context: *std.debug.Dwarf.UnwindContext, - expression_context: std.debug.Dwarf.expression.Context, - ma: *debug.StackIterator.MemoryAccessor, - out: []u8, - ) !void { - switch (self.rule) { - .default => { - const register = self.register orelse return error.InvalidRegister; - try abi.getRegDefaultValue(register, context, out); - }, - .undefined => { - @memset(out, undefined); - }, - .same_value => { - // TODO: This copy could be eliminated if callers always copy the state then call this function to update it - const register = self.register orelse return error.InvalidRegister; - const src = try abi.regBytes(context.thread_context, register, context.reg_context); - if (src.len != out.len) return error.RegisterSizeMismatch; - @memcpy(out, src); - }, - .offset => |offset| { - if (context.cfa) |cfa| { - const addr = try applyOffset(cfa, offset); - if (ma.load(usize, addr) == null) return error.InvalidAddress; - const ptr: *const usize = @ptrFromInt(addr); - mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian); - } else return error.InvalidCFA; - }, - .val_offset => |offset| { - if (context.cfa) |cfa| { - mem.writeInt(usize, out[0..@sizeOf(usize)], try applyOffset(cfa, offset), native_endian); - } else return error.InvalidCFA; - }, - .register => |register| { - const src = try abi.regBytes(context.thread_context, register, context.reg_context); - if (src.len != out.len) return error.RegisterSizeMismatch; - @memcpy(out, try abi.regBytes(context.thread_context, register, context.reg_context)); - }, - .expression => |expression| { - context.stack_machine.reset(); - const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?); - const addr = if 
(value) |v| blk: { - if (v != .generic) return error.InvalidExpressionValue; - break :blk v.generic; - } else return error.NoExpressionValue; - - if (ma.load(usize, addr) == null) return error.InvalidExpressionAddress; - const ptr: *usize = @ptrFromInt(addr); - mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian); - }, - .val_expression => |expression| { - context.stack_machine.reset(); - const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?); - if (value) |v| { - if (v != .generic) return error.InvalidExpressionValue; - mem.writeInt(usize, out[0..@sizeOf(usize)], v.generic, native_endian); - } else return error.NoExpressionValue; - }, - .architectural => return error.UnimplementedRegisterRule, - } - } - }; - - const ColumnRange = struct { - /// Index into `columns` of the first column in this row. - start: usize = undefined, - len: u8 = 0, - }; - - columns: std.ArrayListUnmanaged(Column) = .{}, - stack: std.ArrayListUnmanaged(ColumnRange) = .{}, - current_row: Row = .{}, - - /// The result of executing the CIE's initial_instructions - cie_row: ?Row = null, - - pub fn deinit(self: *VirtualMachine, allocator: std.mem.Allocator) void { - self.stack.deinit(allocator); - self.columns.deinit(allocator); - self.* = undefined; - } - - pub fn reset(self: *VirtualMachine) void { - self.stack.clearRetainingCapacity(); - self.columns.clearRetainingCapacity(); - self.current_row = .{}; - self.cie_row = null; - } - - /// Return a slice backed by the row's non-CFA columns - pub fn rowColumns(self: VirtualMachine, row: Row) []Column { - if (row.columns.len == 0) return &.{}; - return self.columns.items[row.columns.start..][0..row.columns.len]; - } - - /// Either retrieves or adds a column for `register` (non-CFA) in the current row. 
- fn getOrAddColumn(self: *VirtualMachine, allocator: std.mem.Allocator, register: u8) !*Column { - for (self.rowColumns(self.current_row)) |*c| { - if (c.register == register) return c; - } - - if (self.current_row.columns.len == 0) { - self.current_row.columns.start = self.columns.items.len; - } - self.current_row.columns.len += 1; - - const column = try self.columns.addOne(allocator); - column.* = .{ - .register = register, - }; - - return column; - } - - /// Runs the CIE instructions, then the FDE instructions. Execution halts - /// once the row that corresponds to `pc` is known, and the row is returned. - pub fn runTo( - self: *VirtualMachine, - allocator: std.mem.Allocator, - pc: u64, - cie: std.debug.Dwarf.CommonInformationEntry, - fde: std.debug.Dwarf.FrameDescriptionEntry, - addr_size_bytes: u8, - endian: std.builtin.Endian, - ) !Row { - assert(self.cie_row == null); - if (pc < fde.pc_begin or pc >= fde.pc_begin + fde.pc_range) return error.AddressOutOfRange; - - var prev_row: Row = self.current_row; - - var cie_stream = std.io.fixedBufferStream(cie.initial_instructions); - var fde_stream = std.io.fixedBufferStream(fde.instructions); - var streams = [_]*std.io.FixedBufferStream([]const u8){ - &cie_stream, - &fde_stream, - }; - - for (&streams, 0..) 
|stream, i| { - while (stream.pos < stream.buffer.len) { - const instruction = try std.debug.Dwarf.call_frame.Instruction.read(stream, addr_size_bytes, endian); - prev_row = try self.step(allocator, cie, i == 0, instruction); - if (pc < fde.pc_begin + self.current_row.offset) return prev_row; - } - } - - return self.current_row; - } - - pub fn runToNative( - self: *VirtualMachine, - allocator: std.mem.Allocator, - pc: u64, - cie: std.debug.Dwarf.CommonInformationEntry, - fde: std.debug.Dwarf.FrameDescriptionEntry, - ) !Row { - return self.runTo(allocator, pc, cie, fde, @sizeOf(usize), builtin.target.cpu.arch.endian()); - } - - fn resolveCopyOnWrite(self: *VirtualMachine, allocator: std.mem.Allocator) !void { - if (!self.current_row.copy_on_write) return; - - const new_start = self.columns.items.len; - if (self.current_row.columns.len > 0) { - try self.columns.ensureUnusedCapacity(allocator, self.current_row.columns.len); - self.columns.appendSliceAssumeCapacity(self.rowColumns(self.current_row)); - self.current_row.columns.start = new_start; - } - } - - /// Executes a single instruction. - /// If this instruction is from the CIE, `is_initial` should be set. - /// Returns the value of `current_row` before executing this instruction. 
- pub fn step( - self: *VirtualMachine, - allocator: std.mem.Allocator, - cie: std.debug.Dwarf.CommonInformationEntry, - is_initial: bool, - instruction: Instruction, - ) !Row { - // CIE instructions must be run before FDE instructions - assert(!is_initial or self.cie_row == null); - if (!is_initial and self.cie_row == null) { - self.cie_row = self.current_row; - self.current_row.copy_on_write = true; - } - - const prev_row = self.current_row; - switch (instruction) { - .set_loc => |i| { - if (i.address <= self.current_row.offset) return error.InvalidOperation; - // TODO: Check cie.segment_selector_size != 0 for DWARFV4 - self.current_row.offset = i.address; - }, - inline .advance_loc, - .advance_loc1, - .advance_loc2, - .advance_loc4, - => |i| { - self.current_row.offset += i.delta * cie.code_alignment_factor; - self.current_row.copy_on_write = true; - }, - inline .offset, - .offset_extended, - .offset_extended_sf, - => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ .offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor }; - }, - inline .restore, - .restore_extended, - => |i| { - try self.resolveCopyOnWrite(allocator); - if (self.cie_row) |cie_row| { - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = for (self.rowColumns(cie_row)) |cie_column| { - if (cie_column.register == i.register) break cie_column.rule; - } else .{ .default = {} }; - } else return error.InvalidOperation; - }, - .nop => {}, - .undefined => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ .undefined = {} }; - }, - .same_value => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ .same_value = {} }; - }, - .register => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try 
self.getOrAddColumn(allocator, i.register); - column.rule = .{ .register = i.target_register }; - }, - .remember_state => { - try self.stack.append(allocator, self.current_row.columns); - self.current_row.copy_on_write = true; - }, - .restore_state => { - const restored_columns = self.stack.popOrNull() orelse return error.InvalidOperation; - self.columns.shrinkRetainingCapacity(self.columns.items.len - self.current_row.columns.len); - try self.columns.ensureUnusedCapacity(allocator, restored_columns.len); - - self.current_row.columns.start = self.columns.items.len; - self.current_row.columns.len = restored_columns.len; - self.columns.appendSliceAssumeCapacity(self.columns.items[restored_columns.start..][0..restored_columns.len]); - }, - .def_cfa => |i| { - try self.resolveCopyOnWrite(allocator); - self.current_row.cfa = .{ - .register = i.register, - .rule = .{ .val_offset = @intCast(i.offset) }, - }; - }, - .def_cfa_sf => |i| { - try self.resolveCopyOnWrite(allocator); - self.current_row.cfa = .{ - .register = i.register, - .rule = .{ .val_offset = i.offset * cie.data_alignment_factor }, - }; - }, - .def_cfa_register => |i| { - try self.resolveCopyOnWrite(allocator); - if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; - self.current_row.cfa.register = i.register; - }, - .def_cfa_offset => |i| { - try self.resolveCopyOnWrite(allocator); - if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; - self.current_row.cfa.rule = .{ - .val_offset = @intCast(i.offset), - }; - }, - .def_cfa_offset_sf => |i| { - try self.resolveCopyOnWrite(allocator); - if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; - self.current_row.cfa.rule = .{ - .val_offset = i.offset * cie.data_alignment_factor, - }; - }, - .def_cfa_expression => |i| { - try self.resolveCopyOnWrite(allocator); - 
self.current_row.cfa.register = undefined; - self.current_row.cfa.rule = .{ - .expression = i.block, - }; - }, - .expression => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ - .expression = i.block, - }; - }, - .val_offset => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ - .val_offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor, - }; - }, - .val_offset_sf => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ - .val_offset = i.offset * cie.data_alignment_factor, - }; - }, - .val_expression => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ - .val_expression = i.block, - }; - }, - } - - return prev_row; - } -}; diff --git a/lib/std/debug/Dwarf/expression.zig b/lib/std/debug/Dwarf/expression.zig index 6243ea971780..7a3a4ed7403f 100644 --- a/lib/std/debug/Dwarf/expression.zig +++ b/lib/std/debug/Dwarf/expression.zig @@ -1,11 +1,13 @@ -const std = @import("std"); const builtin = @import("builtin"); +const native_arch = builtin.cpu.arch; +const native_endian = native_arch.endian(); + +const std = @import("std"); const leb = std.leb; const OP = std.dwarf.OP; const abi = std.debug.Dwarf.abi; const mem = std.mem; const assert = std.debug.assert; -const native_endian = builtin.cpu.arch.endian(); /// Expressions can be evaluated in different contexts, each requiring its own set of inputs. /// Callers should specify all the fields relevant to their context. 
If a field is required @@ -14,7 +16,7 @@ pub const Context = struct { /// The dwarf format of the section this expression is in format: std.dwarf.Format = .@"32", /// If specified, any addresses will pass through before being accessed - memory_accessor: ?*std.debug.StackIterator.MemoryAccessor = null, + memory_accessor: ?*std.debug.MemoryAccessor = null, /// The compilation unit this expression relates to, if any compile_unit: ?*const std.debug.Dwarf.CompileUnit = null, /// When evaluating a user-presented expression, this is the address of the object being evaluated @@ -34,7 +36,7 @@ pub const Options = struct { /// The address size of the target architecture addr_size: u8 = @sizeOf(usize), /// Endianness of the target architecture - endian: std.builtin.Endian = builtin.target.cpu.arch.endian(), + endian: std.builtin.Endian = native_endian, /// Restrict the stack machine to a subset of opcodes used in call frame instructions call_frame_context: bool = false, }; @@ -60,7 +62,7 @@ pub const Error = error{ InvalidTypeLength, TruncatedIntegralType, -} || abi.AbiError || error{ EndOfStream, Overflow, OutOfMemory, DivisionByZero }; +} || abi.RegBytesError || error{ EndOfStream, Overflow, OutOfMemory, DivisionByZero }; /// A stack machine that can decode and run DWARF expressions. 
/// Expressions can be decoded for non-native address size and endianness, @@ -304,7 +306,7 @@ pub fn StackMachine(comptime options: Options) type { allocator: std.mem.Allocator, context: Context, ) Error!bool { - if (@sizeOf(usize) != @sizeOf(addr_type) or options.endian != comptime builtin.target.cpu.arch.endian()) + if (@sizeOf(usize) != @sizeOf(addr_type) or options.endian != native_endian) @compileError("Execution of non-native address sizes / endianness is not supported"); const opcode = try stream.reader().readByte(); @@ -1186,13 +1188,13 @@ test "DWARF expressions" { // TODO: Test fbreg (once implemented): mock a DIE and point compile_unit.frame_base at it mem.writeInt(usize, reg_bytes[0..@sizeOf(usize)], 0xee, native_endian); - (try abi.regValueNative(usize, &thread_context, abi.fpRegNum(reg_context), reg_context)).* = 1; - (try abi.regValueNative(usize, &thread_context, abi.spRegNum(reg_context), reg_context)).* = 2; - (try abi.regValueNative(usize, &thread_context, abi.ipRegNum(), reg_context)).* = 3; + (try abi.regValueNative(&thread_context, abi.fpRegNum(native_arch, reg_context), reg_context)).* = 1; + (try abi.regValueNative(&thread_context, abi.spRegNum(native_arch, reg_context), reg_context)).* = 2; + (try abi.regValueNative(&thread_context, abi.ipRegNum(native_arch), reg_context)).* = 3; - try b.writeBreg(writer, abi.fpRegNum(reg_context), @as(usize, 100)); - try b.writeBreg(writer, abi.spRegNum(reg_context), @as(usize, 200)); - try b.writeBregx(writer, abi.ipRegNum(), @as(usize, 300)); + try b.writeBreg(writer, abi.fpRegNum(native_arch, reg_context), @as(usize, 100)); + try b.writeBreg(writer, abi.spRegNum(native_arch, reg_context), @as(usize, 200)); + try b.writeBregx(writer, abi.ipRegNum(native_arch), @as(usize, 300)); try b.writeRegvalType(writer, @as(u8, 0), @as(usize, 400)); _ = try stack_machine.run(program.items, allocator, context, 0); diff --git a/lib/std/debug/MemoryAccessor.zig b/lib/std/debug/MemoryAccessor.zig new file mode 100644 
index 000000000000..bfdda609f65a --- /dev/null +++ b/lib/std/debug/MemoryAccessor.zig @@ -0,0 +1,128 @@ +//! Reads memory from any address of the current process using OS-specific +//! syscalls, bypassing memory page protection. Useful for stack unwinding. + +const builtin = @import("builtin"); +const native_os = builtin.os.tag; + +const std = @import("../std.zig"); +const posix = std.posix; +const File = std.fs.File; +const page_size = std.mem.page_size; + +const MemoryAccessor = @This(); + +var cached_pid: posix.pid_t = -1; + +mem: switch (native_os) { + .linux => File, + else => void, +}, + +pub const init: MemoryAccessor = .{ + .mem = switch (native_os) { + .linux => .{ .handle = -1 }, + else => {}, + }, +}; + +fn read(ma: *MemoryAccessor, address: usize, buf: []u8) bool { + switch (native_os) { + .linux => while (true) switch (ma.mem.handle) { + -2 => break, + -1 => { + const linux = std.os.linux; + const pid = switch (@atomicLoad(posix.pid_t, &cached_pid, .monotonic)) { + -1 => pid: { + const pid = linux.getpid(); + @atomicStore(posix.pid_t, &cached_pid, pid, .monotonic); + break :pid pid; + }, + else => |pid| pid, + }; + const bytes_read = linux.process_vm_readv( + pid, + &.{.{ .base = buf.ptr, .len = buf.len }}, + &.{.{ .base = @ptrFromInt(address), .len = buf.len }}, + 0, + ); + switch (linux.E.init(bytes_read)) { + .SUCCESS => return bytes_read == buf.len, + .FAULT => return false, + .INVAL, .PERM, .SRCH => unreachable, // own pid is always valid + .NOMEM => {}, + .NOSYS => {}, // QEMU is known not to implement this syscall. 
 + else => unreachable, // unexpected + } + var path_buf: [ + std.fmt.count("/proc/{d}/mem", .{std.math.minInt(posix.pid_t)}) + ]u8 = undefined; + const path = std.fmt.bufPrint(&path_buf, "/proc/{d}/mem", .{pid}) catch + unreachable; + ma.mem = std.fs.openFileAbsolute(path, .{}) catch { + ma.mem.handle = -2; + break; + }; + }, + else => return (ma.mem.pread(buf, address) catch return false) == buf.len, + }, + else => {}, + } + if (!isValidMemory(address)) return false; + @memcpy(buf, @as([*]const u8, @ptrFromInt(address))); + return true; +} + +pub fn load(ma: *MemoryAccessor, comptime Type: type, address: usize) ?Type { + var result: Type = undefined; + return if (ma.read(address, std.mem.asBytes(&result))) result else null; +} + +pub fn isValidMemory(address: usize) bool { + // We are unable to determine validity of memory for freestanding targets + if (native_os == .freestanding or native_os == .uefi) return true; + + const aligned_address = address & ~@as(usize, @intCast((page_size - 1))); + if (aligned_address == 0) return false; + const aligned_memory = @as([*]align(page_size) u8, @ptrFromInt(aligned_address))[0..page_size]; + + if (native_os == .windows) { + const windows = std.os.windows; + + var memory_info: windows.MEMORY_BASIC_INFORMATION = undefined; + + // The only error this function can throw is ERROR_INVALID_PARAMETER, + // which is thrown when an invalid address is supplied. 
+ const rc = windows.VirtualQuery(aligned_memory, &memory_info, aligned_memory.len) catch { + return false; + }; + + // Result code has to be bigger than zero (number of bytes written) + if (rc == 0) { + return false; + } + + // Free pages cannot be read, they are unmapped + if (memory_info.State == windows.MEM_FREE) { + return false; + } + + return true; + } else if (have_msync) { + posix.msync(aligned_memory, posix.MSF.ASYNC) catch |err| { + switch (err) { + error.UnmappedMemory => return false, + else => unreachable, + } + }; + + return true; + } else { + // We are unable to determine validity of memory on this target. + return true; + } +} + +const have_msync = switch (native_os) { + .wasi, .emscripten, .windows => false, + else => true, +}; diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 58fe4b23b263..80a8cb4cd991 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -22,6 +22,9 @@ const Pdb = std.debug.Pdb; const File = std.fs.File; const math = std.math; const testing = std.testing; +const StackIterator = std.debug.StackIterator; +const regBytes = Dwarf.abi.regBytes; +const regValueNative = Dwarf.abi.regValueNative; const SelfInfo = @This(); @@ -1369,3 +1372,1033 @@ fn getSymbolFromDwarf(allocator: Allocator, address: u64, di: *Dwarf) !SymbolInf else => return err, } } + +/// Unwind a frame using MachO compact unwind info (from __unwind_info). +/// If the compact encoding can't encode a way to unwind a frame, it will +/// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. 
+pub fn unwindFrameMachO( + context: *UnwindContext, + ma: *std.debug.MemoryAccessor, + unwind_info: []const u8, + eh_frame: ?[]const u8, + module_base_address: usize, +) !usize { + const header = std.mem.bytesAsValue( + macho.unwind_info_section_header, + unwind_info[0..@sizeOf(macho.unwind_info_section_header)], + ); + const indices = std.mem.bytesAsSlice( + macho.unwind_info_section_header_index_entry, + unwind_info[header.indexSectionOffset..][0 .. header.indexCount * @sizeOf(macho.unwind_info_section_header_index_entry)], + ); + if (indices.len == 0) return error.MissingUnwindInfo; + + const mapped_pc = context.pc - module_base_address; + const second_level_index = blk: { + var left: usize = 0; + var len: usize = indices.len; + + while (len > 1) { + const mid = left + len / 2; + const offset = indices[mid].functionOffset; + if (mapped_pc < offset) { + len /= 2; + } else { + left = mid; + if (mapped_pc == offset) break; + len -= len / 2; + } + } + + // Last index is a sentinel containing the highest address as its functionOffset + if (indices[left].secondLevelPagesSectionOffset == 0) return error.MissingUnwindInfo; + break :blk &indices[left]; + }; + + const common_encodings = std.mem.bytesAsSlice( + macho.compact_unwind_encoding_t, + unwind_info[header.commonEncodingsArraySectionOffset..][0 .. 
header.commonEncodingsArrayCount * @sizeOf(macho.compact_unwind_encoding_t)], + ); + + const start_offset = second_level_index.secondLevelPagesSectionOffset; + const kind = std.mem.bytesAsValue( + macho.UNWIND_SECOND_LEVEL, + unwind_info[start_offset..][0..@sizeOf(macho.UNWIND_SECOND_LEVEL)], + ); + + const entry: struct { + function_offset: usize, + raw_encoding: u32, + } = switch (kind.*) { + .REGULAR => blk: { + const page_header = std.mem.bytesAsValue( + macho.unwind_info_regular_second_level_page_header, + unwind_info[start_offset..][0..@sizeOf(macho.unwind_info_regular_second_level_page_header)], + ); + + const entries = std.mem.bytesAsSlice( + macho.unwind_info_regular_second_level_entry, + unwind_info[start_offset + page_header.entryPageOffset ..][0 .. page_header.entryCount * @sizeOf(macho.unwind_info_regular_second_level_entry)], + ); + if (entries.len == 0) return error.InvalidUnwindInfo; + + var left: usize = 0; + var len: usize = entries.len; + while (len > 1) { + const mid = left + len / 2; + const offset = entries[mid].functionOffset; + if (mapped_pc < offset) { + len /= 2; + } else { + left = mid; + if (mapped_pc == offset) break; + len -= len / 2; + } + } + + break :blk .{ + .function_offset = entries[left].functionOffset, + .raw_encoding = entries[left].encoding, + }; + }, + .COMPRESSED => blk: { + const page_header = std.mem.bytesAsValue( + macho.unwind_info_compressed_second_level_page_header, + unwind_info[start_offset..][0..@sizeOf(macho.unwind_info_compressed_second_level_page_header)], + ); + + const entries = std.mem.bytesAsSlice( + macho.UnwindInfoCompressedEntry, + unwind_info[start_offset + page_header.entryPageOffset ..][0 .. 
page_header.entryCount * @sizeOf(macho.UnwindInfoCompressedEntry)], + ); + if (entries.len == 0) return error.InvalidUnwindInfo; + + var left: usize = 0; + var len: usize = entries.len; + while (len > 1) { + const mid = left + len / 2; + const offset = second_level_index.functionOffset + entries[mid].funcOffset; + if (mapped_pc < offset) { + len /= 2; + } else { + left = mid; + if (mapped_pc == offset) break; + len -= len / 2; + } + } + + const entry = entries[left]; + const function_offset = second_level_index.functionOffset + entry.funcOffset; + if (entry.encodingIndex < header.commonEncodingsArrayCount) { + if (entry.encodingIndex >= common_encodings.len) return error.InvalidUnwindInfo; + break :blk .{ + .function_offset = function_offset, + .raw_encoding = common_encodings[entry.encodingIndex], + }; + } else { + const local_index = try math.sub( + u8, + entry.encodingIndex, + math.cast(u8, header.commonEncodingsArrayCount) orelse return error.InvalidUnwindInfo, + ); + const local_encodings = std.mem.bytesAsSlice( + macho.compact_unwind_encoding_t, + unwind_info[start_offset + page_header.encodingsPageOffset ..][0 .. 
page_header.encodingsCount * @sizeOf(macho.compact_unwind_encoding_t)], + ); + if (local_index >= local_encodings.len) return error.InvalidUnwindInfo; + break :blk .{ + .function_offset = function_offset, + .raw_encoding = local_encodings[local_index], + }; + } + }, + else => return error.InvalidUnwindInfo, + }; + + if (entry.raw_encoding == 0) return error.NoUnwindInfo; + const reg_context = Dwarf.abi.RegisterContext{ + .eh_frame = false, + .is_macho = true, + }; + + const encoding: macho.CompactUnwindEncoding = @bitCast(entry.raw_encoding); + const new_ip = switch (builtin.cpu.arch) { + .x86_64 => switch (encoding.mode.x86_64) { + .OLD => return error.UnimplementedUnwindEncoding, + .RBP_FRAME => blk: { + const regs: [5]u3 = .{ + encoding.value.x86_64.frame.reg0, + encoding.value.x86_64.frame.reg1, + encoding.value.x86_64.frame.reg2, + encoding.value.x86_64.frame.reg3, + encoding.value.x86_64.frame.reg4, + }; + + const frame_offset = encoding.value.x86_64.frame.frame_offset * @sizeOf(usize); + var max_reg: usize = 0; + inline for (regs, 0..) |reg, i| { + if (reg > 0) max_reg = i; + } + + const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; + const new_sp = fp + 2 * @sizeOf(usize); + + // Verify the stack range we're about to read register values from + if (ma.load(usize, new_sp) == null or ma.load(usize, fp - frame_offset + max_reg * @sizeOf(usize)) == null) return error.InvalidUnwindInfo; + + const ip_ptr = fp + @sizeOf(usize); + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_fp = @as(*const usize, @ptrFromInt(fp)).*; + + (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; + (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; + (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; + + for (regs, 0..) 
|reg, i| { + if (reg == 0) continue; + const addr = fp - frame_offset + i * @sizeOf(usize); + const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(reg); + (try regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(addr)).*; + } + + break :blk new_ip; + }, + .STACK_IMMD, + .STACK_IND, + => blk: { + const sp = (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; + const stack_size = if (encoding.mode.x86_64 == .STACK_IMMD) + @as(usize, encoding.value.x86_64.frameless.stack.direct.stack_size) * @sizeOf(usize) + else stack_size: { + // In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function. + const sub_offset_addr = + module_base_address + + entry.function_offset + + encoding.value.x86_64.frameless.stack.indirect.sub_offset; + if (ma.load(usize, sub_offset_addr) == null) return error.InvalidUnwindInfo; + + // `sub_offset_addr` points to the offset of the literal within the instruction + const sub_operand = @as(*align(1) const u32, @ptrFromInt(sub_offset_addr)).*; + break :stack_size sub_operand + @sizeOf(usize) * @as(usize, encoding.value.x86_64.frameless.stack.indirect.stack_adjust); + }; + + // Decode the Lehmer-coded sequence of registers. + // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h + + // Decode the variable-based permutation number into its digits. Each digit represents + // an index into the list of register numbers that weren't yet used in the sequence at + // the time the digit was added. 
+ const reg_count = encoding.value.x86_64.frameless.stack_reg_count; + const ip_ptr = if (reg_count > 0) reg_blk: { + var digits: [6]u3 = undefined; + var accumulator: usize = encoding.value.x86_64.frameless.stack_reg_permutation; + var base: usize = 2; + for (0..reg_count) |i| { + const div = accumulator / base; + digits[digits.len - 1 - i] = @intCast(accumulator - base * div); + accumulator = div; + base += 1; + } + + const reg_numbers = [_]u3{ 1, 2, 3, 4, 5, 6 }; + var registers: [reg_numbers.len]u3 = undefined; + var used_indices = [_]bool{false} ** reg_numbers.len; + for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| { + var unused_count: u8 = 0; + const unused_index = for (used_indices, 0..) |used, index| { + if (!used) { + if (target_unused_index == unused_count) break index; + unused_count += 1; + } + } else unreachable; + + registers[i] = reg_numbers[unused_index]; + used_indices[unused_index] = true; + } + + var reg_addr = sp + stack_size - @sizeOf(usize) * @as(usize, reg_count + 1); + if (ma.load(usize, reg_addr) == null) return error.InvalidUnwindInfo; + for (0..reg_count) |i| { + const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(registers[i]); + (try regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + } + + break :reg_blk reg_addr; + } else sp + stack_size - @sizeOf(usize); + + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_sp = ip_ptr + @sizeOf(usize); + if (ma.load(usize, new_sp) == null) return error.InvalidUnwindInfo; + + (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; + (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; + + break :blk new_ip; + }, + .DWARF => { + return unwindFrameMachODwarf(context, ma, eh_frame orelse return error.MissingEhFrame, @intCast(encoding.value.x86_64.dwarf)); + }, + }, + .aarch64 => switch 
(encoding.mode.arm64) { + .OLD => return error.UnimplementedUnwindEncoding, + .FRAMELESS => blk: { + const sp = (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; + const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16; + const new_ip = (try regValueNative(context.thread_context, 30, reg_context)).*; + if (ma.load(usize, new_sp) == null) return error.InvalidUnwindInfo; + (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; + break :blk new_ip; + }, + .DWARF => { + return unwindFrameMachODwarf(context, ma, eh_frame orelse return error.MissingEhFrame, @intCast(encoding.value.arm64.dwarf)); + }, + .FRAME => blk: { + const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; + const new_sp = fp + 16; + const ip_ptr = fp + @sizeOf(usize); + + const num_restored_pairs: usize = + @popCount(@as(u5, @bitCast(encoding.value.arm64.frame.x_reg_pairs))) + + @popCount(@as(u4, @bitCast(encoding.value.arm64.frame.d_reg_pairs))); + const min_reg_addr = fp - num_restored_pairs * 2 * @sizeOf(usize); + + if (ma.load(usize, new_sp) == null or ma.load(usize, min_reg_addr) == null) return error.InvalidUnwindInfo; + + var reg_addr = fp - @sizeOf(usize); + inline for (@typeInfo(@TypeOf(encoding.value.arm64.frame.x_reg_pairs)).Struct.fields, 0..) |field, i| { + if (@field(encoding.value.arm64.frame.x_reg_pairs, field.name) != 0) { + (try regValueNative(context.thread_context, 19 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + (try regValueNative(context.thread_context, 20 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + } + } + + inline for (@typeInfo(@TypeOf(encoding.value.arm64.frame.d_reg_pairs)).Struct.fields, 0..) 
|field, i| { + if (@field(encoding.value.arm64.frame.d_reg_pairs, field.name) != 0) { + // Only the lower half of the 128-bit V registers are restored during unwinding + @memcpy( + try regBytes(context.thread_context, 64 + 8 + i, context.reg_context), + std.mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))), + ); + reg_addr += @sizeOf(usize); + @memcpy( + try regBytes(context.thread_context, 64 + 9 + i, context.reg_context), + std.mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))), + ); + reg_addr += @sizeOf(usize); + } + } + + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_fp = @as(*const usize, @ptrFromInt(fp)).*; + + (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; + (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; + + break :blk new_ip; + }, + }, + else => return error.UnimplementedArch, + }; + + context.pc = stripInstructionPtrAuthCode(new_ip); + if (context.pc > 0) context.pc -= 1; + return new_ip; +} + +pub const UnwindContext = struct { + allocator: Allocator, + cfa: ?usize, + pc: usize, + thread_context: *std.debug.ThreadContext, + reg_context: Dwarf.abi.RegisterContext, + vm: VirtualMachine, + stack_machine: Dwarf.expression.StackMachine(.{ .call_frame_context = true }), + + pub fn init( + allocator: Allocator, + thread_context: *std.debug.ThreadContext, + ) !UnwindContext { + const pc = stripInstructionPtrAuthCode( + (try regValueNative(thread_context, ip_reg_num, null)).*, + ); + + const context_copy = try allocator.create(std.debug.ThreadContext); + std.debug.copyContext(thread_context, context_copy); + + return .{ + .allocator = allocator, + .cfa = null, + .pc = pc, + .thread_context = context_copy, + .reg_context = undefined, + .vm = .{}, + .stack_machine = .{}, + }; + } + + pub fn deinit(self: *UnwindContext) void { + self.vm.deinit(self.allocator); + self.stack_machine.deinit(self.allocator); + self.allocator.destroy(self.thread_context); + 
self.* = undefined; + } + + pub fn getFp(self: *const UnwindContext) !usize { + return (try regValueNative(self.thread_context, fpRegNum(self.reg_context), self.reg_context)).*; + } +}; + +/// Some platforms use pointer authentication - the upper bits of instruction pointers contain a signature. +/// This function clears these signature bits to make the pointer usable. +pub inline fn stripInstructionPtrAuthCode(ptr: usize) usize { + if (native_arch == .aarch64) { + // `hint 0x07` maps to `xpaclri` (or `nop` if the hardware doesn't support it) + // The save / restore is because `xpaclri` operates on x30 (LR) + return asm ( + \\mov x16, x30 + \\mov x30, x15 + \\hint 0x07 + \\mov x15, x30 + \\mov x30, x16 + : [ret] "={x15}" (-> usize), + : [ptr] "{x15}" (ptr), + : "x16" + ); + } + + return ptr; +} + +/// Unwind a stack frame using DWARF unwinding info, updating the register context. +/// +/// If `.eh_frame_hdr` is available, it will be used to binary search for the FDE. +/// Otherwise, a linear scan of `.eh_frame` and `.debug_frame` is done to find the FDE. +/// +/// `explicit_fde_offset` is for cases where the FDE offset is known, such as when __unwind_info +/// defers unwinding to DWARF. This is an offset into the `.eh_frame` section. 
+pub fn unwindFrameDwarf( + di: *const Dwarf, + context: *UnwindContext, + ma: *std.debug.MemoryAccessor, + explicit_fde_offset: ?usize, +) !usize { + if (!supports_unwinding) return error.UnsupportedCpuArchitecture; + if (context.pc == 0) return 0; + + // Find the FDE and CIE + var cie: Dwarf.CommonInformationEntry = undefined; + var fde: Dwarf.FrameDescriptionEntry = undefined; + + if (explicit_fde_offset) |fde_offset| { + const dwarf_section: Dwarf.Section.Id = .eh_frame; + const frame_section = di.section(dwarf_section) orelse return error.MissingFDE; + if (fde_offset >= frame_section.len) return error.MissingFDE; + + var fbr: std.debug.DeprecatedFixedBufferReader = .{ + .buf = frame_section, + .pos = fde_offset, + .endian = di.endian, + }; + + const fde_entry_header = try Dwarf.EntryHeader.read(&fbr, null, dwarf_section); + if (fde_entry_header.type != .fde) return error.MissingFDE; + + const cie_offset = fde_entry_header.type.fde; + try fbr.seekTo(cie_offset); + + fbr.endian = native_endian; + const cie_entry_header = try Dwarf.EntryHeader.read(&fbr, null, dwarf_section); + if (cie_entry_header.type != .cie) return Dwarf.bad(); + + cie = try Dwarf.CommonInformationEntry.parse( + cie_entry_header.entry_bytes, + 0, + true, + cie_entry_header.format, + dwarf_section, + cie_entry_header.length_offset, + @sizeOf(usize), + native_endian, + ); + + fde = try Dwarf.FrameDescriptionEntry.parse( + fde_entry_header.entry_bytes, + 0, + true, + cie, + @sizeOf(usize), + native_endian, + ); + } else if (di.eh_frame_hdr) |header| { + const eh_frame_len = if (di.section(.eh_frame)) |eh_frame| eh_frame.len else null; + try header.findEntry( + ma, + eh_frame_len, + @intFromPtr(di.section(.eh_frame_hdr).?.ptr), + context.pc, + &cie, + &fde, + ); + } else { + const index = std.sort.binarySearch(Dwarf.FrameDescriptionEntry, context.pc, di.fde_list.items, {}, struct { + pub fn compareFn(_: void, pc: usize, mid_item: Dwarf.FrameDescriptionEntry) std.math.Order { + if (pc < 
mid_item.pc_begin) return .lt; + + const range_end = mid_item.pc_begin + mid_item.pc_range; + if (pc < range_end) return .eq; + + return .gt; + } + }.compareFn); + + fde = if (index) |i| di.fde_list.items[i] else return error.MissingFDE; + cie = di.cie_map.get(fde.cie_length_offset) orelse return error.MissingCIE; + } + + var expression_context: Dwarf.expression.Context = .{ + .format = cie.format, + .memory_accessor = ma, + .compile_unit = di.findCompileUnit(fde.pc_begin) catch null, + .thread_context = context.thread_context, + .reg_context = context.reg_context, + .cfa = context.cfa, + }; + + context.vm.reset(); + context.reg_context.eh_frame = cie.version != 4; + context.reg_context.is_macho = di.is_macho; + + const row = try context.vm.runToNative(context.allocator, context.pc, cie, fde); + context.cfa = switch (row.cfa.rule) { + .val_offset => |offset| blk: { + const register = row.cfa.register orelse return error.InvalidCFARule; + const value = mem.readInt(usize, (try regBytes(context.thread_context, register, context.reg_context))[0..@sizeOf(usize)], native_endian); + break :blk try applyOffset(value, offset); + }, + .expression => |expr| blk: { + context.stack_machine.reset(); + const value = try context.stack_machine.run( + expr, + context.allocator, + expression_context, + context.cfa, + ); + + if (value) |v| { + if (v != .generic) return error.InvalidExpressionValue; + break :blk v.generic; + } else return error.NoExpressionValue; + }, + else => return error.InvalidCFARule, + }; + + if (ma.load(usize, context.cfa.?) == null) return error.InvalidCFA; + expression_context.cfa = context.cfa; + + // Buffering the modifications is done because copying the thread context is not portable, + // some implementations (ie. darwin) use internal pointers to the mcontext. 
+ var arena = std.heap.ArenaAllocator.init(context.allocator); + defer arena.deinit(); + const update_allocator = arena.allocator(); + + const RegisterUpdate = struct { + // Backed by thread_context + dest: []u8, + // Backed by arena + src: []const u8, + prev: ?*@This(), + }; + + var update_tail: ?*RegisterUpdate = null; + var has_return_address = true; + for (context.vm.rowColumns(row)) |column| { + if (column.register) |register| { + if (register == cie.return_address_register) { + has_return_address = column.rule != .undefined; + } + + const dest = try regBytes(context.thread_context, register, context.reg_context); + const src = try update_allocator.alloc(u8, dest.len); + + const prev = update_tail; + update_tail = try update_allocator.create(RegisterUpdate); + update_tail.?.* = .{ + .dest = dest, + .src = src, + .prev = prev, + }; + + try column.resolveValue( + context, + expression_context, + ma, + src, + ); + } + } + + // On all implemented architectures, the CFA is defined as being the previous frame's SP + (try regValueNative(context.thread_context, spRegNum(context.reg_context), context.reg_context)).* = context.cfa.?; + + while (update_tail) |tail| { + @memcpy(tail.dest, tail.src); + update_tail = tail.prev; + } + + if (has_return_address) { + context.pc = stripInstructionPtrAuthCode(mem.readInt(usize, (try regBytes( + context.thread_context, + cie.return_address_register, + context.reg_context, + ))[0..@sizeOf(usize)], native_endian)); + } else { + context.pc = 0; + } + + (try regValueNative(context.thread_context, ip_reg_num, context.reg_context)).* = context.pc; + + // The call instruction will have pushed the address of the instruction that follows the call as the return address. + // This next instruction may be past the end of the function if the caller was `noreturn` (ie. the last instruction in + // the function was the call). 
If we were to look up an FDE entry using the return address directly, it could end up + either not finding an FDE at all, or using the next FDE in the program, producing incorrect results. To prevent this, + we subtract one so that the next lookup is guaranteed to land inside the correct function. + // + // The exception to this rule is signal frames, where execution would be returned to the instruction + // that triggered the handler. + const return_address = context.pc; + if (context.pc > 0 and !cie.isSignalFrame()) context.pc -= 1; + + return return_address; +} + +fn fpRegNum(reg_context: Dwarf.abi.RegisterContext) u8 { + return Dwarf.abi.fpRegNum(native_arch, reg_context); +} + +fn spRegNum(reg_context: Dwarf.abi.RegisterContext) u8 { + return Dwarf.abi.spRegNum(native_arch, reg_context); +} + +const ip_reg_num = Dwarf.abi.ipRegNum(native_arch); +const supports_unwinding = Dwarf.abi.supportsUnwinding(builtin.target); + +fn unwindFrameMachODwarf( + context: *UnwindContext, + ma: *std.debug.MemoryAccessor, + eh_frame: []const u8, + fde_offset: usize, +) !usize { + var di: Dwarf = .{ + .endian = native_endian, + .is_macho = true, + }; + defer di.deinit(context.allocator); + + di.sections[@intFromEnum(Dwarf.Section.Id.eh_frame)] = .{ + .data = eh_frame, + .owned = false, + }; + + return unwindFrameDwarf(&di, context, ma, fde_offset); +} + +/// This is a virtual machine that runs DWARF call frame instructions. +pub const VirtualMachine = struct { + /// See section 6.4.1 of the DWARF5 specification for details on each register rule. + const RegisterRule = union(enum) { + // The spec says that the default rule for each column is the undefined rule. + // However, it also allows ABI / compiler authors to specify alternate defaults, so + // there is a distinction made here.
+ default: void, + undefined: void, + same_value: void, + // offset(N) + offset: i64, + // val_offset(N) + val_offset: i64, + // register(R) + register: u8, + // expression(E) + expression: []const u8, + // val_expression(E) + val_expression: []const u8, + // Augmenter-defined rule + architectural: void, + }; + + /// Each row contains unwinding rules for a set of registers. + pub const Row = struct { + /// Offset from `FrameDescriptionEntry.pc_begin` + offset: u64 = 0, + /// Special-case column that defines the CFA (Canonical Frame Address) rule. + /// The register field of this column defines the register that CFA is derived from. + cfa: Column = .{}, + /// The register fields in these columns define the register the rule applies to. + columns: ColumnRange = .{}, + /// Indicates that the next write to any column in this row needs to copy + /// the backing column storage first, as it may be referenced by previous rows. + copy_on_write: bool = false, + }; + + pub const Column = struct { + register: ?u8 = null, + rule: RegisterRule = .{ .default = {} }, + + /// Resolves the register rule and places the result into `out` (see regBytes) + pub fn resolveValue( + self: Column, + context: *SelfInfo.UnwindContext, + expression_context: std.debug.Dwarf.expression.Context, + ma: *std.debug.MemoryAccessor, + out: []u8, + ) !void { + switch (self.rule) { + .default => { + const register = self.register orelse return error.InvalidRegister; + try getRegDefaultValue(register, context, out); + }, + .undefined => { + @memset(out, undefined); + }, + .same_value => { + // TODO: This copy could be eliminated if callers always copy the state then call this function to update it + const register = self.register orelse return error.InvalidRegister; + const src = try regBytes(context.thread_context, register, context.reg_context); + if (src.len != out.len) return error.RegisterSizeMismatch; + @memcpy(out, src); + }, + .offset => |offset| { + if (context.cfa) |cfa| { + const addr = try 
applyOffset(cfa, offset); + if (ma.load(usize, addr) == null) return error.InvalidAddress; + const ptr: *const usize = @ptrFromInt(addr); + mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian); + } else return error.InvalidCFA; + }, + .val_offset => |offset| { + if (context.cfa) |cfa| { + mem.writeInt(usize, out[0..@sizeOf(usize)], try applyOffset(cfa, offset), native_endian); + } else return error.InvalidCFA; + }, + .register => |register| { + const src = try regBytes(context.thread_context, register, context.reg_context); + if (src.len != out.len) return error.RegisterSizeMismatch; + @memcpy(out, try regBytes(context.thread_context, register, context.reg_context)); + }, + .expression => |expression| { + context.stack_machine.reset(); + const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?); + const addr = if (value) |v| blk: { + if (v != .generic) return error.InvalidExpressionValue; + break :blk v.generic; + } else return error.NoExpressionValue; + + if (ma.load(usize, addr) == null) return error.InvalidExpressionAddress; + const ptr: *usize = @ptrFromInt(addr); + mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian); + }, + .val_expression => |expression| { + context.stack_machine.reset(); + const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?); + if (value) |v| { + if (v != .generic) return error.InvalidExpressionValue; + mem.writeInt(usize, out[0..@sizeOf(usize)], v.generic, native_endian); + } else return error.NoExpressionValue; + }, + .architectural => return error.UnimplementedRegisterRule, + } + } + }; + + const ColumnRange = struct { + /// Index into `columns` of the first column in this row. 
+ start: usize = undefined, + len: u8 = 0, + }; + + columns: std.ArrayListUnmanaged(Column) = .{}, + stack: std.ArrayListUnmanaged(ColumnRange) = .{}, + current_row: Row = .{}, + + /// The result of executing the CIE's initial_instructions + cie_row: ?Row = null, + + pub fn deinit(self: *VirtualMachine, allocator: std.mem.Allocator) void { + self.stack.deinit(allocator); + self.columns.deinit(allocator); + self.* = undefined; + } + + pub fn reset(self: *VirtualMachine) void { + self.stack.clearRetainingCapacity(); + self.columns.clearRetainingCapacity(); + self.current_row = .{}; + self.cie_row = null; + } + + /// Return a slice backed by the row's non-CFA columns + pub fn rowColumns(self: VirtualMachine, row: Row) []Column { + if (row.columns.len == 0) return &.{}; + return self.columns.items[row.columns.start..][0..row.columns.len]; + } + + /// Either retrieves or adds a column for `register` (non-CFA) in the current row. + fn getOrAddColumn(self: *VirtualMachine, allocator: std.mem.Allocator, register: u8) !*Column { + for (self.rowColumns(self.current_row)) |*c| { + if (c.register == register) return c; + } + + if (self.current_row.columns.len == 0) { + self.current_row.columns.start = self.columns.items.len; + } + self.current_row.columns.len += 1; + + const column = try self.columns.addOne(allocator); + column.* = .{ + .register = register, + }; + + return column; + } + + /// Runs the CIE instructions, then the FDE instructions. Execution halts + /// once the row that corresponds to `pc` is known, and the row is returned. 
+ pub fn runTo( + self: *VirtualMachine, + allocator: std.mem.Allocator, + pc: u64, + cie: std.debug.Dwarf.CommonInformationEntry, + fde: std.debug.Dwarf.FrameDescriptionEntry, + addr_size_bytes: u8, + endian: std.builtin.Endian, + ) !Row { + assert(self.cie_row == null); + if (pc < fde.pc_begin or pc >= fde.pc_begin + fde.pc_range) return error.AddressOutOfRange; + + var prev_row: Row = self.current_row; + + var cie_stream = std.io.fixedBufferStream(cie.initial_instructions); + var fde_stream = std.io.fixedBufferStream(fde.instructions); + var streams = [_]*std.io.FixedBufferStream([]const u8){ + &cie_stream, + &fde_stream, + }; + + for (&streams, 0..) |stream, i| { + while (stream.pos < stream.buffer.len) { + const instruction = try std.debug.Dwarf.call_frame.Instruction.read(stream, addr_size_bytes, endian); + prev_row = try self.step(allocator, cie, i == 0, instruction); + if (pc < fde.pc_begin + self.current_row.offset) return prev_row; + } + } + + return self.current_row; + } + + pub fn runToNative( + self: *VirtualMachine, + allocator: std.mem.Allocator, + pc: u64, + cie: std.debug.Dwarf.CommonInformationEntry, + fde: std.debug.Dwarf.FrameDescriptionEntry, + ) !Row { + return self.runTo(allocator, pc, cie, fde, @sizeOf(usize), native_endian); + } + + fn resolveCopyOnWrite(self: *VirtualMachine, allocator: std.mem.Allocator) !void { + if (!self.current_row.copy_on_write) return; + + const new_start = self.columns.items.len; + if (self.current_row.columns.len > 0) { + try self.columns.ensureUnusedCapacity(allocator, self.current_row.columns.len); + self.columns.appendSliceAssumeCapacity(self.rowColumns(self.current_row)); + self.current_row.columns.start = new_start; + } + } + + /// Executes a single instruction. + /// If this instruction is from the CIE, `is_initial` should be set. + /// Returns the value of `current_row` before executing this instruction. 
+ pub fn step( + self: *VirtualMachine, + allocator: std.mem.Allocator, + cie: std.debug.Dwarf.CommonInformationEntry, + is_initial: bool, + instruction: Dwarf.call_frame.Instruction, + ) !Row { + // CIE instructions must be run before FDE instructions + assert(!is_initial or self.cie_row == null); + if (!is_initial and self.cie_row == null) { + self.cie_row = self.current_row; + self.current_row.copy_on_write = true; + } + + const prev_row = self.current_row; + switch (instruction) { + .set_loc => |i| { + if (i.address <= self.current_row.offset) return error.InvalidOperation; + // TODO: Check cie.segment_selector_size != 0 for DWARFV4 + self.current_row.offset = i.address; + }, + inline .advance_loc, + .advance_loc1, + .advance_loc2, + .advance_loc4, + => |i| { + self.current_row.offset += i.delta * cie.code_alignment_factor; + self.current_row.copy_on_write = true; + }, + inline .offset, + .offset_extended, + .offset_extended_sf, + => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = .{ .offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor }; + }, + inline .restore, + .restore_extended, + => |i| { + try self.resolveCopyOnWrite(allocator); + if (self.cie_row) |cie_row| { + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = for (self.rowColumns(cie_row)) |cie_column| { + if (cie_column.register == i.register) break cie_column.rule; + } else .{ .default = {} }; + } else return error.InvalidOperation; + }, + .nop => {}, + .undefined => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = .{ .undefined = {} }; + }, + .same_value => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = .{ .same_value = {} }; + }, + .register => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try 
self.getOrAddColumn(allocator, i.register); + column.rule = .{ .register = i.target_register }; + }, + .remember_state => { + try self.stack.append(allocator, self.current_row.columns); + self.current_row.copy_on_write = true; + }, + .restore_state => { + const restored_columns = self.stack.popOrNull() orelse return error.InvalidOperation; + self.columns.shrinkRetainingCapacity(self.columns.items.len - self.current_row.columns.len); + try self.columns.ensureUnusedCapacity(allocator, restored_columns.len); + + self.current_row.columns.start = self.columns.items.len; + self.current_row.columns.len = restored_columns.len; + self.columns.appendSliceAssumeCapacity(self.columns.items[restored_columns.start..][0..restored_columns.len]); + }, + .def_cfa => |i| { + try self.resolveCopyOnWrite(allocator); + self.current_row.cfa = .{ + .register = i.register, + .rule = .{ .val_offset = @intCast(i.offset) }, + }; + }, + .def_cfa_sf => |i| { + try self.resolveCopyOnWrite(allocator); + self.current_row.cfa = .{ + .register = i.register, + .rule = .{ .val_offset = i.offset * cie.data_alignment_factor }, + }; + }, + .def_cfa_register => |i| { + try self.resolveCopyOnWrite(allocator); + if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; + self.current_row.cfa.register = i.register; + }, + .def_cfa_offset => |i| { + try self.resolveCopyOnWrite(allocator); + if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; + self.current_row.cfa.rule = .{ + .val_offset = @intCast(i.offset), + }; + }, + .def_cfa_offset_sf => |i| { + try self.resolveCopyOnWrite(allocator); + if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; + self.current_row.cfa.rule = .{ + .val_offset = i.offset * cie.data_alignment_factor, + }; + }, + .def_cfa_expression => |i| { + try self.resolveCopyOnWrite(allocator); + 
self.current_row.cfa.register = undefined; + self.current_row.cfa.rule = .{ + .expression = i.block, + }; + }, + .expression => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = .{ + .expression = i.block, + }; + }, + .val_offset => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = .{ + .val_offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor, + }; + }, + .val_offset_sf => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = .{ + .val_offset = i.offset * cie.data_alignment_factor, + }; + }, + .val_expression => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = .{ + .val_expression = i.block, + }; + }, + } + + return prev_row; + } +}; + +/// Returns the ABI-defined default value this register has in the unwinding table +/// before running any of the CIE instructions. The DWARF spec defines these as having +/// the .undefined rule by default, but allows ABI authors to override that. +fn getRegDefaultValue(reg_number: u8, context: *UnwindContext, out: []u8) !void { + switch (builtin.cpu.arch) { + .aarch64 => { + // Callee-saved registers are initialized as if they had the .same_value rule + if (reg_number >= 19 and reg_number <= 28) { + const src = try regBytes(context.thread_context, reg_number, context.reg_context); + if (src.len != out.len) return error.RegisterSizeMismatch; + @memcpy(out, src); + return; + } + }, + else => {}, + } + + @memset(out, undefined); +} + +/// Since register rules are applied (usually) during a panic, +/// checked addition / subtraction is used so that we can return +/// an error and fall back to FP-based unwinding. 
+fn applyOffset(base: usize, offset: i64) !usize { + return if (offset >= 0) + try std.math.add(usize, base, @as(usize, @intCast(offset))) + else + try std.math.sub(usize, base, @as(usize, @intCast(-offset))); +} diff --git a/src/crash_report.zig b/src/crash_report.zig index 0f9e73a3cb1e..d4fe72a8e8a7 100644 --- a/src/crash_report.zig +++ b/src/crash_report.zig @@ -256,7 +256,7 @@ const StackContext = union(enum) { current: struct { ret_addr: ?usize, }, - exception: *const debug.ThreadContext, + exception: *debug.ThreadContext, not_supported: void, pub fn dumpStackTrace(ctx: @This()) void { From 975c185b92a7d470ea705b28f46a8004bdda3c60 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 2 Aug 2024 12:00:08 -0700 Subject: [PATCH 5/6] fix compilation on powerpc GNU systems ...which have a ucontext_t but not a PC register. The current stack unwinding implementation does not yet support this architecture. Also fix name of `std.debug.SelfInfo.openSelf` to remove redundancy. Also removed this hook into root providing an "openSelfDebugInfo" function. Sorry, this debugging code is not of sufficient quality to offer a plugin API right now. 
--- lib/std/debug.zig | 22 +++++++++++++++------- lib/std/debug/Dwarf.zig | 24 ++++++++++++++++++++++++ lib/std/debug/Dwarf/abi.zig | 27 +++------------------------ lib/std/debug/Dwarf/expression.zig | 4 ++-- lib/std/debug/SelfInfo.zig | 13 ++++++------- test/standalone/coff_dwarf/main.zig | 2 +- 6 files changed, 51 insertions(+), 41 deletions(-) diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 0f10bada7173..4d3437f665c6 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -88,7 +88,7 @@ pub fn getSelfDebugInfo() !*SelfInfo { if (self_debug_info) |*info| { return info; } else { - self_debug_info = try SelfInfo.openSelf(getDebugInfoAllocator()); + self_debug_info = try SelfInfo.open(getDebugInfoAllocator()); return &self_debug_info.?; } } @@ -573,17 +573,19 @@ pub const StackIterator = struct { pub fn initWithContext(first_address: ?usize, debug_info: *SelfInfo, context: *posix.ucontext_t) !StackIterator { // The implementation of DWARF unwinding on aarch64-macos is not complete. However, Apple mandates that // the frame pointer register is always used, so on this platform we can safely use the FP-based unwinder. 
- if (builtin.target.isDarwin() and native_arch == .aarch64) { + if (builtin.target.isDarwin() and native_arch == .aarch64) return init(first_address, context.mcontext.ss.fp); - } else { + + if (SelfInfo.supports_unwinding) { var iterator = init(first_address, null); iterator.unwind_state = .{ .debug_info = debug_info, .dwarf_context = try SelfInfo.UnwindContext.init(debug_info.allocator, context), }; - return iterator; } + + return init(first_address, null); } pub fn deinit(it: *StackIterator) void { @@ -725,11 +727,13 @@ pub fn writeCurrentStackTrace( tty_config: io.tty.Config, start_addr: ?usize, ) !void { - var context: ThreadContext = undefined; - const has_context = getContext(&context); if (native_os == .windows) { + var context: ThreadContext = undefined; + assert(getContext(&context)); return writeStackTraceWindows(out_stream, debug_info, tty_config, &context, start_addr); } + var context: ThreadContext = undefined; + const has_context = getContext(&context); var it = (if (has_context) blk: { break :blk StackIterator.initWithContext(start_addr, debug_info, &context) catch null; @@ -1340,7 +1344,7 @@ test "manage resources correctly" { } const writer = std.io.null_writer; - var di = try SelfInfo.openSelf(testing.allocator); + var di = try SelfInfo.open(testing.allocator); defer di.deinit(); try printSourceAtAddress(&di, writer, showMyTrace(), io.tty.detectConfig(std.io.getStdErr())); } @@ -1581,5 +1585,9 @@ pub inline fn inValgrind() bool { } test { + _ = &Dwarf; + _ = &MemoryAccessor; + _ = &Pdb; + _ = &SelfInfo; _ = &dumpHex; } diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 991c7315492c..e30e3d06ac6f 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -2023,3 +2023,27 @@ fn pcRelBase(field_ptr: usize, pc_rel_offset: i64) !usize { return std.math.add(usize, field_ptr, @as(usize, @intCast(pc_rel_offset))); } } + +pub fn supportsUnwinding(target: std.Target) bool { + return switch (target.cpu.arch) { + .x86 => switch 
(target.os.tag) { + .linux, .netbsd, .solaris, .illumos => true, + else => false, + }, + .x86_64 => switch (target.os.tag) { + .linux, .netbsd, .freebsd, .openbsd, .macos, .ios, .solaris, .illumos => true, + else => false, + }, + .arm => switch (target.os.tag) { + .linux => true, + else => false, + }, + .aarch64 => switch (target.os.tag) { + .linux, .netbsd, .freebsd, .macos, .ios => true, + else => false, + }, + // Unwinding is possible on other targets but this implementation does + // not support them...yet! + else => false, + }; +} diff --git a/lib/std/debug/Dwarf/abi.zig b/lib/std/debug/Dwarf/abi.zig index e87f023d72f6..f153a10ba472 100644 --- a/lib/std/debug/Dwarf/abi.zig +++ b/lib/std/debug/Dwarf/abi.zig @@ -5,35 +5,14 @@ const mem = std.mem; const posix = std.posix; const Arch = std.Target.Cpu.Arch; -pub fn supportsUnwinding(target: std.Target) bool { - return switch (target.cpu.arch) { - .x86 => switch (target.os.tag) { - .linux, .netbsd, .solaris, .illumos => true, - else => false, - }, - .x86_64 => switch (target.os.tag) { - .linux, .netbsd, .freebsd, .openbsd, .macos, .ios, .solaris, .illumos => true, - else => false, - }, - .arm => switch (target.os.tag) { - .linux => true, - else => false, - }, - .aarch64 => switch (target.os.tag) { - .linux, .netbsd, .freebsd, .macos, .ios => true, - else => false, - }, - else => false, - }; -} - -pub fn ipRegNum(arch: Arch) u8 { +/// Returns `null` for CPU architectures without an instruction pointer register. 
+pub fn ipRegNum(arch: Arch) ?u8 { return switch (arch) { .x86 => 8, .x86_64 => 16, .arm => 15, .aarch64 => 32, - else => unreachable, + else => null, }; } diff --git a/lib/std/debug/Dwarf/expression.zig b/lib/std/debug/Dwarf/expression.zig index 7a3a4ed7403f..5fab56de6e41 100644 --- a/lib/std/debug/Dwarf/expression.zig +++ b/lib/std/debug/Dwarf/expression.zig @@ -1190,11 +1190,11 @@ test "DWARF expressions" { mem.writeInt(usize, reg_bytes[0..@sizeOf(usize)], 0xee, native_endian); (try abi.regValueNative(&thread_context, abi.fpRegNum(native_arch, reg_context), reg_context)).* = 1; (try abi.regValueNative(&thread_context, abi.spRegNum(native_arch, reg_context), reg_context)).* = 2; - (try abi.regValueNative(&thread_context, abi.ipRegNum(native_arch), reg_context)).* = 3; + (try abi.regValueNative(&thread_context, abi.ipRegNum(native_arch).?, reg_context)).* = 3; try b.writeBreg(writer, abi.fpRegNum(native_arch, reg_context), @as(usize, 100)); try b.writeBreg(writer, abi.spRegNum(native_arch, reg_context), @as(usize, 200)); - try b.writeBregx(writer, abi.ipRegNum(native_arch), @as(usize, 300)); + try b.writeBregx(writer, abi.ipRegNum(native_arch).?, @as(usize, 300)); try b.writeRegvalType(writer, @as(u8, 0), @as(usize, 400)); _ = try stack_machine.run(program.items, allocator, context, 0); diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 80a8cb4cd991..b1679a224b9c 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -34,18 +34,15 @@ allocator: Allocator, address_map: std.AutoHashMap(usize, *Module), modules: if (native_os == .windows) std.ArrayListUnmanaged(WindowsModule) else void, -pub const OpenSelfError = error{ +pub const OpenError = error{ MissingDebugInfo, UnsupportedOperatingSystem, } || @typeInfo(@typeInfo(@TypeOf(SelfInfo.init)).Fn.return_type.?).ErrorUnion.error_set; -pub fn openSelf(allocator: Allocator) OpenSelfError!SelfInfo { +pub fn open(allocator: Allocator) OpenError!SelfInfo { nosuspend { if 
(builtin.strip_debug_info) return error.MissingDebugInfo; - if (@hasDecl(root, "os") and @hasDecl(root.os, "debug") and @hasDecl(root.os.debug, "openSelfDebugInfo")) { - return root.os.debug.openSelfDebugInfo(allocator); - } switch (native_os) { .linux, .freebsd, @@ -1721,6 +1718,8 @@ pub const UnwindContext = struct { allocator: Allocator, thread_context: *std.debug.ThreadContext, ) !UnwindContext { + comptime assert(supports_unwinding); + const pc = stripInstructionPtrAuthCode( (try regValueNative(thread_context, ip_reg_num, null)).*, ); @@ -1982,8 +1981,8 @@ fn spRegNum(reg_context: Dwarf.abi.RegisterContext) u8 { return Dwarf.abi.spRegNum(native_arch, reg_context); } -const ip_reg_num = Dwarf.abi.ipRegNum(native_arch); -const supports_unwinding = Dwarf.abi.supportsUnwinding(builtin.target); +const ip_reg_num = Dwarf.abi.ipRegNum(native_arch).?; +pub const supports_unwinding = Dwarf.supportsUnwinding(builtin.target); fn unwindFrameMachODwarf( context: *UnwindContext, diff --git a/test/standalone/coff_dwarf/main.zig b/test/standalone/coff_dwarf/main.zig index 236aa1c5fa0b..1cf2587e58e5 100644 --- a/test/standalone/coff_dwarf/main.zig +++ b/test/standalone/coff_dwarf/main.zig @@ -9,7 +9,7 @@ pub fn main() !void { defer assert(gpa.deinit() == .ok); const allocator = gpa.allocator(); - var debug_info = try std.debug.openSelfDebugInfo(allocator); + var debug_info = try std.debug.SelfInfo.open(allocator); defer debug_info.deinit(); var add_addr: usize = undefined; From 6d606cc38b4df2b20af9d77367f8ab22bbbea092 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 2 Aug 2024 12:35:49 -0700 Subject: [PATCH 6/6] reintroduce std.Dwarf.abi.supportsUnwinding There are two concepts here: one for whether dwarf supports unwinding on that target, and another for whether the Zig standard library implements it yet. 
--- lib/std/debug/Dwarf.zig | 24 ------------------------ lib/std/debug/Dwarf/abi.zig | 25 +++++++++++++++++++++++++ lib/std/debug/SelfInfo.zig | 37 ++++++++++++++++++++++++++++++++++++- 3 files changed, 61 insertions(+), 25 deletions(-) diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index e30e3d06ac6f..991c7315492c 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -2023,27 +2023,3 @@ fn pcRelBase(field_ptr: usize, pc_rel_offset: i64) !usize { return std.math.add(usize, field_ptr, @as(usize, @intCast(pc_rel_offset))); } } - -pub fn supportsUnwinding(target: std.Target) bool { - return switch (target.cpu.arch) { - .x86 => switch (target.os.tag) { - .linux, .netbsd, .solaris, .illumos => true, - else => false, - }, - .x86_64 => switch (target.os.tag) { - .linux, .netbsd, .freebsd, .openbsd, .macos, .ios, .solaris, .illumos => true, - else => false, - }, - .arm => switch (target.os.tag) { - .linux => true, - else => false, - }, - .aarch64 => switch (target.os.tag) { - .linux, .netbsd, .freebsd, .macos, .ios => true, - else => false, - }, - // Unwinding is possible on other targets but this implementation does - // not support them...yet! - else => false, - }; -} diff --git a/lib/std/debug/Dwarf/abi.zig b/lib/std/debug/Dwarf/abi.zig index f153a10ba472..b2c4cb9536df 100644 --- a/lib/std/debug/Dwarf/abi.zig +++ b/lib/std/debug/Dwarf/abi.zig @@ -5,6 +5,31 @@ const mem = std.mem; const posix = std.posix; const Arch = std.Target.Cpu.Arch; +/// Tells whether unwinding for this target is supported by the Dwarf standard. +/// +/// See also `std.debug.SelfInfo.supportsUnwinding` which tells whether the Zig +/// standard library has a working implementation of unwinding for this target. 
+pub fn supportsUnwinding(target: std.Target) bool { + return switch (target.cpu.arch) { + .amdgcn, + .nvptx, + .nvptx64, + .spirv, + .spirv32, + .spirv64, + .spu_2, + => false, + + // Enabling this causes relocation errors such as: + // error: invalid relocation type R_RISCV_SUB32 at offset 0x20 + .riscv64, .riscv32 => false, + + // Conservative guess. Feel free to update this logic with any targets + // that are known to not support Dwarf unwinding. + else => true, + }; +} + /// Returns `null` for CPU architectures without an instruction pointer register. pub fn ipRegNum(arch: Arch) ?u8 { return switch (arch) { diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index b1679a224b9c..c27f466fb378 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -1982,7 +1982,42 @@ fn spRegNum(reg_context: Dwarf.abi.RegisterContext) u8 { } const ip_reg_num = Dwarf.abi.ipRegNum(native_arch).?; -pub const supports_unwinding = Dwarf.supportsUnwinding(builtin.target); + +/// Tells whether unwinding for the host is implemented. +pub const supports_unwinding = supportsUnwinding(builtin.target); + +comptime { + if (supports_unwinding) assert(Dwarf.abi.supportsUnwinding(builtin.target)); +} + +/// Tells whether unwinding for this target is *implemented* here in the Zig +/// standard library. +/// +/// See also `Dwarf.abi.supportsUnwinding` which tells whether Dwarf supports +/// unwinding on that target *in theory*. 
+pub fn supportsUnwinding(target: std.Target) bool { + return switch (target.cpu.arch) { + .x86 => switch (target.os.tag) { + .linux, .netbsd, .solaris, .illumos => true, + else => false, + }, + .x86_64 => switch (target.os.tag) { + .linux, .netbsd, .freebsd, .openbsd, .macos, .ios, .solaris, .illumos => true, + else => false, + }, + .arm => switch (target.os.tag) { + .linux => true, + else => false, + }, + .aarch64 => switch (target.os.tag) { + .linux, .netbsd, .freebsd, .macos, .ios => true, + else => false, + }, + // Unwinding is possible on other targets but this implementation does + // not support them...yet! + else => false, + }; +} fn unwindFrameMachODwarf( context: *UnwindContext,