From 290966c2497dc9d212bf9d4bd0fecee4988091a5 Mon Sep 17 00:00:00 2001
From: Andrew Kelley
Date: Thu, 1 Aug 2024 16:31:03 -0700
Subject: [PATCH] std.debug: rename Info to SelfInfo

This code has the hard-coded goal of supporting the executable's own
debug information and makes design choices in service of that goal, such
as memory-mapping the inputs, using dl_iterate_phdr, and doing
conditional compilation on the host target.

A more general-purpose implementation of debug information may be able
to share code with this, but there are some fundamental
incompatibilities. For example, the "SelfInfo" implementation wants to
avoid bloating the binary with PDB on POSIX systems, and likewise DWARF
on Windows systems, while a general-purpose implementation needs to
support both PDB and DWARF from the same binary. Such an implementation
might, for example, inspect the debug information from a cross-compiled
binary.

`SourceLocation` now lives at `std.debug.SourceLocation` and is
documented.

Deprecate `std.debug.runtime_safety` because it reflects the
optimization mode of the standard library, when the caller probably
wants to use the optimization mode of their own module.

`std.pdb.Pdb` is moved to `std.debug.Pdb`, mirroring the recent
extraction of `std.debug.Dwarf` from `std.dwarf`.

I have no idea why we have both Module (with a Windows-specific
definition) and WindowsModule. I left some passive-aggressive doc
comments to express my frustration.
---
 lib/std/debug.zig                        |  57 ++-
 lib/std/debug/Dwarf.zig                  |   6 +-
 lib/std/debug/Pdb.zig                    | 591 ++++++++++++++++++++++
 lib/std/debug/{Info.zig => SelfInfo.zig} |  66 ++-
 lib/std/pdb.zig                          | 607 +----------------------
 5 files changed, 668 insertions(+), 659 deletions(-)
 create mode 100644 lib/std/debug/Pdb.zig
 rename lib/std/debug/{Info.zig => SelfInfo.zig} (96%)

diff --git a/lib/std/debug.zig b/lib/std/debug.zig
index 6dac92188e7e..2753a0f52f2e 100644
--- a/lib/std/debug.zig
+++ b/lib/std/debug.zig
@@ -6,11 +6,6 @@ const io = std.io;
 const posix = std.posix;
 const fs = std.fs;
 const testing = std.testing;
-const elf = std.elf;
-const DW = std.dwarf;
-const macho = std.macho;
-const coff = std.coff;
-const pdb = std.pdb;
 const root = @import("root");
 const File = std.fs.File;
 const windows = std.os.windows;
@@ -19,8 +14,22 @@ const native_os = builtin.os.tag;
 const native_endian = native_arch.endian();
 
 pub const Dwarf = @import("debug/Dwarf.zig");
-pub const Info = @import("debug/Info.zig");
+pub const Pdb = @import("debug/Pdb.zig");
+pub const SelfInfo = @import("debug/SelfInfo.zig");
+
+/// Unresolved source locations can be represented with a single `usize` that
+/// corresponds to a virtual memory address of the program counter. Combined
+/// with debug information, those values can be converted into a resolved
+/// source location, including file, line, and column.
+pub const SourceLocation = struct {
+    line: u64,
+    column: u64,
+    file_name: []const u8,
+};
 
+/// Deprecated because it reflects the optimization mode of the standard
+/// library, when the caller probably wants to use the optimization mode of
+/// their own module.
pub const runtime_safety = switch (builtin.mode) { .Debug, .ReleaseSafe => true, .ReleaseFast, .ReleaseSmall => false, @@ -72,13 +81,13 @@ pub fn getStderrMutex() *std.Thread.Mutex { } /// TODO multithreaded awareness -var self_debug_info: ?Info = null; +var self_debug_info: ?SelfInfo = null; -pub fn getSelfDebugInfo() !*Info { +pub fn getSelfDebugInfo() !*SelfInfo { if (self_debug_info) |*info| { return info; } else { - self_debug_info = try Info.openSelf(getDebugInfoAllocator()); + self_debug_info = try SelfInfo.openSelf(getDebugInfoAllocator()); return &self_debug_info.?; } } @@ -316,7 +325,7 @@ pub fn captureStackTrace(first_address: ?usize, stack_trace: *std.builtin.StackT stack_trace.index = slice.len; } else { // TODO: This should use the DWARF unwinder if .eh_frame_hdr is available (so that full debug info parsing isn't required). - // A new path for loading Info needs to be created which will only attempt to parse in-memory sections, because + // A new path for loading SelfInfo needs to be created which will only attempt to parse in-memory sections, because // stopping to load other debug info (ie. source line info) from disk here is not required for unwinding. var it = StackIterator.init(first_address, null); defer it.deinit(); @@ -494,7 +503,7 @@ pub fn writeStackTrace( stack_trace: std.builtin.StackTrace, out_stream: anytype, allocator: mem.Allocator, - debug_info: *Info, + debug_info: *SelfInfo, tty_config: io.tty.Config, ) !void { _ = allocator; @@ -531,11 +540,11 @@ pub const StackIterator = struct { fp: usize, ma: MemoryAccessor = MemoryAccessor.init, - // When Info and a register context is available, this iterator can unwind + // When SelfInfo and a register context is available, this iterator can unwind // stacks with frames that don't use a frame pointer (ie. -fomit-frame-pointer), // using DWARF and MachO unwind info. unwind_state: if (have_ucontext) ?struct { - debug_info: *Info, + debug_info: *SelfInfo, dwarf_context: Dwarf.UnwindContext, last_error: ?UnwindError = null, failed: bool = false, @@ -560,7 +569,7 @@ pub const StackIterator = struct { }; } - pub fn initWithContext(first_address: ?usize, debug_info: *Info, context: *const posix.ucontext_t) !StackIterator { + pub fn initWithContext(first_address: ?usize, debug_info: *SelfInfo, context: *const posix.ucontext_t) !StackIterator { // The implementation of DWARF unwinding on aarch64-macos is not complete. However, Apple mandates that // the frame pointer register is always used, so on this platform we can safely use the FP-based unwinder. 
if (comptime builtin.target.isDarwin() and native_arch == .aarch64) { @@ -820,7 +829,7 @@ const have_msync = switch (native_os) { pub fn writeCurrentStackTrace( out_stream: anytype, - debug_info: *Info, + debug_info: *SelfInfo, tty_config: io.tty.Config, start_addr: ?usize, ) !void { @@ -906,7 +915,7 @@ pub noinline fn walkStackWindows(addresses: []usize, existing_context: ?*const w pub fn writeStackTraceWindows( out_stream: anytype, - debug_info: *Info, + debug_info: *SelfInfo, tty_config: io.tty.Config, context: *const windows.CONTEXT, start_addr: ?usize, @@ -925,7 +934,7 @@ pub fn writeStackTraceWindows( } } -fn printUnknownSource(debug_info: *Info, out_stream: anytype, address: usize, tty_config: io.tty.Config) !void { +fn printUnknownSource(debug_info: *SelfInfo, out_stream: anytype, address: usize, tty_config: io.tty.Config) !void { const module_name = debug_info.getModuleNameForAddress(address); return printLineInfo( out_stream, @@ -938,14 +947,14 @@ fn printUnknownSource(debug_info: *Info, out_stream: anytype, address: usize, tt ); } -fn printLastUnwindError(it: *StackIterator, debug_info: *Info, out_stream: anytype, tty_config: io.tty.Config) void { +fn printLastUnwindError(it: *StackIterator, debug_info: *SelfInfo, out_stream: anytype, tty_config: io.tty.Config) void { if (!have_ucontext) return; if (it.getLastError()) |unwind_error| { printUnwindError(debug_info, out_stream, unwind_error.address, unwind_error.err, tty_config) catch {}; } } -fn printUnwindError(debug_info: *Info, out_stream: anytype, address: usize, err: UnwindError, tty_config: io.tty.Config) !void { +fn printUnwindError(debug_info: *SelfInfo, out_stream: anytype, address: usize, err: UnwindError, tty_config: io.tty.Config) !void { const module_name = debug_info.getModuleNameForAddress(address) orelse "???"; try tty_config.setColor(out_stream, .dim); if (err == error.MissingDebugInfo) { @@ -956,7 +965,7 @@ fn printUnwindError(debug_info: *Info, out_stream: anytype, address: usize, err: try tty_config.setColor(out_stream, .reset); } -pub fn printSourceAtAddress(debug_info: *Info, out_stream: anytype, address: usize, tty_config: io.tty.Config) !void { +pub fn printSourceAtAddress(debug_info: *SelfInfo, out_stream: anytype, address: usize, tty_config: io.tty.Config) !void { const module = debug_info.getModuleForAddress(address) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => return printUnknownSource(debug_info, out_stream, address, tty_config), else => return err, @@ -981,7 +990,7 @@ pub fn printSourceAtAddress(debug_info: *Info, out_stream: anytype, address: usi fn printLineInfo( out_stream: anytype, - line_info: ?Info.SourceLocation, + line_info: ?SourceLocation, address: usize, symbol_name: []const u8, compile_unit_name: []const u8, @@ -1027,7 +1036,7 @@ fn printLineInfo( } } -fn printLineFromFileAnyOs(out_stream: anytype, line_info: Info.SourceLocation) !void { +fn printLineFromFileAnyOs(out_stream: anytype, line_info: SourceLocation) !void { // Need this to always block even in async I/O mode, because this could potentially // be called from e.g. the event loop code crashing. var f = try fs.cwd().openFile(line_info.file_name, .{}); @@ -1093,7 +1102,7 @@ test printLineFromFileAnyOs { var test_dir = std.testing.tmpDir(.{}); defer test_dir.cleanup(); - // Relies on testing.tmpDir internals which is not ideal, but Info.SourceLocation requires paths. + // Relies on testing.tmpDir internals which is not ideal, but SourceLocation requires paths. 
const test_dir_path = try join(allocator, &.{ ".zig-cache", "tmp", test_dir.sub_path[0..] }); defer allocator.free(test_dir_path); @@ -1439,7 +1448,7 @@ test "manage resources correctly" { } const writer = std.io.null_writer; - var di = try Info.openSelf(testing.allocator); + var di = try SelfInfo.openSelf(testing.allocator); defer di.deinit(); try printSourceAtAddress(&di, writer, showMyTrace(), io.tty.detectConfig(std.io.getStdErr())); } diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 353c097471ab..4fff2562b243 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -1353,7 +1353,7 @@ pub fn getLineNumberInfo( allocator: Allocator, compile_unit: CompileUnit, target_address: u64, -) !std.debug.Info.SourceLocation { +) !std.debug.SourceLocation { const compile_unit_cwd = try compile_unit.die.getAttrString(di, AT.comp_dir, di.section(.debug_line_str), compile_unit); const line_info_offset = try compile_unit.die.getAttrSecOffset(AT.stmt_list); @@ -2084,7 +2084,7 @@ const LineNumberProgram = struct { self: *LineNumberProgram, allocator: Allocator, file_entries: []const FileEntry, - ) !?std.debug.Info.SourceLocation { + ) !?std.debug.SourceLocation { if (self.prev_valid and self.target_address >= self.prev_address and self.target_address < self.address) @@ -2104,7 +2104,7 @@ const LineNumberProgram = struct { dir_name, file_entry.path, }); - return std.debug.Info.SourceLocation{ + return std.debug.SourceLocation{ .line = if (self.prev_line >= 0) @as(u64, @intCast(self.prev_line)) else 0, .column = self.prev_column, .file_name = file_name, diff --git a/lib/std/debug/Pdb.zig b/lib/std/debug/Pdb.zig new file mode 100644 index 000000000000..bdcc108c1dc0 --- /dev/null +++ b/lib/std/debug/Pdb.zig @@ -0,0 +1,591 @@ +const std = @import("../std.zig"); +const File = std.fs.File; +const Allocator = std.mem.Allocator; +const pdb = std.pdb; + +const Pdb = @This(); + +in_file: File, +msf: Msf, +allocator: Allocator, +string_table: ?*MsfStream, +dbi: ?*MsfStream, +modules: []Module, +sect_contribs: []pdb.SectionContribEntry, +guid: [16]u8, +age: u32, + +pub const Module = struct { + mod_info: pdb.ModInfo, + module_name: []u8, + obj_file_name: []u8, + // The fields below are filled on demand. 
+ populated: bool, + symbols: []u8, + subsect_info: []u8, + checksum_offset: ?usize, + + pub fn deinit(self: *Module, allocator: Allocator) void { + allocator.free(self.module_name); + allocator.free(self.obj_file_name); + if (self.populated) { + allocator.free(self.symbols); + allocator.free(self.subsect_info); + } + } +}; + +pub fn init(allocator: Allocator, path: []const u8) !Pdb { + const file = try std.fs.cwd().openFile(path, .{}); + errdefer file.close(); + + return .{ + .in_file = file, + .allocator = allocator, + .string_table = null, + .dbi = null, + .msf = try Msf.init(allocator, file), + .modules = &[_]Module{}, + .sect_contribs = &[_]pdb.SectionContribEntry{}, + .guid = undefined, + .age = undefined, + }; +} + +pub fn deinit(self: *Pdb) void { + self.in_file.close(); + self.msf.deinit(self.allocator); + for (self.modules) |*module| { + module.deinit(self.allocator); + } + self.allocator.free(self.modules); + self.allocator.free(self.sect_contribs); +} + +pub fn parseDbiStream(self: *Pdb) !void { + var stream = self.getStream(pdb.StreamType.Dbi) orelse + return error.InvalidDebugInfo; + const reader = stream.reader(); + + const header = try reader.readStruct(std.pdb.DbiStreamHeader); + if (header.VersionHeader != 19990903) // V70, only value observed by LLVM team + return error.UnknownPDBVersion; + // if (header.Age != age) + // return error.UnmatchingPDB; + + const mod_info_size = header.ModInfoSize; + const section_contrib_size = header.SectionContributionSize; + + var modules = std.ArrayList(Module).init(self.allocator); + errdefer modules.deinit(); + + // Module Info Substream + var mod_info_offset: usize = 0; + while (mod_info_offset != mod_info_size) { + const mod_info = try reader.readStruct(pdb.ModInfo); + var this_record_len: usize = @sizeOf(pdb.ModInfo); + + const module_name = try reader.readUntilDelimiterAlloc(self.allocator, 0, 1024); + errdefer self.allocator.free(module_name); + this_record_len += module_name.len + 1; + + const obj_file_name = try reader.readUntilDelimiterAlloc(self.allocator, 0, 1024); + errdefer self.allocator.free(obj_file_name); + this_record_len += obj_file_name.len + 1; + + if (this_record_len % 4 != 0) { + const round_to_next_4 = (this_record_len | 0x3) + 1; + const march_forward_bytes = round_to_next_4 - this_record_len; + try stream.seekBy(@as(isize, @intCast(march_forward_bytes))); + this_record_len += march_forward_bytes; + } + + try modules.append(Module{ + .mod_info = mod_info, + .module_name = module_name, + .obj_file_name = obj_file_name, + + .populated = false, + .symbols = undefined, + .subsect_info = undefined, + .checksum_offset = null, + }); + + mod_info_offset += this_record_len; + if (mod_info_offset > mod_info_size) + return error.InvalidDebugInfo; + } + + // Section Contribution Substream + var sect_contribs = std.ArrayList(pdb.SectionContribEntry).init(self.allocator); + errdefer sect_contribs.deinit(); + + var sect_cont_offset: usize = 0; + if (section_contrib_size != 0) { + const version = reader.readEnum(std.pdb.SectionContrSubstreamVersion, .little) catch |err| switch (err) { + error.InvalidValue => return error.InvalidDebugInfo, + else => |e| return e, + }; + _ = version; + sect_cont_offset += @sizeOf(u32); + } + while (sect_cont_offset != section_contrib_size) { + const entry = try sect_contribs.addOne(); + entry.* = try reader.readStruct(pdb.SectionContribEntry); + sect_cont_offset += @sizeOf(pdb.SectionContribEntry); + + if (sect_cont_offset > section_contrib_size) + return error.InvalidDebugInfo; + } + + 
self.modules = try modules.toOwnedSlice(); + self.sect_contribs = try sect_contribs.toOwnedSlice(); +} + +pub fn parseInfoStream(self: *Pdb) !void { + var stream = self.getStream(pdb.StreamType.Pdb) orelse + return error.InvalidDebugInfo; + const reader = stream.reader(); + + // Parse the InfoStreamHeader. + const version = try reader.readInt(u32, .little); + const signature = try reader.readInt(u32, .little); + _ = signature; + const age = try reader.readInt(u32, .little); + const guid = try reader.readBytesNoEof(16); + + if (version != 20000404) // VC70, only value observed by LLVM team + return error.UnknownPDBVersion; + + self.guid = guid; + self.age = age; + + // Find the string table. + const string_table_index = str_tab_index: { + const name_bytes_len = try reader.readInt(u32, .little); + const name_bytes = try self.allocator.alloc(u8, name_bytes_len); + defer self.allocator.free(name_bytes); + try reader.readNoEof(name_bytes); + + const HashTableHeader = extern struct { + Size: u32, + Capacity: u32, + + fn maxLoad(cap: u32) u32 { + return cap * 2 / 3 + 1; + } + }; + const hash_tbl_hdr = try reader.readStruct(HashTableHeader); + if (hash_tbl_hdr.Capacity == 0) + return error.InvalidDebugInfo; + + if (hash_tbl_hdr.Size > HashTableHeader.maxLoad(hash_tbl_hdr.Capacity)) + return error.InvalidDebugInfo; + + const present = try readSparseBitVector(&reader, self.allocator); + defer self.allocator.free(present); + if (present.len != hash_tbl_hdr.Size) + return error.InvalidDebugInfo; + const deleted = try readSparseBitVector(&reader, self.allocator); + defer self.allocator.free(deleted); + + for (present) |_| { + const name_offset = try reader.readInt(u32, .little); + const name_index = try reader.readInt(u32, .little); + if (name_offset > name_bytes.len) + return error.InvalidDebugInfo; + const name = std.mem.sliceTo(name_bytes[name_offset..], 0); + if (std.mem.eql(u8, name, "/names")) { + break :str_tab_index name_index; + } + } + return error.MissingDebugInfo; + }; + + self.string_table = self.getStreamById(string_table_index) orelse + return error.MissingDebugInfo; +} + +pub fn getSymbolName(self: *Pdb, module: *Module, address: u64) ?[]const u8 { + _ = self; + std.debug.assert(module.populated); + + var symbol_i: usize = 0; + while (symbol_i != module.symbols.len) { + const prefix = @as(*align(1) pdb.RecordPrefix, @ptrCast(&module.symbols[symbol_i])); + if (prefix.RecordLen < 2) + return null; + switch (prefix.RecordKind) { + .S_LPROC32, .S_GPROC32 => { + const proc_sym = @as(*align(1) pdb.ProcSym, @ptrCast(&module.symbols[symbol_i + @sizeOf(pdb.RecordPrefix)])); + if (address >= proc_sym.CodeOffset and address < proc_sym.CodeOffset + proc_sym.CodeSize) { + return std.mem.sliceTo(@as([*:0]u8, @ptrCast(&proc_sym.Name[0])), 0); + } + }, + else => {}, + } + symbol_i += prefix.RecordLen + @sizeOf(u16); + } + + return null; +} + +pub fn getLineNumberInfo(self: *Pdb, module: *Module, address: u64) !std.debug.SourceLocation { + std.debug.assert(module.populated); + const subsect_info = module.subsect_info; + + var sect_offset: usize = 0; + var skip_len: usize = undefined; + const checksum_offset = module.checksum_offset orelse return error.MissingDebugInfo; + while (sect_offset != subsect_info.len) : (sect_offset += skip_len) { + const subsect_hdr = @as(*align(1) pdb.DebugSubsectionHeader, @ptrCast(&subsect_info[sect_offset])); + skip_len = subsect_hdr.Length; + sect_offset += @sizeOf(pdb.DebugSubsectionHeader); + + switch (subsect_hdr.Kind) { + .Lines => { + var line_index = sect_offset; + 
+ const line_hdr = @as(*align(1) pdb.LineFragmentHeader, @ptrCast(&subsect_info[line_index])); + if (line_hdr.RelocSegment == 0) + return error.MissingDebugInfo; + line_index += @sizeOf(pdb.LineFragmentHeader); + const frag_vaddr_start = line_hdr.RelocOffset; + const frag_vaddr_end = frag_vaddr_start + line_hdr.CodeSize; + + if (address >= frag_vaddr_start and address < frag_vaddr_end) { + // There is an unknown number of LineBlockFragmentHeaders (and their accompanying line and column records) + // from now on. We will iterate through them, and eventually find a SourceLocation that we're interested in, + // breaking out to :subsections. If not, we will make sure to not read anything outside of this subsection. + const subsection_end_index = sect_offset + subsect_hdr.Length; + + while (line_index < subsection_end_index) { + const block_hdr = @as(*align(1) pdb.LineBlockFragmentHeader, @ptrCast(&subsect_info[line_index])); + line_index += @sizeOf(pdb.LineBlockFragmentHeader); + const start_line_index = line_index; + + const has_column = line_hdr.Flags.LF_HaveColumns; + + // All line entries are stored inside their line block by ascending start address. + // Heuristic: we want to find the last line entry + // that has a vaddr_start <= address. + // This is done with a simple linear search. + var line_i: u32 = 0; + while (line_i < block_hdr.NumLines) : (line_i += 1) { + const line_num_entry = @as(*align(1) pdb.LineNumberEntry, @ptrCast(&subsect_info[line_index])); + line_index += @sizeOf(pdb.LineNumberEntry); + + const vaddr_start = frag_vaddr_start + line_num_entry.Offset; + if (address < vaddr_start) { + break; + } + } + + // line_i == 0 would mean that no matching pdb.LineNumberEntry was found. + if (line_i > 0) { + const subsect_index = checksum_offset + block_hdr.NameIndex; + const chksum_hdr = @as(*align(1) pdb.FileChecksumEntryHeader, @ptrCast(&module.subsect_info[subsect_index])); + const strtab_offset = @sizeOf(pdb.StringTableHeader) + chksum_hdr.FileNameOffset; + try self.string_table.?.seekTo(strtab_offset); + const source_file_name = try self.string_table.?.reader().readUntilDelimiterAlloc(self.allocator, 0, 1024); + + const line_entry_idx = line_i - 1; + + const column = if (has_column) blk: { + const start_col_index = start_line_index + @sizeOf(pdb.LineNumberEntry) * block_hdr.NumLines; + const col_index = start_col_index + @sizeOf(pdb.ColumnNumberEntry) * line_entry_idx; + const col_num_entry = @as(*align(1) pdb.ColumnNumberEntry, @ptrCast(&subsect_info[col_index])); + break :blk col_num_entry.StartColumn; + } else 0; + + const found_line_index = start_line_index + line_entry_idx * @sizeOf(pdb.LineNumberEntry); + const line_num_entry: *align(1) pdb.LineNumberEntry = @ptrCast(&subsect_info[found_line_index]); + const flags: *align(1) pdb.LineNumberEntry.Flags = @ptrCast(&line_num_entry.Flags); + + return .{ + .file_name = source_file_name, + .line = flags.Start, + .column = column, + }; + } + } + + // Checking that we are not reading garbage after the (possibly) multiple block fragments. + if (line_index != subsection_end_index) { + return error.InvalidDebugInfo; + } + } + }, + else => {}, + } + + if (sect_offset > subsect_info.len) + return error.InvalidDebugInfo; + } + + return error.MissingDebugInfo; +} + +pub fn getModule(self: *Pdb, index: usize) !?*Module { + if (index >= self.modules.len) + return null; + + const mod = &self.modules[index]; + if (mod.populated) + return mod; + + // At most one can be non-zero. 
+ if (mod.mod_info.C11ByteSize != 0 and mod.mod_info.C13ByteSize != 0) + return error.InvalidDebugInfo; + if (mod.mod_info.C13ByteSize == 0) + return error.InvalidDebugInfo; + + const stream = self.getStreamById(mod.mod_info.ModuleSymStream) orelse + return error.MissingDebugInfo; + const reader = stream.reader(); + + const signature = try reader.readInt(u32, .little); + if (signature != 4) + return error.InvalidDebugInfo; + + mod.symbols = try self.allocator.alloc(u8, mod.mod_info.SymByteSize - 4); + errdefer self.allocator.free(mod.symbols); + try reader.readNoEof(mod.symbols); + + mod.subsect_info = try self.allocator.alloc(u8, mod.mod_info.C13ByteSize); + errdefer self.allocator.free(mod.subsect_info); + try reader.readNoEof(mod.subsect_info); + + var sect_offset: usize = 0; + var skip_len: usize = undefined; + while (sect_offset != mod.subsect_info.len) : (sect_offset += skip_len) { + const subsect_hdr = @as(*align(1) pdb.DebugSubsectionHeader, @ptrCast(&mod.subsect_info[sect_offset])); + skip_len = subsect_hdr.Length; + sect_offset += @sizeOf(pdb.DebugSubsectionHeader); + + switch (subsect_hdr.Kind) { + .FileChecksums => { + mod.checksum_offset = sect_offset; + break; + }, + else => {}, + } + + if (sect_offset > mod.subsect_info.len) + return error.InvalidDebugInfo; + } + + mod.populated = true; + return mod; +} + +pub fn getStreamById(self: *Pdb, id: u32) ?*MsfStream { + if (id >= self.msf.streams.len) + return null; + return &self.msf.streams[id]; +} + +pub fn getStream(self: *Pdb, stream: pdb.StreamType) ?*MsfStream { + const id = @intFromEnum(stream); + return self.getStreamById(id); +} + +/// https://llvm.org/docs/PDB/MsfFile.html +const Msf = struct { + directory: MsfStream, + streams: []MsfStream, + + fn init(allocator: Allocator, file: File) !Msf { + const in = file.reader(); + + const superblock = try in.readStruct(pdb.SuperBlock); + + // Sanity checks + if (!std.mem.eql(u8, &superblock.FileMagic, pdb.SuperBlock.file_magic)) + return error.InvalidDebugInfo; + if (superblock.FreeBlockMapBlock != 1 and superblock.FreeBlockMapBlock != 2) + return error.InvalidDebugInfo; + const file_len = try file.getEndPos(); + if (superblock.NumBlocks * superblock.BlockSize != file_len) + return error.InvalidDebugInfo; + switch (superblock.BlockSize) { + // llvm only supports 4096 but we can handle any of these values + 512, 1024, 2048, 4096 => {}, + else => return error.InvalidDebugInfo, + } + + const dir_block_count = blockCountFromSize(superblock.NumDirectoryBytes, superblock.BlockSize); + if (dir_block_count > superblock.BlockSize / @sizeOf(u32)) + return error.UnhandledBigDirectoryStream; // cf. BlockMapAddr comment. + + try file.seekTo(superblock.BlockSize * superblock.BlockMapAddr); + const dir_blocks = try allocator.alloc(u32, dir_block_count); + for (dir_blocks) |*b| { + b.* = try in.readInt(u32, .little); + } + var directory = MsfStream.init( + superblock.BlockSize, + file, + dir_blocks, + ); + + const begin = directory.pos; + const stream_count = try directory.reader().readInt(u32, .little); + const stream_sizes = try allocator.alloc(u32, stream_count); + defer allocator.free(stream_sizes); + + // Microsoft's implementation uses @as(u32, -1) for inexistent streams. + // These streams are not used, but still participate in the file + // and must be taken into account when resolving stream indices. 
+ const Nil = 0xFFFFFFFF; + for (stream_sizes) |*s| { + const size = try directory.reader().readInt(u32, .little); + s.* = if (size == Nil) 0 else blockCountFromSize(size, superblock.BlockSize); + } + + const streams = try allocator.alloc(MsfStream, stream_count); + for (streams, 0..) |*stream, i| { + const size = stream_sizes[i]; + if (size == 0) { + stream.* = MsfStream{ + .blocks = &[_]u32{}, + }; + } else { + var blocks = try allocator.alloc(u32, size); + var j: u32 = 0; + while (j < size) : (j += 1) { + const block_id = try directory.reader().readInt(u32, .little); + const n = (block_id % superblock.BlockSize); + // 0 is for pdb.SuperBlock, 1 and 2 for FPMs. + if (block_id == 0 or n == 1 or n == 2 or block_id * superblock.BlockSize > file_len) + return error.InvalidBlockIndex; + blocks[j] = block_id; + } + + stream.* = MsfStream.init( + superblock.BlockSize, + file, + blocks, + ); + } + } + + const end = directory.pos; + if (end - begin != superblock.NumDirectoryBytes) + return error.InvalidStreamDirectory; + + return Msf{ + .directory = directory, + .streams = streams, + }; + } + + fn deinit(self: *Msf, allocator: Allocator) void { + allocator.free(self.directory.blocks); + for (self.streams) |*stream| { + allocator.free(stream.blocks); + } + allocator.free(self.streams); + } +}; + +const MsfStream = struct { + in_file: File = undefined, + pos: u64 = undefined, + blocks: []u32 = undefined, + block_size: u32 = undefined, + + pub const Error = @typeInfo(@typeInfo(@TypeOf(read)).Fn.return_type.?).ErrorUnion.error_set; + + fn init(block_size: u32, file: File, blocks: []u32) MsfStream { + const stream = MsfStream{ + .in_file = file, + .pos = 0, + .blocks = blocks, + .block_size = block_size, + }; + + return stream; + } + + fn read(self: *MsfStream, buffer: []u8) !usize { + var block_id = @as(usize, @intCast(self.pos / self.block_size)); + if (block_id >= self.blocks.len) return 0; // End of Stream + var block = self.blocks[block_id]; + var offset = self.pos % self.block_size; + + try self.in_file.seekTo(block * self.block_size + offset); + const in = self.in_file.reader(); + + var size: usize = 0; + var rem_buffer = buffer; + while (size < buffer.len) { + const size_to_read = @min(self.block_size - offset, rem_buffer.len); + size += try in.read(rem_buffer[0..size_to_read]); + rem_buffer = buffer[size..]; + offset += size_to_read; + + // If we're at the end of a block, go to the next one. 
+ if (offset == self.block_size) { + offset = 0; + block_id += 1; + if (block_id >= self.blocks.len) break; // End of Stream + block = self.blocks[block_id]; + try self.in_file.seekTo(block * self.block_size); + } + } + + self.pos += buffer.len; + return buffer.len; + } + + pub fn seekBy(self: *MsfStream, len: i64) !void { + self.pos = @as(u64, @intCast(@as(i64, @intCast(self.pos)) + len)); + if (self.pos >= self.blocks.len * self.block_size) + return error.EOF; + } + + pub fn seekTo(self: *MsfStream, len: u64) !void { + self.pos = len; + if (self.pos >= self.blocks.len * self.block_size) + return error.EOF; + } + + fn getSize(self: *const MsfStream) u64 { + return self.blocks.len * self.block_size; + } + + fn getFilePos(self: MsfStream) u64 { + const block_id = self.pos / self.block_size; + const block = self.blocks[block_id]; + const offset = self.pos % self.block_size; + + return block * self.block_size + offset; + } + + pub fn reader(self: *MsfStream) std.io.Reader(*MsfStream, Error, read) { + return .{ .context = self }; + } +}; + +fn readSparseBitVector(stream: anytype, allocator: Allocator) ![]u32 { + const num_words = try stream.readInt(u32, .little); + var list = std.ArrayList(u32).init(allocator); + errdefer list.deinit(); + var word_i: u32 = 0; + while (word_i != num_words) : (word_i += 1) { + const word = try stream.readInt(u32, .little); + var bit_i: u5 = 0; + while (true) : (bit_i += 1) { + if (word & (@as(u32, 1) << bit_i) != 0) { + try list.append(word_i * 32 + bit_i); + } + if (bit_i == std.math.maxInt(u5)) break; + } + } + return try list.toOwnedSlice(); +} + +fn blockCountFromSize(size: u32, block_size: u32) u32 { + return (size + block_size - 1) / block_size; +} diff --git a/lib/std/debug/Info.zig b/lib/std/debug/SelfInfo.zig similarity index 96% rename from lib/std/debug/Info.zig rename to lib/std/debug/SelfInfo.zig index 9d3074834bb9..58fe4b23b263 100644 --- a/lib/std/debug/Info.zig +++ b/lib/std/debug/SelfInfo.zig @@ -1,4 +1,5 @@ -//! Cross-platform abstraction for debug information. +//! Cross-platform abstraction for this binary's own debug information, with a +//! goal of minimal code bloat and compilation speed penalty. 
const builtin = @import("builtin"); const native_os = builtin.os.tag; @@ -17,24 +18,25 @@ const assert = std.debug.assert; const posix = std.posix; const elf = std.elf; const Dwarf = std.debug.Dwarf; +const Pdb = std.debug.Pdb; const File = std.fs.File; const math = std.math; const testing = std.testing; -const Info = @This(); +const SelfInfo = @This(); const root = @import("root"); allocator: Allocator, address_map: std.AutoHashMap(usize, *Module), -modules: if (native_os == .windows) std.ArrayListUnmanaged(WindowsModuleInfo) else void, +modules: if (native_os == .windows) std.ArrayListUnmanaged(WindowsModule) else void, pub const OpenSelfError = error{ MissingDebugInfo, UnsupportedOperatingSystem, -} || @typeInfo(@typeInfo(@TypeOf(Info.init)).Fn.return_type.?).ErrorUnion.error_set; +} || @typeInfo(@typeInfo(@TypeOf(SelfInfo.init)).Fn.return_type.?).ErrorUnion.error_set; -pub fn openSelf(allocator: Allocator) OpenSelfError!Info { +pub fn openSelf(allocator: Allocator) OpenSelfError!SelfInfo { nosuspend { if (builtin.strip_debug_info) return error.MissingDebugInfo; @@ -51,14 +53,14 @@ pub fn openSelf(allocator: Allocator) OpenSelfError!Info { .solaris, .illumos, .windows, - => return try Info.init(allocator), + => return try SelfInfo.init(allocator), else => return error.UnsupportedOperatingSystem, } } } -pub fn init(allocator: Allocator) !Info { - var debug_info: Info = .{ +pub fn init(allocator: Allocator) !SelfInfo { + var debug_info: SelfInfo = .{ .allocator = allocator, .address_map = std.AutoHashMap(usize, *Module).init(allocator), .modules = if (native_os == .windows) .{} else {}, @@ -101,7 +103,7 @@ pub fn init(allocator: Allocator) !Info { return debug_info; } -pub fn deinit(self: *Info) void { +pub fn deinit(self: *SelfInfo) void { var it = self.address_map.iterator(); while (it.next()) |entry| { const mdi = entry.value_ptr.*; @@ -118,7 +120,7 @@ pub fn deinit(self: *Info) void { } } -pub fn getModuleForAddress(self: *Info, address: usize) !*Module { +pub fn getModuleForAddress(self: *SelfInfo, address: usize) !*Module { if (comptime builtin.target.isDarwin()) { return self.lookupModuleDyld(address); } else if (native_os == .windows) { @@ -135,7 +137,7 @@ pub fn getModuleForAddress(self: *Info, address: usize) !*Module { // Returns the module name for a given address. // This can be called when getModuleForAddress fails, so implementations should provide // a path that doesn't rely on any side-effects of a prior successful module lookup. 
-pub fn getModuleNameForAddress(self: *Info, address: usize) ?[]const u8 { +pub fn getModuleNameForAddress(self: *SelfInfo, address: usize) ?[]const u8 { if (comptime builtin.target.isDarwin()) { return self.lookupModuleNameDyld(address); } else if (native_os == .windows) { @@ -149,7 +151,7 @@ pub fn getModuleNameForAddress(self: *Info, address: usize) ?[]const u8 { } } -fn lookupModuleDyld(self: *Info, address: usize) !*Module { +fn lookupModuleDyld(self: *SelfInfo, address: usize) !*Module { const image_count = std.c._dyld_image_count(); var i: u32 = 0; @@ -215,7 +217,7 @@ fn lookupModuleDyld(self: *Info, address: usize) !*Module { return error.MissingDebugInfo; } -fn lookupModuleNameDyld(self: *Info, address: usize) ?[]const u8 { +fn lookupModuleNameDyld(self: *SelfInfo, address: usize) ?[]const u8 { _ = self; const image_count = std.c._dyld_image_count(); @@ -253,7 +255,7 @@ fn lookupModuleNameDyld(self: *Info, address: usize) ?[]const u8 { return null; } -fn lookupModuleWin32(self: *Info, address: usize) !*Module { +fn lookupModuleWin32(self: *SelfInfo, address: usize) !*Module { for (self.modules.items) |*module| { if (address >= module.base_address and address < module.base_address + module.size) { if (self.address_map.get(module.base_address)) |obj_di| { @@ -343,7 +345,7 @@ fn lookupModuleWin32(self: *Info, address: usize) !*Module { return error.MissingDebugInfo; } -fn lookupModuleNameWin32(self: *Info, address: usize) ?[]const u8 { +fn lookupModuleNameWin32(self: *SelfInfo, address: usize) ?[]const u8 { for (self.modules.items) |module| { if (address >= module.base_address and address < module.base_address + module.size) { return module.name; @@ -352,7 +354,7 @@ fn lookupModuleNameWin32(self: *Info, address: usize) ?[]const u8 { return null; } -fn lookupModuleNameDl(self: *Info, address: usize) ?[]const u8 { +fn lookupModuleNameDl(self: *SelfInfo, address: usize) ?[]const u8 { _ = self; var ctx: struct { @@ -390,7 +392,7 @@ fn lookupModuleNameDl(self: *Info, address: usize) ?[]const u8 { return null; } -fn lookupModuleDl(self: *Info, address: usize) !*Module { +fn lookupModuleDl(self: *SelfInfo, address: usize) !*Module { var ctx: struct { // Input address: usize, @@ -484,13 +486,13 @@ fn lookupModuleDl(self: *Info, address: usize) !*Module { return obj_di; } -fn lookupModuleHaiku(self: *Info, address: usize) !*Module { +fn lookupModuleHaiku(self: *SelfInfo, address: usize) !*Module { _ = self; _ = address; @panic("TODO implement lookup module for Haiku"); } -fn lookupModuleWasm(self: *Info, address: usize) !*Module { +fn lookupModuleWasm(self: *SelfInfo, address: usize) !*Module { _ = self; _ = address; @panic("TODO implement lookup module for Wasm"); @@ -709,7 +711,7 @@ pub const Module = switch (native_os) { }, .uefi, .windows => struct { base_address: usize, - pdb: ?pdb.Pdb = null, + pdb: ?Pdb = null, dwarf: ?Dwarf = null, coff_image_base: u64, @@ -837,7 +839,11 @@ pub const Module = switch (native_os) { else => Dwarf, }; -pub const WindowsModuleInfo = struct { +/// How is this different than `Module` when the host is Windows? +/// Why are both stored in the `SelfInfo` struct? +/// Boy, it sure would be nice if someone added documentation comments for this +/// struct explaining it. 
+pub const WindowsModule = struct { base_address: usize, size: u32, name: []const u8, @@ -1030,7 +1036,7 @@ fn readCoffDebugInfo(allocator: Allocator, coff_obj: *coff.Coff) !Module { }; defer if (path.ptr != raw_path.ptr) allocator.free(path); - di.pdb = pdb.Pdb.init(allocator, path) catch |err| switch (err) { + di.pdb = Pdb.init(allocator, path) catch |err| switch (err) { error.FileNotFound, error.IsDir => { if (di.dwarf == null) return error.MissingDebugInfo; return di; @@ -1292,22 +1298,10 @@ fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 pub const SymbolInfo = struct { symbol_name: []const u8 = "???", compile_unit_name: []const u8 = "???", - line_info: ?SourceLocation = null, + line_info: ?std.debug.SourceLocation = null, pub fn deinit(self: SymbolInfo, allocator: Allocator) void { - if (self.line_info) |li| { - li.deinit(allocator); - } - } -}; - -pub const SourceLocation = struct { - line: u64, - column: u64, - file_name: []const u8, - - pub fn deinit(self: SourceLocation, allocator: Allocator) void { - allocator.free(self.file_name); + if (self.line_info) |li| allocator.free(li.file_name); } }; diff --git a/lib/std/pdb.zig b/lib/std/pdb.zig index c96eb81fa9f5..31ad02e94564 100644 --- a/lib/std/pdb.zig +++ b/lib/std/pdb.zig @@ -1,3 +1,12 @@ +//! Program Data Base debugging information format. +//! +//! This namespace contains unopinionated types and data definitions only. For +//! an implementation of parsing and caching PDB information, see +//! `std.debug.Pdb`. +//! +//! Most of this is based on information gathered from LLVM source code, +//! documentation and/or contributors. + const std = @import("std.zig"); const io = std.io; const math = std.math; @@ -9,10 +18,7 @@ const debug = std.debug; const ArrayList = std.ArrayList; -// Note: most of this is based on information gathered from LLVM source code, -// documentation and/or contributors. - -// https://llvm.org/docs/PDB/DbiStream.html#stream-header +/// https://llvm.org/docs/PDB/DbiStream.html#stream-header pub const DbiStreamHeader = extern struct { VersionSignature: i32, VersionHeader: u32, @@ -415,10 +421,8 @@ pub const ColumnNumberEntry = extern struct { pub const FileChecksumEntryHeader = extern struct { /// Byte offset of filename in global string table. FileNameOffset: u32, - /// Number of bytes of checksum. ChecksumSize: u8, - /// FileChecksumKind ChecksumKind: u8, }; @@ -451,525 +455,15 @@ pub const DebugSubsectionHeader = extern struct { Length: u32, }; -pub const PDBStringTableHeader = extern struct { +pub const StringTableHeader = extern struct { /// PDBStringTableSignature Signature: u32, - /// 1 or 2 HashVersion: u32, - /// Number of bytes of names buffer. 
ByteSize: u32, }; -fn readSparseBitVector(stream: anytype, allocator: mem.Allocator) ![]u32 { - const num_words = try stream.readInt(u32, .little); - var list = ArrayList(u32).init(allocator); - errdefer list.deinit(); - var word_i: u32 = 0; - while (word_i != num_words) : (word_i += 1) { - const word = try stream.readInt(u32, .little); - var bit_i: u5 = 0; - while (true) : (bit_i += 1) { - if (word & (@as(u32, 1) << bit_i) != 0) { - try list.append(word_i * 32 + bit_i); - } - if (bit_i == std.math.maxInt(u5)) break; - } - } - return try list.toOwnedSlice(); -} - -pub const Pdb = struct { - in_file: File, - msf: Msf, - allocator: mem.Allocator, - string_table: ?*MsfStream, - dbi: ?*MsfStream, - modules: []Module, - sect_contribs: []SectionContribEntry, - guid: [16]u8, - age: u32, - - pub const Module = struct { - mod_info: ModInfo, - module_name: []u8, - obj_file_name: []u8, - // The fields below are filled on demand. - populated: bool, - symbols: []u8, - subsect_info: []u8, - checksum_offset: ?usize, - - pub fn deinit(self: *Module, allocator: mem.Allocator) void { - allocator.free(self.module_name); - allocator.free(self.obj_file_name); - if (self.populated) { - allocator.free(self.symbols); - allocator.free(self.subsect_info); - } - } - }; - - pub fn init(allocator: mem.Allocator, path: []const u8) !Pdb { - const file = try fs.cwd().openFile(path, .{}); - errdefer file.close(); - - return Pdb{ - .in_file = file, - .allocator = allocator, - .string_table = null, - .dbi = null, - .msf = try Msf.init(allocator, file), - .modules = &[_]Module{}, - .sect_contribs = &[_]SectionContribEntry{}, - .guid = undefined, - .age = undefined, - }; - } - - pub fn deinit(self: *Pdb) void { - self.in_file.close(); - self.msf.deinit(self.allocator); - for (self.modules) |*module| { - module.deinit(self.allocator); - } - self.allocator.free(self.modules); - self.allocator.free(self.sect_contribs); - } - - pub fn parseDbiStream(self: *Pdb) !void { - var stream = self.getStream(StreamType.Dbi) orelse - return error.InvalidDebugInfo; - const reader = stream.reader(); - - const header = try reader.readStruct(DbiStreamHeader); - if (header.VersionHeader != 19990903) // V70, only value observed by LLVM team - return error.UnknownPDBVersion; - // if (header.Age != age) - // return error.UnmatchingPDB; - - const mod_info_size = header.ModInfoSize; - const section_contrib_size = header.SectionContributionSize; - - var modules = ArrayList(Module).init(self.allocator); - errdefer modules.deinit(); - - // Module Info Substream - var mod_info_offset: usize = 0; - while (mod_info_offset != mod_info_size) { - const mod_info = try reader.readStruct(ModInfo); - var this_record_len: usize = @sizeOf(ModInfo); - - const module_name = try reader.readUntilDelimiterAlloc(self.allocator, 0, 1024); - errdefer self.allocator.free(module_name); - this_record_len += module_name.len + 1; - - const obj_file_name = try reader.readUntilDelimiterAlloc(self.allocator, 0, 1024); - errdefer self.allocator.free(obj_file_name); - this_record_len += obj_file_name.len + 1; - - if (this_record_len % 4 != 0) { - const round_to_next_4 = (this_record_len | 0x3) + 1; - const march_forward_bytes = round_to_next_4 - this_record_len; - try stream.seekBy(@as(isize, @intCast(march_forward_bytes))); - this_record_len += march_forward_bytes; - } - - try modules.append(Module{ - .mod_info = mod_info, - .module_name = module_name, - .obj_file_name = obj_file_name, - - .populated = false, - .symbols = undefined, - .subsect_info = undefined, - .checksum_offset = 
null, - }); - - mod_info_offset += this_record_len; - if (mod_info_offset > mod_info_size) - return error.InvalidDebugInfo; - } - - // Section Contribution Substream - var sect_contribs = ArrayList(SectionContribEntry).init(self.allocator); - errdefer sect_contribs.deinit(); - - var sect_cont_offset: usize = 0; - if (section_contrib_size != 0) { - const version = reader.readEnum(SectionContrSubstreamVersion, .little) catch |err| switch (err) { - error.InvalidValue => return error.InvalidDebugInfo, - else => |e| return e, - }; - _ = version; - sect_cont_offset += @sizeOf(u32); - } - while (sect_cont_offset != section_contrib_size) { - const entry = try sect_contribs.addOne(); - entry.* = try reader.readStruct(SectionContribEntry); - sect_cont_offset += @sizeOf(SectionContribEntry); - - if (sect_cont_offset > section_contrib_size) - return error.InvalidDebugInfo; - } - - self.modules = try modules.toOwnedSlice(); - self.sect_contribs = try sect_contribs.toOwnedSlice(); - } - - pub fn parseInfoStream(self: *Pdb) !void { - var stream = self.getStream(StreamType.Pdb) orelse - return error.InvalidDebugInfo; - const reader = stream.reader(); - - // Parse the InfoStreamHeader. - const version = try reader.readInt(u32, .little); - const signature = try reader.readInt(u32, .little); - _ = signature; - const age = try reader.readInt(u32, .little); - const guid = try reader.readBytesNoEof(16); - - if (version != 20000404) // VC70, only value observed by LLVM team - return error.UnknownPDBVersion; - - self.guid = guid; - self.age = age; - - // Find the string table. - const string_table_index = str_tab_index: { - const name_bytes_len = try reader.readInt(u32, .little); - const name_bytes = try self.allocator.alloc(u8, name_bytes_len); - defer self.allocator.free(name_bytes); - try reader.readNoEof(name_bytes); - - const HashTableHeader = extern struct { - Size: u32, - Capacity: u32, - - fn maxLoad(cap: u32) u32 { - return cap * 2 / 3 + 1; - } - }; - const hash_tbl_hdr = try reader.readStruct(HashTableHeader); - if (hash_tbl_hdr.Capacity == 0) - return error.InvalidDebugInfo; - - if (hash_tbl_hdr.Size > HashTableHeader.maxLoad(hash_tbl_hdr.Capacity)) - return error.InvalidDebugInfo; - - const present = try readSparseBitVector(&reader, self.allocator); - defer self.allocator.free(present); - if (present.len != hash_tbl_hdr.Size) - return error.InvalidDebugInfo; - const deleted = try readSparseBitVector(&reader, self.allocator); - defer self.allocator.free(deleted); - - for (present) |_| { - const name_offset = try reader.readInt(u32, .little); - const name_index = try reader.readInt(u32, .little); - if (name_offset > name_bytes.len) - return error.InvalidDebugInfo; - const name = mem.sliceTo(name_bytes[name_offset..], 0); - if (mem.eql(u8, name, "/names")) { - break :str_tab_index name_index; - } - } - return error.MissingDebugInfo; - }; - - self.string_table = self.getStreamById(string_table_index) orelse - return error.MissingDebugInfo; - } - - pub fn getSymbolName(self: *Pdb, module: *Module, address: u64) ?[]const u8 { - _ = self; - std.debug.assert(module.populated); - - var symbol_i: usize = 0; - while (symbol_i != module.symbols.len) { - const prefix = @as(*align(1) RecordPrefix, @ptrCast(&module.symbols[symbol_i])); - if (prefix.RecordLen < 2) - return null; - switch (prefix.RecordKind) { - .S_LPROC32, .S_GPROC32 => { - const proc_sym = @as(*align(1) ProcSym, @ptrCast(&module.symbols[symbol_i + @sizeOf(RecordPrefix)])); - if (address >= proc_sym.CodeOffset and address < proc_sym.CodeOffset + 
proc_sym.CodeSize) { - return mem.sliceTo(@as([*:0]u8, @ptrCast(&proc_sym.Name[0])), 0); - } - }, - else => {}, - } - symbol_i += prefix.RecordLen + @sizeOf(u16); - } - - return null; - } - - pub fn getLineNumberInfo(self: *Pdb, module: *Module, address: u64) !debug.Info.SourceLocation { - std.debug.assert(module.populated); - const subsect_info = module.subsect_info; - - var sect_offset: usize = 0; - var skip_len: usize = undefined; - const checksum_offset = module.checksum_offset orelse return error.MissingDebugInfo; - while (sect_offset != subsect_info.len) : (sect_offset += skip_len) { - const subsect_hdr = @as(*align(1) DebugSubsectionHeader, @ptrCast(&subsect_info[sect_offset])); - skip_len = subsect_hdr.Length; - sect_offset += @sizeOf(DebugSubsectionHeader); - - switch (subsect_hdr.Kind) { - .Lines => { - var line_index = sect_offset; - - const line_hdr = @as(*align(1) LineFragmentHeader, @ptrCast(&subsect_info[line_index])); - if (line_hdr.RelocSegment == 0) - return error.MissingDebugInfo; - line_index += @sizeOf(LineFragmentHeader); - const frag_vaddr_start = line_hdr.RelocOffset; - const frag_vaddr_end = frag_vaddr_start + line_hdr.CodeSize; - - if (address >= frag_vaddr_start and address < frag_vaddr_end) { - // There is an unknown number of LineBlockFragmentHeaders (and their accompanying line and column records) - // from now on. We will iterate through them, and eventually find a SourceLocation that we're interested in, - // breaking out to :subsections. If not, we will make sure to not read anything outside of this subsection. - const subsection_end_index = sect_offset + subsect_hdr.Length; - - while (line_index < subsection_end_index) { - const block_hdr = @as(*align(1) LineBlockFragmentHeader, @ptrCast(&subsect_info[line_index])); - line_index += @sizeOf(LineBlockFragmentHeader); - const start_line_index = line_index; - - const has_column = line_hdr.Flags.LF_HaveColumns; - - // All line entries are stored inside their line block by ascending start address. - // Heuristic: we want to find the last line entry - // that has a vaddr_start <= address. - // This is done with a simple linear search. - var line_i: u32 = 0; - while (line_i < block_hdr.NumLines) : (line_i += 1) { - const line_num_entry = @as(*align(1) LineNumberEntry, @ptrCast(&subsect_info[line_index])); - line_index += @sizeOf(LineNumberEntry); - - const vaddr_start = frag_vaddr_start + line_num_entry.Offset; - if (address < vaddr_start) { - break; - } - } - - // line_i == 0 would mean that no matching LineNumberEntry was found. 
- if (line_i > 0) { - const subsect_index = checksum_offset + block_hdr.NameIndex; - const chksum_hdr = @as(*align(1) FileChecksumEntryHeader, @ptrCast(&module.subsect_info[subsect_index])); - const strtab_offset = @sizeOf(PDBStringTableHeader) + chksum_hdr.FileNameOffset; - try self.string_table.?.seekTo(strtab_offset); - const source_file_name = try self.string_table.?.reader().readUntilDelimiterAlloc(self.allocator, 0, 1024); - - const line_entry_idx = line_i - 1; - - const column = if (has_column) blk: { - const start_col_index = start_line_index + @sizeOf(LineNumberEntry) * block_hdr.NumLines; - const col_index = start_col_index + @sizeOf(ColumnNumberEntry) * line_entry_idx; - const col_num_entry = @as(*align(1) ColumnNumberEntry, @ptrCast(&subsect_info[col_index])); - break :blk col_num_entry.StartColumn; - } else 0; - - const found_line_index = start_line_index + line_entry_idx * @sizeOf(LineNumberEntry); - const line_num_entry: *align(1) LineNumberEntry = @ptrCast(&subsect_info[found_line_index]); - const flags: *align(1) LineNumberEntry.Flags = @ptrCast(&line_num_entry.Flags); - - return debug.Info.SourceLocation{ - .file_name = source_file_name, - .line = flags.Start, - .column = column, - }; - } - } - - // Checking that we are not reading garbage after the (possibly) multiple block fragments. - if (line_index != subsection_end_index) { - return error.InvalidDebugInfo; - } - } - }, - else => {}, - } - - if (sect_offset > subsect_info.len) - return error.InvalidDebugInfo; - } - - return error.MissingDebugInfo; - } - - pub fn getModule(self: *Pdb, index: usize) !?*Module { - if (index >= self.modules.len) - return null; - - const mod = &self.modules[index]; - if (mod.populated) - return mod; - - // At most one can be non-zero. - if (mod.mod_info.C11ByteSize != 0 and mod.mod_info.C13ByteSize != 0) - return error.InvalidDebugInfo; - if (mod.mod_info.C13ByteSize == 0) - return error.InvalidDebugInfo; - - const stream = self.getStreamById(mod.mod_info.ModuleSymStream) orelse - return error.MissingDebugInfo; - const reader = stream.reader(); - - const signature = try reader.readInt(u32, .little); - if (signature != 4) - return error.InvalidDebugInfo; - - mod.symbols = try self.allocator.alloc(u8, mod.mod_info.SymByteSize - 4); - errdefer self.allocator.free(mod.symbols); - try reader.readNoEof(mod.symbols); - - mod.subsect_info = try self.allocator.alloc(u8, mod.mod_info.C13ByteSize); - errdefer self.allocator.free(mod.subsect_info); - try reader.readNoEof(mod.subsect_info); - - var sect_offset: usize = 0; - var skip_len: usize = undefined; - while (sect_offset != mod.subsect_info.len) : (sect_offset += skip_len) { - const subsect_hdr = @as(*align(1) DebugSubsectionHeader, @ptrCast(&mod.subsect_info[sect_offset])); - skip_len = subsect_hdr.Length; - sect_offset += @sizeOf(DebugSubsectionHeader); - - switch (subsect_hdr.Kind) { - .FileChecksums => { - mod.checksum_offset = sect_offset; - break; - }, - else => {}, - } - - if (sect_offset > mod.subsect_info.len) - return error.InvalidDebugInfo; - } - - mod.populated = true; - return mod; - } - - pub fn getStreamById(self: *Pdb, id: u32) ?*MsfStream { - if (id >= self.msf.streams.len) - return null; - return &self.msf.streams[id]; - } - - pub fn getStream(self: *Pdb, stream: StreamType) ?*MsfStream { - const id = @intFromEnum(stream); - return self.getStreamById(id); - } -}; - -// see https://llvm.org/docs/PDB/MsfFile.html -const Msf = struct { - directory: MsfStream, - streams: []MsfStream, - - fn init(allocator: mem.Allocator, file: File) 
!Msf { - const in = file.reader(); - - const superblock = try in.readStruct(SuperBlock); - - // Sanity checks - if (!mem.eql(u8, &superblock.FileMagic, SuperBlock.file_magic)) - return error.InvalidDebugInfo; - if (superblock.FreeBlockMapBlock != 1 and superblock.FreeBlockMapBlock != 2) - return error.InvalidDebugInfo; - const file_len = try file.getEndPos(); - if (superblock.NumBlocks * superblock.BlockSize != file_len) - return error.InvalidDebugInfo; - switch (superblock.BlockSize) { - // llvm only supports 4096 but we can handle any of these values - 512, 1024, 2048, 4096 => {}, - else => return error.InvalidDebugInfo, - } - - const dir_block_count = blockCountFromSize(superblock.NumDirectoryBytes, superblock.BlockSize); - if (dir_block_count > superblock.BlockSize / @sizeOf(u32)) - return error.UnhandledBigDirectoryStream; // cf. BlockMapAddr comment. - - try file.seekTo(superblock.BlockSize * superblock.BlockMapAddr); - const dir_blocks = try allocator.alloc(u32, dir_block_count); - for (dir_blocks) |*b| { - b.* = try in.readInt(u32, .little); - } - var directory = MsfStream.init( - superblock.BlockSize, - file, - dir_blocks, - ); - - const begin = directory.pos; - const stream_count = try directory.reader().readInt(u32, .little); - const stream_sizes = try allocator.alloc(u32, stream_count); - defer allocator.free(stream_sizes); - - // Microsoft's implementation uses @as(u32, -1) for inexistent streams. - // These streams are not used, but still participate in the file - // and must be taken into account when resolving stream indices. - const Nil = 0xFFFFFFFF; - for (stream_sizes) |*s| { - const size = try directory.reader().readInt(u32, .little); - s.* = if (size == Nil) 0 else blockCountFromSize(size, superblock.BlockSize); - } - - const streams = try allocator.alloc(MsfStream, stream_count); - for (streams, 0..) |*stream, i| { - const size = stream_sizes[i]; - if (size == 0) { - stream.* = MsfStream{ - .blocks = &[_]u32{}, - }; - } else { - var blocks = try allocator.alloc(u32, size); - var j: u32 = 0; - while (j < size) : (j += 1) { - const block_id = try directory.reader().readInt(u32, .little); - const n = (block_id % superblock.BlockSize); - // 0 is for SuperBlock, 1 and 2 for FPMs. - if (block_id == 0 or n == 1 or n == 2 or block_id * superblock.BlockSize > file_len) - return error.InvalidBlockIndex; - blocks[j] = block_id; - } - - stream.* = MsfStream.init( - superblock.BlockSize, - file, - blocks, - ); - } - } - - const end = directory.pos; - if (end - begin != superblock.NumDirectoryBytes) - return error.InvalidStreamDirectory; - - return Msf{ - .directory = directory, - .streams = streams, - }; - } - - fn deinit(self: *Msf, allocator: mem.Allocator) void { - allocator.free(self.directory.blocks); - for (self.streams) |*stream| { - allocator.free(stream.blocks); - } - allocator.free(self.streams); - } -}; - -fn blockCountFromSize(size: u32, block_size: u32) u32 { - return (size + block_size - 1) / block_size; -} - // https://llvm.org/docs/PDB/MsfFile.html#the-superblock pub const SuperBlock = extern struct { /// The LLVM docs list a space between C / C++ but empirically this is not the case. @@ -1016,82 +510,3 @@ pub const SuperBlock = extern struct { // implement it so we're kind of safe making this assumption for now. 
BlockMapAddr: u32, }; - -const MsfStream = struct { - in_file: File = undefined, - pos: u64 = undefined, - blocks: []u32 = undefined, - block_size: u32 = undefined, - - pub const Error = @typeInfo(@typeInfo(@TypeOf(read)).Fn.return_type.?).ErrorUnion.error_set; - - fn init(block_size: u32, file: File, blocks: []u32) MsfStream { - const stream = MsfStream{ - .in_file = file, - .pos = 0, - .blocks = blocks, - .block_size = block_size, - }; - - return stream; - } - - fn read(self: *MsfStream, buffer: []u8) !usize { - var block_id = @as(usize, @intCast(self.pos / self.block_size)); - if (block_id >= self.blocks.len) return 0; // End of Stream - var block = self.blocks[block_id]; - var offset = self.pos % self.block_size; - - try self.in_file.seekTo(block * self.block_size + offset); - const in = self.in_file.reader(); - - var size: usize = 0; - var rem_buffer = buffer; - while (size < buffer.len) { - const size_to_read = @min(self.block_size - offset, rem_buffer.len); - size += try in.read(rem_buffer[0..size_to_read]); - rem_buffer = buffer[size..]; - offset += size_to_read; - - // If we're at the end of a block, go to the next one. - if (offset == self.block_size) { - offset = 0; - block_id += 1; - if (block_id >= self.blocks.len) break; // End of Stream - block = self.blocks[block_id]; - try self.in_file.seekTo(block * self.block_size); - } - } - - self.pos += buffer.len; - return buffer.len; - } - - pub fn seekBy(self: *MsfStream, len: i64) !void { - self.pos = @as(u64, @intCast(@as(i64, @intCast(self.pos)) + len)); - if (self.pos >= self.blocks.len * self.block_size) - return error.EOF; - } - - pub fn seekTo(self: *MsfStream, len: u64) !void { - self.pos = len; - if (self.pos >= self.blocks.len * self.block_size) - return error.EOF; - } - - fn getSize(self: *const MsfStream) u64 { - return self.blocks.len * self.block_size; - } - - fn getFilePos(self: MsfStream) u64 { - const block_id = self.pos / self.block_size; - const block = self.blocks[block_id]; - const offset = self.pos % self.block_size; - - return block * self.block_size + offset; - } - - pub fn reader(self: *MsfStream) std.io.Reader(*MsfStream, Error, read) { - return .{ .context = self }; - } -};
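
For reference, a minimal usage sketch of the API surface this patch settles on, assuming a standalone program outside the standard library; the `main` entry point, the choice of `std.heap.page_allocator`, and the use of `@returnAddress()` are illustrative assumptions, while `SelfInfo.openSelf`, `deinit`, and `printSourceAtAddress` follow the declarations in the diff above:

const std = @import("std");

pub fn main() !void {
    // Sketch only: `std.debug.SelfInfo` is the renamed `std.debug.Info`; it
    // describes the running executable's own debug information.
    var info = try std.debug.SelfInfo.openSelf(std.heap.page_allocator);
    defer info.deinit();

    // Resolve the current return address to file/line/column and print it,
    // mirroring how the test in lib/std/debug.zig exercises the same call.
    const stderr = std.io.getStdErr();
    try std.debug.printSourceAtAddress(
        &info,
        stderr.writer(),
        @returnAddress(),
        std.io.tty.detectConfig(stderr),
    );
}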