From 658a02c24f452dd70df35e44444c90a696cc3f65 Mon Sep 17 00:00:00 2001 From: Alex Borcan Date: Fri, 17 Jan 2025 11:33:20 -0800 Subject: [PATCH 1/8] [DebugInfo] Add option to use DW_AT_LLVM_stmt_sequence in line table lookups --- .../llvm/DebugInfo/DWARF/DWARFDebugLine.h | 43 +++++- llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 37 ++++- .../DebugInfo/DWARF/DWARFDebugLineTest.cpp | 133 +++++++++++++++++- 3 files changed, 201 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index ff7bf87d8e6b5..732a4bfc28ab3 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -209,6 +209,9 @@ class DWARFDebugLine { unsigned LastRowIndex; bool Empty; + /// The offset into the line table where this sequence begins + uint64_t Offset = 0; + void reset(); static bool orderByHighPC(const Sequence &LHS, const Sequence &RHS) { @@ -243,8 +246,20 @@ class DWARFDebugLine { uint32_t lookupAddress(object::SectionedAddress Address, bool *IsApproximateLine = nullptr) const; + /// Fills the Result argument with the indices of the rows that correspond + /// to the address range specified by \p Address and \p Size. + /// + /// \param Address - The starting address of the range. + /// \param Size - The size of the address range. + /// \param Result - The vector to fill with row indices. + /// \param Die - if provided, the function will check for a + /// DW_AT_LLVM_stmt_sequence attribute. If present, only rows from the + /// sequence starting at the matching offset will be added to the result. + /// + /// Returns true if any rows were found. bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size, - std::vector &Result) const; + std::vector &Result, + const DWARFDie *Die = nullptr) const; bool hasFileAtIndex(uint64_t FileIndex) const { return Prologue.hasFileAtIndex(FileIndex); @@ -305,8 +320,20 @@ class DWARFDebugLine { uint32_t lookupAddressImpl(object::SectionedAddress Address, bool *IsApproximateLine = nullptr) const; - bool lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size, - std::vector &Result) const; + /// Fills the Result argument with the indices of the rows that correspond + /// to the address range specified by \p Address and \p Size. + /// + /// \param Address - The starting address of the range. + /// \param Size - The size of the address range. + /// \param Result - The vector to fill with row indices. + /// \param StmtSequenceOffset - if provided, only rows from the sequence + /// starting at the matching offset will be added to the result. + /// + /// Returns true if any rows were found. + bool + lookupAddressRangeImpl(object::SectionedAddress Address, uint64_t Size, + std::vector &Result, + std::optional StmtSequenceOffset) const; }; const LineTable *getLineTable(uint64_t Offset) const; @@ -377,7 +404,15 @@ class DWARFDebugLine { function_ref ErrorHandler); void resetRowAndSequence(); - void appendRowToMatrix(); + + /// Append the current Row to the LineTable's matrix of rows and update the + /// current Sequence information. + /// + /// \param LineTableOffset - the offset into the line table where the + /// current sequence of rows begins. This offset is stored in the Sequence + /// to allow filtering rows based on their originating sequence when a + /// DW_AT_LLVM_stmt_sequence attribute is present. + void appendRowToMatrix(uint64_t LineTableOffset); struct AddrOpIndexDelta { uint64_t AddrOffset; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index 939a5163d55ab..27b4c6a548912 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -570,8 +570,9 @@ void DWARFDebugLine::ParsingState::resetRowAndSequence() { Sequence.reset(); } -void DWARFDebugLine::ParsingState::appendRowToMatrix() { +void DWARFDebugLine::ParsingState::appendRowToMatrix(uint64_t LineTableOffset) { unsigned RowNumber = LineTable->Rows.size(); + Sequence.Offset = LineTableOffset; if (Sequence.Empty) { // Record the beginning of instruction sequence. Sequence.Empty = false; @@ -848,6 +849,8 @@ Error DWARFDebugLine::LineTable::parse( *OS << '\n'; Row::dumpTableHeader(*OS, /*Indent=*/Verbose ? 12 : 0); } + uint64_t LineTableSeqOffset = *OffsetPtr; + bool TombstonedAddress = false; auto EmitRow = [&] { if (!TombstonedAddress) { @@ -857,7 +860,7 @@ Error DWARFDebugLine::LineTable::parse( } if (OS) State.Row.dump(*OS); - State.appendRowToMatrix(); + State.appendRowToMatrix(LineTableSeqOffset); } }; while (*OffsetPtr < EndOffset) { @@ -913,6 +916,9 @@ Error DWARFDebugLine::LineTable::parse( // followed. EmitRow(); State.resetRowAndSequence(); + // Cursor now points to right after the end_sequence opcode - so points + // to the start of the next sequence - if one exists. + LineTableSeqOffset = Cursor.tell(); break; case DW_LNE_set_address: @@ -1364,10 +1370,18 @@ DWARFDebugLine::LineTable::lookupAddressImpl(object::SectionedAddress Address, bool DWARFDebugLine::LineTable::lookupAddressRange( object::SectionedAddress Address, uint64_t Size, - std::vector &Result) const { + std::vector &Result, const DWARFDie *Die) const { + + // If DIE is provided, check for DW_AT_LLVM_stmt_sequence + std::optional StmtSequenceOffset; + if (Die) { + if (auto StmtSeqAttr = Die->find(dwarf::DW_AT_LLVM_stmt_sequence)) { + StmtSequenceOffset = toSectionOffset(StmtSeqAttr); + } + } // Search for relocatable addresses - if (lookupAddressRangeImpl(Address, Size, Result)) + if (lookupAddressRangeImpl(Address, Size, Result, StmtSequenceOffset)) return true; if (Address.SectionIndex == object::SectionedAddress::UndefSection) @@ -1375,12 +1389,13 @@ bool DWARFDebugLine::LineTable::lookupAddressRange( // Search for absolute addresses Address.SectionIndex = object::SectionedAddress::UndefSection; - return lookupAddressRangeImpl(Address, Size, Result); + return lookupAddressRangeImpl(Address, Size, Result, StmtSequenceOffset); } bool DWARFDebugLine::LineTable::lookupAddressRangeImpl( object::SectionedAddress Address, uint64_t Size, - std::vector &Result) const { + std::vector &Result, + std::optional StmtSequenceOffset) const { if (Sequences.empty()) return false; uint64_t EndAddr = Address.Address + Size; @@ -1401,6 +1416,14 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl( while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) { const DWARFDebugLine::Sequence &CurSeq = *SeqPos; + + // Skip sequences that don't match our stmt_sequence offset if one was + // provided + if (StmtSequenceOffset && CurSeq.Offset != *StmtSequenceOffset) { + ++SeqPos; + continue; + } + // For the first sequence, we need to find which row in the sequence is the // first in our range. uint32_t FirstRowIndex = CurSeq.FirstRowIndex; @@ -1423,7 +1446,7 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl( ++SeqPos; } - return true; + return !Result.empty(); } std::optional diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp index e549128031744..12ec63d10449f 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" #include "DwarfGenerator.h" #include "DwarfUtils.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Testing/Support/Error.h" #include "gtest/gtest.h" @@ -2035,4 +2035,135 @@ TEST_F(DebugLineBasicFixture, PrintPathsProperly) { EXPECT_THAT(Result.c_str(), MatchesRegex("a dir.b dir.b file")); } +/// Test that lookupAddressRange correctly filters rows based on +/// DW_AT_LLVM_stmt_sequence. +/// +/// This test verifies that: +/// 1. When a DIE has a DW_AT_LLVM_stmt_sequence attribute, lookupAddressRange +/// only returns rows from the sequence starting at the specified offset +/// 2. When a DIE has an invalid DW_AT_LLVM_stmt_sequence offset, no rows are +/// returned +/// 3. When no DW_AT_LLVM_stmt_sequence is present, all matching rows are +/// returned +/// +/// The test creates a line table with two sequences at the same address range +/// but different line numbers. It then creates three subprogram DIEs: +/// - One with DW_AT_LLVM_stmt_sequence pointing to the first sequence +/// - One with DW_AT_LLVM_stmt_sequence pointing to the second sequence +/// - One with an invalid DW_AT_LLVM_stmt_sequence offset +TEST_F(DebugLineBasicFixture, LookupAddressRangeWithStmtSequenceOffset) { + if (!setupGenerator()) + GTEST_SKIP(); + + // Create new DWARF with the subprogram DIE + dwarfgen::CompileUnit &CU = Gen->addCompileUnit(); + dwarfgen::DIE CUDie = CU.getUnitDIE(); + + CUDie.addAttribute(DW_AT_name, DW_FORM_strp, "/tmp/main.c"); + CUDie.addAttribute(DW_AT_language, DW_FORM_data2, DW_LANG_C); + + dwarfgen::DIE SD1 = CUDie.addChild(DW_TAG_subprogram); + SD1.addAttribute(DW_AT_name, DW_FORM_strp, "sub1"); + SD1.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U); + SD1.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U); + // DW_AT_LLVM_stmt_sequence points to the first sequence + SD1.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x2e); + + dwarfgen::DIE SD2 = CUDie.addChild(DW_TAG_subprogram); + SD2.addAttribute(DW_AT_name, DW_FORM_strp, "sub2"); + SD2.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U); + SD2.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U); + // DW_AT_LLVM_stmt_sequence points to the second sequence + SD2.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x42); + + dwarfgen::DIE SD3 = CUDie.addChild(DW_TAG_subprogram); + SD3.addAttribute(DW_AT_name, DW_FORM_strp, "sub3"); + SD3.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U); + SD3.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U); + // Invalid DW_AT_LLVM_stmt_sequence + SD3.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x66); + + // Create a line table with multiple sequences + LineTable < = Gen->addLineTable(); + + // First sequence with addresses 0x1000(Ln100), 0x1004(Ln101) + LT.addExtendedOpcode(9, DW_LNE_set_address, {{0x1000U, LineTable::Quad}}); + LT.addStandardOpcode(DW_LNS_set_prologue_end, {}); + LT.addStandardOpcode(DW_LNS_advance_line, {{99, LineTable::SLEB}}); + LT.addStandardOpcode(DW_LNS_copy, {}); + LT.addByte(0x4b); // Special opcode: address += 4, line += 1 + LT.addExtendedOpcode(1, DW_LNE_end_sequence, {}); + + // Second sequence with addresses 0x1000(Ln200), 0x1004(Ln201) + LT.addExtendedOpcode(9, DW_LNE_set_address, {{0x1000U, LineTable::Quad}}); + LT.addStandardOpcode(DW_LNS_set_prologue_end, {}); + LT.addStandardOpcode(DW_LNS_advance_line, {{199, LineTable::SLEB}}); + LT.addStandardOpcode(DW_LNS_copy, {}); + LT.addByte(0x4b); // Special opcode: address += 4, line += 1 + LT.addExtendedOpcode(1, DW_LNE_end_sequence, {}); + + // Generate the DWARF + generate(); + + // Parse the line table to get the sequence offset + auto ExpectedLineTable = Line.getOrParseLineTable( + LineData, /*Offset=*/0, *Context, nullptr, RecordRecoverable); + ASSERT_THAT_EXPECTED(ExpectedLineTable, Succeeded()); + const auto *Table = *ExpectedLineTable; + + uint32_t NumCUs = Context->getNumCompileUnits(); + ASSERT_EQ(NumCUs, 1u); + DWARFUnit *Unit = Context->getUnitAtIndex(0); + auto DwarfCUDie = Unit->getUnitDIE(false); + + auto Sub1Die = DwarfCUDie.getFirstChild(); + auto Sub2Die = Sub1Die.getSibling(); + auto Sub3Die = Sub2Die.getSibling(); + + // Verify Sub1Die is the DIE generated from SD1 + auto NameAttr1 = Sub1Die.find(DW_AT_name); + EXPECT_STREQ(*dwarf::toString(*NameAttr1), "sub1"); + + // Verify Sub2Die is the DIE generated from SD2 + auto NameAttr2 = Sub2Die.find(DW_AT_name); + EXPECT_STREQ(*dwarf::toString(*NameAttr2), "sub2"); + + // Verify Sub2Die is the DIE generated from SD3 + auto NameAttr3 = Sub3Die.find(DW_AT_name); + EXPECT_STREQ(*dwarf::toString(*NameAttr3), "sub3"); + + // Ensure there are two sequences + ASSERT_EQ(Table->Sequences.size(), 2u); + + // Lookup addresses in the first sequence with the second sequence's filter + { + std::vector Rows; + bool Found; + + // Look up using Sub3Die, which has an invalid DW_AT_LLVM_stmt_sequence + Found = Table->lookupAddressRange( + {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows, + &Sub3Die); + EXPECT_FALSE(Found); + + // Look up using Sub1Die, which has a valid DW_AT_LLVM_stmt_sequence and + // should return row 0 + Found = Table->lookupAddressRange( + {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows, + &Sub1Die); + EXPECT_TRUE(Found); + ASSERT_EQ(Rows.size(), 1u); + EXPECT_EQ(Rows[0], 0); + + // Look up using Sub2Die, which has a valid DW_AT_LLVM_stmt_sequence and + // should return row 3 + Rows.clear(); + Found = Table->lookupAddressRange( + {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows, + &Sub2Die); + EXPECT_TRUE(Found); + ASSERT_EQ(Rows.size(), 1u); + EXPECT_EQ(Rows[0], 3); + } +} } // end anonymous namespace From ad1d1d0a606f35466c8fdf74a00d7363d2a50442 Mon Sep 17 00:00:00 2001 From: Alex Borcan Date: Tue, 21 Jan 2025 15:39:36 -0800 Subject: [PATCH 2/8] Address Comments #1 --- .../llvm/DebugInfo/DWARF/DWARFDebugLine.h | 16 +++++---------- llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 20 +++++++++++-------- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index 732a4bfc28ab3..734a00801457b 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -210,7 +210,7 @@ class DWARFDebugLine { bool Empty; /// The offset into the line table where this sequence begins - uint64_t Offset = 0; + uint64_t StmtSeqOffset = UINT64_MAX; void reset(); @@ -227,6 +227,8 @@ class DWARFDebugLine { return SectionIndex == PC.SectionIndex && (LowPC <= PC.Address && PC.Address < HighPC); } + + void SetSequenceOffset(uint64_t Offset) { StmtSeqOffset = Offset; } }; struct LineTable { @@ -403,16 +405,8 @@ class DWARFDebugLine { ParsingState(struct LineTable *LT, uint64_t TableOffset, function_ref ErrorHandler); - void resetRowAndSequence(); - - /// Append the current Row to the LineTable's matrix of rows and update the - /// current Sequence information. - /// - /// \param LineTableOffset - the offset into the line table where the - /// current sequence of rows begins. This offset is stored in the Sequence - /// to allow filtering rows based on their originating sequence when a - /// DW_AT_LLVM_stmt_sequence attribute is present. - void appendRowToMatrix(uint64_t LineTableOffset); + void resetRowAndSequence(uint64_t Offset = UINT64_MAX); + void appendRowToMatrix(); struct AddrOpIndexDelta { uint64_t AddrOffset; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index 27b4c6a548912..dd91ef04eca83 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -531,6 +531,7 @@ void DWARFDebugLine::Sequence::reset() { FirstRowIndex = 0; LastRowIndex = 0; Empty = true; + StmtSeqOffset = UINT64_MAX; } DWARFDebugLine::LineTable::LineTable() { clear(); } @@ -565,14 +566,16 @@ DWARFDebugLine::ParsingState::ParsingState( resetRowAndSequence(); } -void DWARFDebugLine::ParsingState::resetRowAndSequence() { +void DWARFDebugLine::ParsingState::resetRowAndSequence(uint64_t Offset) { Row.reset(LineTable->Prologue.DefaultIsStmt); Sequence.reset(); + if (Offset != UINT64_MAX) { + Sequence.SetSequenceOffset(Offset); + } } -void DWARFDebugLine::ParsingState::appendRowToMatrix(uint64_t LineTableOffset) { +void DWARFDebugLine::ParsingState::appendRowToMatrix() { unsigned RowNumber = LineTable->Rows.size(); - Sequence.Offset = LineTableOffset; if (Sequence.Empty) { // Record the beginning of instruction sequence. Sequence.Empty = false; @@ -849,7 +852,9 @@ Error DWARFDebugLine::LineTable::parse( *OS << '\n'; Row::dumpTableHeader(*OS, /*Indent=*/Verbose ? 12 : 0); } - uint64_t LineTableSeqOffset = *OffsetPtr; + // *OffsetPtr points to the end of the prologue - i.e. the start of the first + // sequence. So initialize the first sequence offset accordingly. + State.Sequence.SetSequenceOffset(*OffsetPtr); bool TombstonedAddress = false; auto EmitRow = [&] { @@ -860,7 +865,7 @@ Error DWARFDebugLine::LineTable::parse( } if (OS) State.Row.dump(*OS); - State.appendRowToMatrix(LineTableSeqOffset); + State.appendRowToMatrix(); } }; while (*OffsetPtr < EndOffset) { @@ -915,10 +920,9 @@ Error DWARFDebugLine::LineTable::parse( // into this code path - if it were invalid, the default case would be // followed. EmitRow(); - State.resetRowAndSequence(); // Cursor now points to right after the end_sequence opcode - so points // to the start of the next sequence - if one exists. - LineTableSeqOffset = Cursor.tell(); + State.resetRowAndSequence(Cursor.tell()); break; case DW_LNE_set_address: @@ -1419,7 +1423,7 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl( // Skip sequences that don't match our stmt_sequence offset if one was // provided - if (StmtSequenceOffset && CurSeq.Offset != *StmtSequenceOffset) { + if (StmtSequenceOffset && CurSeq.StmtSeqOffset != *StmtSequenceOffset) { ++SeqPos; continue; } From cc8df97c0619b1456a371e132642f1c08e7a7dba Mon Sep 17 00:00:00 2001 From: Alex B Date: Tue, 21 Jan 2025 17:58:51 -0800 Subject: [PATCH 3/8] Fix failing test --- .../unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp index 12ec63d10449f..002e105daeb92 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp @@ -2059,25 +2059,25 @@ TEST_F(DebugLineBasicFixture, LookupAddressRangeWithStmtSequenceOffset) { dwarfgen::CompileUnit &CU = Gen->addCompileUnit(); dwarfgen::DIE CUDie = CU.getUnitDIE(); - CUDie.addAttribute(DW_AT_name, DW_FORM_strp, "/tmp/main.c"); + CUDie.addAttribute(DW_AT_name, DW_FORM_string, "/tmp/main.c"); CUDie.addAttribute(DW_AT_language, DW_FORM_data2, DW_LANG_C); dwarfgen::DIE SD1 = CUDie.addChild(DW_TAG_subprogram); - SD1.addAttribute(DW_AT_name, DW_FORM_strp, "sub1"); + SD1.addAttribute(DW_AT_name, DW_FORM_string, "sub1"); SD1.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U); SD1.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U); // DW_AT_LLVM_stmt_sequence points to the first sequence SD1.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x2e); dwarfgen::DIE SD2 = CUDie.addChild(DW_TAG_subprogram); - SD2.addAttribute(DW_AT_name, DW_FORM_strp, "sub2"); + SD2.addAttribute(DW_AT_name, DW_FORM_string, "sub2"); SD2.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U); SD2.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U); // DW_AT_LLVM_stmt_sequence points to the second sequence SD2.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x42); dwarfgen::DIE SD3 = CUDie.addChild(DW_TAG_subprogram); - SD3.addAttribute(DW_AT_name, DW_FORM_strp, "sub3"); + SD3.addAttribute(DW_AT_name, DW_FORM_string, "sub3"); SD3.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U); SD3.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U); // Invalid DW_AT_LLVM_stmt_sequence @@ -2153,7 +2153,7 @@ TEST_F(DebugLineBasicFixture, LookupAddressRangeWithStmtSequenceOffset) { &Sub1Die); EXPECT_TRUE(Found); ASSERT_EQ(Rows.size(), 1u); - EXPECT_EQ(Rows[0], 0); + EXPECT_EQ(Rows[0], 0U); // Look up using Sub2Die, which has a valid DW_AT_LLVM_stmt_sequence and // should return row 3 @@ -2163,7 +2163,7 @@ TEST_F(DebugLineBasicFixture, LookupAddressRangeWithStmtSequenceOffset) { &Sub2Die); EXPECT_TRUE(Found); ASSERT_EQ(Rows.size(), 1u); - EXPECT_EQ(Rows[0], 3); + EXPECT_EQ(Rows[0], 3u); } } } // end anonymous namespace From 88a816f2971d4c90a00a8a06cdf7bc77d539b569 Mon Sep 17 00:00:00 2001 From: Alex Borcan Date: Thu, 23 Jan 2025 15:19:01 -0800 Subject: [PATCH 4/8] Dont pass DIE, pass offset --- .../llvm/DebugInfo/DWARF/DWARFDebugLine.h | 12 +++++------ llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 11 ++-------- .../DebugInfo/DWARF/DWARFDebugLineTest.cpp | 20 +++++++++++-------- 3 files changed, 20 insertions(+), 23 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index 734a00801457b..5694cbdde6fdb 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -254,14 +254,14 @@ class DWARFDebugLine { /// \param Address - The starting address of the range. /// \param Size - The size of the address range. /// \param Result - The vector to fill with row indices. - /// \param Die - if provided, the function will check for a - /// DW_AT_LLVM_stmt_sequence attribute. If present, only rows from the - /// sequence starting at the matching offset will be added to the result. + /// \param StmtSequenceOffset - if provided, only rows from the sequence + /// starting at the matching offset will be added to the result. /// /// Returns true if any rows were found. - bool lookupAddressRange(object::SectionedAddress Address, uint64_t Size, - std::vector &Result, - const DWARFDie *Die = nullptr) const; + bool lookupAddressRange( + object::SectionedAddress Address, uint64_t Size, + std::vector &Result, + std::optional StmtSequenceOffset = std::nullopt) const; bool hasFileAtIndex(uint64_t FileIndex) const { return Prologue.hasFileAtIndex(FileIndex); diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index dd91ef04eca83..eb5181276a37a 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -1374,15 +1374,8 @@ DWARFDebugLine::LineTable::lookupAddressImpl(object::SectionedAddress Address, bool DWARFDebugLine::LineTable::lookupAddressRange( object::SectionedAddress Address, uint64_t Size, - std::vector &Result, const DWARFDie *Die) const { - - // If DIE is provided, check for DW_AT_LLVM_stmt_sequence - std::optional StmtSequenceOffset; - if (Die) { - if (auto StmtSeqAttr = Die->find(dwarf::DW_AT_LLVM_stmt_sequence)) { - StmtSequenceOffset = toSectionOffset(StmtSeqAttr); - } - } + std::vector &Result, + std::optional StmtSequenceOffset) const { // Search for relocatable addresses if (lookupAddressRangeImpl(Address, Size, Result, StmtSequenceOffset)) diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp index 002e105daeb92..4c0116ad32b1f 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp @@ -2140,27 +2140,31 @@ TEST_F(DebugLineBasicFixture, LookupAddressRangeWithStmtSequenceOffset) { std::vector Rows; bool Found; - // Look up using Sub3Die, which has an invalid DW_AT_LLVM_stmt_sequence + // Look up using Sub3Die's invalid stmt_sequence offset + auto StmtSeqAttr3 = Sub3Die.find(dwarf::DW_AT_LLVM_stmt_sequence); + ASSERT_TRUE(StmtSeqAttr3); Found = Table->lookupAddressRange( {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows, - &Sub3Die); + toSectionOffset(StmtSeqAttr3)); EXPECT_FALSE(Found); - // Look up using Sub1Die, which has a valid DW_AT_LLVM_stmt_sequence and - // should return row 0 + // Look up using Sub1Die's valid stmt_sequence offset + auto StmtSeqAttr1 = Sub1Die.find(dwarf::DW_AT_LLVM_stmt_sequence); + ASSERT_TRUE(StmtSeqAttr1); Found = Table->lookupAddressRange( {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows, - &Sub1Die); + toSectionOffset(StmtSeqAttr1)); EXPECT_TRUE(Found); ASSERT_EQ(Rows.size(), 1u); EXPECT_EQ(Rows[0], 0U); - // Look up using Sub2Die, which has a valid DW_AT_LLVM_stmt_sequence and - // should return row 3 + // Look up using Sub2Die's valid stmt_sequence offset Rows.clear(); + auto StmtSeqAttr2 = Sub2Die.find(dwarf::DW_AT_LLVM_stmt_sequence); + ASSERT_TRUE(StmtSeqAttr2); Found = Table->lookupAddressRange( {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows, - &Sub2Die); + toSectionOffset(StmtSeqAttr2)); EXPECT_TRUE(Found); ASSERT_EQ(Rows.size(), 1u); EXPECT_EQ(Rows[0], 3u); From 7aa8caeec4133979313ddbc34c4f1a87aa0df81d Mon Sep 17 00:00:00 2001 From: Alex Borcan Date: Wed, 29 Jan 2025 05:28:23 -0800 Subject: [PATCH 5/8] Address feedback nr.2 --- llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h | 2 +- llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 10 +++------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index 5694cbdde6fdb..b1bfc139119af 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -405,7 +405,7 @@ class DWARFDebugLine { ParsingState(struct LineTable *LT, uint64_t TableOffset, function_ref ErrorHandler); - void resetRowAndSequence(uint64_t Offset = UINT64_MAX); + void resetRowAndSequence(uint64_t Offset); void appendRowToMatrix(); struct AddrOpIndexDelta { diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index eb5181276a37a..ebc1e28fa0294 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -562,16 +562,12 @@ void DWARFDebugLine::LineTable::clear() { DWARFDebugLine::ParsingState::ParsingState( struct LineTable *LT, uint64_t TableOffset, function_ref ErrorHandler) - : LineTable(LT), LineTableOffset(TableOffset), ErrorHandler(ErrorHandler) { - resetRowAndSequence(); -} + : LineTable(LT), LineTableOffset(TableOffset), ErrorHandler(ErrorHandler) {} void DWARFDebugLine::ParsingState::resetRowAndSequence(uint64_t Offset) { Row.reset(LineTable->Prologue.DefaultIsStmt); Sequence.reset(); - if (Offset != UINT64_MAX) { - Sequence.SetSequenceOffset(Offset); - } + Sequence.SetSequenceOffset(Offset); } void DWARFDebugLine::ParsingState::appendRowToMatrix() { @@ -854,7 +850,7 @@ Error DWARFDebugLine::LineTable::parse( } // *OffsetPtr points to the end of the prologue - i.e. the start of the first // sequence. So initialize the first sequence offset accordingly. - State.Sequence.SetSequenceOffset(*OffsetPtr); + State.resetRowAndSequence(*OffsetPtr); bool TombstonedAddress = false; auto EmitRow = [&] { From 16e8e204ac986a45f7778e624e05e19e44224098 Mon Sep 17 00:00:00 2001 From: Alex Borcan Date: Tue, 4 Feb 2025 09:07:55 -0800 Subject: [PATCH 6/8] Address Feedback Nr.3 --- .../llvm/DebugInfo/DWARF/DWARFDebugLine.h | 2 - llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 99 +++++++---- .../DebugInfo/DWARF/DWARFDebugLineTest.cpp | 160 ++++++++---------- 3 files changed, 136 insertions(+), 125 deletions(-) diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index b1bfc139119af..61f17a27b3d28 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -227,8 +227,6 @@ class DWARFDebugLine { return SectionIndex == PC.SectionIndex && (LowPC <= PC.Address && PC.Address < HighPC); } - - void SetSequenceOffset(uint64_t Offset) { StmtSeqOffset = Offset; } }; struct LineTable { diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index ebc1e28fa0294..606635da2fec6 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -567,7 +567,7 @@ DWARFDebugLine::ParsingState::ParsingState( void DWARFDebugLine::ParsingState::resetRowAndSequence(uint64_t Offset) { Row.reset(LineTable->Prologue.DefaultIsStmt); Sequence.reset(); - Sequence.SetSequenceOffset(Offset); + Sequence.StmtSeqOffset = Offset; } void DWARFDebugLine::ParsingState::appendRowToMatrix() { @@ -1391,51 +1391,86 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl( std::optional StmtSequenceOffset) const { if (Sequences.empty()) return false; - uint64_t EndAddr = Address.Address + Size; - // First, find an instruction sequence containing the given address. - DWARFDebugLine::Sequence Sequence; - Sequence.SectionIndex = Address.SectionIndex; - Sequence.HighPC = Address.Address; - SequenceIter LastSeq = Sequences.end(); - SequenceIter SeqPos = llvm::upper_bound( - Sequences, Sequence, DWARFDebugLine::Sequence::orderByHighPC); - if (SeqPos == LastSeq || !SeqPos->containsPC(Address)) - return false; - SequenceIter StartPos = SeqPos; + const uint64_t EndAddr = Address.Address + Size; - // Add the rows from the first sequence to the vector, starting with the - // index we just calculated - - while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) { - const DWARFDebugLine::Sequence &CurSeq = *SeqPos; - - // Skip sequences that don't match our stmt_sequence offset if one was - // provided - if (StmtSequenceOffset && CurSeq.StmtSeqOffset != *StmtSequenceOffset) { - ++SeqPos; - continue; - } + // Helper: Given a sequence and a starting address, find the subset + // of rows within [Address, EndAddr) and append them to `Result`. + auto addRowsFromSequence = [&](const Sequence &Seq, + object::SectionedAddress StartAddr, + uint64_t EndAddress, bool IsFirstSequence) { + // If this sequence does not intersect our [StartAddr, EndAddress) range, + // do nothing. + if (Seq.HighPC <= StartAddr.Address || Seq.LowPC >= EndAddress) + return; - // For the first sequence, we need to find which row in the sequence is the - // first in our range. - uint32_t FirstRowIndex = CurSeq.FirstRowIndex; - if (SeqPos == StartPos) - FirstRowIndex = findRowInSeq(CurSeq, Address); + // For the "first" sequence in the search, we must figure out which row + // actually starts within our address range. + uint32_t FirstRowIndex = Seq.FirstRowIndex; + if (IsFirstSequence) + FirstRowIndex = findRowInSeq(Seq, StartAddr); - // Figure out the last row in the range. + // Similarly, compute the last row that is within [StartAddr, EndAddress). uint32_t LastRowIndex = - findRowInSeq(CurSeq, {EndAddr - 1, Address.SectionIndex}); + findRowInSeq(Seq, {EndAddress - 1, StartAddr.SectionIndex}); if (LastRowIndex == UnknownRowIndex) - LastRowIndex = CurSeq.LastRowIndex - 1; + LastRowIndex = Seq.LastRowIndex - 1; assert(FirstRowIndex != UnknownRowIndex); assert(LastRowIndex != UnknownRowIndex); + // Append all row indices in [FirstRowIndex, LastRowIndex]. for (uint32_t I = FirstRowIndex; I <= LastRowIndex; ++I) { Result.push_back(I); } + }; + + // If a stmt_sequence_offset was provided, do a binary search to find the + // single sequence that matches that offset. Then add only its relevant rows. + if (StmtSequenceOffset) { + // Binary-search the Sequences by their StmtSeqOffset: + auto It = llvm::lower_bound(Sequences, *StmtSequenceOffset, + [](const Sequence &Seq, uint64_t OffsetVal) { + return Seq.StmtSeqOffset < OffsetVal; + }); + + // If not found or mismatched, there’s no match to return. + if (It == Sequences.end() || It->StmtSeqOffset != *StmtSequenceOffset) + return false; + + // Now add rows from the discovered sequence if it intersects [Address, + // EndAddr). + addRowsFromSequence(*It, Address, EndAddr, /*IsFirstSequence=*/true); + return !Result.empty(); + } + // Otherwise, fall back to logic of walking sequences by Address. + // We first find a sequence containing `Address` (via upper_bound on HighPC), + // then proceed forward through overlapping sequences in ascending order. + + // Construct a dummy Sequence to find where `Address` fits by HighPC. + DWARFDebugLine::Sequence SearchSeq; + SearchSeq.SectionIndex = Address.SectionIndex; + SearchSeq.HighPC = Address.Address; + + auto LastSeq = Sequences.end(); + auto SeqPos = llvm::upper_bound(Sequences, SearchSeq, + DWARFDebugLine::Sequence::orderByHighPC); + if (SeqPos == LastSeq || !SeqPos->containsPC(Address)) + return false; + + // This marks the first sequence we found that might contain Address. + const auto StartPos = SeqPos; + + // Walk forward until sequences no longer intersect [Address, EndAddr). + while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) { + // Add rows only if the sequence is in the same section: + if (SeqPos->SectionIndex == Address.SectionIndex) { + // For the very first sequence, we need the row that lines up with + // `Address`. + bool IsFirst = (SeqPos == StartPos); + addRowsFromSequence(*SeqPos, Address, EndAddr, IsFirst); + } ++SeqPos; } diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp index 4c0116ad32b1f..2fe52600df923 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp @@ -2036,138 +2036,116 @@ TEST_F(DebugLineBasicFixture, PrintPathsProperly) { } /// Test that lookupAddressRange correctly filters rows based on -/// DW_AT_LLVM_stmt_sequence. +/// a statement-sequence offset (simulating DW_AT_LLVM_stmt_sequence). /// /// This test verifies that: -/// 1. When a DIE has a DW_AT_LLVM_stmt_sequence attribute, lookupAddressRange -/// only returns rows from the sequence starting at the specified offset -/// 2. When a DIE has an invalid DW_AT_LLVM_stmt_sequence offset, no rows are -/// returned -/// 3. When no DW_AT_LLVM_stmt_sequence is present, all matching rows are -/// returned +/// 1. When a statement-sequence offset is provided, lookupAddressRange +/// only returns rows from the sequence starting at that offset. +/// 2. When an invalid statement-sequence offset is provided, no rows +/// are returned. +/// 3. When no statement-sequence offset is provided, all matching rows +/// in the table are returned. /// -/// The test creates a line table with two sequences at the same address range -/// but different line numbers. It then creates three subprogram DIEs: -/// - One with DW_AT_LLVM_stmt_sequence pointing to the first sequence -/// - One with DW_AT_LLVM_stmt_sequence pointing to the second sequence -/// - One with an invalid DW_AT_LLVM_stmt_sequence offset +/// We build a line table with two sequences at the same address range +/// but different line numbers. Then we try lookups with various statement- +/// sequence offsets to check the filtering logic. TEST_F(DebugLineBasicFixture, LookupAddressRangeWithStmtSequenceOffset) { if (!setupGenerator()) GTEST_SKIP(); - // Create new DWARF with the subprogram DIE - dwarfgen::CompileUnit &CU = Gen->addCompileUnit(); - dwarfgen::DIE CUDie = CU.getUnitDIE(); - - CUDie.addAttribute(DW_AT_name, DW_FORM_string, "/tmp/main.c"); - CUDie.addAttribute(DW_AT_language, DW_FORM_data2, DW_LANG_C); - - dwarfgen::DIE SD1 = CUDie.addChild(DW_TAG_subprogram); - SD1.addAttribute(DW_AT_name, DW_FORM_string, "sub1"); - SD1.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U); - SD1.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U); - // DW_AT_LLVM_stmt_sequence points to the first sequence - SD1.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x2e); - - dwarfgen::DIE SD2 = CUDie.addChild(DW_TAG_subprogram); - SD2.addAttribute(DW_AT_name, DW_FORM_string, "sub2"); - SD2.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U); - SD2.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U); - // DW_AT_LLVM_stmt_sequence points to the second sequence - SD2.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x42); - - dwarfgen::DIE SD3 = CUDie.addChild(DW_TAG_subprogram); - SD3.addAttribute(DW_AT_name, DW_FORM_string, "sub3"); - SD3.addAttribute(DW_AT_low_pc, DW_FORM_addr, 0x1000U); - SD3.addAttribute(DW_AT_high_pc, DW_FORM_addr, 0x1032U); - // Invalid DW_AT_LLVM_stmt_sequence - SD3.addAttribute(DW_AT_LLVM_stmt_sequence, DW_FORM_sec_offset, 0x66); - - // Create a line table with multiple sequences + // Create a line table that has two sequences covering [0x1000, 0x1004). + // Each sequence has two rows: addresses at 0x1000 and 0x1004, but + // they differ by line numbers (100 vs. 200, etc.). + // + // We'll pretend the first sequence starts at offset 0x2e in the line table, + // the second at 0x42, and we'll also test an invalid offset 0x66. + LineTable < = Gen->addLineTable(); - // First sequence with addresses 0x1000(Ln100), 0x1004(Ln101) + // First sequence at offset 0x2e: addresses 0x1000(Ln=100), 0x1004(Ln=101) LT.addExtendedOpcode(9, DW_LNE_set_address, {{0x1000U, LineTable::Quad}}); LT.addStandardOpcode(DW_LNS_set_prologue_end, {}); + // Advance the line register by 99 (so line=100) and copy. LT.addStandardOpcode(DW_LNS_advance_line, {{99, LineTable::SLEB}}); LT.addStandardOpcode(DW_LNS_copy, {}); - LT.addByte(0x4b); // Special opcode: address += 4, line += 1 + // 0x4b is a special opcode: address += 4, line += 1 (so line=101). + LT.addByte(0x4b); + // End this sequence. LT.addExtendedOpcode(1, DW_LNE_end_sequence, {}); - // Second sequence with addresses 0x1000(Ln200), 0x1004(Ln201) + // Second sequence at offset 0x42: addresses 0x1000(Ln=200), 0x1004(Ln=201) LT.addExtendedOpcode(9, DW_LNE_set_address, {{0x1000U, LineTable::Quad}}); LT.addStandardOpcode(DW_LNS_set_prologue_end, {}); LT.addStandardOpcode(DW_LNS_advance_line, {{199, LineTable::SLEB}}); LT.addStandardOpcode(DW_LNS_copy, {}); - LT.addByte(0x4b); // Special opcode: address += 4, line += 1 + // 0x4b again: address += 4, line += 1 (so line=201). + LT.addByte(0x4b); + // End this second sequence. LT.addExtendedOpcode(1, DW_LNE_end_sequence, {}); - // Generate the DWARF + // Generate the DWARF data. generate(); - // Parse the line table to get the sequence offset - auto ExpectedLineTable = Line.getOrParseLineTable( - LineData, /*Offset=*/0, *Context, nullptr, RecordRecoverable); + // Parse the line table. + auto ExpectedLineTable = + Line.getOrParseLineTable(LineData, /*Offset=*/0, *Context, + /*DwarfUnit=*/nullptr, RecordRecoverable); ASSERT_THAT_EXPECTED(ExpectedLineTable, Succeeded()); const auto *Table = *ExpectedLineTable; - uint32_t NumCUs = Context->getNumCompileUnits(); - ASSERT_EQ(NumCUs, 1u); - DWARFUnit *Unit = Context->getUnitAtIndex(0); - auto DwarfCUDie = Unit->getUnitDIE(false); - - auto Sub1Die = DwarfCUDie.getFirstChild(); - auto Sub2Die = Sub1Die.getSibling(); - auto Sub3Die = Sub2Die.getSibling(); - - // Verify Sub1Die is the DIE generated from SD1 - auto NameAttr1 = Sub1Die.find(DW_AT_name); - EXPECT_STREQ(*dwarf::toString(*NameAttr1), "sub1"); - - // Verify Sub2Die is the DIE generated from SD2 - auto NameAttr2 = Sub2Die.find(DW_AT_name); - EXPECT_STREQ(*dwarf::toString(*NameAttr2), "sub2"); - - // Verify Sub2Die is the DIE generated from SD3 - auto NameAttr3 = Sub3Die.find(DW_AT_name); - EXPECT_STREQ(*dwarf::toString(*NameAttr3), "sub3"); - - // Ensure there are two sequences + // The table should have two sequences, each starting at our chosen offsets. ASSERT_EQ(Table->Sequences.size(), 2u); - // Lookup addresses in the first sequence with the second sequence's filter + // 1) Try looking up with an invalid offset (simulating an invalid + // DW_AT_LLVM_stmt_sequence). We expect no rows. { std::vector Rows; - bool Found; - - // Look up using Sub3Die's invalid stmt_sequence offset - auto StmtSeqAttr3 = Sub3Die.find(dwarf::DW_AT_LLVM_stmt_sequence); - ASSERT_TRUE(StmtSeqAttr3); - Found = Table->lookupAddressRange( + bool Found = Table->lookupAddressRange( {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows, - toSectionOffset(StmtSeqAttr3)); + /*StmtSequenceOffset=*/0x66); // invalid offset EXPECT_FALSE(Found); + EXPECT_TRUE(Rows.empty()); + } - // Look up using Sub1Die's valid stmt_sequence offset - auto StmtSeqAttr1 = Sub1Die.find(dwarf::DW_AT_LLVM_stmt_sequence); - ASSERT_TRUE(StmtSeqAttr1); - Found = Table->lookupAddressRange( + // 2) Look up using the offset 0x2e (our first sequence). We expect + // to get the rows from that sequence only (which for 0x1000 is row #0). + { + std::vector Rows; + bool Found = Table->lookupAddressRange( {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows, - toSectionOffset(StmtSeqAttr1)); + /*StmtSequenceOffset=*/0x2e); EXPECT_TRUE(Found); ASSERT_EQ(Rows.size(), 1u); - EXPECT_EQ(Rows[0], 0U); + // The first sequence's first row is index 0. + EXPECT_EQ(Rows[0], 0u); + } - // Look up using Sub2Die's valid stmt_sequence offset - Rows.clear(); - auto StmtSeqAttr2 = Sub2Die.find(dwarf::DW_AT_LLVM_stmt_sequence); - ASSERT_TRUE(StmtSeqAttr2); - Found = Table->lookupAddressRange( + // 3) Look up using the offset 0x42 (second sequence). For address 0x1000 + // in that second sequence, we should see row #2. + { + std::vector Rows; + bool Found = Table->lookupAddressRange( {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows, - toSectionOffset(StmtSeqAttr2)); + /*StmtSequenceOffset=*/0x42); EXPECT_TRUE(Found); ASSERT_EQ(Rows.size(), 1u); + // The second sequence's first row is index 2 in the table. EXPECT_EQ(Rows[0], 3u); } + + // 4) Look up with no statement-sequence offset specified. + // We should get rows from both sequences for address 0x1000. + { + std::vector Rows; + bool Found = Table->lookupAddressRange( + {0x1000, object::SectionedAddress::UndefSection}, /*Size=*/1, Rows, + std::nullopt /* no filter */); + EXPECT_TRUE(Found); + // The first sequence's row is #0, second's row is #2, so both should + // appear. + ASSERT_EQ(Rows.size(), 2u); + EXPECT_EQ(Rows[0], 0u); + EXPECT_EQ(Rows[1], 3u); + } } } // end anonymous namespace From 569bbe4b08b4a673fca6b26b1eb6a624f22bf080 Mon Sep 17 00:00:00 2001 From: Alex Borcan Date: Wed, 5 Feb 2025 16:03:17 -0800 Subject: [PATCH 7/8] Address Feedback Nr.4 --- llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 111 ++++++++------------ 1 file changed, 44 insertions(+), 67 deletions(-) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index 606635da2fec6..267958a8602ff 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -1391,90 +1391,67 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl( std::optional StmtSequenceOffset) const { if (Sequences.empty()) return false; + uint64_t EndAddr = Address.Address + Size; + // First, find an instruction sequence containing the given address. + DWARFDebugLine::Sequence Sequence; + Sequence.SectionIndex = Address.SectionIndex; + Sequence.HighPC = Address.Address; + SequenceIter LastSeq = Sequences.end(); + SequenceIter SeqPos; - const uint64_t EndAddr = Address.Address + Size; + if (StmtSequenceOffset) { + // If we have a statement sequence offset, find the specific sequence + // Binary search for sequence with matching StmtSeqOffset + Sequence.StmtSeqOffset = *StmtSequenceOffset; + SeqPos = std::lower_bound(Sequences.begin(), LastSeq, Sequence, + [](const DWARFDebugLine::Sequence &LHS, + const DWARFDebugLine::Sequence &RHS) { + return LHS.StmtSeqOffset < RHS.StmtSeqOffset; + }); + + // If sequence not found or doesn't contain the address, return false + if (SeqPos == LastSeq || SeqPos->StmtSeqOffset != *StmtSequenceOffset || + !SeqPos->containsPC(Address)) + return false; - // Helper: Given a sequence and a starting address, find the subset - // of rows within [Address, EndAddr) and append them to `Result`. - auto addRowsFromSequence = [&](const Sequence &Seq, - object::SectionedAddress StartAddr, - uint64_t EndAddress, bool IsFirstSequence) { - // If this sequence does not intersect our [StartAddr, EndAddress) range, - // do nothing. - if (Seq.HighPC <= StartAddr.Address || Seq.LowPC >= EndAddress) - return; + // Set LastSeq to just past this sequence since we only want this one + LastSeq = std::next(SeqPos); + } else { + // No specific sequence requested, find first sequence containing address + SeqPos = std::upper_bound(Sequences.begin(), LastSeq, Sequence, + DWARFDebugLine::Sequence::orderByHighPC); + if (SeqPos == LastSeq || !SeqPos->containsPC(Address)) + return false; + } - // For the "first" sequence in the search, we must figure out which row - // actually starts within our address range. - uint32_t FirstRowIndex = Seq.FirstRowIndex; - if (IsFirstSequence) - FirstRowIndex = findRowInSeq(Seq, StartAddr); + SequenceIter StartPos = SeqPos; - // Similarly, compute the last row that is within [StartAddr, EndAddress). + // Process sequences that overlap with the desired range + while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) { + const DWARFDebugLine::Sequence &CurSeq = *SeqPos; + // For the first sequence, we need to find which row in the sequence is the + // first in our range. + uint32_t FirstRowIndex = CurSeq.FirstRowIndex; + if (SeqPos == StartPos) + FirstRowIndex = findRowInSeq(CurSeq, Address); + + // Figure out the last row in the range. uint32_t LastRowIndex = - findRowInSeq(Seq, {EndAddress - 1, StartAddr.SectionIndex}); + findRowInSeq(CurSeq, {EndAddr - 1, Address.SectionIndex}); if (LastRowIndex == UnknownRowIndex) - LastRowIndex = Seq.LastRowIndex - 1; + LastRowIndex = CurSeq.LastRowIndex - 1; assert(FirstRowIndex != UnknownRowIndex); assert(LastRowIndex != UnknownRowIndex); - // Append all row indices in [FirstRowIndex, LastRowIndex]. for (uint32_t I = FirstRowIndex; I <= LastRowIndex; ++I) { Result.push_back(I); } - }; - // If a stmt_sequence_offset was provided, do a binary search to find the - // single sequence that matches that offset. Then add only its relevant rows. - if (StmtSequenceOffset) { - // Binary-search the Sequences by their StmtSeqOffset: - auto It = llvm::lower_bound(Sequences, *StmtSequenceOffset, - [](const Sequence &Seq, uint64_t OffsetVal) { - return Seq.StmtSeqOffset < OffsetVal; - }); - - // If not found or mismatched, there’s no match to return. - if (It == Sequences.end() || It->StmtSeqOffset != *StmtSequenceOffset) - return false; - - // Now add rows from the discovered sequence if it intersects [Address, - // EndAddr). - addRowsFromSequence(*It, Address, EndAddr, /*IsFirstSequence=*/true); - return !Result.empty(); - } - - // Otherwise, fall back to logic of walking sequences by Address. - // We first find a sequence containing `Address` (via upper_bound on HighPC), - // then proceed forward through overlapping sequences in ascending order. - - // Construct a dummy Sequence to find where `Address` fits by HighPC. - DWARFDebugLine::Sequence SearchSeq; - SearchSeq.SectionIndex = Address.SectionIndex; - SearchSeq.HighPC = Address.Address; - - auto LastSeq = Sequences.end(); - auto SeqPos = llvm::upper_bound(Sequences, SearchSeq, - DWARFDebugLine::Sequence::orderByHighPC); - if (SeqPos == LastSeq || !SeqPos->containsPC(Address)) - return false; - - // This marks the first sequence we found that might contain Address. - const auto StartPos = SeqPos; - - // Walk forward until sequences no longer intersect [Address, EndAddr). - while (SeqPos != LastSeq && SeqPos->LowPC < EndAddr) { - // Add rows only if the sequence is in the same section: - if (SeqPos->SectionIndex == Address.SectionIndex) { - // For the very first sequence, we need the row that lines up with - // `Address`. - bool IsFirst = (SeqPos == StartPos); - addRowsFromSequence(*SeqPos, Address, EndAddr, IsFirst); - } ++SeqPos; } - return !Result.empty(); + return true; } std::optional From 539ab4cd67db9a5239478379a683df894a79d832 Mon Sep 17 00:00:00 2001 From: Alex Borcan Date: Wed, 5 Feb 2025 17:28:29 -0800 Subject: [PATCH 8/8] Address Feedback Nr.5 --- llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 32 +++++++++++---------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index 267958a8602ff..547a288a82d25 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -1400,30 +1400,32 @@ bool DWARFDebugLine::LineTable::lookupAddressRangeImpl( SequenceIter SeqPos; if (StmtSequenceOffset) { - // If we have a statement sequence offset, find the specific sequence - // Binary search for sequence with matching StmtSeqOffset - Sequence.StmtSeqOffset = *StmtSequenceOffset; - SeqPos = std::lower_bound(Sequences.begin(), LastSeq, Sequence, - [](const DWARFDebugLine::Sequence &LHS, - const DWARFDebugLine::Sequence &RHS) { - return LHS.StmtSeqOffset < RHS.StmtSeqOffset; - }); - - // If sequence not found or doesn't contain the address, return false - if (SeqPos == LastSeq || SeqPos->StmtSeqOffset != *StmtSequenceOffset || - !SeqPos->containsPC(Address)) + // If we have a statement sequence offset, find the specific sequence. + // Linear search for sequence with matching StmtSeqOffset + SeqPos = std::find_if(Sequences.begin(), LastSeq, + [&](const DWARFDebugLine::Sequence &S) { + return S.StmtSeqOffset == *StmtSequenceOffset; + }); + + // If sequence not found, return false + if (SeqPos == LastSeq) return false; - // Set LastSeq to just past this sequence since we only want this one - LastSeq = std::next(SeqPos); + // Set LastSeq to the next sequence since we only want the one matching + // sequence (sequences are guaranteed to have unique StmtSeqOffset) + LastSeq = SeqPos + 1; } else { // No specific sequence requested, find first sequence containing address SeqPos = std::upper_bound(Sequences.begin(), LastSeq, Sequence, DWARFDebugLine::Sequence::orderByHighPC); - if (SeqPos == LastSeq || !SeqPos->containsPC(Address)) + if (SeqPos == LastSeq) return false; } + // If the start sequence doesn't contain the address, nothing to do + if (!SeqPos->containsPC(Address)) + return false; + SequenceIter StartPos = SeqPos; // Process sequences that overlap with the desired range