Skip to content

Commit b2d387f

Browse files
committed
[SharedCache] Apply .symbols file information when applying an image
This improves symbol recovery drastically on newer shared caches. Related PR: #6210
1 parent 0400467 commit b2d387f

File tree

6 files changed

+97
-31
lines changed

6 files changed

+97
-31
lines changed

view/sharedcache/core/MachO.cpp

Lines changed: 10 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -457,32 +457,19 @@ std::optional<SharedCacheMachOHeader> SharedCacheMachOHeader::ParseHeaderForAddr
457457
return header;
458458
}
459459

460-
// TODO: Support reading from .symbols file.
461-
// TODO: Replace view with address size?
462-
std::vector<CacheSymbol> SharedCacheMachOHeader::ReadSymbolTable(BinaryView& view, VirtualMemory& vm) const
460+
std::vector<CacheSymbol> SharedCacheMachOHeader::ReadSymbolTable(VirtualMemory& vm, const TableInfo &symbolInfo, const TableInfo &stringInfo) const
463461
{
464-
auto addressSize = view.GetAddressSize();
465-
// NOTE: The symbol table will exist within the link edit segment, the table offsets are relative to the file not
466-
// the linkedit segment.
467-
uint64_t symbolsAddress = GetLinkEditFileBase() + symtab.symoff;
468-
uint64_t stringsAddress = GetLinkEditFileBase() + symtab.stroff;
469-
470-
// TODO: This needs to be passed in as an optional argument.
471-
// TODO: Sometimes symbol tables are shared and we have to offset into the table for a specific header.
472-
// TODO: The "shared" symbol tables are stored in .symbols files.
473-
int nlistStartIndex = 0;
474-
475462
std::vector<CacheSymbol> symbolList;
476-
for (uint64_t i = 0; i < symtab.nsyms; i++)
463+
// TODO: This assumes that 95% (or more) of the entries are going to be added.
464+
symbolList.reserve(symbolInfo.entries);
465+
for (uint64_t entryIndex = 0; entryIndex < symbolInfo.entries; entryIndex++)
477466
{
478-
uint64_t entryIndex = (nlistStartIndex + i);
479-
480467
nlist_64 nlist = {};
481-
if (addressSize == 4)
468+
if (vm.GetAddressSize() == 4)
482469
{
483470
// 32-bit DSC
484471
struct nlist nlist32 = {};
485-
vm.Read(&nlist, symbolsAddress + (entryIndex * sizeof(nlist32)), sizeof(nlist32));
472+
vm.Read(&nlist, symbolInfo.address + (entryIndex * sizeof(nlist32)), sizeof(nlist32));
486473
nlist.n_strx = nlist32.n_strx;
487474
nlist.n_type = nlist32.n_type;
488475
nlist.n_sect = nlist32.n_sect;
@@ -492,24 +479,24 @@ std::vector<CacheSymbol> SharedCacheMachOHeader::ReadSymbolTable(BinaryView& vie
492479
else
493480
{
494481
// 64-bit DSC
495-
vm.Read(&nlist, symbolsAddress + (entryIndex * sizeof(nlist)), sizeof(nlist));
482+
vm.Read(&nlist, symbolInfo.address + (entryIndex * sizeof(nlist)), sizeof(nlist));
496483
}
497484

498485
auto symbolAddress = nlist.n_value;
499486
if (((nlist.n_type & N_TYPE) == N_INDR) || symbolAddress == 0)
500487
continue;
501488

502-
if (nlist.n_strx >= symtab.strsize)
489+
if (nlist.n_strx >= stringInfo.entries)
503490
{
504491
// TODO: where logger?
505492
LogError(
506493
"Symbol entry at index %llu has a string offset of %u which is outside the strings buffer of size %u "
507494
"for symbol table %x",
508-
entryIndex, nlist.n_strx, symtab.strsize, symtab.stroff);
495+
entryIndex, nlist.n_strx, stringInfo.address, stringInfo.entries);
509496
continue;
510497
}
511498

512-
std::string symbolName = vm.ReadCString(stringsAddress + nlist.n_strx);
499+
std::string symbolName = vm.ReadCString(stringInfo.address + nlist.n_strx);
513500
if (symbolName == "<redacted>")
514501
continue;
515502

view/sharedcache/core/MachO.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,15 @@
77

88
struct CacheSymbol;
99

10+
// Location and extent of a symbol or string table, passed to `ReadSymbolTable`.
11+
struct TableInfo
12+
{
13+
// VM address where the reading will begin.
14+
uint64_t address;
15+
// Number of entries in the table; for a string table callers pass its size in bytes (e.g. `symtab.strsize`).
16+
uint32_t entries;
17+
};
18+
1019
struct SharedCacheMachOHeader
1120
{
1221
uint64_t textBase = 0;
@@ -61,8 +70,7 @@ struct SharedCacheMachOHeader
6170
static std::optional<SharedCacheMachOHeader> ParseHeaderForAddress(
6271
std::shared_ptr<VirtualMemory> vm, uint64_t address, const std::string& imagePath);
6372

64-
// TODO: Replace view with address size?
65-
std::vector<CacheSymbol> ReadSymbolTable(BinaryNinja::BinaryView& view, VirtualMemory& vm) const;
73+
std::vector<CacheSymbol> ReadSymbolTable(VirtualMemory& vm, const TableInfo &symbolInfo, const TableInfo &stringInfo) const;
6674

6775
bool AddExportTerminalSymbol(
6876
std::vector<CacheSymbol>& symbols, const std::string& symbolName, const uint8_t* current,

view/sharedcache/core/MachOProcessor.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,14 +53,14 @@ void SharedCacheMachOProcessor::ApplyHeader(SharedCacheMachOHeader& header)
5353
auto typeLib = typeLibraryFromName(header.installName);
5454
m_view->BeginBulkModifySymbols();
5555

56-
// TODO: Why does this need to only happen in linkeditSegment?
5756
// Apply symbols from symbol table.
5857
if (header.symtab.symoff != 0)
5958
{
60-
// Mach-O View symtab processing with
61-
// a ton of stuff cut out so it can work
6259
// NOTE: This table is read relative to the link edit segment file base.
63-
const auto symbols = header.ReadSymbolTable(*m_view, *m_vm);
60+
// NOTE: This does not handle the shared .symbols cache entry symbols; that is the responsibility of the caller.
61+
TableInfo symbolInfo = { header.GetLinkEditFileBase() + header.symtab.symoff, header.symtab.nsyms };
62+
TableInfo stringInfo = { header.GetLinkEditFileBase() + header.symtab.stroff, header.symtab.strsize };
63+
const auto symbols = header.ReadSymbolTable(*m_vm, symbolInfo, stringInfo);
6464
for (const auto& sym : symbols)
6565
{
6666
auto [symbol, symbolType] = sym.GetBNSymbolAndType(*m_view);
@@ -72,7 +72,6 @@ void SharedCacheMachOProcessor::ApplyHeader(SharedCacheMachOHeader& header)
7272
if (header.exportTriePresent)
7373
{
7474
// NOTE: This table is read relative to the link edit segment file base.
75-
// TODO: Remove this and use the m_symbols in the cache?
7675
const auto exportSymbols = header.ReadExportSymbolTrie(*m_vm);
7776
for (const auto& sym : exportSymbols)
7877
{

view/sharedcache/core/SharedCache.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,14 @@ std::optional<CacheEntry> CacheEntry::FromFile(const std::string& filePath, cons
8080
// We found a single dyld data cache entry file. Mark it as such!
8181
type = CacheEntryType::DyldData;
8282
}
83-
else if (fileName.find(".symbols") != std::string::npos)
83+
else if (fileName.find(".symbols") != std::string::npos && mappings.size() == 1)
8484
{
8585
// We found a single symbols cache entry file. Mark it as such!
8686
type = CacheEntryType::Symbols;
87+
// Adjust the mapping for the symbol file; the mapping seems to cover only the header.
88+
// If we do not adjust the mapping then we will not be able to read the symbol table through the virtual memory.
89+
mappings[0].fileOffset = 0;
90+
mappings[0].size = file->Length();
8791
}
8892
else if (mappings.size() == 1 && header.imagesCountOld == 0 && header.imagesCount == 0
8993
&& header.imagesTextOffset == 0)

view/sharedcache/core/SharedCacheController.cpp

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,71 @@ bool SharedCacheController::ApplyImage(BinaryView& view, const CacheImage& image
213213
view.SetFunctionAnalysisUpdateDisabled(true);
214214
machoProcessor.ApplyHeader(*image.header);
215215
view.SetFunctionAnalysisUpdateDisabled(prevDisabledState);
216+
217+
// Add symbols from the symbol cache files.
218+
// This is done separately from applying the header, as it requires knowing about other cache images.
219+
// NOTE: If we want to move this into the `ApplyHeader` above we would need to give it some extra cache context.
220+
const auto& cache = GetCache();
221+
const auto vm = cache.GetVirtualMemory();
222+
for (const auto& entry : cache.GetEntries())
223+
{
224+
if (entry.GetType() != CacheEntryType::Symbols && vm->GetAddressSize() == 8)
225+
continue;
226+
const auto& header = entry.GetHeader();
227+
228+
// This is where we get the symbol and string table information from in the .symbols file.
229+
dyld_cache_local_symbols_info localSymbolsInfo = {};
230+
auto localSymbolsInfoAddr = entry.GetMappedAddress(header.localSymbolsOffset);
231+
if (!localSymbolsInfoAddr.has_value())
232+
continue;
233+
vm->Read(&localSymbolsInfo, *localSymbolsInfoAddr, sizeof(dyld_cache_local_symbols_info));
234+
235+
// Read each symbols entry, looking for the current image entry.
236+
uint64_t localEntriesAddr = *localSymbolsInfoAddr + localSymbolsInfo.entriesOffset;
237+
uint64_t localSymbolsAddr = *localSymbolsInfoAddr + localSymbolsInfo.nlistOffset;
238+
uint64_t localStringsAddr = *localSymbolsInfoAddr + localSymbolsInfo.stringsOffset;
239+
std::vector<dyld_cache_local_symbols_entry_64> symbolEntries;
240+
symbolEntries.reserve(localSymbolsInfo.entriesCount);
241+
dyld_cache_local_symbols_entry_64 localSymbolsEntry = {};
242+
243+
// TODO: Clean this up!!!! This is taken from the macho code...
244+
auto typeLibraryFromName = [&](const std::string& name) -> Ref<TypeLibrary> {
245+
// Check to see if we have already loaded the type library.
246+
if (auto typeLib = view.GetTypeLibrary(name))
247+
return typeLib;
248+
249+
auto typeLibs = view.GetDefaultPlatform()->GetTypeLibrariesByName(name);
250+
if (!typeLibs.empty())
251+
return typeLibs.front();
252+
return nullptr;
253+
};
254+
255+
// Pull the available type library for the image we are loading, so we can apply known types.
256+
auto typeLib = typeLibraryFromName(image.GetName());
257+
258+
for (uint32_t i = 0; i < localSymbolsInfo.entriesCount; i++)
259+
{
260+
vm->Read(&localSymbolsEntry, localEntriesAddr + i * sizeof(dyld_cache_local_symbols_entry_64),
261+
sizeof(dyld_cache_local_symbols_entry_64));
262+
symbolEntries.push_back(localSymbolsEntry);
263+
auto imageAddr = cache.GetBaseAddress() + localSymbolsEntry.dylibOffset;
264+
if (image.headerAddress == imageAddr)
265+
{
266+
// We have found the entry to read!
267+
// TODO: Support 32bit nlist
268+
uint64_t symbolTableStart = localSymbolsAddr + (localSymbolsEntry.nlistStartIndex * sizeof(nlist_64));
269+
TableInfo symbolInfo = { symbolTableStart, localSymbolsEntry.nlistCount };
270+
TableInfo stringInfo = { localStringsAddr, localSymbolsInfo.stringsSize };
271+
const auto symbols = image.header->ReadSymbolTable(*vm, symbolInfo, stringInfo);
272+
m_logger->LogInfoF("Found {} symbols in .symbols for image {}", symbols.size(), image.path.c_str());
273+
for (const auto& sym : symbols)
274+
{
275+
auto [symbol, symbolType] = sym.GetBNSymbolAndType(view);
276+
ApplySymbol(&view, typeLib, symbol, symbolType);
277+
}
278+
}
279+
}
280+
}
216281

217282
// Load objective-c information.
218283
auto objcProcessor = DSCObjC::SharedCacheObjCProcessor(&view, false, image.headerAddress);

view/sharedcache/core/VirtualMemory.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,11 @@ class VirtualMemory
4242
{
4343
std::shared_mutex m_regionMutex;
4444
AddressRangeMap<VirtualMemoryRegion> m_regions;
45+
uint64_t m_addressSize = 8;
4546

4647
public:
48+
uint64_t GetAddressSize() const { return m_addressSize; }
49+
4750
// At no point do we ever store a strong pointer to a file accessor, that is the job of the `FileAccessorCache`.
4851
void MapRegion(WeakFileAccessor fileAccessor, AddressRange mappedRange, uint64_t fileOffset);
4952

0 commit comments

Comments
 (0)