diff --git a/view/macho/machoview.cpp b/view/macho/machoview.cpp index d26769e1b..23031ed07 100644 --- a/view/macho/machoview.cpp +++ b/view/macho/machoview.cpp @@ -14,13 +14,6 @@ #include "lowlevelilinstruction.h" #include "rapidjsonwrapper.h" -enum { - N_STAB = 0xe0, - N_PEXT = 0x10, - N_TYPE = 0x0e, - N_EXT = 0x01 -}; - using namespace BinaryNinja; using namespace std; diff --git a/view/macho/machoview.h b/view/macho/machoview.h index 6a5e80ae3..745085595 100644 --- a/view/macho/machoview.h +++ b/view/macho/machoview.h @@ -268,14 +268,129 @@ typedef int vm_prot_t; #define SEG_UNIXSTACK "__UNIXSTACK" #define SEG_IMPORT "__IMPORT" -//Symbol Types (N_TYPE) -#define N_UNDF 0x0 -#define N_ABS 0x2 -#define N_SECT 0xe -#define N_PBUD 0xc -#define N_INDR 0xa - -#define N_ARM_THUMB_DEF 0x0008 +/* + * Symbols with an index into the string table of zero (n_un.n_strx == 0) are + * defined to have a null, "", name. Therefore all string indexes to non null + * names must not have a zero string index. This is a bit of historical information + * that has never been well documented. + */ + +/* + * The n_type field really contains four fields: + * unsigned char N_STAB:3, + * N_PEXT:1, + * N_TYPE:3, + * N_EXT:1; + * which are used via the following masks. + */ +#define N_STAB 0xe0 /* if any of these bits set, a symbolic debugging entry */ +#define N_PEXT 0x10 /* private external symbol bit */ +#define N_TYPE 0x0e /* mask for the type bits */ +#define N_EXT 0x01 /* external symbol bit, set for external symbols */ + +/* + * Only symbolic debugging entries have some of the N_STAB bits set and if any + * of these bits are set then it is a symbolic debugging entry (a stab). In + * which case then the values of the n_type field (the entire field) are given + * in <mach-o/stab.h> + */ + +/* + * Values for N_TYPE bits of the n_type field. 
+ */ +#define N_UNDF 0x0 /* undefined, n_sect == NO_SECT */ +#define N_ABS 0x2 /* absolute, n_sect == NO_SECT */ +#define N_SECT 0xe /* defined in section number n_sect */ +#define N_PBUD 0xc /* prebound undefined (defined in a dylib) */ +#define N_INDR 0xa /* indirect */ + +/* + * If the type is N_INDR then the symbol is defined to be the same as another + * symbol. In this case the n_value field is an index into the string table + * of the other symbol's name. When the other symbol is defined then they both + * take on the defined type and value. + */ + +/* + * If the type is N_SECT then the n_sect field contains an ordinal of the + * section the symbol is defined in. The sections are numbered from 1 and + * refer to sections in the order they appear in the load commands for the file + * they are in. This means the same ordinal may very well refer to different + * sections in different files. + * + * The n_value field for all symbol table entries (including N_STAB's) gets + * updated by the link editor based on the value of its n_sect field and where + * the section n_sect references gets relocated. If the value of the n_sect + * field is NO_SECT then its n_value field is not changed by the link editor. + */ +#define NO_SECT 0 /* symbol is not in any section */ +#define MAX_SECT 255 /* 1 thru 255 inclusive */ + +/* + * The bit 0x0020 of the n_desc field is used for two non-overlapping purposes + * and has two different symbolic names, N_NO_DEAD_STRIP and N_DESC_DISCARDED. + */ + +/* + * The N_NO_DEAD_STRIP bit of the n_desc field only ever appears in a + * relocatable .o file (MH_OBJECT filetype). And is used to indicate to the + * static link editor it is never to dead strip the symbol. + */ +#define N_NO_DEAD_STRIP 0x0020 /* symbol is not to be dead stripped */ + +/* + * The N_DESC_DISCARDED bit of the n_desc field never appears in a linked image. 
+ * But is used in very rare cases by the dynamic link editor to mark an in + * memory symbol as discarded and no longer used for linking. + */ +#define N_DESC_DISCARDED 0x0020 /* symbol is discarded */ + +/* + * The N_WEAK_REF bit of the n_desc field indicates to the dynamic linker that + * the undefined symbol is allowed to be missing and is to have the address of + * zero when missing. + */ +#define N_WEAK_REF 0x0040 /* symbol is weak referenced */ + +/* + * The N_WEAK_DEF bit of the n_desc field indicates to the static and dynamic + * linkers that the symbol definition is weak, allowing a non-weak symbol to + * also be used which causes the weak definition to be discarded. Currently this + * is only supported for symbols in coalesced sections. + */ +#define N_WEAK_DEF 0x0080 /* coalesced symbol is a weak definition */ + +/* + * The N_REF_TO_WEAK bit of the n_desc field indicates to the dynamic linker + * that the undefined symbol should be resolved using flat namespace searching. + */ +#define N_REF_TO_WEAK 0x0080 /* reference to a weak symbol */ + +/* + * The N_ARM_THUMB_DEF bit of the n_desc field indicates that the symbol is + * a definition of a Thumb function. + */ +#define N_ARM_THUMB_DEF 0x0008 /* symbol is a Thumb function (ARM) */ + +/* + * The N_SYMBOL_RESOLVER bit of the n_desc field indicates that the + * function is actually a resolver function and should + * be called to get the address of the real function to use. + * This bit is only available in .o files (MH_OBJECT filetype) + */ +#define N_SYMBOL_RESOLVER 0x0100 + +/* + * The N_ALT_ENTRY bit of the n_desc field indicates that the + * symbol is pinned to the previous content. + */ +#define N_ALT_ENTRY 0x0200 + +/* + * The N_COLD_FUNC bit of the n_desc field indicates that the symbol is used + * infrequently and the linker should order it towards the end of the section. 
+ */ +#define N_COLD_FUNC 0x0400 /* * An indirect symbol table entry is simply a 32bit index into the symbol table diff --git a/view/sharedcache/CMakeLists.txt b/view/sharedcache/CMakeLists.txt index 63ba602bf..0f6abea22 100644 --- a/view/sharedcache/CMakeLists.txt +++ b/view/sharedcache/CMakeLists.txt @@ -30,7 +30,7 @@ endif() set(HARD_FAIL_MODE OFF CACHE BOOL "Enable hard fail mode") set(SLIDEINFO_DEBUG_TAGS OFF CACHE BOOL "Enable debug tags in slideinfo") set(VIEW_NAME "DSCView" CACHE STRING "Name of the view") -set(METADATA_VERSION 2 CACHE STRING "Version of the metadata") +set(METADATA_VERSION 3 CACHE STRING "Version of the metadata") add_subdirectory(core) add_subdirectory(api) diff --git a/view/sharedcache/api/python/_sharedcachecore.py b/view/sharedcache/api/python/_sharedcachecore.py index d208048bc..ec99f659c 100644 --- a/view/sharedcache/api/python/_sharedcachecore.py +++ b/view/sharedcache/api/python/_sharedcachecore.py @@ -42,6 +42,7 @@ def free_string(value:ctypes.c_char_p) -> None: BNFreeString(ctypes.cast(value, ctypes.POINTER(ctypes.c_byte))) # Type definitions +BackingCacheTypeEnum = ctypes.c_int from binaryninja._binaryninjacore import BNBinaryView, BNBinaryViewHandle class BNDSCBackingCache(ctypes.Structure): @property @@ -110,7 +111,7 @@ class BNSharedCache(ctypes.Structure): # Structure definitions BNDSCBackingCache._fields_ = [ ("_path", ctypes.c_char_p), - ("isPrimary", ctypes.c_bool), + ("cacheType", BackingCacheTypeEnum), ("mappings", ctypes.POINTER(BNDSCBackingCacheMapping)), ("mappingCount", ctypes.c_ulonglong), ] diff --git a/view/sharedcache/api/python/sharedcache.py b/view/sharedcache/api/python/sharedcache.py index b660d9d97..d77794a83 100644 --- a/view/sharedcache/api/python/sharedcache.py +++ b/view/sharedcache/api/python/sharedcache.py @@ -52,14 +52,21 @@ def __repr__(self): @dataclasses.dataclass class DSCBackingCache: path: str - isPrimary: bool + cacheType: BackingCacheType mappings: list[DSCBackingCacheMapping] def 
__str__(self): return repr(self) def __repr__(self): - return f"" + match self.cacheType: + case BackingCacheType.BackingCacheTypePrimary: + cacheTypeStr = 'Primary' + case BackingCacheType.BackingCacheTypeSecondary: + cacheTypeStr = 'Secondary' + case BackingCacheType.BackingCacheTypeSymbols: + cacheTypeStr = 'Symbols' + return f"" @dataclasses.dataclass @@ -136,7 +143,7 @@ def caches(self): mappings.append(mapping) result.append(DSCBackingCache( value[i].path, - value[i].isPrimary, + value[i].cacheType, mappings )) diff --git a/view/sharedcache/api/python/sharedcache_enums.py b/view/sharedcache/api/python/sharedcache_enums.py index 346684245..ea86b5c63 100644 --- a/view/sharedcache/api/python/sharedcache_enums.py +++ b/view/sharedcache/api/python/sharedcache_enums.py @@ -1,6 +1,12 @@ import enum +class BackingCacheType(enum.IntEnum): + BackingCacheTypePrimary = 0 + BackingCacheTypeSecondary = 1 + BackingCacheTypeSymbols = 2 + + class DSCViewLoadProgress(enum.IntEnum): LoadProgressNotStarted = 0 LoadProgressLoadingCaches = 1 diff --git a/view/sharedcache/api/sharedcache.cpp b/view/sharedcache/api/sharedcache.cpp index 5ca7c480e..0be2599dd 100644 --- a/view/sharedcache/api/sharedcache.cpp +++ b/view/sharedcache/api/sharedcache.cpp @@ -91,7 +91,7 @@ namespace SharedCacheAPI { { BackingCache cache; cache.path = value[i].path; - cache.isPrimary = value[i].isPrimary; + cache.cacheType = value[i].cacheType; for (size_t j = 0; j < value[i].mappingCount; j++) { BackingCacheMapping mapping; diff --git a/view/sharedcache/api/sharedcacheapi.h b/view/sharedcache/api/sharedcacheapi.h index 19f021e7c..e1c5bec3e 100644 --- a/view/sharedcache/api/sharedcacheapi.h +++ b/view/sharedcache/api/sharedcacheapi.h @@ -105,7 +105,7 @@ namespace SharedCacheAPI { struct BackingCache { std::string path; - bool isPrimary; + BNBackingCacheType cacheType; std::vector mappings; }; diff --git a/view/sharedcache/api/sharedcachecore.h b/view/sharedcache/api/sharedcachecore.h index 
9fc332756..ed462ac74 100644 --- a/view/sharedcache/api/sharedcachecore.h +++ b/view/sharedcache/api/sharedcachecore.h @@ -64,6 +64,12 @@ extern "C" LoadProgressFinished, } BNDSCViewLoadProgress; + typedef enum BNBackingCacheType { + BackingCacheTypePrimary, + BackingCacheTypeSecondary, + BackingCacheTypeSymbols, + } BNBackingCacheType; + typedef struct BNBinaryView BNBinaryView; typedef struct BNSharedCache BNSharedCache; @@ -97,7 +103,7 @@ extern "C" typedef struct BNDSCBackingCache { char* path; - bool isPrimary; + BNBackingCacheType cacheType; BNDSCBackingCacheMapping* mappings; size_t mappingCount; } BNDSCBackingCache; diff --git a/view/sharedcache/core/SharedCache.cpp b/view/sharedcache/core/SharedCache.cpp index a7c7ffb01..8944b639b 100644 --- a/view/sharedcache/core/SharedCache.cpp +++ b/view/sharedcache/core/SharedCache.cpp @@ -72,7 +72,7 @@ struct ViewStateCacheStore { std::string m_baseFilePath; std::unordered_map>>> m_exportInfos; - std::unordered_map>>> m_symbolInfos; + std::unordered_map>> m_symbolInfos; }; static std::recursive_mutex viewStateMutex; @@ -254,13 +254,17 @@ void SharedCache::PerformInitialLoad() m_cacheFormat = iOS16CacheFormat; } + // Don't store directly into `m_imageStarts` so that the order is preserved. That way + // `imageIndex` can be assigned to a `CacheImage` in `m_images`. 
+ std::vector> imageStarts; + switch (m_cacheFormat) { case RegularCacheFormat: { dyld_cache_mapping_info mapping {}; BackingCache cache; - cache.isPrimary = true; + cache.cacheType = BackingCacheTypePrimary; cache.path = path; for (size_t i = 0; i < primaryCacheHeader.mappingCount; i++) @@ -280,7 +284,7 @@ void SharedCache::PerformInitialLoad() { baseFile->Read(&img, primaryCacheHeader.imagesOffsetOld + (i * sizeof(img)), sizeof(img)); auto iname = baseFile->ReadNullTermString(img.pathFileOffset); - m_imageStarts[iname] = img.address; + imageStarts.push_back({iname, img.address}); } m_logger->LogInfo("Found %d images in the shared cache", primaryCacheHeader.imagesCountOld); @@ -328,7 +332,7 @@ void SharedCache::PerformInitialLoad() // briefly. BackingCache cache; - cache.isPrimary = true; + cache.cacheType = BackingCacheTypePrimary; cache.path = path; for (size_t i = 0; i < primaryCacheHeader.mappingCount; i++) @@ -348,7 +352,7 @@ void SharedCache::PerformInitialLoad() { baseFile->Read(&img, primaryCacheHeader.imagesOffset + (i * sizeof(img)), sizeof(img)); auto iname = baseFile->ReadNullTermString(img.pathFileOffset); - m_imageStarts[iname] = img.address; + imageStarts.push_back({iname, img.address}); } if (primaryCacheHeader.branchPoolsCount) @@ -356,7 +360,7 @@ void SharedCache::PerformInitialLoad() std::vector pool {}; for (size_t i = 0; i < primaryCacheHeader.branchPoolsCount; i++) { - m_imageStarts["dyld_shared_cache_branch_islands_" + std::to_string(i)] = baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize())); + imageStarts.push_back({"dyld_shared_cache_branch_islands_" + std::to_string(i), baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize()))}); } } std::string mainFileName = base_name(path); @@ -402,7 +406,7 @@ void SharedCache::PerformInitialLoad() dyld_cache_mapping_info subCacheMapping {}; BackingCache subCache; - subCache.isPrimary = false; + subCache.cacheType = 
BackingCacheTypeSecondary; subCache.path = subCachePath; for (size_t j = 0; j < subCacheHeader.mappingCount; j++) @@ -439,7 +443,7 @@ void SharedCache::PerformInitialLoad() dyld_cache_mapping_info mapping {}; // We're going to reuse this for all of the mappings. We only need it // briefly. BackingCache cache; - cache.isPrimary = true; + cache.cacheType = BackingCacheTypePrimary; cache.path = path; for (size_t i = 0; i < primaryCacheHeader.mappingCount; i++) @@ -459,7 +463,7 @@ void SharedCache::PerformInitialLoad() { baseFile->Read(&img, primaryCacheHeader.imagesOffset + (i * sizeof(img)), sizeof(img)); auto iname = baseFile->ReadNullTermString(img.pathFileOffset); - m_imageStarts[iname] = img.address; + imageStarts.push_back({iname, img.address}); } if (primaryCacheHeader.branchPoolsCount) @@ -467,7 +471,7 @@ void SharedCache::PerformInitialLoad() std::vector pool {}; for (size_t i = 0; i < primaryCacheHeader.branchPoolsCount; i++) { - m_imageStarts["dyld_shared_cache_branch_islands_" + std::to_string(i)] = baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize())); + imageStarts.push_back({"dyld_shared_cache_branch_islands_" + std::to_string(i), baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize()))}); } } @@ -495,7 +499,7 @@ void SharedCache::PerformInitialLoad() subCacheFile->Read(&subCacheHeader, 0, headerSize); BackingCache subCache; - subCache.isPrimary = false; + subCache.cacheType = BackingCacheTypeSecondary; subCache.path = subCachePath; dyld_cache_mapping_info subCacheMapping {}; @@ -565,7 +569,7 @@ void SharedCache::PerformInitialLoad() dyld_cache_mapping_info mapping {}; BackingCache cache; - cache.isPrimary = true; + cache.cacheType = BackingCacheTypePrimary; cache.path = path; for (size_t i = 0; i < primaryCacheHeader.mappingCount; i++) @@ -586,7 +590,7 @@ void SharedCache::PerformInitialLoad() { baseFile->Read(&img, primaryCacheHeader.imagesOffset + (i * sizeof(img)), 
sizeof(img)); auto iname = baseFile->ReadNullTermString(img.pathFileOffset); - m_imageStarts[iname] = img.address; + imageStarts.push_back({iname, img.address}); } if (primaryCacheHeader.branchPoolsCount) @@ -594,7 +598,7 @@ void SharedCache::PerformInitialLoad() std::vector pool {}; for (size_t i = 0; i < primaryCacheHeader.branchPoolsCount; i++) { - m_imageStarts["dyld_shared_cache_branch_islands_" + std::to_string(i)] = baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize())); + imageStarts.push_back({"dyld_shared_cache_branch_islands_" + std::to_string(i), baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize()))}); } } @@ -645,7 +649,7 @@ void SharedCache::PerformInitialLoad() dyld_cache_mapping_info subCacheMapping {}; BackingCache subCache; - subCache.isPrimary = false; + subCache.cacheType = BackingCacheTypeSecondary; subCache.path = subCachePath; for (size_t j = 0; j < subCacheHeader.mappingCount; j++) @@ -705,7 +709,7 @@ void SharedCache::PerformInitialLoad() subCacheFile->Read(&subCacheHeader, 0, headerSize); BackingCache subCache; - subCache.isPrimary = false; + subCache.cacheType = BackingCacheTypeSymbols; subCache.path = subCachePath; dyld_cache_mapping_info subCacheMapping {}; @@ -724,7 +728,9 @@ void SharedCache::PerformInitialLoad() m_backingCaches.push_back(subCache); } catch (...) 
- {} + { + m_logger->LogWarn("Failed to load the symbols cache"); + } break; } } @@ -741,8 +747,10 @@ void SharedCache::PerformInitialLoad() m_logger->LogError("Failed to map VM pages for Shared Cache on initial load, this is fatal."); return; } - for (const auto &start : m_imageStarts) + for (uint32_t imageIndex = 0; imageIndex < imageStarts.size(); imageIndex++) { + const auto& start = imageStarts[imageIndex]; + m_imageStarts[start.first] = start.second; try { auto imageHeader = SharedCache::LoadHeaderForAddress(vm, start.second, start.first); if (imageHeader) @@ -754,6 +762,7 @@ void SharedCache::PerformInitialLoad() } m_headers[start.second] = imageHeader.value(); CacheImage image; + image.index = imageIndex; image.installName = start.first; image.headerLocation = start.second; for (const auto& segment : imageHeader->segments) @@ -2257,6 +2266,128 @@ std::optional SharedCache::LoadHeaderForAddress(std::sha return header; } +void SharedCache::ProcessSymbols(std::shared_ptr file, const SharedCacheMachOHeader& header, uint64_t stringsOffset, size_t stringsSize, uint64_t nlistEntriesOffset, uint32_t nlistCount, uint32_t nlistStartIndex) +{ + auto addressSize = m_dscView->GetAddressSize(); + auto strings = file->ReadBuffer(stringsOffset, stringsSize); + + for (uint64_t i = 0; i < nlistCount; i++) + { + uint64_t entryIndex = (nlistStartIndex + i); + + nlist_64 nlist; + if (addressSize == 4) + { + // 32-bit DSC + struct nlist nlist32; + file->Read(&nlist, nlistEntriesOffset + (entryIndex * sizeof(nlist32)), sizeof(nlist32)); + nlist.n_strx = nlist32.n_strx; + nlist.n_type = nlist32.n_type; + nlist.n_sect = nlist32.n_sect; + nlist.n_desc = nlist32.n_desc; + nlist.n_value = nlist32.n_value; + } + else + { + // 64-bit DSC + file->Read(&nlist, nlistEntriesOffset + (entryIndex * sizeof(nlist)), sizeof(nlist)); + } + + auto symbolAddress = nlist.n_value; + if (((nlist.n_type & N_TYPE) == N_INDR) || symbolAddress == 0) + continue; + + if (nlist.n_strx >= stringsSize) + { + 
m_logger->LogError("Symbol entry at index %llu has a string offset of %u which is outside the strings buffer of size %llu for file %s", entryIndex, nlist.n_strx, stringsSize, file->Path().c_str()); + continue; + } + + std::string symbolName((char*)strings.GetDataAt(nlist.n_strx)); + if (symbolName == "") + continue; + + BNSymbolType symbolType = DataSymbol; + uint32_t flags; + if ((nlist.n_type & N_TYPE) == N_SECT && nlist.n_sect > 0 && (size_t)(nlist.n_sect - 1) < header.sections.size()) + {} + else if ((nlist.n_type & N_TYPE) == N_ABS) + {} + else if ((nlist.n_type & N_EXT)) + { + symbolType = ExternalSymbol; + } + else + continue; + + for (auto s : header.sections) + { + if (s.addr <= symbolAddress && symbolAddress < s.addr + s.size) + { + flags = s.flags; + } + } + + if (symbolType != ExternalSymbol) + { + if ((flags & S_ATTR_PURE_INSTRUCTIONS) == S_ATTR_PURE_INSTRUCTIONS + || (flags & S_ATTR_SOME_INSTRUCTIONS) == S_ATTR_SOME_INSTRUCTIONS) + symbolType = FunctionSymbol; + else + symbolType = DataSymbol; + } + if ((nlist.n_desc & N_ARM_THUMB_DEF) == N_ARM_THUMB_DEF) + symbolAddress++; + + m_symbolInfos[header.textBase].push_back(new Symbol(symbolType, symbolName, symbolAddress, GlobalBinding)); + } +} + +void SharedCache::ApplySymbol(Ref view, Ref typeLib, Ref symbol) +{ + Ref func = nullptr; + auto symbolAddress = symbol->GetAddress(); + + if (symbol->GetType() == FunctionSymbol) + { + Ref targetPlatform = view->GetDefaultPlatform(); + func = view->AddFunctionForAnalysis(targetPlatform, symbolAddress); + } + if (typeLib) + { + auto type = m_dscView->ImportTypeLibraryObject(typeLib, {symbol->GetFullName()}); + if (type) + view->DefineAutoSymbolAndVariableOrFunction(view->GetDefaultPlatform(), symbol, type); + else + view->DefineAutoSymbol(symbol); + + if (!func) + func = view->GetAnalysisFunction(view->GetDefaultPlatform(), symbolAddress); + if (func) + { + if (symbol->GetFullName() == "_objc_msgSend") + { + func->SetHasVariableArguments(false); + } + else if 
(symbol->GetFullName().find("_objc_retain_x") != std::string::npos || symbol->GetFullName().find("_objc_release_x") != std::string::npos) + { + auto x = symbol->GetFullName().rfind("x"); + auto num = symbol->GetFullName().substr(x + 1); + + std::vector callTypeParams; + auto cc = m_dscView->GetDefaultArchitecture()->GetCallingConventionByName("apple-arm64-objc-fast-arc-" + num); + + callTypeParams.push_back({"obj", m_dscView->GetTypeByName({ "id" }), true, BinaryNinja::Variable()}); + + auto funcType = BinaryNinja::Type::FunctionType(m_dscView->GetTypeByName({ "id" }), cc, callTypeParams); + func->SetUserType(funcType); + } + } + } + else + view->DefineAutoSymbol(symbol); +} + void SharedCache::InitializeHeader( Ref view, VM* vm, SharedCacheMachOHeader header, std::vector regionsToLoad) { @@ -2550,86 +2681,84 @@ void SharedCache::InitializeHeader( } } - view->BeginBulkModifySymbols(); if (header.symtab.symoff != 0 && header.linkeditPresent && vm->AddressIsMapped(header.linkeditSegment.vmaddr)) { // Mach-O View symtab processing with // a ton of stuff cut out so it can work auto reader = vm->MappingAtAddress(header.linkeditSegment.vmaddr).first.fileAccessor->lock(); - // auto symtab = reader->ReadBuffer(header.symtab.symoff, header.symtab.nsyms * sizeof(nlist_64)); - auto strtab = reader->ReadBuffer(header.symtab.stroff, header.symtab.strsize); - nlist_64 sym; - memset(&sym, 0, sizeof(sym)); - auto N_TYPE = 0xE; // idk - std::vector>> symbolInfos; - for (size_t i = 0; i < header.symtab.nsyms; i++) - { - reader->Read(&sym, header.symtab.symoff + i * sizeof(nlist_64), sizeof(nlist_64)); - if (sym.n_strx >= header.symtab.strsize || ((sym.n_type & N_TYPE) == N_INDR)) - continue; + ProcessSymbols(reader, header, header.symtab.stroff, header.symtab.strsize, header.symtab.symoff, header.symtab.nsyms); + } - std::string symbol((char*)strtab.GetDataAt(sym.n_strx)); - // BNLogError("%s: 0x%llx", symbol.c_str(), sym.n_value); - if (symbol == "") + int64_t imageIndex = -1; + 
for (auto& cacheImage : m_images) + { + if (cacheImage.headerLocation == header.textBase) + { + imageIndex = cacheImage.index; + break; + } + } + if (imageIndex > -1) + { + auto addressSize = m_dscView->GetAddressSize(); + for (auto backingCache : m_backingCaches) + { + if (backingCache.cacheType != BackingCacheTypeSymbols) continue; - BNSymbolType type = DataSymbol; - uint32_t flags; - if ((sym.n_type & N_TYPE) == N_SECT && sym.n_sect > 0 && (size_t)(sym.n_sect - 1) < header.sections.size()) - {} - else if ((sym.n_type & N_TYPE) == N_ABS) - {} - else if ((sym.n_type & 0x1)) + auto subCacheFile = MMappedFileAccessor::Open(m_dscView, m_dscView->GetFile()->GetSessionId(), backingCache.path)->lock(); + + dyld_cache_header subCacheHeader {}; + uint64_t headerSize = subCacheFile->ReadUInt32(__offsetof(dyld_cache_header, mappingOffset)); + if (headerSize > sizeof(dyld_cache_header)) { - type = ExternalSymbol; + m_logger->LogDebug("Header size is larger than expected, using default size"); + headerSize = sizeof(dyld_cache_header); } - else - continue; + subCacheFile->Read(&subCacheHeader, 0, headerSize); - for (auto s : header.sections) + if (subCacheHeader.localSymbolsOffset != 0) { - if (s.addr < sym.n_value) + dyld_cache_local_symbols_info localSymbolsInfo; + subCacheFile->Read(&localSymbolsInfo, subCacheHeader.localSymbolsOffset, sizeof(localSymbolsInfo)); + + if (imageIndex < localSymbolsInfo.entriesCount) { - if (s.addr + s.size > sym.n_value) + dyld_cache_local_symbols_entry_64 localSymbolsEntry; + if (addressSize == 4) + { + // 32-bit DSC + dyld_cache_local_symbols_entry localSymbolsEntry32; + subCacheFile->Read(&localSymbolsEntry32, subCacheHeader.localSymbolsOffset + localSymbolsInfo.entriesOffset + (imageIndex * sizeof(localSymbolsEntry32)), sizeof(localSymbolsEntry32)); + localSymbolsEntry.dylibOffset = localSymbolsEntry32.dylibOffset; + localSymbolsEntry.nlistStartIndex = localSymbolsEntry32.nlistStartIndex; + localSymbolsEntry.nlistCount = 
localSymbolsEntry32.nlistCount; + } + else { - flags = s.flags; + // 64-bit DSC + subCacheFile->Read(&localSymbolsEntry, subCacheHeader.localSymbolsOffset + localSymbolsInfo.entriesOffset + (imageIndex * sizeof(localSymbolsEntry)), sizeof(localSymbolsEntry)); } + ProcessSymbols(subCacheFile, header, subCacheHeader.localSymbolsOffset + localSymbolsInfo.stringsOffset, localSymbolsInfo.stringsSize, subCacheHeader.localSymbolsOffset + localSymbolsInfo.nlistOffset, localSymbolsEntry.nlistCount, localSymbolsEntry.nlistStartIndex); } - } - - if (type != ExternalSymbol) - { - if ((flags & S_ATTR_PURE_INSTRUCTIONS) == S_ATTR_PURE_INSTRUCTIONS - || (flags & S_ATTR_SOME_INSTRUCTIONS) == S_ATTR_SOME_INSTRUCTIONS) - type = FunctionSymbol; else - type = DataSymbol; - } - if ((sym.n_desc & N_ARM_THUMB_DEF) == N_ARM_THUMB_DEF) - sym.n_value++; - - auto symbolObj = new Symbol(type, symbol, sym.n_value, GlobalBinding); - if (type == FunctionSymbol) - { - Ref targetPlatform = view->GetDefaultPlatform(); - view->AddFunctionForAnalysis(targetPlatform, sym.n_value); - } - if (typeLib) - { - auto _type = m_dscView->ImportTypeLibraryObject(typeLib, {symbolObj->GetFullName()}); - if (_type) { - view->DefineAutoSymbolAndVariableOrFunction(view->GetDefaultPlatform(), symbolObj, _type); + m_logger->LogDebug("No entry for image index %lld in symbols file %s with %u entries", imageIndex, subCacheFile->Path().c_str(), localSymbolsInfo.entriesCount); } - else - view->DefineAutoSymbol(symbolObj); } - else - view->DefineAutoSymbol(symbolObj); - symbolInfos.push_back({sym.n_value, {type, symbol}}); } - m_symbolInfos[header.textBase] = symbolInfos; + m_logger->LogDebug("Loaded local symbols"); + } + else + { + m_logger->LogError("Failed to identify the DSC image that contains the header at 0x%llx", header.textBase); + } + + view->BeginBulkModifySymbols(); + for (auto symbol : m_symbolInfos[header.textBase]) + { + ApplySymbol(view, typeLib, symbol); } if (header.exportTriePresent && 
header.linkeditPresent && vm->AddressIsMapped(header.linkeditSegment.vmaddr)) @@ -2639,41 +2768,7 @@ void SharedCache::InitializeHeader( for (const auto& symbol : symbols) { exportMapping.push_back({symbol->GetAddress(), {symbol->GetType(), symbol->GetRawName()}}); - if (typeLib) - { - auto type = m_dscView->ImportTypeLibraryObject(typeLib, {symbol->GetFullName()}); - - if (type) - { - view->DefineAutoSymbolAndVariableOrFunction(view->GetDefaultPlatform(), symbol, type); - } - else - view->DefineAutoSymbol(symbol); - - if (view->GetAnalysisFunction(view->GetDefaultPlatform(), symbol->GetAddress())) - { - auto func = view->GetAnalysisFunction(view->GetDefaultPlatform(), symbol->GetAddress()); - if (symbol->GetFullName() == "_objc_msgSend") - { - func->SetHasVariableArguments(false); - } - else if (symbol->GetFullName().find("_objc_retain_x") != std::string::npos || symbol->GetFullName().find("_objc_release_x") != std::string::npos) - { - auto x = symbol->GetFullName().rfind("x"); - auto num = symbol->GetFullName().substr(x + 1); - - std::vector callTypeParams; - auto cc = m_dscView->GetDefaultArchitecture()->GetCallingConventionByName("apple-arm64-objc-fast-arc-" + num); - - callTypeParams.push_back({"obj", m_dscView->GetTypeByName({ "id" }), true, BinaryNinja::Variable()}); - - auto funcType = BinaryNinja::Type::FunctionType(m_dscView->GetTypeByName({ "id" }), cc, callTypeParams); - func->SetUserType(funcType); - } - } - } - else - view->DefineAutoSymbol(symbol); + ApplySymbol(view, typeLib, symbol); } m_exportInfos[header.textBase] = exportMapping; } @@ -3168,7 +3263,7 @@ extern "C" for (size_t i = 0; i < viewCaches.size(); i++) { caches[i].path = BNAllocString(viewCaches[i].path.c_str()); - caches[i].isPrimary = viewCaches[i].isPrimary; + caches[i].cacheType = viewCaches[i].cacheType; BNDSCBackingCacheMapping* mappings; mappings = (BNDSCBackingCacheMapping*)malloc(sizeof(BNDSCBackingCacheMapping) * viewCaches[i].mappings.size()); diff --git 
a/view/sharedcache/core/SharedCache.h b/view/sharedcache/core/SharedCache.h index 81c026203..3e182edbc 100644 --- a/view/sharedcache/core/SharedCache.h +++ b/view/sharedcache/core/SharedCache.h @@ -59,12 +59,14 @@ namespace SharedCacheCore { struct CacheImage : public MetadataSerializable { + uint32_t index; // image index in the DSC std::string installName; uint64_t headerLocation; std::vector regions; void Store() override { + MSS(index); MSS(installName); MSS(headerLocation); rapidjson::Value key("regions", m_activeContext.allocator); @@ -78,6 +80,7 @@ namespace SharedCacheCore { void Load() override { + MSL(index); MSL(installName); MSL(headerLocation); auto bArr = m_activeDeserContext.doc["regions"].GetArray(); @@ -94,19 +97,19 @@ namespace SharedCacheCore { struct BackingCache : public MetadataSerializable { std::string path; - bool isPrimary = false; + BNBackingCacheType cacheType = BackingCacheTypeSecondary; std::vector>> mappings; void Store() override { MSS(path); - MSS(isPrimary); + MSS_CAST(cacheType, uint32_t); MSS(mappings); } void Load() override { MSL(path); - MSL(isPrimary); + MSL_CAST(cacheType, uint32_t, BNBackingCacheType); MSL(mappings); } }; @@ -976,6 +979,27 @@ namespace SharedCacheCore { } m_activeContext.doc.AddMember("exportInfos", exportInfos, m_activeContext.allocator); + rapidjson::Document symbolInfos(rapidjson::kArrayType); + for (const auto& [headerLocation, symbolVec] : m_symbolInfos) + { + rapidjson::Value subObj(rapidjson::kObjectType); + rapidjson::Value subArr(rapidjson::kArrayType); + for (const auto& symbol : symbolVec) + { + rapidjson::Value subSubArr(rapidjson::kArrayType); + subSubArr.PushBack(symbol->GetAddress(), m_activeContext.allocator); + subSubArr.PushBack(symbol->GetType(), m_activeContext.allocator); + subSubArr.PushBack(symbol->GetRawName(), m_activeContext.allocator); + subArr.PushBack(subSubArr, m_activeContext.allocator); + } + + subObj.AddMember("key", headerLocation, m_activeContext.allocator); + 
subObj.AddMember("value", subArr, m_activeContext.allocator); + + symbolInfos.PushBack(subObj, m_activeContext.allocator); + } + m_activeContext.doc.AddMember("symbolInfos", symbolInfos, m_activeContext.allocator); + rapidjson::Value backingCaches(rapidjson::kArrayType); for (auto bc : m_backingCaches) { @@ -1054,13 +1078,12 @@ namespace SharedCacheCore { m_symbolInfos.clear(); for (auto& symbolInfo : m_activeDeserContext.doc["symbolInfos"].GetArray()) { - std::vector>> symbolInfoVec; - for (auto& symbolInfoPair : symbolInfo.GetArray()) + std::vector> symbolVec; + for (auto& symbolInfoPair : symbolInfo["value"].GetArray()) { - symbolInfoVec.push_back({symbolInfoPair[0].GetUint64(), - {(BNSymbolType)symbolInfoPair[1].GetUint(), symbolInfoPair[2].GetString()}}); + symbolVec.push_back(new Symbol((BNSymbolType)symbolInfoPair[1].GetUint(), symbolInfoPair[2].GetString(), symbolInfoPair[0].GetUint64())); } - m_symbolInfos[symbolInfo[0].GetUint64()] = symbolInfoVec; + m_symbolInfos[symbolInfo["key"].GetUint64()] = symbolVec; } m_backingCaches.clear(); for (auto& bcV : m_activeDeserContext.doc["backingCaches"].GetArray()) @@ -1116,8 +1139,7 @@ namespace SharedCacheCore { DSCViewState m_viewState = DSCViewStateUnloaded; std::unordered_map>>> m_exportInfos; - std::unordered_map>>> - m_symbolInfos; + std::unordered_map>> m_symbolInfos; // --- // Serialized once by PerformInitialLoad and available after m_viewState == Loaded @@ -1196,6 +1218,9 @@ namespace SharedCacheCore { const std::string& currentText, size_t cursor, uint32_t endGuard); std::vector> ParseExportTrie( std::shared_ptr linkeditFile, SharedCacheMachOHeader header); + + void ProcessSymbols(std::shared_ptr file, const SharedCacheMachOHeader& header, uint64_t stringsOffset, size_t stringsSize, uint64_t nlistEntriesOffset, uint32_t nlistCount, uint32_t nlistStartIndex = 0); + void ApplySymbol(Ref view, Ref typeLib, Ref symbol); };