From f990bc8bc131438c1fc3fbf8841078c4865ae69b Mon Sep 17 00:00:00 2001 From: WeiN76LQh Date: Thu, 21 Nov 2024 17:36:01 +0000 Subject: [PATCH 1/3] [SharedCache] Serialize `SharedCache::m_symbolInfos` `SharedCache::m_symbolInfos` isn't being serialized but there is an attempt to deserialize it. This commit adds in the code to serialize it. I slightly modified the format because I didn't really understand how it was expected to be serialized based on the deserialization code. The deserialization code looked wrong to me but it's likely a misunderstanding on my part. I kept it similar to how `m_exportInfos` is serialized. --- view/sharedcache/core/SharedCache.h | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/view/sharedcache/core/SharedCache.h b/view/sharedcache/core/SharedCache.h index 81c026203..8a6e1f394 100644 --- a/view/sharedcache/core/SharedCache.h +++ b/view/sharedcache/core/SharedCache.h @@ -976,6 +976,27 @@ namespace SharedCacheCore { } m_activeContext.doc.AddMember("exportInfos", exportInfos, m_activeContext.allocator); + rapidjson::Document symbolInfos(rapidjson::kArrayType); + for (const auto& pair1 : m_symbolInfos) + { + rapidjson::Value subObj(rapidjson::kObjectType); + rapidjson::Value subArr(rapidjson::kArrayType); + for (const auto& pair2 : pair1.second) + { + rapidjson::Value subSubArr(rapidjson::kArrayType); + subSubArr.PushBack(pair2.first, m_activeContext.allocator); + subSubArr.PushBack(pair2.second.first, m_activeContext.allocator); + subSubArr.PushBack(pair2.second.second, m_activeContext.allocator); + subArr.PushBack(subSubArr, m_activeContext.allocator); + } + + subObj.AddMember("key", pair1.first, m_activeContext.allocator); + subObj.AddMember("value", subArr, m_activeContext.allocator); + + symbolInfos.PushBack(subObj, m_activeContext.allocator); + } + m_activeContext.doc.AddMember("symbolInfos", symbolInfos, m_activeContext.allocator); + rapidjson::Value backingCaches(rapidjson::kArrayType); for
(auto bc : m_backingCaches) { @@ -1055,12 +1076,12 @@ namespace SharedCacheCore { for (auto& symbolInfo : m_activeDeserContext.doc["symbolInfos"].GetArray()) { std::vector>> symbolInfoVec; - for (auto& symbolInfoPair : symbolInfo.GetArray()) + for (auto& symbolInfoPair : symbolInfo["value"].GetArray()) { symbolInfoVec.push_back({symbolInfoPair[0].GetUint64(), {(BNSymbolType)symbolInfoPair[1].GetUint(), symbolInfoPair[2].GetString()}}); } - m_symbolInfos[symbolInfo[0].GetUint64()] = symbolInfoVec; + m_symbolInfos[symbolInfo["key"].GetUint64()] = symbolInfoVec; } m_backingCaches.clear(); for (auto& bcV : m_activeDeserContext.doc["backingCaches"].GetArray()) From 722f804dc60aa064c82a99313eada9f1cea2d710 Mon Sep 17 00:00:00 2001 From: WeiN76LQh Date: Thu, 28 Nov 2024 18:52:19 +0000 Subject: [PATCH 2/3] [SharedCache] Process the .symbols cache file A significant number of symbols are not being defined because there is currently no support for parsing the .symbols cache file. This commit adds that support. `m_symbolInfos` has been modified to be a vector of references to `Symbol`s which is similar to [this PR](https://github.com/Vector35/binaryninja-api/pull/6197). It makes more sense for the use case where `m_symbolInfos` is used as a symbol cache, otherwise a bunch of time is spent transforming between the old style of `m_symbolInfos` entries and Binary Ninja `Symbol`s. This commit does require a metadata version bump. I felt this was necessary to determine which symbols to load from the symbols cache. The problem is that the `m_images` container does not store the images in the order they are found in the DSC. The index they are at determines the location of their symbols in the symbols cache file. Rather than converting `m_images` to a vector and relying on its ordering being correct, it seemed more prudent to store the index of the image in the `CacheImage` structure. As this is serialized, the metadata version has to be bumped to accommodate the change.
--- view/macho/machoview.h | 131 ++++++- view/sharedcache/CMakeLists.txt | 2 +- .../api/python/_sharedcachecore.py | 3 +- view/sharedcache/api/python/sharedcache.py | 13 +- .../api/python/sharedcache_enums.py | 6 + view/sharedcache/api/sharedcache.cpp | 2 +- view/sharedcache/api/sharedcacheapi.h | 2 +- view/sharedcache/api/sharedcachecore.h | 8 +- view/sharedcache/core/SharedCache.cpp | 319 ++++++++++++------ view/sharedcache/core/SharedCache.h | 34 +- 10 files changed, 377 insertions(+), 143 deletions(-) diff --git a/view/macho/machoview.h b/view/macho/machoview.h index 6a5e80ae3..745085595 100644 --- a/view/macho/machoview.h +++ b/view/macho/machoview.h @@ -268,14 +268,129 @@ typedef int vm_prot_t; #define SEG_UNIXSTACK "__UNIXSTACK" #define SEG_IMPORT "__IMPORT" -//Symbol Types (N_TYPE) -#define N_UNDF 0x0 -#define N_ABS 0x2 -#define N_SECT 0xe -#define N_PBUD 0xc -#define N_INDR 0xa - -#define N_ARM_THUMB_DEF 0x0008 +/* + * Symbols with an index into the string table of zero (n_un.n_strx == 0) are + * defined to have a null, "", name. Therefore all string indexes to non null + * names must not have a zero string index. This is a bit of historical information + * that has never been well documented. + */ + +/* + * The n_type field really contains four fields: + * unsigned char N_STAB:3, + * N_PEXT:1, + * N_TYPE:3, + * N_EXT:1; + * which are used via the following masks. + */ +#define N_STAB 0xe0 /* if any of these bits set, a symbolic debugging entry */ +#define N_PEXT 0x10 /* private external symbol bit */ +#define N_TYPE 0x0e /* mask for the type bits */ +#define N_EXT 0x01 /* external symbol bit, set for external symbols */ + +/* + * Only symbolic debugging entries have some of the N_STAB bits set and if any + * of these bits are set then it is a symbolic debugging entry (a stab). In + * which case then the values of the n_type field (the entire field) are given + * in the mach-o/stab.h header. + */ + +/* + * Values for N_TYPE bits of the n_type field.
+ */ +#define N_UNDF 0x0 /* undefined, n_sect == NO_SECT */ +#define N_ABS 0x2 /* absolute, n_sect == NO_SECT */ +#define N_SECT 0xe /* defined in section number n_sect */ +#define N_PBUD 0xc /* prebound undefined (defined in a dylib) */ +#define N_INDR 0xa /* indirect */ + +/* + * If the type is N_INDR then the symbol is defined to be the same as another + * symbol. In this case the n_value field is an index into the string table + * of the other symbol's name. When the other symbol is defined then they both + * take on the defined type and value. + */ + +/* + * If the type is N_SECT then the n_sect field contains an ordinal of the + * section the symbol is defined in. The sections are numbered from 1 and + * refer to sections in the order they appear in the load commands for the file + * they are in. This means the same ordinal may very well refer to different + * sections in different files. + * + * The n_value field for all symbol table entries (including N_STAB's) gets + * updated by the link editor based on the value of its n_sect field and where + * the section n_sect references gets relocated. If the value of the n_sect + * field is NO_SECT then its n_value field is not changed by the link editor. + */ +#define NO_SECT 0 /* symbol is not in any section */ +#define MAX_SECT 255 /* 1 thru 255 inclusive */ + +/* + * The bit 0x0020 of the n_desc field is used for two non-overlapping purposes + * and has two different symbolic names, N_NO_DEAD_STRIP and N_DESC_DISCARDED. + */ + +/* + * The N_NO_DEAD_STRIP bit of the n_desc field only ever appears in a + * relocatable .o file (MH_OBJECT filetype). And is used to indicate to the + * static link editor it is never to dead strip the symbol. + */ +#define N_NO_DEAD_STRIP 0x0020 /* symbol is not to be dead stripped */ + +/* + * The N_DESC_DISCARDED bit of the n_desc field never appears in a linked image.
+ * But is used in very rare cases by the dynamic link editor to mark an in + * memory symbol as discarded and no longer used for linking. + */ +#define N_DESC_DISCARDED 0x0020 /* symbol is discarded */ + +/* + * The N_WEAK_REF bit of the n_desc field indicates to the dynamic linker that + * the undefined symbol is allowed to be missing and is to have the address of + * zero when missing. + */ +#define N_WEAK_REF 0x0040 /* symbol is weak referenced */ + +/* + * The N_WEAK_DEF bit of the n_desc field indicates to the static and dynamic + * linkers that the symbol definition is weak, allowing a non-weak symbol to + * also be used which causes the weak definition to be discarded. Currently this + * is only supported for symbols in coalesced sections. + */ +#define N_WEAK_DEF 0x0080 /* coalesced symbol is a weak definition */ + +/* + * The N_REF_TO_WEAK bit of the n_desc field indicates to the dynamic linker + * that the undefined symbol should be resolved using flat namespace searching. + */ +#define N_REF_TO_WEAK 0x0080 /* reference to a weak symbol */ + +/* + * The N_ARM_THUMB_DEF bit of the n_desc field indicates that the symbol is + * a definition of a Thumb function. + */ +#define N_ARM_THUMB_DEF 0x0008 /* symbol is a Thumb function (ARM) */ + +/* + * The N_SYMBOL_RESOLVER bit of the n_desc field indicates + * that the function is actually a resolver function and should + * be called to get the address of the real function to use. + * This bit is only available in .o files (MH_OBJECT filetype) + */ +#define N_SYMBOL_RESOLVER 0x0100 + +/* + * The N_ALT_ENTRY bit of the n_desc field indicates that the + * symbol is pinned to the previous content. + */ +#define N_ALT_ENTRY 0x0200 + +/* + * The N_COLD_FUNC bit of the n_desc field indicates that the symbol is used + * infrequently and the linker should order it towards the end of the section.
+ */ +#define N_COLD_FUNC 0x0400 /* * An indirect symbol table entry is simply a 32bit index into the symbol table diff --git a/view/sharedcache/CMakeLists.txt b/view/sharedcache/CMakeLists.txt index 63ba602bf..0f6abea22 100644 --- a/view/sharedcache/CMakeLists.txt +++ b/view/sharedcache/CMakeLists.txt @@ -30,7 +30,7 @@ endif() set(HARD_FAIL_MODE OFF CACHE BOOL "Enable hard fail mode") set(SLIDEINFO_DEBUG_TAGS OFF CACHE BOOL "Enable debug tags in slideinfo") set(VIEW_NAME "DSCView" CACHE STRING "Name of the view") -set(METADATA_VERSION 2 CACHE STRING "Version of the metadata") +set(METADATA_VERSION 3 CACHE STRING "Version of the metadata") add_subdirectory(core) add_subdirectory(api) diff --git a/view/sharedcache/api/python/_sharedcachecore.py b/view/sharedcache/api/python/_sharedcachecore.py index d208048bc..ec99f659c 100644 --- a/view/sharedcache/api/python/_sharedcachecore.py +++ b/view/sharedcache/api/python/_sharedcachecore.py @@ -42,6 +42,7 @@ def free_string(value:ctypes.c_char_p) -> None: BNFreeString(ctypes.cast(value, ctypes.POINTER(ctypes.c_byte))) # Type definitions +BackingCacheTypeEnum = ctypes.c_int from binaryninja._binaryninjacore import BNBinaryView, BNBinaryViewHandle class BNDSCBackingCache(ctypes.Structure): @property @@ -110,7 +111,7 @@ class BNSharedCache(ctypes.Structure): # Structure definitions BNDSCBackingCache._fields_ = [ ("_path", ctypes.c_char_p), - ("isPrimary", ctypes.c_bool), + ("cacheType", BackingCacheTypeEnum), ("mappings", ctypes.POINTER(BNDSCBackingCacheMapping)), ("mappingCount", ctypes.c_ulonglong), ] diff --git a/view/sharedcache/api/python/sharedcache.py b/view/sharedcache/api/python/sharedcache.py index b660d9d97..d77794a83 100644 --- a/view/sharedcache/api/python/sharedcache.py +++ b/view/sharedcache/api/python/sharedcache.py @@ -52,14 +52,21 @@ def __repr__(self): @dataclasses.dataclass class DSCBackingCache: path: str - isPrimary: bool + cacheType: BackingCacheType mappings: list[DSCBackingCacheMapping] def 
__str__(self): return repr(self) def __repr__(self): - return f"" + match self.cacheType: + case BackingCacheType.BackingCacheTypePrimary: + cacheTypeStr = 'Primary' + case BackingCacheType.BackingCacheTypeSecondary: + cacheTypeStr = 'Secondary' + case BackingCacheType.BackingCacheTypeSymbols: + cacheTypeStr = 'Symbols' + return f"" @dataclasses.dataclass @@ -136,7 +143,7 @@ def caches(self): mappings.append(mapping) result.append(DSCBackingCache( value[i].path, - value[i].isPrimary, + value[i].cacheType, mappings )) diff --git a/view/sharedcache/api/python/sharedcache_enums.py b/view/sharedcache/api/python/sharedcache_enums.py index 346684245..ea86b5c63 100644 --- a/view/sharedcache/api/python/sharedcache_enums.py +++ b/view/sharedcache/api/python/sharedcache_enums.py @@ -1,6 +1,12 @@ import enum +class BackingCacheType(enum.IntEnum): + BackingCacheTypePrimary = 0 + BackingCacheTypeSecondary = 1 + BackingCacheTypeSymbols = 2 + + class DSCViewLoadProgress(enum.IntEnum): LoadProgressNotStarted = 0 LoadProgressLoadingCaches = 1 diff --git a/view/sharedcache/api/sharedcache.cpp b/view/sharedcache/api/sharedcache.cpp index 5ca7c480e..0be2599dd 100644 --- a/view/sharedcache/api/sharedcache.cpp +++ b/view/sharedcache/api/sharedcache.cpp @@ -91,7 +91,7 @@ namespace SharedCacheAPI { { BackingCache cache; cache.path = value[i].path; - cache.isPrimary = value[i].isPrimary; + cache.cacheType = value[i].cacheType; for (size_t j = 0; j < value[i].mappingCount; j++) { BackingCacheMapping mapping; diff --git a/view/sharedcache/api/sharedcacheapi.h b/view/sharedcache/api/sharedcacheapi.h index 19f021e7c..e1c5bec3e 100644 --- a/view/sharedcache/api/sharedcacheapi.h +++ b/view/sharedcache/api/sharedcacheapi.h @@ -105,7 +105,7 @@ namespace SharedCacheAPI { struct BackingCache { std::string path; - bool isPrimary; + BNBackingCacheType cacheType; std::vector mappings; }; diff --git a/view/sharedcache/api/sharedcachecore.h b/view/sharedcache/api/sharedcachecore.h index 
9fc332756..ed462ac74 100644 --- a/view/sharedcache/api/sharedcachecore.h +++ b/view/sharedcache/api/sharedcachecore.h @@ -64,6 +64,12 @@ extern "C" LoadProgressFinished, } BNDSCViewLoadProgress; + typedef enum BNBackingCacheType { + BackingCacheTypePrimary, + BackingCacheTypeSecondary, + BackingCacheTypeSymbols, + } BNBackingCacheType; + typedef struct BNBinaryView BNBinaryView; typedef struct BNSharedCache BNSharedCache; @@ -97,7 +103,7 @@ extern "C" typedef struct BNDSCBackingCache { char* path; - bool isPrimary; + BNBackingCacheType cacheType; BNDSCBackingCacheMapping* mappings; size_t mappingCount; } BNDSCBackingCache; diff --git a/view/sharedcache/core/SharedCache.cpp b/view/sharedcache/core/SharedCache.cpp index a7c7ffb01..8944b639b 100644 --- a/view/sharedcache/core/SharedCache.cpp +++ b/view/sharedcache/core/SharedCache.cpp @@ -72,7 +72,7 @@ struct ViewStateCacheStore { std::string m_baseFilePath; std::unordered_map>>> m_exportInfos; - std::unordered_map>>> m_symbolInfos; + std::unordered_map>> m_symbolInfos; }; static std::recursive_mutex viewStateMutex; @@ -254,13 +254,17 @@ void SharedCache::PerformInitialLoad() m_cacheFormat = iOS16CacheFormat; } + // Don't store directly into `m_imageStarts` so that the order is preserved. That way + // `imageIndex` can be assigned to a `CacheImage` in `m_images`. 
+ std::vector> imageStarts; + switch (m_cacheFormat) { case RegularCacheFormat: { dyld_cache_mapping_info mapping {}; BackingCache cache; - cache.isPrimary = true; + cache.cacheType = BackingCacheTypePrimary; cache.path = path; for (size_t i = 0; i < primaryCacheHeader.mappingCount; i++) @@ -280,7 +284,7 @@ void SharedCache::PerformInitialLoad() { baseFile->Read(&img, primaryCacheHeader.imagesOffsetOld + (i * sizeof(img)), sizeof(img)); auto iname = baseFile->ReadNullTermString(img.pathFileOffset); - m_imageStarts[iname] = img.address; + imageStarts.push_back({iname, img.address}); } m_logger->LogInfo("Found %d images in the shared cache", primaryCacheHeader.imagesCountOld); @@ -328,7 +332,7 @@ void SharedCache::PerformInitialLoad() // briefly. BackingCache cache; - cache.isPrimary = true; + cache.cacheType = BackingCacheTypePrimary; cache.path = path; for (size_t i = 0; i < primaryCacheHeader.mappingCount; i++) @@ -348,7 +352,7 @@ void SharedCache::PerformInitialLoad() { baseFile->Read(&img, primaryCacheHeader.imagesOffset + (i * sizeof(img)), sizeof(img)); auto iname = baseFile->ReadNullTermString(img.pathFileOffset); - m_imageStarts[iname] = img.address; + imageStarts.push_back({iname, img.address}); } if (primaryCacheHeader.branchPoolsCount) @@ -356,7 +360,7 @@ void SharedCache::PerformInitialLoad() std::vector pool {}; for (size_t i = 0; i < primaryCacheHeader.branchPoolsCount; i++) { - m_imageStarts["dyld_shared_cache_branch_islands_" + std::to_string(i)] = baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize())); + imageStarts.push_back({"dyld_shared_cache_branch_islands_" + std::to_string(i), baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize()))}); } } std::string mainFileName = base_name(path); @@ -402,7 +406,7 @@ void SharedCache::PerformInitialLoad() dyld_cache_mapping_info subCacheMapping {}; BackingCache subCache; - subCache.isPrimary = false; + subCache.cacheType = 
BackingCacheTypeSecondary; subCache.path = subCachePath; for (size_t j = 0; j < subCacheHeader.mappingCount; j++) @@ -439,7 +443,7 @@ void SharedCache::PerformInitialLoad() dyld_cache_mapping_info mapping {}; // We're going to reuse this for all of the mappings. We only need it // briefly. BackingCache cache; - cache.isPrimary = true; + cache.cacheType = BackingCacheTypePrimary; cache.path = path; for (size_t i = 0; i < primaryCacheHeader.mappingCount; i++) @@ -459,7 +463,7 @@ void SharedCache::PerformInitialLoad() { baseFile->Read(&img, primaryCacheHeader.imagesOffset + (i * sizeof(img)), sizeof(img)); auto iname = baseFile->ReadNullTermString(img.pathFileOffset); - m_imageStarts[iname] = img.address; + imageStarts.push_back({iname, img.address}); } if (primaryCacheHeader.branchPoolsCount) @@ -467,7 +471,7 @@ void SharedCache::PerformInitialLoad() std::vector pool {}; for (size_t i = 0; i < primaryCacheHeader.branchPoolsCount; i++) { - m_imageStarts["dyld_shared_cache_branch_islands_" + std::to_string(i)] = baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize())); + imageStarts.push_back({"dyld_shared_cache_branch_islands_" + std::to_string(i), baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize()))}); } } @@ -495,7 +499,7 @@ void SharedCache::PerformInitialLoad() subCacheFile->Read(&subCacheHeader, 0, headerSize); BackingCache subCache; - subCache.isPrimary = false; + subCache.cacheType = BackingCacheTypeSecondary; subCache.path = subCachePath; dyld_cache_mapping_info subCacheMapping {}; @@ -565,7 +569,7 @@ void SharedCache::PerformInitialLoad() dyld_cache_mapping_info mapping {}; BackingCache cache; - cache.isPrimary = true; + cache.cacheType = BackingCacheTypePrimary; cache.path = path; for (size_t i = 0; i < primaryCacheHeader.mappingCount; i++) @@ -586,7 +590,7 @@ void SharedCache::PerformInitialLoad() { baseFile->Read(&img, primaryCacheHeader.imagesOffset + (i * sizeof(img)), 
sizeof(img)); auto iname = baseFile->ReadNullTermString(img.pathFileOffset); - m_imageStarts[iname] = img.address; + imageStarts.push_back({iname, img.address}); } if (primaryCacheHeader.branchPoolsCount) @@ -594,7 +598,7 @@ void SharedCache::PerformInitialLoad() std::vector pool {}; for (size_t i = 0; i < primaryCacheHeader.branchPoolsCount; i++) { - m_imageStarts["dyld_shared_cache_branch_islands_" + std::to_string(i)] = baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize())); + imageStarts.push_back({"dyld_shared_cache_branch_islands_" + std::to_string(i), baseFile->ReadULong(primaryCacheHeader.branchPoolsOffset + (i * m_dscView->GetAddressSize()))}); } } @@ -645,7 +649,7 @@ void SharedCache::PerformInitialLoad() dyld_cache_mapping_info subCacheMapping {}; BackingCache subCache; - subCache.isPrimary = false; + subCache.cacheType = BackingCacheTypeSecondary; subCache.path = subCachePath; for (size_t j = 0; j < subCacheHeader.mappingCount; j++) @@ -705,7 +709,7 @@ void SharedCache::PerformInitialLoad() subCacheFile->Read(&subCacheHeader, 0, headerSize); BackingCache subCache; - subCache.isPrimary = false; + subCache.cacheType = BackingCacheTypeSymbols; subCache.path = subCachePath; dyld_cache_mapping_info subCacheMapping {}; @@ -724,7 +728,9 @@ void SharedCache::PerformInitialLoad() m_backingCaches.push_back(subCache); } catch (...) 
- {} + { + m_logger->LogWarn("Failed to load the symbols cache"); + } break; } } @@ -741,8 +747,10 @@ void SharedCache::PerformInitialLoad() m_logger->LogError("Failed to map VM pages for Shared Cache on initial load, this is fatal."); return; } - for (const auto &start : m_imageStarts) + for (uint32_t imageIndex = 0; imageIndex < imageStarts.size(); imageIndex++) { + const auto& start = imageStarts[imageIndex]; + m_imageStarts[start.first] = start.second; try { auto imageHeader = SharedCache::LoadHeaderForAddress(vm, start.second, start.first); if (imageHeader) @@ -754,6 +762,7 @@ void SharedCache::PerformInitialLoad() } m_headers[start.second] = imageHeader.value(); CacheImage image; + image.index = imageIndex; image.installName = start.first; image.headerLocation = start.second; for (const auto& segment : imageHeader->segments) @@ -2257,6 +2266,128 @@ std::optional SharedCache::LoadHeaderForAddress(std::sha return header; } +void SharedCache::ProcessSymbols(std::shared_ptr file, const SharedCacheMachOHeader& header, uint64_t stringsOffset, size_t stringsSize, uint64_t nlistEntriesOffset, uint32_t nlistCount, uint32_t nlistStartIndex) +{ + auto addressSize = m_dscView->GetAddressSize(); + auto strings = file->ReadBuffer(stringsOffset, stringsSize); + + for (uint64_t i = 0; i < nlistCount; i++) + { + uint64_t entryIndex = (nlistStartIndex + i); + + nlist_64 nlist; + if (addressSize == 4) + { + // 32-bit DSC + struct nlist nlist32; + file->Read(&nlist, nlistEntriesOffset + (entryIndex * sizeof(nlist32)), sizeof(nlist32)); + nlist.n_strx = nlist32.n_strx; + nlist.n_type = nlist32.n_type; + nlist.n_sect = nlist32.n_sect; + nlist.n_desc = nlist32.n_desc; + nlist.n_value = nlist32.n_value; + } + else + { + // 64-bit DSC + file->Read(&nlist, nlistEntriesOffset + (entryIndex * sizeof(nlist)), sizeof(nlist)); + } + + auto symbolAddress = nlist.n_value; + if (((nlist.n_type & N_TYPE) == N_INDR) || symbolAddress == 0) + continue; + + if (nlist.n_strx >= stringsSize) + { + 
m_logger->LogError("Symbol entry at index %llu has a string offset of %u which is outside the strings buffer of size %llu for file %s", entryIndex, nlist.n_strx, stringsSize, file->Path().c_str()); + continue; + } + + std::string symbolName((char*)strings.GetDataAt(nlist.n_strx)); + if (symbolName == "") + continue; + + BNSymbolType symbolType = DataSymbol; + uint32_t flags; + if ((nlist.n_type & N_TYPE) == N_SECT && nlist.n_sect > 0 && (size_t)(nlist.n_sect - 1) < header.sections.size()) + {} + else if ((nlist.n_type & N_TYPE) == N_ABS) + {} + else if ((nlist.n_type & N_EXT)) + { + symbolType = ExternalSymbol; + } + else + continue; + + for (auto s : header.sections) + { + if (s.addr <= symbolAddress && symbolAddress < s.addr + s.size) + { + flags = s.flags; + } + } + + if (symbolType != ExternalSymbol) + { + if ((flags & S_ATTR_PURE_INSTRUCTIONS) == S_ATTR_PURE_INSTRUCTIONS + || (flags & S_ATTR_SOME_INSTRUCTIONS) == S_ATTR_SOME_INSTRUCTIONS) + symbolType = FunctionSymbol; + else + symbolType = DataSymbol; + } + if ((nlist.n_desc & N_ARM_THUMB_DEF) == N_ARM_THUMB_DEF) + symbolAddress++; + + m_symbolInfos[header.textBase].push_back(new Symbol(symbolType, symbolName, symbolAddress, GlobalBinding)); + } +} + +void SharedCache::ApplySymbol(Ref view, Ref typeLib, Ref symbol) +{ + Ref func = nullptr; + auto symbolAddress = symbol->GetAddress(); + + if (symbol->GetType() == FunctionSymbol) + { + Ref targetPlatform = view->GetDefaultPlatform(); + func = view->AddFunctionForAnalysis(targetPlatform, symbolAddress); + } + if (typeLib) + { + auto type = m_dscView->ImportTypeLibraryObject(typeLib, {symbol->GetFullName()}); + if (type) + view->DefineAutoSymbolAndVariableOrFunction(view->GetDefaultPlatform(), symbol, type); + else + view->DefineAutoSymbol(symbol); + + if (!func) + func = view->GetAnalysisFunction(view->GetDefaultPlatform(), symbolAddress); + if (func) + { + if (symbol->GetFullName() == "_objc_msgSend") + { + func->SetHasVariableArguments(false); + } + else if 
(symbol->GetFullName().find("_objc_retain_x") != std::string::npos || symbol->GetFullName().find("_objc_release_x") != std::string::npos) + { + auto x = symbol->GetFullName().rfind("x"); + auto num = symbol->GetFullName().substr(x + 1); + + std::vector callTypeParams; + auto cc = m_dscView->GetDefaultArchitecture()->GetCallingConventionByName("apple-arm64-objc-fast-arc-" + num); + + callTypeParams.push_back({"obj", m_dscView->GetTypeByName({ "id" }), true, BinaryNinja::Variable()}); + + auto funcType = BinaryNinja::Type::FunctionType(m_dscView->GetTypeByName({ "id" }), cc, callTypeParams); + func->SetUserType(funcType); + } + } + } + else + view->DefineAutoSymbol(symbol); +} + void SharedCache::InitializeHeader( Ref view, VM* vm, SharedCacheMachOHeader header, std::vector regionsToLoad) { @@ -2550,86 +2681,84 @@ void SharedCache::InitializeHeader( } } - view->BeginBulkModifySymbols(); if (header.symtab.symoff != 0 && header.linkeditPresent && vm->AddressIsMapped(header.linkeditSegment.vmaddr)) { // Mach-O View symtab processing with // a ton of stuff cut out so it can work auto reader = vm->MappingAtAddress(header.linkeditSegment.vmaddr).first.fileAccessor->lock(); - // auto symtab = reader->ReadBuffer(header.symtab.symoff, header.symtab.nsyms * sizeof(nlist_64)); - auto strtab = reader->ReadBuffer(header.symtab.stroff, header.symtab.strsize); - nlist_64 sym; - memset(&sym, 0, sizeof(sym)); - auto N_TYPE = 0xE; // idk - std::vector>> symbolInfos; - for (size_t i = 0; i < header.symtab.nsyms; i++) - { - reader->Read(&sym, header.symtab.symoff + i * sizeof(nlist_64), sizeof(nlist_64)); - if (sym.n_strx >= header.symtab.strsize || ((sym.n_type & N_TYPE) == N_INDR)) - continue; + ProcessSymbols(reader, header, header.symtab.stroff, header.symtab.strsize, header.symtab.symoff, header.symtab.nsyms); + } - std::string symbol((char*)strtab.GetDataAt(sym.n_strx)); - // BNLogError("%s: 0x%llx", symbol.c_str(), sym.n_value); - if (symbol == "") + int64_t imageIndex = -1; + 
for (auto& cacheImage : m_images) + { + if (cacheImage.headerLocation == header.textBase) + { + imageIndex = cacheImage.index; + break; + } + } + if (imageIndex > -1) + { + auto addressSize = m_dscView->GetAddressSize(); + for (auto backingCache : m_backingCaches) + { + if (backingCache.cacheType != BackingCacheTypeSymbols) continue; - BNSymbolType type = DataSymbol; - uint32_t flags; - if ((sym.n_type & N_TYPE) == N_SECT && sym.n_sect > 0 && (size_t)(sym.n_sect - 1) < header.sections.size()) - {} - else if ((sym.n_type & N_TYPE) == N_ABS) - {} - else if ((sym.n_type & 0x1)) + auto subCacheFile = MMappedFileAccessor::Open(m_dscView, m_dscView->GetFile()->GetSessionId(), backingCache.path)->lock(); + + dyld_cache_header subCacheHeader {}; + uint64_t headerSize = subCacheFile->ReadUInt32(__offsetof(dyld_cache_header, mappingOffset)); + if (headerSize > sizeof(dyld_cache_header)) { - type = ExternalSymbol; + m_logger->LogDebug("Header size is larger than expected, using default size"); + headerSize = sizeof(dyld_cache_header); } - else - continue; + subCacheFile->Read(&subCacheHeader, 0, headerSize); - for (auto s : header.sections) + if (subCacheHeader.localSymbolsOffset != 0) { - if (s.addr < sym.n_value) + dyld_cache_local_symbols_info localSymbolsInfo; + subCacheFile->Read(&localSymbolsInfo, subCacheHeader.localSymbolsOffset, sizeof(localSymbolsInfo)); + + if (imageIndex < localSymbolsInfo.entriesCount) { - if (s.addr + s.size > sym.n_value) + dyld_cache_local_symbols_entry_64 localSymbolsEntry; + if (addressSize == 4) + { + // 32-bit DSC + dyld_cache_local_symbols_entry localSymbolsEntry32; + subCacheFile->Read(&localSymbolsEntry32, subCacheHeader.localSymbolsOffset + localSymbolsInfo.entriesOffset + (imageIndex * sizeof(localSymbolsEntry32)), sizeof(localSymbolsEntry32)); + localSymbolsEntry.dylibOffset = localSymbolsEntry32.dylibOffset; + localSymbolsEntry.nlistStartIndex = localSymbolsEntry32.nlistStartIndex; + localSymbolsEntry.nlistCount = 
localSymbolsEntry32.nlistCount; + } + else { - flags = s.flags; + // 64-bit DSC + subCacheFile->Read(&localSymbolsEntry, subCacheHeader.localSymbolsOffset + localSymbolsInfo.entriesOffset + (imageIndex * sizeof(localSymbolsEntry)), sizeof(localSymbolsEntry)); } + ProcessSymbols(subCacheFile, header, subCacheHeader.localSymbolsOffset + localSymbolsInfo.stringsOffset, localSymbolsInfo.stringsSize, subCacheHeader.localSymbolsOffset + localSymbolsInfo.nlistOffset, localSymbolsEntry.nlistCount, localSymbolsEntry.nlistStartIndex); } - } - - if (type != ExternalSymbol) - { - if ((flags & S_ATTR_PURE_INSTRUCTIONS) == S_ATTR_PURE_INSTRUCTIONS - || (flags & S_ATTR_SOME_INSTRUCTIONS) == S_ATTR_SOME_INSTRUCTIONS) - type = FunctionSymbol; else - type = DataSymbol; - } - if ((sym.n_desc & N_ARM_THUMB_DEF) == N_ARM_THUMB_DEF) - sym.n_value++; - - auto symbolObj = new Symbol(type, symbol, sym.n_value, GlobalBinding); - if (type == FunctionSymbol) - { - Ref targetPlatform = view->GetDefaultPlatform(); - view->AddFunctionForAnalysis(targetPlatform, sym.n_value); - } - if (typeLib) - { - auto _type = m_dscView->ImportTypeLibraryObject(typeLib, {symbolObj->GetFullName()}); - if (_type) { - view->DefineAutoSymbolAndVariableOrFunction(view->GetDefaultPlatform(), symbolObj, _type); + m_logger->LogDebug("No entry for image index %lld in symbols file %s with %u entries", imageIndex, subCacheFile->Path().c_str(), localSymbolsInfo.entriesCount); } - else - view->DefineAutoSymbol(symbolObj); } - else - view->DefineAutoSymbol(symbolObj); - symbolInfos.push_back({sym.n_value, {type, symbol}}); } - m_symbolInfos[header.textBase] = symbolInfos; + m_logger->LogDebug("Loaded local symbols"); + } + else + { + m_logger->LogError("Failed to identify the DSC image that contains the header at 0x%llx", header.textBase); + } + + view->BeginBulkModifySymbols(); + for (auto symbol : m_symbolInfos[header.textBase]) + { + ApplySymbol(view, typeLib, symbol); } if (header.exportTriePresent && 
header.linkeditPresent && vm->AddressIsMapped(header.linkeditSegment.vmaddr)) @@ -2639,41 +2768,7 @@ void SharedCache::InitializeHeader( for (const auto& symbol : symbols) { exportMapping.push_back({symbol->GetAddress(), {symbol->GetType(), symbol->GetRawName()}}); - if (typeLib) - { - auto type = m_dscView->ImportTypeLibraryObject(typeLib, {symbol->GetFullName()}); - - if (type) - { - view->DefineAutoSymbolAndVariableOrFunction(view->GetDefaultPlatform(), symbol, type); - } - else - view->DefineAutoSymbol(symbol); - - if (view->GetAnalysisFunction(view->GetDefaultPlatform(), symbol->GetAddress())) - { - auto func = view->GetAnalysisFunction(view->GetDefaultPlatform(), symbol->GetAddress()); - if (symbol->GetFullName() == "_objc_msgSend") - { - func->SetHasVariableArguments(false); - } - else if (symbol->GetFullName().find("_objc_retain_x") != std::string::npos || symbol->GetFullName().find("_objc_release_x") != std::string::npos) - { - auto x = symbol->GetFullName().rfind("x"); - auto num = symbol->GetFullName().substr(x + 1); - - std::vector callTypeParams; - auto cc = m_dscView->GetDefaultArchitecture()->GetCallingConventionByName("apple-arm64-objc-fast-arc-" + num); - - callTypeParams.push_back({"obj", m_dscView->GetTypeByName({ "id" }), true, BinaryNinja::Variable()}); - - auto funcType = BinaryNinja::Type::FunctionType(m_dscView->GetTypeByName({ "id" }), cc, callTypeParams); - func->SetUserType(funcType); - } - } - } - else - view->DefineAutoSymbol(symbol); + ApplySymbol(view, typeLib, symbol); } m_exportInfos[header.textBase] = exportMapping; } @@ -3168,7 +3263,7 @@ extern "C" for (size_t i = 0; i < viewCaches.size(); i++) { caches[i].path = BNAllocString(viewCaches[i].path.c_str()); - caches[i].isPrimary = viewCaches[i].isPrimary; + caches[i].cacheType = viewCaches[i].cacheType; BNDSCBackingCacheMapping* mappings; mappings = (BNDSCBackingCacheMapping*)malloc(sizeof(BNDSCBackingCacheMapping) * viewCaches[i].mappings.size()); diff --git 
a/view/sharedcache/core/SharedCache.h b/view/sharedcache/core/SharedCache.h index 8a6e1f394..3e182edbc 100644 --- a/view/sharedcache/core/SharedCache.h +++ b/view/sharedcache/core/SharedCache.h @@ -59,12 +59,14 @@ namespace SharedCacheCore { struct CacheImage : public MetadataSerializable { + uint32_t index; // image index in the DSC std::string installName; uint64_t headerLocation; std::vector regions; void Store() override { + MSS(index); MSS(installName); MSS(headerLocation); rapidjson::Value key("regions", m_activeContext.allocator); @@ -78,6 +80,7 @@ namespace SharedCacheCore { void Load() override { + MSL(index); MSL(installName); MSL(headerLocation); auto bArr = m_activeDeserContext.doc["regions"].GetArray(); @@ -94,19 +97,19 @@ namespace SharedCacheCore { struct BackingCache : public MetadataSerializable { std::string path; - bool isPrimary = false; + BNBackingCacheType cacheType = BackingCacheTypeSecondary; std::vector>> mappings; void Store() override { MSS(path); - MSS(isPrimary); + MSS_CAST(cacheType, uint32_t); MSS(mappings); } void Load() override { MSL(path); - MSL(isPrimary); + MSL_CAST(cacheType, uint32_t, BNBackingCacheType); MSL(mappings); } }; @@ -977,20 +980,20 @@ namespace SharedCacheCore { m_activeContext.doc.AddMember("exportInfos", exportInfos, m_activeContext.allocator); rapidjson::Document symbolInfos(rapidjson::kArrayType); - for (const auto& pair1 : m_symbolInfos) + for (const auto& [headerLocation, symbolVec] : m_symbolInfos) { rapidjson::Value subObj(rapidjson::kObjectType); rapidjson::Value subArr(rapidjson::kArrayType); - for (const auto& pair2 : pair1.second) + for (const auto& symbol : symbolVec) { rapidjson::Value subSubArr(rapidjson::kArrayType); - subSubArr.PushBack(pair2.first, m_activeContext.allocator); - subSubArr.PushBack(pair2.second.first, m_activeContext.allocator); - subSubArr.PushBack(pair2.second.second, m_activeContext.allocator); + subSubArr.PushBack(symbol->GetAddress(), m_activeContext.allocator); + 
subSubArr.PushBack(symbol->GetType(), m_activeContext.allocator); + subSubArr.PushBack(symbol->GetRawName(), m_activeContext.allocator); subArr.PushBack(subSubArr, m_activeContext.allocator); } - subObj.AddMember("key", pair1.first, m_activeContext.allocator); + subObj.AddMember("key", headerLocation, m_activeContext.allocator); subObj.AddMember("value", subArr, m_activeContext.allocator); symbolInfos.PushBack(subObj, m_activeContext.allocator); @@ -1075,13 +1078,12 @@ namespace SharedCacheCore { m_symbolInfos.clear(); for (auto& symbolInfo : m_activeDeserContext.doc["symbolInfos"].GetArray()) { - std::vector>> symbolInfoVec; + std::vector> symbolVec; for (auto& symbolInfoPair : symbolInfo["value"].GetArray()) { - symbolInfoVec.push_back({symbolInfoPair[0].GetUint64(), - {(BNSymbolType)symbolInfoPair[1].GetUint(), symbolInfoPair[2].GetString()}}); + symbolVec.push_back(new Symbol((BNSymbolType)symbolInfoPair[1].GetUint(), symbolInfoPair[2].GetString(), symbolInfoPair[0].GetUint64())); } - m_symbolInfos[symbolInfo["key"].GetUint64()] = symbolInfoVec; + m_symbolInfos[symbolInfo["key"].GetUint64()] = symbolVec; } m_backingCaches.clear(); for (auto& bcV : m_activeDeserContext.doc["backingCaches"].GetArray()) @@ -1137,8 +1139,7 @@ namespace SharedCacheCore { DSCViewState m_viewState = DSCViewStateUnloaded; std::unordered_map>>> m_exportInfos; - std::unordered_map>>> - m_symbolInfos; + std::unordered_map>> m_symbolInfos; // --- // Serialized once by PerformInitialLoad and available after m_viewState == Loaded @@ -1217,6 +1218,9 @@ namespace SharedCacheCore { const std::string& currentText, size_t cursor, uint32_t endGuard); std::vector> ParseExportTrie( std::shared_ptr linkeditFile, SharedCacheMachOHeader header); + + void ProcessSymbols(std::shared_ptr file, const SharedCacheMachOHeader& header, uint64_t stringsOffset, size_t stringsSize, uint64_t nlistEntriesOffset, uint32_t nlistCount, uint32_t nlistStartIndex = 0); + void ApplySymbol(Ref view, Ref typeLib, Ref 
symbol); }; From e1264fb5c194ffe4cb876f6442d1d9c089924fa1 Mon Sep 17 00:00:00 2001 From: WeiN76LQh Date: Mon, 2 Dec 2024 22:24:30 +0000 Subject: [PATCH 3/3] Fix macho view plugin compilation The previous commit broke compiling the macho view plugin due to duplicate definitions --- view/macho/machoview.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/view/macho/machoview.cpp b/view/macho/machoview.cpp index d26769e1b..23031ed07 100644 --- a/view/macho/machoview.cpp +++ b/view/macho/machoview.cpp @@ -14,13 +14,6 @@ #include "lowlevelilinstruction.h" #include "rapidjsonwrapper.h" -enum { - N_STAB = 0xe0, - N_PEXT = 0x10, - N_TYPE = 0x0e, - N_EXT = 0x01 -}; - using namespace BinaryNinja; using namespace std;