From 66b8f880744cff339c925aeb52a3b8328cf9f300 Mon Sep 17 00:00:00 2001 From: Hugh Sanderson Date: Sun, 6 Oct 2024 13:37:11 +0800 Subject: [PATCH 1/3] Add 'set' with no name to dump current values. Check for undefined values when linking android --- toolchain/android-toolchain-clang.xml | 3 +++ tools/hxcpp/BuildTool.hx | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/toolchain/android-toolchain-clang.xml b/toolchain/android-toolchain-clang.xml index a6583b3b8..6ebbd5001 100644 --- a/toolchain/android-toolchain-clang.xml +++ b/toolchain/android-toolchain-clang.xml @@ -92,6 +92,9 @@ + + + | + * |<-----------compressed data--------->| + * |<-----------decompressed size------------------>| + * |<----margin---->| + * + * This technique is more useful for decompression, + * since decompressed size is typically larger, + * and margin is short. + * + * In-place decompression will work inside any buffer + * which size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize). + * This presumes that decompressedSize > compressedSize. + * Otherwise, it means compression actually expanded data, + * and it would be more efficient to store such data with a flag indicating it's not compressed. + * This can happen when data is not compressible (already compressed, or encrypted). + * + * For in-place compression, margin is larger, as it must be able to cope with both + * history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX, + * and data expansion, which can happen when input is not compressible. + * As a consequence, buffer size requirements are much higher, + * and memory savings offered by in-place compression are more limited. + * + * There are ways to limit this cost for compression : + * - Reduce history size, by modifying LZ4_DISTANCE_MAX. + * Note that it is a compile-time constant, so all compressions will apply this limit. + * Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX, + * so it's a reasonable trick when inputs are known to be small. + * - Require the compressor to deliver a "maximum compressed size". + * This is the `dstCapacity` parameter in `LZ4_compress*()`. + * When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail, + * in which case, the return code will be 0 (zero). + * The caller must be ready for these cases to happen, + * and typically design a backup scheme to send data uncompressed. + * The combination of both techniques can significantly reduce + * the amount of margin required for in-place compression. + * + * In-place compression can work in any buffer + * which size is >= (maxCompressedSize) + * with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success. + * LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX, + * so it's possible to reduce memory requirements by playing with them. + */ + +#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize) (((compressedSize) >> 8) + 32) +#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize) ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize)) /**< note: presumes that compressedSize < decompressedSize. 
note2: margin is overestimated a bit, since it could use compressedSize instead */ + +#ifndef LZ4_DISTANCE_MAX /* history window size; can be user-defined at compile time */ +# define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */ +#endif + +#define LZ4_COMPRESS_INPLACE_MARGIN (LZ4_DISTANCE_MAX + 32) /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */ +#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize) ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN) /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */ + +} + +#endif /* LZ4_STATIC_3504398509 */ +#endif /* LZ4_STATIC_LINKING_ONLY */ + + + +#ifndef TRACY_LZ4_H_98237428734687 +#define TRACY_LZ4_H_98237428734687 + +namespace tracy +{ + +/*-************************************************************ + * Private Definitions + ************************************************************** + * Do not use these definitions directly. + * They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`. + * Accessing members will expose user code to API and/or ABI break in future versions of the library. + **************************************************************/ +#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2) +#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE) +#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) /* required as macro for static allocation */ + +#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) + typedef int8_t LZ4_i8; + typedef uint8_t LZ4_byte; + typedef uint16_t LZ4_u16; + typedef uint32_t LZ4_u32; +#else + typedef signed char LZ4_i8; + typedef unsigned char LZ4_byte; + typedef unsigned short LZ4_u16; + typedef unsigned int LZ4_u32; +#endif + +/*! LZ4_stream_t : + * Never ever use below internal definitions directly ! + * These definitions are not API/ABI safe, and may change in future versions. + * If you need static allocation, declare or allocate an LZ4_stream_t object. +**/ + +typedef struct LZ4_stream_t_internal LZ4_stream_t_internal; +struct LZ4_stream_t_internal { + LZ4_u32 hashTable[LZ4_HASH_SIZE_U32]; + const LZ4_byte* dictionary; + const LZ4_stream_t_internal* dictCtx; + LZ4_u32 currentOffset; + LZ4_u32 tableType; + LZ4_u32 dictSize; + /* Implicit padding to ensure structure is aligned */ +}; + +#define LZ4_STREAM_MINSIZE ((1UL << LZ4_MEMORY_USAGE) + 32) /* static size, for inter-version compatibility */ +union LZ4_stream_u { + char minStateSize[LZ4_STREAM_MINSIZE]; + LZ4_stream_t_internal internal_donotuse; +}; /* previously typedef'd to LZ4_stream_t */ + + +/*! LZ4_initStream() : v1.9.0+ + * An LZ4_stream_t structure must be initialized at least once. + * This is automatically done when invoking LZ4_createStream(), + * but it's not when the structure is simply declared on stack (for example). + * + * Use LZ4_initStream() to properly initialize a newly declared LZ4_stream_t. + * It can also initialize any arbitrary buffer of sufficient size, + * and will @return a pointer of proper type upon initialization. + * + * Note : initialization fails if size and alignment conditions are not respected. + * In which case, the function will @return NULL. + * Note2: An LZ4_stream_t structure guarantees correct alignment and size. + * Note3: Before v1.9.0, use LZ4_resetStream() instead +**/ +LZ4LIB_API LZ4_stream_t* LZ4_initStream (void* buffer, size_t size); + + +/*! 
LZ4_streamDecode_t : + * Never ever use below internal definitions directly ! + * These definitions are not API/ABI safe, and may change in future versions. + * If you need static allocation, declare or allocate an LZ4_streamDecode_t object. +**/ +typedef struct { + const LZ4_byte* externalDict; + const LZ4_byte* prefixEnd; + size_t extDictSize; + size_t prefixSize; +} LZ4_streamDecode_t_internal; + +#define LZ4_STREAMDECODE_MINSIZE 32 +union LZ4_streamDecode_u { + char minStateSize[LZ4_STREAMDECODE_MINSIZE]; + LZ4_streamDecode_t_internal internal_donotuse; +} ; /* previously typedef'd to LZ4_streamDecode_t */ + + + +/*-************************************ +* Obsolete Functions +**************************************/ + +/*! Deprecation warnings + * + * Deprecated functions make the compiler generate a warning when invoked. + * This is meant to invite users to update their source code. + * Should deprecation warnings be a problem, it is generally possible to disable them, + * typically with -Wno-deprecated-declarations for gcc + * or _CRT_SECURE_NO_WARNINGS in Visual. + * + * Another method is to define LZ4_DISABLE_DEPRECATE_WARNINGS + * before including the header file. + */ +#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS +# define LZ4_DEPRECATED(message) /* disable deprecation warnings */ +#else +# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ +# define LZ4_DEPRECATED(message) [[deprecated(message)]] +# elif defined(_MSC_VER) +# define LZ4_DEPRECATED(message) __declspec(deprecated(message)) +# elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45)) +# define LZ4_DEPRECATED(message) __attribute__((deprecated(message))) +# elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31) +# define LZ4_DEPRECATED(message) __attribute__((deprecated)) +# else +# pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler") +# define LZ4_DEPRECATED(message) /* disabled */ +# endif +#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */ + +/*! Obsolete compression functions (since v1.7.3) */ +LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* src, char* dest, int srcSize); +LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize); + +/*! 
Obsolete decompression functions (since v1.8.0) */ +LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize); +LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); + +/* Obsolete streaming functions (since v1.7.0) + * degraded functionality; do not use! + * + * In order to perform streaming compression, these functions depended on data + * that is no longer tracked in the state. They have been preserved as well as + * possible: using them will still produce a correct output. However, they don't + * actually retain any history between compression calls. The compression ratio + * achieved will therefore be no better than compressing each chunk + * independently. + */ +LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer); +LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int LZ4_sizeofStreamState(void); +LZ4_DEPRECATED("Use LZ4_resetStream() instead") LZ4LIB_API int LZ4_resetStreamState(void* state, char* inputBuffer); +LZ4_DEPRECATED("Use LZ4_saveDict() instead") LZ4LIB_API char* LZ4_slideInputBuffer (void* state); + +/*! Obsolete streaming decoding functions (since v1.7.0) */ +LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize); +LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize); + +/*! Obsolete LZ4_decompress_fast variants (since v1.9.0) : + * These functions used to be faster than LZ4_decompress_safe(), + * but this is no longer the case. They are now slower. + * This is because LZ4_decompress_fast() doesn't know the input size, + * and therefore must progress more cautiously into the input buffer to not read beyond the end of block. + * On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability. + * As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated. + * + * The last remaining LZ4_decompress_fast() specificity is that + * it can decompress a block without knowing its compressed size. + * Such functionality can be achieved in a more secure manner + * by employing LZ4_decompress_safe_partial(). + * + * Parameters: + * originalSize : is the uncompressed size to regenerate. + * `dst` must be already allocated, its size must be >= 'originalSize' bytes. + * @return : number of bytes read from source buffer (== compressed size). + * The function expects to finish at block's end exactly. + * If the source stream is detected malformed, the function stops decoding and returns a negative result. + * note : LZ4_decompress_fast*() requires originalSize. Thanks to this information, it never writes past the output buffer. + * However, since it doesn't know its 'src' size, it may read an unknown amount of input, past input buffer bounds. + * Also, since match offsets are not validated, match reads from 'src' may underflow too. + * These issues never happen if input (compressed) data is correct. + * But they may happen if input data is invalid (error or intentional tampering). + * As a consequence, use these functions in trusted environments with trusted data **only**. + */ +LZ4_DEPRECATED("This function is deprecated and unsafe. 
Consider using LZ4_decompress_safe() instead") +LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize); +LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead") +LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize); +LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_usingDict() instead") +LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize); + +/*! LZ4_resetStream() : + * An LZ4_stream_t structure must be initialized at least once. + * This is done with LZ4_initStream(), or LZ4_resetStream(). + * Consider switching to LZ4_initStream(), + * invoking LZ4_resetStream() will trigger deprecation warnings in the future. + */ +LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr); + +} + +#endif /* LZ4_H_98237428734687 */ diff --git a/project/thirdparty/tracy-0.11.1/common/tracy_lz4hc.cpp b/project/thirdparty/tracy-0.11.1/common/tracy_lz4hc.cpp new file mode 100644 index 000000000..eec7239e0 --- /dev/null +++ b/project/thirdparty/tracy-0.11.1/common/tracy_lz4hc.cpp @@ -0,0 +1,1636 @@ +/* + LZ4 HC - High Compression Mode of LZ4 + Copyright (C) 2011-2020, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - LZ4 source repository : https://github.com/lz4/lz4 + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c +*/ +/* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */ + + +/* ************************************* +* Tuning Parameter +***************************************/ + +/*! HEAPMODE : + * Select how default compression function will allocate workplace memory, + * in stack (0:fastest), or in heap (1:requires malloc()). + * Since workplace is rather large, heap mode is recommended. 
+**/ +#ifndef LZ4HC_HEAPMODE +# define LZ4HC_HEAPMODE 1 +#endif + + +/*=== Dependency ===*/ +#define LZ4_HC_STATIC_LINKING_ONLY +#include "tracy_lz4hc.hpp" + + +/*=== Common definitions ===*/ +#if defined(__GNUC__) +# pragma GCC diagnostic ignored "-Wunused-function" +#endif +#if defined (__clang__) +# pragma clang diagnostic ignored "-Wunused-function" +#endif + +#define LZ4_COMMONDEFS_ONLY +#ifndef LZ4_SRC_INCLUDED +#include "tracy_lz4.cpp" /* LZ4_count, constants, mem */ +#endif + + +/*=== Enums ===*/ +typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive; + + +/*=== Constants ===*/ +#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) +#define LZ4_OPT_NUM (1<<12) + + +/*=== Macros ===*/ +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) +#define MAX(a,b) ( (a) > (b) ? (a) : (b) ) +#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG)) +#define DELTANEXTMAXD(p) chainTable[(p) & LZ4HC_MAXD_MASK] /* flexible, LZ4HC_MAXD dependent */ +#define DELTANEXTU16(table, pos) table[(U16)(pos)] /* faster */ +/* Make fields passed to, and updated by LZ4HC_encodeSequence explicit */ +#define UPDATABLE(ip, op, anchor) &ip, &op, &anchor + +namespace tracy +{ + +static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); } + + +/************************************** +* HC Compression +**************************************/ +static void LZ4HC_clearTables (LZ4HC_CCtx_internal* hc4) +{ + MEM_INIT(hc4->hashTable, 0, sizeof(hc4->hashTable)); + MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable)); +} + +static void LZ4HC_init_internal (LZ4HC_CCtx_internal* hc4, const BYTE* start) +{ + size_t const bufferSize = (size_t)(hc4->end - hc4->prefixStart); + size_t newStartingOffset = bufferSize + hc4->dictLimit; + assert(newStartingOffset >= bufferSize); /* check overflow */ + if (newStartingOffset > 1 GB) { + LZ4HC_clearTables(hc4); + newStartingOffset = 0; + } + newStartingOffset += 64 KB; + hc4->nextToUpdate = (U32)newStartingOffset; + hc4->prefixStart = start; + hc4->end = start; + hc4->dictStart = start; + hc4->dictLimit = (U32)newStartingOffset; + hc4->lowLimit = (U32)newStartingOffset; +} + + +/* Update chains up to ip (excluded) */ +LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip) +{ + U16* const chainTable = hc4->chainTable; + U32* const hashTable = hc4->hashTable; + const BYTE* const prefixPtr = hc4->prefixStart; + U32 const prefixIdx = hc4->dictLimit; + U32 const target = (U32)(ip - prefixPtr) + prefixIdx; + U32 idx = hc4->nextToUpdate; + assert(ip >= prefixPtr); + assert(target >= prefixIdx); + + while (idx < target) { + U32 const h = LZ4HC_hashPtr(prefixPtr+idx-prefixIdx); + size_t delta = idx - hashTable[h]; + if (delta>LZ4_DISTANCE_MAX) delta = LZ4_DISTANCE_MAX; + DELTANEXTU16(chainTable, idx) = (U16)delta; + hashTable[h] = idx; + idx++; + } + + hc4->nextToUpdate = target; +} + +/** LZ4HC_countBack() : + * @return : negative value, nb of common bytes before ip/match */ +LZ4_FORCE_INLINE +int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match, + const BYTE* const iMin, const BYTE* const mMin) +{ + int back = 0; + int const min = (int)MAX(iMin - ip, mMin - match); + assert(min <= 0); + assert(ip >= iMin); assert((size_t)(ip-iMin) < (1U<<31)); + assert(match >= mMin); assert((size_t)(match - mMin) < (1U<<31)); + while ( (back > min) + && (ip[back-1] == match[back-1]) ) + back--; + return back; +} + +#if defined(_MSC_VER) +# define LZ4HC_rotl32(x,r) _rotl(x,r) +#else +# define LZ4HC_rotl32(x,r) ((x << r) | (x >> (32 - 
r))) +#endif + + +static U32 LZ4HC_rotatePattern(size_t const rotate, U32 const pattern) +{ + size_t const bitsToRotate = (rotate & (sizeof(pattern) - 1)) << 3; + if (bitsToRotate == 0) return pattern; + return LZ4HC_rotl32(pattern, (int)bitsToRotate); +} + +/* LZ4HC_countPattern() : + * pattern32 must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) */ +static unsigned +LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32) +{ + const BYTE* const iStart = ip; + reg_t const pattern = (sizeof(pattern)==8) ? + (reg_t)pattern32 + (((reg_t)pattern32) << (sizeof(pattern)*4)) : pattern32; + + while (likely(ip < iEnd-(sizeof(pattern)-1))) { + reg_t const diff = LZ4_read_ARCH(ip) ^ pattern; + if (!diff) { ip+=sizeof(pattern); continue; } + ip += LZ4_NbCommonBytes(diff); + return (unsigned)(ip - iStart); + } + + if (LZ4_isLittleEndian()) { + reg_t patternByte = pattern; + while ((ip>= 8; + } + } else { /* big endian */ + U32 bitOffset = (sizeof(pattern)*8) - 8; + while (ip < iEnd) { + BYTE const byte = (BYTE)(pattern >> bitOffset); + if (*ip != byte) break; + ip ++; bitOffset -= 8; + } } + + return (unsigned)(ip - iStart); +} + +/* LZ4HC_reverseCountPattern() : + * pattern must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) + * read using natural platform endianness */ +static unsigned +LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern) +{ + const BYTE* const iStart = ip; + + while (likely(ip >= iLow+4)) { + if (LZ4_read32(ip-4) != pattern) break; + ip -= 4; + } + { const BYTE* bytePtr = (const BYTE*)(&pattern) + 3; /* works for any endianness */ + while (likely(ip>iLow)) { + if (ip[-1] != *bytePtr) break; + ip--; bytePtr--; + } } + return (unsigned)(iStart - ip); +} + +/* LZ4HC_protectDictEnd() : + * Checks if the match is in the last 3 bytes of the dictionary, so reading the + * 4 byte MINMATCH would overflow. + * @returns true if the match index is okay. + */ +static int LZ4HC_protectDictEnd(U32 const dictLimit, U32 const matchIndex) +{ + return ((U32)((dictLimit - 1) - matchIndex) >= 3); +} + +typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e; +typedef enum { favorCompressionRatio=0, favorDecompressionSpeed } HCfavor_e; + +LZ4_FORCE_INLINE int +LZ4HC_InsertAndGetWiderMatch ( + LZ4HC_CCtx_internal* const hc4, + const BYTE* const ip, + const BYTE* const iLowLimit, const BYTE* const iHighLimit, + int longest, + const BYTE** matchpos, + const BYTE** startpos, + const int maxNbAttempts, + const int patternAnalysis, const int chainSwap, + const dictCtx_directive dict, + const HCfavor_e favorDecSpeed) +{ + U16* const chainTable = hc4->chainTable; + U32* const HashTable = hc4->hashTable; + const LZ4HC_CCtx_internal * const dictCtx = hc4->dictCtx; + const BYTE* const prefixPtr = hc4->prefixStart; + const U32 prefixIdx = hc4->dictLimit; + const U32 ipIndex = (U32)(ip - prefixPtr) + prefixIdx; + const int withinStartDistance = (hc4->lowLimit + (LZ4_DISTANCE_MAX + 1) > ipIndex); + const U32 lowestMatchIndex = (withinStartDistance) ? 
hc4->lowLimit : ipIndex - LZ4_DISTANCE_MAX; + const BYTE* const dictStart = hc4->dictStart; + const U32 dictIdx = hc4->lowLimit; + const BYTE* const dictEnd = dictStart + prefixIdx - dictIdx; + int const lookBackLength = (int)(ip-iLowLimit); + int nbAttempts = maxNbAttempts; + U32 matchChainPos = 0; + U32 const pattern = LZ4_read32(ip); + U32 matchIndex; + repeat_state_e repeat = rep_untested; + size_t srcPatternLength = 0; + + DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch"); + /* First Match */ + LZ4HC_Insert(hc4, ip); + matchIndex = HashTable[LZ4HC_hashPtr(ip)]; + DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)", + matchIndex, lowestMatchIndex); + + while ((matchIndex>=lowestMatchIndex) && (nbAttempts>0)) { + int matchLength=0; + nbAttempts--; + assert(matchIndex < ipIndex); + if (favorDecSpeed && (ipIndex - matchIndex < 8)) { + /* do nothing */ + } else if (matchIndex >= prefixIdx) { /* within current Prefix */ + const BYTE* const matchPtr = prefixPtr + matchIndex - prefixIdx; + assert(matchPtr < ip); + assert(longest >= 1); + if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) { + if (LZ4_read32(matchPtr) == pattern) { + int const back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, prefixPtr) : 0; + matchLength = MINMATCH + (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); + matchLength -= back; + if (matchLength > longest) { + longest = matchLength; + *matchpos = matchPtr + back; + *startpos = ip + back; + } } } + } else { /* lowestMatchIndex <= matchIndex < dictLimit */ + const BYTE* const matchPtr = dictStart + (matchIndex - dictIdx); + assert(matchIndex >= dictIdx); + if ( likely(matchIndex <= prefixIdx - 4) + && (LZ4_read32(matchPtr) == pattern) ) { + int back = 0; + const BYTE* vLimit = ip + (prefixIdx - matchIndex); + if (vLimit > iHighLimit) vLimit = iHighLimit; + matchLength = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; + if ((ip+matchLength == vLimit) && (vLimit < iHighLimit)) + matchLength += LZ4_count(ip+matchLength, prefixPtr, iHighLimit); + back = lookBackLength ? 
LZ4HC_countBack(ip, matchPtr, iLowLimit, dictStart) : 0; + matchLength -= back; + if (matchLength > longest) { + longest = matchLength; + *matchpos = prefixPtr - prefixIdx + matchIndex + back; /* virtual pos, relative to ip, to retrieve offset */ + *startpos = ip + back; + } } } + + if (chainSwap && matchLength==longest) { /* better match => select a better chain */ + assert(lookBackLength==0); /* search forward only */ + if (matchIndex + (U32)longest <= ipIndex) { + int const kTrigger = 4; + U32 distanceToNextMatch = 1; + int const end = longest - MINMATCH + 1; + int step = 1; + int accel = 1 << kTrigger; + int pos; + for (pos = 0; pos < end; pos += step) { + U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + (U32)pos); + step = (accel++ >> kTrigger); + if (candidateDist > distanceToNextMatch) { + distanceToNextMatch = candidateDist; + matchChainPos = (U32)pos; + accel = 1 << kTrigger; + } } + if (distanceToNextMatch > 1) { + if (distanceToNextMatch > matchIndex) break; /* avoid overflow */ + matchIndex -= distanceToNextMatch; + continue; + } } } + + { U32 const distNextMatch = DELTANEXTU16(chainTable, matchIndex); + if (patternAnalysis && distNextMatch==1 && matchChainPos==0) { + U32 const matchCandidateIdx = matchIndex-1; + /* may be a repeated pattern */ + if (repeat == rep_untested) { + if ( ((pattern & 0xFFFF) == (pattern >> 16)) + & ((pattern & 0xFF) == (pattern >> 24)) ) { + repeat = rep_confirmed; + srcPatternLength = LZ4HC_countPattern(ip+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern); + } else { + repeat = rep_not; + } } + if ( (repeat == rep_confirmed) && (matchCandidateIdx >= lowestMatchIndex) + && LZ4HC_protectDictEnd(prefixIdx, matchCandidateIdx) ) { + const int extDict = matchCandidateIdx < prefixIdx; + const BYTE* const matchPtr = (extDict ? dictStart - dictIdx : prefixPtr - prefixIdx) + matchCandidateIdx; + if (LZ4_read32(matchPtr) == pattern) { /* good candidate */ + const BYTE* const iLimit = extDict ? dictEnd : iHighLimit; + size_t forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iLimit, pattern) + sizeof(pattern); + if (extDict && matchPtr + forwardPatternLength == iLimit) { + U32 const rotatedPattern = LZ4HC_rotatePattern(forwardPatternLength, pattern); + forwardPatternLength += LZ4HC_countPattern(prefixPtr, iHighLimit, rotatedPattern); + } + { const BYTE* const lowestMatchPtr = extDict ? 
dictStart : prefixPtr; + size_t backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern); + size_t currentSegmentLength; + if (!extDict + && matchPtr - backLength == prefixPtr + && dictIdx < prefixIdx) { + U32 const rotatedPattern = LZ4HC_rotatePattern((U32)(-(int)backLength), pattern); + backLength += LZ4HC_reverseCountPattern(dictEnd, dictStart, rotatedPattern); + } + /* Limit backLength not go further than lowestMatchIndex */ + backLength = matchCandidateIdx - MAX(matchCandidateIdx - (U32)backLength, lowestMatchIndex); + assert(matchCandidateIdx - backLength >= lowestMatchIndex); + currentSegmentLength = backLength + forwardPatternLength; + /* Adjust to end of pattern if the source pattern fits, otherwise the beginning of the pattern */ + if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */ + && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */ + U32 const newMatchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */ + if (LZ4HC_protectDictEnd(prefixIdx, newMatchIndex)) + matchIndex = newMatchIndex; + else { + /* Can only happen if started in the prefix */ + assert(newMatchIndex >= prefixIdx - 3 && newMatchIndex < prefixIdx && !extDict); + matchIndex = prefixIdx; + } + } else { + U32 const newMatchIndex = matchCandidateIdx - (U32)backLength; /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */ + if (!LZ4HC_protectDictEnd(prefixIdx, newMatchIndex)) { + assert(newMatchIndex >= prefixIdx - 3 && newMatchIndex < prefixIdx && !extDict); + matchIndex = prefixIdx; + } else { + matchIndex = newMatchIndex; + if (lookBackLength==0) { /* no back possible */ + size_t const maxML = MIN(currentSegmentLength, srcPatternLength); + if ((size_t)longest < maxML) { + assert(prefixPtr - prefixIdx + matchIndex != ip); + if ((size_t)(ip - prefixPtr) + prefixIdx - matchIndex > LZ4_DISTANCE_MAX) break; + assert(maxML < 2 GB); + longest = (int)maxML; + *matchpos = prefixPtr - prefixIdx + matchIndex; /* virtual pos, relative to ip, to retrieve offset */ + *startpos = ip; + } + { U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex); + if (distToNextPattern > matchIndex) break; /* avoid overflow */ + matchIndex -= distToNextPattern; + } } } } } + continue; + } } + } } /* PA optimization */ + + /* follow current chain */ + matchIndex -= DELTANEXTU16(chainTable, matchIndex + matchChainPos); + + } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */ + + if ( dict == usingDictCtxHc + && nbAttempts > 0 + && ipIndex - lowestMatchIndex < LZ4_DISTANCE_MAX) { + size_t const dictEndOffset = (size_t)(dictCtx->end - dictCtx->prefixStart) + dictCtx->dictLimit; + U32 dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)]; + assert(dictEndOffset <= 1 GB); + matchIndex = dictMatchIndex + lowestMatchIndex - (U32)dictEndOffset; + while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && nbAttempts--) { + const BYTE* const matchPtr = dictCtx->prefixStart - dictCtx->dictLimit + dictMatchIndex; + + if (LZ4_read32(matchPtr) == pattern) { + int mlt; + int back = 0; + const BYTE* vLimit = ip + (dictEndOffset - dictMatchIndex); + if (vLimit > iHighLimit) vLimit = iHighLimit; + mlt = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; + back = lookBackLength ? 
LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->prefixStart) : 0; + mlt -= back; + if (mlt > longest) { + longest = mlt; + *matchpos = prefixPtr - prefixIdx + matchIndex + back; + *startpos = ip + back; + } } + + { U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, dictMatchIndex); + dictMatchIndex -= nextOffset; + matchIndex -= nextOffset; + } } } + + return longest; +} + +LZ4_FORCE_INLINE int +LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */ + const BYTE* const ip, const BYTE* const iLimit, + const BYTE** matchpos, + const int maxNbAttempts, + const int patternAnalysis, + const dictCtx_directive dict) +{ + const BYTE* uselessPtr = ip; + /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), + * but this won't be the case here, as we define iLowLimit==ip, + * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ + return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts, patternAnalysis, 0 /*chainSwap*/, dict, favorCompressionRatio); +} + +/* LZ4HC_encodeSequence() : + * @return : 0 if ok, + * 1 if buffer issue detected */ +LZ4_FORCE_INLINE int LZ4HC_encodeSequence ( + const BYTE** _ip, + BYTE** _op, + const BYTE** _anchor, + int matchLength, + const BYTE* const match, + limitedOutput_directive limit, + BYTE* oend) +{ +#define ip (*_ip) +#define op (*_op) +#define anchor (*_anchor) + + size_t length; + BYTE* const token = op++; + +#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6) + static const BYTE* start = NULL; + static U32 totalCost = 0; + U32 const pos = (start==NULL) ? 0 : (U32)(anchor - start); + U32 const ll = (U32)(ip - anchor); + U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0; + U32 const mlAdd = (matchLength>=19) ? 
((matchLength-19) / 255) + 1 : 0; + U32 const cost = 1 + llAdd + ll + 2 + mlAdd; + if (start==NULL) start = anchor; /* only works for single segment */ + /* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */ + DEBUGLOG(6, "pos:%7u -- literals:%4u, match:%4i, offset:%5u, cost:%4u + %5u", + pos, + (U32)(ip - anchor), matchLength, (U32)(ip-match), + cost, totalCost); + totalCost += cost; +#endif + + /* Encode Literal length */ + length = (size_t)(ip - anchor); + LZ4_STATIC_ASSERT(notLimited == 0); + /* Check output limit */ + if (limit && ((op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) { + DEBUGLOG(6, "Not enough room to write %i literals (%i bytes remaining)", + (int)length, (int)(oend - op)); + return 1; + } + if (length >= RUN_MASK) { + size_t len = length - RUN_MASK; + *token = (RUN_MASK << ML_BITS); + for(; len >= 255 ; len -= 255) *op++ = 255; + *op++ = (BYTE)len; + } else { + *token = (BYTE)(length << ML_BITS); + } + + /* Copy Literals */ + LZ4_wildCopy8(op, anchor, op + length); + op += length; + + /* Encode Offset */ + assert( (ip - match) <= LZ4_DISTANCE_MAX ); /* note : consider providing offset as a value, rather than as a pointer difference */ + LZ4_writeLE16(op, (U16)(ip - match)); op += 2; + + /* Encode MatchLength */ + assert(matchLength >= MINMATCH); + length = (size_t)matchLength - MINMATCH; + if (limit && (op + (length / 255) + (1 + LASTLITERALS) > oend)) { + DEBUGLOG(6, "Not enough room to write match length"); + return 1; /* Check output limit */ + } + if (length >= ML_MASK) { + *token += ML_MASK; + length -= ML_MASK; + for(; length >= 510 ; length -= 510) { *op++ = 255; *op++ = 255; } + if (length >= 255) { length -= 255; *op++ = 255; } + *op++ = (BYTE)length; + } else { + *token += (BYTE)(length); + } + + /* Prepare next loop */ + ip += matchLength; + anchor = ip; + + return 0; +} +#undef ip +#undef op +#undef anchor + +LZ4_FORCE_INLINE int LZ4HC_compress_hashChain ( + LZ4HC_CCtx_internal* const ctx, + const char* const source, + char* const dest, + int* srcSizePtr, + int const maxOutputSize, + int maxNbAttempts, + const limitedOutput_directive limit, + const dictCtx_directive dict + ) +{ + const int inputSize = *srcSizePtr; + const int patternAnalysis = (maxNbAttempts > 128); /* levels 9+ */ + + const BYTE* ip = (const BYTE*) source; + const BYTE* anchor = ip; + const BYTE* const iend = ip + inputSize; + const BYTE* const mflimit = iend - MFLIMIT; + const BYTE* const matchlimit = (iend - LASTLITERALS); + + BYTE* optr = (BYTE*) dest; + BYTE* op = (BYTE*) dest; + BYTE* oend = op + maxOutputSize; + + int ml0, ml, ml2, ml3; + const BYTE* start0; + const BYTE* ref0; + const BYTE* ref = NULL; + const BYTE* start2 = NULL; + const BYTE* ref2 = NULL; + const BYTE* start3 = NULL; + const BYTE* ref3 = NULL; + + /* init */ + *srcSizePtr = 0; + if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */ + if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */ + + /* Main Loop */ + while (ip <= mflimit) { + ml = LZ4HC_InsertAndFindBestMatch(ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis, dict); + if (ml encode ML1 */ + optr = op; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; + continue; + } + + if (start0 < ip) { /* first match was skipped at least once */ + if (start2 < ip + ml0) { /* squeezing ML1 between ML0(original ML1) and ML2 */ + ip = start0; ref = ref0; ml = ml0; /* restore initial ML1 */ + } } + + /* Here, 
start0==ip */ + if ((start2 - ip) < 3) { /* First Match too small : removed */ + ml = ml2; + ip = start2; + ref =ref2; + goto _Search2; + } + +_Search3: + /* At this stage, we have : + * ml2 > ml1, and + * ip1+3 <= ip2 (usually < ip1+ml1) */ + if ((start2 - ip) < OPTIMAL_ML) { + int correction; + int new_ml = ml; + if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML; + if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH; + correction = new_ml - (int)(start2 - ip); + if (correction > 0) { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } + /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */ + + if (start2 + ml2 <= mflimit) { + ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, + start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, + maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio); + } else { + ml3 = ml2; + } + + if (ml3 == ml2) { /* No better match => encode ML1 and ML2 */ + /* ip & ref are known; Now for ml */ + if (start2 < ip+ml) ml = (int)(start2 - ip); + /* Now, encode 2 sequences */ + optr = op; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; + ip = start2; + optr = op; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, ref2, limit, oend)) { + ml = ml2; + ref = ref2; + goto _dest_overflow; + } + continue; + } + + if (start3 < ip+ml+3) { /* Not enough space for match 2 : remove it */ + if (start3 >= (ip+ml)) { /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */ + if (start2 < ip+ml) { + int correction = (int)(ip+ml - start2); + start2 += correction; + ref2 += correction; + ml2 -= correction; + if (ml2 < MINMATCH) { + start2 = start3; + ref2 = ref3; + ml2 = ml3; + } + } + + optr = op; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; + ip = start3; + ref = ref3; + ml = ml3; + + start0 = start2; + ref0 = ref2; + ml0 = ml2; + goto _Search2; + } + + start2 = start3; + ref2 = ref3; + ml2 = ml3; + goto _Search3; + } + + /* + * OK, now we have 3 ascending matches; + * let's write the first one ML1. + * ip & ref are known; Now decide ml. 
+ */ + if (start2 < ip+ml) { + if ((start2 - ip) < OPTIMAL_ML) { + int correction; + if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; + if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH; + correction = ml - (int)(start2 - ip); + if (correction > 0) { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } else { + ml = (int)(start2 - ip); + } + } + optr = op; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; + + /* ML2 becomes ML1 */ + ip = start2; ref = ref2; ml = ml2; + + /* ML3 becomes ML2 */ + start2 = start3; ref2 = ref3; ml2 = ml3; + + /* let's find a new ML3 */ + goto _Search3; + } + +_last_literals: + /* Encode Last Literals */ + { size_t lastRunSize = (size_t)(iend - anchor); /* literals */ + size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255; + size_t const totalSize = 1 + llAdd + lastRunSize; + if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */ + if (limit && (op + totalSize > oend)) { + if (limit == limitedOutput) return 0; + /* adapt lastRunSize to fill 'dest' */ + lastRunSize = (size_t)(oend - op) - 1 /*token*/; + llAdd = (lastRunSize + 256 - RUN_MASK) / 256; + lastRunSize -= llAdd; + } + DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize); + ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */ + + if (lastRunSize >= RUN_MASK) { + size_t accumulator = lastRunSize - RUN_MASK; + *op++ = (RUN_MASK << ML_BITS); + for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255; + *op++ = (BYTE) accumulator; + } else { + *op++ = (BYTE)(lastRunSize << ML_BITS); + } + LZ4_memcpy(op, anchor, lastRunSize); + op += lastRunSize; + } + + /* End */ + *srcSizePtr = (int) (((const char*)ip) - source); + return (int) (((char*)op)-dest); + +_dest_overflow: + if (limit == fillOutput) { + /* Assumption : ip, anchor, ml and ref must be set correctly */ + size_t const ll = (size_t)(ip - anchor); + size_t const ll_addbytes = (ll + 240) / 255; + size_t const ll_totalCost = 1 + ll_addbytes + ll; + BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */ + DEBUGLOG(6, "Last sequence overflowing"); + op = optr; /* restore correct out pointer */ + if (op + ll_totalCost <= maxLitPos) { + /* ll validated; now adjust match length */ + size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost)); + size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255); + assert(maxMlSize < INT_MAX); assert(ml >= 0); + if ((size_t)ml > maxMlSize) ml = (int)maxMlSize; + if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ml >= MFLIMIT) { + LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, notLimited, oend); + } } + goto _last_literals; + } + /* compression failed */ + return 0; +} + + +static int LZ4HC_compress_optimal( LZ4HC_CCtx_internal* ctx, + const char* const source, char* dst, + int* srcSizePtr, int dstCapacity, + int const nbSearches, size_t sufficient_len, + const limitedOutput_directive limit, int const fullUpdate, + const dictCtx_directive dict, + const HCfavor_e favorDecSpeed); + + +LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal ( + LZ4HC_CCtx_internal* const ctx, + const char* const src, + char* const dst, + int* const srcSizePtr, + int const dstCapacity, + int cLevel, + const limitedOutput_directive limit, + const dictCtx_directive dict + ) +{ + typedef enum { lz4hc, lz4opt } lz4hc_strat_e; + typedef struct { + lz4hc_strat_e strat; + int nbSearches; + U32 targetLength; + } cParams_t; + static const cParams_t 
clTable[LZ4HC_CLEVEL_MAX+1] = { + { lz4hc, 2, 16 }, /* 0, unused */ + { lz4hc, 2, 16 }, /* 1, unused */ + { lz4hc, 2, 16 }, /* 2, unused */ + { lz4hc, 4, 16 }, /* 3 */ + { lz4hc, 8, 16 }, /* 4 */ + { lz4hc, 16, 16 }, /* 5 */ + { lz4hc, 32, 16 }, /* 6 */ + { lz4hc, 64, 16 }, /* 7 */ + { lz4hc, 128, 16 }, /* 8 */ + { lz4hc, 256, 16 }, /* 9 */ + { lz4opt, 96, 64 }, /*10==LZ4HC_CLEVEL_OPT_MIN*/ + { lz4opt, 512,128 }, /*11 */ + { lz4opt,16384,LZ4_OPT_NUM }, /* 12==LZ4HC_CLEVEL_MAX */ + }; + + DEBUGLOG(4, "LZ4HC_compress_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)", + ctx, src, *srcSizePtr, limit); + + if (limit == fillOutput && dstCapacity < 1) return 0; /* Impossible to store anything */ + if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size (too large or negative) */ + + ctx->end += *srcSizePtr; + if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT; /* note : convention is different from lz4frame, maybe something to review */ + cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel); + { cParams_t const cParam = clTable[cLevel]; + HCfavor_e const favor = ctx->favorDecSpeed ? favorDecompressionSpeed : favorCompressionRatio; + int result; + + if (cParam.strat == lz4hc) { + result = LZ4HC_compress_hashChain(ctx, + src, dst, srcSizePtr, dstCapacity, + cParam.nbSearches, limit, dict); + } else { + assert(cParam.strat == lz4opt); + result = LZ4HC_compress_optimal(ctx, + src, dst, srcSizePtr, dstCapacity, + cParam.nbSearches, cParam.targetLength, limit, + cLevel == LZ4HC_CLEVEL_MAX, /* ultra mode */ + dict, favor); + } + if (result <= 0) ctx->dirty = 1; + return result; + } +} + +static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock); + +static int +LZ4HC_compress_generic_noDictCtx ( + LZ4HC_CCtx_internal* const ctx, + const char* const src, + char* const dst, + int* const srcSizePtr, + int const dstCapacity, + int cLevel, + limitedOutput_directive limit + ) +{ + assert(ctx->dictCtx == NULL); + return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, noDictCtx); +} + +static int +LZ4HC_compress_generic_dictCtx ( + LZ4HC_CCtx_internal* const ctx, + const char* const src, + char* const dst, + int* const srcSizePtr, + int const dstCapacity, + int cLevel, + limitedOutput_directive limit + ) +{ + const size_t position = (size_t)(ctx->end - ctx->prefixStart) + (ctx->dictLimit - ctx->lowLimit); + assert(ctx->dictCtx != NULL); + if (position >= 64 KB) { + ctx->dictCtx = NULL; + return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); + } else if (position == 0 && *srcSizePtr > 4 KB) { + LZ4_memcpy(ctx, ctx->dictCtx, sizeof(LZ4HC_CCtx_internal)); + LZ4HC_setExternalDict(ctx, (const BYTE *)src); + ctx->compressionLevel = (short)cLevel; + return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); + } else { + return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, usingDictCtxHc); + } +} + +static int +LZ4HC_compress_generic ( + LZ4HC_CCtx_internal* const ctx, + const char* const src, + char* const dst, + int* const srcSizePtr, + int const dstCapacity, + int cLevel, + limitedOutput_directive limit + ) +{ + if (ctx->dictCtx == NULL) { + return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); + } else { + return LZ4HC_compress_generic_dictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); + } +} + + +int LZ4_sizeofStateHC(void) { return (int)sizeof(LZ4_streamHC_t); } + +static 
size_t LZ4_streamHC_t_alignment(void) +{ +#if LZ4_ALIGN_TEST + typedef struct { char c; LZ4_streamHC_t t; } t_a; + return sizeof(t_a) - sizeof(LZ4_streamHC_t); +#else + return 1; /* effectively disabled */ +#endif +} + +/* state is presumed correctly initialized, + * in which case its size and alignment have already been validate */ +int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel) +{ + LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)state)->internal_donotuse; + if (!LZ4_isAligned(state, LZ4_streamHC_t_alignment())) return 0; + LZ4_resetStreamHC_fast((LZ4_streamHC_t*)state, compressionLevel); + LZ4HC_init_internal (ctx, (const BYTE*)src); + if (dstCapacity < LZ4_compressBound(srcSize)) + return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, limitedOutput); + else + return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, notLimited); +} + +int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel) +{ + LZ4_streamHC_t* const ctx = LZ4_initStreamHC(state, sizeof(*ctx)); + if (ctx==NULL) return 0; /* init failure */ + return LZ4_compress_HC_extStateHC_fastReset(state, src, dst, srcSize, dstCapacity, compressionLevel); +} + +int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel) +{ + int cSize; +#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 + LZ4_streamHC_t* const statePtr = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t)); + if (statePtr==NULL) return 0; +#else + LZ4_streamHC_t state; + LZ4_streamHC_t* const statePtr = &state; +#endif + cSize = LZ4_compress_HC_extStateHC(statePtr, src, dst, srcSize, dstCapacity, compressionLevel); +#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 + FREEMEM(statePtr); +#endif + return cSize; +} + +/* state is presumed sized correctly (>= sizeof(LZ4_streamHC_t)) */ +int LZ4_compress_HC_destSize(void* state, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int cLevel) +{ + LZ4_streamHC_t* const ctx = LZ4_initStreamHC(state, sizeof(*ctx)); + if (ctx==NULL) return 0; /* init failure */ + LZ4HC_init_internal(&ctx->internal_donotuse, (const BYTE*) source); + LZ4_setCompressionLevel(ctx, cLevel); + return LZ4HC_compress_generic(&ctx->internal_donotuse, source, dest, sourceSizePtr, targetDestSize, cLevel, fillOutput); +} + + + +/************************************** +* Streaming Functions +**************************************/ +/* allocation */ +#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) +LZ4_streamHC_t* LZ4_createStreamHC(void) +{ + LZ4_streamHC_t* const state = + (LZ4_streamHC_t*)ALLOC_AND_ZERO(sizeof(LZ4_streamHC_t)); + if (state == NULL) return NULL; + LZ4_setCompressionLevel(state, LZ4HC_CLEVEL_DEFAULT); + return state; +} + +int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) +{ + DEBUGLOG(4, "LZ4_freeStreamHC(%p)", LZ4_streamHCPtr); + if (!LZ4_streamHCPtr) return 0; /* support free on NULL */ + FREEMEM(LZ4_streamHCPtr); + return 0; +} +#endif + + +LZ4_streamHC_t* LZ4_initStreamHC (void* buffer, size_t size) +{ + LZ4_streamHC_t* const LZ4_streamHCPtr = (LZ4_streamHC_t*)buffer; + DEBUGLOG(4, "LZ4_initStreamHC(%p, %u)", buffer, (unsigned)size); + /* check conditions */ + if (buffer == NULL) return NULL; + if (size < sizeof(LZ4_streamHC_t)) return NULL; + if (!LZ4_isAligned(buffer, LZ4_streamHC_t_alignment())) return NULL; + /* init */ + { 
LZ4HC_CCtx_internal* const hcstate = &(LZ4_streamHCPtr->internal_donotuse); + MEM_INIT(hcstate, 0, sizeof(*hcstate)); } + LZ4_setCompressionLevel(LZ4_streamHCPtr, LZ4HC_CLEVEL_DEFAULT); + return LZ4_streamHCPtr; +} + +/* just a stub */ +void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) +{ + LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); + LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel); +} + +void LZ4_resetStreamHC_fast (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) +{ + DEBUGLOG(4, "LZ4_resetStreamHC_fast(%p, %d)", LZ4_streamHCPtr, compressionLevel); + if (LZ4_streamHCPtr->internal_donotuse.dirty) { + LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); + } else { + /* preserve end - prefixStart : can trigger clearTable's threshold */ + if (LZ4_streamHCPtr->internal_donotuse.end != NULL) { + LZ4_streamHCPtr->internal_donotuse.end -= (uptrval)LZ4_streamHCPtr->internal_donotuse.prefixStart; + } else { + assert(LZ4_streamHCPtr->internal_donotuse.prefixStart == NULL); + } + LZ4_streamHCPtr->internal_donotuse.prefixStart = NULL; + LZ4_streamHCPtr->internal_donotuse.dictCtx = NULL; + } + LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel); +} + +void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) +{ + DEBUGLOG(5, "LZ4_setCompressionLevel(%p, %d)", LZ4_streamHCPtr, compressionLevel); + if (compressionLevel < 1) compressionLevel = LZ4HC_CLEVEL_DEFAULT; + if (compressionLevel > LZ4HC_CLEVEL_MAX) compressionLevel = LZ4HC_CLEVEL_MAX; + LZ4_streamHCPtr->internal_donotuse.compressionLevel = (short)compressionLevel; +} + +void LZ4_favorDecompressionSpeed(LZ4_streamHC_t* LZ4_streamHCPtr, int favor) +{ + LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = (favor!=0); +} + +/* LZ4_loadDictHC() : + * LZ4_streamHCPtr is presumed properly initialized */ +int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, + const char* dictionary, int dictSize) +{ + LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse; + DEBUGLOG(4, "LZ4_loadDictHC(ctx:%p, dict:%p, dictSize:%d)", LZ4_streamHCPtr, dictionary, dictSize); + assert(LZ4_streamHCPtr != NULL); + if (dictSize > 64 KB) { + dictionary += (size_t)dictSize - 64 KB; + dictSize = 64 KB; + } + /* need a full initialization, there are bad side-effects when using resetFast() */ + { int const cLevel = ctxPtr->compressionLevel; + LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); + LZ4_setCompressionLevel(LZ4_streamHCPtr, cLevel); + } + LZ4HC_init_internal (ctxPtr, (const BYTE*)dictionary); + ctxPtr->end = (const BYTE*)dictionary + dictSize; + if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); + return dictSize; +} + +void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC_t *dictionary_stream) { + working_stream->internal_donotuse.dictCtx = dictionary_stream != NULL ? 
&(dictionary_stream->internal_donotuse) : NULL; +} + +/* compression */ + +static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock) +{ + DEBUGLOG(4, "LZ4HC_setExternalDict(%p, %p)", ctxPtr, newBlock); + if (ctxPtr->end >= ctxPtr->prefixStart + 4) + LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */ + + /* Only one memory segment for extDict, so any previous extDict is lost at this stage */ + ctxPtr->lowLimit = ctxPtr->dictLimit; + ctxPtr->dictStart = ctxPtr->prefixStart; + ctxPtr->dictLimit += (U32)(ctxPtr->end - ctxPtr->prefixStart); + ctxPtr->prefixStart = newBlock; + ctxPtr->end = newBlock; + ctxPtr->nextToUpdate = ctxPtr->dictLimit; /* match referencing will resume from there */ + + /* cannot reference an extDict and a dictCtx at the same time */ + ctxPtr->dictCtx = NULL; +} + +static int +LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr, + const char* src, char* dst, + int* srcSizePtr, int dstCapacity, + limitedOutput_directive limit) +{ + LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse; + DEBUGLOG(5, "LZ4_compressHC_continue_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)", + LZ4_streamHCPtr, src, *srcSizePtr, limit); + assert(ctxPtr != NULL); + /* auto-init if forgotten */ + if (ctxPtr->prefixStart == NULL) LZ4HC_init_internal (ctxPtr, (const BYTE*) src); + + /* Check overflow */ + if ((size_t)(ctxPtr->end - ctxPtr->prefixStart) + ctxPtr->dictLimit > 2 GB) { + size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->prefixStart); + if (dictSize > 64 KB) dictSize = 64 KB; + LZ4_loadDictHC(LZ4_streamHCPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize); + } + + /* Check if blocks follow each other */ + if ((const BYTE*)src != ctxPtr->end) + LZ4HC_setExternalDict(ctxPtr, (const BYTE*)src); + + /* Check overlapping input/dictionary space */ + { const BYTE* sourceEnd = (const BYTE*) src + *srcSizePtr; + const BYTE* const dictBegin = ctxPtr->dictStart; + const BYTE* const dictEnd = ctxPtr->dictStart + (ctxPtr->dictLimit - ctxPtr->lowLimit); + if ((sourceEnd > dictBegin) && ((const BYTE*)src < dictEnd)) { + if (sourceEnd > dictEnd) sourceEnd = dictEnd; + ctxPtr->lowLimit += (U32)(sourceEnd - ctxPtr->dictStart); + ctxPtr->dictStart += (U32)(sourceEnd - ctxPtr->dictStart); + if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) { + ctxPtr->lowLimit = ctxPtr->dictLimit; + ctxPtr->dictStart = ctxPtr->prefixStart; + } } } + + return LZ4HC_compress_generic (ctxPtr, src, dst, srcSizePtr, dstCapacity, ctxPtr->compressionLevel, limit); +} + +int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int srcSize, int dstCapacity) +{ + if (dstCapacity < LZ4_compressBound(srcSize)) + return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, limitedOutput); + else + return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, notLimited); +} + +int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int* srcSizePtr, int targetDestSize) +{ + return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, srcSizePtr, targetDestSize, fillOutput); +} + + + +/* LZ4_saveDictHC : + * save history content + * into a user-provided buffer + * which is then used to continue compression + */ +int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize) +{ + LZ4HC_CCtx_internal* const streamPtr = &LZ4_streamHCPtr->internal_donotuse; + int const prefixSize = 
(int)(streamPtr->end - streamPtr->prefixStart); + DEBUGLOG(5, "LZ4_saveDictHC(%p, %p, %d)", LZ4_streamHCPtr, safeBuffer, dictSize); + assert(prefixSize >= 0); + if (dictSize > 64 KB) dictSize = 64 KB; + if (dictSize < 4) dictSize = 0; + if (dictSize > prefixSize) dictSize = prefixSize; + if (safeBuffer == NULL) assert(dictSize == 0); + if (dictSize > 0) + LZ4_memmove(safeBuffer, streamPtr->end - dictSize, dictSize); + { U32 const endIndex = (U32)(streamPtr->end - streamPtr->prefixStart) + streamPtr->dictLimit; + streamPtr->end = (const BYTE*)safeBuffer + dictSize; + streamPtr->prefixStart = streamPtr->end - dictSize; + streamPtr->dictLimit = endIndex - (U32)dictSize; + streamPtr->lowLimit = endIndex - (U32)dictSize; + streamPtr->dictStart = streamPtr->prefixStart; + if (streamPtr->nextToUpdate < streamPtr->dictLimit) + streamPtr->nextToUpdate = streamPtr->dictLimit; + } + return dictSize; +} + + +/*************************************************** +* Deprecated Functions +***************************************************/ + +/* These functions currently generate deprecation warnings */ + +/* Wrappers for deprecated compression functions */ +int LZ4_compressHC(const char* src, char* dst, int srcSize) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), 0); } +int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0); } +int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); } +int LZ4_compressHC2_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel); } +int LZ4_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, LZ4_compressBound(srcSize), 0); } +int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize, 0); } +int LZ4_compressHC2_withStateHC (void* state, const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); } +int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel); } +int LZ4_compressHC_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, LZ4_compressBound(srcSize)); } +int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, maxDstSize); } + + +/* Deprecated streaming functions */ +int LZ4_sizeofStreamStateHC(void) { return sizeof(LZ4_streamHC_t); } + +/* state is presumed correctly sized, aka >= sizeof(LZ4_streamHC_t) + * @return : 0 on success, !=0 if error */ +int LZ4_resetStreamStateHC(void* state, char* inputBuffer) +{ + LZ4_streamHC_t* const hc4 = LZ4_initStreamHC(state, sizeof(*hc4)); + if (hc4 == NULL) return 1; /* init failed */ + LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer); + return 0; +} + +#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) +void* LZ4_createHC (const char* 
inputBuffer) +{ + LZ4_streamHC_t* const hc4 = LZ4_createStreamHC(); + if (hc4 == NULL) return NULL; /* not enough memory */ + LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer); + return hc4; +} + +int LZ4_freeHC (void* LZ4HC_Data) +{ + if (!LZ4HC_Data) return 0; /* support free on NULL */ + FREEMEM(LZ4HC_Data); + return 0; +} +#endif + +int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int cLevel) +{ + return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, 0, cLevel, notLimited); +} + +int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int dstCapacity, int cLevel) +{ + return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, dstCapacity, cLevel, limitedOutput); +} + +char* LZ4_slideInputBufferHC(void* LZ4HC_Data) +{ + LZ4_streamHC_t* const ctx = (LZ4_streamHC_t*)LZ4HC_Data; + const BYTE* bufferStart = ctx->internal_donotuse.prefixStart - ctx->internal_donotuse.dictLimit + ctx->internal_donotuse.lowLimit; + LZ4_resetStreamHC_fast(ctx, ctx->internal_donotuse.compressionLevel); + /* avoid const char * -> char * conversion warning :( */ + return (char*)(uptrval)bufferStart; +} + + +/* ================================================ + * LZ4 Optimal parser (levels [LZ4HC_CLEVEL_OPT_MIN - LZ4HC_CLEVEL_MAX]) + * ===============================================*/ +typedef struct { + int price; + int off; + int mlen; + int litlen; +} LZ4HC_optimal_t; + +/* price in bytes */ +LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen) +{ + int price = litlen; + assert(litlen >= 0); + if (litlen >= (int)RUN_MASK) + price += 1 + ((litlen-(int)RUN_MASK) / 255); + return price; +} + + +/* requires mlen >= MINMATCH */ +LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen) +{ + int price = 1 + 2 ; /* token + 16-bit offset */ + assert(litlen >= 0); + assert(mlen >= MINMATCH); + + price += LZ4HC_literalsPrice(litlen); + + if (mlen >= (int)(ML_MASK+MINMATCH)) + price += 1 + ((mlen-(int)(ML_MASK+MINMATCH)) / 255); + + return price; +} + + +typedef struct { + int off; + int len; +} LZ4HC_match_t; + +LZ4_FORCE_INLINE LZ4HC_match_t +LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, + const BYTE* ip, const BYTE* const iHighLimit, + int minLen, int nbSearches, + const dictCtx_directive dict, + const HCfavor_e favorDecSpeed) +{ + LZ4HC_match_t match = { 0 , 0 }; + const BYTE* matchPtr = NULL; + /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), + * but this won't be the case here, as we define iLowLimit==ip, + * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ + int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /*patternAnalysis*/, 1 /*chainSwap*/, dict, favorDecSpeed); + if (matchLength <= minLen) return match; + if (favorDecSpeed) { + if ((matchLength>18) & (matchLength<=36)) matchLength=18; /* favor shortcut */ + } + match.len = matchLength; + match.off = (int)(ip-matchPtr); + return match; +} + + +static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx, + const char* const source, + char* dst, + int* srcSizePtr, + int dstCapacity, + int const nbSearches, + size_t sufficient_len, + const limitedOutput_directive limit, + int const fullUpdate, + const dictCtx_directive dict, + const HCfavor_e favorDecSpeed) +{ + int retval = 0; +#define TRAILING_LITERALS 3 +#if 
defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 + LZ4HC_optimal_t* const opt = (LZ4HC_optimal_t*)ALLOC(sizeof(LZ4HC_optimal_t) * (LZ4_OPT_NUM + TRAILING_LITERALS)); +#else + LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* ~64 KB, which is a bit large for stack... */ +#endif + + const BYTE* ip = (const BYTE*) source; + const BYTE* anchor = ip; + const BYTE* const iend = ip + *srcSizePtr; + const BYTE* const mflimit = iend - MFLIMIT; + const BYTE* const matchlimit = iend - LASTLITERALS; + BYTE* op = (BYTE*) dst; + BYTE* opSaved = (BYTE*) dst; + BYTE* oend = op + dstCapacity; + int ovml = MINMATCH; /* overflow - last sequence */ + const BYTE* ovref = NULL; + + /* init */ +#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 + if (opt == NULL) goto _return_label; +#endif + DEBUGLOG(5, "LZ4HC_compress_optimal(dst=%p, dstCapa=%u)", dst, (unsigned)dstCapacity); + *srcSizePtr = 0; + if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */ + if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1; + + /* Main Loop */ + while (ip <= mflimit) { + int const llen = (int)(ip - anchor); + int best_mlen, best_off; + int cur, last_match_pos = 0; + + LZ4HC_match_t const firstMatch = LZ4HC_FindLongerMatch(ctx, ip, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed); + if (firstMatch.len==0) { ip++; continue; } + + if ((size_t)firstMatch.len > sufficient_len) { + /* good enough solution : immediate encoding */ + int const firstML = firstMatch.len; + const BYTE* const matchPos = ip - firstMatch.off; + opSaved = op; + if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, matchPos, limit, oend) ) { /* updates ip, op and anchor */ + ovml = firstML; + ovref = matchPos; + goto _dest_overflow; + } + continue; + } + + /* set prices for first positions (literals) */ + { int rPos; + for (rPos = 0 ; rPos < MINMATCH ; rPos++) { + int const cost = LZ4HC_literalsPrice(llen + rPos); + opt[rPos].mlen = 1; + opt[rPos].off = 0; + opt[rPos].litlen = llen + rPos; + opt[rPos].price = cost; + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", + rPos, cost, opt[rPos].litlen); + } } + /* set prices using initial match */ + { int mlen = MINMATCH; + int const matchML = firstMatch.len; /* necessarily < sufficient_len < LZ4_OPT_NUM */ + int const offset = firstMatch.off; + assert(matchML < LZ4_OPT_NUM); + for ( ; mlen <= matchML ; mlen++) { + int const cost = LZ4HC_sequencePrice(llen, mlen); + opt[mlen].mlen = mlen; + opt[mlen].off = offset; + opt[mlen].litlen = llen; + opt[mlen].price = cost; + DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup", + mlen, cost, mlen); + } } + last_match_pos = firstMatch.len; + { int addLit; + for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) { + opt[last_match_pos+addLit].mlen = 1; /* literal */ + opt[last_match_pos+addLit].off = 0; + opt[last_match_pos+addLit].litlen = addLit; + opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", + last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); + } } + + /* check further positions */ + for (cur = 1; cur < last_match_pos; cur++) { + const BYTE* const curPtr = ip + cur; + LZ4HC_match_t newMatch; + + if (curPtr > mflimit) break; + DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u", + cur, opt[cur].price, opt[cur+1].price, cur+1); + if (fullUpdate) { + /* not useful to search here if next position has same (or lower) cost */ + if ( (opt[cur+1].price <= 
opt[cur].price) + /* in some cases, next position has same cost, but cost rises sharply after, so a small match would still be beneficial */ + && (opt[cur+MINMATCH].price < opt[cur].price + 3/*min seq price*/) ) + continue; + } else { + /* not useful to search here if next position has same (or lower) cost */ + if (opt[cur+1].price <= opt[cur].price) continue; + } + + DEBUGLOG(7, "search at rPos:%u", cur); + if (fullUpdate) + newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed); + else + /* only test matches of minimum length; slightly faster, but misses a few bytes */ + newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, last_match_pos - cur, nbSearches, dict, favorDecSpeed); + if (!newMatch.len) continue; + + if ( ((size_t)newMatch.len > sufficient_len) + || (newMatch.len + cur >= LZ4_OPT_NUM) ) { + /* immediate encoding */ + best_mlen = newMatch.len; + best_off = newMatch.off; + last_match_pos = cur + 1; + goto encode; + } + + /* before match : set price with literals at beginning */ + { int const baseLitlen = opt[cur].litlen; + int litlen; + for (litlen = 1; litlen < MINMATCH; litlen++) { + int const price = opt[cur].price - LZ4HC_literalsPrice(baseLitlen) + LZ4HC_literalsPrice(baseLitlen+litlen); + int const pos = cur + litlen; + if (price < opt[pos].price) { + opt[pos].mlen = 1; /* literal */ + opt[pos].off = 0; + opt[pos].litlen = baseLitlen+litlen; + opt[pos].price = price; + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", + pos, price, opt[pos].litlen); + } } } + + /* set prices using match at position = cur */ + { int const matchML = newMatch.len; + int ml = MINMATCH; + + assert(cur + newMatch.len < LZ4_OPT_NUM); + for ( ; ml <= matchML ; ml++) { + int const pos = cur + ml; + int const offset = newMatch.off; + int price; + int ll; + DEBUGLOG(7, "testing price rPos %i (last_match_pos=%i)", + pos, last_match_pos); + if (opt[cur].mlen == 1) { + ll = opt[cur].litlen; + price = ((cur > ll) ? 
opt[cur - ll].price : 0) + + LZ4HC_sequencePrice(ll, ml); + } else { + ll = 0; + price = opt[cur].price + LZ4HC_sequencePrice(0, ml); + } + + assert((U32)favorDecSpeed <= 1); + if (pos > last_match_pos+TRAILING_LITERALS + || price <= opt[pos].price - (int)favorDecSpeed) { + DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)", + pos, price, ml); + assert(pos < LZ4_OPT_NUM); + if ( (ml == matchML) /* last pos of last match */ + && (last_match_pos < pos) ) + last_match_pos = pos; + opt[pos].mlen = ml; + opt[pos].off = offset; + opt[pos].litlen = ll; + opt[pos].price = price; + } } } + /* complete following positions with literals */ + { int addLit; + for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) { + opt[last_match_pos+addLit].mlen = 1; /* literal */ + opt[last_match_pos+addLit].off = 0; + opt[last_match_pos+addLit].litlen = addLit; + opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); + } } + } /* for (cur = 1; cur <= last_match_pos; cur++) */ + + assert(last_match_pos < LZ4_OPT_NUM + TRAILING_LITERALS); + best_mlen = opt[last_match_pos].mlen; + best_off = opt[last_match_pos].off; + cur = last_match_pos - best_mlen; + +encode: /* cur, last_match_pos, best_mlen, best_off must be set */ + assert(cur < LZ4_OPT_NUM); + assert(last_match_pos >= 1); /* == 1 when only one candidate */ + DEBUGLOG(6, "reverse traversal, looking for shortest path (last_match_pos=%i)", last_match_pos); + { int candidate_pos = cur; + int selected_matchLength = best_mlen; + int selected_offset = best_off; + while (1) { /* from end to beginning */ + int const next_matchLength = opt[candidate_pos].mlen; /* can be 1, means literal */ + int const next_offset = opt[candidate_pos].off; + DEBUGLOG(7, "pos %i: sequence length %i", candidate_pos, selected_matchLength); + opt[candidate_pos].mlen = selected_matchLength; + opt[candidate_pos].off = selected_offset; + selected_matchLength = next_matchLength; + selected_offset = next_offset; + if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */ + assert(next_matchLength > 0); /* can be 1, means literal */ + candidate_pos -= next_matchLength; + } } + + /* encode all recorded sequences in order */ + { int rPos = 0; /* relative position (to ip) */ + while (rPos < last_match_pos) { + int const ml = opt[rPos].mlen; + int const offset = opt[rPos].off; + if (ml == 1) { ip++; rPos++; continue; } /* literal; note: can end up with several literals, in which case, skip them */ + rPos += ml; + assert(ml >= MINMATCH); + assert((offset >= 1) && (offset <= LZ4_DISTANCE_MAX)); + opSaved = op; + if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ip - offset, limit, oend) ) { /* updates ip, op and anchor */ + ovml = ml; + ovref = ip - offset; + goto _dest_overflow; + } } } + } /* while (ip <= mflimit) */ + +_last_literals: + /* Encode Last Literals */ + { size_t lastRunSize = (size_t)(iend - anchor); /* literals */ + size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255; + size_t const totalSize = 1 + llAdd + lastRunSize; + if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */ + if (limit && (op + totalSize > oend)) { + if (limit == limitedOutput) { /* Check output limit */ + retval = 0; + goto _return_label; + } + /* adapt lastRunSize to fill 'dst' */ + lastRunSize = (size_t)(oend - op) - 1 /*token*/; + llAdd = (lastRunSize + 256 - RUN_MASK) / 256; + lastRunSize -= llAdd; + } 
+ DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize); + ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */ + + if (lastRunSize >= RUN_MASK) { + size_t accumulator = lastRunSize - RUN_MASK; + *op++ = (RUN_MASK << ML_BITS); + for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255; + *op++ = (BYTE) accumulator; + } else { + *op++ = (BYTE)(lastRunSize << ML_BITS); + } + LZ4_memcpy(op, anchor, lastRunSize); + op += lastRunSize; + } + + /* End */ + *srcSizePtr = (int) (((const char*)ip) - source); + retval = (int) ((char*)op-dst); + goto _return_label; + +_dest_overflow: +if (limit == fillOutput) { + /* Assumption : ip, anchor, ovml and ovref must be set correctly */ + size_t const ll = (size_t)(ip - anchor); + size_t const ll_addbytes = (ll + 240) / 255; + size_t const ll_totalCost = 1 + ll_addbytes + ll; + BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */ + DEBUGLOG(6, "Last sequence overflowing (only %i bytes remaining)", (int)(oend-1-opSaved)); + op = opSaved; /* restore correct out pointer */ + if (op + ll_totalCost <= maxLitPos) { + /* ll validated; now adjust match length */ + size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost)); + size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255); + assert(maxMlSize < INT_MAX); assert(ovml >= 0); + if ((size_t)ovml > maxMlSize) ovml = (int)maxMlSize; + if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ovml >= MFLIMIT) { + DEBUGLOG(6, "Space to end : %i + ml (%i)", (int)((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1), ovml); + DEBUGLOG(6, "Before : ip = %p, anchor = %p", ip, anchor); + LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ovml, ovref, notLimited, oend); + DEBUGLOG(6, "After : ip = %p, anchor = %p", ip, anchor); + } } + goto _last_literals; +} +_return_label: +#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 + FREEMEM(opt); +#endif + return retval; +} + +} diff --git a/project/thirdparty/tracy-0.11.1/common/tracy_lz4hc.hpp b/project/thirdparty/tracy-0.11.1/common/tracy_lz4hc.hpp new file mode 100644 index 000000000..460cbae7f --- /dev/null +++ b/project/thirdparty/tracy-0.11.1/common/tracy_lz4hc.hpp @@ -0,0 +1,405 @@ +/* + LZ4 HC - High Compression Mode of LZ4 + Header File + Copyright (C) 2011-2020, Yann Collet. + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - LZ4 source repository : https://github.com/lz4/lz4 + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c +*/ +#ifndef TRACY_LZ4_HC_H_19834876238432 +#define TRACY_LZ4_HC_H_19834876238432 + +/* --- Dependency --- */ +/* note : lz4hc requires lz4.h/lz4.c for compilation */ +#include "tracy_lz4.hpp" /* stddef, LZ4LIB_API, LZ4_DEPRECATED */ + + +/* --- Useful constants --- */ +#define LZ4HC_CLEVEL_MIN 3 +#define LZ4HC_CLEVEL_DEFAULT 9 +#define LZ4HC_CLEVEL_OPT_MIN 10 +#define LZ4HC_CLEVEL_MAX 12 + +namespace tracy +{ + +/*-************************************ + * Block Compression + **************************************/ +/*! LZ4_compress_HC() : + * Compress data from `src` into `dst`, using the powerful but slower "HC" algorithm. + * `dst` must be already allocated. + * Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h") + * Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h") + * `compressionLevel` : any value between 1 and LZ4HC_CLEVEL_MAX will work. + * Values > LZ4HC_CLEVEL_MAX behave the same as LZ4HC_CLEVEL_MAX. + * @return : the number of bytes written into 'dst' + * or 0 if compression fails. + */ +LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel); + + +/* Note : + * Decompression functions are provided within "lz4.h" (BSD license) + */ + + +/*! LZ4_compress_HC_extStateHC() : + * Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`. + * `state` size is provided by LZ4_sizeofStateHC(). + * Memory segment must be aligned on 8-bytes boundaries (which a normal malloc() should do properly). + */ +LZ4LIB_API int LZ4_sizeofStateHC(void); +LZ4LIB_API int LZ4_compress_HC_extStateHC(void* stateHC, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel); + + +/*! LZ4_compress_HC_destSize() : v1.9.0+ + * Will compress as much data as possible from `src` + * to fit into `targetDstSize` budget. + * Result is provided in 2 parts : + * @return : the number of bytes written into 'dst' (necessarily <= targetDstSize) + * or 0 if compression fails. + * `srcSizePtr` : on success, *srcSizePtr is updated to indicate how much bytes were read from `src` + */ +LZ4LIB_API int LZ4_compress_HC_destSize(void* stateHC, + const char* src, char* dst, + int* srcSizePtr, int targetDstSize, + int compressionLevel); + + +/*-************************************ + * Streaming Compression + * Bufferless synchronous API + **************************************/ + typedef union LZ4_streamHC_u LZ4_streamHC_t; /* incomplete type (defined later) */ + +/*! LZ4_createStreamHC() and LZ4_freeStreamHC() : + * These functions create and release memory for LZ4 HC streaming state. + * Newly created states are automatically initialized. + * A same state can be used multiple times consecutively, + * starting with LZ4_resetStreamHC_fast() to start a new stream of blocks. 
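 *
 * A minimal lifecycle sketch (error handling trimmed; identifiers other
 * than the API calls are illustrative only) :
 *
 *     LZ4_streamHC_t* const s = LZ4_createStreamHC();
 *     if (s != NULL) {
 *         LZ4_resetStreamHC_fast(s, LZ4HC_CLEVEL_DEFAULT);  // stream of blocks #1
 *         // ... LZ4_compress_HC_continue() calls for stream #1 ...
 *         LZ4_resetStreamHC_fast(s, LZ4HC_CLEVEL_MAX);      // reuse state for stream #2
 *         // ... LZ4_compress_HC_continue() calls for stream #2 ...
 *         LZ4_freeStreamHC(s);
 *     }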
+ */ +LZ4LIB_API LZ4_streamHC_t* LZ4_createStreamHC(void); +LZ4LIB_API int LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr); + +/* + These functions compress data in successive blocks of any size, + using previous blocks as dictionary, to improve compression ratio. + One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks. + There is an exception for ring buffers, which can be smaller than 64 KB. + Ring-buffer scenario is automatically detected and handled within LZ4_compress_HC_continue(). + + Before starting compression, state must be allocated and properly initialized. + LZ4_createStreamHC() does both, though compression level is set to LZ4HC_CLEVEL_DEFAULT. + + Selecting the compression level can be done with LZ4_resetStreamHC_fast() (starts a new stream) + or LZ4_setCompressionLevel() (anytime, between blocks in the same stream) (experimental). + LZ4_resetStreamHC_fast() only works on states which have been properly initialized at least once, + which is automatically the case when state is created using LZ4_createStreamHC(). + + After reset, a first "fictional block" can be designated as initial dictionary, + using LZ4_loadDictHC() (Optional). + + Invoke LZ4_compress_HC_continue() to compress each successive block. + The number of blocks is unlimited. + Previous input blocks, including initial dictionary when present, + must remain accessible and unmodified during compression. + + It's allowed to update compression level anytime between blocks, + using LZ4_setCompressionLevel() (experimental). + + 'dst' buffer should be sized to handle worst case scenarios + (see LZ4_compressBound(), it ensures compression success). + In case of failure, the API does not guarantee recovery, + so the state _must_ be reset. + To ensure compression success + whenever `dst` buffer size cannot be made >= LZ4_compressBound(), + consider using LZ4_compress_HC_continue_destSize(). + + Whenever previous input blocks can't be preserved unmodified in-place during compression of next blocks, + it's possible to copy the last blocks into a more stable memory space, using LZ4_saveDictHC(). + Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer' (<= 64 KB) + + After completing a streaming compression, + it's possible to start a new stream of blocks, using the same LZ4_streamHC_t state, + just by resetting it, using LZ4_resetStreamHC_fast(). +*/ + +LZ4LIB_API void LZ4_resetStreamHC_fast(LZ4_streamHC_t* streamHCPtr, int compressionLevel); /* v1.9.0+ */ +LZ4LIB_API int LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize); + +LZ4LIB_API int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr, + const char* src, char* dst, + int srcSize, int maxDstSize); + +/*! LZ4_compress_HC_continue_destSize() : v1.9.0+ + * Similar to LZ4_compress_HC_continue(), + * but will read as much data as possible from `src` + * to fit into `targetDstSize` budget. + * Result is provided into 2 parts : + * @return : the number of bytes written into 'dst' (necessarily <= targetDstSize) + * or 0 if compression fails. + * `srcSizePtr` : on success, *srcSizePtr will be updated to indicate how much bytes were read from `src`. + * Note that this function may not consume the entire input. 
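 *
 * One possible consumption loop (a sketch only : `stream`, `src`, `dst`,
 * `srcSize` and `dstCapacity` are assumed to be set up by the caller, and
 * consume_block() is a hypothetical delivery function, not part of the API) :
 *
 *     int done = 0;
 *     while (done < srcSize) {
 *         int chunk = srcSize - done;
 *         int const written = LZ4_compress_HC_continue_destSize(stream,
 *                                   src + done, dst, &chunk, dstCapacity);
 *         if (written == 0) break;          // compression error
 *         consume_block(dst, written);      // hypothetical: deliver compressed block
 *         done += chunk;                    // chunk now holds bytes actually read
 *     }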
+ */ +LZ4LIB_API int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr, + const char* src, char* dst, + int* srcSizePtr, int targetDstSize); + +LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize); + + + +/*^********************************************** + * !!!!!! STATIC LINKING ONLY !!!!!! + ***********************************************/ + +/*-****************************************************************** + * PRIVATE DEFINITIONS : + * Do not use these definitions directly. + * They are merely exposed to allow static allocation of `LZ4_streamHC_t`. + * Declare an `LZ4_streamHC_t` directly, rather than any type below. + * Even then, only do so in the context of static linking, as definitions may change between versions. + ********************************************************************/ + +#define LZ4HC_DICTIONARY_LOGSIZE 16 +#define LZ4HC_MAXD (1<= LZ4HC_CLEVEL_OPT_MIN. + */ +LZ4LIB_STATIC_API void LZ4_favorDecompressionSpeed( + LZ4_streamHC_t* LZ4_streamHCPtr, int favor); + +/*! LZ4_resetStreamHC_fast() : v1.9.0+ + * When an LZ4_streamHC_t is known to be in a internally coherent state, + * it can often be prepared for a new compression with almost no work, only + * sometimes falling back to the full, expensive reset that is always required + * when the stream is in an indeterminate state (i.e., the reset performed by + * LZ4_resetStreamHC()). + * + * LZ4_streamHCs are guaranteed to be in a valid state when: + * - returned from LZ4_createStreamHC() + * - reset by LZ4_resetStreamHC() + * - memset(stream, 0, sizeof(LZ4_streamHC_t)) + * - the stream was in a valid state and was reset by LZ4_resetStreamHC_fast() + * - the stream was in a valid state and was then used in any compression call + * that returned success + * - the stream was in an indeterminate state and was used in a compression + * call that fully reset the state (LZ4_compress_HC_extStateHC()) and that + * returned success + * + * Note: + * A stream that was last used in a compression call that returned an error + * may be passed to this function. However, it will be fully reset, which will + * clear any existing history and settings from the context. + */ +LZ4LIB_STATIC_API void LZ4_resetStreamHC_fast( + LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel); + +/*! LZ4_compress_HC_extStateHC_fastReset() : + * A variant of LZ4_compress_HC_extStateHC(). + * + * Using this variant avoids an expensive initialization step. It is only safe + * to call if the state buffer is known to be correctly initialized already + * (see above comment on LZ4_resetStreamHC_fast() for a definition of + * "correctly initialized"). From a high level, the difference is that this + * function initializes the provided state with a call to + * LZ4_resetStreamHC_fast() while LZ4_compress_HC_extStateHC() starts with a + * call to LZ4_resetStreamHC(). + */ +LZ4LIB_STATIC_API int LZ4_compress_HC_extStateHC_fastReset ( + void* state, + const char* src, char* dst, + int srcSize, int dstCapacity, + int compressionLevel); + +/*! LZ4_attach_HC_dictionary() : + * This is an experimental API that allows for the efficient use of a + * static dictionary many times. + * + * Rather than re-loading the dictionary buffer into a working context before + * each compression, or copying a pre-loaded dictionary's LZ4_streamHC_t into a + * working LZ4_streamHC_t, this function introduces a no-copy setup mechanism, + * in which the working stream references the dictionary stream in-place. 
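 *
 * A sketch of the intended pattern (identifiers other than the API calls
 * are illustrative; dictBuffer must stay valid for the whole session) :
 *
 *     LZ4_streamHC_t* const dictStream = LZ4_createStreamHC();
 *     LZ4_loadDictHC(dictStream, dictBuffer, dictSize);      // prepare once
 *     for (size_t i = 0; i < nbJobs; i++) {
 *         LZ4_resetStreamHC_fast(workStream, cLevel);        // fresh history
 *         LZ4_attach_HC_dictionary(workStream, dictStream);  // no-copy attach
 *         // ... LZ4_compress_HC_continue() calls on job i ...
 *     }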
+ * + * Several assumptions are made about the state of the dictionary stream. + * Currently, only streams which have been prepared by LZ4_loadDictHC() should + * be expected to work. + * + * Alternatively, the provided dictionary stream pointer may be NULL, in which + * case any existing dictionary stream is unset. + * + * A dictionary should only be attached to a stream without any history (i.e., + * a stream that has just been reset). + * + * The dictionary will remain attached to the working stream only for the + * current stream session. Calls to LZ4_resetStreamHC(_fast) will remove the + * dictionary context association from the working stream. The dictionary + * stream (and source buffer) must remain in-place / accessible / unchanged + * through the lifetime of the stream session. + */ +LZ4LIB_STATIC_API void LZ4_attach_HC_dictionary( + LZ4_streamHC_t *working_stream, + const LZ4_streamHC_t *dictionary_stream); + +} + +#endif /* LZ4_HC_SLO_098092834 */ +#endif /* LZ4_HC_STATIC_LINKING_ONLY */ diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/LICENSE b/project/thirdparty/tracy-0.11.1/libbacktrace/LICENSE new file mode 100644 index 000000000..097d2774e --- /dev/null +++ b/project/thirdparty/tracy-0.11.1/libbacktrace/LICENSE @@ -0,0 +1,29 @@ +# Copyright (C) 2012-2016 Free Software Foundation, Inc. + +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: + +# (1) Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. + +# (2) Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. + +# (3) The name of the author may not be used to +# endorse or promote products derived from this software without +# specific prior written permission. + +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, +# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/alloc.cpp b/project/thirdparty/tracy-0.11.1/libbacktrace/alloc.cpp new file mode 100644 index 000000000..a365a4860 --- /dev/null +++ b/project/thirdparty/tracy-0.11.1/libbacktrace/alloc.cpp @@ -0,0 +1,174 @@ +/* alloc.c -- Memory allocation without mmap. + Copyright (C) 2012-2021 Free Software Foundation, Inc. + Written by Ian Lance Taylor, Google. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + (1) Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + (2) Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + (3) The name of the author may not be used to + endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. */ + +#include "config.h" + +#include +#include +#include + +#include "backtrace.hpp" +#include "internal.hpp" + +#include "../common/TracyAlloc.hpp" + +namespace tracy +{ + +/* Allocation routines to use on systems that do not support anonymous + mmap. This implementation just uses malloc, which means that the + backtrace functions may not be safely invoked from a signal + handler. */ + +/* Allocate memory like malloc. If ERROR_CALLBACK is NULL, don't + report an error. */ + +void * +backtrace_alloc (struct backtrace_state *state ATTRIBUTE_UNUSED, + size_t size, backtrace_error_callback error_callback, + void *data) +{ + void *ret; + + ret = tracy_malloc (size); + if (ret == NULL) + { + if (error_callback) + error_callback (data, "malloc", errno); + } + return ret; +} + +/* Free memory. */ + +void +backtrace_free (struct backtrace_state *state ATTRIBUTE_UNUSED, + void *p, size_t size ATTRIBUTE_UNUSED, + backtrace_error_callback error_callback ATTRIBUTE_UNUSED, + void *data ATTRIBUTE_UNUSED) +{ + tracy_free (p); +} + +/* Grow VEC by SIZE bytes. */ + +void * +backtrace_vector_grow (struct backtrace_state *state ATTRIBUTE_UNUSED, + size_t size, backtrace_error_callback error_callback, + void *data, struct backtrace_vector *vec) +{ + void *ret; + + if (size > vec->alc) + { + size_t alc; + void *base; + + if (vec->size == 0) + alc = 32 * size; + else if (vec->size >= 4096) + alc = vec->size + 4096; + else + alc = 2 * vec->size; + + if (alc < vec->size + size) + alc = vec->size + size; + + base = tracy_realloc (vec->base, alc); + if (base == NULL) + { + error_callback (data, "realloc", errno); + return NULL; + } + + vec->base = base; + vec->alc = alc - vec->size; + } + + ret = (char *) vec->base + vec->size; + vec->size += size; + vec->alc -= size; + return ret; +} + +/* Finish the current allocation on VEC. */ + +void * +backtrace_vector_finish (struct backtrace_state *state, + struct backtrace_vector *vec, + backtrace_error_callback error_callback, + void *data) +{ + void *ret; + + /* With this allocator we call realloc in backtrace_vector_grow, + which means we can't easily reuse the memory here. So just + release it. */ + if (!backtrace_vector_release (state, vec, error_callback, data)) + return NULL; + ret = vec->base; + vec->base = NULL; + vec->size = 0; + vec->alc = 0; + return ret; +} + +/* Release any extra space allocated for VEC. 
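   The usual lifecycle is grow, fill, then finish or release.  A sketch,
   assuming `state`, `error_callback`, `data`, `bytes` and `len` are
   provided by the caller :

     struct backtrace_vector vec;
     memset (&vec, 0, sizeof vec);
     void *slot = backtrace_vector_grow (state, len, error_callback,
                                         data, &vec);
     if (slot != NULL)
       {
         memcpy (slot, bytes, len);        // fill the newly reserved space
         void *buf = backtrace_vector_finish (state, &vec, error_callback,
                                              data);
         // buf now holds the bytes; vec has been reset for reuse
       }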
*/ + +int +backtrace_vector_release (struct backtrace_state *state ATTRIBUTE_UNUSED, + struct backtrace_vector *vec, + backtrace_error_callback error_callback, + void *data) +{ + vec->alc = 0; + + if (vec->size == 0) + { + /* As of C17, realloc with size 0 is marked as an obsolescent feature, use + free instead. */ + tracy_free (vec->base); + vec->base = NULL; + return 1; + } + + vec->base = tracy_realloc (vec->base, vec->size); + if (vec->base == NULL) + { + error_callback (data, "realloc", errno); + return 0; + } + + return 1; +} + +} diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/backtrace.hpp b/project/thirdparty/tracy-0.11.1/libbacktrace/backtrace.hpp new file mode 100644 index 000000000..e4be297a9 --- /dev/null +++ b/project/thirdparty/tracy-0.11.1/libbacktrace/backtrace.hpp @@ -0,0 +1,186 @@ +/* backtrace.h -- Public header file for stack backtrace library. + Copyright (C) 2012-2021 Free Software Foundation, Inc. + Written by Ian Lance Taylor, Google. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + (1) Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + (2) Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + (3) The name of the author may not be used to + endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. */ + +#ifndef BACKTRACE_H +#define BACKTRACE_H + +#include +#include +#include + +namespace tracy +{ + +/* The backtrace state. This struct is intentionally not defined in + the public interface. */ + +struct backtrace_state; + +/* The type of the error callback argument to backtrace functions. + This function, if not NULL, will be called for certain error cases. + The DATA argument is passed to the function that calls this one. + The MSG argument is an error message. The ERRNUM argument, if + greater than 0, holds an errno value. The MSG buffer may become + invalid after this function returns. + + As a special case, the ERRNUM argument will be passed as -1 if no + debug info can be found for the executable, or if the debug info + exists but has an unsupported version, but the function requires + debug info (e.g., backtrace_full, backtrace_pcinfo). The MSG in + this case will be something along the lines of "no debug info". + Similarly, ERRNUM will be passed as -1 if there is no symbol table, + but the function requires a symbol table (e.g., backtrace_syminfo). + This may be used as a signal that some other approach should be + tried. 
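   For illustration, a callback along these lines (names are examples
   only) distinguishes the cases described above :

     static void
     example_error_cb (void *data, const char *msg, int errnum)
     {
       (void) data;
       if (errnum == -1)
         fprintf (stderr, "no debug info: %s\n", msg);   // try another approach
       else if (errnum > 0)
         fprintf (stderr, "%s (errno %d)\n", msg, errnum);
       else
         fprintf (stderr, "%s\n", msg);
     }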
*/ + +typedef void (*backtrace_error_callback) (void *data, const char *msg, + int errnum); + +/* Create state information for the backtrace routines. This must be + called before any of the other routines, and its return value must + be passed to all of the other routines. FILENAME is the path name + of the executable file; if it is NULL the library will try + system-specific path names. If not NULL, FILENAME must point to a + permanent buffer. If THREADED is non-zero the state may be + accessed by multiple threads simultaneously, and the library will + use appropriate atomic operations. If THREADED is zero the state + may only be accessed by one thread at a time. This returns a state + pointer on success, NULL on error. If an error occurs, this will + call the ERROR_CALLBACK routine. + + Calling this function allocates resources that cannot be freed. + There is no backtrace_free_state function. The state is used to + cache information that is expensive to recompute. Programs are + expected to call this function at most once and to save the return + value for all later calls to backtrace functions. */ + +extern struct backtrace_state *backtrace_create_state ( + const char *filename, int threaded, + backtrace_error_callback error_callback, void *data); + +/* The type of the callback argument to the backtrace_full function. + DATA is the argument passed to backtrace_full. PC is the program + counter. FILENAME is the name of the file containing PC, or NULL + if not available. LINENO is the line number in FILENAME containing + PC, or 0 if not available. FUNCTION is the name of the function + containing PC, or NULL if not available. This should return 0 to + continuing tracing. The FILENAME and FUNCTION buffers may become + invalid after this function returns. */ + +typedef int (*backtrace_full_callback) (void *data, uintptr_t pc, uintptr_t lowaddr, + const char *filename, int lineno, + const char *function); + +/* Get a full stack backtrace. SKIP is the number of frames to skip; + passing 0 will start the trace with the function calling + backtrace_full. DATA is passed to the callback routine. If any + call to CALLBACK returns a non-zero value, the stack backtrace + stops, and backtrace returns that value; this may be used to limit + the number of stack frames desired. If all calls to CALLBACK + return 0, backtrace returns 0. The backtrace_full function will + make at least one call to either CALLBACK or ERROR_CALLBACK. This + function requires debug info for the executable. */ + +extern int backtrace_full (struct backtrace_state *state, int skip, + backtrace_full_callback callback, + backtrace_error_callback error_callback, + void *data); + +/* The type of the callback argument to the backtrace_simple function. + DATA is the argument passed to simple_backtrace. PC is the program + counter. This should return 0 to continue tracing. */ + +typedef int (*backtrace_simple_callback) (void *data, uintptr_t pc); + +/* Get a simple backtrace. SKIP is the number of frames to skip, as + in backtrace. DATA is passed to the callback routine. If any call + to CALLBACK returns a non-zero value, the stack backtrace stops, + and backtrace_simple returns that value. Otherwise + backtrace_simple returns 0. The backtrace_simple function will + make at least one call to either CALLBACK or ERROR_CALLBACK. This + function does not require any debug info for the executable. 
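   For instance, raw PC values can be collected into a fixed-size array
   (a sketch; the struct and names are illustrative) :

     struct pc_list { uintptr_t pc[64]; int count; };

     static int
     collect_pc (void *data, uintptr_t pc)
     {
       struct pc_list *l = (struct pc_list *) data;
       if (l->count >= 64)
         return 1;                        // non-zero stops the trace
       l->pc[l->count++] = pc;
       return 0;                          // keep tracing
     }

     // usage: backtrace_simple (state, 0, collect_pc, error_cb, &list);
     // where error_cb is any backtrace_error_callback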
*/ + +extern int backtrace_simple (struct backtrace_state *state, int skip, + backtrace_simple_callback callback, + backtrace_error_callback error_callback, + void *data); + +/* Print the current backtrace in a user readable format to a FILE. + SKIP is the number of frames to skip, as in backtrace_full. Any + error messages are printed to stderr. This function requires debug + info for the executable. */ + +extern void backtrace_print (struct backtrace_state *state, int skip, FILE *); + +/* Given PC, a program counter in the current program, call the + callback function with filename, line number, and function name + information. This will normally call the callback function exactly + once. However, if the PC happens to describe an inlined call, and + the debugging information contains the necessary information, then + this may call the callback function multiple times. This will make + at least one call to either CALLBACK or ERROR_CALLBACK. This + returns the first non-zero value returned by CALLBACK, or 0. */ + +extern int backtrace_pcinfo (struct backtrace_state *state, uintptr_t pc, + backtrace_full_callback callback, + backtrace_error_callback error_callback, + void *data); + +/* The type of the callback argument to backtrace_syminfo. DATA and + PC are the arguments passed to backtrace_syminfo. SYMNAME is the + name of the symbol for the corresponding code. SYMVAL is the + value and SYMSIZE is the size of the symbol. SYMNAME will be NULL + if no error occurred but the symbol could not be found. */ + +typedef void (*backtrace_syminfo_callback) (void *data, uintptr_t pc, + const char *symname, + uintptr_t symval, + uintptr_t symsize); + +/* Given ADDR, an address or program counter in the current program, + call the callback information with the symbol name and value + describing the function or variable in which ADDR may be found. + This will call either CALLBACK or ERROR_CALLBACK exactly once. + This returns 1 on success, 0 on failure. This function requires + the symbol table but does not require the debug info. Note that if + the symbol table is present but ADDR could not be found in the + table, CALLBACK will be called with a NULL SYMNAME argument. + Returns 1 on success, 0 on error. 
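   A callback for this function might therefore look like this
   (illustrative names; note the NULL SYMNAME case) :

     static void
     print_sym (void *data, uintptr_t pc, const char *symname,
                uintptr_t symval, uintptr_t symsize)
     {
       (void) data; (void) symval;
       if (symname == NULL)
         printf ("%#lx: symbol not found\n", (unsigned long) pc);
       else
         printf ("%#lx: %s (size %lu)\n", (unsigned long) pc,
                 symname, (unsigned long) symsize);
     }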
*/ + +extern int backtrace_syminfo (struct backtrace_state *state, uintptr_t addr, + backtrace_syminfo_callback callback, + backtrace_error_callback error_callback, + void *data); + +} + +#endif diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/config.h b/project/thirdparty/tracy-0.11.1/libbacktrace/config.h new file mode 100644 index 000000000..87e38a95b --- /dev/null +++ b/project/thirdparty/tracy-0.11.1/libbacktrace/config.h @@ -0,0 +1,26 @@ +#include +#if defined(__linux__) && !defined(__GLIBC__) && !defined(__WORDSIZE) +// include __WORDSIZE headers for musl +# include +#endif +#if __WORDSIZE == 64 +# define BACKTRACE_ELF_SIZE 64 +#else +# define BACKTRACE_ELF_SIZE 32 +#endif + +#define HAVE_DLFCN_H 1 +#define HAVE_FCNTL 1 +#define HAVE_INTTYPES_H 1 +#define HAVE_LSTAT 1 +#define HAVE_READLINK 1 +#define HAVE_DL_ITERATE_PHDR 1 +#define HAVE_ATOMIC_FUNCTIONS 1 +#define HAVE_DECL_STRNLEN 1 + +#ifdef __APPLE__ +# define HAVE_MACH_O_DYLD_H 1 +#elif defined BSD +# define HAVE_KERN_PROC 1 +# define HAVE_KERN_PROC_ARGS 1 +#endif diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/dwarf.cpp b/project/thirdparty/tracy-0.11.1/libbacktrace/dwarf.cpp new file mode 100644 index 000000000..52fa8a8d2 --- /dev/null +++ b/project/thirdparty/tracy-0.11.1/libbacktrace/dwarf.cpp @@ -0,0 +1,4455 @@ +/* dwarf.c -- Get file/line information from DWARF for backtraces. + Copyright (C) 2012-2021 Free Software Foundation, Inc. + Written by Ian Lance Taylor, Google. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + (1) Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + (2) Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + (3) The name of the author may not be used to + endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. */ + +#include "config.h" + +#include +#include +#include +#include + +#include "filenames.hpp" + +#include "backtrace.hpp" +#include "internal.hpp" + +namespace tracy +{ + +/* DWARF constants. 
*/ + +enum dwarf_tag { + DW_TAG_entry_point = 0x3, + DW_TAG_compile_unit = 0x11, + DW_TAG_inlined_subroutine = 0x1d, + DW_TAG_subprogram = 0x2e, + DW_TAG_skeleton_unit = 0x4a, +}; + +enum dwarf_form { + DW_FORM_addr = 0x01, + DW_FORM_block2 = 0x03, + DW_FORM_block4 = 0x04, + DW_FORM_data2 = 0x05, + DW_FORM_data4 = 0x06, + DW_FORM_data8 = 0x07, + DW_FORM_string = 0x08, + DW_FORM_block = 0x09, + DW_FORM_block1 = 0x0a, + DW_FORM_data1 = 0x0b, + DW_FORM_flag = 0x0c, + DW_FORM_sdata = 0x0d, + DW_FORM_strp = 0x0e, + DW_FORM_udata = 0x0f, + DW_FORM_ref_addr = 0x10, + DW_FORM_ref1 = 0x11, + DW_FORM_ref2 = 0x12, + DW_FORM_ref4 = 0x13, + DW_FORM_ref8 = 0x14, + DW_FORM_ref_udata = 0x15, + DW_FORM_indirect = 0x16, + DW_FORM_sec_offset = 0x17, + DW_FORM_exprloc = 0x18, + DW_FORM_flag_present = 0x19, + DW_FORM_ref_sig8 = 0x20, + DW_FORM_strx = 0x1a, + DW_FORM_addrx = 0x1b, + DW_FORM_ref_sup4 = 0x1c, + DW_FORM_strp_sup = 0x1d, + DW_FORM_data16 = 0x1e, + DW_FORM_line_strp = 0x1f, + DW_FORM_implicit_const = 0x21, + DW_FORM_loclistx = 0x22, + DW_FORM_rnglistx = 0x23, + DW_FORM_ref_sup8 = 0x24, + DW_FORM_strx1 = 0x25, + DW_FORM_strx2 = 0x26, + DW_FORM_strx3 = 0x27, + DW_FORM_strx4 = 0x28, + DW_FORM_addrx1 = 0x29, + DW_FORM_addrx2 = 0x2a, + DW_FORM_addrx3 = 0x2b, + DW_FORM_addrx4 = 0x2c, + DW_FORM_GNU_addr_index = 0x1f01, + DW_FORM_GNU_str_index = 0x1f02, + DW_FORM_GNU_ref_alt = 0x1f20, + DW_FORM_GNU_strp_alt = 0x1f21 +}; + +enum dwarf_attribute { + DW_AT_sibling = 0x01, + DW_AT_location = 0x02, + DW_AT_name = 0x03, + DW_AT_ordering = 0x09, + DW_AT_subscr_data = 0x0a, + DW_AT_byte_size = 0x0b, + DW_AT_bit_offset = 0x0c, + DW_AT_bit_size = 0x0d, + DW_AT_element_list = 0x0f, + DW_AT_stmt_list = 0x10, + DW_AT_low_pc = 0x11, + DW_AT_high_pc = 0x12, + DW_AT_language = 0x13, + DW_AT_member = 0x14, + DW_AT_discr = 0x15, + DW_AT_discr_value = 0x16, + DW_AT_visibility = 0x17, + DW_AT_import = 0x18, + DW_AT_string_length = 0x19, + DW_AT_common_reference = 0x1a, + DW_AT_comp_dir = 0x1b, + DW_AT_const_value = 0x1c, + DW_AT_containing_type = 0x1d, + DW_AT_default_value = 0x1e, + DW_AT_inline = 0x20, + DW_AT_is_optional = 0x21, + DW_AT_lower_bound = 0x22, + DW_AT_producer = 0x25, + DW_AT_prototyped = 0x27, + DW_AT_return_addr = 0x2a, + DW_AT_start_scope = 0x2c, + DW_AT_bit_stride = 0x2e, + DW_AT_upper_bound = 0x2f, + DW_AT_abstract_origin = 0x31, + DW_AT_accessibility = 0x32, + DW_AT_address_class = 0x33, + DW_AT_artificial = 0x34, + DW_AT_base_types = 0x35, + DW_AT_calling_convention = 0x36, + DW_AT_count = 0x37, + DW_AT_data_member_location = 0x38, + DW_AT_decl_column = 0x39, + DW_AT_decl_file = 0x3a, + DW_AT_decl_line = 0x3b, + DW_AT_declaration = 0x3c, + DW_AT_discr_list = 0x3d, + DW_AT_encoding = 0x3e, + DW_AT_external = 0x3f, + DW_AT_frame_base = 0x40, + DW_AT_friend = 0x41, + DW_AT_identifier_case = 0x42, + DW_AT_macro_info = 0x43, + DW_AT_namelist_items = 0x44, + DW_AT_priority = 0x45, + DW_AT_segment = 0x46, + DW_AT_specification = 0x47, + DW_AT_static_link = 0x48, + DW_AT_type = 0x49, + DW_AT_use_location = 0x4a, + DW_AT_variable_parameter = 0x4b, + DW_AT_virtuality = 0x4c, + DW_AT_vtable_elem_location = 0x4d, + DW_AT_allocated = 0x4e, + DW_AT_associated = 0x4f, + DW_AT_data_location = 0x50, + DW_AT_byte_stride = 0x51, + DW_AT_entry_pc = 0x52, + DW_AT_use_UTF8 = 0x53, + DW_AT_extension = 0x54, + DW_AT_ranges = 0x55, + DW_AT_trampoline = 0x56, + DW_AT_call_column = 0x57, + DW_AT_call_file = 0x58, + DW_AT_call_line = 0x59, + DW_AT_description = 0x5a, + DW_AT_binary_scale = 0x5b, + DW_AT_decimal_scale = 0x5c, + 
DW_AT_small = 0x5d, + DW_AT_decimal_sign = 0x5e, + DW_AT_digit_count = 0x5f, + DW_AT_picture_string = 0x60, + DW_AT_mutable = 0x61, + DW_AT_threads_scaled = 0x62, + DW_AT_explicit = 0x63, + DW_AT_object_pointer = 0x64, + DW_AT_endianity = 0x65, + DW_AT_elemental = 0x66, + DW_AT_pure = 0x67, + DW_AT_recursive = 0x68, + DW_AT_signature = 0x69, + DW_AT_main_subprogram = 0x6a, + DW_AT_data_bit_offset = 0x6b, + DW_AT_const_expr = 0x6c, + DW_AT_enum_class = 0x6d, + DW_AT_linkage_name = 0x6e, + DW_AT_string_length_bit_size = 0x6f, + DW_AT_string_length_byte_size = 0x70, + DW_AT_rank = 0x71, + DW_AT_str_offsets_base = 0x72, + DW_AT_addr_base = 0x73, + DW_AT_rnglists_base = 0x74, + DW_AT_dwo_name = 0x76, + DW_AT_reference = 0x77, + DW_AT_rvalue_reference = 0x78, + DW_AT_macros = 0x79, + DW_AT_call_all_calls = 0x7a, + DW_AT_call_all_source_calls = 0x7b, + DW_AT_call_all_tail_calls = 0x7c, + DW_AT_call_return_pc = 0x7d, + DW_AT_call_value = 0x7e, + DW_AT_call_origin = 0x7f, + DW_AT_call_parameter = 0x80, + DW_AT_call_pc = 0x81, + DW_AT_call_tail_call = 0x82, + DW_AT_call_target = 0x83, + DW_AT_call_target_clobbered = 0x84, + DW_AT_call_data_location = 0x85, + DW_AT_call_data_value = 0x86, + DW_AT_noreturn = 0x87, + DW_AT_alignment = 0x88, + DW_AT_export_symbols = 0x89, + DW_AT_deleted = 0x8a, + DW_AT_defaulted = 0x8b, + DW_AT_loclists_base = 0x8c, + DW_AT_lo_user = 0x2000, + DW_AT_hi_user = 0x3fff, + DW_AT_MIPS_fde = 0x2001, + DW_AT_MIPS_loop_begin = 0x2002, + DW_AT_MIPS_tail_loop_begin = 0x2003, + DW_AT_MIPS_epilog_begin = 0x2004, + DW_AT_MIPS_loop_unroll_factor = 0x2005, + DW_AT_MIPS_software_pipeline_depth = 0x2006, + DW_AT_MIPS_linkage_name = 0x2007, + DW_AT_MIPS_stride = 0x2008, + DW_AT_MIPS_abstract_name = 0x2009, + DW_AT_MIPS_clone_origin = 0x200a, + DW_AT_MIPS_has_inlines = 0x200b, + DW_AT_HP_block_index = 0x2000, + DW_AT_HP_unmodifiable = 0x2001, + DW_AT_HP_prologue = 0x2005, + DW_AT_HP_epilogue = 0x2008, + DW_AT_HP_actuals_stmt_list = 0x2010, + DW_AT_HP_proc_per_section = 0x2011, + DW_AT_HP_raw_data_ptr = 0x2012, + DW_AT_HP_pass_by_reference = 0x2013, + DW_AT_HP_opt_level = 0x2014, + DW_AT_HP_prof_version_id = 0x2015, + DW_AT_HP_opt_flags = 0x2016, + DW_AT_HP_cold_region_low_pc = 0x2017, + DW_AT_HP_cold_region_high_pc = 0x2018, + DW_AT_HP_all_variables_modifiable = 0x2019, + DW_AT_HP_linkage_name = 0x201a, + DW_AT_HP_prof_flags = 0x201b, + DW_AT_HP_unit_name = 0x201f, + DW_AT_HP_unit_size = 0x2020, + DW_AT_HP_widened_byte_size = 0x2021, + DW_AT_HP_definition_points = 0x2022, + DW_AT_HP_default_location = 0x2023, + DW_AT_HP_is_result_param = 0x2029, + DW_AT_sf_names = 0x2101, + DW_AT_src_info = 0x2102, + DW_AT_mac_info = 0x2103, + DW_AT_src_coords = 0x2104, + DW_AT_body_begin = 0x2105, + DW_AT_body_end = 0x2106, + DW_AT_GNU_vector = 0x2107, + DW_AT_GNU_guarded_by = 0x2108, + DW_AT_GNU_pt_guarded_by = 0x2109, + DW_AT_GNU_guarded = 0x210a, + DW_AT_GNU_pt_guarded = 0x210b, + DW_AT_GNU_locks_excluded = 0x210c, + DW_AT_GNU_exclusive_locks_required = 0x210d, + DW_AT_GNU_shared_locks_required = 0x210e, + DW_AT_GNU_odr_signature = 0x210f, + DW_AT_GNU_template_name = 0x2110, + DW_AT_GNU_call_site_value = 0x2111, + DW_AT_GNU_call_site_data_value = 0x2112, + DW_AT_GNU_call_site_target = 0x2113, + DW_AT_GNU_call_site_target_clobbered = 0x2114, + DW_AT_GNU_tail_call = 0x2115, + DW_AT_GNU_all_tail_call_sites = 0x2116, + DW_AT_GNU_all_call_sites = 0x2117, + DW_AT_GNU_all_source_call_sites = 0x2118, + DW_AT_GNU_macros = 0x2119, + DW_AT_GNU_deleted = 0x211a, + DW_AT_GNU_dwo_name = 0x2130, + DW_AT_GNU_dwo_id 
= 0x2131, + DW_AT_GNU_ranges_base = 0x2132, + DW_AT_GNU_addr_base = 0x2133, + DW_AT_GNU_pubnames = 0x2134, + DW_AT_GNU_pubtypes = 0x2135, + DW_AT_GNU_discriminator = 0x2136, + DW_AT_GNU_locviews = 0x2137, + DW_AT_GNU_entry_view = 0x2138, + DW_AT_VMS_rtnbeg_pd_address = 0x2201, + DW_AT_use_GNAT_descriptive_type = 0x2301, + DW_AT_GNAT_descriptive_type = 0x2302, + DW_AT_GNU_numerator = 0x2303, + DW_AT_GNU_denominator = 0x2304, + DW_AT_GNU_bias = 0x2305, + DW_AT_upc_threads_scaled = 0x3210, + DW_AT_PGI_lbase = 0x3a00, + DW_AT_PGI_soffset = 0x3a01, + DW_AT_PGI_lstride = 0x3a02, + DW_AT_APPLE_optimized = 0x3fe1, + DW_AT_APPLE_flags = 0x3fe2, + DW_AT_APPLE_isa = 0x3fe3, + DW_AT_APPLE_block = 0x3fe4, + DW_AT_APPLE_major_runtime_vers = 0x3fe5, + DW_AT_APPLE_runtime_class = 0x3fe6, + DW_AT_APPLE_omit_frame_ptr = 0x3fe7, + DW_AT_APPLE_property_name = 0x3fe8, + DW_AT_APPLE_property_getter = 0x3fe9, + DW_AT_APPLE_property_setter = 0x3fea, + DW_AT_APPLE_property_attribute = 0x3feb, + DW_AT_APPLE_objc_complete_type = 0x3fec, + DW_AT_APPLE_property = 0x3fed +}; + +enum dwarf_line_number_op { + DW_LNS_extended_op = 0x0, + DW_LNS_copy = 0x1, + DW_LNS_advance_pc = 0x2, + DW_LNS_advance_line = 0x3, + DW_LNS_set_file = 0x4, + DW_LNS_set_column = 0x5, + DW_LNS_negate_stmt = 0x6, + DW_LNS_set_basic_block = 0x7, + DW_LNS_const_add_pc = 0x8, + DW_LNS_fixed_advance_pc = 0x9, + DW_LNS_set_prologue_end = 0xa, + DW_LNS_set_epilogue_begin = 0xb, + DW_LNS_set_isa = 0xc, +}; + +enum dwarf_extended_line_number_op { + DW_LNE_end_sequence = 0x1, + DW_LNE_set_address = 0x2, + DW_LNE_define_file = 0x3, + DW_LNE_set_discriminator = 0x4, +}; + +enum dwarf_line_number_content_type { + DW_LNCT_path = 0x1, + DW_LNCT_directory_index = 0x2, + DW_LNCT_timestamp = 0x3, + DW_LNCT_size = 0x4, + DW_LNCT_MD5 = 0x5, + DW_LNCT_lo_user = 0x2000, + DW_LNCT_hi_user = 0x3fff +}; + +enum dwarf_range_list_entry { + DW_RLE_end_of_list = 0x00, + DW_RLE_base_addressx = 0x01, + DW_RLE_startx_endx = 0x02, + DW_RLE_startx_length = 0x03, + DW_RLE_offset_pair = 0x04, + DW_RLE_base_address = 0x05, + DW_RLE_start_end = 0x06, + DW_RLE_start_length = 0x07 +}; + +enum dwarf_unit_type { + DW_UT_compile = 0x01, + DW_UT_type = 0x02, + DW_UT_partial = 0x03, + DW_UT_skeleton = 0x04, + DW_UT_split_compile = 0x05, + DW_UT_split_type = 0x06, + DW_UT_lo_user = 0x80, + DW_UT_hi_user = 0xff +}; + +#if !defined(HAVE_DECL_STRNLEN) || !HAVE_DECL_STRNLEN + +/* If strnlen is not declared, provide our own version. */ + +static size_t +xstrnlen (const char *s, size_t maxlen) +{ + size_t i; + + for (i = 0; i < maxlen; ++i) + if (s[i] == '\0') + break; + return i; +} + +#define strnlen xstrnlen + +#endif + +/* A buffer to read DWARF info. */ + +struct dwarf_buf +{ + /* Buffer name for error messages. */ + const char *name; + /* Start of the buffer. */ + const unsigned char *start; + /* Next byte to read. */ + const unsigned char *buf; + /* The number of bytes remaining. */ + size_t left; + /* Whether the data is big-endian. */ + int is_bigendian; + /* Error callback routine. */ + backtrace_error_callback error_callback; + /* Data for error_callback. */ + void *data; + /* Non-zero if we've reported an underflow error. */ + int reported_underflow; +}; + +/* A single attribute in a DWARF abbreviation. */ + +struct attr +{ + /* The attribute name. */ + enum dwarf_attribute name; + /* The attribute form. */ + enum dwarf_form form; + /* The attribute value, for DW_FORM_implicit_const. */ + int64_t val; +}; + +/* A single DWARF abbreviation. 
*/ + +struct abbrev +{ + /* The abbrev code--the number used to refer to the abbrev. */ + uint64_t code; + /* The entry tag. */ + enum dwarf_tag tag; + /* Non-zero if this abbrev has child entries. */ + int has_children; + /* The number of attributes. */ + size_t num_attrs; + /* The attributes. */ + struct attr *attrs; +}; + +/* The DWARF abbreviations for a compilation unit. This structure + only exists while reading the compilation unit. Most DWARF readers + seem to a hash table to map abbrev ID's to abbrev entries. + However, we primarily care about GCC, and GCC simply issues ID's in + numerical order starting at 1. So we simply keep a sorted vector, + and try to just look up the code. */ + +struct abbrevs +{ + /* The number of abbrevs in the vector. */ + size_t num_abbrevs; + /* The abbrevs, sorted by the code field. */ + struct abbrev *abbrevs; +}; + +/* The different kinds of attribute values. */ + +enum attr_val_encoding +{ + /* No attribute value. */ + ATTR_VAL_NONE, + /* An address. */ + ATTR_VAL_ADDRESS, + /* An index into the .debug_addr section, whose value is relative to + the DW_AT_addr_base attribute of the compilation unit. */ + ATTR_VAL_ADDRESS_INDEX, + /* A unsigned integer. */ + ATTR_VAL_UINT, + /* A sigd integer. */ + ATTR_VAL_SINT, + /* A string. */ + ATTR_VAL_STRING, + /* An index into the .debug_str_offsets section. */ + ATTR_VAL_STRING_INDEX, + /* An offset to other data in the containing unit. */ + ATTR_VAL_REF_UNIT, + /* An offset to other data within the .debug_info section. */ + ATTR_VAL_REF_INFO, + /* An offset to other data within the alt .debug_info section. */ + ATTR_VAL_REF_ALT_INFO, + /* An offset to data in some other section. */ + ATTR_VAL_REF_SECTION, + /* A type signature. */ + ATTR_VAL_REF_TYPE, + /* An index into the .debug_rnglists section. */ + ATTR_VAL_RNGLISTS_INDEX, + /* A block of data (not represented). */ + ATTR_VAL_BLOCK, + /* An expression (not represented). */ + ATTR_VAL_EXPR, +}; + +/* An attribute value. */ + +struct attr_val +{ + /* How the value is stored in the field u. */ + enum attr_val_encoding encoding; + union + { + /* ATTR_VAL_ADDRESS*, ATTR_VAL_UINT, ATTR_VAL_REF*. */ + uint64_t uint; + /* ATTR_VAL_SINT. */ + int64_t sint; + /* ATTR_VAL_STRING. */ + const char *string; + /* ATTR_VAL_BLOCK not stored. */ + } u; +}; + +/* The line number program header. */ + +struct line_header +{ + /* The version of the line number information. */ + int version; + /* Address size. */ + int addrsize; + /* The minimum instruction length. */ + unsigned int min_insn_len; + /* The maximum number of ops per instruction. */ + unsigned int max_ops_per_insn; + /* The line base for special opcodes. */ + int line_base; + /* The line range for special opcodes. */ + unsigned int line_range; + /* The opcode base--the first special opcode. */ + unsigned int opcode_base; + /* Opcode lengths, indexed by opcode - 1. */ + const unsigned char *opcode_lengths; + /* The number of directory entries. */ + size_t dirs_count; + /* The directory entries. */ + const char **dirs; + /* The number of filenames. */ + size_t filenames_count; + /* The filenames. */ + const char **filenames; +}; + +/* A format description from a line header. */ + +struct line_header_format +{ + int lnct; /* LNCT code. */ + enum dwarf_form form; /* Form of entry data. */ +}; + +/* Map a single PC value to a file/line. We will keep a vector of + these sorted by PC value. Each file/line will be correct from the + PC up to the PC of the next entry if there is one. 
We allocate one + extra entry at the end so that we can use bsearch. */ + +struct line +{ + /* PC. */ + uintptr_t pc; + /* File name. Many entries in the array are expected to point to + the same file name. */ + const char *filename; + /* Line number. */ + int lineno; + /* Index of the object in the original array read from the DWARF + section, before it has been sorted. The index makes it possible + to use Quicksort and maintain stability. */ + int idx; +}; + +/* A growable vector of line number information. This is used while + reading the line numbers. */ + +struct line_vector +{ + /* Memory. This is an array of struct line. */ + struct backtrace_vector vec; + /* Number of valid mappings. */ + size_t count; +}; + +/* A function described in the debug info. */ + +struct function +{ + /* The name of the function. */ + const char *name; + /* If this is an inlined function, the filename of the call + site. */ + const char *caller_filename; + /* If this is an inlined function, the line number of the call + site. */ + int caller_lineno; + /* Map PC ranges to inlined functions. */ + struct function_addrs *function_addrs; + size_t function_addrs_count; +}; + +/* An address range for a function. This maps a PC value to a + specific function. */ + +struct function_addrs +{ + /* Range is LOW <= PC < HIGH. */ + uintptr_t low; + uintptr_t high; + /* Function for this address range. */ + struct function *function; +}; + +/* A growable vector of function address ranges. */ + +struct function_vector +{ + /* Memory. This is an array of struct function_addrs. */ + struct backtrace_vector vec; + /* Number of address ranges present. */ + size_t count; +}; + +/* A DWARF compilation unit. This only holds the information we need + to map a PC to a file and line. */ + +struct unit +{ + /* The first entry for this compilation unit. */ + const unsigned char *unit_data; + /* The length of the data for this compilation unit. */ + size_t unit_data_len; + /* The offset of UNIT_DATA from the start of the information for + this compilation unit. */ + size_t unit_data_offset; + /* Offset of the start of the compilation unit from the start of the + .debug_info section. */ + size_t low_offset; + /* Offset of the end of the compilation unit from the start of the + .debug_info section. */ + size_t high_offset; + /* DWARF version. */ + int version; + /* Whether unit is DWARF64. */ + int is_dwarf64; + /* Address size. */ + int addrsize; + /* Offset into line number information. */ + off_t lineoff; + /* Offset of compilation unit in .debug_str_offsets. */ + uint64_t str_offsets_base; + /* Offset of compilation unit in .debug_addr. */ + uint64_t addr_base; + /* Offset of compilation unit in .debug_rnglists. */ + uint64_t rnglists_base; + /* Primary source file. */ + const char *filename; + /* Compilation command working directory. */ + const char *comp_dir; + /* Absolute file name, only set if needed. */ + const char *abs_filename; + /* The abbreviations for this unit. */ + struct abbrevs abbrevs; + + /* The fields above this point are read in during initialization and + may be accessed freely. The fields below this point are read in + as needed, and therefore require care, as different threads may + try to initialize them simultaneously. */ + + /* PC to line number mapping. This is NULL if the values have not + been read. This is (struct line *) -1 if there was an error + reading the values. */ + struct line *lines; + /* Number of entries in lines. */ + size_t lines_count; + /* PC ranges to function. 
*/ + struct function_addrs *function_addrs; + size_t function_addrs_count; +}; + +/* An address range for a compilation unit. This maps a PC value to a + specific compilation unit. Note that we invert the representation + in DWARF: instead of listing the units and attaching a list of + ranges, we list the ranges and have each one point to the unit. + This lets us do a binary search to find the unit. */ + +struct unit_addrs +{ + /* Range is LOW <= PC < HIGH. */ + uintptr_t low; + uintptr_t high; + /* Compilation unit for this address range. */ + struct unit *u; +}; + +/* A growable vector of compilation unit address ranges. */ + +struct unit_addrs_vector +{ + /* Memory. This is an array of struct unit_addrs. */ + struct backtrace_vector vec; + /* Number of address ranges present. */ + size_t count; +}; + +/* A growable vector of compilation unit pointers. */ + +struct unit_vector +{ + struct backtrace_vector vec; + size_t count; +}; + +/* The information we need to map a PC to a file and line. */ + +struct dwarf_data +{ + /* The data for the next file we know about. */ + struct dwarf_data *next; + /* The data for .gnu_debugaltlink. */ + struct dwarf_data *altlink; + /* The base address mapping for this file. */ + struct libbacktrace_base_address base_address; + /* A sorted list of address ranges. */ + struct unit_addrs *addrs; + /* Number of address ranges in list. */ + size_t addrs_count; + /* A sorted list of units. */ + struct unit **units; + /* Number of units in the list. */ + size_t units_count; + /* The unparsed DWARF debug data. */ + struct dwarf_sections dwarf_sections; + /* Whether the data is big-endian or not. */ + int is_bigendian; + /* A vector used for function addresses. We keep this here so that + we can grow the vector as we read more functions. */ + struct function_vector fvec; +}; + +/* Report an error for a DWARF buffer. */ + +static void +dwarf_buf_error (struct dwarf_buf *buf, const char *msg, int errnum) +{ + char b[200]; + + snprintf (b, sizeof b, "%s in %s at %d", + msg, buf->name, (int) (buf->buf - buf->start)); + buf->error_callback (buf->data, b, errnum); +} + +/* Require at least COUNT bytes in BUF. Return 1 if all is well, 0 on + error. */ + +static int +require (struct dwarf_buf *buf, size_t count) +{ + if (buf->left >= count) + return 1; + + if (!buf->reported_underflow) + { + dwarf_buf_error (buf, "DWARF underflow", 0); + buf->reported_underflow = 1; + } + + return 0; +} + +/* Advance COUNT bytes in BUF. Return 1 if all is well, 0 on + error. */ + +static int +advance (struct dwarf_buf *buf, size_t count) +{ + if (!require (buf, count)) + return 0; + buf->buf += count; + buf->left -= count; + return 1; +} + +/* Read one zero-terminated string from BUF and advance past the string. */ + +static const char * +read_string (struct dwarf_buf *buf) +{ + const char *p = (const char *)buf->buf; + size_t len = strnlen (p, buf->left); + + /* - If len == left, we ran out of buffer before finding the zero terminator. + Generate an error by advancing len + 1. + - If len < left, advance by len + 1 to skip past the zero terminator. */ + size_t count = len + 1; + + if (!advance (buf, count)) + return NULL; + + return p; +} + +/* Read one byte from BUF and advance 1 byte. */ + +static unsigned char +read_byte (struct dwarf_buf *buf) +{ + const unsigned char *p = buf->buf; + + if (!advance (buf, 1)) + return 0; + return p[0]; +} + +/* Read a signed char from BUF and advance 1 byte. 
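+   The (*p ^ 0x80) - 0x80 expression below sign-extends the byte portably, avoiding the implementation-defined conversion of values above 127 to signed char.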
*/ + +static signed char +read_sbyte (struct dwarf_buf *buf) +{ + const unsigned char *p = buf->buf; + + if (!advance (buf, 1)) + return 0; + return (*p ^ 0x80) - 0x80; +} + +/* Read a uint16 from BUF and advance 2 bytes. */ + +static uint16_t +read_uint16 (struct dwarf_buf *buf) +{ + const unsigned char *p = buf->buf; + + if (!advance (buf, 2)) + return 0; + if (buf->is_bigendian) + return ((uint16_t) p[0] << 8) | (uint16_t) p[1]; + else + return ((uint16_t) p[1] << 8) | (uint16_t) p[0]; +} + +/* Read a 24 bit value from BUF and advance 3 bytes. */ + +static uint32_t +read_uint24 (struct dwarf_buf *buf) +{ + const unsigned char *p = buf->buf; + + if (!advance (buf, 3)) + return 0; + if (buf->is_bigendian) + return (((uint32_t) p[0] << 16) | ((uint32_t) p[1] << 8) + | (uint32_t) p[2]); + else + return (((uint32_t) p[2] << 16) | ((uint32_t) p[1] << 8) + | (uint32_t) p[0]); +} + +/* Read a uint32 from BUF and advance 4 bytes. */ + +static uint32_t +read_uint32 (struct dwarf_buf *buf) +{ + const unsigned char *p = buf->buf; + + if (!advance (buf, 4)) + return 0; + if (buf->is_bigendian) + return (((uint32_t) p[0] << 24) | ((uint32_t) p[1] << 16) + | ((uint32_t) p[2] << 8) | (uint32_t) p[3]); + else + return (((uint32_t) p[3] << 24) | ((uint32_t) p[2] << 16) + | ((uint32_t) p[1] << 8) | (uint32_t) p[0]); +} + +/* Read a uint64 from BUF and advance 8 bytes. */ + +static uint64_t +read_uint64 (struct dwarf_buf *buf) +{ + const unsigned char *p = buf->buf; + + if (!advance (buf, 8)) + return 0; + if (buf->is_bigendian) + return (((uint64_t) p[0] << 56) | ((uint64_t) p[1] << 48) + | ((uint64_t) p[2] << 40) | ((uint64_t) p[3] << 32) + | ((uint64_t) p[4] << 24) | ((uint64_t) p[5] << 16) + | ((uint64_t) p[6] << 8) | (uint64_t) p[7]); + else + return (((uint64_t) p[7] << 56) | ((uint64_t) p[6] << 48) + | ((uint64_t) p[5] << 40) | ((uint64_t) p[4] << 32) + | ((uint64_t) p[3] << 24) | ((uint64_t) p[2] << 16) + | ((uint64_t) p[1] << 8) | (uint64_t) p[0]); +} + +/* Read an offset from BUF and advance the appropriate number of + bytes. */ + +static uint64_t +read_offset (struct dwarf_buf *buf, int is_dwarf64) +{ + if (is_dwarf64) + return read_uint64 (buf); + else + return read_uint32 (buf); +} + +/* Read an address from BUF and advance the appropriate number of + bytes. */ + +static uint64_t +read_address (struct dwarf_buf *buf, int addrsize) +{ + switch (addrsize) + { + case 1: + return read_byte (buf); + case 2: + return read_uint16 (buf); + case 4: + return read_uint32 (buf); + case 8: + return read_uint64 (buf); + default: + dwarf_buf_error (buf, "unrecognized address size", 0); + return 0; + } +} + +/* Return whether a value is the highest possible address, given the + address size. */ + +static int +is_highest_address (uint64_t address, int addrsize) +{ + switch (addrsize) + { + case 1: + return address == (unsigned char) -1; + case 2: + return address == (uint16_t) -1; + case 4: + return address == (uint32_t) -1; + case 8: + return address == (uint64_t) -1; + default: + return 0; + } +} + +/* Read an unsigned LEB128 number. 
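+   Each byte contributes its low 7 bits, least significant group first, and the high bit of a byte indicates that another byte follows.  For example, the value 300 (0x12c) is encoded as the two bytes 0xac 0x02.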
*/ + +static uint64_t +read_uleb128 (struct dwarf_buf *buf) +{ + uint64_t ret; + unsigned int shift; + int overflow; + unsigned char b; + + ret = 0; + shift = 0; + overflow = 0; + do + { + const unsigned char *p; + + p = buf->buf; + if (!advance (buf, 1)) + return 0; + b = *p; + if (shift < 64) + ret |= ((uint64_t) (b & 0x7f)) << shift; + else if (!overflow) + { + dwarf_buf_error (buf, "LEB128 overflows uint64_t", 0); + overflow = 1; + } + shift += 7; + } + while ((b & 0x80) != 0); + + return ret; +} + +/* Read a signed LEB128 number. */ + +static int64_t +read_sleb128 (struct dwarf_buf *buf) +{ + uint64_t val; + unsigned int shift; + int overflow; + unsigned char b; + + val = 0; + shift = 0; + overflow = 0; + do + { + const unsigned char *p; + + p = buf->buf; + if (!advance (buf, 1)) + return 0; + b = *p; + if (shift < 64) + val |= ((uint64_t) (b & 0x7f)) << shift; + else if (!overflow) + { + dwarf_buf_error (buf, "signed LEB128 overflows uint64_t", 0); + overflow = 1; + } + shift += 7; + } + while ((b & 0x80) != 0); + + if ((b & 0x40) != 0 && shift < 64) + val |= ((uint64_t) -1) << shift; + + return (int64_t) val; +} + +/* Return the length of an LEB128 number. */ + +static size_t +leb128_len (const unsigned char *p) +{ + size_t ret; + + ret = 1; + while ((*p & 0x80) != 0) + { + ++p; + ++ret; + } + return ret; +} + +/* Read initial_length from BUF and advance the appropriate number of bytes. */ + +static uint64_t +read_initial_length (struct dwarf_buf *buf, int *is_dwarf64) +{ + uint64_t len; + + len = read_uint32 (buf); + if (len == 0xffffffff) + { + len = read_uint64 (buf); + *is_dwarf64 = 1; + } + else + *is_dwarf64 = 0; + + return len; +} + +/* Free an abbreviations structure. */ + +static void +free_abbrevs (struct backtrace_state *state, struct abbrevs *abbrevs, + backtrace_error_callback error_callback, void *data) +{ + size_t i; + + for (i = 0; i < abbrevs->num_abbrevs; ++i) + backtrace_free (state, abbrevs->abbrevs[i].attrs, + abbrevs->abbrevs[i].num_attrs * sizeof (struct attr), + error_callback, data); + backtrace_free (state, abbrevs->abbrevs, + abbrevs->num_abbrevs * sizeof (struct abbrev), + error_callback, data); + abbrevs->num_abbrevs = 0; + abbrevs->abbrevs = NULL; +} + +/* Read an attribute value. Returns 1 on success, 0 on failure. + Stores the value and its encoding in *VAL. We don't try to store + the contents of some attribute forms (blocks and expressions), + because we don't care about them. */ + +static int +read_attribute (enum dwarf_form form, uint64_t implicit_val, + struct dwarf_buf *buf, int is_dwarf64, int version, + int addrsize, const struct dwarf_sections *dwarf_sections, + struct dwarf_data *altlink, struct attr_val *val) +{ + /* Avoid warnings that val.u.FIELD may be used uninitialized if + this function is inlined. The warnings aren't valid but can + occur because the different fields are set and used + conditionally. 
*/ + memset (val, 0, sizeof *val); + + switch (form) + { + case DW_FORM_addr: + val->encoding = ATTR_VAL_ADDRESS; + val->u.uint = read_address (buf, addrsize); + return 1; + case DW_FORM_block2: + val->encoding = ATTR_VAL_BLOCK; + return advance (buf, read_uint16 (buf)); + case DW_FORM_block4: + val->encoding = ATTR_VAL_BLOCK; + return advance (buf, read_uint32 (buf)); + case DW_FORM_data2: + val->encoding = ATTR_VAL_UINT; + val->u.uint = read_uint16 (buf); + return 1; + case DW_FORM_data4: + val->encoding = ATTR_VAL_UINT; + val->u.uint = read_uint32 (buf); + return 1; + case DW_FORM_data8: + val->encoding = ATTR_VAL_UINT; + val->u.uint = read_uint64 (buf); + return 1; + case DW_FORM_data16: + val->encoding = ATTR_VAL_BLOCK; + return advance (buf, 16); + case DW_FORM_string: + val->encoding = ATTR_VAL_STRING; + val->u.string = read_string (buf); + return val->u.string == NULL ? 0 : 1; + case DW_FORM_block: + val->encoding = ATTR_VAL_BLOCK; + return advance (buf, read_uleb128 (buf)); + case DW_FORM_block1: + val->encoding = ATTR_VAL_BLOCK; + return advance (buf, read_byte (buf)); + case DW_FORM_data1: + val->encoding = ATTR_VAL_UINT; + val->u.uint = read_byte (buf); + return 1; + case DW_FORM_flag: + val->encoding = ATTR_VAL_UINT; + val->u.uint = read_byte (buf); + return 1; + case DW_FORM_sdata: + val->encoding = ATTR_VAL_SINT; + val->u.sint = read_sleb128 (buf); + return 1; + case DW_FORM_strp: + { + uint64_t offset; + + offset = read_offset (buf, is_dwarf64); + if (offset >= dwarf_sections->size[DEBUG_STR]) + { + dwarf_buf_error (buf, "DW_FORM_strp out of range", 0); + return 0; + } + val->encoding = ATTR_VAL_STRING; + val->u.string = + (const char *) dwarf_sections->data[DEBUG_STR] + offset; + return 1; + } + case DW_FORM_line_strp: + { + uint64_t offset; + + offset = read_offset (buf, is_dwarf64); + if (offset >= dwarf_sections->size[DEBUG_LINE_STR]) + { + dwarf_buf_error (buf, "DW_FORM_line_strp out of range", 0); + return 0; + } + val->encoding = ATTR_VAL_STRING; + val->u.string = + (const char *) dwarf_sections->data[DEBUG_LINE_STR] + offset; + return 1; + } + case DW_FORM_udata: + val->encoding = ATTR_VAL_UINT; + val->u.uint = read_uleb128 (buf); + return 1; + case DW_FORM_ref_addr: + val->encoding = ATTR_VAL_REF_INFO; + if (version == 2) + val->u.uint = read_address (buf, addrsize); + else + val->u.uint = read_offset (buf, is_dwarf64); + return 1; + case DW_FORM_ref1: + val->encoding = ATTR_VAL_REF_UNIT; + val->u.uint = read_byte (buf); + return 1; + case DW_FORM_ref2: + val->encoding = ATTR_VAL_REF_UNIT; + val->u.uint = read_uint16 (buf); + return 1; + case DW_FORM_ref4: + val->encoding = ATTR_VAL_REF_UNIT; + val->u.uint = read_uint32 (buf); + return 1; + case DW_FORM_ref8: + val->encoding = ATTR_VAL_REF_UNIT; + val->u.uint = read_uint64 (buf); + return 1; + case DW_FORM_ref_udata: + val->encoding = ATTR_VAL_REF_UNIT; + val->u.uint = read_uleb128 (buf); + return 1; + case DW_FORM_indirect: + { + uint64_t form; + + form = read_uleb128 (buf); + if (form == DW_FORM_implicit_const) + { + dwarf_buf_error (buf, + "DW_FORM_indirect to DW_FORM_implicit_const", + 0); + return 0; + } + return read_attribute ((enum dwarf_form) form, 0, buf, is_dwarf64, + version, addrsize, dwarf_sections, altlink, + val); + } + case DW_FORM_sec_offset: + val->encoding = ATTR_VAL_REF_SECTION; + val->u.uint = read_offset (buf, is_dwarf64); + return 1; + case DW_FORM_exprloc: + val->encoding = ATTR_VAL_EXPR; + return advance (buf, read_uleb128 (buf)); + case DW_FORM_flag_present: + val->encoding = 
ATTR_VAL_UINT; + val->u.uint = 1; + return 1; + case DW_FORM_ref_sig8: + val->encoding = ATTR_VAL_REF_TYPE; + val->u.uint = read_uint64 (buf); + return 1; + case DW_FORM_strx: case DW_FORM_strx1: case DW_FORM_strx2: + case DW_FORM_strx3: case DW_FORM_strx4: + { + uint64_t offset; + + switch (form) + { + case DW_FORM_strx: + offset = read_uleb128 (buf); + break; + case DW_FORM_strx1: + offset = read_byte (buf); + break; + case DW_FORM_strx2: + offset = read_uint16 (buf); + break; + case DW_FORM_strx3: + offset = read_uint24 (buf); + break; + case DW_FORM_strx4: + offset = read_uint32 (buf); + break; + default: + /* This case can't happen. */ + return 0; + } + val->encoding = ATTR_VAL_STRING_INDEX; + val->u.uint = offset; + return 1; + } + case DW_FORM_addrx: case DW_FORM_addrx1: case DW_FORM_addrx2: + case DW_FORM_addrx3: case DW_FORM_addrx4: + { + uint64_t offset; + + switch (form) + { + case DW_FORM_addrx: + offset = read_uleb128 (buf); + break; + case DW_FORM_addrx1: + offset = read_byte (buf); + break; + case DW_FORM_addrx2: + offset = read_uint16 (buf); + break; + case DW_FORM_addrx3: + offset = read_uint24 (buf); + break; + case DW_FORM_addrx4: + offset = read_uint32 (buf); + break; + default: + /* This case can't happen. */ + return 0; + } + val->encoding = ATTR_VAL_ADDRESS_INDEX; + val->u.uint = offset; + return 1; + } + case DW_FORM_ref_sup4: + val->encoding = ATTR_VAL_REF_SECTION; + val->u.uint = read_uint32 (buf); + return 1; + case DW_FORM_ref_sup8: + val->encoding = ATTR_VAL_REF_SECTION; + val->u.uint = read_uint64 (buf); + return 1; + case DW_FORM_implicit_const: + val->encoding = ATTR_VAL_UINT; + val->u.uint = implicit_val; + return 1; + case DW_FORM_loclistx: + /* We don't distinguish this from DW_FORM_sec_offset. It + * shouldn't matter since we don't care about loclists. */ + val->encoding = ATTR_VAL_REF_SECTION; + val->u.uint = read_uleb128 (buf); + return 1; + case DW_FORM_rnglistx: + val->encoding = ATTR_VAL_RNGLISTS_INDEX; + val->u.uint = read_uleb128 (buf); + return 1; + case DW_FORM_GNU_addr_index: + val->encoding = ATTR_VAL_REF_SECTION; + val->u.uint = read_uleb128 (buf); + return 1; + case DW_FORM_GNU_str_index: + val->encoding = ATTR_VAL_REF_SECTION; + val->u.uint = read_uleb128 (buf); + return 1; + case DW_FORM_GNU_ref_alt: + val->u.uint = read_offset (buf, is_dwarf64); + if (altlink == NULL) + { + val->encoding = ATTR_VAL_NONE; + return 1; + } + val->encoding = ATTR_VAL_REF_ALT_INFO; + return 1; + case DW_FORM_strp_sup: case DW_FORM_GNU_strp_alt: + { + uint64_t offset; + + offset = read_offset (buf, is_dwarf64); + if (altlink == NULL) + { + val->encoding = ATTR_VAL_NONE; + return 1; + } + if (offset >= altlink->dwarf_sections.size[DEBUG_STR]) + { + dwarf_buf_error (buf, "DW_FORM_strp_sup out of range", 0); + return 0; + } + val->encoding = ATTR_VAL_STRING; + val->u.string = + (const char *) altlink->dwarf_sections.data[DEBUG_STR] + offset; + return 1; + } + default: + dwarf_buf_error (buf, "unrecognized DWARF form", -1); + return 0; + } +} + +/* If we can determine the value of a string attribute, set *STRING to + point to the string. Return 1 on success, 0 on error. If we don't + know the value, we consider that a success, and we don't change + *STRING. An error is only reported for some sort of out of range + offset. 
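+   ATTR_VAL_STRING values are returned directly; ATTR_VAL_STRING_INDEX (DW_FORM_strx) values are resolved through the .debug_str_offsets table using the unit's str_offsets_base; other encodings are left untouched.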
*/ + +static int +resolve_string (const struct dwarf_sections *dwarf_sections, int is_dwarf64, + int is_bigendian, uint64_t str_offsets_base, + const struct attr_val *val, + backtrace_error_callback error_callback, void *data, + const char **string) +{ + switch (val->encoding) + { + case ATTR_VAL_STRING: + *string = val->u.string; + return 1; + + case ATTR_VAL_STRING_INDEX: + { + uint64_t offset; + struct dwarf_buf offset_buf; + + offset = val->u.uint * (is_dwarf64 ? 8 : 4) + str_offsets_base; + if (offset + (is_dwarf64 ? 8 : 4) + > dwarf_sections->size[DEBUG_STR_OFFSETS]) + { + error_callback (data, "DW_FORM_strx value out of range", 0); + return 0; + } + + offset_buf.name = ".debug_str_offsets"; + offset_buf.start = dwarf_sections->data[DEBUG_STR_OFFSETS]; + offset_buf.buf = dwarf_sections->data[DEBUG_STR_OFFSETS] + offset; + offset_buf.left = dwarf_sections->size[DEBUG_STR_OFFSETS] - offset; + offset_buf.is_bigendian = is_bigendian; + offset_buf.error_callback = error_callback; + offset_buf.data = data; + offset_buf.reported_underflow = 0; + + offset = read_offset (&offset_buf, is_dwarf64); + if (offset >= dwarf_sections->size[DEBUG_STR]) + { + dwarf_buf_error (&offset_buf, + "DW_FORM_strx offset out of range", + 0); + return 0; + } + *string = (const char *) dwarf_sections->data[DEBUG_STR] + offset; + return 1; + } + + default: + return 1; + } +} + +/* Set *ADDRESS to the real address for an ATTR_VAL_ADDRESS_INDEX. + Return 1 on success, 0 on error. */ + +static int +resolve_addr_index (const struct dwarf_sections *dwarf_sections, + uint64_t addr_base, int addrsize, int is_bigendian, + uint64_t addr_index, + backtrace_error_callback error_callback, void *data, + uintptr_t *address) +{ + uint64_t offset; + struct dwarf_buf addr_buf; + + offset = addr_index * addrsize + addr_base; + if (offset + addrsize > dwarf_sections->size[DEBUG_ADDR]) + { + error_callback (data, "DW_FORM_addrx value out of range", 0); + return 0; + } + + addr_buf.name = ".debug_addr"; + addr_buf.start = dwarf_sections->data[DEBUG_ADDR]; + addr_buf.buf = dwarf_sections->data[DEBUG_ADDR] + offset; + addr_buf.left = dwarf_sections->size[DEBUG_ADDR] - offset; + addr_buf.is_bigendian = is_bigendian; + addr_buf.error_callback = error_callback; + addr_buf.data = data; + addr_buf.reported_underflow = 0; + + *address = (uintptr_t) read_address (&addr_buf, addrsize); + return 1; +} + +/* Compare a unit offset against a unit for bsearch. */ + +static int +units_search (const void *vkey, const void *ventry) +{ + const size_t *key = (const size_t *) vkey; + const struct unit *entry = *((const struct unit *const *) ventry); + size_t offset; + + offset = *key; + if (offset < entry->low_offset) + return -1; + else if (offset >= entry->high_offset) + return 1; + else + return 0; +} + +/* Find a unit in PU containing OFFSET. */ + +static struct unit * +find_unit (struct unit **pu, size_t units_count, size_t offset) +{ + struct unit **u; + u = (struct unit**)bsearch (&offset, pu, units_count, sizeof (struct unit *), units_search); + return u == NULL ? NULL : *u; +} + +/* Compare function_addrs for qsort. When ranges are nested, make the + smallest one sort last. 
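+   That is, entries are ordered by increasing low address and, for equal low addresses, by decreasing high address, so the outermost range sorts first and the innermost range sorts last.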
*/ + +static int +function_addrs_compare (const void *v1, const void *v2) +{ + const struct function_addrs *a1 = (const struct function_addrs *) v1; + const struct function_addrs *a2 = (const struct function_addrs *) v2; + + if (a1->low < a2->low) + return -1; + if (a1->low > a2->low) + return 1; + if (a1->high < a2->high) + return 1; + if (a1->high > a2->high) + return -1; + return strcmp (a1->function->name, a2->function->name); +} + +/* Compare a PC against a function_addrs for bsearch. We always + allocate an extra entry at the end of the vector, so that this + routine can safely look at the next entry. Note that if there are + multiple ranges containing PC, which one will be returned is + unpredictable. We compensate for that in dwarf_fileline. */ + +static int +function_addrs_search (const void *vkey, const void *ventry) +{ + const uintptr_t *key = (const uintptr_t *) vkey; + const struct function_addrs *entry = (const struct function_addrs *) ventry; + uintptr_t pc; + + pc = *key; + if (pc < entry->low) + return -1; + else if (pc > (entry + 1)->low) + return 1; + else + return 0; +} + +/* Add a new compilation unit address range to a vector. This is + called via add_ranges. Returns 1 on success, 0 on failure. */ + +static int +add_unit_addr (struct backtrace_state *state, void *rdata, + uintptr_t lowpc, uintptr_t highpc, + backtrace_error_callback error_callback, void *data, + void *pvec) +{ + struct unit *u = (struct unit *) rdata; + struct unit_addrs_vector *vec = (struct unit_addrs_vector *) pvec; + struct unit_addrs *p; + + /* Try to merge with the last entry. */ + if (vec->count > 0) + { + p = (struct unit_addrs *) vec->vec.base + (vec->count - 1); + if ((lowpc == p->high || lowpc == p->high + 1) + && u == p->u) + { + if (highpc > p->high) + p->high = highpc; + return 1; + } + } + + p = ((struct unit_addrs *) + backtrace_vector_grow (state, sizeof (struct unit_addrs), + error_callback, data, &vec->vec)); + if (p == NULL) + return 0; + + p->low = lowpc; + p->high = highpc; + p->u = u; + + ++vec->count; + + return 1; +} + +/* Compare unit_addrs for qsort. When ranges are nested, make the + smallest one sort last. */ + +static int +unit_addrs_compare (const void *v1, const void *v2) +{ + const struct unit_addrs *a1 = (const struct unit_addrs *) v1; + const struct unit_addrs *a2 = (const struct unit_addrs *) v2; + + if (a1->low < a2->low) + return -1; + if (a1->low > a2->low) + return 1; + if (a1->high < a2->high) + return 1; + if (a1->high > a2->high) + return -1; + if (a1->u->lineoff < a2->u->lineoff) + return -1; + if (a1->u->lineoff > a2->u->lineoff) + return 1; + return 0; +} + +/* Compare a PC against a unit_addrs for bsearch. We always allocate + an extra entry at the end of the vector, so that this routine can + safely look at the next entry. Note that if there are multiple + ranges containing PC, which one will be returned is unpredictable. + We compensate for that in dwarf_fileline. */ + +static int +unit_addrs_search (const void *vkey, const void *ventry) +{ + const uintptr_t *key = (const uintptr_t *) vkey; + const struct unit_addrs *entry = (const struct unit_addrs *) ventry; + uintptr_t pc; + + pc = *key; + if (pc < entry->low) + return -1; + else if (pc > (entry + 1)->low) + return 1; + else + return 0; +} + +/* Sort the line vector by PC. We want a stable sort here to maintain + the order of lines for the same PC values. Since the sequence is + being sorted in place, their addresses cannot be relied on to + maintain stability. 
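+   Recording each entry's original position and comparing it as a secondary key keeps the sort deterministic even though qsort itself need not be stable.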
That is the purpose of the index member. */ + +static int +line_compare (const void *v1, const void *v2) +{ + const struct line *ln1 = (const struct line *) v1; + const struct line *ln2 = (const struct line *) v2; + + if (ln1->pc < ln2->pc) + return -1; + else if (ln1->pc > ln2->pc) + return 1; + else if (ln1->idx < ln2->idx) + return -1; + else if (ln1->idx > ln2->idx) + return 1; + else + return 0; +} + +/* Find a PC in a line vector. We always allocate an extra entry at + the end of the lines vector, so that this routine can safely look + at the next entry. Note that when there are multiple mappings for + the same PC value, this will return the last one. */ + +static int +line_search (const void *vkey, const void *ventry) +{ + const uintptr_t *key = (const uintptr_t *) vkey; + const struct line *entry = (const struct line *) ventry; + uintptr_t pc; + + pc = *key; + if (pc < entry->pc) + return -1; + else if (pc >= (entry + 1)->pc) + return 1; + else + return 0; +} + +/* Sort the abbrevs by the abbrev code. This function is passed to + both qsort and bsearch. */ + +static int +abbrev_compare (const void *v1, const void *v2) +{ + const struct abbrev *a1 = (const struct abbrev *) v1; + const struct abbrev *a2 = (const struct abbrev *) v2; + + if (a1->code < a2->code) + return -1; + else if (a1->code > a2->code) + return 1; + else + { + /* This really shouldn't happen. It means there are two + different abbrevs with the same code, and that means we don't + know which one lookup_abbrev should return. */ + return 0; + } +} + +/* Read the abbreviation table for a compilation unit. Returns 1 on + success, 0 on failure. */ + +static int +read_abbrevs (struct backtrace_state *state, uint64_t abbrev_offset, + const unsigned char *dwarf_abbrev, size_t dwarf_abbrev_size, + int is_bigendian, backtrace_error_callback error_callback, + void *data, struct abbrevs *abbrevs) +{ + struct dwarf_buf abbrev_buf; + struct dwarf_buf count_buf; + size_t num_abbrevs; + + abbrevs->num_abbrevs = 0; + abbrevs->abbrevs = NULL; + + if (abbrev_offset >= dwarf_abbrev_size) + { + error_callback (data, "abbrev offset out of range", 0); + return 0; + } + + abbrev_buf.name = ".debug_abbrev"; + abbrev_buf.start = dwarf_abbrev; + abbrev_buf.buf = dwarf_abbrev + abbrev_offset; + abbrev_buf.left = dwarf_abbrev_size - abbrev_offset; + abbrev_buf.is_bigendian = is_bigendian; + abbrev_buf.error_callback = error_callback; + abbrev_buf.data = data; + abbrev_buf.reported_underflow = 0; + + /* Count the number of abbrevs in this list. */ + + count_buf = abbrev_buf; + num_abbrevs = 0; + while (read_uleb128 (&count_buf) != 0) + { + if (count_buf.reported_underflow) + return 0; + ++num_abbrevs; + // Skip tag. + read_uleb128 (&count_buf); + // Skip has_children. + read_byte (&count_buf); + // Skip attributes. + while (read_uleb128 (&count_buf) != 0) + { + uint64_t form; + + form = read_uleb128 (&count_buf); + if ((enum dwarf_form) form == DW_FORM_implicit_const) + read_sleb128 (&count_buf); + } + // Skip form of last attribute. 
+ read_uleb128 (&count_buf); + } + + if (count_buf.reported_underflow) + return 0; + + if (num_abbrevs == 0) + return 1; + + abbrevs->abbrevs = ((struct abbrev *) + backtrace_alloc (state, + num_abbrevs * sizeof (struct abbrev), + error_callback, data)); + if (abbrevs->abbrevs == NULL) + return 0; + abbrevs->num_abbrevs = num_abbrevs; + memset (abbrevs->abbrevs, 0, num_abbrevs * sizeof (struct abbrev)); + + num_abbrevs = 0; + while (1) + { + uint64_t code; + struct abbrev a; + size_t num_attrs; + struct attr *attrs; + + if (abbrev_buf.reported_underflow) + goto fail; + + code = read_uleb128 (&abbrev_buf); + if (code == 0) + break; + + a.code = code; + a.tag = (enum dwarf_tag) read_uleb128 (&abbrev_buf); + a.has_children = read_byte (&abbrev_buf); + + count_buf = abbrev_buf; + num_attrs = 0; + while (read_uleb128 (&count_buf) != 0) + { + uint64_t form; + + ++num_attrs; + form = read_uleb128 (&count_buf); + if ((enum dwarf_form) form == DW_FORM_implicit_const) + read_sleb128 (&count_buf); + } + + if (num_attrs == 0) + { + attrs = NULL; + read_uleb128 (&abbrev_buf); + read_uleb128 (&abbrev_buf); + } + else + { + attrs = ((struct attr *) + backtrace_alloc (state, num_attrs * sizeof *attrs, + error_callback, data)); + if (attrs == NULL) + goto fail; + num_attrs = 0; + while (1) + { + uint64_t name; + uint64_t form; + + name = read_uleb128 (&abbrev_buf); + form = read_uleb128 (&abbrev_buf); + if (name == 0) + break; + attrs[num_attrs].name = (enum dwarf_attribute) name; + attrs[num_attrs].form = (enum dwarf_form) form; + if ((enum dwarf_form) form == DW_FORM_implicit_const) + attrs[num_attrs].val = read_sleb128 (&abbrev_buf); + else + attrs[num_attrs].val = 0; + ++num_attrs; + } + } + + a.num_attrs = num_attrs; + a.attrs = attrs; + + abbrevs->abbrevs[num_abbrevs] = a; + ++num_abbrevs; + } + + backtrace_qsort (abbrevs->abbrevs, abbrevs->num_abbrevs, + sizeof (struct abbrev), abbrev_compare); + + return 1; + + fail: + free_abbrevs (state, abbrevs, error_callback, data); + return 0; +} + +/* Return the abbrev information for an abbrev code. */ + +static const struct abbrev * +lookup_abbrev (struct abbrevs *abbrevs, uint64_t code, + backtrace_error_callback error_callback, void *data) +{ + struct abbrev key; + void *p; + + /* With GCC, where abbrevs are simply numbered in order, we should + be able to just look up the entry. */ + if (code - 1 < abbrevs->num_abbrevs + && abbrevs->abbrevs[code - 1].code == code) + return &abbrevs->abbrevs[code - 1]; + + /* Otherwise we have to search. */ + memset (&key, 0, sizeof key); + key.code = code; + p = bsearch (&key, abbrevs->abbrevs, abbrevs->num_abbrevs, + sizeof (struct abbrev), abbrev_compare); + if (p == NULL) + { + error_callback (data, "invalid abbreviation code", 0); + return NULL; + } + return (const struct abbrev *) p; +} + +/* This struct is used to gather address range information while + reading attributes. We use this while building a mapping from + address ranges to compilation units and then again while mapping + from address ranges to function entries. Normally either + lowpc/highpc is set or ranges is set. */ + +struct pcrange { + uintptr_t lowpc; /* The low PC value. */ + int have_lowpc; /* Whether a low PC value was found. */ + int lowpc_is_addr_index; /* Whether lowpc is in .debug_addr. */ + uintptr_t highpc; /* The high PC value. */ + int have_highpc; /* Whether a high PC value was found. */ + int highpc_is_relative; /* Whether highpc is relative to lowpc. */ + int highpc_is_addr_index; /* Whether highpc is in .debug_addr. 
*/ + uint64_t ranges; /* Offset in ranges section. */ + int have_ranges; /* Whether ranges is valid. */ + int ranges_is_index; /* Whether ranges is DW_FORM_rnglistx. */ +}; + +/* Update PCRANGE from an attribute value. */ + +static void +update_pcrange (const struct attr* attr, const struct attr_val* val, + struct pcrange *pcrange) +{ + switch (attr->name) + { + case DW_AT_low_pc: + if (val->encoding == ATTR_VAL_ADDRESS) + { + pcrange->lowpc = (uintptr_t) val->u.uint; + pcrange->have_lowpc = 1; + } + else if (val->encoding == ATTR_VAL_ADDRESS_INDEX) + { + pcrange->lowpc = (uintptr_t) val->u.uint; + pcrange->have_lowpc = 1; + pcrange->lowpc_is_addr_index = 1; + } + break; + + case DW_AT_high_pc: + if (val->encoding == ATTR_VAL_ADDRESS) + { + pcrange->highpc = (uintptr_t) val->u.uint; + pcrange->have_highpc = 1; + } + else if (val->encoding == ATTR_VAL_UINT) + { + pcrange->highpc = (uintptr_t) val->u.uint; + pcrange->have_highpc = 1; + pcrange->highpc_is_relative = 1; + } + else if (val->encoding == ATTR_VAL_ADDRESS_INDEX) + { + pcrange->highpc = (uintptr_t) val->u.uint; + pcrange->have_highpc = 1; + pcrange->highpc_is_addr_index = 1; + } + break; + + case DW_AT_ranges: + if (val->encoding == ATTR_VAL_UINT + || val->encoding == ATTR_VAL_REF_SECTION) + { + pcrange->ranges = val->u.uint; + pcrange->have_ranges = 1; + } + else if (val->encoding == ATTR_VAL_RNGLISTS_INDEX) + { + pcrange->ranges = val->u.uint; + pcrange->have_ranges = 1; + pcrange->ranges_is_index = 1; + } + break; + + default: + break; + } +} + +/* Call ADD_RANGE for a low/high PC pair. Returns 1 on success, 0 on + error. */ + +static int +add_low_high_range (struct backtrace_state *state, + const struct dwarf_sections *dwarf_sections, + struct libbacktrace_base_address base_address, + int is_bigendian, struct unit *u, + const struct pcrange *pcrange, + int (*add_range) (struct backtrace_state *state, + void *rdata, uintptr_t lowpc, + uintptr_t highpc, + backtrace_error_callback error_callback, + void *data, void *vec), + void *rdata, + backtrace_error_callback error_callback, void *data, + void *vec) +{ + uintptr_t lowpc; + uintptr_t highpc; + + lowpc = pcrange->lowpc; + if (pcrange->lowpc_is_addr_index) + { + if (!resolve_addr_index (dwarf_sections, u->addr_base, u->addrsize, + is_bigendian, lowpc, error_callback, data, + &lowpc)) + return 0; + } + + highpc = pcrange->highpc; + if (pcrange->highpc_is_addr_index) + { + if (!resolve_addr_index (dwarf_sections, u->addr_base, u->addrsize, + is_bigendian, highpc, error_callback, data, + &highpc)) + return 0; + } + if (pcrange->highpc_is_relative) + highpc += lowpc; + + /* Add in the base address of the module when recording PC values, + so that we can look up the PC directly. */ + lowpc = libbacktrace_add_base (lowpc, base_address); + highpc = libbacktrace_add_base (highpc, base_address); + + return add_range (state, rdata, lowpc, highpc, error_callback, data, vec); +} + +/* Call ADD_RANGE for each range read from .debug_ranges, as used in + DWARF versions 2 through 4. 
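+   Each entry in that section is a pair of addresses; a pair of zeros ends the list, and an entry whose first value is the largest representable address is a base address selection entry whose second value becomes the base for the entries that follow.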
*/ + +static int +add_ranges_from_ranges ( + struct backtrace_state *state, + const struct dwarf_sections *dwarf_sections, + struct libbacktrace_base_address base_address, int is_bigendian, + struct unit *u, uintptr_t base, + const struct pcrange *pcrange, + int (*add_range) (struct backtrace_state *state, void *rdata, + uintptr_t lowpc, uintptr_t highpc, + backtrace_error_callback error_callback, void *data, + void *vec), + void *rdata, + backtrace_error_callback error_callback, void *data, + void *vec) +{ + struct dwarf_buf ranges_buf; + + if (pcrange->ranges >= dwarf_sections->size[DEBUG_RANGES]) + { + error_callback (data, "ranges offset out of range", 0); + return 0; + } + + ranges_buf.name = ".debug_ranges"; + ranges_buf.start = dwarf_sections->data[DEBUG_RANGES]; + ranges_buf.buf = dwarf_sections->data[DEBUG_RANGES] + pcrange->ranges; + ranges_buf.left = dwarf_sections->size[DEBUG_RANGES] - pcrange->ranges; + ranges_buf.is_bigendian = is_bigendian; + ranges_buf.error_callback = error_callback; + ranges_buf.data = data; + ranges_buf.reported_underflow = 0; + + while (1) + { + uint64_t low; + uint64_t high; + + if (ranges_buf.reported_underflow) + return 0; + + low = read_address (&ranges_buf, u->addrsize); + high = read_address (&ranges_buf, u->addrsize); + + if (low == 0 && high == 0) + break; + + if (is_highest_address (low, u->addrsize)) + base = (uintptr_t) high; + else + { + uintptr_t rl, rh; + + rl = libbacktrace_add_base ((uintptr_t) low + base, base_address); + rh = libbacktrace_add_base ((uintptr_t) high + base, base_address); + if (!add_range (state, rdata, rl, rh, error_callback, data, vec)) + return 0; + } + } + + if (ranges_buf.reported_underflow) + return 0; + + return 1; +} + +/* Call ADD_RANGE for each range read from .debug_rnglists, as used in + DWARF version 5. */ + +static int +add_ranges_from_rnglists ( + struct backtrace_state *state, + const struct dwarf_sections *dwarf_sections, + struct libbacktrace_base_address base_address, int is_bigendian, + struct unit *u, uintptr_t base, + const struct pcrange *pcrange, + int (*add_range) (struct backtrace_state *state, void *rdata, + uintptr_t lowpc, uintptr_t highpc, + backtrace_error_callback error_callback, void *data, + void *vec), + void *rdata, + backtrace_error_callback error_callback, void *data, + void *vec) +{ + uint64_t offset; + struct dwarf_buf rnglists_buf; + + if (!pcrange->ranges_is_index) + offset = pcrange->ranges; + else + offset = u->rnglists_base + pcrange->ranges * (u->is_dwarf64 ? 
8 : 4); + if (offset >= dwarf_sections->size[DEBUG_RNGLISTS]) + { + error_callback (data, "rnglists offset out of range", 0); + return 0; + } + + rnglists_buf.name = ".debug_rnglists"; + rnglists_buf.start = dwarf_sections->data[DEBUG_RNGLISTS]; + rnglists_buf.buf = dwarf_sections->data[DEBUG_RNGLISTS] + offset; + rnglists_buf.left = dwarf_sections->size[DEBUG_RNGLISTS] - offset; + rnglists_buf.is_bigendian = is_bigendian; + rnglists_buf.error_callback = error_callback; + rnglists_buf.data = data; + rnglists_buf.reported_underflow = 0; + + if (pcrange->ranges_is_index) + { + offset = read_offset (&rnglists_buf, u->is_dwarf64); + offset += u->rnglists_base; + if (offset >= dwarf_sections->size[DEBUG_RNGLISTS]) + { + error_callback (data, "rnglists index offset out of range", 0); + return 0; + } + rnglists_buf.buf = dwarf_sections->data[DEBUG_RNGLISTS] + offset; + rnglists_buf.left = dwarf_sections->size[DEBUG_RNGLISTS] - offset; + } + + while (1) + { + unsigned char rle; + + rle = read_byte (&rnglists_buf); + if (rle == DW_RLE_end_of_list) + break; + switch (rle) + { + case DW_RLE_base_addressx: + { + uint64_t index; + + index = read_uleb128 (&rnglists_buf); + if (!resolve_addr_index (dwarf_sections, u->addr_base, + u->addrsize, is_bigendian, index, + error_callback, data, &base)) + return 0; + } + break; + + case DW_RLE_startx_endx: + { + uint64_t index; + uintptr_t low; + uintptr_t high; + + index = read_uleb128 (&rnglists_buf); + if (!resolve_addr_index (dwarf_sections, u->addr_base, + u->addrsize, is_bigendian, index, + error_callback, data, &low)) + return 0; + index = read_uleb128 (&rnglists_buf); + if (!resolve_addr_index (dwarf_sections, u->addr_base, + u->addrsize, is_bigendian, index, + error_callback, data, &high)) + return 0; + if (!add_range (state, rdata, + libbacktrace_add_base (low, base_address), + libbacktrace_add_base (high, base_address), + error_callback, data, vec)) + return 0; + } + break; + + case DW_RLE_startx_length: + { + uint64_t index; + uintptr_t low; + uintptr_t length; + + index = read_uleb128 (&rnglists_buf); + if (!resolve_addr_index (dwarf_sections, u->addr_base, + u->addrsize, is_bigendian, index, + error_callback, data, &low)) + return 0; + length = read_uleb128 (&rnglists_buf); + low = libbacktrace_add_base (low, base_address); + if (!add_range (state, rdata, low, low + length, + error_callback, data, vec)) + return 0; + } + break; + + case DW_RLE_offset_pair: + { + uint64_t low; + uint64_t high; + + low = read_uleb128 (&rnglists_buf); + high = read_uleb128 (&rnglists_buf); + if (!add_range (state, rdata, + libbacktrace_add_base (low + base, base_address), + libbacktrace_add_base (high + base, base_address), + error_callback, data, vec)) + return 0; + } + break; + + case DW_RLE_base_address: + base = (uintptr_t) read_address (&rnglists_buf, u->addrsize); + break; + + case DW_RLE_start_end: + { + uintptr_t low; + uintptr_t high; + + low = (uintptr_t) read_address (&rnglists_buf, u->addrsize); + high = (uintptr_t) read_address (&rnglists_buf, u->addrsize); + if (!add_range (state, rdata, + libbacktrace_add_base (low, base_address), + libbacktrace_add_base (high, base_address), + error_callback, data, vec)) + return 0; + } + break; + + case DW_RLE_start_length: + { + uintptr_t low; + uintptr_t length; + + low = (uintptr_t) read_address (&rnglists_buf, u->addrsize); + length = (uintptr_t) read_uleb128 (&rnglists_buf); + low = libbacktrace_add_base (low, base_address); + if (!add_range (state, rdata, low, low + length, + error_callback, data, vec)) + return 
0; + } + break; + + default: + dwarf_buf_error (&rnglists_buf, "unrecognized DW_RLE value", -1); + return 0; + } + } + + if (rnglists_buf.reported_underflow) + return 0; + + return 1; +} + +/* Call ADD_RANGE for each lowpc/highpc pair in PCRANGE. RDATA is + passed to ADD_RANGE, and is either a struct unit * or a struct + function *. VEC is the vector we are adding ranges to, and is + either a struct unit_addrs_vector * or a struct function_vector *. + Returns 1 on success, 0 on error. */ + +static int +add_ranges (struct backtrace_state *state, + const struct dwarf_sections *dwarf_sections, + struct libbacktrace_base_address base_address, int is_bigendian, + struct unit *u, uintptr_t base, const struct pcrange *pcrange, + int (*add_range) (struct backtrace_state *state, void *rdata, + uintptr_t lowpc, uintptr_t highpc, + backtrace_error_callback error_callback, + void *data, void *vec), + void *rdata, + backtrace_error_callback error_callback, void *data, + void *vec) +{ + if (pcrange->have_lowpc && pcrange->have_highpc) + return add_low_high_range (state, dwarf_sections, base_address, + is_bigendian, u, pcrange, add_range, rdata, + error_callback, data, vec); + + if (!pcrange->have_ranges) + { + /* Did not find any address ranges to add. */ + return 1; + } + + if (u->version < 5) + return add_ranges_from_ranges (state, dwarf_sections, base_address, + is_bigendian, u, base, pcrange, add_range, + rdata, error_callback, data, vec); + else + return add_ranges_from_rnglists (state, dwarf_sections, base_address, + is_bigendian, u, base, pcrange, add_range, + rdata, error_callback, data, vec); +} + +/* Find the address range covered by a compilation unit, reading from + UNIT_BUF and adding values to U. Returns 1 if all data could be + read, 0 if there is some error. 
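+   The DIEs of the unit are read in order, recursing into children, so ranges attached to nested DW_TAG_subprogram entries are recorded as well.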
*/ + +static int +find_address_ranges (struct backtrace_state *state, + struct libbacktrace_base_address base_address, + struct dwarf_buf *unit_buf, + const struct dwarf_sections *dwarf_sections, + int is_bigendian, struct dwarf_data *altlink, + backtrace_error_callback error_callback, void *data, + struct unit *u, struct unit_addrs_vector *addrs, + enum dwarf_tag *unit_tag) +{ + while (unit_buf->left > 0) + { + uint64_t code; + const struct abbrev *abbrev; + struct pcrange pcrange; + struct attr_val name_val; + int have_name_val; + struct attr_val comp_dir_val; + int have_comp_dir_val; + size_t i; + + code = read_uleb128 (unit_buf); + if (code == 0) + return 1; + + abbrev = lookup_abbrev (&u->abbrevs, code, error_callback, data); + if (abbrev == NULL) + return 0; + + if (unit_tag != NULL) + *unit_tag = abbrev->tag; + + memset (&pcrange, 0, sizeof pcrange); + memset (&name_val, 0, sizeof name_val); + have_name_val = 0; + memset (&comp_dir_val, 0, sizeof comp_dir_val); + have_comp_dir_val = 0; + for (i = 0; i < abbrev->num_attrs; ++i) + { + struct attr_val val; + + if (!read_attribute (abbrev->attrs[i].form, abbrev->attrs[i].val, + unit_buf, u->is_dwarf64, u->version, + u->addrsize, dwarf_sections, altlink, &val)) + return 0; + + switch (abbrev->attrs[i].name) + { + case DW_AT_low_pc: case DW_AT_high_pc: case DW_AT_ranges: + update_pcrange (&abbrev->attrs[i], &val, &pcrange); + break; + + case DW_AT_stmt_list: + if ((abbrev->tag == DW_TAG_compile_unit + || abbrev->tag == DW_TAG_skeleton_unit) + && (val.encoding == ATTR_VAL_UINT + || val.encoding == ATTR_VAL_REF_SECTION)) + u->lineoff = val.u.uint; + break; + + case DW_AT_name: + if (abbrev->tag == DW_TAG_compile_unit + || abbrev->tag == DW_TAG_skeleton_unit) + { + name_val = val; + have_name_val = 1; + } + break; + + case DW_AT_comp_dir: + if (abbrev->tag == DW_TAG_compile_unit + || abbrev->tag == DW_TAG_skeleton_unit) + { + comp_dir_val = val; + have_comp_dir_val = 1; + } + break; + + case DW_AT_str_offsets_base: + if ((abbrev->tag == DW_TAG_compile_unit + || abbrev->tag == DW_TAG_skeleton_unit) + && val.encoding == ATTR_VAL_REF_SECTION) + u->str_offsets_base = val.u.uint; + break; + + case DW_AT_addr_base: + if ((abbrev->tag == DW_TAG_compile_unit + || abbrev->tag == DW_TAG_skeleton_unit) + && val.encoding == ATTR_VAL_REF_SECTION) + u->addr_base = val.u.uint; + break; + + case DW_AT_rnglists_base: + if ((abbrev->tag == DW_TAG_compile_unit + || abbrev->tag == DW_TAG_skeleton_unit) + && val.encoding == ATTR_VAL_REF_SECTION) + u->rnglists_base = val.u.uint; + break; + + default: + break; + } + } + + // Resolve strings after we're sure that we have seen + // DW_AT_str_offsets_base. + if (have_name_val) + { + if (!resolve_string (dwarf_sections, u->is_dwarf64, is_bigendian, + u->str_offsets_base, &name_val, + error_callback, data, &u->filename)) + return 0; + } + if (have_comp_dir_val) + { + if (!resolve_string (dwarf_sections, u->is_dwarf64, is_bigendian, + u->str_offsets_base, &comp_dir_val, + error_callback, data, &u->comp_dir)) + return 0; + } + + if (abbrev->tag == DW_TAG_compile_unit + || abbrev->tag == DW_TAG_subprogram + || abbrev->tag == DW_TAG_skeleton_unit) + { + if (!add_ranges (state, dwarf_sections, base_address, + is_bigendian, u, pcrange.lowpc, &pcrange, + add_unit_addr, (void *) u, error_callback, data, + (void *) addrs)) + return 0; + + /* If we found the PC range in the DW_TAG_compile_unit or + DW_TAG_skeleton_unit, we can stop now. 
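+   The remaining DIEs are not needed to build the unit address map; per-function ranges are gathered later, as needed, along with the unit's line information.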
*/ + if ((abbrev->tag == DW_TAG_compile_unit + || abbrev->tag == DW_TAG_skeleton_unit) + && (pcrange.have_ranges + || (pcrange.have_lowpc && pcrange.have_highpc))) + return 1; + } + + if (abbrev->has_children) + { + if (!find_address_ranges (state, base_address, unit_buf, + dwarf_sections, is_bigendian, altlink, + error_callback, data, u, addrs, NULL)) + return 0; + } + } + + return 1; +} + +/* Build a mapping from address ranges to the compilation units where + the line number information for that range can be found. Returns 1 + on success, 0 on failure. */ + +static int +build_address_map (struct backtrace_state *state, + struct libbacktrace_base_address base_address, + const struct dwarf_sections *dwarf_sections, + int is_bigendian, struct dwarf_data *altlink, + backtrace_error_callback error_callback, void *data, + struct unit_addrs_vector *addrs, + struct unit_vector *unit_vec) +{ + struct dwarf_buf info; + struct backtrace_vector units; + size_t units_count; + size_t i; + struct unit **pu; + size_t unit_offset = 0; + struct unit_addrs *pa; + + memset (&addrs->vec, 0, sizeof addrs->vec); + memset (&unit_vec->vec, 0, sizeof unit_vec->vec); + addrs->count = 0; + unit_vec->count = 0; + + /* Read through the .debug_info section. FIXME: Should we use the + .debug_aranges section? gdb and addr2line don't use it, but I'm + not sure why. */ + + info.name = ".debug_info"; + info.start = dwarf_sections->data[DEBUG_INFO]; + info.buf = info.start; + info.left = dwarf_sections->size[DEBUG_INFO]; + info.is_bigendian = is_bigendian; + info.error_callback = error_callback; + info.data = data; + info.reported_underflow = 0; + + memset (&units, 0, sizeof units); + units_count = 0; + + while (info.left > 0) + { + const unsigned char *unit_data_start; + uint64_t len; + int is_dwarf64; + struct dwarf_buf unit_buf; + int version; + int unit_type; + uint64_t abbrev_offset; + int addrsize; + struct unit *u; + enum dwarf_tag unit_tag; + + if (info.reported_underflow) + goto fail; + + unit_data_start = info.buf; + + len = read_initial_length (&info, &is_dwarf64); + unit_buf = info; + unit_buf.left = len; + + if (!advance (&info, len)) + goto fail; + + version = read_uint16 (&unit_buf); + if (version < 2 || version > 5) + { + dwarf_buf_error (&unit_buf, "unrecognized DWARF version", -1); + goto fail; + } + + if (version < 5) + unit_type = 0; + else + { + unit_type = read_byte (&unit_buf); + if (unit_type == DW_UT_type || unit_type == DW_UT_split_type) + { + /* This unit doesn't have anything we need. */ + continue; + } + } + + pu = ((struct unit **) + backtrace_vector_grow (state, sizeof (struct unit *), + error_callback, data, &units)); + if (pu == NULL) + goto fail; + + u = ((struct unit *) + backtrace_alloc (state, sizeof *u, error_callback, data)); + if (u == NULL) + goto fail; + + *pu = u; + ++units_count; + + if (version < 5) + addrsize = 0; /* Set below. 
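+   In DWARF 2 through 4 the address size field follows the abbreviation offset in the unit header, whereas in DWARF 5 it precedes it.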
*/ + else + addrsize = read_byte (&unit_buf); + + memset (&u->abbrevs, 0, sizeof u->abbrevs); + abbrev_offset = read_offset (&unit_buf, is_dwarf64); + if (!read_abbrevs (state, abbrev_offset, + dwarf_sections->data[DEBUG_ABBREV], + dwarf_sections->size[DEBUG_ABBREV], + is_bigendian, error_callback, data, &u->abbrevs)) + goto fail; + + if (version < 5) + addrsize = read_byte (&unit_buf); + + switch (unit_type) + { + case 0: + break; + case DW_UT_compile: case DW_UT_partial: + break; + case DW_UT_skeleton: case DW_UT_split_compile: + read_uint64 (&unit_buf); /* dwo_id */ + break; + default: + break; + } + + u->low_offset = unit_offset; + unit_offset += len + (is_dwarf64 ? 12 : 4); + u->high_offset = unit_offset; + u->unit_data = unit_buf.buf; + u->unit_data_len = unit_buf.left; + u->unit_data_offset = unit_buf.buf - unit_data_start; + u->version = version; + u->is_dwarf64 = is_dwarf64; + u->addrsize = addrsize; + u->filename = NULL; + u->comp_dir = NULL; + u->abs_filename = NULL; + u->lineoff = 0; + u->str_offsets_base = 0; + u->addr_base = 0; + u->rnglists_base = 0; + + /* The actual line number mappings will be read as needed. */ + u->lines = NULL; + u->lines_count = 0; + u->function_addrs = NULL; + u->function_addrs_count = 0; + + if (!find_address_ranges (state, base_address, &unit_buf, dwarf_sections, + is_bigendian, altlink, error_callback, data, + u, addrs, &unit_tag)) + goto fail; + + if (unit_buf.reported_underflow) + goto fail; + } + if (info.reported_underflow) + goto fail; + + /* Add a trailing addrs entry, but don't include it in addrs->count. */ + pa = ((struct unit_addrs *) + backtrace_vector_grow (state, sizeof (struct unit_addrs), + error_callback, data, &addrs->vec)); + if (pa == NULL) + goto fail; + pa->low = 0; + --pa->low; + pa->high = pa->low; + pa->u = NULL; + + unit_vec->vec = units; + unit_vec->count = units_count; + return 1; + + fail: + if (units_count > 0) + { + pu = (struct unit **) units.base; + for (i = 0; i < units_count; i++) + { + free_abbrevs (state, &pu[i]->abbrevs, error_callback, data); + backtrace_free (state, pu[i], sizeof **pu, error_callback, data); + } + backtrace_vector_free (state, &units, error_callback, data); + } + if (addrs->count > 0) + { + backtrace_vector_free (state, &addrs->vec, error_callback, data); + addrs->count = 0; + } + return 0; +} + +/* Add a new mapping to the vector of line mappings that we are + building. Returns 1 on success, 0 on failure. */ + +static int +add_line (struct backtrace_state *state, struct dwarf_data *ddata, + uintptr_t pc, const char *filename, int lineno, + backtrace_error_callback error_callback, void *data, + struct line_vector *vec) +{ + struct line *ln; + + /* If we are adding the same mapping, ignore it. This can happen + when using discriminators. */ + if (vec->count > 0) + { + ln = (struct line *) vec->vec.base + (vec->count - 1); + if (pc == ln->pc && filename == ln->filename && lineno == ln->lineno) + return 1; + } + + ln = ((struct line *) + backtrace_vector_grow (state, sizeof (struct line), error_callback, + data, &vec->vec)); + if (ln == NULL) + return 0; + + /* Add in the base address here, so that we can look up the PC + directly. */ + ln->pc = libbacktrace_add_base (pc, ddata->base_address); + + ln->filename = filename; + ln->lineno = lineno; + ln->idx = vec->count; + + ++vec->count; + + return 1; +} + +/* Free the line header information. 
*/ + +static void +free_line_header (struct backtrace_state *state, struct line_header *hdr, + backtrace_error_callback error_callback, void *data) +{ + if (hdr->dirs_count != 0) + backtrace_free (state, hdr->dirs, hdr->dirs_count * sizeof (const char *), + error_callback, data); + backtrace_free (state, hdr->filenames, + hdr->filenames_count * sizeof (char *), + error_callback, data); +} + +/* Read the directories and file names for a line header for version + 2, setting fields in HDR. Return 1 on success, 0 on failure. */ + +static int +read_v2_paths (struct backtrace_state *state, struct unit *u, + struct dwarf_buf *hdr_buf, struct line_header *hdr) +{ + const unsigned char *p; + const unsigned char *pend; + size_t i; + + /* Count the number of directory entries. */ + hdr->dirs_count = 0; + p = hdr_buf->buf; + pend = p + hdr_buf->left; + while (p < pend && *p != '\0') + { + p += strnlen((const char *) p, pend - p) + 1; + ++hdr->dirs_count; + } + + /* The index of the first entry in the list of directories is 1. Index 0 is + used for the current directory of the compilation. To simplify index + handling, we set entry 0 to the compilation unit directory. */ + ++hdr->dirs_count; + hdr->dirs = ((const char **) + backtrace_alloc (state, + hdr->dirs_count * sizeof (const char *), + hdr_buf->error_callback, + hdr_buf->data)); + if (hdr->dirs == NULL) + return 0; + + hdr->dirs[0] = u->comp_dir; + i = 1; + while (*hdr_buf->buf != '\0') + { + if (hdr_buf->reported_underflow) + return 0; + + hdr->dirs[i] = read_string (hdr_buf); + if (hdr->dirs[i] == NULL) + return 0; + ++i; + } + if (!advance (hdr_buf, 1)) + return 0; + + /* Count the number of file entries. */ + hdr->filenames_count = 0; + p = hdr_buf->buf; + pend = p + hdr_buf->left; + while (p < pend && *p != '\0') + { + p += strnlen ((const char *) p, pend - p) + 1; + p += leb128_len (p); + p += leb128_len (p); + p += leb128_len (p); + ++hdr->filenames_count; + } + + /* The index of the first entry in the list of file names is 1. Index 0 is + used for the DW_AT_name of the compilation unit. To simplify index + handling, we set entry 0 to the compilation unit file name. */ + ++hdr->filenames_count; + hdr->filenames = ((const char **) + backtrace_alloc (state, + hdr->filenames_count * sizeof (char *), + hdr_buf->error_callback, + hdr_buf->data)); + if (hdr->filenames == NULL) + return 0; + hdr->filenames[0] = u->filename; + i = 1; + while (*hdr_buf->buf != '\0') + { + const char *filename; + uint64_t dir_index; + + if (hdr_buf->reported_underflow) + return 0; + + filename = read_string (hdr_buf); + if (filename == NULL) + return 0; + dir_index = read_uleb128 (hdr_buf); + if (IS_ABSOLUTE_PATH (filename) + || (dir_index < hdr->dirs_count && hdr->dirs[dir_index] == NULL)) + hdr->filenames[i] = filename; + else + { + const char *dir; + size_t dir_len; + size_t filename_len; + char *s; + + if (dir_index < hdr->dirs_count) + dir = hdr->dirs[dir_index]; + else + { + dwarf_buf_error (hdr_buf, + ("invalid directory index in " + "line number program header"), + 0); + return 0; + } + dir_len = strlen (dir); + filename_len = strlen (filename); + s = ((char *) backtrace_alloc (state, dir_len + filename_len + 2, + hdr_buf->error_callback, + hdr_buf->data)); + if (s == NULL) + return 0; + memcpy (s, dir, dir_len); + /* FIXME: If we are on a DOS-based file system, and the + directory or the file name use backslashes, then we + should use a backslash here. 
*/ + s[dir_len] = '/'; + memcpy (s + dir_len + 1, filename, filename_len + 1); + hdr->filenames[i] = s; + } + + /* Ignore the modification time and size. */ + read_uleb128 (hdr_buf); + read_uleb128 (hdr_buf); + + ++i; + } + + return 1; +} + +/* Read a single version 5 LNCT entry for a directory or file name in a + line header. Sets *STRING to the resulting name, ignoring other + data. Return 1 on success, 0 on failure. */ + +static int +read_lnct (struct backtrace_state *state, struct dwarf_data *ddata, + struct unit *u, struct dwarf_buf *hdr_buf, + const struct line_header *hdr, size_t formats_count, + const struct line_header_format *formats, const char **string) +{ + size_t i; + const char *dir; + const char *path; + + dir = NULL; + path = NULL; + for (i = 0; i < formats_count; i++) + { + struct attr_val val; + + if (!read_attribute (formats[i].form, 0, hdr_buf, u->is_dwarf64, + u->version, hdr->addrsize, &ddata->dwarf_sections, + ddata->altlink, &val)) + return 0; + switch (formats[i].lnct) + { + case DW_LNCT_path: + if (!resolve_string (&ddata->dwarf_sections, u->is_dwarf64, + ddata->is_bigendian, u->str_offsets_base, + &val, hdr_buf->error_callback, hdr_buf->data, + &path)) + return 0; + break; + case DW_LNCT_directory_index: + if (val.encoding == ATTR_VAL_UINT) + { + if (val.u.uint >= hdr->dirs_count) + { + dwarf_buf_error (hdr_buf, + ("invalid directory index in " + "line number program header"), + 0); + return 0; + } + dir = hdr->dirs[val.u.uint]; + } + break; + default: + /* We don't care about timestamps or sizes or hashes. */ + break; + } + } + + if (path == NULL) + { + dwarf_buf_error (hdr_buf, + "missing file name in line number program header", + 0); + return 0; + } + + if (dir == NULL) + *string = path; + else + { + size_t dir_len; + size_t path_len; + char *s; + + dir_len = strlen (dir); + path_len = strlen (path); + s = (char *) backtrace_alloc (state, dir_len + path_len + 2, + hdr_buf->error_callback, hdr_buf->data); + if (s == NULL) + return 0; + memcpy (s, dir, dir_len); + /* FIXME: If we are on a DOS-based file system, and the + directory or the path name use backslashes, then we should + use a backslash here. */ + s[dir_len] = '/'; + memcpy (s + dir_len + 1, path, path_len + 1); + *string = s; + } + + return 1; +} + +/* Read a set of DWARF 5 line header format entries, setting *PCOUNT + and *PPATHS. Return 1 on success, 0 on failure. 
*/ + +static int +read_line_header_format_entries (struct backtrace_state *state, + struct dwarf_data *ddata, + struct unit *u, + struct dwarf_buf *hdr_buf, + struct line_header *hdr, + size_t *pcount, + const char ***ppaths) +{ + size_t formats_count; + struct line_header_format *formats; + size_t paths_count; + const char **paths; + size_t i; + int ret; + + formats_count = read_byte (hdr_buf); + if (formats_count == 0) + formats = NULL; + else + { + formats = ((struct line_header_format *) + backtrace_alloc (state, + (formats_count + * sizeof (struct line_header_format)), + hdr_buf->error_callback, + hdr_buf->data)); + if (formats == NULL) + return 0; + + for (i = 0; i < formats_count; i++) + { + formats[i].lnct = (int) read_uleb128(hdr_buf); + formats[i].form = (enum dwarf_form) read_uleb128 (hdr_buf); + } + } + + paths_count = read_uleb128 (hdr_buf); + if (paths_count == 0) + { + *pcount = 0; + *ppaths = NULL; + ret = 1; + goto exit; + } + + paths = ((const char **) + backtrace_alloc (state, paths_count * sizeof (const char *), + hdr_buf->error_callback, hdr_buf->data)); + if (paths == NULL) + { + ret = 0; + goto exit; + } + for (i = 0; i < paths_count; i++) + { + if (!read_lnct (state, ddata, u, hdr_buf, hdr, formats_count, + formats, &paths[i])) + { + backtrace_free (state, paths, + paths_count * sizeof (const char *), + hdr_buf->error_callback, hdr_buf->data); + ret = 0; + goto exit; + } + } + + *pcount = paths_count; + *ppaths = paths; + + ret = 1; + + exit: + if (formats != NULL) + backtrace_free (state, formats, + formats_count * sizeof (struct line_header_format), + hdr_buf->error_callback, hdr_buf->data); + + return ret; +} + +/* Read the line header. Return 1 on success, 0 on failure. */ + +static int +read_line_header (struct backtrace_state *state, struct dwarf_data *ddata, + struct unit *u, int is_dwarf64, struct dwarf_buf *line_buf, + struct line_header *hdr) +{ + uint64_t hdrlen; + struct dwarf_buf hdr_buf; + + hdr->version = read_uint16 (line_buf); + if (hdr->version < 2 || hdr->version > 5) + { + dwarf_buf_error (line_buf, "unsupported line number version", -1); + return 0; + } + + if (hdr->version < 5) + hdr->addrsize = u->addrsize; + else + { + hdr->addrsize = read_byte (line_buf); + /* We could support a non-zero segment_selector_size but I doubt + we'll ever see it. */ + if (read_byte (line_buf) != 0) + { + dwarf_buf_error (line_buf, + "non-zero segment_selector_size not supported", + -1); + return 0; + } + } + + hdrlen = read_offset (line_buf, is_dwarf64); + + hdr_buf = *line_buf; + hdr_buf.left = hdrlen; + + if (!advance (line_buf, hdrlen)) + return 0; + + hdr->min_insn_len = read_byte (&hdr_buf); + if (hdr->version < 4) + hdr->max_ops_per_insn = 1; + else + hdr->max_ops_per_insn = read_byte (&hdr_buf); + + /* We don't care about default_is_stmt. 
*/ + read_byte (&hdr_buf); + + hdr->line_base = read_sbyte (&hdr_buf); + hdr->line_range = read_byte (&hdr_buf); + + hdr->opcode_base = read_byte (&hdr_buf); + hdr->opcode_lengths = hdr_buf.buf; + if (!advance (&hdr_buf, hdr->opcode_base - 1)) + return 0; + + if (hdr->version < 5) + { + if (!read_v2_paths (state, u, &hdr_buf, hdr)) + return 0; + } + else + { + if (!read_line_header_format_entries (state, ddata, u, &hdr_buf, hdr, + &hdr->dirs_count, + &hdr->dirs)) + return 0; + if (!read_line_header_format_entries (state, ddata, u, &hdr_buf, hdr, + &hdr->filenames_count, + &hdr->filenames)) + return 0; + } + + if (hdr_buf.reported_underflow) + return 0; + + return 1; +} + +/* Read the line program, adding line mappings to VEC. Return 1 on + success, 0 on failure. */ + +static int +read_line_program (struct backtrace_state *state, struct dwarf_data *ddata, + const struct line_header *hdr, struct dwarf_buf *line_buf, + struct line_vector *vec) +{ + uint64_t address; + unsigned int op_index; + const char *reset_filename; + const char *filename; + int lineno; + + address = 0; + op_index = 0; + if (hdr->filenames_count > 1) + reset_filename = hdr->filenames[1]; + else + reset_filename = ""; + filename = reset_filename; + lineno = 1; + while (line_buf->left > 0) + { + unsigned int op; + + op = read_byte (line_buf); + if (op >= hdr->opcode_base) + { + unsigned int advance; + + /* Special opcode. */ + op -= hdr->opcode_base; + advance = op / hdr->line_range; + address += (hdr->min_insn_len * (op_index + advance) + / hdr->max_ops_per_insn); + op_index = (op_index + advance) % hdr->max_ops_per_insn; + lineno += hdr->line_base + (int) (op % hdr->line_range); + add_line (state, ddata, address, filename, lineno, + line_buf->error_callback, line_buf->data, vec); + } + else if (op == DW_LNS_extended_op) + { + uint64_t len; + + len = read_uleb128 (line_buf); + op = read_byte (line_buf); + switch (op) + { + case DW_LNE_end_sequence: + /* FIXME: Should we mark the high PC here? It seems + that we already have that information from the + compilation unit. */ + address = 0; + op_index = 0; + filename = reset_filename; + lineno = 1; + break; + case DW_LNE_set_address: + address = read_address (line_buf, hdr->addrsize); + break; + case DW_LNE_define_file: + { + const char *f; + unsigned int dir_index; + + f = read_string (line_buf); + if (f == NULL) + return 0; + dir_index = read_uleb128 (line_buf); + /* Ignore that time and length. */ + read_uleb128 (line_buf); + read_uleb128 (line_buf); + if (IS_ABSOLUTE_PATH (f)) + filename = f; + else + { + const char *dir; + size_t dir_len; + size_t f_len; + char *p; + + if (dir_index < hdr->dirs_count) + dir = hdr->dirs[dir_index]; + else + { + dwarf_buf_error (line_buf, + ("invalid directory index " + "in line number program"), + 0); + return 0; + } + dir_len = strlen (dir); + f_len = strlen (f); + p = ((char *) + backtrace_alloc (state, dir_len + f_len + 2, + line_buf->error_callback, + line_buf->data)); + if (p == NULL) + return 0; + memcpy (p, dir, dir_len); + /* FIXME: If we are on a DOS-based file system, + and the directory or the file name use + backslashes, then we should use a backslash + here. */ + p[dir_len] = '/'; + memcpy (p + dir_len + 1, f, f_len + 1); + filename = p; + } + } + break; + case DW_LNE_set_discriminator: + /* We don't care about discriminators. 
*/ + read_uleb128 (line_buf); + break; + default: + if (!advance (line_buf, len - 1)) + return 0; + break; + } + } + else + { + switch (op) + { + case DW_LNS_copy: + add_line (state, ddata, address, filename, lineno, + line_buf->error_callback, line_buf->data, vec); + break; + case DW_LNS_advance_pc: + { + uint64_t advance; + + advance = read_uleb128 (line_buf); + address += (hdr->min_insn_len * (op_index + advance) + / hdr->max_ops_per_insn); + op_index = (op_index + advance) % hdr->max_ops_per_insn; + } + break; + case DW_LNS_advance_line: + lineno += (int) read_sleb128 (line_buf); + break; + case DW_LNS_set_file: + { + uint64_t fileno; + + fileno = read_uleb128 (line_buf); + if (fileno >= hdr->filenames_count) + { + dwarf_buf_error (line_buf, + ("invalid file number in " + "line number program"), + 0); + return 0; + } + filename = hdr->filenames[fileno]; + } + break; + case DW_LNS_set_column: + read_uleb128 (line_buf); + break; + case DW_LNS_negate_stmt: + break; + case DW_LNS_set_basic_block: + break; + case DW_LNS_const_add_pc: + { + unsigned int advance; + + op = 255 - hdr->opcode_base; + advance = op / hdr->line_range; + address += (hdr->min_insn_len * (op_index + advance) + / hdr->max_ops_per_insn); + op_index = (op_index + advance) % hdr->max_ops_per_insn; + } + break; + case DW_LNS_fixed_advance_pc: + address += read_uint16 (line_buf); + op_index = 0; + break; + case DW_LNS_set_prologue_end: + break; + case DW_LNS_set_epilogue_begin: + break; + case DW_LNS_set_isa: + read_uleb128 (line_buf); + break; + default: + { + unsigned int i; + + for (i = hdr->opcode_lengths[op - 1]; i > 0; --i) + read_uleb128 (line_buf); + } + break; + } + } + } + + return 1; +} + +/* Read the line number information for a compilation unit. Returns 1 + on success, 0 on failure. */ + +static int +read_line_info (struct backtrace_state *state, struct dwarf_data *ddata, + backtrace_error_callback error_callback, void *data, + struct unit *u, struct line_header *hdr, struct line **lines, + size_t *lines_count) +{ + struct line_vector vec; + struct dwarf_buf line_buf; + uint64_t len; + int is_dwarf64; + struct line *ln; + + memset (&vec.vec, 0, sizeof vec.vec); + vec.count = 0; + + memset (hdr, 0, sizeof *hdr); + + if (u->lineoff != (off_t) (size_t) u->lineoff + || (size_t) u->lineoff >= ddata->dwarf_sections.size[DEBUG_LINE]) + { + error_callback (data, "unit line offset out of range", 0); + goto fail; + } + + line_buf.name = ".debug_line"; + line_buf.start = ddata->dwarf_sections.data[DEBUG_LINE]; + line_buf.buf = ddata->dwarf_sections.data[DEBUG_LINE] + u->lineoff; + line_buf.left = ddata->dwarf_sections.size[DEBUG_LINE] - u->lineoff; + line_buf.is_bigendian = ddata->is_bigendian; + line_buf.error_callback = error_callback; + line_buf.data = data; + line_buf.reported_underflow = 0; + + len = read_initial_length (&line_buf, &is_dwarf64); + line_buf.left = len; + + if (!read_line_header (state, ddata, u, is_dwarf64, &line_buf, hdr)) + goto fail; + + if (!read_line_program (state, ddata, hdr, &line_buf, &vec)) + goto fail; + + if (line_buf.reported_underflow) + goto fail; + + if (vec.count == 0) + { + /* This is not a failure in the sense of a generating an error, + but it is a failure in that sense that we have no useful + information. */ + goto fail; + } + + /* Allocate one extra entry at the end. 
*/ + ln = ((struct line *) + backtrace_vector_grow (state, sizeof (struct line), error_callback, + data, &vec.vec)); + if (ln == NULL) + goto fail; + ln->pc = (uintptr_t) -1; + ln->filename = NULL; + ln->lineno = 0; + ln->idx = 0; + + if (!backtrace_vector_release (state, &vec.vec, error_callback, data)) + goto fail; + + ln = (struct line *) vec.vec.base; + backtrace_qsort (ln, vec.count, sizeof (struct line), line_compare); + + *lines = ln; + *lines_count = vec.count; + + return 1; + + fail: + backtrace_vector_free (state, &vec.vec, error_callback, data); + free_line_header (state, hdr, error_callback, data); + *lines = (struct line *) (uintptr_t) -1; + *lines_count = 0; + return 0; +} + +static const char *read_referenced_name (struct dwarf_data *, struct unit *, + uint64_t, backtrace_error_callback, + void *); + +/* Read the name of a function from a DIE referenced by ATTR with VAL. */ + +static const char * +read_referenced_name_from_attr (struct dwarf_data *ddata, struct unit *u, + struct attr *attr, struct attr_val *val, + backtrace_error_callback error_callback, + void *data) +{ + switch (attr->name) + { + case DW_AT_abstract_origin: + case DW_AT_specification: + break; + default: + return NULL; + } + + if (attr->form == DW_FORM_ref_sig8) + return NULL; + + if (val->encoding == ATTR_VAL_REF_INFO) + { + struct unit *unit + = find_unit (ddata->units, ddata->units_count, + val->u.uint); + if (unit == NULL) + return NULL; + + uint64_t offset = val->u.uint - unit->low_offset; + return read_referenced_name (ddata, unit, offset, error_callback, data); + } + + if (val->encoding == ATTR_VAL_UINT + || val->encoding == ATTR_VAL_REF_UNIT) + return read_referenced_name (ddata, u, val->u.uint, error_callback, data); + + if (val->encoding == ATTR_VAL_REF_ALT_INFO) + { + struct unit *alt_unit + = find_unit (ddata->altlink->units, ddata->altlink->units_count, + val->u.uint); + if (alt_unit == NULL) + return NULL; + + uint64_t offset = val->u.uint - alt_unit->low_offset; + return read_referenced_name (ddata->altlink, alt_unit, offset, + error_callback, data); + } + + return NULL; +} + +/* Read the name of a function from a DIE referenced by a + DW_AT_abstract_origin or DW_AT_specification tag. OFFSET is within + the same compilation unit. */ + +static const char * +read_referenced_name (struct dwarf_data *ddata, struct unit *u, + uint64_t offset, backtrace_error_callback error_callback, + void *data) +{ + struct dwarf_buf unit_buf; + uint64_t code; + const struct abbrev *abbrev; + const char *ret; + size_t i; + + /* OFFSET is from the start of the data for this compilation unit. + U->unit_data is the data, but it starts U->unit_data_offset bytes + from the beginning. 
*/ + + if (offset < u->unit_data_offset + || offset - u->unit_data_offset >= u->unit_data_len) + { + error_callback (data, + "abstract origin or specification out of range", + 0); + return NULL; + } + + offset -= u->unit_data_offset; + + unit_buf.name = ".debug_info"; + unit_buf.start = ddata->dwarf_sections.data[DEBUG_INFO]; + unit_buf.buf = u->unit_data + offset; + unit_buf.left = u->unit_data_len - offset; + unit_buf.is_bigendian = ddata->is_bigendian; + unit_buf.error_callback = error_callback; + unit_buf.data = data; + unit_buf.reported_underflow = 0; + + code = read_uleb128 (&unit_buf); + if (code == 0) + { + dwarf_buf_error (&unit_buf, + "invalid abstract origin or specification", + 0); + return NULL; + } + + abbrev = lookup_abbrev (&u->abbrevs, code, error_callback, data); + if (abbrev == NULL) + return NULL; + + ret = NULL; + for (i = 0; i < abbrev->num_attrs; ++i) + { + struct attr_val val; + + if (!read_attribute (abbrev->attrs[i].form, abbrev->attrs[i].val, + &unit_buf, u->is_dwarf64, u->version, u->addrsize, + &ddata->dwarf_sections, ddata->altlink, &val)) + return NULL; + + switch (abbrev->attrs[i].name) + { + case DW_AT_name: + /* Third name preference: don't override. A name we found in some + other way, will normally be more useful -- e.g., this name is + normally not mangled. */ + if (ret != NULL) + break; + if (!resolve_string (&ddata->dwarf_sections, u->is_dwarf64, + ddata->is_bigendian, u->str_offsets_base, + &val, error_callback, data, &ret)) + return NULL; + break; + + case DW_AT_linkage_name: + case DW_AT_MIPS_linkage_name: + /* First name preference: override all. */ + { + const char *s; + + s = NULL; + if (!resolve_string (&ddata->dwarf_sections, u->is_dwarf64, + ddata->is_bigendian, u->str_offsets_base, + &val, error_callback, data, &s)) + return NULL; + if (s != NULL) + return s; + } + break; + + case DW_AT_specification: + /* Second name preference: override DW_AT_name, don't override + DW_AT_linkage_name. */ + { + const char *name; + + name = read_referenced_name_from_attr (ddata, u, &abbrev->attrs[i], + &val, error_callback, data); + if (name != NULL) + ret = name; + } + break; + + default: + break; + } + } + + return ret; +} + +/* Add a range to a unit that maps to a function. This is called via + add_ranges. Returns 1 on success, 0 on error. */ + +static int +add_function_range (struct backtrace_state *state, void *rdata, + uintptr_t lowpc, uintptr_t highpc, + backtrace_error_callback error_callback, void *data, + void *pvec) +{ + struct function *function = (struct function *) rdata; + struct function_vector *vec = (struct function_vector *) pvec; + struct function_addrs *p; + + if (vec->count > 0) + { + p = (struct function_addrs *) vec->vec.base + (vec->count - 1); + if ((lowpc == p->high || lowpc == p->high + 1) + && function == p->function) + { + if (highpc > p->high) + p->high = highpc; + return 1; + } + } + + p = ((struct function_addrs *) + backtrace_vector_grow (state, sizeof (struct function_addrs), + error_callback, data, &vec->vec)); + if (p == NULL) + return 0; + + p->low = lowpc; + p->high = highpc; + p->function = function; + + ++vec->count; + + return 1; +} + +/* Read one entry plus all its children. Add function addresses to + VEC. Returns 1 on success, 0 on error. 
*/ + +static int +read_function_entry (struct backtrace_state *state, struct dwarf_data *ddata, + struct unit *u, uintptr_t base, struct dwarf_buf *unit_buf, + const struct line_header *lhdr, + backtrace_error_callback error_callback, void *data, + struct function_vector *vec_function, + struct function_vector *vec_inlined) +{ + while (unit_buf->left > 0) + { + uint64_t code; + const struct abbrev *abbrev; + int is_function; + struct function *function; + struct function_vector *vec; + size_t i; + struct pcrange pcrange; + int have_linkage_name; + + code = read_uleb128 (unit_buf); + if (code == 0) + return 1; + + abbrev = lookup_abbrev (&u->abbrevs, code, error_callback, data); + if (abbrev == NULL) + return 0; + + is_function = (abbrev->tag == DW_TAG_subprogram + || abbrev->tag == DW_TAG_entry_point + || abbrev->tag == DW_TAG_inlined_subroutine); + + if (abbrev->tag == DW_TAG_inlined_subroutine) + vec = vec_inlined; + else + vec = vec_function; + + function = NULL; + if (is_function) + { + function = ((struct function *) + backtrace_alloc (state, sizeof *function, + error_callback, data)); + if (function == NULL) + return 0; + memset (function, 0, sizeof *function); + } + + memset (&pcrange, 0, sizeof pcrange); + have_linkage_name = 0; + for (i = 0; i < abbrev->num_attrs; ++i) + { + struct attr_val val; + + if (!read_attribute (abbrev->attrs[i].form, abbrev->attrs[i].val, + unit_buf, u->is_dwarf64, u->version, + u->addrsize, &ddata->dwarf_sections, + ddata->altlink, &val)) + return 0; + + /* The compile unit sets the base address for any address + ranges in the function entries. */ + if ((abbrev->tag == DW_TAG_compile_unit + || abbrev->tag == DW_TAG_skeleton_unit) + && abbrev->attrs[i].name == DW_AT_low_pc) + { + if (val.encoding == ATTR_VAL_ADDRESS) + base = (uintptr_t) val.u.uint; + else if (val.encoding == ATTR_VAL_ADDRESS_INDEX) + { + if (!resolve_addr_index (&ddata->dwarf_sections, + u->addr_base, u->addrsize, + ddata->is_bigendian, val.u.uint, + error_callback, data, &base)) + return 0; + } + } + + if (is_function) + { + switch (abbrev->attrs[i].name) + { + case DW_AT_call_file: + if (val.encoding == ATTR_VAL_UINT) + { + if (val.u.uint >= lhdr->filenames_count) + { + dwarf_buf_error (unit_buf, + ("invalid file number in " + "DW_AT_call_file attribute"), + 0); + return 0; + } + function->caller_filename = lhdr->filenames[val.u.uint]; + } + break; + + case DW_AT_call_line: + if (val.encoding == ATTR_VAL_UINT) + function->caller_lineno = val.u.uint; + break; + + case DW_AT_abstract_origin: + case DW_AT_specification: + /* Second name preference: override DW_AT_name, don't override + DW_AT_linkage_name. */ + if (have_linkage_name) + break; + { + const char *name; + + name + = read_referenced_name_from_attr (ddata, u, + &abbrev->attrs[i], &val, + error_callback, data); + if (name != NULL) + function->name = name; + } + break; + + case DW_AT_name: + /* Third name preference: don't override. */ + if (function->name != NULL) + break; + if (!resolve_string (&ddata->dwarf_sections, u->is_dwarf64, + ddata->is_bigendian, + u->str_offsets_base, &val, + error_callback, data, &function->name)) + return 0; + break; + + case DW_AT_linkage_name: + case DW_AT_MIPS_linkage_name: + /* First name preference: override all. 
*/ + { + const char *s; + + s = NULL; + if (!resolve_string (&ddata->dwarf_sections, u->is_dwarf64, + ddata->is_bigendian, + u->str_offsets_base, &val, + error_callback, data, &s)) + return 0; + if (s != NULL) + { + function->name = s; + have_linkage_name = 1; + } + } + break; + + case DW_AT_low_pc: case DW_AT_high_pc: case DW_AT_ranges: + update_pcrange (&abbrev->attrs[i], &val, &pcrange); + break; + + default: + break; + } + } + } + + /* If we couldn't find a name for the function, we have no use + for it. */ + if (is_function && function->name == NULL) + { + backtrace_free (state, function, sizeof *function, + error_callback, data); + is_function = 0; + } + + if (is_function) + { + if (pcrange.have_ranges + || (pcrange.have_lowpc && pcrange.have_highpc)) + { + if (!add_ranges (state, &ddata->dwarf_sections, + ddata->base_address, ddata->is_bigendian, + u, base, &pcrange, add_function_range, + (void *) function, error_callback, data, + (void *) vec)) + return 0; + } + else + { + backtrace_free (state, function, sizeof *function, + error_callback, data); + is_function = 0; + } + } + + if (abbrev->has_children) + { + if (!is_function) + { + if (!read_function_entry (state, ddata, u, base, unit_buf, lhdr, + error_callback, data, vec_function, + vec_inlined)) + return 0; + } + else + { + struct function_vector fvec; + + /* Gather any information for inlined functions in + FVEC. */ + + memset (&fvec, 0, sizeof fvec); + + if (!read_function_entry (state, ddata, u, base, unit_buf, lhdr, + error_callback, data, vec_function, + &fvec)) + return 0; + + if (fvec.count > 0) + { + struct function_addrs *p; + struct function_addrs *faddrs; + + /* Allocate a trailing entry, but don't include it + in fvec.count. */ + p = ((struct function_addrs *) + backtrace_vector_grow (state, + sizeof (struct function_addrs), + error_callback, data, + &fvec.vec)); + if (p == NULL) + return 0; + p->low = 0; + --p->low; + p->high = p->low; + p->function = NULL; + + if (!backtrace_vector_release (state, &fvec.vec, + error_callback, data)) + return 0; + + faddrs = (struct function_addrs *) fvec.vec.base; + backtrace_qsort (faddrs, fvec.count, + sizeof (struct function_addrs), + function_addrs_compare); + + function->function_addrs = faddrs; + function->function_addrs_count = fvec.count; + } + } + } + } + + return 1; +} + +/* Read function name information for a compilation unit. We look + through the whole unit looking for function tags. */ + +static void +read_function_info (struct backtrace_state *state, struct dwarf_data *ddata, + const struct line_header *lhdr, + backtrace_error_callback error_callback, void *data, + struct unit *u, struct function_vector *fvec, + struct function_addrs **ret_addrs, + size_t *ret_addrs_count) +{ + struct function_vector lvec; + struct function_vector *pfvec; + struct dwarf_buf unit_buf; + struct function_addrs *p; + struct function_addrs *addrs; + size_t addrs_count; + + /* Use FVEC if it is not NULL. Otherwise use our own vector. 
*/ + if (fvec != NULL) + pfvec = fvec; + else + { + memset (&lvec, 0, sizeof lvec); + pfvec = &lvec; + } + + unit_buf.name = ".debug_info"; + unit_buf.start = ddata->dwarf_sections.data[DEBUG_INFO]; + unit_buf.buf = u->unit_data; + unit_buf.left = u->unit_data_len; + unit_buf.is_bigendian = ddata->is_bigendian; + unit_buf.error_callback = error_callback; + unit_buf.data = data; + unit_buf.reported_underflow = 0; + + while (unit_buf.left > 0) + { + if (!read_function_entry (state, ddata, u, 0, &unit_buf, lhdr, + error_callback, data, pfvec, pfvec)) + return; + } + + if (pfvec->count == 0) + return; + + /* Allocate a trailing entry, but don't include it in + pfvec->count. */ + p = ((struct function_addrs *) + backtrace_vector_grow (state, sizeof (struct function_addrs), + error_callback, data, &pfvec->vec)); + if (p == NULL) + return; + p->low = 0; + --p->low; + p->high = p->low; + p->function = NULL; + + addrs_count = pfvec->count; + + if (fvec == NULL) + { + if (!backtrace_vector_release (state, &lvec.vec, error_callback, data)) + return; + addrs = (struct function_addrs *) pfvec->vec.base; + } + else + { + /* Finish this list of addresses, but leave the remaining space in + the vector available for the next function unit. */ + addrs = ((struct function_addrs *) + backtrace_vector_finish (state, &fvec->vec, + error_callback, data)); + if (addrs == NULL) + return; + fvec->count = 0; + } + + backtrace_qsort (addrs, addrs_count, sizeof (struct function_addrs), + function_addrs_compare); + + *ret_addrs = addrs; + *ret_addrs_count = addrs_count; +} + +/* See if PC is inlined in FUNCTION. If it is, print out the inlined + information, and update FILENAME and LINENO for the caller. + Returns whatever CALLBACK returns, or 0 to keep going. */ + +static int +report_inlined_functions (uintptr_t pc, struct function *function, const char* comp_dir, + backtrace_full_callback callback, void *data, + const char **filename, int *lineno) +{ + struct function_addrs *p; + struct function_addrs *match; + struct function *inlined; + int ret; + + if (function->function_addrs_count == 0) + return 0; + + /* Our search isn't safe if pc == -1, as that is the sentinel + value. */ + if (pc + 1 == 0) + return 0; + + p = ((struct function_addrs *) + bsearch (&pc, function->function_addrs, + function->function_addrs_count, + sizeof (struct function_addrs), + function_addrs_search)); + if (p == NULL) + return 0; + + /* Here pc >= p->low && pc < (p + 1)->low. The function_addrs are + sorted by low, so if pc > p->low we are at the end of a range of + function_addrs with the same low value. If pc == p->low walk + forward to the end of the range with that low value. Then walk + backward and use the first range that includes pc. */ + while (pc == (p + 1)->low) + ++p; + match = NULL; + while (1) + { + if (pc < p->high) + { + match = p; + break; + } + if (p == function->function_addrs) + break; + if ((p - 1)->low < p->low) + break; + --p; + } + if (match == NULL) + return 0; + + /* We found an inlined call. */ + + inlined = match->function; + + /* Report any calls inlined into this one. */ + ret = report_inlined_functions (pc, inlined, comp_dir, callback, data, + filename, lineno); + if (ret != 0) + return ret; + + /* Report this inlined call. 
*/ + if (*filename[0] != '/' && comp_dir) + { + char buf[1024]; + snprintf (buf, 1024, "%s/%s", comp_dir, *filename); + ret = callback (data, pc, match->low, buf, *lineno, inlined->name); + } + else + { + ret = callback (data, pc, match->low, *filename, *lineno, inlined->name); + } + if (ret != 0) + return ret; + + /* Our caller will report the caller of the inlined function; tell + it the appropriate filename and line number. */ + *filename = inlined->caller_filename; + *lineno = inlined->caller_lineno; + + return 0; +} + +/* Look for a PC in the DWARF mapping for one module. On success, + call CALLBACK and return whatever it returns. On error, call + ERROR_CALLBACK and return 0. Sets *FOUND to 1 if the PC is found, + 0 if not. */ + +static int +dwarf_lookup_pc (struct backtrace_state *state, struct dwarf_data *ddata, + uintptr_t pc, backtrace_full_callback callback, + backtrace_error_callback error_callback, void *data, + int *found) +{ + struct unit_addrs *entry; + int found_entry; + struct unit *u; + int new_data; + struct line *lines; + struct line *ln; + struct function_addrs *p; + struct function_addrs *fmatch; + struct function *function; + const char *filename; + int lineno; + int ret; + + *found = 1; + + /* Find an address range that includes PC. Our search isn't safe if + PC == -1, as we use that as a sentinel value, so skip the search + in that case. */ + entry = (ddata->addrs_count == 0 || pc + 1 == 0 + ? NULL + : (struct unit_addrs*)bsearch (&pc, ddata->addrs, ddata->addrs_count, + sizeof (struct unit_addrs), unit_addrs_search)); + + if (entry == NULL) + { + *found = 0; + return 0; + } + + /* Here pc >= entry->low && pc < (entry + 1)->low. The unit_addrs + are sorted by low, so if pc > p->low we are at the end of a range + of unit_addrs with the same low value. If pc == p->low walk + forward to the end of the range with that low value. Then walk + backward and use the first range that includes pc. */ + while (pc == (entry + 1)->low) + ++entry; + found_entry = 0; + while (1) + { + if (pc < entry->high) + { + found_entry = 1; + break; + } + if (entry == ddata->addrs) + break; + if ((entry - 1)->low < entry->low) + break; + --entry; + } + if (!found_entry) + { + *found = 0; + return 0; + } + + /* We need the lines, lines_count, function_addrs, + function_addrs_count fields of u. If they are not set, we need + to set them. When running in threaded mode, we need to allow for + the possibility that some other thread is setting them + simultaneously. */ + + u = entry->u; + lines = u->lines; + + /* Skip units with no useful line number information by walking + backward. Useless line number information is marked by setting + lines == -1. */ + while (entry > ddata->addrs + && pc >= (entry - 1)->low + && pc < (entry - 1)->high) + { + if (state->threaded) + lines = (struct line *) backtrace_atomic_load_pointer (&u->lines); + + if (lines != (struct line *) (uintptr_t) -1) + break; + + --entry; + + u = entry->u; + lines = u->lines; + } + + if (state->threaded) + lines = backtrace_atomic_load_pointer (&u->lines); + + new_data = 0; + if (lines == NULL) + { + struct function_addrs *function_addrs; + size_t function_addrs_count; + struct line_header lhdr; + size_t count; + + /* We have never read the line information for this unit. Read + it now. 
*/ + + function_addrs = NULL; + function_addrs_count = 0; + if (read_line_info (state, ddata, error_callback, data, entry->u, &lhdr, + &lines, &count)) + { + struct function_vector *pfvec; + + /* If not threaded, reuse DDATA->FVEC for better memory + consumption. */ + if (state->threaded) + pfvec = NULL; + else + pfvec = &ddata->fvec; + read_function_info (state, ddata, &lhdr, error_callback, data, + entry->u, pfvec, &function_addrs, + &function_addrs_count); + free_line_header (state, &lhdr, error_callback, data); + new_data = 1; + } + + /* Atomically store the information we just read into the unit. + If another thread is simultaneously writing, it presumably + read the same information, and we don't care which one we + wind up with; we just leak the other one. We do have to + write the lines field last, so that the acquire-loads above + ensure that the other fields are set. */ + + if (!state->threaded) + { + u->lines_count = count; + u->function_addrs = function_addrs; + u->function_addrs_count = function_addrs_count; + u->lines = lines; + } + else + { + backtrace_atomic_store_size_t (&u->lines_count, count); + backtrace_atomic_store_pointer (&u->function_addrs, function_addrs); + backtrace_atomic_store_size_t (&u->function_addrs_count, + function_addrs_count); + backtrace_atomic_store_pointer (&u->lines, lines); + } + } + + /* Now all fields of U have been initialized. */ + + if (lines == (struct line *) (uintptr_t) -1) + { + /* If reading the line number information failed in some way, + try again to see if there is a better compilation unit for + this PC. */ + if (new_data) + return dwarf_lookup_pc (state, ddata, pc, callback, error_callback, + data, found); + return callback (data, pc, 0, NULL, 0, NULL); + } + + /* Search for PC within this unit. */ + + ln = (struct line *) bsearch (&pc, lines, entry->u->lines_count, + sizeof (struct line), line_search); + if (ln == NULL) + { + /* The PC is between the low_pc and high_pc attributes of the + compilation unit, but no entry in the line table covers it. + This implies that the start of the compilation unit has no + line number information. */ + + if (entry->u->abs_filename == NULL) + { + const char *filename; + + filename = entry->u->filename; + if (filename != NULL + && !IS_ABSOLUTE_PATH (filename) + && entry->u->comp_dir != NULL) + { + size_t filename_len; + const char *dir; + size_t dir_len; + char *s; + + filename_len = strlen (filename); + dir = entry->u->comp_dir; + dir_len = strlen (dir); + s = (char *) backtrace_alloc (state, dir_len + filename_len + 2, + error_callback, data); + if (s == NULL) + { + *found = 0; + return 0; + } + memcpy (s, dir, dir_len); + /* FIXME: Should use backslash if DOS file system. */ + s[dir_len] = '/'; + memcpy (s + dir_len + 1, filename, filename_len + 1); + filename = s; + } + entry->u->abs_filename = filename; + } + + return callback (data, pc, 0, entry->u->abs_filename, 0, NULL); + } + + /* Search for function name within this unit. */ + + if (entry->u->function_addrs_count == 0) + return callback (data, pc, 0, ln->filename, ln->lineno, NULL); + + p = ((struct function_addrs *) + bsearch (&pc, entry->u->function_addrs, + entry->u->function_addrs_count, + sizeof (struct function_addrs), + function_addrs_search)); + if (p == NULL) + return callback (data, pc, 0, ln->filename, ln->lineno, NULL); + + /* Here pc >= p->low && pc < (p + 1)->low. The function_addrs are + sorted by low, so if pc > p->low we are at the end of a range of + function_addrs with the same low value. 
If pc == p->low walk + forward to the end of the range with that low value. Then walk + backward and use the first range that includes pc. */ + while (pc == (p + 1)->low) + ++p; + fmatch = NULL; + while (1) + { + if (pc < p->high) + { + fmatch = p; + break; + } + if (p == entry->u->function_addrs) + break; + if ((p - 1)->low < p->low) + break; + --p; + } + if (fmatch == NULL) + return callback (data, pc, 0, ln->filename, ln->lineno, NULL); + + function = fmatch->function; + + filename = ln->filename; + lineno = ln->lineno; + + ret = report_inlined_functions (pc, function, entry->u->comp_dir, callback, data, + &filename, &lineno); + if (ret != 0) + return ret; + + if (filename[0] != '/' && entry->u->comp_dir) + { + char buf[1024]; + snprintf (buf, 1024, "%s/%s", entry->u->comp_dir, filename); + return callback (data, pc, fmatch->low, buf, lineno, function->name); + } + else + { + return callback (data, pc, fmatch->low, filename, lineno, function->name); + } +} + +bool dwarf_fileline_dwarf_lookup_pc_in_all_entries(struct backtrace_state *state, uintptr_t pc, + backtrace_full_callback callback, backtrace_error_callback error_callback, void *data, + int& found, int ret) +{ + for (struct dwarf_data* ddata = (struct dwarf_data *)state->fileline_data; + ddata != NULL; + ddata = ddata->next) + { + ret = dwarf_lookup_pc(state, ddata, pc, callback, error_callback, data, &found); + if (ret != 0 || found) return true; + } + return false; +} + +/* Return the file/line information for a PC using the DWARF mapping + we built earlier. */ + +static int +dwarf_fileline (struct backtrace_state *state, uintptr_t pc, + backtrace_full_callback callback, + backtrace_error_callback error_callback, void *data) +{ + struct dwarf_data *ddata; + int found; + int ret = 0; + + if (!state->threaded) + { + if (dwarf_fileline_dwarf_lookup_pc_in_all_entries(state, pc, callback, error_callback, data, found, ret)) + { + return ret; + } + + // if we failed to obtain an entry in range, it can mean that the address map has been changed and new entries + // have been loaded in the meantime. Request a refresh and try again. + if (state->request_known_address_ranges_refresh_fn) + { + int new_range_count = state->request_known_address_ranges_refresh_fn(state, pc); + if (new_range_count > 0) + { + if (dwarf_fileline_dwarf_lookup_pc_in_all_entries(state, pc, callback, error_callback, data, found, ret)) + { + return ret; + } + } + } + + } + else + { + struct dwarf_data **pp; + + pp = (struct dwarf_data **) (void *) &state->fileline_data; + while (1) + { + ddata = backtrace_atomic_load_pointer (pp); + if (ddata == NULL) + break; + + ret = dwarf_lookup_pc (state, ddata, pc, callback, error_callback, + data, &found); + if (ret != 0 || found) + return ret; + + pp = &ddata->next; + } + } + + /* FIXME: See if any libraries have been dlopen'ed. */ + + return callback (data, pc, 0, NULL, 0, NULL); +} + +/* Initialize our data structures from the DWARF debug info for a + file. Return NULL on failure. 
*/ + +static struct dwarf_data * +build_dwarf_data (struct backtrace_state *state, + struct libbacktrace_base_address base_address, + const struct dwarf_sections *dwarf_sections, + int is_bigendian, + struct dwarf_data *altlink, + backtrace_error_callback error_callback, + void *data) +{ + struct unit_addrs_vector addrs_vec; + struct unit_addrs *addrs; + size_t addrs_count; + struct unit_vector units_vec; + struct unit **units; + size_t units_count; + struct dwarf_data *fdata; + + if (!build_address_map (state, base_address, dwarf_sections, is_bigendian, + altlink, error_callback, data, &addrs_vec, + &units_vec)) + return NULL; + + if (!backtrace_vector_release (state, &addrs_vec.vec, error_callback, data)) + return NULL; + if (!backtrace_vector_release (state, &units_vec.vec, error_callback, data)) + return NULL; + addrs = (struct unit_addrs *) addrs_vec.vec.base; + units = (struct unit **) units_vec.vec.base; + addrs_count = addrs_vec.count; + units_count = units_vec.count; + backtrace_qsort (addrs, addrs_count, sizeof (struct unit_addrs), + unit_addrs_compare); + /* No qsort for units required, already sorted. */ + + fdata = ((struct dwarf_data *) + backtrace_alloc (state, sizeof (struct dwarf_data), + error_callback, data)); + if (fdata == NULL) + return NULL; + + fdata->next = NULL; + fdata->altlink = altlink; + fdata->base_address = base_address; + fdata->addrs = addrs; + fdata->addrs_count = addrs_count; + fdata->units = units; + fdata->units_count = units_count; + fdata->dwarf_sections = *dwarf_sections; + fdata->is_bigendian = is_bigendian; + memset (&fdata->fvec, 0, sizeof fdata->fvec); + + return fdata; +} + +/* Build our data structures from the DWARF sections for a module. + Set FILELINE_FN and STATE->FILELINE_DATA. Return 1 on success, 0 + on failure. */ + +int +backtrace_dwarf_add (struct backtrace_state *state, + struct libbacktrace_base_address base_address, + const struct dwarf_sections *dwarf_sections, + int is_bigendian, + struct dwarf_data *fileline_altlink, + backtrace_error_callback error_callback, + void *data, fileline *fileline_fn, + struct dwarf_data **fileline_entry) +{ + struct dwarf_data *fdata; + + fdata = build_dwarf_data (state, base_address, dwarf_sections, is_bigendian, + fileline_altlink, error_callback, data); + if (fdata == NULL) + return 0; + + if (fileline_entry != NULL) + *fileline_entry = fdata; + + if (!state->threaded) + { + struct dwarf_data **pp; + + for (pp = (struct dwarf_data **) (void *) &state->fileline_data; + *pp != NULL; + pp = &(*pp)->next) + ; + *pp = fdata; + } + else + { + while (1) + { + struct dwarf_data **pp; + + pp = (struct dwarf_data **) (void *) &state->fileline_data; + + while (1) + { + struct dwarf_data *p; + + p = backtrace_atomic_load_pointer (pp); + + if (p == NULL) + break; + + pp = &p->next; + } + + if (__sync_bool_compare_and_swap (pp, NULL, fdata)) + break; + } + } + + *fileline_fn = dwarf_fileline; + + return 1; +} + +} diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/elf.cpp b/project/thirdparty/tracy-0.11.1/libbacktrace/elf.cpp new file mode 100644 index 000000000..ffe8d7024 --- /dev/null +++ b/project/thirdparty/tracy-0.11.1/libbacktrace/elf.cpp @@ -0,0 +1,7605 @@ +/* elf.c -- Get debug data from an ELF file for backtraces. + Copyright (C) 2012-2021 Free Software Foundation, Inc. + Written by Ian Lance Taylor, Google. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + (1) Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + (2) Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + (3) The name of the author may not be used to + endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. */ + +#include "config.h" + +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_DL_ITERATE_PHDR +#include +#endif + +#include "backtrace.hpp" +#include "internal.hpp" + +#include "../client/TracyFastVector.hpp" +#include "../common/TracyAlloc.hpp" + +#ifndef S_ISLNK + #ifndef S_IFLNK + #define S_IFLNK 0120000 + #endif + #ifndef S_IFMT + #define S_IFMT 0170000 + #endif + #define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK) +#endif + +#ifndef __GNUC__ +#define __builtin_prefetch(p, r, l) +#ifndef unlikely +#define unlikely(x) (x) +#endif +#else +#ifndef unlikely +#define unlikely(x) __builtin_expect(!!(x), 0) +#endif +#endif + +namespace tracy +{ + +#ifdef TRACY_DEBUGINFOD +int GetDebugInfoDescriptor( const char* buildid_data, size_t buildid_size ); +#endif + +#if !defined(HAVE_DECL_STRNLEN) || !HAVE_DECL_STRNLEN + +/* If strnlen is not declared, provide our own version. */ + +static size_t +xstrnlen (const char *s, size_t maxlen) +{ + size_t i; + + for (i = 0; i < maxlen; ++i) + if (s[i] == '\0') + break; + return i; +} + +#define strnlen xstrnlen + +#endif + +#ifndef HAVE_LSTAT + +/* Dummy version of lstat for systems that don't have it. */ + +static int +xlstat (const char *path ATTRIBUTE_UNUSED, struct stat *st ATTRIBUTE_UNUSED) +{ + return -1; +} + +#define lstat xlstat + +#endif + +#ifndef HAVE_READLINK + +/* Dummy version of readlink for systems that don't have it. */ + +static ssize_t +xreadlink (const char *path ATTRIBUTE_UNUSED, char *buf ATTRIBUTE_UNUSED, + size_t bufsz ATTRIBUTE_UNUSED) +{ + return -1; +} + +#define readlink xreadlink + +#endif + +#ifndef HAVE_DL_ITERATE_PHDR + +/* Dummy version of dl_iterate_phdr for systems that don't have it. */ + +#define dl_phdr_info x_dl_phdr_info +#define dl_iterate_phdr x_dl_iterate_phdr + +struct dl_phdr_info +{ + uintptr_t dlpi_addr; + const char *dlpi_name; +}; + +static int +dl_iterate_phdr (int (*callback) (struct dl_phdr_info *, + size_t, void *) ATTRIBUTE_UNUSED, + void *data ATTRIBUTE_UNUSED) +{ + return 0; +} + +#endif /* ! defined (HAVE_DL_ITERATE_PHDR) */ + +/* The configure script must tell us whether we are 32-bit or 64-bit + ELF. 
We could make this code test and support either possibility,
+ but there is no point. This code only works for the currently
+ running executable, which means that we know the ELF mode at
+ configure time. */
+
+#if BACKTRACE_ELF_SIZE != 32 && BACKTRACE_ELF_SIZE != 64
+#error "Unknown BACKTRACE_ELF_SIZE"
+#endif
+
+/* <link.h> might #include <elf.h> which might define our constants
+ with slightly different values. Undefine them to be safe. */
+
+#undef EI_NIDENT
+#undef EI_MAG0
+#undef EI_MAG1
+#undef EI_MAG2
+#undef EI_MAG3
+#undef EI_CLASS
+#undef EI_DATA
+#undef EI_VERSION
+#undef ELF_MAG0
+#undef ELF_MAG1
+#undef ELF_MAG2
+#undef ELF_MAG3
+#undef ELFCLASS32
+#undef ELFCLASS64
+#undef ELFDATA2LSB
+#undef ELFDATA2MSB
+#undef EV_CURRENT
+#undef ET_DYN
+#undef EM_PPC64
+#undef EF_PPC64_ABI
+#undef SHN_LORESERVE
+#undef SHN_XINDEX
+#undef SHN_UNDEF
+#undef SHT_PROGBITS
+#undef SHT_SYMTAB
+#undef SHT_STRTAB
+#undef SHT_DYNSYM
+#undef SHF_COMPRESSED
+#undef STT_OBJECT
+#undef STT_FUNC
+#undef NT_GNU_BUILD_ID
+#undef ELFCOMPRESS_ZLIB
+#undef ELFCOMPRESS_ZSTD
+
+/* Basic types. */
+
+typedef uint16_t b_elf_half; /* Elf_Half. */
+typedef uint32_t b_elf_word; /* Elf_Word. */
+typedef int32_t b_elf_sword; /* Elf_Sword. */
+
+#if BACKTRACE_ELF_SIZE == 32
+
+typedef uint32_t b_elf_addr; /* Elf_Addr. */
+typedef uint32_t b_elf_off; /* Elf_Off. */
+
+typedef uint32_t b_elf_wxword; /* 32-bit Elf_Word, 64-bit ELF_Xword. */
+
+#else
+
+typedef uint64_t b_elf_addr; /* Elf_Addr. */
+typedef uint64_t b_elf_off; /* Elf_Off. */
+typedef uint64_t b_elf_xword; /* Elf_Xword. */
+typedef int64_t b_elf_sxword; /* Elf_Sxword. */
+
+typedef uint64_t b_elf_wxword; /* 32-bit Elf_Word, 64-bit ELF_Xword. */
+
+#endif
+
+/* Data structures and associated constants. */
+
+#define EI_NIDENT 16
+
+typedef struct {
+ unsigned char e_ident[EI_NIDENT]; /* ELF "magic number" */
+ b_elf_half e_type; /* Identifies object file type */
+ b_elf_half e_machine; /* Specifies required architecture */
+ b_elf_word e_version; /* Identifies object file version */
+ b_elf_addr e_entry; /* Entry point virtual address */
+ b_elf_off e_phoff; /* Program header table file offset */
+ b_elf_off e_shoff; /* Section header table file offset */
+ b_elf_word e_flags; /* Processor-specific flags */
+ b_elf_half e_ehsize; /* ELF header size in bytes */
+ b_elf_half e_phentsize; /* Program header table entry size */
+ b_elf_half e_phnum; /* Program header table entry count */
+ b_elf_half e_shentsize; /* Section header table entry size */
+ b_elf_half e_shnum; /* Section header table entry count */
+ b_elf_half e_shstrndx; /* Section header string table index */
+} b_elf_ehdr; /* Elf_Ehdr. 
*/ + +#define EI_MAG0 0 +#define EI_MAG1 1 +#define EI_MAG2 2 +#define EI_MAG3 3 +#define EI_CLASS 4 +#define EI_DATA 5 +#define EI_VERSION 6 + +#define ELFMAG0 0x7f +#define ELFMAG1 'E' +#define ELFMAG2 'L' +#define ELFMAG3 'F' + +#define ELFCLASS32 1 +#define ELFCLASS64 2 + +#define ELFDATA2LSB 1 +#define ELFDATA2MSB 2 + +#define EV_CURRENT 1 + +#define ET_DYN 3 + +#define EM_PPC64 21 +#define EF_PPC64_ABI 3 + +typedef struct { + b_elf_word sh_name; /* Section name, index in string tbl */ + b_elf_word sh_type; /* Type of section */ + b_elf_wxword sh_flags; /* Miscellaneous section attributes */ + b_elf_addr sh_addr; /* Section virtual addr at execution */ + b_elf_off sh_offset; /* Section file offset */ + b_elf_wxword sh_size; /* Size of section in bytes */ + b_elf_word sh_link; /* Index of another section */ + b_elf_word sh_info; /* Additional section information */ + b_elf_wxword sh_addralign; /* Section alignment */ + b_elf_wxword sh_entsize; /* Entry size if section holds table */ +} b_elf_shdr; /* Elf_Shdr. */ + +#define SHN_UNDEF 0x0000 /* Undefined section */ +#define SHN_LORESERVE 0xFF00 /* Begin range of reserved indices */ +#define SHN_XINDEX 0xFFFF /* Section index is held elsewhere */ + +#define SHT_PROGBITS 1 +#define SHT_SYMTAB 2 +#define SHT_STRTAB 3 +#define SHT_DYNSYM 11 + +#define SHF_COMPRESSED 0x800 + +#if BACKTRACE_ELF_SIZE == 32 + +typedef struct +{ + b_elf_word st_name; /* Symbol name, index in string tbl */ + b_elf_addr st_value; /* Symbol value */ + b_elf_word st_size; /* Symbol size */ + unsigned char st_info; /* Symbol binding and type */ + unsigned char st_other; /* Visibility and other data */ + b_elf_half st_shndx; /* Symbol section index */ +} b_elf_sym; /* Elf_Sym. */ + +#else /* BACKTRACE_ELF_SIZE != 32 */ + +typedef struct +{ + b_elf_word st_name; /* Symbol name, index in string tbl */ + unsigned char st_info; /* Symbol binding and type */ + unsigned char st_other; /* Visibility and other data */ + b_elf_half st_shndx; /* Symbol section index */ + b_elf_addr st_value; /* Symbol value */ + b_elf_xword st_size; /* Symbol size */ +} b_elf_sym; /* Elf_Sym. */ + +#endif /* BACKTRACE_ELF_SIZE != 32 */ + +#define STT_OBJECT 1 +#define STT_FUNC 2 + +typedef struct +{ + uint32_t namesz; + uint32_t descsz; + uint32_t type; + char name[1]; +} b_elf_note; + +#define NT_GNU_BUILD_ID 3 + +#if BACKTRACE_ELF_SIZE == 32 + +typedef struct +{ + b_elf_word ch_type; /* Compresstion algorithm */ + b_elf_word ch_size; /* Uncompressed size */ + b_elf_word ch_addralign; /* Alignment for uncompressed data */ +} b_elf_chdr; /* Elf_Chdr */ + +#else /* BACKTRACE_ELF_SIZE != 32 */ + +typedef struct +{ + b_elf_word ch_type; /* Compression algorithm */ + b_elf_word ch_reserved; /* Reserved */ + b_elf_xword ch_size; /* Uncompressed size */ + b_elf_xword ch_addralign; /* Alignment for uncompressed data */ +} b_elf_chdr; /* Elf_Chdr */ + +#endif /* BACKTRACE_ELF_SIZE != 32 */ + +#define ELFCOMPRESS_ZLIB 1 +#define ELFCOMPRESS_ZSTD 2 + +/* Names of sections, indexed by enum dwarf_section in internal.h. */ + +static const char * const dwarf_section_names[DEBUG_MAX] = +{ + ".debug_info", + ".debug_line", + ".debug_abbrev", + ".debug_ranges", + ".debug_str", + ".debug_addr", + ".debug_str_offsets", + ".debug_line_str", + ".debug_rnglists" +}; + +/* Information we gather for the sections we care about. */ + +struct debug_section_info +{ + /* Section file offset. */ + off_t offset; + /* Section size. */ + size_t size; + /* Section contents, after read from file. 
*/ + const unsigned char *data; + /* Whether the SHF_COMPRESSED flag is set for the section. */ + int compressed; +}; + +/* Information we keep for an ELF symbol. */ + +struct elf_symbol +{ + /* The name of the symbol. */ + const char *name; + /* The address of the symbol. */ + uintptr_t address; + /* The size of the symbol. */ + size_t size; +}; + +/* Information to pass to elf_syminfo. */ + +struct elf_syminfo_data +{ + /* Symbols for the next module. */ + struct elf_syminfo_data *next; + /* The ELF symbols, sorted by address. */ + struct elf_symbol *symbols; + /* The number of symbols. */ + size_t count; +}; + +/* A view that works for either a file or memory. */ + +struct elf_view +{ + struct backtrace_view view; + int release; /* If non-zero, must call backtrace_release_view. */ +}; + +/* Information about PowerPC64 ELFv1 .opd section. */ + +struct elf_ppc64_opd_data +{ + /* Address of the .opd section. */ + b_elf_addr addr; + /* Section data. */ + const char *data; + /* Size of the .opd section. */ + size_t size; + /* Corresponding section view. */ + struct elf_view view; +}; + +/* Create a view of SIZE bytes from DESCRIPTOR/MEMORY at OFFSET. */ + +static int +elf_get_view (struct backtrace_state *state, int descriptor, + const unsigned char *memory, size_t memory_size, off_t offset, + uint64_t size, backtrace_error_callback error_callback, + void *data, struct elf_view *view) +{ + if (memory == NULL) + { + view->release = 1; + return backtrace_get_view (state, descriptor, offset, size, + error_callback, data, &view->view); + } + else + { + if ((uint64_t) offset + size > (uint64_t) memory_size) + { + error_callback (data, "out of range for in-memory file", 0); + return 0; + } + view->view.data = (const void *) (memory + offset); + view->view.base = NULL; + view->view.len = size; + view->release = 0; + return 1; + } +} + +/* Release a view read by elf_get_view. */ + +static void +elf_release_view (struct backtrace_state *state, struct elf_view *view, + backtrace_error_callback error_callback, void *data) +{ + if (view->release) + backtrace_release_view (state, &view->view, error_callback, data); +} + +/* Compute the CRC-32 of BUF/LEN. This uses the CRC used for + .gnu_debuglink files. 
*/ + +static uint32_t +elf_crc32 (uint32_t crc, const unsigned char *buf, size_t len) +{ + static const uint32_t crc32_table[256] = + { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, + 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, + 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, + 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, + 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, + 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, + 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, + 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, + 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, + 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, + 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, + 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, + 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, + 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, + 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, + 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, + 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, + 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, + 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, + 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, + 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, + 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, + 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, + 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, + 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, + 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, + 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, + 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, + 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, + 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, + 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, + 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, + 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, + 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, + 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, + 0x2d02ef8d + }; + const unsigned char *end; + + crc = ~crc; + for (end = buf + len; buf < end; ++ buf) + crc = crc32_table[(crc ^ *buf) & 0xff] ^ (crc >> 8); + return ~crc; +} + +/* Return the CRC-32 of the entire file open at DESCRIPTOR. 
*/ + +static uint32_t +elf_crc32_file (struct backtrace_state *state, int descriptor, + backtrace_error_callback error_callback, void *data) +{ + struct stat st; + struct backtrace_view file_view; + uint32_t ret; + + if (fstat (descriptor, &st) < 0) + { + error_callback (data, "fstat", errno); + return 0; + } + + if (!backtrace_get_view (state, descriptor, 0, st.st_size, error_callback, + data, &file_view)) + return 0; + + ret = elf_crc32 (0, (const unsigned char *) file_view.data, st.st_size); + + backtrace_release_view (state, &file_view, error_callback, data); + + return ret; +} + +/* A dummy callback function used when we can't find a symbol + table. */ + +static void +elf_nosyms (struct backtrace_state *state ATTRIBUTE_UNUSED, + uintptr_t addr ATTRIBUTE_UNUSED, + backtrace_syminfo_callback callback ATTRIBUTE_UNUSED, + backtrace_error_callback error_callback, void *data) +{ + error_callback (data, "no symbol table in ELF executable", -1); +} + +/* A callback function used when we can't find any debug info. */ + +static int +elf_nodebug (struct backtrace_state *state, uintptr_t pc, + backtrace_full_callback callback, + backtrace_error_callback error_callback, void *data) +{ + if (state->syminfo_fn != NULL && state->syminfo_fn != elf_nosyms) + { + struct backtrace_call_full bdata; + + /* Fetch symbol information so that we can least get the + function name. */ + + bdata.full_callback = callback; + bdata.full_error_callback = error_callback; + bdata.full_data = data; + bdata.ret = 0; + state->syminfo_fn (state, pc, backtrace_syminfo_to_full_callback, + backtrace_syminfo_to_full_error_callback, &bdata); + return bdata.ret; + } + + error_callback (data, "no debug info in ELF executable", -1); + return 0; +} + +/* Compare struct elf_symbol for qsort. */ + +static int +elf_symbol_compare (const void *v1, const void *v2) +{ + const struct elf_symbol *e1 = (const struct elf_symbol *) v1; + const struct elf_symbol *e2 = (const struct elf_symbol *) v2; + + if (e1->address < e2->address) + return -1; + else if (e1->address > e2->address) + return 1; + else + return 0; +} + +/* Compare an ADDR against an elf_symbol for bsearch. We allocate one + extra entry in the array so that this can look safely at the next + entry. */ + +static int +elf_symbol_search (const void *vkey, const void *ventry) +{ + const uintptr_t *key = (const uintptr_t *) vkey; + const struct elf_symbol *entry = (const struct elf_symbol *) ventry; + uintptr_t addr; + + addr = *key; + if (addr < entry->address) + return -1; + else if (addr >= entry->address + entry->size) + return 1; + else + return 0; +} + +/* Initialize the symbol table info for elf_syminfo. */ + +static int +elf_initialize_syminfo (struct backtrace_state *state, + struct libbacktrace_base_address base_address, + const unsigned char *symtab_data, size_t symtab_size, + const unsigned char *strtab, size_t strtab_size, + backtrace_error_callback error_callback, + void *data, struct elf_syminfo_data *sdata, + struct elf_ppc64_opd_data *opd) +{ + size_t sym_count; + const b_elf_sym *sym; + size_t elf_symbol_count; + size_t elf_symbol_size; + struct elf_symbol *elf_symbols; + size_t i; + unsigned int j; + + sym_count = symtab_size / sizeof (b_elf_sym); + + /* We only care about function symbols. Count them. 
*/ + sym = (const b_elf_sym *) symtab_data; + elf_symbol_count = 0; + for (i = 0; i < sym_count; ++i, ++sym) + { + int info; + + info = sym->st_info & 0xf; + if ((info == STT_FUNC || info == STT_OBJECT) + && sym->st_shndx != SHN_UNDEF) + ++elf_symbol_count; + } + + elf_symbol_size = elf_symbol_count * sizeof (struct elf_symbol); + elf_symbols = ((struct elf_symbol *) + backtrace_alloc (state, elf_symbol_size, error_callback, + data)); + if (elf_symbols == NULL) + return 0; + + sym = (const b_elf_sym *) symtab_data; + j = 0; + for (i = 0; i < sym_count; ++i, ++sym) + { + int info; + + info = sym->st_info & 0xf; + if (info != STT_FUNC && info != STT_OBJECT) + continue; + if (sym->st_shndx == SHN_UNDEF) + continue; + if (sym->st_name >= strtab_size) + { + error_callback (data, "symbol string index out of range", 0); + backtrace_free (state, elf_symbols, elf_symbol_size, error_callback, + data); + return 0; + } + elf_symbols[j].name = (const char *) strtab + sym->st_name; + /* Special case PowerPC64 ELFv1 symbols in .opd section, if the symbol + is a function descriptor, read the actual code address from the + descriptor. */ + if (opd + && sym->st_value >= opd->addr + && sym->st_value < opd->addr + opd->size) + elf_symbols[j].address + = *(const b_elf_addr *) (opd->data + (sym->st_value - opd->addr)); + else + elf_symbols[j].address = sym->st_value; + elf_symbols[j].address = + libbacktrace_add_base (elf_symbols[j].address, base_address); + elf_symbols[j].size = sym->st_size; + ++j; + } + + backtrace_qsort (elf_symbols, elf_symbol_count, sizeof (struct elf_symbol), + elf_symbol_compare); + + sdata->next = NULL; + sdata->symbols = elf_symbols; + sdata->count = elf_symbol_count; + + return 1; +} + +/* Add EDATA to the list in STATE. */ + +static void +elf_add_syminfo_data (struct backtrace_state *state, + struct elf_syminfo_data *edata) +{ + if (!state->threaded) + { + struct elf_syminfo_data **pp; + + for (pp = (struct elf_syminfo_data **) (void *) &state->syminfo_data; + *pp != NULL; + pp = &(*pp)->next) + ; + *pp = edata; + } + else + { + while (1) + { + struct elf_syminfo_data **pp; + + pp = (struct elf_syminfo_data **) (void *) &state->syminfo_data; + + while (1) + { + struct elf_syminfo_data *p; + + p = backtrace_atomic_load_pointer (pp); + + if (p == NULL) + break; + + pp = &p->next; + } + + if (__sync_bool_compare_and_swap (pp, NULL, edata)) + break; + } + } +} + +/* Return the symbol name and value for an ADDR. 
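
   (Illustrative aside, not from the original source: elf_syminfo walks the
   syminfo_data list that elf_add_syminfo_data above appends to.  In the
   threaded case the writer publishes each node with a compare-and-swap on
   the trailing next pointer, so the reader only needs atomic loads.  A
   condensed sketch of that append pattern, using the same GCC builtin:

     struct node { struct node *next; };

     void append (struct node **head, struct node *n)   // n->next == NULL
     {
       while (1)
         {
           struct node **pp = head;
           struct node *p;
           while ((p = __atomic_load_n (pp, __ATOMIC_ACQUIRE)) != NULL)
             pp = &p->next;                     // find the current tail
           if (__sync_bool_compare_and_swap (pp, NULL, n))
             break;                             // published
           // lost the race: another thread appended first, so rescan
         }
     }

   The code above uses backtrace_atomic_load_pointer rather than
   __atomic_load_n, but the shape is the same.)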
*/ + +static void +elf_syminfo (struct backtrace_state *state, uintptr_t addr, + backtrace_syminfo_callback callback, + backtrace_error_callback error_callback ATTRIBUTE_UNUSED, + void *data) +{ + struct elf_syminfo_data *edata; + struct elf_symbol *sym = NULL; + + if (!state->threaded) + { + for (edata = (struct elf_syminfo_data *) state->syminfo_data; + edata != NULL; + edata = edata->next) + { + sym = ((struct elf_symbol *) + bsearch (&addr, edata->symbols, edata->count, + sizeof (struct elf_symbol), elf_symbol_search)); + if (sym != NULL) + break; + } + } + else + { + struct elf_syminfo_data **pp; + + pp = (struct elf_syminfo_data **) (void *) &state->syminfo_data; + while (1) + { + edata = backtrace_atomic_load_pointer (pp); + if (edata == NULL) + break; + + sym = ((struct elf_symbol *) + bsearch (&addr, edata->symbols, edata->count, + sizeof (struct elf_symbol), elf_symbol_search)); + if (sym != NULL) + break; + + pp = &edata->next; + } + } + + if (sym == NULL) + callback (data, addr, NULL, 0, 0); + else + callback (data, addr, sym->name, sym->address, sym->size); +} + +/* Return whether FILENAME is a symlink. */ + +static int +elf_is_symlink (const char *filename) +{ + struct stat st; + + if (lstat (filename, &st) < 0) + return 0; + return S_ISLNK (st.st_mode); +} + +/* Return the results of reading the symlink FILENAME in a buffer + allocated by backtrace_alloc. Return the length of the buffer in + *LEN. */ + +static char * +elf_readlink (struct backtrace_state *state, const char *filename, + backtrace_error_callback error_callback, void *data, + size_t *plen) +{ + size_t len; + char *buf; + + len = 128; + while (1) + { + ssize_t rl; + + buf = (char*)backtrace_alloc (state, len, error_callback, data); + if (buf == NULL) + return NULL; + rl = readlink (filename, buf, len); + if (rl < 0) + { + backtrace_free (state, buf, len, error_callback, data); + return NULL; + } + if ((size_t) rl < len - 1) + { + buf[rl] = '\0'; + *plen = len; + return buf; + } + backtrace_free (state, buf, len, error_callback, data); + len *= 2; + } +} + +#define SYSTEM_BUILD_ID_DIR "/usr/lib/debug/.build-id/" + +/* Open a separate debug info file, using the build ID to find it. + Returns an open file descriptor, or -1. + + The GDB manual says that the only place gdb looks for a debug file + when the build ID is known is in /usr/lib/debug/.build-id. */ + +static int +elf_open_debugfile_by_buildid (struct backtrace_state *state, + const char *buildid_data, size_t buildid_size, + const char *filename, + backtrace_error_callback error_callback, + void *data) +{ + const char * const prefix = SYSTEM_BUILD_ID_DIR; + const size_t prefix_len = strlen (prefix); + const char * const suffix = ".debug"; + const size_t suffix_len = strlen (suffix); + size_t len; + char *bd_filename; + char *t; + size_t i; + int ret; + int does_not_exist; + + len = prefix_len + buildid_size * 2 + suffix_len + 2; + bd_filename = (char*)backtrace_alloc (state, len, error_callback, data); + if (bd_filename == NULL) + return -1; + + t = bd_filename; + memcpy (t, prefix, prefix_len); + t += prefix_len; + for (i = 0; i < buildid_size; i++) + { + unsigned char b; + unsigned char nib; + + b = (unsigned char) buildid_data[i]; + nib = (b & 0xf0) >> 4; + *t++ = nib < 10 ? '0' + nib : 'a' + nib - 10; + nib = b & 0x0f; + *t++ = nib < 10 ? 
'0' + nib : 'a' + nib - 10; + if (i == 0) + *t++ = '/'; + } + memcpy (t, suffix, suffix_len); + t[suffix_len] = '\0'; + + ret = backtrace_open (bd_filename, error_callback, data, &does_not_exist); + + backtrace_free (state, bd_filename, len, error_callback, data); + + /* gdb checks that the debuginfo file has the same build ID note. + That seems kind of pointless to me--why would it have the right + name but not the right build ID?--so skipping the check. */ + +#ifdef TRACY_DEBUGINFOD + if (ret == -1) + return GetDebugInfoDescriptor( buildid_data, buildid_size, filename ); + else + return ret; +#else + return ret; +#endif +} + +/* Try to open a file whose name is PREFIX (length PREFIX_LEN) + concatenated with PREFIX2 (length PREFIX2_LEN) concatenated with + DEBUGLINK_NAME. Returns an open file descriptor, or -1. */ + +static int +elf_try_debugfile (struct backtrace_state *state, const char *prefix, + size_t prefix_len, const char *prefix2, size_t prefix2_len, + const char *debuglink_name, + backtrace_error_callback error_callback, void *data) +{ + size_t debuglink_len; + size_t try_len; + char *Try; + int does_not_exist; + int ret; + + debuglink_len = strlen (debuglink_name); + try_len = prefix_len + prefix2_len + debuglink_len + 1; + Try = (char*)backtrace_alloc (state, try_len, error_callback, data); + if (Try == NULL) + return -1; + + memcpy (Try, prefix, prefix_len); + memcpy (Try + prefix_len, prefix2, prefix2_len); + memcpy (Try + prefix_len + prefix2_len, debuglink_name, debuglink_len); + Try[prefix_len + prefix2_len + debuglink_len] = '\0'; + + ret = backtrace_open (Try, error_callback, data, &does_not_exist); + + backtrace_free (state, Try, try_len, error_callback, data); + + return ret; +} + +/* Find a separate debug info file, using the debuglink section data + to find it. Returns an open file descriptor, or -1. */ + +static int +elf_find_debugfile_by_debuglink (struct backtrace_state *state, + const char *filename, + const char *debuglink_name, + backtrace_error_callback error_callback, + void *data) +{ + int ret; + char *alc; + size_t alc_len; + const char *slash; + int ddescriptor; + const char *prefix; + size_t prefix_len; + + /* Resolve symlinks in FILENAME. Since FILENAME is fairly likely to + be /proc/self/exe, symlinks are common. We don't try to resolve + the whole path name, just the base name. */ + ret = -1; + alc = NULL; + alc_len = 0; + while (elf_is_symlink (filename)) + { + char *new_buf; + size_t new_len; + + new_buf = elf_readlink (state, filename, error_callback, data, &new_len); + if (new_buf == NULL) + break; + + if (new_buf[0] == '/') + filename = new_buf; + else + { + slash = strrchr (filename, '/'); + if (slash == NULL) + filename = new_buf; + else + { + size_t clen; + char *c; + + slash++; + clen = slash - filename + strlen (new_buf) + 1; + c = (char*)backtrace_alloc (state, clen, error_callback, data); + if (c == NULL) + goto done; + + memcpy (c, filename, slash - filename); + memcpy (c + (slash - filename), new_buf, strlen (new_buf)); + c[slash - filename + strlen (new_buf)] = '\0'; + backtrace_free (state, new_buf, new_len, error_callback, data); + filename = c; + new_buf = c; + new_len = clen; + } + } + + if (alc != NULL) + backtrace_free (state, alc, alc_len, error_callback, data); + alc = new_buf; + alc_len = new_len; + } + + /* Look for DEBUGLINK_NAME in the same directory as FILENAME. 
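
   (Illustrative example, not from the original source: with FILENAME
   "/usr/bin/foo" and a debuglink name of "foo.debug", the candidates tried
   below are, in order,

     /usr/bin/foo.debug
     /usr/bin/.debug/foo.debug
     /usr/lib/debug//usr/bin/foo.debug   (the doubled slash is harmless)

   For comparison, the build-id lookup above maps an id whose bytes start
   ab cd ef ... to "/usr/lib/debug/.build-id/ab/cdef....debug".)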
*/ + + slash = strrchr (filename, '/'); + if (slash == NULL) + { + prefix = ""; + prefix_len = 0; + } + else + { + slash++; + prefix = filename; + prefix_len = slash - filename; + } + + ddescriptor = elf_try_debugfile (state, prefix, prefix_len, "", 0, + debuglink_name, error_callback, data); + if (ddescriptor >= 0) + { + ret = ddescriptor; + goto done; + } + + /* Look for DEBUGLINK_NAME in a .debug subdirectory of FILENAME. */ + + ddescriptor = elf_try_debugfile (state, prefix, prefix_len, ".debug/", + strlen (".debug/"), debuglink_name, + error_callback, data); + if (ddescriptor >= 0) + { + ret = ddescriptor; + goto done; + } + + /* Look for DEBUGLINK_NAME in /usr/lib/debug. */ + + ddescriptor = elf_try_debugfile (state, "/usr/lib/debug/", + strlen ("/usr/lib/debug/"), prefix, + prefix_len, debuglink_name, + error_callback, data); + if (ddescriptor >= 0) + ret = ddescriptor; + + done: + if (alc != NULL && alc_len > 0) + backtrace_free (state, alc, alc_len, error_callback, data); + return ret; +} + +/* Open a separate debug info file, using the debuglink section data + to find it. Returns an open file descriptor, or -1. */ + +static int +elf_open_debugfile_by_debuglink (struct backtrace_state *state, + const char *filename, + const char *debuglink_name, + uint32_t debuglink_crc, + backtrace_error_callback error_callback, + void *data) +{ + int ddescriptor; + + ddescriptor = elf_find_debugfile_by_debuglink (state, filename, + debuglink_name, + error_callback, data); + if (ddescriptor < 0) + return -1; + + if (debuglink_crc != 0) + { + uint32_t got_crc; + + got_crc = elf_crc32_file (state, ddescriptor, error_callback, data); + if (got_crc != debuglink_crc) + { + backtrace_close (ddescriptor, error_callback, data); + return -1; + } + } + + return ddescriptor; +} + +/* A function useful for setting a breakpoint for an inflation failure + when this code is compiled with -g. */ + +static void +elf_uncompress_failed(void) +{ +} + +/* *PVAL is the current value being read from the stream, and *PBITS + is the number of valid bits. Ensure that *PVAL holds at least 15 + bits by reading additional bits from *PPIN, up to PINEND, as + needed. Updates *PPIN, *PVAL and *PBITS. Returns 1 on success, 0 + on error. */ + +static int +elf_fetch_bits (const unsigned char **ppin, const unsigned char *pinend, + uint64_t *pval, unsigned int *pbits) +{ + unsigned int bits; + const unsigned char *pin; + uint64_t val; + uint32_t next; + + bits = *pbits; + if (bits >= 15) + return 1; + pin = *ppin; + val = *pval; + + if (unlikely (pinend - pin < 4)) + { + elf_uncompress_failed (); + return 0; + } + +#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) \ + && defined(__ORDER_BIG_ENDIAN__) \ + && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ \ + || __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + /* We've ensured that PIN is aligned. */ + next = *(const uint32_t *)pin; + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + next = __builtin_bswap32 (next); +#endif +#else + next = pin[0] | (pin[1] << 8) | (pin[2] << 16) | (pin[3] << 24); +#endif + + val |= (uint64_t)next << bits; + bits += 32; + pin += 4; + + /* We will need the next four bytes soon. */ + __builtin_prefetch (pin, 0, 0); + + *ppin = pin; + *pval = val; + *pbits = bits; + return 1; +} + +/* This is like elf_fetch_bits, but it fetchs the bits backward, and ensures at + least 16 bits. This is for zstd. 
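
   (Illustrative aside, not from the original source: zstd FSE and Huffman
   bit streams are laid out so that they are decoded from the last byte of
   the buffer toward the first, and the final byte carries a single 1 bit
   marking where the padding ends, roughly:

     |<---------------- compressed stream ---------------->|
     [ ....... payload bits ....... ][ 1 ][ 0-7 zero bits ]
                                       read from this end <--

   elf_fetch_backward_init below locates that marker bit with __builtin_clz
   and discards it before normal backward fetching begins.)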
*/ + +static int +elf_fetch_bits_backward (const unsigned char **ppin, + const unsigned char *pinend, + uint64_t *pval, unsigned int *pbits) +{ + unsigned int bits; + const unsigned char *pin; + uint64_t val; + uint32_t next; + + bits = *pbits; + if (bits >= 16) + return 1; + pin = *ppin; + val = *pval; + + if (unlikely (pin <= pinend)) + return 1; + + pin -= 4; + +#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) \ + && defined(__ORDER_BIG_ENDIAN__) \ + && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ \ + || __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + /* We've ensured that PIN is aligned. */ + next = *(const uint32_t *)pin; + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + next = __builtin_bswap32 (next); +#endif +#else + next = pin[0] | (pin[1] << 8) | (pin[2] << 16) | (pin[3] << 24); +#endif + + val <<= 32; + val |= next; + bits += 32; + + if (unlikely (pin < pinend)) + { + val >>= (pinend - pin) * 8; + bits -= (pinend - pin) * 8; + } + + *ppin = pin; + *pval = val; + *pbits = bits; + return 1; +} + +/* Initialize backward fetching when the bitstream starts with a 1 bit in the + last byte in memory (which is the first one that we read). This is used by + zstd decompression. Returns 1 on success, 0 on error. */ + +static int +elf_fetch_backward_init (const unsigned char **ppin, + const unsigned char *pinend, + uint64_t *pval, unsigned int *pbits) +{ + const unsigned char *pin; + unsigned int stream_start; + uint64_t val; + unsigned int bits; + + pin = *ppin; + stream_start = (unsigned int)*pin; + if (unlikely (stream_start == 0)) + { + elf_uncompress_failed (); + return 0; + } + val = 0; + bits = 0; + + /* Align to a 32-bit boundary. */ + while ((((uintptr_t)pin) & 3) != 0) + { + val <<= 8; + val |= (uint64_t)*pin; + bits += 8; + --pin; + } + + val <<= 8; + val |= (uint64_t)*pin; + bits += 8; + + *ppin = pin; + *pval = val; + *pbits = bits; + if (!elf_fetch_bits_backward (ppin, pinend, pval, pbits)) + return 0; + + *pbits -= __builtin_clz (stream_start) - (sizeof (unsigned int) - 1) * 8 + 1; + + if (!elf_fetch_bits_backward (ppin, pinend, pval, pbits)) + return 0; + + return 1; +} + +/* Huffman code tables, like the rest of the zlib format, are defined + by RFC 1951. We store a Huffman code table as a series of tables + stored sequentially in memory. Each entry in a table is 16 bits. + The first, main, table has 256 entries. It is followed by a set of + secondary tables of length 2 to 128 entries. The maximum length of + a code sequence in the deflate format is 15 bits, so that is all we + need. Each secondary table has an index, which is the offset of + the table in the overall memory storage. + + The deflate format says that all codes of a given bit length are + lexicographically consecutive. Perhaps we could have 130 values + that require a 15-bit code, perhaps requiring three secondary + tables of size 128. I don't know if this is actually possible, but + it suggests that the maximum size required for secondary tables is + 3 * 128 + 3 * 64 ... == 768. The zlib enough program reports 660 + as the maximum. We permit 768, since in addition to the 256 for + the primary table, with two bytes per entry, and with the two + tables we need, that gives us a page. + + A single table entry needs to store a value or (for the main table + only) the index and size of a secondary table. Values range from 0 + to 285, inclusive. Secondary table indexes, per above, range from + 0 to 510. 
For a value we need to store the number of bits we need + to determine that value (one value may appear multiple times in the + table), which is 1 to 8. For a secondary table we need to store + the number of bits used to index into the table, which is 1 to 7. + And of course we need 1 bit to decide whether we have a value or a + secondary table index. So each entry needs 9 bits for value/table + index, 3 bits for size, 1 bit what it is. For simplicity we use 16 + bits per entry. */ + +/* Number of entries we allocate to for one code table. We get a page + for the two code tables we need. */ + +#define ZLIB_HUFFMAN_TABLE_SIZE (1024) + +/* Bit masks and shifts for the values in the table. */ + +#define ZLIB_HUFFMAN_VALUE_MASK 0x01ff +#define ZLIB_HUFFMAN_BITS_SHIFT 9 +#define ZLIB_HUFFMAN_BITS_MASK 0x7 +#define ZLIB_HUFFMAN_SECONDARY_SHIFT 12 + +/* For working memory while inflating we need two code tables, we need + an array of code lengths (max value 15, so we use unsigned char), + and an array of unsigned shorts used while building a table. The + latter two arrays must be large enough to hold the maximum number + of code lengths, which RFC 1951 defines as 286 + 30. */ + +#define ZLIB_TABLE_SIZE \ + (2 * ZLIB_HUFFMAN_TABLE_SIZE * sizeof (uint16_t) \ + + (286 + 30) * sizeof (uint16_t) \ + + (286 + 30) * sizeof (unsigned char)) + +#define ZLIB_TABLE_CODELEN_OFFSET \ + (2 * ZLIB_HUFFMAN_TABLE_SIZE * sizeof (uint16_t) \ + + (286 + 30) * sizeof (uint16_t)) + +#define ZLIB_TABLE_WORK_OFFSET \ + (2 * ZLIB_HUFFMAN_TABLE_SIZE * sizeof (uint16_t)) + +#ifdef BACKTRACE_GENERATE_FIXED_HUFFMAN_TABLE + +/* Used by the main function that generates the fixed table to learn + the table size. */ +static size_t final_next_secondary; + +#endif + +/* Build a Huffman code table from an array of lengths in CODES of + length CODES_LEN. The table is stored into *TABLE. ZDEBUG_TABLE + is the same as for elf_zlib_inflate, used to find some work space. + Returns 1 on success, 0 on error. */ + +static int +elf_zlib_inflate_table (unsigned char *codes, size_t codes_len, + uint16_t *zdebug_table, uint16_t *table) +{ + uint16_t count[16]; + uint16_t start[16]; + uint16_t prev[16]; + uint16_t firstcode[7]; + uint16_t *next; + size_t i; + size_t j; + unsigned int code; + size_t next_secondary; + + /* Count the number of code of each length. Set NEXT[val] to be the + next value after VAL with the same bit length. */ + + next = (uint16_t *) (((unsigned char *) zdebug_table) + + ZLIB_TABLE_WORK_OFFSET); + + memset (&count[0], 0, 16 * sizeof (uint16_t)); + for (i = 0; i < codes_len; ++i) + { + if (unlikely (codes[i] >= 16)) + { + elf_uncompress_failed (); + return 0; + } + + if (count[codes[i]] == 0) + { + start[codes[i]] = i; + prev[codes[i]] = i; + } + else + { + next[prev[codes[i]]] = i; + prev[codes[i]] = i; + } + + ++count[codes[i]]; + } + + /* For each length, fill in the table for the codes of that + length. */ + + memset (table, 0, ZLIB_HUFFMAN_TABLE_SIZE * sizeof (uint16_t)); + + /* Handle the values that do not require a secondary table. */ + + code = 0; + for (j = 1; j <= 8; ++j) + { + unsigned int jcnt; + unsigned int val; + + jcnt = count[j]; + if (jcnt == 0) + continue; + + if (unlikely (jcnt > (1U << j))) + { + elf_uncompress_failed (); + return 0; + } + + /* There are JCNT values that have this length, the values + starting from START[j] continuing through NEXT[VAL]. Those + values are assigned consecutive values starting at CODE. 
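
   (Illustrative aside, not from the original source: once the table is
   built, a primary-table entry is consumed roughly like this, using the
   masks defined above:

     uint16_t t = table[val & 0xff];
     if ((t & (1U << ZLIB_HUFFMAN_SECONDARY_SHIFT)) == 0)
       {
         unsigned int sym  = t & ZLIB_HUFFMAN_VALUE_MASK;          // 9 bits
         unsigned int used = ((t >> ZLIB_HUFFMAN_BITS_SHIFT)
                              & ZLIB_HUFFMAN_BITS_MASK) + 1;       // 1..8
       }
     else
       {
         // low 9 bits: offset of the secondary table (relative to +0x100),
         // bits 9-11: how many further input bits index that table
       }

   The filling loop below stores (j - 1) in the bits field of value entries,
   which is why the consumer adds 1 back.)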
*/ + + val = start[j]; + for (i = 0; i < jcnt; ++i) + { + uint16_t tval; + size_t ind; + unsigned int incr; + + /* In the compressed bit stream, the value VAL is encoded as + J bits with the value C. */ + + if (unlikely ((val & ~ZLIB_HUFFMAN_VALUE_MASK) != 0)) + { + elf_uncompress_failed (); + return 0; + } + + tval = val | ((j - 1) << ZLIB_HUFFMAN_BITS_SHIFT); + + /* The table lookup uses 8 bits. If J is less than 8, we + don't know what the other bits will be. We need to fill + in all possibilities in the table. Since the Huffman + code is unambiguous, those entries can't be used for any + other code. */ + + for (ind = code; ind < 0x100; ind += 1 << j) + { + if (unlikely (table[ind] != 0)) + { + elf_uncompress_failed (); + return 0; + } + table[ind] = tval; + } + + /* Advance to the next value with this length. */ + if (i + 1 < jcnt) + val = next[val]; + + /* The Huffman codes are stored in the bitstream with the + most significant bit first, as is required to make them + unambiguous. The effect is that when we read them from + the bitstream we see the bit sequence in reverse order: + the most significant bit of the Huffman code is the least + significant bit of the value we read from the bitstream. + That means that to make our table lookups work, we need + to reverse the bits of CODE. Since reversing bits is + tedious and in general requires using a table, we instead + increment CODE in reverse order. That is, if the number + of bits we are currently using, here named J, is 3, we + count as 000, 100, 010, 110, 001, 101, 011, 111, which is + to say the numbers from 0 to 7 but with the bits + reversed. Going to more bits, aka incrementing J, + effectively just adds more zero bits as the beginning, + and as such does not change the numeric value of CODE. + + To increment CODE of length J in reverse order, find the + most significant zero bit and set it to one while + clearing all higher bits. In other words, add 1 modulo + 2^J, only reversed. */ + + incr = 1U << (j - 1); + while ((code & incr) != 0) + incr >>= 1; + if (incr == 0) + code = 0; + else + { + code &= incr - 1; + code += incr; + } + } + } + + /* Handle the values that require a secondary table. */ + + /* Set FIRSTCODE, the number at which the codes start, for each + length. */ + + for (j = 9; j < 16; j++) + { + unsigned int jcnt; + unsigned int k; + + jcnt = count[j]; + if (jcnt == 0) + continue; + + /* There are JCNT values that have this length, the values + starting from START[j]. Those values are assigned + consecutive values starting at CODE. */ + + firstcode[j - 9] = code; + + /* Reverse add JCNT to CODE modulo 2^J. */ + for (k = 0; k < j; ++k) + { + if ((jcnt & (1U << k)) != 0) + { + unsigned int m; + unsigned int bit; + + bit = 1U << (j - k - 1); + for (m = 0; m < j - k; ++m, bit >>= 1) + { + if ((code & bit) == 0) + { + code += bit; + break; + } + code &= ~bit; + } + jcnt &= ~(1U << k); + } + } + if (unlikely (jcnt != 0)) + { + elf_uncompress_failed (); + return 0; + } + } + + /* For J from 9 to 15, inclusive, we store COUNT[J] consecutive + values starting at START[J] with consecutive codes starting at + FIRSTCODE[J - 9]. In the primary table we need to point to the + secondary table, and the secondary table will be indexed by J - 9 + bits. We count down from 15 so that we install the larger + secondary tables first, as the smaller ones may be embedded in + the larger ones. */ + + next_secondary = 0; /* Index of next secondary table (after primary). 
*/ + for (j = 15; j >= 9; j--) + { + unsigned int jcnt; + unsigned int val; + size_t primary; /* Current primary index. */ + size_t secondary; /* Offset to current secondary table. */ + size_t secondary_bits; /* Bit size of current secondary table. */ + + jcnt = count[j]; + if (jcnt == 0) + continue; + + val = start[j]; + code = firstcode[j - 9]; + primary = 0x100; + secondary = 0; + secondary_bits = 0; + for (i = 0; i < jcnt; ++i) + { + uint16_t tval; + size_t ind; + unsigned int incr; + + if ((code & 0xff) != primary) + { + uint16_t tprimary; + + /* Fill in a new primary table entry. */ + + primary = code & 0xff; + + tprimary = table[primary]; + if (tprimary == 0) + { + /* Start a new secondary table. */ + + if (unlikely ((next_secondary & ZLIB_HUFFMAN_VALUE_MASK) + != next_secondary)) + { + elf_uncompress_failed (); + return 0; + } + + secondary = next_secondary; + secondary_bits = j - 8; + next_secondary += 1 << secondary_bits; + table[primary] = (secondary + + ((j - 8) << ZLIB_HUFFMAN_BITS_SHIFT) + + (1U << ZLIB_HUFFMAN_SECONDARY_SHIFT)); + } + else + { + /* There is an existing entry. It had better be a + secondary table with enough bits. */ + if (unlikely ((tprimary + & (1U << ZLIB_HUFFMAN_SECONDARY_SHIFT)) + == 0)) + { + elf_uncompress_failed (); + return 0; + } + secondary = tprimary & ZLIB_HUFFMAN_VALUE_MASK; + secondary_bits = ((tprimary >> ZLIB_HUFFMAN_BITS_SHIFT) + & ZLIB_HUFFMAN_BITS_MASK); + if (unlikely (secondary_bits < j - 8)) + { + elf_uncompress_failed (); + return 0; + } + } + } + + /* Fill in secondary table entries. */ + + tval = val | ((j - 8) << ZLIB_HUFFMAN_BITS_SHIFT); + + for (ind = code >> 8; + ind < (1U << secondary_bits); + ind += 1U << (j - 8)) + { + if (unlikely (table[secondary + 0x100 + ind] != 0)) + { + elf_uncompress_failed (); + return 0; + } + table[secondary + 0x100 + ind] = tval; + } + + if (i + 1 < jcnt) + val = next[val]; + + incr = 1U << (j - 1); + while ((code & incr) != 0) + incr >>= 1; + if (incr == 0) + code = 0; + else + { + code &= incr - 1; + code += incr; + } + } + } + +#ifdef BACKTRACE_GENERATE_FIXED_HUFFMAN_TABLE + final_next_secondary = next_secondary; +#endif + + return 1; +} + +#ifdef BACKTRACE_GENERATE_FIXED_HUFFMAN_TABLE + +/* Used to generate the fixed Huffman table for block type 1. 
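
   (Aside, not part of the library build: this main function is only
   compiled when BACKTRACE_GENERATE_FIXED_HUFFMAN_TABLE is defined, as a
   stand-alone generator whose printf output is what appears below as
   elf_zlib_default_table and elf_zlib_default_dist_table; it therefore
   depends on stdio.  The fixed code lengths it feeds to
   elf_zlib_inflate_table are the RFC 1951 block-type-1 assignment:

     literal/length symbols   0..143  ->  8 bits
                            144..255  ->  9 bits
                            256..279  ->  7 bits
                            280..287  ->  8 bits
     distance symbols         0..31   ->  5 bits

   The analogous zstd FSE table generator later in the file works the same
   way.)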
*/ + +#include + +static uint16_t table[ZLIB_TABLE_SIZE]; +static unsigned char codes[288]; + +int +main () +{ + size_t i; + + for (i = 0; i <= 143; ++i) + codes[i] = 8; + for (i = 144; i <= 255; ++i) + codes[i] = 9; + for (i = 256; i <= 279; ++i) + codes[i] = 7; + for (i = 280; i <= 287; ++i) + codes[i] = 8; + if (!elf_zlib_inflate_table (&codes[0], 288, &table[0], &table[0])) + { + fprintf (stderr, "elf_zlib_inflate_table failed\n"); + exit (EXIT_FAILURE); + } + + printf ("static const uint16_t elf_zlib_default_table[%#zx] =\n", + final_next_secondary + 0x100); + printf ("{\n"); + for (i = 0; i < final_next_secondary + 0x100; i += 8) + { + size_t j; + + printf (" "); + for (j = i; j < final_next_secondary + 0x100 && j < i + 8; ++j) + printf (" %#x,", table[j]); + printf ("\n"); + } + printf ("};\n"); + printf ("\n"); + + for (i = 0; i < 32; ++i) + codes[i] = 5; + if (!elf_zlib_inflate_table (&codes[0], 32, &table[0], &table[0])) + { + fprintf (stderr, "elf_zlib_inflate_table failed\n"); + exit (EXIT_FAILURE); + } + + printf ("static const uint16_t elf_zlib_default_dist_table[%#zx] =\n", + final_next_secondary + 0x100); + printf ("{\n"); + for (i = 0; i < final_next_secondary + 0x100; i += 8) + { + size_t j; + + printf (" "); + for (j = i; j < final_next_secondary + 0x100 && j < i + 8; ++j) + printf (" %#x,", table[j]); + printf ("\n"); + } + printf ("};\n"); + + return 0; +} + +#endif + +/* The fixed tables generated by the #ifdef'ed out main function + above. */ + +static const uint16_t elf_zlib_default_table[0x170] = +{ + 0xd00, 0xe50, 0xe10, 0xf18, 0xd10, 0xe70, 0xe30, 0x1230, + 0xd08, 0xe60, 0xe20, 0x1210, 0xe00, 0xe80, 0xe40, 0x1250, + 0xd04, 0xe58, 0xe18, 0x1200, 0xd14, 0xe78, 0xe38, 0x1240, + 0xd0c, 0xe68, 0xe28, 0x1220, 0xe08, 0xe88, 0xe48, 0x1260, + 0xd02, 0xe54, 0xe14, 0xf1c, 0xd12, 0xe74, 0xe34, 0x1238, + 0xd0a, 0xe64, 0xe24, 0x1218, 0xe04, 0xe84, 0xe44, 0x1258, + 0xd06, 0xe5c, 0xe1c, 0x1208, 0xd16, 0xe7c, 0xe3c, 0x1248, + 0xd0e, 0xe6c, 0xe2c, 0x1228, 0xe0c, 0xe8c, 0xe4c, 0x1268, + 0xd01, 0xe52, 0xe12, 0xf1a, 0xd11, 0xe72, 0xe32, 0x1234, + 0xd09, 0xe62, 0xe22, 0x1214, 0xe02, 0xe82, 0xe42, 0x1254, + 0xd05, 0xe5a, 0xe1a, 0x1204, 0xd15, 0xe7a, 0xe3a, 0x1244, + 0xd0d, 0xe6a, 0xe2a, 0x1224, 0xe0a, 0xe8a, 0xe4a, 0x1264, + 0xd03, 0xe56, 0xe16, 0xf1e, 0xd13, 0xe76, 0xe36, 0x123c, + 0xd0b, 0xe66, 0xe26, 0x121c, 0xe06, 0xe86, 0xe46, 0x125c, + 0xd07, 0xe5e, 0xe1e, 0x120c, 0xd17, 0xe7e, 0xe3e, 0x124c, + 0xd0f, 0xe6e, 0xe2e, 0x122c, 0xe0e, 0xe8e, 0xe4e, 0x126c, + 0xd00, 0xe51, 0xe11, 0xf19, 0xd10, 0xe71, 0xe31, 0x1232, + 0xd08, 0xe61, 0xe21, 0x1212, 0xe01, 0xe81, 0xe41, 0x1252, + 0xd04, 0xe59, 0xe19, 0x1202, 0xd14, 0xe79, 0xe39, 0x1242, + 0xd0c, 0xe69, 0xe29, 0x1222, 0xe09, 0xe89, 0xe49, 0x1262, + 0xd02, 0xe55, 0xe15, 0xf1d, 0xd12, 0xe75, 0xe35, 0x123a, + 0xd0a, 0xe65, 0xe25, 0x121a, 0xe05, 0xe85, 0xe45, 0x125a, + 0xd06, 0xe5d, 0xe1d, 0x120a, 0xd16, 0xe7d, 0xe3d, 0x124a, + 0xd0e, 0xe6d, 0xe2d, 0x122a, 0xe0d, 0xe8d, 0xe4d, 0x126a, + 0xd01, 0xe53, 0xe13, 0xf1b, 0xd11, 0xe73, 0xe33, 0x1236, + 0xd09, 0xe63, 0xe23, 0x1216, 0xe03, 0xe83, 0xe43, 0x1256, + 0xd05, 0xe5b, 0xe1b, 0x1206, 0xd15, 0xe7b, 0xe3b, 0x1246, + 0xd0d, 0xe6b, 0xe2b, 0x1226, 0xe0b, 0xe8b, 0xe4b, 0x1266, + 0xd03, 0xe57, 0xe17, 0xf1f, 0xd13, 0xe77, 0xe37, 0x123e, + 0xd0b, 0xe67, 0xe27, 0x121e, 0xe07, 0xe87, 0xe47, 0x125e, + 0xd07, 0xe5f, 0xe1f, 0x120e, 0xd17, 0xe7f, 0xe3f, 0x124e, + 0xd0f, 0xe6f, 0xe2f, 0x122e, 0xe0f, 0xe8f, 0xe4f, 0x126e, + 0x290, 0x291, 0x292, 0x293, 0x294, 0x295, 0x296, 0x297, + 0x298, 0x299, 0x29a, 0x29b, 
0x29c, 0x29d, 0x29e, 0x29f, + 0x2a0, 0x2a1, 0x2a2, 0x2a3, 0x2a4, 0x2a5, 0x2a6, 0x2a7, + 0x2a8, 0x2a9, 0x2aa, 0x2ab, 0x2ac, 0x2ad, 0x2ae, 0x2af, + 0x2b0, 0x2b1, 0x2b2, 0x2b3, 0x2b4, 0x2b5, 0x2b6, 0x2b7, + 0x2b8, 0x2b9, 0x2ba, 0x2bb, 0x2bc, 0x2bd, 0x2be, 0x2bf, + 0x2c0, 0x2c1, 0x2c2, 0x2c3, 0x2c4, 0x2c5, 0x2c6, 0x2c7, + 0x2c8, 0x2c9, 0x2ca, 0x2cb, 0x2cc, 0x2cd, 0x2ce, 0x2cf, + 0x2d0, 0x2d1, 0x2d2, 0x2d3, 0x2d4, 0x2d5, 0x2d6, 0x2d7, + 0x2d8, 0x2d9, 0x2da, 0x2db, 0x2dc, 0x2dd, 0x2de, 0x2df, + 0x2e0, 0x2e1, 0x2e2, 0x2e3, 0x2e4, 0x2e5, 0x2e6, 0x2e7, + 0x2e8, 0x2e9, 0x2ea, 0x2eb, 0x2ec, 0x2ed, 0x2ee, 0x2ef, + 0x2f0, 0x2f1, 0x2f2, 0x2f3, 0x2f4, 0x2f5, 0x2f6, 0x2f7, + 0x2f8, 0x2f9, 0x2fa, 0x2fb, 0x2fc, 0x2fd, 0x2fe, 0x2ff, +}; + +static const uint16_t elf_zlib_default_dist_table[0x100] = +{ + 0x800, 0x810, 0x808, 0x818, 0x804, 0x814, 0x80c, 0x81c, + 0x802, 0x812, 0x80a, 0x81a, 0x806, 0x816, 0x80e, 0x81e, + 0x801, 0x811, 0x809, 0x819, 0x805, 0x815, 0x80d, 0x81d, + 0x803, 0x813, 0x80b, 0x81b, 0x807, 0x817, 0x80f, 0x81f, + 0x800, 0x810, 0x808, 0x818, 0x804, 0x814, 0x80c, 0x81c, + 0x802, 0x812, 0x80a, 0x81a, 0x806, 0x816, 0x80e, 0x81e, + 0x801, 0x811, 0x809, 0x819, 0x805, 0x815, 0x80d, 0x81d, + 0x803, 0x813, 0x80b, 0x81b, 0x807, 0x817, 0x80f, 0x81f, + 0x800, 0x810, 0x808, 0x818, 0x804, 0x814, 0x80c, 0x81c, + 0x802, 0x812, 0x80a, 0x81a, 0x806, 0x816, 0x80e, 0x81e, + 0x801, 0x811, 0x809, 0x819, 0x805, 0x815, 0x80d, 0x81d, + 0x803, 0x813, 0x80b, 0x81b, 0x807, 0x817, 0x80f, 0x81f, + 0x800, 0x810, 0x808, 0x818, 0x804, 0x814, 0x80c, 0x81c, + 0x802, 0x812, 0x80a, 0x81a, 0x806, 0x816, 0x80e, 0x81e, + 0x801, 0x811, 0x809, 0x819, 0x805, 0x815, 0x80d, 0x81d, + 0x803, 0x813, 0x80b, 0x81b, 0x807, 0x817, 0x80f, 0x81f, + 0x800, 0x810, 0x808, 0x818, 0x804, 0x814, 0x80c, 0x81c, + 0x802, 0x812, 0x80a, 0x81a, 0x806, 0x816, 0x80e, 0x81e, + 0x801, 0x811, 0x809, 0x819, 0x805, 0x815, 0x80d, 0x81d, + 0x803, 0x813, 0x80b, 0x81b, 0x807, 0x817, 0x80f, 0x81f, + 0x800, 0x810, 0x808, 0x818, 0x804, 0x814, 0x80c, 0x81c, + 0x802, 0x812, 0x80a, 0x81a, 0x806, 0x816, 0x80e, 0x81e, + 0x801, 0x811, 0x809, 0x819, 0x805, 0x815, 0x80d, 0x81d, + 0x803, 0x813, 0x80b, 0x81b, 0x807, 0x817, 0x80f, 0x81f, + 0x800, 0x810, 0x808, 0x818, 0x804, 0x814, 0x80c, 0x81c, + 0x802, 0x812, 0x80a, 0x81a, 0x806, 0x816, 0x80e, 0x81e, + 0x801, 0x811, 0x809, 0x819, 0x805, 0x815, 0x80d, 0x81d, + 0x803, 0x813, 0x80b, 0x81b, 0x807, 0x817, 0x80f, 0x81f, + 0x800, 0x810, 0x808, 0x818, 0x804, 0x814, 0x80c, 0x81c, + 0x802, 0x812, 0x80a, 0x81a, 0x806, 0x816, 0x80e, 0x81e, + 0x801, 0x811, 0x809, 0x819, 0x805, 0x815, 0x80d, 0x81d, + 0x803, 0x813, 0x80b, 0x81b, 0x807, 0x817, 0x80f, 0x81f, +}; + +/* Inflate a zlib stream from PIN/SIN to POUT/SOUT. Return 1 on + success, 0 on some error parsing the stream. */ + +static int +elf_zlib_inflate (const unsigned char *pin, size_t sin, uint16_t *zdebug_table, + unsigned char *pout, size_t sout) +{ + unsigned char *porigout; + const unsigned char *pinend; + unsigned char *poutend; + + /* We can apparently see multiple zlib streams concatenated + together, so keep going as long as there is something to read. + The last 4 bytes are the checksum. */ + porigout = pout; + pinend = pin + sin; + poutend = pout + sout; + while ((pinend - pin) > 4) + { + uint64_t val; + unsigned int bits; + int last; + + /* Read the two byte zlib header. */ + + if (unlikely ((pin[0] & 0xf) != 8)) /* 8 is zlib encoding. */ + { + /* Unknown compression method. 
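
   (Illustrative summary, not from the original source: the two header bytes
   checked here are the RFC 1950 CMF and FLG bytes:

     cmf = pin[0];  flg = pin[1];
     (cmf & 0x0f) == 8              deflate compression method
     (cmf >> 4)   <= 7              log2(window size) - 8, window <= 32 KiB
     (flg & 0x20) == 0              no preset dictionary (FDICT clear)
     ((cmf << 8) | flg) % 31 == 0   header check

   which is exactly the sequence of tests that follows.)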
*/ + elf_uncompress_failed (); + return 0; + } + if (unlikely ((pin[0] >> 4) > 7)) + { + /* Window size too large. Other than this check, we don't + care about the window size. */ + elf_uncompress_failed (); + return 0; + } + if (unlikely ((pin[1] & 0x20) != 0)) + { + /* Stream expects a predefined dictionary, but we have no + dictionary. */ + elf_uncompress_failed (); + return 0; + } + val = (pin[0] << 8) | pin[1]; + if (unlikely (val % 31 != 0)) + { + /* Header check failure. */ + elf_uncompress_failed (); + return 0; + } + pin += 2; + + /* Align PIN to a 32-bit boundary. */ + + val = 0; + bits = 0; + while ((((uintptr_t) pin) & 3) != 0) + { + val |= (uint64_t)*pin << bits; + bits += 8; + ++pin; + } + + /* Read blocks until one is marked last. */ + + last = 0; + + while (!last) + { + unsigned int type; + const uint16_t *tlit; + const uint16_t *tdist; + + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) + return 0; + + last = val & 1; + type = (val >> 1) & 3; + val >>= 3; + bits -= 3; + + if (unlikely (type == 3)) + { + /* Invalid block type. */ + elf_uncompress_failed (); + return 0; + } + + if (type == 0) + { + uint16_t len; + uint16_t lenc; + + /* An uncompressed block. */ + + /* If we've read ahead more than a byte, back up. */ + while (bits >= 8) + { + --pin; + bits -= 8; + } + + val = 0; + bits = 0; + if (unlikely ((pinend - pin) < 4)) + { + /* Missing length. */ + elf_uncompress_failed (); + return 0; + } + len = pin[0] | (pin[1] << 8); + lenc = pin[2] | (pin[3] << 8); + pin += 4; + lenc = ~lenc; + if (unlikely (len != lenc)) + { + /* Corrupt data. */ + elf_uncompress_failed (); + return 0; + } + if (unlikely (len > (unsigned int) (pinend - pin) + || len > (unsigned int) (poutend - pout))) + { + /* Not enough space in buffers. */ + elf_uncompress_failed (); + return 0; + } + memcpy (pout, pin, len); + pout += len; + pin += len; + + /* Align PIN. */ + while ((((uintptr_t) pin) & 3) != 0) + { + val |= (uint64_t)*pin << bits; + bits += 8; + ++pin; + } + + /* Go around to read the next block. */ + continue; + } + + if (type == 1) + { + tlit = elf_zlib_default_table; + tdist = elf_zlib_default_dist_table; + } + else + { + unsigned int nlit; + unsigned int ndist; + unsigned int nclen; + unsigned char codebits[19]; + unsigned char *plenbase; + unsigned char *plen; + unsigned char *plenend; + + /* Read a Huffman encoding table. The various magic + numbers here are from RFC 1951. */ + + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) + return 0; + + nlit = (val & 0x1f) + 257; + val >>= 5; + ndist = (val & 0x1f) + 1; + val >>= 5; + nclen = (val & 0xf) + 4; + val >>= 4; + bits -= 14; + if (unlikely (nlit > 286 || ndist > 30)) + { + /* Values out of range. */ + elf_uncompress_failed (); + return 0; + } + + /* Read and build the table used to compress the + literal, length, and distance codes. */ + + memset(&codebits[0], 0, 19); + + /* There are always at least 4 elements in the + table. 
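
   (Illustrative aside, not from the original source: the scrambled-looking
   assignment order in the reads that follow is the fixed code-length-code
   order from RFC 1951 section 3.2.7; written as an array it would be

     static const unsigned char cl_order[19] =
       { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };

   so codebits[16] is read first, then codebits[17], and so on.  Only the
   first NCLEN entries of that order appear in the stream, which is why the
   reads bail out through codebitsdone once NCLEN values have been
   consumed.)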
*/ + + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) + return 0; + + codebits[16] = val & 7; + codebits[17] = (val >> 3) & 7; + codebits[18] = (val >> 6) & 7; + codebits[0] = (val >> 9) & 7; + val >>= 12; + bits -= 12; + + if (nclen == 4) + goto codebitsdone; + + codebits[8] = val & 7; + val >>= 3; + bits -= 3; + + if (nclen == 5) + goto codebitsdone; + + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) + return 0; + + codebits[7] = val & 7; + val >>= 3; + bits -= 3; + + if (nclen == 6) + goto codebitsdone; + + codebits[9] = val & 7; + val >>= 3; + bits -= 3; + + if (nclen == 7) + goto codebitsdone; + + codebits[6] = val & 7; + val >>= 3; + bits -= 3; + + if (nclen == 8) + goto codebitsdone; + + codebits[10] = val & 7; + val >>= 3; + bits -= 3; + + if (nclen == 9) + goto codebitsdone; + + codebits[5] = val & 7; + val >>= 3; + bits -= 3; + + if (nclen == 10) + goto codebitsdone; + + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) + return 0; + + codebits[11] = val & 7; + val >>= 3; + bits -= 3; + + if (nclen == 11) + goto codebitsdone; + + codebits[4] = val & 7; + val >>= 3; + bits -= 3; + + if (nclen == 12) + goto codebitsdone; + + codebits[12] = val & 7; + val >>= 3; + bits -= 3; + + if (nclen == 13) + goto codebitsdone; + + codebits[3] = val & 7; + val >>= 3; + bits -= 3; + + if (nclen == 14) + goto codebitsdone; + + codebits[13] = val & 7; + val >>= 3; + bits -= 3; + + if (nclen == 15) + goto codebitsdone; + + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) + return 0; + + codebits[2] = val & 7; + val >>= 3; + bits -= 3; + + if (nclen == 16) + goto codebitsdone; + + codebits[14] = val & 7; + val >>= 3; + bits -= 3; + + if (nclen == 17) + goto codebitsdone; + + codebits[1] = val & 7; + val >>= 3; + bits -= 3; + + if (nclen == 18) + goto codebitsdone; + + codebits[15] = val & 7; + val >>= 3; + bits -= 3; + + codebitsdone: + + if (!elf_zlib_inflate_table (codebits, 19, zdebug_table, + zdebug_table)) + return 0; + + /* Read the compressed bit lengths of the literal, + length, and distance codes. We have allocated space + at the end of zdebug_table to hold them. */ + + plenbase = (((unsigned char *) zdebug_table) + + ZLIB_TABLE_CODELEN_OFFSET); + plen = plenbase; + plenend = plen + nlit + ndist; + while (plen < plenend) + { + uint16_t t; + unsigned int b; + uint16_t v; + + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) + return 0; + + t = zdebug_table[val & 0xff]; + + /* The compression here uses bit lengths up to 7, so + a secondary table is never necessary. */ + if (unlikely ((t & (1U << ZLIB_HUFFMAN_SECONDARY_SHIFT)) + != 0)) + { + elf_uncompress_failed (); + return 0; + } + + b = (t >> ZLIB_HUFFMAN_BITS_SHIFT) & ZLIB_HUFFMAN_BITS_MASK; + val >>= b + 1; + bits -= b + 1; + + v = t & ZLIB_HUFFMAN_VALUE_MASK; + if (v < 16) + *plen++ = v; + else if (v == 16) + { + unsigned int c; + unsigned int prev; + + /* Copy previous entry 3 to 6 times. */ + + if (unlikely (plen == plenbase)) + { + elf_uncompress_failed (); + return 0; + } + + /* We used up to 7 bits since the last + elf_fetch_bits, so we have at least 8 bits + available here. 
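
   (Illustrative aside, not from the original source: the three repeat codes
   handled in this loop are defined by RFC 1951 section 3.2.7 as

     16: repeat the previous length  3..6   times (2 extra bits)
     17: emit length 0               3..10  times (3 extra bits)
     18: emit length 0              11..138 times (7 extra bits)

   which is why this branch and the two below read 2, 3 and 7 extra bits
   respectively.)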
*/ + + c = 3 + (val & 0x3); + val >>= 2; + bits -= 2; + if (unlikely ((unsigned int) (plenend - plen) < c)) + { + elf_uncompress_failed (); + return 0; + } + + prev = plen[-1]; + switch (c) + { + case 6: + *plen++ = prev; + ATTRIBUTE_FALLTHROUGH; + case 5: + *plen++ = prev; + ATTRIBUTE_FALLTHROUGH; + case 4: + *plen++ = prev; + } + *plen++ = prev; + *plen++ = prev; + *plen++ = prev; + } + else if (v == 17) + { + unsigned int c; + + /* Store zero 3 to 10 times. */ + + /* We used up to 7 bits since the last + elf_fetch_bits, so we have at least 8 bits + available here. */ + + c = 3 + (val & 0x7); + val >>= 3; + bits -= 3; + if (unlikely ((unsigned int) (plenend - plen) < c)) + { + elf_uncompress_failed (); + return 0; + } + + switch (c) + { + case 10: + *plen++ = 0; + ATTRIBUTE_FALLTHROUGH; + case 9: + *plen++ = 0; + ATTRIBUTE_FALLTHROUGH; + case 8: + *plen++ = 0; + ATTRIBUTE_FALLTHROUGH; + case 7: + *plen++ = 0; + ATTRIBUTE_FALLTHROUGH; + case 6: + *plen++ = 0; + ATTRIBUTE_FALLTHROUGH; + case 5: + *plen++ = 0; + ATTRIBUTE_FALLTHROUGH; + case 4: + *plen++ = 0; + } + *plen++ = 0; + *plen++ = 0; + *plen++ = 0; + } + else if (v == 18) + { + unsigned int c; + + /* Store zero 11 to 138 times. */ + + /* We used up to 7 bits since the last + elf_fetch_bits, so we have at least 8 bits + available here. */ + + c = 11 + (val & 0x7f); + val >>= 7; + bits -= 7; + if (unlikely ((unsigned int) (plenend - plen) < c)) + { + elf_uncompress_failed (); + return 0; + } + + memset (plen, 0, c); + plen += c; + } + else + { + elf_uncompress_failed (); + return 0; + } + } + + /* Make sure that the stop code can appear. */ + + plen = plenbase; + if (unlikely (plen[256] == 0)) + { + elf_uncompress_failed (); + return 0; + } + + /* Build the decompression tables. */ + + if (!elf_zlib_inflate_table (plen, nlit, zdebug_table, + zdebug_table)) + return 0; + if (!elf_zlib_inflate_table (plen + nlit, ndist, zdebug_table, + (zdebug_table + + ZLIB_HUFFMAN_TABLE_SIZE))) + return 0; + tlit = zdebug_table; + tdist = zdebug_table + ZLIB_HUFFMAN_TABLE_SIZE; + } + + /* Inflate values until the end of the block. This is the + main loop of the inflation code. */ + + while (1) + { + uint16_t t; + unsigned int b; + uint16_t v; + unsigned int lit; + + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) + return 0; + + t = tlit[val & 0xff]; + b = (t >> ZLIB_HUFFMAN_BITS_SHIFT) & ZLIB_HUFFMAN_BITS_MASK; + v = t & ZLIB_HUFFMAN_VALUE_MASK; + + if ((t & (1U << ZLIB_HUFFMAN_SECONDARY_SHIFT)) == 0) + { + lit = v; + val >>= b + 1; + bits -= b + 1; + } + else + { + t = tlit[v + 0x100 + ((val >> 8) & ((1U << b) - 1))]; + b = (t >> ZLIB_HUFFMAN_BITS_SHIFT) & ZLIB_HUFFMAN_BITS_MASK; + lit = t & ZLIB_HUFFMAN_VALUE_MASK; + val >>= b + 8; + bits -= b + 8; + } + + if (lit < 256) + { + if (unlikely (pout == poutend)) + { + elf_uncompress_failed (); + return 0; + } + + *pout++ = lit; + + /* We will need to write the next byte soon. We ask + for high temporal locality because we will write + to the whole cache line soon. */ + __builtin_prefetch (pout, 1, 3); + } + else if (lit == 256) + { + /* The end of the block. */ + break; + } + else + { + unsigned int dist; + unsigned int len; + + /* Convert lit into a length. */ + + if (lit < 265) + len = lit - 257 + 3; + else if (lit == 285) + len = 258; + else if (unlikely (lit > 285)) + { + elf_uncompress_failed (); + return 0; + } + else + { + unsigned int extra; + + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) + return 0; + + /* This is an expression for the table of length + codes in RFC 1951 3.2.5. 
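
   (Worked example, not from the original source: for code 270 the
   subtraction gives lit = 5, so

     extra = (5 >> 2) + 1             = 2
     len   = ((5 & 3) << 2)           = 4
     len  += 11                      -> 15
     len  += ((1 << 1) - 1) << 3     -> 23

   and the 2 extra bits read from the stream give lengths 23..26, matching
   the RFC 1951 length-code table.)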
*/ + lit -= 265; + extra = (lit >> 2) + 1; + len = (lit & 3) << extra; + len += 11; + len += ((1U << (extra - 1)) - 1) << 3; + len += val & ((1U << extra) - 1); + val >>= extra; + bits -= extra; + } + + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) + return 0; + + t = tdist[val & 0xff]; + b = (t >> ZLIB_HUFFMAN_BITS_SHIFT) & ZLIB_HUFFMAN_BITS_MASK; + v = t & ZLIB_HUFFMAN_VALUE_MASK; + + if ((t & (1U << ZLIB_HUFFMAN_SECONDARY_SHIFT)) == 0) + { + dist = v; + val >>= b + 1; + bits -= b + 1; + } + else + { + t = tdist[v + 0x100 + ((val >> 8) & ((1U << b) - 1))]; + b = ((t >> ZLIB_HUFFMAN_BITS_SHIFT) + & ZLIB_HUFFMAN_BITS_MASK); + dist = t & ZLIB_HUFFMAN_VALUE_MASK; + val >>= b + 8; + bits -= b + 8; + } + + /* Convert dist to a distance. */ + + if (dist == 0) + { + /* A distance of 1. A common case, meaning + repeat the last character LEN times. */ + + if (unlikely (pout == porigout)) + { + elf_uncompress_failed (); + return 0; + } + + if (unlikely ((unsigned int) (poutend - pout) < len)) + { + elf_uncompress_failed (); + return 0; + } + + memset (pout, pout[-1], len); + pout += len; + } + else if (unlikely (dist > 29)) + { + elf_uncompress_failed (); + return 0; + } + else + { + if (dist < 4) + dist = dist + 1; + else + { + unsigned int extra; + + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) + return 0; + + /* This is an expression for the table of + distance codes in RFC 1951 3.2.5. */ + dist -= 4; + extra = (dist >> 1) + 1; + dist = (dist & 1) << extra; + dist += 5; + dist += ((1U << (extra - 1)) - 1) << 2; + dist += val & ((1U << extra) - 1); + val >>= extra; + bits -= extra; + } + + /* Go back dist bytes, and copy len bytes from + there. */ + + if (unlikely ((unsigned int) (pout - porigout) < dist)) + { + elf_uncompress_failed (); + return 0; + } + + if (unlikely ((unsigned int) (poutend - pout) < len)) + { + elf_uncompress_failed (); + return 0; + } + + if (dist >= len) + { + memcpy (pout, pout - dist, len); + pout += len; + } + else + { + while (len > 0) + { + unsigned int copy; + + copy = len < dist ? len : dist; + memcpy (pout, pout - dist, copy); + len -= copy; + pout += copy; + } + } + } + } + } + } + } + + /* We should have filled the output buffer. */ + if (unlikely (pout != poutend)) + { + elf_uncompress_failed (); + return 0; + } + + return 1; +} + +/* Verify the zlib checksum. The checksum is in the 4 bytes at + CHECKBYTES, and the uncompressed data is at UNCOMPRESSED / + UNCOMPRESSED_SIZE. Returns 1 on success, 0 on failure. */ + +static int +elf_zlib_verify_checksum (const unsigned char *checkbytes, + const unsigned char *uncompressed, + size_t uncompressed_size) +{ + unsigned int i; + unsigned int cksum; + const unsigned char *p; + uint32_t s1; + uint32_t s2; + size_t hsz; + + cksum = 0; + for (i = 0; i < 4; i++) + cksum = (cksum << 8) | checkbytes[i]; + + s1 = 1; + s2 = 0; + + /* Minimize modulo operations. */ + + p = uncompressed; + hsz = uncompressed_size; + while (hsz >= 5552) + { + for (i = 0; i < 5552; i += 16) + { + /* Manually unroll loop 16 times. 
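
   (Illustrative aside, not from the original source: these sums compute the
   zlib Adler-32 checksum.  s1 is 1 plus the sum of all bytes and s2 is the
   sum of the successive s1 values, both modulo 65521, and the stored
   checksum is (s2 << 16) | s1.  5552 is the usual zlib NMAX constant: the
   longest run of 0xff bytes that can be accumulated without s2 overflowing
   an unsigned 32-bit value, so the expensive modulo only has to be taken
   once per 5552-byte chunk.)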
*/ + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + } + hsz -= 5552; + s1 %= 65521; + s2 %= 65521; + } + + while (hsz >= 16) + { + /* Manually unroll loop 16 times. */ + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + s1 = s1 + *p++; + s2 = s2 + s1; + + hsz -= 16; + } + + for (i = 0; i < hsz; ++i) + { + s1 = s1 + *p++; + s2 = s2 + s1; + } + + s1 %= 65521; + s2 %= 65521; + + if (unlikely ((s2 << 16) + s1 != cksum)) + { + elf_uncompress_failed (); + return 0; + } + + return 1; +} + +/* Inflate a zlib stream from PIN/SIN to POUT/SOUT, and verify the + checksum. Return 1 on success, 0 on error. */ + +static int +elf_zlib_inflate_and_verify (const unsigned char *pin, size_t sin, + uint16_t *zdebug_table, unsigned char *pout, + size_t sout) +{ + if (!elf_zlib_inflate (pin, sin, zdebug_table, pout, sout)) + return 0; + if (!elf_zlib_verify_checksum (pin + sin - 4, pout, sout)) + return 0; + return 1; +} + +/* For working memory during zstd compression, we need + - a literal length FSE table: 512 64-bit values == 4096 bytes + - a match length FSE table: 512 64-bit values == 4096 bytes + - a offset FSE table: 256 64-bit values == 2048 bytes + - a Huffman tree: 2048 uint16_t values == 4096 bytes + - scratch space, one of + - to build an FSE table: 512 uint16_t values == 1024 bytes + - to build a Huffman tree: 512 uint16_t + 256 uint32_t == 2048 bytes +*/ + +#define ZSTD_TABLE_SIZE \ + (2 * 512 * sizeof (struct elf_zstd_fse_baseline_entry) \ + + 256 * sizeof (struct elf_zstd_fse_baseline_entry) \ + + 2048 * sizeof (uint16_t) \ + + 512 * sizeof (uint16_t) + 256 * sizeof (uint32_t)) + +#define ZSTD_TABLE_LITERAL_FSE_OFFSET (0) + +#define ZSTD_TABLE_MATCH_FSE_OFFSET \ + (512 * sizeof (struct elf_zstd_fse_baseline_entry)) + +#define ZSTD_TABLE_OFFSET_FSE_OFFSET \ + (ZSTD_TABLE_MATCH_FSE_OFFSET \ + + 512 * sizeof (struct elf_zstd_fse_baseline_entry)) + +#define ZSTD_TABLE_HUFFMAN_OFFSET \ + (ZSTD_TABLE_OFFSET_FSE_OFFSET \ + + 256 * sizeof (struct elf_zstd_fse_baseline_entry)) + +#define ZSTD_TABLE_WORK_OFFSET \ + (ZSTD_TABLE_HUFFMAN_OFFSET + 2048 * sizeof (uint16_t)) + +/* An entry in a zstd FSE table. */ + +struct elf_zstd_fse_entry +{ + /* The value that this FSE entry represents. */ + unsigned char symbol; + /* The number of bits to read to determine the next state. */ + unsigned char bits; + /* Add the bits to this base to get the next state. 
*/ + uint16_t base; +}; + +static int +elf_zstd_build_fse (const int16_t *, int, uint16_t *, int, + struct elf_zstd_fse_entry *); + +/* Read a zstd FSE table and build the decoding table in *TABLE, updating *PPIN + as it reads. ZDEBUG_TABLE is scratch space; it must be enough for 512 + uint16_t values (1024 bytes). MAXIDX is the maximum number of symbols + permitted. *TABLE_BITS is the maximum number of bits for symbols in the + table: the size of *TABLE is at least 1 << *TABLE_BITS. This updates + *TABLE_BITS to the actual number of bits. Returns 1 on success, 0 on + error. */ + +static int +elf_zstd_read_fse (const unsigned char **ppin, const unsigned char *pinend, + uint16_t *zdebug_table, int maxidx, + struct elf_zstd_fse_entry *table, int *table_bits) +{ + const unsigned char *pin; + int16_t *norm; + uint16_t *next; + uint64_t val; + unsigned int bits; + int accuracy_log; + uint32_t remaining; + uint32_t threshold; + int bits_needed; + int idx; + int prev0; + + pin = *ppin; + + norm = (int16_t *) zdebug_table; + next = zdebug_table + 256; + + if (unlikely (pin + 3 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + + /* Align PIN to a 32-bit boundary. */ + + val = 0; + bits = 0; + while ((((uintptr_t) pin) & 3) != 0) + { + val |= (uint64_t)*pin << bits; + bits += 8; + ++pin; + } + + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) + return 0; + + accuracy_log = (val & 0xf) + 5; + if (accuracy_log > *table_bits) + { + elf_uncompress_failed (); + return 0; + } + *table_bits = accuracy_log; + val >>= 4; + bits -= 4; + + /* This code is mostly copied from the reference implementation. */ + + /* The number of remaining probabilities, plus 1. This sets the number of + bits that need to be read for the next value. */ + remaining = (1 << accuracy_log) + 1; + + /* The current difference between small and large values, which depends on + the number of remaining values. Small values use one less bit. */ + threshold = 1 << accuracy_log; + + /* The number of bits used to compute threshold. */ + bits_needed = accuracy_log + 1; + + /* The next character value. */ + idx = 0; + + /* Whether the last count was 0. */ + prev0 = 0; + + while (remaining > 1 && idx <= maxidx) + { + uint32_t max; + int32_t count; + + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) + return 0; + + if (prev0) + { + int zidx; + + /* Previous count was 0, so there is a 2-bit repeat flag. If the + 2-bit flag is 0b11, it adds 3 and then there is another repeat + flag. */ + zidx = idx; + while ((val & 0xfff) == 0xfff) + { + zidx += 3 * 6; + val >>= 12; + bits -= 12; + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) + return 0; + } + while ((val & 3) == 3) + { + zidx += 3; + val >>= 2; + bits -= 2; + if (!elf_fetch_bits (&pin, pinend, &val, &bits)) + return 0; + } + /* We have at least 13 bits here, don't need to fetch. */ + zidx += val & 3; + val >>= 2; + bits -= 2; + + if (unlikely (zidx > maxidx)) + { + elf_uncompress_failed (); + return 0; + } + + for (; idx < zidx; idx++) + norm[idx] = 0; + + prev0 = 0; + continue; + } + + max = (2 * threshold - 1) - remaining; + if ((val & (threshold - 1)) < max) + { + /* A small value. */ + count = (int32_t) ((uint32_t) val & (threshold - 1)); + val >>= bits_needed - 1; + bits -= bits_needed - 1; + } + else + { + /* A large value. 
*/ + count = (int32_t) ((uint32_t) val & (2 * threshold - 1)); + if (count >= (int32_t) threshold) + count -= (int32_t) max; + val >>= bits_needed; + bits -= bits_needed; + } + + count--; + if (count >= 0) + remaining -= count; + else + remaining--; + if (unlikely (idx >= 256)) + { + elf_uncompress_failed (); + return 0; + } + norm[idx] = (int16_t) count; + ++idx; + + prev0 = count == 0; + + while (remaining < threshold) + { + bits_needed--; + threshold >>= 1; + } + } + + if (unlikely (remaining != 1)) + { + elf_uncompress_failed (); + return 0; + } + + /* If we've read ahead more than a byte, back up. */ + while (bits >= 8) + { + --pin; + bits -= 8; + } + + *ppin = pin; + + for (; idx <= maxidx; idx++) + norm[idx] = 0; + + return elf_zstd_build_fse (norm, idx, next, *table_bits, table); +} + +/* Build the FSE decoding table from a list of probabilities. This reads from + NORM of length IDX, uses NEXT as scratch space, and writes to *TABLE, whose + size is TABLE_BITS. */ + +static int +elf_zstd_build_fse (const int16_t *norm, int idx, uint16_t *next, + int table_bits, struct elf_zstd_fse_entry *table) +{ + int table_size; + int high_threshold; + int i; + int pos; + int step; + int mask; + + table_size = 1 << table_bits; + high_threshold = table_size - 1; + for (i = 0; i < idx; i++) + { + int16_t n; + + n = norm[i]; + if (n >= 0) + next[i] = (uint16_t) n; + else + { + table[high_threshold].symbol = (unsigned char) i; + high_threshold--; + next[i] = 1; + } + } + + pos = 0; + step = (table_size >> 1) + (table_size >> 3) + 3; + mask = table_size - 1; + for (i = 0; i < idx; i++) + { + int n; + int j; + + n = (int) norm[i]; + for (j = 0; j < n; j++) + { + table[pos].symbol = (unsigned char) i; + pos = (pos + step) & mask; + while (unlikely (pos > high_threshold)) + pos = (pos + step) & mask; + } + } + if (unlikely (pos != 0)) + { + elf_uncompress_failed (); + return 0; + } + + for (i = 0; i < table_size; i++) + { + unsigned char sym; + uint16_t next_state; + int high_bit; + int bits; + + sym = table[i].symbol; + next_state = next[sym]; + ++next[sym]; + + if (next_state == 0) + { + elf_uncompress_failed (); + return 0; + } + high_bit = 31 - __builtin_clz (next_state); + + bits = table_bits - high_bit; + table[i].bits = (unsigned char) bits; + table[i].base = (uint16_t) ((next_state << bits) - table_size); + } + + return 1; +} + +/* Encode the baseline and bits into a single 32-bit value. */ + +#define ZSTD_ENCODE_BASELINE_BITS(baseline, basebits) \ + ((uint32_t)(baseline) | ((uint32_t)(basebits) << 24)) + +#define ZSTD_DECODE_BASELINE(baseline_basebits) \ + ((uint32_t)(baseline_basebits) & 0xffffff) + +#define ZSTD_DECODE_BASEBITS(baseline_basebits) \ + ((uint32_t)(baseline_basebits) >> 24) + +/* Given a literal length code, we need to read a number of bits and add that + to a baseline. For states 0 to 15 the baseline is the state and the number + of bits is zero. 
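
   (Illustrative aside, not from the original source: taking the first entry
   of the table below, ZSTD_ENCODE_BASELINE_BITS(16, 1) packs baseline 16
   into the low 24 bits and the extra-bit count 1 into the high 8 bits, so
   decoding literal-length code 16 means reading one extra bit b and using
   length 16 + b, i.e. 16 or 17:

     uint32_t packed   = ZSTD_ENCODE_BASELINE_BITS (16, 1);
     uint32_t baseline = ZSTD_DECODE_BASELINE (packed);   // 16
     uint32_t nbits    = ZSTD_DECODE_BASEBITS (packed);   // 1

   The same packing is used for the match-length table further on.)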
*/ + +#define ZSTD_LITERAL_LENGTH_BASELINE_OFFSET (16) + +static const uint32_t elf_zstd_literal_length_base[] = +{ + ZSTD_ENCODE_BASELINE_BITS(16, 1), + ZSTD_ENCODE_BASELINE_BITS(18, 1), + ZSTD_ENCODE_BASELINE_BITS(20, 1), + ZSTD_ENCODE_BASELINE_BITS(22, 1), + ZSTD_ENCODE_BASELINE_BITS(24, 2), + ZSTD_ENCODE_BASELINE_BITS(28, 2), + ZSTD_ENCODE_BASELINE_BITS(32, 3), + ZSTD_ENCODE_BASELINE_BITS(40, 3), + ZSTD_ENCODE_BASELINE_BITS(48, 4), + ZSTD_ENCODE_BASELINE_BITS(64, 6), + ZSTD_ENCODE_BASELINE_BITS(128, 7), + ZSTD_ENCODE_BASELINE_BITS(256, 8), + ZSTD_ENCODE_BASELINE_BITS(512, 9), + ZSTD_ENCODE_BASELINE_BITS(1024, 10), + ZSTD_ENCODE_BASELINE_BITS(2048, 11), + ZSTD_ENCODE_BASELINE_BITS(4096, 12), + ZSTD_ENCODE_BASELINE_BITS(8192, 13), + ZSTD_ENCODE_BASELINE_BITS(16384, 14), + ZSTD_ENCODE_BASELINE_BITS(32768, 15), + ZSTD_ENCODE_BASELINE_BITS(65536, 16) +}; + +/* The same applies to match length codes. For states 0 to 31 the baseline is + the state + 3 and the number of bits is zero. */ + +#define ZSTD_MATCH_LENGTH_BASELINE_OFFSET (32) + +static const uint32_t elf_zstd_match_length_base[] = +{ + ZSTD_ENCODE_BASELINE_BITS(35, 1), + ZSTD_ENCODE_BASELINE_BITS(37, 1), + ZSTD_ENCODE_BASELINE_BITS(39, 1), + ZSTD_ENCODE_BASELINE_BITS(41, 1), + ZSTD_ENCODE_BASELINE_BITS(43, 2), + ZSTD_ENCODE_BASELINE_BITS(47, 2), + ZSTD_ENCODE_BASELINE_BITS(51, 3), + ZSTD_ENCODE_BASELINE_BITS(59, 3), + ZSTD_ENCODE_BASELINE_BITS(67, 4), + ZSTD_ENCODE_BASELINE_BITS(83, 4), + ZSTD_ENCODE_BASELINE_BITS(99, 5), + ZSTD_ENCODE_BASELINE_BITS(131, 7), + ZSTD_ENCODE_BASELINE_BITS(259, 8), + ZSTD_ENCODE_BASELINE_BITS(515, 9), + ZSTD_ENCODE_BASELINE_BITS(1027, 10), + ZSTD_ENCODE_BASELINE_BITS(2051, 11), + ZSTD_ENCODE_BASELINE_BITS(4099, 12), + ZSTD_ENCODE_BASELINE_BITS(8195, 13), + ZSTD_ENCODE_BASELINE_BITS(16387, 14), + ZSTD_ENCODE_BASELINE_BITS(32771, 15), + ZSTD_ENCODE_BASELINE_BITS(65539, 16) +}; + +/* An entry in an FSE table used for literal/match/length values. For these we + have to map the symbol to a baseline value, and we have to read zero or more + bits and add that value to the baseline value. Rather than look the values + up in a separate table, we grow the FSE table so that we get better memory + caching. */ + +struct elf_zstd_fse_baseline_entry +{ + /* The baseline for the value that this FSE entry represents.. */ + uint32_t baseline; + /* The number of bits to read to add to the baseline. */ + unsigned char basebits; + /* The number of bits to read to determine the next state. */ + unsigned char bits; + /* Add the bits to this base to get the next state. */ + uint16_t base; +}; + +/* Convert the literal length FSE table FSE_TABLE to an FSE baseline table at + BASELINE_TABLE. Note that FSE_TABLE and BASELINE_TABLE will overlap. */ + +static int +elf_zstd_make_literal_baseline_fse ( + const struct elf_zstd_fse_entry *fse_table, + int table_bits, + struct elf_zstd_fse_baseline_entry *baseline_table) +{ + size_t count; + const struct elf_zstd_fse_entry *pfse; + struct elf_zstd_fse_baseline_entry *pbaseline; + + /* Convert backward to avoid overlap. 
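
   (Aside, not from the original source: the backward walk matters because,
   per the table-size macros earlier in this section, the 4-byte
   elf_zstd_fse_entry values and the 8-byte elf_zstd_fse_baseline_entry
   values occupy the same buffer; converting from the last entry toward the
   first means each source entry is read before the wider destination entry
   can overwrite it.)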
*/ + + count = 1U << table_bits; + pfse = fse_table + count; + pbaseline = baseline_table + count; + while (pfse > fse_table) + { + unsigned char symbol; + unsigned char bits; + uint16_t base; + + --pfse; + --pbaseline; + symbol = pfse->symbol; + bits = pfse->bits; + base = pfse->base; + if (symbol < ZSTD_LITERAL_LENGTH_BASELINE_OFFSET) + { + pbaseline->baseline = (uint32_t)symbol; + pbaseline->basebits = 0; + } + else + { + unsigned int idx; + uint32_t basebits; + + if (unlikely (symbol > 35)) + { + elf_uncompress_failed (); + return 0; + } + idx = symbol - ZSTD_LITERAL_LENGTH_BASELINE_OFFSET; + basebits = elf_zstd_literal_length_base[idx]; + pbaseline->baseline = ZSTD_DECODE_BASELINE(basebits); + pbaseline->basebits = ZSTD_DECODE_BASEBITS(basebits); + } + pbaseline->bits = bits; + pbaseline->base = base; + } + + return 1; +} + +/* Convert the offset length FSE table FSE_TABLE to an FSE baseline table at + BASELINE_TABLE. Note that FSE_TABLE and BASELINE_TABLE will overlap. */ + +static int +elf_zstd_make_offset_baseline_fse ( + const struct elf_zstd_fse_entry *fse_table, + int table_bits, + struct elf_zstd_fse_baseline_entry *baseline_table) +{ + size_t count; + const struct elf_zstd_fse_entry *pfse; + struct elf_zstd_fse_baseline_entry *pbaseline; + + /* Convert backward to avoid overlap. */ + + count = 1U << table_bits; + pfse = fse_table + count; + pbaseline = baseline_table + count; + while (pfse > fse_table) + { + unsigned char symbol; + unsigned char bits; + uint16_t base; + + --pfse; + --pbaseline; + symbol = pfse->symbol; + bits = pfse->bits; + base = pfse->base; + if (unlikely (symbol > 31)) + { + elf_uncompress_failed (); + return 0; + } + + /* The simple way to write this is + + pbaseline->baseline = (uint32_t)1 << symbol; + pbaseline->basebits = symbol; + + That will give us an offset value that corresponds to the one + described in the RFC. However, for offset values > 3, we have to + subtract 3. And for offset values 1, 2, 3 we use a repeated offset. + The baseline is always a power of 2, and is never 0, so for these low + values we will see one entry that is baseline 1, basebits 0, and one + entry that is baseline 2, basebits 1. All other entries will have + baseline >= 4 and basebits >= 2. + + So we can check for RFC offset <= 3 by checking for basebits <= 1. + And that means that we can subtract 3 here and not worry about doing + it in the hot loop. */ + + pbaseline->baseline = (uint32_t)1 << symbol; + if (symbol >= 2) + pbaseline->baseline -= 3; + pbaseline->basebits = symbol; + pbaseline->bits = bits; + pbaseline->base = base; + } + + return 1; +} + +/* Convert the match length FSE table FSE_TABLE to an FSE baseline table at + BASELINE_TABLE. Note that FSE_TABLE and BASELINE_TABLE will overlap. */ + +static int +elf_zstd_make_match_baseline_fse ( + const struct elf_zstd_fse_entry *fse_table, + int table_bits, + struct elf_zstd_fse_baseline_entry *baseline_table) +{ + size_t count; + const struct elf_zstd_fse_entry *pfse; + struct elf_zstd_fse_baseline_entry *pbaseline; + + /* Convert backward to avoid overlap. 
*/ + + count = 1U << table_bits; + pfse = fse_table + count; + pbaseline = baseline_table + count; + while (pfse > fse_table) + { + unsigned char symbol; + unsigned char bits; + uint16_t base; + + --pfse; + --pbaseline; + symbol = pfse->symbol; + bits = pfse->bits; + base = pfse->base; + if (symbol < ZSTD_MATCH_LENGTH_BASELINE_OFFSET) + { + pbaseline->baseline = (uint32_t)symbol + 3; + pbaseline->basebits = 0; + } + else + { + unsigned int idx; + uint32_t basebits; + + if (unlikely (symbol > 52)) + { + elf_uncompress_failed (); + return 0; + } + idx = symbol - ZSTD_MATCH_LENGTH_BASELINE_OFFSET; + basebits = elf_zstd_match_length_base[idx]; + pbaseline->baseline = ZSTD_DECODE_BASELINE(basebits); + pbaseline->basebits = ZSTD_DECODE_BASEBITS(basebits); + } + pbaseline->bits = bits; + pbaseline->base = base; + } + + return 1; +} + +#ifdef BACKTRACE_GENERATE_ZSTD_FSE_TABLES + +/* Used to generate the predefined FSE decoding tables for zstd. */ + +#include + +/* These values are straight from RFC 8878. */ + +static int16_t lit[36] = +{ + 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, + -1,-1,-1,-1 +}; + +static int16_t match[53] = +{ + 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1, + -1,-1,-1,-1,-1 +}; + +static int16_t offset[29] = +{ + 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 +}; + +static uint16_t next[256]; + +static void +print_table (const struct elf_zstd_fse_baseline_entry *table, size_t size) +{ + size_t i; + + printf ("{\n"); + for (i = 0; i < size; i += 3) + { + int j; + + printf (" "); + for (j = 0; j < 3 && i + j < size; ++j) + printf (" { %u, %d, %d, %d },", table[i + j].baseline, + table[i + j].basebits, table[i + j].bits, + table[i + j].base); + printf ("\n"); + } + printf ("};\n"); +} + +int +main () +{ + struct elf_zstd_fse_entry lit_table[64]; + struct elf_zstd_fse_baseline_entry lit_baseline[64]; + struct elf_zstd_fse_entry match_table[64]; + struct elf_zstd_fse_baseline_entry match_baseline[64]; + struct elf_zstd_fse_entry offset_table[32]; + struct elf_zstd_fse_baseline_entry offset_baseline[32]; + + if (!elf_zstd_build_fse (lit, sizeof lit / sizeof lit[0], next, + 6, lit_table)) + { + fprintf (stderr, "elf_zstd_build_fse failed\n"); + exit (EXIT_FAILURE); + } + + if (!elf_zstd_make_literal_baseline_fse (lit_table, 6, lit_baseline)) + { + fprintf (stderr, "elf_zstd_make_literal_baseline_fse failed\n"); + exit (EXIT_FAILURE); + } + + printf ("static const struct elf_zstd_fse_baseline_entry " + "elf_zstd_lit_table[64] =\n"); + print_table (lit_baseline, + sizeof lit_baseline / sizeof lit_baseline[0]); + printf ("\n"); + + if (!elf_zstd_build_fse (match, sizeof match / sizeof match[0], next, + 6, match_table)) + { + fprintf (stderr, "elf_zstd_build_fse failed\n"); + exit (EXIT_FAILURE); + } + + if (!elf_zstd_make_match_baseline_fse (match_table, 6, match_baseline)) + { + fprintf (stderr, "elf_zstd_make_match_baseline_fse failed\n"); + exit (EXIT_FAILURE); + } + + printf ("static const struct elf_zstd_fse_baseline_entry " + "elf_zstd_match_table[64] =\n"); + print_table (match_baseline, + sizeof match_baseline / sizeof match_baseline[0]); + printf ("\n"); + + if (!elf_zstd_build_fse (offset, sizeof offset / sizeof offset[0], next, + 5, offset_table)) + { + fprintf (stderr, "elf_zstd_build_fse failed\n"); + exit (EXIT_FAILURE); + } + + if (!elf_zstd_make_offset_baseline_fse 
(offset_table, 5, offset_baseline)) + { + fprintf (stderr, "elf_zstd_make_offset_baseline_fse failed\n"); + exit (EXIT_FAILURE); + } + + printf ("static const struct elf_zstd_fse_baseline_entry " + "elf_zstd_offset_table[32] =\n"); + print_table (offset_baseline, + sizeof offset_baseline / sizeof offset_baseline[0]); + printf ("\n"); + + return 0; +} + +#endif + +/* The fixed tables generated by the #ifdef'ed out main function + above. */ + +static const struct elf_zstd_fse_baseline_entry elf_zstd_lit_table[64] = +{ + { 0, 0, 4, 0 }, { 0, 0, 4, 16 }, { 1, 0, 5, 32 }, + { 3, 0, 5, 0 }, { 4, 0, 5, 0 }, { 6, 0, 5, 0 }, + { 7, 0, 5, 0 }, { 9, 0, 5, 0 }, { 10, 0, 5, 0 }, + { 12, 0, 5, 0 }, { 14, 0, 6, 0 }, { 16, 1, 5, 0 }, + { 20, 1, 5, 0 }, { 22, 1, 5, 0 }, { 28, 2, 5, 0 }, + { 32, 3, 5, 0 }, { 48, 4, 5, 0 }, { 64, 6, 5, 32 }, + { 128, 7, 5, 0 }, { 256, 8, 6, 0 }, { 1024, 10, 6, 0 }, + { 4096, 12, 6, 0 }, { 0, 0, 4, 32 }, { 1, 0, 4, 0 }, + { 2, 0, 5, 0 }, { 4, 0, 5, 32 }, { 5, 0, 5, 0 }, + { 7, 0, 5, 32 }, { 8, 0, 5, 0 }, { 10, 0, 5, 32 }, + { 11, 0, 5, 0 }, { 13, 0, 6, 0 }, { 16, 1, 5, 32 }, + { 18, 1, 5, 0 }, { 22, 1, 5, 32 }, { 24, 2, 5, 0 }, + { 32, 3, 5, 32 }, { 40, 3, 5, 0 }, { 64, 6, 4, 0 }, + { 64, 6, 4, 16 }, { 128, 7, 5, 32 }, { 512, 9, 6, 0 }, + { 2048, 11, 6, 0 }, { 0, 0, 4, 48 }, { 1, 0, 4, 16 }, + { 2, 0, 5, 32 }, { 3, 0, 5, 32 }, { 5, 0, 5, 32 }, + { 6, 0, 5, 32 }, { 8, 0, 5, 32 }, { 9, 0, 5, 32 }, + { 11, 0, 5, 32 }, { 12, 0, 5, 32 }, { 15, 0, 6, 0 }, + { 18, 1, 5, 32 }, { 20, 1, 5, 32 }, { 24, 2, 5, 32 }, + { 28, 2, 5, 32 }, { 40, 3, 5, 32 }, { 48, 4, 5, 32 }, + { 65536, 16, 6, 0 }, { 32768, 15, 6, 0 }, { 16384, 14, 6, 0 }, + { 8192, 13, 6, 0 }, +}; + +static const struct elf_zstd_fse_baseline_entry elf_zstd_match_table[64] = +{ + { 3, 0, 6, 0 }, { 4, 0, 4, 0 }, { 5, 0, 5, 32 }, + { 6, 0, 5, 0 }, { 8, 0, 5, 0 }, { 9, 0, 5, 0 }, + { 11, 0, 5, 0 }, { 13, 0, 6, 0 }, { 16, 0, 6, 0 }, + { 19, 0, 6, 0 }, { 22, 0, 6, 0 }, { 25, 0, 6, 0 }, + { 28, 0, 6, 0 }, { 31, 0, 6, 0 }, { 34, 0, 6, 0 }, + { 37, 1, 6, 0 }, { 41, 1, 6, 0 }, { 47, 2, 6, 0 }, + { 59, 3, 6, 0 }, { 83, 4, 6, 0 }, { 131, 7, 6, 0 }, + { 515, 9, 6, 0 }, { 4, 0, 4, 16 }, { 5, 0, 4, 0 }, + { 6, 0, 5, 32 }, { 7, 0, 5, 0 }, { 9, 0, 5, 32 }, + { 10, 0, 5, 0 }, { 12, 0, 6, 0 }, { 15, 0, 6, 0 }, + { 18, 0, 6, 0 }, { 21, 0, 6, 0 }, { 24, 0, 6, 0 }, + { 27, 0, 6, 0 }, { 30, 0, 6, 0 }, { 33, 0, 6, 0 }, + { 35, 1, 6, 0 }, { 39, 1, 6, 0 }, { 43, 2, 6, 0 }, + { 51, 3, 6, 0 }, { 67, 4, 6, 0 }, { 99, 5, 6, 0 }, + { 259, 8, 6, 0 }, { 4, 0, 4, 32 }, { 4, 0, 4, 48 }, + { 5, 0, 4, 16 }, { 7, 0, 5, 32 }, { 8, 0, 5, 32 }, + { 10, 0, 5, 32 }, { 11, 0, 5, 32 }, { 14, 0, 6, 0 }, + { 17, 0, 6, 0 }, { 20, 0, 6, 0 }, { 23, 0, 6, 0 }, + { 26, 0, 6, 0 }, { 29, 0, 6, 0 }, { 32, 0, 6, 0 }, + { 65539, 16, 6, 0 }, { 32771, 15, 6, 0 }, { 16387, 14, 6, 0 }, + { 8195, 13, 6, 0 }, { 4099, 12, 6, 0 }, { 2051, 11, 6, 0 }, + { 1027, 10, 6, 0 }, +}; + +static const struct elf_zstd_fse_baseline_entry elf_zstd_offset_table[32] = +{ + { 1, 0, 5, 0 }, { 61, 6, 4, 0 }, { 509, 9, 5, 0 }, + { 32765, 15, 5, 0 }, { 2097149, 21, 5, 0 }, { 5, 3, 5, 0 }, + { 125, 7, 4, 0 }, { 4093, 12, 5, 0 }, { 262141, 18, 5, 0 }, + { 8388605, 23, 5, 0 }, { 29, 5, 5, 0 }, { 253, 8, 4, 0 }, + { 16381, 14, 5, 0 }, { 1048573, 20, 5, 0 }, { 1, 2, 5, 0 }, + { 125, 7, 4, 16 }, { 2045, 11, 5, 0 }, { 131069, 17, 5, 0 }, + { 4194301, 22, 5, 0 }, { 13, 4, 5, 0 }, { 253, 8, 4, 16 }, + { 8189, 13, 5, 0 }, { 524285, 19, 5, 0 }, { 2, 1, 5, 0 }, + { 61, 6, 4, 16 }, { 1021, 10, 5, 0 }, { 65533, 16, 5, 0 
}, + { 268435453, 28, 5, 0 }, { 134217725, 27, 5, 0 }, { 67108861, 26, 5, 0 }, + { 33554429, 25, 5, 0 }, { 16777213, 24, 5, 0 }, +}; + +/* Read a zstd Huffman table and build the decoding table in *TABLE, reading + and updating *PPIN. This sets *PTABLE_BITS to the number of bits of the + table, such that the table length is 1 << *TABLE_BITS. ZDEBUG_TABLE is + scratch space; it must be enough for 512 uint16_t values + 256 32-bit values + (2048 bytes). Returns 1 on success, 0 on error. */ + +static int +elf_zstd_read_huff (const unsigned char **ppin, const unsigned char *pinend, + uint16_t *zdebug_table, uint16_t *table, int *ptable_bits) +{ + const unsigned char *pin; + unsigned char hdr; + unsigned char *weights; + size_t count; + uint32_t *weight_mark; + size_t i; + uint32_t weight_mask; + size_t table_bits; + + pin = *ppin; + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + hdr = *pin; + ++pin; + + weights = (unsigned char *) zdebug_table; + + if (hdr < 128) + { + /* Table is compressed using FSE. */ + + struct elf_zstd_fse_entry *fse_table; + int fse_table_bits; + uint16_t *scratch; + const unsigned char *pfse; + const unsigned char *pback; + uint64_t val; + unsigned int bits; + unsigned int state1, state2; + + /* SCRATCH is used temporarily by elf_zstd_read_fse. It overlaps + WEIGHTS. */ + scratch = zdebug_table; + fse_table = (struct elf_zstd_fse_entry *) (scratch + 512); + fse_table_bits = 6; + + pfse = pin; + if (!elf_zstd_read_fse (&pfse, pinend, scratch, 255, fse_table, + &fse_table_bits)) + return 0; + + if (unlikely (pin + hdr > pinend)) + { + elf_uncompress_failed (); + return 0; + } + + /* We no longer need SCRATCH. Start recording weights. We need up to + 256 bytes of weights and 64 bytes of rank counts, so it won't overlap + FSE_TABLE. */ + + pback = pin + hdr - 1; + + if (!elf_fetch_backward_init (&pback, pfse, &val, &bits)) + return 0; + + bits -= fse_table_bits; + state1 = (val >> bits) & ((1U << fse_table_bits) - 1); + bits -= fse_table_bits; + state2 = (val >> bits) & ((1U << fse_table_bits) - 1); + + /* There are two independent FSE streams, tracked by STATE1 and STATE2. + We decode them alternately. 
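+	 Each state emits one Huffman weight per step: the entry for the
+	 current state names the weight, and the next few bits of the
+	 backward stream pick the following state.  When the stream runs out
+	 of bits, the loop below finishes by emitting the weights named by
+	 the two states it is currently in.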
*/ + + count = 0; + while (1) + { + struct elf_zstd_fse_entry *pt; + uint64_t v; + + pt = &fse_table[state1]; + + if (unlikely (pin < pinend) && bits < pt->bits) + { + if (unlikely (count >= 254)) + { + elf_uncompress_failed (); + return 0; + } + weights[count] = (unsigned char) pt->symbol; + weights[count + 1] = (unsigned char) fse_table[state2].symbol; + count += 2; + break; + } + + if (unlikely (pt->bits == 0)) + v = 0; + else + { + if (!elf_fetch_bits_backward (&pback, pfse, &val, &bits)) + return 0; + + bits -= pt->bits; + v = (val >> bits) & (((uint64_t)1 << pt->bits) - 1); + } + + state1 = pt->base + v; + + if (unlikely (count >= 255)) + { + elf_uncompress_failed (); + return 0; + } + + weights[count] = pt->symbol; + ++count; + + pt = &fse_table[state2]; + + if (unlikely (pin < pinend && bits < pt->bits)) + { + if (unlikely (count >= 254)) + { + elf_uncompress_failed (); + return 0; + } + weights[count] = (unsigned char) pt->symbol; + weights[count + 1] = (unsigned char) fse_table[state1].symbol; + count += 2; + break; + } + + if (unlikely (pt->bits == 0)) + v = 0; + else + { + if (!elf_fetch_bits_backward (&pback, pfse, &val, &bits)) + return 0; + + bits -= pt->bits; + v = (val >> bits) & (((uint64_t)1 << pt->bits) - 1); + } + + state2 = pt->base + v; + + if (unlikely (count >= 255)) + { + elf_uncompress_failed (); + return 0; + } + + weights[count] = pt->symbol; + ++count; + } + + pin += hdr; + } + else + { + /* Table is not compressed. Each weight is 4 bits. */ + + count = hdr - 127; + if (unlikely (pin + ((count + 1) / 2) >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + for (i = 0; i < count; i += 2) + { + unsigned char b; + + b = *pin; + ++pin; + weights[i] = b >> 4; + weights[i + 1] = b & 0xf; + } + } + + weight_mark = (uint32_t *) (weights + 256); + memset (weight_mark, 0, 13 * sizeof (uint32_t)); + weight_mask = 0; + for (i = 0; i < count; ++i) + { + unsigned char w; + + w = weights[i]; + if (unlikely (w > 12)) + { + elf_uncompress_failed (); + return 0; + } + ++weight_mark[w]; + if (w > 0) + weight_mask += 1U << (w - 1); + } + if (unlikely (weight_mask == 0)) + { + elf_uncompress_failed (); + return 0; + } + + table_bits = 32 - __builtin_clz (weight_mask); + if (unlikely (table_bits > 11)) + { + elf_uncompress_failed (); + return 0; + } + + /* Work out the last weight value, which is omitted because the weights must + sum to a power of two. */ + { + uint32_t left; + uint32_t high_bit; + + left = ((uint32_t)1 << table_bits) - weight_mask; + if (left == 0) + { + elf_uncompress_failed (); + return 0; + } + high_bit = 31 - __builtin_clz (left); + if (((uint32_t)1 << high_bit) != left) + { + elf_uncompress_failed (); + return 0; + } + + if (unlikely (count >= 256)) + { + elf_uncompress_failed (); + return 0; + } + + weights[count] = high_bit + 1; + ++count; + ++weight_mark[high_bit + 1]; + } + + if (weight_mark[1] < 2 || (weight_mark[1] & 1) != 0) + { + elf_uncompress_failed (); + return 0; + } + + /* Change WEIGHT_MARK from a count of weights to the index of the first + symbol for that weight. We shift the indexes to also store how many we + have seen so far, below. 
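+
+   As a small illustrative case (not taken from the zstd spec): with
+   TABLE_BITS of 3 and weight counts { weight 1: 2, weight 2: 1,
+   weight 3: 1 }, the slots used are 2*1 + 1*2 + 1*4 = 8 = 1 << 3, and this
+   loop turns WEIGHT_MARK into { [1] = 0, [2] = 2, [3] = 4 }: the table
+   index at which the entries for each weight begin, since a symbol of
+   weight W fills 1 << (W - 1) consecutive table slots.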
*/ + { + uint32_t next; + + next = 0; + for (i = 0; i < table_bits; ++i) + { + uint32_t cur; + + cur = next; + next += weight_mark[i + 1] << i; + weight_mark[i + 1] = cur; + } + } + + for (i = 0; i < count; ++i) + { + unsigned char weight; + uint32_t length; + uint16_t tval; + size_t start; + uint32_t j; + + weight = weights[i]; + if (weight == 0) + continue; + + length = 1U << (weight - 1); + tval = (i << 8) | (table_bits + 1 - weight); + start = weight_mark[weight]; + for (j = 0; j < length; ++j) + table[start + j] = tval; + weight_mark[weight] += length; + } + + *ppin = pin; + *ptable_bits = (int)table_bits; + + return 1; +} + +/* Read and decompress the literals and store them ending at POUTEND. This + works because we are going to use all the literals in the output, so they + must fit into the output buffer. HUFFMAN_TABLE, and PHUFFMAN_TABLE_BITS + store the Huffman table across calls. SCRATCH is used to read a Huffman + table. Store the start of the decompressed literals in *PPLIT. Update + *PPIN. Return 1 on success, 0 on error. */ + +static int +elf_zstd_read_literals (const unsigned char **ppin, + const unsigned char *pinend, + unsigned char *pout, + unsigned char *poutend, + uint16_t *scratch, + uint16_t *huffman_table, + int *phuffman_table_bits, + unsigned char **pplit) +{ + const unsigned char *pin; + unsigned char *plit; + unsigned char hdr; + uint32_t regenerated_size; + uint32_t compressed_size; + int streams; + uint32_t total_streams_size; + unsigned int huffman_table_bits; + uint64_t huffman_mask; + + pin = *ppin; + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + hdr = *pin; + ++pin; + + if ((hdr & 3) == 0 || (hdr & 3) == 1) + { + int raw; + + /* Raw_Literals_Block or RLE_Literals_Block */ + + raw = (hdr & 3) == 0; + + switch ((hdr >> 2) & 3) + { + case 0: case 2: + regenerated_size = hdr >> 3; + break; + case 1: + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + regenerated_size = (hdr >> 4) + ((uint32_t)(*pin) << 4); + ++pin; + break; + case 3: + if (unlikely (pin + 1 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + regenerated_size = ((hdr >> 4) + + ((uint32_t)*pin << 4) + + ((uint32_t)pin[1] << 12)); + pin += 2; + break; + default: + elf_uncompress_failed (); + return 0; + } + + if (unlikely ((size_t)(poutend - pout) < regenerated_size)) + { + elf_uncompress_failed (); + return 0; + } + + plit = poutend - regenerated_size; + + if (raw) + { + if (unlikely (pin + regenerated_size >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + memcpy (plit, pin, regenerated_size); + pin += regenerated_size; + } + else + { + if (pin >= pinend) + { + elf_uncompress_failed (); + return 0; + } + memset (plit, *pin, regenerated_size); + ++pin; + } + + *ppin = pin; + *pplit = plit; + + return 1; + } + + /* Compressed_Literals_Block or Treeless_Literals_Block */ + + switch ((hdr >> 2) & 3) + { + case 0: case 1: + if (unlikely (pin + 1 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + regenerated_size = (hdr >> 4) | ((uint32_t)(*pin & 0x3f) << 4); + compressed_size = (uint32_t)*pin >> 6 | ((uint32_t)pin[1] << 2); + pin += 2; + streams = ((hdr >> 2) & 3) == 0 ? 
1 : 4; + break; + case 2: + if (unlikely (pin + 2 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + regenerated_size = (((uint32_t)hdr >> 4) + | ((uint32_t)*pin << 4) + | (((uint32_t)pin[1] & 3) << 12)); + compressed_size = (((uint32_t)pin[1] >> 2) + | ((uint32_t)pin[2] << 6)); + pin += 3; + streams = 4; + break; + case 3: + if (unlikely (pin + 3 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + regenerated_size = (((uint32_t)hdr >> 4) + | ((uint32_t)*pin << 4) + | (((uint32_t)pin[1] & 0x3f) << 12)); + compressed_size = (((uint32_t)pin[1] >> 6) + | ((uint32_t)pin[2] << 2) + | ((uint32_t)pin[3] << 10)); + pin += 4; + streams = 4; + break; + default: + elf_uncompress_failed (); + return 0; + } + + if (unlikely (pin + compressed_size > pinend)) + { + elf_uncompress_failed (); + return 0; + } + + pinend = pin + compressed_size; + *ppin = pinend; + + if (unlikely ((size_t)(poutend - pout) < regenerated_size)) + { + elf_uncompress_failed (); + return 0; + } + + plit = poutend - regenerated_size; + + *pplit = plit; + + total_streams_size = compressed_size; + if ((hdr & 3) == 2) + { + const unsigned char *ptable; + + /* Compressed_Literals_Block. Read Huffman tree. */ + + ptable = pin; + if (!elf_zstd_read_huff (&ptable, pinend, scratch, huffman_table, + phuffman_table_bits)) + return 0; + + if (unlikely (total_streams_size < (size_t)(ptable - pin))) + { + elf_uncompress_failed (); + return 0; + } + + total_streams_size -= ptable - pin; + pin = ptable; + } + else + { + /* Treeless_Literals_Block. Reuse previous Huffman tree. */ + if (unlikely (*phuffman_table_bits == 0)) + { + elf_uncompress_failed (); + return 0; + } + } + + /* Decompress COMPRESSED_SIZE bytes of data at PIN using the huffman table, + storing REGENERATED_SIZE bytes of decompressed data at PLIT. */ + + huffman_table_bits = (unsigned int)*phuffman_table_bits; + huffman_mask = ((uint64_t)1 << huffman_table_bits) - 1; + + if (streams == 1) + { + const unsigned char *pback; + const unsigned char *pbackend; + uint64_t val; + unsigned int bits; + uint32_t i; + + pback = pin + total_streams_size - 1; + pbackend = pin; + if (!elf_fetch_backward_init (&pback, pbackend, &val, &bits)) + return 0; + + /* This is one of the inner loops of the decompression algorithm, so we + put some effort into optimization. We can't get more than 64 bytes + from a single call to elf_fetch_bits_backward, and we can't subtract + more than 11 bits at a time. 
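+	 Each table lookup needs at most HUFFMAN_TABLE_BITS bits, and
+	 HUFFMAN_TABLE_BITS is at most 11, so with at least 33 bits buffered
+	 the unrolled loop below can safely decode three symbols before
+	 rechecking, and with more than 11 bits it can decode one more.
+	 Every symbol consumes at least one bit, so a single 64-bit refill
+	 can never produce more than 64 literal bytes, which is why the fast
+	 path stops 64 bytes short of the end of the output.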
*/ + + if (regenerated_size >= 64) + { + unsigned char *plitstart; + unsigned char *plitstop; + + plitstart = plit; + plitstop = plit + regenerated_size - 64; + while (plit < plitstop) + { + uint16_t t; + + if (!elf_fetch_bits_backward (&pback, pbackend, &val, &bits)) + return 0; + + if (bits < 16) + break; + + while (bits >= 33) + { + t = huffman_table[(val >> (bits - huffman_table_bits)) + & huffman_mask]; + *plit = t >> 8; + ++plit; + bits -= t & 0xff; + + t = huffman_table[(val >> (bits - huffman_table_bits)) + & huffman_mask]; + *plit = t >> 8; + ++plit; + bits -= t & 0xff; + + t = huffman_table[(val >> (bits - huffman_table_bits)) + & huffman_mask]; + *plit = t >> 8; + ++plit; + bits -= t & 0xff; + } + + while (bits > 11) + { + t = huffman_table[(val >> (bits - huffman_table_bits)) + & huffman_mask]; + *plit = t >> 8; + ++plit; + bits -= t & 0xff; + } + } + + regenerated_size -= plit - plitstart; + } + + for (i = 0; i < regenerated_size; ++i) + { + uint16_t t; + + if (!elf_fetch_bits_backward (&pback, pbackend, &val, &bits)) + return 0; + + if (unlikely (bits < huffman_table_bits)) + { + t = huffman_table[(val << (huffman_table_bits - bits)) + & huffman_mask]; + if (unlikely (bits < (t & 0xff))) + { + elf_uncompress_failed (); + return 0; + } + } + else + t = huffman_table[(val >> (bits - huffman_table_bits)) + & huffman_mask]; + + *plit = t >> 8; + ++plit; + bits -= t & 0xff; + } + + return 1; + } + + { + uint32_t stream_size1, stream_size2, stream_size3, stream_size4; + uint32_t tot; + const unsigned char *pback1, *pback2, *pback3, *pback4; + const unsigned char *pbackend1, *pbackend2, *pbackend3, *pbackend4; + uint64_t val1, val2, val3, val4; + unsigned int bits1, bits2, bits3, bits4; + unsigned char *plit1, *plit2, *plit3, *plit4; + uint32_t regenerated_stream_size; + uint32_t regenerated_stream_size4; + uint16_t t1, t2, t3, t4; + uint32_t i; + uint32_t limit; + + /* Read jump table. */ + if (unlikely (pin + 5 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + stream_size1 = (uint32_t)*pin | ((uint32_t)pin[1] << 8); + pin += 2; + stream_size2 = (uint32_t)*pin | ((uint32_t)pin[1] << 8); + pin += 2; + stream_size3 = (uint32_t)*pin | ((uint32_t)pin[1] << 8); + pin += 2; + tot = stream_size1 + stream_size2 + stream_size3; + if (unlikely (tot > total_streams_size - 6)) + { + elf_uncompress_failed (); + return 0; + } + stream_size4 = total_streams_size - 6 - tot; + + pback1 = pin + stream_size1 - 1; + pbackend1 = pin; + + pback2 = pback1 + stream_size2; + pbackend2 = pback1 + 1; + + pback3 = pback2 + stream_size3; + pbackend3 = pback2 + 1; + + pback4 = pback3 + stream_size4; + pbackend4 = pback3 + 1; + + if (!elf_fetch_backward_init (&pback1, pbackend1, &val1, &bits1)) + return 0; + if (!elf_fetch_backward_init (&pback2, pbackend2, &val2, &bits2)) + return 0; + if (!elf_fetch_backward_init (&pback3, pbackend3, &val3, &bits3)) + return 0; + if (!elf_fetch_backward_init (&pback4, pbackend4, &val4, &bits4)) + return 0; + + regenerated_stream_size = (regenerated_size + 3) / 4; + + plit1 = plit; + plit2 = plit1 + regenerated_stream_size; + plit3 = plit2 + regenerated_stream_size; + plit4 = plit3 + regenerated_stream_size; + + regenerated_stream_size4 = regenerated_size - regenerated_stream_size * 3; + + /* We can't get more than 64 literal bytes from a single call to + elf_fetch_bits_backward. The fourth stream can be up to 3 bytes less, + so use as the limit. */ + + limit = regenerated_stream_size4 <= 64 ? 
0 : regenerated_stream_size4 - 64; + i = 0; + while (i < limit) + { + if (!elf_fetch_bits_backward (&pback1, pbackend1, &val1, &bits1)) + return 0; + if (!elf_fetch_bits_backward (&pback2, pbackend2, &val2, &bits2)) + return 0; + if (!elf_fetch_bits_backward (&pback3, pbackend3, &val3, &bits3)) + return 0; + if (!elf_fetch_bits_backward (&pback4, pbackend4, &val4, &bits4)) + return 0; + + /* We can't subtract more than 11 bits at a time. */ + + do + { + t1 = huffman_table[(val1 >> (bits1 - huffman_table_bits)) + & huffman_mask]; + t2 = huffman_table[(val2 >> (bits2 - huffman_table_bits)) + & huffman_mask]; + t3 = huffman_table[(val3 >> (bits3 - huffman_table_bits)) + & huffman_mask]; + t4 = huffman_table[(val4 >> (bits4 - huffman_table_bits)) + & huffman_mask]; + + *plit1 = t1 >> 8; + ++plit1; + bits1 -= t1 & 0xff; + + *plit2 = t2 >> 8; + ++plit2; + bits2 -= t2 & 0xff; + + *plit3 = t3 >> 8; + ++plit3; + bits3 -= t3 & 0xff; + + *plit4 = t4 >> 8; + ++plit4; + bits4 -= t4 & 0xff; + + ++i; + } + while (bits1 > 11 && bits2 > 11 && bits3 > 11 && bits4 > 11); + } + + while (i < regenerated_stream_size) + { + int use4; + + use4 = i < regenerated_stream_size4; + + if (!elf_fetch_bits_backward (&pback1, pbackend1, &val1, &bits1)) + return 0; + if (!elf_fetch_bits_backward (&pback2, pbackend2, &val2, &bits2)) + return 0; + if (!elf_fetch_bits_backward (&pback3, pbackend3, &val3, &bits3)) + return 0; + if (use4) + { + if (!elf_fetch_bits_backward (&pback4, pbackend4, &val4, &bits4)) + return 0; + } + + if (unlikely (bits1 < huffman_table_bits)) + { + t1 = huffman_table[(val1 << (huffman_table_bits - bits1)) + & huffman_mask]; + if (unlikely (bits1 < (t1 & 0xff))) + { + elf_uncompress_failed (); + return 0; + } + } + else + t1 = huffman_table[(val1 >> (bits1 - huffman_table_bits)) + & huffman_mask]; + + if (unlikely (bits2 < huffman_table_bits)) + { + t2 = huffman_table[(val2 << (huffman_table_bits - bits2)) + & huffman_mask]; + if (unlikely (bits2 < (t2 & 0xff))) + { + elf_uncompress_failed (); + return 0; + } + } + else + t2 = huffman_table[(val2 >> (bits2 - huffman_table_bits)) + & huffman_mask]; + + if (unlikely (bits3 < huffman_table_bits)) + { + t3 = huffman_table[(val3 << (huffman_table_bits - bits3)) + & huffman_mask]; + if (unlikely (bits3 < (t3 & 0xff))) + { + elf_uncompress_failed (); + return 0; + } + } + else + t3 = huffman_table[(val3 >> (bits3 - huffman_table_bits)) + & huffman_mask]; + + if (use4) + { + if (unlikely (bits4 < huffman_table_bits)) + { + t4 = huffman_table[(val4 << (huffman_table_bits - bits4)) + & huffman_mask]; + if (unlikely (bits4 < (t4 & 0xff))) + { + elf_uncompress_failed (); + return 0; + } + } + else + t4 = huffman_table[(val4 >> (bits4 - huffman_table_bits)) + & huffman_mask]; + + *plit4 = t4 >> 8; + ++plit4; + bits4 -= t4 & 0xff; + } + + *plit1 = t1 >> 8; + ++plit1; + bits1 -= t1 & 0xff; + + *plit2 = t2 >> 8; + ++plit2; + bits2 -= t2 & 0xff; + + *plit3 = t3 >> 8; + ++plit3; + bits3 -= t3 & 0xff; + + ++i; + } + } + + return 1; +} + +/* The information used to decompress a sequence code, which can be a literal + length, an offset, or a match length. */ + +struct elf_zstd_seq_decode +{ + const struct elf_zstd_fse_baseline_entry *table; + int table_bits; +}; + +/* Unpack a sequence code compression mode. 
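+   MODE is the two bit compression mode field from the sequences section
+   header.  Following RFC 8878 the values are: 0 predefined (use the fixed
+   table passed in as PREDEF), 1 RLE (a single symbol byte follows), 2 FSE
+   compressed (a full FSE table description follows), and 3 repeat (keep
+   using the table from the previous block).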
*/ + +static int +elf_zstd_unpack_seq_decode (int mode, + const unsigned char **ppin, + const unsigned char *pinend, + const struct elf_zstd_fse_baseline_entry *predef, + int predef_bits, + uint16_t *scratch, + int maxidx, + struct elf_zstd_fse_baseline_entry *table, + int table_bits, + int (*conv)(const struct elf_zstd_fse_entry *, + int, + struct elf_zstd_fse_baseline_entry *), + struct elf_zstd_seq_decode *decode) +{ + switch (mode) + { + case 0: + decode->table = predef; + decode->table_bits = predef_bits; + break; + + case 1: + { + struct elf_zstd_fse_entry entry; + + if (unlikely (*ppin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + entry.symbol = **ppin; + ++*ppin; + entry.bits = 0; + entry.base = 0; + decode->table_bits = 0; + if (!conv (&entry, 0, table)) + return 0; + } + break; + + case 2: + { + struct elf_zstd_fse_entry *fse_table; + + /* We use the same space for the simple FSE table and the baseline + table. */ + fse_table = (struct elf_zstd_fse_entry *)table; + decode->table_bits = table_bits; + if (!elf_zstd_read_fse (ppin, pinend, scratch, maxidx, fse_table, + &decode->table_bits)) + return 0; + if (!conv (fse_table, decode->table_bits, table)) + return 0; + decode->table = table; + } + break; + + case 3: + if (unlikely (decode->table_bits == -1)) + { + elf_uncompress_failed (); + return 0; + } + break; + + default: + elf_uncompress_failed (); + return 0; + } + + return 1; +} + +/* Decompress a zstd stream from PIN/SIN to POUT/SOUT. Code based on RFC 8878. + Return 1 on success, 0 on error. */ + +static int +elf_zstd_decompress (const unsigned char *pin, size_t sin, + unsigned char *zdebug_table, unsigned char *pout, + size_t sout) +{ + const unsigned char *pinend; + unsigned char *poutstart; + unsigned char *poutend; + struct elf_zstd_seq_decode literal_decode; + struct elf_zstd_fse_baseline_entry *literal_fse_table; + struct elf_zstd_seq_decode match_decode; + struct elf_zstd_fse_baseline_entry *match_fse_table; + struct elf_zstd_seq_decode offset_decode; + struct elf_zstd_fse_baseline_entry *offset_fse_table; + uint16_t *huffman_table; + int huffman_table_bits; + uint32_t repeated_offset1; + uint32_t repeated_offset2; + uint32_t repeated_offset3; + uint16_t *scratch; + unsigned char hdr; + int has_checksum; + uint64_t content_size; + int last_block; + + pinend = pin + sin; + poutstart = pout; + poutend = pout + sout; + + literal_decode.table = NULL; + literal_decode.table_bits = -1; + literal_fse_table = ((struct elf_zstd_fse_baseline_entry *) + (zdebug_table + ZSTD_TABLE_LITERAL_FSE_OFFSET)); + + match_decode.table = NULL; + match_decode.table_bits = -1; + match_fse_table = ((struct elf_zstd_fse_baseline_entry *) + (zdebug_table + ZSTD_TABLE_MATCH_FSE_OFFSET)); + + offset_decode.table = NULL; + offset_decode.table_bits = -1; + offset_fse_table = ((struct elf_zstd_fse_baseline_entry *) + (zdebug_table + ZSTD_TABLE_OFFSET_FSE_OFFSET)); + huffman_table = ((uint16_t *) + (zdebug_table + ZSTD_TABLE_HUFFMAN_OFFSET)); + huffman_table_bits = 0; + scratch = ((uint16_t *) + (zdebug_table + ZSTD_TABLE_WORK_OFFSET)); + + repeated_offset1 = 1; + repeated_offset2 = 4; + repeated_offset3 = 8; + + if (unlikely (sin < 4)) + { + elf_uncompress_failed (); + return 0; + } + + /* These values are the zstd magic number. 
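+     The zstd frame magic number is 0xFD2FB528, stored little-endian, which
+     is why the bytes are checked in the order 0x28, 0xb5, 0x2f, 0xfd.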
*/ + if (unlikely (pin[0] != 0x28 + || pin[1] != 0xb5 + || pin[2] != 0x2f + || pin[3] != 0xfd)) + { + elf_uncompress_failed (); + return 0; + } + + pin += 4; + + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + + hdr = *pin++; + + /* We expect a single frame. */ + if (unlikely ((hdr & (1 << 5)) == 0)) + { + elf_uncompress_failed (); + return 0; + } + /* Reserved bit must be zero. */ + if (unlikely ((hdr & (1 << 3)) != 0)) + { + elf_uncompress_failed (); + return 0; + } + /* We do not expect a dictionary. */ + if (unlikely ((hdr & 3) != 0)) + { + elf_uncompress_failed (); + return 0; + } + has_checksum = (hdr & (1 << 2)) != 0; + switch (hdr >> 6) + { + case 0: + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + content_size = (uint64_t) *pin++; + break; + case 1: + if (unlikely (pin + 1 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + content_size = (((uint64_t) pin[0]) | (((uint64_t) pin[1]) << 8)) + 256; + pin += 2; + break; + case 2: + if (unlikely (pin + 3 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + content_size = ((uint64_t) pin[0] + | (((uint64_t) pin[1]) << 8) + | (((uint64_t) pin[2]) << 16) + | (((uint64_t) pin[3]) << 24)); + pin += 4; + break; + case 3: + if (unlikely (pin + 7 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + content_size = ((uint64_t) pin[0] + | (((uint64_t) pin[1]) << 8) + | (((uint64_t) pin[2]) << 16) + | (((uint64_t) pin[3]) << 24) + | (((uint64_t) pin[4]) << 32) + | (((uint64_t) pin[5]) << 40) + | (((uint64_t) pin[6]) << 48) + | (((uint64_t) pin[7]) << 56)); + pin += 8; + break; + default: + elf_uncompress_failed (); + return 0; + } + + if (unlikely (content_size != (size_t) content_size + || (size_t) content_size != sout)) + { + elf_uncompress_failed (); + return 0; + } + + last_block = 0; + while (!last_block) + { + uint32_t block_hdr; + int block_type; + uint32_t block_size; + + if (unlikely (pin + 2 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + block_hdr = ((uint32_t) pin[0] + | (((uint32_t) pin[1]) << 8) + | (((uint32_t) pin[2]) << 16)); + pin += 3; + + last_block = block_hdr & 1; + block_type = (block_hdr >> 1) & 3; + block_size = block_hdr >> 3; + + switch (block_type) + { + case 0: + /* Raw_Block */ + if (unlikely ((size_t) block_size > (size_t) (pinend - pin))) + { + elf_uncompress_failed (); + return 0; + } + if (unlikely ((size_t) block_size > (size_t) (poutend - pout))) + { + elf_uncompress_failed (); + return 0; + } + memcpy (pout, pin, block_size); + pout += block_size; + pin += block_size; + break; + + case 1: + /* RLE_Block */ + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + if (unlikely ((size_t) block_size > (size_t) (poutend - pout))) + { + elf_uncompress_failed (); + return 0; + } + memset (pout, *pin, block_size); + pout += block_size; + pin++; + break; + + case 2: + { + const unsigned char *pblockend; + unsigned char *plitstack; + unsigned char *plit; + uint32_t literal_count; + unsigned char seq_hdr; + size_t seq_count; + size_t seq; + const unsigned char *pback; + uint64_t val; + unsigned int bits; + unsigned int literal_state; + unsigned int offset_state; + unsigned int match_state; + + /* Compressed_Block */ + if (unlikely ((size_t) block_size > (size_t) (pinend - pin))) + { + elf_uncompress_failed (); + return 0; + } + + pblockend = pin + block_size; + + /* Read the literals into the end of the output space, and leave + PLIT pointing at them. 
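+
+	     During a compressed block the output buffer is used from both
+	     ends, roughly:
+
+	       pout ...(sequences write here)... plit ...literals... poutend
+
+	     The sequence loop consumes literals from PLIT while writing at
+	     POUT; POUT can never pass PLIT, because everything still ahead
+	     of PLIT is a literal that has yet to be copied out, but the two
+	     can get close, which is why the literal copy below is done in
+	     chunks when the regions overlap.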
*/ + + if (!elf_zstd_read_literals (&pin, pblockend, pout, poutend, + scratch, huffman_table, + &huffman_table_bits, + &plitstack)) + return 0; + plit = plitstack; + literal_count = poutend - plit; + + seq_hdr = *pin; + pin++; + if (seq_hdr < 128) + seq_count = seq_hdr; + else if (seq_hdr < 255) + { + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + seq_count = ((seq_hdr - 128) << 8) + *pin; + pin++; + } + else + { + if (unlikely (pin + 1 >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + seq_count = *pin + (pin[1] << 8) + 0x7f00; + pin += 2; + } + + if (seq_count > 0) + { + int (*pfn)(const struct elf_zstd_fse_entry *, + int, struct elf_zstd_fse_baseline_entry *); + + if (unlikely (pin >= pinend)) + { + elf_uncompress_failed (); + return 0; + } + seq_hdr = *pin; + ++pin; + + pfn = elf_zstd_make_literal_baseline_fse; + if (!elf_zstd_unpack_seq_decode ((seq_hdr >> 6) & 3, + &pin, pinend, + &elf_zstd_lit_table[0], 6, + scratch, 35, + literal_fse_table, 9, pfn, + &literal_decode)) + return 0; + + pfn = elf_zstd_make_offset_baseline_fse; + if (!elf_zstd_unpack_seq_decode ((seq_hdr >> 4) & 3, + &pin, pinend, + &elf_zstd_offset_table[0], 5, + scratch, 31, + offset_fse_table, 8, pfn, + &offset_decode)) + return 0; + + pfn = elf_zstd_make_match_baseline_fse; + if (!elf_zstd_unpack_seq_decode ((seq_hdr >> 2) & 3, + &pin, pinend, + &elf_zstd_match_table[0], 6, + scratch, 52, + match_fse_table, 9, pfn, + &match_decode)) + return 0; + } + + pback = pblockend - 1; + if (!elf_fetch_backward_init (&pback, pin, &val, &bits)) + return 0; + + bits -= literal_decode.table_bits; + literal_state = ((val >> bits) + & ((1U << literal_decode.table_bits) - 1)); + + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + bits -= offset_decode.table_bits; + offset_state = ((val >> bits) + & ((1U << offset_decode.table_bits) - 1)); + + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + bits -= match_decode.table_bits; + match_state = ((val >> bits) + & ((1U << match_decode.table_bits) - 1)); + + seq = 0; + while (1) + { + const struct elf_zstd_fse_baseline_entry *pt; + uint32_t offset_basebits; + uint32_t offset_baseline; + uint32_t offset_bits; + uint32_t offset_base; + uint32_t offset; + uint32_t match_baseline; + uint32_t match_bits; + uint32_t match_base; + uint32_t match; + uint32_t literal_baseline; + uint32_t literal_bits; + uint32_t literal_base; + uint32_t literal; + uint32_t need; + uint32_t add; + + pt = &offset_decode.table[offset_state]; + offset_basebits = pt->basebits; + offset_baseline = pt->baseline; + offset_bits = pt->bits; + offset_base = pt->base; + + /* This case can be more than 16 bits, which is all that + elf_fetch_bits_backward promises. 
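+	     For example, with 23 extra offset bits to read, the code below
+	     first takes 16 bits, shifts them left by the remaining 7, then
+	     takes those 7 bits and adds them in, so it reconstructs the full
+	     23 bit value without ever asking for more than 16 bits at once.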
*/ + need = offset_basebits; + add = 0; + if (unlikely (need > 16)) + { + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + bits -= 16; + add = (val >> bits) & ((1U << 16) - 1); + need -= 16; + add <<= need; + } + if (need > 0) + { + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + bits -= need; + add += (val >> bits) & ((1U << need) - 1); + } + + offset = offset_baseline + add; + + pt = &match_decode.table[match_state]; + need = pt->basebits; + match_baseline = pt->baseline; + match_bits = pt->bits; + match_base = pt->base; + + add = 0; + if (need > 0) + { + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + bits -= need; + add = (val >> bits) & ((1U << need) - 1); + } + + match = match_baseline + add; + + pt = &literal_decode.table[literal_state]; + need = pt->basebits; + literal_baseline = pt->baseline; + literal_bits = pt->bits; + literal_base = pt->base; + + add = 0; + if (need > 0) + { + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + bits -= need; + add = (val >> bits) & ((1U << need) - 1); + } + + literal = literal_baseline + add; + + /* See the comment in elf_zstd_make_offset_baseline_fse. */ + if (offset_basebits > 1) + { + repeated_offset3 = repeated_offset2; + repeated_offset2 = repeated_offset1; + repeated_offset1 = offset; + } + else + { + if (unlikely (literal == 0)) + ++offset; + switch (offset) + { + case 1: + offset = repeated_offset1; + break; + case 2: + offset = repeated_offset2; + repeated_offset2 = repeated_offset1; + repeated_offset1 = offset; + break; + case 3: + offset = repeated_offset3; + repeated_offset3 = repeated_offset2; + repeated_offset2 = repeated_offset1; + repeated_offset1 = offset; + break; + case 4: + offset = repeated_offset1 - 1; + repeated_offset3 = repeated_offset2; + repeated_offset2 = repeated_offset1; + repeated_offset1 = offset; + break; + } + } + + ++seq; + if (seq < seq_count) + { + uint32_t v; + + /* Update the three states. */ + + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + + need = literal_bits; + bits -= need; + v = (val >> bits) & (((uint32_t)1 << need) - 1); + + literal_state = literal_base + v; + + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + + need = match_bits; + bits -= need; + v = (val >> bits) & (((uint32_t)1 << need) - 1); + + match_state = match_base + v; + + if (!elf_fetch_bits_backward (&pback, pin, &val, &bits)) + return 0; + + need = offset_bits; + bits -= need; + v = (val >> bits) & (((uint32_t)1 << need) - 1); + + offset_state = offset_base + v; + } + + /* The next sequence is now in LITERAL, OFFSET, MATCH. */ + + /* Copy LITERAL bytes from the literals. */ + + if (unlikely ((size_t)(poutend - pout) < literal)) + { + elf_uncompress_failed (); + return 0; + } + + if (unlikely (literal_count < literal)) + { + elf_uncompress_failed (); + return 0; + } + + literal_count -= literal; + + /* Often LITERAL is small, so handle small cases quickly. 
*/ + switch (literal) + { + case 8: + *pout++ = *plit++; + /* FALLTHROUGH */ + case 7: + *pout++ = *plit++; + /* FALLTHROUGH */ + case 6: + *pout++ = *plit++; + /* FALLTHROUGH */ + case 5: + *pout++ = *plit++; + /* FALLTHROUGH */ + case 4: + *pout++ = *plit++; + /* FALLTHROUGH */ + case 3: + *pout++ = *plit++; + /* FALLTHROUGH */ + case 2: + *pout++ = *plit++; + /* FALLTHROUGH */ + case 1: + *pout++ = *plit++; + break; + + case 0: + break; + + default: + if (unlikely ((size_t)(plit - pout) < literal)) + { + uint32_t move; + + move = plit - pout; + while (literal > move) + { + memcpy (pout, plit, move); + pout += move; + plit += move; + literal -= move; + } + } + + memcpy (pout, plit, literal); + pout += literal; + plit += literal; + } + + if (match > 0) + { + /* Copy MATCH bytes from the decoded output at OFFSET. */ + + if (unlikely ((size_t)(poutend - pout) < match)) + { + elf_uncompress_failed (); + return 0; + } + + if (unlikely ((size_t)(pout - poutstart) < offset)) + { + elf_uncompress_failed (); + return 0; + } + + if (offset >= match) + { + memcpy (pout, pout - offset, match); + pout += match; + } + else + { + while (match > 0) + { + uint32_t copy; + + copy = match < offset ? match : offset; + memcpy (pout, pout - offset, copy); + match -= copy; + pout += copy; + } + } + } + + if (unlikely (seq >= seq_count)) + { + /* Copy remaining literals. */ + if (literal_count > 0 && plit != pout) + { + if (unlikely ((size_t)(poutend - pout) + < literal_count)) + { + elf_uncompress_failed (); + return 0; + } + + if ((size_t)(plit - pout) < literal_count) + { + uint32_t move; + + move = plit - pout; + while (literal_count > move) + { + memcpy (pout, plit, move); + pout += move; + plit += move; + literal_count -= move; + } + } + + memcpy (pout, plit, literal_count); + } + + pout += literal_count; + + break; + } + } + + pin = pblockend; + } + break; + + case 3: + default: + elf_uncompress_failed (); + return 0; + } + } + + if (has_checksum) + { + if (unlikely (pin + 4 > pinend)) + { + elf_uncompress_failed (); + return 0; + } + + /* We don't currently verify the checksum. Currently running GNU ld with + --compress-debug-sections=zstd does not seem to generate a + checksum. */ + + pin += 4; + } + + if (pin != pinend) + { + elf_uncompress_failed (); + return 0; + } + + return 1; +} + +#define ZDEBUG_TABLE_SIZE \ + (ZLIB_TABLE_SIZE > ZSTD_TABLE_SIZE ? ZLIB_TABLE_SIZE : ZSTD_TABLE_SIZE) + +/* Uncompress the old compressed debug format, the one emitted by + --compress-debug-sections=zlib-gnu. The compressed data is in + COMPRESSED / COMPRESSED_SIZE, and the function writes to + *UNCOMPRESSED / *UNCOMPRESSED_SIZE. ZDEBUG_TABLE is work space to + hold Huffman tables. Returns 0 on error, 1 on successful + decompression or if something goes wrong. In general we try to + carry on, by returning 1, even if we can't decompress. */ + +static int +elf_uncompress_zdebug (struct backtrace_state *state, + const unsigned char *compressed, size_t compressed_size, + uint16_t *zdebug_table, + backtrace_error_callback error_callback, void *data, + unsigned char **uncompressed, size_t *uncompressed_size) +{ + size_t sz; + size_t i; + unsigned char *po; + + *uncompressed = NULL; + *uncompressed_size = 0; + + /* The format starts with the four bytes ZLIB, followed by the 8 + byte length of the uncompressed data in big-endian order, + followed by a zlib stream. 
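+
+     offset  0: the four bytes 'Z' 'L' 'I' 'B'
+     offset  4: uncompressed size, 8 bytes, big-endian
+     offset 12: the zlib stream itself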
*/ + + if (compressed_size < 12 || memcmp (compressed, "ZLIB", 4) != 0) + return 1; + + sz = 0; + for (i = 0; i < 8; i++) + sz = (sz << 8) | compressed[i + 4]; + + if (*uncompressed != NULL && *uncompressed_size >= sz) + po = *uncompressed; + else + { + po = (unsigned char *) backtrace_alloc (state, sz, error_callback, data); + if (po == NULL) + return 0; + } + + if (!elf_zlib_inflate_and_verify (compressed + 12, compressed_size - 12, + zdebug_table, po, sz)) + return 1; + + *uncompressed = po; + *uncompressed_size = sz; + + return 1; +} + +/* Uncompress the new compressed debug format, the official standard + ELF approach emitted by --compress-debug-sections=zlib-gabi. The + compressed data is in COMPRESSED / COMPRESSED_SIZE, and the + function writes to *UNCOMPRESSED / *UNCOMPRESSED_SIZE. + ZDEBUG_TABLE is work space as for elf_uncompress_zdebug. Returns 0 + on error, 1 on successful decompression or if something goes wrong. + In general we try to carry on, by returning 1, even if we can't + decompress. */ + +static int +elf_uncompress_chdr (struct backtrace_state *state, + const unsigned char *compressed, size_t compressed_size, + uint16_t *zdebug_table, + backtrace_error_callback error_callback, void *data, + unsigned char **uncompressed, size_t *uncompressed_size) +{ + b_elf_chdr chdr; + char *alc; + size_t alc_len; + unsigned char *po; + + *uncompressed = NULL; + *uncompressed_size = 0; + + /* The format starts with an ELF compression header. */ + if (compressed_size < sizeof (b_elf_chdr)) + return 1; + + /* The lld linker can misalign a compressed section, so we can't safely read + the fields directly as we can for other ELF sections. See + https://github.com/ianlancetaylor/libbacktrace/pull/120. */ + memcpy (&chdr, compressed, sizeof (b_elf_chdr)); + + alc = NULL; + alc_len = 0; + if (*uncompressed != NULL && *uncompressed_size >= chdr.ch_size) + po = *uncompressed; + else + { + alc_len = chdr.ch_size; + alc = (char*)backtrace_alloc (state, alc_len, error_callback, data); + if (alc == NULL) + return 0; + po = (unsigned char *) alc; + } + + switch (chdr.ch_type) + { + case ELFCOMPRESS_ZLIB: + if (!elf_zlib_inflate_and_verify (compressed + sizeof (b_elf_chdr), + compressed_size - sizeof (b_elf_chdr), + zdebug_table, po, chdr.ch_size)) + goto skip; + break; + + case ELFCOMPRESS_ZSTD: + if (!elf_zstd_decompress (compressed + sizeof (b_elf_chdr), + compressed_size - sizeof (b_elf_chdr), + (unsigned char *)zdebug_table, po, + chdr.ch_size)) + goto skip; + break; + + default: + /* Unsupported compression algorithm. */ + goto skip; + } + + *uncompressed = po; + *uncompressed_size = chdr.ch_size; + + return 1; + + skip: + if (alc != NULL && alc_len > 0) + backtrace_free (state, alc, alc_len, error_callback, data); + return 1; +} + +/* This function is a hook for testing the zlib support. It is only + used by tests. 
*/ + +int +backtrace_uncompress_zdebug (struct backtrace_state *state, + const unsigned char *compressed, + size_t compressed_size, + backtrace_error_callback error_callback, + void *data, unsigned char **uncompressed, + size_t *uncompressed_size) +{ + uint16_t *zdebug_table; + int ret; + + zdebug_table = ((uint16_t *) backtrace_alloc (state, ZDEBUG_TABLE_SIZE, + error_callback, data)); + if (zdebug_table == NULL) + return 0; + ret = elf_uncompress_zdebug (state, compressed, compressed_size, + zdebug_table, error_callback, data, + uncompressed, uncompressed_size); + backtrace_free (state, zdebug_table, ZDEBUG_TABLE_SIZE, + error_callback, data); + return ret; +} + +/* This function is a hook for testing the zstd support. It is only used by + tests. */ + +int +backtrace_uncompress_zstd (struct backtrace_state *state, + const unsigned char *compressed, + size_t compressed_size, + backtrace_error_callback error_callback, + void *data, unsigned char *uncompressed, + size_t uncompressed_size) +{ + unsigned char *zdebug_table; + int ret; + + zdebug_table = ((unsigned char *) backtrace_alloc (state, ZDEBUG_TABLE_SIZE, + error_callback, data)); + if (zdebug_table == NULL) + return 0; + ret = elf_zstd_decompress (compressed, compressed_size, + zdebug_table, uncompressed, uncompressed_size); + backtrace_free (state, zdebug_table, ZDEBUG_TABLE_SIZE, + error_callback, data); + return ret; +} + +/* Number of LZMA states. */ +#define LZMA_STATES (12) + +/* Number of LZMA position states. The pb value of the property byte + is the number of bits to include in these states, and the maximum + value of pb is 4. */ +#define LZMA_POS_STATES (16) + +/* Number of LZMA distance states. These are used match distances + with a short match length: up to 4 bytes. */ +#define LZMA_DIST_STATES (4) + +/* Number of LZMA distance slots. LZMA uses six bits to encode larger + match lengths, so 1 << 6 possible probabilities. */ +#define LZMA_DIST_SLOTS (64) + +/* LZMA distances 0 to 3 are encoded directly, larger values use a + probability model. */ +#define LZMA_DIST_MODEL_START (4) + +/* The LZMA probability model ends at 14. */ +#define LZMA_DIST_MODEL_END (14) + +/* LZMA distance slots for distances less than 127. */ +#define LZMA_FULL_DISTANCES (128) + +/* LZMA uses four alignment bits. */ +#define LZMA_ALIGN_SIZE (16) + +/* LZMA match length is encoded with 4, 5, or 10 bits, some of which + are already known. */ +#define LZMA_LEN_LOW_SYMBOLS (8) +#define LZMA_LEN_MID_SYMBOLS (8) +#define LZMA_LEN_HIGH_SYMBOLS (256) + +/* LZMA literal encoding. */ +#define LZMA_LITERAL_CODERS_MAX (16) +#define LZMA_LITERAL_CODER_SIZE (0x300) + +/* LZMA is based on a large set of probabilities, each managed + independently. Each probability is an 11 bit number that we store + in a uint16_t. We use a single large array of probabilities. */ + +/* Lengths of entries in the LZMA probabilities array. The names used + here are copied from the Linux kernel implementation. 
*/ + +#define LZMA_PROB_IS_MATCH_LEN (LZMA_STATES * LZMA_POS_STATES) +#define LZMA_PROB_IS_REP_LEN LZMA_STATES +#define LZMA_PROB_IS_REP0_LEN LZMA_STATES +#define LZMA_PROB_IS_REP1_LEN LZMA_STATES +#define LZMA_PROB_IS_REP2_LEN LZMA_STATES +#define LZMA_PROB_IS_REP0_LONG_LEN (LZMA_STATES * LZMA_POS_STATES) +#define LZMA_PROB_DIST_SLOT_LEN (LZMA_DIST_STATES * LZMA_DIST_SLOTS) +#define LZMA_PROB_DIST_SPECIAL_LEN (LZMA_FULL_DISTANCES - LZMA_DIST_MODEL_END) +#define LZMA_PROB_DIST_ALIGN_LEN LZMA_ALIGN_SIZE +#define LZMA_PROB_MATCH_LEN_CHOICE_LEN 1 +#define LZMA_PROB_MATCH_LEN_CHOICE2_LEN 1 +#define LZMA_PROB_MATCH_LEN_LOW_LEN (LZMA_POS_STATES * LZMA_LEN_LOW_SYMBOLS) +#define LZMA_PROB_MATCH_LEN_MID_LEN (LZMA_POS_STATES * LZMA_LEN_MID_SYMBOLS) +#define LZMA_PROB_MATCH_LEN_HIGH_LEN LZMA_LEN_HIGH_SYMBOLS +#define LZMA_PROB_REP_LEN_CHOICE_LEN 1 +#define LZMA_PROB_REP_LEN_CHOICE2_LEN 1 +#define LZMA_PROB_REP_LEN_LOW_LEN (LZMA_POS_STATES * LZMA_LEN_LOW_SYMBOLS) +#define LZMA_PROB_REP_LEN_MID_LEN (LZMA_POS_STATES * LZMA_LEN_MID_SYMBOLS) +#define LZMA_PROB_REP_LEN_HIGH_LEN LZMA_LEN_HIGH_SYMBOLS +#define LZMA_PROB_LITERAL_LEN \ + (LZMA_LITERAL_CODERS_MAX * LZMA_LITERAL_CODER_SIZE) + +/* Offsets into the LZMA probabilities array. This is mechanically + generated from the above lengths. */ + +#define LZMA_PROB_IS_MATCH_OFFSET 0 +#define LZMA_PROB_IS_REP_OFFSET \ + (LZMA_PROB_IS_MATCH_OFFSET + LZMA_PROB_IS_MATCH_LEN) +#define LZMA_PROB_IS_REP0_OFFSET \ + (LZMA_PROB_IS_REP_OFFSET + LZMA_PROB_IS_REP_LEN) +#define LZMA_PROB_IS_REP1_OFFSET \ + (LZMA_PROB_IS_REP0_OFFSET + LZMA_PROB_IS_REP0_LEN) +#define LZMA_PROB_IS_REP2_OFFSET \ + (LZMA_PROB_IS_REP1_OFFSET + LZMA_PROB_IS_REP1_LEN) +#define LZMA_PROB_IS_REP0_LONG_OFFSET \ + (LZMA_PROB_IS_REP2_OFFSET + LZMA_PROB_IS_REP2_LEN) +#define LZMA_PROB_DIST_SLOT_OFFSET \ + (LZMA_PROB_IS_REP0_LONG_OFFSET + LZMA_PROB_IS_REP0_LONG_LEN) +#define LZMA_PROB_DIST_SPECIAL_OFFSET \ + (LZMA_PROB_DIST_SLOT_OFFSET + LZMA_PROB_DIST_SLOT_LEN) +#define LZMA_PROB_DIST_ALIGN_OFFSET \ + (LZMA_PROB_DIST_SPECIAL_OFFSET + LZMA_PROB_DIST_SPECIAL_LEN) +#define LZMA_PROB_MATCH_LEN_CHOICE_OFFSET \ + (LZMA_PROB_DIST_ALIGN_OFFSET + LZMA_PROB_DIST_ALIGN_LEN) +#define LZMA_PROB_MATCH_LEN_CHOICE2_OFFSET \ + (LZMA_PROB_MATCH_LEN_CHOICE_OFFSET + LZMA_PROB_MATCH_LEN_CHOICE_LEN) +#define LZMA_PROB_MATCH_LEN_LOW_OFFSET \ + (LZMA_PROB_MATCH_LEN_CHOICE2_OFFSET + LZMA_PROB_MATCH_LEN_CHOICE2_LEN) +#define LZMA_PROB_MATCH_LEN_MID_OFFSET \ + (LZMA_PROB_MATCH_LEN_LOW_OFFSET + LZMA_PROB_MATCH_LEN_LOW_LEN) +#define LZMA_PROB_MATCH_LEN_HIGH_OFFSET \ + (LZMA_PROB_MATCH_LEN_MID_OFFSET + LZMA_PROB_MATCH_LEN_MID_LEN) +#define LZMA_PROB_REP_LEN_CHOICE_OFFSET \ + (LZMA_PROB_MATCH_LEN_HIGH_OFFSET + LZMA_PROB_MATCH_LEN_HIGH_LEN) +#define LZMA_PROB_REP_LEN_CHOICE2_OFFSET \ + (LZMA_PROB_REP_LEN_CHOICE_OFFSET + LZMA_PROB_REP_LEN_CHOICE_LEN) +#define LZMA_PROB_REP_LEN_LOW_OFFSET \ + (LZMA_PROB_REP_LEN_CHOICE2_OFFSET + LZMA_PROB_REP_LEN_CHOICE2_LEN) +#define LZMA_PROB_REP_LEN_MID_OFFSET \ + (LZMA_PROB_REP_LEN_LOW_OFFSET + LZMA_PROB_REP_LEN_LOW_LEN) +#define LZMA_PROB_REP_LEN_HIGH_OFFSET \ + (LZMA_PROB_REP_LEN_MID_OFFSET + LZMA_PROB_REP_LEN_MID_LEN) +#define LZMA_PROB_LITERAL_OFFSET \ + (LZMA_PROB_REP_LEN_HIGH_OFFSET + LZMA_PROB_REP_LEN_HIGH_LEN) + +#define LZMA_PROB_TOTAL_COUNT \ + (LZMA_PROB_LITERAL_OFFSET + LZMA_PROB_LITERAL_LEN) + +/* Check that the number of LZMA probabilities is the same as the + Linux kernel implementation. 
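+
+   (1846 is the sum of the non-literal lengths above, and (1 << 4) * 0x300
+   is LZMA_LITERAL_CODERS_MAX * LZMA_LITERAL_CODER_SIZE, the space used by
+   the literal coders.)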
*/ + +#if LZMA_PROB_TOTAL_COUNT != 1846 + (1 << 4) * 0x300 + #error Wrong number of LZMA probabilities +#endif + +/* Expressions for the offset in the LZMA probabilities array of a + specific probability. */ + +#define LZMA_IS_MATCH(state, pos) \ + (LZMA_PROB_IS_MATCH_OFFSET + (state) * LZMA_POS_STATES + (pos)) +#define LZMA_IS_REP(state) \ + (LZMA_PROB_IS_REP_OFFSET + (state)) +#define LZMA_IS_REP0(state) \ + (LZMA_PROB_IS_REP0_OFFSET + (state)) +#define LZMA_IS_REP1(state) \ + (LZMA_PROB_IS_REP1_OFFSET + (state)) +#define LZMA_IS_REP2(state) \ + (LZMA_PROB_IS_REP2_OFFSET + (state)) +#define LZMA_IS_REP0_LONG(state, pos) \ + (LZMA_PROB_IS_REP0_LONG_OFFSET + (state) * LZMA_POS_STATES + (pos)) +#define LZMA_DIST_SLOT(dist, slot) \ + (LZMA_PROB_DIST_SLOT_OFFSET + (dist) * LZMA_DIST_SLOTS + (slot)) +#define LZMA_DIST_SPECIAL(dist) \ + (LZMA_PROB_DIST_SPECIAL_OFFSET + (dist)) +#define LZMA_DIST_ALIGN(dist) \ + (LZMA_PROB_DIST_ALIGN_OFFSET + (dist)) +#define LZMA_MATCH_LEN_CHOICE \ + LZMA_PROB_MATCH_LEN_CHOICE_OFFSET +#define LZMA_MATCH_LEN_CHOICE2 \ + LZMA_PROB_MATCH_LEN_CHOICE2_OFFSET +#define LZMA_MATCH_LEN_LOW(pos, sym) \ + (LZMA_PROB_MATCH_LEN_LOW_OFFSET + (pos) * LZMA_LEN_LOW_SYMBOLS + (sym)) +#define LZMA_MATCH_LEN_MID(pos, sym) \ + (LZMA_PROB_MATCH_LEN_MID_OFFSET + (pos) * LZMA_LEN_MID_SYMBOLS + (sym)) +#define LZMA_MATCH_LEN_HIGH(sym) \ + (LZMA_PROB_MATCH_LEN_HIGH_OFFSET + (sym)) +#define LZMA_REP_LEN_CHOICE \ + LZMA_PROB_REP_LEN_CHOICE_OFFSET +#define LZMA_REP_LEN_CHOICE2 \ + LZMA_PROB_REP_LEN_CHOICE2_OFFSET +#define LZMA_REP_LEN_LOW(pos, sym) \ + (LZMA_PROB_REP_LEN_LOW_OFFSET + (pos) * LZMA_LEN_LOW_SYMBOLS + (sym)) +#define LZMA_REP_LEN_MID(pos, sym) \ + (LZMA_PROB_REP_LEN_MID_OFFSET + (pos) * LZMA_LEN_MID_SYMBOLS + (sym)) +#define LZMA_REP_LEN_HIGH(sym) \ + (LZMA_PROB_REP_LEN_HIGH_OFFSET + (sym)) +#define LZMA_LITERAL(code, size) \ + (LZMA_PROB_LITERAL_OFFSET + (code) * LZMA_LITERAL_CODER_SIZE + (size)) + +/* Read an LZMA varint from BUF, reading and updating *POFFSET, + setting *VAL. Returns 0 on error, 1 on success. */ + +static int +elf_lzma_varint (const unsigned char *compressed, size_t compressed_size, + size_t *poffset, uint64_t *val) +{ + size_t off; + int i; + uint64_t v; + unsigned char b; + + off = *poffset; + i = 0; + v = 0; + while (1) + { + if (unlikely (off >= compressed_size)) + { + elf_uncompress_failed (); + return 0; + } + b = compressed[off]; + v |= (b & 0x7f) << (i * 7); + ++off; + if ((b & 0x80) == 0) + { + *poffset = off; + *val = v; + return 1; + } + ++i; + if (unlikely (i >= 9)) + { + elf_uncompress_failed (); + return 0; + } + } +} + +/* Normalize the LZMA range decoder, pulling in an extra input byte if + needed. */ + +static void +elf_lzma_range_normalize (const unsigned char *compressed, + size_t compressed_size, size_t *poffset, + uint32_t *prange, uint32_t *pcode) +{ + if (*prange < (1U << 24)) + { + if (unlikely (*poffset >= compressed_size)) + { + /* We assume this will be caught elsewhere. */ + elf_uncompress_failed (); + return; + } + *prange <<= 8; + *pcode <<= 8; + *pcode += compressed[*poffset]; + ++*poffset; + } +} + +/* Read and return a single bit from the LZMA stream, reading and + updating *PROB. Each bit comes from the range coder. 
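+
+   *PROB is an 11 bit estimate (out of 1 << 11) of the chance that the next
+   bit is 0.  The current range is split in that proportion: if CODE falls
+   in the low part the bit is 0 and *PROB moves up by 1/32 of its distance
+   from 1 << 11, otherwise the bit is 1 and *PROB moves down by 1/32 of its
+   value.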
*/ + +static int +elf_lzma_bit (const unsigned char *compressed, size_t compressed_size, + uint16_t *prob, size_t *poffset, uint32_t *prange, + uint32_t *pcode) +{ + uint32_t bound; + + elf_lzma_range_normalize (compressed, compressed_size, poffset, + prange, pcode); + bound = (*prange >> 11) * (uint32_t) *prob; + if (*pcode < bound) + { + *prange = bound; + *prob += ((1U << 11) - *prob) >> 5; + return 0; + } + else + { + *prange -= bound; + *pcode -= bound; + *prob -= *prob >> 5; + return 1; + } +} + +/* Read an integer of size BITS from the LZMA stream, most significant + bit first. The bits are predicted using PROBS. */ + +static uint32_t +elf_lzma_integer (const unsigned char *compressed, size_t compressed_size, + uint16_t *probs, uint32_t bits, size_t *poffset, + uint32_t *prange, uint32_t *pcode) +{ + uint32_t sym; + uint32_t i; + + sym = 1; + for (i = 0; i < bits; i++) + { + int bit; + + bit = elf_lzma_bit (compressed, compressed_size, probs + sym, poffset, + prange, pcode); + sym <<= 1; + sym += bit; + } + return sym - (1 << bits); +} + +/* Read an integer of size BITS from the LZMA stream, least + significant bit first. The bits are predicted using PROBS. */ + +static uint32_t +elf_lzma_reverse_integer (const unsigned char *compressed, + size_t compressed_size, uint16_t *probs, + uint32_t bits, size_t *poffset, uint32_t *prange, + uint32_t *pcode) +{ + uint32_t sym; + uint32_t val; + uint32_t i; + + sym = 1; + val = 0; + for (i = 0; i < bits; i++) + { + int bit; + + bit = elf_lzma_bit (compressed, compressed_size, probs + sym, poffset, + prange, pcode); + sym <<= 1; + sym += bit; + val += bit << i; + } + return val; +} + +/* Read a length from the LZMA stream. IS_REP picks either LZMA_MATCH + or LZMA_REP probabilities. */ + +static uint32_t +elf_lzma_len (const unsigned char *compressed, size_t compressed_size, + uint16_t *probs, int is_rep, unsigned int pos_state, + size_t *poffset, uint32_t *prange, uint32_t *pcode) +{ + uint16_t *probs_choice; + uint16_t *probs_sym; + uint32_t bits; + uint32_t len; + + probs_choice = probs + (is_rep + ? LZMA_REP_LEN_CHOICE + : LZMA_MATCH_LEN_CHOICE); + if (elf_lzma_bit (compressed, compressed_size, probs_choice, poffset, + prange, pcode)) + { + probs_choice = probs + (is_rep + ? LZMA_REP_LEN_CHOICE2 + : LZMA_MATCH_LEN_CHOICE2); + if (elf_lzma_bit (compressed, compressed_size, probs_choice, + poffset, prange, pcode)) + { + probs_sym = probs + (is_rep + ? LZMA_REP_LEN_HIGH (0) + : LZMA_MATCH_LEN_HIGH (0)); + bits = 8; + len = 2 + 8 + 8; + } + else + { + probs_sym = probs + (is_rep + ? LZMA_REP_LEN_MID (pos_state, 0) + : LZMA_MATCH_LEN_MID (pos_state, 0)); + bits = 3; + len = 2 + 8; + } + } + else + { + probs_sym = probs + (is_rep + ? LZMA_REP_LEN_LOW (pos_state, 0) + : LZMA_MATCH_LEN_LOW (pos_state, 0)); + bits = 3; + len = 2; + } + + len += elf_lzma_integer (compressed, compressed_size, probs_sym, bits, + poffset, prange, pcode); + return len; +} + +/* Uncompress one LZMA block from a minidebug file. The compressed + data is at COMPRESSED + *POFFSET. Update *POFFSET. Store the data + into the memory at UNCOMPRESSED, size UNCOMPRESSED_SIZE. CHECK is + the stream flag from the xz header. Return 1 on successful + decompression. 
*/ + +static int +elf_uncompress_lzma_block (const unsigned char *compressed, + size_t compressed_size, unsigned char check, + uint16_t *probs, unsigned char *uncompressed, + size_t uncompressed_size, size_t *poffset) +{ + size_t off; + size_t block_header_offset; + size_t block_header_size; + unsigned char block_flags; + uint64_t header_compressed_size; + uint64_t header_uncompressed_size; + unsigned char lzma2_properties; + size_t crc_offset; + uint32_t computed_crc; + uint32_t stream_crc; + size_t uncompressed_offset; + size_t dict_start_offset; + unsigned int lc; + unsigned int lp; + unsigned int pb; + uint32_t range; + uint32_t code; + uint32_t lstate; + uint32_t dist[4]; + + off = *poffset; + block_header_offset = off; + + /* Block header size is a single byte. */ + if (unlikely (off >= compressed_size)) + { + elf_uncompress_failed (); + return 0; + } + block_header_size = (compressed[off] + 1) * 4; + if (unlikely (off + block_header_size > compressed_size)) + { + elf_uncompress_failed (); + return 0; + } + + /* Block flags. */ + block_flags = compressed[off + 1]; + if (unlikely ((block_flags & 0x3c) != 0)) + { + elf_uncompress_failed (); + return 0; + } + + off += 2; + + /* Optional compressed size. */ + header_compressed_size = 0; + if ((block_flags & 0x40) != 0) + { + *poffset = off; + if (!elf_lzma_varint (compressed, compressed_size, poffset, + &header_compressed_size)) + return 0; + off = *poffset; + } + + /* Optional uncompressed size. */ + header_uncompressed_size = 0; + if ((block_flags & 0x80) != 0) + { + *poffset = off; + if (!elf_lzma_varint (compressed, compressed_size, poffset, + &header_uncompressed_size)) + return 0; + off = *poffset; + } + + /* The recipe for creating a minidebug file is to run the xz program + with no arguments, so we expect exactly one filter: lzma2. */ + + if (unlikely ((block_flags & 0x3) != 0)) + { + elf_uncompress_failed (); + return 0; + } + + if (unlikely (off + 2 >= block_header_offset + block_header_size)) + { + elf_uncompress_failed (); + return 0; + } + + /* The filter ID for LZMA2 is 0x21. */ + if (unlikely (compressed[off] != 0x21)) + { + elf_uncompress_failed (); + return 0; + } + ++off; + + /* The size of the filter properties for LZMA2 is 1. */ + if (unlikely (compressed[off] != 1)) + { + elf_uncompress_failed (); + return 0; + } + ++off; + + lzma2_properties = compressed[off]; + ++off; + + if (unlikely (lzma2_properties > 40)) + { + elf_uncompress_failed (); + return 0; + } + + /* The properties describe the dictionary size, but we don't care + what that is. */ + + /* Skip to just before CRC, verifying zero bytes in between. */ + crc_offset = block_header_offset + block_header_size - 4; + if (unlikely (crc_offset + 4 > compressed_size)) + { + elf_uncompress_failed (); + return 0; + } + for (; off < crc_offset; off++) + { + if (compressed[off] != 0) + { + elf_uncompress_failed (); + return 0; + } + } + + /* Block header CRC. */ + computed_crc = elf_crc32 (0, compressed + block_header_offset, + block_header_size - 4); + stream_crc = ((uint32_t)compressed[off] + | ((uint32_t)compressed[off + 1] << 8) + | ((uint32_t)compressed[off + 2] << 16) + | ((uint32_t)compressed[off + 3] << 24)); + if (unlikely (computed_crc != stream_crc)) + { + elf_uncompress_failed (); + return 0; + } + off += 4; + + /* Read a sequence of LZMA2 packets. 
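+     A condensed view of the control bytes handled below:
+       0x00          end of packets
+       0x01, 0x02    uncompressed chunk (0x01 also resets the dictionary)
+       0x80..0xff    LZMA chunk; bits 0-4 hold the high bits of the
+                     unpacked size, bits 5-6 select how much state to
+                     reset (0xe0 and above also resets the dictionary)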
*/ + + uncompressed_offset = 0; + dict_start_offset = 0; + lc = 0; + lp = 0; + pb = 0; + lstate = 0; + while (off < compressed_size) + { + unsigned char control; + + range = 0xffffffff; + code = 0; + + control = compressed[off]; + ++off; + if (unlikely (control == 0)) + { + /* End of packets. */ + break; + } + + if (control == 1 || control >= 0xe0) + { + /* Reset dictionary to empty. */ + dict_start_offset = uncompressed_offset; + } + + if (control < 0x80) + { + size_t chunk_size; + + /* The only valid values here are 1 or 2. A 1 means to + reset the dictionary (done above). Then we see an + uncompressed chunk. */ + + if (unlikely (control > 2)) + { + elf_uncompress_failed (); + return 0; + } + + /* An uncompressed chunk is a two byte size followed by + data. */ + + if (unlikely (off + 2 > compressed_size)) + { + elf_uncompress_failed (); + return 0; + } + + chunk_size = compressed[off] << 8; + chunk_size += compressed[off + 1]; + ++chunk_size; + + off += 2; + + if (unlikely (off + chunk_size > compressed_size)) + { + elf_uncompress_failed (); + return 0; + } + if (unlikely (uncompressed_offset + chunk_size > uncompressed_size)) + { + elf_uncompress_failed (); + return 0; + } + + memcpy (uncompressed + uncompressed_offset, compressed + off, + chunk_size); + uncompressed_offset += chunk_size; + off += chunk_size; + } + else + { + size_t uncompressed_chunk_start; + size_t uncompressed_chunk_size; + size_t compressed_chunk_size; + size_t limit; + + /* An LZMA chunk. This starts with an uncompressed size and + a compressed size. */ + + if (unlikely (off + 4 >= compressed_size)) + { + elf_uncompress_failed (); + return 0; + } + + uncompressed_chunk_start = uncompressed_offset; + + uncompressed_chunk_size = (control & 0x1f) << 16; + uncompressed_chunk_size += compressed[off] << 8; + uncompressed_chunk_size += compressed[off + 1]; + ++uncompressed_chunk_size; + + compressed_chunk_size = compressed[off + 2] << 8; + compressed_chunk_size += compressed[off + 3]; + ++compressed_chunk_size; + + off += 4; + + /* Bit 7 (0x80) is set. + Bits 6 and 5 (0x40 and 0x20) are as follows: + 0: don't reset anything + 1: reset state + 2: reset state, read properties + 3: reset state, read properties, reset dictionary (done above) */ + + if (control >= 0xc0) + { + unsigned char props; + + /* Bit 6 is set, read properties. */ + + if (unlikely (off >= compressed_size)) + { + elf_uncompress_failed (); + return 0; + } + props = compressed[off]; + ++off; + if (unlikely (props > (4 * 5 + 4) * 9 + 8)) + { + elf_uncompress_failed (); + return 0; + } + pb = 0; + while (props >= 9 * 5) + { + props -= 9 * 5; + ++pb; + } + lp = 0; + while (props > 9) + { + props -= 9; + ++lp; + } + lc = props; + if (unlikely (lc + lp > 4)) + { + elf_uncompress_failed (); + return 0; + } + } + + if (control >= 0xa0) + { + size_t i; + + /* Bit 5 or 6 is set, reset LZMA state. */ + + lstate = 0; + memset (&dist, 0, sizeof dist); + for (i = 0; i < LZMA_PROB_TOTAL_COUNT; i++) + probs[i] = 1 << 10; + range = 0xffffffff; + code = 0; + } + + /* Read the range code. */ + + if (unlikely (off + 5 > compressed_size)) + { + elf_uncompress_failed (); + return 0; + } + + /* The byte at compressed[off] is ignored for some + reason. */ + + code = ((compressed[off + 1] << 24) + + (compressed[off + 2] << 16) + + (compressed[off + 3] << 8) + + compressed[off + 4]); + off += 5; + + /* This is the main LZMA decode loop. 
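+             Each pass decodes one symbol:
+               is_match = 0              -> a single literal byte
+               is_match = 1, is_rep = 0  -> match with a newly coded distance
+               is_match = 1, is_rep = 1  -> match reusing one of the last
+                                            four distances (possibly length 1)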
*/ + + limit = off + compressed_chunk_size; + *poffset = off; + while (*poffset < limit) + { + unsigned int pos_state; + + if (unlikely (uncompressed_offset + == (uncompressed_chunk_start + + uncompressed_chunk_size))) + { + /* We've decompressed all the expected bytes. */ + break; + } + + pos_state = ((uncompressed_offset - dict_start_offset) + & ((1 << pb) - 1)); + + if (elf_lzma_bit (compressed, compressed_size, + probs + LZMA_IS_MATCH (lstate, pos_state), + poffset, &range, &code)) + { + uint32_t len; + + if (elf_lzma_bit (compressed, compressed_size, + probs + LZMA_IS_REP (lstate), + poffset, &range, &code)) + { + int short_rep; + uint32_t next_dist; + + /* Repeated match. */ + + short_rep = 0; + if (elf_lzma_bit (compressed, compressed_size, + probs + LZMA_IS_REP0 (lstate), + poffset, &range, &code)) + { + if (elf_lzma_bit (compressed, compressed_size, + probs + LZMA_IS_REP1 (lstate), + poffset, &range, &code)) + { + if (elf_lzma_bit (compressed, compressed_size, + probs + LZMA_IS_REP2 (lstate), + poffset, &range, &code)) + { + next_dist = dist[3]; + dist[3] = dist[2]; + } + else + { + next_dist = dist[2]; + } + dist[2] = dist[1]; + } + else + { + next_dist = dist[1]; + } + + dist[1] = dist[0]; + dist[0] = next_dist; + } + else + { + if (!elf_lzma_bit (compressed, compressed_size, + (probs + + LZMA_IS_REP0_LONG (lstate, + pos_state)), + poffset, &range, &code)) + short_rep = 1; + } + + if (lstate < 7) + lstate = short_rep ? 9 : 8; + else + lstate = 11; + + if (short_rep) + len = 1; + else + len = elf_lzma_len (compressed, compressed_size, + probs, 1, pos_state, poffset, + &range, &code); + } + else + { + uint32_t dist_state; + uint32_t dist_slot; + uint16_t *probs_dist; + + /* Match. */ + + if (lstate < 7) + lstate = 7; + else + lstate = 10; + dist[3] = dist[2]; + dist[2] = dist[1]; + dist[1] = dist[0]; + len = elf_lzma_len (compressed, compressed_size, + probs, 0, pos_state, poffset, + &range, &code); + + if (len < 4 + 2) + dist_state = len - 2; + else + dist_state = 3; + probs_dist = probs + LZMA_DIST_SLOT (dist_state, 0); + dist_slot = elf_lzma_integer (compressed, + compressed_size, + probs_dist, 6, + poffset, &range, + &code); + if (dist_slot < LZMA_DIST_MODEL_START) + dist[0] = dist_slot; + else + { + uint32_t limit; + + limit = (dist_slot >> 1) - 1; + dist[0] = 2 + (dist_slot & 1); + if (dist_slot < LZMA_DIST_MODEL_END) + { + dist[0] <<= limit; + probs_dist = (probs + + LZMA_DIST_SPECIAL(dist[0] + - dist_slot + - 1)); + dist[0] += + elf_lzma_reverse_integer (compressed, + compressed_size, + probs_dist, + limit, poffset, + &range, &code); + } + else + { + uint32_t dist0; + uint32_t i; + + dist0 = dist[0]; + for (i = 0; i < limit - 4; i++) + { + uint32_t mask; + + elf_lzma_range_normalize (compressed, + compressed_size, + poffset, + &range, &code); + range >>= 1; + code -= range; + mask = -(code >> 31); + code += range & mask; + dist0 <<= 1; + dist0 += mask + 1; + } + dist0 <<= 4; + probs_dist = probs + LZMA_DIST_ALIGN (0); + dist0 += + elf_lzma_reverse_integer (compressed, + compressed_size, + probs_dist, 4, + poffset, + &range, &code); + dist[0] = dist0; + } + } + } + + if (unlikely (uncompressed_offset + - dict_start_offset < dist[0] + 1)) + { + elf_uncompress_failed (); + return 0; + } + if (unlikely (uncompressed_offset + len > uncompressed_size)) + { + elf_uncompress_failed (); + return 0; + } + + if (dist[0] == 0) + { + /* A common case, meaning repeat the last + character LEN times. 
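+                     (dist[0] holds the match distance minus one, so zero
+                      means a distance of one byte: the match is a run of
+                      the previous byte and a memset is sufficient)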
*/ + memset (uncompressed + uncompressed_offset, + uncompressed[uncompressed_offset - 1], + len); + uncompressed_offset += len; + } + else if (dist[0] + 1 >= len) + { + memcpy (uncompressed + uncompressed_offset, + uncompressed + uncompressed_offset - dist[0] - 1, + len); + uncompressed_offset += len; + } + else + { + while (len > 0) + { + uint32_t copy; + + copy = len < dist[0] + 1 ? len : dist[0] + 1; + memcpy (uncompressed + uncompressed_offset, + (uncompressed + uncompressed_offset + - dist[0] - 1), + copy); + len -= copy; + uncompressed_offset += copy; + } + } + } + else + { + unsigned char prev; + unsigned char low; + size_t high; + uint16_t *lit_probs; + unsigned int sym; + + /* Literal value. */ + + if (uncompressed_offset > 0) + prev = uncompressed[uncompressed_offset - 1]; + else + prev = 0; + low = prev >> (8 - lc); + high = (((uncompressed_offset - dict_start_offset) + & ((1 << lp) - 1)) + << lc); + lit_probs = probs + LZMA_LITERAL (low + high, 0); + if (lstate < 7) + sym = elf_lzma_integer (compressed, compressed_size, + lit_probs, 8, poffset, &range, + &code); + else + { + unsigned int match; + unsigned int bit; + unsigned int match_bit; + unsigned int idx; + + sym = 1; + if (uncompressed_offset >= dist[0] + 1) + match = uncompressed[uncompressed_offset - dist[0] - 1]; + else + match = 0; + match <<= 1; + bit = 0x100; + do + { + match_bit = match & bit; + match <<= 1; + idx = bit + match_bit + sym; + sym <<= 1; + if (elf_lzma_bit (compressed, compressed_size, + lit_probs + idx, poffset, + &range, &code)) + { + ++sym; + bit &= match_bit; + } + else + { + bit &= ~ match_bit; + } + } + while (sym < 0x100); + } + + if (unlikely (uncompressed_offset >= uncompressed_size)) + { + elf_uncompress_failed (); + return 0; + } + + uncompressed[uncompressed_offset] = (unsigned char) sym; + ++uncompressed_offset; + if (lstate <= 3) + lstate = 0; + else if (lstate <= 9) + lstate -= 3; + else + lstate -= 6; + } + } + + elf_lzma_range_normalize (compressed, compressed_size, poffset, + &range, &code); + + off = *poffset; + } + } + + /* We have reached the end of the block. Pad to four byte + boundary. */ + off = (off + 3) &~ (size_t) 3; + if (unlikely (off > compressed_size)) + { + elf_uncompress_failed (); + return 0; + } + + switch (check) + { + case 0: + /* No check. */ + break; + + case 1: + /* CRC32 */ + if (unlikely (off + 4 > compressed_size)) + { + elf_uncompress_failed (); + return 0; + } + computed_crc = elf_crc32 (0, uncompressed, uncompressed_offset); + stream_crc = ((uint32_t)compressed[off] + | ((uint32_t)compressed[off + 1] << 8) + | ((uint32_t)compressed[off + 2] << 16) + | ((uint32_t)compressed[off + 3] << 24)); + if (computed_crc != stream_crc) + { + elf_uncompress_failed (); + return 0; + } + off += 4; + break; + + case 4: + /* CRC64. We don't bother computing a CRC64 checksum. */ + if (unlikely (off + 8 > compressed_size)) + { + elf_uncompress_failed (); + return 0; + } + off += 8; + break; + + case 10: + /* SHA. We don't bother computing a SHA checksum. */ + if (unlikely (off + 32 > compressed_size)) + { + elf_uncompress_failed (); + return 0; + } + off += 32; + break; + + default: + elf_uncompress_failed (); + return 0; + } + + *poffset = off; + + return 1; +} + +/* Uncompress LZMA data found in a minidebug file. The minidebug + format is described at + https://sourceware.org/gdb/current/onlinedocs/gdb/MiniDebugInfo.html. + Returns 0 on error, 1 on successful decompression. 
For this + function we return 0 on failure to decompress, as the calling code + will carry on in that case. */ + +static int +elf_uncompress_lzma (struct backtrace_state *state, + const unsigned char *compressed, size_t compressed_size, + backtrace_error_callback error_callback, void *data, + unsigned char **uncompressed, size_t *uncompressed_size) +{ + size_t header_size; + size_t footer_size; + unsigned char check; + uint32_t computed_crc; + uint32_t stream_crc; + size_t offset; + size_t index_size; + size_t footer_offset; + size_t index_offset; + uint64_t index_compressed_size; + uint64_t index_uncompressed_size; + unsigned char *mem; + uint16_t *probs; + size_t compressed_block_size; + + /* The format starts with a stream header and ends with a stream + footer. */ + header_size = 12; + footer_size = 12; + if (unlikely (compressed_size < header_size + footer_size)) + { + elf_uncompress_failed (); + return 0; + } + + /* The stream header starts with a magic string. */ + if (unlikely (memcmp (compressed, "\375" "7zXZ\0", 6) != 0)) + { + elf_uncompress_failed (); + return 0; + } + + /* Next come stream flags. The first byte is zero, the second byte + is the check. */ + if (unlikely (compressed[6] != 0)) + { + elf_uncompress_failed (); + return 0; + } + check = compressed[7]; + if (unlikely ((check & 0xf8) != 0)) + { + elf_uncompress_failed (); + return 0; + } + + /* Next comes a CRC of the stream flags. */ + computed_crc = elf_crc32 (0, compressed + 6, 2); + stream_crc = ((uint32_t)compressed[8] + | ((uint32_t)compressed[9] << 8) + | ((uint32_t)compressed[10] << 16) + | ((uint32_t)compressed[11] << 24)); + if (unlikely (computed_crc != stream_crc)) + { + elf_uncompress_failed (); + return 0; + } + + /* Now that we've parsed the header, parse the footer, so that we + can get the uncompressed size. */ + + /* The footer ends with two magic bytes. */ + + offset = compressed_size; + if (unlikely (memcmp (compressed + offset - 2, "YZ", 2) != 0)) + { + elf_uncompress_failed (); + return 0; + } + offset -= 2; + + /* Before that are the stream flags, which should be the same as the + flags in the header. */ + if (unlikely (compressed[offset - 2] != 0 + || compressed[offset - 1] != check)) + { + elf_uncompress_failed (); + return 0; + } + offset -= 2; + + /* Before that is the size of the index field, which precedes the + footer. */ + index_size = (compressed[offset - 4] + | (compressed[offset - 3] << 8) + | (compressed[offset - 2] << 16) + | (compressed[offset - 1] << 24)); + index_size = (index_size + 1) * 4; + offset -= 4; + + /* Before that is a footer CRC. */ + computed_crc = elf_crc32 (0, compressed + offset, 6); + stream_crc = ((uint32_t)compressed[offset - 4] + | ((uint32_t)compressed[offset - 3] << 8) + | ((uint32_t)compressed[offset - 2] << 16) + | ((uint32_t)compressed[offset - 1] << 24)); + if (unlikely (computed_crc != stream_crc)) + { + elf_uncompress_failed (); + return 0; + } + offset -= 4; + + /* The index comes just before the footer. */ + if (unlikely (offset < index_size + header_size)) + { + elf_uncompress_failed (); + return 0; + } + + footer_offset = offset; + offset -= index_size; + index_offset = offset; + + /* The index starts with a zero byte. */ + if (unlikely (compressed[offset] != 0)) + { + elf_uncompress_failed (); + return 0; + } + ++offset; + + /* Next is the number of blocks. We expect zero blocks for an empty + stream, and otherwise a single block. 
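+     (The record count is stored as a varint, but both 0 and 1 fit in a
+     single byte with the continuation bit clear, so a plain byte
+     comparison is enough here.)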
*/ + if (unlikely (compressed[offset] == 0)) + { + *uncompressed = NULL; + *uncompressed_size = 0; + return 1; + } + if (unlikely (compressed[offset] != 1)) + { + elf_uncompress_failed (); + return 0; + } + ++offset; + + /* Next is the compressed size and the uncompressed size. */ + if (!elf_lzma_varint (compressed, compressed_size, &offset, + &index_compressed_size)) + return 0; + if (!elf_lzma_varint (compressed, compressed_size, &offset, + &index_uncompressed_size)) + return 0; + + /* Pad to a four byte boundary. */ + offset = (offset + 3) &~ (size_t) 3; + + /* Next is a CRC of the index. */ + computed_crc = elf_crc32 (0, compressed + index_offset, + offset - index_offset); + stream_crc = ((uint32_t)compressed[offset] + | ((uint32_t)compressed[offset + 1] << 8) + | ((uint32_t)compressed[offset + 2] << 16) + | ((uint32_t)compressed[offset + 3] << 24)); + if (unlikely (computed_crc != stream_crc)) + { + elf_uncompress_failed (); + return 0; + } + offset += 4; + + /* We should now be back at the footer. */ + if (unlikely (offset != footer_offset)) + { + elf_uncompress_failed (); + return 0; + } + + /* Allocate space to hold the uncompressed data. If we succeed in + uncompressing the LZMA data, we never free this memory. */ + mem = (unsigned char *) backtrace_alloc (state, index_uncompressed_size, + error_callback, data); + if (unlikely (mem == NULL)) + return 0; + *uncompressed = mem; + *uncompressed_size = index_uncompressed_size; + + /* Allocate space for probabilities. */ + probs = ((uint16_t *) + backtrace_alloc (state, + LZMA_PROB_TOTAL_COUNT * sizeof (uint16_t), + error_callback, data)); + if (unlikely (probs == NULL)) + { + backtrace_free (state, mem, index_uncompressed_size, error_callback, + data); + return 0; + } + + /* Uncompress the block, which follows the header. */ + offset = 12; + if (!elf_uncompress_lzma_block (compressed, compressed_size, check, probs, + mem, index_uncompressed_size, &offset)) + { + backtrace_free (state, mem, index_uncompressed_size, error_callback, + data); + return 0; + } + + compressed_block_size = offset - 12; + if (unlikely (compressed_block_size + != ((index_compressed_size + 3) &~ (size_t) 3))) + { + elf_uncompress_failed (); + backtrace_free (state, mem, index_uncompressed_size, error_callback, + data); + return 0; + } + + offset = (offset + 3) &~ (size_t) 3; + if (unlikely (offset != index_offset)) + { + elf_uncompress_failed (); + backtrace_free (state, mem, index_uncompressed_size, error_callback, + data); + return 0; + } + + return 1; +} + +/* This function is a hook for testing the LZMA support. It is only + used by tests. */ + +int +backtrace_uncompress_lzma (struct backtrace_state *state, + const unsigned char *compressed, + size_t compressed_size, + backtrace_error_callback error_callback, + void *data, unsigned char **uncompressed, + size_t *uncompressed_size) +{ + return elf_uncompress_lzma (state, compressed, compressed_size, + error_callback, data, uncompressed, + uncompressed_size); +} + +/* Add the backtrace data for one ELF file. Returns 1 on success, + 0 on failure (in both cases descriptor is closed) or -1 if exe + is non-zero and the ELF file is ET_DYN, which tells the caller that + elf_add will need to be called on the descriptor again after + base_address is determined. 
*/ + +static int +elf_add (struct backtrace_state *state, const char *filename, int descriptor, + const unsigned char *memory, size_t memory_size, + struct libbacktrace_base_address base_address, + struct elf_ppc64_opd_data *caller_opd, + backtrace_error_callback error_callback, void *data, + fileline *fileline_fn, int *found_sym, int *found_dwarf, + struct dwarf_data **fileline_entry, int exe, int debuginfo, + const char *with_buildid_data, uint32_t with_buildid_size) +{ + struct elf_view ehdr_view; + b_elf_ehdr ehdr; + off_t shoff; + unsigned int shnum; + unsigned int shstrndx; + struct elf_view shdrs_view; + int shdrs_view_valid; + const b_elf_shdr *shdrs; + const b_elf_shdr *shstrhdr; + size_t shstr_size; + off_t shstr_off; + struct elf_view names_view; + int names_view_valid; + const char *names; + unsigned int symtab_shndx; + unsigned int dynsym_shndx; + unsigned int i; + struct debug_section_info sections[DEBUG_MAX]; + struct debug_section_info zsections[DEBUG_MAX]; + struct elf_view symtab_view; + int symtab_view_valid; + struct elf_view strtab_view; + int strtab_view_valid; + struct elf_view buildid_view; + int buildid_view_valid; + const char *buildid_data; + uint32_t buildid_size; + struct elf_view debuglink_view; + int debuglink_view_valid; + const char *debuglink_name; + uint32_t debuglink_crc; + struct elf_view debugaltlink_view; + int debugaltlink_view_valid; + const char *debugaltlink_name; + const char *debugaltlink_buildid_data; + uint32_t debugaltlink_buildid_size; + struct elf_view gnu_debugdata_view; + int gnu_debugdata_view_valid; + size_t gnu_debugdata_size; + unsigned char *gnu_debugdata_uncompressed; + size_t gnu_debugdata_uncompressed_size; + off_t min_offset; + off_t max_offset; + off_t debug_size; + struct elf_view debug_view; + int debug_view_valid; + unsigned int using_debug_view; + uint16_t *zdebug_table; + struct elf_view split_debug_view[DEBUG_MAX]; + unsigned char split_debug_view_valid[DEBUG_MAX]; + struct elf_ppc64_opd_data opd_data, *opd; + int opd_view_valid; + struct dwarf_sections dwarf_sections; + struct dwarf_data *fileline_altlink = NULL; + + if (!debuginfo) + { + *found_sym = 0; + *found_dwarf = 0; + } + + shdrs_view_valid = 0; + names_view_valid = 0; + symtab_view_valid = 0; + strtab_view_valid = 0; + buildid_view_valid = 0; + buildid_data = NULL; + buildid_size = 0; + debuglink_view_valid = 0; + debuglink_name = NULL; + debuglink_crc = 0; + debugaltlink_view_valid = 0; + debugaltlink_name = NULL; + debugaltlink_buildid_data = NULL; + debugaltlink_buildid_size = 0; + gnu_debugdata_view_valid = 0; + gnu_debugdata_size = 0; + debug_view_valid = 0; + memset (&split_debug_view_valid[0], 0, sizeof split_debug_view_valid); + opd = NULL; + opd_view_valid = 0; + + if (!elf_get_view (state, descriptor, memory, memory_size, 0, sizeof ehdr, + error_callback, data, &ehdr_view)) + goto fail; + + memcpy (&ehdr, ehdr_view.view.data, sizeof ehdr); + + elf_release_view (state, &ehdr_view, error_callback, data); + + if (ehdr.e_ident[EI_MAG0] != ELFMAG0 + || ehdr.e_ident[EI_MAG1] != ELFMAG1 + || ehdr.e_ident[EI_MAG2] != ELFMAG2 + || ehdr.e_ident[EI_MAG3] != ELFMAG3) + { + error_callback (data, "executable file is not ELF", 0); + goto fail; + } + if (ehdr.e_ident[EI_VERSION] != EV_CURRENT) + { + error_callback (data, "executable file is unrecognized ELF version", 0); + goto fail; + } + +#if BACKTRACE_ELF_SIZE == 32 +#define BACKTRACE_ELFCLASS ELFCLASS32 +#else +#define BACKTRACE_ELFCLASS ELFCLASS64 +#endif + + if (ehdr.e_ident[EI_CLASS] != BACKTRACE_ELFCLASS) + { + 
error_callback (data, "executable file is unexpected ELF class", 0); + goto fail; + } + + if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB + && ehdr.e_ident[EI_DATA] != ELFDATA2MSB) + { + error_callback (data, "executable file has unknown endianness", 0); + goto fail; + } + + /* If the executable is ET_DYN, it is either a PIE, or we are running + directly a shared library with .interp. We need to wait for + dl_iterate_phdr in that case to determine the actual base_address. */ + if (exe && ehdr.e_type == ET_DYN) + return -1; + + shoff = ehdr.e_shoff; + shnum = ehdr.e_shnum; + shstrndx = ehdr.e_shstrndx; + + if ((shnum == 0 || shstrndx == SHN_XINDEX) + && shoff != 0) + { + struct elf_view shdr_view; + const b_elf_shdr *shdr; + + if (!elf_get_view (state, descriptor, memory, memory_size, shoff, + sizeof shdr, error_callback, data, &shdr_view)) + goto fail; + + shdr = (const b_elf_shdr *) shdr_view.view.data; + + if (shnum == 0) + shnum = shdr->sh_size; + + if (shstrndx == SHN_XINDEX) + { + shstrndx = shdr->sh_link; + + /* Versions of the GNU binutils between 2.12 and 2.18 did + not handle objects with more than SHN_LORESERVE sections + correctly. All large section indexes were offset by + 0x100. There is more information at + http://sourceware.org/bugzilla/show_bug.cgi?id-5900 . + Fortunately these object files are easy to detect, as the + GNU binutils always put the section header string table + near the end of the list of sections. Thus if the + section header string table index is larger than the + number of sections, then we know we have to subtract + 0x100 to get the real section index. */ + if (shstrndx >= shnum && shstrndx >= SHN_LORESERVE + 0x100) + shstrndx -= 0x100; + } + + elf_release_view (state, &shdr_view, error_callback, data); + } + + if (shnum == 0 || shstrndx == 0) + goto fail; + + /* To translate PC to file/line when using DWARF, we need to find + the .debug_info and .debug_line sections. */ + + /* Read the section headers, skipping the first one. */ + + if (!elf_get_view (state, descriptor, memory, memory_size, + shoff + sizeof (b_elf_shdr), + (shnum - 1) * sizeof (b_elf_shdr), + error_callback, data, &shdrs_view)) + goto fail; + shdrs_view_valid = 1; + shdrs = (const b_elf_shdr *) shdrs_view.view.data; + + /* Read the section names. */ + + shstrhdr = &shdrs[shstrndx - 1]; + shstr_size = shstrhdr->sh_size; + shstr_off = shstrhdr->sh_offset; + + if (!elf_get_view (state, descriptor, memory, memory_size, shstr_off, + shstrhdr->sh_size, error_callback, data, &names_view)) + goto fail; + names_view_valid = 1; + names = (const char *) names_view.view.data; + + symtab_shndx = 0; + dynsym_shndx = 0; + + memset (sections, 0, sizeof sections); + memset (zsections, 0, sizeof zsections); + + /* Look for the symbol table. */ + for (i = 1; i < shnum; ++i) + { + const b_elf_shdr *shdr; + unsigned int sh_name; + const char *name; + int j; + + shdr = &shdrs[i - 1]; + + if (shdr->sh_type == SHT_SYMTAB) + symtab_shndx = i; + else if (shdr->sh_type == SHT_DYNSYM) + dynsym_shndx = i; + + sh_name = shdr->sh_name; + if (sh_name >= shstr_size) + { + error_callback (data, "ELF section name out of range", 0); + goto fail; + } + + name = names + sh_name; + + for (j = 0; j < (int) DEBUG_MAX; ++j) + { + if (strcmp (name, dwarf_section_names[j]) == 0) + { + sections[j].offset = shdr->sh_offset; + sections[j].size = shdr->sh_size; + sections[j].compressed = (shdr->sh_flags & SHF_COMPRESSED) != 0; + break; + } + } + + if (name[0] == '.' 
&& name[1] == 'z') + { + for (j = 0; j < (int) DEBUG_MAX; ++j) + { + if (strcmp (name + 2, dwarf_section_names[j] + 1) == 0) + { + zsections[j].offset = shdr->sh_offset; + zsections[j].size = shdr->sh_size; + break; + } + } + } + + /* Read the build ID if present. This could check for any + SHT_NOTE section with the right note name and type, but gdb + looks for a specific section name. */ + if ((!debuginfo || with_buildid_data != NULL) + && !buildid_view_valid + && strcmp (name, ".note.gnu.build-id") == 0) + { + const b_elf_note *note; + + if (!elf_get_view (state, descriptor, memory, memory_size, + shdr->sh_offset, shdr->sh_size, error_callback, + data, &buildid_view)) + goto fail; + + buildid_view_valid = 1; + note = (const b_elf_note *) buildid_view.view.data; + if (note->type == NT_GNU_BUILD_ID + && note->namesz == 4 + && strncmp (note->name, "GNU", 4) == 0 + && shdr->sh_size <= 12 + ((note->namesz + 3) & ~ 3) + note->descsz) + { + buildid_data = &note->name[0] + ((note->namesz + 3) & ~ 3); + buildid_size = note->descsz; + } + + if (with_buildid_size != 0) + { + if (buildid_size != with_buildid_size) + goto fail; + + if (memcmp (buildid_data, with_buildid_data, buildid_size) != 0) + goto fail; + } + } + + /* Read the debuglink file if present. */ + if (!debuginfo + && !debuglink_view_valid + && strcmp (name, ".gnu_debuglink") == 0) + { + const char *debuglink_data; + size_t crc_offset; + + if (!elf_get_view (state, descriptor, memory, memory_size, + shdr->sh_offset, shdr->sh_size, error_callback, + data, &debuglink_view)) + goto fail; + + debuglink_view_valid = 1; + debuglink_data = (const char *) debuglink_view.view.data; + crc_offset = strnlen (debuglink_data, shdr->sh_size); + crc_offset = (crc_offset + 3) & ~3; + if (crc_offset + 4 <= shdr->sh_size) + { + debuglink_name = debuglink_data; + debuglink_crc = *(const uint32_t*)(debuglink_data + crc_offset); + } + } + + if (!debugaltlink_view_valid + && strcmp (name, ".gnu_debugaltlink") == 0) + { + const char *debugaltlink_data; + size_t debugaltlink_name_len; + + if (!elf_get_view (state, descriptor, memory, memory_size, + shdr->sh_offset, shdr->sh_size, error_callback, + data, &debugaltlink_view)) + goto fail; + + debugaltlink_view_valid = 1; + debugaltlink_data = (const char *) debugaltlink_view.view.data; + debugaltlink_name = debugaltlink_data; + debugaltlink_name_len = strnlen (debugaltlink_data, shdr->sh_size); + if (debugaltlink_name_len < shdr->sh_size) + { + /* Include terminating zero. */ + debugaltlink_name_len += 1; + + debugaltlink_buildid_data + = debugaltlink_data + debugaltlink_name_len; + debugaltlink_buildid_size = shdr->sh_size - debugaltlink_name_len; + } + } + + if (!debuginfo + && !gnu_debugdata_view_valid + && strcmp (name, ".gnu_debugdata") == 0) + { + if (!elf_get_view (state, descriptor, memory, memory_size, + shdr->sh_offset, shdr->sh_size, error_callback, + data, &gnu_debugdata_view)) + goto fail; + + gnu_debugdata_size = shdr->sh_size; + gnu_debugdata_view_valid = 1; + } + + /* Read the .opd section on PowerPC64 ELFv1. 
*/ + if (ehdr.e_machine == EM_PPC64 + && (ehdr.e_flags & EF_PPC64_ABI) < 2 + && shdr->sh_type == SHT_PROGBITS + && strcmp (name, ".opd") == 0) + { + if (!elf_get_view (state, descriptor, memory, memory_size, + shdr->sh_offset, shdr->sh_size, error_callback, + data, &opd_data.view)) + goto fail; + + opd = &opd_data; + opd->addr = shdr->sh_addr; + opd->data = (const char *) opd_data.view.view.data; + opd->size = shdr->sh_size; + opd_view_valid = 1; + } + } + + /* A debuginfo file may not have a useful .opd section, but we can use the + one from the original executable. */ + if (opd == NULL) + opd = caller_opd; + + if (symtab_shndx == 0) + symtab_shndx = dynsym_shndx; + if (symtab_shndx != 0) + { + const b_elf_shdr *symtab_shdr; + unsigned int strtab_shndx; + const b_elf_shdr *strtab_shdr; + struct elf_syminfo_data *sdata; + + symtab_shdr = &shdrs[symtab_shndx - 1]; + strtab_shndx = symtab_shdr->sh_link; + if (strtab_shndx >= shnum) + { + error_callback (data, + "ELF symbol table strtab link out of range", 0); + goto fail; + } + strtab_shdr = &shdrs[strtab_shndx - 1]; + + if (!elf_get_view (state, descriptor, memory, memory_size, + symtab_shdr->sh_offset, symtab_shdr->sh_size, + error_callback, data, &symtab_view)) + goto fail; + symtab_view_valid = 1; + + if (!elf_get_view (state, descriptor, memory, memory_size, + strtab_shdr->sh_offset, strtab_shdr->sh_size, + error_callback, data, &strtab_view)) + goto fail; + strtab_view_valid = 1; + + sdata = ((struct elf_syminfo_data *) + backtrace_alloc (state, sizeof *sdata, error_callback, data)); + if (sdata == NULL) + goto fail; + + if (!elf_initialize_syminfo (state, base_address, + (const unsigned char*)symtab_view.view.data, symtab_shdr->sh_size, + (const unsigned char*)strtab_view.view.data, strtab_shdr->sh_size, + error_callback, data, sdata, opd)) + { + backtrace_free (state, sdata, sizeof *sdata, error_callback, data); + goto fail; + } + + /* We no longer need the symbol table, but we hold on to the + string table permanently. */ + elf_release_view (state, &symtab_view, error_callback, data); + symtab_view_valid = 0; + strtab_view_valid = 0; + + *found_sym = 1; + + elf_add_syminfo_data (state, sdata); + } + + elf_release_view (state, &shdrs_view, error_callback, data); + shdrs_view_valid = 0; + elf_release_view (state, &names_view, error_callback, data); + names_view_valid = 0; + + /* If the debug info is in a separate file, read that one instead. 
*/ + + if (buildid_data != NULL) + { + int d; + + d = elf_open_debugfile_by_buildid (state, buildid_data, buildid_size, + filename, error_callback, data); + if (d >= 0) + { + int ret; + + elf_release_view (state, &buildid_view, error_callback, data); + if (debuglink_view_valid) + elf_release_view (state, &debuglink_view, error_callback, data); + if (debugaltlink_view_valid) + elf_release_view (state, &debugaltlink_view, error_callback, data); + ret = elf_add (state, "", d, NULL, 0, base_address, opd, + error_callback, data, fileline_fn, found_sym, + found_dwarf, NULL, 0, 1, NULL, 0); + if (ret < 0) + backtrace_close (d, error_callback, data); + else if (descriptor >= 0) + backtrace_close (descriptor, error_callback, data); + return ret; + } + } + + if (buildid_view_valid) + { + elf_release_view (state, &buildid_view, error_callback, data); + buildid_view_valid = 0; + } + + if (debuglink_name != NULL) + { + int d; + + d = elf_open_debugfile_by_debuglink (state, filename, debuglink_name, + debuglink_crc, error_callback, + data); + if (d >= 0) + { + int ret; + + elf_release_view (state, &debuglink_view, error_callback, data); + if (debugaltlink_view_valid) + elf_release_view (state, &debugaltlink_view, error_callback, data); + ret = elf_add (state, "", d, NULL, 0, base_address, opd, + error_callback, data, fileline_fn, found_sym, + found_dwarf, NULL, 0, 1, NULL, 0); + if (ret < 0) + backtrace_close (d, error_callback, data); + else if (descriptor >= 0) + backtrace_close(descriptor, error_callback, data); + return ret; + } + } + + if (debuglink_view_valid) + { + elf_release_view (state, &debuglink_view, error_callback, data); + debuglink_view_valid = 0; + } + + if (debugaltlink_name != NULL) + { + int d; + + d = elf_open_debugfile_by_debuglink (state, filename, debugaltlink_name, + 0, error_callback, data); + if (d >= 0) + { + int ret; + + ret = elf_add (state, filename, d, NULL, 0, base_address, opd, + error_callback, data, fileline_fn, found_sym, + found_dwarf, &fileline_altlink, 0, 1, + debugaltlink_buildid_data, debugaltlink_buildid_size); + elf_release_view (state, &debugaltlink_view, error_callback, data); + debugaltlink_view_valid = 0; + if (ret < 0) + { + backtrace_close (d, error_callback, data); + return ret; + } + } + } + + if (debugaltlink_view_valid) + { + elf_release_view (state, &debugaltlink_view, error_callback, data); + debugaltlink_view_valid = 0; + } + + if (gnu_debugdata_view_valid) + { + int ret; + + ret = elf_uncompress_lzma (state, + ((const unsigned char *) + gnu_debugdata_view.view.data), + gnu_debugdata_size, error_callback, data, + &gnu_debugdata_uncompressed, + &gnu_debugdata_uncompressed_size); + + elf_release_view (state, &gnu_debugdata_view, error_callback, data); + gnu_debugdata_view_valid = 0; + + if (ret) + { + ret = elf_add (state, filename, -1, gnu_debugdata_uncompressed, + gnu_debugdata_uncompressed_size, base_address, opd, + error_callback, data, fileline_fn, found_sym, + found_dwarf, NULL, 0, 0, NULL, 0); + if (ret >= 0 && descriptor >= 0) + backtrace_close(descriptor, error_callback, data); + return ret; + } + } + + if (opd_view_valid) + { + elf_release_view (state, &opd->view, error_callback, data); + opd_view_valid = 0; + opd = NULL; + } + + /* Read all the debug sections in a single view, since they are + probably adjacent in the file. If any of sections are + uncompressed, we never release this view. 
*/ + + min_offset = 0; + max_offset = 0; + debug_size = 0; + for (i = 0; i < (int) DEBUG_MAX; ++i) + { + off_t end; + + if (sections[i].size != 0) + { + if (min_offset == 0 || sections[i].offset < min_offset) + min_offset = sections[i].offset; + end = sections[i].offset + sections[i].size; + if (end > max_offset) + max_offset = end; + debug_size += sections[i].size; + } + if (zsections[i].size != 0) + { + if (min_offset == 0 || zsections[i].offset < min_offset) + min_offset = zsections[i].offset; + end = zsections[i].offset + zsections[i].size; + if (end > max_offset) + max_offset = end; + debug_size += zsections[i].size; + } + } + if (min_offset == 0 || max_offset == 0) + { + if (descriptor >= 0) + { + if (!backtrace_close (descriptor, error_callback, data)) + goto fail; + } + return 1; + } + + /* If the total debug section size is large, assume that there are + gaps between the sections, and read them individually. */ + + if (max_offset - min_offset < 0x20000000 + || max_offset - min_offset < debug_size + 0x10000) + { + if (!elf_get_view (state, descriptor, memory, memory_size, min_offset, + max_offset - min_offset, error_callback, data, + &debug_view)) + goto fail; + debug_view_valid = 1; + } + else + { + memset (&split_debug_view[0], 0, sizeof split_debug_view); + for (i = 0; i < (int) DEBUG_MAX; ++i) + { + struct debug_section_info *dsec; + + if (sections[i].size != 0) + dsec = &sections[i]; + else if (zsections[i].size != 0) + dsec = &zsections[i]; + else + continue; + + if (!elf_get_view (state, descriptor, memory, memory_size, + dsec->offset, dsec->size, error_callback, data, + &split_debug_view[i])) + goto fail; + split_debug_view_valid[i] = 1; + + if (sections[i].size != 0) + sections[i].data = ((const unsigned char *) + split_debug_view[i].view.data); + else + zsections[i].data = ((const unsigned char *) + split_debug_view[i].view.data); + } + } + + /* We've read all we need from the executable. */ + if (descriptor >= 0) + { + if (!backtrace_close (descriptor, error_callback, data)) + goto fail; + descriptor = -1; + } + + using_debug_view = 0; + if (debug_view_valid) + { + for (i = 0; i < (int) DEBUG_MAX; ++i) + { + if (sections[i].size == 0) + sections[i].data = NULL; + else + { + sections[i].data = ((const unsigned char *) debug_view.view.data + + (sections[i].offset - min_offset)); + ++using_debug_view; + } + + if (zsections[i].size == 0) + zsections[i].data = NULL; + else + zsections[i].data = ((const unsigned char *) debug_view.view.data + + (zsections[i].offset - min_offset)); + } + } + + /* Uncompress the old format (--compress-debug-sections=zlib-gnu). 
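+     In that format the section keeps its ".debug_*" contents
+     zlib-compressed under a ".z" name (".zdebug_info" rather than
+     ".debug_info"), prefixed with the 4 bytes "ZLIB" and an 8-byte
+     big-endian uncompressed size, which is what the
+     elf_uncompress_zdebug call below expects.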
*/ + + zdebug_table = NULL; + for (i = 0; i < (int) DEBUG_MAX; ++i) + { + if (sections[i].size == 0 && zsections[i].size > 0) + { + unsigned char *uncompressed_data; + size_t uncompressed_size; + + if (zdebug_table == NULL) + { + zdebug_table = ((uint16_t *) + backtrace_alloc (state, ZLIB_TABLE_SIZE, + error_callback, data)); + if (zdebug_table == NULL) + goto fail; + } + + uncompressed_data = NULL; + uncompressed_size = 0; + if (!elf_uncompress_zdebug (state, zsections[i].data, + zsections[i].size, zdebug_table, + error_callback, data, + &uncompressed_data, &uncompressed_size)) + goto fail; + sections[i].data = uncompressed_data; + sections[i].size = uncompressed_size; + sections[i].compressed = 0; + + if (split_debug_view_valid[i]) + { + elf_release_view (state, &split_debug_view[i], + error_callback, data); + split_debug_view_valid[i] = 0; + } + } + } + + if (zdebug_table != NULL) + { + backtrace_free (state, zdebug_table, ZLIB_TABLE_SIZE, + error_callback, data); + zdebug_table = NULL; + } + + /* Uncompress the official ELF format + (--compress-debug-sections=zlib-gabi, --compress-debug-sections=zstd). */ + for (i = 0; i < (int) DEBUG_MAX; ++i) + { + unsigned char *uncompressed_data; + size_t uncompressed_size; + + if (sections[i].size == 0 || !sections[i].compressed) + continue; + + if (zdebug_table == NULL) + { + zdebug_table = ((uint16_t *) + backtrace_alloc (state, ZDEBUG_TABLE_SIZE, + error_callback, data)); + if (zdebug_table == NULL) + goto fail; + } + + uncompressed_data = NULL; + uncompressed_size = 0; + if (!elf_uncompress_chdr (state, sections[i].data, sections[i].size, + zdebug_table, error_callback, data, + &uncompressed_data, &uncompressed_size)) + goto fail; + sections[i].data = uncompressed_data; + sections[i].size = uncompressed_size; + sections[i].compressed = 0; + + if (debug_view_valid) + --using_debug_view; + else if (split_debug_view_valid[i]) + { + elf_release_view (state, &split_debug_view[i], error_callback, data); + split_debug_view_valid[i] = 0; + } + } + + if (zdebug_table != NULL) + backtrace_free (state, zdebug_table, ZDEBUG_TABLE_SIZE, + error_callback, data); + + if (debug_view_valid && using_debug_view == 0) + { + elf_release_view (state, &debug_view, error_callback, data); + debug_view_valid = 0; + } + + for (i = 0; i < (int) DEBUG_MAX; ++i) + { + dwarf_sections.data[i] = sections[i].data; + dwarf_sections.size[i] = sections[i].size; + } + + if (!backtrace_dwarf_add (state, base_address, &dwarf_sections, + ehdr.e_ident[EI_DATA] == ELFDATA2MSB, + fileline_altlink, + error_callback, data, fileline_fn, + fileline_entry)) + goto fail; + + *found_dwarf = 1; + + return 1; + + fail: + if (shdrs_view_valid) + elf_release_view (state, &shdrs_view, error_callback, data); + if (names_view_valid) + elf_release_view (state, &names_view, error_callback, data); + if (symtab_view_valid) + elf_release_view (state, &symtab_view, error_callback, data); + if (strtab_view_valid) + elf_release_view (state, &strtab_view, error_callback, data); + if (debuglink_view_valid) + elf_release_view (state, &debuglink_view, error_callback, data); + if (debugaltlink_view_valid) + elf_release_view (state, &debugaltlink_view, error_callback, data); + if (gnu_debugdata_view_valid) + elf_release_view (state, &gnu_debugdata_view, error_callback, data); + if (buildid_view_valid) + elf_release_view (state, &buildid_view, error_callback, data); + if (debug_view_valid) + elf_release_view (state, &debug_view, error_callback, data); + for (i = 0; i < (int) DEBUG_MAX; ++i) + { + if 
(split_debug_view_valid[i]) + elf_release_view (state, &split_debug_view[i], error_callback, data); + } + if (opd_view_valid) + elf_release_view (state, &opd->view, error_callback, data); + if (descriptor >= 0) + backtrace_close (descriptor, error_callback, data); + return 0; +} + +/* Data passed to phdr_callback. */ + +struct phdr_data +{ + struct backtrace_state *state; + backtrace_error_callback error_callback; + void *data; + fileline *fileline_fn; + int *found_sym; + int *found_dwarf; + const char *exe_filename; + int exe_descriptor; +}; + +/* Callback passed to dl_iterate_phdr. Load debug info from shared + libraries. */ + +struct PhdrIterate +{ + char* dlpi_name; + ElfW(Addr) dlpi_addr; + ElfW(Addr) dlpi_end_addr; +}; +FastVector<PhdrIterate> s_phdrData(16); + +struct ElfAddrRange +{ + ElfW(Addr) dlpi_addr; + ElfW(Addr) dlpi_end_addr; +}; +FastVector<ElfAddrRange> s_sortedKnownElfRanges(16); + +static int address_in_known_elf_ranges(uintptr_t pc) +{ + auto it = std::lower_bound( s_sortedKnownElfRanges.begin(), s_sortedKnownElfRanges.end(), pc, + []( const ElfAddrRange& lhs, const uintptr_t rhs ) { return uintptr_t(lhs.dlpi_addr) > rhs; } ); + if( it != s_sortedKnownElfRanges.end() && pc <= it->dlpi_end_addr ) + { + return true; + } + return false; +} + +static int +phdr_callback_mock (struct dl_phdr_info *info, size_t size ATTRIBUTE_UNUSED, + void *pdata) +{ + if( address_in_known_elf_ranges(info->dlpi_addr) ) + { + return 0; + } + + auto ptr = s_phdrData.push_next(); + if (info->dlpi_name) + { + size_t sz = strlen (info->dlpi_name) + 1; + ptr->dlpi_name = (char*)tracy_malloc (sz); + memcpy (ptr->dlpi_name, info->dlpi_name, sz); + } + else ptr->dlpi_name = nullptr; + ptr->dlpi_addr = info->dlpi_addr; + + // calculate the end address as well, so we can quickly determine if a PC is within the range of this image + ptr->dlpi_end_addr = uintptr_t(info->dlpi_addr) + (info->dlpi_phnum ? uintptr_t( + info->dlpi_phdr[info->dlpi_phnum - 1].p_vaddr + + info->dlpi_phdr[info->dlpi_phnum - 1].p_memsz) : 0); + + return 0; +} + +static int +#ifdef __i386__ +__attribute__ ((__force_align_arg_pointer__)) +#endif +phdr_callback (struct PhdrIterate *info, void *pdata) +{ + struct phdr_data *pd = (struct phdr_data *) pdata; + const char *filename; + int descriptor; + int does_not_exist; + struct libbacktrace_base_address base_address; + fileline elf_fileline_fn; + int found_dwarf; + + /* There is not much we can do if we don't have the module name, + unless executable is ET_DYN, where we expect the very first + phdr_callback to be for the PIE. 
*/ + if (info->dlpi_name == NULL || info->dlpi_name[0] == '\0') + { + if (pd->exe_descriptor == -1) + return 0; + filename = pd->exe_filename; + descriptor = pd->exe_descriptor; + pd->exe_descriptor = -1; + } + else + { + if (pd->exe_descriptor != -1) + { + backtrace_close (pd->exe_descriptor, pd->error_callback, pd->data); + pd->exe_descriptor = -1; + } + + filename = info->dlpi_name; + descriptor = backtrace_open (info->dlpi_name, pd->error_callback, + pd->data, &does_not_exist); + if (descriptor < 0) + return 0; + } + + base_address.m = info->dlpi_addr; + if (elf_add (pd->state, filename, descriptor, NULL, 0, base_address, NULL, + pd->error_callback, pd->data, &elf_fileline_fn, pd->found_sym, + &found_dwarf, NULL, 0, 0, NULL, 0)) + { + if (found_dwarf) + { + *pd->found_dwarf = 1; + *pd->fileline_fn = elf_fileline_fn; + } + } + + return 0; +} + +static int elf_iterate_phdr_and_add_new_files(phdr_data *pd) +{ + assert(s_phdrData.empty()); + // dl_iterate_phdr, will only add entries for elf files loaded in a previously unseen range + dl_iterate_phdr(phdr_callback_mock, nullptr); + + if(s_phdrData.size() == 0) + { + return 0; + } + + uint32_t headersAdded = 0; + for (auto &v : s_phdrData) + { + phdr_callback(&v, (void *)pd); + + auto newEntry = s_sortedKnownElfRanges.push_next(); + newEntry->dlpi_addr = v.dlpi_addr; + newEntry->dlpi_end_addr = v.dlpi_end_addr; + + tracy_free(v.dlpi_name); + + headersAdded++; + } + + s_phdrData.clear(); + + std::sort( s_sortedKnownElfRanges.begin(), s_sortedKnownElfRanges.end(), + []( const ElfAddrRange& lhs, const ElfAddrRange& rhs ) { return lhs.dlpi_addr > rhs.dlpi_addr; } ); + + return headersAdded; +} + +#ifdef TRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT +/* Request an elf entry update if the pc passed in is not in any of the known elf ranges. +This could mean that new images were dlopened and we need to add those new elf entries */ +static int elf_refresh_address_ranges_if_needed(struct backtrace_state *state, uintptr_t pc) +{ + if ( address_in_known_elf_ranges(pc) ) + { + return 0; + } + + struct phdr_data pd; + int found_sym = 0; + int found_dwarf = 0; + fileline fileline_fn = nullptr; + pd.state = state; + pd.error_callback = nullptr; + pd.data = nullptr; + pd.fileline_fn = &fileline_fn; + pd.found_sym = &found_sym; + pd.found_dwarf = &found_dwarf; + pd.exe_filename = nullptr; + pd.exe_descriptor = -1; + + return elf_iterate_phdr_and_add_new_files(&pd); +} +#endif //#ifdef TRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT + +/* Initialize the backtrace data we need from an ELF executable. At + the ELF level, all we need to do is find the debug info + sections. */ + +int +backtrace_initialize (struct backtrace_state *state, const char *filename, + int descriptor, backtrace_error_callback error_callback, + void *data, fileline *fileline_fn) +{ + int ret; + int found_sym; + int found_dwarf; + fileline elf_fileline_fn = elf_nodebug; + struct phdr_data pd; + + + /* When using fdpic we must use dl_iterate_phdr for all modules, including + the main executable, so that we can get the right base address + mapping. 
*/ + if (!libbacktrace_using_fdpic ()) + { + struct libbacktrace_base_address zero_base_address; + + memset (&zero_base_address, 0, sizeof zero_base_address); + ret = elf_add (state, filename, descriptor, NULL, 0, zero_base_address, + NULL, error_callback, data, &elf_fileline_fn, &found_sym, + &found_dwarf, NULL, 1, 0, NULL, 0); + if (!ret) + return 0; + } + + pd.state = state; + pd.error_callback = error_callback; + pd.data = data; + pd.fileline_fn = &elf_fileline_fn; + pd.found_sym = &found_sym; + pd.found_dwarf = &found_dwarf; + pd.exe_filename = filename; + pd.exe_descriptor = ret < 0 ? descriptor : -1; + + elf_iterate_phdr_and_add_new_files(&pd); + + if (!state->threaded) + { + if (found_sym) + state->syminfo_fn = elf_syminfo; + else if (state->syminfo_fn == NULL) + state->syminfo_fn = elf_nosyms; + } + else + { + if (found_sym) + backtrace_atomic_store_pointer (&state->syminfo_fn, &elf_syminfo); + else + (void) __sync_bool_compare_and_swap (&state->syminfo_fn, NULL, + elf_nosyms); + } + + if (!state->threaded) + *fileline_fn = state->fileline_fn; + else + *fileline_fn = backtrace_atomic_load_pointer (&state->fileline_fn); + + if (*fileline_fn == NULL || *fileline_fn == elf_nodebug) + *fileline_fn = elf_fileline_fn; + + // install an address range refresh callback so we can cope with dynamically loaded elf files +#ifdef TRACY_LIBBACKTRACE_ELF_DYNLOAD_SUPPORT + state->request_known_address_ranges_refresh_fn = elf_refresh_address_ranges_if_needed; +#else + state->request_known_address_ranges_refresh_fn = NULL; +#endif + + return 1; +} + +} diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/fileline.cpp b/project/thirdparty/tracy-0.11.1/libbacktrace/fileline.cpp new file mode 100644 index 000000000..5a37ff0c7 --- /dev/null +++ b/project/thirdparty/tracy-0.11.1/libbacktrace/fileline.cpp @@ -0,0 +1,412 @@ +/* fileline.c -- Get file and line number information in a backtrace. + Copyright (C) 2012-2021 Free Software Foundation, Inc. + Written by Ian Lance Taylor, Google. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + (1) Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + (2) Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + (3) The name of the author may not be used to + endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
*/ + +#include "config.h" + +#include <sys/types.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> + +#if defined (HAVE_KERN_PROC_ARGS) || defined (HAVE_KERN_PROC) +#include <sys/sysctl.h> +#endif + +#ifdef HAVE_MACH_O_DYLD_H +#include <mach-o/dyld.h> +#endif + +#ifdef HAVE_WINDOWS_H +#ifndef WIN32_MEAN_AND_LEAN +#define WIN32_MEAN_AND_LEAN +#endif + +#ifndef NOMINMAX +#define NOMINMAX +#endif + +#include <windows.h> +#endif + +#include "backtrace.hpp" +#include "internal.hpp" + +#ifndef HAVE_GETEXECNAME +#define getexecname() NULL +#endif + +namespace tracy +{ + +#if !defined (HAVE_KERN_PROC_ARGS) && !defined (HAVE_KERN_PROC) + +#define sysctl_exec_name1(state, error_callback, data) NULL +#define sysctl_exec_name2(state, error_callback, data) NULL + +#else /* defined (HAVE_KERN_PROC_ARGS) || |defined (HAVE_KERN_PROC) */ + +static char * +sysctl_exec_name (struct backtrace_state *state, + int mib0, int mib1, int mib2, int mib3, + backtrace_error_callback error_callback, void *data) +{ + int mib[4]; + size_t len; + char *name; + size_t rlen; + + mib[0] = mib0; + mib[1] = mib1; + mib[2] = mib2; + mib[3] = mib3; + + if (sysctl (mib, 4, NULL, &len, NULL, 0) < 0) + return NULL; + name = (char *) backtrace_alloc (state, len, error_callback, data); + if (name == NULL) + return NULL; + rlen = len; + if (sysctl (mib, 4, name, &rlen, NULL, 0) < 0) + { + backtrace_free (state, name, len, error_callback, data); + return NULL; + } + return name; +} + +#ifdef HAVE_KERN_PROC_ARGS + +static char * +sysctl_exec_name1 (struct backtrace_state *state, + backtrace_error_callback error_callback, void *data) +{ + /* This variant is used on NetBSD. */ + return sysctl_exec_name (state, CTL_KERN, KERN_PROC_ARGS, -1, + KERN_PROC_PATHNAME, error_callback, data); +} + +#else + +#define sysctl_exec_name1(state, error_callback, data) NULL + +#endif + +#ifdef HAVE_KERN_PROC + +static char * +sysctl_exec_name2 (struct backtrace_state *state, + backtrace_error_callback error_callback, void *data) +{ + /* This variant is used on FreeBSD. 
*/ + return sysctl_exec_name (state, CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1, + error_callback, data); +} + +#else + +#define sysctl_exec_name2(state, error_callback, data) NULL + +#endif + +#endif /* defined (HAVE_KERN_PROC_ARGS) || |defined (HAVE_KERN_PROC) */ + +#ifdef HAVE_MACH_O_DYLD_H + +static char * +macho_get_executable_path (struct backtrace_state *state, + backtrace_error_callback error_callback, void *data) +{ + uint32_t len; + char *name; + + len = 0; + if (_NSGetExecutablePath (NULL, &len) == 0) + return NULL; + name = (char *) backtrace_alloc (state, len, error_callback, data); + if (name == NULL) + return NULL; + if (_NSGetExecutablePath (name, &len) != 0) + { + backtrace_free (state, name, len, error_callback, data); + return NULL; + } + return name; +} + +#else /* !defined (HAVE_MACH_O_DYLD_H) */ + +#define macho_get_executable_path(state, error_callback, data) NULL + +#endif /* !defined (HAVE_MACH_O_DYLD_H) */ + +#if HAVE_DECL__PGMPTR + +#define windows_executable_filename() _pgmptr + +#else /* !HAVE_DECL__PGMPTR */ + +#define windows_executable_filename() NULL + +#endif /* !HAVE_DECL__PGMPTR */ + +#ifdef HAVE_WINDOWS_H + +#define FILENAME_BUF_SIZE (MAX_PATH) + +static char * +windows_get_executable_path (char *buf, backtrace_error_callback error_callback, + void *data) +{ + size_t got; + int error; + + got = GetModuleFileNameA (NULL, buf, FILENAME_BUF_SIZE - 1); + error = GetLastError (); + if (got == 0 + || (got == FILENAME_BUF_SIZE - 1 && error == ERROR_INSUFFICIENT_BUFFER)) + { + error_callback (data, + "could not get the filename of the current executable", + error); + return NULL; + } + return buf; +} + +#else /* !defined (HAVE_WINDOWS_H) */ + +#define windows_get_executable_path(buf, error_callback, data) NULL +#define FILENAME_BUF_SIZE 64 + +#endif /* !defined (HAVE_WINDOWS_H) */ + +/* Initialize the fileline information from the executable. Returns 1 + on success, 0 on failure. */ + +static int +fileline_initialize (struct backtrace_state *state, + backtrace_error_callback error_callback, void *data) +{ + int failed; + fileline fileline_fn; + int pass; + int called_error_callback; + int descriptor; + const char *filename; + char buf[FILENAME_BUF_SIZE]; + + if (!state->threaded) + failed = state->fileline_initialization_failed; + else + failed = backtrace_atomic_load_int (&state->fileline_initialization_failed); + + if (failed) + { + error_callback (data, "failed to read executable information", -1); + return 0; + } + + if (!state->threaded) + fileline_fn = state->fileline_fn; + else + fileline_fn = backtrace_atomic_load_pointer (&state->fileline_fn); + if (fileline_fn != NULL) + return 1; + + /* We have not initialized the information. Do it now. */ + + descriptor = -1; + called_error_callback = 0; + for (pass = 0; pass < 10; ++pass) + { + int does_not_exist; + + switch (pass) + { + case 0: + filename = state->filename; + break; + case 1: + filename = getexecname (); + break; + case 2: + /* Test this before /proc/self/exe, as the latter exists but points + to the wine binary (and thus doesn't work). 
*/ + filename = windows_executable_filename (); + break; + case 3: + filename = "/proc/self/exe"; + break; + case 4: + filename = "/proc/curproc/file"; + break; + case 5: + snprintf (buf, sizeof (buf), "/proc/%ld/object/a.out", + (long) getpid ()); + filename = buf; + break; + case 6: + filename = sysctl_exec_name1 (state, error_callback, data); + break; + case 7: + filename = sysctl_exec_name2 (state, error_callback, data); + break; + case 8: + filename = macho_get_executable_path (state, error_callback, data); + break; + case 9: + filename = windows_get_executable_path (buf, error_callback, data); + break; + default: + abort (); + } + + if (filename == NULL) + continue; + + descriptor = backtrace_open (filename, error_callback, data, + &does_not_exist); + if (descriptor < 0 && !does_not_exist) + { + called_error_callback = 1; + break; + } + if (descriptor >= 0) + break; + } + + if (descriptor < 0) + { + if (!called_error_callback) + { + if (state->filename != NULL) + error_callback (data, state->filename, ENOENT); + else + error_callback (data, + "libbacktrace could not find executable to open", + 0); + } + failed = 1; + } + + if (!failed) + { + if (!backtrace_initialize (state, filename, descriptor, error_callback, + data, &fileline_fn)) + failed = 1; + } + + if (failed) + { + if (!state->threaded) + state->fileline_initialization_failed = 1; + else + backtrace_atomic_store_int (&state->fileline_initialization_failed, 1); + return 0; + } + + if (!state->threaded) + state->fileline_fn = fileline_fn; + else + { + backtrace_atomic_store_pointer (&state->fileline_fn, fileline_fn); + + /* Note that if two threads initialize at once, one of the data + sets may be leaked. */ + } + + return 1; +} + +/* Given a PC, find the file name, line number, and function name. */ + +int +backtrace_pcinfo (struct backtrace_state *state, uintptr_t pc, + backtrace_full_callback callback, + backtrace_error_callback error_callback, void *data) +{ + if (!fileline_initialize (state, error_callback, data)) + return 0; + + if (state->fileline_initialization_failed) + return 0; + + return state->fileline_fn (state, pc, callback, error_callback, data); +} + +/* Given a PC, find the symbol for it, and its value. */ + +int +backtrace_syminfo (struct backtrace_state *state, uintptr_t pc, + backtrace_syminfo_callback callback, + backtrace_error_callback error_callback, void *data) +{ + if (!fileline_initialize (state, error_callback, data)) + return 0; + + if (state->fileline_initialization_failed) + return 0; + + state->syminfo_fn (state, pc, callback, error_callback, data); + return 1; +} + +/* A backtrace_syminfo_callback that can call into a + backtrace_full_callback, used when we have a symbol table but no + debug info. */ + +void +backtrace_syminfo_to_full_callback (void *data, uintptr_t pc, + const char *symname, + uintptr_t symval ATTRIBUTE_UNUSED, + uintptr_t symsize ATTRIBUTE_UNUSED) +{ + struct backtrace_call_full *bdata = (struct backtrace_call_full *) data; + + bdata->ret = bdata->full_callback (bdata->full_data, pc, 0, NULL, 0, symname); +} + +/* An error callback that corresponds to + backtrace_syminfo_to_full_callback. 
*/ + +void +backtrace_syminfo_to_full_error_callback (void *data, const char *msg, + int errnum) +{ + struct backtrace_call_full *bdata = (struct backtrace_call_full *) data; + + bdata->full_error_callback (bdata->full_data, msg, errnum); +} + +} diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/filenames.hpp b/project/thirdparty/tracy-0.11.1/libbacktrace/filenames.hpp new file mode 100644 index 000000000..aa7bd7adf --- /dev/null +++ b/project/thirdparty/tracy-0.11.1/libbacktrace/filenames.hpp @@ -0,0 +1,52 @@ +/* btest.c -- Filename header for libbacktrace library + Copyright (C) 2012-2018 Free Software Foundation, Inc. + Written by Ian Lance Taylor, Google. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + (1) Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + (2) Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + (3) The name of the author may not be used to + endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. */ + +#ifndef GCC_VERSION +# define GCC_VERSION (__GNUC__ * 1000 + __GNUC_MINOR__) +#endif + +#if (GCC_VERSION < 2007) +# define __attribute__(x) +#endif + +#ifndef ATTRIBUTE_UNUSED +# define ATTRIBUTE_UNUSED __attribute__ ((__unused__)) +#endif + +#if defined(__MSDOS__) || defined(_WIN32) || defined(__OS2__) || defined (__CYGWIN__) +# define IS_DIR_SEPARATOR(c) ((c) == '/' || (c) == '\\') +# define HAS_DRIVE_SPEC(f) ((f)[0] != '\0' && (f)[1] == ':') +# define IS_ABSOLUTE_PATH(f) (IS_DIR_SEPARATOR((f)[0]) || HAS_DRIVE_SPEC(f)) +#else +# define IS_DIR_SEPARATOR(c) ((c) == '/') +# define IS_ABSOLUTE_PATH(f) (IS_DIR_SEPARATOR((f)[0])) +#endif diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/internal.hpp b/project/thirdparty/tracy-0.11.1/libbacktrace/internal.hpp new file mode 100644 index 000000000..213959759 --- /dev/null +++ b/project/thirdparty/tracy-0.11.1/libbacktrace/internal.hpp @@ -0,0 +1,435 @@ +/* internal.h -- Internal header file for stack backtrace library. + Copyright (C) 2012-2021 Free Software Foundation, Inc. + Written by Ian Lance Taylor, Google. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + (1) Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + (2) Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + (3) The name of the author may not be used to + endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. */ + +#ifndef BACKTRACE_INTERNAL_H +#define BACKTRACE_INTERNAL_H + +/* We assume that and "backtrace.h" have already been + included. */ + +#ifndef GCC_VERSION +# define GCC_VERSION (__GNUC__ * 1000 + __GNUC_MINOR__) +#endif + +#if (GCC_VERSION < 2007) +# define __attribute__(x) +#endif + +#ifndef ATTRIBUTE_UNUSED +# define ATTRIBUTE_UNUSED __attribute__ ((__unused__)) +#endif + +#ifndef ATTRIBUTE_MALLOC +# if (GCC_VERSION >= 2096) +# define ATTRIBUTE_MALLOC __attribute__ ((__malloc__)) +# else +# define ATTRIBUTE_MALLOC +# endif +#endif + +#ifndef ATTRIBUTE_FALLTHROUGH +# if (GCC_VERSION >= 7000) +# define ATTRIBUTE_FALLTHROUGH __attribute__ ((__fallthrough__)) +# else +# define ATTRIBUTE_FALLTHROUGH +# endif +#endif + +#ifndef HAVE_SYNC_FUNCTIONS + +/* Define out the sync functions. These should never be called if + they are not available. */ + +#define __sync_bool_compare_and_swap(A, B, C) (abort(), 1) +#define __sync_lock_test_and_set(A, B) (abort(), 0) +#define __sync_lock_release(A) abort() + +#endif /* !defined (HAVE_SYNC_FUNCTIONS) */ + +#ifdef HAVE_ATOMIC_FUNCTIONS + +/* We have the atomic builtin functions. */ + +#define backtrace_atomic_load_pointer(p) \ + __atomic_load_n ((p), __ATOMIC_ACQUIRE) +#define backtrace_atomic_load_int(p) \ + __atomic_load_n ((p), __ATOMIC_ACQUIRE) +#define backtrace_atomic_store_pointer(p, v) \ + __atomic_store_n ((p), (v), __ATOMIC_RELEASE) +#define backtrace_atomic_store_size_t(p, v) \ + __atomic_store_n ((p), (v), __ATOMIC_RELEASE) +#define backtrace_atomic_store_int(p, v) \ + __atomic_store_n ((p), (v), __ATOMIC_RELEASE) + +#else /* !defined (HAVE_ATOMIC_FUNCTIONS) */ +#ifdef HAVE_SYNC_FUNCTIONS + +/* We have the sync functions but not the atomic functions. Define + the atomic ones in terms of the sync ones. */ + +extern void *backtrace_atomic_load_pointer (void *); +extern int backtrace_atomic_load_int (int *); +extern void backtrace_atomic_store_pointer (void *, void *); +extern void backtrace_atomic_store_size_t (size_t *, size_t); +extern void backtrace_atomic_store_int (int *, int); + +#else /* !defined (HAVE_SYNC_FUNCTIONS) */ + +/* We have neither the sync nor the atomic functions. These will + never be called. 
*/ + +#define backtrace_atomic_load_pointer(p) (abort(), (void *) NULL) +#define backtrace_atomic_load_int(p) (abort(), 0) +#define backtrace_atomic_store_pointer(p, v) abort() +#define backtrace_atomic_store_size_t(p, v) abort() +#define backtrace_atomic_store_int(p, v) abort() + +#endif /* !defined (HAVE_SYNC_FUNCTIONS) */ +#endif /* !defined (HAVE_ATOMIC_FUNCTIONS) */ + +namespace tracy +{ + +/* The type of the function that collects file/line information. This + is like backtrace_pcinfo. */ + +typedef int (*fileline) (struct backtrace_state *state, uintptr_t pc, + backtrace_full_callback callback, + backtrace_error_callback error_callback, void *data); + +/* The type of the function that collects symbol information. This is + like backtrace_syminfo. */ + +typedef void (*syminfo) (struct backtrace_state *state, uintptr_t pc, + backtrace_syminfo_callback callback, + backtrace_error_callback error_callback, void *data); + +/* The type of the function that will trigger an known address range refresh + (if pc passed in is for an address whichs lies ourtisde of known ranges) */ +typedef int (*request_known_address_ranges_refresh)(struct backtrace_state *state, + uintptr_t pc); + +/* What the backtrace state pointer points to. */ + +struct backtrace_state +{ + /* The name of the executable. */ + const char *filename; + /* Non-zero if threaded. */ + int threaded; + /* The master lock for fileline_fn, fileline_data, syminfo_fn, + syminfo_data, fileline_initialization_failed and everything the + data pointers point to. */ + void *lock; + /* The function that returns file/line information. */ + fileline fileline_fn; + /* The data to pass to FILELINE_FN. */ + void *fileline_data; + /* The function that returns symbol information. */ + syminfo syminfo_fn; + /* The data to pass to SYMINFO_FN. */ + void *syminfo_data; + /* Whether initializing the file/line information failed. */ + int fileline_initialization_failed; + /* The lock for the freelist. */ + int lock_alloc; + /* The freelist when using mmap. */ + struct backtrace_freelist_struct *freelist; + /* Trigger an known address range refresh */ + request_known_address_ranges_refresh request_known_address_ranges_refresh_fn; +}; + +/* Open a file for reading. Returns -1 on error. If DOES_NOT_EXIST + is not NULL, *DOES_NOT_EXIST will be set to 0 normally and set to 1 + if the file does not exist. If the file does not exist and + DOES_NOT_EXIST is not NULL, the function will return -1 and will + not call ERROR_CALLBACK. On other errors, or if DOES_NOT_EXIST is + NULL, the function will call ERROR_CALLBACK before returning. */ +extern int backtrace_open (const char *filename, + backtrace_error_callback error_callback, + void *data, + int *does_not_exist); + +/* A view of the contents of a file. This supports mmap when + available. A view will remain in memory even after backtrace_close + is called on the file descriptor from which the view was + obtained. */ + +struct backtrace_view +{ + /* The data that the caller requested. */ + const void *data; + /* The base of the view. */ + void *base; + /* The total length of the view. */ + size_t len; +}; + +/* Create a view of SIZE bytes from DESCRIPTOR at OFFSET. Store the + result in *VIEW. Returns 1 on success, 0 on error. */ +extern int backtrace_get_view (struct backtrace_state *state, int descriptor, + off_t offset, uint64_t size, + backtrace_error_callback error_callback, + void *data, struct backtrace_view *view); + +/* Release a view created by backtrace_get_view. 
*/ +extern void backtrace_release_view (struct backtrace_state *state, + struct backtrace_view *view, + backtrace_error_callback error_callback, + void *data); + +/* Close a file opened by backtrace_open. Returns 1 on success, 0 on + error. */ + +extern int backtrace_close (int descriptor, + backtrace_error_callback error_callback, + void *data); + +/* Sort without using memory. */ + +extern void backtrace_qsort (void *base, size_t count, size_t size, + int (*compar) (const void *, const void *)); + +/* Allocate memory. This is like malloc. If ERROR_CALLBACK is NULL, + this does not report an error, it just returns NULL. */ + +extern void *backtrace_alloc (struct backtrace_state *state, size_t size, + backtrace_error_callback error_callback, + void *data) ATTRIBUTE_MALLOC; + +/* Free memory allocated by backtrace_alloc. If ERROR_CALLBACK is + NULL, this does not report an error. */ + +extern void backtrace_free (struct backtrace_state *state, void *mem, + size_t size, + backtrace_error_callback error_callback, + void *data); + +/* A growable vector of some struct. This is used for more efficient + allocation when we don't know the final size of some group of data + that we want to represent as an array. */ + +struct backtrace_vector +{ + /* The base of the vector. */ + void *base; + /* The number of bytes in the vector. */ + size_t size; + /* The number of bytes available at the current allocation. */ + size_t alc; +}; + +/* Grow VEC by SIZE bytes. Return a pointer to the newly allocated + bytes. Note that this may move the entire vector to a new memory + location. Returns NULL on failure. */ + +extern void *backtrace_vector_grow (struct backtrace_state *state, size_t size, + backtrace_error_callback error_callback, + void *data, + struct backtrace_vector *vec); + +/* Finish the current allocation on VEC. Prepare to start a new + allocation. The finished allocation will never be freed. Returns + a pointer to the base of the finished entries, or NULL on + failure. */ + +extern void* backtrace_vector_finish (struct backtrace_state *state, + struct backtrace_vector *vec, + backtrace_error_callback error_callback, + void *data); + +/* Release any extra space allocated for VEC. This may change + VEC->base. Returns 1 on success, 0 on failure. */ + +extern int backtrace_vector_release (struct backtrace_state *state, + struct backtrace_vector *vec, + backtrace_error_callback error_callback, + void *data); + +/* Free the space managed by VEC. This will reset VEC. */ + +static inline void +backtrace_vector_free (struct backtrace_state *state, + struct backtrace_vector *vec, + backtrace_error_callback error_callback, void *data) +{ + vec->alc += vec->size; + vec->size = 0; + backtrace_vector_release (state, vec, error_callback, data); +} + +/* Read initial debug data from a descriptor, and set the + fileline_data, syminfo_fn, and syminfo_data fields of STATE. + Return the fileln_fn field in *FILELN_FN--this is done this way so + that the synchronization code is only implemented once. This is + called after the descriptor has first been opened. It will close + the descriptor if it is no longer needed. Returns 1 on success, 0 + on error. There will be multiple implementations of this function, + for different file formats. Each system will compile the + appropriate one. 
*/ + +extern int backtrace_initialize (struct backtrace_state *state, + const char *filename, + int descriptor, + backtrace_error_callback error_callback, + void *data, + fileline *fileline_fn); + +/* An enum for the DWARF sections we care about. */ + +enum dwarf_section +{ + DEBUG_INFO, + DEBUG_LINE, + DEBUG_ABBREV, + DEBUG_RANGES, + DEBUG_STR, + DEBUG_ADDR, + DEBUG_STR_OFFSETS, + DEBUG_LINE_STR, + DEBUG_RNGLISTS, + + DEBUG_MAX +}; + +/* Data for the DWARF sections we care about. */ + +struct dwarf_sections +{ + const unsigned char *data[DEBUG_MAX]; + size_t size[DEBUG_MAX]; +}; + +/* DWARF data read from a file, used for .gnu_debugaltlink. */ + +struct dwarf_data; + +/* The load address mapping. */ + +#if defined(__FDPIC__) && defined(HAVE_DL_ITERATE_PHDR) && (defined(HAVE_LINK_H) || defined(HAVE_SYS_LINK_H)) + +#ifdef HAVE_LINK_H + #include +#endif +#ifdef HAVE_SYS_LINK_H + #include +#endif + +#define libbacktrace_using_fdpic() (1) + +struct libbacktrace_base_address +{ + struct elf32_fdpic_loadaddr m; +}; + +#define libbacktrace_add_base(pc, base) \ + ((uintptr_t) (__RELOC_POINTER ((pc), (base).m))) + +#else /* not _FDPIC__ */ + +#define libbacktrace_using_fdpic() (0) + +struct libbacktrace_base_address +{ + uintptr_t m; +}; + +#define libbacktrace_add_base(pc, base) ((pc) + (base).m) + +#endif /* not _FDPIC__ */ + +/* Add file/line information for a DWARF module. */ + +extern int backtrace_dwarf_add (struct backtrace_state *state, + struct libbacktrace_base_address base_address, + const struct dwarf_sections *dwarf_sections, + int is_bigendian, + struct dwarf_data *fileline_altlink, + backtrace_error_callback error_callback, + void *data, fileline *fileline_fn, + struct dwarf_data **fileline_entry); + +/* A data structure to pass to backtrace_syminfo_to_full. */ + +struct backtrace_call_full +{ + backtrace_full_callback full_callback; + backtrace_error_callback full_error_callback; + void *full_data; + int ret; +}; + +/* A backtrace_syminfo_callback that can call into a + backtrace_full_callback, used when we have a symbol table but no + debug info. */ + +extern void backtrace_syminfo_to_full_callback (void *data, uintptr_t pc, + const char *symname, + uintptr_t symval, + uintptr_t symsize); + +/* An error callback that corresponds to + backtrace_syminfo_to_full_callback. */ + +extern void backtrace_syminfo_to_full_error_callback (void *, const char *, + int); + +/* A test-only hook for elf_uncompress_zdebug. */ + +extern int backtrace_uncompress_zdebug (struct backtrace_state *, + const unsigned char *compressed, + size_t compressed_size, + backtrace_error_callback, void *data, + unsigned char **uncompressed, + size_t *uncompressed_size); + +/* A test-only hook for elf_zstd_decompress. */ + +extern int backtrace_uncompress_zstd (struct backtrace_state *, + const unsigned char *compressed, + size_t compressed_size, + backtrace_error_callback, void *data, + unsigned char *uncompressed, + size_t uncompressed_size); + +/* A test-only hook for elf_uncompress_lzma. 
*/ + +extern int backtrace_uncompress_lzma (struct backtrace_state *, + const unsigned char *compressed, + size_t compressed_size, + backtrace_error_callback, void *data, + unsigned char **uncompressed, + size_t *uncompressed_size); + +} + +#endif diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/macho.cpp b/project/thirdparty/tracy-0.11.1/libbacktrace/macho.cpp new file mode 100644 index 000000000..b9f084565 --- /dev/null +++ b/project/thirdparty/tracy-0.11.1/libbacktrace/macho.cpp @@ -0,0 +1,1367 @@ +/* elf.c -- Get debug data from a Mach-O file for backtraces. + Copyright (C) 2020-2021 Free Software Foundation, Inc. + Written by Ian Lance Taylor, Google. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + (1) Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + (2) Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + (3) The name of the author may not be used to + endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. */ + +#include "config.h" + +#include +#include +#include +#include + +#ifdef HAVE_MACH_O_DYLD_H +#include +#endif + +#include "backtrace.hpp" +#include "internal.hpp" + +namespace tracy +{ + +/* Mach-O file header for a 32-bit executable. */ + +struct macho_header_32 +{ + uint32_t magic; /* Magic number (MACH_O_MAGIC_32) */ + uint32_t cputype; /* CPU type */ + uint32_t cpusubtype; /* CPU subtype */ + uint32_t filetype; /* Type of file (object, executable) */ + uint32_t ncmds; /* Number of load commands */ + uint32_t sizeofcmds; /* Total size of load commands */ + uint32_t flags; /* Flags for special features */ +}; + +/* Mach-O file header for a 64-bit executable. */ + +struct macho_header_64 +{ + uint32_t magic; /* Magic number (MACH_O_MAGIC_64) */ + uint32_t cputype; /* CPU type */ + uint32_t cpusubtype; /* CPU subtype */ + uint32_t filetype; /* Type of file (object, executable) */ + uint32_t ncmds; /* Number of load commands */ + uint32_t sizeofcmds; /* Total size of load commands */ + uint32_t flags; /* Flags for special features */ + uint32_t reserved; /* Reserved */ +}; + +/* Mach-O file header for a fat executable. */ + +struct macho_header_fat +{ + uint32_t magic; /* Magic number (MACH_O_MH_(MAGIC|CIGAM)_FAT(_64)?) */ + uint32_t nfat_arch; /* Number of components */ +}; + +/* Values for the header magic field. 
*/ + +#define MACH_O_MH_MAGIC_32 0xfeedface +#define MACH_O_MH_MAGIC_64 0xfeedfacf +#define MACH_O_MH_MAGIC_FAT 0xcafebabe +#define MACH_O_MH_CIGAM_FAT 0xbebafeca +#define MACH_O_MH_MAGIC_FAT_64 0xcafebabf +#define MACH_O_MH_CIGAM_FAT_64 0xbfbafeca + +/* Value for the header filetype field. */ + +#define MACH_O_MH_EXECUTE 0x02 +#define MACH_O_MH_DYLIB 0x06 +#define MACH_O_MH_DSYM 0x0a + +/* A component of a fat file. A fat file starts with a + macho_header_fat followed by nfat_arch instances of this + struct. */ + +struct macho_fat_arch +{ + uint32_t cputype; /* CPU type */ + uint32_t cpusubtype; /* CPU subtype */ + uint32_t offset; /* File offset of this entry */ + uint32_t size; /* Size of this entry */ + uint32_t align; /* Alignment of this entry */ +}; + +/* A component of a 64-bit fat file. This is used if the magic field + is MAGIC_FAT_64. This is only used when some file size or file + offset is too large to represent in the 32-bit format. */ + +struct macho_fat_arch_64 +{ + uint32_t cputype; /* CPU type */ + uint32_t cpusubtype; /* CPU subtype */ + uint64_t offset; /* File offset of this entry */ + uint64_t size; /* Size of this entry */ + uint32_t align; /* Alignment of this entry */ + uint32_t reserved; /* Reserved */ +}; + +/* Values for the fat_arch cputype field (and the header cputype + field). */ + +#define MACH_O_CPU_ARCH_ABI64 0x01000000 + +#define MACH_O_CPU_TYPE_X86 7 +#define MACH_O_CPU_TYPE_ARM 12 +#define MACH_O_CPU_TYPE_PPC 18 + +#define MACH_O_CPU_TYPE_X86_64 (MACH_O_CPU_TYPE_X86 | MACH_O_CPU_ARCH_ABI64) +#define MACH_O_CPU_TYPE_ARM64 (MACH_O_CPU_TYPE_ARM | MACH_O_CPU_ARCH_ABI64) +#define MACH_O_CPU_TYPE_PPC64 (MACH_O_CPU_TYPE_PPC | MACH_O_CPU_ARCH_ABI64) + +/* The header of a load command. */ + +struct macho_load_command +{ + uint32_t cmd; /* The type of load command */ + uint32_t cmdsize; /* Size in bytes of the entire command */ +}; + +/* Values for the load_command cmd field. */ + +#define MACH_O_LC_SEGMENT 0x01 +#define MACH_O_LC_SYMTAB 0x02 +#define MACH_O_LC_SEGMENT_64 0x19 +#define MACH_O_LC_UUID 0x1b + +/* The length of a section of segment name. */ + +#define MACH_O_NAMELEN (16) + +/* LC_SEGMENT load command. */ + +struct macho_segment_command +{ + uint32_t cmd; /* The type of load command (LC_SEGMENT) */ + uint32_t cmdsize; /* Size in bytes of the entire command */ + char segname[MACH_O_NAMELEN]; /* Segment name */ + uint32_t vmaddr; /* Virtual memory address */ + uint32_t vmsize; /* Virtual memory size */ + uint32_t fileoff; /* Offset of data to be mapped */ + uint32_t filesize; /* Size of data in file */ + uint32_t maxprot; /* Maximum permitted virtual protection */ + uint32_t initprot; /* Initial virtual memory protection */ + uint32_t nsects; /* Number of sections in this segment */ + uint32_t flags; /* Flags */ +}; + +/* LC_SEGMENT_64 load command. */ + +struct macho_segment_64_command +{ + uint32_t cmd; /* The type of load command (LC_SEGMENT) */ + uint32_t cmdsize; /* Size in bytes of the entire command */ + char segname[MACH_O_NAMELEN]; /* Segment name */ + uint64_t vmaddr; /* Virtual memory address */ + uint64_t vmsize; /* Virtual memory size */ + uint64_t fileoff; /* Offset of data to be mapped */ + uint64_t filesize; /* Size of data in file */ + uint32_t maxprot; /* Maximum permitted virtual protection */ + uint32_t initprot; /* Initial virtual memory protection */ + uint32_t nsects; /* Number of sections in this segment */ + uint32_t flags; /* Flags */ +}; + +/* LC_SYMTAB load command. 
*/ + +struct macho_symtab_command +{ + uint32_t cmd; /* The type of load command (LC_SEGMENT) */ + uint32_t cmdsize; /* Size in bytes of the entire command */ + uint32_t symoff; /* File offset of symbol table */ + uint32_t nsyms; /* Number of symbols */ + uint32_t stroff; /* File offset of string table */ + uint32_t strsize; /* String table size */ +}; + +/* The length of a Mach-O uuid. */ + +#define MACH_O_UUID_LEN (16) + +/* LC_UUID load command. */ + +struct macho_uuid_command +{ + uint32_t cmd; /* Type of load command (LC_UUID) */ + uint32_t cmdsize; /* Size in bytes of command */ + unsigned char uuid[MACH_O_UUID_LEN]; /* UUID */ +}; + +/* 32-bit section header within a LC_SEGMENT segment. */ + +struct macho_section +{ + char sectname[MACH_O_NAMELEN]; /* Section name */ + char segment[MACH_O_NAMELEN]; /* Segment of this section */ + uint32_t addr; /* Address in memory */ + uint32_t size; /* Section size */ + uint32_t offset; /* File offset */ + uint32_t align; /* Log2 of section alignment */ + uint32_t reloff; /* File offset of relocations */ + uint32_t nreloc; /* Number of relocs for this section */ + uint32_t flags; /* Flags */ + uint32_t reserved1; + uint32_t reserved2; +}; + +/* 64-bit section header within a LC_SEGMENT_64 segment. */ + +struct macho_section_64 +{ + char sectname[MACH_O_NAMELEN]; /* Section name */ + char segment[MACH_O_NAMELEN]; /* Segment of this section */ + uint64_t addr; /* Address in memory */ + uint64_t size; /* Section size */ + uint32_t offset; /* File offset */ + uint32_t align; /* Log2 of section alignment */ + uint32_t reloff; /* File offset of section relocations */ + uint32_t nreloc; /* Number of relocs for this section */ + uint32_t flags; /* Flags */ + uint32_t reserved1; + uint32_t reserved2; + uint32_t reserved3; +}; + +/* 32-bit symbol data. */ + +struct macho_nlist +{ + uint32_t n_strx; /* Index of name in string table */ + uint8_t n_type; /* Type flag */ + uint8_t n_sect; /* Section number */ + uint16_t n_desc; /* Stabs description field */ + uint32_t n_value; /* Value */ +}; + +/* 64-bit symbol data. */ + +struct macho_nlist_64 +{ + uint32_t n_strx; /* Index of name in string table */ + uint8_t n_type; /* Type flag */ + uint8_t n_sect; /* Section number */ + uint16_t n_desc; /* Stabs description field */ + uint64_t n_value; /* Value */ +}; + +/* Value found in nlist n_type field. */ + +#define MACH_O_N_STAB 0xe0 /* Stabs debugging symbol */ +#define MACH_O_N_TYPE 0x0e /* Mask for type bits */ + +/* Values found after masking with MACH_O_N_TYPE. */ +#define MACH_O_N_UNDF 0x00 /* Undefined symbol */ +#define MACH_O_N_ABS 0x02 /* Absolute symbol */ +#define MACH_O_N_SECT 0x0e /* Defined in section from n_sect field */ + + +/* Information we keep for a Mach-O symbol. */ + +struct macho_symbol +{ + const char *name; /* Symbol name */ + uintptr_t address; /* Symbol address */ +}; + +/* Information to pass to macho_syminfo. */ + +struct macho_syminfo_data +{ + struct macho_syminfo_data *next; /* Next module */ + struct macho_symbol *symbols; /* Symbols sorted by address */ + size_t count; /* Number of symbols */ +}; + +/* Names of sections, indexed by enum dwarf_section in internal.h. */ + +static const char * const dwarf_section_names[DEBUG_MAX] = +{ + "__debug_info", + "__debug_line", + "__debug_abbrev", + "__debug_ranges", + "__debug_str", + "", /* DEBUG_ADDR */ + "__debug_str_offs", + "", /* DEBUG_LINE_STR */ + "__debug_rnglists" +}; + +/* Forward declaration. 
*/ + +static int macho_add (struct backtrace_state *, const char *, int, off_t, + const unsigned char *, struct libbacktrace_base_address, + int, backtrace_error_callback, void *, fileline *, + int *); + +/* A dummy callback function used when we can't find any debug info. */ + +static int +macho_nodebug (struct backtrace_state *state ATTRIBUTE_UNUSED, + uintptr_t pc ATTRIBUTE_UNUSED, + backtrace_full_callback callback ATTRIBUTE_UNUSED, + backtrace_error_callback error_callback, void *data) +{ + error_callback (data, "no debug info in Mach-O executable", -1); + return 0; +} + +/* A dummy callback function used when we can't find a symbol + table. */ + +static void +macho_nosyms (struct backtrace_state *state ATTRIBUTE_UNUSED, + uintptr_t addr ATTRIBUTE_UNUSED, + backtrace_syminfo_callback callback ATTRIBUTE_UNUSED, + backtrace_error_callback error_callback, void *data) +{ + error_callback (data, "no symbol table in Mach-O executable", -1); +} + +/* Add a single DWARF section to DWARF_SECTIONS, if we need the + section. Returns 1 on success, 0 on failure. */ + +static int +macho_add_dwarf_section (struct backtrace_state *state, int descriptor, + const char *sectname, uint32_t offset, uint64_t size, + backtrace_error_callback error_callback, void *data, + struct dwarf_sections *dwarf_sections) +{ + int i; + + for (i = 0; i < (int) DEBUG_MAX; ++i) + { + if (dwarf_section_names[i][0] != '\0' + && strncmp (sectname, dwarf_section_names[i], MACH_O_NAMELEN) == 0) + { + struct backtrace_view section_view; + + /* FIXME: Perhaps it would be better to try to use a single + view to read all the DWARF data, as we try to do for + ELF. */ + + if (!backtrace_get_view (state, descriptor, offset, size, + error_callback, data, §ion_view)) + return 0; + dwarf_sections->data[i] = (const unsigned char *) section_view.data; + dwarf_sections->size[i] = size; + break; + } + } + return 1; +} + +/* Collect DWARF sections from a DWARF segment. Returns 1 on success, + 0 on failure. */ + +static int +macho_add_dwarf_segment (struct backtrace_state *state, int descriptor, + off_t offset, unsigned int cmd, const char *psecs, + size_t sizesecs, unsigned int nsects, + backtrace_error_callback error_callback, void *data, + struct dwarf_sections *dwarf_sections) +{ + size_t sec_header_size; + size_t secoffset; + unsigned int i; + + switch (cmd) + { + case MACH_O_LC_SEGMENT: + sec_header_size = sizeof (struct macho_section); + break; + case MACH_O_LC_SEGMENT_64: + sec_header_size = sizeof (struct macho_section_64); + break; + default: + abort (); + } + + secoffset = 0; + for (i = 0; i < nsects; ++i) + { + if (secoffset + sec_header_size > sizesecs) + { + error_callback (data, "section overflow withing segment", 0); + return 0; + } + + switch (cmd) + { + case MACH_O_LC_SEGMENT: + { + struct macho_section section; + + memcpy (§ion, psecs + secoffset, sizeof section); + macho_add_dwarf_section (state, descriptor, section.sectname, + offset + section.offset, section.size, + error_callback, data, dwarf_sections); + } + break; + + case MACH_O_LC_SEGMENT_64: + { + struct macho_section_64 section; + + memcpy (§ion, psecs + secoffset, sizeof section); + macho_add_dwarf_section (state, descriptor, section.sectname, + offset + section.offset, section.size, + error_callback, data, dwarf_sections); + } + break; + + default: + abort (); + } + + secoffset += sec_header_size; + } + + return 1; +} + +/* Compare struct macho_symbol for qsort. 
*/ + +static int +macho_symbol_compare (const void *v1, const void *v2) +{ + const struct macho_symbol *m1 = (const struct macho_symbol *) v1; + const struct macho_symbol *m2 = (const struct macho_symbol *) v2; + + if (m1->address < m2->address) + return -1; + else if (m1->address > m2->address) + return 1; + else + return 0; +} + +/* Compare an address against a macho_symbol for bsearch. We allocate + one extra entry in the array so that this can safely look at the + next entry. */ + +static int +macho_symbol_search (const void *vkey, const void *ventry) +{ + const uintptr_t *key = (const uintptr_t *) vkey; + const struct macho_symbol *entry = (const struct macho_symbol *) ventry; + uintptr_t addr; + + addr = *key; + if (addr < entry->address) + return -1; + else if (entry->name[0] == '\0' + && entry->address == ~(uintptr_t) 0) + return -1; + else if ((entry + 1)->name[0] == '\0' + && (entry + 1)->address == ~(uintptr_t) 0) + return -1; + else if (addr >= (entry + 1)->address) + return 1; + else + return 0; +} + +/* Return whether the symbol type field indicates a symbol table entry + that we care about: a function or data symbol. */ + +static int +macho_defined_symbol (uint8_t type) +{ + if ((type & MACH_O_N_STAB) != 0) + return 0; + switch (type & MACH_O_N_TYPE) + { + case MACH_O_N_UNDF: + return 0; + case MACH_O_N_ABS: + return 1; + case MACH_O_N_SECT: + return 1; + default: + return 0; + } +} + +/* Add symbol table information for a Mach-O file. */ + +static int +macho_add_symtab (struct backtrace_state *state, int descriptor, + struct libbacktrace_base_address base_address, int is_64, + off_t symoff, unsigned int nsyms, off_t stroff, + unsigned int strsize, + backtrace_error_callback error_callback, void *data) +{ + size_t symsize; + struct backtrace_view sym_view; + int sym_view_valid; + struct backtrace_view str_view; + int str_view_valid; + size_t ndefs; + size_t symtaboff; + unsigned int i; + size_t macho_symbol_size; + struct macho_symbol *macho_symbols; + unsigned int j; + struct macho_syminfo_data *sdata; + + sym_view_valid = 0; + str_view_valid = 0; + macho_symbol_size = 0; + macho_symbols = NULL; + + if (is_64) + symsize = sizeof (struct macho_nlist_64); + else + symsize = sizeof (struct macho_nlist); + + if (!backtrace_get_view (state, descriptor, symoff, nsyms * symsize, + error_callback, data, &sym_view)) + goto fail; + sym_view_valid = 1; + + if (!backtrace_get_view (state, descriptor, stroff, strsize, + error_callback, data, &str_view)) + return 0; + str_view_valid = 1; + + ndefs = 0; + symtaboff = 0; + for (i = 0; i < nsyms; ++i, symtaboff += symsize) + { + if (is_64) + { + struct macho_nlist_64 nlist; + + memcpy (&nlist, (const char *) sym_view.data + symtaboff, + sizeof nlist); + if (macho_defined_symbol (nlist.n_type)) + ++ndefs; + } + else + { + struct macho_nlist nlist; + + memcpy (&nlist, (const char *) sym_view.data + symtaboff, + sizeof nlist); + if (macho_defined_symbol (nlist.n_type)) + ++ndefs; + } + } + + /* Add 1 to ndefs to make room for a sentinel. 
*/ + macho_symbol_size = (ndefs + 1) * sizeof (struct macho_symbol); + macho_symbols = ((struct macho_symbol *) + backtrace_alloc (state, macho_symbol_size, error_callback, + data)); + if (macho_symbols == NULL) + goto fail; + + j = 0; + symtaboff = 0; + for (i = 0; i < nsyms; ++i, symtaboff += symsize) + { + uint32_t strx; + uint64_t value; + const char *name; + + strx = 0; + value = 0; + if (is_64) + { + struct macho_nlist_64 nlist; + + memcpy (&nlist, (const char *) sym_view.data + symtaboff, + sizeof nlist); + if (!macho_defined_symbol (nlist.n_type)) + continue; + + strx = nlist.n_strx; + value = nlist.n_value; + } + else + { + struct macho_nlist nlist; + + memcpy (&nlist, (const char *) sym_view.data + symtaboff, + sizeof nlist); + if (!macho_defined_symbol (nlist.n_type)) + continue; + + strx = nlist.n_strx; + value = nlist.n_value; + } + + if (strx >= strsize) + { + error_callback (data, "symbol string index out of range", 0); + goto fail; + } + + name = (const char *) str_view.data + strx; + if (name[0] == '_') + ++name; + macho_symbols[j].name = name; + macho_symbols[j].address = libbacktrace_add_base (value, base_address); + ++j; + } + + sdata = ((struct macho_syminfo_data *) + backtrace_alloc (state, sizeof *sdata, error_callback, data)); + if (sdata == NULL) + goto fail; + + /* We need to keep the string table since it holds the names, but we + can release the symbol table. */ + + backtrace_release_view (state, &sym_view, error_callback, data); + sym_view_valid = 0; + str_view_valid = 0; + + /* Add a trailing sentinel symbol. */ + macho_symbols[j].name = ""; + macho_symbols[j].address = ~(uintptr_t) 0; + + backtrace_qsort (macho_symbols, ndefs + 1, sizeof (struct macho_symbol), + macho_symbol_compare); + + sdata->next = NULL; + sdata->symbols = macho_symbols; + sdata->count = ndefs; + + if (!state->threaded) + { + struct macho_syminfo_data **pp; + + for (pp = (struct macho_syminfo_data **) (void *) &state->syminfo_data; + *pp != NULL; + pp = &(*pp)->next) + ; + *pp = sdata; + } + else + { + while (1) + { + struct macho_syminfo_data **pp; + + pp = (struct macho_syminfo_data **) (void *) &state->syminfo_data; + + while (1) + { + struct macho_syminfo_data *p; + + p = backtrace_atomic_load_pointer (pp); + + if (p == NULL) + break; + + pp = &p->next; + } + + if (__sync_bool_compare_and_swap (pp, NULL, sdata)) + break; + } + } + + return 1; + + fail: + if (macho_symbols != NULL) + backtrace_free (state, macho_symbols, macho_symbol_size, + error_callback, data); + if (sym_view_valid) + backtrace_release_view (state, &sym_view, error_callback, data); + if (str_view_valid) + backtrace_release_view (state, &str_view, error_callback, data); + return 0; +} + +/* Return the symbol name and value for an ADDR. 
*/ + +static void +macho_syminfo (struct backtrace_state *state, uintptr_t addr, + backtrace_syminfo_callback callback, + backtrace_error_callback error_callback ATTRIBUTE_UNUSED, + void *data) +{ + struct macho_syminfo_data *sdata; + struct macho_symbol *sym; + + sym = NULL; + if (!state->threaded) + { + for (sdata = (struct macho_syminfo_data *) state->syminfo_data; + sdata != NULL; + sdata = sdata->next) + { + sym = ((struct macho_symbol *) + bsearch (&addr, sdata->symbols, sdata->count, + sizeof (struct macho_symbol), macho_symbol_search)); + if (sym != NULL) + break; + } + } + else + { + struct macho_syminfo_data **pp; + + pp = (struct macho_syminfo_data **) (void *) &state->syminfo_data; + while (1) + { + sdata = backtrace_atomic_load_pointer (pp); + if (sdata == NULL) + break; + + sym = ((struct macho_symbol *) + bsearch (&addr, sdata->symbols, sdata->count, + sizeof (struct macho_symbol), macho_symbol_search)); + if (sym != NULL) + break; + + pp = &sdata->next; + } + } + + if (sym == NULL) + callback (data, addr, NULL, 0, 0); + else + callback (data, addr, sym->name, sym->address, 0); +} + +/* Look through a fat file to find the relevant executable. Returns 1 + on success, 0 on failure (in both cases descriptor is closed). */ + +static int +macho_add_fat (struct backtrace_state *state, const char *filename, + int descriptor, int swapped, off_t offset, + const unsigned char *match_uuid, + struct libbacktrace_base_address base_address, + int skip_symtab, uint32_t nfat_arch, int is_64, + backtrace_error_callback error_callback, void *data, + fileline *fileline_fn, int *found_sym) +{ + int arch_view_valid; + unsigned int cputype; + size_t arch_size; + struct backtrace_view arch_view; + unsigned int i; + + arch_view_valid = 0; + +#if defined (__x86_64__) + cputype = MACH_O_CPU_TYPE_X86_64; +#elif defined (__i386__) + cputype = MACH_O_CPU_TYPE_X86; +#elif defined (__aarch64__) + cputype = MACH_O_CPU_TYPE_ARM64; +#elif defined (__arm__) + cputype = MACH_O_CPU_TYPE_ARM; +#elif defined (__ppc__) + cputype = MACH_O_CPU_TYPE_PPC; +#elif defined (__ppc64__) + cputype = MACH_O_CPU_TYPE_PPC64; +#else + error_callback (data, "unknown Mach-O architecture", 0); + goto fail; +#endif + + if (is_64) + arch_size = sizeof (struct macho_fat_arch_64); + else + arch_size = sizeof (struct macho_fat_arch); + + if (!backtrace_get_view (state, descriptor, offset, + nfat_arch * arch_size, + error_callback, data, &arch_view)) + goto fail; + + for (i = 0; i < nfat_arch; ++i) + { + uint32_t fcputype; + uint64_t foffset; + + if (is_64) + { + struct macho_fat_arch_64 fat_arch_64; + + memcpy (&fat_arch_64, + (const char *) arch_view.data + i * arch_size, + arch_size); + fcputype = fat_arch_64.cputype; + foffset = fat_arch_64.offset; + if (swapped) + { + fcputype = __builtin_bswap32 (fcputype); + foffset = __builtin_bswap64 (foffset); + } + } + else + { + struct macho_fat_arch fat_arch_32; + + memcpy (&fat_arch_32, + (const char *) arch_view.data + i * arch_size, + arch_size); + fcputype = fat_arch_32.cputype; + foffset = (uint64_t) fat_arch_32.offset; + if (swapped) + { + fcputype = __builtin_bswap32 (fcputype); + foffset = (uint64_t) __builtin_bswap32 ((uint32_t) foffset); + } + } + + if (fcputype == cputype) + { + /* FIXME: What about cpusubtype? 
*/ + backtrace_release_view (state, &arch_view, error_callback, data); + return macho_add (state, filename, descriptor, foffset, match_uuid, + base_address, skip_symtab, error_callback, data, + fileline_fn, found_sym); + } + } + + error_callback (data, "could not find executable in fat file", 0); + + fail: + if (arch_view_valid) + backtrace_release_view (state, &arch_view, error_callback, data); + if (descriptor != -1) + backtrace_close (descriptor, error_callback, data); + return 0; +} + +/* Look for the dsym file for FILENAME. This is called if FILENAME + does not have debug info or a symbol table. Returns 1 on success, + 0 on failure. */ + +static int +macho_add_dsym (struct backtrace_state *state, const char *filename, + struct libbacktrace_base_address base_address, + const unsigned char *uuid, + backtrace_error_callback error_callback, void *data, + fileline* fileline_fn) +{ + const char *p; + const char *dirname; + char *diralc; + size_t dirnamelen; + const char *basename; + size_t basenamelen; + const char *dsymsuffixdir; + size_t dsymsuffixdirlen; + size_t dsymlen; + char *dsym; + char *ps; + int d; + int does_not_exist; + int dummy_found_sym; + + diralc = NULL; + dirnamelen = 0; + dsym = NULL; + dsymlen = 0; + + p = strrchr (filename, '/'); + if (p == NULL) + { + dirname = "."; + dirnamelen = 1; + basename = filename; + basenamelen = strlen (basename); + diralc = NULL; + } + else + { + dirnamelen = p - filename; + diralc = (char*)backtrace_alloc (state, dirnamelen + 1, error_callback, data); + if (diralc == NULL) + goto fail; + memcpy (diralc, filename, dirnamelen); + diralc[dirnamelen] = '\0'; + dirname = diralc; + basename = p + 1; + basenamelen = strlen (basename); + } + + dsymsuffixdir = ".dSYM/Contents/Resources/DWARF/"; + dsymsuffixdirlen = strlen (dsymsuffixdir); + + dsymlen = (dirnamelen + + 1 + + basenamelen + + dsymsuffixdirlen + + basenamelen + + 1); + dsym = (char*)backtrace_alloc (state, dsymlen, error_callback, data); + if (dsym == NULL) + goto fail; + + ps = dsym; + memcpy (ps, dirname, dirnamelen); + ps += dirnamelen; + *ps++ = '/'; + memcpy (ps, basename, basenamelen); + ps += basenamelen; + memcpy (ps, dsymsuffixdir, dsymsuffixdirlen); + ps += dsymsuffixdirlen; + memcpy (ps, basename, basenamelen); + ps += basenamelen; + *ps = '\0'; + + if (diralc != NULL) + { + backtrace_free (state, diralc, dirnamelen + 1, error_callback, data); + diralc = NULL; + } + + d = backtrace_open (dsym, error_callback, data, &does_not_exist); + if (d < 0) + { + /* The file does not exist, so we can't read the debug info. + Just return success. */ + backtrace_free (state, dsym, dsymlen, error_callback, data); + return 1; + } + + if (!macho_add (state, dsym, d, 0, uuid, base_address, 1, + error_callback, data, fileline_fn, &dummy_found_sym)) + goto fail; + + backtrace_free (state, dsym, dsymlen, error_callback, data); + + return 1; + + fail: + if (dsym != NULL) + backtrace_free (state, dsym, dsymlen, error_callback, data); + if (diralc != NULL) + backtrace_free (state, diralc, dirnamelen, error_callback, data); + return 0; +} + +/* Add the backtrace data for a Macho-O file. Returns 1 on success, 0 + on failure (in both cases descriptor is closed). + + FILENAME: the name of the executable. + DESCRIPTOR: an open descriptor for the executable, closed here. + OFFSET: the offset within the file of this executable, for fat files. + MATCH_UUID: if not NULL, UUID that must match. + BASE_ADDRESS: the load address of the executable. 
+ SKIP_SYMTAB: if non-zero, ignore the symbol table; used for dSYM files. + FILELINE_FN: set to the fileline function, by backtrace_dwarf_add. + FOUND_SYM: set to non-zero if we found the symbol table. +*/ + +static int +macho_add (struct backtrace_state *state, const char *filename, int descriptor, + off_t offset, const unsigned char *match_uuid, + struct libbacktrace_base_address base_address, int skip_symtab, + backtrace_error_callback error_callback, void *data, + fileline *fileline_fn, int *found_sym) +{ + struct backtrace_view header_view; + struct macho_header_32 header; + off_t hdroffset; + int is_64; + struct backtrace_view cmds_view; + int cmds_view_valid; + struct dwarf_sections dwarf_sections; + int have_dwarf; + unsigned char uuid[MACH_O_UUID_LEN]; + int have_uuid; + size_t cmdoffset; + unsigned int i; + + *found_sym = 0; + + cmds_view_valid = 0; + + /* The 32-bit and 64-bit file headers start out the same, so we can + just always read the 32-bit version. A fat header is shorter but + it will always be followed by data, so it's OK to read extra. */ + + if (!backtrace_get_view (state, descriptor, offset, + sizeof (struct macho_header_32), + error_callback, data, &header_view)) + goto fail; + + memcpy (&header, header_view.data, sizeof header); + + backtrace_release_view (state, &header_view, error_callback, data); + + switch (header.magic) + { + case MACH_O_MH_MAGIC_32: + is_64 = 0; + hdroffset = offset + sizeof (struct macho_header_32); + break; + case MACH_O_MH_MAGIC_64: + is_64 = 1; + hdroffset = offset + sizeof (struct macho_header_64); + break; + case MACH_O_MH_MAGIC_FAT: + case MACH_O_MH_MAGIC_FAT_64: + { + struct macho_header_fat fat_header; + + hdroffset = offset + sizeof (struct macho_header_fat); + memcpy (&fat_header, &header, sizeof fat_header); + return macho_add_fat (state, filename, descriptor, 0, hdroffset, + match_uuid, base_address, skip_symtab, + fat_header.nfat_arch, + header.magic == MACH_O_MH_MAGIC_FAT_64, + error_callback, data, fileline_fn, found_sym); + } + case MACH_O_MH_CIGAM_FAT: + case MACH_O_MH_CIGAM_FAT_64: + { + struct macho_header_fat fat_header; + uint32_t nfat_arch; + + hdroffset = offset + sizeof (struct macho_header_fat); + memcpy (&fat_header, &header, sizeof fat_header); + nfat_arch = __builtin_bswap32 (fat_header.nfat_arch); + return macho_add_fat (state, filename, descriptor, 1, hdroffset, + match_uuid, base_address, skip_symtab, + nfat_arch, + header.magic == MACH_O_MH_CIGAM_FAT_64, + error_callback, data, fileline_fn, found_sym); + } + default: + error_callback (data, "executable file is not in Mach-O format", 0); + goto fail; + } + + switch (header.filetype) + { + case MACH_O_MH_EXECUTE: + case MACH_O_MH_DYLIB: + case MACH_O_MH_DSYM: + break; + default: + error_callback (data, "executable file is not an executable", 0); + goto fail; + } + + if (!backtrace_get_view (state, descriptor, hdroffset, header.sizeofcmds, + error_callback, data, &cmds_view)) + goto fail; + cmds_view_valid = 1; + + memset (&dwarf_sections, 0, sizeof dwarf_sections); + have_dwarf = 0; + memset (&uuid, 0, sizeof uuid); + have_uuid = 0; + + cmdoffset = 0; + for (i = 0; i < header.ncmds; ++i) + { + const char *pcmd; + struct macho_load_command load_command; + + if (cmdoffset + sizeof load_command > header.sizeofcmds) + break; + + pcmd = (const char *) cmds_view.data + cmdoffset; + memcpy (&load_command, pcmd, sizeof load_command); + + switch (load_command.cmd) + { + case MACH_O_LC_SEGMENT: + { + struct macho_segment_command segcmd; + + memcpy (&segcmd, pcmd, sizeof 
segcmd); + if (memcmp (segcmd.segname, + "__DWARF\0\0\0\0\0\0\0\0\0", + MACH_O_NAMELEN) == 0) + { + if (!macho_add_dwarf_segment (state, descriptor, offset, + load_command.cmd, + pcmd + sizeof segcmd, + (load_command.cmdsize + - sizeof segcmd), + segcmd.nsects, error_callback, + data, &dwarf_sections)) + goto fail; + have_dwarf = 1; + } + } + break; + + case MACH_O_LC_SEGMENT_64: + { + struct macho_segment_64_command segcmd; + + memcpy (&segcmd, pcmd, sizeof segcmd); + if (memcmp (segcmd.segname, + "__DWARF\0\0\0\0\0\0\0\0\0", + MACH_O_NAMELEN) == 0) + { + if (!macho_add_dwarf_segment (state, descriptor, offset, + load_command.cmd, + pcmd + sizeof segcmd, + (load_command.cmdsize + - sizeof segcmd), + segcmd.nsects, error_callback, + data, &dwarf_sections)) + goto fail; + have_dwarf = 1; + } + } + break; + + case MACH_O_LC_SYMTAB: + if (!skip_symtab) + { + struct macho_symtab_command symcmd; + + memcpy (&symcmd, pcmd, sizeof symcmd); + if (!macho_add_symtab (state, descriptor, base_address, is_64, + offset + symcmd.symoff, symcmd.nsyms, + offset + symcmd.stroff, symcmd.strsize, + error_callback, data)) + goto fail; + + *found_sym = 1; + } + break; + + case MACH_O_LC_UUID: + { + struct macho_uuid_command uuidcmd; + + memcpy (&uuidcmd, pcmd, sizeof uuidcmd); + memcpy (&uuid[0], &uuidcmd.uuid[0], MACH_O_UUID_LEN); + have_uuid = 1; + } + break; + + default: + break; + } + + cmdoffset += load_command.cmdsize; + } + + if (!backtrace_close (descriptor, error_callback, data)) + goto fail; + descriptor = -1; + + backtrace_release_view (state, &cmds_view, error_callback, data); + cmds_view_valid = 0; + + if (match_uuid != NULL) + { + /* If we don't have a UUID, or it doesn't match, just ignore + this file. */ + if (!have_uuid + || memcmp (match_uuid, &uuid[0], MACH_O_UUID_LEN) != 0) + return 1; + } + + if (have_dwarf) + { + int is_big_endian; + + is_big_endian = 0; +#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + is_big_endian = 1; +#endif +#endif + + if (!backtrace_dwarf_add (state, base_address, &dwarf_sections, + is_big_endian, NULL, error_callback, data, + fileline_fn, NULL)) + goto fail; + } + + if (!have_dwarf && have_uuid) + { + if (!macho_add_dsym (state, filename, base_address, &uuid[0], + error_callback, data, fileline_fn)) + goto fail; + } + + return 1; + + fail: + if (cmds_view_valid) + backtrace_release_view (state, &cmds_view, error_callback, data); + if (descriptor != -1) + backtrace_close (descriptor, error_callback, data); + return 0; +} + +#ifdef HAVE_MACH_O_DYLD_H + +/* Initialize the backtrace data we need from a Mach-O executable + using the dyld support functions. This closes descriptor. 
*/ + +int +backtrace_initialize (struct backtrace_state *state, const char *filename, + int descriptor, backtrace_error_callback error_callback, + void *data, fileline *fileline_fn) +{ + uint32_t c; + uint32_t i; + int closed_descriptor; + int found_sym; + fileline macho_fileline_fn; + + closed_descriptor = 0; + found_sym = 0; + macho_fileline_fn = macho_nodebug; + + c = _dyld_image_count (); + for (i = 0; i < c; ++i) + { + struct libbacktrace_base_address base_address; + const char *name; + int d; + fileline mff; + int mfs; + + name = _dyld_get_image_name (i); + if (name == NULL) + continue; + + if (strcmp (name, filename) == 0 && !closed_descriptor) + { + d = descriptor; + closed_descriptor = 1; + } + else + { + int does_not_exist; + + d = backtrace_open (name, error_callback, data, &does_not_exist); + if (d < 0) + continue; + } + + base_address.m = _dyld_get_image_vmaddr_slide (i); + + mff = macho_nodebug; + if (!macho_add (state, name, d, 0, NULL, base_address, 0, + error_callback, data, &mff, &mfs)) + continue; + + if (mff != macho_nodebug) + macho_fileline_fn = mff; + if (mfs) + found_sym = 1; + } + + if (!closed_descriptor) + backtrace_close (descriptor, error_callback, data); + + if (!state->threaded) + { + if (found_sym) + state->syminfo_fn = macho_syminfo; + else if (state->syminfo_fn == NULL) + state->syminfo_fn = macho_nosyms; + } + else + { + if (found_sym) + backtrace_atomic_store_pointer (&state->syminfo_fn, &macho_syminfo); + else + (void) __sync_bool_compare_and_swap (&state->syminfo_fn, NULL, + macho_nosyms); + } + + if (!state->threaded) + *fileline_fn = state->fileline_fn; + else + *fileline_fn = backtrace_atomic_load_pointer (&state->fileline_fn); + + if (*fileline_fn == NULL || *fileline_fn == macho_nodebug) + *fileline_fn = macho_fileline_fn; + + return 1; +} + +#else /* !defined (HAVE_MACH_O_DYLD_H) */ + +/* Initialize the backtrace data we need from a Mach-O executable + without using the dyld support functions. This closes + descriptor. */ + +int +backtrace_initialize (struct backtrace_state *state, const char *filename, + int descriptor, backtrace_error_callback error_callback, + void *data, fileline *fileline_fn) +{ + fileline macho_fileline_fn; + struct libbacktrace_base_address zero_base_address; + int found_sym; + + macho_fileline_fn = macho_nodebug; + memset (&zero_base_address, 0, sizeof zero_base_address); + if (!macho_add (state, filename, descriptor, 0, NULL, zero_base_address, 0, + error_callback, data, &macho_fileline_fn, &found_sym)) + return 0; + + if (!state->threaded) + { + if (found_sym) + state->syminfo_fn = macho_syminfo; + else if (state->syminfo_fn == NULL) + state->syminfo_fn = macho_nosyms; + } + else + { + if (found_sym) + backtrace_atomic_store_pointer (&state->syminfo_fn, &macho_syminfo); + else + (void) __sync_bool_compare_and_swap (&state->syminfo_fn, NULL, + macho_nosyms); + } + + if (!state->threaded) + *fileline_fn = state->fileline_fn; + else + *fileline_fn = backtrace_atomic_load_pointer (&state->fileline_fn); + + if (*fileline_fn == NULL || *fileline_fn == macho_nodebug) + *fileline_fn = macho_fileline_fn; + + return 1; +} + +#endif /* !defined (HAVE_MACH_O_DYLD_H) */ + +} diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/mmapio.cpp b/project/thirdparty/tracy-0.11.1/libbacktrace/mmapio.cpp new file mode 100644 index 000000000..0e8f599bb --- /dev/null +++ b/project/thirdparty/tracy-0.11.1/libbacktrace/mmapio.cpp @@ -0,0 +1,115 @@ +/* mmapio.c -- File views using mmap. + Copyright (C) 2012-2021 Free Software Foundation, Inc. 
+ Written by Ian Lance Taylor, Google. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + (1) Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + (2) Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + (3) The name of the author may not be used to + endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. */ + +#include "config.h" + +#include +#include +#include +#include + +#include "backtrace.hpp" +#include "internal.hpp" + +#ifndef HAVE_DECL_GETPAGESIZE +extern int getpagesize (void); +#endif + +#ifndef MAP_FAILED +#define MAP_FAILED ((void *)-1) +#endif + +namespace tracy +{ + +/* This file implements file views and memory allocation when mmap is + available. */ + +/* Create a view of SIZE bytes from DESCRIPTOR at OFFSET. */ + +int +backtrace_get_view (struct backtrace_state *state ATTRIBUTE_UNUSED, + int descriptor, off_t offset, uint64_t size, + backtrace_error_callback error_callback, + void *data, struct backtrace_view *view) +{ + size_t pagesize; + unsigned int inpage; + off_t pageoff; + void *map; + + if ((uint64_t) (size_t) size != size) + { + error_callback (data, "file size too large", 0); + return 0; + } + + pagesize = getpagesize (); + inpage = offset % pagesize; + pageoff = offset - inpage; + + size += inpage; + size = (size + (pagesize - 1)) & ~ (pagesize - 1); + + map = mmap (NULL, size, PROT_READ, MAP_PRIVATE, descriptor, pageoff); + if (map == MAP_FAILED) + { + error_callback (data, "mmap", errno); + return 0; + } + + view->data = (char *) map + inpage; + view->base = map; + view->len = size; + + return 1; +} + +/* Release a view read by backtrace_get_view. */ + +void +backtrace_release_view (struct backtrace_state *state ATTRIBUTE_UNUSED, + struct backtrace_view *view, + backtrace_error_callback error_callback, + void *data) +{ + union { + const void *cv; + void *v; + } cc; + + cc.cv = view->base; + if (munmap (cc.v, view->len) < 0) + error_callback (data, "munmap", errno); +} + +} diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/posix.cpp b/project/thirdparty/tracy-0.11.1/libbacktrace/posix.cpp new file mode 100644 index 000000000..8233a8ea3 --- /dev/null +++ b/project/thirdparty/tracy-0.11.1/libbacktrace/posix.cpp @@ -0,0 +1,109 @@ +/* posix.c -- POSIX file I/O routines for the backtrace library. + Copyright (C) 2012-2021 Free Software Foundation, Inc. + Written by Ian Lance Taylor, Google. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + (1) Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + (2) Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + (3) The name of the author may not be used to + endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. */ + +#include "config.h" + +#include +#include +#include +#include +#include + +#include "backtrace.hpp" +#include "internal.hpp" + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +#ifndef O_CLOEXEC +#define O_CLOEXEC 0 +#endif + +#ifndef FD_CLOEXEC +#define FD_CLOEXEC 1 +#endif + +namespace tracy +{ + +/* Open a file for reading. */ + +int +backtrace_open (const char *filename, backtrace_error_callback error_callback, + void *data, int *does_not_exist) +{ + int descriptor; + + if (does_not_exist != NULL) + *does_not_exist = 0; + + descriptor = open (filename, (int) (O_RDONLY | O_BINARY | O_CLOEXEC)); + if (descriptor < 0) + { + /* If DOES_NOT_EXIST is not NULL, then don't call ERROR_CALLBACK + if the file does not exist. We treat lacking permission to + open the file as the file not existing; this case arises when + running the libgo syscall package tests as root. */ + if (does_not_exist != NULL && (errno == ENOENT || errno == EACCES)) + *does_not_exist = 1; + else + error_callback (data, filename, errno); + return -1; + } + +#ifdef HAVE_FCNTL + /* Set FD_CLOEXEC just in case the kernel does not support + O_CLOEXEC. It doesn't matter if this fails for some reason. + FIXME: At some point it should be safe to only do this if + O_CLOEXEC == 0. */ + fcntl (descriptor, F_SETFD, FD_CLOEXEC); +#endif + + return descriptor; +} + +/* Close DESCRIPTOR. */ + +int +backtrace_close (int descriptor, backtrace_error_callback error_callback, + void *data) +{ + if (close (descriptor) < 0) + { + error_callback (data, "close", errno); + return 0; + } + return 1; +} + +} diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/sort.cpp b/project/thirdparty/tracy-0.11.1/libbacktrace/sort.cpp new file mode 100644 index 000000000..6daee0a64 --- /dev/null +++ b/project/thirdparty/tracy-0.11.1/libbacktrace/sort.cpp @@ -0,0 +1,113 @@ +/* sort.c -- Sort without allocating memory + Copyright (C) 2012-2021 Free Software Foundation, Inc. + Written by Ian Lance Taylor, Google. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + (1) Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + (2) Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + (3) The name of the author may not be used to + endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. */ + +#include "config.h" + +#include +#include + +#include "backtrace.hpp" +#include "internal.hpp" + +namespace tracy +{ + +/* The GNU glibc version of qsort allocates memory, which we must not + do if we are invoked by a signal handler. So provide our own + sort. */ + +static void +swap (char *a, char *b, size_t size) +{ + size_t i; + + for (i = 0; i < size; i++, a++, b++) + { + char t; + + t = *a; + *a = *b; + *b = t; + } +} + +void +backtrace_qsort (void *basearg, size_t count, size_t size, + int (*compar) (const void *, const void *)) +{ + char *base = (char *) basearg; + size_t i; + size_t mid; + + tail_recurse: + if (count < 2) + return; + + /* The symbol table and DWARF tables, which is all we use this + routine for, tend to be roughly sorted. Pick the middle element + in the array as our pivot point, so that we are more likely to + cut the array in half for each recursion step. */ + swap (base, base + (count / 2) * size, size); + + mid = 0; + for (i = 1; i < count; i++) + { + if ((*compar) (base, base + i * size) > 0) + { + ++mid; + if (i != mid) + swap (base + mid * size, base + i * size, size); + } + } + + if (mid > 0) + swap (base, base + mid * size, size); + + /* Recurse with the smaller array, loop with the larger one. That + ensures that our maximum stack depth is log count. */ + if (2 * mid < count) + { + backtrace_qsort (base, mid, size, compar); + base += (mid + 1) * size; + count -= mid + 1; + goto tail_recurse; + } + else + { + backtrace_qsort (base + (mid + 1) * size, count - (mid + 1), + size, compar); + count = mid; + goto tail_recurse; + } +} + +} diff --git a/project/thirdparty/tracy-0.11.1/libbacktrace/state.cpp b/project/thirdparty/tracy-0.11.1/libbacktrace/state.cpp new file mode 100644 index 000000000..ea3c137c5 --- /dev/null +++ b/project/thirdparty/tracy-0.11.1/libbacktrace/state.cpp @@ -0,0 +1,76 @@ +/* state.c -- Create the backtrace state. + Copyright (C) 2012-2021 Free Software Foundation, Inc. + Written by Ian Lance Taylor, Google. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + (1) Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + (2) Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + (3) The name of the author may not be used to + endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. */ + +#include "config.h" + +#include +#include + +#include "backtrace.hpp" +#include "internal.hpp" + +namespace tracy +{ + +/* Create the backtrace state. This will then be passed to all the + other routines. */ + +struct backtrace_state * +backtrace_create_state (const char *filename, int threaded, + backtrace_error_callback error_callback, + void *data) +{ + struct backtrace_state init_state; + struct backtrace_state *state; + +#ifndef HAVE_SYNC_FUNCTIONS + if (threaded) + { + error_callback (data, "backtrace library does not support threads", 0); + return NULL; + } +#endif + + memset (&init_state, 0, sizeof init_state); + init_state.filename = filename; + init_state.threaded = threaded; + + state = ((struct backtrace_state *) + backtrace_alloc (&init_state, sizeof *state, error_callback, data)); + if (state == NULL) + return NULL; + *state = init_state; + + return state; +} + +} diff --git a/project/thirdparty/tracy-0.11.1/tracy/Tracy.hpp b/project/thirdparty/tracy-0.11.1/tracy/Tracy.hpp new file mode 100644 index 000000000..e75d02ce9 --- /dev/null +++ b/project/thirdparty/tracy-0.11.1/tracy/Tracy.hpp @@ -0,0 +1,300 @@ +#ifndef __TRACY_HPP__ +#define __TRACY_HPP__ + +#include "../common/TracyColor.hpp" +#include "../common/TracySystem.hpp" + +#ifndef TracyFunction +# define TracyFunction __FUNCTION__ +#endif + +#ifndef TracyFile +# define TracyFile __FILE__ +#endif + +#ifndef TracyLine +# define TracyLine __LINE__ +#endif + +#ifndef TRACY_ENABLE + +#define TracyNoop + +#define ZoneNamed(x,y) +#define ZoneNamedN(x,y,z) +#define ZoneNamedC(x,y,z) +#define ZoneNamedNC(x,y,z,w) + +#define ZoneTransient(x,y) +#define ZoneTransientN(x,y,z) + +#define ZoneScoped +#define ZoneScopedN(x) +#define ZoneScopedC(x) +#define ZoneScopedNC(x,y) + +#define ZoneText(x,y) +#define ZoneTextV(x,y,z) +#define ZoneTextF(x,...) +#define ZoneTextVF(x,y,...) +#define ZoneName(x,y) +#define ZoneNameV(x,y,z) +#define ZoneNameF(x,...) +#define ZoneNameVF(x,y,...) 
+#define ZoneColor(x) +#define ZoneColorV(x,y) +#define ZoneValue(x) +#define ZoneValueV(x,y) +#define ZoneIsActive false +#define ZoneIsActiveV(x) false + +#define FrameMark +#define FrameMarkNamed(x) +#define FrameMarkStart(x) +#define FrameMarkEnd(x) + +#define FrameImage(x,y,z,w,a) + +#define TracyLockable( type, varname ) type varname +#define TracyLockableN( type, varname, desc ) type varname +#define TracySharedLockable( type, varname ) type varname +#define TracySharedLockableN( type, varname, desc ) type varname +#define LockableBase( type ) type +#define SharedLockableBase( type ) type +#define LockMark(x) (void)x +#define LockableName(x,y,z) + +#define TracyPlot(x,y) +#define TracyPlotConfig(x,y,z,w,a) + +#define TracyMessage(x,y) +#define TracyMessageL(x) +#define TracyMessageC(x,y,z) +#define TracyMessageLC(x,y) +#define TracyAppInfo(x,y) + +#define TracyAlloc(x,y) +#define TracyFree(x) +#define TracySecureAlloc(x,y) +#define TracySecureFree(x) + +#define TracyAllocN(x,y,z) +#define TracyFreeN(x,y) +#define TracySecureAllocN(x,y,z) +#define TracySecureFreeN(x,y) + +#define ZoneNamedS(x,y,z) +#define ZoneNamedNS(x,y,z,w) +#define ZoneNamedCS(x,y,z,w) +#define ZoneNamedNCS(x,y,z,w,a) + +#define ZoneTransientS(x,y,z) +#define ZoneTransientNS(x,y,z,w) + +#define ZoneScopedS(x) +#define ZoneScopedNS(x,y) +#define ZoneScopedCS(x,y) +#define ZoneScopedNCS(x,y,z) + +#define TracyAllocS(x,y,z) +#define TracyFreeS(x,y) +#define TracySecureAllocS(x,y,z) +#define TracySecureFreeS(x,y) + +#define TracyAllocNS(x,y,z,w) +#define TracyFreeNS(x,y,z) +#define TracySecureAllocNS(x,y,z,w) +#define TracySecureFreeNS(x,y,z) + +#define TracyMessageS(x,y,z) +#define TracyMessageLS(x,y) +#define TracyMessageCS(x,y,z,w) +#define TracyMessageLCS(x,y,z) + +#define TracySourceCallbackRegister(x,y) +#define TracyParameterRegister(x,y) +#define TracyParameterSetup(x,y,z,w) +#define TracyIsConnected false +#define TracyIsStarted false +#define TracySetProgramName(x) + +#define TracyFiberEnter(x) +#define TracyFiberEnterHint(x,y) +#define TracyFiberLeave + +#else + +#include + +#include "../client/TracyLock.hpp" +#include "../client/TracyProfiler.hpp" +#include "../client/TracyScoped.hpp" + +#define TracyNoop tracy::ProfilerAvailable() + +#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK +# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) +# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) +# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) +# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ) + +# define ZoneTransient( 
varname, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, TRACY_CALLSTACK, active ) +# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), TRACY_CALLSTACK, active ) +# define ZoneTransientNC( varname, name, color, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), color, TRACY_CALLSTACK, active ) +#else +# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active ) +# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active ) +# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active ) +# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), active ) + +# define ZoneTransient( varname, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, active ) +# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), active ) +# define ZoneTransientNC( varname, name, color, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), color, active ) +#endif + +#define ZoneScoped ZoneNamed( ___tracy_scoped_zone, true ) +#define ZoneScopedN( name ) ZoneNamedN( ___tracy_scoped_zone, name, true ) +#define ZoneScopedC( color ) ZoneNamedC( ___tracy_scoped_zone, color, true ) +#define ZoneScopedNC( name, color ) ZoneNamedNC( ___tracy_scoped_zone, name, color, true ) + +#define ZoneText( txt, size ) ___tracy_scoped_zone.Text( txt, size ) +#define ZoneTextV( varname, txt, size ) varname.Text( txt, size ) +#define ZoneTextF( fmt, ... ) ___tracy_scoped_zone.TextFmt( fmt, ##__VA_ARGS__ ) +#define ZoneTextVF( varname, fmt, ... ) varname.TextFmt( fmt, ##__VA_ARGS__ ) +#define ZoneName( txt, size ) ___tracy_scoped_zone.Name( txt, size ) +#define ZoneNameV( varname, txt, size ) varname.Name( txt, size ) +#define ZoneNameF( fmt, ... ) ___tracy_scoped_zone.NameFmt( fmt, ##__VA_ARGS__ ) +#define ZoneNameVF( varname, fmt, ... 
) varname.NameFmt( fmt, ##__VA_ARGS__ )
+#define ZoneColor( color ) ___tracy_scoped_zone.Color( color )
+#define ZoneColorV( varname, color ) varname.Color( color )
+#define ZoneValue( value ) ___tracy_scoped_zone.Value( value )
+#define ZoneValueV( varname, value ) varname.Value( value )
+#define ZoneIsActive ___tracy_scoped_zone.IsActive()
+#define ZoneIsActiveV( varname ) varname.IsActive()
+
+#define FrameMark tracy::Profiler::SendFrameMark( nullptr )
+#define FrameMarkNamed( name ) tracy::Profiler::SendFrameMark( name )
+#define FrameMarkStart( name ) tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgStart )
+#define FrameMarkEnd( name ) tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgEnd )
+
+#define FrameImage( image, width, height, offset, flip ) tracy::Profiler::SendFrameImage( image, width, height, offset, flip )
+
+#define TracyLockable( type, varname ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, TracyFile, TracyLine, 0 }; return &srcloc; }() }
+#define TracyLockableN( type, varname, desc ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, TracyFile, TracyLine, 0 }; return &srcloc; }() }
+#define TracySharedLockable( type, varname ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, TracyFile, TracyLine, 0 }; return &srcloc; }() }
+#define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, TracyFile, TracyLine, 0 }; return &srcloc; }() }
+#define LockableBase( type ) tracy::Lockable<type>
+#define SharedLockableBase( type ) tracy::SharedLockable<type>
+#define LockMark( varname ) static constexpr tracy::SourceLocationData __tracy_lock_location_##varname { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; varname.Mark( &__tracy_lock_location_##varname )
+#define LockableName( varname, txt, size ) varname.CustomName( txt, size )
+
+#define TracyPlot( name, val ) tracy::Profiler::PlotData( name, val )
+#define TracyPlotConfig( name, type, step, fill, color ) tracy::Profiler::ConfigurePlot( name, type, step, fill, color )
+
+#define TracyAppInfo( txt, size ) tracy::Profiler::MessageAppInfo( txt, size )
+
+#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
+# define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, TRACY_CALLSTACK )
+# define TracyMessageL( txt ) tracy::Profiler::Message( txt, TRACY_CALLSTACK )
+# define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, TRACY_CALLSTACK )
+# define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK )
+
+# define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, false )
+# define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, false )
+# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, true )
+# define TracySecureFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, true )
+
+# define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, false, name )
+# define TracyFreeN( ptr, name ) 
tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, false, name ) +# define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, true, name ) +# define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, true, name ) +#else +# define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, 0 ) +# define TracyMessageL( txt ) tracy::Profiler::Message( txt, 0 ) +# define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, 0 ) +# define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, 0 ) + +# define TracyAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, false ) +# define TracyFree( ptr ) tracy::Profiler::MemFree( ptr, false ) +# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, true ) +# define TracySecureFree( ptr ) tracy::Profiler::MemFree( ptr, true ) + +# define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocNamed( ptr, size, false, name ) +# define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeNamed( ptr, false, name ) +# define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocNamed( ptr, size, true, name ) +# define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeNamed( ptr, true, name ) +#endif + +#ifdef TRACY_HAS_CALLSTACK +# define ZoneNamedS( varname, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) +# define ZoneNamedNS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) +# define ZoneNamedCS( varname, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { nullptr, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) +# define ZoneNamedNCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,TracyLine), depth, active ) + +# define ZoneTransientS( varname, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), nullptr, 0, depth, active ) +# define ZoneTransientNS( varname, name, depth, active ) tracy::ScopedZone varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), depth, active ) + +# define ZoneScopedS( depth ) ZoneNamedS( ___tracy_scoped_zone, depth, true ) +# define ZoneScopedNS( name, depth ) ZoneNamedNS( ___tracy_scoped_zone, name, depth, true ) +# define ZoneScopedCS( color, depth ) ZoneNamedCS( ___tracy_scoped_zone, color, depth, true ) +# define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( ___tracy_scoped_zone, name, color, depth, true ) + +# define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, false ) +# define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, false ) +# define 
TracySecureAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, true ) +# define TracySecureFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, true ) + +# define TracyAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, false, name ) +# define TracyFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, false, name ) +# define TracySecureAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, true, name ) +# define TracySecureFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, true, name ) + +# define TracyMessageS( txt, size, depth ) tracy::Profiler::Message( txt, size, depth ) +# define TracyMessageLS( txt, depth ) tracy::Profiler::Message( txt, depth ) +# define TracyMessageCS( txt, size, color, depth ) tracy::Profiler::MessageColor( txt, size, color, depth ) +# define TracyMessageLCS( txt, color, depth ) tracy::Profiler::MessageColor( txt, color, depth ) +#else +# define ZoneNamedS( varname, depth, active ) ZoneNamed( varname, active ) +# define ZoneNamedNS( varname, name, depth, active ) ZoneNamedN( varname, name, active ) +# define ZoneNamedCS( varname, color, depth, active ) ZoneNamedC( varname, color, active ) +# define ZoneNamedNCS( varname, name, color, depth, active ) ZoneNamedNC( varname, name, color, active ) + +# define ZoneTransientS( varname, depth, active ) ZoneTransient( varname, active ) +# define ZoneTransientNS( varname, name, depth, active ) ZoneTransientN( varname, name, active ) + +# define ZoneScopedS( depth ) ZoneScoped +# define ZoneScopedNS( name, depth ) ZoneScopedN( name ) +# define ZoneScopedCS( color, depth ) ZoneScopedC( color ) +# define ZoneScopedNCS( name, color, depth ) ZoneScopedNC( name, color ) + +# define TracyAllocS( ptr, size, depth ) TracyAlloc( ptr, size ) +# define TracyFreeS( ptr, depth ) TracyFree( ptr ) +# define TracySecureAllocS( ptr, size, depth ) TracySecureAlloc( ptr, size ) +# define TracySecureFreeS( ptr, depth ) TracySecureFree( ptr ) + +# define TracyAllocNS( ptr, size, depth, name ) TracyAllocN( ptr, size, name ) +# define TracyFreeNS( ptr, depth, name ) TracyFreeN( ptr, name ) +# define TracySecureAllocNS( ptr, size, depth, name ) TracySecureAllocN( ptr, size, name ) +# define TracySecureFreeNS( ptr, depth, name ) TracySecureFreeN( ptr, name ) + +# define TracyMessageS( txt, size, depth ) TracyMessage( txt, size ) +# define TracyMessageLS( txt, depth ) TracyMessageL( txt ) +# define TracyMessageCS( txt, size, color, depth ) TracyMessageC( txt, size, color ) +# define TracyMessageLCS( txt, color, depth ) TracyMessageLC( txt, color ) +#endif + +#define TracySourceCallbackRegister( cb, data ) tracy::Profiler::SourceCallbackRegister( cb, data ) +#define TracyParameterRegister( cb, data ) tracy::Profiler::ParameterRegister( cb, data ) +#define TracyParameterSetup( idx, name, isBool, val ) tracy::Profiler::ParameterSetup( idx, name, isBool, val ) +#define TracyIsConnected tracy::GetProfiler().IsConnected() +#define TracySetProgramName( name ) tracy::GetProfiler().SetProgramName( name ); + +#ifdef TRACY_FIBERS +# define TracyFiberEnter( fiber ) tracy::Profiler::EnterFiber( fiber, 0 ) +# define TracyFiberEnterHint( fiber, groupHint ) tracy::Profiler::EnterFiber( fiber, groupHint ) +# define TracyFiberLeave tracy::Profiler::LeaveFiber() +#endif + +#endif + +#endif diff --git a/project/thirdparty/tracy-0.11.1/tracy/TracyC.h b/project/thirdparty/tracy-0.11.1/tracy/TracyC.h 
new file mode 100644 index 000000000..8b447beb5 --- /dev/null +++ b/project/thirdparty/tracy-0.11.1/tracy/TracyC.h @@ -0,0 +1,417 @@ +#ifndef __TRACYC_HPP__ +#define __TRACYC_HPP__ + +#include +#include + +#include "../client/TracyCallstack.h" +#include "../common/TracyApi.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum TracyPlotFormatEnum +{ + TracyPlotFormatNumber, + TracyPlotFormatMemory, + TracyPlotFormatPercentage, + TracyPlotFormatWatt +}; + +TRACY_API void ___tracy_set_thread_name( const char* name ); + +#define TracyCSetThreadName( name ) ___tracy_set_thread_name( name ); + +#ifndef TracyFunction +# define TracyFunction __FUNCTION__ +#endif + +#ifndef TracyFile +# define TracyFile __FILE__ +#endif + +#ifndef TracyLine +# define TracyLine __LINE__ +#endif + +#ifndef TRACY_ENABLE + +typedef const void* TracyCZoneCtx; + +typedef const void* TracyCLockCtx; + +#define TracyCZone(c,x) +#define TracyCZoneN(c,x,y) +#define TracyCZoneC(c,x,y) +#define TracyCZoneNC(c,x,y,z) +#define TracyCZoneEnd(c) +#define TracyCZoneText(c,x,y) +#define TracyCZoneName(c,x,y) +#define TracyCZoneColor(c,x) +#define TracyCZoneValue(c,x) + +#define TracyCAlloc(x,y) +#define TracyCFree(x) +#define TracyCSecureAlloc(x,y) +#define TracyCSecureFree(x) + +#define TracyCAllocN(x,y,z) +#define TracyCFreeN(x,y) +#define TracyCSecureAllocN(x,y,z) +#define TracyCSecureFreeN(x,y) + +#define TracyCFrameMark +#define TracyCFrameMarkNamed(x) +#define TracyCFrameMarkStart(x) +#define TracyCFrameMarkEnd(x) +#define TracyCFrameImage(x,y,z,w,a) + +#define TracyCPlot(x,y) +#define TracyCPlotF(x,y) +#define TracyCPlotI(x,y) +#define TracyCPlotConfig(x,y,z,w,a) + +#define TracyCMessage(x,y) +#define TracyCMessageL(x) +#define TracyCMessageC(x,y,z) +#define TracyCMessageLC(x,y) +#define TracyCAppInfo(x,y) + +#define TracyCZoneS(x,y,z) +#define TracyCZoneNS(x,y,z,w) +#define TracyCZoneCS(x,y,z,w) +#define TracyCZoneNCS(x,y,z,w,a) + +#define TracyCAllocS(x,y,z) +#define TracyCFreeS(x,y) +#define TracyCSecureAllocS(x,y,z) +#define TracyCSecureFreeS(x,y) + +#define TracyCAllocNS(x,y,z,w) +#define TracyCFreeNS(x,y,z) +#define TracyCSecureAllocNS(x,y,z,w) +#define TracyCSecureFreeNS(x,y,z) + +#define TracyCMessageS(x,y,z) +#define TracyCMessageLS(x,y) +#define TracyCMessageCS(x,y,z,w) +#define TracyCMessageLCS(x,y,z) + +#define TracyCLockCtx(l) +#define TracyCLockAnnounce(l) +#define TracyCLockTerminate(l) +#define TracyCLockBeforeLock(l) +#define TracyCLockAfterLock(l) +#define TracyCLockAfterUnlock(l) +#define TracyCLockAfterTryLock(l,x) +#define TracyCLockMark(l) +#define TracyCLockCustomName(l,x,y) + +#define TracyCIsConnected 0 +#define TracyCIsStarted 0 + +#ifdef TRACY_FIBERS +# define TracyCFiberEnter(fiber) +# define TracyCFiberLeave +#endif + +#else + +#ifndef TracyConcat +# define TracyConcat(x,y) TracyConcatIndirect(x,y) +#endif +#ifndef TracyConcatIndirect +# define TracyConcatIndirect(x,y) x##y +#endif + +struct ___tracy_source_location_data +{ + const char* name; + const char* function; + const char* file; + uint32_t line; + uint32_t color; +}; + +struct ___tracy_c_zone_context +{ + uint32_t id; + int active; +}; + +struct ___tracy_gpu_time_data +{ + int64_t gpuTime; + uint16_t queryId; + uint8_t context; +}; + +struct ___tracy_gpu_zone_begin_data { + uint64_t srcloc; + uint16_t queryId; + uint8_t context; +}; + +struct ___tracy_gpu_zone_begin_callstack_data { + uint64_t srcloc; + int depth; + uint16_t queryId; + uint8_t context; +}; + +struct ___tracy_gpu_zone_end_data { + uint16_t queryId; + uint8_t context; +}; + 
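(Editor's illustrative sketch, not part of the patch: the structs above and the zone macros defined further down in this header are what a C translation unit actually touches. The snippet assumes TRACY_ENABLE is defined, that the header is reachable as "tracy/TracyC.h", and that do_work is a placeholder function; it is not taken from the Tracy sources.)

#include "tracy/TracyC.h"

void do_work( void )
{
    /* TracyCZoneN creates a static struct ___tracy_source_location_data for this
       call site and begins a zone via ___tracy_emit_zone_begin (or its callstack
       variant); ctx receives the resulting ___tracy_c_zone_context handle. */
    TracyCZoneN( ctx, "do_work", 1 );

    /* ... the code being profiled ... */

    /* C zones are not scope-bound like the C++ ScopedZone, so they must be
       closed explicitly. */
    TracyCZoneEnd( ctx );
}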
+struct ___tracy_gpu_new_context_data { + int64_t gpuTime; + float period; + uint8_t context; + uint8_t flags; + uint8_t type; +}; + +struct ___tracy_gpu_context_name_data { + uint8_t context; + const char* name; + uint16_t len; +}; + +struct ___tracy_gpu_calibration_data { + int64_t gpuTime; + int64_t cpuDelta; + uint8_t context; +}; + +struct ___tracy_gpu_time_sync_data { + int64_t gpuTime; + uint8_t context; +}; + +struct __tracy_lockable_context_data; + +// Some containers don't support storing const types. +// This struct, as visible to user, is immutable, so treat it as if const was declared here. +typedef /*const*/ struct ___tracy_c_zone_context TracyCZoneCtx; + +typedef struct __tracy_lockable_context_data* TracyCLockCtx; + +#ifdef TRACY_MANUAL_LIFETIME +TRACY_API void ___tracy_startup_profiler(void); +TRACY_API void ___tracy_shutdown_profiler(void); +TRACY_API int ___tracy_profiler_started(void); + +# define TracyCIsStarted ___tracy_profiler_started() +#else +# define TracyCIsStarted 1 +#endif + +TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, uint32_t color ); +TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, uint32_t color ); + +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int active ); +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int depth, int active ); +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc( uint64_t srcloc, int active ); +TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srcloc, int depth, int active ); +TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx ); +TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size ); +TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size ); +TRACY_API void ___tracy_emit_zone_color( TracyCZoneCtx ctx, uint32_t color ); +TRACY_API void ___tracy_emit_zone_value( TracyCZoneCtx ctx, uint64_t value ); + +TRACY_API void ___tracy_emit_gpu_zone_begin( const struct ___tracy_gpu_zone_begin_data ); +TRACY_API void ___tracy_emit_gpu_zone_begin_callstack( const struct ___tracy_gpu_zone_begin_callstack_data ); +TRACY_API void ___tracy_emit_gpu_zone_begin_alloc( const struct ___tracy_gpu_zone_begin_data ); +TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_callstack( const struct ___tracy_gpu_zone_begin_callstack_data ); +TRACY_API void ___tracy_emit_gpu_zone_end( const struct ___tracy_gpu_zone_end_data data ); +TRACY_API void ___tracy_emit_gpu_time( const struct ___tracy_gpu_time_data ); +TRACY_API void ___tracy_emit_gpu_new_context( const struct ___tracy_gpu_new_context_data ); +TRACY_API void ___tracy_emit_gpu_context_name( const struct ___tracy_gpu_context_name_data ); +TRACY_API void ___tracy_emit_gpu_calibration( const struct ___tracy_gpu_calibration_data ); +TRACY_API void ___tracy_emit_gpu_time_sync( const struct ___tracy_gpu_time_sync_data ); + +TRACY_API void ___tracy_emit_gpu_zone_begin_serial( const struct ___tracy_gpu_zone_begin_data ); +TRACY_API void ___tracy_emit_gpu_zone_begin_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data ); +TRACY_API void ___tracy_emit_gpu_zone_begin_alloc_serial( const struct ___tracy_gpu_zone_begin_data ); +TRACY_API void 
___tracy_emit_gpu_zone_begin_alloc_callstack_serial( const struct ___tracy_gpu_zone_begin_callstack_data ); +TRACY_API void ___tracy_emit_gpu_zone_end_serial( const struct ___tracy_gpu_zone_end_data data ); +TRACY_API void ___tracy_emit_gpu_time_serial( const struct ___tracy_gpu_time_data ); +TRACY_API void ___tracy_emit_gpu_new_context_serial( const struct ___tracy_gpu_new_context_data ); +TRACY_API void ___tracy_emit_gpu_context_name_serial( const struct ___tracy_gpu_context_name_data ); +TRACY_API void ___tracy_emit_gpu_calibration_serial( const struct ___tracy_gpu_calibration_data ); +TRACY_API void ___tracy_emit_gpu_time_sync_serial( const struct ___tracy_gpu_time_sync_data ); + +TRACY_API int ___tracy_connected(void); + +#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK +# define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); +# define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); +# define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); +# define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), TRACY_CALLSTACK, active ); +#else +# define TracyCZone( ctx, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,TracyLine), active ); +# define TracyCZoneN( ctx, name, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,TracyLine), active ); +# define TracyCZoneC( ctx, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,TracyLine), active ); +# define TracyCZoneNC( ctx, name, color, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin( &TracyConcat(__tracy_source_location,TracyLine), active ); +#endif + +#define TracyCZoneEnd( ctx ) ___tracy_emit_zone_end( ctx ); + +#define TracyCZoneText( ctx, txt, size ) ___tracy_emit_zone_text( ctx, txt, size ); +#define TracyCZoneName( ctx, txt, size ) 
___tracy_emit_zone_name( ctx, txt, size ); +#define TracyCZoneColor( ctx, color ) ___tracy_emit_zone_color( ctx, color ); +#define TracyCZoneValue( ctx, value ) ___tracy_emit_zone_value( ctx, value ); + + +TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int secure ); +TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth, int secure ); +TRACY_API void ___tracy_emit_memory_free( const void* ptr, int secure ); +TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth, int secure ); +TRACY_API void ___tracy_emit_memory_alloc_named( const void* ptr, size_t size, int secure, const char* name ); +TRACY_API void ___tracy_emit_memory_alloc_callstack_named( const void* ptr, size_t size, int depth, int secure, const char* name ); +TRACY_API void ___tracy_emit_memory_free_named( const void* ptr, int secure, const char* name ); +TRACY_API void ___tracy_emit_memory_free_callstack_named( const void* ptr, int depth, int secure, const char* name ); + +TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack ); +TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack ); +TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int callstack ); +TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack ); + +#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK +# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 0 ) +# define TracyCFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 0 ) +# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 1 ) +# define TracyCSecureFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 1 ) + +# define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 0, name ) +# define TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 0, name ) +# define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, TRACY_CALLSTACK, 1, name ) +# define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_callstack_named( ptr, TRACY_CALLSTACK, 1, name ) + +# define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, TRACY_CALLSTACK ); +# define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, TRACY_CALLSTACK ); +# define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, TRACY_CALLSTACK ); +# define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, TRACY_CALLSTACK ); +#else +# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 0 ); +# define TracyCFree( ptr ) ___tracy_emit_memory_free( ptr, 0 ); +# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 1 ); +# define TracyCSecureFree( ptr ) ___tracy_emit_memory_free( ptr, 1 ); + +# define TracyCAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_named( ptr, size, 0, name ); +# define TracyCFreeN( ptr, name ) ___tracy_emit_memory_free_named( ptr, 0, name ); +# define TracyCSecureAllocN( ptr, size, name ) ___tracy_emit_memory_alloc_named( ptr, size, 1, name ); +# define TracyCSecureFreeN( ptr, name ) ___tracy_emit_memory_free_named( ptr, 1, name ); + +# define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, 0 ); +# define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, 0 ); +# define 
TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, 0 ); +# define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, 0 ); +#endif + + +TRACY_API void ___tracy_emit_frame_mark( const char* name ); +TRACY_API void ___tracy_emit_frame_mark_start( const char* name ); +TRACY_API void ___tracy_emit_frame_mark_end( const char* name ); +TRACY_API void ___tracy_emit_frame_image( const void* image, uint16_t w, uint16_t h, uint8_t offset, int flip ); + +#define TracyCFrameMark ___tracy_emit_frame_mark( 0 ); +#define TracyCFrameMarkNamed( name ) ___tracy_emit_frame_mark( name ); +#define TracyCFrameMarkStart( name ) ___tracy_emit_frame_mark_start( name ); +#define TracyCFrameMarkEnd( name ) ___tracy_emit_frame_mark_end( name ); +#define TracyCFrameImage( image, width, height, offset, flip ) ___tracy_emit_frame_image( image, width, height, offset, flip ); + + +TRACY_API void ___tracy_emit_plot( const char* name, double val ); +TRACY_API void ___tracy_emit_plot_float( const char* name, float val ); +TRACY_API void ___tracy_emit_plot_int( const char* name, int64_t val ); +TRACY_API void ___tracy_emit_plot_config( const char* name, int type, int step, int fill, uint32_t color ); +TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ); + +#define TracyCPlot( name, val ) ___tracy_emit_plot( name, val ); +#define TracyCPlotF( name, val ) ___tracy_emit_plot_float( name, val ); +#define TracyCPlotI( name, val ) ___tracy_emit_plot_int( name, val ); +#define TracyCPlotConfig( name, type, step, fill, color ) ___tracy_emit_plot_config( name, type, step, fill, color ); +#define TracyCAppInfo( txt, size ) ___tracy_emit_message_appinfo( txt, size ); + + +#ifdef TRACY_HAS_CALLSTACK +# define TracyCZoneS( ctx, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); +# define TracyCZoneNS( ctx, name, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, 0 }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); +# define TracyCZoneCS( ctx, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); +# define TracyCZoneNCS( ctx, name, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { name, __func__, TracyFile, (uint32_t)TracyLine, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,TracyLine), depth, active ); + +# define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 0 ) +# define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 0 ) +# define TracyCSecureAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 1 ) +# define TracyCSecureFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 1 ) + +# define TracyCAllocNS( ptr, size, depth, name ) 
___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 0, name ) +# define TracyCFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 0, name ) +# define TracyCSecureAllocNS( ptr, size, depth, name ) ___tracy_emit_memory_alloc_callstack_named( ptr, size, depth, 1, name ) +# define TracyCSecureFreeNS( ptr, depth, name ) ___tracy_emit_memory_free_callstack_named( ptr, depth, 1, name ) + +# define TracyCMessageS( txt, size, depth ) ___tracy_emit_message( txt, size, depth ); +# define TracyCMessageLS( txt, depth ) ___tracy_emit_messageL( txt, depth ); +# define TracyCMessageCS( txt, size, color, depth ) ___tracy_emit_messageC( txt, size, color, depth ); +# define TracyCMessageLCS( txt, color, depth ) ___tracy_emit_messageLC( txt, color, depth ); +#else +# define TracyCZoneS( ctx, depth, active ) TracyCZone( ctx, active ) +# define TracyCZoneNS( ctx, name, depth, active ) TracyCZoneN( ctx, name, active ) +# define TracyCZoneCS( ctx, color, depth, active ) TracyCZoneC( ctx, color, active ) +# define TracyCZoneNCS( ctx, name, color, depth, active ) TracyCZoneNC( ctx, name, color, active ) + +# define TracyCAllocS( ptr, size, depth ) TracyCAlloc( ptr, size ) +# define TracyCFreeS( ptr, depth ) TracyCFree( ptr ) +# define TracyCSecureAllocS( ptr, size, depth ) TracyCSecureAlloc( ptr, size ) +# define TracyCSecureFreeS( ptr, depth ) TracyCSecureFree( ptr ) + +# define TracyCAllocNS( ptr, size, depth, name ) TracyCAllocN( ptr, size, name ) +# define TracyCFreeNS( ptr, depth, name ) TracyCFreeN( ptr, name ) +# define TracyCSecureAllocNS( ptr, size, depth, name ) TracyCSecureAllocN( ptr, size, name ) +# define TracyCSecureFreeNS( ptr, depth, name ) TracyCSecureFreeN( ptr, name ) + +# define TracyCMessageS( txt, size, depth ) TracyCMessage( txt, size ) +# define TracyCMessageLS( txt, depth ) TracyCMessageL( txt ) +# define TracyCMessageCS( txt, size, color, depth ) TracyCMessageC( txt, size, color ) +# define TracyCMessageLCS( txt, color, depth ) TracyCMessageLC( txt, color ) +#endif + + +TRACY_API struct __tracy_lockable_context_data* ___tracy_announce_lockable_ctx( const struct ___tracy_source_location_data* srcloc ); +TRACY_API void ___tracy_terminate_lockable_ctx( struct __tracy_lockable_context_data* lockdata ); +TRACY_API int ___tracy_before_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ); +TRACY_API void ___tracy_after_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ); +TRACY_API void ___tracy_after_unlock_lockable_ctx( struct __tracy_lockable_context_data* lockdata ); +TRACY_API void ___tracy_after_try_lock_lockable_ctx( struct __tracy_lockable_context_data* lockdata, int acquired ); +TRACY_API void ___tracy_mark_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const struct ___tracy_source_location_data* srcloc ); +TRACY_API void ___tracy_custom_name_lockable_ctx( struct __tracy_lockable_context_data* lockdata, const char* name, size_t nameSz ); + +#define TracyCLockAnnounce( lock ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; lock = ___tracy_announce_lockable_ctx( &TracyConcat(__tracy_source_location,TracyLine) ); +#define TracyCLockTerminate( lock ) ___tracy_terminate_lockable_ctx( lock ); +#define TracyCLockBeforeLock( lock ) ___tracy_before_lock_lockable_ctx( lock ); +#define TracyCLockAfterLock( lock ) ___tracy_after_lock_lockable_ctx( lock ); +#define TracyCLockAfterUnlock( lock ) 
___tracy_after_unlock_lockable_ctx( lock ); +#define TracyCLockAfterTryLock( lock, acquired ) ___tracy_after_try_lock_lockable_ctx( lock, acquired ); +#define TracyCLockMark( lock ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,TracyLine) = { NULL, __func__, TracyFile, (uint32_t)TracyLine, 0 }; ___tracy_mark_lockable_ctx( lock, &TracyConcat(__tracy_source_location,TracyLine) ); +#define TracyCLockCustomName( lock, name, nameSz ) ___tracy_custom_name_lockable_ctx( lock, name, nameSz ); + +#define TracyCIsConnected ___tracy_connected() + +#ifdef TRACY_FIBERS +TRACY_API void ___tracy_fiber_enter( const char* fiber ); +TRACY_API void ___tracy_fiber_leave( void ); + +# define TracyCFiberEnter( fiber ) ___tracy_fiber_enter( fiber ); +# define TracyCFiberLeave ___tracy_fiber_leave(); +#endif + +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/project/thirdparty/tracy-0.11.1/tracy/TracyD3D11.hpp b/project/thirdparty/tracy-0.11.1/tracy/TracyD3D11.hpp new file mode 100644 index 000000000..3ed151bff --- /dev/null +++ b/project/thirdparty/tracy-0.11.1/tracy/TracyD3D11.hpp @@ -0,0 +1,446 @@ +#ifndef __TRACYD3D11_HPP__ +#define __TRACYD3D11_HPP__ + +#ifndef TRACY_ENABLE + +#define TracyD3D11Context(device,queue) nullptr +#define TracyD3D11Destroy(ctx) +#define TracyD3D11ContextName(ctx, name, size) + +#define TracyD3D11NewFrame(ctx) + +#define TracyD3D11Zone(ctx, name) +#define TracyD3D11ZoneC(ctx, name, color) +#define TracyD3D11NamedZone(ctx, varname, name, active) +#define TracyD3D11NamedZoneC(ctx, varname, name, color, active) +#define TracyD3D11ZoneTransient(ctx, varname, name, active) + +#define TracyD3D11ZoneS(ctx, name, depth) +#define TracyD3D11ZoneCS(ctx, name, color, depth) +#define TracyD3D11NamedZoneS(ctx, varname, name, depth, active) +#define TracyD3D11NamedZoneCS(ctx, varname, name, color, depth, active) +#define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active) + +#define TracyD3D11Collect(ctx) + +namespace tracy +{ +class D3D11ZoneScope {}; +} + +using TracyD3D11Ctx = void*; + +#else + +#include +#include +#include + +#include "Tracy.hpp" +#include "../client/TracyProfiler.hpp" +#include "../client/TracyCallstack.hpp" +#include "../common/TracyYield.hpp" + +#include + +#define TracyD3D11Panic(msg, ...) 
do { assert(false && "TracyD3D11: " msg); TracyMessageLC("TracyD3D11: " msg, tracy::Color::Red4); __VA_ARGS__; } while(false);
+
+namespace tracy
+{
+
+class D3D11Ctx
+{
+    friend class D3D11ZoneScope;
+
+    static constexpr uint32_t MaxQueries = 64 * 1024;
+
+    enum CollectMode { POLL, BLOCK };
+
+public:
+    D3D11Ctx( ID3D11Device* device, ID3D11DeviceContext* devicectx )
+    {
+        // TODO: consider calling ID3D11Device::GetImmediateContext() instead of passing it as an argument
+        m_device = device;
+        device->AddRef();
+        m_immediateDevCtx = devicectx;
+        devicectx->AddRef();
+
+        {
+            D3D11_QUERY_DESC desc = { };
+            desc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT;
+            if (FAILED(m_device->CreateQuery(&desc, &m_disjointQuery)))
+            {
+                TracyD3D11Panic("unable to create disjoint timestamp query.", return);
+            }
+        }
+
+        for (ID3D11Query*& query : m_queries)
+        {
+            D3D11_QUERY_DESC desc = { };
+            desc.Query = D3D11_QUERY_TIMESTAMP;
+            if (FAILED(m_device->CreateQuery(&desc, &query)))
+            {
+                TracyD3D11Panic("unable to create timestamp query.", return);
+            }
+        }
+
+        // Calibrate CPU and GPU timestamps
+        int64_t tcpu = 0;
+        int64_t tgpu = 0;
+        for (int attempts = 0; attempts < 50; attempts++)
+        {
+            m_immediateDevCtx->Begin(m_disjointQuery);
+            m_immediateDevCtx->End(m_queries[0]);
+            m_immediateDevCtx->End(m_disjointQuery);
+
+            int64_t tcpu0 = Profiler::GetTime();
+            WaitForQuery(m_disjointQuery);
+            int64_t tcpu1 = Profiler::GetTime();
+
+            D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint = { };
+            if (m_immediateDevCtx->GetData(m_disjointQuery, &disjoint, sizeof(disjoint), 0) != S_OK)
+            {
+                TracyMessageLC("TracyD3D11: unable to query GPU timestamp; retrying...", tracy::Color::Tomato);
+                continue;
+            }
+
+            if (disjoint.Disjoint)
+                continue;
+
+            UINT64 timestamp = 0;
+            if (m_immediateDevCtx->GetData(m_queries[0], &timestamp, sizeof(timestamp), 0) != S_OK)
+                continue; // this should never happen, since the enclosing disjoint query succeeded
+
+            tcpu = tcpu0 + (tcpu1 - tcpu0) * 1 / 2;
+            tgpu = timestamp * (1000000000 / disjoint.Frequency);
+            break;
+        }
+
+        // ready to roll
+        m_contextId = GetGpuCtxCounter().fetch_add(1);
+        m_immediateDevCtx->Begin(m_disjointQuery);
+        m_previousCheckpoint = m_nextCheckpoint = 0;
+
+        auto* item = Profiler::QueueSerial();
+        MemWrite( &item->hdr.type, QueueType::GpuNewContext );
+        MemWrite( &item->gpuNewContext.cpuTime, tcpu );
+        MemWrite( &item->gpuNewContext.gpuTime, tgpu );
+        MemWrite( &item->gpuNewContext.thread, uint32_t(0) ); // #TODO: why not GetThreadHandle()? 
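+        // period scales raw GPU timestamps on the server side; it is 1.0f here because the
+        // timestamps sent by this context are already converted to nanoseconds above.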
+        MemWrite( &item->gpuNewContext.period, 1.0f );
+        MemWrite( &item->gpuNewContext.context, m_contextId);
+        MemWrite( &item->gpuNewContext.flags, uint8_t(0) );
+        MemWrite( &item->gpuNewContext.type, GpuContextType::Direct3D11 );
+
+#ifdef TRACY_ON_DEMAND
+        GetProfiler().DeferItem( *item );
+#endif
+
+        Profiler::QueueSerialFinish();
+    }
+
+    ~D3D11Ctx()
+    {
+        // collect all pending timestamps before destroying everything
+        do
+        {
+            Collect(BLOCK);
+        } while (m_previousCheckpoint != m_queryCounter);
+
+        for (ID3D11Query* query : m_queries)
+        {
+            query->Release();
+        }
+        m_immediateDevCtx->End(m_disjointQuery);
+        m_disjointQuery->Release();
+        m_immediateDevCtx->Release();
+        m_device->Release();
+    }
+
+    void Name( const char* name, uint16_t len )
+    {
+        auto ptr = (char*)tracy_malloc( len );
+        memcpy( ptr, name, len );
+
+        auto item = Profiler::QueueSerial();
+        MemWrite( &item->hdr.type, QueueType::GpuContextName );
+        MemWrite( &item->gpuContextNameFat.context, m_contextId );
+        MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr );
+        MemWrite( &item->gpuContextNameFat.size, len );
+#ifdef TRACY_ON_DEMAND
+        GetProfiler().DeferItem( *item );
+#endif
+        Profiler::QueueSerialFinish();
+    }
+
+    void Collect(CollectMode mode = POLL)
+    {
+        ZoneScopedC( Color::Red4 );
+
+#ifdef TRACY_ON_DEMAND
+        if( !GetProfiler().IsConnected() )
+        {
+            m_previousCheckpoint = m_nextCheckpoint = m_queryCounter;
+            return;
+        }
+#endif
+
+        if (m_previousCheckpoint == m_nextCheckpoint)
+        {
+            uintptr_t nextCheckpoint = m_queryCounter;
+            if (nextCheckpoint == m_nextCheckpoint)
+            {
+                return;
+            }
+            m_nextCheckpoint = nextCheckpoint;
+            m_immediateDevCtx->End(m_disjointQuery);
+        }
+
+        if (mode == CollectMode::BLOCK)
+        {
+            WaitForQuery(m_disjointQuery);
+        }
+
+        D3D11_QUERY_DATA_TIMESTAMP_DISJOINT disjoint = { };
+        if (m_immediateDevCtx->GetData(m_disjointQuery, &disjoint, sizeof(disjoint), D3D11_ASYNC_GETDATA_DONOTFLUSH) != S_OK)
+        {
+            return;
+        }
+
+        if (disjoint.Disjoint == TRUE)
+        {
+            m_previousCheckpoint = m_nextCheckpoint;
+            TracyD3D11Panic("disjoint timestamps detected; dropping.");
+            return;
+        }
+
+        auto begin = m_previousCheckpoint;
+        auto end = m_nextCheckpoint;
+        for (auto i = begin; i != end; ++i)
+        {
+            uint32_t k = RingIndex(i);
+            UINT64 timestamp = 0;
+            if (m_immediateDevCtx->GetData(m_queries[k], &timestamp, sizeof(timestamp), 0) != S_OK)
+            {
+                TracyD3D11Panic("timestamp expected to be ready, but it was not!");
+                break;
+            }
+            timestamp *= (1000000000ull / disjoint.Frequency);
+            auto* item = Profiler::QueueSerial();
+            MemWrite(&item->hdr.type, QueueType::GpuTime);
+            MemWrite(&item->gpuTime.gpuTime, static_cast<int64_t>(timestamp));
+            MemWrite(&item->gpuTime.queryId, static_cast<uint16_t>(k));
+            MemWrite(&item->gpuTime.context, m_contextId);
+            Profiler::QueueSerialFinish();
+        }
+
+        // disjoint timestamp queries should only be invoked once per frame or less
+        // https://learn.microsoft.com/en-us/windows/win32/api/d3d11/ne-d3d11-d3d11_query
+        m_immediateDevCtx->Begin(m_disjointQuery);
+        m_previousCheckpoint = m_nextCheckpoint;
+    }
+
+private:
+    tracy_force_inline uint32_t RingIndex(uintptr_t index)
+    {
+        index %= MaxQueries;
+        return static_cast<uint32_t>(index);
+    }
+
+    tracy_force_inline uint32_t RingCount(uintptr_t begin, uintptr_t end)
+    {
+        // wrap-around safe: all unsigned
+        uintptr_t count = end - begin;
+        return static_cast<uint32_t>(count);
+    }
+
+    tracy_force_inline uint32_t NextQueryId()
+    {
+        auto id = m_queryCounter++;
+        if (RingCount(m_previousCheckpoint, id) >= MaxQueries)
+        {
+            TracyD3D11Panic("too many pending timestamp queries.");
+            // #TODO: return 
some sentinel value; ideally a "hidden" query index + } + return RingIndex(id); + } + + tracy_force_inline ID3D11Query* GetQueryObjectFromId(uint32_t id) + { + return m_queries[id]; + } + + tracy_force_inline void WaitForQuery(ID3D11Query* query) + { + m_immediateDevCtx->Flush(); + while (m_immediateDevCtx->GetData(query, nullptr, 0, 0) != S_OK) + YieldThread(); // busy-wait :-( attempt to reduce power usage with _mm_pause() & friends... + } + + tracy_force_inline uint8_t GetContextId() const + { + return m_contextId; + } + + ID3D11Device* m_device = nullptr; + ID3D11DeviceContext* m_immediateDevCtx = nullptr; + + ID3D11Query* m_queries[MaxQueries]; + ID3D11Query* m_disjointQuery = nullptr; + + uint8_t m_contextId = 255; // NOTE: apparently, 255 means invalid id; is this documented anywhere? + + uintptr_t m_queryCounter = 0; + + uintptr_t m_previousCheckpoint = 0; + uintptr_t m_nextCheckpoint = 0; +}; + +class D3D11ZoneScope +{ +public: + tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, bool active ) + : D3D11ZoneScope(ctx, active) + { + if( !m_active ) return; + + auto* item = Profiler::QueueSerial(); + WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast(srcloc)); + } + + tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, const SourceLocationData* srcloc, int depth, bool active ) + : D3D11ZoneScope(ctx, active) + { + if( !m_active ) return; + + auto* item = Profiler::QueueSerialCallstack(Callstack(depth)); + WriteQueueItem(item, QueueType::GpuZoneBeginCallstackSerial, reinterpret_cast(srcloc)); + } + + tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool active) + : D3D11ZoneScope(ctx, active) + { + if( !m_active ) return; + + const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz); + + auto* item = Profiler::QueueSerial(); + WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation); + } + + tracy_force_inline D3D11ZoneScope(D3D11Ctx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool active) + : D3D11ZoneScope(ctx, active) + { + if( !m_active ) return; + + const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz); + + auto* item = Profiler::QueueSerialCallstack(Callstack(depth)); + WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial, sourceLocation); + } + + tracy_force_inline ~D3D11ZoneScope() + { + if( !m_active ) return; + + const auto queryId = m_ctx->NextQueryId(); + m_ctx->m_immediateDevCtx->End(m_ctx->GetQueryObjectFromId(queryId)); + + auto* item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial ); + MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneEnd.context, m_ctx->GetContextId() ); + Profiler::QueueSerialFinish(); + } + +private: + tracy_force_inline D3D11ZoneScope( D3D11Ctx* ctx, bool active ) +#ifdef TRACY_ON_DEMAND + : m_active( active && GetProfiler().IsConnected() ) +#else + : m_active( active ) +#endif + { + if( !m_active ) return; + m_ctx = ctx; + } + + void WriteQueueItem(tracy::QueueItem* item, tracy::QueueType queueItemType, uint64_t sourceLocation) 
+ { + const auto queryId = m_ctx->NextQueryId(); + m_ctx->m_immediateDevCtx->End(m_ctx->GetQueryObjectFromId(queryId)); + + MemWrite( &item->hdr.type, queueItemType); + MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneBegin.srcloc, sourceLocation ); + MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneBegin.context, m_ctx->GetContextId() ); + Profiler::QueueSerialFinish(); + } + + const bool m_active; + + D3D11Ctx* m_ctx; +}; + +static inline D3D11Ctx* CreateD3D11Context( ID3D11Device* device, ID3D11DeviceContext* devicectx ) +{ + auto ctx = (D3D11Ctx*)tracy_malloc( sizeof( D3D11Ctx ) ); + new(ctx) D3D11Ctx( device, devicectx ); + return ctx; +} + +static inline void DestroyD3D11Context( D3D11Ctx* ctx ) +{ + ctx->~D3D11Ctx(); + tracy_free( ctx ); +} +} + +#undef TracyD3D11Panic + +using TracyD3D11Ctx = tracy::D3D11Ctx*; + +#define TracyD3D11Context( device, devicectx ) tracy::CreateD3D11Context( device, devicectx ); +#define TracyD3D11Destroy(ctx) tracy::DestroyD3D11Context(ctx); +#define TracyD3D11ContextName(ctx, name, size) ctx->Name(name, size); + +#define TracyD3D11UnnamedZone ___tracy_gpu_d3d11_zone +#define TracyD3D11SrcLocSymbol TracyConcat(__tracy_gpu_d3d11_source_location,TracyLine) +#define TracyD3D11SrcLocObject(name, color) static constexpr tracy::SourceLocationData TracyD3D11SrcLocSymbol { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; + +#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK +# define TracyD3D11Zone( ctx, name ) TracyD3D11NamedZoneS( ctx, TracyD3D11UnnamedZone, name, TRACY_CALLSTACK, true ) +# define TracyD3D11ZoneC( ctx, name, color ) TracyD3D11NamedZoneCS( ctx, TracyD3D11UnnamedZone, name, color, TRACY_CALLSTACK, true ) +# define TracyD3D11NamedZone( ctx, varname, name, active ) TracyD3D11SrcLocObject(name, 0); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, TRACY_CALLSTACK, active ); +# define TracyD3D11NamedZoneC( ctx, varname, name, color, active ) TracyD3D11SrcLocObject(name, color); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, TRACY_CALLSTACK, active ); +# define TracyD3D11ZoneTransient(ctx, varname, name, active) TracyD3D11ZoneTransientS(ctx, varname, cmdList, name, TRACY_CALLSTACK, active) +#else +# define TracyD3D11Zone( ctx, name ) TracyD3D11NamedZone( ctx, TracyD3D11UnnamedZone, name, true ) +# define TracyD3D11ZoneC( ctx, name, color ) TracyD3D11NamedZoneC( ctx, TracyD3D11UnnamedZone, name, color, true ) +# define TracyD3D11NamedZone( ctx, varname, name, active ) TracyD3D11SrcLocObject(name, 0); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, active ); +# define TracyD3D11NamedZoneC( ctx, varname, name, color, active ) TracyD3D11SrcLocObject(name, color); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, active ); +# define TracyD3D11ZoneTransient(ctx, varname, name, active) tracy::D3D11ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), active }; +#endif + +#ifdef TRACY_HAS_CALLSTACK +# define TracyD3D11ZoneS( ctx, name, depth ) TracyD3D11NamedZoneS( ctx, TracyD3D11UnnamedZone, name, depth, true ) +# define TracyD3D11ZoneCS( ctx, name, color, depth ) TracyD3D11NamedZoneCS( ctx, TracyD3D11UnnamedZone, name, color, depth, true ) +# define TracyD3D11NamedZoneS( ctx, varname, name, depth, active ) TracyD3D11SrcLocObject(name, 0); tracy::D3D11ZoneScope varname( ctx, 
&TracyD3D11SrcLocSymbol, depth, active ); +# define TracyD3D11NamedZoneCS( ctx, varname, name, color, depth, active ) TracyD3D11SrcLocObject(name, color); tracy::D3D11ZoneScope varname( ctx, &TracyD3D11SrcLocSymbol, depth, active ); +# define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active) tracy::D3D11ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), depth, active }; +#else +# define TracyD3D11ZoneS( ctx, name, depth, active ) TracyD3D11Zone( ctx, name ) +# define TracyD3D11ZoneCS( ctx, name, color, depth, active ) TracyD3D11ZoneC( name, color ) +# define TracyD3D11NamedZoneS( ctx, varname, name, depth, active ) TracyD3D11NamedZone( ctx, varname, name, active ) +# define TracyD3D11NamedZoneCS( ctx, varname, name, color, depth, active ) TracyD3D11NamedZoneC( ctx, varname, name, color, active ) +# define TracyD3D11ZoneTransientS(ctx, varname, name, depth, active) TracyD3D11ZoneTransient(ctx, varname, name, active) +#endif + +#define TracyD3D11Collect( ctx ) ctx->Collect(); + +#endif + +#endif diff --git a/project/thirdparty/tracy-0.11.1/tracy/TracyD3D12.hpp b/project/thirdparty/tracy-0.11.1/tracy/TracyD3D12.hpp new file mode 100644 index 000000000..41567937e --- /dev/null +++ b/project/thirdparty/tracy-0.11.1/tracy/TracyD3D12.hpp @@ -0,0 +1,500 @@ +#ifndef __TRACYD3D12_HPP__ +#define __TRACYD3D12_HPP__ + +#ifndef TRACY_ENABLE + +#define TracyD3D12Context(device, queue) nullptr +#define TracyD3D12Destroy(ctx) +#define TracyD3D12ContextName(ctx, name, size) + +#define TracyD3D12NewFrame(ctx) + +#define TracyD3D12Zone(ctx, cmdList, name) +#define TracyD3D12ZoneC(ctx, cmdList, name, color) +#define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) +#define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) +#define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) + +#define TracyD3D12ZoneS(ctx, cmdList, name, depth) +#define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) +#define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) +#define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) +#define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) + +#define TracyD3D12Collect(ctx) + +namespace tracy +{ + class D3D12ZoneScope {}; +} + +using TracyD3D12Ctx = void*; + +#else + +#include "Tracy.hpp" +#include "../client/TracyProfiler.hpp" +#include "../client/TracyCallstack.hpp" + +#include +#include +#include +#include +#include + +#define TracyD3D12Panic(msg, ...) do { assert(false && "TracyD3D12: " msg); TracyMessageLC("TracyD3D12: " msg, tracy::Color::Red4); __VA_ARGS__; } while(false); + +namespace tracy +{ + + struct D3D12QueryPayload + { + uint32_t m_queryIdStart = 0; + uint32_t m_queryCount = 0; + }; + + // Command queue context. + class D3D12QueueCtx + { + friend class D3D12ZoneScope; + + ID3D12Device* m_device = nullptr; + ID3D12CommandQueue* m_queue = nullptr; + uint8_t m_contextId = 255; // TODO: apparently, 255 means "invalid id"; is this documented somewhere? + ID3D12QueryHeap* m_queryHeap = nullptr; + ID3D12Resource* m_readbackBuffer = nullptr; + + // In-progress payload. 
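+        // Query slots are handed out in (begin, end) pairs from a ring of m_queryLimit
+        // entries. m_queryCounter counts the slots used since the last NewFrame();
+        // NewFrame() packages that range into a D3D12QueryPayload and signals
+        // m_payloadFence, so Collect() can tell which timestamps are ready to read back.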
+ uint32_t m_queryLimit = 0; + std::atomic m_queryCounter = 0; + uint32_t m_previousQueryCounter = 0; + + uint32_t m_activePayload = 0; + ID3D12Fence* m_payloadFence = nullptr; + std::queue m_payloadQueue; + + UINT64 m_prevCalibrationTicksCPU = 0; + + void RecalibrateClocks() + { + UINT64 cpuTimestamp; + UINT64 gpuTimestamp; + if (FAILED(m_queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp))) + { + TracyD3D12Panic("failed to obtain queue clock calibration counters.", return); + } + + int64_t cpuDeltaTicks = cpuTimestamp - m_prevCalibrationTicksCPU; + if (cpuDeltaTicks > 0) + { + static const int64_t nanosecodsPerTick = int64_t(1000000000) / GetFrequencyQpc(); + int64_t cpuDeltaNS = cpuDeltaTicks * nanosecodsPerTick; + // Save the device cpu timestamp, not the Tracy profiler timestamp: + m_prevCalibrationTicksCPU = cpuTimestamp; + + cpuTimestamp = Profiler::GetTime(); + + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuCalibration); + MemWrite(&item->gpuCalibration.gpuTime, gpuTimestamp); + MemWrite(&item->gpuCalibration.cpuTime, cpuTimestamp); + MemWrite(&item->gpuCalibration.cpuDelta, cpuDeltaNS); + MemWrite(&item->gpuCalibration.context, GetId()); + SubmitQueueItem(item); + } + } + + tracy_force_inline void SubmitQueueItem(tracy::QueueItem* item) + { +#ifdef TRACY_ON_DEMAND + GetProfiler().DeferItem(*item); +#endif + Profiler::QueueSerialFinish(); + } + + public: + D3D12QueueCtx(ID3D12Device* device, ID3D12CommandQueue* queue) + : m_device(device) + , m_queue(queue) + { + // Verify we support timestamp queries on this queue. + + if (queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY) + { + D3D12_FEATURE_DATA_D3D12_OPTIONS3 featureData{}; + + HRESULT hr = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &featureData, sizeof(featureData)); + if (FAILED(hr) || (featureData.CopyQueueTimestampQueriesSupported == FALSE)) + { + TracyD3D12Panic("Platform does not support profiling of copy queues.", return); + } + } + + static constexpr uint32_t MaxQueries = 64 * 1024; // Must be even, because queries are (begin, end) pairs + m_queryLimit = MaxQueries; + + D3D12_QUERY_HEAP_DESC heapDesc{}; + heapDesc.Type = queue->GetDesc().Type == D3D12_COMMAND_LIST_TYPE_COPY ? D3D12_QUERY_HEAP_TYPE_COPY_QUEUE_TIMESTAMP : D3D12_QUERY_HEAP_TYPE_TIMESTAMP; + heapDesc.Count = m_queryLimit; + heapDesc.NodeMask = 0; // #TODO: Support multiple adapters. + + while (FAILED(device->CreateQueryHeap(&heapDesc, IID_PPV_ARGS(&m_queryHeap)))) + { + m_queryLimit /= 2; + heapDesc.Count = m_queryLimit; + } + + // Create a readback buffer, which will be used as a destination for the query data. + + D3D12_RESOURCE_DESC readbackBufferDesc{}; + readbackBufferDesc.Alignment = 0; + readbackBufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + readbackBufferDesc.Width = m_queryLimit * sizeof(uint64_t); + readbackBufferDesc.Height = 1; + readbackBufferDesc.DepthOrArraySize = 1; + readbackBufferDesc.Format = DXGI_FORMAT_UNKNOWN; + readbackBufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; // Buffers are always row major. 
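+            // This buffer receives the resolved timestamps (one uint64_t per query slot)
+            // from ResolveQueryData() and is mapped on the CPU in Collect().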
+ readbackBufferDesc.MipLevels = 1; + readbackBufferDesc.SampleDesc.Count = 1; + readbackBufferDesc.SampleDesc.Quality = 0; + readbackBufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + + D3D12_HEAP_PROPERTIES readbackHeapProps{}; + readbackHeapProps.Type = D3D12_HEAP_TYPE_READBACK; + readbackHeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + readbackHeapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + readbackHeapProps.CreationNodeMask = 0; + readbackHeapProps.VisibleNodeMask = 0; // #TODO: Support multiple adapters. + + if (FAILED(device->CreateCommittedResource(&readbackHeapProps, D3D12_HEAP_FLAG_NONE, &readbackBufferDesc, D3D12_RESOURCE_STATE_COPY_DEST, nullptr, IID_PPV_ARGS(&m_readbackBuffer)))) + { + TracyD3D12Panic("Failed to create query readback buffer.", return); + } + + if (FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_payloadFence)))) + { + TracyD3D12Panic("Failed to create payload fence.", return); + } + + float period = [queue]() + { + uint64_t timestampFrequency; + if (FAILED(queue->GetTimestampFrequency(×tampFrequency))) + { + return 0.0f; + } + return static_cast( 1E+09 / static_cast(timestampFrequency) ); + }(); + + if (period == 0.0f) + { + TracyD3D12Panic("Failed to get timestamp frequency.", return); + } + + uint64_t cpuTimestamp; + uint64_t gpuTimestamp; + if (FAILED(queue->GetClockCalibration(&gpuTimestamp, &cpuTimestamp))) + { + TracyD3D12Panic("Failed to get queue clock calibration.", return); + } + + // Save the device cpu timestamp, not the profiler's timestamp. + m_prevCalibrationTicksCPU = cpuTimestamp; + + cpuTimestamp = Profiler::GetTime(); + + // all checked: ready to roll + m_contextId = GetGpuCtxCounter().fetch_add(1); + + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuNewContext); + MemWrite(&item->gpuNewContext.cpuTime, cpuTimestamp); + MemWrite(&item->gpuNewContext.gpuTime, gpuTimestamp); + MemWrite(&item->gpuNewContext.thread, decltype(item->gpuNewContext.thread)(0)); // #TODO: why 0 instead of GetThreadHandle()? + MemWrite(&item->gpuNewContext.period, period); + MemWrite(&item->gpuNewContext.context, GetId()); + MemWrite(&item->gpuNewContext.flags, GpuContextCalibration); + MemWrite(&item->gpuNewContext.type, GpuContextType::Direct3D12); + SubmitQueueItem(item); + } + + ~D3D12QueueCtx() + { + ZoneScopedC(Color::Red4); + // collect all pending timestamps + while (m_payloadFence->GetCompletedValue() != m_activePayload) + /* busy-wait ... 
*/; + Collect(); + m_payloadFence->Release(); + m_readbackBuffer->Release(); + m_queryHeap->Release(); + } + + + void NewFrame() + { + uint32_t queryCounter = m_queryCounter.exchange(0); + m_payloadQueue.emplace(D3D12QueryPayload{ m_previousQueryCounter, queryCounter }); + m_previousQueryCounter += queryCounter; + + if (m_previousQueryCounter >= m_queryLimit) + { + m_previousQueryCounter -= m_queryLimit; + } + + m_queue->Signal(m_payloadFence, ++m_activePayload); + } + + void Name( const char* name, uint16_t len ) + { + auto ptr = (char*)tracy_malloc( len ); + memcpy( ptr, name, len ); + + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuContextName ); + MemWrite( &item->gpuContextNameFat.context, GetId()); + MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); + MemWrite( &item->gpuContextNameFat.size, len ); + SubmitQueueItem(item); + } + + void Collect() + { + ZoneScopedC(Color::Red4); + +#ifdef TRACY_ON_DEMAND + if (!GetProfiler().IsConnected()) + { + m_queryCounter = 0; + + return; + } +#endif + + // Find out what payloads are available. + const auto newestReadyPayload = m_payloadFence->GetCompletedValue(); + const auto payloadCount = m_payloadQueue.size() - (m_activePayload - newestReadyPayload); + + if (!payloadCount) + { + return; // No payloads are available yet, exit out. + } + + D3D12_RANGE mapRange{ 0, m_queryLimit * sizeof(uint64_t) }; + + // Map the readback buffer so we can fetch the query data from the GPU. + void* readbackBufferMapping = nullptr; + + if (FAILED(m_readbackBuffer->Map(0, &mapRange, &readbackBufferMapping))) + { + TracyD3D12Panic("Failed to map readback buffer.", return); + } + + auto* timestampData = static_cast(readbackBufferMapping); + + for (uint32_t i = 0; i < payloadCount; ++i) + { + const auto& payload = m_payloadQueue.front(); + + for (uint32_t j = 0; j < payload.m_queryCount; ++j) + { + const auto counter = (payload.m_queryIdStart + j) % m_queryLimit; + const auto timestamp = timestampData[counter]; + const auto queryId = counter; + + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuTime); + MemWrite(&item->gpuTime.gpuTime, timestamp); + MemWrite(&item->gpuTime.queryId, static_cast(queryId)); + MemWrite(&item->gpuTime.context, GetId()); + + Profiler::QueueSerialFinish(); + } + + m_payloadQueue.pop(); + } + + m_readbackBuffer->Unmap(0, nullptr); + + // Recalibrate to account for drift. + RecalibrateClocks(); + } + + private: + tracy_force_inline uint32_t NextQueryId() + { + uint32_t queryCounter = m_queryCounter.fetch_add(2); + if (queryCounter >= m_queryLimit) + { + TracyD3D12Panic("Submitted too many GPU queries! Consider increasing MaxQueries."); + // #TODO: consider returning an invalid id or sentinel value here + } + + const uint32_t id = (m_previousQueryCounter + queryCounter) % m_queryLimit; + + return id; + } + + tracy_force_inline uint8_t GetId() const + { + return m_contextId; + } + }; + + class D3D12ZoneScope + { + const bool m_active; + D3D12QueueCtx* m_ctx = nullptr; + ID3D12GraphicsCommandList* m_cmdList = nullptr; + uint32_t m_queryId = 0; // Used for tracking in nested zones. 
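+        // Note: m_queryId is the begin-timestamp slot; NextQueryId() reserves two
+        // consecutive slots, and the destructor writes the end timestamp into
+        // m_queryId + 1 before resolving both slots into the readback buffer.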
+ + tracy_force_inline void WriteQueueItem(QueueItem* item, QueueType type, uint64_t srcLocation) + { + MemWrite(&item->hdr.type, type); + MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime()); + MemWrite(&item->gpuZoneBegin.srcloc, srcLocation); + MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle()); + MemWrite(&item->gpuZoneBegin.queryId, static_cast(m_queryId)); + MemWrite(&item->gpuZoneBegin.context, m_ctx->GetId()); + Profiler::QueueSerialFinish(); + } + + tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, bool active) +#ifdef TRACY_ON_DEMAND + : m_active(active&& GetProfiler().IsConnected()) +#else + : m_active(active) +#endif + { + if (!m_active) return; + + m_ctx = ctx; + m_cmdList = cmdList; + + m_queryId = m_ctx->NextQueryId(); + m_cmdList->EndQuery(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, m_queryId); + } + + public: + tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, bool active) + : D3D12ZoneScope(ctx, cmdList, active) + { + if (!m_active) return; + + auto* item = Profiler::QueueSerial(); + WriteQueueItem(item, QueueType::GpuZoneBeginSerial, reinterpret_cast(srcLocation)); + } + + tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, ID3D12GraphicsCommandList* cmdList, const SourceLocationData* srcLocation, int depth, bool active) + : D3D12ZoneScope(ctx, cmdList, active) + { + if (!m_active) return; + + auto* item = Profiler::QueueSerialCallstack(Callstack(depth)); + WriteQueueItem(item, QueueType::GpuZoneBeginCallstackSerial, reinterpret_cast(srcLocation)); + } + + tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, bool active) + : D3D12ZoneScope(ctx, cmdList, active) + { + if (!m_active) return; + + const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz); + + auto* item = Profiler::QueueSerial(); + WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocSerial, sourceLocation); + } + + tracy_force_inline D3D12ZoneScope(D3D12QueueCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, ID3D12GraphicsCommandList* cmdList, int depth, bool active) + : D3D12ZoneScope(ctx, cmdList, active) + { + if (!m_active) return; + + const auto sourceLocation = Profiler::AllocSourceLocation(line, source, sourceSz, function, functionSz, name, nameSz); + + auto* item = Profiler::QueueSerialCallstack(Callstack(depth)); + WriteQueueItem(item, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial, sourceLocation); + } + + tracy_force_inline ~D3D12ZoneScope() + { + if (!m_active) return; + + const auto queryId = m_queryId + 1; // Our end query slot is immediately after the begin slot. 
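+            // The end timestamp lands in that slot; both slots are then resolved into
+            // the readback buffer at byte offset m_queryId * sizeof(uint64_t) below.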
+ m_cmdList->EndQuery(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, queryId); + + auto* item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuZoneEndSerial); + MemWrite(&item->gpuZoneEnd.cpuTime, Profiler::GetTime()); + MemWrite(&item->gpuZoneEnd.thread, GetThreadHandle()); + MemWrite(&item->gpuZoneEnd.queryId, static_cast(queryId)); + MemWrite(&item->gpuZoneEnd.context, m_ctx->GetId()); + Profiler::QueueSerialFinish(); + + m_cmdList->ResolveQueryData(m_ctx->m_queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, m_queryId, 2, m_ctx->m_readbackBuffer, m_queryId * sizeof(uint64_t)); + } + }; + + static inline D3D12QueueCtx* CreateD3D12Context(ID3D12Device* device, ID3D12CommandQueue* queue) + { + auto* ctx = static_cast(tracy_malloc(sizeof(D3D12QueueCtx))); + new (ctx) D3D12QueueCtx{ device, queue }; + + return ctx; + } + + static inline void DestroyD3D12Context(D3D12QueueCtx* ctx) + { + ctx->~D3D12QueueCtx(); + tracy_free(ctx); + } + +} + +#undef TracyD3D12Panic + +using TracyD3D12Ctx = tracy::D3D12QueueCtx*; + +#define TracyD3D12Context(device, queue) tracy::CreateD3D12Context(device, queue); +#define TracyD3D12Destroy(ctx) tracy::DestroyD3D12Context(ctx); +#define TracyD3D12ContextName(ctx, name, size) ctx->Name(name, size); + +#define TracyD3D12NewFrame(ctx) ctx->NewFrame(); + +#define TracyD3D12UnnamedZone ___tracy_gpu_d3d12_zone +#define TracyD3D12SrcLocSymbol TracyConcat(__tracy_d3d12_source_location,TracyLine) +#define TracyD3D12SrcLocObject(name, color) static constexpr tracy::SourceLocationData TracyD3D12SrcLocSymbol { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; + +#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK +# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZoneS(ctx, TracyD3D12UnnamedZone, cmdList, name, TRACY_CALLSTACK, true) +# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneCS(ctx, TracyD3D12UnnamedZone, cmdList, name, color, TRACY_CALLSTACK, true) +# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, TRACY_CALLSTACK, active }; +# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, TRACY_CALLSTACK, active }; +# define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, TRACY_CALLSTACK, active) +#else +# define TracyD3D12Zone(ctx, cmdList, name) TracyD3D12NamedZone(ctx, TracyD3D12UnnamedZone, cmdList, name, true) +# define TracyD3D12ZoneC(ctx, cmdList, name, color) TracyD3D12NamedZoneC(ctx, TracyD3D12UnnamedZone, cmdList, name, color, true) +# define TracyD3D12NamedZone(ctx, varname, cmdList, name, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, active }; +# define TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, active }; +# define TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) tracy::D3D12ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), cmdList, active }; +#endif + +#ifdef TRACY_HAS_CALLSTACK +# define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12NamedZoneS(ctx, TracyD3D12UnnamedZone, cmdList, name, depth, true) +# define 
TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) TracyD3D12NamedZoneCS(ctx, TracyD3D12UnnamedZone, cmdList, name, color, depth, true) +# define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) TracyD3D12SrcLocObject(name, 0); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, depth, active }; +# define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) TracyD3D12SrcLocObject(name, color); tracy::D3D12ZoneScope varname{ ctx, cmdList, &TracyD3D12SrcLocSymbol, depth, active }; +# define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) tracy::D3D12ZoneScope varname{ ctx, TracyLine, TracyFile, strlen(TracyFile), TracyFunction, strlen(TracyFunction), name, strlen(name), cmdList, depth, active }; +#else +# define TracyD3D12ZoneS(ctx, cmdList, name, depth) TracyD3D12Zone(ctx, cmdList, name) +# define TracyD3D12ZoneCS(ctx, cmdList, name, color, depth) TracyD3D12Zone(ctx, cmdList, name, color) +# define TracyD3D12NamedZoneS(ctx, varname, cmdList, name, depth, active) TracyD3D12NamedZone(ctx, varname, cmdList, name, active) +# define TracyD3D12NamedZoneCS(ctx, varname, cmdList, name, color, depth, active) TracyD3D12NamedZoneC(ctx, varname, cmdList, name, color, active) +# define TracyD3D12ZoneTransientS(ctx, varname, cmdList, name, depth, active) TracyD3D12ZoneTransient(ctx, varname, cmdList, name, active) +#endif + +#define TracyD3D12Collect(ctx) ctx->Collect(); + +#endif + +#endif diff --git a/project/thirdparty/tracy-0.11.1/tracy/TracyLua.hpp b/project/thirdparty/tracy-0.11.1/tracy/TracyLua.hpp new file mode 100644 index 000000000..51dead51f --- /dev/null +++ b/project/thirdparty/tracy-0.11.1/tracy/TracyLua.hpp @@ -0,0 +1,446 @@ +#ifndef __TRACYLUA_HPP__ +#define __TRACYLUA_HPP__ + +// Include this file after you include lua headers. 
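+//
+// Illustrative usage sketch (not part of the upstream header; the lua_State setup is
+// assumed, only LuaRegister/LuaRemove below are real):
+//
+//   lua_State* L = luaL_newstate();
+//   tracy::LuaRegister( L );        // exposes the global `tracy` table to scripts
+//   ...
+//   // With TRACY_ENABLE undefined, tracy.* calls can instead be blanked out of the
+//   // script source before loading it:
+//   tracy::LuaRemove( scriptSource );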
+ +#ifndef TRACY_ENABLE + +#include + +namespace tracy +{ + +namespace detail +{ +static inline int noop( lua_State* L ) { return 0; } +} + +static inline void LuaRegister( lua_State* L ) +{ + lua_newtable( L ); + lua_pushcfunction( L, detail::noop ); + lua_setfield( L, -2, "ZoneBegin" ); + lua_pushcfunction( L, detail::noop ); + lua_setfield( L, -2, "ZoneBeginN" ); + lua_pushcfunction( L, detail::noop ); + lua_setfield( L, -2, "ZoneBeginS" ); + lua_pushcfunction( L, detail::noop ); + lua_setfield( L, -2, "ZoneBeginNS" ); + lua_pushcfunction( L, detail::noop ); + lua_setfield( L, -2, "ZoneEnd" ); + lua_pushcfunction( L, detail::noop ); + lua_setfield( L, -2, "ZoneText" ); + lua_pushcfunction( L, detail::noop ); + lua_setfield( L, -2, "ZoneName" ); + lua_pushcfunction( L, detail::noop ); + lua_setfield( L, -2, "Message" ); + lua_setglobal( L, "tracy" ); +} + +static inline char* FindEnd( char* ptr ) +{ + unsigned int cnt = 1; + while( cnt != 0 ) + { + if( *ptr == '(' ) cnt++; + else if( *ptr == ')' ) cnt--; + ptr++; + } + return ptr; +} + +static inline void LuaRemove( char* script ) +{ + while( *script ) + { + if( strncmp( script, "tracy.", 6 ) == 0 ) + { + if( strncmp( script + 6, "Zone", 4 ) == 0 ) + { + if( strncmp( script + 10, "End()", 5 ) == 0 ) + { + memset( script, ' ', 15 ); + script += 15; + } + else if( strncmp( script + 10, "Begin()", 7 ) == 0 ) + { + memset( script, ' ', 17 ); + script += 17; + } + else if( strncmp( script + 10, "Text(", 5 ) == 0 ) + { + auto end = FindEnd( script + 15 ); + memset( script, ' ', end - script ); + script = end; + } + else if( strncmp( script + 10, "Name(", 5 ) == 0 ) + { + auto end = FindEnd( script + 15 ); + memset( script, ' ', end - script ); + script = end; + } + else if( strncmp( script + 10, "BeginN(", 7 ) == 0 ) + { + auto end = FindEnd( script + 17 ); + memset( script, ' ', end - script ); + script = end; + } + else if( strncmp( script + 10, "BeginS(", 7 ) == 0 ) + { + auto end = FindEnd( script + 17 ); + memset( script, ' ', end - script ); + script = end; + } + else if( strncmp( script + 10, "BeginNS(", 8 ) == 0 ) + { + auto end = FindEnd( script + 18 ); + memset( script, ' ', end - script ); + script = end; + } + else + { + script += 10; + } + } + else if( strncmp( script + 6, "Message(", 8 ) == 0 ) + { + auto end = FindEnd( script + 14 ); + memset( script, ' ', end - script ); + script = end; + } + else + { + script += 6; + } + } + else + { + script++; + } + } +} + +} + +#else + +#include +#include + +#include "../common/TracyColor.hpp" +#include "../common/TracyAlign.hpp" +#include "../common/TracyForceInline.hpp" +#include "../common/TracySystem.hpp" +#include "../client/TracyProfiler.hpp" + +namespace tracy +{ + +#ifdef TRACY_ON_DEMAND +TRACY_API LuaZoneState& GetLuaZoneState(); +#endif + +namespace detail +{ + +#ifdef TRACY_HAS_CALLSTACK +static tracy_force_inline void SendLuaCallstack( lua_State* L, uint32_t depth ) +{ + assert( depth <= 64 ); + lua_Debug dbg[64]; + const char* func[64]; + uint32_t fsz[64]; + uint32_t ssz[64]; + + uint8_t cnt; + uint16_t spaceNeeded = sizeof( cnt ); + for( cnt=0; cnt::max)() ); + memcpy( dst, fsz+i, 2 ); dst += 2; + memcpy( dst, func[i], fsz[i] ); dst += fsz[i]; + assert( ssz[i] <= (std::numeric_limits::max)() ); + memcpy( dst, ssz+i, 2 ); dst += 2; + memcpy( dst, dbg[i].source, ssz[i] ), dst += ssz[i]; + } + assert( dst - ptr == spaceNeeded + 2 ); + + TracyQueuePrepare( QueueType::CallstackAlloc ); + MemWrite( &item->callstackAllocFat.ptr, (uint64_t)ptr ); + MemWrite( 
&item->callstackAllocFat.nativePtr, (uint64_t)Callstack( depth ) ); + TracyQueueCommit( callstackAllocFatThread ); +} + +static inline void LuaShortenSrc( char* dst, const char* src ) +{ + size_t l = std::min( (size_t)255, strlen( src ) ); + memcpy( dst, src, l ); + dst[l] = 0; +} + +static inline int LuaZoneBeginS( lua_State* L ) +{ +#ifdef TRACY_ON_DEMAND + const auto zoneCnt = GetLuaZoneState().counter++; + if( zoneCnt != 0 && !GetLuaZoneState().active ) return 0; + GetLuaZoneState().active = GetProfiler().IsConnected(); + if( !GetLuaZoneState().active ) return 0; +#endif + +#ifdef TRACY_CALLSTACK + const uint32_t depth = TRACY_CALLSTACK; +#else + const auto depth = uint32_t( lua_tointeger( L, 1 ) ); +#endif + SendLuaCallstack( L, depth ); + + lua_Debug dbg; + lua_getstack( L, 1, &dbg ); + lua_getinfo( L, "Snl", &dbg ); + char src[256]; + LuaShortenSrc( src, dbg.source ); + const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, src, dbg.name ? dbg.name : dbg.short_src ); + + TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack ); + MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); + MemWrite( &item->zoneBegin.srcloc, srcloc ); + TracyQueueCommit( zoneBeginThread ); + + return 0; +} + +static inline int LuaZoneBeginNS( lua_State* L ) +{ +#ifdef TRACY_ON_DEMAND + const auto zoneCnt = GetLuaZoneState().counter++; + if( zoneCnt != 0 && !GetLuaZoneState().active ) return 0; + GetLuaZoneState().active = GetProfiler().IsConnected(); + if( !GetLuaZoneState().active ) return 0; +#endif + +#ifdef TRACY_CALLSTACK + const uint32_t depth = TRACY_CALLSTACK; +#else + const auto depth = uint32_t( lua_tointeger( L, 2 ) ); +#endif + SendLuaCallstack( L, depth ); + + lua_Debug dbg; + lua_getstack( L, 1, &dbg ); + lua_getinfo( L, "Snl", &dbg ); + size_t nsz; + char src[256]; + LuaShortenSrc( src, dbg.source ); + const auto name = lua_tolstring( L, 1, &nsz ); + const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, src, dbg.name ? dbg.name : dbg.short_src, name, nsz ); + + TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLocCallstack ); + MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); + MemWrite( &item->zoneBegin.srcloc, srcloc ); + TracyQueueCommit( zoneBeginThread ); + + return 0; +} +#endif + +static inline int LuaZoneBegin( lua_State* L ) +{ +#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK + return LuaZoneBeginS( L ); +#else +#ifdef TRACY_ON_DEMAND + const auto zoneCnt = GetLuaZoneState().counter++; + if( zoneCnt != 0 && !GetLuaZoneState().active ) return 0; + GetLuaZoneState().active = GetProfiler().IsConnected(); + if( !GetLuaZoneState().active ) return 0; +#endif + + lua_Debug dbg; + lua_getstack( L, 1, &dbg ); + lua_getinfo( L, "Snl", &dbg ); + char src[256]; + LuaShortenSrc( src, dbg.source ); + const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, src, dbg.name ? 
dbg.name : dbg.short_src ); + + TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc ); + MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); + MemWrite( &item->zoneBegin.srcloc, srcloc ); + TracyQueueCommit( zoneBeginThread ); + return 0; +#endif +} + +static inline int LuaZoneBeginN( lua_State* L ) +{ +#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK + return LuaZoneBeginNS( L ); +#else +#ifdef TRACY_ON_DEMAND + const auto zoneCnt = GetLuaZoneState().counter++; + if( zoneCnt != 0 && !GetLuaZoneState().active ) return 0; + GetLuaZoneState().active = GetProfiler().IsConnected(); + if( !GetLuaZoneState().active ) return 0; +#endif + + lua_Debug dbg; + lua_getstack( L, 1, &dbg ); + lua_getinfo( L, "Snl", &dbg ); + size_t nsz; + char src[256]; + LuaShortenSrc( src, dbg.source ); + const auto name = lua_tolstring( L, 1, &nsz ); + const auto srcloc = Profiler::AllocSourceLocation( dbg.currentline, src, dbg.name ? dbg.name : dbg.short_src, name, nsz ); + + TracyQueuePrepare( QueueType::ZoneBeginAllocSrcLoc ); + MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); + MemWrite( &item->zoneBegin.srcloc, srcloc ); + TracyQueueCommit( zoneBeginThread ); + return 0; +#endif +} + +static inline int LuaZoneEnd( lua_State* L ) +{ +#ifdef TRACY_ON_DEMAND + assert( GetLuaZoneState().counter != 0 ); + GetLuaZoneState().counter--; + if( !GetLuaZoneState().active ) return 0; + if( !GetProfiler().IsConnected() ) + { + GetLuaZoneState().active = false; + return 0; + } +#endif + + TracyQueuePrepare( QueueType::ZoneEnd ); + MemWrite( &item->zoneEnd.time, Profiler::GetTime() ); + TracyQueueCommit( zoneEndThread ); + return 0; +} + +static inline int LuaZoneText( lua_State* L ) +{ +#ifdef TRACY_ON_DEMAND + if( !GetLuaZoneState().active ) return 0; + if( !GetProfiler().IsConnected() ) + { + GetLuaZoneState().active = false; + return 0; + } +#endif + + auto txt = lua_tostring( L, 1 ); + const auto size = strlen( txt ); + assert( size < (std::numeric_limits::max)() ); + + auto ptr = (char*)tracy_malloc( size ); + memcpy( ptr, txt, size ); + + TracyQueuePrepare( QueueType::ZoneText ); + MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); + MemWrite( &item->zoneTextFat.size, (uint16_t)size ); + TracyQueueCommit( zoneTextFatThread ); + return 0; +} + +static inline int LuaZoneName( lua_State* L ) +{ +#ifdef TRACY_ON_DEMAND + if( !GetLuaZoneState().active ) return 0; + if( !GetProfiler().IsConnected() ) + { + GetLuaZoneState().active = false; + return 0; + } +#endif + + auto txt = lua_tostring( L, 1 ); + const auto size = strlen( txt ); + assert( size < (std::numeric_limits::max)() ); + + auto ptr = (char*)tracy_malloc( size ); + memcpy( ptr, txt, size ); + + TracyQueuePrepare( QueueType::ZoneName ); + MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); + MemWrite( &item->zoneTextFat.size, (uint16_t)size ); + TracyQueueCommit( zoneTextFatThread ); + return 0; +} + +static inline int LuaMessage( lua_State* L ) +{ +#ifdef TRACY_ON_DEMAND + if( !GetProfiler().IsConnected() ) return 0; +#endif + + auto txt = lua_tostring( L, 1 ); + const auto size = strlen( txt ); + assert( size < (std::numeric_limits::max)() ); + + auto ptr = (char*)tracy_malloc( size ); + memcpy( ptr, txt, size ); + + TracyQueuePrepare( QueueType::Message ); + MemWrite( &item->messageFat.time, Profiler::GetTime() ); + MemWrite( &item->messageFat.text, (uint64_t)ptr ); + MemWrite( &item->messageFat.size, (uint16_t)size ); + TracyQueueCommit( messageFatThread ); + return 0; +} + +} + +static inline void LuaRegister( lua_State* L ) +{ + 
lua_newtable( L ); + lua_pushcfunction( L, detail::LuaZoneBegin ); + lua_setfield( L, -2, "ZoneBegin" ); + lua_pushcfunction( L, detail::LuaZoneBeginN ); + lua_setfield( L, -2, "ZoneBeginN" ); +#ifdef TRACY_HAS_CALLSTACK + lua_pushcfunction( L, detail::LuaZoneBeginS ); + lua_setfield( L, -2, "ZoneBeginS" ); + lua_pushcfunction( L, detail::LuaZoneBeginNS ); + lua_setfield( L, -2, "ZoneBeginNS" ); +#else + lua_pushcfunction( L, detail::LuaZoneBegin ); + lua_setfield( L, -2, "ZoneBeginS" ); + lua_pushcfunction( L, detail::LuaZoneBeginN ); + lua_setfield( L, -2, "ZoneBeginNS" ); +#endif + lua_pushcfunction( L, detail::LuaZoneEnd ); + lua_setfield( L, -2, "ZoneEnd" ); + lua_pushcfunction( L, detail::LuaZoneText ); + lua_setfield( L, -2, "ZoneText" ); + lua_pushcfunction( L, detail::LuaZoneName ); + lua_setfield( L, -2, "ZoneName" ); + lua_pushcfunction( L, detail::LuaMessage ); + lua_setfield( L, -2, "Message" ); + lua_setglobal( L, "tracy" ); +} + +static inline void LuaRemove( char* script ) {} + +} + +#endif + +#endif diff --git a/project/thirdparty/tracy-0.11.1/tracy/TracyOpenCL.hpp b/project/thirdparty/tracy-0.11.1/tracy/TracyOpenCL.hpp new file mode 100644 index 000000000..20d0a7cab --- /dev/null +++ b/project/thirdparty/tracy-0.11.1/tracy/TracyOpenCL.hpp @@ -0,0 +1,414 @@ +#ifndef __TRACYOPENCL_HPP__ +#define __TRACYOPENCL_HPP__ + +#if !defined TRACY_ENABLE + +#define TracyCLContext(c, x) nullptr +#define TracyCLDestroy(c) +#define TracyCLContextName(c, x, y) + +#define TracyCLNamedZone(c, x, y, z) +#define TracyCLNamedZoneC(c, x, y, z, w) +#define TracyCLZone(c, x) +#define TracyCLZoneC(c, x, y) +#define TracyCLZoneTransient(c,x,y,z) + +#define TracyCLNamedZoneS(c, x, y, z, w) +#define TracyCLNamedZoneCS(c, x, y, z, w, v) +#define TracyCLZoneS(c, x, y) +#define TracyCLZoneCS(c, x, y, z) +#define TracyCLZoneTransientS(c,x,y,z,w) + +#define TracyCLNamedZoneSetEvent(x, e) +#define TracyCLZoneSetEvent(e) + +#define TracyCLCollect(c) + +namespace tracy +{ + class OpenCLCtxScope {}; +} + +using TracyCLCtx = void*; + +#else + +#include + +#include +#include +#include + +#include "Tracy.hpp" +#include "../client/TracyCallstack.hpp" +#include "../client/TracyProfiler.hpp" +#include "../common/TracyAlloc.hpp" + +#define TRACY_CL_TO_STRING_INDIRECT(T) #T +#define TRACY_CL_TO_STRING(T) TRACY_CL_TO_STRING_INDIRECT(T) +#define TRACY_CL_ASSERT(p) if(!(p)) { \ + TracyMessageL( "TRACY_CL_ASSERT failed on " TracyFile ":" TRACY_CL_TO_STRING(TracyLine) ); \ + assert(false && "TRACY_CL_ASSERT failed"); \ +} +#define TRACY_CL_CHECK_ERROR(err) if(err != CL_SUCCESS) { \ + std::ostringstream oss; \ + oss << "TRACY_CL_CHECK_ERROR failed on " << TracyFile << ":" << TracyLine \ + << ": error code " << err; \ + auto msg = oss.str(); \ + TracyMessage(msg.data(), msg.size()); \ + assert(false && "TRACY_CL_CHECK_ERROR failed"); \ +} + +namespace tracy { + + enum class EventPhase : uint8_t + { + Begin, + End + }; + + struct EventInfo + { + cl_event event; + EventPhase phase; + }; + + class OpenCLCtx + { + public: + enum { QueryCount = 64 * 1024 }; + + OpenCLCtx(cl_context context, cl_device_id device) + : m_contextId(GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed)) + , m_head(0) + , m_tail(0) + { + int64_t tcpu, tgpu; + TRACY_CL_ASSERT(m_contextId != 255); + + cl_int err = CL_SUCCESS; + cl_command_queue queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err); + TRACY_CL_CHECK_ERROR(err) + uint32_t dummyValue = 42; + cl_mem dummyBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 
sizeof(uint32_t), nullptr, &err); + TRACY_CL_CHECK_ERROR(err) + cl_event writeBufferEvent; + TRACY_CL_CHECK_ERROR(clEnqueueWriteBuffer(queue, dummyBuffer, CL_FALSE, 0, sizeof(uint32_t), &dummyValue, 0, nullptr, &writeBufferEvent)); + TRACY_CL_CHECK_ERROR(clWaitForEvents(1, &writeBufferEvent)); + + tcpu = Profiler::GetTime(); + + cl_int eventStatus; + TRACY_CL_CHECK_ERROR(clGetEventInfo(writeBufferEvent, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr)); + TRACY_CL_ASSERT(eventStatus == CL_COMPLETE); + TRACY_CL_CHECK_ERROR(clGetEventProfilingInfo(writeBufferEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &tgpu, nullptr)); + TRACY_CL_CHECK_ERROR(clReleaseEvent(writeBufferEvent)); + TRACY_CL_CHECK_ERROR(clReleaseMemObject(dummyBuffer)); + TRACY_CL_CHECK_ERROR(clReleaseCommandQueue(queue)); + + auto item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuNewContext); + MemWrite(&item->gpuNewContext.cpuTime, tcpu); + MemWrite(&item->gpuNewContext.gpuTime, tgpu); + memset(&item->gpuNewContext.thread, 0, sizeof(item->gpuNewContext.thread)); + MemWrite(&item->gpuNewContext.period, 1.0f); + MemWrite(&item->gpuNewContext.type, GpuContextType::OpenCL); + MemWrite(&item->gpuNewContext.context, (uint8_t) m_contextId); + MemWrite(&item->gpuNewContext.flags, (uint8_t)0); +#ifdef TRACY_ON_DEMAND + GetProfiler().DeferItem(*item); +#endif + Profiler::QueueSerialFinish(); + } + + void Name( const char* name, uint16_t len ) + { + auto ptr = (char*)tracy_malloc( len ); + memcpy( ptr, name, len ); + + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuContextName ); + MemWrite( &item->gpuContextNameFat.context, (uint8_t)m_contextId ); + MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); + MemWrite( &item->gpuContextNameFat.size, len ); +#ifdef TRACY_ON_DEMAND + GetProfiler().DeferItem( *item ); +#endif + Profiler::QueueSerialFinish(); + } + + void Collect() + { + ZoneScopedC(Color::Red4); + + if (m_tail == m_head) return; + +#ifdef TRACY_ON_DEMAND + if (!GetProfiler().IsConnected()) + { + m_head = m_tail = 0; + } +#endif + + for (; m_tail != m_head; m_tail = (m_tail + 1) % QueryCount) + { + EventInfo eventInfo = GetQuery(m_tail); + cl_int eventStatus; + cl_int err = clGetEventInfo(eventInfo.event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &eventStatus, nullptr); + if (err != CL_SUCCESS) + { + std::ostringstream oss; + oss << "clGetEventInfo falied with error code " << err << ", on event " << eventInfo.event << ", skipping..."; + auto msg = oss.str(); + TracyMessage(msg.data(), msg.size()); + if (eventInfo.event == nullptr) { + TracyMessageL("A TracyCLZone must be paird with a TracyCLZoneSetEvent, check your code!"); + } + assert(false && "clGetEventInfo failed, maybe a TracyCLZone is not paired with TracyCLZoneSetEvent"); + continue; + } + if (eventStatus != CL_COMPLETE) return; + + cl_int eventInfoQuery = (eventInfo.phase == EventPhase::Begin) + ? 
CL_PROFILING_COMMAND_START + : CL_PROFILING_COMMAND_END; + + cl_ulong eventTimeStamp = 0; + err = clGetEventProfilingInfo(eventInfo.event, eventInfoQuery, sizeof(cl_ulong), &eventTimeStamp, nullptr); + if (err == CL_PROFILING_INFO_NOT_AVAILABLE) + { + TracyMessageL("command queue is not created with CL_QUEUE_PROFILING_ENABLE flag, check your code!"); + assert(false && "command queue is not created with CL_QUEUE_PROFILING_ENABLE flag"); + } + else + TRACY_CL_CHECK_ERROR(err); + + TRACY_CL_ASSERT(eventTimeStamp != 0); + + auto item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuTime); + MemWrite(&item->gpuTime.gpuTime, (int64_t)eventTimeStamp); + MemWrite(&item->gpuTime.queryId, (uint16_t)m_tail); + MemWrite(&item->gpuTime.context, m_contextId); + Profiler::QueueSerialFinish(); + + if (eventInfo.phase == EventPhase::End) + { + // Done with the event, so release it + TRACY_CL_CHECK_ERROR(clReleaseEvent(eventInfo.event)); + } + } + } + + tracy_force_inline uint8_t GetId() const + { + return m_contextId; + } + + tracy_force_inline unsigned int NextQueryId(EventInfo eventInfo) + { + const auto id = m_head; + m_head = (m_head + 1) % QueryCount; + TRACY_CL_ASSERT(m_head != m_tail); + m_query[id] = eventInfo; + return id; + } + + tracy_force_inline EventInfo& GetQuery(unsigned int id) + { + TRACY_CL_ASSERT(id < QueryCount); + return m_query[id]; + } + + private: + + unsigned int m_contextId; + + EventInfo m_query[QueryCount]; + unsigned int m_head; // index at which a new event should be inserted + unsigned int m_tail; // oldest event + + }; + + class OpenCLCtxScope { + public: + tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, bool is_active) +#ifdef TRACY_ON_DEMAND + : m_active(is_active&& GetProfiler().IsConnected()) +#else + : m_active(is_active) +#endif + , m_ctx(ctx) + , m_event(nullptr) + { + if (!m_active) return; + + m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin }); + + auto item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuZoneBeginSerial); + MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime()); + MemWrite(&item->gpuZoneBegin.srcloc, (uint64_t)srcLoc); + MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle()); + MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId); + MemWrite(&item->gpuZoneBegin.context, ctx->GetId()); + Profiler::QueueSerialFinish(); + } + + tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, const SourceLocationData* srcLoc, int depth, bool is_active) +#ifdef TRACY_ON_DEMAND + : m_active(is_active&& GetProfiler().IsConnected()) +#else + : m_active(is_active) +#endif + , m_ctx(ctx) + , m_event(nullptr) + { + if (!m_active) return; + + m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin }); + + GetProfiler().SendCallstack(depth); + + auto item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuZoneBeginCallstackSerial); + MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime()); + MemWrite(&item->gpuZoneBegin.srcloc, (uint64_t)srcLoc); + MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle()); + MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId); + MemWrite(&item->gpuZoneBegin.context, ctx->GetId()); + Profiler::QueueSerialFinish(); + } + + tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active) +#ifdef TRACY_ON_DEMAND + : m_active(is_active && 
GetProfiler().IsConnected()) +#else + : m_active(is_active) +#endif + , m_ctx(ctx) + , m_event(nullptr) + { + if (!m_active) return; + + m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin }); + + const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial ); + MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime()); + MemWrite(&item->gpuZoneBegin.srcloc, srcloc); + MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle()); + MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId); + MemWrite(&item->gpuZoneBegin.context, ctx->GetId()); + Profiler::QueueSerialFinish(); + } + + tracy_force_inline OpenCLCtxScope(OpenCLCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active) +#ifdef TRACY_ON_DEMAND + : m_active(is_active && GetProfiler().IsConnected()) +#else + : m_active(is_active) +#endif + , m_ctx(ctx) + , m_event(nullptr) + { + if (!m_active) return; + + m_beginQueryId = ctx->NextQueryId(EventInfo{ nullptr, EventPhase::Begin }); + + const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); + auto item = Profiler::QueueSerialCallstack( Callstack( depth ) ); + MemWrite(&item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial); + MemWrite(&item->gpuZoneBegin.cpuTime, Profiler::GetTime()); + MemWrite(&item->gpuZoneBegin.srcloc, srcloc); + MemWrite(&item->gpuZoneBegin.thread, GetThreadHandle()); + MemWrite(&item->gpuZoneBegin.queryId, (uint16_t)m_beginQueryId); + MemWrite(&item->gpuZoneBegin.context, ctx->GetId()); + Profiler::QueueSerialFinish(); + } + + tracy_force_inline void SetEvent(cl_event event) + { + if (!m_active) return; + m_event = event; + TRACY_CL_CHECK_ERROR(clRetainEvent(m_event)); + m_ctx->GetQuery(m_beginQueryId).event = m_event; + } + + tracy_force_inline ~OpenCLCtxScope() + { + if (!m_active) return; + const auto queryId = m_ctx->NextQueryId(EventInfo{ m_event, EventPhase::End }); + + auto item = Profiler::QueueSerial(); + MemWrite(&item->hdr.type, QueueType::GpuZoneEndSerial); + MemWrite(&item->gpuZoneEnd.cpuTime, Profiler::GetTime()); + MemWrite(&item->gpuZoneEnd.thread, GetThreadHandle()); + MemWrite(&item->gpuZoneEnd.queryId, (uint16_t)queryId); + MemWrite(&item->gpuZoneEnd.context, m_ctx->GetId()); + Profiler::QueueSerialFinish(); + } + + const bool m_active; + OpenCLCtx* m_ctx; + cl_event m_event; + unsigned int m_beginQueryId; + }; + + static inline OpenCLCtx* CreateCLContext(cl_context context, cl_device_id device) + { + auto ctx = (OpenCLCtx*)tracy_malloc(sizeof(OpenCLCtx)); + new (ctx) OpenCLCtx(context, device); + return ctx; + } + + static inline void DestroyCLContext(OpenCLCtx* ctx) + { + ctx->~OpenCLCtx(); + tracy_free(ctx); + } + +} // namespace tracy + +using TracyCLCtx = tracy::OpenCLCtx*; + +#define TracyCLContext(ctx, device) tracy::CreateCLContext(ctx, device); +#define TracyCLDestroy(ctx) tracy::DestroyCLContext(ctx); +#define TracyCLContextName(ctx, name, size) ctx->Name(name, size); +#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK +# define TracyCLNamedZone(ctx, varname, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::OpenCLCtxScope varname(ctx, 
&TracyConcat(__tracy_gpu_source_location,TracyLine), TRACY_CALLSTACK, active ); +# define TracyCLNamedZoneC(ctx, varname, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), TRACY_CALLSTACK, active ); +# define TracyCLZone(ctx, name) TracyCLNamedZoneS(ctx, __tracy_gpu_zone, name, TRACY_CALLSTACK, true) +# define TracyCLZoneC(ctx, name, color) TracyCLNamedZoneCS(ctx, __tracy_gpu_zone, name, color, TRACY_CALLSTACK, true) +# define TracyCLZoneTransient( ctx, varname, name, active ) tracy::OpenCLCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), TRACY_CALLSTACK, active ); +#else +# define TracyCLNamedZone(ctx, varname, name, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine){ name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), active); +# define TracyCLNamedZoneC(ctx, varname, name, color, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine){ name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), active); +# define TracyCLZone(ctx, name) TracyCLNamedZone(ctx, __tracy_gpu_zone, name, true) +# define TracyCLZoneC(ctx, name, color) TracyCLNamedZoneC(ctx, __tracy_gpu_zone, name, color, true ) +# define TracyCLZoneTransient( ctx, varname, name, active ) tracy::OpenCLCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), active ); +#endif + +#ifdef TRACY_HAS_CALLSTACK +# define TracyCLNamedZoneS(ctx, varname, name, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine){ name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), depth, active); +# define TracyCLNamedZoneCS(ctx, varname, name, color, depth, active) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine){ name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::OpenCLCtxScope varname(ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), depth, active); +# define TracyCLZoneS(ctx, name, depth) TracyCLNamedZoneS(ctx, __tracy_gpu_zone, name, depth, true) +# define TracyCLZoneCS(ctx, name, color, depth) TracyCLNamedZoneCS(ctx, __tracy_gpu_zone, name, color, depth, true) +# define TracyCLZoneTransientS( ctx, varname, name, depth, active ) tracy::OpenCLCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), depth, active ); +#else +# define TracyCLNamedZoneS(ctx, varname, name, depth, active) TracyCLNamedZone(ctx, varname, name, active) +# define TracyCLNamedZoneCS(ctx, varname, name, color, depth, active) TracyCLNamedZoneC(ctx, varname, name, color, active) +# define TracyCLZoneS(ctx, name, depth) TracyCLZone(ctx, name) +# define TracyCLZoneCS(ctx, name, color, depth) TracyCLZoneC(ctx, name, color) +# define TracyCLZoneTransientS( ctx, varname, name, depth, active ) TracyCLZoneTransient( ctx, varname, name, active ) +#endif + +#define 
TracyCLNamedZoneSetEvent(varname, event) varname.SetEvent(event) +#define TracyCLZoneSetEvent(event) __tracy_gpu_zone.SetEvent(event) + +#define TracyCLCollect(ctx) ctx->Collect() + +#endif + +#endif diff --git a/project/thirdparty/tracy-0.11.1/tracy/TracyOpenGL.hpp b/project/thirdparty/tracy-0.11.1/tracy/TracyOpenGL.hpp new file mode 100644 index 000000000..3bdadccee --- /dev/null +++ b/project/thirdparty/tracy-0.11.1/tracy/TracyOpenGL.hpp @@ -0,0 +1,325 @@ +#ifndef __TRACYOPENGL_HPP__ +#define __TRACYOPENGL_HPP__ + +#if !defined TRACY_ENABLE || defined __APPLE__ + +#define TracyGpuContext +#define TracyGpuContextName(x,y) +#define TracyGpuNamedZone(x,y,z) +#define TracyGpuNamedZoneC(x,y,z,w) +#define TracyGpuZone(x) +#define TracyGpuZoneC(x,y) +#define TracyGpuZoneTransient(x,y,z) +#define TracyGpuCollect + +#define TracyGpuNamedZoneS(x,y,z,w) +#define TracyGpuNamedZoneCS(x,y,z,w,a) +#define TracyGpuZoneS(x,y) +#define TracyGpuZoneCS(x,y,z) +#define TracyGpuZoneTransientS(x,y,z,w) + +namespace tracy +{ +struct SourceLocationData; +class GpuCtxScope +{ +public: + GpuCtxScope( const SourceLocationData*, bool ) {} + GpuCtxScope( const SourceLocationData*, int, bool ) {} +}; +} + +#else + +#include +#include +#include + +#include "Tracy.hpp" +#include "../client/TracyProfiler.hpp" +#include "../client/TracyCallstack.hpp" +#include "../common/TracyAlign.hpp" +#include "../common/TracyAlloc.hpp" + +#if !defined GL_TIMESTAMP && defined GL_TIMESTAMP_EXT +# define GL_TIMESTAMP GL_TIMESTAMP_EXT +# define GL_QUERY_COUNTER_BITS GL_QUERY_COUNTER_BITS_EXT +# define glGetQueryObjectiv glGetQueryObjectivEXT +# define glGetQueryObjectui64v glGetQueryObjectui64vEXT +# define glQueryCounter glQueryCounterEXT +#endif + +#define TracyGpuContext tracy::GetGpuCtx().ptr = (tracy::GpuCtx*)tracy::tracy_malloc( sizeof( tracy::GpuCtx ) ); new(tracy::GetGpuCtx().ptr) tracy::GpuCtx; +#define TracyGpuContextName( name, size ) tracy::GetGpuCtx().ptr->Name( name, size ); +#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK +# define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,TracyLine), TRACY_CALLSTACK, active ); +# define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,TracyLine), TRACY_CALLSTACK, active ); +# define TracyGpuZone( name ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, TRACY_CALLSTACK, true ) +# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, TRACY_CALLSTACK, true ) +# define TracyGpuZoneTransient( varname, name, active ) tracy::GpuCtxScope varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), TRACY_CALLSTACK, active ); +#else +# define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,TracyLine), active ); +# define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData 
TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,TracyLine), active ); +# define TracyGpuZone( name ) TracyGpuNamedZone( ___tracy_gpu_zone, name, true ) +# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneC( ___tracy_gpu_zone, name, color, true ) +# define TracyGpuZoneTransient( varname, name, active ) tracy::GpuCtxScope varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), active ); +#endif +#define TracyGpuCollect tracy::GetGpuCtx().ptr->Collect(); + +#ifdef TRACY_HAS_CALLSTACK +# define TracyGpuNamedZoneS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,TracyLine), depth, active ); +# define TracyGpuNamedZoneCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,TracyLine), depth, active ); +# define TracyGpuZoneS( name, depth ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, depth, true ) +# define TracyGpuZoneCS( name, color, depth ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, depth, true ) +# define TracyGpuZoneTransientS( varname, name, depth, active ) tracy::GpuCtxScope varname( TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), depth, active ); +#else +# define TracyGpuNamedZoneS( varname, name, depth, active ) TracyGpuNamedZone( varname, name, active ) +# define TracyGpuNamedZoneCS( varname, name, color, depth, active ) TracyGpuNamedZoneC( varname, name, color, active ) +# define TracyGpuZoneS( name, depth ) TracyGpuZone( name ) +# define TracyGpuZoneCS( name, color, depth ) TracyGpuZoneC( name, color ) +# define TracyGpuZoneTransientS( varname, name, depth, active ) TracyGpuZoneTransient( varname, name, active ) +#endif + +namespace tracy +{ + +class GpuCtx +{ + friend class GpuCtxScope; + + enum { QueryCount = 64 * 1024 }; + +public: + GpuCtx() + : m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) ) + , m_head( 0 ) + , m_tail( 0 ) + { + assert( m_context != 255 ); + + glGenQueries( QueryCount, m_query ); + + int64_t tgpu; + glGetInteger64v( GL_TIMESTAMP, &tgpu ); + int64_t tcpu = Profiler::GetTime(); + + GLint bits; + glGetQueryiv( GL_TIMESTAMP, GL_QUERY_COUNTER_BITS, &bits ); + + const float period = 1.f; + const auto thread = GetThreadHandle(); + TracyLfqPrepare( QueueType::GpuNewContext ); + MemWrite( &item->gpuNewContext.cpuTime, tcpu ); + MemWrite( &item->gpuNewContext.gpuTime, tgpu ); + MemWrite( &item->gpuNewContext.thread, thread ); + MemWrite( &item->gpuNewContext.period, period ); + MemWrite( &item->gpuNewContext.context, m_context ); + MemWrite( &item->gpuNewContext.flags, uint8_t( 0 ) ); + MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl ); + +#ifdef TRACY_ON_DEMAND + GetProfiler().DeferItem( *item ); +#endif + + TracyLfqCommit; + } + + void Name( const char* name, uint16_t len ) + { + auto ptr = (char*)tracy_malloc( len ); + memcpy( ptr, name, len ); + + TracyLfqPrepare( QueueType::GpuContextName ); + MemWrite( &item->gpuContextNameFat.context, m_context ); + 
MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); + MemWrite( &item->gpuContextNameFat.size, len ); +#ifdef TRACY_ON_DEMAND + GetProfiler().DeferItem( *item ); +#endif + TracyLfqCommit; + } + + void Collect() + { + ZoneScopedC( Color::Red4 ); + + if( m_tail == m_head ) return; + +#ifdef TRACY_ON_DEMAND + if( !GetProfiler().IsConnected() ) + { + m_head = m_tail = 0; + return; + } +#endif + + while( m_tail != m_head ) + { + GLint available; + glGetQueryObjectiv( m_query[m_tail], GL_QUERY_RESULT_AVAILABLE, &available ); + if( !available ) return; + + uint64_t time; + glGetQueryObjectui64v( m_query[m_tail], GL_QUERY_RESULT, &time ); + + TracyLfqPrepare( QueueType::GpuTime ); + MemWrite( &item->gpuTime.gpuTime, (int64_t)time ); + MemWrite( &item->gpuTime.queryId, (uint16_t)m_tail ); + MemWrite( &item->gpuTime.context, m_context ); + TracyLfqCommit; + + m_tail = ( m_tail + 1 ) % QueryCount; + } + } + +private: + tracy_force_inline unsigned int NextQueryId() + { + const auto id = m_head; + m_head = ( m_head + 1 ) % QueryCount; + assert( m_head != m_tail ); + return id; + } + + tracy_force_inline unsigned int TranslateOpenGlQueryId( unsigned int id ) + { + return m_query[id]; + } + + tracy_force_inline uint8_t GetId() const + { + return m_context; + } + + unsigned int m_query[QueryCount]; + uint8_t m_context; + + unsigned int m_head; + unsigned int m_tail; +}; + +class GpuCtxScope +{ +public: + tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if( !m_active ) return; + + const auto queryId = GetGpuCtx().ptr->NextQueryId(); + glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP ); + + TracyLfqPrepare( QueueType::GpuZoneBegin ); + MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); + memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) ); + MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() ); + MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); + TracyLfqCommit; + } + + tracy_force_inline GpuCtxScope( const SourceLocationData* srcloc, int depth, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if( !m_active ) return; + + const auto queryId = GetGpuCtx().ptr->NextQueryId(); + glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP ); + +#ifdef TRACY_FIBERS + TracyLfqPrepare( QueueType::GpuZoneBegin ); + memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) ); +#else + GetProfiler().SendCallstack( depth ); + TracyLfqPrepare( QueueType::GpuZoneBeginCallstack ); + MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); +#endif + MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() ); + MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); + TracyLfqCommit; + } + + tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if( !m_active ) return; + + const auto queryId = 
GetGpuCtx().ptr->NextQueryId(); + glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP ); + + TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLoc ); + const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); + MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); + memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) ); + MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() ); + MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); + TracyLfqCommit; + } + + tracy_force_inline GpuCtxScope( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if( !m_active ) return; + + const auto queryId = GetGpuCtx().ptr->NextQueryId(); + glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP ); + +#ifdef TRACY_FIBERS + TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLoc ); + memset( &item->gpuZoneBegin.thread, 0, sizeof( item->gpuZoneBegin.thread ) ); +#else + GetProfiler().SendCallstack( depth ); + TracyLfqPrepare( QueueType::GpuZoneBeginAllocSrcLocCallstack ); + MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); +#endif + const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); + MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() ); + MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); + TracyLfqCommit; + } + + tracy_force_inline ~GpuCtxScope() + { + if( !m_active ) return; + + const auto queryId = GetGpuCtx().ptr->NextQueryId(); + glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP ); + + TracyLfqPrepare( QueueType::GpuZoneEnd ); + MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() ); + memset( &item->gpuZoneEnd.thread, 0, sizeof( item->gpuZoneEnd.thread ) ); + MemWrite( &item->gpuZoneEnd.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneEnd.context, GetGpuCtx().ptr->GetId() ); + TracyLfqCommit; + } + +private: + const bool m_active; +}; + +} + +#endif + +#endif diff --git a/project/thirdparty/tracy-0.11.1/tracy/TracyVulkan.hpp b/project/thirdparty/tracy-0.11.1/tracy/TracyVulkan.hpp new file mode 100644 index 000000000..c34b71852 --- /dev/null +++ b/project/thirdparty/tracy-0.11.1/tracy/TracyVulkan.hpp @@ -0,0 +1,723 @@ +#ifndef __TRACYVULKAN_HPP__ +#define __TRACYVULKAN_HPP__ + +#if !defined TRACY_ENABLE + +#define TracyVkContext(x,y,z,w) nullptr +#define TracyVkContextCalibrated(x,y,z,w,a,b) nullptr +#if defined VK_EXT_host_query_reset +#define TracyVkContextHostCalibrated(x,y,z,w,a) nullptr +#endif +#define TracyVkDestroy(x) +#define TracyVkContextName(c,x,y) +#define TracyVkNamedZone(c,x,y,z,w) +#define TracyVkNamedZoneC(c,x,y,z,w,a) +#define TracyVkZone(c,x,y) +#define TracyVkZoneC(c,x,y,z) +#define TracyVkZoneTransient(c,x,y,z,w) +#define TracyVkCollect(c,x) + +#define TracyVkNamedZoneS(c,x,y,z,w,a) +#define TracyVkNamedZoneCS(c,x,y,z,w,v,a) +#define TracyVkZoneS(c,x,y,z) +#define TracyVkZoneCS(c,x,y,z,w) +#define TracyVkZoneTransientS(c,x,y,z,w,a) + +namespace tracy +{ +class VkCtxScope {}; +} + +using 
TracyVkCtx = void*; + +#else + +#if !defined VK_NULL_HANDLE +# error "You must include Vulkan headers before including TracyVulkan.hpp" +#endif + +#include +#include +#include "Tracy.hpp" +#include "../client/TracyProfiler.hpp" +#include "../client/TracyCallstack.hpp" + +#include + +namespace tracy +{ + +#if defined TRACY_VK_USE_SYMBOL_TABLE +#define LoadVkDeviceCoreSymbols(Operation) \ + Operation(vkBeginCommandBuffer) \ + Operation(vkCmdResetQueryPool) \ + Operation(vkCmdWriteTimestamp) \ + Operation(vkCreateQueryPool) \ + Operation(vkDestroyQueryPool) \ + Operation(vkEndCommandBuffer) \ + Operation(vkGetQueryPoolResults) \ + Operation(vkQueueSubmit) \ + Operation(vkQueueWaitIdle) \ + Operation(vkResetQueryPool) + +#define LoadVkDeviceExtensionSymbols(Operation) \ + Operation(vkGetCalibratedTimestampsEXT) + +#define LoadVkInstanceExtensionSymbols(Operation) \ + Operation(vkGetPhysicalDeviceCalibrateableTimeDomainsEXT) + +#define LoadVkInstanceCoreSymbols(Operation) \ + Operation(vkGetPhysicalDeviceProperties) + +struct VkSymbolTable +{ +#define MAKE_PFN(name) PFN_##name name; + LoadVkDeviceCoreSymbols(MAKE_PFN) + LoadVkDeviceExtensionSymbols(MAKE_PFN) + LoadVkInstanceExtensionSymbols(MAKE_PFN) + LoadVkInstanceCoreSymbols(MAKE_PFN) +#undef MAKE_PFN +}; + +#define VK_FUNCTION_WRAPPER(callSignature) m_symbols.callSignature +#define CONTEXT_VK_FUNCTION_WRAPPER(callSignature) m_ctx->m_symbols.callSignature +#else +#define VK_FUNCTION_WRAPPER(callSignature) callSignature +#define CONTEXT_VK_FUNCTION_WRAPPER(callSignature) callSignature +#endif + +class VkCtx +{ + friend class VkCtxScope; + + enum { QueryCount = 64 * 1024 }; + +public: +#if defined TRACY_VK_USE_SYMBOL_TABLE + VkCtx( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr, bool calibrated ) +#else + VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT vkGetCalibratedTimestampsEXT) +#endif + : m_device( device ) + , m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT ) + , m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) ) + , m_head( 0 ) + , m_tail( 0 ) + , m_oldCnt( 0 ) + , m_queryCount( QueryCount ) +#if !defined TRACY_VK_USE_SYMBOL_TABLE + , m_vkGetCalibratedTimestampsEXT( vkGetCalibratedTimestampsEXT ) +#endif + { + assert( m_context != 255 ); + +#if defined TRACY_VK_USE_SYMBOL_TABLE + PopulateSymbolTable(instance, instanceProcAddr, deviceProcAddr); + if ( calibrated ) + { + m_vkGetCalibratedTimestampsEXT = m_symbols.vkGetCalibratedTimestampsEXT; + } + +#endif + + if( VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) && m_vkGetCalibratedTimestampsEXT ) + { + FindAvailableTimeDomains( physdev, VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) ); + } + + CreateQueryPool(); + + VkCommandBufferBeginInfo beginInfo = {}; + beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + + VkSubmitInfo submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &cmdbuf; + + VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) ); + VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ) ); + VK_FUNCTION_WRAPPER( 
vkEndCommandBuffer( cmdbuf ) ); + VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) ); + VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) ); + + int64_t tcpu, tgpu; + if( m_timeDomain == VK_TIME_DOMAIN_DEVICE_EXT ) + { + VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) ); + VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 ) ); + VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) ); + VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) ); + VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) ); + + tcpu = Profiler::GetTime(); + VK_FUNCTION_WRAPPER( vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT ) ); + + VK_FUNCTION_WRAPPER( vkBeginCommandBuffer( cmdbuf, &beginInfo ) ); + VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 ) ); + VK_FUNCTION_WRAPPER( vkEndCommandBuffer( cmdbuf ) ); + VK_FUNCTION_WRAPPER( vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ) ); + VK_FUNCTION_WRAPPER( vkQueueWaitIdle( queue ) ); + } + else + { + FindCalibratedTimestampDeviation(); + Calibrate( device, m_prevCalibration, tgpu ); + tcpu = Profiler::GetTime(); + } + + WriteInitialItem( physdev, tcpu, tgpu ); + + m_res = (int64_t*)tracy_malloc( sizeof( int64_t ) * m_queryCount ); + } + +#if defined VK_EXT_host_query_reset + /** + * This alternative constructor does not use command buffers and instead uses functionality from + * VK_EXT_host_query_reset (core with 1.2 and non-optional) and VK_EXT_calibrated_timestamps. This requires + * the physical device to have another time domain apart from DEVICE to be calibrateable. + */ +#if defined TRACY_VK_USE_SYMBOL_TABLE + VkCtx( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr ) +#else + VkCtx( VkPhysicalDevice physdev, VkDevice device, PFN_vkResetQueryPoolEXT vkResetQueryPool, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT vkGetCalibratedTimestampsEXT ) +#endif + : m_device( device ) + , m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT ) + , m_context( GetGpuCtxCounter().fetch_add(1, std::memory_order_relaxed) ) + , m_head( 0 ) + , m_tail( 0 ) + , m_oldCnt( 0 ) + , m_queryCount( QueryCount ) +#if !defined TRACY_VK_USE_SYMBOL_TABLE + , m_vkGetCalibratedTimestampsEXT( vkGetCalibratedTimestampsEXT ) +#endif + { + assert( m_context != 255); + +#if defined TRACY_VK_USE_SYMBOL_TABLE + PopulateSymbolTable(instance, instanceProcAddr, deviceProcAddr); + m_vkGetCalibratedTimestampsEXT = m_symbols.vkGetCalibratedTimestampsEXT; +#endif + + assert( VK_FUNCTION_WRAPPER( vkResetQueryPool ) != nullptr ); + assert( VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) != nullptr ); + assert( VK_FUNCTION_WRAPPER( vkGetCalibratedTimestampsEXT ) != nullptr ); + + FindAvailableTimeDomains( physdev, VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) ); + + // We require a host time domain to be available to properly calibrate. 
+ FindCalibratedTimestampDeviation(); + int64_t tgpu; + Calibrate( device, m_prevCalibration, tgpu ); + int64_t tcpu = Profiler::GetTime(); + + CreateQueryPool(); + VK_FUNCTION_WRAPPER( vkResetQueryPool( device, m_query, 0, m_queryCount ) ); + + WriteInitialItem( physdev, tcpu, tgpu ); + + // We need the buffer to be twice as large for availability values + size_t resSize = sizeof( int64_t ) * m_queryCount * 2; + m_res = (int64_t*)tracy_malloc( resSize ); + } +#endif + + ~VkCtx() + { + tracy_free( m_res ); + VK_FUNCTION_WRAPPER( vkDestroyQueryPool( m_device, m_query, nullptr ) ); + } + + void Name( const char* name, uint16_t len ) + { + auto ptr = (char*)tracy_malloc( len ); + memcpy( ptr, name, len ); + + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuContextName ); + MemWrite( &item->gpuContextNameFat.context, m_context ); + MemWrite( &item->gpuContextNameFat.ptr, (uint64_t)ptr ); + MemWrite( &item->gpuContextNameFat.size, len ); +#ifdef TRACY_ON_DEMAND + GetProfiler().DeferItem( *item ); +#endif + Profiler::QueueSerialFinish(); + } + + void Collect( VkCommandBuffer cmdbuf ) + { + ZoneScopedC( Color::Red4 ); + + const uint64_t head = m_head.load(std::memory_order_relaxed); + if( m_tail == head ) return; + +#ifdef TRACY_ON_DEMAND + if( !GetProfiler().IsConnected() ) + { + VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ) ); + m_tail = head; + m_oldCnt = 0; + int64_t tgpu; + if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) Calibrate( m_device, m_prevCalibration, tgpu ); + return; + } +#endif + assert( head > m_tail ); + + const unsigned int wrappedTail = (unsigned int)( m_tail % m_queryCount ); + + unsigned int cnt; + if( m_oldCnt != 0 ) + { + cnt = m_oldCnt; + m_oldCnt = 0; + } + else + { + cnt = (unsigned int)( head - m_tail ); + assert( cnt <= m_queryCount ); + if( wrappedTail + cnt > m_queryCount ) + { + cnt = m_queryCount - wrappedTail; + } + } + + + VK_FUNCTION_WRAPPER( vkGetQueryPoolResults( m_device, m_query, wrappedTail, cnt, sizeof( int64_t ) * m_queryCount * 2, m_res, sizeof( int64_t ) * 2, VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_AVAILABILITY_BIT ) ); + + for( unsigned int idx=0; idxhdr.type, QueueType::GpuTime ); + MemWrite( &item->gpuTime.gpuTime, m_res[idx * 2] ); + MemWrite( &item->gpuTime.queryId, uint16_t( wrappedTail + idx ) ); + MemWrite( &item->gpuTime.context, m_context ); + Profiler::QueueSerialFinish(); + } + + if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) + { + int64_t tgpu, tcpu; + Calibrate( m_device, tcpu, tgpu ); + const auto refCpu = Profiler::GetTime(); + const auto delta = tcpu - m_prevCalibration; + if( delta > 0 ) + { + m_prevCalibration = tcpu; + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuCalibration ); + MemWrite( &item->gpuCalibration.gpuTime, tgpu ); + MemWrite( &item->gpuCalibration.cpuTime, refCpu ); + MemWrite( &item->gpuCalibration.cpuDelta, delta ); + MemWrite( &item->gpuCalibration.context, m_context ); + Profiler::QueueSerialFinish(); + } + } + + VK_FUNCTION_WRAPPER( vkCmdResetQueryPool( cmdbuf, m_query, wrappedTail, cnt ) ); + + m_tail += cnt; + } + + tracy_force_inline unsigned int NextQueryId() + { + const uint64_t id = m_head.fetch_add(1, std::memory_order_relaxed); + return id % m_queryCount; + } + + tracy_force_inline uint8_t GetId() const + { + return m_context; + } + + tracy_force_inline VkQueryPool GetQueryPool() const + { + return m_query; + } + +private: + tracy_force_inline void Calibrate( VkDevice device, int64_t& tCpu, int64_t& 
tGpu ) + { + assert( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ); + VkCalibratedTimestampInfoEXT spec[2] = { + { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT }, + { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain }, + }; + uint64_t ts[2]; + uint64_t deviation; + do + { + m_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, &deviation ); + } + while( deviation > m_deviation ); + +#if defined _WIN32 + tGpu = ts[0]; + tCpu = ts[1] * m_qpcToNs; +#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW + tGpu = ts[0]; + tCpu = ts[1]; +#else + assert( false ); +#endif + } + + tracy_force_inline void CreateQueryPool() + { + VkQueryPoolCreateInfo poolInfo = {}; + poolInfo.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; + poolInfo.queryCount = m_queryCount; + poolInfo.queryType = VK_QUERY_TYPE_TIMESTAMP; + while ( VK_FUNCTION_WRAPPER( vkCreateQueryPool( m_device, &poolInfo, nullptr, &m_query ) != VK_SUCCESS ) ) + { + m_queryCount /= 2; + poolInfo.queryCount = m_queryCount; + } + } + + tracy_force_inline void FindAvailableTimeDomains( VkPhysicalDevice physicalDevice, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT ) + { + uint32_t num; + _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physicalDevice, &num, nullptr ); + if(num > 4) num = 4; + VkTimeDomainEXT data[4]; + _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physicalDevice, &num, data ); + VkTimeDomainEXT supportedDomain = (VkTimeDomainEXT)-1; +#if defined _WIN32 + supportedDomain = VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT; +#elif defined __linux__ && defined CLOCK_MONOTONIC_RAW + supportedDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT; +#endif + for( uint32_t i=0; i deviation[i] ) { + minDeviation = deviation[i]; + } + } + m_deviation = minDeviation * 3 / 2; + +#if defined _WIN32 + m_qpcToNs = int64_t( 1000000000. 
/ GetFrequencyQpc() ); +#endif + } + + tracy_force_inline void WriteInitialItem( VkPhysicalDevice physdev, int64_t tcpu, int64_t tgpu ) + { + uint8_t flags = 0; + if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) flags |= GpuContextCalibration; + + VkPhysicalDeviceProperties prop; + VK_FUNCTION_WRAPPER( vkGetPhysicalDeviceProperties( physdev, &prop ) ); + const float period = prop.limits.timestampPeriod; + + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuNewContext ); + MemWrite( &item->gpuNewContext.cpuTime, tcpu ); + MemWrite( &item->gpuNewContext.gpuTime, tgpu ); + memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) ); + MemWrite( &item->gpuNewContext.period, period ); + MemWrite( &item->gpuNewContext.context, m_context ); + MemWrite( &item->gpuNewContext.flags, flags ); + MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan ); + +#ifdef TRACY_ON_DEMAND + GetProfiler().DeferItem( *item ); +#endif + Profiler::QueueSerialFinish(); + } + +#if defined TRACY_VK_USE_SYMBOL_TABLE + void PopulateSymbolTable( VkInstance instance, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr deviceProcAddr ) + { +#define VK_GET_DEVICE_SYMBOL( name ) \ + (PFN_##name)deviceProcAddr( m_device, #name ); +#define VK_LOAD_DEVICE_SYMBOL( name ) \ + m_symbols.name = VK_GET_DEVICE_SYMBOL( name ); +#define VK_GET_INSTANCE_SYMBOL( name ) \ + (PFN_##name)instanceProcAddr( instance, #name ); +#define VK_LOAD_INSTANCE_SYMBOL( name ) \ + m_symbols.name = VK_GET_INSTANCE_SYMBOL( name ); + + LoadVkDeviceCoreSymbols( VK_LOAD_DEVICE_SYMBOL ) + LoadVkDeviceExtensionSymbols( VK_LOAD_DEVICE_SYMBOL ) + LoadVkInstanceExtensionSymbols( VK_LOAD_INSTANCE_SYMBOL ) + LoadVkInstanceCoreSymbols( VK_LOAD_INSTANCE_SYMBOL ) +#undef VK_GET_DEVICE_SYMBOL +#undef VK_LOAD_DEVICE_SYMBOL +#undef VK_GET_INSTANCE_SYMBOL +#undef VK_LOAD_INSTANCE_SYMBOL + } +#endif + + VkDevice m_device; + VkQueryPool m_query; + VkTimeDomainEXT m_timeDomain; +#if defined TRACY_VK_USE_SYMBOL_TABLE + VkSymbolTable m_symbols; +#endif + uint64_t m_deviation; +#ifdef _WIN32 + int64_t m_qpcToNs; +#endif + int64_t m_prevCalibration; + uint8_t m_context; + + std::atomic m_head; + uint64_t m_tail; + unsigned int m_oldCnt; + unsigned int m_queryCount; + + int64_t* m_res; + + PFN_vkGetCalibratedTimestampsEXT m_vkGetCalibratedTimestampsEXT; +}; + +class VkCtxScope +{ +public: + tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if( !m_active ) return; + m_cmdbuf = cmdbuf; + m_ctx = ctx; + + const auto queryId = ctx->NextQueryId(); + CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) ); + + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginSerial ); + MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); + MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneBegin.context, ctx->GetId() ); + Profiler::QueueSerialFinish(); + } + + tracy_force_inline VkCtxScope( VkCtx* ctx, const SourceLocationData* srcloc, VkCommandBuffer cmdbuf, int depth, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + 
: m_active( is_active ) +#endif + { + if( !m_active ) return; + m_cmdbuf = cmdbuf; + m_ctx = ctx; + + const auto queryId = ctx->NextQueryId(); + CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) ); + + auto item = Profiler::QueueSerialCallstack( Callstack( depth ) ); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial ); + MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneBegin.srcloc, (uint64_t)srcloc ); + MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneBegin.context, ctx->GetId() ); + Profiler::QueueSerialFinish(); + } + + tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if( !m_active ) return; + m_cmdbuf = cmdbuf; + m_ctx = ctx; + + const auto queryId = ctx->NextQueryId(); + CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) ); + + const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocSerial ); + MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneBegin.srcloc, srcloc ); + MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneBegin.context, ctx->GetId() ); + Profiler::QueueSerialFinish(); + } + + tracy_force_inline VkCtxScope( VkCtx* ctx, uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, VkCommandBuffer cmdbuf, int depth, bool is_active ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if( !m_active ) return; + m_cmdbuf = cmdbuf; + m_ctx = ctx; + + const auto queryId = ctx->NextQueryId(); + CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ) ); + + const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); + auto item = Profiler::QueueSerialCallstack( Callstack( depth ) ); + MemWrite( &item->hdr.type, QueueType::GpuZoneBeginAllocSrcLocCallstackSerial ); + MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneBegin.srcloc, srcloc ); + MemWrite( &item->gpuZoneBegin.thread, GetThreadHandle() ); + MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneBegin.context, ctx->GetId() ); + Profiler::QueueSerialFinish(); + } + + tracy_force_inline ~VkCtxScope() + { + if( !m_active ) return; + + const auto queryId = m_ctx->NextQueryId(); + CONTEXT_VK_FUNCTION_WRAPPER( vkCmdWriteTimestamp( m_cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_ctx->m_query, queryId ) ); + + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuZoneEndSerial ); + MemWrite( &item->gpuZoneEnd.cpuTime, Profiler::GetTime() ); + MemWrite( &item->gpuZoneEnd.thread, GetThreadHandle() ); + MemWrite( 
&item->gpuZoneEnd.queryId, uint16_t( queryId ) ); + MemWrite( &item->gpuZoneEnd.context, m_ctx->GetId() ); + Profiler::QueueSerialFinish(); + } + +private: + const bool m_active; + + VkCommandBuffer m_cmdbuf; + VkCtx* m_ctx; +}; + +#if defined TRACY_VK_USE_SYMBOL_TABLE +static inline VkCtx* CreateVkContext( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr getDeviceProcAddr, bool calibrated = false ) +#else +static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct ) +#endif +{ + auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) ); +#if defined TRACY_VK_USE_SYMBOL_TABLE + new(ctx) VkCtx( instance, physdev, device, queue, cmdbuf, instanceProcAddr, getDeviceProcAddr, calibrated ); +#else + new(ctx) VkCtx( physdev, device, queue, cmdbuf, gpdctd, gct ); +#endif + return ctx; +} + +#if defined VK_EXT_host_query_reset +#if defined TRACY_VK_USE_SYMBOL_TABLE +static inline VkCtx* CreateVkContext( VkInstance instance, VkPhysicalDevice physdev, VkDevice device, PFN_vkGetInstanceProcAddr instanceProcAddr, PFN_vkGetDeviceProcAddr getDeviceProcAddr ) +#else +static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, PFN_vkResetQueryPoolEXT qpreset, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct ) +#endif +{ + auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) ); +#if defined TRACY_VK_USE_SYMBOL_TABLE + new(ctx) VkCtx( instance, physdev, device, instanceProcAddr, getDeviceProcAddr ); +#else + new(ctx) VkCtx( physdev, device, qpreset, gpdctd, gct ); +#endif + return ctx; +} +#endif + +static inline void DestroyVkContext( VkCtx* ctx ) +{ + ctx->~VkCtx(); + tracy_free( ctx ); +} + +} + +using TracyVkCtx = tracy::VkCtx*; + +#if defined TRACY_VK_USE_SYMBOL_TABLE +#define TracyVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr ); +#else +#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, nullptr, nullptr ); +#endif +#if defined TRACY_VK_USE_SYMBOL_TABLE +#define TracyVkContextCalibrated( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, queue, cmdbuf, instanceProcAddr, deviceProcAddr, true ); +#else +#define TracyVkContextCalibrated( physdev, device, queue, cmdbuf, gpdctd, gct ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, gpdctd, gct ); +#endif +#if defined VK_EXT_host_query_reset +#if defined TRACY_VK_USE_SYMBOL_TABLE +#define TracyVkContextHostCalibrated( instance, physdev, device, instanceProcAddr, deviceProcAddr ) tracy::CreateVkContext( instance, physdev, device, instanceProcAddr, deviceProcAddr ); +#else +#define TracyVkContextHostCalibrated( physdev, device, qpreset, gpdctd, gct ) tracy::CreateVkContext( physdev, device, qpreset, gpdctd, gct ); +#endif +#endif +#define TracyVkDestroy( ctx ) tracy::DestroyVkContext( ctx ); +#define TracyVkContextName( ctx, name, size ) ctx->Name( name, size ); +#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK +# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData 
TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, TRACY_CALLSTACK, active ); +# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, TRACY_CALLSTACK, active ); +# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, TRACY_CALLSTACK, true ) +# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, TRACY_CALLSTACK, true ) +# define TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) TracyVkZoneTransientS( ctx, varname, cmdbuf, name, TRACY_CALLSTACK, active ) +#else +# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, active ); +# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, active ); +# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZone( ctx, ___tracy_gpu_zone, cmdbuf, name, true ) +# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneC( ctx, ___tracy_gpu_zone, cmdbuf, name, color, true ) +# define TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) tracy::VkCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), cmdbuf, active ); +#endif +#define TracyVkCollect( ctx, cmdbuf ) ctx->Collect( cmdbuf ); + +#ifdef TRACY_HAS_CALLSTACK +# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, depth, active ); +# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,TracyLine) { name, TracyFunction, TracyFile, (uint32_t)TracyLine, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,TracyLine), cmdbuf, depth, active ); +# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, depth, true ) +# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, depth, true ) +# define TracyVkZoneTransientS( ctx, varname, cmdbuf, name, depth, active ) tracy::VkCtxScope varname( ctx, TracyLine, TracyFile, strlen( TracyFile ), TracyFunction, strlen( TracyFunction ), name, strlen( name ), cmdbuf, depth, active ); +#else +# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) +# define 
TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) +# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkZone( ctx, cmdbuf, name ) +# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkZoneC( ctx, cmdbuf, name, color ) +# define TracyVkZoneTransientS( ctx, varname, cmdbuf, name, depth, active ) TracyVkZoneTransient( ctx, varname, cmdbuf, name, active ) +#endif + +#endif + +#endif diff --git a/src/hx/Debug.cpp b/src/hx/Debug.cpp index 9cd52c3f0..25c7d4dba 100644 --- a/src/hx/Debug.cpp +++ b/src/hx/Debug.cpp @@ -192,10 +192,8 @@ StackContext::StackContext() mIsUnwindingException = false; #endif - #ifdef HXCPP_TELEMETRY - //mTelemetry = tlmCreate(this); - // Do not automatically start - mTelemetry = 0; + #if HXCPP_TELEMETRY + mTelemetry = tlmCreate(this); #endif #ifdef HXCPP_DEBUGGER diff --git a/src/hx/Telemetry.cpp b/src/hx/Telemetry.cpp index eaa29d2f5..593299a4c 100644 --- a/src/hx/Telemetry.cpp +++ b/src/hx/Telemetry.cpp @@ -649,6 +649,8 @@ void __hxt_new_hash(void* obj, int inSize) stack->mTelemetry->HXTAllocation(obj, inSize, (const char *)"Hash"); #endif } +void __hxt_gc_alloc(void* obj, int inSize) { } +void __hxt_gc_free_large(void*) {} void __hxt_gc_new(hx::StackContext *stack, void* obj, int inSize, const char* name) { #ifdef HXCPP_STACK_TRACE diff --git a/src/hx/TelemetryTracy.cpp b/src/hx/TelemetryTracy.cpp new file mode 100644 index 000000000..4058ac842 --- /dev/null +++ b/src/hx/TelemetryTracy.cpp @@ -0,0 +1,262 @@ +#include +#include +#include + +#include + +namespace +{ + TracyCZoneCtx gcZone; + + // ___tracy_source_location_data gcSourceLocation = { "GC", "__hxt_gc_start", TracyFile, (uint32_t)TracyLine, 0 }; + + const char* sohName = "Small Object Heap"; + const char* lohName = "Large Object Heap"; + + std::vector created; + hx::QuickVec largeAllocs; + HxMutex largeAllocsLock; + + bool isLargeObject(void* ptr) + { + auto ptrHeader = reinterpret_cast(ptr) - sizeof(int); + auto flags = *reinterpret_cast(ptrHeader); + + return (flags & 0xffff) == 0; + } +} + +namespace hx +{ + class Telemetry + { + public: + std::vector tracyZones; + hx::QuickVec smallAllocs; + + Telemetry() : tracyZones(0), smallAllocs() {} + }; +} + +int __hxcpp_hxt_start_telemetry(bool profiler, bool allocations) +{ + hx::Throw(HX_CSTRING("Not implemented")); + + return 0; +} + +TelemetryFrame* __hxcpp_hxt_dump_telemetry(int) +{ + hx::Throw(HX_CSTRING("Not implemented")); + + return 0; +} + +void __hxcpp_hxt_stash_telemetry() { } + +void __hxcpp_hxt_ignore_allocs(int) {} + +void __hxt_new_string(void* obj, int inSize) { } + +void __hxt_new_array(void* obj, int inSize) { } + +void __hxt_new_hash(void* obj, int inSize) { } + +void __hxt_gc_new(hx::StackContext* stack, void* obj, int inSize, const char* name) { } + +void __hxt_gc_alloc(void* obj, int inSize) +{ + #ifdef HXCPP_TRACY_MEMORY + if (isLargeObject(obj)) + { + AutoLock lock(largeAllocsLock); + + largeAllocs.push(obj); + + TracyAllocN(obj, inSize, lohName); + } + else + { + hx::StackContext::getCurrent()->mTelemetry->smallAllocs.push(obj); + + TracyAllocN(obj, inSize, sohName); + } + #endif +} + +void __hxt_gc_free_large(void* obj) +{ + AutoLock lock(largeAllocsLock); + + for (auto i = 0; i < largeAllocs.size(); i++) + { + if (largeAllocs[i] == obj) + { + largeAllocs.erase(i); + + TracyFreeN(obj, lohName); + + return; + } + } +} + +void __hxt_gc_realloc(void* oldObj, void* newObj, int newSize) { } + +void __hxt_gc_after_mark(int byteMarkId, int 
endianByteMarkId) +{ + #ifdef HXCPP_TRACY_MEMORY + for (auto&& telemetry : created) + { + hx::QuickVec smallRetained; + + smallRetained.safeReserveExtra(telemetry->smallAllocs.size()); + + for (auto i = 0; i < telemetry->smallAllocs.size(); i++) + { + auto ptr = telemetry->smallAllocs[i]; + auto markByte = reinterpret_cast(ptr)[endianByteMarkId]; + if (markByte != byteMarkId) + { + TracyFreeN(ptr, sohName); + } + else + { + smallRetained.push(ptr); + } + } + + telemetry->smallAllocs.swap(smallRetained); + } + + hx::QuickVec largeRetained; + + largeRetained.safeReserveExtra(largeAllocs.size()); + + for (auto i = 0; i < largeAllocs.size(); i++) + { + auto ptr = largeAllocs[i]; + auto markByte = reinterpret_cast(ptr)[endianByteMarkId]; + if (markByte != byteMarkId) + { + TracyFreeN(ptr, lohName); + } + else + { + largeRetained.push(ptr); + } + } + + largeAllocs.swap(largeRetained); + #endif +} + +void __hxt_gc_start() +{ + // gcZone = ___tracy_emit_zone_begin(&gcSourceLocation, true); +} + +void __hxt_gc_end() +{ + // ___tracy_emit_zone_end(gcZone); +} + +hx::Telemetry* hx::tlmCreate(StackContext* stack) +{ + auto obj = new hx::Telemetry(); + + created.push_back(obj); + + return obj; +} + +void hx::tlmDestroy(hx::Telemetry* telemetry) +{ + created.erase(std::find(created.begin(), created.end(), telemetry)); + + delete telemetry; +} + +void hx::tlmAttach(hx::Telemetry* telemetry, hx::StackContext* stack) +{ + // +} + +void hx::tlmDetach(hx::Telemetry* telemetry) +{ + // +} + +void hx::tlmSampleEnter(hx::Telemetry* telemetry, hx::StackFrame* frame) +{ + auto srcloc = + ___tracy_alloc_srcloc( + frame->lineNumber, + frame->position->fileName, + strlen(frame->position->fileName), + frame->position->fullName, + strlen(frame->position->fullName), + 0); + + #if HXCPP_TRACY_INCLUDE_CALLSTACKS + // Note: Tracy doesn't support callstacks outside this range: depth >= 1 && depth < 63 + // Determine depth from tracyZones vector: +1 since we are about to add one + auto depth = telemetry->tracyZones.size() + 1; + + telemetry->tracyZones.push_back(___tracy_emit_zone_begin_alloc_callstack(srcloc, depth, true)); + #else + telemetry->tracyZones.push_back(___tracy_emit_zone_begin_alloc(srcloc, true)); + #endif +} + +void hx::tlmSampleExit(hx::Telemetry* telemetry) +{ + if (telemetry->tracyZones.empty()) + { + return; + } + + ___tracy_emit_zone_end(telemetry->tracyZones.back()); + + telemetry->tracyZones.pop_back(); +} + +void __hxcpp_tracy_framemark() +{ + ::tracy::Profiler::SendFrameMark(0); +} + +void __hxcpp_tracy_plot(String name, ::Float val) +{ + hx::strbuf buffer; + ::tracy::Profiler::PlotData(name.utf8_str(&buffer), val); +} + +void __hxcpp_tracy_plot_config(String name, uint8_t format, bool step, bool fill, int color) +{ + hx::strbuf buffer; + ::tracy::Profiler::ConfigurePlot(name.utf8_str(&buffer), ::tracy::PlotFormatType(format), step, fill, color); +} + +void __hxcpp_tracy_message(String msg, int color) +{ + hx::strbuf buffer; + ::tracy::Profiler::MessageColor(msg.utf8_str(&buffer), msg.length, color, 0); +} + +void __hxcpp_tracy_message_app_info(String info) +{ + hx::strbuf buffer; + ::tracy::Profiler::MessageAppInfo(info.utf8_str(&buffer), info.length); +} + +void __hxcpp_tracy_set_thread_name_and_group(String name, int groupHint) +{ + hx::strbuf buffer; + ::tracy::SetThreadNameWithHint(name.utf8_str(&buffer), groupHint); +} + +int __hxcpp_tracy_get_zone_count() +{ + return static_cast(hx::StackContext::getCurrent()->mTelemetry->tracyZones.size()); +} diff --git a/src/hx/gc/Immix.cpp
b/src/hx/gc/Immix.cpp index 05d0ac842..880b51c62 100644 --- a/src/hx/gc/Immix.cpp +++ b/src/hx/gc/Immix.cpp @@ -3207,6 +3207,10 @@ class GlobalAllocator void FreeLarge(void *inLarge) { +#ifdef HXCPP_TELEMETRY + __hxt_gc_free_large(inLarge); +#endif + ((unsigned char *)inLarge)[HX_ENDIAN_MARK_ID_BYTE] = 0; // AllocLarge will not lock this list unless it decides there is a suitable // value, so we can't doa realloc without potentially crashing it. @@ -3326,6 +3330,10 @@ class GlobalAllocator if (do_lock) mLargeListLock.Unlock(); +#ifdef HXCPP_TELEMETRY + __hxt_gc_alloc(result + 2, inSize); +#endif + return result+2; } @@ -6323,6 +6331,10 @@ class LocalAllocator : public hx::StackContext hx::GCOnNewPointer(buffer); #endif + #ifdef HXCPP_TELEMETRY + __hxt_gc_alloc(buffer, inSize); + #endif + return buffer; } if (mFraggedRows) @@ -6653,8 +6665,7 @@ void SetTopOfStack(int *inTop,bool inForce) void *InternalNew(int inSize,bool inIsObject) { - //HX_STACK_FRAME("GC", "new", 0, "GC::new", "src/hx/GCInternal.cpp", __LINE__, 0) - HX_STACK_FRAME("GC", "new", 0, "GC::new", "src/hx/GCInternal.cpp", inSize, 0) + // HX_STACK_FRAME("GC", "new", 0, "GC::new", __FILE__, __LINE__, 0) #ifdef HXCPP_DEBUG if (sgSpamCollects && sgAllocsSinceLastSpam>=sgSpamCollects) @@ -6765,7 +6776,7 @@ void *InternalRealloc(int inFromSize, void *inData,int inSize, bool inExpand) return hx::InternalNew(inSize,false); } - HX_STACK_FRAME("GC", "realloc", 0, "GC::relloc", __FILE__ , __LINE__, 0) + // HX_STACK_FRAME("GC", "realloc", 0, "GC::relloc", __FILE__ , __LINE__, 0) #ifdef HXCPP_DEBUG if (sgSpamCollects && sgAllocsSinceLastSpam>=sgSpamCollects) diff --git a/toolchain/common-defines.xml b/toolchain/common-defines.xml index 3f8c111a9..5481a41ee 100644 --- a/toolchain/common-defines.xml +++ b/toolchain/common-defines.xml @@ -39,6 +39,18 @@ + + + + + + + + + + + + diff --git a/toolchain/haxe-target.xml b/toolchain/haxe-target.xml index a795596ab..081c15bc5 100644 --- a/toolchain/haxe-target.xml +++ b/toolchain/haxe-target.xml @@ -133,6 +133,16 @@ + + + + + + + + + + @@ -159,7 +169,12 @@ - + +
+ + +
+ @@ -233,6 +248,7 @@ +