From e5e1b7b156fc71823d249e58860d90c7a9a21710 Mon Sep 17 00:00:00 2001
From: Rose <83477269+AtariDreams@users.noreply.github.com>
Date: Thu, 28 Sep 2023 20:06:50 -0400
Subject: [PATCH] Optimize _byteCount with compiler intrinsics

Use compiler intrinsics to make _byteCount faster
---
 .../Parsing.subproj/CFBinaryPList.c           | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/CoreFoundation/Parsing.subproj/CFBinaryPList.c b/CoreFoundation/Parsing.subproj/CFBinaryPList.c
index 54a4877c0b..d7367ba2b0 100644
--- a/CoreFoundation/Parsing.subproj/CFBinaryPList.c
+++ b/CoreFoundation/Parsing.subproj/CFBinaryPList.c
@@ -518,23 +518,36 @@ static void _flattenPlist(CFPropertyListRef plist, CFMutableArrayRef objlist, CF
 
 /* Get the number of bytes required to hold the value in 'count'. Will return a power of 2 value big enough to hold 'count'.
  */
 CF_INLINE uint8_t _byteCount(uint64_t count) {
+#if defined(__GNUC__) || defined(__clang__)
+    // One byte covers 0..255; this also keeps 0 away from __builtin_clzll, which is undefined for 0
+    if (count <= UINT8_MAX) {
+        return 1;
+    }
+
+    // Bytes needed to reach the highest set bit (2..8 here)
+    unsigned int byteCount = (64U - (unsigned int)__builtin_clzll(count) + 7U) >> 3;
+
+    // Round up to the next power of 2; byteCount - 1 is at least 1, so __builtin_clz is well defined
+    return (uint8_t)(1U << (32U - (unsigned int)__builtin_clz(byteCount - 1U)));
+#else
     uint64_t mask = ~(uint64_t)0;
     uint8_t size = 0;
 
     // Find something big enough to hold 'count'
     while (count & mask) {
         size++;
         mask = mask << 8;
     }
 
     // Ensure that 'count' is a power of 2
     // For sizes bigger than 8, just use the required count
     while ((size != 1 && size != 2 && size != 4 && size != 8) && size <= 8) {
         size++;
     }
 
     return size;
+#endif
 }
 
 // stream can be a CFWriteStreamRef (on supported platforms) or a CFMutableDataRef