/* Get the number of bytes required to hold the value in 'count', rounded up
 * to a power of 2.
 *
 * Returns 1, 2, 4 or 8. A 'count' of 0 yields 1.
 */
CF_INLINE uint8_t _byteCount(uint64_t count) {
    // __builtin_clzll(0) is undefined, so zero is answered directly.
    if (count == 0) {
        return 1;
    }

    // Number of significant bits in 'count' (1..64): 64 minus the leading zero bits.
    unsigned bitLength = 64u - (unsigned)__builtin_clzll(count);

    // Whole bytes needed to hold that many bits (1..8), rounding up.
    unsigned byteLength = (bitLength + 7u) >> 3;

    // 1 and 2 are already powers of 2.
    if (byteLength <= 2u) {
        return (uint8_t)byteLength;
    }

    // Round 3..4 up to 4 and 5..8 up to 8. This is written as a comparison
    // rather than a second bit-scan plus shift: a bit-scan builtin is undefined
    // for a zero argument and a shift by the full width of the type is
    // undefined, and both are easy to hit by accident here.
    return (uint8_t)(byteLength <= 4u ? 4u : 8u);
}