From 0c1e3b893b94f0351070b970f98c641972161bba Mon Sep 17 00:00:00 2001 From: Hadrien G Date: Thu, 16 May 2024 12:15:41 +0200 Subject: [PATCH] Remove harmful table lookup optimization for bit operations (#5772) CPUs have efficient instructions for querying, setting and clearing bits, and modern compilers know how to turn simple bit indexing code into such instructions. The table lookup optimizations may have been useful in older versions of rustc, but as of rustc 1.78, they are a net loss. See PR description for more details. --- arrow-buffer/src/util/bit_util.rs | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/arrow-buffer/src/util/bit_util.rs b/arrow-buffer/src/util/bit_util.rs index f4aaf571b5d6..bf14525bbd6b 100644 --- a/arrow-buffer/src/util/bit_util.rs +++ b/arrow-buffer/src/util/bit_util.rs @@ -17,18 +17,6 @@ //! Utils for working with bits -const BIT_MASK: [u8; 8] = [1, 2, 4, 8, 16, 32, 64, 128]; -const UNSET_BIT_MASK: [u8; 8] = [ - 255 - 1, - 255 - 2, - 255 - 4, - 255 - 8, - 255 - 16, - 255 - 32, - 255 - 64, - 255 - 128, -]; - /// Returns the nearest number that is `>=` than `num` and is a multiple of 64 #[inline] pub fn round_upto_multiple_of_64(num: usize) -> usize { @@ -47,7 +35,7 @@ pub fn round_upto_power_of_2(num: usize, factor: usize) -> usize { /// Returns whether bit at position `i` in `data` is set or not #[inline] pub fn get_bit(data: &[u8], i: usize) -> bool { - (data[i >> 3] & BIT_MASK[i & 7]) != 0 + data[i / 8] & (1 << (i % 8)) != 0 } /// Returns whether bit at position `i` in `data` is set or not. @@ -58,13 +46,13 @@ pub fn get_bit(data: &[u8], i: usize) -> bool { /// responsible to guarantee that `i` is within bounds. #[inline] pub unsafe fn get_bit_raw(data: *const u8, i: usize) -> bool { - (*data.add(i >> 3) & BIT_MASK[i & 7]) != 0 + (*data.add(i / 8) & (1 << (i % 8))) != 0 } /// Sets bit at position `i` for `data` to 1 #[inline] pub fn set_bit(data: &mut [u8], i: usize) { - data[i >> 3] |= BIT_MASK[i & 7]; + data[i / 8] |= 1 << (i % 8); } /// Sets bit at position `i` for `data` @@ -75,13 +63,13 @@ pub fn set_bit(data: &mut [u8], i: usize) { /// responsible to guarantee that `i` is within bounds. #[inline] pub unsafe fn set_bit_raw(data: *mut u8, i: usize) { - *data.add(i >> 3) |= BIT_MASK[i & 7]; + *data.add(i / 8) |= 1 << (i % 8); } /// Sets bit at position `i` for `data` to 0 #[inline] pub fn unset_bit(data: &mut [u8], i: usize) { - data[i >> 3] &= UNSET_BIT_MASK[i & 7]; + data[i / 8] &= !(1 << (i % 8)); } /// Sets bit at position `i` for `data` to 0 @@ -92,7 +80,7 @@ pub fn unset_bit(data: &mut [u8], i: usize) { /// responsible to guarantee that `i` is within bounds. #[inline] pub unsafe fn unset_bit_raw(data: *mut u8, i: usize) { - *data.add(i >> 3) &= UNSET_BIT_MASK[i & 7]; + *data.add(i / 8) &= !(1 << (i % 8)); } /// Returns the ceil of `value`/`divisor`