diff --git a/lib/pure/bitops.nim b/lib/pure/bitops.nim index 57e3c79896141..377602e75d9a6 100644 --- a/lib/pure/bitops.nim +++ b/lib/pure/bitops.nim @@ -27,6 +27,9 @@ import macros import std/private/since +from std/private/vmutils import forwardImpl, toUnsigned + + func bitnot*[T: SomeInteger](x: T): T {.magic: "BitnotI".} ## Computes the `bitwise complement` of the integer `x`. @@ -58,34 +61,6 @@ macro bitxor*[T: SomeInteger](x, y: T; z: varargs[T]): T = for extra in z: result = newCall(fn, result, extra) -const useBuiltins = not defined(noIntrinsicsBitOpts) -const noUndefined = defined(noUndefinedBitOpts) -const useGCC_builtins = (defined(gcc) or defined(llvm_gcc) or - defined(clang)) and useBuiltins -const useICC_builtins = defined(icc) and useBuiltins -const useVCC_builtins = defined(vcc) and useBuiltins -const arch64 = sizeof(int) == 8 -const useBuiltinsRotate = (defined(amd64) or defined(i386)) and - (defined(gcc) or defined(clang) or defined(vcc) or - (defined(icl) and not defined(cpp))) and useBuiltins - -template toUnsigned(x: int8): uint8 = cast[uint8](x) -template toUnsigned(x: int16): uint16 = cast[uint16](x) -template toUnsigned(x: int32): uint32 = cast[uint32](x) -template toUnsigned(x: int64): uint64 = cast[uint64](x) -template toUnsigned(x: int): uint = cast[uint](x) - -template forwardImpl(impl, arg) {.dirty.} = - when sizeof(x) <= 4: - when x is SomeSignedInt: - impl(cast[uint32](x.int32)) - else: - impl(x.uint32) - else: - when x is SomeSignedInt: - impl(cast[uint64](x.int64)) - else: - impl(x.uint64) type BitsRange*[T] = range[0..sizeof(T)*8-1] ## A range with all bit positions for type `T`. @@ -436,13 +411,12 @@ func fastlog2Nim(x: uint64): int {.inline.} = v = v or v shr 32 result = lookup[(v * 0x03F6EAF2CD271461'u64) shr 58].int -# sets.nim cannot import bitops, but bitops can use include -# system/sets to eliminate code duplication. sets.nim defines -# countBits32 and countBits64. import system/countbits_impl -template countSetBitsNim(n: uint32): int = countBits32(n) -template countSetBitsNim(n: uint64): int = countBits64(n) +const arch64 = sizeof(int) == 8 +const useBuiltinsRotate = (defined(amd64) or defined(i386)) and + (defined(gcc) or defined(clang) or defined(vcc) or + (defined(icl) and not defined(cpp))) and useBuiltins template parityImpl[T](value: T): int = # formula id from: https://graphics.stanford.edu/%7Eseander/bithacks.html#ParityParallel @@ -459,11 +433,6 @@ template parityImpl[T](value: T): int = when useGCC_builtins: - # Returns the number of set 1-bits in value. - proc builtin_popcount(x: cuint): cint {.importc: "__builtin_popcount", cdecl.} - proc builtin_popcountll(x: culonglong): cint {. - importc: "__builtin_popcountll", cdecl.} - # Returns the bit parity in value proc builtin_parity(x: cuint): cint {.importc: "__builtin_parity", cdecl.} proc builtin_parityll(x: culonglong): cint {.importc: "__builtin_parityll", cdecl.} @@ -481,14 +450,6 @@ when useGCC_builtins: proc builtin_ctzll(x: culonglong): cint {.importc: "__builtin_ctzll", cdecl.} elif useVCC_builtins: - # Counts the number of one bits (population count) in a 16-, 32-, or 64-byte unsigned integer. - func builtin_popcnt16(a2: uint16): uint16 {. - importc: "__popcnt16", header: "".} - func builtin_popcnt32(a2: uint32): uint32 {. - importc: "__popcnt", header: "".} - func builtin_popcnt64(a2: uint64): uint64 {. - importc: "__popcnt64", header: "".} - # Search the mask data from most significant bit (MSB) to least significant bit (LSB) for a set bit (1). func bitScanReverse(index: ptr culong, mask: culong): cuchar {. importc: "_BitScanReverse", header: "".} @@ -507,15 +468,6 @@ elif useVCC_builtins: index.int elif useICC_builtins: - - # Intel compiler intrinsics: http://fulla.fnal.gov/intel/compiler_c/main_cls/intref_cls/common/intref_allia_misc.htm - # see also: https://software.intel.com/en-us/node/523362 - # Count the number of bits set to 1 in an integer a, and return that count in dst. - func builtin_popcnt32(a: cint): cint {. - importc: "_popcnt", header: "".} - func builtin_popcnt64(a: uint64): cint {. - importc: "_popcnt64", header: "".} - # Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined. func bitScanForward(p: ptr uint32, b: uint32): cuchar {. importc: "_BitScanForward", header: "".} @@ -533,37 +485,13 @@ elif useICC_builtins: discard fnc(index.addr, v) index.int - func countSetBits*(x: SomeInteger): int {.inline.} = ## Counts the set bits in an integer (also called `Hamming weight`:idx:). runnableExamples: doAssert countSetBits(0b0000_0011'u8) == 2 doAssert countSetBits(0b1010_1010'u8) == 4 - # TODO: figure out if ICC support _popcnt32/_popcnt64 on platform without POPCNT. - # like GCC and MSVC - when x is SomeSignedInt: - let x = x.toUnsigned - when nimvm: - result = forwardImpl(countSetBitsNim, x) - else: - when useGCC_builtins: - when sizeof(x) <= 4: result = builtin_popcount(x.cuint).int - else: result = builtin_popcountll(x.culonglong).int - elif useVCC_builtins: - when sizeof(x) <= 2: result = builtin_popcnt16(x.uint16).int - elif sizeof(x) <= 4: result = builtin_popcnt32(x.uint32).int - elif arch64: result = builtin_popcnt64(x.uint64).int - else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).uint32).int + - builtin_popcnt32((x.uint64 shr 32'u64).uint32).int - elif useICC_builtins: - when sizeof(x) <= 4: result = builtin_popcnt32(x.cint).int - elif arch64: result = builtin_popcnt64(x.uint64).int - else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).cint).int + - builtin_popcnt32((x.uint64 shr 32'u64).cint).int - else: - when sizeof(x) <= 4: result = countSetBitsNim(x.uint32) - else: result = countSetBitsNim(x.uint64) + result = countSetBitsImpl(x) func popcount*(x: SomeInteger): int {.inline.} = ## Alias for `countSetBits <#countSetBits,SomeInteger>`_ (Hamming weight). diff --git a/lib/std/private/vmutils.nim b/lib/std/private/vmutils.nim new file mode 100644 index 0000000000000..d54977b4e23a5 --- /dev/null +++ b/lib/std/private/vmutils.nim @@ -0,0 +1,17 @@ +template forwardImpl*(impl, arg) {.dirty.} = + when sizeof(x) <= 4: + when x is SomeSignedInt: + impl(cast[uint32](x.int32)) + else: + impl(x.uint32) + else: + when x is SomeSignedInt: + impl(cast[uint64](x.int64)) + else: + impl(x.uint64) + +template toUnsigned*(x: int8): uint8 = cast[uint8](x) +template toUnsigned*(x: int16): uint16 = cast[uint16](x) +template toUnsigned*(x: int32): uint32 = cast[uint32](x) +template toUnsigned*(x: int64): uint64 = cast[uint64](x) +template toUnsigned*(x: int): uint = cast[uint](x) diff --git a/lib/system/countbits_impl.nim b/lib/system/countbits_impl.nim index 6c85612e25ace..d3c003ff7a181 100644 --- a/lib/system/countbits_impl.nim +++ b/lib/system/countbits_impl.nim @@ -9,17 +9,86 @@ ## Contains the used algorithms for counting bits. -proc countBits32*(n: uint32): int {.compilerproc.} = +from std/private/vmutils import forwardImpl, toUnsigned + + +const useBuiltins* = not defined(noIntrinsicsBitOpts) +const noUndefined* = defined(noUndefinedBitOpts) +const useGCC_builtins* = (defined(gcc) or defined(llvm_gcc) or + defined(clang)) and useBuiltins +const useICC_builtins* = defined(icc) and useBuiltins +const useVCC_builtins* = defined(vcc) and useBuiltins + +template countBitsImpl(n: uint32): int = # generic formula is from: https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel var v = uint32(n) v = v - ((v shr 1'u32) and 0x55555555'u32) v = (v and 0x33333333'u32) + ((v shr 2'u32) and 0x33333333'u32) - result = (((v + (v shr 4'u32) and 0xF0F0F0F'u32) * 0x1010101'u32) shr 24'u32).int + (((v + (v shr 4'u32) and 0xF0F0F0F'u32) * 0x1010101'u32) shr 24'u32).int -proc countBits64*(n: uint64): int {.compilerproc, inline.} = +template countBitsImpl(n: uint64): int = # generic formula is from: https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel var v = uint64(n) v = v - ((v shr 1'u64) and 0x5555555555555555'u64) v = (v and 0x3333333333333333'u64) + ((v shr 2'u64) and 0x3333333333333333'u64) v = (v + (v shr 4'u64) and 0x0F0F0F0F0F0F0F0F'u64) - result = ((v * 0x0101010101010101'u64) shr 56'u64).int + ((v * 0x0101010101010101'u64) shr 56'u64).int + + +when useGCC_builtins: + # Returns the number of set 1-bits in value. + proc builtin_popcount(x: cuint): cint {.importc: "__builtin_popcount", cdecl.} + proc builtin_popcountll(x: culonglong): cint {. + importc: "__builtin_popcountll", cdecl.} + +elif useVCC_builtins: + # Counts the number of one bits (population count) in a 16-, 32-, or 64-byte unsigned integer. + func builtin_popcnt16(a2: uint16): uint16 {. + importc: "__popcnt16", header: "".} + func builtin_popcnt32(a2: uint32): uint32 {. + importc: "__popcnt", header: "".} + func builtin_popcnt64(a2: uint64): uint64 {. + importc: "__popcnt64", header: "".} + +elif useICC_builtins: + # Intel compiler intrinsics: http://fulla.fnal.gov/intel/compiler_c/main_cls/intref_cls/common/intref_allia_misc.htm + # see also: https://software.intel.com/en-us/node/523362 + # Count the number of bits set to 1 in an integer a, and return that count in dst. + func builtin_popcnt32(a: cint): cint {. + importc: "_popcnt", header: "".} + func builtin_popcnt64(a: uint64): cint {. + importc: "_popcnt64", header: "".} + + +func countSetBitsImpl*(x: SomeInteger): int {.inline.} = + ## Counts the set bits in an integer (also called `Hamming weight`:idx:). + # TODO: figure out if ICC support _popcnt32/_popcnt64 on platform without POPCNT. + # like GCC and MSVC + when x is SomeSignedInt: + let x = x.toUnsigned + when nimvm: + result = forwardImpl(countBitsImpl, x) + else: + when useGCC_builtins: + when sizeof(x) <= 4: result = builtin_popcount(x.cuint).int + else: result = builtin_popcountll(x.culonglong).int + elif useVCC_builtins: + when sizeof(x) <= 2: result = builtin_popcnt16(x.uint16).int + elif sizeof(x) <= 4: result = builtin_popcnt32(x.uint32).int + elif arch64: result = builtin_popcnt64(x.uint64).int + else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).uint32).int + + builtin_popcnt32((x.uint64 shr 32'u64).uint32).int + elif useICC_builtins: + when sizeof(x) <= 4: result = builtin_popcnt32(x.cint).int + elif arch64: result = builtin_popcnt64(x.uint64).int + else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).cint).int + + builtin_popcnt32((x.uint64 shr 32'u64).cint).int + else: + when sizeof(x) <= 4: result = countBitsImpl(x.uint32) + else: result = countBitsImpl(x.uint64) + +proc countBits32*(n: uint32): int {.compilerproc, inline.} = + result = countSetBitsImpl(n) + +proc countBits64*(n: uint64): int {.compilerproc, inline.} = + result = countSetBitsImpl(n) diff --git a/lib/system/sets.nim b/lib/system/sets.nim index 04e10ba04c4b7..103c8d343ebe2 100644 --- a/lib/system/sets.nim +++ b/lib/system/sets.nim @@ -12,7 +12,6 @@ type NimSet = array[0..4*2048-1, uint8] -# bitops can't be imported here, therefore the code duplication. proc cardSet(s: NimSet, len: int): int {.compilerproc, inline.} = var i = 0