Skip to content

Commit

Permalink
Merge pull request #90 from glycerine/fix89
Browse files Browse the repository at this point in the history
atg. eliminate ctz asm. fixes #89
  • Loading branch information
lemire authored Dec 23, 2016
2 parents 9641042 + 486d62f commit af992ed
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 147 deletions.
69 changes: 51 additions & 18 deletions ctz.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,37 @@
// +build amd64,!appengine

package roaring

// Reuse of portions of go/src/math/big standard lib code
// under this license:
/*
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

const deBruijn32 = 0x077CB531

var deBruijn32Lookup = []byte{
Expand All @@ -18,19 +48,22 @@ var deBruijn64Lookup = []byte{
54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6,
}

// countTrailingZerosAsm counts the number of zeros
// from the least-significant bit up to the
// first set (1) bit. If x is 0, 64 is returned.
//
// NOTE(review): the tests in ctz_test.go skip the x == 0 case for this
// function, stating that the result is undefined on older CPUs; confirm
// before relying on the 64 result for zero input.
//
// references:
// a. https://en.wikipedia.org/wiki/Find_first_set
// b. TZCNTQ on amd64, page 364 of http://support.amd.com/TechDocs/24594.pdf
//
// *** the following function is defined in ctz_amd64.s
//
// TODO: possibly use "github.com/klauspost/cpuid"
// to check if cpuid.CPU.BMI1() is true before using the assembly version.
//
// The Go version is in ctz_generic.go.
//
func countTrailingZerosAsm(x uint64) int
// trailingZeroBits returns the number of consecutive least significant zero
// bits of x.
func countTrailingZerosDeBruijn(x uint64) int {
// x & -x leaves only the right-most bit set in the word. Let k be the
// index of that bit. Since only a single bit is set, the value is two
// to the power of k. Multiplying by a power of two is equivalent to
// left shifting, in this case by k bits. The de Bruijn constant is
// such that all six bit, consecutive substrings are distinct.
// Therefore, if we have a left shifted version of this constant we can
// find by how many bits it was shifted by looking at which six bit
// substring ended up at the top of the word.
// (Knuth, volume 4, section 7.3.1)
if x == 0 {
// We have to special case 0; the fomula
// below doesn't work for 0.
return 64
}
return int(deBruijn64Lookup[((x&-x)*(deBruijn64))>>58])
}
8 changes: 0 additions & 8 deletions ctz_amd64.s

This file was deleted.

53 changes: 0 additions & 53 deletions ctz_generic.go

This file was deleted.

73 changes: 6 additions & 67 deletions ctz_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package roaring

import (
"encoding/binary"
"fmt"
"math/rand"
"testing"

Expand All @@ -11,14 +10,12 @@ import (

func TestCountTrailingZeros072(t *testing.T) {
Convey("countTrailingZeros", t, func() {
// undefined on older cpus, so skip this check on 0.
//So(countTrailingZerosAsm(0), ShouldEqual, 64)

So(countTrailingZerosAsm(8), ShouldEqual, 3)
So(countTrailingZerosAsm(7), ShouldEqual, 0)
So(countTrailingZerosAsm(1<<17), ShouldEqual, 17)
So(countTrailingZerosAsm(7<<17), ShouldEqual, 17)
So(countTrailingZerosAsm(255<<33), ShouldEqual, 33)
So(numberOfTrailingZeros(0), ShouldEqual, 64)
So(numberOfTrailingZeros(8), ShouldEqual, 3)
So(numberOfTrailingZeros(7), ShouldEqual, 0)
So(numberOfTrailingZeros(1<<17), ShouldEqual, 17)
So(numberOfTrailingZeros(7<<17), ShouldEqual, 17)
So(numberOfTrailingZeros(255<<33), ShouldEqual, 33)

So(countTrailingZerosDeBruijn(0), ShouldEqual, 64)
So(countTrailingZerosDeBruijn(8), ShouldEqual, 3)
Expand Down Expand Up @@ -82,21 +79,6 @@ func Benchmark100CountTrailingZerosDeBruijn(b *testing.B) {
}
}

// Benchmark100CountTrailingZerosAsm times countTrailingZerosAsm over the
// values from getRandomUint64Set(64) followed by the values from
// getAllOneBitUint64Set. The inputs are built while the timer is stopped,
// so only the calls themselves are measured.
func Benchmark100CountTrailingZerosAsm(b *testing.B) {
	b.StopTimer()

	inputs := append(getRandomUint64Set(64), getAllOneBitUint64Set()...)

	b.ResetTimer()
	b.StartTimer()
	for iter := 0; iter < b.N; iter++ {
		for _, v := range inputs {
			countTrailingZerosAsm(v)
		}
	}
}

func numberOfTrailingZeros(i uint64) int {
if i == 0 {
return 64
Expand Down Expand Up @@ -130,46 +112,3 @@ func numberOfTrailingZeros(i uint64) int {
}
return int(n - int64(uint64(x<<1)>>63))
}

/*
//
// on an Intel(R) Core(TM) i7-5557U CPU @ 3.10GHz:
//
Benchmark100CountTrailingZerosDeBruijn-4 10000000 168 ns/op
Benchmark100CountTrailingZerosAsm-4 5000000 278 ns/op
Benchmark100OrigNumberOfTrailingZeros-4 3000000 592 ns/op
// and again:
Benchmark100CountTrailingZerosDeBruijn-4 10000000 168 ns/op
Benchmark100CountTrailingZerosAsm-4 5000000 278 ns/op
Benchmark100OrigNumberOfTrailingZeros-4 3000000 585 ns/op
*/
// Test101CountTrailingZerosCorrectness cross-checks countTrailingZerosAsm
// against countTrailingZerosDeBruijn on every value returned by
// getAllOneBitUint64Set, and panics on the first disagreement.
//
// Suggested invocation:
//
//	go test -v -bench=100 -run 101
func Test101CountTrailingZerosCorrectness(t *testing.T) {
	for idx, val := range getAllOneBitUint64Set() {
		viaDeBruijn := countTrailingZerosDeBruijn(val)
		viaAsm := countTrailingZerosAsm(val)
		if viaDeBruijn == viaAsm {
			continue
		}
		panic(fmt.Errorf("on r[%v]= v=%v, a: %v, b:%v", idx, val, viaDeBruijn, viaAsm))
	}

	// Zero is deliberately left out of the comparison, since the Asm
	// version can be undefined for older architectures.
	/*
		related Intel spec:
		LZCNT is an extension of the BSR instruction. The key difference
		between LZCNT and BSR is that LZCNT provides operand size as output
		when source operand is zero, while in the case of BSR instruction,
		if source operand is zero, the content of destination operand are
		undefined. On processors that do not support LZCNT, the instruction
		byte encoding is executed as BSR.
		if countTrailingZerosAsm(0) != countTrailingZerosDeBruijn(0) {
			panic("disagree on zero value")
		}
	*/
}
2 changes: 1 addition & 1 deletion serialization_generic.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ func (b *bitmapContainer) readFrom(stream io.Reader) (int, error) {
// asLittleEndianByteSlice returns a newly allocated byte slice of length
// len(bc.bitmap)*8 in which each word of bc.bitmap is encoded, in index
// order, as 8 little-endian bytes.
func (bc *bitmapContainer) asLittleEndianByteSlice() []byte {
	by := make([]byte, len(bc.bitmap)*8)
	for i := range bc.bitmap {
		// Word i occupies bytes [i*8, i*8+8) of the output. The write must
		// go into by, the slice allocated above and returned below; no
		// variable named buf exists in this function.
		binary.LittleEndian.PutUint64(by[i*8:], bc.bitmap[i])
	}
	return by
}

0 comments on commit af992ed

Please sign in to comment.