diff --git a/arraycontainer.go b/arraycontainer.go
index 621616f5..eb124f3b 100644
--- a/arraycontainer.go
+++ b/arraycontainer.go
@@ -24,6 +24,20 @@ func (ac *arrayContainer) fillLeastSignificant16bits(x []uint32, i int, mask uin
 	}
 }
 
+// iterate calls cb with each value produced by the container's iterator and
+// returns false if cb stopped the iteration early, true otherwise.
+func (ac *arrayContainer) iterate(cb func(x uint16) bool) bool {
+	iterator := shortIterator{ac.content, 0}
+
+	for iterator.hasNext() {
+		if !cb(iterator.next()) {
+			return false
+		}
+	}
+
+	return true
+}
+
 func (ac *arrayContainer) getShortIterator() shortPeekable {
 	return &shortIterator{ac.content, 0}
 }
diff --git a/benchmark_test.go b/benchmark_test.go
index b90a5fe5..6693315a 100644
--- a/benchmark_test.go
+++ b/benchmark_test.go
@@ -363,23 +363,84 @@ func BenchmarkCountBitset(b *testing.B) {
 
 // go test -bench BenchmarkIterate -run -
 func BenchmarkIterateRoaring(b *testing.B) {
-	b.StopTimer()
-	r := rand.New(rand.NewSource(0))
-	s := NewBitmap()
-	sz := 150000
-	initsize := 65000
-	for i := 0; i < initsize; i++ {
-		s.Add(uint32(r.Int31n(int32(sz))))
-	}
-	b.StartTimer()
-	for j := 0; j < b.N; j++ {
-		c9 = uint(0)
-		i := s.Iterator()
-		for i.HasNext() {
-			i.Next()
-			c9++
+	newBitmap := func() *Bitmap {
+		r := rand.New(rand.NewSource(0))
+		s := NewBitmap()
+		sz := 150000
+		initsize := 65000
+		for i := 0; i < initsize; i++ {
+			s.Add(uint32(r.Int31n(int32(sz))))
 		}
+		return s
 	}
+
+	b.Run("iterator-compressed", func(b *testing.B) {
+		b.ReportAllocs()
+
+		s := newBitmap()
+		s.RunOptimize()
+
+		b.ResetTimer()
+
+		for j := 0; j < b.N; j++ {
+			c9 = uint(0)
+			i := s.Iterator()
+			for i.HasNext() {
+				i.Next()
+				c9++
+			}
+		}
+	})
+
+	b.Run("iterator", func(b *testing.B) {
+		b.ReportAllocs()
+
+		s := newBitmap()
+
+		b.ResetTimer()
+
+		for j := 0; j < b.N; j++ {
+			c9 = uint(0)
+			i := s.Iterator()
+			for i.HasNext() {
+				i.Next()
+				c9++
+			}
+		}
+	})
+
+	b.Run("iterate-compressed", func(b *testing.B) {
+		b.ReportAllocs()
+
+		s := newBitmap()
+		s.RunOptimize()
+
+		b.ResetTimer()
+
+		for j := 0; j < b.N; j++ {
+			c9 = uint(0)
+			s.Iterate(func(x uint32) bool {
+				c9++
+				return true
+			})
+		}
+	})
+
+	b.Run("iterate", func(b *testing.B) {
+		b.ReportAllocs()
+
+		s := newBitmap()
+
+		b.ResetTimer()
+
+		for j := 0; j < b.N; j++ {
+			c9 = uint(0)
+			s.Iterate(func(x uint32) bool {
+				c9++
+				return true
+			})
+		}
+	})
 }
 
 // go test -bench BenchmarkSparseIterate -run -
diff --git a/bitmapcontainer.go b/bitmapcontainer.go
index e749721b..cd259fd2 100644
--- a/bitmapcontainer.go
+++ b/bitmapcontainer.go
@@ -96,6 +96,20 @@ func (bc *bitmapContainer) maximum() uint16 {
 	return uint16(0)
 }
 
+// iterate calls cb with each value produced by the container's iterator and
+// returns false if cb stopped the iteration early, true otherwise.
+func (bc *bitmapContainer) iterate(cb func(x uint16) bool) bool {
+	iterator := bitmapContainerShortIterator{ptr: bc, i: bc.NextSetBit(0)}
+
+	for iterator.hasNext() {
+		if !cb(iterator.next()) {
+			return false
+		}
+	}
+
+	return true
+}
+
 type bitmapContainerShortIterator struct {
 	ptr *bitmapContainer
 	i   int
diff --git a/roaring.go b/roaring.go
index 51724d67..ed75d58b 100644
--- a/roaring.go
+++ b/roaring.go
@@ -416,6 +416,44 @@ func (rb *Bitmap) String() string {
 	return buffer.String()
 }
 
+// Iterate iterates over the bitmap, calling the given callback with each value in the bitmap. If the callback returns
+// false, the iteration is halted.
+// The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove).
+// There is no guarantee as to what order the values will be iterated.
+func (rb *Bitmap) Iterate(cb func(x uint32) bool) {
+	for i := 0; i < rb.highlowcontainer.size(); i++ {
+		hs := uint32(rb.highlowcontainer.getKeyAtIndex(i)) << 16
+		c := rb.highlowcontainer.getContainerAtIndex(i)
+
+		var shouldContinue bool
+		// This is hacky but it avoids allocations from invoking an interface method with a closure
+		switch t := c.(type) {
+		case *arrayContainer:
+			shouldContinue = t.iterate(func(x uint16) bool {
+				return cb(uint32(x) | hs)
+			})
+		case *runContainer16:
+			shouldContinue = t.iterate(func(x uint16) bool {
+				return cb(uint32(x) | hs)
+			})
+		case *bitmapContainer:
+			shouldContinue = t.iterate(func(x uint16) bool {
+				return cb(uint32(x) | hs)
+			})
+		default:
+			// Any other container implementation: go through the interface method (accepting the
+			// allocation the fast paths avoid) instead of silently ending the iteration here.
+			shouldContinue = c.iterate(func(x uint16) bool {
+				return cb(uint32(x) | hs)
+			})
+		}
+
+		if !shouldContinue {
+			break
+		}
+	}
+}
+
 // Iterator creates a new IntPeekable to iterate over the integers contained in the bitmap, in sorted order;
 // the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
 func (rb *Bitmap) Iterator() IntPeekable {
diff --git a/roaring_test.go b/roaring_test.go
index 5a031a0d..bb0ece29 100644
--- a/roaring_test.go
+++ b/roaring_test.go
@@ -2308,3 +2308,79 @@ func TestBitmapFlipMaxRangeEnd(t *testing.T) {
 
 	assert.EqualValues(t, MaxRange, bm.GetCardinality())
 }
+
+func TestIterate(t *testing.T) {
+	rb := NewBitmap()
+
+	for i := 0; i < 300; i++ {
+		rb.Add(uint32(i))
+	}
+
+	var values []uint32
+	rb.Iterate(func(x uint32) bool {
+		values = append(values, x)
+		return true
+	})
+
+	assert.Equal(t, rb.ToArray(), values)
+}
+
+func TestIterateCompressed(t *testing.T) {
+	rb := NewBitmap()
+
+	for i := 0; i < 300; i++ {
+		rb.Add(uint32(i))
+	}
+
+	rb.RunOptimize()
+
+	var values []uint32
+	rb.Iterate(func(x uint32) bool {
+		values = append(values, x)
+		return true
+	})
+
+	assert.Equal(t, rb.ToArray(), values)
+}
+
+func TestIterateLargeValues(t *testing.T) {
+	rb := NewBitmap()
+
+	// This range of values ensures that all different types of containers will be used
+	for i := 150000; i < 450000; i++ {
+		rb.Add(uint32(i))
+	}
+
+	var values []uint32
+	rb.Iterate(func(x uint32) bool {
+		values = append(values, x)
+		return true
+	})
+
+	assert.Equal(t, rb.ToArray(), values)
+}
+
+func TestIterateHalt(t *testing.T) {
+	rb := NewBitmap()
+
+	// This range of values ensures that all different types of containers will be used
+	for i := 150000; i < 450000; i++ {
+		rb.Add(uint32(i))
+	}
+
+	var values []uint32
+	count := uint64(0)
+	stopAt := rb.GetCardinality() - 1
+	rb.Iterate(func(x uint32) bool {
+		values = append(values, x)
+		count++
+		if count == stopAt {
+			return false
+		}
+		return true
+	})
+
+	expected := rb.ToArray()
+	expected = expected[0 : len(expected)-1]
+	assert.Equal(t, expected, values)
+}
diff --git a/roaringarray.go b/roaringarray.go
index 2f46fbb0..3dddbffd 100644
--- a/roaringarray.go
+++ b/roaringarray.go
@@ -4,9 +4,10 @@ import (
 	"bytes"
 	"encoding/binary"
 	"fmt"
+	"io"
+
 	snappy "github.com/glycerine/go-unsnap-stream"
 	"github.com/tinylib/msgp/msgp"
-	"io"
 )
 
 //go:generate msgp -unexported
@@ -38,6 +39,7 @@ type container interface {
 	inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx)
 	xor(r container) container
 	getShortIterator() shortPeekable
+	iterate(cb func(x uint16) bool) bool
 	getReverseIterator() shortIterable
 	getManyIterator() manyIterable
 	contains(i uint16) bool
diff --git a/runcontainer.go b/runcontainer.go
index cbffdaf2..5a0f985f 100644
--- a/runcontainer.go
+++ b/runcontainer.go
@@ -1162,6 +1162,20 @@ func (rc *runContainer16) newRunIterator16() *runIterator16 {
 	return &runIterator16{rc: rc, curIndex: 0, curPosInIndex: 0}
 }
 
+// iterate calls cb with each value produced by the container's iterator and
+// returns false if cb stopped the iteration early, true otherwise.
+func (rc *runContainer16) iterate(cb func(x uint16) bool) bool {
+	iterator := runIterator16{rc: rc, curIndex: 0, curPosInIndex: 0}
+
+	for iterator.hasNext() {
+		if !cb(iterator.next()) {
+			return false
+		}
+	}
+
+	return true
+}
+
 // hasNext returns false if calling next will panic. It
 // returns true when there is at least one more value
 // available in the iteration sequence.