Skip to content

Commit

Permalink
Merge pull request #431 from bearrito/feature/roaring64-validation
Browse files Browse the repository at this point in the history
Start roaring64 validation
  • Loading branch information
lemire authored Jun 24, 2024
2 parents 5aca967 + a483eb4 commit a15b8ab
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 13 deletions.
12 changes: 8 additions & 4 deletions roaring64/roaring64.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@ import (
"github.com/RoaringBitmap/roaring/v2/internal"
)

const serialCookieNoRunContainer = 12346 // only arrays and bitmaps
const serialCookie = 12347 // runs, arrays, and bitmaps
const (
serialCookieNoRunContainer = 12346 // only arrays and bitmaps
serialCookie = 12347 // runs, arrays, and bitmaps
)

// Bitmap represents a compressed bitmap where you can add integers.
type Bitmap struct {
Expand All @@ -25,7 +27,6 @@ func (rb *Bitmap) ToBase64() (string, error) {
buf := new(bytes.Buffer)
_, err := rb.WriteTo(buf)
return base64.StdEncoding.EncodeToString(buf.Bytes()), err

}

// FromBase64 deserializes a bitmap from Base64
Expand All @@ -52,7 +53,6 @@ func (rb *Bitmap) ToBytes() ([]byte, error) {
// implementations (Java, Go, C++) and it has a specification :
// https://github.com/RoaringBitmap/RoaringFormatSpec#extention-for-64-bit-implementations
func (rb *Bitmap) WriteTo(stream io.Writer) (int64, error) {

var n int64
buf := make([]byte, 8)
binary.LittleEndian.PutUint64(buf, uint64(rb.highlowcontainer.size()))
Expand Down Expand Up @@ -1243,6 +1243,10 @@ func (rb *Bitmap) GetSerializedSizeInBytes() uint64 {
return rb.highlowcontainer.serializedSizeInBytes()
}

// Validate checks the internal consistency of the bitmap by delegating to
// validate on the underlying high-low container. It returns nil when every
// check passes and otherwise the first error that call reports.
//
// Note that a bitmap whose container holds no keys is reported as invalid
// (ErrEmptyKeys).
func (rb *Bitmap) Validate() error {
return rb.highlowcontainer.validate()
}

// Roaring32AsRoaring64 inserts a 32-bit roaring bitmap into
// a 64-bit roaring bitmap. No copy is made.
func Roaring32AsRoaring64(bm32 *roaring.Bitmap) *Bitmap {
Expand Down
77 changes: 72 additions & 5 deletions roaring64/roaring64_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ func TestRangeRemovalFromContent(t *testing.T) {
bm.RemoveRange(0, 30000)
c := bm.GetCardinality()

assert.EqualValues(t, 00, c)
assert.EqualValues(t, 0o0, c)
}

func TestFlipOnEmpty(t *testing.T) {
Expand Down Expand Up @@ -624,7 +624,6 @@ func TestBitmap(t *testing.T) {

assert.Equal(t, len(arrayres), len(arrayand))
assert.True(t, ok)

})

t.Run("Test AND 4", func(t *testing.T) {
Expand Down Expand Up @@ -1401,6 +1400,7 @@ func TestBitmap(t *testing.T) {
assert.True(t, valide)
})
}

func TestXORtest4(t *testing.T) {
t.Run("XORtest 4", func(t *testing.T) {
rb := NewBitmap()
Expand Down Expand Up @@ -1895,9 +1895,9 @@ func TestSerialization(t *testing.T) {
//assert.Nil(t, err)
//assert.True(t, bufBmp.Equals(bmp))

//var base64 string
//base64, err = bufBmp.ToBase64()
//assert.Nil(t, err)
// var base64 string
// base64, err = bufBmp.ToBase64()
// assert.Nil(t, err)

//base64Bmp := New()
//_, err = base64Bmp.FromBase64(base64)
Expand Down Expand Up @@ -1988,3 +1988,70 @@ func Test32As64(t *testing.T) {
assert.True(t, r32asr64.Equals(r64))
assert.True(t, r64.Equals(r32asr64))
}

// TestRoaringArray64Validation walks a roaringArray64 through the failure
// modes of validate in the order the checks are performed: no keys, unsorted
// keys, slice-length mismatches, and finally an error coming from the
// contained 32-bit bitmaps.
func TestRoaringArray64Validation(t *testing.T) {
	a := roaringArray64{}

	// No keys at all.
	assert.ErrorIs(t, a.validate(), ErrEmptyKeys)

	// Keys present but in descending order.
	a.keys = append(a.keys, uint32(3), uint32(1))
	assert.ErrorIs(t, a.validate(), ErrKeySortOrder)
	a.clear()

	// Build up the three parallel slices one at a time; validation must keep
	// failing until all of them have the same length.
	a.keys = append(a.keys, uint32(1), uint32(3), uint32(10))
	assert.ErrorIs(t, a.validate(), ErrCardinalityConstraint)
	a.containers = append(a.containers, roaring.NewBitmap(), roaring.NewBitmap(), roaring.NewBitmap())
	assert.ErrorIs(t, a.validate(), ErrCardinalityConstraint)
	a.needCopyOnWrite = append(a.needCopyOnWrite, true, false, true)

	// The slice lengths now agree, so validation reaches the containers.
	// Only "some error is returned" is asserted here: the trailing string of
	// assert.Error is the failure message, not an expected error text.
	assert.Error(t, a.validate(), "validation should fail on the newly created 32-bit containers")
}

// TestBitMapValidation checks that Validate accepts bitmaps built through
// the public mutators: disjoint ranges added out of order, then random
// values, then a dense run that overlaps values already present.
func TestBitMapValidation(t *testing.T) {
	bm := NewBitmap()
	for _, r := range [][2]uint64{{0, 100}, {306, 406}, {102, 202}, {204, 304}} {
		bm.AddRange(r[0], r[1])
	}
	assert.NoError(t, bm.Validate())

	const count = 1000

	scattered := make([]uint64, count)
	for i := range scattered {
		scattered[i] = rand.Uint64()
	}
	bm.AddMany(scattered)
	assert.NoError(t, bm.Validate())

	dense := make([]uint64, count)
	for i := range dense {
		dense[i] = uint64(i)
	}
	bm.AddMany(dense)
	assert.NoError(t, bm.Validate())
}

// TestRoaringArray64SortOrder exercises checkKeysSorted on both outcomes:
// inputs that must be reported as sorted (no keys, a single key, keys
// produced by the bitmap itself) and hand-built key slices that must be
// rejected (descending keys, repeated keys).
func TestRoaringArray64SortOrder(t *testing.T) {
	t.Run("Empty", func(t *testing.T) {
		a := roaringArray64{}
		assert.True(t, a.checkKeysSorted())
	})

	t.Run("Unsorted", func(t *testing.T) {
		a := roaringArray64{}
		a.keys = append(a.keys, uint32(3), uint32(1))
		assert.False(t, a.checkKeysSorted())
	})

	t.Run("Duplicate Keys", func(t *testing.T) {
		// Equal neighbours are rejected: the order must be strictly increasing.
		a := roaringArray64{}
		a.keys = append(a.keys, uint32(1), uint32(1))
		assert.False(t, a.checkKeysSorted())
	})

	t.Run("Cardinality 1", func(t *testing.T) {
		bm := NewBitmap()
		bm.Add(65)

		assert.True(t, bm.highlowcontainer.checkKeysSorted())
	})

	t.Run("Many Entries", func(t *testing.T) {
		bm := NewBitmap()
		bm.AddRange(1, 129)
		bm.AddRange(511, 2049)

		assert.True(t, bm.highlowcontainer.checkKeysSorted())
	})

	t.Run("Multiple Keys", func(t *testing.T) {
		// Values inserted out of order and far apart; presumably they land
		// under distinct keys (TODO confirm the key is the upper 32 bits).
		// Whatever the bucketing, the bitmap must keep its keys sorted.
		bm := NewBitmap()
		bm.Add(1 << 33)
		bm.Add(1)
		bm.Add(1 << 32)

		assert.True(t, bm.highlowcontainer.checkKeysSorted())
	})
}
55 changes: 54 additions & 1 deletion roaring64/roaringarray64.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package roaring64

import (
"errors"

"github.com/RoaringBitmap/roaring/v2"
)

Expand All @@ -11,6 +13,12 @@ type roaringArray64 struct {
copyOnWrite bool
}

// Sentinel errors returned by (*roaringArray64).validate.
var (
// ErrEmptyKeys is returned when the array holds no keys at all.
ErrEmptyKeys = errors.New("keys were empty")
// ErrKeySortOrder is returned when the keys are not in strictly
// increasing order.
ErrKeySortOrder = errors.New("keys were out of order")
// ErrCardinalityConstraint is returned when the keys, containers and
// needCopyOnWrite slices do not all have the same length.
ErrCardinalityConstraint = errors.New("size of arrays was not coherent")
)

// runOptimize compresses the element containers to minimize space consumed.
// Q: how does this interact with copyOnWrite and needCopyOnWrite?
// A: since we aren't changing the logical content, just the representation,
Expand Down Expand Up @@ -140,7 +148,6 @@ func (ra *roaringArray64) clear() {
}

func (ra *roaringArray64) clone() *roaringArray64 {

sa := roaringArray64{}
sa.copyOnWrite = ra.copyOnWrite

Expand Down Expand Up @@ -401,3 +408,49 @@ func (ra *roaringArray64) serializedSizeInBytes() uint64 {
}
return answer
}

// checkKeysSorted reports whether ra.keys is in strictly increasing order.
// Zero or one key counts as sorted; two equal neighbours do not.
func (ra *roaringArray64) checkKeysSorted() bool {
	keys := ra.keys
	for i := 1; i < len(keys); i++ {
		if keys[i] <= keys[i-1] {
			return false
		}
	}
	return true
}

// validate checks the structural integrity of the array. The checks run in
// this order, and the first one that fails decides the returned error:
//
//   - there is at least one key (ErrEmptyKeys),
//   - the keys are sorted (ErrKeySortOrder),
//   - keys, containers and needCopyOnWrite all have the same length
//     (ErrCardinalityConstraint),
//   - every container passes its own Validate (that error is returned as is).
//
// It returns nil when all checks pass.
func (ra *roaringArray64) validate() error {
	n := len(ra.keys)
	switch {
	case n == 0:
		return ErrEmptyKeys
	case !ra.checkKeysSorted():
		return ErrKeySortOrder
	case n != len(ra.containers), n != len(ra.needCopyOnWrite):
		return ErrCardinalityConstraint
	}

	for i := range ra.containers {
		if err := ra.containers[i].Validate(); err != nil {
			return err
		}
	}
	return nil
}
5 changes: 2 additions & 3 deletions roaring64/serialization_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ func TestSerializationBasic037(t *testing.T) {
func TestSerializationToFile038(t *testing.T) {
rb := BitmapOf(1, 2, 3, 4, 5, 100, 1000)
fname := "myfile.bin"
fout, err := os.OpenFile(fname, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0660)
fout, err := os.OpenFile(fname, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0o660)
if err != nil {
fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n")
return
Expand Down Expand Up @@ -233,7 +233,6 @@ func benchmarkUnserializeFunc(b *testing.B, name string, f func(*Bitmap, []byte)
}

_, err := rb.WriteTo(buf)

if err != nil {
b.Fatalf("Unexpected error occurs: %v", err)
}
Expand Down Expand Up @@ -284,7 +283,7 @@ func Test_tryReadFromRoaring32WithRoaring64_File(t *testing.T) {
}

name := filepath.Join(tempDir, "r32")
if err := ioutil.WriteFile(name, bs, 0600); err != nil {
if err := ioutil.WriteFile(name, bs, 0o600); err != nil {
t.Fatal(err)
}
file, err := os.Open(name)
Expand Down

0 comments on commit a15b8ab

Please sign in to comment.