From b17ff49f042455dd2e1a1397fd79a9fa15cb137c Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 17 Feb 2021 15:00:45 -0500 Subject: [PATCH] switch to segment.Bitmap interface --- bitmap.go | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ dict.go | 8 +++-- merge.go | 8 +++-- posting.go | 13 +++++--- segment.go | 4 +-- 5 files changed, 113 insertions(+), 10 deletions(-) create mode 100644 bitmap.go diff --git a/bitmap.go b/bitmap.go new file mode 100644 index 00000000..3207907b --- /dev/null +++ b/bitmap.go @@ -0,0 +1,90 @@ +package zap + +import ( + "github.com/RoaringBitmap/roaring" + segment "github.com/blevesearch/scorch_segment_api/v2" + "io" +) + +type bitmap roaring.Bitmap + +func (*ZapPlugin) NewBitmap() segment.Bitmap { + return (*bitmap)(roaring.NewBitmap()) +} + +func (b *bitmap) Add(v uint32) { + (*roaring.Bitmap)(b).Add(v) +} + +func (b *bitmap) AddMany(dat []uint32) { + (*roaring.Bitmap)(b).AddMany(dat) +} + +func (b *bitmap) AddRange(rangeStart, rangeEnd uint64) { + (*roaring.Bitmap)(b).AddRange(rangeStart, rangeEnd) +} + +func (b *bitmap) And(other segment.Bitmap) { + (*roaring.Bitmap)(b).And((*roaring.Bitmap)(other.(*bitmap))) +} + +func (b *bitmap) AndNot(other segment.Bitmap) { + (*roaring.Bitmap)(b).AndNot((*roaring.Bitmap)(other.(*bitmap))) +} + +func (b *bitmap) Clone() segment.Bitmap{ + return (*bitmap)((*roaring.Bitmap)(b).Clone()) +} + +func (b *bitmap) Contains(v uint32) bool { + return (*roaring.Bitmap)(b).Contains(v) +} + +func (b *bitmap) GetCardinality() uint64 { + return (*roaring.Bitmap)(b).GetCardinality() +} + +func (b *bitmap) GetSizeInBytes() uint64 { + return (*roaring.Bitmap)(b).GetSizeInBytes() +} + +func (b *bitmap) IsEmpty() bool { + return (*roaring.Bitmap)(b).IsEmpty() +} + +func (b *bitmap) Iterator() segment.IntPeekable { + return (*roaring.Bitmap)(b).Iterator() +} + +func (b *bitmap) Or(other segment.Bitmap) { + (*roaring.Bitmap)(b).Or((*roaring.Bitmap)(other.(*bitmap))) +} + +func (b *bitmap) ReadFrom(reader io.Reader) (p int64, err error) { + return (*roaring.Bitmap)(b).ReadFrom(reader) +} + +func (b *bitmap) WriteTo(stream io.Writer) (int64, error) { + return (*roaring.Bitmap)(b).WriteTo(stream) +} + +func (b *bitmap) OrNew(other segment.Bitmap) segment.Bitmap { + return (*bitmap)(roaring.Or((*roaring.Bitmap)(b), (*roaring.Bitmap)(other.(*bitmap)))) +} + +func (b *bitmap) AndNew(other segment.Bitmap) segment.Bitmap { + return (*bitmap)(roaring.And((*roaring.Bitmap)(b), (*roaring.Bitmap)(other.(*bitmap)))) +} + +func (b *bitmap) AndNotNew(other segment.Bitmap) segment.Bitmap { + return (*bitmap)(roaring.AndNot((*roaring.Bitmap)(b), (*roaring.Bitmap)(other.(*bitmap)))) +} + +func (b *bitmap) HeapOrNew(bitmaps ...segment.Bitmap) segment.Bitmap { + arg := make([]*roaring.Bitmap, len(bitmaps)+1) + arg[0] = (*roaring.Bitmap)(b) + for i, bm := range bitmaps { + arg[i+1] = (*roaring.Bitmap)(bm.(*bitmap)) + } + return (*bitmap)(roaring.HeapOr(arg...)) +} \ No newline at end of file diff --git a/dict.go b/dict.go index e30bf242..2ee96af0 100644 --- a/dict.go +++ b/dict.go @@ -36,14 +36,18 @@ type Dictionary struct { var emptyDictionary = &Dictionary{} // PostingsList returns the postings list for the specified term -func (d *Dictionary) PostingsList(term []byte, except *roaring.Bitmap, +func (d *Dictionary) PostingsList(term []byte, except segment.Bitmap, prealloc segment.PostingsList) (segment.PostingsList, error) { var preallocPL *PostingsList pl, ok := prealloc.(*PostingsList) if ok && pl != nil { preallocPL = pl } - return d.postingsList(term, except, preallocPL) + var rb *roaring.Bitmap + if except != nil { + rb = (*roaring.Bitmap)(except.(*bitmap)) + } + return d.postingsList(term, rb, preallocPL) } func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) { diff --git a/merge.go b/merge.go index 887d3447..f2ff086e 100644 --- a/merge.go +++ b/merge.go @@ -36,10 +36,11 @@ const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc // Merge takes a slice of segments and bit masks describing which // documents may be dropped, and creates a new segment containing the // remaining data. This new segment is built at the specified path. -func (*ZapPlugin) Merge(segments []seg.Segment, drops []*roaring.Bitmap, path string, +func (*ZapPlugin) Merge(segments []seg.Segment, drops []seg.Bitmap, path string, closeCh chan struct{}, s seg.StatsReporter) ( [][]uint64, uint64, error) { segmentBases := make([]*SegmentBase, len(segments)) + dropsRoaring := make([]*roaring.Bitmap, len(segments)) for segmenti, segment := range segments { switch segmentx := segment.(type) { case *Segment: @@ -49,8 +50,11 @@ func (*ZapPlugin) Merge(segments []seg.Segment, drops []*roaring.Bitmap, path st default: panic(fmt.Sprintf("oops, unexpected segment type: %T", segment)) } + if drops[segmenti] != nil { + dropsRoaring[segmenti] = (*roaring.Bitmap)(drops[segmenti].(*bitmap)) + } } - return mergeSegmentBases(segmentBases, drops, path, DefaultChunkMode, closeCh, s) + return mergeSegmentBases(segmentBases, dropsRoaring, path, DefaultChunkMode, closeCh, s) } func mergeSegmentBases(segmentBases []*SegmentBase, drops []*roaring.Bitmap, path string, diff --git a/posting.go b/posting.go index b1d19e53..1cf710b3 100644 --- a/posting.go +++ b/posting.go @@ -749,14 +749,19 @@ func (p *PostingsIterator) DocNum1Hit() (uint64, bool) { // ActualBitmap returns the underlying actual bitmap // which can be used up the stack for optimizations -func (p *PostingsIterator) ActualBitmap() *roaring.Bitmap { - return p.ActualBM +func (p *PostingsIterator) ActualBitmap() segment.Bitmap { + if p.ActualBM == nil { + // NOTE: this returns nil segment.Bitmap as opposed to a nil *bitmap + // allowing downstream == nil checks to work as expected + return nil + } + return (*bitmap)(p.ActualBM) } // ReplaceActual replaces the ActualBM with the provided // bitmap -func (p *PostingsIterator) ReplaceActual(abm *roaring.Bitmap) { - p.ActualBM = abm +func (p *PostingsIterator) ReplaceActual(abm segment.Bitmap) { + p.ActualBM = (*roaring.Bitmap)(abm.(*bitmap)) p.Actual = abm.Iterator() } diff --git a/segment.go b/segment.go index bc29f3f4..ed0b8a45 100644 --- a/segment.go +++ b/segment.go @@ -418,7 +418,7 @@ func (s *SegmentBase) Count() uint64 { // DocNumbers returns a bitset corresponding to the doc numbers of all the // provided _id strings -func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) { +func (s *SegmentBase) DocNumbers(ids []string) (segment.Bitmap, error) { rv := roaring.New() if len(s.fieldsMap) > 0 { @@ -450,7 +450,7 @@ func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) { } } - return rv, nil + return (*bitmap)(rv), nil } // Fields returns the field names used in this segment