Skip to content

Commit

Permalink
field cardinality temp save
Browse files Browse the repository at this point in the history
  • Loading branch information
Thejas-bhat committed Dec 6, 2024
1 parent 0abb3e6 commit 738dfe1
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 19 deletions.
14 changes: 10 additions & 4 deletions index/scorch/snapshot_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,9 @@ type asynchSegmentResult struct {
dict segment.TermDictionary
dictItr segment.DictionaryIterator

index int
docs *roaring.Bitmap
cardinality int
index int
docs *roaring.Bitmap

postings segment.PostingsList

Expand Down Expand Up @@ -146,6 +147,7 @@ func (is *IndexSnapshot) newIndexSnapshotFieldDict(field string,

results := make(chan *asynchSegmentResult)
var totalBytesRead uint64
var fieldCardinality int64
for _, s := range is.segment {
go func(s *SegmentSnapshot) {
dict, err := s.segment.Dictionary(field)
Expand All @@ -155,6 +157,8 @@ func (is *IndexSnapshot) newIndexSnapshotFieldDict(field string,
if dictStats, ok := dict.(segment.DiskStatsReporter); ok {
atomic.AddUint64(&totalBytesRead, dictStats.BytesRead())
}

atomic.AddInt64(&fieldCardinality, int64(dict.Cardinality()))
if randomLookup {
results <- &asynchSegmentResult{dict: dict}
} else {
Expand All @@ -166,9 +170,11 @@ func (is *IndexSnapshot) newIndexSnapshotFieldDict(field string,

var err error
rv := &IndexSnapshotFieldDict{
snapshot: is,
cursors: make([]*segmentDictCursor, 0, len(is.segment)),
snapshot: is,
cursors: make([]*segmentDictCursor, 0, len(is.segment)),
cardinality: int(fieldCardinality),
}

for count := 0; count < len(is.segment); count++ {
asr := <-results
if asr.err != nil && err == nil {
Expand Down
10 changes: 6 additions & 4 deletions index/scorch/snapshot_index_dict.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,12 @@ type segmentDictCursor struct {
}

type IndexSnapshotFieldDict struct {
snapshot *IndexSnapshot
cursors []*segmentDictCursor
entry index.DictEntry
bytesRead uint64
cardinality int
bytesRead uint64

snapshot *IndexSnapshot
cursors []*segmentDictCursor
entry index.DictEntry
}

func (i *IndexSnapshotFieldDict) BytesRead() uint64 {
Expand Down
2 changes: 1 addition & 1 deletion index_alias_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -542,7 +542,7 @@ type preSearchFlags struct {
func preSearchRequired(req *SearchRequest, m mapping.IndexMapping) *preSearchFlags {
// Check for KNN query
knn := requestHasKNN(req)
var synonyms, bm25 bool
var bm25 bool
if !isMatchNoneQuery(req.Query) {
// TODO: currently all index mappings are BM25 mappings; this is just a
// placeholder and needs to be fixed.
Expand Down
5 changes: 5 additions & 0 deletions search.go
Original file line number Diff line number Diff line change
Expand Up @@ -592,3 +592,8 @@ func (r *SearchRequest) SortFunc() func(data sort.Interface) {

return sort.Sort
}

// isMatchNoneQuery reports whether the dynamic type of q is
// *query.MatchNoneQuery. A nil interface value yields false.
func isMatchNoneQuery(q query.Query) bool {
	switch q.(type) {
	case *query.MatchNoneQuery:
		return true
	default:
		return false
	}
}
24 changes: 14 additions & 10 deletions search/searcher/search_term.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,18 +57,22 @@ func NewTermSearcherBytes(ctx context.Context, indexReader index.IndexReader, te

func newTermSearcherFromReader(ctx context.Context, indexReader index.IndexReader, reader index.TermFieldReader,
term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
count, ok := ctx.Value(search.BM25PreSearchDataKey).(uint64)
if !ok {
var err error
count, err = indexReader.DocCount()
if err != nil {
_ = reader.Close()
return nil, err
var count uint64
if ctx != nil {
ctxCount, ok := ctx.Value(search.BM25PreSearchDataKey).(uint64)
if !ok {
var err error
ctxCount, err = indexReader.DocCount()
if err != nil {
_ = reader.Close()
return nil, err
}
} else {
fmt.Printf("fetched from ctx \n")
}
} else {
fmt.Printf("fetched from ctx \n")
}
count = ctxCount

}
scorer := scorer.NewTermQueryScorer(term, field, boost, count, reader.Count(), options)
return &TermSearcher{
indexReader: indexReader,
Expand Down

0 comments on commit 738dfe1

Please sign in to comment.