Skip to content

Commit

Permalink
matchtree: regex uses substring iterator for start of search
Browse files Browse the repository at this point in the history
TODO: this does not work for case-insensitive search yet.

Co-authored-by: Erik Seliger <[email protected]>
  • Loading branch information
keegancsmith and eseliger committed Nov 25, 2021
1 parent 7ad19f6 commit 50ff219
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 1 deletion.
45 changes: 44 additions & 1 deletion matchtree.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ type regexpMatchTree struct {
reEvaluated bool
found []*candidateMatch

literalPrefix *substrMatchTree

// nextDoc, prepare.
bruteForceMatchTree
}
Expand Down Expand Up @@ -651,7 +653,32 @@ func (t *regexpMatchTree) matches(cp *contentProvider, cost int, known map[match
}

cp.stats.RegexpsConsidered++
idxs := t.regexp.FindAllIndex(cp.data(t.fileName), -1)

data := cp.data(t.fileName)

var idxs [][]int
if t.literalPrefix != nil {
t.literalPrefix.prepare(cp.idx)
idxs = make([][]int, 0, len(t.literalPrefix.current))
offset := 0
for _, literalCandidate := range t.literalPrefix.current {
if offset > int(literalCandidate.byteOffset) {
continue
}
offset = int(literalCandidate.byteOffset)
idx := t.regexp.FindIndex(data[offset:])
if idx == nil {
break
}
idx[0] += offset
idx[1] += offset
idxs = append(idxs, idx)
offset = idx[0] + 1
}
} else {
idxs = t.regexp.FindAllIndex(data, -1)
}

found := t.found[:0]
for _, idx := range idxs {
cm := &candidateMatch{
Expand Down Expand Up @@ -788,6 +815,22 @@ func (d *indexData) newMatchTree(q query.Q) (matchTree, error) {
fileName: s.FileName,
}

// TODO: LiteralPrefix is always empty for case-insensitive regexps, so this
// optimization is skipped for them. We want it in that case too.
if prefix, _ := tr.regexp.LiteralPrefix(); len(prefix) >= ngramSize {
mt, err := d.newSubstringMatchTree(&query.Substring{
Pattern: prefix,
FileName: s.FileName,
CaseSensitive: s.CaseSensitive,
})
if err != nil {
return nil, err
}
if prefixMT, ok := mt.(*substrMatchTree); ok {
tr.literalPrefix = prefixMT
}
}

return &andMatchTree{
children: []matchTree{
tr, &noVisitMatchTree{subMT},
Expand Down
4 changes: 4 additions & 0 deletions web/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,10 @@ func TestBasic(t *testing.T) {
"href=\"file-url#line",
"carry <b>water</b>",
},
"/search?q=water.in": {
"href=\"file-url#line",
"carry <b>water in</b>",
},
"/search?q=r:": {
"1234\">master",
"Found 1 repositories",
Expand Down

0 comments on commit 50ff219

Please sign in to comment.