Skip to content
This repository has been archived by the owner on Aug 16, 2024. It is now read-only.

Commit

Permalink
search: support filtering by language
Browse files Browse the repository at this point in the history
Now if you prefix your search query with `go`, `golang`, `md`, `markdown`, `python`, `py`, etc.
it will only search results for that language.

Signed-off-by: Stephen Gutekanst <[email protected]>
  • Loading branch information
emidoots committed May 18, 2022
1 parent 27fd341 commit ca42dad
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 12 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ To get started see [docs/development.md](docs/development.md) and the [language
* Go, Python, Zig, and Markdown basic support
* Basic search navigation experience based on [experimental Sinter search filters](https://github.com/hexops/sinter/blob/c87e502f3cfd468d3d1263b7caf7cea94ff6d084/src/filter.zig#L18-L85)
* Searching globally across all projects, and within specific projects is now possible.
* Searching within a specific language is now supported (add "go", "python", "md" / "markdown", etc. to the front of your query string).
* Markdown files now have headers and sub-headers indexed for search (e.g. `# About doctree > Installation` shows up in search)
* Basic Markdown frontmatter support.
* Initial [doctree schema format](https://github.com/sourcegraph/doctree/blob/main/doctree/schema/schema.go)
Expand Down
77 changes: 65 additions & 12 deletions doctree/indexer/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,9 @@ func IndexForSearch(projectName, indexDataDir string, indexes map[string]*schema
}

func Search(ctx context.Context, indexDataDir, query, projectName string) ([]Result, error) {
query, language := parseQuery(query)

// TODO: could skip sinter filter indexes from projects without our desired language.
var indexes []string
if projectName == "" {
dir, err := ioutil.ReadDir(indexDataDir)
Expand All @@ -145,6 +148,16 @@ func Search(ctx context.Context, indexDataDir, query, projectName string) ([]Res
))
}

queryKey := strings.FieldsFunc(query, func(r rune) bool { return r == '.' || r == '/' || r == ' ' })
queryKeyHashes := []uint64{}
for _, part := range queryKey {
queryKeyHashes = append(queryKeyHashes, hash(part))
}
if len(queryKeyHashes) == 0 {
// TODO: make QueryLogicalOr handle empty keys set
queryKeyHashes = []uint64{hash(query)}
}

// TODO: return stats about search performance, etc.
// TODO: query limiting support
// TODO: support filtering to specific project
Expand All @@ -158,24 +171,14 @@ func Search(ctx context.Context, indexDataDir, query, projectName string) ([]Res
continue
}

queryKey := strings.FieldsFunc(query, func(r rune) bool { return r == '.' || r == '/' || r == ' ' })
queryKeyHashes := []uint64{}
for _, part := range queryKey {
queryKeyHashes = append(queryKeyHashes, hash(part))
}
if len(queryKeyHashes) == 0 {
// TODO: make QueryLogicalOr handle empty keys set
queryKeyHashes = []uint64{hash(query)}
}

results, err := sinterFilter.QueryLogicalOr(queryKeyHashes)
if err != nil {
log.Println("error searching", sinterFile, "QueryLogicalOr:", err)
continue
}
defer results.Deinit()

out = append(out, decodeResults(results, queryKey, rankedResultLimit-len(out))...)
out = append(out, decodeResults(results, queryKey, language, rankedResultLimit-len(out))...)
if len(out) >= rankedResultLimit {
break
}
Expand All @@ -187,6 +190,53 @@ func Search(ctx context.Context, indexDataDir, query, projectName string) ([]Res
return out, nil
}

// languageSearchTerms maps the query words that parseQuery recognizes as a
// language filter to the language they select. Several aliases may refer to
// the same language. Keys are matched exactly as written here, and none of
// them contains a space.
var languageSearchTerms = map[string]schema.Language{
	// C++
	"c++": schema.LanguageCpp,
	"cpp": schema.LanguageCpp,
	"cxx": schema.LanguageCpp,

	// Go
	"go":     schema.LanguageGo,
	"golang": schema.LanguageGo,

	// Java
	"java": schema.LanguageJava,

	// Markdown
	"markdown": schema.LanguageMarkdown,
	"md":       schema.LanguageMarkdown,

	// Objective-C
	"objc": schema.LanguageObjC,

	// Python
	"python": schema.LanguagePython,
	"py":     schema.LanguagePython,

	// TypeScript
	"typescript": schema.LanguageTypeScript,
	"ts":         schema.LanguageTypeScript,

	// Zig
	"zig":     schema.LanguageZig,
	"ziglang": schema.LanguageZig,
}

// parseQuery splits an optional language filter off of a search query.
//
// A language term (a key of languageSearchTerms) is recognized only when it is
// the very first or the very last word of the query and is separated from the
// rest of the query by a single space character. A leading term takes priority
// over a trailing one. The term and that one separating space are removed from
// the returned query; no other whitespace is trimmed or normalized.
//
// It returns the remaining query and a pointer to the selected language, or
// the query unchanged and a nil language when no language term is present.
//
// Examples:
//
//	"foo bar"         -> ("foo bar", nil)
//	"gofoo bar"       -> ("gofoo bar", nil)
//
//	"go foo bar"      -> ("foo bar", schema.LanguageGo)
//	"foo bar c++"     -> ("foo bar", schema.LanguageCpp)
//
//	"go foo bar java" -> ("foo bar java", schema.LanguageGo)
//	" go foo bar"     -> (" go foo bar", nil)
//	"foo bar java "   -> ("foo bar java ", nil)
func parseQuery(query string) (realQuery string, language *schema.Language) {
	// None of the language terms contains a space, so "query starts with
	// term+space" is the same as "the text before the first space is a term".
	// That lets us do a direct map lookup instead of scanning every term.
	if i := strings.IndexByte(query, ' '); i >= 0 {
		if lang, ok := languageSearchTerms[query[:i]]; ok {
			// lang is a fresh local copy, so handing out its address is safe.
			return query[i+1:], &lang
		}
	}

	// Secondarily, the text after the last space may be a language term.
	if i := strings.LastIndexByte(query, ' '); i >= 0 {
		if lang, ok := languageSearchTerms[query[i+1:]]; ok {
			return query[:i], &lang
		}
	}
	return query, nil
}

type Result struct {
Language string `json:"language"`
ProjectName string `json:"projectName"`
Expand All @@ -204,7 +254,7 @@ type sinterResult struct {
Path string `json:"path"`
}

func decodeResults(results sinter.FilterResults, queryKey []string, limit int) []Result {
func decodeResults(results sinter.FilterResults, queryKey []string, language *schema.Language, limit int) []Result {
var out []Result
decoding:
for i := 0; i < results.Len(); i++ {
Expand All @@ -214,6 +264,9 @@ decoding:
panic("illegal sinter result value: " + err.Error())
}

if language != nil && result.Language != language.ID {
continue
}
for index, searchKey := range result.SearchKeys {
absoluteKey := append([]string{result.Language, result.ProjectName}, searchKey...)
score := match(queryKey, absoluteKey)
Expand Down

0 comments on commit ca42dad

Please sign in to comment.