diff --git a/CHANGELOG.md b/CHANGELOG.md index 733a6af..f3c7cd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) vNext ----- -In this release the main feature is query performance improvement (5x faster, 99% fewer memory allocations). There's also a new code example for semantic search across 5,000 arXiv papers. +In this release the main feature is query performance improvement (5x faster, 98% fewer memory allocations). There's also a new code example for semantic search across 5,000 arXiv papers. ### Added @@ -18,7 +18,7 @@ In this release the main feature is query performance improvement (5x faster, 99 ### Improved - Changed the example link target to directory instead of `main.go` file (PR [#43](https://github.com/philippgille/chromem-go/pull/43)) -- Improved query performance (5x faster, 99% fewer memory allocations) (PR [#47](https://github.com/philippgille/chromem-go/pull/47), [#53](https://github.com/philippgille/chromem-go/pull/53)) +- Improved query performance (5x faster, 98% fewer memory allocations) (PR [#47](https://github.com/philippgille/chromem-go/pull/47), [#53](https://github.com/philippgille/chromem-go/pull/53)) ### Fixed diff --git a/README.md b/README.md index 5ce3c49..5b817ef 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Because `chromem-go` is embeddable it enables you to add retrieval augmented gen It's *not* a library to connect to Chroma and also not a reimplementation of it in Go. It's a database on its own. -The focus is not scale (millions of documents) or number of features, but simplicity and performance for the most common use cases. On a mid-range 2020 Intel laptop CPU you can query 1,000 documents in 0.3 ms and 100,000 documents in 50-60 ms, both with just 41 memory allocations. See [Benchmarks](#benchmarks) for details. 
+The focus is not scale (millions of documents) or number of features, but simplicity and performance for the most common use cases. On a mid-range 2020 Intel laptop CPU you can query 1,000 documents in 0.3 ms and 100,000 documents in 40 ms, with very few and small memory allocations. See [Benchmarks](#benchmarks) for details. > ⚠️ The project is in beta, under heavy construction, and may introduce breaking changes in releases before `v1.0.0`. All changes are documented in the [`CHANGELOG`](./CHANGELOG.md). @@ -197,18 +197,18 @@ goos: linux goarch: amd64 pkg: github.com/philippgille/chromem-go cpu: 11th Gen Intel(R) Core(TM) i5-1135G7 @ 2.40GHz -BenchmarkCollection_Query_NoContent_100-8 10000 106441 ns/op 6393 B/op 41 allocs/op -BenchmarkCollection_Query_NoContent_1000-8 2278 494254 ns/op 35570 B/op 41 allocs/op -BenchmarkCollection_Query_NoContent_5000-8 416 2767125 ns/op 166634 B/op 41 allocs/op -BenchmarkCollection_Query_NoContent_25000-8 70 15165139 ns/op 813800 B/op 41 allocs/op -BenchmarkCollection_Query_NoContent_100000-8 19 58823464 ns/op 3205865 B/op 41 allocs/op -BenchmarkCollection_Query_100-8 11269 105990 ns/op 6385 B/op 41 allocs/op -BenchmarkCollection_Query_1000-8 2364 494212 ns/op 35574 B/op 41 allocs/op -BenchmarkCollection_Query_5000-8 481 2750438 ns/op 166647 B/op 41 allocs/op -BenchmarkCollection_Query_25000-8 93 13143419 ns/op 813805 B/op 41 allocs/op -BenchmarkCollection_Query_100000-8 20 51727357 ns/op 3205871 B/op 41 allocs/op +BenchmarkCollection_Query_NoContent_100-8 13164 90276 ns/op 5176 B/op 95 allocs/op +BenchmarkCollection_Query_NoContent_1000-8 2142 520261 ns/op 13558 B/op 141 allocs/op +BenchmarkCollection_Query_NoContent_5000-8 561 2150354 ns/op 47096 B/op 173 allocs/op +BenchmarkCollection_Query_NoContent_25000-8 120 9890177 ns/op 211783 B/op 208 allocs/op +BenchmarkCollection_Query_NoContent_100000-8 30 39574238 ns/op 810370 B/op 232 allocs/op +BenchmarkCollection_Query_100-8 13225 91058 ns/op 5177 B/op 95 allocs/op 
+BenchmarkCollection_Query_1000-8 2226 519693 ns/op 13552 B/op 140 allocs/op +BenchmarkCollection_Query_5000-8 550 2128121 ns/op 47108 B/op 173 allocs/op +BenchmarkCollection_Query_25000-8 100 10063260 ns/op 211705 B/op 205 allocs/op +BenchmarkCollection_Query_100000-8 30 39404005 ns/op 810295 B/op 229 allocs/op PASS -ok github.com/philippgille/chromem-go 26.187s +ok github.com/philippgille/chromem-go 28.402s ``` ## Motivation diff --git a/collection.go b/collection.go index b4186b0..52d8844 100644 --- a/collection.go +++ b/collection.go @@ -1,7 +1,6 @@ package chromem import ( - "cmp" "context" "errors" "fmt" @@ -323,30 +322,20 @@ func (c *Collection) QueryEmbedding(ctx context.Context, queryEmbedding []float3 return nil, nil } - // For the remaining documents, calculate cosine similarity. - docSims, err := calcDocSimilarity(ctx, queryEmbedding, filteredDocs) + // For the remaining documents, get the most similar docs. + nMaxDocs, err := getMostSimilarDocs(ctx, queryEmbedding, filteredDocs, nResults) if err != nil { - return nil, fmt.Errorf("couldn't calculate cosine similarity: %w", err) + return nil, fmt.Errorf("couldn't get most similar docs: %w", err) } - // Sort by similarity - slices.SortFunc(docSims, func(i, j docSim) int { - // i, j; for descending order - return cmp.Compare(j.similarity, i.similarity) - }) - - // Return the top nResults or len(docSim), whichever is smaller - if len(docSims) < nResults { - nResults = len(docSims) - } res := make([]Result, 0, nResults) for i := 0; i < nResults; i++ { res = append(res, Result{ - ID: docSims[i].docID, - Metadata: c.documents[docSims[i].docID].Metadata, - Embedding: c.documents[docSims[i].docID].Embedding, - Content: c.documents[docSims[i].docID].Content, - Similarity: docSims[i].similarity, + ID: nMaxDocs[i].docID, + Metadata: c.documents[nMaxDocs[i].docID].Metadata, + Embedding: c.documents[nMaxDocs[i].docID].Embedding, + Content: c.documents[nMaxDocs[i].docID].Content, + Similarity: nMaxDocs[i].similarity, 
}) } diff --git a/query.go b/query.go index 9da1be2..240060c 100644 --- a/query.go +++ b/query.go @@ -1,9 +1,12 @@ package chromem import ( + "cmp" + "container/heap" "context" "fmt" "runtime" + "slices" "strings" "sync" ) @@ -15,6 +18,70 @@ type docSim struct { similarity float32 } +// docMaxHeap is a min-heap of docSims ordered by similarity (the smallest kept similarity sits at the root), used to track the n most similar docs. +// See https://pkg.go.dev/container/heap@go1.22#example-package-IntHeap +type docMaxHeap []docSim + +func (h docMaxHeap) Len() int { return len(h) } +func (h docMaxHeap) Less(i, j int) bool { return h[i].similarity < h[j].similarity } +func (h docMaxHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] } + +func (h *docMaxHeap) Push(x any) { + // Push and Pop use pointer receivers because they modify the slice's length, + // not just its contents. + *h = append(*h, x.(docSim)) +} + +func (h *docMaxHeap) Pop() any { + old := *h + n := len(old) + x := old[n-1] + *h = old[0 : n-1] + return x +} + +// maxDocSims keeps the n docSims with the highest similarities, backed by a +// fixed-size min-heap. It's safe for concurrent use, but the result of values() isn't. +// In our benchmarks this was faster than sorting a slice of docSims at the end. +type maxDocSims struct { + h docMaxHeap + lock sync.RWMutex + size int +} + +// newMaxDocSims creates a new maxDocSims with a fixed size. +func newMaxDocSims(size int) *maxDocSims { + return &maxDocSims{ + h: make(docMaxHeap, 0, size), + size: size, + } +} + +// add inserts a new docSim into the heap, keeping only the top n similarities. +func (mds *maxDocSims) add(doc docSim) { + mds.lock.Lock() + defer mds.lock.Unlock() + if mds.h.Len() < mds.size { + heap.Push(&mds.h, doc) + } else if mds.h.Len() > 0 && mds.h[0].similarity < doc.similarity { + // Replace the smallest similarity if the new doc's similarity is higher + heap.Pop(&mds.h) + heap.Push(&mds.h, doc) + } +} + +// values returns the docSims in the heap, sorted by similarity (descending)
+// The call itself is safe for concurrent use with add(), but the result isn't. +// Only work with the result after all calls to add() have finished. +func (d *maxDocSims) values() []docSim { + d.lock.RLock() + defer d.lock.RUnlock() + slices.SortFunc(d.h, func(i, j docSim) int { + return cmp.Compare(j.similarity, i.similarity) + }) + return d.h +} + // filterDocs filters a map of documents by metadata and content. // It does this concurrently. func filterDocs(docs map[string]*Document, where, whereDocument map[string]string) []*Document { @@ -95,9 +162,8 @@ func documentMatchesFilters(document *Document, where, whereDocument map[string] return true } -func calcDocSimilarity(ctx context.Context, queryVectors []float32, docs []*Document) ([]docSim, error) { - similarities := make([]docSim, 0, len(docs)) - similaritiesLock := sync.Mutex{} +func getMostSimilarDocs(ctx context.Context, queryVectors []float32, docs []*Document, n int) ([]docSim, error) { + nMaxDocs := newMaxDocSims(n) // Determine concurrency. Use number of docs or CPUs, whichever is smaller. numCPUs := runtime.NumCPU() @@ -152,10 +218,7 @@ func calcDocSimilarity(ctx context.Context, queryVectors []float32, docs []*Docu return } - similaritiesLock.Lock() - // We don't defer the unlock because we want to unlock much earlier. - similarities = append(similarities, docSim{docID: doc.ID, similarity: sim}) - similaritiesLock.Unlock() + nMaxDocs.add(docSim{docID: doc.ID, similarity: sim}) } }(docs[start:end]) } @@ -166,5 +229,5 @@ func calcDocSimilarity(ctx context.Context, queryVectors []float32, docs []*Docu return nil, sharedErr } - return similarities, nil + return nMaxDocs.values(), nil }