Skip to content

Commit ba4455a

Browse files
committed
pass tokens for bm25 in javascript
1 parent 86b627a commit ba4455a

File tree

1 file changed

+8
-5
lines changed
  • clients/new-js/packages/ai-embeddings/chroma-bm25/src

1 file changed

+8
-5
lines changed

clients/new-js/packages/ai-embeddings/chroma-bm25/src/index.ts

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -190,18 +190,20 @@ export class ChromaBm25EmbeddingFunction implements SparseEmbeddingFunction {
190190
}
191191

192192
private encode(text: string): SparseVector {
193-
const tokens = this.tokenizer.tokenize(text);
193+
const tokenList = this.tokenizer.tokenize(text);
194194

195-
if (tokens.length === 0) {
195+
if (tokenList.length === 0) {
196196
return { indices: [], values: [] };
197197
}
198198

199-
const docLen = tokens.length;
199+
const docLen = tokenList.length;
200200
const counts = new Map<number, number>();
201+
const tokenMap = new Map<number, string>();
201202

202-
for (const token of tokens) {
203+
for (const token of tokenList) {
203204
const tokenId = this.hasher.hash(token);
204205
counts.set(tokenId, (counts.get(tokenId) ?? 0) + 1);
206+
tokenMap.set(tokenId, token);
205207
}
206208

207209
const indices = Array.from(counts.keys()).sort((a, b) => a - b);
@@ -213,8 +215,9 @@ export class ChromaBm25EmbeddingFunction implements SparseEmbeddingFunction {
213215
(1 - this.b + (this.b * docLen) / this.avgDocLength);
214216
return (tf * (this.k + 1)) / denominator;
215217
});
218+
const tokens = indices.map((idx) => tokenMap.get(idx)!);
216219

217-
return { indices, values };
220+
return { indices, values, tokens };
218221
}
219222

220223
public async generate(texts: string[]): Promise<SparseVector[]> {

0 commit comments

Comments
 (0)