forked from lobehub/lobe-chat
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* ⚡️ perf: 优化渲染长文本时的渲染性能 * 👷 build: fix vercel build * ⚡️ perf: 提高 smooth 每帧速度 * ✅ test: add test for tokenizer edge runtime * 💚 build: fix build * ♻️ refactor: refactor with webapi * 🚨 ci: improve lint * ✅ test: fix test * ⚡️ perf: try o200k_base
- Loading branch information
Showing
17 changed files
with
162 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
// @vitest-environment edge-runtime | ||
import { describe, expect, it } from 'vitest'; | ||
|
||
import { POST } from './route'; | ||
|
||
describe('tokenizer Route', () => {
  // Posts `text` as the raw request body to the route handler and returns the
  // `count` field of the JSON response.
  const countTokens = async (text: string) => {
    const request = new Request('https://test.com', { body: text, method: 'POST' });
    const response = await POST(request);
    const data = await response.json();

    return data.count;
  };

  it('count hello world', async () => {
    expect(await countTokens('Hello, world!')).toEqual(4);
  });

  it('count Chinese', async () => {
    expect(await countTokens('今天天气真好')).toEqual(5);
  });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
import { encode } from 'gpt-tokenizer/encoding/o200k_base'; | ||
import { NextResponse } from 'next/server'; | ||
|
||
/**
 * POST handler: reads the request body as text and responds with JSON of the
 * form `{ count }`, where `count` is the length of what `encode` returns for
 * that text.
 */
export const POST = async (req: Request) => {
  const text = await req.text();
  const tokens = encode(text);

  return NextResponse.json({ count: tokens.length });
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,32 @@ | ||
import { startTransition, useEffect, useState } from 'react'; | ||
import { debounce } from 'lodash-es'; | ||
import { startTransition, useCallback, useEffect, useState } from 'react'; | ||
|
||
import { encodeAsync } from '@/utils/tokenizer'; | ||
|
||
export const useTokenCount = (input: string = '') => { | ||
const [value, setNum] = useState(0); | ||
|
||
useEffect(() => { | ||
startTransition(() => { | ||
encodeAsync(input || '') | ||
const debouncedEncode = useCallback( | ||
debounce((text: string) => { | ||
encodeAsync(text) | ||
.then(setNum) | ||
.catch(() => { | ||
// 兜底采用字符数 | ||
setNum(input.length); | ||
setNum(text.length); | ||
}); | ||
}, 300), | ||
[], | ||
); | ||
|
||
useEffect(() => { | ||
startTransition(() => { | ||
debouncedEncode(input || ''); | ||
}); | ||
}, [input]); | ||
|
||
// 清理函数 | ||
return () => { | ||
debouncedEncode.cancel(); | ||
}; | ||
}, [input, debouncedEncode]); | ||
|
||
return value; | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
/**
 * Ambient typing for imports whose specifier ends in `.worker.ts`: the default
 * export is a `Worker` subclass whose constructor takes no arguments.
 */
declare module '*.worker.ts' {
  export default class WebpackWorker extends Worker {
    constructor();
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
/** Shared tokenizer worker; stays `null` until `getWorker` creates it. */
let worker: Worker | null = null;

/**
 * Returns the shared tokenizer worker, creating it on the first call.
 * Returns `null` when the `Worker` constructor is not defined in the current
 * environment.
 */
const getWorker = () => {
  if (worker) return worker;
  if (typeof Worker === 'undefined') return null;

  worker = new Worker(new URL('tokenizer.worker.ts', import.meta.url));
  return worker;
};
|
||
/** Monotonic counter that keeps request ids distinct within the same millisecond. */
let nextRequestId = 0;

/**
 * Counts tokens for `str` by delegating to the shared tokenizer worker.
 *
 * Resolves with `str.length` when `getWorker()` yields no worker. Otherwise it
 * posts `{ id, str }` to the worker and settles with the reply carrying the same
 * id: rejects when the reply has a truthy `error`, resolves with its `result`
 * otherwise. Also rejects if the worker emits an `error` event before the reply
 * arrives, so the promise never stays pending on a broken worker.
 *
 * @param str - Text to tokenize.
 * @returns Promise of the token count (or the character count as a fallback).
 */
export const clientEncodeAsync = (str: string): Promise<number> =>
  new Promise((resolve, reject) => {
    const worker = getWorker();

    if (!worker) {
      // Fall back to the character count when Web Workers are unavailable.
      resolve(str.length);
      return;
    }

    // A bare timestamp repeats for calls made within the same millisecond, which
    // would let a single reply settle several pending promises. The counter
    // suffix makes every id unique.
    nextRequestId += 1;
    const id = `${Date.now()}-${nextRequestId}`;

    // Detach both listeners once this request has been settled either way.
    const cleanup = () => {
      worker.removeEventListener('message', handleMessage);
      worker.removeEventListener('error', handleError);
    };

    const handleMessage = (event: MessageEvent) => {
      // Replies for other in-flight requests arrive on the same worker; skip them.
      if (event.data.id !== id) return;

      cleanup();
      if (event.data.error) {
        reject(new Error(event.data.error));
      } else {
        resolve(event.data.result);
      }
    };

    // An `error` event carries no request id, so every pending request rejects.
    const handleError = (event: ErrorEvent) => {
      cleanup();
      reject(new Error(event.message || 'tokenizer worker error'));
    };

    worker.addEventListener('message', handleMessage);
    worker.addEventListener('error', handleError);
    worker.postMessage({ id, str });
  });
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
/** Longest input, measured by `str.length`, that is still tokenized on the client. */
const CLIENT_ENCODE_MAX_LENGTH = 50_000;

/**
 * Counts tokens for `str`, choosing the implementation by input length.
 *
 * - Empty string: returns 0 without loading either implementation.
 * - Up to `CLIENT_ENCODE_MAX_LENGTH` characters: uses `clientEncodeAsync` from `./client`.
 * - Longer input: uses `serverEncodeAsync` from `./server`.
 *
 * @param str - Text to tokenize.
 * @returns Promise of the count reported by the selected implementation.
 */
export const encodeAsync = async (str: string): Promise<number> => {
  if (str.length === 0) return 0;

  // Each implementation is imported lazily, so only the one that is used gets loaded.
  if (str.length <= CLIENT_ENCODE_MAX_LENGTH) {
    const { clientEncodeAsync } = await import('./client');

    return await clientEncodeAsync(str);
  }

  const { serverEncodeAsync } = await import('./server');

  return await serverEncodeAsync(str);
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
/**
 * Counts tokens for `str` by POSTing it as the raw body to `/webapi/tokenizer`.
 *
 * @param str - Text to tokenize.
 * @returns The numeric `count` from the JSON response. Falls back to
 *   `str.length` (after logging) when the request throws, the response status
 *   is not OK, or the payload has no finite numeric `count`.
 */
export const serverEncodeAsync = async (str: string): Promise<number> => {
  try {
    const res = await fetch('/webapi/tokenizer', { body: str, method: 'POST' });

    // An error response may still carry a JSON body; never read `count` from it.
    if (!res.ok) throw new Error(`unexpected response status ${res.status}`);

    const data: unknown = await res.json();
    const count = (data as { count?: unknown } | null)?.count;

    // Guard the declared `Promise<number>` contract against malformed payloads.
    if (typeof count !== 'number' || !Number.isFinite(count)) {
      throw new Error('response does not contain a numeric count');
    }

    return count;
  } catch (e) {
    console.error('serverEncodeAsync:', e);
    return str.length;
  }
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
/**
 * Worker entry point. For each incoming `{ id, str }` message it tokenizes `str`
 * and replies with `{ id, result }`, where `result` is the token count. On
 * failure it replies with `{ id, error }`, where `error` is a non-empty string.
 */
addEventListener('message', async (event) => {
  const { id, str } = event.data;
  try {
    // NOTE(review): this uses the package's default `encode`, while the server
    // route imports `encode` from 'gpt-tokenizer/encoding/o200k_base'. Confirm
    // both resolve to the same encoding so counts agree.
    const { encode } = await import('gpt-tokenizer');

    console.time('client tokenizer');
    const tokenCount = encode(str).length;
    console.timeEnd('client tokenizer');

    postMessage({ id, result: tokenCount });
  } catch (error) {
    // The client-side handler checks `event.data.error` for truthiness, so a
    // missing or empty message would be treated as success. Always send a
    // non-empty string, including for thrown values that are not Error instances.
    const message = error instanceof Error ? error.message : String(error);

    postMessage({ error: message || 'tokenizer worker failed', id });
  }
});