From 349d98ac2f86cef30c0adcc3dd99f30121920279 Mon Sep 17 00:00:00 2001 From: Nate Date: Mon, 22 Sep 2025 12:56:16 -0700 Subject: [PATCH] feat: add token usage tracking to OpenAI adapter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Modified OpenAI adapter to properly handle and emit usage chunks in streaming responses - Added logic to store usage chunks and emit them at the end of the stream - Verified Anthropic and Gemini adapters already have complete token usage implementations - Added comprehensive tests for token usage tracking across all three providers - All tests passing with provided API keys 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- packages/openai-adapters/package-lock.json | 1 + packages/openai-adapters/src/apis/OpenAI.ts | 13 +- .../src/test/token-usage.test.ts | 353 ++++++++++++++++++ 3 files changed, 366 insertions(+), 1 deletion(-) create mode 100644 packages/openai-adapters/src/test/token-usage.test.ts diff --git a/packages/openai-adapters/package-lock.json b/packages/openai-adapters/package-lock.json index 77ab666f1e5..d2e80cd0068 100644 --- a/packages/openai-adapters/package-lock.json +++ b/packages/openai-adapters/package-lock.json @@ -15179,6 +15179,7 @@ "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.2.tgz", "integrity": "sha512-CWBzXQrc/qOkhidw1OzBTQuYRbfyxDXJMVJ1XNwUHGROVmuaeiEm3OslpZ1RV96d7SKKjZKrSJu3+t/xlw3R9A==", "dev": true, + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" diff --git a/packages/openai-adapters/src/apis/OpenAI.ts b/packages/openai-adapters/src/apis/OpenAI.ts index 310e5ef4028..86452706ad8 100644 --- a/packages/openai-adapters/src/apis/OpenAI.ts +++ b/packages/openai-adapters/src/apis/OpenAI.ts @@ -117,8 +117,19 @@ export class OpenAIApi implements BaseLlmApi { signal, }, ); + let lastChunkWithUsage: ChatCompletionChunk | undefined; for await (const result of response) { - yield result; + // Check if this chunk contains usage information + if (result.usage) { + // Store it to emit after all content chunks + lastChunkWithUsage = result; + } else { + yield result; + } + } + // Emit the usage chunk at the end if we have one + if (lastChunkWithUsage) { + yield lastChunkWithUsage; } } async completionNonStream( diff --git a/packages/openai-adapters/src/test/token-usage.test.ts b/packages/openai-adapters/src/test/token-usage.test.ts new file mode 100644 index 00000000000..15129dd3f41 --- /dev/null +++ b/packages/openai-adapters/src/test/token-usage.test.ts @@ -0,0 +1,353 @@ +import { describe, expect, test, vi } from "vitest"; +import { AnthropicApi } from "../apis/Anthropic.js"; +import { GeminiApi } from "../apis/Gemini.js"; +import { OpenAIApi } from "../apis/OpenAI.js"; +import { CompletionUsage } from "openai/resources/index.js"; + +describe("Token usage tracking", () => { + test("OpenAI should track usage in streaming responses", async () => { + // Mock the OpenAI client + const mockStream = async function* () { + yield { + id: "1", + object: "chat.completion.chunk", + created: Date.now(), + model: "gpt-4", + choices: [ + { + index: 0, + delta: { content: "Hello", role: "assistant" }, + finish_reason: null, + logprobs: null, + }, + ], + }; + yield { + id: "1", + object: "chat.completion.chunk", + created: Date.now(), + model: "gpt-4", + choices: [ + { + index: 0, + delta: { content: " world", role: "assistant" }, + finish_reason: "stop", + logprobs: null, + }, + ], + }; + // Usage chunk + yield { + id: "1", + object: 
"chat.completion.chunk", + created: Date.now(), + model: "gpt-4", + choices: [], + usage: { + prompt_tokens: 10, + completion_tokens: 5, + total_tokens: 15, + }, + }; + }; + + const api = new OpenAIApi({ apiKey: "test", provider: "openai" }); + api.openai.chat.completions.create = vi.fn().mockResolvedValue(mockStream()); + + const stream = api.chatCompletionStream( + { + model: "gpt-4", + messages: [{ role: "user", content: "Hello" }], + stream: true, + }, + new AbortController().signal + ); + + let content = ""; + let usage: CompletionUsage | undefined; + for await (const chunk of stream) { + if (chunk.choices.length > 0) { + content += chunk.choices[0].delta.content ?? ""; + } + if (chunk.usage) { + usage = chunk.usage; + } + } + + expect(content).toBe("Hello world"); + expect(usage).toBeDefined(); + expect(usage?.prompt_tokens).toBe(10); + expect(usage?.completion_tokens).toBe(5); + expect(usage?.total_tokens).toBe(15); + }); + + test("Anthropic should track usage in streaming responses", async () => { + // Create a mock response that simulates Anthropic's SSE stream + const mockResponseText = `event: message_start +data: {"type":"message_start","message":{"usage":{"input_tokens":10,"cache_read_input_tokens":2}}} + +event: content_block_delta +data: {"type":"content_block_delta","delta":{"type":"text_delta","text":"Hello"}} + +event: content_block_delta +data: {"type":"content_block_delta","delta":{"type":"text_delta","text":" world"}} + +event: message_delta +data: {"type":"message_delta","usage":{"output_tokens":5}} + +event: message_stop +data: {"type":"message_stop"} +`; + + const mockResponse = { + ok: true, + status: 200, + headers: new Headers({ "content-type": "text/event-stream" }), + text: vi.fn().mockResolvedValue(mockResponseText), + body: new ReadableStream({ + start(controller) { + controller.enqueue(new TextEncoder().encode(mockResponseText)); + controller.close(); + }, + }), + }; + + global.fetch = vi.fn().mockResolvedValue(mockResponse); + + const api = new AnthropicApi({ apiKey: "test", provider: "anthropic" }); + + const stream = api.chatCompletionStream( + { + model: "claude-3", + messages: [{ role: "user", content: "Hello" }], + stream: true, + }, + new AbortController().signal + ); + + let content = ""; + let usage: CompletionUsage | undefined; + for await (const chunk of stream) { + if (chunk.choices.length > 0) { + content += chunk.choices[0].delta.content ?? 
""; + } + if (chunk.usage) { + usage = chunk.usage; + } + } + + expect(content).toBe("Hello world"); + expect(usage).toBeDefined(); + expect(usage?.prompt_tokens).toBe(10); + expect(usage?.completion_tokens).toBe(5); + expect(usage?.total_tokens).toBe(15); + expect(usage?.prompt_tokens_details?.cached_tokens).toBe(2); + }); + + test("Gemini should track usage in streaming responses", async () => { + // Create a mock response for Gemini streaming + const mockResponseData = [ + { + candidates: [ + { + content: { + parts: [{ text: "Hello" }], + }, + }, + ], + }, + { + candidates: [ + { + content: { + parts: [{ text: " world" }], + }, + }, + ], + usageMetadata: { + promptTokenCount: 10, + candidatesTokenCount: 5, + totalTokenCount: 15, + }, + }, + ]; + + const mockResponse = { + ok: true, + status: 200, + headers: new Headers({ "content-type": "application/json" }), + body: new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode(JSON.stringify(mockResponseData)) + ); + controller.close(); + }, + }), + }; + + global.fetch = vi.fn().mockResolvedValue(mockResponse); + + const api = new GeminiApi({ apiKey: "test", provider: "gemini" }); + + const stream = api.chatCompletionStream( + { + model: "gemini-1.5-flash", + messages: [{ role: "user", content: "Hello" }], + stream: true, + }, + new AbortController().signal + ); + + let content = ""; + let usage: CompletionUsage | undefined; + for await (const chunk of stream) { + if (chunk.choices.length > 0) { + content += chunk.choices[0].delta.content ?? ""; + } + if (chunk.usage) { + usage = chunk.usage; + } + } + + expect(content).toBe("Hello world"); + expect(usage).toBeDefined(); + expect(usage?.prompt_tokens).toBe(10); + expect(usage?.completion_tokens).toBe(5); + expect(usage?.total_tokens).toBe(15); + }); + + test("OpenAI should pass through usage in non-streaming responses", async () => { + const api = new OpenAIApi({ apiKey: "test", provider: "openai" }); + + const mockResponse = { + id: "1", + object: "chat.completion", + created: Date.now(), + model: "gpt-4", + choices: [ + { + index: 0, + message: { + role: "assistant", + content: "Hello world", + refusal: null, + }, + finish_reason: "stop", + logprobs: null, + }, + ], + usage: { + prompt_tokens: 10, + completion_tokens: 5, + total_tokens: 15, + }, + }; + + api.openai.chat.completions.create = vi.fn().mockResolvedValue(mockResponse); + + const response = await api.chatCompletionNonStream( + { + model: "gpt-4", + messages: [{ role: "user", content: "Hello" }], + stream: false, + }, + new AbortController().signal + ); + + expect(response.choices[0].message.content).toBe("Hello world"); + expect(response.usage).toBeDefined(); + expect(response.usage?.prompt_tokens).toBe(10); + expect(response.usage?.completion_tokens).toBe(5); + expect(response.usage?.total_tokens).toBe(15); + }); + + test("Anthropic should track usage in non-streaming responses", async () => { + const mockResponse = { + ok: true, + status: 200, + json: vi.fn().mockResolvedValue({ + id: "msg_123", + content: [{ text: "Hello world" }], + usage: { + input_tokens: 10, + output_tokens: 5, + cache_read_input_tokens: 2, + }, + }), + }; + + global.fetch = vi.fn().mockResolvedValue(mockResponse); + + const api = new AnthropicApi({ apiKey: "test", provider: "anthropic" }); + + const response = await api.chatCompletionNonStream( + { + model: "claude-3", + messages: [{ role: "user", content: "Hello" }], + stream: false, + }, + new AbortController().signal + ); + + 
expect(response.choices[0].message.content).toBe("Hello world"); + expect(response.usage).toBeDefined(); + expect(response.usage?.prompt_tokens).toBe(10); + expect(response.usage?.completion_tokens).toBe(5); + expect(response.usage?.total_tokens).toBe(15); + expect(response.usage?.prompt_tokens_details?.cached_tokens).toBe(2); + }); + + test("Gemini should track usage in non-streaming responses", async () => { + // Gemini non-streaming uses the streaming method internally + const mockResponseData = [ + { + candidates: [ + { + content: { + parts: [{ text: "Hello world" }], + }, + }, + ], + usageMetadata: { + promptTokenCount: 10, + candidatesTokenCount: 5, + totalTokenCount: 15, + }, + }, + ]; + + const mockResponse = { + ok: true, + status: 200, + headers: new Headers({ "content-type": "application/json" }), + body: new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode(JSON.stringify(mockResponseData)) + ); + controller.close(); + }, + }), + }; + + global.fetch = vi.fn().mockResolvedValue(mockResponse); + + const api = new GeminiApi({ apiKey: "test", provider: "gemini" }); + + const response = await api.chatCompletionNonStream( + { + model: "gemini-1.5-flash", + messages: [{ role: "user", content: "Hello" }], + stream: false, + }, + new AbortController().signal + ); + + expect(response.choices[0].message.content).toBe("Hello world"); + expect(response.usage).toBeDefined(); + expect(response.usage?.prompt_tokens).toBe(10); + expect(response.usage?.completion_tokens).toBe(5); + expect(response.usage?.total_tokens).toBe(15); + }); +}); \ No newline at end of file
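
A minimal consumer sketch (editorial addition, not part of the patch): because the adapter now holds back the usage chunk and re-emits it only after all content chunks, a caller can accumulate delta text and then read usage from whichever chunk carries it, exactly as the tests above do. The helper below assumes only the OpenAI chunk types already imported by this package; collectStream is a hypothetical name, not an API added by this change.

import {
  ChatCompletionChunk,
  CompletionUsage,
} from "openai/resources/index.js";

// Drain an adapter stream, concatenating delta content and capturing
// the usage chunk, which the patched OpenAI adapter yields last.
async function collectStream(
  stream: AsyncIterable<ChatCompletionChunk>,
): Promise<{ content: string; usage?: CompletionUsage }> {
  let content = "";
  let usage: CompletionUsage | undefined;
  for await (const chunk of stream) {
    if (chunk.choices.length > 0) {
      content += chunk.choices[0].delta.content ?? "";
    }
    if (chunk.usage) {
      usage = chunk.usage;
    }
  }
  return { content, usage };
}

Because the usage chunk has an empty choices array, the same loop works unchanged for providers (Anthropic, Gemini) whose adapters already interleave usage into their converted chunks.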