From ea0c687912b7e789c2e66e7728a2eeed9843a6b6 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Mon, 21 Oct 2024 11:26:18 +0200 Subject: [PATCH 01/57] debug code --- .../azure-openai/azure-openai-chat-client.ts | 39 +- .../azure-openai/azure-openai-line-decoder.ts | 112 +++++ .../azure-openai/azure-openai-streaming.ts | 398 ++++++++++++++++++ .../src/foundation-models/azure-openai.ts | 16 + sample-code/src/server.ts | 13 + 5 files changed, 577 insertions(+), 1 deletion(-) create mode 100644 packages/foundation-models/src/azure-openai/azure-openai-line-decoder.ts create mode 100644 packages/foundation-models/src/azure-openai/azure-openai-streaming.ts diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts index bfa1f0b1..98bc79a7 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts @@ -7,6 +7,7 @@ import { import { apiVersion, type AzureOpenAiChatModel } from './model-types.js'; import { AzureOpenAiChatCompletionResponse } from './azure-openai-chat-completion-response.js'; import type { AzureOpenAiCreateChatCompletionRequest } from './client/inference/schema/index.js'; +import { Stream } from './azure-openai-streaming.js'; /** * Azure OpenAI client for chat completion. @@ -16,7 +17,7 @@ export class AzureOpenAiChatClient { * Creates an instance of the Azure OpenAI chat client. * @param modelDeployment - This configuration is used to retrieve a deployment. Depending on the configuration use either the given deployment ID or the model name to retrieve matching deployments. If model and deployment ID are given, the model is verified against the deployment. */ - constructor(private modelDeployment: ModelDeployment) {} + constructor(private modelDeployment: ModelDeployment) { } /** * Creates a completion for the chat messages. @@ -44,4 +45,40 @@ export class AzureOpenAiChatClient { ); return new AzureOpenAiChatCompletionResponse(response); } + + async runWithStream( + data: AzureOpenAiCreateChatCompletionRequest, + requestConfig?: CustomRequestConfig + ): Promise { + data = { ...data, stream: true }; + requestConfig = { ...requestConfig, responseType: 'stream' } as any; + + const deploymentId = await getDeploymentId( + this.modelDeployment, + 'azure-openai' + ); + const resourceGroup = getResourceGroup(this.modelDeployment); + const response = await executeRequest( + { + url: `/inference/deployments/${deploymentId}/chat/completions`, + apiVersion, + resourceGroup + }, + data, + requestConfig + ); + + // const reader = response.data.getReader(); + + // const result = await reader.read(); + // console.log(result.toString()); + + response.data.on('data', (data: any) => { + console.log(data.toString()); + }) + + // const stream = Stream.fromSSEResponse(response, new AbortController()); + + return; + } } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-line-decoder.ts b/packages/foundation-models/src/azure-openai/azure-openai-line-decoder.ts new file mode 100644 index 00000000..630bdc6f --- /dev/null +++ b/packages/foundation-models/src/azure-openai/azure-openai-line-decoder.ts @@ -0,0 +1,112 @@ +type Bytes = string | ArrayBuffer | Uint8Array | Buffer | null | undefined; + +/** + * A re-implementation of httpx's `LineDecoder` in Python that handles incrementally + * reading lines from text. 
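+ *
+ * A minimal usage sketch (illustrative chunk values, assuming input that
+ * arrives split mid-line across chunks):
+ *
+ *   const decoder = new LineDecoder();
+ *   decoder.decode('data: {"a":1}\ndata:'); // => ['data: {"a":1}'], 'data:' buffered
+ *   decoder.decode(' {"b":2}\n');           // => ['data: {"b":2}']
+ *   decoder.flush();                        // => [] (buffer already empty)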
+ * + * https://github.com/encode/httpx/blob/920333ea98118e9cf617f246905d7b202510941c/httpx/_decoders.py#L258 + */ +export class LineDecoder { + // prettier-ignore + static NEWLINE_CHARS = new Set(['\n', '\r']); + static NEWLINE_REGEXP = /\r\n|[\n\r]/g; + + buffer: string[]; + trailingCR: boolean; + textDecoder: any; // TextDecoder found in browsers; not typed to avoid pulling in either "dom" or "node" types. + + constructor() { + this.buffer = []; + this.trailingCR = false; + } + + decode(chunk: Bytes): string[] { + let text = this.decodeText(chunk); + + if (this.trailingCR) { + text = '\r' + text; + this.trailingCR = false; + } + if (text.endsWith('\r')) { + this.trailingCR = true; + text = text.slice(0, -1); + } + + if (!text) { + return []; + } + + const trailingNewline = LineDecoder.NEWLINE_CHARS.has(text[text.length - 1] || ''); + let lines = text.split(LineDecoder.NEWLINE_REGEXP); + + // if there is a trailing new line then the last entry will be an empty + // string which we don't care about + if (trailingNewline) { + lines.pop(); + } + + if (lines.length === 1 && !trailingNewline) { + this.buffer.push(lines[0]!); + return []; + } + + if (this.buffer.length > 0) { + lines = [this.buffer.join('') + lines[0], ...lines.slice(1)]; + this.buffer = []; + } + + if (!trailingNewline) { + this.buffer = [lines.pop() || '']; + } + + return lines; + } + + decodeText(bytes: Bytes): string { + if (bytes == null) return ''; + if (typeof bytes === 'string') return bytes; + + // Node: + if (typeof Buffer !== 'undefined') { + if (bytes instanceof Buffer) { + return bytes.toString(); + } + if (bytes instanceof Uint8Array) { + return Buffer.from(bytes).toString(); + } + + throw new Error( + `Unexpected: received non-Uint8Array (${bytes.constructor.name}) stream chunk in an environment with a global "Buffer" defined, which this library assumes to be Node. Please report this error.`, + ); + } + + // Browser + if (typeof TextDecoder !== 'undefined') { + if (bytes instanceof Uint8Array || bytes instanceof ArrayBuffer) { + this.textDecoder ??= new TextDecoder('utf8'); + return this.textDecoder.decode(bytes); + } + + throw new Error( + `Unexpected: received non-Uint8Array/ArrayBuffer (${ + (bytes as any).constructor.name + }) in a web platform. Please report this error.`, + ); + } + + throw new Error( + `Unexpected: neither Buffer nor TextDecoder are available as globals. 
Please report this error.`, + ); + } + + flush(): string[] { + if (!this.buffer.length && !this.trailingCR) { + return []; + } + + const lines = [this.buffer.join('')]; + this.buffer = []; + this.trailingCR = false; + return lines; + } +} \ No newline at end of file diff --git a/packages/foundation-models/src/azure-openai/azure-openai-streaming.ts b/packages/foundation-models/src/azure-openai/azure-openai-streaming.ts new file mode 100644 index 00000000..2e3c91e9 --- /dev/null +++ b/packages/foundation-models/src/azure-openai/azure-openai-streaming.ts @@ -0,0 +1,398 @@ +import { HttpResponse } from '@sap-cloud-sdk/http-client'; +import { LineDecoder } from './azure-openai-line-decoder.js'; + +type Bytes = string | ArrayBuffer | Uint8Array | Buffer | null | undefined; + +export type ServerSentEvent = { + event: string | null; + data: string; + raw: string[]; +}; + +export class Stream implements AsyncIterable { + controller: AbortController; + + constructor( + private iterator: () => AsyncIterator, + controller: AbortController, + ) { + this.controller = controller; + } + + static fromSSEResponse(response: HttpResponse, controller: AbortController) { + let consumed = false; + + async function* iterator(): AsyncIterator { + if (consumed) { + throw new Error('Cannot iterate over a consumed stream, use `.tee()` to split the stream.'); + } + consumed = true; + let done = false; + try { + for await (const sse of _iterSSEMessages(response, controller)) { + if (done) continue; + + if (sse.data.startsWith('[DONE]')) { + done = true; + continue; + } + + if (sse.event === null) { + let data; + + try { + data = JSON.parse(sse.data); + } catch (e) { + console.error(`Could not parse message into JSON:`, sse.data); + console.error(`From chunk:`, sse.raw); + throw e; + } + + if (data && data.error) { + throw new Error(data.error); + } + + yield data; + } else { + let data; + try { + data = JSON.parse(sse.data); + } catch (e) { + console.error(`Could not parse message into JSON:`, sse.data); + console.error(`From chunk:`, sse.raw); + throw e; + } + // TODO: Is this where the error should be thrown? + if (sse.event == 'error') { + // throw new Error(data.error, data.message); + throw new Error(data.error); + } + yield { event: sse.event, data: data } as any; + } + } + done = true; + } catch (e) { + // If the user calls `stream.controller.abort()`, we should exit without throwing. + if (e instanceof Error && e.name === 'AbortError') return; + throw e; + } finally { + // If the user `break`s, abort the ongoing request. + if (!done) controller.abort(); + } + } + + return new Stream(iterator, controller); + } + + /** + * Generates a Stream from a newline-separated ReadableStream + * where each item is a JSON value. 
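+   *
+   * Round-trip sketch (`someStream` is a hypothetical Stream instance;
+   * `toReadableStream` below emits exactly this newline-separated format):
+   *
+   *   const copy = Stream.fromReadableStream(
+   *     someStream.toReadableStream(),
+   *     new AbortController()
+   *   );
+   *   for await (const item of copy) {
+   *     // each `item` is one JSON.parse'd value
+   *   }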
+ */ + static fromReadableStream(readableStream: ReadableStream, controller: AbortController) { + let consumed = false; + + async function* iterLines(): AsyncGenerator { + const lineDecoder = new LineDecoder(); + + const iter = readableStreamAsyncIterable(readableStream); + for await (const chunk of iter) { + for (const line of lineDecoder.decode(chunk)) { + yield line; + } + } + + for (const line of lineDecoder.flush()) { + yield line; + } + } + + async function* iterator(): AsyncIterator { + if (consumed) { + throw new Error('Cannot iterate over a consumed stream, use `.tee()` to split the stream.'); + } + consumed = true; + let done = false; + try { + for await (const line of iterLines()) { + if (done) continue; + if (line) yield JSON.parse(line); + } + done = true; + } catch (e) { + // If the user calls `stream.controller.abort()`, we should exit without throwing. + if (e instanceof Error && e.name === 'AbortError') return; + throw e; + } finally { + // If the user `break`s, abort the ongoing request. + if (!done) controller.abort(); + } + } + + return new Stream(iterator, controller); + } + + [Symbol.asyncIterator](): AsyncIterator { + return this.iterator(); + } + + /** + * Splits the stream into two streams which can be + * independently read from at different speeds. + */ + tee(): [Stream, Stream] { + const left: Array>> = []; + const right: Array>> = []; + const iterator = this.iterator(); + + const teeIterator = (queue: Array>>): AsyncIterator => { + return { + next: () => { + if (queue.length === 0) { + const result = iterator.next(); + left.push(result); + right.push(result); + } + return queue.shift()!; + }, + }; + }; + + return [ + new Stream(() => teeIterator(left), this.controller), + new Stream(() => teeIterator(right), this.controller), + ]; + } + + /** + * Converts this stream to a newline-separated ReadableStream of + * JSON stringified values in the stream + * which can be turned back into a Stream with `Stream.fromReadableStream()`. + */ + toReadableStream(): ReadableStream { + const self = this; + let iter: AsyncIterator; + const encoder = new TextEncoder(); + + return new ReadableStream({ + async start() { + iter = self[Symbol.asyncIterator](); + }, + async pull(ctrl: any) { + try { + const { value, done } = await iter.next(); + if (done) return ctrl.close(); + + const bytes = encoder.encode(JSON.stringify(value) + '\n'); + + ctrl.enqueue(bytes); + } catch (err) { + ctrl.error(err); + } + }, + async cancel() { + await iter.return?.(); + }, + }); + } +} + +export async function* _iterSSEMessages( + response: HttpResponse, + controller: AbortController, +): AsyncGenerator { + if (!response.data) { + controller.abort(); + throw new Error(`Attempted to iterate over a response with no body`); + } + + const sseDecoder = new SSEDecoder(); + const lineDecoder = new LineDecoder(); + +// console.log(response.data); + + const iter = readableStreamAsyncIterable(response.data); + for await (const sseChunk of iterSSEChunks(iter)) { + for (const line of lineDecoder.decode(sseChunk)) { + const sse = sseDecoder.decode(line); + if (sse) yield sse; + } + } + + for (const line of lineDecoder.flush()) { + const sse = sseDecoder.decode(line); + if (sse) yield sse; + } +} + +/** + * Given an async iterable iterator, iterates over it and yields full + * SSE chunks, i.e. yields when a double new-line is encountered. 
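+ *
+ * For example (illustrative bytes): feeding the chunks 'data: one\n\nda'
+ * and 'ta: two\n\n' yields 'data: one\n\n' right away, buffers the rest
+ * until the second chunk completes it, and then yields 'data: two\n\n'.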
+ */ +async function* iterSSEChunks(iterator: AsyncIterableIterator): AsyncGenerator { + let data = new Uint8Array(); + + for await (const chunk of iterator) { + if (chunk == null) { + continue; + } + + const binaryChunk = + chunk instanceof ArrayBuffer ? new Uint8Array(chunk) + : typeof chunk === 'string' ? new TextEncoder().encode(chunk) + : chunk; + + let newData = new Uint8Array(data.length + binaryChunk.length); + newData.set(data); + newData.set(binaryChunk, data.length); + data = newData; + + let patternIndex; + while ((patternIndex = findDoubleNewlineIndex(data)) !== -1) { + yield data.slice(0, patternIndex); + data = data.slice(patternIndex); + } + } + + if (data.length > 0) { + yield data; + } +} + +function findDoubleNewlineIndex(buffer: Uint8Array): number { + // This function searches the buffer for the end patterns (\r\r, \n\n, \r\n\r\n) + // and returns the index right after the first occurrence of any pattern, + // or -1 if none of the patterns are found. + const newline = 0x0a; // \n + const carriage = 0x0d; // \r + + for (let i = 0; i < buffer.length - 2; i++) { + if (buffer[i] === newline && buffer[i + 1] === newline) { + // \n\n + return i + 2; + } + if (buffer[i] === carriage && buffer[i + 1] === carriage) { + // \r\r + return i + 2; + } + if ( + buffer[i] === carriage && + buffer[i + 1] === newline && + i + 3 < buffer.length && + buffer[i + 2] === carriage && + buffer[i + 3] === newline + ) { + // \r\n\r\n + return i + 4; + } + } + + return -1; +} + +class SSEDecoder { + private data: string[]; + private event: string | null; + private chunks: string[]; + + constructor() { + this.event = null; + this.data = []; + this.chunks = []; + } + + decode(line: string) { + if (line.endsWith('\r')) { + line = line.substring(0, line.length - 1); + } + + if (!line) { + // empty line and we didn't previously encounter any messages + if (!this.event && !this.data.length) return null; + + const sse: ServerSentEvent = { + event: this.event, + data: this.data.join('\n'), + raw: this.chunks, + }; + + this.event = null; + this.data = []; + this.chunks = []; + + return sse; + } + + this.chunks.push(line); + + if (line.startsWith(':')) { + return null; + } + + let [fieldname, _, value] = partition(line, ':'); + + if (value.startsWith(' ')) { + value = value.substring(1); + } + + if (fieldname === 'event') { + this.event = value; + } else if (fieldname === 'data') { + this.data.push(value); + } + + return null; + } +} + +/** This is an internal helper function that's just used for testing */ +export function _decodeChunks(chunks: string[]): string[] { + const decoder = new LineDecoder(); + const lines: string[] = []; + for (const chunk of chunks) { + lines.push(...decoder.decode(chunk)); + } + + return lines; +} + +function partition(str: string, delimiter: string): [string, string, string] { + const index = str.indexOf(delimiter); + if (index !== -1) { + return [str.substring(0, index), delimiter, str.substring(index + delimiter.length)]; + } + + return [str, '', '']; +} + +/** + * Most browsers don't yet have async iterable support for ReadableStream, + * and Node has a very different way of reading bytes from its "ReadableStream". 
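+ *
+ * Usage sketch (assuming `body` is a WHATWG ReadableStream of Uint8Array
+ * chunks without native async iteration):
+ *
+ *   for await (const chunk of readableStreamAsyncIterable(body)) {
+ *     // `chunk` is the `value` of each `reader.read()` result
+ *   }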
+ * + * This polyfill was pulled from https://github.com/MattiasBuelens/web-streams-polyfill/pull/122#issuecomment-1627354490 + */ +export function readableStreamAsyncIterable(stream: any): AsyncIterableIterator { + if (stream[Symbol.asyncIterator]) return stream; + + const reader = stream.getReader(); + return { + async next() { + try { + const result = await reader.read(); + if (result?.done) reader.releaseLock(); // release lock when stream becomes closed + return result; + } catch (e) { + reader.releaseLock(); // release lock when stream becomes errored + throw e; + } + }, + async return() { + const cancelPromise = reader.cancel(); + reader.releaseLock(); + await cancelPromise; + return { done: true, value: undefined }; + }, + [Symbol.asyncIterator]() { + return this; + }, + }; +} diff --git a/sample-code/src/foundation-models/azure-openai.ts b/sample-code/src/foundation-models/azure-openai.ts index bba43409..81cc45ae 100644 --- a/sample-code/src/foundation-models/azure-openai.ts +++ b/sample-code/src/foundation-models/azure-openai.ts @@ -44,3 +44,19 @@ export async function computeEmbedding(): Promise return response; } + + +export async function chatCompletionWithStream(): Promise { + const stream = await new AzureOpenAiChatClient('gpt-35-turbo').runWithStream({ + messages: [{ role: 'user', content: 'What is the capital of France?' }] + }); + + // Use getContent() to access the content responded by LLM. + // logger.info(response.getContent()); + +// for await (const chunk of stream) { +// logger.info(chunk.choices[0]?.delta?.content || ""); +// } + + // return response; +} diff --git a/sample-code/src/server.ts b/sample-code/src/server.ts index 6307bb30..60834b3f 100644 --- a/sample-code/src/server.ts +++ b/sample-code/src/server.ts @@ -2,6 +2,7 @@ import express from 'express'; import { chatCompletion, + chatCompletionWithStream, computeEmbedding // eslint-disable-next-line import/no-internal-modules } from './foundation-models/azure-openai.js'; @@ -39,6 +40,18 @@ app.get('/azure-openai/chat-completion', async (req, res) => { } }); +app.get('/azure-openai/chat-completion-stream', async (req, res)=> { + try { + await chatCompletionWithStream(); + } catch (error: any) { + console.error(error); + res + .status(500) + .send('Yikes, vibes are off apparently 😬 -> ' + error.message); + } +}); + + app.get('/azure-openai/embedding', async (req, res) => { try { const response = await computeEmbedding(); From 5d0985f3f510c7fb050be7480dcfb2b586cf4094 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Tue, 22 Oct 2024 11:24:40 +0200 Subject: [PATCH 02/57] Make streaming work --- .../azure-openai/azure-openai-chat-client.ts | 52 +++++++------------ .../azure-openai/azure-openai-streaming.ts | 37 +------------ .../src/foundation-models/azure-openai.ts | 18 +++---- sample-code/src/server.ts | 4 +- 4 files changed, 30 insertions(+), 81 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts index 98bc79a7..34454250 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts @@ -8,6 +8,7 @@ import { apiVersion, type AzureOpenAiChatModel } from './model-types.js'; import { AzureOpenAiChatCompletionResponse } from './azure-openai-chat-completion-response.js'; import type { AzureOpenAiCreateChatCompletionRequest } from './client/inference/schema/index.js'; import { Stream } from 
'./azure-openai-streaming.js'; +import { HttpResponse } from '@sap-cloud-sdk/http-client'; /** * Azure OpenAI client for chat completion. @@ -29,36 +30,36 @@ export class AzureOpenAiChatClient { data: AzureOpenAiCreateChatCompletionRequest, requestConfig?: CustomRequestConfig ): Promise { - const deploymentId = await getDeploymentId( - this.modelDeployment, - 'azure-openai' - ); - const resourceGroup = getResourceGroup(this.modelDeployment); - const response = await executeRequest( - { - url: `/inference/deployments/${deploymentId}/chat/completions`, - apiVersion, - resourceGroup - }, - data, - requestConfig - ); + const response = this._executeRequest(data, requestConfig); return new AzureOpenAiChatCompletionResponse(response); } - async runWithStream( + async stream( data: AzureOpenAiCreateChatCompletionRequest, requestConfig?: CustomRequestConfig - ): Promise { - data = { ...data, stream: true }; - requestConfig = { ...requestConfig, responseType: 'stream' } as any; + ): Promise> { + // TODO: The return type `any` should actually be the type of the stream response. + // But `createChatCompletionStreamResponse` is first available in Azure OpenAI spec preview version 2024-08-01. + const response = this._executeRequest({ + ...data, + stream: true + }, { + ...requestConfig, + responseType: 'stream' + }); + return Stream.fromSSEResponse(response, new AbortController()); + } + private async _executeRequest( + data: AzureOpenAiCreateChatCompletionRequest, + requestConfig?: CustomRequestConfig + ): Promise { const deploymentId = await getDeploymentId( this.modelDeployment, 'azure-openai' ); const resourceGroup = getResourceGroup(this.modelDeployment); - const response = await executeRequest( + return await executeRequest( { url: `/inference/deployments/${deploymentId}/chat/completions`, apiVersion, @@ -67,18 +68,5 @@ export class AzureOpenAiChatClient { data, requestConfig ); - - // const reader = response.data.getReader(); - - // const result = await reader.read(); - // console.log(result.toString()); - - response.data.on('data', (data: any) => { - console.log(data.toString()); - }) - - // const stream = Stream.fromSSEResponse(response, new AbortController()); - - return; } } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-streaming.ts b/packages/foundation-models/src/azure-openai/azure-openai-streaming.ts index 2e3c91e9..7735ff5c 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-streaming.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-streaming.ts @@ -94,7 +94,7 @@ export class Stream implements AsyncIterable { async function* iterLines(): AsyncGenerator { const lineDecoder = new LineDecoder(); - const iter = readableStreamAsyncIterable(readableStream); + const iter = readableStream; for await (const chunk of iter) { for (const line of lineDecoder.decode(chunk)) { yield line; @@ -210,7 +210,7 @@ export async function* _iterSSEMessages( // console.log(response.data); - const iter = readableStreamAsyncIterable(response.data); + const iter = response.data; for await (const sseChunk of iterSSEChunks(iter)) { for (const line of lineDecoder.decode(sseChunk)) { const sse = sseDecoder.decode(line); @@ -363,36 +363,3 @@ function partition(str: string, delimiter: string): [string, string, string] { return [str, '', '']; } - -/** - * Most browsers don't yet have async iterable support for ReadableStream, - * and Node has a very different way of reading bytes from its "ReadableStream". 
- * - * This polyfill was pulled from https://github.com/MattiasBuelens/web-streams-polyfill/pull/122#issuecomment-1627354490 - */ -export function readableStreamAsyncIterable(stream: any): AsyncIterableIterator { - if (stream[Symbol.asyncIterator]) return stream; - - const reader = stream.getReader(); - return { - async next() { - try { - const result = await reader.read(); - if (result?.done) reader.releaseLock(); // release lock when stream becomes closed - return result; - } catch (e) { - reader.releaseLock(); // release lock when stream becomes errored - throw e; - } - }, - async return() { - const cancelPromise = reader.cancel(); - reader.releaseLock(); - await cancelPromise; - return { done: true, value: undefined }; - }, - [Symbol.asyncIterator]() { - return this; - }, - }; -} diff --git a/sample-code/src/foundation-models/azure-openai.ts b/sample-code/src/foundation-models/azure-openai.ts index 81cc45ae..0953cccc 100644 --- a/sample-code/src/foundation-models/azure-openai.ts +++ b/sample-code/src/foundation-models/azure-openai.ts @@ -46,17 +46,11 @@ export async function computeEmbedding(): Promise } -export async function chatCompletionWithStream(): Promise { - const stream = await new AzureOpenAiChatClient('gpt-35-turbo').runWithStream({ - messages: [{ role: 'user', content: 'What is the capital of France?' }] +export async function chatCompletionStream(): Promise { + const stream = await new AzureOpenAiChatClient('gpt-35-turbo').stream({ + messages: [{ role: 'user', content: 'What is the capital of France?' }], }); - - // Use getContent() to access the content responded by LLM. - // logger.info(response.getContent()); - -// for await (const chunk of stream) { -// logger.info(chunk.choices[0]?.delta?.content || ""); -// } - - // return response; + for await (const chunk of stream) { + logger.info(JSON.stringify(chunk)); +} } diff --git a/sample-code/src/server.ts b/sample-code/src/server.ts index 60834b3f..35648af3 100644 --- a/sample-code/src/server.ts +++ b/sample-code/src/server.ts @@ -2,7 +2,7 @@ import express from 'express'; import { chatCompletion, - chatCompletionWithStream, + chatCompletionStream, computeEmbedding // eslint-disable-next-line import/no-internal-modules } from './foundation-models/azure-openai.js'; @@ -42,7 +42,7 @@ app.get('/azure-openai/chat-completion', async (req, res) => { app.get('/azure-openai/chat-completion-stream', async (req, res)=> { try { - await chatCompletionWithStream(); + await chatCompletionStream(); } catch (error: any) { console.error(error); res From a74cf6bebf125184d00bacb8f203ed4b121f7096 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Tue, 22 Oct 2024 12:02:26 +0200 Subject: [PATCH 03/57] fix: remove await --- .../src/azure-openai/azure-openai-chat-client.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts index 34454250..85f25429 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts @@ -46,7 +46,7 @@ export class AzureOpenAiChatClient { }, { ...requestConfig, responseType: 'stream' - }); + } as any); return Stream.fromSSEResponse(response, new AbortController()); } @@ -59,7 +59,7 @@ export class AzureOpenAiChatClient { 'azure-openai' ); const resourceGroup = getResourceGroup(this.modelDeployment); - return await executeRequest( + return executeRequest( { 
url: `/inference/deployments/${deploymentId}/chat/completions`, apiVersion, From 90254519aca7d535be6d8e98550a200f4dc5f92d Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Tue, 22 Oct 2024 12:05:05 +0200 Subject: [PATCH 04/57] fix: await again --- .../src/azure-openai/azure-openai-chat-client.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts index 85f25429..849cb6f3 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts @@ -30,7 +30,7 @@ export class AzureOpenAiChatClient { data: AzureOpenAiCreateChatCompletionRequest, requestConfig?: CustomRequestConfig ): Promise { - const response = this._executeRequest(data, requestConfig); + const response = await this._executeRequest(data, requestConfig); return new AzureOpenAiChatCompletionResponse(response); } @@ -40,7 +40,7 @@ export class AzureOpenAiChatClient { ): Promise> { // TODO: The return type `any` should actually be the type of the stream response. // But `createChatCompletionStreamResponse` is first available in Azure OpenAI spec preview version 2024-08-01. - const response = this._executeRequest({ + const response = await this._executeRequest({ ...data, stream: true }, { From fc12de05b2f53ce6e73a12245d2291ff35e8fd00 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Wed, 23 Oct 2024 10:52:59 +0200 Subject: [PATCH 05/57] small changes --- .../src/azure-openai/azure-openai-chat-client.ts | 2 +- sample-code/src/foundation-models/azure-openai.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts index 849cb6f3..1323aca6 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts @@ -46,7 +46,7 @@ export class AzureOpenAiChatClient { }, { ...requestConfig, responseType: 'stream' - } as any); + }); return Stream.fromSSEResponse(response, new AbortController()); } diff --git a/sample-code/src/foundation-models/azure-openai.ts b/sample-code/src/foundation-models/azure-openai.ts index 0953cccc..ca663c89 100644 --- a/sample-code/src/foundation-models/azure-openai.ts +++ b/sample-code/src/foundation-models/azure-openai.ts @@ -52,5 +52,5 @@ export async function chatCompletionStream(): Promise { }); for await (const chunk of stream) { logger.info(JSON.stringify(chunk)); -} + } } From d5d38bd55fe6e68cd60e4eeeb31c10b46e48e18c Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Wed, 23 Oct 2024 10:59:58 +0200 Subject: [PATCH 06/57] chore: add missing javadoc --- .../src/azure-openai/azure-openai-chat-client.ts | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts index 1323aca6..4317e884 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts @@ -6,9 +6,9 @@ import { } from '@sap-ai-sdk/ai-api/internal.js'; import { apiVersion, type AzureOpenAiChatModel } from './model-types.js'; import { AzureOpenAiChatCompletionResponse } from 
'./azure-openai-chat-completion-response.js'; -import type { AzureOpenAiCreateChatCompletionRequest } from './client/inference/schema/index.js'; import { Stream } from './azure-openai-streaming.js'; -import { HttpResponse } from '@sap-cloud-sdk/http-client'; +import type { HttpResponse } from '@sap-cloud-sdk/http-client'; +import type { AzureOpenAiCreateChatCompletionRequest } from './client/inference/schema/index.js'; /** * Azure OpenAI client for chat completion. @@ -34,6 +34,12 @@ export class AzureOpenAiChatClient { return new AzureOpenAiChatCompletionResponse(response); } + /** + * Creates a completion stream for the chat messages. + * @param data - The input parameters for the chat completion. + * @param requestConfig - The request configuration. + * @returns The completion stream. + */ async stream( data: AzureOpenAiCreateChatCompletionRequest, requestConfig?: CustomRequestConfig From 07dda3584227dc1739ebb9a90fa613208f37c7de Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Wed, 23 Oct 2024 14:02:32 +0200 Subject: [PATCH 07/57] wip --- .../azure-openai/azure-openai-chat-client.ts | 20 +++++++- ...-openai-chat-completion-stream-response.ts | 48 +++++++++++++++++++ .../azure-openai/azure-openai-streaming.ts | 3 +- .../src/foundation-models/azure-openai.ts | 15 ++++-- 4 files changed, 79 insertions(+), 7 deletions(-) create mode 100644 packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts index 4317e884..fb9f6f45 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts @@ -48,7 +48,10 @@ export class AzureOpenAiChatClient { // But `createChatCompletionStreamResponse` is first available in Azure OpenAI spec preview version 2024-08-01. 
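     // With `stream_options.include_usage` (added below), the service is
     // expected to send one extra final chunk whose `choices` array is empty
     // and whose `usage` field carries token counts for the whole completion.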
const response = await this._executeRequest({ ...data, - stream: true + stream: true, + stream_options: { + include_usage: true + } }, { ...requestConfig, responseType: 'stream' @@ -56,6 +59,21 @@ export class AzureOpenAiChatClient { return Stream.fromSSEResponse(response, new AbortController()); } + async * processStream(stream: Stream): AsyncIterator { + for await (const chunk of stream) { + // Process each item here + yield chunk.getDeltaContent(); + } + } + + async streamString( + data: AzureOpenAiCreateChatCompletionRequest, + requestConfig?: CustomRequestConfig + ): Promise> { + const originalStream = this.stream(data, requestConfig); + return new Stream(this.processStream, new AbortController()); + } + private async _executeRequest( data: AzureOpenAiCreateChatCompletionRequest, requestConfig?: CustomRequestConfig diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts new file mode 100644 index 00000000..c10f6c41 --- /dev/null +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts @@ -0,0 +1,48 @@ +import { createLogger } from '@sap-cloud-sdk/util'; +import type { HttpResponse } from '@sap-cloud-sdk/http-client'; +import type { AzureOpenAiCreateChatCompletionResponse } from './client/inference/schema/index.js'; + +const logger = createLogger({ + package: 'foundation-models', + messageContext: 'azure-openai-chat-completion-stream-response' +}); + +/** + * Azure OpenAI chat completion stream response. + */ +export class AzureOpenAiChatCompletionStreamResponse { + /** + * The chat completion stream response. + */ + constructor(public readonly data: any) { + this.data = data; + } + + /** + * Reason for stopping the completion stream. + * @param choiceIndex - The index of the choice to parse. + * @returns The finish reason. + */ + getFinishReason( + choiceIndex = 0 + ): this['data']['choices'][0]['finish_reason'] { + this.logInvalidChoiceIndex(choiceIndex); + return this.data.choices[choiceIndex]?.finish_reason; + } + + /** + * Parses the Azure OpenAI response and returns the delta content. + * @param choiceIndex - The index of the choice to parse. + * @returns The message delta content. 
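+   *
+   * For an illustrative chunk `{ choices: [{ delta: { content: 'Paris' } }] }`
+   * this returns 'Paris'; for the final usage-only chunk, where `choices` is
+   * empty, it returns `undefined`.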
+ */ + getDeltaContent(choiceIndex = 0): string | undefined | null { + this.logInvalidChoiceIndex(choiceIndex); + return this.data.choices[choiceIndex]?.delta?.content; + } + + private logInvalidChoiceIndex(choiceIndex: number): void { + if (choiceIndex < 0 || choiceIndex >= this.data.choices.length) { + logger.error(`Choice index ${choiceIndex} is out of bounds.`); + } + } +} diff --git a/packages/foundation-models/src/azure-openai/azure-openai-streaming.ts b/packages/foundation-models/src/azure-openai/azure-openai-streaming.ts index 7735ff5c..097af945 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-streaming.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-streaming.ts @@ -1,5 +1,6 @@ import { HttpResponse } from '@sap-cloud-sdk/http-client'; import { LineDecoder } from './azure-openai-line-decoder.js'; +import { AzureOpenAiChatCompletionStreamResponse } from './azure-openai-chat-completion-stream-response.js'; type Bytes = string | ArrayBuffer | Uint8Array | Buffer | null | undefined; @@ -52,7 +53,7 @@ export class Stream implements AsyncIterable { throw new Error(data.error); } - yield data; + yield new AzureOpenAiChatCompletionStreamResponse(data) as any; } else { let data; try { diff --git a/sample-code/src/foundation-models/azure-openai.ts b/sample-code/src/foundation-models/azure-openai.ts index ca663c89..b8ccba93 100644 --- a/sample-code/src/foundation-models/azure-openai.ts +++ b/sample-code/src/foundation-models/azure-openai.ts @@ -47,10 +47,15 @@ export async function computeEmbedding(): Promise export async function chatCompletionStream(): Promise { - const stream = await new AzureOpenAiChatClient('gpt-35-turbo').stream({ - messages: [{ role: 'user', content: 'What is the capital of France?' }], - }); - for await (const chunk of stream) { - logger.info(JSON.stringify(chunk)); + try { + const stream = await new AzureOpenAiChatClient('gpt-35-turbo').stream({ + messages: [{ role: 'user', content: 'What is the capital of France?' 
}], + }); + for await (const chunk of stream) { + logger.info(JSON.stringify(chunk)); + } + } catch (error: any) { + console.log(JSON.stringify(error.response.data.message)); } + } From c21821181c838b652086dd720612b88a1e738ec8 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Wed, 23 Oct 2024 17:55:04 +0200 Subject: [PATCH 08/57] feat: pipe streams --- .../azure-openai/azure-openai-chat-client.ts | 90 ++++++++++++++++--- ...i-chat-completion-stream-chunk-response.ts | 43 +++++++++ ...-openai-chat-completion-stream-response.ts | 39 ++------ .../azure-openai/azure-openai-streaming.ts | 4 +- .../src/foundation-models/azure-openai.ts | 35 ++++---- sample-code/src/server.ts | 3 +- 6 files changed, 149 insertions(+), 65 deletions(-) create mode 100644 packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts index fb9f6f45..16147d5c 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts @@ -9,6 +9,14 @@ import { AzureOpenAiChatCompletionResponse } from './azure-openai-chat-completio import { Stream } from './azure-openai-streaming.js'; import type { HttpResponse } from '@sap-cloud-sdk/http-client'; import type { AzureOpenAiCreateChatCompletionRequest } from './client/inference/schema/index.js'; +import { createLogger } from '@sap-cloud-sdk/util'; +import { AzureOpenAiChatCompletionStreamChunkResponse } from './azure-openai-chat-completion-stream-chunk-response.js'; +import { AzureOpenAiChatCompletionStreamResponse } from './azure-openai-chat-completion-stream-response.js'; + +const logger = createLogger({ + package: 'foundation-models', + messageContext: 'azure-openai-chat-client' +}); /** * Azure OpenAI client for chat completion. @@ -30,23 +38,17 @@ export class AzureOpenAiChatClient { data: AzureOpenAiCreateChatCompletionRequest, requestConfig?: CustomRequestConfig ): Promise { - const response = await this._executeRequest(data, requestConfig); + const response = await this.executeRequest(data, requestConfig); return new AzureOpenAiChatCompletionResponse(response); } - /** - * Creates a completion stream for the chat messages. - * @param data - The input parameters for the chat completion. - * @param requestConfig - The request configuration. - * @returns The completion stream. - */ - async stream( + private async fromSSEResponse( data: AzureOpenAiCreateChatCompletionRequest, requestConfig?: CustomRequestConfig ): Promise> { // TODO: The return type `any` should actually be the type of the stream response. // But `createChatCompletionStreamResponse` is first available in Azure OpenAI spec preview version 2024-08-01. - const response = await this._executeRequest({ + const response = await this.executeRequest({ ...data, stream: true, stream_options: { @@ -59,22 +61,49 @@ export class AzureOpenAiChatClient { return Stream.fromSSEResponse(response, new AbortController()); } - async * processStream(stream: Stream): AsyncIterator { + private async * pipeString(stream: Stream) { for await (const chunk of stream) { // Process each item here - yield chunk.getDeltaContent(); + const deltaContent = chunk.getDeltaContent(); + if (!deltaContent) { + continue; + } + yield deltaContent; } } + /** + * Creates a completion stream for the chat messages. 
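+   *
+   * Consumption sketch (`client` stands for an instance of this class;
+   * `finishReason` and `usage` are only set once the stream is drained):
+   *
+   *   const response = await client.stream({
+   *     messages: [{ role: 'user', content: 'Hi' }]
+   *   });
+   *   for await (const chunk of response.stream!) {
+   *     process.stdout.write(chunk.getDeltaContent() ?? '');
+   *   }
+   *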
+ * @param data - The input parameters for the chat completion. + * @param requestConfig - The request configuration. + * @returns The completion stream. + */ + async stream( + data: AzureOpenAiCreateChatCompletionRequest, + requestConfig?: CustomRequestConfig + ): Promise> { + const stream1 = await this.fromSSEResponse(data, requestConfig); + const stream2 = new Stream(() => this.pipeFinishReason(stream1), new AbortController()); + const stream3 = new Stream(() => this.pipeTokenUsage(stream2), new AbortController());; + return stream3; + } + + /** + * Creates a completion stream of the delta string for the chat messages. + * @param data - The input parameters for the chat completion. + * @param requestConfig - The request configuration. + * @returns The completion stream of the delta string. + */ async streamString( data: AzureOpenAiCreateChatCompletionRequest, requestConfig?: CustomRequestConfig ): Promise> { - const originalStream = this.stream(data, requestConfig); - return new Stream(this.processStream, new AbortController()); + const stream1 = await this.stream(data, requestConfig); + const stream2 = new Stream(() => this.pipeString(stream1), new AbortController()); + return stream2; } - private async _executeRequest( + private async executeRequest( data: AzureOpenAiCreateChatCompletionRequest, requestConfig?: CustomRequestConfig ): Promise { @@ -93,4 +122,37 @@ export class AzureOpenAiChatClient { requestConfig ); } + + private async * pipeFinishReason(stream: Stream) { + for await (const chunk of stream) { + const finishReason = chunk.getFinishReason(); + if (finishReason) { + // streamResponse.finishReason = finishReason; + // Do some callback maybe + switch (finishReason) { + case 'content_filter': + throw new Error('Stream finished with content filter hit.'); + case 'length': + throw new Error('Stream finished with token length exceeded.'); + case 'stop': + logger.debug('Stream finished.'); + break; + default: + throw new Error(`Stream finished with unknown reason '${finishReason}'.`); + } + } + yield chunk; + } + } + + private async * pipeTokenUsage(stream: Stream) { + for await (const chunk of stream) { + const usage = chunk.getTokenUsage(); + if (usage) { + // streamResponse.usage = usage; + // Do some callback maybe + } + yield chunk; + } + } } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts new file mode 100644 index 00000000..82f4b0a5 --- /dev/null +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts @@ -0,0 +1,43 @@ +import { createLogger } from '@sap-cloud-sdk/util'; + +const logger = createLogger({ + package: 'foundation-models', + messageContext: 'azure-openai-chat-completion-stream-chunk-response' +}); + +/** + * Azure OpenAI chat completion stream chunk response. + */ +export class AzureOpenAiChatCompletionStreamChunkResponse { + constructor(public readonly chunk: any) { + this.chunk = chunk; + } + + /** + * Usage of tokens in the chunk response. + * @returns Token usage. + */ + getTokenUsage(): this['chunk']['usage'] { + return this.chunk.usage; + } + + /** + * Reason for stopping the completion stream chunk. + * @param choiceIndex - The index of the choice to parse. + * @returns The finish reason. 
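+   *
+   * Illustrative behavior: intermediate chunks carry `finish_reason: null`,
+   * so this returns `null` until the final content chunk reports 'stop',
+   * 'length' or 'content_filter'.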
+ */ + getFinishReason( + choiceIndex = 0 + ): this['chunk']['choices'][0]['finish_reason'] { + return this.chunk.choices[choiceIndex]?.finish_reason; + } + + /** + * Parses the chunk response and returns the delta content. + * @param choiceIndex - The index of the choice to parse. + * @returns The message delta content. + */ + getDeltaContent(choiceIndex = 0): string | undefined | null { + return this.chunk.choices[choiceIndex]?.delta?.content; + } +} diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts index c10f6c41..1aa03f3a 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts @@ -1,6 +1,6 @@ import { createLogger } from '@sap-cloud-sdk/util'; -import type { HttpResponse } from '@sap-cloud-sdk/http-client'; -import type { AzureOpenAiCreateChatCompletionResponse } from './client/inference/schema/index.js'; +import { Stream } from './azure-openai-streaming.js'; +import { AzureOpenAiCompletionUsage } from './client/inference/schema/completion-usage.js'; const logger = createLogger({ package: 'foundation-models', @@ -11,38 +11,11 @@ const logger = createLogger({ * Azure OpenAI chat completion stream response. */ export class AzureOpenAiChatCompletionStreamResponse { - /** - * The chat completion stream response. - */ - constructor(public readonly data: any) { - this.data = data; - } - - /** - * Reason for stopping the completion stream. - * @param choiceIndex - The index of the choice to parse. - * @returns The finish reason. - */ - getFinishReason( - choiceIndex = 0 - ): this['data']['choices'][0]['finish_reason'] { - this.logInvalidChoiceIndex(choiceIndex); - return this.data.choices[choiceIndex]?.finish_reason; - } - /** - * Parses the Azure OpenAI response and returns the delta content. - * @param choiceIndex - The index of the choice to parse. - * @returns The message delta content. 
- */ - getDeltaContent(choiceIndex = 0): string | undefined | null { - this.logInvalidChoiceIndex(choiceIndex); - return this.data.choices[choiceIndex]?.delta?.content; - } + public usage: AzureOpenAiCompletionUsage | undefined; + public finishReason: 'stop' | 'length' | 'content_filter' | undefined; - private logInvalidChoiceIndex(choiceIndex: number): void { - if (choiceIndex < 0 || choiceIndex >= this.data.choices.length) { - logger.error(`Choice index ${choiceIndex} is out of bounds.`); - } + constructor(public stream: Stream) { + this.stream = stream; } } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-streaming.ts b/packages/foundation-models/src/azure-openai/azure-openai-streaming.ts index 097af945..c7660ec6 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-streaming.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-streaming.ts @@ -1,6 +1,6 @@ import { HttpResponse } from '@sap-cloud-sdk/http-client'; import { LineDecoder } from './azure-openai-line-decoder.js'; -import { AzureOpenAiChatCompletionStreamResponse } from './azure-openai-chat-completion-stream-response.js'; +import { AzureOpenAiChatCompletionStreamChunkResponse } from './azure-openai-chat-completion-stream-chunk-response.js'; type Bytes = string | ArrayBuffer | Uint8Array | Buffer | null | undefined; @@ -53,7 +53,7 @@ export class Stream implements AsyncIterable { throw new Error(data.error); } - yield new AzureOpenAiChatCompletionStreamResponse(data) as any; + yield new AzureOpenAiChatCompletionStreamChunkResponse(data) as any; } else { let data; try { diff --git a/sample-code/src/foundation-models/azure-openai.ts b/sample-code/src/foundation-models/azure-openai.ts index b8ccba93..7a1c6d9b 100644 --- a/sample-code/src/foundation-models/azure-openai.ts +++ b/sample-code/src/foundation-models/azure-openai.ts @@ -28,6 +28,26 @@ export async function chatCompletion(): Promise { + const response = await new AzureOpenAiChatClient('gpt-35-turbo').streamString({ + messages: [{ role: 'user', content: 'What is the capital of France?' }] + }); + + let result = ''; + for await (const chunk of response) { + logger.info(`chunk: ${chunk}`); + result += chunk; + } + + // logger.info(`finish reason: ${response.finishReason}`); + // logger.info(`usage: ${JSON.stringify(response.usage)}`); + return result; +} + /** * Embed 'Hello, world!' using the OpenAI ADA model. * @returns The response from Azure OpenAI containing the embedding vector. @@ -44,18 +64,3 @@ export async function computeEmbedding(): Promise return response; } - - -export async function chatCompletionStream(): Promise { - try { - const stream = await new AzureOpenAiChatClient('gpt-35-turbo').stream({ - messages: [{ role: 'user', content: 'What is the capital of France?' 
}], - }); - for await (const chunk of stream) { - logger.info(JSON.stringify(chunk)); - } - } catch (error: any) { - console.log(JSON.stringify(error.response.data.message)); - } - -} diff --git a/sample-code/src/server.ts b/sample-code/src/server.ts index 25c1fa12..5e06b403 100644 --- a/sample-code/src/server.ts +++ b/sample-code/src/server.ts @@ -54,7 +54,8 @@ app.get('/azure-openai/chat-completion', async (req, res) => { app.get('/azure-openai/chat-completion-stream', async (req, res)=> { try { - await chatCompletionStream(); + const response = await chatCompletionStream(); + res.send(response); } catch (error: any) { console.error(error); res From 4ebd37db074358122329b99f5c59db36d1d963a9 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Wed, 23 Oct 2024 18:04:50 +0200 Subject: [PATCH 09/57] feat: wrap chunk to see usage and finish reason --- .../azure-openai/azure-openai-chat-client.ts | 54 ++++++++++--------- ...-openai-chat-completion-stream-response.ts | 5 +- .../src/foundation-models/azure-openai.ts | 6 +-- 3 files changed, 34 insertions(+), 31 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts index 16147d5c..dd177b6c 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts @@ -61,16 +61,7 @@ export class AzureOpenAiChatClient { return Stream.fromSSEResponse(response, new AbortController()); } - private async * pipeString(stream: Stream) { - for await (const chunk of stream) { - // Process each item here - const deltaContent = chunk.getDeltaContent(); - if (!deltaContent) { - continue; - } - yield deltaContent; - } - } + /** * Creates a completion stream for the chat messages. 
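   *
   * Note that the returned response object is populated lazily: `finishReason`
   * and `usage` remain `undefined` until the corresponding chunks have flowed
   * through the piped streams during consumption.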
@@ -81,11 +72,13 @@ export class AzureOpenAiChatClient { async stream( data: AzureOpenAiCreateChatCompletionRequest, requestConfig?: CustomRequestConfig - ): Promise> { + ): Promise { + const response = new AzureOpenAiChatCompletionStreamResponse(); const stream1 = await this.fromSSEResponse(data, requestConfig); - const stream2 = new Stream(() => this.pipeFinishReason(stream1), new AbortController()); - const stream3 = new Stream(() => this.pipeTokenUsage(stream2), new AbortController());; - return stream3; + const stream2 = new Stream(() => this.pipeFinishReason(stream1, response), new AbortController()); + const stream3 = new Stream(() => this.pipeTokenUsage(stream2, response), new AbortController());; + response.stream = stream3; + return response; } /** @@ -97,10 +90,14 @@ export class AzureOpenAiChatClient { async streamString( data: AzureOpenAiCreateChatCompletionRequest, requestConfig?: CustomRequestConfig - ): Promise> { - const stream1 = await this.stream(data, requestConfig); - const stream2 = new Stream(() => this.pipeString(stream1), new AbortController()); - return stream2; + ): Promise { + const response = new AzureOpenAiChatCompletionStreamResponse(); + const stream1 = await this.fromSSEResponse(data, requestConfig); + const stream2 = new Stream(() => this.pipeFinishReason(stream1, response), new AbortController()); + const stream3 = new Stream(() => this.pipeTokenUsage(stream2, response), new AbortController());; + const stream4 = new Stream(() => this.pipeString(stream3), new AbortController()); + response.stream = stream4; + return response; } private async executeRequest( @@ -123,12 +120,22 @@ export class AzureOpenAiChatClient { ); } - private async * pipeFinishReason(stream: Stream) { + private async * pipeString(stream: Stream) { + for await (const chunk of stream) { + // Process each item here + const deltaContent = chunk.getDeltaContent(); + if (!deltaContent) { + continue; + } + yield deltaContent; + } + } + + private async * pipeFinishReason(stream: Stream, response: AzureOpenAiChatCompletionStreamResponse) { for await (const chunk of stream) { const finishReason = chunk.getFinishReason(); if (finishReason) { - // streamResponse.finishReason = finishReason; - // Do some callback maybe + response.finishReason = finishReason; switch (finishReason) { case 'content_filter': throw new Error('Stream finished with content filter hit.'); @@ -145,12 +152,11 @@ export class AzureOpenAiChatClient { } } - private async * pipeTokenUsage(stream: Stream) { + private async * pipeTokenUsage(stream: Stream, response: AzureOpenAiChatCompletionStreamResponse) { for await (const chunk of stream) { const usage = chunk.getTokenUsage(); if (usage) { - // streamResponse.usage = usage; - // Do some callback maybe + response.usage = usage; } yield chunk; } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts index 1aa03f3a..d65dc095 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts @@ -14,8 +14,5 @@ export class AzureOpenAiChatCompletionStreamResponse { public usage: AzureOpenAiCompletionUsage | undefined; public finishReason: 'stop' | 'length' | 'content_filter' | undefined; - - constructor(public stream: Stream) { - this.stream = stream; - } + public stream: Stream | undefined; } diff --git 
a/sample-code/src/foundation-models/azure-openai.ts b/sample-code/src/foundation-models/azure-openai.ts index 7a1c6d9b..9c78d92a 100644 --- a/sample-code/src/foundation-models/azure-openai.ts +++ b/sample-code/src/foundation-models/azure-openai.ts @@ -38,13 +38,13 @@ export async function chatCompletionStream(): Promise { }); let result = ''; - for await (const chunk of response) { + for await (const chunk of response.stream!) { logger.info(`chunk: ${chunk}`); result += chunk; } - // logger.info(`finish reason: ${response.finishReason}`); - // logger.info(`usage: ${JSON.stringify(response.usage)}`); + logger.info(`finish reason: ${response.finishReason}`); + logger.info(`usage: ${JSON.stringify(response.usage)}`); return result; } From 02d193966ed11b55baa8a6daeb0c7e21362d63ce Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Thu, 24 Oct 2024 15:05:05 +0200 Subject: [PATCH 10/57] refactor: pipe streams --- .../azure-openai/azure-openai-chat-client.ts | 74 +++++-------------- .../azure-openai-chat-completion-stream.ts | 73 ++++++++++++++++++ .../azure-openai/azure-openai-streaming.ts | 50 ++++--------- .../src/foundation-models/azure-openai.ts | 2 +- 4 files changed, 110 insertions(+), 89 deletions(-) create mode 100644 packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts index dd177b6c..d8913862 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts @@ -12,6 +12,7 @@ import type { AzureOpenAiCreateChatCompletionRequest } from './client/inference/ import { createLogger } from '@sap-cloud-sdk/util'; import { AzureOpenAiChatCompletionStreamChunkResponse } from './azure-openai-chat-completion-stream-chunk-response.js'; import { AzureOpenAiChatCompletionStreamResponse } from './azure-openai-chat-completion-stream-response.js'; +import { ChatCompletionStream } from './azure-openai-chat-completion-stream.js'; const logger = createLogger({ package: 'foundation-models', @@ -45,7 +46,7 @@ export class AzureOpenAiChatClient { private async fromSSEResponse( data: AzureOpenAiCreateChatCompletionRequest, requestConfig?: CustomRequestConfig - ): Promise> { + ): Promise { // TODO: The return type `any` should actually be the type of the stream response. // But `createChatCompletionStreamResponse` is first available in Azure OpenAI spec preview version 2024-08-01. const response = await this.executeRequest({ @@ -58,11 +59,9 @@ export class AzureOpenAiChatClient { ...requestConfig, responseType: 'stream' }); - return Stream.fromSSEResponse(response, new AbortController()); + return ChatCompletionStream.fromSSEResponse(response); } - - /** * Creates a completion stream for the chat messages. * @param data - The input parameters for the chat completion. 
@@ -74,10 +73,11 @@ export class AzureOpenAiChatClient { requestConfig?: CustomRequestConfig ): Promise { const response = new AzureOpenAiChatCompletionStreamResponse(); - const stream1 = await this.fromSSEResponse(data, requestConfig); - const stream2 = new Stream(() => this.pipeFinishReason(stream1, response), new AbortController()); - const stream3 = new Stream(() => this.pipeTokenUsage(stream2, response), new AbortController());; - response.stream = stream3; + const stream = await this.fromSSEResponse(data, requestConfig); + response.stream = stream + .pipe(ChatCompletionStream.processChunk, response) + .pipe(ChatCompletionStream.processFinishReason, response) + .pipe(ChatCompletionStream.processTokenUsage, response); return response; } @@ -92,11 +92,17 @@ export class AzureOpenAiChatClient { requestConfig?: CustomRequestConfig ): Promise { const response = new AzureOpenAiChatCompletionStreamResponse(); - const stream1 = await this.fromSSEResponse(data, requestConfig); - const stream2 = new Stream(() => this.pipeFinishReason(stream1, response), new AbortController()); - const stream3 = new Stream(() => this.pipeTokenUsage(stream2, response), new AbortController());; - const stream4 = new Stream(() => this.pipeString(stream3), new AbortController()); - response.stream = stream4; + // const stream1 = await this.fromSSEResponse(data, requestConfig); + // const stream2 = new Stream(() => this.pipeFinishReason(stream1, response)); + // const stream3 = new Stream(() => this.pipeTokenUsage(stream2, response)); + // const stream4 = new Stream(() => this.pipeString(stream3)); + + const stream = await this.fromSSEResponse(data, requestConfig); + response.stream = stream + .pipe(ChatCompletionStream.processChunk, response) + .pipe(ChatCompletionStream.processFinishReason, response) + .pipe(ChatCompletionStream.processTokenUsage, response) + .pipe(ChatCompletionStream.processString, response); return response; } @@ -120,45 +126,5 @@ export class AzureOpenAiChatClient { ); } - private async * pipeString(stream: Stream) { - for await (const chunk of stream) { - // Process each item here - const deltaContent = chunk.getDeltaContent(); - if (!deltaContent) { - continue; - } - yield deltaContent; - } - } - - private async * pipeFinishReason(stream: Stream, response: AzureOpenAiChatCompletionStreamResponse) { - for await (const chunk of stream) { - const finishReason = chunk.getFinishReason(); - if (finishReason) { - response.finishReason = finishReason; - switch (finishReason) { - case 'content_filter': - throw new Error('Stream finished with content filter hit.'); - case 'length': - throw new Error('Stream finished with token length exceeded.'); - case 'stop': - logger.debug('Stream finished.'); - break; - default: - throw new Error(`Stream finished with unknown reason '${finishReason}'.`); - } - } - yield chunk; - } - } - - private async * pipeTokenUsage(stream: Stream, response: AzureOpenAiChatCompletionStreamResponse) { - for await (const chunk of stream) { - const usage = chunk.getTokenUsage(); - if (usage) { - response.usage = usage; - } - yield chunk; - } - } + } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts new file mode 100644 index 00000000..8d6c717d --- /dev/null +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts @@ -0,0 +1,73 @@ +import { createLogger } from "@sap-cloud-sdk/util"; +import { 
AzureOpenAiChatCompletionStreamResponse } from "./azure-openai-chat-completion-stream-response.js"; +import { Stream } from "./azure-openai-streaming.js"; +import { HttpResponse } from "@sap-cloud-sdk/http-client"; +import { AzureOpenAiChatCompletionStreamChunkResponse } from "./azure-openai-chat-completion-stream-chunk-response.js"; + +const logger = createLogger({ + package: 'foundation-models', + messageContext: 'azure-openai-chat-completion-stream' +}); + +export class ChatCompletionStream extends Stream { + constructor(public iterator: () => AsyncIterator) { + super(iterator); + } + + static fromSSEResponse(response: HttpResponse): ChatCompletionStream { + const stream = Stream.fromSSEResponse(response); + return new ChatCompletionStream(stream.iterator); + } + + pipe(pipeFn: (stream: ChatCompletionStream, response: AzureOpenAiChatCompletionStreamResponse) => AsyncIterator, response: AzureOpenAiChatCompletionStreamResponse): ChatCompletionStream { + return new ChatCompletionStream(() => pipeFn(this, response)); + } + + static async * processChunk(stream: ChatCompletionStream, response: AzureOpenAiChatCompletionStreamResponse) { + for await (const chunk of stream) { + yield new AzureOpenAiChatCompletionStreamChunkResponse(chunk); + }; + } + + static async * processString(stream: ChatCompletionStream, response: AzureOpenAiChatCompletionStreamResponse) { + for await (const chunk of stream) { + // Process each item here + const deltaContent = chunk.getDeltaContent(); + if (!deltaContent) { + continue; + } + yield deltaContent; + } + } + + static async * processFinishReason(stream: ChatCompletionStream, response: AzureOpenAiChatCompletionStreamResponse) { + for await (const chunk of stream) { + const finishReason = chunk.getFinishReason(); + if (finishReason) { + response.finishReason = finishReason; + switch (finishReason) { + case 'content_filter': + throw new Error('Stream finished with content filter hit.'); + case 'length': + throw new Error('Stream finished with token length exceeded.'); + case 'stop': + logger.debug('Stream finished.'); + break; + default: + throw new Error(`Stream finished with unknown reason '${finishReason}'.`); + } + } + yield chunk; + } + } + + static async * processTokenUsage(stream: ChatCompletionStream, response: AzureOpenAiChatCompletionStreamResponse) { + for await (const chunk of stream) { + const usage = chunk.getTokenUsage(); + if (usage) { + response.usage = usage; + } + yield chunk; + } + } +} \ No newline at end of file diff --git a/packages/foundation-models/src/azure-openai/azure-openai-streaming.ts b/packages/foundation-models/src/azure-openai/azure-openai-streaming.ts index c7660ec6..b4fdbe12 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-streaming.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-streaming.ts @@ -1,6 +1,7 @@ import { HttpResponse } from '@sap-cloud-sdk/http-client'; import { LineDecoder } from './azure-openai-line-decoder.js'; import { AzureOpenAiChatCompletionStreamChunkResponse } from './azure-openai-chat-completion-stream-chunk-response.js'; +import { AzureOpenAiChatCompletionStreamResponse } from './azure-openai-chat-completion-stream-response.js'; type Bytes = string | ArrayBuffer | Uint8Array | Buffer | null | undefined; @@ -11,16 +12,9 @@ export type ServerSentEvent = { }; export class Stream implements AsyncIterable { - controller: AbortController; + constructor(public iterator: () => AsyncIterator) {} - constructor( - private iterator: () => AsyncIterator, - controller: 
AbortController, - ) { - this.controller = controller; - } - - static fromSSEResponse(response: HttpResponse, controller: AbortController) { + static fromSSEResponse(response: HttpResponse) { let consumed = false; async function* iterator(): AsyncIterator { @@ -30,7 +24,7 @@ export class Stream implements AsyncIterable { consumed = true; let done = false; try { - for await (const sse of _iterSSEMessages(response, controller)) { + for await (const sse of _iterSSEMessages(response)) { if (done) continue; if (sse.data.startsWith('[DONE]')) { @@ -53,7 +47,7 @@ export class Stream implements AsyncIterable { throw new Error(data.error); } - yield new AzureOpenAiChatCompletionStreamChunkResponse(data) as any; + yield data; } else { let data; try { @@ -65,31 +59,26 @@ export class Stream implements AsyncIterable { } // TODO: Is this where the error should be thrown? if (sse.event == 'error') { - // throw new Error(data.error, data.message); - throw new Error(data.error); + // throw new Error(data.error, data.message); + throw new Error(data.error); } yield { event: sse.event, data: data } as any; } } done = true; } catch (e) { - // If the user calls `stream.controller.abort()`, we should exit without throwing. - if (e instanceof Error && e.name === 'AbortError') return; throw e; - } finally { - // If the user `break`s, abort the ongoing request. - if (!done) controller.abort(); } } - return new Stream(iterator, controller); + return new Stream(iterator); } /** * Generates a Stream from a newline-separated ReadableStream * where each item is a JSON value. */ - static fromReadableStream(readableStream: ReadableStream, controller: AbortController) { + static fromReadableStream(readableStream: ReadableStream) { let consumed = false; async function* iterLines(): AsyncGenerator { @@ -120,16 +109,11 @@ export class Stream implements AsyncIterable { } done = true; } catch (e) { - // If the user calls `stream.controller.abort()`, we should exit without throwing. - if (e instanceof Error && e.name === 'AbortError') return; throw e; - } finally { - // If the user `break`s, abort the ongoing request. - if (!done) controller.abort(); } } - return new Stream(iterator, controller); + return new Stream(iterator); } [Symbol.asyncIterator](): AsyncIterator { @@ -159,8 +143,8 @@ export class Stream implements AsyncIterable { }; return [ - new Stream(() => teeIterator(left), this.controller), - new Stream(() => teeIterator(right), this.controller), + new Stream(() => teeIterator(left)), + new Stream(() => teeIterator(right)), ]; } @@ -198,18 +182,16 @@ export class Stream implements AsyncIterable { } export async function* _iterSSEMessages( - response: HttpResponse, - controller: AbortController, + response: HttpResponse ): AsyncGenerator { if (!response.data) { - controller.abort(); throw new Error(`Attempted to iterate over a response with no body`); } const sseDecoder = new SSEDecoder(); const lineDecoder = new LineDecoder(); -// console.log(response.data); + // console.log(response.data); const iter = response.data; for await (const sseChunk of iterSSEChunks(iter)) { @@ -239,8 +221,8 @@ async function* iterSSEChunks(iterator: AsyncIterableIterator): AsyncGene const binaryChunk = chunk instanceof ArrayBuffer ? new Uint8Array(chunk) - : typeof chunk === 'string' ? new TextEncoder().encode(chunk) - : chunk; + : typeof chunk === 'string' ? 
new TextEncoder().encode(chunk) + : chunk; let newData = new Uint8Array(data.length + binaryChunk.length); newData.set(data); diff --git a/sample-code/src/foundation-models/azure-openai.ts b/sample-code/src/foundation-models/azure-openai.ts index 9c78d92a..f1e020b2 100644 --- a/sample-code/src/foundation-models/azure-openai.ts +++ b/sample-code/src/foundation-models/azure-openai.ts @@ -39,7 +39,7 @@ export async function chatCompletionStream(): Promise { let result = ''; for await (const chunk of response.stream!) { - logger.info(`chunk: ${chunk}`); + logger.info(`chunk: ${JSON.stringify(chunk)}`); result += chunk; } From 7a00fc38be2073947fa1e3385b8b5410857588c8 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Thu, 24 Oct 2024 15:56:14 +0200 Subject: [PATCH 11/57] refactor --- .../azure-openai/azure-openai-chat-client.ts | 27 ++-------- ...i-chat-completion-stream-chunk-response.ts | 2 +- ...-openai-chat-completion-stream-response.ts | 42 +++++++++++++-- .../azure-openai-chat-completion-stream.ts | 52 ++++++++++++++----- .../azure-openai/azure-openai-line-decoder.ts | 10 ++-- .../azure-openai/azure-openai-streaming.ts | 46 ++++++++-------- .../src/foundation-models/azure-openai.ts | 2 +- 7 files changed, 108 insertions(+), 73 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts index d8913862..36d5b7d4 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts @@ -6,18 +6,10 @@ import { } from '@sap-ai-sdk/ai-api/internal.js'; import { apiVersion, type AzureOpenAiChatModel } from './model-types.js'; import { AzureOpenAiChatCompletionResponse } from './azure-openai-chat-completion-response.js'; -import { Stream } from './azure-openai-streaming.js'; -import type { HttpResponse } from '@sap-cloud-sdk/http-client'; -import type { AzureOpenAiCreateChatCompletionRequest } from './client/inference/schema/index.js'; -import { createLogger } from '@sap-cloud-sdk/util'; -import { AzureOpenAiChatCompletionStreamChunkResponse } from './azure-openai-chat-completion-stream-chunk-response.js'; import { AzureOpenAiChatCompletionStreamResponse } from './azure-openai-chat-completion-stream-response.js'; import { ChatCompletionStream } from './azure-openai-chat-completion-stream.js'; - -const logger = createLogger({ - package: 'foundation-models', - messageContext: 'azure-openai-chat-client' -}); +import type { HttpResponse } from '@sap-cloud-sdk/http-client'; +import type { AzureOpenAiCreateChatCompletionRequest } from './client/inference/schema/index.js'; /** * Azure OpenAI client for chat completion. 
@@ -43,7 +35,7 @@ export class AzureOpenAiChatClient { return new AzureOpenAiChatCompletionResponse(response); } - private async fromSSEResponse( + private async createStream( data: AzureOpenAiCreateChatCompletionRequest, requestConfig?: CustomRequestConfig ): Promise { @@ -73,8 +65,7 @@ export class AzureOpenAiChatClient { requestConfig?: CustomRequestConfig ): Promise { const response = new AzureOpenAiChatCompletionStreamResponse(); - const stream = await this.fromSSEResponse(data, requestConfig); - response.stream = stream + response.stream = (await this.createStream(data, requestConfig)) .pipe(ChatCompletionStream.processChunk, response) .pipe(ChatCompletionStream.processFinishReason, response) .pipe(ChatCompletionStream.processTokenUsage, response); @@ -92,13 +83,7 @@ export class AzureOpenAiChatClient { requestConfig?: CustomRequestConfig ): Promise { const response = new AzureOpenAiChatCompletionStreamResponse(); - // const stream1 = await this.fromSSEResponse(data, requestConfig); - // const stream2 = new Stream(() => this.pipeFinishReason(stream1, response)); - // const stream3 = new Stream(() => this.pipeTokenUsage(stream2, response)); - // const stream4 = new Stream(() => this.pipeString(stream3)); - - const stream = await this.fromSSEResponse(data, requestConfig); - response.stream = stream + response.stream = (await this.createStream(data, requestConfig)) .pipe(ChatCompletionStream.processChunk, response) .pipe(ChatCompletionStream.processFinishReason, response) .pipe(ChatCompletionStream.processTokenUsage, response) @@ -125,6 +110,4 @@ export class AzureOpenAiChatClient { requestConfig ); } - - } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts index 82f4b0a5..a18e7ff4 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts @@ -28,7 +28,7 @@ export class AzureOpenAiChatCompletionStreamChunkResponse { */ getFinishReason( choiceIndex = 0 - ): this['chunk']['choices'][0]['finish_reason'] { + ): this['chunk']['choices'][0]['finish_reason'] { return this.chunk.choices[choiceIndex]?.finish_reason; } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts index d65dc095..a6d4eaa7 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts @@ -1,6 +1,6 @@ import { createLogger } from '@sap-cloud-sdk/util'; -import { Stream } from './azure-openai-streaming.js'; -import { AzureOpenAiCompletionUsage } from './client/inference/schema/completion-usage.js'; +import type { Stream } from './azure-openai-streaming.js'; +import type { AzureOpenAiCompletionUsage } from './client/inference/schema/completion-usage.js'; const logger = createLogger({ package: 'foundation-models', @@ -11,8 +11,40 @@ const logger = createLogger({ * Azure OpenAI chat completion stream response. 
*/ export class AzureOpenAiChatCompletionStreamResponse { + private _usage: AzureOpenAiCompletionUsage | undefined; + private _finishReason: string | undefined; + private _stream: Stream | undefined; - public usage: AzureOpenAiCompletionUsage | undefined; - public finishReason: 'stop' | 'length' | 'content_filter' | undefined; - public stream: Stream | undefined; + public get usage() { + if (!this._usage) { + throw new Error('Response stream is undefined.'); + } + return this._usage; + } + + public set usage(usage: AzureOpenAiCompletionUsage) { + this._usage = usage; + } + + public get finishReason() { + if (!this._finishReason) { + throw new Error('Response finish reason is undefined.'); + } + return this._finishReason; + } + + public set finishReason(finishReason: string) { + this._finishReason = finishReason; + } + + public get stream(): Stream { + if (!this._stream) { + throw new Error('Response stream is undefined.'); + } + return this._stream; + } + + public set stream(stream: Stream) { + this._stream = stream; + } } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts index 8d6c717d..e4207e1b 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts @@ -1,8 +1,8 @@ -import { createLogger } from "@sap-cloud-sdk/util"; -import { AzureOpenAiChatCompletionStreamResponse } from "./azure-openai-chat-completion-stream-response.js"; -import { Stream } from "./azure-openai-streaming.js"; -import { HttpResponse } from "@sap-cloud-sdk/http-client"; -import { AzureOpenAiChatCompletionStreamChunkResponse } from "./azure-openai-chat-completion-stream-chunk-response.js"; +import { createLogger } from '@sap-cloud-sdk/util'; +import { Stream } from './azure-openai-streaming.js'; +import { AzureOpenAiChatCompletionStreamChunkResponse } from './azure-openai-chat-completion-stream-chunk-response.js'; +import type { HttpResponse } from '@sap-cloud-sdk/http-client'; +import type { AzureOpenAiChatCompletionStreamResponse } from './azure-openai-chat-completion-stream-response.js'; const logger = createLogger({ package: 'foundation-models', @@ -10,25 +10,32 @@ const logger = createLogger({ }); export class ChatCompletionStream extends Stream { - constructor(public iterator: () => AsyncIterator) { - super(iterator); - } - + /** + * Create a chat completion stream based on the http response. + * @param response - Http response. + * @returns Chat completion stream. + * @internal + */ static fromSSEResponse(response: HttpResponse): ChatCompletionStream { const stream = Stream.fromSSEResponse(response); return new ChatCompletionStream(stream.iterator); } - pipe(pipeFn: (stream: ChatCompletionStream, response: AzureOpenAiChatCompletionStreamResponse) => AsyncIterator, response: AzureOpenAiChatCompletionStreamResponse): ChatCompletionStream { - return new ChatCompletionStream(() => pipeFn(this, response)); - } - + /** + * Wrap raw chunk data with chunk response class to provide helper functions. + * @param stream - Chat completion stream. 
+ * @param response + * @internal + */ static async * processChunk(stream: ChatCompletionStream, response: AzureOpenAiChatCompletionStreamResponse) { for await (const chunk of stream) { yield new AzureOpenAiChatCompletionStreamChunkResponse(chunk); }; } + /** + * @internal + */ static async * processString(stream: ChatCompletionStream, response: AzureOpenAiChatCompletionStreamResponse) { for await (const chunk of stream) { // Process each item here @@ -40,6 +47,9 @@ export class ChatCompletionStream extends Stream { } } + /** + * @internal + */ static async * processFinishReason(stream: ChatCompletionStream, response: AzureOpenAiChatCompletionStreamResponse) { for await (const chunk of stream) { const finishReason = chunk.getFinishReason(); @@ -61,6 +71,9 @@ export class ChatCompletionStream extends Stream { } } + /** + * @internal + */ static async * processTokenUsage(stream: ChatCompletionStream, response: AzureOpenAiChatCompletionStreamResponse) { for await (const chunk of stream) { const usage = chunk.getTokenUsage(); @@ -70,4 +83,15 @@ export class ChatCompletionStream extends Stream { yield chunk; } } -} \ No newline at end of file + + constructor(public iterator: () => AsyncIterator) { + super(iterator); + } + + /** + * @internal + */ + pipe(processFn: (stream: ChatCompletionStream, response: AzureOpenAiChatCompletionStreamResponse) => AsyncIterator, response: AzureOpenAiChatCompletionStreamResponse): ChatCompletionStream { + return new ChatCompletionStream(() => processFn(this, response)); + } +} diff --git a/packages/foundation-models/src/azure-openai/azure-openai-line-decoder.ts b/packages/foundation-models/src/azure-openai/azure-openai-line-decoder.ts index 630bdc6f..6de19eaa 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-line-decoder.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-line-decoder.ts @@ -4,7 +4,7 @@ type Bytes = string | ArrayBuffer | Uint8Array | Buffer | null | undefined; * A re-implementation of httpx's `LineDecoder` in Python that handles incrementally * reading lines from text. * - * https://github.com/encode/httpx/blob/920333ea98118e9cf617f246905d7b202510941c/httpx/_decoders.py#L258 + * Https://github.com/encode/httpx/blob/920333ea98118e9cf617f246905d7b202510941c/httpx/_decoders.py#L258. */ export class LineDecoder { // prettier-ignore @@ -63,8 +63,8 @@ export class LineDecoder { } decodeText(bytes: Bytes): string { - if (bytes == null) return ''; - if (typeof bytes === 'string') return bytes; + if (bytes == null) {return '';} + if (typeof bytes === 'string') {return bytes;} // Node: if (typeof Buffer !== 'undefined') { @@ -95,7 +95,7 @@ export class LineDecoder { } throw new Error( - `Unexpected: neither Buffer nor TextDecoder are available as globals. Please report this error.`, + 'Unexpected: neither Buffer nor TextDecoder are available as globals. 
Please report this error.', ); } @@ -109,4 +109,4 @@ export class LineDecoder { this.trailingCR = false; return lines; } -} \ No newline at end of file +} diff --git a/packages/foundation-models/src/azure-openai/azure-openai-streaming.ts b/packages/foundation-models/src/azure-openai/azure-openai-streaming.ts index b4fdbe12..83724f5c 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-streaming.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-streaming.ts @@ -1,15 +1,13 @@ -import { HttpResponse } from '@sap-cloud-sdk/http-client'; import { LineDecoder } from './azure-openai-line-decoder.js'; -import { AzureOpenAiChatCompletionStreamChunkResponse } from './azure-openai-chat-completion-stream-chunk-response.js'; -import { AzureOpenAiChatCompletionStreamResponse } from './azure-openai-chat-completion-stream-response.js'; +import type { HttpResponse } from '@sap-cloud-sdk/http-client'; type Bytes = string | ArrayBuffer | Uint8Array | Buffer | null | undefined; -export type ServerSentEvent = { +export interface ServerSentEvent { event: string | null; data: string; raw: string[]; -}; +} export class Stream implements AsyncIterable { constructor(public iterator: () => AsyncIterator) {} @@ -25,7 +23,7 @@ export class Stream implements AsyncIterable { let done = false; try { for await (const sse of _iterSSEMessages(response)) { - if (done) continue; + if (done) {continue;} if (sse.data.startsWith('[DONE]')) { done = true; @@ -38,8 +36,8 @@ export class Stream implements AsyncIterable { try { data = JSON.parse(sse.data); } catch (e) { - console.error(`Could not parse message into JSON:`, sse.data); - console.error(`From chunk:`, sse.raw); + console.error('Could not parse message into JSON:', sse.data); + console.error('From chunk:', sse.raw); throw e; } @@ -53,8 +51,8 @@ export class Stream implements AsyncIterable { try { data = JSON.parse(sse.data); } catch (e) { - console.error(`Could not parse message into JSON:`, sse.data); - console.error(`From chunk:`, sse.raw); + console.error('Could not parse message into JSON:', sse.data); + console.error('From chunk:', sse.raw); throw e; } // TODO: Is this where the error should be thrown? @@ -62,7 +60,7 @@ export class Stream implements AsyncIterable { // throw new Error(data.error, data.message); throw new Error(data.error); } - yield { event: sse.event, data: data } as any; + yield { event: sse.event, data } as any; } } done = true; @@ -104,8 +102,8 @@ export class Stream implements AsyncIterable { let done = false; try { for await (const line of iterLines()) { - if (done) continue; - if (line) yield JSON.parse(line); + if (done) {continue;} + if (line) {yield JSON.parse(line);} } done = true; } catch (e) { @@ -125,12 +123,11 @@ export class Stream implements AsyncIterable { * independently read from at different speeds. 
*/ tee(): [Stream, Stream] { - const left: Array>> = []; - const right: Array>> = []; + const left: Promise>[] = []; + const right: Promise>[] = []; const iterator = this.iterator(); - const teeIterator = (queue: Array>>): AsyncIterator => { - return { + const teeIterator = (queue: Promise>[]): AsyncIterator => ({ next: () => { if (queue.length === 0) { const result = iterator.next(); @@ -139,8 +136,7 @@ export class Stream implements AsyncIterable { } return queue.shift()!; }, - }; - }; + }); return [ new Stream(() => teeIterator(left)), @@ -165,7 +161,7 @@ export class Stream implements AsyncIterable { async pull(ctrl: any) { try { const { value, done } = await iter.next(); - if (done) return ctrl.close(); + if (done) {return ctrl.close();} const bytes = encoder.encode(JSON.stringify(value) + '\n'); @@ -185,7 +181,7 @@ export async function* _iterSSEMessages( response: HttpResponse ): AsyncGenerator { if (!response.data) { - throw new Error(`Attempted to iterate over a response with no body`); + throw new Error('Attempted to iterate over a response with no body'); } const sseDecoder = new SSEDecoder(); @@ -197,13 +193,13 @@ export async function* _iterSSEMessages( for await (const sseChunk of iterSSEChunks(iter)) { for (const line of lineDecoder.decode(sseChunk)) { const sse = sseDecoder.decode(line); - if (sse) yield sse; + if (sse) {yield sse;} } } for (const line of lineDecoder.flush()) { const sse = sseDecoder.decode(line); - if (sse) yield sse; + if (sse) {yield sse;} } } @@ -224,7 +220,7 @@ async function* iterSSEChunks(iterator: AsyncIterableIterator): AsyncGene : typeof chunk === 'string' ? new TextEncoder().encode(chunk) : chunk; - let newData = new Uint8Array(data.length + binaryChunk.length); + const newData = new Uint8Array(data.length + binaryChunk.length); newData.set(data); newData.set(binaryChunk, data.length); data = newData; @@ -290,7 +286,7 @@ class SSEDecoder { if (!line) { // empty line and we didn't previously encounter any messages - if (!this.event && !this.data.length) return null; + if (!this.event && !this.data.length) {return null;} const sse: ServerSentEvent = { event: this.event, diff --git a/sample-code/src/foundation-models/azure-openai.ts b/sample-code/src/foundation-models/azure-openai.ts index f1e020b2..d9482399 100644 --- a/sample-code/src/foundation-models/azure-openai.ts +++ b/sample-code/src/foundation-models/azure-openai.ts @@ -38,7 +38,7 @@ export async function chatCompletionStream(): Promise { }); let result = ''; - for await (const chunk of response.stream!) 
{ + for await (const chunk of response.stream) { logger.info(`chunk: ${JSON.stringify(chunk)}`); result += chunk; } From dd0265105eb4829b2b21fbeefa9f40071416c5c3 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Thu, 24 Oct 2024 16:48:16 +0200 Subject: [PATCH 12/57] refactor: change streamString to streamContent --- .../src/azure-openai/azure-openai-chat-client.ts | 11 ++++------- .../azure-openai-chat-completion-stream-response.ts | 7 ++++--- sample-code/src/foundation-models/azure-openai.ts | 4 ++-- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts index 36d5b7d4..67f2d8be 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts @@ -73,20 +73,17 @@ export class AzureOpenAiChatClient { } /** - * Creates a completion stream of the delta string for the chat messages. + * Creates a completion stream of the delta content for the chat messages. * @param data - The input parameters for the chat completion. * @param requestConfig - The request configuration. - * @returns The completion stream of the delta string. + * @returns The completion stream of the delta content. */ - async streamString( + async streamContent( data: AzureOpenAiCreateChatCompletionRequest, requestConfig?: CustomRequestConfig ): Promise { const response = new AzureOpenAiChatCompletionStreamResponse(); - response.stream = (await this.createStream(data, requestConfig)) - .pipe(ChatCompletionStream.processChunk, response) - .pipe(ChatCompletionStream.processFinishReason, response) - .pipe(ChatCompletionStream.processTokenUsage, response) + (await this.stream(data, requestConfig)).stream .pipe(ChatCompletionStream.processString, response); return response; } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts index a6d4eaa7..adcb2b68 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts @@ -1,6 +1,7 @@ import { createLogger } from '@sap-cloud-sdk/util'; import type { Stream } from './azure-openai-streaming.js'; import type { AzureOpenAiCompletionUsage } from './client/inference/schema/completion-usage.js'; +import { ChatCompletionStream } from './azure-openai-chat-completion-stream.js'; const logger = createLogger({ package: 'foundation-models', @@ -13,7 +14,7 @@ const logger = createLogger({ export class AzureOpenAiChatCompletionStreamResponse { private _usage: AzureOpenAiCompletionUsage | undefined; private _finishReason: string | undefined; - private _stream: Stream | undefined; + private _stream: ChatCompletionStream | undefined; public get usage() { if (!this._usage) { @@ -37,14 +38,14 @@ export class AzureOpenAiChatCompletionStreamResponse { this._finishReason = finishReason; } - public get stream(): Stream { + public get stream(): ChatCompletionStream { if (!this._stream) { throw new Error('Response stream is undefined.'); } return this._stream; } - public set stream(stream: Stream) { + public set stream(stream: ChatCompletionStream) { this._stream = stream; } } diff --git a/sample-code/src/foundation-models/azure-openai.ts 
b/sample-code/src/foundation-models/azure-openai.ts index d9482399..a7630780 100644 --- a/sample-code/src/foundation-models/azure-openai.ts +++ b/sample-code/src/foundation-models/azure-openai.ts @@ -33,13 +33,13 @@ export async function chatCompletion(): Promise { - const response = await new AzureOpenAiChatClient('gpt-35-turbo').streamString({ + const response = await new AzureOpenAiChatClient('gpt-35-turbo').streamContent({ messages: [{ role: 'user', content: 'What is the capital of France?' }] }); let result = ''; for await (const chunk of response.stream) { - logger.info(`chunk: ${JSON.stringify(chunk)}`); + logger.info(`chunk: ${chunk}`); result += chunk; } From 50142e224250b5693b0258e6f4a9559b543d5f4b Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Thu, 24 Oct 2024 17:06:40 +0200 Subject: [PATCH 13/57] fix: lint --- .../azure-openai/azure-openai-chat-client.ts | 38 +++++++++---------- ...i-chat-completion-stream-chunk-response.ts | 7 ---- ...-openai-chat-completion-stream-response.ts | 13 ++----- .../azure-openai-chat-completion-stream.ts | 9 ++--- 4 files changed, 26 insertions(+), 41 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts index 67f2d8be..65165623 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts @@ -35,25 +35,6 @@ export class AzureOpenAiChatClient { return new AzureOpenAiChatCompletionResponse(response); } - private async createStream( - data: AzureOpenAiCreateChatCompletionRequest, - requestConfig?: CustomRequestConfig - ): Promise { - // TODO: The return type `any` should actually be the type of the stream response. - // But `createChatCompletionStreamResponse` is first available in Azure OpenAI spec preview version 2024-08-01. - const response = await this.executeRequest({ - ...data, - stream: true, - stream_options: { - include_usage: true - } - }, { - ...requestConfig, - responseType: 'stream' - }); - return ChatCompletionStream.fromSSEResponse(response); - } - /** * Creates a completion stream for the chat messages. * @param data - The input parameters for the chat completion. @@ -107,4 +88,23 @@ export class AzureOpenAiChatClient { requestConfig ); } + + private async createStream( + data: AzureOpenAiCreateChatCompletionRequest, + requestConfig?: CustomRequestConfig + ): Promise { + // TODO: The return type `any` should actually be the type of the stream response. + // But `createChatCompletionStreamResponse` is first available in Azure OpenAI spec preview version 2024-08-01. 
+ const response = await this.executeRequest({ + ...data, + stream: true, + stream_options: { + include_usage: true + } + }, { + ...requestConfig, + responseType: 'stream' + }); + return ChatCompletionStream.fromSSEResponse(response); + } } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts index a18e7ff4..300e9bda 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts @@ -1,10 +1,3 @@ -import { createLogger } from '@sap-cloud-sdk/util'; - -const logger = createLogger({ - package: 'foundation-models', - messageContext: 'azure-openai-chat-completion-stream-chunk-response' -}); - /** * Azure OpenAI chat completion stream chunk response. */ diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts index adcb2b68..4790bbd8 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts @@ -1,12 +1,5 @@ -import { createLogger } from '@sap-cloud-sdk/util'; -import type { Stream } from './azure-openai-streaming.js'; import type { AzureOpenAiCompletionUsage } from './client/inference/schema/completion-usage.js'; -import { ChatCompletionStream } from './azure-openai-chat-completion-stream.js'; - -const logger = createLogger({ - package: 'foundation-models', - messageContext: 'azure-openai-chat-completion-stream-response' -}); +import type { ChatCompletionStream } from './azure-openai-chat-completion-stream.js'; /** * Azure OpenAI chat completion stream response. 
@@ -16,7 +9,7 @@ export class AzureOpenAiChatCompletionStreamResponse { private _finishReason: string | undefined; private _stream: ChatCompletionStream | undefined; - public get usage() { + public get usage(): AzureOpenAiCompletionUsage { if (!this._usage) { throw new Error('Response stream is undefined.'); } @@ -27,7 +20,7 @@ export class AzureOpenAiChatCompletionStreamResponse { this._usage = usage; } - public get finishReason() { + public get finishReason(): string { if (!this._finishReason) { throw new Error('Response finish reason is undefined.'); } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts index e4207e1b..edcc5657 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts @@ -27,7 +27,7 @@ export class ChatCompletionStream extends Stream { * @param response * @internal */ - static async * processChunk(stream: ChatCompletionStream, response: AzureOpenAiChatCompletionStreamResponse) { + static async * processChunk(stream: ChatCompletionStream, response: AzureOpenAiChatCompletionStreamResponse): AsyncGenerator { for await (const chunk of stream) { yield new AzureOpenAiChatCompletionStreamChunkResponse(chunk); }; @@ -36,9 +36,8 @@ export class ChatCompletionStream extends Stream { /** * @internal */ - static async * processString(stream: ChatCompletionStream, response: AzureOpenAiChatCompletionStreamResponse) { + static async * processString(stream: ChatCompletionStream, response: AzureOpenAiChatCompletionStreamResponse): AsyncGenerator { for await (const chunk of stream) { - // Process each item here const deltaContent = chunk.getDeltaContent(); if (!deltaContent) { continue; @@ -50,7 +49,7 @@ export class ChatCompletionStream extends Stream { /** * @internal */ - static async * processFinishReason(stream: ChatCompletionStream, response: AzureOpenAiChatCompletionStreamResponse) { + static async * processFinishReason(stream: ChatCompletionStream, response: AzureOpenAiChatCompletionStreamResponse): AsyncGenerator { for await (const chunk of stream) { const finishReason = chunk.getFinishReason(); if (finishReason) { @@ -74,7 +73,7 @@ export class ChatCompletionStream extends Stream { /** * @internal */ - static async * processTokenUsage(stream: ChatCompletionStream, response: AzureOpenAiChatCompletionStreamResponse) { + static async * processTokenUsage(stream: ChatCompletionStream, response: AzureOpenAiChatCompletionStreamResponse): AsyncGenerator { for await (const chunk of stream) { const usage = chunk.getTokenUsage(); if (usage) { From c8611d87674b173d6a0bf537a64345b649cc7a6e Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Fri, 25 Oct 2024 17:38:34 +0200 Subject: [PATCH 14/57] refactor --- .../azure-openai/azure-openai-chat-client.ts | 15 ++++++----- .../azure-openai-chat-completion-stream.ts | 27 ++++++++++++------- 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts index 65165623..4a3bf7ad 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts @@ -46,8 +46,9 @@ export class AzureOpenAiChatClient { requestConfig?: CustomRequestConfig ): Promise { const 
response = new AzureOpenAiChatCompletionStreamResponse(); - response.stream = (await this.createStream(data, requestConfig)) - .pipe(ChatCompletionStream.processChunk, response) + const stream = await this.createStream(data, requestConfig); + response.stream = stream + .pipe(ChatCompletionStream.processChunk) .pipe(ChatCompletionStream.processFinishReason, response) .pipe(ChatCompletionStream.processTokenUsage, response); return response; @@ -64,8 +65,12 @@ export class AzureOpenAiChatClient { requestConfig?: CustomRequestConfig ): Promise { const response = new AzureOpenAiChatCompletionStreamResponse(); - (await this.stream(data, requestConfig)).stream - .pipe(ChatCompletionStream.processString, response); + const stream = await this.createStream(data, requestConfig); + response.stream = stream + .pipe(ChatCompletionStream.processChunk) + .pipe(ChatCompletionStream.processFinishReason, response) + .pipe(ChatCompletionStream.processTokenUsage, response) + .pipe(ChatCompletionStream.processContent, response); return response; } @@ -93,8 +98,6 @@ export class AzureOpenAiChatClient { data: AzureOpenAiCreateChatCompletionRequest, requestConfig?: CustomRequestConfig ): Promise { - // TODO: The return type `any` should actually be the type of the stream response. - // But `createChatCompletionStreamResponse` is first available in Azure OpenAI spec preview version 2024-08-01. const response = await this.executeRequest({ ...data, stream: true, diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts index edcc5657..d0bba47f 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts @@ -9,6 +9,11 @@ const logger = createLogger({ messageContext: 'azure-openai-chat-completion-stream' }); +/** + * Chat completion stream containing post-processing functions. + */ +// TODO: The Item type `any` should actually be the type of the stream chunk response. +// But `createChatCompletionStreamResponse` is first available in Azure OpenAI spec preview version 2024-08-01. export class ChatCompletionStream extends Stream { /** * Create a chat completion stream based on the http response. @@ -24,19 +29,20 @@ export class ChatCompletionStream extends Stream { /** * Wrap raw chunk data with chunk response class to provide helper functions. * @param stream - Chat completion stream. - * @param response * @internal */ - static async * processChunk(stream: ChatCompletionStream, response: AzureOpenAiChatCompletionStreamResponse): AsyncGenerator { + static async * processChunk(stream: ChatCompletionStream): AsyncGenerator { for await (const chunk of stream) { yield new AzureOpenAiChatCompletionStreamChunkResponse(chunk); }; } /** + * Transform the stream chunk into string. + * @param stream - Chat completion stream. 
* @internal */ - static async * processString(stream: ChatCompletionStream, response: AzureOpenAiChatCompletionStreamResponse): AsyncGenerator { + static async * processContent(stream: ChatCompletionStream): AsyncGenerator { for await (const chunk of stream) { const deltaContent = chunk.getDeltaContent(); if (!deltaContent) { @@ -49,11 +55,11 @@ export class ChatCompletionStream extends Stream { /** * @internal */ - static async * processFinishReason(stream: ChatCompletionStream, response: AzureOpenAiChatCompletionStreamResponse): AsyncGenerator { + static async * processFinishReason(stream: ChatCompletionStream, response?: AzureOpenAiChatCompletionStreamResponse): AsyncGenerator { for await (const chunk of stream) { const finishReason = chunk.getFinishReason(); if (finishReason) { - response.finishReason = finishReason; + response!.finishReason = finishReason; switch (finishReason) { case 'content_filter': throw new Error('Stream finished with content filter hit.'); @@ -73,11 +79,11 @@ export class ChatCompletionStream extends Stream { /** * @internal */ - static async * processTokenUsage(stream: ChatCompletionStream, response: AzureOpenAiChatCompletionStreamResponse): AsyncGenerator { + static async * processTokenUsage(stream: ChatCompletionStream, response?: AzureOpenAiChatCompletionStreamResponse): AsyncGenerator { for await (const chunk of stream) { const usage = chunk.getTokenUsage(); if (usage) { - response.usage = usage; + response!.usage = usage; } yield chunk; } @@ -90,7 +96,10 @@ export class ChatCompletionStream extends Stream { /** * @internal */ - pipe(processFn: (stream: ChatCompletionStream, response: AzureOpenAiChatCompletionStreamResponse) => AsyncIterator, response: AzureOpenAiChatCompletionStreamResponse): ChatCompletionStream { - return new ChatCompletionStream(() => processFn(this, response)); + pipe(processFn: (stream: ChatCompletionStream, response?: AzureOpenAiChatCompletionStreamResponse) => AsyncIterator, response?: AzureOpenAiChatCompletionStreamResponse): ChatCompletionStream { + if (response) { + return new ChatCompletionStream(() => processFn(this, response)); + } + return new ChatCompletionStream(() => processFn(this)); } } From 7386dc560c897fc66afd5388a14fb13f2819ea74 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Fri, 25 Oct 2024 17:58:23 +0200 Subject: [PATCH 15/57] feat: demo streaming in sample-code --- .../azure-openai-chat-completion-stream.ts | 6 ++--- .../src/azure-openai/index.ts | 3 +++ packages/foundation-models/src/index.ts | 5 +++- .../src/foundation-models/azure-openai.ts | 18 ++++----------- sample-code/src/server.ts | 23 +++++++++++++++++-- 5 files changed, 36 insertions(+), 19 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts index d0bba47f..36a72c54 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts @@ -62,14 +62,14 @@ export class ChatCompletionStream extends Stream { response!.finishReason = finishReason; switch (finishReason) { case 'content_filter': - throw new Error('Stream finished with content filter hit.'); + logger.error('Stream finished with content filter hit.'); case 'length': - throw new Error('Stream finished with token length exceeded.'); + logger.error('Stream finished with token length exceeded.'); case 'stop': logger.debug('Stream finished.'); 
break; default: - throw new Error(`Stream finished with unknown reason '${finishReason}'.`); + logger.error(`Stream finished with unknown reason '${finishReason}'.`); } } yield chunk; diff --git a/packages/foundation-models/src/azure-openai/index.ts b/packages/foundation-models/src/azure-openai/index.ts index 432cf7a6..2f9266b3 100644 --- a/packages/foundation-models/src/azure-openai/index.ts +++ b/packages/foundation-models/src/azure-openai/index.ts @@ -3,4 +3,7 @@ export * from './azure-openai-chat-client.js'; export * from './azure-openai-embedding-client.js'; export * from './azure-openai-chat-completion-response.js'; export * from './azure-openai-embedding-response.js'; +export * from './azure-openai-chat-completion-stream-chunk-response.js'; +export * from './azure-openai-chat-completion-stream-response.js'; +export * from './azure-openai-chat-completion-stream.js'; export * from './model-types.js'; diff --git a/packages/foundation-models/src/index.ts b/packages/foundation-models/src/index.ts index 5f0a191f..e911206a 100644 --- a/packages/foundation-models/src/index.ts +++ b/packages/foundation-models/src/index.ts @@ -9,7 +9,10 @@ export { AzureOpenAiChatClient, AzureOpenAiEmbeddingClient, AzureOpenAiChatCompletionResponse, - AzureOpenAiEmbeddingResponse + AzureOpenAiEmbeddingResponse, + AzureOpenAiChatCompletionStreamChunkResponse, + AzureOpenAiChatCompletionStreamResponse, + ChatCompletionStream } from './azure-openai/index.js'; export type { diff --git a/sample-code/src/foundation-models/azure-openai.ts b/sample-code/src/foundation-models/azure-openai.ts index a7630780..e94a5094 100644 --- a/sample-code/src/foundation-models/azure-openai.ts +++ b/sample-code/src/foundation-models/azure-openai.ts @@ -1,6 +1,7 @@ import { AzureOpenAiChatClient, - AzureOpenAiEmbeddingClient + AzureOpenAiEmbeddingClient, + AzureOpenAiChatCompletionStreamResponse } from '@sap-ai-sdk/foundation-models'; import { createLogger } from '@sap-cloud-sdk/util'; import type { @@ -32,20 +33,11 @@ export async function chatCompletion(): Promise { +export async function chatCompletionStream(): Promise { const response = await new AzureOpenAiChatClient('gpt-35-turbo').streamContent({ - messages: [{ role: 'user', content: 'What is the capital of France?' }] + messages: [{ role: 'user', content: 'Give me a very long introduction of SAP Cloud SDK.' 
}] }); - - let result = ''; - for await (const chunk of response.stream) { - logger.info(`chunk: ${chunk}`); - result += chunk; - } - - logger.info(`finish reason: ${response.finishReason}`); - logger.info(`usage: ${JSON.stringify(response.usage)}`); - return result; + return response; } /** diff --git a/sample-code/src/server.ts b/sample-code/src/server.ts index 5e06b403..29477fa2 100644 --- a/sample-code/src/server.ts +++ b/sample-code/src/server.ts @@ -55,7 +55,27 @@ app.get('/azure-openai/chat-completion', async (req, res) => { app.get('/azure-openai/chat-completion-stream', async (req, res)=> { try { const response = await chatCompletionStream(); - res.send(response); + + res.setHeader('Cache-Control', 'no-cache'); + res.setHeader('Content-Type', 'text/event-stream'); + res.setHeader('Access-Control-Allow-Origin', '*'); + res.setHeader('Connection', 'keep-alive'); + res.flushHeaders(); + + let connectionAlive = true; + + res.on('close', () => { + connectionAlive = false; + res.end(); + }); + + for await (const chunk of response.stream) { + if (!connectionAlive) { + break; + } + res.write(chunk); + + } } catch (error: any) { console.error(error); res @@ -64,7 +84,6 @@ app.get('/azure-openai/chat-completion-stream', async (req, res)=> { } }); - app.get('/azure-openai/embedding', async (req, res) => { try { const response = await computeEmbedding(); From a7ec23ae426c5c1ffbac2901e6ac62b84cb2a025 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Fri, 25 Oct 2024 18:06:11 +0200 Subject: [PATCH 16/57] fix: end res in sample code when finish --- sample-code/src/server.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sample-code/src/server.ts b/sample-code/src/server.ts index 29477fa2..2a6c2ac7 100644 --- a/sample-code/src/server.ts +++ b/sample-code/src/server.ts @@ -81,6 +81,8 @@ app.get('/azure-openai/chat-completion-stream', async (req, res)=> { res .status(500) .send('Yikes, vibes are off apparently 😬 -> ' + error.message); + } finally { + res.end(); } }); From bc03fed551ba7e27855f3cd849c3673d743c9c48 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Mon, 28 Oct 2024 10:12:32 +0100 Subject: [PATCH 17/57] fix: lint --- .../azure-openai-chat-completion-stream-response.ts | 2 +- .../src/azure-openai/azure-openai-chat-completion-stream.ts | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts index 4790bbd8..97cdc5ff 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts @@ -1,4 +1,4 @@ -import type { AzureOpenAiCompletionUsage } from './client/inference/schema/completion-usage.js'; +import type { AzureOpenAiCompletionUsage } from './client/inference/schema/index.js'; import type { ChatCompletionStream } from './azure-openai-chat-completion-stream.js'; /** diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts index 36a72c54..8a9bd22a 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts @@ -63,8 +63,10 @@ export class ChatCompletionStream extends Stream { switch (finishReason) { case 
'content_filter': logger.error('Stream finished with content filter hit.'); + break; case 'length': logger.error('Stream finished with token length exceeded.'); + break; case 'stop': logger.debug('Stream finished.'); break; From c399f090f8e4425072a6895c5eda081be15d73f8 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Mon, 28 Oct 2024 12:36:22 +0100 Subject: [PATCH 18/57] refactor --- .../azure-openai-chat-client.test.ts | 2 +- .../azure-openai/azure-openai-chat-client.ts | 43 +-- ...-openai-chat-completion-stream-response.ts | 8 +- .../azure-openai-chat-completion-stream.ts | 48 ++- .../azure-openai/azure-openai-line-decoder.ts | 18 +- .../azure-openai/azure-openai-sse-decoder.ts | 93 ++++++ ...ai-streaming.ts => azure-openai-stream.ts} | 276 ++++++++---------- packages/foundation-models/src/index.ts | 2 +- .../src/foundation-models/azure-openai.ts | 17 +- sample-code/src/server.ts | 3 +- 10 files changed, 305 insertions(+), 205 deletions(-) create mode 100644 packages/foundation-models/src/azure-openai/azure-openai-sse-decoder.ts rename packages/foundation-models/src/azure-openai/{azure-openai-streaming.ts => azure-openai-stream.ts} (53%) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.test.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.test.ts index 9bcee50e..a4a41f6a 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.test.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.test.ts @@ -7,7 +7,7 @@ import { } from '../../../../test-util/mock-http.js'; import { AzureOpenAiChatClient } from './azure-openai-chat-client.js'; import { apiVersion } from './model-types.js'; -import type { AzureOpenAiCreateChatCompletionResponse } from './client/inference/schema'; +import type { AzureOpenAiCreateChatCompletionResponse } from './client/inference/schema/index.js'; describe('Azure OpenAI chat client', () => { const chatCompletionEndpoint = { diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts index 4a3bf7ad..c44bc7af 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts @@ -7,7 +7,7 @@ import { import { apiVersion, type AzureOpenAiChatModel } from './model-types.js'; import { AzureOpenAiChatCompletionResponse } from './azure-openai-chat-completion-response.js'; import { AzureOpenAiChatCompletionStreamResponse } from './azure-openai-chat-completion-stream-response.js'; -import { ChatCompletionStream } from './azure-openai-chat-completion-stream.js'; +import { AzureOpenAiChatCompletionStream } from './azure-openai-chat-completion-stream.js'; import type { HttpResponse } from '@sap-cloud-sdk/http-client'; import type { AzureOpenAiCreateChatCompletionRequest } from './client/inference/schema/index.js'; @@ -19,7 +19,7 @@ export class AzureOpenAiChatClient { * Creates an instance of the Azure OpenAI chat client. * @param modelDeployment - This configuration is used to retrieve a deployment. Depending on the configuration use either the given deployment ID or the model name to retrieve matching deployments. If model and deployment ID are given, the model is verified against the deployment. */ - constructor(private modelDeployment: ModelDeployment) { } + constructor(private modelDeployment: ModelDeployment) {} /** * Creates a completion for the chat messages. 
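For orientation, consuming the streaming client as refactored in this patch looks roughly like the following sketch. It is based only on the methods visible in this patch series (`stream`, `getDeltaContent`, the `finishReason`/`usage` getters); the surrounding application code, model name and prompt are assumptions:

import { AzureOpenAiChatClient } from '@sap-ai-sdk/foundation-models';

async function demo(): Promise<void> {
  const client = new AzureOpenAiChatClient('gpt-35-turbo');
  const response = await client.stream({
    messages: [{ role: 'user', content: 'What is the capital of France?' }]
  });

  // Drain the chunk stream; getDeltaContent() may be undefined for
  // housekeeping chunks, e.g. the final usage-only chunk.
  for await (const chunk of response.stream) {
    process.stdout.write(chunk.getDeltaContent() ?? '');
  }

  // Populated by the processFinishReason/processTokenUsage stages while
  // streaming; the getters throw if read before the stream is drained.
  console.log(response.finishReason);
  console.log(JSON.stringify(response.usage));
}

void demo();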
@@ -48,9 +48,9 @@ export class AzureOpenAiChatClient { const response = new AzureOpenAiChatCompletionStreamResponse(); const stream = await this.createStream(data, requestConfig); response.stream = stream - .pipe(ChatCompletionStream.processChunk) - .pipe(ChatCompletionStream.processFinishReason, response) - .pipe(ChatCompletionStream.processTokenUsage, response); + .pipe(AzureOpenAiChatCompletionStream.processChunk) + .pipe(AzureOpenAiChatCompletionStream.processFinishReason, response) + .pipe(AzureOpenAiChatCompletionStream.processTokenUsage, response); return response; } @@ -67,10 +67,10 @@ export class AzureOpenAiChatClient { const response = new AzureOpenAiChatCompletionStreamResponse(); const stream = await this.createStream(data, requestConfig); response.stream = stream - .pipe(ChatCompletionStream.processChunk) - .pipe(ChatCompletionStream.processFinishReason, response) - .pipe(ChatCompletionStream.processTokenUsage, response) - .pipe(ChatCompletionStream.processContent, response); + .pipe(AzureOpenAiChatCompletionStream.processChunk) + .pipe(AzureOpenAiChatCompletionStream.processFinishReason, response) + .pipe(AzureOpenAiChatCompletionStream.processTokenUsage, response) + .pipe(AzureOpenAiChatCompletionStream.processContent, response); return response; } @@ -97,17 +97,20 @@ export class AzureOpenAiChatClient { private async createStream( data: AzureOpenAiCreateChatCompletionRequest, requestConfig?: CustomRequestConfig - ): Promise { - const response = await this.executeRequest({ - ...data, - stream: true, - stream_options: { - include_usage: true + ): Promise { + const response = await this.executeRequest( + { + ...data, + stream: true, + stream_options: { + include_usage: true + } + }, + { + ...requestConfig, + responseType: 'stream' } - }, { - ...requestConfig, - responseType: 'stream' - }); - return ChatCompletionStream.fromSSEResponse(response); + ); + return AzureOpenAiChatCompletionStream.fromSSEResponse(response); } } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts index 97cdc5ff..0e409ace 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts @@ -1,5 +1,5 @@ import type { AzureOpenAiCompletionUsage } from './client/inference/schema/index.js'; -import type { ChatCompletionStream } from './azure-openai-chat-completion-stream.js'; +import type { AzureOpenAiChatCompletionStream } from './azure-openai-chat-completion-stream.js'; /** * Azure OpenAI chat completion stream response. 
@@ -7,7 +7,7 @@ import type { ChatCompletionStream } from './azure-openai-chat-completion-stream export class AzureOpenAiChatCompletionStreamResponse { private _usage: AzureOpenAiCompletionUsage | undefined; private _finishReason: string | undefined; - private _stream: ChatCompletionStream | undefined; + private _stream: AzureOpenAiChatCompletionStream | undefined; public get usage(): AzureOpenAiCompletionUsage { if (!this._usage) { @@ -31,14 +31,14 @@ export class AzureOpenAiChatCompletionStreamResponse { this._finishReason = finishReason; } - public get stream(): ChatCompletionStream { + public get stream(): AzureOpenAiChatCompletionStream { if (!this._stream) { throw new Error('Response stream is undefined.'); } return this._stream; } - public set stream(stream: ChatCompletionStream) { + public set stream(stream: AzureOpenAiChatCompletionStream) { this._stream = stream; } } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts index 8a9bd22a..0d8f9b3c 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts @@ -1,5 +1,5 @@ import { createLogger } from '@sap-cloud-sdk/util'; -import { Stream } from './azure-openai-streaming.js'; +import { Stream } from './azure-openai-stream.js'; import { AzureOpenAiChatCompletionStreamChunkResponse } from './azure-openai-chat-completion-stream-chunk-response.js'; import type { HttpResponse } from '@sap-cloud-sdk/http-client'; import type { AzureOpenAiChatCompletionStreamResponse } from './azure-openai-chat-completion-stream-response.js'; @@ -14,16 +14,18 @@ const logger = createLogger({ */ // TODO: The Item type `any` should actually be the type of the stream chunk response. // But `createChatCompletionStreamResponse` is first available in Azure OpenAI spec preview version 2024-08-01. -export class ChatCompletionStream extends Stream { +export class AzureOpenAiChatCompletionStream extends Stream { /** * Create a chat completion stream based on the http response. * @param response - Http response. * @returns Chat completion stream. * @internal */ - static fromSSEResponse(response: HttpResponse): ChatCompletionStream { + static fromSSEResponse( + response: HttpResponse + ): AzureOpenAiChatCompletionStream { const stream = Stream.fromSSEResponse(response); - return new ChatCompletionStream(stream.iterator); + return new AzureOpenAiChatCompletionStream(stream.iterator); } /** @@ -31,10 +33,12 @@ export class ChatCompletionStream extends Stream { * @param stream - Chat completion stream. * @internal */ - static async * processChunk(stream: ChatCompletionStream): AsyncGenerator { + static async *processChunk( + stream: AzureOpenAiChatCompletionStream + ): AsyncGenerator { for await (const chunk of stream) { yield new AzureOpenAiChatCompletionStreamChunkResponse(chunk); - }; + } } /** @@ -42,7 +46,9 @@ export class ChatCompletionStream extends Stream { * @param stream - Chat completion stream. 
* @internal */ - static async * processContent(stream: ChatCompletionStream): AsyncGenerator { + static async *processContent( + stream: AzureOpenAiChatCompletionStream + ): AsyncGenerator { for await (const chunk of stream) { const deltaContent = chunk.getDeltaContent(); if (!deltaContent) { @@ -55,7 +61,10 @@ export class ChatCompletionStream extends Stream { /** * @internal */ - static async * processFinishReason(stream: ChatCompletionStream, response?: AzureOpenAiChatCompletionStreamResponse): AsyncGenerator { + static async *processFinishReason( + stream: AzureOpenAiChatCompletionStream, + response?: AzureOpenAiChatCompletionStreamResponse + ): AsyncGenerator { for await (const chunk of stream) { const finishReason = chunk.getFinishReason(); if (finishReason) { @@ -71,7 +80,9 @@ export class ChatCompletionStream extends Stream { logger.debug('Stream finished.'); break; default: - logger.error(`Stream finished with unknown reason '${finishReason}'.`); + logger.error( + `Stream finished with unknown reason '${finishReason}'.` + ); } } yield chunk; @@ -81,7 +92,10 @@ export class ChatCompletionStream extends Stream { /** * @internal */ - static async * processTokenUsage(stream: ChatCompletionStream, response?: AzureOpenAiChatCompletionStreamResponse): AsyncGenerator { + static async *processTokenUsage( + stream: AzureOpenAiChatCompletionStream, + response?: AzureOpenAiChatCompletionStreamResponse + ): AsyncGenerator { for await (const chunk of stream) { const usage = chunk.getTokenUsage(); if (usage) { @@ -98,10 +112,18 @@ export class ChatCompletionStream extends Stream { /** * @internal */ - pipe(processFn: (stream: ChatCompletionStream, response?: AzureOpenAiChatCompletionStreamResponse) => AsyncIterator, response?: AzureOpenAiChatCompletionStreamResponse): ChatCompletionStream { + pipe( + processFn: ( + stream: AzureOpenAiChatCompletionStream, + response?: AzureOpenAiChatCompletionStreamResponse + ) => AsyncIterator, + response?: AzureOpenAiChatCompletionStreamResponse + ): AzureOpenAiChatCompletionStream { if (response) { - return new ChatCompletionStream(() => processFn(this, response)); + return new AzureOpenAiChatCompletionStream(() => + processFn(this, response) + ); } - return new ChatCompletionStream(() => processFn(this)); + return new AzureOpenAiChatCompletionStream(() => processFn(this)); } } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-line-decoder.ts b/packages/foundation-models/src/azure-openai/azure-openai-line-decoder.ts index 6de19eaa..1fcc9635 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-line-decoder.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-line-decoder.ts @@ -36,7 +36,9 @@ export class LineDecoder { return []; } - const trailingNewline = LineDecoder.NEWLINE_CHARS.has(text[text.length - 1] || ''); + const trailingNewline = LineDecoder.NEWLINE_CHARS.has( + text[text.length - 1] || '' + ); let lines = text.split(LineDecoder.NEWLINE_REGEXP); // if there is a trailing new line then the last entry will be an empty @@ -63,8 +65,12 @@ export class LineDecoder { } decodeText(bytes: Bytes): string { - if (bytes == null) {return '';} - if (typeof bytes === 'string') {return bytes;} + if (bytes == null) { + return ''; + } + if (typeof bytes === 'string') { + return bytes; + } // Node: if (typeof Buffer !== 'undefined') { @@ -76,7 +82,7 @@ export class LineDecoder { } throw new Error( - `Unexpected: received non-Uint8Array (${bytes.constructor.name}) stream chunk in an environment with a global "Buffer" 
defined, which this library assumes to be Node. Please report this error.`, + `Unexpected: received non-Uint8Array (${bytes.constructor.name}) stream chunk in an environment with a global "Buffer" defined, which this library assumes to be Node. Please report this error.` ); } @@ -90,12 +96,12 @@ export class LineDecoder { throw new Error( `Unexpected: received non-Uint8Array/ArrayBuffer (${ (bytes as any).constructor.name - }) in a web platform. Please report this error.`, + }) in a web platform. Please report this error.` ); } throw new Error( - 'Unexpected: neither Buffer nor TextDecoder are available as globals. Please report this error.', + 'Unexpected: neither Buffer nor TextDecoder are available as globals. Please report this error.' ); } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-sse-decoder.ts b/packages/foundation-models/src/azure-openai/azure-openai-sse-decoder.ts new file mode 100644 index 00000000..6ca2264c --- /dev/null +++ b/packages/foundation-models/src/azure-openai/azure-openai-sse-decoder.ts @@ -0,0 +1,93 @@ +/** + * @internal + */ +export interface ServerSentEvent { + /** + * Name of the event if field name is `event`. + */ + event: string | null; + /** + * Value of the data if field name is `data`. + */ + data: string; + /** + * Raw string chunks. Each line is either in format `data: ...` or `event: ...`. + */ + raw: string[]; +} + +/** + * Server-Sent Event decoder. + * @internal + */ +export class SSEDecoder { + private data: string[]; + private event: string | null; + private chunks: string[]; + + constructor() { + this.event = null; + this.data = []; + this.chunks = []; + } + + /** + * Decode the line into structured server sent event. + * @param line - Line to decode. + * @returns Server sent event if the line is empty meaning the end of the received event, or null if there are more lines to come. + */ + decode(line: string): ServerSentEvent | null { + if (line.endsWith('\r')) { + line = line.substring(0, line.length - 1); + } + + if (!line) { + // empty line and we didn't previously encounter any messages + if (!this.event && !this.data.length) { + return null; + } + + const sse: ServerSentEvent = { + event: this.event, + data: this.data.join('\n'), + raw: this.chunks + }; + + this.event = null; + this.data = []; + this.chunks = []; + + return sse; + } + + this.chunks.push(line); + + if (line.startsWith(':')) { + return null; + } + + const [fieldname, , value] = partition(line, ':'); + const trimedValue = value.startsWith(' ') ? 
value.substring(1) : value; + + if (fieldname === 'event') { + this.event = trimedValue; + } else if (fieldname === 'data') { + this.data.push(trimedValue); + } + + return null; + } +} + +function partition(str: string, delimiter: string): [string, string, string] { + const index = str.indexOf(delimiter); + if (index !== -1) { + return [ + str.substring(0, index), + delimiter, + str.substring(index + delimiter.length) + ]; + } + + return [str, '', '']; +} diff --git a/packages/foundation-models/src/azure-openai/azure-openai-streaming.ts b/packages/foundation-models/src/azure-openai/azure-openai-stream.ts similarity index 53% rename from packages/foundation-models/src/azure-openai/azure-openai-streaming.ts rename to packages/foundation-models/src/azure-openai/azure-openai-stream.ts index 83724f5c..c6b27216 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-streaming.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-stream.ts @@ -1,72 +1,75 @@ +import { createLogger } from '@sap-cloud-sdk/util'; import { LineDecoder } from './azure-openai-line-decoder.js'; +import { SSEDecoder } from './azure-openai-sse-decoder.js'; +import type { ServerSentEvent } from './azure-openai-sse-decoder.js'; import type { HttpResponse } from '@sap-cloud-sdk/http-client'; -type Bytes = string | ArrayBuffer | Uint8Array | Buffer | null | undefined; +const logger = createLogger({ + package: 'foundation-models', + messageContext: 'azure-openai-stream' +}); -export interface ServerSentEvent { - event: string | null; - data: string; - raw: string[]; -} +type Bytes = string | ArrayBuffer | Uint8Array | Buffer | null | undefined; +/** + * Stream implemented as an async iterable. + */ export class Stream implements AsyncIterable { - constructor(public iterator: () => AsyncIterator) {} - - static fromSSEResponse(response: HttpResponse) { + static fromSSEResponse(response: HttpResponse): Stream { let consumed = false; async function* iterator(): AsyncIterator { if (consumed) { - throw new Error('Cannot iterate over a consumed stream, use `.tee()` to split the stream.'); + throw new Error( + 'Cannot iterate over a consumed stream, use `.tee()` to split the stream.' + ); } consumed = true; let done = false; - try { - for await (const sse of _iterSSEMessages(response)) { - if (done) {continue;} + for await (const sse of _iterSSEMessages(response)) { + if (done) { + continue; + } - if (sse.data.startsWith('[DONE]')) { - done = true; - continue; + if (sse.data.startsWith('[DONE]')) { + done = true; + continue; + } + + if (sse.event === null) { + let data; + + try { + data = JSON.parse(sse.data); + } catch (e: any) { + logger.error(`Could not parse message into JSON: ${sse.data}`); + logger.error(`From chunk: ${sse.raw}`); + throw e; + } + + if (data && data.error) { + throw new Error(data.error); } - if (sse.event === null) { - let data; - - try { - data = JSON.parse(sse.data); - } catch (e) { - console.error('Could not parse message into JSON:', sse.data); - console.error('From chunk:', sse.raw); - throw e; - } - - if (data && data.error) { - throw new Error(data.error); - } - - yield data; - } else { - let data; - try { - data = JSON.parse(sse.data); - } catch (e) { - console.error('Could not parse message into JSON:', sse.data); - console.error('From chunk:', sse.raw); - throw e; - } - // TODO: Is this where the error should be thrown? 
- if (sse.event == 'error') { - // throw new Error(data.error, data.message); - throw new Error(data.error); - } - yield { event: sse.event, data } as any; + yield data; + } else { + let data; + try { + data = JSON.parse(sse.data); + } catch (e: any) { + logger.error(`Could not parse message into JSON: ${sse.data}`); + logger.error(`From chunk: ${sse.raw}`); + throw e; + } + // TODO: Is this where the error should be thrown? + if (sse.event === 'error') { + // throw new Error(data.error, data.message); + throw new Error(data.error); } + yield { event: sse.event, data } as any; } - done = true; - } catch (e) { - throw e; } + done = true; } return new Stream(iterator); @@ -75,8 +78,12 @@ export class Stream implements AsyncIterable { /** * Generates a Stream from a newline-separated ReadableStream * where each item is a JSON value. + * @param readableStream - The ReadableStream to convert to a Stream. + * @returns The Stream. */ - static fromReadableStream(readableStream: ReadableStream) { + static fromReadableStream( + readableStream: ReadableStream + ): Stream { let consumed = false; async function* iterLines(): AsyncGenerator { @@ -96,24 +103,29 @@ export class Stream implements AsyncIterable { async function* iterator(): AsyncIterator { if (consumed) { - throw new Error('Cannot iterate over a consumed stream, use `.tee()` to split the stream.'); + throw new Error( + 'Cannot iterate over a consumed stream, use `.tee()` to split the stream.' + ); } consumed = true; let done = false; - try { - for await (const line of iterLines()) { - if (done) {continue;} - if (line) {yield JSON.parse(line);} + + for await (const line of iterLines()) { + if (done) { + continue; + } + if (line) { + yield JSON.parse(line); } - done = true; - } catch (e) { - throw e; } + done = true; } return new Stream(iterator); } + constructor(public iterator: () => AsyncIterator) {} + [Symbol.asyncIterator](): AsyncIterator { return this.iterator(); } @@ -121,26 +133,29 @@ export class Stream implements AsyncIterable { /** * Splits the stream into two streams which can be * independently read from at different speeds. + * @returns A tuple of two streams. */ tee(): [Stream, Stream] { const left: Promise>[] = []; const right: Promise>[] = []; const iterator = this.iterator(); - const teeIterator = (queue: Promise>[]): AsyncIterator => ({ - next: () => { - if (queue.length === 0) { - const result = iterator.next(); - left.push(result); - right.push(result); - } - return queue.shift()!; - }, - }); + const teeIterator = ( + queue: Promise>[] + ): AsyncIterator => ({ + next: () => { + if (queue.length === 0) { + const result = iterator.next(); + left.push(result); + right.push(result); + } + return queue.shift()!; + } + }); return [ new Stream(() => teeIterator(left)), - new Stream(() => teeIterator(right)), + new Stream(() => teeIterator(right)) ]; } @@ -148,35 +163,40 @@ export class Stream implements AsyncIterable { * Converts this stream to a newline-separated ReadableStream of * JSON stringified values in the stream * which can be turned back into a Stream with `Stream.fromReadableStream()`. + * @returns The ReadableStream. 
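+   * A round-trip sketch (illustrative only; assumes a runtime where
+   * `ReadableStream` is async iterable, e.g. recent Node.js):
+   * @example
+   * const readable = stream.toReadableStream();
+   * // Each enqueued chunk is a UTF-8 encoded, newline-terminated JSON value,
+   * // so the stream can be reconstructed later:
+   * const restored = Stream.fromReadableStream(readable);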
*/ toReadableStream(): ReadableStream { - const self = this; let iter: AsyncIterator; const encoder = new TextEncoder(); - return new ReadableStream({ - async start() { - iter = self[Symbol.asyncIterator](); + const underlyingDefaultSource: UnderlyingDefaultSource = { + start: async () => { + iter = this[Symbol.asyncIterator](); }, - async pull(ctrl: any) { + pull: async (ctrl: any) => { try { const { value, done } = await iter.next(); - if (done) {return ctrl.close();} - + if (done) { + return ctrl.close(); + } const bytes = encoder.encode(JSON.stringify(value) + '\n'); - ctrl.enqueue(bytes); } catch (err) { ctrl.error(err); } }, - async cancel() { + cancel: async () => { await iter.return?.(); - }, - }); + } + }; + + return new ReadableStream(underlyingDefaultSource); } } +/** + * @internal + */ export async function* _iterSSEMessages( response: HttpResponse ): AsyncGenerator { @@ -187,27 +207,34 @@ export async function* _iterSSEMessages( const sseDecoder = new SSEDecoder(); const lineDecoder = new LineDecoder(); - // console.log(response.data); - const iter = response.data; for await (const sseChunk of iterSSEChunks(iter)) { for (const line of lineDecoder.decode(sseChunk)) { const sse = sseDecoder.decode(line); - if (sse) {yield sse;} + if (sse) { + yield sse; + } } } for (const line of lineDecoder.flush()) { const sse = sseDecoder.decode(line); - if (sse) {yield sse;} + if (sse) { + yield sse; + } } } /** * Given an async iterable iterator, iterates over it and yields full * SSE chunks, i.e. yields when a double new-line is encountered. + * @param iterator - Async iterable iterator. + * @returns Async generator of Uint8Array. + * @internal */ -async function* iterSSEChunks(iterator: AsyncIterableIterator): AsyncGenerator { +async function* iterSSEChunks( + iterator: AsyncIterableIterator +): AsyncGenerator { let data = new Uint8Array(); for await (const chunk of iterator) { @@ -216,8 +243,10 @@ async function* iterSSEChunks(iterator: AsyncIterableIterator): AsyncGene } const binaryChunk = - chunk instanceof ArrayBuffer ? new Uint8Array(chunk) - : typeof chunk === 'string' ? new TextEncoder().encode(chunk) + chunk instanceof ArrayBuffer + ? new Uint8Array(chunk) + : typeof chunk === 'string' + ? 
new TextEncoder().encode(chunk) : chunk; const newData = new Uint8Array(data.length + binaryChunk.length); @@ -268,62 +297,12 @@ function findDoubleNewlineIndex(buffer: Uint8Array): number { return -1; } -class SSEDecoder { - private data: string[]; - private event: string | null; - private chunks: string[]; - - constructor() { - this.event = null; - this.data = []; - this.chunks = []; - } - - decode(line: string) { - if (line.endsWith('\r')) { - line = line.substring(0, line.length - 1); - } - - if (!line) { - // empty line and we didn't previously encounter any messages - if (!this.event && !this.data.length) {return null;} - - const sse: ServerSentEvent = { - event: this.event, - data: this.data.join('\n'), - raw: this.chunks, - }; - - this.event = null; - this.data = []; - this.chunks = []; - - return sse; - } - - this.chunks.push(line); - - if (line.startsWith(':')) { - return null; - } - - let [fieldname, _, value] = partition(line, ':'); - - if (value.startsWith(' ')) { - value = value.substring(1); - } - - if (fieldname === 'event') { - this.event = value; - } else if (fieldname === 'data') { - this.data.push(value); - } - - return null; - } -} - -/** This is an internal helper function that's just used for testing */ +/** + * This is an internal helper function that's just used for testing. + * @param chunks - The chunks to decode. + * @returns The decoded lines. + * @internal + */ export function _decodeChunks(chunks: string[]): string[] { const decoder = new LineDecoder(); const lines: string[] = []; @@ -333,12 +312,3 @@ export function _decodeChunks(chunks: string[]): string[] { return lines; } - -function partition(str: string, delimiter: string): [string, string, string] { - const index = str.indexOf(delimiter); - if (index !== -1) { - return [str.substring(0, index), delimiter, str.substring(index + delimiter.length)]; - } - - return [str, '', '']; -} diff --git a/packages/foundation-models/src/index.ts b/packages/foundation-models/src/index.ts index e911206a..0017fce2 100644 --- a/packages/foundation-models/src/index.ts +++ b/packages/foundation-models/src/index.ts @@ -12,7 +12,7 @@ export { AzureOpenAiEmbeddingResponse, AzureOpenAiChatCompletionStreamChunkResponse, AzureOpenAiChatCompletionStreamResponse, - ChatCompletionStream + AzureOpenAiChatCompletionStream } from './azure-openai/index.js'; export type { diff --git a/sample-code/src/foundation-models/azure-openai.ts b/sample-code/src/foundation-models/azure-openai.ts index e94a5094..58908a20 100644 --- a/sample-code/src/foundation-models/azure-openai.ts +++ b/sample-code/src/foundation-models/azure-openai.ts @@ -1,12 +1,12 @@ import { AzureOpenAiChatClient, - AzureOpenAiEmbeddingClient, - AzureOpenAiChatCompletionStreamResponse + AzureOpenAiEmbeddingClient } from '@sap-ai-sdk/foundation-models'; import { createLogger } from '@sap-cloud-sdk/util'; import type { AzureOpenAiChatCompletionResponse, - AzureOpenAiEmbeddingResponse + AzureOpenAiEmbeddingResponse, + AzureOpenAiChatCompletionStreamResponse } from '@sap-ai-sdk/foundation-models'; const logger = createLogger({ @@ -34,8 +34,15 @@ export async function chatCompletion(): Promise { - const response = await new AzureOpenAiChatClient('gpt-35-turbo').streamContent({ - messages: [{ role: 'user', content: 'Give me a very long introduction of SAP Cloud SDK.' }] + const response = await new AzureOpenAiChatClient( + 'gpt-35-turbo' + ).streamContent({ + messages: [ + { + role: 'user', + content: 'Give me a very long introduction of SAP Cloud SDK.' 
+ } + ] }); return response; } diff --git a/sample-code/src/server.ts b/sample-code/src/server.ts index 2a6c2ac7..b895538a 100644 --- a/sample-code/src/server.ts +++ b/sample-code/src/server.ts @@ -52,7 +52,7 @@ app.get('/azure-openai/chat-completion', async (req, res) => { } }); -app.get('/azure-openai/chat-completion-stream', async (req, res)=> { +app.get('/azure-openai/chat-completion-stream', async (req, res) => { try { const response = await chatCompletionStream(); @@ -74,7 +74,6 @@ app.get('/azure-openai/chat-completion-stream', async (req, res)=> { break; } res.write(chunk); - } } catch (error: any) { console.error(error); From b3f4e7168c23fe54a761f90be20f380e9b0a9176 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Mon, 28 Oct 2024 12:55:36 +0100 Subject: [PATCH 19/57] fix: check public-api --- .../src/azure-openai/azure-openai-line-decoder.ts | 1 + .../foundation-models/src/azure-openai/azure-openai-stream.ts | 1 + 2 files changed, 2 insertions(+) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-line-decoder.ts b/packages/foundation-models/src/azure-openai/azure-openai-line-decoder.ts index 1fcc9635..c1226b44 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-line-decoder.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-line-decoder.ts @@ -5,6 +5,7 @@ type Bytes = string | ArrayBuffer | Uint8Array | Buffer | null | undefined; * reading lines from text. * * Https://github.com/encode/httpx/blob/920333ea98118e9cf617f246905d7b202510941c/httpx/_decoders.py#L258. + * @internal */ export class LineDecoder { // prettier-ignore diff --git a/packages/foundation-models/src/azure-openai/azure-openai-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-stream.ts index c6b27216..5219b0c4 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-stream.ts @@ -13,6 +13,7 @@ type Bytes = string | ArrayBuffer | Uint8Array | Buffer | null | undefined; /** * Stream implemented as an async iterable. 
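 * Since the class implements `AsyncIterable`, it can be drained with `for await`.
 * A minimal sketch with an inline generator standing in for an SSE source:
 * @example
 * const stream = new Stream<number>(async function* () {
 *   yield 1;
 *   yield 2;
 * });
 * for await (const item of stream) {
 *   console.log(item); // 1, then 2
 * }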
+ * @internal */ export class Stream implements AsyncIterable { static fromSSEResponse(response: HttpResponse): Stream { From fa91209babae8b1217a01009afcecfbd1aa76379 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Mon, 28 Oct 2024 15:49:56 +0100 Subject: [PATCH 20/57] chore: add tests for stream chunk response --- ...t-completion-stream-chunk-response.test.ts | 59 +++++++++++++++++++ .../azure-openai-chat-completion-stream.ts | 8 +-- ...n-stream-chunk-response-delta-content.json | 1 + ...n-stream-chunk-response-finish-reason.json | 1 + ...ion-stream-chunk-response-token-usage.json | 1 + ...-openai-chat-completion-stream-chunks.json | 34 +++++++++++ test-util/mock-http.ts | 14 +++++ 7 files changed, 112 insertions(+), 6 deletions(-) create mode 100644 packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.test.ts create mode 100644 test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-delta-content.json create mode 100644 test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-finish-reason.json create mode 100644 test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-token-usage.json create mode 100644 test-util/data/foundation-models/azure-openai-chat-completion-stream-chunks.json diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.test.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.test.ts new file mode 100644 index 00000000..1925ec2c --- /dev/null +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.test.ts @@ -0,0 +1,59 @@ +import { AzureOpenAiChatCompletionStreamChunkResponse } from "./azure-openai-chat-completion-stream-chunk-response.js"; +import { parseMockResponse } from '../../../../test-util/mock-http.js'; + +describe('OpenAI chat completion stream chunk response', () => { + let mockResponses: { + tokenUsage: any; + finishReason: any; + deltaContent: any; + }; + let azureOpenAiChatCompletionStreamChunkResponses: { + tokenUsage: AzureOpenAiChatCompletionStreamChunkResponse; + finishReason: AzureOpenAiChatCompletionStreamChunkResponse; + deltaContent: AzureOpenAiChatCompletionStreamChunkResponse; + }; + + beforeAll(async () => { + mockResponses = { + tokenUsage: await parseMockResponse( + 'foundation-models', + 'azure-openai-chat-completion-stream-chunk-response-token-usage.json' + ), + finishReason: await parseMockResponse( + 'foundation-models', + 'azure-openai-chat-completion-stream-chunk-response-finish-reason.json' + ), + deltaContent: await parseMockResponse( + 'foundation-models', + 'azure-openai-chat-completion-stream-chunk-response-delta-content.json' + ) + }; + azureOpenAiChatCompletionStreamChunkResponses = { + tokenUsage: new AzureOpenAiChatCompletionStreamChunkResponse(mockResponses.tokenUsage), + finishReason: new AzureOpenAiChatCompletionStreamChunkResponse(mockResponses.finishReason), + deltaContent: new AzureOpenAiChatCompletionStreamChunkResponse(mockResponses.deltaContent) + }; + }); + + it('should return the chat completion stream chunk response', () => { + expect(azureOpenAiChatCompletionStreamChunkResponses.tokenUsage.chunk).toStrictEqual(mockResponses.tokenUsage); + expect(azureOpenAiChatCompletionStreamChunkResponses.finishReason.chunk).toStrictEqual(mockResponses.finishReason); + 
expect(azureOpenAiChatCompletionStreamChunkResponses.deltaContent.chunk).toStrictEqual(mockResponses.deltaContent); + }); + + it('should get token usage', () => { + expect(azureOpenAiChatCompletionStreamChunkResponses.tokenUsage.getTokenUsage()).toMatchObject({ + completion_tokens: expect.any(Number), + prompt_tokens: expect.any(Number), + total_tokens: expect.any(Number) + }); + }); + + it('should return finish reason', () => { + expect(azureOpenAiChatCompletionStreamChunkResponses.finishReason.getFinishReason()).toBe('stop'); + }); + + it('should return delta content with default index 0', () => { + expect(azureOpenAiChatCompletionStreamChunkResponses.deltaContent.getDeltaContent()).toBe(' is'); + }); +}); \ No newline at end of file diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts index 0d8f9b3c..d165c6ba 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts @@ -21,9 +21,7 @@ export class AzureOpenAiChatCompletionStream extends Stream { * @returns Chat completion stream. * @internal */ - static fromSSEResponse( - response: HttpResponse - ): AzureOpenAiChatCompletionStream { + static fromSSEResponse(response: HttpResponse): AzureOpenAiChatCompletionStream { const stream = Stream.fromSSEResponse(response); return new AzureOpenAiChatCompletionStream(stream.iterator); } @@ -120,9 +118,7 @@ export class AzureOpenAiChatCompletionStream extends Stream { response?: AzureOpenAiChatCompletionStreamResponse ): AzureOpenAiChatCompletionStream { if (response) { - return new AzureOpenAiChatCompletionStream(() => - processFn(this, response) - ); + return new AzureOpenAiChatCompletionStream(() => processFn(this, response)); } return new AzureOpenAiChatCompletionStream(() => processFn(this)); } diff --git a/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-delta-content.json b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-delta-content.json new file mode 100644 index 00000000..4ff90148 --- /dev/null +++ b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-delta-content.json @@ -0,0 +1 @@ +{"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"delta":{"content":" is"},"finish_reason":null,"index":0,"logprobs":null}],"created":1730125149,"id":"chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I","model":"gpt-35-turbo","object":"chat.completion.chunk","system_fingerprint":"fp_808245b034","usage":null} \ No newline at end of file diff --git a/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-finish-reason.json b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-finish-reason.json new file mode 100644 index 00000000..4aeae959 --- /dev/null +++ b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-finish-reason.json @@ -0,0 +1 @@ 
+{"choices":[{"content_filter_results":{},"delta":{},"finish_reason":"stop","index":0,"logprobs":null}],"created":1730125149,"id":"chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I","model":"gpt-35-turbo","object":"chat.completion.chunk","system_fingerprint":"fp_808245b034","usage":null} \ No newline at end of file diff --git a/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-token-usage.json b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-token-usage.json new file mode 100644 index 00000000..558fe0c5 --- /dev/null +++ b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-token-usage.json @@ -0,0 +1 @@ +{"choices":[],"created":1730125149,"id":"chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I","model":"gpt-35-turbo","object":"chat.completion.chunk","system_fingerprint":"fp_808245b034","usage":{"completion_tokens":7,"prompt_tokens":14,"total_tokens":21}} \ No newline at end of file diff --git a/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunks.json b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunks.json new file mode 100644 index 00000000..0a7dafb8 --- /dev/null +++ b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunks.json @@ -0,0 +1,34 @@ +data: {"choices":[],"created":0,"id":"","model":"","object":"","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}]} + + +data: {"choices":[{"content_filter_results":{},"delta":{"content":"","role":"assistant"},"finish_reason":null,"index":0,"logprobs":null}],"created":1730125149,"id":"chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I","model":"gpt-35-turbo","object":"chat.completion.chunk","system_fingerprint":"fp_808245b034","usage":null} + + +data: {"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"delta":{"content":"The"},"finish_reason":null,"index":0,"logprobs":null}],"created":1730125149,"id":"chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I","model":"gpt-35-turbo","object":"chat.completion.chunk","system_fingerprint":"fp_808245b034","usage":null} + + +data: {"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"delta":{"content":" capital"},"finish_reason":null,"index":0,"logprobs":null}],"created":1730125149,"id":"chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I","model":"gpt-35-turbo","object":"chat.completion.chunk","system_fingerprint":"fp_808245b034","usage":null} + + +data: {"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"delta":{"content":" of"},"finish_reason":null,"index":0,"logprobs":null}],"created":1730125149,"id":"chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I","model":"gpt-35-turbo","object":"chat.completion.chunk","system_fingerprint":"fp_808245b034","usage":null} + + +data: 
{"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"delta":{"content":" France"},"finish_reason":null,"index":0,"logprobs":null}],"created":1730125149,"id":"chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I","model":"gpt-35-turbo","object":"chat.completion.chunk","system_fingerprint":"fp_808245b034","usage":null} + + +data: {"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"delta":{"content":" is"},"finish_reason":null,"index":0,"logprobs":null}],"created":1730125149,"id":"chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I","model":"gpt-35-turbo","object":"chat.completion.chunk","system_fingerprint":"fp_808245b034","usage":null} + + +data: {"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"delta":{"content":" Paris"},"finish_reason":null,"index":0,"logprobs":null}],"created":1730125149,"id":"chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I","model":"gpt-35-turbo","object":"chat.completion.chunk","system_fingerprint":"fp_808245b034","usage":null} + + +data: {"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"delta":{"content":"."},"finish_reason":null,"index":0,"logprobs":null}],"created":1730125149,"id":"chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I","model":"gpt-35-turbo","object":"chat.completion.chunk","system_fingerprint":"fp_808245b034","usage":null} + + +data: {"choices":[{"content_filter_results":{},"delta":{},"finish_reason":"stop","index":0,"logprobs":null}],"created":1730125149,"id":"chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I","model":"gpt-35-turbo","object":"chat.completion.chunk","system_fingerprint":"fp_808245b034","usage":null} + + +data: {"choices":[],"created":1730125149,"id":"chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I","model":"gpt-35-turbo","object":"chat.completion.chunk","system_fingerprint":"fp_808245b034","usage":{"completion_tokens":7,"prompt_tokens":14,"total_tokens":21}} + + +data: [DONE] \ No newline at end of file diff --git a/test-util/mock-http.ts b/test-util/mock-http.ts index 3bcf6451..e839ce9a 100644 --- a/test-util/mock-http.ts +++ b/test-util/mock-http.ts @@ -140,6 +140,20 @@ export function mockDeploymentsList( }); } +/** + * @internal + */ +export async function parseMockResponseToString( + client: string, + fileName: string +): Promise { + const fileContent = await readFile( + path.join(__dirname, 'data', client, fileName), + 'utf-8' + ); + return fileContent; +} + /** * @internal */ From 56e619710d0f174d8b3382bc14e12e7253694a43 Mon Sep 17 00:00:00 2001 From: cloud-sdk-js Date: Mon, 28 Oct 2024 14:50:54 +0000 Subject: [PATCH 21/57] fix: Changes from lint --- ...t-completion-stream-chunk-response.test.ts | 42 +++++++++++++------ .../azure-openai-chat-completion-stream.ts | 8 +++- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.test.ts 
b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.test.ts index 1925ec2c..49edc7fd 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.test.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.test.ts @@ -1,5 +1,5 @@ -import { AzureOpenAiChatCompletionStreamChunkResponse } from "./azure-openai-chat-completion-stream-chunk-response.js"; import { parseMockResponse } from '../../../../test-util/mock-http.js'; +import { AzureOpenAiChatCompletionStreamChunkResponse } from './azure-openai-chat-completion-stream-chunk-response.js'; describe('OpenAI chat completion stream chunk response', () => { let mockResponses: { @@ -12,7 +12,7 @@ describe('OpenAI chat completion stream chunk response', () => { finishReason: AzureOpenAiChatCompletionStreamChunkResponse; deltaContent: AzureOpenAiChatCompletionStreamChunkResponse; }; - + beforeAll(async () => { mockResponses = { tokenUsage: await parseMockResponse( @@ -29,20 +29,34 @@ describe('OpenAI chat completion stream chunk response', () => { ) }; azureOpenAiChatCompletionStreamChunkResponses = { - tokenUsage: new AzureOpenAiChatCompletionStreamChunkResponse(mockResponses.tokenUsage), - finishReason: new AzureOpenAiChatCompletionStreamChunkResponse(mockResponses.finishReason), - deltaContent: new AzureOpenAiChatCompletionStreamChunkResponse(mockResponses.deltaContent) + tokenUsage: new AzureOpenAiChatCompletionStreamChunkResponse( + mockResponses.tokenUsage + ), + finishReason: new AzureOpenAiChatCompletionStreamChunkResponse( + mockResponses.finishReason + ), + deltaContent: new AzureOpenAiChatCompletionStreamChunkResponse( + mockResponses.deltaContent + ) }; }); it('should return the chat completion stream chunk response', () => { - expect(azureOpenAiChatCompletionStreamChunkResponses.tokenUsage.chunk).toStrictEqual(mockResponses.tokenUsage); - expect(azureOpenAiChatCompletionStreamChunkResponses.finishReason.chunk).toStrictEqual(mockResponses.finishReason); - expect(azureOpenAiChatCompletionStreamChunkResponses.deltaContent.chunk).toStrictEqual(mockResponses.deltaContent); + expect( + azureOpenAiChatCompletionStreamChunkResponses.tokenUsage.chunk + ).toStrictEqual(mockResponses.tokenUsage); + expect( + azureOpenAiChatCompletionStreamChunkResponses.finishReason.chunk + ).toStrictEqual(mockResponses.finishReason); + expect( + azureOpenAiChatCompletionStreamChunkResponses.deltaContent.chunk + ).toStrictEqual(mockResponses.deltaContent); }); it('should get token usage', () => { - expect(azureOpenAiChatCompletionStreamChunkResponses.tokenUsage.getTokenUsage()).toMatchObject({ + expect( + azureOpenAiChatCompletionStreamChunkResponses.tokenUsage.getTokenUsage() + ).toMatchObject({ completion_tokens: expect.any(Number), prompt_tokens: expect.any(Number), total_tokens: expect.any(Number) @@ -50,10 +64,14 @@ describe('OpenAI chat completion stream chunk response', () => { }); it('should return finish reason', () => { - expect(azureOpenAiChatCompletionStreamChunkResponses.finishReason.getFinishReason()).toBe('stop'); + expect( + azureOpenAiChatCompletionStreamChunkResponses.finishReason.getFinishReason() + ).toBe('stop'); }); it('should return delta content with default index 0', () => { - expect(azureOpenAiChatCompletionStreamChunkResponses.deltaContent.getDeltaContent()).toBe(' is'); + expect( + azureOpenAiChatCompletionStreamChunkResponses.deltaContent.getDeltaContent() + ).toBe(' is'); }); -}); \ No newline 
at end of file +}); diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts index d165c6ba..0d8f9b3c 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts @@ -21,7 +21,9 @@ export class AzureOpenAiChatCompletionStream extends Stream { * @returns Chat completion stream. * @internal */ - static fromSSEResponse(response: HttpResponse): AzureOpenAiChatCompletionStream { + static fromSSEResponse( + response: HttpResponse + ): AzureOpenAiChatCompletionStream { const stream = Stream.fromSSEResponse(response); return new AzureOpenAiChatCompletionStream(stream.iterator); } @@ -118,7 +120,9 @@ export class AzureOpenAiChatCompletionStream extends Stream { response?: AzureOpenAiChatCompletionStreamResponse ): AzureOpenAiChatCompletionStream { if (response) { - return new AzureOpenAiChatCompletionStream(() => processFn(this, response)); + return new AzureOpenAiChatCompletionStream(() => + processFn(this, response) + ); } return new AzureOpenAiChatCompletionStream(() => processFn(this)); } From 62976264810bf78a328ddba64d30d0ea566d0b80 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Tue, 29 Oct 2024 23:06:56 +0100 Subject: [PATCH 22/57] fix: chunk type inference --- .../azure-openai/azure-openai-chat-client.ts | 22 ++-- ...t-completion-stream-chunk-response.test.ts | 6 +- ...i-chat-completion-stream-chunk-response.ts | 14 +-- ...-openai-chat-completion-stream-response.ts | 8 +- ...zure-openai-chat-completion-stream.test.ts | 5 + .../azure-openai-chat-completion-stream.ts | 32 ++--- .../src/azure-openai/azure-openai-stream.ts | 114 +----------------- .../src/foundation-models/azure-openai.ts | 4 +- sample-code/src/server.ts | 7 ++ 9 files changed, 58 insertions(+), 154 deletions(-) create mode 100644 packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts index c44bc7af..beaa85a4 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts @@ -8,6 +8,7 @@ import { apiVersion, type AzureOpenAiChatModel } from './model-types.js'; import { AzureOpenAiChatCompletionResponse } from './azure-openai-chat-completion-response.js'; import { AzureOpenAiChatCompletionStreamResponse } from './azure-openai-chat-completion-stream-response.js'; import { AzureOpenAiChatCompletionStream } from './azure-openai-chat-completion-stream.js'; +import type { AzureOpenAiChatCompletionStreamChunkResponse } from './azure-openai-chat-completion-stream-chunk-response.js'; import type { HttpResponse } from '@sap-cloud-sdk/http-client'; import type { AzureOpenAiCreateChatCompletionRequest } from './client/inference/schema/index.js'; @@ -44,10 +45,12 @@ export class AzureOpenAiChatClient { async stream( data: AzureOpenAiCreateChatCompletionRequest, requestConfig?: CustomRequestConfig - ): Promise { - const response = new AzureOpenAiChatCompletionStreamResponse(); - const stream = await this.createStream(data, requestConfig); - response.stream = stream + ): Promise< + AzureOpenAiChatCompletionStreamResponse + > { + const response = + new AzureOpenAiChatCompletionStreamResponse(); + response.stream 
= (await this.createStream(data, requestConfig)) .pipe(AzureOpenAiChatCompletionStream.processChunk) .pipe(AzureOpenAiChatCompletionStream.processFinishReason, response) .pipe(AzureOpenAiChatCompletionStream.processTokenUsage, response); @@ -63,10 +66,9 @@ export class AzureOpenAiChatClient { async streamContent( data: AzureOpenAiCreateChatCompletionRequest, requestConfig?: CustomRequestConfig - ): Promise { - const response = new AzureOpenAiChatCompletionStreamResponse(); - const stream = await this.createStream(data, requestConfig); - response.stream = stream + ): Promise> { + const response = new AzureOpenAiChatCompletionStreamResponse(); + response.stream = (await this.createStream(data, requestConfig)) .pipe(AzureOpenAiChatCompletionStream.processChunk) .pipe(AzureOpenAiChatCompletionStream.processFinishReason, response) .pipe(AzureOpenAiChatCompletionStream.processTokenUsage, response) @@ -97,7 +99,7 @@ export class AzureOpenAiChatClient { private async createStream( data: AzureOpenAiCreateChatCompletionRequest, requestConfig?: CustomRequestConfig - ): Promise { + ): Promise> { const response = await this.executeRequest( { ...data, @@ -111,6 +113,6 @@ export class AzureOpenAiChatClient { responseType: 'stream' } ); - return AzureOpenAiChatCompletionStream.fromSSEResponse(response); + return AzureOpenAiChatCompletionStream.create(response); } } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.test.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.test.ts index 49edc7fd..fa5ae87d 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.test.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.test.ts @@ -43,13 +43,13 @@ describe('OpenAI chat completion stream chunk response', () => { it('should return the chat completion stream chunk response', () => { expect( - azureOpenAiChatCompletionStreamChunkResponses.tokenUsage.chunk + azureOpenAiChatCompletionStreamChunkResponses.tokenUsage.data ).toStrictEqual(mockResponses.tokenUsage); expect( - azureOpenAiChatCompletionStreamChunkResponses.finishReason.chunk + azureOpenAiChatCompletionStreamChunkResponses.finishReason.data ).toStrictEqual(mockResponses.finishReason); expect( - azureOpenAiChatCompletionStreamChunkResponses.deltaContent.chunk + azureOpenAiChatCompletionStreamChunkResponses.deltaContent.data ).toStrictEqual(mockResponses.deltaContent); }); diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts index 300e9bda..43823246 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts @@ -2,16 +2,16 @@ * Azure OpenAI chat completion stream chunk response. */ export class AzureOpenAiChatCompletionStreamChunkResponse { - constructor(public readonly chunk: any) { - this.chunk = chunk; + constructor(public readonly data: any) { + this.data = data; } /** * Usage of tokens in the chunk response. * @returns Token usage. 
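   * Illustrative read (`chunkResponse` is a stand-in name; per the mock data in
   * this patch, only the final chunk of a stream carries a non-null `usage`):
   * @example
   * const usage = chunkResponse.getTokenUsage();
   * if (usage) {
   *   console.log(usage.total_tokens);
   * }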
*/ - getTokenUsage(): this['chunk']['usage'] { - return this.chunk.usage; + getTokenUsage(): this['data']['usage'] { + return this.data.usage; } /** @@ -21,8 +21,8 @@ export class AzureOpenAiChatCompletionStreamChunkResponse { */ getFinishReason( choiceIndex = 0 - ): this['chunk']['choices'][0]['finish_reason'] { - return this.chunk.choices[choiceIndex]?.finish_reason; + ): this['data']['choices'][0]['finish_reason'] { + return this.data.choices[choiceIndex]?.finish_reason; } /** @@ -31,6 +31,6 @@ export class AzureOpenAiChatCompletionStreamChunkResponse { * @returns The message delta content. */ getDeltaContent(choiceIndex = 0): string | undefined | null { - return this.chunk.choices[choiceIndex]?.delta?.content; + return this.data.choices[choiceIndex]?.delta?.content; } } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts index 0e409ace..2d0aef80 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts @@ -4,10 +4,10 @@ import type { AzureOpenAiChatCompletionStream } from './azure-openai-chat-comple /** * Azure OpenAI chat completion stream response. */ -export class AzureOpenAiChatCompletionStreamResponse { +export class AzureOpenAiChatCompletionStreamResponse { private _usage: AzureOpenAiCompletionUsage | undefined; private _finishReason: string | undefined; - private _stream: AzureOpenAiChatCompletionStream | undefined; + private _stream: AzureOpenAiChatCompletionStream | undefined; public get usage(): AzureOpenAiCompletionUsage { if (!this._usage) { @@ -31,14 +31,14 @@ export class AzureOpenAiChatCompletionStreamResponse { this._finishReason = finishReason; } - public get stream(): AzureOpenAiChatCompletionStream { + public get stream(): AzureOpenAiChatCompletionStream { if (!this._stream) { throw new Error('Response stream is undefined.'); } return this._stream; } - public set stream(stream: AzureOpenAiChatCompletionStream) { + public set stream(stream: AzureOpenAiChatCompletionStream) { this._stream = stream; } } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts new file mode 100644 index 00000000..c9f2db3d --- /dev/null +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts @@ -0,0 +1,5 @@ +describe('OpenAI chat completion stream', () => { + beforeAll(async () => {}); + + it('should create a chat completion stream from sse response', () => {}); +}); diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts index 0d8f9b3c..bb8fa858 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts @@ -14,16 +14,16 @@ const logger = createLogger({ */ // TODO: The Item type `any` should actually be the type of the stream chunk response. // But `createChatCompletionStreamResponse` is first available in Azure OpenAI spec preview version 2024-08-01. 
-export class AzureOpenAiChatCompletionStream extends Stream { +export class AzureOpenAiChatCompletionStream extends Stream { /** * Create a chat completion stream based on the http response. * @param response - Http response. * @returns Chat completion stream. * @internal */ - static fromSSEResponse( + public static create( response: HttpResponse - ): AzureOpenAiChatCompletionStream { + ): AzureOpenAiChatCompletionStream { const stream = Stream.fromSSEResponse(response); return new AzureOpenAiChatCompletionStream(stream.iterator); } @@ -34,7 +34,7 @@ export class AzureOpenAiChatCompletionStream extends Stream { * @internal */ static async *processChunk( - stream: AzureOpenAiChatCompletionStream + stream: AzureOpenAiChatCompletionStream ): AsyncGenerator { for await (const chunk of stream) { yield new AzureOpenAiChatCompletionStreamChunkResponse(chunk); @@ -47,7 +47,7 @@ export class AzureOpenAiChatCompletionStream extends Stream { * @internal */ static async *processContent( - stream: AzureOpenAiChatCompletionStream + stream: AzureOpenAiChatCompletionStream ): AsyncGenerator { for await (const chunk of stream) { const deltaContent = chunk.getDeltaContent(); @@ -62,8 +62,8 @@ export class AzureOpenAiChatCompletionStream extends Stream { * @internal */ static async *processFinishReason( - stream: AzureOpenAiChatCompletionStream, - response?: AzureOpenAiChatCompletionStreamResponse + stream: AzureOpenAiChatCompletionStream, + response?: AzureOpenAiChatCompletionStreamResponse ): AsyncGenerator { for await (const chunk of stream) { const finishReason = chunk.getFinishReason(); @@ -93,8 +93,8 @@ export class AzureOpenAiChatCompletionStream extends Stream { * @internal */ static async *processTokenUsage( - stream: AzureOpenAiChatCompletionStream, - response?: AzureOpenAiChatCompletionStreamResponse + stream: AzureOpenAiChatCompletionStream, + response?: AzureOpenAiChatCompletionStreamResponse ): AsyncGenerator { for await (const chunk of stream) { const usage = chunk.getTokenUsage(); @@ -105,20 +105,20 @@ export class AzureOpenAiChatCompletionStream extends Stream { } } - constructor(public iterator: () => AsyncIterator) { + constructor(public iterator: () => AsyncIterator) { super(iterator); } /** * @internal */ - pipe( + pipe( processFn: ( - stream: AzureOpenAiChatCompletionStream, - response?: AzureOpenAiChatCompletionStreamResponse - ) => AsyncIterator, - response?: AzureOpenAiChatCompletionStreamResponse - ): AzureOpenAiChatCompletionStream { + stream: AzureOpenAiChatCompletionStream, + response?: AzureOpenAiChatCompletionStreamResponse + ) => AsyncIterator, + response?: AzureOpenAiChatCompletionStreamResponse + ): AzureOpenAiChatCompletionStream { if (response) { return new AzureOpenAiChatCompletionStream(() => processFn(this, response) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-stream.ts index 5219b0c4..b3c05955 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-stream.ts @@ -16,7 +16,7 @@ type Bytes = string | ArrayBuffer | Uint8Array | Buffer | null | undefined; * @internal */ export class Stream implements AsyncIterable { - static fromSSEResponse(response: HttpResponse): Stream { + protected static fromSSEResponse(response: HttpResponse): Stream { let consumed = false; async function* iterator(): AsyncIterator { @@ -76,123 +76,11 @@ export class Stream implements AsyncIterable { return new 
Stream(iterator); } - /** - * Generates a Stream from a newline-separated ReadableStream - * where each item is a JSON value. - * @param readableStream - The ReadableStream to convert to a Stream. - * @returns The Stream. - */ - static fromReadableStream( - readableStream: ReadableStream - ): Stream { - let consumed = false; - - async function* iterLines(): AsyncGenerator { - const lineDecoder = new LineDecoder(); - - const iter = readableStream; - for await (const chunk of iter) { - for (const line of lineDecoder.decode(chunk)) { - yield line; - } - } - - for (const line of lineDecoder.flush()) { - yield line; - } - } - - async function* iterator(): AsyncIterator { - if (consumed) { - throw new Error( - 'Cannot iterate over a consumed stream, use `.tee()` to split the stream.' - ); - } - consumed = true; - let done = false; - - for await (const line of iterLines()) { - if (done) { - continue; - } - if (line) { - yield JSON.parse(line); - } - } - done = true; - } - - return new Stream(iterator); - } - constructor(public iterator: () => AsyncIterator) {} [Symbol.asyncIterator](): AsyncIterator { return this.iterator(); } - - /** - * Splits the stream into two streams which can be - * independently read from at different speeds. - * @returns A tuple of two streams. - */ - tee(): [Stream, Stream] { - const left: Promise>[] = []; - const right: Promise>[] = []; - const iterator = this.iterator(); - - const teeIterator = ( - queue: Promise>[] - ): AsyncIterator => ({ - next: () => { - if (queue.length === 0) { - const result = iterator.next(); - left.push(result); - right.push(result); - } - return queue.shift()!; - } - }); - - return [ - new Stream(() => teeIterator(left)), - new Stream(() => teeIterator(right)) - ]; - } - - /** - * Converts this stream to a newline-separated ReadableStream of - * JSON stringified values in the stream - * which can be turned back into a Stream with `Stream.fromReadableStream()`. - * @returns The ReadableStream. 
- */ - toReadableStream(): ReadableStream { - let iter: AsyncIterator; - const encoder = new TextEncoder(); - - const underlyingDefaultSource: UnderlyingDefaultSource = { - start: async () => { - iter = this[Symbol.asyncIterator](); - }, - pull: async (ctrl: any) => { - try { - const { value, done } = await iter.next(); - if (done) { - return ctrl.close(); - } - const bytes = encoder.encode(JSON.stringify(value) + '\n'); - ctrl.enqueue(bytes); - } catch (err) { - ctrl.error(err); - } - }, - cancel: async () => { - await iter.return?.(); - } - }; - - return new ReadableStream(underlyingDefaultSource); - } } /** diff --git a/sample-code/src/foundation-models/azure-openai.ts b/sample-code/src/foundation-models/azure-openai.ts index 58908a20..cbb88ae8 100644 --- a/sample-code/src/foundation-models/azure-openai.ts +++ b/sample-code/src/foundation-models/azure-openai.ts @@ -33,7 +33,9 @@ export async function chatCompletion(): Promise { +export async function chatCompletionStream(): Promise< + AzureOpenAiChatCompletionStreamResponse +> { const response = await new AzureOpenAiChatClient( 'gpt-35-turbo' ).streamContent({ diff --git a/sample-code/src/server.ts b/sample-code/src/server.ts index b895538a..58e5142c 100644 --- a/sample-code/src/server.ts +++ b/sample-code/src/server.ts @@ -75,6 +75,13 @@ app.get('/azure-openai/chat-completion-stream', async (req, res) => { } res.write(chunk); } + + res.write('\n\n---------------------------\n'); + res.write(`Finish reason: ${response.finishReason}\n`); + res.write('Token usage:\n'); + res.write(` - Completion tokens: ${response.usage.completion_tokens}\n`); + res.write(` - Prompt tokens: ${response.usage.prompt_tokens}\n`); + res.write(` - Total tokens: ${response.usage.total_tokens}\n`); } catch (error: any) { console.error(error); res From f22bed70e38ef101edc0d2783a7f7e3c7cf81c67 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Wed, 30 Oct 2024 09:20:18 +0100 Subject: [PATCH 23/57] refactor: change some types --- ...i-chat-completion-stream-chunk-response.ts | 1 + .../azure-openai-chat-completion-stream.ts | 40 +++++++++++-------- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts index 43823246..c2246e1b 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts @@ -3,6 +3,7 @@ */ export class AzureOpenAiChatCompletionStreamChunkResponse { constructor(public readonly data: any) { + // TODO: Change `any` to `CreateChatCompletionStreamResponse` once the preview spec becomes stable. this.data = data; } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts index bb8fa858..9760f136 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts @@ -12,9 +12,7 @@ const logger = createLogger({ /** * Chat completion stream containing post-processing functions. */ -// TODO: The Item type `any` should actually be the type of the stream chunk response. 
-// But `createChatCompletionStreamResponse` is first available in Azure OpenAI spec preview version 2024-08-01.
-export class AzureOpenAiChatCompletionStream extends Stream {
+export class AzureOpenAiChatCompletionStream extends Stream {
   /**
    * Create a chat completion stream based on the http response.
    * @param response - Http response.
@@ -24,7 +22,8 @@
   public static create(
     response: HttpResponse
   ): AzureOpenAiChatCompletionStream {
-    const stream = Stream.fromSSEResponse(response);
+    // TODO: Change `any` to `CreateChatCompletionStreamResponse` once the preview spec becomes stable.
+    const stream = Stream.fromSSEResponse<any>(response);
     return new AzureOpenAiChatCompletionStream(stream.iterator);
   }
 
@@ -34,8 +33,8 @@
    * @internal
    */
   static async *processChunk(
-    stream: AzureOpenAiChatCompletionStream
-  ): AsyncGenerator {
+    stream: AzureOpenAiChatCompletionStream // TODO: Change `any` to `CreateChatCompletionStreamResponse` once the preview spec becomes stable.
+  ): AsyncGenerator {
     for await (const chunk of stream) {
       yield new AzureOpenAiChatCompletionStreamChunkResponse(chunk);
     }
   }
 
@@ -48,7 +47,7 @@
    */
   static async *processContent(
     stream: AzureOpenAiChatCompletionStream
-  ): AsyncGenerator {
+  ): AsyncGenerator {
     for await (const chunk of stream) {
       const deltaContent = chunk.getDeltaContent();
       if (!deltaContent) {
@@ -64,11 +63,13 @@
   static async *processFinishReason(
     stream: AzureOpenAiChatCompletionStream,
     response?: AzureOpenAiChatCompletionStreamResponse
-  ): AsyncGenerator {
+  ): AsyncGenerator {
     for await (const chunk of stream) {
       const finishReason = chunk.getFinishReason();
       if (finishReason) {
-        response!.finishReason = finishReason;
+        if (response) {
+          response.finishReason = finishReason;
+        }
         switch (finishReason) {
           case 'content_filter':
             logger.error('Stream finished with content filter hit.');
@@ -95,30 +96,37 @@
   static async *processTokenUsage(
     stream: AzureOpenAiChatCompletionStream,
     response?: AzureOpenAiChatCompletionStreamResponse
-  ): AsyncGenerator {
+  ): AsyncGenerator {
     for await (const chunk of stream) {
       const usage = chunk.getTokenUsage();
       if (usage) {
-        response!.usage = usage;
+        if (response) {
+          response.usage = usage;
+        }
+
+        logger.debug(`Token usage: ${JSON.stringify(usage)}`);
       }
       yield chunk;
     }
   }
 
-  constructor(public iterator: () => AsyncIterator) {
+  constructor(public iterator: () => AsyncIterator) {
     super(iterator);
   }
 
   /**
+   * Pipe the stream through a processing function.
+   * @param processFn - The function to process the input stream.
+   * @param response - The `AzureOpenAiChatCompletionStreamResponse` object in which the process function stores the finish reason, token usage, etc.
+   * @returns The output stream containing processed items.
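+   * Calls can be chained, since each pipe returns a new `AzureOpenAiChatCompletionStream` that wraps this one.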
* @internal */ - pipe( + pipe( processFn: ( - stream: AzureOpenAiChatCompletionStream, + stream: AzureOpenAiChatCompletionStream, response?: AzureOpenAiChatCompletionStreamResponse - ) => AsyncIterator, + ) => AsyncIterator, response?: AzureOpenAiChatCompletionStreamResponse - ): AzureOpenAiChatCompletionStream { + ): AzureOpenAiChatCompletionStream { if (response) { return new AzureOpenAiChatCompletionStream(() => processFn(this, response) From 1348b9761feae1058ca1a4d7fc4db556aa8c5171 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Wed, 30 Oct 2024 10:47:50 +0100 Subject: [PATCH 24/57] wip --- ...zure-openai-chat-completion-stream.test.ts | 38 ++++++++++++++++++- .../azure-openai/azure-openai-line-decoder.ts | 14 ------- ...-openai-chat-completion-stream-chunks.txt} | 3 +- test-util/mock-http.ts | 2 +- 4 files changed, 39 insertions(+), 18 deletions(-) rename test-util/data/foundation-models/{azure-openai-chat-completion-stream-chunks.json => azure-openai-chat-completion-stream-chunks.txt} (99%) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts index c9f2db3d..a7a13dbc 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts @@ -1,5 +1,39 @@ +import { parseFileToString } from "../../../../test-util/mock-http.js"; +import { AzureOpenAiChatCompletionStream } from "../../internal.js"; +import { LineDecoder } from "./azure-openai-line-decoder.js"; +import { SSEDecoder } from "./azure-openai-sse-decoder.js"; + describe('OpenAI chat completion stream', () => { - beforeAll(async () => {}); + let sseChunks: string[]; + let originalChatCompletionStream: AzureOpenAiChatCompletionStream; + + beforeEach(async () => { + const rawChunksString = await parseFileToString( + 'foundation-models', + 'azure-openai-chat-completion-stream-chunks.txt' + ); + const lineDecoder = new LineDecoder(); + const sseDecoder = new SSEDecoder(); + const rawLines: string[] = lineDecoder.decode(Buffer.from(rawChunksString, 'utf-8')); + + sseChunks = rawLines + .map((chunk) => sseDecoder.decode(chunk)) + .filter((sse) => sse !== null) + .filter((sse) => !sse.data.startsWith('[DONE]')) + .map((sse) => JSON.parse(sse.data)); + + async function *iterator(): AsyncGenerator { + for (let sseChunk of sseChunks) { + yield sseChunk; + } + } + originalChatCompletionStream = new AzureOpenAiChatCompletionStream(iterator); + }); - it('should create a chat completion stream from sse response', () => {}); + it('should wrap the raw chunk', async () => { + for await (const chunk of AzureOpenAiChatCompletionStream.processChunk(originalChatCompletionStream)) { + expect(chunk).toBeDefined(); + console.log(chunk.getDeltaContent()); + } + }); }); diff --git a/packages/foundation-models/src/azure-openai/azure-openai-line-decoder.ts b/packages/foundation-models/src/azure-openai/azure-openai-line-decoder.ts index c1226b44..990f2350 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-line-decoder.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-line-decoder.ts @@ -87,20 +87,6 @@ export class LineDecoder { ); } - // Browser - if (typeof TextDecoder !== 'undefined') { - if (bytes instanceof Uint8Array || bytes instanceof ArrayBuffer) { - this.textDecoder ??= new TextDecoder('utf8'); - return this.textDecoder.decode(bytes); - } - - throw new 
Error( - `Unexpected: received non-Uint8Array/ArrayBuffer (${ - (bytes as any).constructor.name - }) in a web platform. Please report this error.` - ); - } - throw new Error( 'Unexpected: neither Buffer nor TextDecoder are available as globals. Please report this error.' ); diff --git a/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunks.json b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunks.txt similarity index 99% rename from test-util/data/foundation-models/azure-openai-chat-completion-stream-chunks.json rename to test-util/data/foundation-models/azure-openai-chat-completion-stream-chunks.txt index 0a7dafb8..99ff4ead 100644 --- a/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunks.json +++ b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunks.txt @@ -31,4 +31,5 @@ data: {"choices":[{"content_filter_results":{},"delta":{},"finish_reason":"stop" data: {"choices":[],"created":1730125149,"id":"chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I","model":"gpt-35-turbo","object":"chat.completion.chunk","system_fingerprint":"fp_808245b034","usage":{"completion_tokens":7,"prompt_tokens":14,"total_tokens":21}} -data: [DONE] \ No newline at end of file +data: [DONE] + diff --git a/test-util/mock-http.ts b/test-util/mock-http.ts index e839ce9a..b49ed2d0 100644 --- a/test-util/mock-http.ts +++ b/test-util/mock-http.ts @@ -143,7 +143,7 @@ export function mockDeploymentsList( /** * @internal */ -export async function parseMockResponseToString( +export async function parseFileToString( client: string, fileName: string ): Promise { From 8086b70b93634f4f1de01d2e1a2897f150b910b1 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Wed, 30 Oct 2024 10:50:43 +0100 Subject: [PATCH 25/57] fix: internal.js.map issue --- .../azure-openai/azure-openai-chat-completion-stream.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts index a7a13dbc..d6ced8c7 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts @@ -1,5 +1,5 @@ import { parseFileToString } from "../../../../test-util/mock-http.js"; -import { AzureOpenAiChatCompletionStream } from "../../internal.js"; +import { AzureOpenAiChatCompletionStream } from "./azure-openai-chat-completion-stream.js"; import { LineDecoder } from "./azure-openai-line-decoder.js"; import { SSEDecoder } from "./azure-openai-sse-decoder.js"; From 40ad3d2c7ffc0f2cda0d6e587f187846ec84f7d5 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Wed, 30 Oct 2024 13:15:01 +0100 Subject: [PATCH 26/57] chore: add tests for chat completion stream --- ...zure-openai-chat-completion-stream.test.ts | 121 +++++++++++++++--- .../src/azure-openai/azure-openai-stream.ts | 4 +- 2 files changed, 106 insertions(+), 19 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts index d6ced8c7..5cb6c5f8 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts @@ -1,7 +1,9 @@ -import { parseFileToString } from 
"../../../../test-util/mock-http.js"; -import { AzureOpenAiChatCompletionStream } from "./azure-openai-chat-completion-stream.js"; -import { LineDecoder } from "./azure-openai-line-decoder.js"; -import { SSEDecoder } from "./azure-openai-sse-decoder.js"; +import { createLogger } from '@sap-cloud-sdk/util'; +import { jest } from '@jest/globals'; +import { parseFileToString } from '../../../../test-util/mock-http.js'; +import { AzureOpenAiChatCompletionStream } from './azure-openai-chat-completion-stream.js'; +import { LineDecoder } from './azure-openai-line-decoder.js'; +import { SSEDecoder } from './azure-openai-sse-decoder.js'; describe('OpenAI chat completion stream', () => { let sseChunks: string[]; @@ -14,26 +16,113 @@ describe('OpenAI chat completion stream', () => { ); const lineDecoder = new LineDecoder(); const sseDecoder = new SSEDecoder(); - const rawLines: string[] = lineDecoder.decode(Buffer.from(rawChunksString, 'utf-8')); + const rawLines: string[] = lineDecoder.decode( + Buffer.from(rawChunksString, 'utf-8') + ); sseChunks = rawLines - .map((chunk) => sseDecoder.decode(chunk)) - .filter((sse) => sse !== null) - .filter((sse) => !sse.data.startsWith('[DONE]')) - .map((sse) => JSON.parse(sse.data)); - - async function *iterator(): AsyncGenerator { - for (let sseChunk of sseChunks) { - yield sseChunk; + .map(chunk => sseDecoder.decode(chunk)) + .filter(sse => sse !== null) + .filter(sse => !sse.data.startsWith('[DONE]')) + .map(sse => JSON.parse(sse.data)); + + async function* iterator(): AsyncGenerator { + for (const sseChunk of sseChunks) { + yield sseChunk; } } - originalChatCompletionStream = new AzureOpenAiChatCompletionStream(iterator); + originalChatCompletionStream = new AzureOpenAiChatCompletionStream( + iterator + ); }); it('should wrap the raw chunk', async () => { - for await (const chunk of AzureOpenAiChatCompletionStream.processChunk(originalChatCompletionStream)) { + let output = ''; + const asnycGenerator = AzureOpenAiChatCompletionStream.processChunk( + originalChatCompletionStream + ); + for await (const chunk of asnycGenerator) { + expect(chunk).toBeDefined(); + chunk.getDeltaContent() ? 
(output += chunk.getDeltaContent()) : null; + } + expect(output).toEqual('The capital of France is Paris.'); + }); + + it('should process the finish reason', async () => { + const logger = createLogger({ + package: 'foundation-models', + messageContext: 'azure-openai-chat-completion-stream' + }); + const debugSpy = jest.spyOn(logger, 'debug'); + const asyncGeneratorChunk = AzureOpenAiChatCompletionStream.processChunk( + originalChatCompletionStream + ); + const asyncGeneratorFinishReason = + AzureOpenAiChatCompletionStream.processFinishReason( + new AzureOpenAiChatCompletionStream(() => asyncGeneratorChunk) + ); + + for await (const chunk of asyncGeneratorFinishReason) { expect(chunk).toBeDefined(); - console.log(chunk.getDeltaContent()); } + expect(debugSpy).toHaveBeenCalledWith('Stream finished.'); + }); + it('should process the finish reason', async () => { + const logger = createLogger({ + package: 'foundation-models', + messageContext: 'azure-openai-chat-completion-stream' + }); + const debugSpy = jest.spyOn(logger, 'debug'); + const asyncGeneratorChunk = AzureOpenAiChatCompletionStream.processChunk( + originalChatCompletionStream + ); + const asyncGeneratorFinishReason = + AzureOpenAiChatCompletionStream.processFinishReason( + new AzureOpenAiChatCompletionStream(() => asyncGeneratorChunk) + ); + + for await (const chunk of asyncGeneratorFinishReason) { + expect(chunk).toBeDefined(); + } + expect(debugSpy).toHaveBeenCalledWith('Stream finished.'); + }); + + it('should process the token usage', async () => { + const logger = createLogger({ + package: 'foundation-models', + messageContext: 'azure-openai-chat-completion-stream' + }); + const debugSpy = jest.spyOn(logger, 'debug'); + const asyncGeneratorChunk = AzureOpenAiChatCompletionStream.processChunk( + originalChatCompletionStream + ); + const asyncGeneratorTokenUsage = + AzureOpenAiChatCompletionStream.processTokenUsage( + new AzureOpenAiChatCompletionStream(() => asyncGeneratorChunk) + ); + + for await (const chunk of asyncGeneratorTokenUsage) { + expect(chunk).toBeDefined(); + } + expect(debugSpy).toHaveBeenCalledWith( + expect.stringContaining('Token usage:') + ); + }); + + it('should process the content', async () => { + const asyncGeneratorChunk = AzureOpenAiChatCompletionStream.processChunk( + originalChatCompletionStream + ); + const asyncGeneratorContent = + AzureOpenAiChatCompletionStream.processContent( + new AzureOpenAiChatCompletionStream(() => asyncGeneratorChunk) + ); + + let output = ''; + for await (const chunk of asyncGeneratorContent) { + expect(typeof chunk).toBe('string'); + output += chunk; + } + expect(output).toEqual('The capital of France is Paris.'); }); }); diff --git a/packages/foundation-models/src/azure-openai/azure-openai-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-stream.ts index b3c05955..c7c1a612 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-stream.ts @@ -21,9 +21,7 @@ export class Stream implements AsyncIterable { async function* iterator(): AsyncIterator { if (consumed) { - throw new Error( - 'Cannot iterate over a consumed stream, use `.tee()` to split the stream.' 
- ); + throw new Error('Cannot iterate over a consumed stream.'); } consumed = true; let done = false; From dcb6d54175afff9f974b3fa8cf4cd179c6f9d38d Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Wed, 30 Oct 2024 13:17:48 +0100 Subject: [PATCH 27/57] refactor: move stream files --- ...zure-openai-chat-completion-stream.test.ts | 4 +- .../azure-openai-chat-completion-stream.ts | 2 +- .../{azure-openai-stream.ts => stream.ts} | 6 +- .../line-decoder.ts} | 0 .../sse-decoder.ts} | 0 .../src/azure-openai/stream/stream.ts | 201 ++++++++++++++++++ 6 files changed, 207 insertions(+), 6 deletions(-) rename packages/foundation-models/src/azure-openai/{azure-openai-stream.ts => stream.ts} (96%) rename packages/foundation-models/src/azure-openai/{azure-openai-line-decoder.ts => stream/line-decoder.ts} (100%) rename packages/foundation-models/src/azure-openai/{azure-openai-sse-decoder.ts => stream/sse-decoder.ts} (100%) create mode 100644 packages/foundation-models/src/azure-openai/stream/stream.ts diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts index 5cb6c5f8..e3176bdc 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts @@ -2,8 +2,8 @@ import { createLogger } from '@sap-cloud-sdk/util'; import { jest } from '@jest/globals'; import { parseFileToString } from '../../../../test-util/mock-http.js'; import { AzureOpenAiChatCompletionStream } from './azure-openai-chat-completion-stream.js'; -import { LineDecoder } from './azure-openai-line-decoder.js'; -import { SSEDecoder } from './azure-openai-sse-decoder.js'; +import { LineDecoder } from './stream/line-decoder.js'; +import { SSEDecoder } from './stream/sse-decoder.js'; describe('OpenAI chat completion stream', () => { let sseChunks: string[]; diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts index 9760f136..c8cb945d 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts @@ -1,5 +1,5 @@ import { createLogger } from '@sap-cloud-sdk/util'; -import { Stream } from './azure-openai-stream.js'; +import { Stream } from './stream/stream.js'; import { AzureOpenAiChatCompletionStreamChunkResponse } from './azure-openai-chat-completion-stream-chunk-response.js'; import type { HttpResponse } from '@sap-cloud-sdk/http-client'; import type { AzureOpenAiChatCompletionStreamResponse } from './azure-openai-chat-completion-stream-response.js'; diff --git a/packages/foundation-models/src/azure-openai/azure-openai-stream.ts b/packages/foundation-models/src/azure-openai/stream.ts similarity index 96% rename from packages/foundation-models/src/azure-openai/azure-openai-stream.ts rename to packages/foundation-models/src/azure-openai/stream.ts index c7c1a612..25fb7e39 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-stream.ts +++ b/packages/foundation-models/src/azure-openai/stream.ts @@ -1,7 +1,7 @@ import { createLogger } from '@sap-cloud-sdk/util'; -import { LineDecoder } from './azure-openai-line-decoder.js'; -import { SSEDecoder } from './azure-openai-sse-decoder.js'; -import type { ServerSentEvent } from 
'./azure-openai-sse-decoder.js'; +import { LineDecoder } from './stream/line-decoder.js'; +import { SSEDecoder } from './stream/sse-decoder.js'; +import type { ServerSentEvent } from './stream/sse-decoder.js'; import type { HttpResponse } from '@sap-cloud-sdk/http-client'; const logger = createLogger({ diff --git a/packages/foundation-models/src/azure-openai/azure-openai-line-decoder.ts b/packages/foundation-models/src/azure-openai/stream/line-decoder.ts similarity index 100% rename from packages/foundation-models/src/azure-openai/azure-openai-line-decoder.ts rename to packages/foundation-models/src/azure-openai/stream/line-decoder.ts diff --git a/packages/foundation-models/src/azure-openai/azure-openai-sse-decoder.ts b/packages/foundation-models/src/azure-openai/stream/sse-decoder.ts similarity index 100% rename from packages/foundation-models/src/azure-openai/azure-openai-sse-decoder.ts rename to packages/foundation-models/src/azure-openai/stream/sse-decoder.ts diff --git a/packages/foundation-models/src/azure-openai/stream/stream.ts b/packages/foundation-models/src/azure-openai/stream/stream.ts new file mode 100644 index 00000000..2f783ca6 --- /dev/null +++ b/packages/foundation-models/src/azure-openai/stream/stream.ts @@ -0,0 +1,201 @@ +import { createLogger } from '@sap-cloud-sdk/util'; +import { LineDecoder } from './line-decoder.js'; +import { SSEDecoder } from './sse-decoder.js'; +import type { ServerSentEvent } from './sse-decoder.js'; +import type { HttpResponse } from '@sap-cloud-sdk/http-client'; + +const logger = createLogger({ + package: 'foundation-models', + messageContext: 'azure-openai-stream' +}); + +type Bytes = string | ArrayBuffer | Uint8Array | Buffer | null | undefined; + +/** + * Stream implemented as an async iterable. + * @internal + */ +export class Stream implements AsyncIterable { + protected static fromSSEResponse(response: HttpResponse): Stream { + let consumed = false; + + async function* iterator(): AsyncIterator { + if (consumed) { + throw new Error('Cannot iterate over a consumed stream.'); + } + consumed = true; + let done = false; + for await (const sse of _iterSSEMessages(response)) { + if (done) { + continue; + } + + if (sse.data.startsWith('[DONE]')) { + done = true; + continue; + } + + if (sse.event === null) { + let data; + + try { + data = JSON.parse(sse.data); + } catch (e: any) { + logger.error(`Could not parse message into JSON: ${sse.data}`); + logger.error(`From chunk: ${sse.raw}`); + throw e; + } + + if (data && data.error) { + throw new Error(data.error); + } + + yield data; + } else { + let data; + try { + data = JSON.parse(sse.data); + } catch (e: any) { + logger.error(`Could not parse message into JSON: ${sse.data}`); + logger.error(`From chunk: ${sse.raw}`); + throw e; + } + // TODO: Is this where the error should be thrown? 
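+          // A named `error` event aborts iteration: its parsed `error` field becomes the thrown error message.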
+ if (sse.event === 'error') { + // throw new Error(data.error, data.message); + throw new Error(data.error); + } + yield { event: sse.event, data } as any; + } + } + done = true; + } + + return new Stream(iterator); + } + + constructor(public iterator: () => AsyncIterator) {} + + [Symbol.asyncIterator](): AsyncIterator { + return this.iterator(); + } +} + +/** + * @internal + */ +export async function* _iterSSEMessages( + response: HttpResponse +): AsyncGenerator { + if (!response.data) { + throw new Error('Attempted to iterate over a response with no body'); + } + + const sseDecoder = new SSEDecoder(); + const lineDecoder = new LineDecoder(); + + const iter = response.data; + for await (const sseChunk of iterSSEChunks(iter)) { + for (const line of lineDecoder.decode(sseChunk)) { + const sse = sseDecoder.decode(line); + if (sse) { + yield sse; + } + } + } + + for (const line of lineDecoder.flush()) { + const sse = sseDecoder.decode(line); + if (sse) { + yield sse; + } + } +} + +/** + * Given an async iterable iterator, iterates over it and yields full + * SSE chunks, i.e. yields when a double new-line is encountered. + * @param iterator - Async iterable iterator. + * @returns Async generator of Uint8Array. + * @internal + */ +async function* iterSSEChunks( + iterator: AsyncIterableIterator +): AsyncGenerator { + let data = new Uint8Array(); + + for await (const chunk of iterator) { + if (chunk == null) { + continue; + } + + const binaryChunk = + chunk instanceof ArrayBuffer + ? new Uint8Array(chunk) + : typeof chunk === 'string' + ? new TextEncoder().encode(chunk) + : chunk; + + const newData = new Uint8Array(data.length + binaryChunk.length); + newData.set(data); + newData.set(binaryChunk, data.length); + data = newData; + + let patternIndex; + while ((patternIndex = findDoubleNewlineIndex(data)) !== -1) { + yield data.slice(0, patternIndex); + data = data.slice(patternIndex); + } + } + + if (data.length > 0) { + yield data; + } +} + +function findDoubleNewlineIndex(buffer: Uint8Array): number { + // This function searches the buffer for the end patterns (\r\r, \n\n, \r\n\r\n) + // and returns the index right after the first occurrence of any pattern, + // or -1 if none of the patterns are found. + const newline = 0x0a; // \n + const carriage = 0x0d; // \r + + for (let i = 0; i < buffer.length - 2; i++) { + if (buffer[i] === newline && buffer[i + 1] === newline) { + // \n\n + return i + 2; + } + if (buffer[i] === carriage && buffer[i + 1] === carriage) { + // \r\r + return i + 2; + } + if ( + buffer[i] === carriage && + buffer[i + 1] === newline && + i + 3 < buffer.length && + buffer[i + 2] === carriage && + buffer[i + 3] === newline + ) { + // \r\n\r\n + return i + 4; + } + } + + return -1; +} + +/** + * This is an internal helper function that's just used for testing. + * @param chunks - The chunks to decode. + * @returns The decoded lines. 
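+ * Decoding is stateful, so a line that is split across two chunks is only emitted once it is complete.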
+ * @internal + */ +export function _decodeChunks(chunks: string[]): string[] { + const decoder = new LineDecoder(); + const lines: string[] = []; + for (const chunk of chunks) { + lines.push(...decoder.decode(chunk)); + } + + return lines; +} From 4bde96b4ee45cff450707b8492046cc4272d9b17 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Wed, 30 Oct 2024 13:28:27 +0100 Subject: [PATCH 28/57] fix: remove duplicated file --- .../src/azure-openai/stream.ts | 201 ------------------ 1 file changed, 201 deletions(-) delete mode 100644 packages/foundation-models/src/azure-openai/stream.ts diff --git a/packages/foundation-models/src/azure-openai/stream.ts b/packages/foundation-models/src/azure-openai/stream.ts deleted file mode 100644 index 25fb7e39..00000000 --- a/packages/foundation-models/src/azure-openai/stream.ts +++ /dev/null @@ -1,201 +0,0 @@ -import { createLogger } from '@sap-cloud-sdk/util'; -import { LineDecoder } from './stream/line-decoder.js'; -import { SSEDecoder } from './stream/sse-decoder.js'; -import type { ServerSentEvent } from './stream/sse-decoder.js'; -import type { HttpResponse } from '@sap-cloud-sdk/http-client'; - -const logger = createLogger({ - package: 'foundation-models', - messageContext: 'azure-openai-stream' -}); - -type Bytes = string | ArrayBuffer | Uint8Array | Buffer | null | undefined; - -/** - * Stream implemented as an async iterable. - * @internal - */ -export class Stream implements AsyncIterable { - protected static fromSSEResponse(response: HttpResponse): Stream { - let consumed = false; - - async function* iterator(): AsyncIterator { - if (consumed) { - throw new Error('Cannot iterate over a consumed stream.'); - } - consumed = true; - let done = false; - for await (const sse of _iterSSEMessages(response)) { - if (done) { - continue; - } - - if (sse.data.startsWith('[DONE]')) { - done = true; - continue; - } - - if (sse.event === null) { - let data; - - try { - data = JSON.parse(sse.data); - } catch (e: any) { - logger.error(`Could not parse message into JSON: ${sse.data}`); - logger.error(`From chunk: ${sse.raw}`); - throw e; - } - - if (data && data.error) { - throw new Error(data.error); - } - - yield data; - } else { - let data; - try { - data = JSON.parse(sse.data); - } catch (e: any) { - logger.error(`Could not parse message into JSON: ${sse.data}`); - logger.error(`From chunk: ${sse.raw}`); - throw e; - } - // TODO: Is this where the error should be thrown? - if (sse.event === 'error') { - // throw new Error(data.error, data.message); - throw new Error(data.error); - } - yield { event: sse.event, data } as any; - } - } - done = true; - } - - return new Stream(iterator); - } - - constructor(public iterator: () => AsyncIterator) {} - - [Symbol.asyncIterator](): AsyncIterator { - return this.iterator(); - } -} - -/** - * @internal - */ -export async function* _iterSSEMessages( - response: HttpResponse -): AsyncGenerator { - if (!response.data) { - throw new Error('Attempted to iterate over a response with no body'); - } - - const sseDecoder = new SSEDecoder(); - const lineDecoder = new LineDecoder(); - - const iter = response.data; - for await (const sseChunk of iterSSEChunks(iter)) { - for (const line of lineDecoder.decode(sseChunk)) { - const sse = sseDecoder.decode(line); - if (sse) { - yield sse; - } - } - } - - for (const line of lineDecoder.flush()) { - const sse = sseDecoder.decode(line); - if (sse) { - yield sse; - } - } -} - -/** - * Given an async iterable iterator, iterates over it and yields full - * SSE chunks, i.e. 
yields when a double new-line is encountered. - * @param iterator - Async iterable iterator. - * @returns Async generator of Uint8Array. - * @internal - */ -async function* iterSSEChunks( - iterator: AsyncIterableIterator -): AsyncGenerator { - let data = new Uint8Array(); - - for await (const chunk of iterator) { - if (chunk == null) { - continue; - } - - const binaryChunk = - chunk instanceof ArrayBuffer - ? new Uint8Array(chunk) - : typeof chunk === 'string' - ? new TextEncoder().encode(chunk) - : chunk; - - const newData = new Uint8Array(data.length + binaryChunk.length); - newData.set(data); - newData.set(binaryChunk, data.length); - data = newData; - - let patternIndex; - while ((patternIndex = findDoubleNewlineIndex(data)) !== -1) { - yield data.slice(0, patternIndex); - data = data.slice(patternIndex); - } - } - - if (data.length > 0) { - yield data; - } -} - -function findDoubleNewlineIndex(buffer: Uint8Array): number { - // This function searches the buffer for the end patterns (\r\r, \n\n, \r\n\r\n) - // and returns the index right after the first occurrence of any pattern, - // or -1 if none of the patterns are found. - const newline = 0x0a; // \n - const carriage = 0x0d; // \r - - for (let i = 0; i < buffer.length - 2; i++) { - if (buffer[i] === newline && buffer[i + 1] === newline) { - // \n\n - return i + 2; - } - if (buffer[i] === carriage && buffer[i + 1] === carriage) { - // \r\r - return i + 2; - } - if ( - buffer[i] === carriage && - buffer[i + 1] === newline && - i + 3 < buffer.length && - buffer[i + 2] === carriage && - buffer[i + 3] === newline - ) { - // \r\n\r\n - return i + 4; - } - } - - return -1; -} - -/** - * This is an internal helper function that's just used for testing. - * @param chunks - The chunks to decode. - * @returns The decoded lines. - * @internal - */ -export function _decodeChunks(chunks: string[]): string[] { - const decoder = new LineDecoder(); - const lines: string[] = []; - for (const chunk of chunks) { - lines.push(...decoder.decode(chunk)); - } - - return lines; -} From 3d5554c92f47da01fc32fbb29bc2a89ff3b9b496 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Wed, 30 Oct 2024 13:31:47 +0100 Subject: [PATCH 29/57] refactor: rename stream --- .../src/azure-openai/azure-openai-chat-completion-stream.ts | 6 +++--- .../src/azure-openai/stream/{stream.ts => sse-stream.ts} | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) rename packages/foundation-models/src/azure-openai/stream/{stream.ts => sse-stream.ts} (97%) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts index c8cb945d..1b0abc31 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts @@ -1,5 +1,5 @@ import { createLogger } from '@sap-cloud-sdk/util'; -import { Stream } from './stream/stream.js'; +import { SseStream } from './stream/sse-stream.js'; import { AzureOpenAiChatCompletionStreamChunkResponse } from './azure-openai-chat-completion-stream-chunk-response.js'; import type { HttpResponse } from '@sap-cloud-sdk/http-client'; import type { AzureOpenAiChatCompletionStreamResponse } from './azure-openai-chat-completion-stream-response.js'; @@ -12,7 +12,7 @@ const logger = createLogger({ /** * Chat completion stream containing post-processing functions. 
*/ -export class AzureOpenAiChatCompletionStream extends Stream { +export class AzureOpenAiChatCompletionStream extends SseStream { /** * Create a chat completion stream based on the http response. * @param response - Http response. @@ -23,7 +23,7 @@ export class AzureOpenAiChatCompletionStream extends Stream { response: HttpResponse ): AzureOpenAiChatCompletionStream { // TODO: Change `any` to `CreateChatCompletionStreamResponse` once the preview spec becomes stable. - const stream = Stream.fromSSEResponse(response); // TODO: Change `any` to `CreateChatCompletionStreamResponse` once the preview spec becomes stable. + const stream = SseStream.fromSSEResponse(response); // TODO: Change `any` to `CreateChatCompletionStreamResponse` once the preview spec becomes stable. return new AzureOpenAiChatCompletionStream(stream.iterator); } diff --git a/packages/foundation-models/src/azure-openai/stream/stream.ts b/packages/foundation-models/src/azure-openai/stream/sse-stream.ts similarity index 97% rename from packages/foundation-models/src/azure-openai/stream/stream.ts rename to packages/foundation-models/src/azure-openai/stream/sse-stream.ts index 2f783ca6..92635587 100644 --- a/packages/foundation-models/src/azure-openai/stream/stream.ts +++ b/packages/foundation-models/src/azure-openai/stream/sse-stream.ts @@ -15,8 +15,8 @@ type Bytes = string | ArrayBuffer | Uint8Array | Buffer | null | undefined; * Stream implemented as an async iterable. * @internal */ -export class Stream implements AsyncIterable { - protected static fromSSEResponse(response: HttpResponse): Stream { +export class SseStream implements AsyncIterable { + protected static fromSSEResponse(response: HttpResponse): SseStream { let consumed = false; async function* iterator(): AsyncIterator { @@ -71,7 +71,7 @@ export class Stream implements AsyncIterable { done = true; } - return new Stream(iterator); + return new SseStream(iterator); } constructor(public iterator: () => AsyncIterator) {} From 0b79c66ec5ec0709cb98808f0e3015251aede3b0 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Wed, 30 Oct 2024 13:50:18 +0100 Subject: [PATCH 30/57] refactor: openai stream --- .../src/azure-openai/stream/sse-stream.ts | 37 +++++-------------- 1 file changed, 9 insertions(+), 28 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/stream/sse-stream.ts b/packages/foundation-models/src/azure-openai/stream/sse-stream.ts index 92635587..710bceb3 100644 --- a/packages/foundation-models/src/azure-openai/stream/sse-stream.ts +++ b/packages/foundation-models/src/azure-openai/stream/sse-stream.ts @@ -35,37 +35,18 @@ export class SseStream implements AsyncIterable { continue; } - if (sse.event === null) { - let data; - - try { - data = JSON.parse(sse.data); - } catch (e: any) { - logger.error(`Could not parse message into JSON: ${sse.data}`); - logger.error(`From chunk: ${sse.raw}`); - throw e; - } + try { + const data = JSON.parse(sse.data); - if (data && data.error) { - throw new Error(data.error); - } - - yield data; - } else { - let data; - try { - data = JSON.parse(sse.data); - } catch (e: any) { - logger.error(`Could not parse message into JSON: ${sse.data}`); - logger.error(`From chunk: ${sse.raw}`); - throw e; - } - // TODO: Is this where the error should be thrown? - if (sse.event === 'error') { - // throw new Error(data.error, data.message); + if (data?.error) { throw new Error(data.error); } - yield { event: sse.event, data } as any; + + yield sse.event === null ? 
data : { event: sse.event, data } as any; + } catch(error: any) { + logger.error(`Could not parse message into JSON: ${sse.data}`); + logger.error(`From chunk: ${sse.raw}`); + throw error; } } done = true; From 7104fc5c373f0937fb04a234d7ee3856619f6276 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Wed, 30 Oct 2024 15:14:07 +0100 Subject: [PATCH 31/57] chore: add tests for sse-stream (copied from openai) --- .../azure-openai/stream/sse-stream.test.ts | 284 ++++++++++++++++++ 1 file changed, 284 insertions(+) create mode 100644 packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts diff --git a/packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts b/packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts new file mode 100644 index 00000000..c94a8d89 --- /dev/null +++ b/packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts @@ -0,0 +1,284 @@ +import type { HttpResponse } from '@sap-cloud-sdk/http-client'; +import { PassThrough } from 'stream'; +import assert from 'assert'; +import { _iterSSEMessages, _decodeChunks as decodeChunks } from './sse-stream'; + +describe('line decoder', () => { + test('basic', () => { + // baz is not included because the line hasn't ended yet + expect(decodeChunks(['foo', ' bar\nbaz'])).toEqual(['foo bar']); + }); + + test('basic with \\r', () => { + // baz is not included because the line hasn't ended yet + expect(decodeChunks(['foo', ' bar\r\nbaz'])).toEqual(['foo bar']); + }); + + test('trailing new lines', () => { + expect(decodeChunks(['foo', ' bar', 'baz\n', 'thing\n'])).toEqual(['foo barbaz', 'thing']); + }); + + test('trailing new lines with \\r', () => { + expect(decodeChunks(['foo', ' bar', 'baz\r\n', 'thing\r\n'])).toEqual(['foo barbaz', 'thing']); + }); + + test('escaped new lines', () => { + expect(decodeChunks(['foo', ' bar\\nbaz\n'])).toEqual(['foo bar\\nbaz']); + }); + + test('escaped new lines with \\r', () => { + expect(decodeChunks(['foo', ' bar\\r\\nbaz\n'])).toEqual(['foo bar\\r\\nbaz']); + }); +}); + +describe('streaming decoding', () => { + test('basic', async () => { + async function* body(): AsyncGenerator { + yield Buffer.from('event: completion\n'); + yield Buffer.from('data: {"foo":true}\n'); + yield Buffer.from('\n'); + } + + const response = { + data: body() + } as any; + + const stream = _iterSSEMessages(response)[ + Symbol.asyncIterator + ](); + + let event = await stream.next(); + assert(event.value); + expect(JSON.parse(event.value.data)).toEqual({ foo: true }); + + event = await stream.next(); + expect(event.done).toBeTruthy(); + }); + + test('data without event', async () => { + async function* body(): AsyncGenerator { + yield Buffer.from('data: {"foo":true}\n'); + yield Buffer.from('\n'); + } + + const response = { + data: body() + } as any; + + const stream = _iterSSEMessages(response)[ + Symbol.asyncIterator + ](); + + let event = await stream.next(); + assert(event.value); + expect(event.value.event).toBeNull(); + expect(JSON.parse(event.value.data)).toEqual({ foo: true }); + + event = await stream.next(); + expect(event.done).toBeTruthy(); + }); + + test('event without data', async () => { + async function* body(): AsyncGenerator { + yield Buffer.from('event: foo\n'); + yield Buffer.from('\n'); + } + + const response = { + data: body() + } as any; + + const stream = _iterSSEMessages(response)[ + Symbol.asyncIterator + ](); + + let event = await stream.next(); + assert(event.value); + expect(event.value.event).toEqual('foo'); + 
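    // An SSE event with no data lines decodes to an empty data string rather than null.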
expect(event.value.data).toEqual(''); + + event = await stream.next(); + expect(event.done).toBeTruthy(); + }); + + test('multiple events', async () => { + async function* body(): AsyncGenerator { + yield Buffer.from('event: foo\n'); + yield Buffer.from('\n'); + yield Buffer.from('event: ping\n'); + yield Buffer.from('\n'); + } + + const response = { + data: body() + } as any; + + const stream = _iterSSEMessages(response)[ + Symbol.asyncIterator + ](); + + let event = await stream.next(); + assert(event.value); + expect(event.value.event).toEqual('foo'); + expect(event.value.data).toEqual(''); + + event = await stream.next(); + assert(event.value); + expect(event.value.event).toEqual('ping'); + expect(event.value.data).toEqual(''); + + event = await stream.next(); + expect(event.done).toBeTruthy(); + }); + + test('multiple events with data', async () => { + async function* body(): AsyncGenerator { + yield Buffer.from('event: foo\n'); + yield Buffer.from('data: {"foo":true}\n'); + yield Buffer.from('\n'); + yield Buffer.from('event: ping\n'); + yield Buffer.from('data: {"bar":false}\n'); + yield Buffer.from('\n'); + } + + const response = { + data: body() + } as any; + + const stream = _iterSSEMessages(response)[ + Symbol.asyncIterator + ](); + + let event = await stream.next(); + assert(event.value); + expect(event.value.event).toEqual('foo'); + expect(JSON.parse(event.value.data)).toEqual({ foo: true }); + + event = await stream.next(); + assert(event.value); + expect(event.value.event).toEqual('ping'); + expect(JSON.parse(event.value.data)).toEqual({ bar: false }); + + event = await stream.next(); + expect(event.done).toBeTruthy(); + }); + + test('multiple data lines with empty line', async () => { + async function* body(): AsyncGenerator { + yield Buffer.from('event: ping\n'); + yield Buffer.from('data: {\n'); + yield Buffer.from('data: "foo":\n'); + yield Buffer.from('data: \n'); + yield Buffer.from('data:\n'); + yield Buffer.from('data: true}\n'); + yield Buffer.from('\n\n'); + } + + const response = { + data: body() + } as any; + + const stream = _iterSSEMessages(response)[ + Symbol.asyncIterator + ](); + + let event = await stream.next(); + assert(event.value); + expect(event.value.event).toEqual('ping'); + expect(JSON.parse(event.value.data)).toEqual({ foo: true }); + expect(event.value.data).toEqual('{\n"foo":\n\n\ntrue}'); + + event = await stream.next(); + expect(event.done).toBeTruthy(); + }); + + test('data json escaped double new line', async () => { + async function* body(): AsyncGenerator { + yield Buffer.from('event: ping\n'); + yield Buffer.from('data: {"foo": "my long\\n\\ncontent"}'); + yield Buffer.from('\n\n'); + } + + const response = { + data: body() + } as any; + + const stream = _iterSSEMessages(response)[ + Symbol.asyncIterator + ](); + + let event = await stream.next(); + assert(event.value); + expect(event.value.event).toEqual('ping'); + expect(JSON.parse(event.value.data)).toEqual({ foo: 'my long\n\ncontent' }); + + event = await stream.next(); + expect(event.done).toBeTruthy(); + }); + + test('special new line characters', async () => { + async function* body(): AsyncGenerator { + yield Buffer.from('data: {"content": "culpa "}\n'); + yield Buffer.from('\n'); + yield Buffer.from('data: {"content": "'); + yield Buffer.from([0xe2, 0x80, 0xa8]); + yield Buffer.from('"}\n'); + yield Buffer.from('\n'); + yield Buffer.from('data: {"content": "foo"}\n'); + yield Buffer.from('\n'); + } + + const response = { + data: body() + } as any; + + const stream = 
_iterSSEMessages(response)[ + Symbol.asyncIterator + ](); + + let event = await stream.next(); + assert(event.value); + expect(JSON.parse(event.value.data)).toEqual({ content: 'culpa ' }); + + event = await stream.next(); + assert(event.value); + expect(JSON.parse(event.value.data)).toEqual({ content: Buffer.from([0xe2, 0x80, 0xa8]).toString() }); + + event = await stream.next(); + assert(event.value); + expect(JSON.parse(event.value.data)).toEqual({ content: 'foo' }); + + event = await stream.next(); + expect(event.done).toBeTruthy(); + }); + + test('multi-byte characters across chunks', async () => { + async function* body(): AsyncGenerator { + yield Buffer.from('event: completion\n'); + yield Buffer.from('data: {"content": "'); + // bytes taken from the string 'известни' and arbitrarily split + // so that some multi-byte characters span multiple chunks + yield Buffer.from([0xd0]); + yield Buffer.from([0xb8, 0xd0, 0xb7, 0xd0]); + yield Buffer.from([0xb2, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82, 0xd0, 0xbd, 0xd0, 0xb8]); + yield Buffer.from('"}\n'); + yield Buffer.from('\n'); + } + + const response = { + data: body() + } as any; + + const stream = _iterSSEMessages(response)[ + Symbol.asyncIterator + ](); + + let event = await stream.next(); + assert(event.value); + expect(event.value.event).toEqual('completion'); + expect(JSON.parse(event.value.data)).toEqual({ content: 'известни' }); + + event = await stream.next(); + expect(event.done).toBeTruthy(); + }); +}); From 2c5247af869423801412b7e95dedd9a9ec302066 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Mon, 4 Nov 2024 10:52:25 +0100 Subject: [PATCH 32/57] refactor: rename test responses --- ...t-completion-stream-chunk-response.test.ts | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.test.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.test.ts index fa5ae87d..22f15539 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.test.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.test.ts @@ -3,59 +3,59 @@ import { AzureOpenAiChatCompletionStreamChunkResponse } from './azure-openai-cha describe('OpenAI chat completion stream chunk response', () => { let mockResponses: { - tokenUsage: any; - finishReason: any; - deltaContent: any; + tokenUsageResponse: any; + finishReasonResponse: any; + deltaContentResponse: any; }; let azureOpenAiChatCompletionStreamChunkResponses: { - tokenUsage: AzureOpenAiChatCompletionStreamChunkResponse; - finishReason: AzureOpenAiChatCompletionStreamChunkResponse; - deltaContent: AzureOpenAiChatCompletionStreamChunkResponse; + tokenUsageResponse: AzureOpenAiChatCompletionStreamChunkResponse; + finishReasonResponse: AzureOpenAiChatCompletionStreamChunkResponse; + deltaContentResponse: AzureOpenAiChatCompletionStreamChunkResponse; }; beforeAll(async () => { mockResponses = { - tokenUsage: await parseMockResponse( + tokenUsageResponse: await parseMockResponse( 'foundation-models', 'azure-openai-chat-completion-stream-chunk-response-token-usage.json' ), - finishReason: await parseMockResponse( + finishReasonResponse: await parseMockResponse( 'foundation-models', 'azure-openai-chat-completion-stream-chunk-response-finish-reason.json' ), - deltaContent: await parseMockResponse( + deltaContentResponse: await parseMockResponse( 
'foundation-models', 'azure-openai-chat-completion-stream-chunk-response-delta-content.json' ) }; azureOpenAiChatCompletionStreamChunkResponses = { - tokenUsage: new AzureOpenAiChatCompletionStreamChunkResponse( - mockResponses.tokenUsage + tokenUsageResponse: new AzureOpenAiChatCompletionStreamChunkResponse( + mockResponses.tokenUsageResponse ), - finishReason: new AzureOpenAiChatCompletionStreamChunkResponse( - mockResponses.finishReason + finishReasonResponse: new AzureOpenAiChatCompletionStreamChunkResponse( + mockResponses.finishReasonResponse ), - deltaContent: new AzureOpenAiChatCompletionStreamChunkResponse( - mockResponses.deltaContent + deltaContentResponse: new AzureOpenAiChatCompletionStreamChunkResponse( + mockResponses.deltaContentResponse ) }; }); it('should return the chat completion stream chunk response', () => { expect( - azureOpenAiChatCompletionStreamChunkResponses.tokenUsage.data - ).toStrictEqual(mockResponses.tokenUsage); + azureOpenAiChatCompletionStreamChunkResponses.tokenUsageResponse.data + ).toStrictEqual(mockResponses.tokenUsageResponse); expect( - azureOpenAiChatCompletionStreamChunkResponses.finishReason.data - ).toStrictEqual(mockResponses.finishReason); + azureOpenAiChatCompletionStreamChunkResponses.finishReasonResponse.data + ).toStrictEqual(mockResponses.finishReasonResponse); expect( - azureOpenAiChatCompletionStreamChunkResponses.deltaContent.data - ).toStrictEqual(mockResponses.deltaContent); + azureOpenAiChatCompletionStreamChunkResponses.deltaContentResponse.data + ).toStrictEqual(mockResponses.deltaContentResponse); }); it('should get token usage', () => { expect( - azureOpenAiChatCompletionStreamChunkResponses.tokenUsage.getTokenUsage() + azureOpenAiChatCompletionStreamChunkResponses.tokenUsageResponse.getTokenUsage() ).toMatchObject({ completion_tokens: expect.any(Number), prompt_tokens: expect.any(Number), @@ -65,13 +65,13 @@ describe('OpenAI chat completion stream chunk response', () => { it('should return finish reason', () => { expect( - azureOpenAiChatCompletionStreamChunkResponses.finishReason.getFinishReason() + azureOpenAiChatCompletionStreamChunkResponses.finishReasonResponse.getFinishReason() ).toBe('stop'); }); it('should return delta content with default index 0', () => { expect( - azureOpenAiChatCompletionStreamChunkResponses.deltaContent.getDeltaContent() + azureOpenAiChatCompletionStreamChunkResponses.deltaContentResponse.getDeltaContent() ).toBe(' is'); }); }); From 6570bd2f00b8bb9452a853d9c568df0fd2666ad3 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Mon, 11 Nov 2024 11:26:19 +0100 Subject: [PATCH 33/57] refactor: replace streamContent with a method --- .../azure-openai/azure-openai-chat-client.ts | 27 ++------- ...zure-openai-chat-completion-stream.test.ts | 27 ++++----- .../azure-openai-chat-completion-stream.ts | 46 +++++++------- .../src/azure-openai/stream/index.ts | 1 + .../azure-openai/stream/sse-stream.test.ts | 60 ++++++++----------- .../src/azure-openai/stream/sse-stream.ts | 10 ++-- .../src/foundation-models/azure-openai.ts | 9 ++- sample-code/src/server.ts | 2 +- 8 files changed, 78 insertions(+), 104 deletions(-) create mode 100644 packages/foundation-models/src/azure-openai/stream/index.ts diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts index beaa85a4..52b37393 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts +++ 
b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts
@@ -51,28 +51,9 @@ export class AzureOpenAiChatClient {
     const response = new AzureOpenAiChatCompletionStreamResponse();
     response.stream = (await this.createStream(data, requestConfig))
-      .pipe(AzureOpenAiChatCompletionStream.processChunk)
-      .pipe(AzureOpenAiChatCompletionStream.processFinishReason, response)
-      .pipe(AzureOpenAiChatCompletionStream.processTokenUsage, response);
-    return response;
-  }
-
-  /**
-   * Creates a completion stream of the delta content for the chat messages.
-   * @param data - The input parameters for the chat completion.
-   * @param requestConfig - The request configuration.
-   * @returns The completion stream of the delta content.
-   */
-  async streamContent(
-    data: AzureOpenAiCreateChatCompletionRequest,
-    requestConfig?: CustomRequestConfig
-  ): Promise<AzureOpenAiChatCompletionStreamResponse> {
-    const response = new AzureOpenAiChatCompletionStreamResponse();
-    response.stream = (await this.createStream(data, requestConfig))
-      .pipe(AzureOpenAiChatCompletionStream.processChunk)
-      .pipe(AzureOpenAiChatCompletionStream.processFinishReason, response)
-      .pipe(AzureOpenAiChatCompletionStream.processTokenUsage, response)
-      .pipe(AzureOpenAiChatCompletionStream.processContent, response);
+      ._pipe(AzureOpenAiChatCompletionStream._processChunk)
+      ._pipe(AzureOpenAiChatCompletionStream._processFinishReason, response)
+      ._pipe(AzureOpenAiChatCompletionStream._processTokenUsage, response);
     return response;
   }
 
@@ -113,6 +94,6 @@ export class AzureOpenAiChatClient {
       responseType: 'stream'
     }
   );
-    return AzureOpenAiChatCompletionStream.create(response);
+    return AzureOpenAiChatCompletionStream._create(response);
   }
 }
diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts
index e3176bdc..7b748bdc 100644
--- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts
+++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts
@@ -38,7 +38,7 @@ describe('OpenAI chat completion stream', () => {
 
   it('should wrap the raw chunk', async () => {
     let output = '';
-    const asyncGenerator = AzureOpenAiChatCompletionStream.processChunk(
+    const asyncGenerator = AzureOpenAiChatCompletionStream._processChunk(
       originalChatCompletionStream
     );
     for await (const chunk of asyncGenerator) {
@@ -54,11 +54,11 @@ describe('OpenAI chat completion stream', () => {
       messageContext: 'azure-openai-chat-completion-stream'
     });
     const debugSpy = jest.spyOn(logger, 'debug');
-    const asyncGeneratorChunk = AzureOpenAiChatCompletionStream.processChunk(
+    const asyncGeneratorChunk = AzureOpenAiChatCompletionStream._processChunk(
      originalChatCompletionStream
    );
    const asyncGeneratorFinishReason =
-      AzureOpenAiChatCompletionStream.processFinishReason(
+      AzureOpenAiChatCompletionStream._processFinishReason(
        new AzureOpenAiChatCompletionStream(() => asyncGeneratorChunk)
      );

@@ -73,11 +73,11 @@ describe('OpenAI chat completion stream', () => {
       messageContext: 'azure-openai-chat-completion-stream'
     });
     const debugSpy = jest.spyOn(logger, 'debug');
-    const asyncGeneratorChunk = AzureOpenAiChatCompletionStream.processChunk(
+    const asyncGeneratorChunk = AzureOpenAiChatCompletionStream._processChunk(
      originalChatCompletionStream
    );
    const asyncGeneratorFinishReason =
-      AzureOpenAiChatCompletionStream.processFinishReason(
+      AzureOpenAiChatCompletionStream._processFinishReason(
        new
AzureOpenAiChatCompletionStream(() => asyncGeneratorChunk) ); @@ -93,11 +93,11 @@ describe('OpenAI chat completion stream', () => { messageContext: 'azure-openai-chat-completion-stream' }); const debugSpy = jest.spyOn(logger, 'debug'); - const asyncGeneratorChunk = AzureOpenAiChatCompletionStream.processChunk( + const asyncGeneratorChunk = AzureOpenAiChatCompletionStream._processChunk( originalChatCompletionStream ); const asyncGeneratorTokenUsage = - AzureOpenAiChatCompletionStream.processTokenUsage( + AzureOpenAiChatCompletionStream._processTokenUsage( new AzureOpenAiChatCompletionStream(() => asyncGeneratorChunk) ); @@ -109,17 +109,16 @@ describe('OpenAI chat completion stream', () => { ); }); - it('should process the content', async () => { - const asyncGeneratorChunk = AzureOpenAiChatCompletionStream.processChunk( + it('should transform the original stream to string stream', async () => { + const asyncGeneratorChunk = AzureOpenAiChatCompletionStream._processChunk( originalChatCompletionStream ); - const asyncGeneratorContent = - AzureOpenAiChatCompletionStream.processContent( - new AzureOpenAiChatCompletionStream(() => asyncGeneratorChunk) - ); + const chunkStream = new AzureOpenAiChatCompletionStream( + () => asyncGeneratorChunk + ); let output = ''; - for await (const chunk of asyncGeneratorContent) { + for await (const chunk of chunkStream.toStringStream()) { expect(typeof chunk).toBe('string'); output += chunk; } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts index 1b0abc31..197ae39f 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts @@ -1,5 +1,5 @@ import { createLogger } from '@sap-cloud-sdk/util'; -import { SseStream } from './stream/sse-stream.js'; +import { SseStream } from './stream/index.js'; import { AzureOpenAiChatCompletionStreamChunkResponse } from './azure-openai-chat-completion-stream-chunk-response.js'; import type { HttpResponse } from '@sap-cloud-sdk/http-client'; import type { AzureOpenAiChatCompletionStreamResponse } from './azure-openai-chat-completion-stream-response.js'; @@ -19,7 +19,7 @@ export class AzureOpenAiChatCompletionStream extends SseStream { * @returns Chat completion stream. * @internal */ - public static create( + public static _create( response: HttpResponse ): AzureOpenAiChatCompletionStream { // TODO: Change `any` to `CreateChatCompletionStreamResponse` once the preview spec becomes stable. @@ -32,7 +32,7 @@ export class AzureOpenAiChatCompletionStream extends SseStream { * @param stream - Chat completion stream. * @internal */ - static async *processChunk( + static async *_processChunk( stream: AzureOpenAiChatCompletionStream // TODO: Change `any` to `CreateChatCompletionStreamResponse` once the preview spec becomes stable. ): AsyncGenerator { for await (const chunk of stream) { @@ -40,27 +40,10 @@ export class AzureOpenAiChatCompletionStream extends SseStream { } } - /** - * Transform the stream chunk into string. - * @param stream - Chat completion stream. 
- * @internal - */ - static async *processContent( - stream: AzureOpenAiChatCompletionStream - ): AsyncGenerator { - for await (const chunk of stream) { - const deltaContent = chunk.getDeltaContent(); - if (!deltaContent) { - continue; - } - yield deltaContent; - } - } - /** * @internal */ - static async *processFinishReason( + static async *_processFinishReason( stream: AzureOpenAiChatCompletionStream, response?: AzureOpenAiChatCompletionStreamResponse ): AsyncGenerator { @@ -93,7 +76,7 @@ export class AzureOpenAiChatCompletionStream extends SseStream { /** * @internal */ - static async *processTokenUsage( + static async *_processTokenUsage( stream: AzureOpenAiChatCompletionStream, response?: AzureOpenAiChatCompletionStreamResponse ): AsyncGenerator { @@ -120,7 +103,7 @@ export class AzureOpenAiChatCompletionStream extends SseStream { * @returns The output stream containing processed items. * @internal */ - pipe( + _pipe( processFn: ( stream: AzureOpenAiChatCompletionStream, response?: AzureOpenAiChatCompletionStreamResponse @@ -134,4 +117,21 @@ export class AzureOpenAiChatCompletionStream extends SseStream { } return new AzureOpenAiChatCompletionStream(() => processFn(this)); } + + /** + * Transform a stream of chunks into a stream of content strings. + * @param stream - Chat completion stream. + * @internal + */ + async *toStringStream( + this: AzureOpenAiChatCompletionStream + ): AsyncGenerator { + for await (const chunk of this) { + const deltaContent = chunk.getDeltaContent(); + if (!deltaContent) { + continue; + } + yield deltaContent; + } + } } diff --git a/packages/foundation-models/src/azure-openai/stream/index.ts b/packages/foundation-models/src/azure-openai/stream/index.ts new file mode 100644 index 00000000..4f2ea9e7 --- /dev/null +++ b/packages/foundation-models/src/azure-openai/stream/index.ts @@ -0,0 +1 @@ +export { SseStream } from './sse-stream.js'; diff --git a/packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts b/packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts index c94a8d89..27e79f6a 100644 --- a/packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts +++ b/packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts @@ -1,5 +1,3 @@ -import type { HttpResponse } from '@sap-cloud-sdk/http-client'; -import { PassThrough } from 'stream'; import assert from 'assert'; import { _iterSSEMessages, _decodeChunks as decodeChunks } from './sse-stream'; @@ -15,11 +13,17 @@ describe('line decoder', () => { }); test('trailing new lines', () => { - expect(decodeChunks(['foo', ' bar', 'baz\n', 'thing\n'])).toEqual(['foo barbaz', 'thing']); + expect(decodeChunks(['foo', ' bar', 'baz\n', 'thing\n'])).toEqual([ + 'foo barbaz', + 'thing' + ]); }); test('trailing new lines with \\r', () => { - expect(decodeChunks(['foo', ' bar', 'baz\r\n', 'thing\r\n'])).toEqual(['foo barbaz', 'thing']); + expect(decodeChunks(['foo', ' bar', 'baz\r\n', 'thing\r\n'])).toEqual([ + 'foo barbaz', + 'thing' + ]); }); test('escaped new lines', () => { @@ -27,7 +31,9 @@ describe('line decoder', () => { }); test('escaped new lines with \\r', () => { - expect(decodeChunks(['foo', ' bar\\r\\nbaz\n'])).toEqual(['foo bar\\r\\nbaz']); + expect(decodeChunks(['foo', ' bar\\r\\nbaz\n'])).toEqual([ + 'foo bar\\r\\nbaz' + ]); }); }); @@ -43,9 +49,7 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response)[ - Symbol.asyncIterator - ](); + const stream = _iterSSEMessages(response)[Symbol.asyncIterator](); let 
event = await stream.next(); assert(event.value); @@ -65,9 +69,7 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response)[ - Symbol.asyncIterator - ](); + const stream = _iterSSEMessages(response)[Symbol.asyncIterator](); let event = await stream.next(); assert(event.value); @@ -88,9 +90,7 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response)[ - Symbol.asyncIterator - ](); + const stream = _iterSSEMessages(response)[Symbol.asyncIterator](); let event = await stream.next(); assert(event.value); @@ -113,9 +113,7 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response)[ - Symbol.asyncIterator - ](); + const stream = _iterSSEMessages(response)[Symbol.asyncIterator](); let event = await stream.next(); assert(event.value); @@ -145,9 +143,7 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response)[ - Symbol.asyncIterator - ](); + const stream = _iterSSEMessages(response)[Symbol.asyncIterator](); let event = await stream.next(); assert(event.value); @@ -178,9 +174,7 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response)[ - Symbol.asyncIterator - ](); + const stream = _iterSSEMessages(response)[Symbol.asyncIterator](); let event = await stream.next(); assert(event.value); @@ -203,9 +197,7 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response)[ - Symbol.asyncIterator - ](); + const stream = _iterSSEMessages(response)[Symbol.asyncIterator](); let event = await stream.next(); assert(event.value); @@ -232,9 +224,7 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response)[ - Symbol.asyncIterator - ](); + const stream = _iterSSEMessages(response)[Symbol.asyncIterator](); let event = await stream.next(); assert(event.value); @@ -242,7 +232,9 @@ describe('streaming decoding', () => { event = await stream.next(); assert(event.value); - expect(JSON.parse(event.value.data)).toEqual({ content: Buffer.from([0xe2, 0x80, 0xa8]).toString() }); + expect(JSON.parse(event.value.data)).toEqual({ + content: Buffer.from([0xe2, 0x80, 0xa8]).toString() + }); event = await stream.next(); assert(event.value); @@ -260,7 +252,9 @@ describe('streaming decoding', () => { // so that some multi-byte characters span multiple chunks yield Buffer.from([0xd0]); yield Buffer.from([0xb8, 0xd0, 0xb7, 0xd0]); - yield Buffer.from([0xb2, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82, 0xd0, 0xbd, 0xd0, 0xb8]); + yield Buffer.from([ + 0xb2, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82, 0xd0, 0xbd, 0xd0, 0xb8 + ]); yield Buffer.from('"}\n'); yield Buffer.from('\n'); } @@ -269,9 +263,7 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response)[ - Symbol.asyncIterator - ](); + const stream = _iterSSEMessages(response)[Symbol.asyncIterator](); let event = await stream.next(); assert(event.value); diff --git a/packages/foundation-models/src/azure-openai/stream/sse-stream.ts b/packages/foundation-models/src/azure-openai/stream/sse-stream.ts index 710bceb3..5fbea69c 100644 --- a/packages/foundation-models/src/azure-openai/stream/sse-stream.ts +++ b/packages/foundation-models/src/azure-openai/stream/sse-stream.ts @@ -16,7 +16,9 @@ type Bytes = string | ArrayBuffer | Uint8Array | Buffer | null | undefined; * @internal */ export class SseStream implements 
AsyncIterable { - protected static fromSSEResponse(response: HttpResponse): SseStream { + protected static fromSSEResponse( + response: HttpResponse + ): SseStream { let consumed = false; async function* iterator(): AsyncIterator { @@ -41,9 +43,9 @@ export class SseStream implements AsyncIterable { if (data?.error) { throw new Error(data.error); } - - yield sse.event === null ? data : { event: sse.event, data } as any; - } catch(error: any) { + + yield sse.event === null ? data : ({ event: sse.event, data } as any); + } catch (error: any) { logger.error(`Could not parse message into JSON: ${sse.data}`); logger.error(`From chunk: ${sse.raw}`); throw error; diff --git a/sample-code/src/foundation-models/azure-openai.ts b/sample-code/src/foundation-models/azure-openai.ts index cbb88ae8..98133a5e 100644 --- a/sample-code/src/foundation-models/azure-openai.ts +++ b/sample-code/src/foundation-models/azure-openai.ts @@ -6,7 +6,8 @@ import { createLogger } from '@sap-cloud-sdk/util'; import type { AzureOpenAiChatCompletionResponse, AzureOpenAiEmbeddingResponse, - AzureOpenAiChatCompletionStreamResponse + AzureOpenAiChatCompletionStreamResponse, + AzureOpenAiChatCompletionStreamChunkResponse } from '@sap-ai-sdk/foundation-models'; const logger = createLogger({ @@ -34,11 +35,9 @@ export async function chatCompletion(): Promise + AzureOpenAiChatCompletionStreamResponse > { - const response = await new AzureOpenAiChatClient( - 'gpt-35-turbo' - ).streamContent({ + const response = await new AzureOpenAiChatClient('gpt-35-turbo').stream({ messages: [ { role: 'user', diff --git a/sample-code/src/server.ts b/sample-code/src/server.ts index 58e5142c..ce0a40b2 100644 --- a/sample-code/src/server.ts +++ b/sample-code/src/server.ts @@ -69,7 +69,7 @@ app.get('/azure-openai/chat-completion-stream', async (req, res) => { res.end(); }); - for await (const chunk of response.stream) { + for await (const chunk of response.stream.toStringStream()) { if (!connectionAlive) { break; } From 9187988ad45d2b83e27fea02d90154cecd50264e Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Mon, 11 Nov 2024 12:32:38 +0100 Subject: [PATCH 34/57] feat: support multiple choices --- .../azure-openai-chat-completion-response.ts | 2 +- ...i-chat-completion-stream-chunk-response.ts | 18 ++++-- ...-openai-chat-completion-stream-response.ts | 13 ++-- ...zure-openai-chat-completion-stream.test.ts | 4 +- .../azure-openai-chat-completion-stream.ts | 59 +++++++++++-------- .../src/foundation-models/azure-openai.ts | 19 ++++++ sample-code/src/server.ts | 48 ++++++++++++++- 7 files changed, 119 insertions(+), 44 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-response.ts index 3765b737..e67a3b1b 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-response.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-response.ts @@ -34,7 +34,7 @@ export class AzureOpenAiChatCompletionResponse { */ getFinishReason( choiceIndex = 0 - ): this['data']['choices'][0]['finish_reason'] { + ): string | undefined | null { this.logInvalidChoiceIndex(choiceIndex); return this.data.choices[choiceIndex]?.finish_reason; } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts index c2246e1b..2c8ef451 100644 --- 
a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts @@ -20,10 +20,13 @@ export class AzureOpenAiChatCompletionStreamChunkResponse { * @param choiceIndex - The index of the choice to parse. * @returns The finish reason. */ - getFinishReason( - choiceIndex = 0 - ): this['data']['choices'][0]['finish_reason'] { - return this.data.choices[choiceIndex]?.finish_reason; + getFinishReason(choiceIndex = 0): string | undefined | null { + for (const choice of this.data.choices) { + if (choice.index === choiceIndex) { + return choice.finish_reason; + } + } + return undefined; } /** @@ -32,6 +35,11 @@ export class AzureOpenAiChatCompletionStreamChunkResponse { * @returns The message delta content. */ getDeltaContent(choiceIndex = 0): string | undefined | null { - return this.data.choices[choiceIndex]?.delta?.content; + for (const choice of this.data.choices) { + if (choice.index === choiceIndex) { + return choice.delta.content; + } + } + return undefined; } } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts index 2d0aef80..2024ef25 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts @@ -6,7 +6,7 @@ import type { AzureOpenAiChatCompletionStream } from './azure-openai-chat-comple */ export class AzureOpenAiChatCompletionStreamResponse { private _usage: AzureOpenAiCompletionUsage | undefined; - private _finishReason: string | undefined; + private _finishReasons: Map = new Map(); private _stream: AzureOpenAiChatCompletionStream | undefined; public get usage(): AzureOpenAiCompletionUsage { @@ -20,15 +20,12 @@ export class AzureOpenAiChatCompletionStreamResponse { this._usage = usage; } - public get finishReason(): string { - if (!this._finishReason) { - throw new Error('Response finish reason is undefined.'); - } - return this._finishReason; + public get finishReasons(): Map { + return this._finishReasons; } - public set finishReason(finishReason: string) { - this._finishReason = finishReason; + public set finishReasons(finishReasons: Map) { + this._finishReasons = finishReasons; } public get stream(): AzureOpenAiChatCompletionStream { diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts index 7b748bdc..e628280c 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts @@ -48,7 +48,7 @@ describe('OpenAI chat completion stream', () => { expect(output).toEqual('The capital of France is Paris.'); }); - it('should process the finish reason', async () => { + it('should process the finish reasons', async () => { const logger = createLogger({ package: 'foundation-models', messageContext: 'azure-openai-chat-completion-stream' @@ -118,7 +118,7 @@ describe('OpenAI chat completion stream', () => { ); let output = ''; - for await (const chunk of chunkStream.toStringStream()) { + for await (const chunk of chunkStream.toContentStream()) { expect(typeof chunk).toBe('string'); output += chunk; } diff --git 
a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts index 197ae39f..966ba808 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts @@ -48,27 +48,32 @@ export class AzureOpenAiChatCompletionStream extends SseStream { response?: AzureOpenAiChatCompletionStreamResponse ): AsyncGenerator { for await (const chunk of stream) { - const finishReason = chunk.getFinishReason(); - if (finishReason) { - if (response) { - response.finishReason = finishReason; - } - switch (finishReason) { - case 'content_filter': - logger.error('Stream finished with content filter hit.'); - break; - case 'length': - logger.error('Stream finished with token length exceeded.'); - break; - case 'stop': - logger.debug('Stream finished.'); - break; - default: - logger.error( - `Stream finished with unknown reason '${finishReason}'.` - ); - } - } + chunk.data.choices.forEach((choice: any) => { + const choiceIndex = choice.index; + if (choiceIndex) { + const finishReason = chunk.getFinishReason(choiceIndex); + if (finishReason) { + if (response) { + response.finishReasons.set(choiceIndex, finishReason); + } + switch (finishReason) { + case 'content_filter': + logger.error(`Choice ${choiceIndex}: Stream finished with content filter hit.`); + break; + case 'length': + logger.error(`Choice ${choiceIndex}: Stream finished with token length exceeded.`); + break; + case 'stop': + logger.debug(`Choice ${choiceIndex}: Stream finished.`); + break; + default: + logger.error( + `Choice ${choiceIndex}: Stream finished with unknown reason '${finishReason}'.` + ); + } + } + }; + }); yield chunk; } } @@ -76,7 +81,7 @@ export class AzureOpenAiChatCompletionStream extends SseStream { /** * @internal */ - static async *_processTokenUsage( + static async * _processTokenUsage( stream: AzureOpenAiChatCompletionStream, response?: AzureOpenAiChatCompletionStreamResponse ): AsyncGenerator { @@ -120,14 +125,16 @@ export class AzureOpenAiChatCompletionStream extends SseStream { /** * Transform a stream of chunks into a stream of content strings. - * @param stream - Chat completion stream. + * @param this - Chat completion stream. + * @param choiceIndex - The index of the choice to parse. * @internal */ - async *toStringStream( - this: AzureOpenAiChatCompletionStream + async * toContentStream( + this: AzureOpenAiChatCompletionStream, + choiceIndex = 0 ): AsyncGenerator { for await (const chunk of this) { - const deltaContent = chunk.getDeltaContent(); + const deltaContent = chunk.getDeltaContent(choiceIndex); if (!deltaContent) { continue; } diff --git a/sample-code/src/foundation-models/azure-openai.ts b/sample-code/src/foundation-models/azure-openai.ts index 98133a5e..96dbf1fe 100644 --- a/sample-code/src/foundation-models/azure-openai.ts +++ b/sample-code/src/foundation-models/azure-openai.ts @@ -48,6 +48,25 @@ export async function chatCompletionStream(): Promise< return response; } +/** + * Ask Azure OpenAI model about the capital of France with streaming. + * @returns The response from Azure OpenAI containing the response content. 
+ */ +export async function chatCompletionStreamMultipleChoices(): Promise< + AzureOpenAiChatCompletionStreamResponse +> { + const response = await new AzureOpenAiChatClient('gpt-35-turbo').stream({ + messages: [ + { + role: 'user', + content: 'Give me a very long introduction of SAP Cloud SDK.' + } + ], + n: 2 + }); + return response; +} + /** * Embed 'Hello, world!' using the OpenAI ADA model. * @returns The response from Azure OpenAI containing the embedding vector. diff --git a/sample-code/src/server.ts b/sample-code/src/server.ts index ce0a40b2..4a00acf6 100644 --- a/sample-code/src/server.ts +++ b/sample-code/src/server.ts @@ -3,6 +3,7 @@ import express from 'express'; import { chatCompletion, chatCompletionStream, + chatCompletionStreamMultipleChoices, computeEmbedding // eslint-disable-next-line import/no-internal-modules } from './foundation-models/azure-openai.js'; @@ -69,7 +70,7 @@ app.get('/azure-openai/chat-completion-stream', async (req, res) => { res.end(); }); - for await (const chunk of response.stream.toStringStream()) { + for await (const chunk of response.stream.toContentStream()) { if (!connectionAlive) { break; } @@ -77,7 +78,7 @@ app.get('/azure-openai/chat-completion-stream', async (req, res) => { } res.write('\n\n---------------------------\n'); - res.write(`Finish reason: ${response.finishReason}\n`); + res.write(`Finish reason: ${response.finishReasons.get(0)}\n`); res.write('Token usage:\n'); res.write(` - Completion tokens: ${response.usage.completion_tokens}\n`); res.write(` - Prompt tokens: ${response.usage.prompt_tokens}\n`); @@ -92,6 +93,49 @@ app.get('/azure-openai/chat-completion-stream', async (req, res) => { } }); +app.get('/azure-openai/chat-completion-stream-multiple-choices', async (req, res) => { + try { + const response = await chatCompletionStreamMultipleChoices(); + + res.setHeader('Cache-Control', 'no-cache'); + res.setHeader('Content-Type', 'text/event-stream'); + res.setHeader('Access-Control-Allow-Origin', '*'); + res.setHeader('Connection', 'keep-alive'); + res.flushHeaders(); + + let connectionAlive = true; + + res.on('close', () => { + connectionAlive = false; + res.end(); + }); + + for await (const chunk of response.stream.toContentStream(1)) { + if (!connectionAlive) { + break; + } + res.write(chunk); + } + + res.write('\n\n---------------------------\n'); + res.write(`Finish reason: ${response.finishReasons.get(1)}\n`); + res.write('Token usage:\n'); + res.write(` - Completion tokens: ${response.usage.completion_tokens}\n`); + res.write(` - Prompt tokens: ${response.usage.prompt_tokens}\n`); + res.write(` - Total tokens: ${response.usage.total_tokens}\n`); + + + } catch (error: any) { + console.error(error); + res + .status(500) + .send('Yikes, vibes are off apparently 😬 -> ' + error.message); + } finally { + res.end(); + } +}); + + app.get('/azure-openai/embedding', async (req, res) => { try { const response = await computeEmbedding(); From 0bd1c92c451ed245417e10c09ccfab926aa30631 Mon Sep 17 00:00:00 2001 From: cloud-sdk-js Date: Mon, 11 Nov 2024 11:33:36 +0000 Subject: [PATCH 35/57] fix: Changes from lint --- .../azure-openai-chat-completion-response.ts | 4 +- .../azure-openai-chat-completion-stream.ts | 14 ++-- sample-code/src/server.ts | 76 +++++++++---------- 3 files changed, 48 insertions(+), 46 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-response.ts index e67a3b1b..a1cf649b 100644 --- 
a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-response.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-response.ts @@ -32,9 +32,7 @@ export class AzureOpenAiChatCompletionResponse { * @param choiceIndex - The index of the choice to parse. * @returns The finish reason. */ - getFinishReason( - choiceIndex = 0 - ): string | undefined | null { + getFinishReason(choiceIndex = 0): string | undefined | null { this.logInvalidChoiceIndex(choiceIndex); return this.data.choices[choiceIndex]?.finish_reason; } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts index 966ba808..5466e4f0 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts @@ -58,10 +58,14 @@ export class AzureOpenAiChatCompletionStream extends SseStream { } switch (finishReason) { case 'content_filter': - logger.error(`Choice ${choiceIndex}: Stream finished with content filter hit.`); + logger.error( + `Choice ${choiceIndex}: Stream finished with content filter hit.` + ); break; case 'length': - logger.error(`Choice ${choiceIndex}: Stream finished with token length exceeded.`); + logger.error( + `Choice ${choiceIndex}: Stream finished with token length exceeded.` + ); break; case 'stop': logger.debug(`Choice ${choiceIndex}: Stream finished.`); @@ -72,7 +76,7 @@ export class AzureOpenAiChatCompletionStream extends SseStream { ); } } - }; + } }); yield chunk; } @@ -81,7 +85,7 @@ export class AzureOpenAiChatCompletionStream extends SseStream { /** * @internal */ - static async * _processTokenUsage( + static async *_processTokenUsage( stream: AzureOpenAiChatCompletionStream, response?: AzureOpenAiChatCompletionStreamResponse ): AsyncGenerator { @@ -129,7 +133,7 @@ export class AzureOpenAiChatCompletionStream extends SseStream { * @param choiceIndex - The index of the choice to parse. 
   * @internal
    */
-  async * toContentStream(
+  async *toContentStream(
     this: AzureOpenAiChatCompletionStream<any>,
     choiceIndex = 0
   ): AsyncGenerator<string> {
diff --git a/sample-code/src/server.ts b/sample-code/src/server.ts
index 4a00acf6..c290dc41 100644
--- a/sample-code/src/server.ts
+++ b/sample-code/src/server.ts
@@ -93,48 +93,48 @@ app.get('/azure-openai/chat-completion-stream', async (req, res) => {
   }
 });
 
-app.get('/azure-openai/chat-completion-stream-multiple-choices', async (req, res) => {
-  try {
-    const response = await chatCompletionStreamMultipleChoices();
-
-    res.setHeader('Cache-Control', 'no-cache');
-    res.setHeader('Content-Type', 'text/event-stream');
-    res.setHeader('Access-Control-Allow-Origin', '*');
-    res.setHeader('Connection', 'keep-alive');
-    res.flushHeaders();
-
-    let connectionAlive = true;
+app.get(
+  '/azure-openai/chat-completion-stream-multiple-choices',
+  async (req, res) => {
+    try {
+      const response = await chatCompletionStreamMultipleChoices();
+
+      res.setHeader('Cache-Control', 'no-cache');
+      res.setHeader('Content-Type', 'text/event-stream');
+      res.setHeader('Access-Control-Allow-Origin', '*');
+      res.setHeader('Connection', 'keep-alive');
+      res.flushHeaders();
+
+      let connectionAlive = true;
+
+      res.on('close', () => {
+        connectionAlive = false;
+        res.end();
+      });
+
+      for await (const chunk of response.stream.toContentStream(1)) {
+        if (!connectionAlive) {
+          break;
+        }
+        res.write(chunk);
+      }
 
-    res.on('close', () => {
-      connectionAlive = false;
+      res.write('\n\n---------------------------\n');
+      res.write(`Finish reason: ${response.finishReasons.get(1)}\n`);
+      res.write('Token usage:\n');
+      res.write(` - Completion tokens: ${response.usage.completion_tokens}\n`);
+      res.write(` - Prompt tokens: ${response.usage.prompt_tokens}\n`);
+      res.write(` - Total tokens: ${response.usage.total_tokens}\n`);
+    } catch (error: any) {
+      console.error(error);
+      res
+        .status(500)
+        .send('Yikes, vibes are off apparently 😬 -> ' + error.message);
+    } finally {
       res.end();
-    });
-
-    for await (const chunk of response.stream.toContentStream(1)) {
-      if (!connectionAlive) {
-        break;
-      }
-      res.write(chunk);
     }
-
-    res.write('\n\n---------------------------\n');
-    res.write(`Finish reason: ${response.finishReasons.get(1)}\n`);
-    res.write('Token usage:\n');
-    res.write(` - Completion tokens: ${response.usage.completion_tokens}\n`);
-    res.write(` - Prompt tokens: ${response.usage.prompt_tokens}\n`);
-    res.write(` - Total tokens: ${response.usage.total_tokens}\n`);
-
-
-  } catch (error: any) {
-    console.error(error);
-    res
-      .status(500)
-      .send('Yikes, vibes are off apparently 😬 -> ' + error.message);
-  } finally {
-    res.end();
   }
-});
-
+);
 
 app.get('/azure-openai/embedding', async (req, res) => {
   try {
From 0510c2a550dbd3460fd6b7c4ef0bfdccba972201 Mon Sep 17 00:00:00 2001
From: Zhongpin Wang
Date: Mon, 11 Nov 2024 13:09:45 +0100
Subject: [PATCH 36/57] fix: add abort controller and fix sample code

---
 .../azure-openai/azure-openai-chat-client.ts  |  6 ++-
 .../azure-openai-chat-completion-stream.ts    | 21 +++---
 .../azure-openai/stream/sse-stream.test.ts    | 20 +++---
 .../src/azure-openai/stream/sse-stream.ts     | 67 ++++++++++++-------
 .../src/foundation-models/azure-openai.ts     |  8 +--
 sample-code/src/server.ts                     | 37 ++++++----
 6 files changed, 98 insertions(+), 61 deletions(-)

diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts
index 52b37393..6155e236 100644
--- 
a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts @@ -44,13 +44,14 @@ export class AzureOpenAiChatClient { */ async stream( data: AzureOpenAiCreateChatCompletionRequest, + controller = new AbortController(), requestConfig?: CustomRequestConfig ): Promise< AzureOpenAiChatCompletionStreamResponse > { const response = new AzureOpenAiChatCompletionStreamResponse(); - response.stream = (await this.createStream(data, requestConfig)) + response.stream = (await this.createStream(data, controller, requestConfig)) ._pipe(AzureOpenAiChatCompletionStream._processChunk) ._pipe(AzureOpenAiChatCompletionStream._processFinishReason, response) ._pipe(AzureOpenAiChatCompletionStream._processTokenUsage, response); @@ -79,6 +80,7 @@ export class AzureOpenAiChatClient { private async createStream( data: AzureOpenAiCreateChatCompletionRequest, + controller: AbortController, requestConfig?: CustomRequestConfig ): Promise> { const response = await this.executeRequest( @@ -94,6 +96,6 @@ export class AzureOpenAiChatClient { responseType: 'stream' } ); - return AzureOpenAiChatCompletionStream._create(response); + return AzureOpenAiChatCompletionStream._create(response, controller); } } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts index 5466e4f0..6263cb19 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts @@ -20,11 +20,12 @@ export class AzureOpenAiChatCompletionStream extends SseStream { * @internal */ public static _create( - response: HttpResponse + response: HttpResponse, + controller: AbortController ): AzureOpenAiChatCompletionStream { // TODO: Change `any` to `CreateChatCompletionStreamResponse` once the preview spec becomes stable. - const stream = SseStream.fromSSEResponse(response); // TODO: Change `any` to `CreateChatCompletionStreamResponse` once the preview spec becomes stable. - return new AzureOpenAiChatCompletionStream(stream.iterator); + const stream = SseStream.fromSSEResponse(response, controller); // TODO: Change `any` to `CreateChatCompletionStreamResponse` once the preview spec becomes stable. 
+ return new AzureOpenAiChatCompletionStream(stream.iterator, controller); } /** @@ -101,8 +102,11 @@ export class AzureOpenAiChatCompletionStream extends SseStream { } } - constructor(public iterator: () => AsyncIterator) { - super(iterator); + constructor( + public iterator: () => AsyncIterator, + controller: AbortController + ) { + super(iterator, controller); } /** @@ -120,11 +124,12 @@ export class AzureOpenAiChatCompletionStream extends SseStream { response?: AzureOpenAiChatCompletionStreamResponse ): AzureOpenAiChatCompletionStream { if (response) { - return new AzureOpenAiChatCompletionStream(() => - processFn(this, response) + return new AzureOpenAiChatCompletionStream( + () => processFn(this, response), + this.controller ); } - return new AzureOpenAiChatCompletionStream(() => processFn(this)); + return new AzureOpenAiChatCompletionStream(() => processFn(this), this.controller); } /** diff --git a/packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts b/packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts index 27e79f6a..95ec327b 100644 --- a/packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts +++ b/packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts @@ -1,5 +1,5 @@ import assert from 'assert'; -import { _iterSSEMessages, _decodeChunks as decodeChunks } from './sse-stream'; +import { _iterSSEMessages, _decodeChunks as decodeChunks } from './sse-stream.js'; describe('line decoder', () => { test('basic', () => { @@ -49,7 +49,7 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response)[Symbol.asyncIterator](); + const stream = _iterSSEMessages(response, new AbortController())[Symbol.asyncIterator](); let event = await stream.next(); assert(event.value); @@ -69,7 +69,7 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response)[Symbol.asyncIterator](); + const stream = _iterSSEMessages(response, new AbortController())[Symbol.asyncIterator](); let event = await stream.next(); assert(event.value); @@ -90,7 +90,7 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response)[Symbol.asyncIterator](); + const stream = _iterSSEMessages(response, new AbortController())[Symbol.asyncIterator](); let event = await stream.next(); assert(event.value); @@ -113,7 +113,7 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response)[Symbol.asyncIterator](); + const stream = _iterSSEMessages(response, new AbortController())[Symbol.asyncIterator](); let event = await stream.next(); assert(event.value); @@ -143,7 +143,7 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response)[Symbol.asyncIterator](); + const stream = _iterSSEMessages(response, new AbortController())[Symbol.asyncIterator](); let event = await stream.next(); assert(event.value); @@ -174,7 +174,7 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response)[Symbol.asyncIterator](); + const stream = _iterSSEMessages(response, new AbortController())[Symbol.asyncIterator](); let event = await stream.next(); assert(event.value); @@ -197,7 +197,7 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response)[Symbol.asyncIterator](); + const stream = _iterSSEMessages(response, new AbortController())[Symbol.asyncIterator](); let event = await 
stream.next(); assert(event.value); @@ -224,7 +224,7 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response)[Symbol.asyncIterator](); + const stream = _iterSSEMessages(response, new AbortController())[Symbol.asyncIterator](); let event = await stream.next(); assert(event.value); @@ -263,7 +263,7 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response)[Symbol.asyncIterator](); + const stream = _iterSSEMessages(response, new AbortController())[Symbol.asyncIterator](); let event = await stream.next(); assert(event.value); diff --git a/packages/foundation-models/src/azure-openai/stream/sse-stream.ts b/packages/foundation-models/src/azure-openai/stream/sse-stream.ts index 5fbea69c..aeb0a4e7 100644 --- a/packages/foundation-models/src/azure-openai/stream/sse-stream.ts +++ b/packages/foundation-models/src/azure-openai/stream/sse-stream.ts @@ -16,8 +16,11 @@ type Bytes = string | ArrayBuffer | Uint8Array | Buffer | null | undefined; * @internal */ export class SseStream implements AsyncIterable { + controller: AbortController; + protected static fromSSEResponse( - response: HttpResponse + response: HttpResponse, + controller: AbortController ): SseStream { let consumed = false; @@ -27,37 +30,53 @@ export class SseStream implements AsyncIterable { } consumed = true; let done = false; - for await (const sse of _iterSSEMessages(response)) { - if (done) { - continue; - } - if (sse.data.startsWith('[DONE]')) { - done = true; - continue; - } - - try { - const data = JSON.parse(sse.data); + try { + for await (const sse of _iterSSEMessages(response, controller)) { + if (done) { + continue; + } - if (data?.error) { - throw new Error(data.error); + if (sse.data.startsWith('[DONE]')) { + done = true; + continue; } - yield sse.event === null ? data : ({ event: sse.event, data } as any); - } catch (error: any) { - logger.error(`Could not parse message into JSON: ${sse.data}`); - logger.error(`From chunk: ${sse.raw}`); - throw error; + try { + const data = JSON.parse(sse.data); + if (data?.error) { + throw new Error(data.error); + } + yield sse.event === null ? 
data : ({ event: sse.event, data } as any); + } catch (e: any) { + logger.error(`Could not parse message into JSON: ${sse.data}`); + logger.error(`From chunk: ${sse.raw}`); + throw e; + } + } + done = true; + } catch (e: any) { + if (e instanceof Error && e.name === 'AbortError') { + return; + } else { + logger.error('Error while iterating over SSE stream:', e); + } + } finally { + if (!done) { + controller.abort(); } } - done = true; } - return new SseStream(iterator); + return new SseStream(iterator, controller); } - constructor(public iterator: () => AsyncIterator) {} + constructor( + public iterator: () => AsyncIterator, + controller: AbortController + ) { + this.controller = controller; + } [Symbol.asyncIterator](): AsyncIterator { return this.iterator(); @@ -68,9 +87,11 @@ export class SseStream implements AsyncIterable { * @internal */ export async function* _iterSSEMessages( - response: HttpResponse + response: HttpResponse, + controller: AbortController ): AsyncGenerator { if (!response.data) { + controller.abort(); throw new Error('Attempted to iterate over a response with no body'); } diff --git a/sample-code/src/foundation-models/azure-openai.ts b/sample-code/src/foundation-models/azure-openai.ts index 96dbf1fe..33770ed3 100644 --- a/sample-code/src/foundation-models/azure-openai.ts +++ b/sample-code/src/foundation-models/azure-openai.ts @@ -34,7 +34,7 @@ export async function chatCompletion(): Promise > { const response = await new AzureOpenAiChatClient('gpt-35-turbo').stream({ @@ -44,7 +44,7 @@ export async function chatCompletionStream(): Promise< content: 'Give me a very long introduction of SAP Cloud SDK.' } ] - }); + }, controller); return response; } @@ -52,7 +52,7 @@ export async function chatCompletionStream(): Promise< * Ask Azure OpenAI model about the capital of France with streaming. * @returns The response from Azure OpenAI containing the response content. 
*/ -export async function chatCompletionStreamMultipleChoices(): Promise< +export async function chatCompletionStreamMultipleChoices(controller: AbortController): Promise< AzureOpenAiChatCompletionStreamResponse > { const response = await new AzureOpenAiChatClient('gpt-35-turbo').stream({ @@ -63,7 +63,7 @@ export async function chatCompletionStreamMultipleChoices(): Promise< } ], n: 2 - }); + }, controller); return response; } diff --git a/sample-code/src/server.ts b/sample-code/src/server.ts index c290dc41..2bdc351f 100644 --- a/sample-code/src/server.ts +++ b/sample-code/src/server.ts @@ -54,8 +54,10 @@ app.get('/azure-openai/chat-completion', async (req, res) => { }); app.get('/azure-openai/chat-completion-stream', async (req, res) => { + const controller = new AbortController(); try { - const response = await chatCompletionStream(); + + const response = await chatCompletionStream(controller); res.setHeader('Cache-Control', 'no-cache'); res.setHeader('Content-Type', 'text/event-stream'); @@ -66,6 +68,7 @@ app.get('/azure-openai/chat-completion-stream', async (req, res) => { let connectionAlive = true; res.on('close', () => { + controller.abort(); connectionAlive = false; res.end(); }); @@ -77,12 +80,14 @@ app.get('/azure-openai/chat-completion-stream', async (req, res) => { res.write(chunk); } - res.write('\n\n---------------------------\n'); - res.write(`Finish reason: ${response.finishReasons.get(0)}\n`); - res.write('Token usage:\n'); - res.write(` - Completion tokens: ${response.usage.completion_tokens}\n`); - res.write(` - Prompt tokens: ${response.usage.prompt_tokens}\n`); - res.write(` - Total tokens: ${response.usage.total_tokens}\n`); + if (connectionAlive) { + res.write('\n\n---------------------------\n'); + res.write(`Finish reason: ${response.finishReasons.get(0)}\n`); + res.write('Token usage:\n'); + res.write(` - Completion tokens: ${response.usage.completion_tokens}\n`); + res.write(` - Prompt tokens: ${response.usage.prompt_tokens}\n`); + res.write(` - Total tokens: ${response.usage.total_tokens}\n`); + } } catch (error: any) { console.error(error); res @@ -96,8 +101,9 @@ app.get('/azure-openai/chat-completion-stream', async (req, res) => { app.get( '/azure-openai/chat-completion-stream-multiple-choices', async (req, res) => { + const controller = new AbortController(); try { - const response = await chatCompletionStreamMultipleChoices(); + const response = await chatCompletionStreamMultipleChoices(controller); res.setHeader('Cache-Control', 'no-cache'); res.setHeader('Content-Type', 'text/event-stream'); @@ -108,6 +114,7 @@ app.get( let connectionAlive = true; res.on('close', () => { + controller.abort(); connectionAlive = false; res.end(); }); @@ -119,12 +126,14 @@ app.get( res.write(chunk); } - res.write('\n\n---------------------------\n'); - res.write(`Finish reason: ${response.finishReasons.get(1)}\n`); - res.write('Token usage:\n'); - res.write(` - Completion tokens: ${response.usage.completion_tokens}\n`); - res.write(` - Prompt tokens: ${response.usage.prompt_tokens}\n`); - res.write(` - Total tokens: ${response.usage.total_tokens}\n`); + if (connectionAlive) { + res.write('\n\n---------------------------\n'); + res.write(`Finish reason: ${response.finishReasons.get(1)}\n`); + res.write('Token usage:\n'); + res.write(` - Completion tokens: ${response.usage.completion_tokens}\n`); + res.write(` - Prompt tokens: ${response.usage.prompt_tokens}\n`); + res.write(` - Total tokens: ${response.usage.total_tokens}\n`); + } } catch (error: any) { console.error(error); 
res From 2bf0e7e5672225c809f909e7d318f861694d3cde Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Mon, 11 Nov 2024 13:16:38 +0100 Subject: [PATCH 37/57] fix: add controller signal to axios --- .../azure-openai/azure-openai-chat-client.ts | 6 ++- .../azure-openai-chat-completion-stream.ts | 5 +- .../azure-openai/stream/sse-stream.test.ts | 41 +++++++++++---- .../src/azure-openai/stream/sse-stream.ts | 15 +++--- .../src/foundation-models/azure-openai.ts | 50 ++++++++++++------- sample-code/src/server.ts | 5 +- 6 files changed, 81 insertions(+), 41 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts index 6155e236..d07dbc89 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts @@ -39,6 +39,7 @@ export class AzureOpenAiChatClient { /** * Creates a completion stream for the chat messages. * @param data - The input parameters for the chat completion. + * @param controller - The abort controller. * @param requestConfig - The request configuration. * @returns The completion stream. */ @@ -71,7 +72,7 @@ export class AzureOpenAiChatClient { { url: `/inference/deployments/${deploymentId}/chat/completions`, apiVersion, - resourceGroup + resourceGroup, }, data, requestConfig @@ -93,7 +94,8 @@ export class AzureOpenAiChatClient { }, { ...requestConfig, - responseType: 'stream' + responseType: 'stream', + signal: controller.signal } ); return AzureOpenAiChatCompletionStream._create(response, controller); diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts index 6263cb19..5cc00b56 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts @@ -129,7 +129,10 @@ export class AzureOpenAiChatCompletionStream extends SseStream { this.controller ); } - return new AzureOpenAiChatCompletionStream(() => processFn(this), this.controller); + return new AzureOpenAiChatCompletionStream( + () => processFn(this), + this.controller + ); } /** diff --git a/packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts b/packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts index 95ec327b..042b5ebc 100644 --- a/packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts +++ b/packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts @@ -1,5 +1,8 @@ import assert from 'assert'; -import { _iterSSEMessages, _decodeChunks as decodeChunks } from './sse-stream.js'; +import { + _iterSSEMessages, + _decodeChunks as decodeChunks +} from './sse-stream.js'; describe('line decoder', () => { test('basic', () => { @@ -49,7 +52,9 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response, new AbortController())[Symbol.asyncIterator](); + const stream = _iterSSEMessages(response, new AbortController())[ + Symbol.asyncIterator + ](); let event = await stream.next(); assert(event.value); @@ -69,7 +74,9 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response, new AbortController())[Symbol.asyncIterator](); + const stream = _iterSSEMessages(response, new AbortController())[ + Symbol.asyncIterator + ](); let event = await 
stream.next(); assert(event.value); @@ -90,7 +97,9 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response, new AbortController())[Symbol.asyncIterator](); + const stream = _iterSSEMessages(response, new AbortController())[ + Symbol.asyncIterator + ](); let event = await stream.next(); assert(event.value); @@ -113,7 +122,9 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response, new AbortController())[Symbol.asyncIterator](); + const stream = _iterSSEMessages(response, new AbortController())[ + Symbol.asyncIterator + ](); let event = await stream.next(); assert(event.value); @@ -143,7 +154,9 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response, new AbortController())[Symbol.asyncIterator](); + const stream = _iterSSEMessages(response, new AbortController())[ + Symbol.asyncIterator + ](); let event = await stream.next(); assert(event.value); @@ -174,7 +187,9 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response, new AbortController())[Symbol.asyncIterator](); + const stream = _iterSSEMessages(response, new AbortController())[ + Symbol.asyncIterator + ](); let event = await stream.next(); assert(event.value); @@ -197,7 +212,9 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response, new AbortController())[Symbol.asyncIterator](); + const stream = _iterSSEMessages(response, new AbortController())[ + Symbol.asyncIterator + ](); let event = await stream.next(); assert(event.value); @@ -224,7 +241,9 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response, new AbortController())[Symbol.asyncIterator](); + const stream = _iterSSEMessages(response, new AbortController())[ + Symbol.asyncIterator + ](); let event = await stream.next(); assert(event.value); @@ -263,7 +282,9 @@ describe('streaming decoding', () => { data: body() } as any; - const stream = _iterSSEMessages(response, new AbortController())[Symbol.asyncIterator](); + const stream = _iterSSEMessages(response, new AbortController())[ + Symbol.asyncIterator + ](); let event = await stream.next(); assert(event.value); diff --git a/packages/foundation-models/src/azure-openai/stream/sse-stream.ts b/packages/foundation-models/src/azure-openai/stream/sse-stream.ts index aeb0a4e7..1ebd6178 100644 --- a/packages/foundation-models/src/azure-openai/stream/sse-stream.ts +++ b/packages/foundation-models/src/azure-openai/stream/sse-stream.ts @@ -6,7 +6,7 @@ import type { HttpResponse } from '@sap-cloud-sdk/http-client'; const logger = createLogger({ package: 'foundation-models', - messageContext: 'azure-openai-stream' + messageContext: 'azure-openai-sse-stream' }); type Bytes = string | ArrayBuffer | Uint8Array | Buffer | null | undefined; @@ -16,8 +16,6 @@ type Bytes = string | ArrayBuffer | Uint8Array | Buffer | null | undefined; * @internal */ export class SseStream implements AsyncIterable { - controller: AbortController; - protected static fromSSEResponse( response: HttpResponse, controller: AbortController @@ -47,7 +45,9 @@ export class SseStream implements AsyncIterable { if (data?.error) { throw new Error(data.error); } - yield sse.event === null ? data : ({ event: sse.event, data } as any); + yield sse.event === null + ? 
data + : ({ event: sse.event, data } as any); } catch (e: any) { logger.error(`Could not parse message into JSON: ${sse.data}`); logger.error(`From chunk: ${sse.raw}`); @@ -56,11 +56,10 @@ export class SseStream implements AsyncIterable { } done = true; } catch (e: any) { - if (e instanceof Error && e.name === 'AbortError') { + if (e instanceof Error && e.name === 'CanceledError') { return; - } else { - logger.error('Error while iterating over SSE stream:', e); } + logger.error('Error while iterating over SSE stream:', e); } finally { if (!done) { controller.abort(); @@ -71,6 +70,8 @@ export class SseStream implements AsyncIterable { return new SseStream(iterator, controller); } + controller: AbortController; + constructor( public iterator: () => AsyncIterator, controller: AbortController diff --git a/sample-code/src/foundation-models/azure-openai.ts b/sample-code/src/foundation-models/azure-openai.ts index 33770ed3..86494f5b 100644 --- a/sample-code/src/foundation-models/azure-openai.ts +++ b/sample-code/src/foundation-models/azure-openai.ts @@ -32,38 +32,50 @@ export async function chatCompletion(): Promise > { - const response = await new AzureOpenAiChatClient('gpt-35-turbo').stream({ - messages: [ - { - role: 'user', - content: 'Give me a very long introduction of SAP Cloud SDK.' - } - ] - }, controller); + const response = await new AzureOpenAiChatClient('gpt-35-turbo').stream( + { + messages: [ + { + role: 'user', + content: 'Give me a very long introduction of SAP Cloud SDK.' + } + ] + }, + controller + ); return response; } /** * Ask Azure OpenAI model about the capital of France with streaming. + * @param controller - The abort controller. * @returns The response from Azure OpenAI containing the response content. */ -export async function chatCompletionStreamMultipleChoices(controller: AbortController): Promise< +export async function chatCompletionStreamMultipleChoices( + controller: AbortController +): Promise< AzureOpenAiChatCompletionStreamResponse > { - const response = await new AzureOpenAiChatClient('gpt-35-turbo').stream({ - messages: [ - { - role: 'user', - content: 'Give me a very long introduction of SAP Cloud SDK.' - } - ], - n: 2 - }, controller); + const response = await new AzureOpenAiChatClient('gpt-35-turbo').stream( + { + messages: [ + { + role: 'user', + content: 'Give me a very long introduction of SAP Cloud SDK.' 
+ } + ], + n: 2 + }, + controller + ); return response; } diff --git a/sample-code/src/server.ts b/sample-code/src/server.ts index 2bdc351f..5a6799c2 100644 --- a/sample-code/src/server.ts +++ b/sample-code/src/server.ts @@ -56,7 +56,6 @@ app.get('/azure-openai/chat-completion', async (req, res) => { app.get('/azure-openai/chat-completion-stream', async (req, res) => { const controller = new AbortController(); try { - const response = await chatCompletionStream(controller); res.setHeader('Cache-Control', 'no-cache'); @@ -130,7 +129,9 @@ app.get( res.write('\n\n---------------------------\n'); res.write(`Finish reason: ${response.finishReasons.get(1)}\n`); res.write('Token usage:\n'); - res.write(` - Completion tokens: ${response.usage.completion_tokens}\n`); + res.write( + ` - Completion tokens: ${response.usage.completion_tokens}\n` + ); res.write(` - Prompt tokens: ${response.usage.prompt_tokens}\n`); res.write(` - Total tokens: ${response.usage.total_tokens}\n`); } From 050d0db28cc3bba3b1427f73ab356df3f1c19f80 Mon Sep 17 00:00:00 2001 From: cloud-sdk-js Date: Mon, 11 Nov 2024 12:17:29 +0000 Subject: [PATCH 38/57] fix: Changes from lint --- .../src/azure-openai/azure-openai-chat-client.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts index d07dbc89..df50913f 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.ts @@ -72,7 +72,7 @@ export class AzureOpenAiChatClient { { url: `/inference/deployments/${deploymentId}/chat/completions`, apiVersion, - resourceGroup, + resourceGroup }, data, requestConfig From 1399a910418e7fc8ecc7d875846a93f4e736e0cb Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Mon, 11 Nov 2024 13:46:15 +0100 Subject: [PATCH 39/57] chore: add unit test for stream() --- .../azure-openai-chat-client.test.ts | 45 +++++++++++++++++++ ...pletion-stream-chunk-response-initial.json | 1 + 2 files changed, 46 insertions(+) create mode 100644 test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-initial.json diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.test.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.test.ts index a4a41f6a..4d6d4d8a 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.test.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.test.ts @@ -3,11 +3,13 @@ import { mockClientCredentialsGrantCall, mockDeploymentsList, mockInference, + parseFileToString, parseMockResponse } from '../../../../test-util/mock-http.js'; import { AzureOpenAiChatClient } from './azure-openai-chat-client.js'; import { apiVersion } from './model-types.js'; import type { AzureOpenAiCreateChatCompletionResponse } from './client/inference/schema/index.js'; +import { stream } from '@sap/cds'; describe('Azure OpenAI chat client', () => { const chatCompletionEndpoint = { @@ -127,4 +129,47 @@ describe('Azure OpenAI chat client', () => { const response = await clientWithResourceGroup.run(prompt); expect(response.data).toEqual(mockResponse); }); + + it('executes a streaming request with correct chunk response', async () => { + const prompt = { + messages: [ + { + role: 'user' as const, + content: 'Where is the deepest place on earth located' + } + ], + stream: true, + stream_options: { + 
include_usage: true + } + }; + + const mockResponse = + await parseFileToString( + 'foundation-models', + 'azure-openai-chat-completion-stream-chunks.txt' + ); + + mockInference( + { + data: prompt + }, + { + data: mockResponse, + status: 200 + }, + chatCompletionEndpoint + ); + + const initialResponse = await parseFileToString( + 'foundation-models', + 'azure-openai-chat-completion-stream-chunk-response-initial.json' + ); + + const response = await client.stream(prompt); + for await (const chunk of response.stream) { + expect(JSON.stringify(chunk.data)).toEqual(initialResponse); + break; + } + }); }); diff --git a/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-initial.json b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-initial.json new file mode 100644 index 00000000..af38e868 --- /dev/null +++ b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-initial.json @@ -0,0 +1 @@ +{"choices":[],"created":0,"id":"","model":"","object":"","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}]} \ No newline at end of file From ad6551890bd7dce235486fca872fc878e5d8cacb Mon Sep 17 00:00:00 2001 From: cloud-sdk-js Date: Mon, 11 Nov 2024 12:47:12 +0000 Subject: [PATCH 40/57] fix: Changes from lint --- .../src/azure-openai/azure-openai-chat-client.test.ts | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.test.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.test.ts index 4d6d4d8a..d7bad864 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.test.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.test.ts @@ -9,7 +9,6 @@ import { import { AzureOpenAiChatClient } from './azure-openai-chat-client.js'; import { apiVersion } from './model-types.js'; import type { AzureOpenAiCreateChatCompletionResponse } from './client/inference/schema/index.js'; -import { stream } from '@sap/cds'; describe('Azure OpenAI chat client', () => { const chatCompletionEndpoint = { @@ -144,11 +143,10 @@ describe('Azure OpenAI chat client', () => { } }; - const mockResponse = - await parseFileToString( - 'foundation-models', - 'azure-openai-chat-completion-stream-chunks.txt' - ); + const mockResponse = await parseFileToString( + 'foundation-models', + 'azure-openai-chat-completion-stream-chunks.txt' + ); mockInference( { From 2a940b1ea9661770209c06c58876d7c698031a77 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Mon, 11 Nov 2024 14:19:19 +0100 Subject: [PATCH 41/57] fix: stream finish reason index 0 --- ...zure-openai-chat-completion-stream.test.ts | 31 +++++-------------- .../azure-openai-chat-completion-stream.ts | 2 +- 2 files changed, 8 insertions(+), 25 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts index e628280c..aa6205f2 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts @@ -32,7 +32,8 @@ describe('OpenAI chat completion stream', () => { } } 
originalChatCompletionStream = new AzureOpenAiChatCompletionStream( - iterator + iterator, + new AbortController() ); }); @@ -59,32 +60,13 @@ describe('OpenAI chat completion stream', () => { ); const asyncGeneratorFinishReason = AzureOpenAiChatCompletionStream._processFinishReason( - new AzureOpenAiChatCompletionStream(() => asyncGeneratorChunk) + new AzureOpenAiChatCompletionStream(() => asyncGeneratorChunk, new AbortController()) ); for await (const chunk of asyncGeneratorFinishReason) { expect(chunk).toBeDefined(); } - expect(debugSpy).toHaveBeenCalledWith('Stream finished.'); - }); - it('should process the finish reason', async () => { - const logger = createLogger({ - package: 'foundation-models', - messageContext: 'azure-openai-chat-completion-stream' - }); - const debugSpy = jest.spyOn(logger, 'debug'); - const asyncGeneratorChunk = AzureOpenAiChatCompletionStream._processChunk( - originalChatCompletionStream - ); - const asyncGeneratorFinishReason = - AzureOpenAiChatCompletionStream._processFinishReason( - new AzureOpenAiChatCompletionStream(() => asyncGeneratorChunk) - ); - - for await (const chunk of asyncGeneratorFinishReason) { - expect(chunk).toBeDefined(); - } - expect(debugSpy).toHaveBeenCalledWith('Stream finished.'); + expect(debugSpy).toHaveBeenCalledWith(`Choice 0: Stream finished.`); }); it('should process the token usage', async () => { @@ -98,7 +80,7 @@ describe('OpenAI chat completion stream', () => { ); const asyncGeneratorTokenUsage = AzureOpenAiChatCompletionStream._processTokenUsage( - new AzureOpenAiChatCompletionStream(() => asyncGeneratorChunk) + new AzureOpenAiChatCompletionStream(() => asyncGeneratorChunk, new AbortController()) ); for await (const chunk of asyncGeneratorTokenUsage) { @@ -114,7 +96,8 @@ describe('OpenAI chat completion stream', () => { originalChatCompletionStream ); const chunkStream = new AzureOpenAiChatCompletionStream( - () => asyncGeneratorChunk + () => asyncGeneratorChunk, + new AbortController() ); let output = ''; diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts index 5cc00b56..6654126c 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts @@ -51,7 +51,7 @@ export class AzureOpenAiChatCompletionStream extends SseStream { for await (const chunk of stream) { chunk.data.choices.forEach((choice: any) => { const choiceIndex = choice.index; - if (choiceIndex) { + if (choiceIndex !== undefined && choiceIndex !== null) { const finishReason = chunk.getFinishReason(choiceIndex); if (finishReason) { if (response) { From 8bc6364b8399f56bbfd1a6f884d4a9c4c29452cc Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Mon, 11 Nov 2024 14:19:58 +0100 Subject: [PATCH 42/57] lint --- .../azure-openai-chat-completion-stream.test.ts | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts index aa6205f2..f4a058bc 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts @@ -60,13 +60,16 @@ describe('OpenAI chat completion stream', () => { ); const 
asyncGeneratorFinishReason = AzureOpenAiChatCompletionStream._processFinishReason( - new AzureOpenAiChatCompletionStream(() => asyncGeneratorChunk, new AbortController()) + new AzureOpenAiChatCompletionStream( + () => asyncGeneratorChunk, + new AbortController() + ) ); for await (const chunk of asyncGeneratorFinishReason) { expect(chunk).toBeDefined(); } - expect(debugSpy).toHaveBeenCalledWith(`Choice 0: Stream finished.`); + expect(debugSpy).toHaveBeenCalledWith('Choice 0: Stream finished.'); }); it('should process the token usage', async () => { @@ -80,7 +83,10 @@ describe('OpenAI chat completion stream', () => { ); const asyncGeneratorTokenUsage = AzureOpenAiChatCompletionStream._processTokenUsage( - new AzureOpenAiChatCompletionStream(() => asyncGeneratorChunk, new AbortController()) + new AzureOpenAiChatCompletionStream( + () => asyncGeneratorChunk, + new AbortController() + ) ); for await (const chunk of asyncGeneratorTokenUsage) { From 841d45270a5dbd0e20da71df858916aae6caaf59 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Mon, 11 Nov 2024 14:23:48 +0100 Subject: [PATCH 43/57] fix: type test --- tests/type-tests/test/azure-openai.test-d.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/type-tests/test/azure-openai.test-d.ts b/tests/type-tests/test/azure-openai.test-d.ts index 1dfbe4b7..49060c5b 100644 --- a/tests/type-tests/test/azure-openai.test-d.ts +++ b/tests/type-tests/test/azure-openai.test-d.ts @@ -43,7 +43,7 @@ expectType( ).getContent() ); -expectType( +expectType( ( await new AzureOpenAiChatClient('gpt-4').run({ messages: [{ role: 'user', content: 'test prompt' }] From 39675b594f160c74c84f42a09b6bb047afcd527d Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Mon, 11 Nov 2024 14:38:31 +0100 Subject: [PATCH 44/57] fix: make toContentStream return AzureOpenAiChatCompletionStream --- .../azure-openai-chat-completion-stream.ts | 21 ++++++++++++--- tests/type-tests/test/azure-openai.test-d.ts | 26 ++++++++++++++++++- 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts index 6654126c..d154f496 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts @@ -137,15 +137,15 @@ export class AzureOpenAiChatCompletionStream extends SseStream { /** * Transform a stream of chunks into a stream of content strings. - * @param this - Chat completion stream. + * @param stream - Chat completion stream. * @param choiceIndex - The index of the choice to parse. 
* @internal */ - async *toContentStream( - this: AzureOpenAiChatCompletionStream, + static async *_processContentStream( + stream: AzureOpenAiChatCompletionStream, choiceIndex = 0 ): AsyncGenerator { - for await (const chunk of this) { + for await (const chunk of stream) { const deltaContent = chunk.getDeltaContent(choiceIndex); if (!deltaContent) { continue; @@ -153,4 +153,17 @@ export class AzureOpenAiChatCompletionStream extends SseStream { yield deltaContent; } } + + public toContentStream( + this: AzureOpenAiChatCompletionStream, + choiceIndex?: number + ): AzureOpenAiChatCompletionStream { + return new AzureOpenAiChatCompletionStream( + () => AzureOpenAiChatCompletionStream._processContentStream( + this, + choiceIndex + ), + this.controller + ); + } } diff --git a/tests/type-tests/test/azure-openai.test-d.ts b/tests/type-tests/test/azure-openai.test-d.ts index 49060c5b..caf9375b 100644 --- a/tests/type-tests/test/azure-openai.test-d.ts +++ b/tests/type-tests/test/azure-openai.test-d.ts @@ -6,7 +6,10 @@ import { AzureOpenAiEmbeddingClient, AzureOpenAiChatCompletionResponse, AzureOpenAiCreateChatCompletionResponse, - AzureOpenAiCompletionUsage + AzureOpenAiCompletionUsage, + AzureOpenAiChatCompletionStreamResponse, + AzureOpenAiChatCompletionStreamChunkResponse, + AzureOpenAiChatCompletionStream } from '@sap-ai-sdk/foundation-models'; /** @@ -133,3 +136,24 @@ expectType>( expect('custom-model'); expect('gpt-4-32k'); + +/** + * Streaming. + */ +expectType>>( + new AzureOpenAiChatClient('gpt-4').stream({ + messages: [{ role: 'user', content: 'test prompt' }] + }) +); + +expectType>( + (await new AzureOpenAiChatClient('gpt-4').stream({ + messages: [{ role: 'user', content: 'test prompt' }] + })).stream +); + +expectType>( + (await new AzureOpenAiChatClient('gpt-4').stream({ + messages: [{ role: 'user', content: 'test prompt' }] + })).stream.toContentStream() +); From 658d1bca6cf0412f8f73b58198c420d7568203fc Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Mon, 11 Nov 2024 14:41:00 +0100 Subject: [PATCH 45/57] fix: lint --- .../azure-openai-chat-completion-stream.ts | 47 ++++++++++--------- tests/type-tests/test/azure-openai.test-d.ts | 28 +++++++---- 2 files changed, 43 insertions(+), 32 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts index d154f496..05c672a5 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts @@ -102,6 +102,25 @@ export class AzureOpenAiChatCompletionStream extends SseStream { } } + /** + * Transform a stream of chunks into a stream of content strings. + * @param stream - Chat completion stream. + * @param choiceIndex - The index of the choice to parse. + * @internal + */ + static async *_processContentStream( + stream: AzureOpenAiChatCompletionStream, + choiceIndex = 0 + ): AsyncGenerator { + for await (const chunk of stream) { + const deltaContent = chunk.getDeltaContent(choiceIndex); + if (!deltaContent) { + continue; + } + yield deltaContent; + } + } + constructor( public iterator: () => AsyncIterator, controller: AbortController @@ -135,34 +154,16 @@ export class AzureOpenAiChatCompletionStream extends SseStream { ); } - /** - * Transform a stream of chunks into a stream of content strings. - * @param stream - Chat completion stream. 
- * @param choiceIndex - The index of the choice to parse. - * @internal - */ - static async *_processContentStream( - stream: AzureOpenAiChatCompletionStream, - choiceIndex = 0 - ): AsyncGenerator { - for await (const chunk of stream) { - const deltaContent = chunk.getDeltaContent(choiceIndex); - if (!deltaContent) { - continue; - } - yield deltaContent; - } - } - public toContentStream( this: AzureOpenAiChatCompletionStream, choiceIndex?: number ): AzureOpenAiChatCompletionStream { return new AzureOpenAiChatCompletionStream( - () => AzureOpenAiChatCompletionStream._processContentStream( - this, - choiceIndex - ), + () => + AzureOpenAiChatCompletionStream._processContentStream( + this, + choiceIndex + ), this.controller ); } diff --git a/tests/type-tests/test/azure-openai.test-d.ts b/tests/type-tests/test/azure-openai.test-d.ts index caf9375b..78eb488c 100644 --- a/tests/type-tests/test/azure-openai.test-d.ts +++ b/tests/type-tests/test/azure-openai.test-d.ts @@ -137,23 +137,33 @@ expectType>( expect('custom-model'); expect('gpt-4-32k'); -/** +/** * Streaming. */ -expectType>>( +expectType< + Promise< + AzureOpenAiChatCompletionStreamResponse + > +>( new AzureOpenAiChatClient('gpt-4').stream({ messages: [{ role: 'user', content: 'test prompt' }] }) ); -expectType>( - (await new AzureOpenAiChatClient('gpt-4').stream({ - messages: [{ role: 'user', content: 'test prompt' }] - })).stream +expectType< + AzureOpenAiChatCompletionStream +>( + ( + await new AzureOpenAiChatClient('gpt-4').stream({ + messages: [{ role: 'user', content: 'test prompt' }] + }) + ).stream ); expectType>( - (await new AzureOpenAiChatClient('gpt-4').stream({ - messages: [{ role: 'user', content: 'test prompt' }] - })).stream.toContentStream() + ( + await new AzureOpenAiChatClient('gpt-4').stream({ + messages: [{ role: 'user', content: 'test prompt' }] + }) + ).stream.toContentStream() ); From df6ee3fc55078d33265f091db130299a006c48ad Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Mon, 11 Nov 2024 15:46:20 +0100 Subject: [PATCH 46/57] feat: throw if sse payload invalid --- .../src/azure-openai/stream/sse-decoder.ts | 2 ++ .../azure-openai/stream/sse-stream.test.ts | 24 +++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/packages/foundation-models/src/azure-openai/stream/sse-decoder.ts b/packages/foundation-models/src/azure-openai/stream/sse-decoder.ts index 6ca2264c..e007540e 100644 --- a/packages/foundation-models/src/azure-openai/stream/sse-decoder.ts +++ b/packages/foundation-models/src/azure-openai/stream/sse-decoder.ts @@ -73,6 +73,8 @@ export class SSEDecoder { this.event = trimedValue; } else if (fieldname === 'data') { this.data.push(trimedValue); + } else { + throw new Error(`Invalid SSE payload: ${line}`); } return null; diff --git a/packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts b/packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts index 042b5ebc..1c252a08 100644 --- a/packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts +++ b/packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts @@ -294,4 +294,28 @@ describe('streaming decoding', () => { event = await stream.next(); expect(event.done).toBeTruthy(); }); + + test('invalid payload', async () => { + async function* body(): AsyncGenerator { + yield Buffer.from('data: {"content": "culpa "}\n'); + yield Buffer.from('\n'); + yield Buffer.from('{"error":"Something went wrong"}\n'); + } + + const response = { + data: body() + } as any; + + const stream = 
_iterSSEMessages(response, new AbortController())[ + Symbol.asyncIterator + ](); + + let event = await stream.next(); + assert(event.value); + expect(event.value.event).toBeNull(); + expect(JSON.parse(event.value.data)).toEqual({"content": "culpa "}); + + expect(stream.next()).rejects.toThrow('Invalid SSE payload: {"error":"Something went wrong"}'); + }); + }); From d3ba1d8a8de0787bd129168dd4aae8a295fb4bb0 Mon Sep 17 00:00:00 2001 From: cloud-sdk-js Date: Mon, 11 Nov 2024 14:47:12 +0000 Subject: [PATCH 47/57] fix: Changes from lint --- .../src/azure-openai/stream/sse-stream.test.ts | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts b/packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts index 1c252a08..9c1561e1 100644 --- a/packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts +++ b/packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts @@ -310,12 +310,13 @@ describe('streaming decoding', () => { Symbol.asyncIterator ](); - let event = await stream.next(); + const event = await stream.next(); assert(event.value); expect(event.value.event).toBeNull(); - expect(JSON.parse(event.value.data)).toEqual({"content": "culpa "}); + expect(JSON.parse(event.value.data)).toEqual({ content: 'culpa ' }); - expect(stream.next()).rejects.toThrow('Invalid SSE payload: {"error":"Something went wrong"}'); + expect(stream.next()).rejects.toThrow( + 'Invalid SSE payload: {"error":"Something went wrong"}' + ); }); - }); From 819692f890234f445945ab2cf9e56541e1d60a3a Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Tue, 12 Nov 2024 15:16:59 +0100 Subject: [PATCH 48/57] refactor: interface --- ...i-chat-completion-stream-chunk-response.ts | 4 ++- ...-openai-chat-completion-stream-response.ts | 31 +++++++++++++------ .../azure-openai-chat-completion-stream.ts | 4 +-- sample-code/src/server.ts | 27 ++++++++-------- 4 files changed, 41 insertions(+), 25 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts index 2c8ef451..81b788af 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts @@ -1,3 +1,5 @@ +import { AzureOpenAiCompletionUsage } from "./client/inference/schema/completion-usage.js"; + /** * Azure OpenAI chat completion stream chunk response. */ @@ -11,7 +13,7 @@ export class AzureOpenAiChatCompletionStreamChunkResponse { * Usage of tokens in the chunk response. * @returns Token usage. 
*/ - getTokenUsage(): this['data']['usage'] { + getTokenUsage(): AzureOpenAiCompletionUsage { return this.data.usage; } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts index 2024ef25..074a042f 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts @@ -9,33 +9,46 @@ export class AzureOpenAiChatCompletionStreamResponse { private _finishReasons: Map = new Map(); private _stream: AzureOpenAiChatCompletionStream | undefined; - public get usage(): AzureOpenAiCompletionUsage { - if (!this._usage) { - throw new Error('Response stream is undefined.'); - } + public getTokenUsage(): AzureOpenAiCompletionUsage | undefined { return this._usage; } - public set usage(usage: AzureOpenAiCompletionUsage) { + /** + * @internal + */ + _setTokenUsage(usage: AzureOpenAiCompletionUsage) { this._usage = usage; } - public get finishReasons(): Map { + public getFinishReason(choiceIndex = 0): string | undefined | null { + return this._finishReasons.get(choiceIndex); + } + + /** + * @internal + */ + _getFinishReasons() { return this._finishReasons; } - public set finishReasons(finishReasons: Map) { + /** + * @internal + */ + _setFinishReasons(finishReasons: Map) { this._finishReasons = finishReasons; } - public get stream(): AzureOpenAiChatCompletionStream { + getStream(): AzureOpenAiChatCompletionStream { if (!this._stream) { throw new Error('Response stream is undefined.'); } return this._stream; } - public set stream(stream: AzureOpenAiChatCompletionStream) { + /** + * @internal + */ + _setStream(stream: AzureOpenAiChatCompletionStream) { this._stream = stream; } } diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts index 05c672a5..89c76f35 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts @@ -55,7 +55,7 @@ export class AzureOpenAiChatCompletionStream extends SseStream { const finishReason = chunk.getFinishReason(choiceIndex); if (finishReason) { if (response) { - response.finishReasons.set(choiceIndex, finishReason); + response._getFinishReasons().set(choiceIndex, finishReason); } switch (finishReason) { case 'content_filter': @@ -94,7 +94,7 @@ export class AzureOpenAiChatCompletionStream extends SseStream { const usage = chunk.getTokenUsage(); if (usage) { if (response) { - response.usage = usage; + response._setTokenUsage(usage); } logger.debug(`Token usage: ${JSON.stringify(usage)}`); } diff --git a/sample-code/src/server.ts b/sample-code/src/server.ts index 5a6799c2..93c6a1a7 100644 --- a/sample-code/src/server.ts +++ b/sample-code/src/server.ts @@ -72,7 +72,7 @@ app.get('/azure-openai/chat-completion-stream', async (req, res) => { res.end(); }); - for await (const chunk of response.stream.toContentStream()) { + for await (const chunk of response.getStream().toContentStream()) { if (!connectionAlive) { break; } @@ -80,12 +80,14 @@ app.get('/azure-openai/chat-completion-stream', async (req, res) => { } if (connectionAlive) { + const finishReason = response.getFinishReason(); + const tokenUsage = response.getTokenUsage()!; 
res.write('\n\n---------------------------\n'); - res.write(`Finish reason: ${response.finishReasons.get(0)}\n`); + res.write(`Finish reason: ${finishReason}\n`); res.write('Token usage:\n'); - res.write(` - Completion tokens: ${response.usage.completion_tokens}\n`); - res.write(` - Prompt tokens: ${response.usage.prompt_tokens}\n`); - res.write(` - Total tokens: ${response.usage.total_tokens}\n`); + res.write(` - Completion tokens: ${tokenUsage.completion_tokens}\n`); + res.write(` - Prompt tokens: ${tokenUsage.prompt_tokens}\n`); + res.write(` - Total tokens: ${tokenUsage.total_tokens}\n`); } } catch (error: any) { console.error(error); @@ -118,7 +120,7 @@ app.get( res.end(); }); - for await (const chunk of response.stream.toContentStream(1)) { + for await (const chunk of response.getStream().toContentStream(1)) { if (!connectionAlive) { break; } @@ -126,14 +128,13 @@ app.get( } if (connectionAlive) { + const finishReason = response.getFinishReason(1); + const tokenUsage = response.getTokenUsage()!; res.write('\n\n---------------------------\n'); - res.write(`Finish reason: ${response.finishReasons.get(1)}\n`); - res.write('Token usage:\n'); - res.write( - ` - Completion tokens: ${response.usage.completion_tokens}\n` - ); - res.write(` - Prompt tokens: ${response.usage.prompt_tokens}\n`); - res.write(` - Total tokens: ${response.usage.total_tokens}\n`); + res.write(`Finish reason: ${finishReason}\n`); + res.write(` - Completion tokens: ${tokenUsage.completion_tokens}\n`); + res.write(` - Prompt tokens: ${tokenUsage.prompt_tokens}\n`); + res.write(` - Total tokens: ${tokenUsage.total_tokens}\n`); } } catch (error: any) { console.error(error); From a06cd03d61864c8917f7968a5966db3f7fc66286 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Tue, 12 Nov 2024 15:21:44 +0100 Subject: [PATCH 49/57] refactor --- ...ure-openai-chat-completion-stream-chunk-response.ts | 2 +- .../azure-openai-chat-completion-stream-response.ts | 10 +++++----- sample-code/src/server.ts | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts index 81b788af..906b57d7 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts @@ -1,4 +1,4 @@ -import { AzureOpenAiCompletionUsage } from "./client/inference/schema/completion-usage.js"; +import type { AzureOpenAiCompletionUsage } from './client/inference/schema/index.js'; /** * Azure OpenAI chat completion stream chunk response. 
diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts
index 074a042f..f04a7477 100644
--- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts
+++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-response.ts
@@ -16,7 +16,7 @@ export class AzureOpenAiChatCompletionStreamResponse {
   /**
    * @internal
    */
-  _setTokenUsage(usage: AzureOpenAiCompletionUsage) {
+  _setTokenUsage(usage: AzureOpenAiCompletionUsage): void {
     this._usage = usage;
   }
 
@@ -27,18 +27,18 @@
   /**
    * @internal
    */
-  _getFinishReasons() {
+  _getFinishReasons(): Map {
     return this._finishReasons;
   }
 
   /**
    * @internal
    */
-  _setFinishReasons(finishReasons: Map) {
+  _setFinishReasons(finishReasons: Map): void {
     this._finishReasons = finishReasons;
   }
 
-  getStream(): AzureOpenAiChatCompletionStream {
+  get stream(): AzureOpenAiChatCompletionStream {
     if (!this._stream) {
       throw new Error('Response stream is undefined.');
     }
     return this._stream;
   }
 
   /**
    * @internal
    */
-  _setStream(stream: AzureOpenAiChatCompletionStream) {
+  set stream(stream: AzureOpenAiChatCompletionStream) {
     this._stream = stream;
   }
 }
diff --git a/sample-code/src/server.ts b/sample-code/src/server.ts
index 93c6a1a7..998fd5f4 100644
--- a/sample-code/src/server.ts
+++ b/sample-code/src/server.ts
@@ -72,7 +72,7 @@ app.get('/azure-openai/chat-completion-stream', async (req, res) => {
       res.end();
     });
 
-    for await (const chunk of response.getStream().toContentStream()) {
+    for await (const chunk of response.stream.toContentStream()) {
       if (!connectionAlive) {
         break;
       }
@@ -120,7 +120,7 @@ app.get(
         res.end();
       });
 
-      for await (const chunk of response.getStream().toContentStream(1)) {
+      for await (const chunk of response.stream.toContentStream(1)) {
         if (!connectionAlive) {
           break;
         }

From 862ff0f890234f445945ab2cf9e56541e1d60a3a Mon Sep 17 00:00:00 2001
From: Zhongpin Wang
Date: Tue, 12 Nov 2024 16:15:17 +0100
Subject: [PATCH 50/57] chore: add changeset

---
 .changeset/seven-chairs-change.md | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 .changeset/seven-chairs-change.md

diff --git a/.changeset/seven-chairs-change.md b/.changeset/seven-chairs-change.md
new file mode 100644
index 00000000..f72c91a1
--- /dev/null
+++ b/.changeset/seven-chairs-change.md
@@ -0,0 +1,5 @@
+---
+'@sap-ai-sdk/foundation-models': minor
+---
+
+[New Functionality] Support streaming for Azure OpenAI chat completion in `foundation-models`.
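With patches 44 through 50 applied, the public streaming surface is in place: `stream()` resolves to an AzureOpenAiChatCompletionStreamResponse, its `stream` getter exposes the iterable AzureOpenAiChatCompletionStream, and getFinishReason() and getTokenUsage() only carry values once the stream has been fully consumed. The sketch below shows a minimal consumer of that API outside the Express sample; the function name and the prompt are illustrative placeholders, everything else follows the type tests and sample code above.

import { AzureOpenAiChatClient } from '@sap-ai-sdk/foundation-models';

// Minimal sketch: print the delta content of choice 0 as it arrives, then
// report the finish reason and token usage gathered during consumption.
async function streamChatCompletion(): Promise<void> {
  const response = await new AzureOpenAiChatClient('gpt-4').stream({
    messages: [{ role: 'user', content: 'Give me a short poem.' }]
  });

  // toContentStream() wraps the chunk stream and yields plain strings.
  for await (const chunk of response.stream.toContentStream()) {
    process.stdout.write(chunk);
  }

  // Set by the internal _processFinishReason and _processTokenUsage steps
  // while the stream was being consumed.
  console.log(`\nFinish reason: ${response.getFinishReason()}`);
  console.log(`Total tokens: ${response.getTokenUsage()?.total_tokens}`);
}

streamChatCompletion().catch(console.error);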
From 18f40b68dd055a2aba496cd46860857f1b51af85 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Wed, 13 Nov 2024 13:17:22 +0100 Subject: [PATCH 51/57] chore: improve sample code for streaming --- sample-code/src/server.ts | 54 +++------------------------------------ 1 file changed, 4 insertions(+), 50 deletions(-) diff --git a/sample-code/src/server.ts b/sample-code/src/server.ts index 998fd5f4..d573056d 100644 --- a/sample-code/src/server.ts +++ b/sample-code/src/server.ts @@ -58,20 +58,21 @@ app.get('/azure-openai/chat-completion-stream', async (req, res) => { try { const response = await chatCompletionStream(controller); - res.setHeader('Cache-Control', 'no-cache'); + // Set headers for event stream res.setHeader('Content-Type', 'text/event-stream'); - res.setHeader('Access-Control-Allow-Origin', '*'); res.setHeader('Connection', 'keep-alive'); res.flushHeaders(); let connectionAlive = true; + // Abort the stream if the client connection is closed res.on('close', () => { controller.abort(); connectionAlive = false; res.end(); }); + // Stream the delta content for await (const chunk of response.stream.toContentStream()) { if (!connectionAlive) { break; @@ -79,6 +80,7 @@ app.get('/azure-openai/chat-completion-stream', async (req, res) => { res.write(chunk); } + // Write the finish reason and token usage after the stream ends if (connectionAlive) { const finishReason = response.getFinishReason(); const tokenUsage = response.getTokenUsage()!; @@ -99,54 +101,6 @@ app.get('/azure-openai/chat-completion-stream', async (req, res) => { } }); -app.get( - '/azure-openai/chat-completion-stream-multiple-choices', - async (req, res) => { - const controller = new AbortController(); - try { - const response = await chatCompletionStreamMultipleChoices(controller); - - res.setHeader('Cache-Control', 'no-cache'); - res.setHeader('Content-Type', 'text/event-stream'); - res.setHeader('Access-Control-Allow-Origin', '*'); - res.setHeader('Connection', 'keep-alive'); - res.flushHeaders(); - - let connectionAlive = true; - - res.on('close', () => { - controller.abort(); - connectionAlive = false; - res.end(); - }); - - for await (const chunk of response.stream.toContentStream(1)) { - if (!connectionAlive) { - break; - } - res.write(chunk); - } - - if (connectionAlive) { - const finishReason = response.getFinishReason(1); - const tokenUsage = response.getTokenUsage()!; - res.write('\n\n---------------------------\n'); - res.write(`Finish reason: ${finishReason}\n`); - res.write(` - Completion tokens: ${tokenUsage.completion_tokens}\n`); - res.write(` - Prompt tokens: ${tokenUsage.prompt_tokens}\n`); - res.write(` - Total tokens: ${tokenUsage.total_tokens}\n`); - } - } catch (error: any) { - console.error(error); - res - .status(500) - .send('Yikes, vibes are off apparently 😬 -> ' + error.message); - } finally { - res.end(); - } - } -); - app.get('/azure-openai/embedding', async (req, res) => { try { const response = await computeEmbedding(); From 347753f93909ae5c4721c4602b2f351702e2cf53 Mon Sep 17 00:00:00 2001 From: cloud-sdk-js Date: Wed, 13 Nov 2024 12:18:14 +0000 Subject: [PATCH 52/57] fix: Changes from lint --- sample-code/src/server.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/sample-code/src/server.ts b/sample-code/src/server.ts index d573056d..9a19a31a 100644 --- a/sample-code/src/server.ts +++ b/sample-code/src/server.ts @@ -3,7 +3,6 @@ import express from 'express'; import { chatCompletion, chatCompletionStream, - chatCompletionStreamMultipleChoices, computeEmbedding // eslint-disable-next-line 
import/no-internal-modules } from './foundation-models/azure-openai.js'; From b933d2c79bb14e15e5fa3bada4c4844f022889b7 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Wed, 13 Nov 2024 13:19:21 +0100 Subject: [PATCH 53/57] docs --- sample-code/src/server.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sample-code/src/server.ts b/sample-code/src/server.ts index 9a19a31a..6aa8d516 100644 --- a/sample-code/src/server.ts +++ b/sample-code/src/server.ts @@ -57,21 +57,21 @@ app.get('/azure-openai/chat-completion-stream', async (req, res) => { try { const response = await chatCompletionStream(controller); - // Set headers for event stream + // Set headers for event stream. res.setHeader('Content-Type', 'text/event-stream'); res.setHeader('Connection', 'keep-alive'); res.flushHeaders(); let connectionAlive = true; - // Abort the stream if the client connection is closed + // Abort the stream if the client connection is closed. res.on('close', () => { controller.abort(); connectionAlive = false; res.end(); }); - // Stream the delta content + // Stream the delta content. for await (const chunk of response.stream.toContentStream()) { if (!connectionAlive) { break; @@ -79,7 +79,7 @@ app.get('/azure-openai/chat-completion-stream', async (req, res) => { res.write(chunk); } - // Write the finish reason and token usage after the stream ends + // Write the finish reason and token usage after the stream ends. if (connectionAlive) { const finishReason = response.getFinishReason(); const tokenUsage = response.getTokenUsage()!; From 83db52b8b6557406933708b4600100291835fc22 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Wed, 13 Nov 2024 17:12:05 +0100 Subject: [PATCH 54/57] refactor: get by index --- ...openai-chat-completion-stream-chunk-response.ts | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts index 906b57d7..3cf6b154 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts @@ -23,12 +23,7 @@ export class AzureOpenAiChatCompletionStreamChunkResponse { * @returns The finish reason. */ getFinishReason(choiceIndex = 0): string | undefined | null { - for (const choice of this.data.choices) { - if (choice.index === choiceIndex) { - return choice.finish_reason; - } - } - return undefined; + return this.data.choices.find((choice: any) => choice.index === choiceIndex)?.finish_reason; } /** @@ -37,11 +32,6 @@ export class AzureOpenAiChatCompletionStreamChunkResponse { * @returns The message delta content. 
*/ getDeltaContent(choiceIndex = 0): string | undefined | null { - for (const choice of this.data.choices) { - if (choice.index === choiceIndex) { - return choice.delta.content; - } - } - return undefined; + return this.data.choices.find((choice: any) => choice.index === choiceIndex)?.delta.content; } } From 31fc14e4d0cb986d127bca503392e4810b5f3f3e Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Wed, 13 Nov 2024 17:13:33 +0100 Subject: [PATCH 55/57] fix: lint --- .../azure-openai-chat-completion-stream-chunk-response.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts index 3cf6b154..27739303 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts @@ -23,7 +23,8 @@ export class AzureOpenAiChatCompletionStreamChunkResponse { * @returns The finish reason. */ getFinishReason(choiceIndex = 0): string | undefined | null { - return this.data.choices.find((choice: any) => choice.index === choiceIndex)?.finish_reason; + return this.data.choices.find((choice: any) => choice.index === choiceIndex) + ?.finish_reason; } /** @@ -32,6 +33,7 @@ export class AzureOpenAiChatCompletionStreamChunkResponse { * @returns The message delta content. */ getDeltaContent(choiceIndex = 0): string | undefined | null { - return this.data.choices.find((choice: any) => choice.index === choiceIndex)?.delta.content; + return this.data.choices.find((choice: any) => choice.index === choiceIndex) + ?.delta.content; } } From bdc18d53691373f79e98aeea442ee3bfca374779 Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Thu, 14 Nov 2024 11:21:08 +0100 Subject: [PATCH 56/57] chore: small changes --- .../azure-openai-chat-completion-stream-chunk-response.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts index 27739303..11b5da22 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts @@ -23,7 +23,7 @@ export class AzureOpenAiChatCompletionStreamChunkResponse { * @returns The finish reason. */ getFinishReason(choiceIndex = 0): string | undefined | null { - return this.data.choices.find((choice: any) => choice.index === choiceIndex) + return this.data.choices.find((c: any) => c.index === choiceIndex) ?.finish_reason; } @@ -33,7 +33,7 @@ export class AzureOpenAiChatCompletionStreamChunkResponse { * @returns The message delta content. 
*/ getDeltaContent(choiceIndex = 0): string | undefined | null { - return this.data.choices.find((choice: any) => choice.index === choiceIndex) + return this.data.choices.find((c: any) => c.index === choiceIndex) ?.delta.content; } } From 120c0e97ce3376ccacfdb8886a73dc999fb66a8b Mon Sep 17 00:00:00 2001 From: cloud-sdk-js Date: Thu, 14 Nov 2024 10:22:06 +0000 Subject: [PATCH 57/57] fix: Changes from lint --- .../azure-openai-chat-completion-stream-chunk-response.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts index 11b5da22..5f940818 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream-chunk-response.ts @@ -33,7 +33,7 @@ export class AzureOpenAiChatCompletionStreamChunkResponse { * @returns The message delta content. */ getDeltaContent(choiceIndex = 0): string | undefined | null { - return this.data.choices.find((c: any) => c.index === choiceIndex) - ?.delta.content; + return this.data.choices.find((c: any) => c.index === choiceIndex)?.delta + .content; } }
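Patches 54 through 57 settle how a chunk response resolves a choice: both getters look the choice up by its index property instead of positionally, matching the earlier `choiceIndex !== undefined && choiceIndex !== null` fix in the stream, since choice 0 has a falsy index. Below is a short sketch of consuming raw chunks rather than the derived content stream; it assumes, as the _processContentStream implementation above suggests, that iterating the stream yields chunk response objects exposing getDeltaContent() and getFinishReason(). The function name and prompt are again placeholders.

import { AzureOpenAiChatClient } from '@sap-ai-sdk/foundation-models';

// Sketch: operate on raw chunk responses instead of toContentStream().
async function streamRawChunks(): Promise<void> {
  const response = await new AzureOpenAiChatClient('gpt-4').stream({
    messages: [{ role: 'user', content: 'Why is the sky blue?' }]
  });

  for await (const chunk of response.stream) {
    // Choices are matched on chunk.data.choices[i].index, so choice 0 is
    // found even though the index value 0 itself is falsy.
    const delta = chunk.getDeltaContent(0);
    if (delta) {
      process.stdout.write(delta);
    }
    const finishReason = chunk.getFinishReason(0);
    if (finishReason) {
      console.log(`\nChoice 0 finished: ${finishReason}`);
    }
  }
}

streamRawChunks().catch(console.error);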