From beac1bdce3480bb3663b85198d6e4220d05edbb6 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Wed, 4 Dec 2024 14:00:01 +0100 Subject: [PATCH 01/53] branch --- packages/core/src/index.ts | 1 + .../src/azure-openai => core/src}/stream/index.ts | 0 .../src/azure-openai => core/src}/stream/line-decoder.ts | 0 .../src/azure-openai => core/src}/stream/sse-decoder.ts | 0 .../src/azure-openai => core/src}/stream/sse-stream.test.ts | 0 .../src/azure-openai => core/src}/stream/sse-stream.ts | 0 .../src/azure-openai/azure-openai-chat-completion-stream.ts | 2 +- 7 files changed, 2 insertions(+), 1 deletion(-) rename packages/{foundation-models/src/azure-openai => core/src}/stream/index.ts (100%) rename packages/{foundation-models/src/azure-openai => core/src}/stream/line-decoder.ts (100%) rename packages/{foundation-models/src/azure-openai => core/src}/stream/sse-decoder.ts (100%) rename packages/{foundation-models/src/azure-openai => core/src}/stream/sse-stream.test.ts (100%) rename packages/{foundation-models/src/azure-openai => core/src}/stream/sse-stream.ts (100%) diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index aaf1e0af..f3051029 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -9,3 +9,4 @@ export { AwsBedrockChatModel, AiCoreOpenSourceChatModel } from './model-types.js'; +export { SseStream } from './stream/index.js'; diff --git a/packages/foundation-models/src/azure-openai/stream/index.ts b/packages/core/src/stream/index.ts similarity index 100% rename from packages/foundation-models/src/azure-openai/stream/index.ts rename to packages/core/src/stream/index.ts diff --git a/packages/foundation-models/src/azure-openai/stream/line-decoder.ts b/packages/core/src/stream/line-decoder.ts similarity index 100% rename from packages/foundation-models/src/azure-openai/stream/line-decoder.ts rename to packages/core/src/stream/line-decoder.ts diff --git a/packages/foundation-models/src/azure-openai/stream/sse-decoder.ts 
b/packages/core/src/stream/sse-decoder.ts similarity index 100% rename from packages/foundation-models/src/azure-openai/stream/sse-decoder.ts rename to packages/core/src/stream/sse-decoder.ts diff --git a/packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts b/packages/core/src/stream/sse-stream.test.ts similarity index 100% rename from packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts rename to packages/core/src/stream/sse-stream.test.ts diff --git a/packages/foundation-models/src/azure-openai/stream/sse-stream.ts b/packages/core/src/stream/sse-stream.ts similarity index 100% rename from packages/foundation-models/src/azure-openai/stream/sse-stream.ts rename to packages/core/src/stream/sse-stream.ts diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts index 70c699fe..f4ad8082 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts @@ -1,5 +1,5 @@ import { createLogger } from '@sap-cloud-sdk/util'; -import { SseStream } from './stream/index.js'; +import { SseStream } from '@sap-ai-sdk/core'; import { AzureOpenAiChatCompletionStreamChunkResponse } from './azure-openai-chat-completion-stream-chunk-response.js'; import type { HttpResponse } from '@sap-cloud-sdk/http-client'; import type { AzureOpenAiChatCompletionStreamResponse } from './azure-openai-chat-completion-stream-response.js'; From d55ec778f612e10c415e38af2504cbdf08b0fa15 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Thu, 5 Dec 2024 15:39:29 +0100 Subject: [PATCH 02/53] add most parts --- ...n-chat-completion-stream-chunk-response.ts | 39 ++++ ...tration-chat-completion-stream-response.ts | 57 ++++++ .../orchestration-chat-completion-stream.ts | 170 ++++++++++++++++++ .../orchestration/src/orchestration-client.ts | 30 
+++- 4 files changed, 295 insertions(+), 1 deletion(-) create mode 100644 packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.ts create mode 100644 packages/orchestration/src/orchestration-chat-completion-stream-response.ts create mode 100644 packages/orchestration/src/orchestration-chat-completion-stream.ts diff --git a/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.ts b/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.ts new file mode 100644 index 00000000..f253a147 --- /dev/null +++ b/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.ts @@ -0,0 +1,39 @@ +import type { CompletionPostResponseStreaming } from './client/api/schema/index.js'; +import type { TokenUsage } from './index.js'; + +/** + * Azure OpenAI chat completion stream chunk response. + */ +export class OrchestrationChatCompletionStreamChunkResponse { + constructor(public readonly data: CompletionPostResponseStreaming) { + this.data = data; + } + + /** + * Usage of tokens in the chunk response. + * @returns Token usage. + */ + getTokenUsage(): TokenUsage | undefined { + return this.data.orchestration_result?.usage; + } + + /** + * Reason for stopping the completion stream chunk. + * @param choiceIndex - The index of the choice to parse. + * @returns The finish reason. + */ + getFinishReason(choiceIndex = 0): string | undefined { + return this.data.orchestration_result?.choices.find((c: any) => c.index === choiceIndex) + ?.finish_reason; + } + + /** + * Parses the chunk response and returns the delta content. + * @param choiceIndex - The index of the choice to parse. + * @returns The message delta content. 
+ */ + getDeltaContent(choiceIndex = 0): string | undefined { + return this.data.orchestration_result?.choices.find((c: any) => c.index === choiceIndex)?.delta + .content; + } +} diff --git a/packages/orchestration/src/orchestration-chat-completion-stream-response.ts b/packages/orchestration/src/orchestration-chat-completion-stream-response.ts new file mode 100644 index 00000000..7605d423 --- /dev/null +++ b/packages/orchestration/src/orchestration-chat-completion-stream-response.ts @@ -0,0 +1,57 @@ +import type { TokenUsage } from './client/api/schema/index.js'; +import type { OrchestrationChatCompletionStream } from './orchestration-chat-completion-stream.js'; + +/** + * Azure OpenAI chat completion stream response. + */ +export class OrchestrationChatCompletionStreamResponse { + private _usage: TokenUsage | undefined; + /** + * Finish reasons for all choices. + */ + private _finishReasons: Map = new Map(); + private _stream: OrchestrationChatCompletionStream | undefined; + + public getTokenUsage(): TokenUsage | undefined { + return this._usage; + } + + /** + * @internal + */ + _setTokenUsage(usage: TokenUsage): void { + this._usage = usage; + } + + public getFinishReason(choiceIndex = 0): string | undefined | null { + return this._finishReasons.get(choiceIndex); + } + + /** + * @internal + */ + _getFinishReasons(): Map { + return this._finishReasons; + } + + /** + * @internal + */ + _setFinishReasons(finishReasons: Map): void { + this._finishReasons = finishReasons; + } + + get stream(): OrchestrationChatCompletionStream { + if (!this._stream) { + throw new Error('Response stream is undefined.'); + } + return this._stream; + } + + /** + * @internal + */ + set stream(stream: OrchestrationChatCompletionStream) { + this._stream = stream; + } +} diff --git a/packages/orchestration/src/orchestration-chat-completion-stream.ts b/packages/orchestration/src/orchestration-chat-completion-stream.ts new file mode 100644 index 00000000..8bcb0da4 --- /dev/null +++ 
b/packages/orchestration/src/orchestration-chat-completion-stream.ts @@ -0,0 +1,170 @@ +import { createLogger } from '@sap-cloud-sdk/util'; +import { SseStream } from '@sap-ai-sdk/core'; +import { OrchestrationChatCompletionStreamChunkResponse } from './orchestration-chat-completion-stream-chunk-response.js'; +import type { CompletionPostResponseStreaming } from './client/api/schema/index.js'; +import type { HttpResponse } from '@sap-cloud-sdk/http-client'; +import type { OrchestrationChatCompletionStreamResponse } from './orchestration-chat-completion-stream-response.js'; + +const logger = createLogger({ + package: 'foundation-models', + messageContext: 'azure-openai-chat-completion-stream' +}); + +/** + * Chat completion stream containing post-processing functions. + */ +export class OrchestrationChatCompletionStream extends SseStream { + /** + * Create a chat completion stream based on the http response. + * @param response - Http response. + * @returns Chat completion stream. + * @internal + */ + public static _create( + response: HttpResponse, + controller: AbortController + ): OrchestrationChatCompletionStream { + const stream = SseStream.transformToSseStream(response, controller); + return new OrchestrationChatCompletionStream(stream.iterator, controller); + } + + /** + * Wrap raw chunk data with chunk response class to provide helper functions. + * @param stream - Chat completion stream. 
+ * @internal + */ + static async *_processChunk( + stream: OrchestrationChatCompletionStream + ): AsyncGenerator { + for await (const chunk of stream) { + yield new OrchestrationChatCompletionStreamChunkResponse(chunk); + } + } + + /** + * @internal + */ + static async *_processFinishReason( + stream: OrchestrationChatCompletionStream, + response?: OrchestrationChatCompletionStreamResponse + ): AsyncGenerator { + for await (const chunk of stream) { + chunk.data.choices.forEach((choice: any) => { + const choiceIndex = choice.index; + if (choiceIndex >= 0) { + const finishReason = chunk.getFinishReason(choiceIndex); + if (finishReason) { + if (response) { + response._getFinishReasons().set(choiceIndex, finishReason); + } + switch (finishReason) { + case 'content_filter': + logger.error( + `Choice ${choiceIndex}: Stream finished with content filter hit.` + ); + break; + case 'length': + logger.error( + `Choice ${choiceIndex}: Stream finished with token length exceeded.` + ); + break; + case 'stop': + logger.debug(`Choice ${choiceIndex}: Stream finished.`); + break; + default: + logger.error( + `Choice ${choiceIndex}: Stream finished with unknown reason '${finishReason}'.` + ); + } + } + } + }); + yield chunk; + } + } + + /** + * @internal + */ + static async *_processTokenUsage( + stream: OrchestrationChatCompletionStream, + response?: OrchestrationChatCompletionStreamResponse + ): AsyncGenerator { + for await (const chunk of stream) { + const usage = chunk.getTokenUsage(); + if (usage) { + if (response) { + response._setTokenUsage(usage); + } + logger.debug(`Token usage: ${JSON.stringify(usage)}`); + } + yield chunk; + } + } + + /** + * Transform a stream of chunks into a stream of content strings. + * @param stream - Chat completion stream. + * @param choiceIndex - The index of the choice to parse. 
+ * @internal + */ + static async *_processContentStream( + stream: OrchestrationChatCompletionStream, + choiceIndex = 0 + ): AsyncGenerator { + for await (const chunk of stream) { + const deltaContent = chunk.getDeltaContent(choiceIndex); + if (!deltaContent) { + continue; + } + yield deltaContent; + } + } + + constructor( + public iterator: () => AsyncIterator, + controller: AbortController + ) { + super(iterator, controller); + } + + /** + * Pipe the stream through a processing function. + * @param processFn - The function to process the input stream. + * @param response - The `AzureOpenAiChatCompletionStreamResponse` object for process function to store finish reason, token usage, etc. + * @returns The output stream containing processed items. + * @internal + */ + _pipe( + processFn: ( + stream: OrchestrationChatCompletionStream, + response?: OrchestrationChatCompletionStreamResponse + ) => AsyncIterator, + response?: OrchestrationChatCompletionStreamResponse + ): OrchestrationChatCompletionStream { + if (response) { + return new OrchestrationChatCompletionStream( + () => processFn(this, response), + this.controller + ); + } + return new OrchestrationChatCompletionStream( + () => processFn(this), + this.controller + ); + } + + public toContentStream( + this: OrchestrationChatCompletionStream, + choiceIndex?: number + ): OrchestrationChatCompletionStream { + return new OrchestrationChatCompletionStream( + () => + OrchestrationChatCompletionStream._processContentStream( + this, + choiceIndex + ), + this.controller + ); + } +} diff --git a/packages/orchestration/src/orchestration-client.ts b/packages/orchestration/src/orchestration-client.ts index 95cc3670..f7415ae6 100644 --- a/packages/orchestration/src/orchestration-client.ts +++ b/packages/orchestration/src/orchestration-client.ts @@ -1,6 +1,7 @@ import { executeRequest } from '@sap-ai-sdk/core'; import { resolveDeploymentId } from '@sap-ai-sdk/ai-api/internal.js'; import { OrchestrationResponse } from 
'./orchestration-response.js'; +import { OrchestrationChatCompletionStream } from './orchestration-chat-completion-stream.js'; import type { CustomRequestConfig } from '@sap-ai-sdk/core'; import type { ResourceGroupConfig } from '@sap-ai-sdk/ai-api/internal.js'; import type { CompletionPostRequest } from './client/api/schema/index.js'; @@ -50,6 +51,31 @@ export class OrchestrationClient { return new OrchestrationResponse(response); } + + private async createStream( + controller: AbortController, + prompt?: Prompt, + requestConfig?: CustomRequestConfig + ): Promise> { + const body = constructCompletionPostRequest(this.config, prompt, true); + const deploymentId = await resolveDeploymentId({ + scenarioId: 'orchestration', + resourceGroup: this.deploymentConfig?.resourceGroup + }); + + const response = await executeRequest( + { + url: `/inference/deployments/${deploymentId}/completion`, + resourceGroup: this.deploymentConfig?.resourceGroup + }, + body, + { + ...requestConfig, + signal: controller.signal + } + ); + return OrchestrationChatCompletionStream._create(response, controller); + }; } /** @@ -57,10 +83,12 @@ export class OrchestrationClient { */ export function constructCompletionPostRequest( config: OrchestrationModuleConfig, - prompt?: Prompt + prompt?: Prompt, + stream = false ): CompletionPostRequest { return { orchestration_config: { + stream, module_configurations: { templating_module_config: { template: config.templating.template From 3e36337974a5ba821744762911ecf8e7a737a6ec Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Thu, 5 Dec 2024 15:52:47 +0100 Subject: [PATCH 03/53] stream method --- .../azure-openai/azure-openai-embedding-client.ts | 2 +- .../src/orchestration-chat-completion-stream.ts | 10 +++++----- .../orchestration/src/orchestration-client.ts | 15 +++++++++++++++ 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-embedding-client.ts 
b/packages/foundation-models/src/azure-openai/azure-openai-embedding-client.ts index 0281a04c..66c98e89 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-embedding-client.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-embedding-client.ts @@ -9,7 +9,7 @@ import { apiVersion, type AzureOpenAiEmbeddingModel } from './model-types.js'; import type { AzureOpenAiEmbeddingParameters } from './azure-openai-embedding-types.js'; /** - * Azure OpenAI client for embeddings. + * Azure OpenAI client for embeddings./home/i519840/code/ai-sdk-js/packages/orchestration/src. */ export class AzureOpenAiEmbeddingClient { /** diff --git a/packages/orchestration/src/orchestration-chat-completion-stream.ts b/packages/orchestration/src/orchestration-chat-completion-stream.ts index 8bcb0da4..ce44ba5c 100644 --- a/packages/orchestration/src/orchestration-chat-completion-stream.ts +++ b/packages/orchestration/src/orchestration-chat-completion-stream.ts @@ -6,8 +6,8 @@ import type { HttpResponse } from '@sap-cloud-sdk/http-client'; import type { OrchestrationChatCompletionStreamResponse } from './orchestration-chat-completion-stream-response.js'; const logger = createLogger({ - package: 'foundation-models', - messageContext: 'azure-openai-chat-completion-stream' + package: 'orchestration', + messageContext: 'orchestration-chat-completion-stream' }); /** @@ -24,7 +24,7 @@ export class OrchestrationChatCompletionStream extends SseStream { response: HttpResponse, controller: AbortController ): OrchestrationChatCompletionStream { - const stream = SseStream.transformToSseStream(response, controller); + const stream = SseStream.transformToSseStream(response, controller); // TODO: Check if this can be narrowed return new OrchestrationChatCompletionStream(stream.iterator, controller); } @@ -49,7 +49,7 @@ export class OrchestrationChatCompletionStream extends SseStream { response?: OrchestrationChatCompletionStreamResponse ): AsyncGenerator { for await (const chunk 
of stream) { - chunk.data.choices.forEach((choice: any) => { + chunk.data.orchestration_result?.choices.forEach((choice: any) => { const choiceIndex = choice.index; if (choiceIndex >= 0) { const finishReason = chunk.getFinishReason(choiceIndex); @@ -57,7 +57,7 @@ export class OrchestrationChatCompletionStream extends SseStream { if (response) { response._getFinishReasons().set(choiceIndex, finishReason); } - switch (finishReason) { + switch (finishReason) { // TODO: Cover all finish reasons case 'content_filter': logger.error( `Choice ${choiceIndex}: Stream finished with content filter hit.` diff --git a/packages/orchestration/src/orchestration-client.ts b/packages/orchestration/src/orchestration-client.ts index f7415ae6..692452b0 100644 --- a/packages/orchestration/src/orchestration-client.ts +++ b/packages/orchestration/src/orchestration-client.ts @@ -2,6 +2,7 @@ import { executeRequest } from '@sap-ai-sdk/core'; import { resolveDeploymentId } from '@sap-ai-sdk/ai-api/internal.js'; import { OrchestrationResponse } from './orchestration-response.js'; import { OrchestrationChatCompletionStream } from './orchestration-chat-completion-stream.js'; +import { OrchestrationChatCompletionStreamResponse } from './orchestration-chat-completion-stream-response.js'; import type { CustomRequestConfig } from '@sap-ai-sdk/core'; import type { ResourceGroupConfig } from '@sap-ai-sdk/ai-api/internal.js'; import type { CompletionPostRequest } from './client/api/schema/index.js'; @@ -9,6 +10,7 @@ import type { OrchestrationModuleConfig, Prompt } from './orchestration-types.js'; +import type { OrchestrationChatCompletionStreamChunkResponse } from './orchestration-chat-completion-stream-chunk-response.js'; /** * Get the orchestration client. 
@@ -52,6 +54,19 @@ export class OrchestrationClient { return new OrchestrationResponse(response); } + async stream( + prompt?: Prompt, + controller = new AbortController(), + requestConfig?: CustomRequestConfig + ): Promise> { + const response = new OrchestrationChatCompletionStreamResponse(); + response.stream = (await this.createStream(controller, prompt, requestConfig)) + ._pipe(OrchestrationChatCompletionStream._processChunk) + ._pipe(OrchestrationChatCompletionStream._processFinishReason, response) + ._pipe(OrchestrationChatCompletionStream._processTokenUsage, response); + return response; + } + private async createStream( controller: AbortController, prompt?: Prompt, From d0f0277312bb10da53af4d5b4b04d0c75bd35865 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Mon, 9 Dec 2024 16:15:04 +0100 Subject: [PATCH 04/53] remove path --- .../src/azure-openai/azure-openai-embedding-client.ts | 2 +- packages/orchestration/src/orchestration-client.ts | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-embedding-client.ts b/packages/foundation-models/src/azure-openai/azure-openai-embedding-client.ts index 66c98e89..0281a04c 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-embedding-client.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-embedding-client.ts @@ -9,7 +9,7 @@ import { apiVersion, type AzureOpenAiEmbeddingModel } from './model-types.js'; import type { AzureOpenAiEmbeddingParameters } from './azure-openai-embedding-types.js'; /** - * Azure OpenAI client for embeddings./home/i519840/code/ai-sdk-js/packages/orchestration/src. + * Azure OpenAI client for embeddings. 
*/ export class AzureOpenAiEmbeddingClient { /** diff --git a/packages/orchestration/src/orchestration-client.ts b/packages/orchestration/src/orchestration-client.ts index 692452b0..4d88ece3 100644 --- a/packages/orchestration/src/orchestration-client.ts +++ b/packages/orchestration/src/orchestration-client.ts @@ -5,7 +5,7 @@ import { OrchestrationChatCompletionStream } from './orchestration-chat-completi import { OrchestrationChatCompletionStreamResponse } from './orchestration-chat-completion-stream-response.js'; import type { CustomRequestConfig } from '@sap-ai-sdk/core'; import type { ResourceGroupConfig } from '@sap-ai-sdk/ai-api/internal.js'; -import type { CompletionPostRequest } from './client/api/schema/index.js'; +import type { CompletionPostRequest, CompletionPostResponseStreaming } from './client/api/schema/index.js'; import type { OrchestrationModuleConfig, Prompt @@ -56,8 +56,8 @@ export class OrchestrationClient { async stream( prompt?: Prompt, - controller = new AbortController(), - requestConfig?: CustomRequestConfig + requestConfig?: CustomRequestConfig, + controller = new AbortController() ): Promise> { const response = new OrchestrationChatCompletionStreamResponse(); response.stream = (await this.createStream(controller, prompt, requestConfig)) @@ -71,7 +71,7 @@ export class OrchestrationClient { controller: AbortController, prompt?: Prompt, requestConfig?: CustomRequestConfig - ): Promise> { + ): Promise> { const body = constructCompletionPostRequest(this.config, prompt, true); const deploymentId = await resolveDeploymentId({ scenarioId: 'orchestration', From 28e9146c9052b173a873600941012af6a36e9a94 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Mon, 9 Dec 2024 16:33:36 +0100 Subject: [PATCH 05/53] change types --- .../src/orchestration-chat-completion-stream.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/orchestration/src/orchestration-chat-completion-stream.ts 
b/packages/orchestration/src/orchestration-chat-completion-stream.ts index ce44ba5c..fc28c327 100644 --- a/packages/orchestration/src/orchestration-chat-completion-stream.ts +++ b/packages/orchestration/src/orchestration-chat-completion-stream.ts @@ -46,7 +46,7 @@ export class OrchestrationChatCompletionStream extends SseStream { */ static async *_processFinishReason( stream: OrchestrationChatCompletionStream, - response?: OrchestrationChatCompletionStreamResponse + response?: OrchestrationChatCompletionStreamResponse ): AsyncGenerator { for await (const chunk of stream) { chunk.data.orchestration_result?.choices.forEach((choice: any) => { @@ -88,7 +88,7 @@ export class OrchestrationChatCompletionStream extends SseStream { */ static async *_processTokenUsage( stream: OrchestrationChatCompletionStream, - response?: OrchestrationChatCompletionStreamResponse + response?: OrchestrationChatCompletionStreamResponse ): AsyncGenerator { for await (const chunk of stream) { const usage = chunk.getTokenUsage(); @@ -138,9 +138,9 @@ export class OrchestrationChatCompletionStream extends SseStream { _pipe( processFn: ( stream: OrchestrationChatCompletionStream, - response?: OrchestrationChatCompletionStreamResponse + response?: OrchestrationChatCompletionStreamResponse ) => AsyncIterator, - response?: OrchestrationChatCompletionStreamResponse + response?: OrchestrationChatCompletionStreamResponse ): OrchestrationChatCompletionStream { if (response) { return new OrchestrationChatCompletionStream( From 27f22a2992876b04dd9edd960a4e3e4744b11cdc Mon Sep 17 00:00:00 2001 From: cloud-sdk-js Date: Mon, 9 Dec 2024 15:34:49 +0000 Subject: [PATCH 06/53] fix: Changes from lint --- ...n-chat-completion-stream-chunk-response.ts | 10 +++++---- .../orchestration-chat-completion-stream.ts | 4 +++- .../orchestration/src/orchestration-client.ts | 22 ++++++++++++++----- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git 
a/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.ts b/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.ts index f253a147..8e0313bb 100644 --- a/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.ts +++ b/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.ts @@ -23,8 +23,9 @@ export class OrchestrationChatCompletionStreamChunkResponse { * @returns The finish reason. */ getFinishReason(choiceIndex = 0): string | undefined { - return this.data.orchestration_result?.choices.find((c: any) => c.index === choiceIndex) - ?.finish_reason; + return this.data.orchestration_result?.choices.find( + (c: any) => c.index === choiceIndex + )?.finish_reason; } /** @@ -33,7 +34,8 @@ export class OrchestrationChatCompletionStreamChunkResponse { * @returns The message delta content. */ getDeltaContent(choiceIndex = 0): string | undefined { - return this.data.orchestration_result?.choices.find((c: any) => c.index === choiceIndex)?.delta - .content; + return this.data.orchestration_result?.choices.find( + (c: any) => c.index === choiceIndex + )?.delta.content; } } diff --git a/packages/orchestration/src/orchestration-chat-completion-stream.ts b/packages/orchestration/src/orchestration-chat-completion-stream.ts index fc28c327..2293345b 100644 --- a/packages/orchestration/src/orchestration-chat-completion-stream.ts +++ b/packages/orchestration/src/orchestration-chat-completion-stream.ts @@ -57,7 +57,9 @@ export class OrchestrationChatCompletionStream extends SseStream { if (response) { response._getFinishReasons().set(choiceIndex, finishReason); } - switch (finishReason) { // TODO: Cover all finish reasons + switch ( + finishReason // TODO: Cover all finish reasons + ) { case 'content_filter': logger.error( `Choice ${choiceIndex}: Stream finished with content filter hit.` diff --git a/packages/orchestration/src/orchestration-client.ts 
b/packages/orchestration/src/orchestration-client.ts index 4d88ece3..ab7ce65e 100644 --- a/packages/orchestration/src/orchestration-client.ts +++ b/packages/orchestration/src/orchestration-client.ts @@ -5,7 +5,10 @@ import { OrchestrationChatCompletionStream } from './orchestration-chat-completi import { OrchestrationChatCompletionStreamResponse } from './orchestration-chat-completion-stream-response.js'; import type { CustomRequestConfig } from '@sap-ai-sdk/core'; import type { ResourceGroupConfig } from '@sap-ai-sdk/ai-api/internal.js'; -import type { CompletionPostRequest, CompletionPostResponseStreaming } from './client/api/schema/index.js'; +import type { + CompletionPostRequest, + CompletionPostResponseStreaming +} from './client/api/schema/index.js'; import type { OrchestrationModuleConfig, Prompt @@ -58,9 +61,14 @@ export class OrchestrationClient { prompt?: Prompt, requestConfig?: CustomRequestConfig, controller = new AbortController() - ): Promise> { - const response = new OrchestrationChatCompletionStreamResponse(); - response.stream = (await this.createStream(controller, prompt, requestConfig)) + ): Promise< + OrchestrationChatCompletionStreamResponse + > { + const response = + new OrchestrationChatCompletionStreamResponse(); + response.stream = ( + await this.createStream(controller, prompt, requestConfig) + ) ._pipe(OrchestrationChatCompletionStream._processChunk) ._pipe(OrchestrationChatCompletionStream._processFinishReason, response) ._pipe(OrchestrationChatCompletionStream._processTokenUsage, response); @@ -71,7 +79,9 @@ export class OrchestrationClient { controller: AbortController, prompt?: Prompt, requestConfig?: CustomRequestConfig - ): Promise> { + ): Promise< + OrchestrationChatCompletionStream + > { const body = constructCompletionPostRequest(this.config, prompt, true); const deploymentId = await resolveDeploymentId({ scenarioId: 'orchestration', @@ -90,7 +100,7 @@ export class OrchestrationClient { } ); return 
OrchestrationChatCompletionStream._create(response, controller); - }; + } } /** From 7c57cf0c5e6f7ca285830d1c15ea5f4b4eb110cb Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Mon, 9 Dec 2024 17:17:04 +0100 Subject: [PATCH 07/53] remove todo --- .../orchestration/src/orchestration-chat-completion-stream.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/orchestration/src/orchestration-chat-completion-stream.ts b/packages/orchestration/src/orchestration-chat-completion-stream.ts index fc28c327..5a3fb2fa 100644 --- a/packages/orchestration/src/orchestration-chat-completion-stream.ts +++ b/packages/orchestration/src/orchestration-chat-completion-stream.ts @@ -57,7 +57,7 @@ export class OrchestrationChatCompletionStream extends SseStream { if (response) { response._getFinishReasons().set(choiceIndex, finishReason); } - switch (finishReason) { // TODO: Cover all finish reasons + switch (finishReason) { case 'content_filter': logger.error( `Choice ${choiceIndex}: Stream finished with content filter hit.` From 27f4032cc3069e90742170d20c9c35c1595adb47 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Mon, 9 Dec 2024 18:06:17 +0100 Subject: [PATCH 08/53] common execute --- .../orchestration/src/orchestration-client.ts | 45 ++++++++++--------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/packages/orchestration/src/orchestration-client.ts b/packages/orchestration/src/orchestration-client.ts index ab7ce65e..4487fd0d 100644 --- a/packages/orchestration/src/orchestration-client.ts +++ b/packages/orchestration/src/orchestration-client.ts @@ -3,6 +3,7 @@ import { resolveDeploymentId } from '@sap-ai-sdk/ai-api/internal.js'; import { OrchestrationResponse } from './orchestration-response.js'; import { OrchestrationChatCompletionStream } from './orchestration-chat-completion-stream.js'; import { OrchestrationChatCompletionStreamResponse } from './orchestration-chat-completion-stream-response.js'; +import type { HttpResponse } from 
'@sap-cloud-sdk/http-client'; import type { CustomRequestConfig } from '@sap-ai-sdk/core'; import type { ResourceGroupConfig } from '@sap-ai-sdk/ai-api/internal.js'; import type { @@ -39,18 +40,8 @@ export class OrchestrationClient { prompt?: Prompt, requestConfig?: CustomRequestConfig ): Promise { - const body = constructCompletionPostRequest(this.config, prompt); - const deploymentId = await resolveDeploymentId({ - scenarioId: 'orchestration', - resourceGroup: this.deploymentConfig?.resourceGroup - }); - - const response = await executeRequest( - { - url: `/inference/deployments/${deploymentId}/completion`, - resourceGroup: this.deploymentConfig?.resourceGroup - }, - body, + const response = await this.executeRequest( + prompt, requestConfig ); @@ -75,29 +66,41 @@ export class OrchestrationClient { return response; } - private async createStream( - controller: AbortController, + private async executeRequest( prompt?: Prompt, - requestConfig?: CustomRequestConfig - ): Promise< - OrchestrationChatCompletionStream - > { - const body = constructCompletionPostRequest(this.config, prompt, true); + requestConfig?: CustomRequestConfig, + streaming: boolean = false + ): Promise { + const body = constructCompletionPostRequest(this.config, prompt, streaming); const deploymentId = await resolveDeploymentId({ scenarioId: 'orchestration', resourceGroup: this.deploymentConfig?.resourceGroup }); - const response = await executeRequest( + return executeRequest( { url: `/inference/deployments/${deploymentId}/completion`, resourceGroup: this.deploymentConfig?.resourceGroup }, body, + requestConfig + ); + } + + private async createStream( + controller: AbortController, + prompt?: Prompt, + requestConfig?: CustomRequestConfig + ): Promise< + OrchestrationChatCompletionStream + > { + const response = await this.executeRequest( + prompt, { ...requestConfig, signal: controller.signal - } + }, + true ); return OrchestrationChatCompletionStream._create(response, controller); } From 
15fae8a0cb5b85162140d9c9fe9dd778f66eaab0 Mon Sep 17 00:00:00 2001 From: cloud-sdk-js Date: Mon, 9 Dec 2024 17:07:23 +0000 Subject: [PATCH 09/53] fix: Changes from lint --- packages/orchestration/src/orchestration-client.ts | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/packages/orchestration/src/orchestration-client.ts b/packages/orchestration/src/orchestration-client.ts index 4487fd0d..493cba7f 100644 --- a/packages/orchestration/src/orchestration-client.ts +++ b/packages/orchestration/src/orchestration-client.ts @@ -40,10 +40,7 @@ export class OrchestrationClient { prompt?: Prompt, requestConfig?: CustomRequestConfig ): Promise { - const response = await this.executeRequest( - prompt, - requestConfig - ); + const response = await this.executeRequest(prompt, requestConfig); return new OrchestrationResponse(response); } From 188e0ba689b9a554978bae2630a23865ad64890c Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Mon, 16 Dec 2024 15:32:34 +0100 Subject: [PATCH 10/53] update docs, types --- ...orchestration-chat-completion-stream-chunk-response.ts | 8 ++++---- .../src/orchestration-chat-completion-stream-response.ts | 2 +- .../src/orchestration-chat-completion-stream.ts | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.ts b/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.ts index 8e0313bb..ad1ca2b0 100644 --- a/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.ts +++ b/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.ts @@ -1,8 +1,8 @@ -import type { CompletionPostResponseStreaming } from './client/api/schema/index.js'; +import type { CompletionPostResponseStreaming, LLMChoiceStreaming } from './client/api/schema/index.js'; import type { TokenUsage } from './index.js'; /** - * Azure OpenAI chat completion stream chunk response. 
+ * Orchestration chat completion stream chunk response. */ export class OrchestrationChatCompletionStreamChunkResponse { constructor(public readonly data: CompletionPostResponseStreaming) { @@ -24,7 +24,7 @@ export class OrchestrationChatCompletionStreamChunkResponse { */ getFinishReason(choiceIndex = 0): string | undefined { return this.data.orchestration_result?.choices.find( - (c: any) => c.index === choiceIndex + (c: LLMChoiceStreaming) => c.index === choiceIndex )?.finish_reason; } @@ -35,7 +35,7 @@ export class OrchestrationChatCompletionStreamChunkResponse { */ getDeltaContent(choiceIndex = 0): string | undefined { return this.data.orchestration_result?.choices.find( - (c: any) => c.index === choiceIndex + (c: LLMChoiceStreaming) => c.index === choiceIndex )?.delta.content; } } diff --git a/packages/orchestration/src/orchestration-chat-completion-stream-response.ts b/packages/orchestration/src/orchestration-chat-completion-stream-response.ts index 7605d423..3276b8ac 100644 --- a/packages/orchestration/src/orchestration-chat-completion-stream-response.ts +++ b/packages/orchestration/src/orchestration-chat-completion-stream-response.ts @@ -2,7 +2,7 @@ import type { TokenUsage } from './client/api/schema/index.js'; import type { OrchestrationChatCompletionStream } from './orchestration-chat-completion-stream.js'; /** - * Azure OpenAI chat completion stream response. + * Orchestration chat completion stream response. 
*/ export class OrchestrationChatCompletionStreamResponse { private _usage: TokenUsage | undefined; diff --git a/packages/orchestration/src/orchestration-chat-completion-stream.ts b/packages/orchestration/src/orchestration-chat-completion-stream.ts index 5a3fb2fa..8d80001d 100644 --- a/packages/orchestration/src/orchestration-chat-completion-stream.ts +++ b/packages/orchestration/src/orchestration-chat-completion-stream.ts @@ -1,7 +1,7 @@ import { createLogger } from '@sap-cloud-sdk/util'; import { SseStream } from '@sap-ai-sdk/core'; import { OrchestrationChatCompletionStreamChunkResponse } from './orchestration-chat-completion-stream-chunk-response.js'; -import type { CompletionPostResponseStreaming } from './client/api/schema/index.js'; +import type { CompletionPostResponseStreaming, LLMChoiceStreaming } from './client/api/schema/index.js'; import type { HttpResponse } from '@sap-cloud-sdk/http-client'; import type { OrchestrationChatCompletionStreamResponse } from './orchestration-chat-completion-stream-response.js'; @@ -49,7 +49,7 @@ export class OrchestrationChatCompletionStream extends SseStream { response?: OrchestrationChatCompletionStreamResponse ): AsyncGenerator { for await (const chunk of stream) { - chunk.data.orchestration_result?.choices.forEach((choice: any) => { + chunk.data.orchestration_result?.choices.forEach((choice: LLMChoiceStreaming) => { const choiceIndex = choice.index; if (choiceIndex >= 0) { const finishReason = chunk.getFinishReason(choiceIndex); @@ -131,7 +131,7 @@ export class OrchestrationChatCompletionStream extends SseStream { /** * Pipe the stream through a processing function. * @param processFn - The function to process the input stream. - * @param response - The `AzureOpenAiChatCompletionStreamResponse` object for process function to store finish reason, token usage, etc. + * @param response - The `OrchestrationChatCompletionStreamResponse` object for process function to store finish reason, token usage, etc. 
* @returns The output stream containing processed items. * @internal */ From 431abce3a6097305f0dc6bc853524770b16bd8a8 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Mon, 16 Dec 2024 16:56:10 +0100 Subject: [PATCH 11/53] narrow type --- .../orchestration/src/orchestration-chat-completion-stream.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/orchestration/src/orchestration-chat-completion-stream.ts b/packages/orchestration/src/orchestration-chat-completion-stream.ts index 8d80001d..eeff6187 100644 --- a/packages/orchestration/src/orchestration-chat-completion-stream.ts +++ b/packages/orchestration/src/orchestration-chat-completion-stream.ts @@ -24,7 +24,7 @@ export class OrchestrationChatCompletionStream extends SseStream { response: HttpResponse, controller: AbortController ): OrchestrationChatCompletionStream { - const stream = SseStream.transformToSseStream(response, controller); // TODO: Check if this can be narrowed + const stream = SseStream.transformToSseStream(response, controller); return new OrchestrationChatCompletionStream(stream.iterator, controller); } From 3fdd473bec767c24e7cf6521c0684725b5aff460 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Tue, 17 Dec 2024 15:35:49 +0100 Subject: [PATCH 12/53] add sample code, tests, ... 
--- packages/core/src/index.ts | 2 +- packages/core/src/stream/index.ts | 4 +- ...zure-openai-chat-completion-stream.test.ts | 3 +- packages/orchestration/src/index.ts | 4 + ...t-completion-stream-chunk-response.test.ts | 77 ++++++++++++ ...chestration-chat-completion-stream.test.ts | 116 ++++++++++++++++++ sample-code/src/orchestration.ts | 30 ++++- sample-code/src/server.ts | 55 ++++++++- 8 files changed, 283 insertions(+), 8 deletions(-) create mode 100644 packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.test.ts create mode 100644 packages/orchestration/src/orchestration-chat-completion-stream.test.ts diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index f3051029..0b6b336e 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -9,4 +9,4 @@ export { AwsBedrockChatModel, AiCoreOpenSourceChatModel } from './model-types.js'; -export { SseStream } from './stream/index.js'; +export { SseStream, LineDecoder, SSEDecoder } from './stream/index.js'; diff --git a/packages/core/src/stream/index.ts b/packages/core/src/stream/index.ts index 4f2ea9e7..992c8e02 100644 --- a/packages/core/src/stream/index.ts +++ b/packages/core/src/stream/index.ts @@ -1 +1,3 @@ -export { SseStream } from './sse-stream.js'; +export * from './sse-stream.js'; +export * from './sse-decoder.js'; +export * from './line-decoder.js'; diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts index f4a058bc..8ded3b79 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts @@ -1,9 +1,8 @@ import { createLogger } from '@sap-cloud-sdk/util'; import { jest } from '@jest/globals'; +import { LineDecoder, SSEDecoder } from '@sap-ai-sdk/core'; import { parseFileToString } 
from '../../../../test-util/mock-http.js'; import { AzureOpenAiChatCompletionStream } from './azure-openai-chat-completion-stream.js'; -import { LineDecoder } from './stream/line-decoder.js'; -import { SSEDecoder } from './stream/sse-decoder.js'; describe('OpenAI chat completion stream', () => { let sseChunks: string[]; diff --git a/packages/orchestration/src/index.ts b/packages/orchestration/src/index.ts index 74cc98c4..40f75ea2 100644 --- a/packages/orchestration/src/index.ts +++ b/packages/orchestration/src/index.ts @@ -40,6 +40,10 @@ export type { Prompt } from './orchestration-types.js'; +export { OrchestrationChatCompletionStreamResponse } from './orchestration-chat-completion-stream-response.js'; + +export { OrchestrationChatCompletionStreamChunkResponse } from './orchestration-chat-completion-stream-chunk-response.js'; + export { OrchestrationClient } from './orchestration-client.js'; export { buildAzureContentFilter } from './orchestration-filter-utility.js'; diff --git a/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.test.ts b/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.test.ts new file mode 100644 index 00000000..6556f9b3 --- /dev/null +++ b/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.test.ts @@ -0,0 +1,77 @@ +import { parseMockResponse } from '../../../test-util/mock-http.js'; +import { OrchestrationChatCompletionStreamChunkResponse } from './orchestration-chat-completion-stream-chunk-response.js'; + +describe('OpenAI chat completion stream chunk response', () => { + let mockResponses: { + tokenUsageResponse: any; + finishReasonResponse: any; + deltaContentResponse: any; + }; + let azureOpenAiChatCompletionStreamChunkResponses: { + tokenUsageResponse: OrchestrationChatCompletionStreamChunkResponse; + finishReasonResponse: OrchestrationChatCompletionStreamChunkResponse; + deltaContentResponse: OrchestrationChatCompletionStreamChunkResponse; + }; + + 
beforeAll(async () => { + mockResponses = { + tokenUsageResponse: await parseMockResponse( + 'foundation-models', + 'azure-openai-chat-completion-stream-chunk-response-token-usage.json' + ), + finishReasonResponse: await parseMockResponse( + 'foundation-models', + 'azure-openai-chat-completion-stream-chunk-response-finish-reason.json' + ), + deltaContentResponse: await parseMockResponse( + 'foundation-models', + 'azure-openai-chat-completion-stream-chunk-response-delta-content.json' + ) + }; + azureOpenAiChatCompletionStreamChunkResponses = { + tokenUsageResponse: new OrchestrationChatCompletionStreamChunkResponse( + mockResponses.tokenUsageResponse + ), + finishReasonResponse: new OrchestrationChatCompletionStreamChunkResponse( + mockResponses.finishReasonResponse + ), + deltaContentResponse: new OrchestrationChatCompletionStreamChunkResponse( + mockResponses.deltaContentResponse + ) + }; + }); + + it('should return the chat completion stream chunk response', () => { + expect( + azureOpenAiChatCompletionStreamChunkResponses.tokenUsageResponse.data + ).toStrictEqual(mockResponses.tokenUsageResponse); + expect( + azureOpenAiChatCompletionStreamChunkResponses.finishReasonResponse.data + ).toStrictEqual(mockResponses.finishReasonResponse); + expect( + azureOpenAiChatCompletionStreamChunkResponses.deltaContentResponse.data + ).toStrictEqual(mockResponses.deltaContentResponse); + }); + + it('should get token usage', () => { + expect( + azureOpenAiChatCompletionStreamChunkResponses.tokenUsageResponse.getTokenUsage() + ).toMatchObject({ + completion_tokens: expect.any(Number), + prompt_tokens: expect.any(Number), + total_tokens: expect.any(Number) + }); + }); + + it('should return finish reason', () => { + expect( + azureOpenAiChatCompletionStreamChunkResponses.finishReasonResponse.getFinishReason() + ).toBe('stop'); + }); + + it('should return delta content with default index 0', () => { + expect( + 
azureOpenAiChatCompletionStreamChunkResponses.deltaContentResponse.getDeltaContent() + ).toBe(' is'); + }); +}); diff --git a/packages/orchestration/src/orchestration-chat-completion-stream.test.ts b/packages/orchestration/src/orchestration-chat-completion-stream.test.ts new file mode 100644 index 00000000..592eca7b --- /dev/null +++ b/packages/orchestration/src/orchestration-chat-completion-stream.test.ts @@ -0,0 +1,116 @@ +import { createLogger } from '@sap-cloud-sdk/util'; +import { jest } from '@jest/globals'; +import { LineDecoder, SSEDecoder } from '@sap-ai-sdk/core'; +import { parseFileToString } from '../../../test-util/mock-http.js'; +import { OrchestrationChatCompletionStream } from './orchestration-chat-completion-stream.js'; +import type { CompletionPostResponseStreaming } from './client/api/schema/index.js'; + +describe('OpenAI chat completion stream', () => { + let sseChunks: string[]; + let originalChatCompletionStream: OrchestrationChatCompletionStream; + + beforeEach(async () => { + const rawChunksString = await parseFileToString( + 'foundation-models', + 'azure-openai-chat-completion-stream-chunks.txt' + ); + const lineDecoder = new LineDecoder(); + const sseDecoder = new SSEDecoder(); + const rawLines: string[] = lineDecoder.decode( + Buffer.from(rawChunksString, 'utf-8') + ); + + sseChunks = rawLines + .map(chunk => sseDecoder.decode(chunk)) + .filter(sse => sse !== null) + .filter(sse => !sse.data.startsWith('[DONE]')) + .map(sse => JSON.parse(sse.data)); + + async function* iterator(): AsyncGenerator { + for (const sseChunk of sseChunks) { + yield sseChunk; + } + } + originalChatCompletionStream = new OrchestrationChatCompletionStream( + iterator, + new AbortController() + ); + }); + + it('should wrap the raw chunk', async () => { + let output = ''; + const asnycGenerator = OrchestrationChatCompletionStream._processChunk( + originalChatCompletionStream + ); + for await (const chunk of asnycGenerator) { + expect(chunk).toBeDefined(); + 
chunk.getDeltaContent() ? (output += chunk.getDeltaContent()) : null; + } + expect(output).toEqual('The capital of France is Paris.'); + }); + + it('should process the finish reasons', async () => { + const logger = createLogger({ + package: 'foundation-models', + messageContext: 'azure-openai-chat-completion-stream' + }); + const debugSpy = jest.spyOn(logger, 'debug'); + const asyncGeneratorChunk = OrchestrationChatCompletionStream._processChunk( + originalChatCompletionStream + ); + const asyncGeneratorFinishReason = + OrchestrationChatCompletionStream._processFinishReason( + new OrchestrationChatCompletionStream( + () => asyncGeneratorChunk, + new AbortController() + ) + ); + + for await (const chunk of asyncGeneratorFinishReason) { + expect(chunk).toBeDefined(); + } + expect(debugSpy).toHaveBeenCalledWith('Choice 0: Stream finished.'); + }); + + it('should process the token usage', async () => { + const logger = createLogger({ + package: 'foundation-models', + messageContext: 'azure-openai-chat-completion-stream' + }); + const debugSpy = jest.spyOn(logger, 'debug'); + const asyncGeneratorChunk = OrchestrationChatCompletionStream._processChunk( + originalChatCompletionStream + ); + const asyncGeneratorTokenUsage = + OrchestrationChatCompletionStream._processTokenUsage( + new OrchestrationChatCompletionStream( + () => asyncGeneratorChunk, + new AbortController() + ) + ); + + for await (const chunk of asyncGeneratorTokenUsage) { + expect(chunk).toBeDefined(); + } + expect(debugSpy).toHaveBeenCalledWith( + expect.stringContaining('Token usage:') + ); + }); + + it('should transform the original stream to string stream', async () => { + const asyncGeneratorChunk = OrchestrationChatCompletionStream._processChunk( + originalChatCompletionStream + ); + const chunkStream = new OrchestrationChatCompletionStream( + () => asyncGeneratorChunk, + new AbortController() + ); + + let output = ''; + for await (const chunk of chunkStream.toContentStream()) { + expect(typeof 
chunk).toBe('string'); + output += chunk; + } + expect(output).toEqual('The capital of France is Paris.'); + }); +}); diff --git a/sample-code/src/orchestration.ts b/sample-code/src/orchestration.ts index f23aecd6..9fbfae7f 100644 --- a/sample-code/src/orchestration.ts +++ b/sample-code/src/orchestration.ts @@ -5,6 +5,8 @@ import { import { createLogger } from '@sap-cloud-sdk/util'; import type { LlmModuleConfig, + OrchestrationChatCompletionStreamChunkResponse, + OrchestrationChatCompletionStreamResponse, OrchestrationResponse } from '@sap-ai-sdk/orchestration'; @@ -30,7 +32,7 @@ export async function orchestrationChatCompletion(): Promise +> { + const orchestrationClient = new OrchestrationClient({ + // define the language model to be used + llm: { + model_name: 'gpt-4o', + model_params: {} + }, + // define the prompt + templating: { + template: [{ role: 'user', content: 'What is the capital of France?' }] + } + }); + + const response = orchestrationClient.stream(undefined, undefined, controller); + return response; +} + const llm: LlmModuleConfig = { model_name: 'gpt-4o', model_params: {} diff --git a/sample-code/src/server.ts b/sample-code/src/server.ts index 595abc0e..13707a4a 100644 --- a/sample-code/src/server.ts +++ b/sample-code/src/server.ts @@ -2,7 +2,7 @@ import express from 'express'; import { chatCompletion, - chatCompletionStream, + chatCompletionStream as azureChatCompletionStream, computeEmbedding // eslint-disable-next-line import/no-internal-modules } from './foundation-models/azure-openai.js'; @@ -11,7 +11,8 @@ import { orchestrationTemplating, orchestrationInputFiltering, orchestrationOutputFiltering, - orchestrationRequestConfig + orchestrationRequestConfig, + chatCompletionStream as orchestrationChatCompletionStream } from './orchestration.js'; import { getDeployments, @@ -139,7 +140,7 @@ app.get('/azure-openai/chat-completion', async (req, res) => { app.get('/azure-openai/chat-completion-stream', async (req, res) => { const controller = new 
AbortController(); try { - const response = await chatCompletionStream(controller); + const response = await azureChatCompletionStream(controller); // Set headers for event stream. res.setHeader('Content-Type', 'text/event-stream'); @@ -232,6 +233,54 @@ app.get('/orchestration/:sampleCase', async (req, res) => { } }); +app.get('test/chat-completion-stream', async (req, res) => { + const controller = new AbortController(); + try { + const response = await orchestrationChatCompletionStream(controller); + + // Set headers for event stream. + res.setHeader('Content-Type', 'text/event-stream'); + res.setHeader('Connection', 'keep-alive'); + res.flushHeaders(); + + let connectionAlive = true; + + // Abort the stream if the client connection is closed. + res.on('close', () => { + controller.abort(); + connectionAlive = false; + res.end(); + }); + + // Stream the delta content. + for await (const chunk of response.stream.toContentStream()) { + if (!connectionAlive) { + break; + } + res.write(chunk); + } + + // Write the finish reason and token usage after the stream ends. 
+ if (connectionAlive) { + const finishReason = response.getFinishReason(); + const tokenUsage = response.getTokenUsage(); + res.write('\n\n---------------------------\n'); + res.write(`Finish reason: ${finishReason}\n`); + res.write('Token usage:\n'); + res.write(` - Completion tokens: ${tokenUsage?.completion_tokens}\n`); + res.write(` - Prompt tokens: ${tokenUsage?.prompt_tokens}\n`); + res.write(` - Total tokens: ${tokenUsage?.total_tokens}\n`); + } + } catch (error: any) { + console.error(error); + res + .status(500) + .send('Yikes, vibes are off apparently 😬 -> ' + error.message); + } finally { + res.end(); + } +}); + /* Langchain */ app.get('/langchain/invoke', async (req, res) => { try { From f04582ddbce87ea83853f075ab30f613ce9b13d3 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Tue, 17 Dec 2024 17:22:49 +0100 Subject: [PATCH 13/53] update code --- ...tration-chat-completion-stream-chunk-response.ts | 8 ++++++-- packages/orchestration/src/orchestration-client.ts | 13 ++++++++++++- sample-code/src/orchestration.ts | 4 ++-- sample-code/src/server.ts | 2 +- 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.ts b/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.ts index ad1ca2b0..306f020a 100644 --- a/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.ts +++ b/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.ts @@ -23,7 +23,7 @@ export class OrchestrationChatCompletionStreamChunkResponse { * @returns The finish reason. */ getFinishReason(choiceIndex = 0): string | undefined { - return this.data.orchestration_result?.choices.find( + return this.getChoices()?.find( (c: LLMChoiceStreaming) => c.index === choiceIndex )?.finish_reason; } @@ -34,8 +34,12 @@ export class OrchestrationChatCompletionStreamChunkResponse { * @returns The message delta content. 
*/ getDeltaContent(choiceIndex = 0): string | undefined { - return this.data.orchestration_result?.choices.find( + return this.getChoices()?.find( (c: LLMChoiceStreaming) => c.index === choiceIndex )?.delta.content; } + + private getChoices(): LLMChoiceStreaming[] | undefined { + return this.data.orchestration_result?.choices; + } } diff --git a/packages/orchestration/src/orchestration-client.ts b/packages/orchestration/src/orchestration-client.ts index 493cba7f..cde71537 100644 --- a/packages/orchestration/src/orchestration-client.ts +++ b/packages/orchestration/src/orchestration-client.ts @@ -11,6 +11,7 @@ import type { CompletionPostResponseStreaming } from './client/api/schema/index.js'; import type { + LlmModuleConfig, OrchestrationModuleConfig, Prompt } from './orchestration-types.js'; @@ -118,7 +119,7 @@ export function constructCompletionPostRequest( templating_module_config: { template: config.templating.template }, - llm_module_config: config.llm, + llm_module_config: configureLLM(config.llm, stream), ...(Object.keys(config?.filtering || {}).length && { filtering_module_config: config.filtering }), @@ -138,3 +139,13 @@ export function constructCompletionPostRequest( }) }; } + +function configureLLM(llm: LlmModuleConfig, stream: boolean): LlmModuleConfig { + if (stream) { + llm.model_params = { + ...llm.model_params, + stream_options: { include_usage: true } + }; + } + return llm; +} diff --git a/sample-code/src/orchestration.ts b/sample-code/src/orchestration.ts index 9fbfae7f..d0535f5a 100644 --- a/sample-code/src/orchestration.ts +++ b/sample-code/src/orchestration.ts @@ -59,11 +59,11 @@ export async function chatCompletionStream( }, // define the prompt templating: { - template: [{ role: 'user', content: 'What is the capital of France?' }] + template: [{ role: 'user', content: 'Give me a short introduction of SAP Cloud SDK.' 
}] } }); - const response = orchestrationClient.stream(undefined, undefined, controller); + const response = await orchestrationClient.stream(undefined, undefined, controller); return response; } diff --git a/sample-code/src/server.ts b/sample-code/src/server.ts index 13707a4a..47bb1fcf 100644 --- a/sample-code/src/server.ts +++ b/sample-code/src/server.ts @@ -233,7 +233,7 @@ app.get('/orchestration/:sampleCase', async (req, res) => { } }); -app.get('test/chat-completion-stream', async (req, res) => { +app.get('/orchestration-stream/chat-completion-stream', async (req, res) => { const controller = new AbortController(); try { const response = await orchestrationChatCompletionStream(controller); From 8b5b61e76758e511381821ce7de6bf0ca2c6d0e1 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Tue, 17 Dec 2024 17:59:26 +0100 Subject: [PATCH 14/53] fix streaming --- packages/orchestration/src/orchestration-client.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/orchestration/src/orchestration-client.ts b/packages/orchestration/src/orchestration-client.ts index cde71537..43c7cb4c 100644 --- a/packages/orchestration/src/orchestration-client.ts +++ b/packages/orchestration/src/orchestration-client.ts @@ -96,6 +96,7 @@ export class OrchestrationClient { prompt, { ...requestConfig, + responseType: 'stream', signal: controller.signal }, true From c64d3bd38c21a3c1c563c13b83386d175ff4d2af Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Wed, 18 Dec 2024 15:16:39 +0100 Subject: [PATCH 15/53] add mock responses --- .../orchestration/src/orchestration-client.ts | 4 +- sample-code/src/orchestration.ts | 2 +- sample-code/src/server.ts | 4 +- ...n-stream-chunk-response-delta-content.json | 37 +++++++++++++- ...n-stream-chunk-response-finish-reason.json | 18 ++++++- ...pletion-stream-chunk-response-initial.json | 31 +++++++++++- ...ion-stream-chunk-response-token-usage.json | 14 +++++- ...n-stream-chunk-response-delta-content.json | 39 +++++++++++++++ 
...n-stream-chunk-response-finish-reason.json | 49 +++++++++++++++++++ ...pletion-stream-chunk-response-initial.json | 28 +++++++++++ ...ion-stream-chunk-response-token-usage.json | 49 +++++++++++++++++++ ...stration-chat-completion-stream-chunks.txt | 35 +++++++++++++ 12 files changed, 301 insertions(+), 9 deletions(-) create mode 100644 test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-delta-content.json create mode 100644 test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-finish-reason.json create mode 100644 test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-initial.json create mode 100644 test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-token-usage.json create mode 100644 test-util/data/orchestration/orchestration-chat-completion-stream-chunks.txt diff --git a/packages/orchestration/src/orchestration-client.ts b/packages/orchestration/src/orchestration-client.ts index 43c7cb4c..250bd381 100644 --- a/packages/orchestration/src/orchestration-client.ts +++ b/packages/orchestration/src/orchestration-client.ts @@ -48,8 +48,8 @@ export class OrchestrationClient { async stream( prompt?: Prompt, - requestConfig?: CustomRequestConfig, - controller = new AbortController() + controller = new AbortController(), + requestConfig?: CustomRequestConfig ): Promise< OrchestrationChatCompletionStreamResponse > { diff --git a/sample-code/src/orchestration.ts b/sample-code/src/orchestration.ts index d0535f5a..90b7113c 100644 --- a/sample-code/src/orchestration.ts +++ b/sample-code/src/orchestration.ts @@ -63,7 +63,7 @@ export async function chatCompletionStream( } }); - const response = await orchestrationClient.stream(undefined, undefined, controller); + const response = await orchestrationClient.stream(undefined, controller); return response; } diff --git a/sample-code/src/server.ts b/sample-code/src/server.ts index 47bb1fcf..9fcf5b12 100644 --- 
a/sample-code/src/server.ts +++ b/sample-code/src/server.ts @@ -253,11 +253,11 @@ app.get('/orchestration-stream/chat-completion-stream', async (req, res) => { }); // Stream the delta content. - for await (const chunk of response.stream.toContentStream()) { + for await (const chunk of response.stream) { if (!connectionAlive) { break; } - res.write(chunk); + res.write(chunk.getDeltaContent() + '\n'); } // Write the finish reason and token usage after the stream ends. diff --git a/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-delta-content.json b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-delta-content.json index 4ff90148..90efd5ba 100644 --- a/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-delta-content.json +++ b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-delta-content.json @@ -1 +1,36 @@ -{"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"delta":{"content":" is"},"finish_reason":null,"index":0,"logprobs":null}],"created":1730125149,"id":"chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I","model":"gpt-35-turbo","object":"chat.completion.chunk","system_fingerprint":"fp_808245b034","usage":null} \ No newline at end of file +{ + "choices": [ + { + "content_filter_results": { + "hate": { + "filtered": false, + "severity": "safe" + }, + "self_harm": { + "filtered": false, + "severity": "safe" + }, + "sexual": { + "filtered": false, + "severity": "safe" + }, + "violence": { + "filtered": false, + "severity": "safe" + } + }, + "delta": { + "content": " is" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1730125149, + "id": "chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I", + "model": "gpt-35-turbo", + "object": 
"chat.completion.chunk", + "system_fingerprint": "fp_808245b034", + "usage": null +} \ No newline at end of file diff --git a/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-finish-reason.json b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-finish-reason.json index 4aeae959..78d3dbfc 100644 --- a/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-finish-reason.json +++ b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-finish-reason.json @@ -1 +1,17 @@ -{"choices":[{"content_filter_results":{},"delta":{},"finish_reason":"stop","index":0,"logprobs":null}],"created":1730125149,"id":"chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I","model":"gpt-35-turbo","object":"chat.completion.chunk","system_fingerprint":"fp_808245b034","usage":null} \ No newline at end of file +{ + "choices": [ + { + "content_filter_results": {}, + "delta": {}, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 1730125149, + "id": "chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I", + "model": "gpt-35-turbo", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_808245b034", + "usage": null +} \ No newline at end of file diff --git a/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-initial.json b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-initial.json index af38e868..e7704f46 100644 --- a/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-initial.json +++ b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-initial.json @@ -1 +1,30 @@ 
-{"choices":[],"created":0,"id":"","model":"","object":"","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}]} \ No newline at end of file +{ + "choices": [], + "created": 0, + "id": "", + "model": "", + "object": "", + "prompt_filter_results": [ + { + "prompt_index": 0, + "content_filter_results": { + "hate": { + "filtered": false, + "severity": "safe" + }, + "self_harm": { + "filtered": false, + "severity": "safe" + }, + "sexual": { + "filtered": false, + "severity": "safe" + }, + "violence": { + "filtered": false, + "severity": "safe" + } + } + } + ] +} \ No newline at end of file diff --git a/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-token-usage.json b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-token-usage.json index 558fe0c5..3c384c75 100644 --- a/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-token-usage.json +++ b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-token-usage.json @@ -1 +1,13 @@ -{"choices":[],"created":1730125149,"id":"chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I","model":"gpt-35-turbo","object":"chat.completion.chunk","system_fingerprint":"fp_808245b034","usage":{"completion_tokens":7,"prompt_tokens":14,"total_tokens":21}} \ No newline at end of file +{ + "choices": [], + "created": 1730125149, + "id": "chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I", + "model": "gpt-35-turbo", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_808245b034", + "usage": { + "completion_tokens": 7, + "prompt_tokens": 14, + "total_tokens": 21 + } +} \ No newline at end of file diff --git a/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-delta-content.json 
b/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-delta-content.json new file mode 100644 index 00000000..27f0166b --- /dev/null +++ b/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-delta-content.json @@ -0,0 +1,39 @@ +{ + "request_id": "ceaa358c-48b8-4ce1-8a62-b0c47675fc9c", + "module_results": { + "llm": { + "id": "chatcmpl-AfmsPYkaH9uHogKZusAaVPC3zSNys", + "object": "chat.completion.chunk", + "created": 1734522693, + "model": "gpt-4o-2024-08-06", + "system_fingerprint": "fp_4e924a4b48", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "content": "rimarily focusing on Java and JavaScript/Node.js environments, allowing developers to work in their " + }, + "finish_reason": "" + } + ] + } + }, + "orchestration_result": { + "id": "chatcmpl-AfmsPYkaH9uHogKZusAaVPC3zSNys", + "object": "chat.completion.chunk", + "created": 1734522693, + "model": "gpt-4o-2024-08-06", + "system_fingerprint": "fp_4e924a4b48", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "content": "rimarily focusing on Java and JavaScript/Node.js environments, allowing developers to work in their " + }, + "finish_reason": "" + } + ] + } +} \ No newline at end of file diff --git a/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-finish-reason.json b/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-finish-reason.json new file mode 100644 index 00000000..6da1a7ab --- /dev/null +++ b/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-finish-reason.json @@ -0,0 +1,49 @@ +{ + "request_id": "66172762-8c47-4438-89e7-2689be8f370b", + "module_results": { + "llm": { + "id": "chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp", + "object": "chat.completion.chunk", + "created": 1734524005, + "model": "gpt-4o-2024-08-06", + "system_fingerprint": "fp_4e924a4b48", + "choices": [ + { + "index": 0, + "delta": { + "role": 
"assistant", + "content": "rate with SAP's enterprise solutions." + }, + "finish_reason": "stop" + } + ], + "usage": { + "completion_tokens": 271, + "prompt_tokens": 17, + "total_tokens": 288 + } + } + }, + "orchestration_result": { + "id": "chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp", + "object": "chat.completion.chunk", + "created": 1734524005, + "model": "gpt-4o-2024-08-06", + "system_fingerprint": "fp_4e924a4b48", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "content": "rate with SAP's enterprise solutions." + }, + "finish_reason": "stop" + } + ], + "usage": { + "completion_tokens": 271, + "prompt_tokens": 17, + "total_tokens": 288 + } + } +} \ No newline at end of file diff --git a/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-initial.json b/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-initial.json new file mode 100644 index 00000000..3798f75b --- /dev/null +++ b/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-initial.json @@ -0,0 +1,28 @@ +{ + "request_id": "ceaa358c-48b8-4ce1-8a62-b0c47675fc9c", + "module_results": { + "templating": [ + { + "role": "user", + "content": "Give me a short introduction of SAP Cloud SDK." 
+ } + ] + }, + "orchestration_result": { + "id": "", + "object": "", + "created": 0, + "model": "", + "system_fingerprint": "", + "choices": [ + { + "index": 0, + "delta": { + "role": "", + "content": "" + }, + "finish_reason": "" + } + ] + } +} \ No newline at end of file diff --git a/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-token-usage.json b/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-token-usage.json new file mode 100644 index 00000000..6da1a7ab --- /dev/null +++ b/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-token-usage.json @@ -0,0 +1,49 @@ +{ + "request_id": "66172762-8c47-4438-89e7-2689be8f370b", + "module_results": { + "llm": { + "id": "chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp", + "object": "chat.completion.chunk", + "created": 1734524005, + "model": "gpt-4o-2024-08-06", + "system_fingerprint": "fp_4e924a4b48", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "content": "rate with SAP's enterprise solutions." + }, + "finish_reason": "stop" + } + ], + "usage": { + "completion_tokens": 271, + "prompt_tokens": 17, + "total_tokens": 288 + } + } + }, + "orchestration_result": { + "id": "chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp", + "object": "chat.completion.chunk", + "created": 1734524005, + "model": "gpt-4o-2024-08-06", + "system_fingerprint": "fp_4e924a4b48", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "content": "rate with SAP's enterprise solutions." 
+ }, + "finish_reason": "stop" + } + ], + "usage": { + "completion_tokens": 271, + "prompt_tokens": 17, + "total_tokens": 288 + } + } +} \ No newline at end of file diff --git a/test-util/data/orchestration/orchestration-chat-completion-stream-chunks.txt b/test-util/data/orchestration/orchestration-chat-completion-stream-chunks.txt new file mode 100644 index 00000000..8af9a41b --- /dev/null +++ b/test-util/data/orchestration/orchestration-chat-completion-stream-chunks.txt @@ -0,0 +1,35 @@ +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"templating":[{"role":"user","content":"Give me a short introduction of SAP Cloud SDK."}]},"orchestration_result":{"id":"","object":"","created":0,"model":"","system_fingerprint":"","choices":[{"index":0,"delta":{"role":"","content":""},"finish_reason":""}]}}} + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"The SAP Cloud SDK is a comprehensive development toolkit designed to simplify and accelerate the cre"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"The SAP Cloud SDK is a comprehensive development toolkit designed to simplify and accelerate the cre"},"finish_reason":""}]}}} + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"ation of applications that integrate 
with SAP solutions, particularly those built on the SAP Busines"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"ation of applications that integrate with SAP solutions, particularly those built on the SAP Busines"},"finish_reason":""}]}}} + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"s Technology Platform (BTP). It provides developers with libraries, tools, and best practices that s"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"s Technology Platform (BTP). 
It provides developers with libraries, tools, and best practices that s"},"finish_reason":""}]}}} + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"treamline the process of connecting to SAP systems, such as S/4HANA and other services available on "},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"treamline the process of connecting to SAP systems, such as S/4HANA and other services available on "},"finish_reason":""}]}}} + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"the SAP Cloud Platform.\n\nKey features of the SAP Cloud SDK include:\n\n1. **Simplified Connectivity**:"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"the SAP Cloud Platform.\n\nKey features of the SAP Cloud SDK include:\n\n1. 
**Simplified Connectivity**:"},"finish_reason":""}]}}} + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" The SDK offers pre-built libraries to easily interact with SAP services, providing capabilities for"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" The SDK offers pre-built libraries to easily interact with SAP services, providing capabilities for"},"finish_reason":""}]}}} + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" authentication, service consumption, and OData/REST client generation.\n\n2. **Multi-cloud Support**:"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" authentication, service consumption, and OData/REST client generation.\n\n2. 
**Multi-cloud Support**:"},"finish_reason":""}]}}} + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" It supports multiple cloud environments, ensuring that applications remain flexible and can be depl"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" It supports multiple cloud environments, ensuring that applications remain flexible and can be depl"},"finish_reason":""}]}}} + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"oyed across various cloud providers.\n\n3. **Best Practices and Guidelines**: The SDK includes best pr"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"oyed across various cloud providers.\n\n3. 
**Best Practices and Guidelines**: The SDK includes best pr"},"finish_reason":""}]}}} + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"actices for development, ensuring high-quality, scalable, and maintainable code.\n\n4. **Project Scaff"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"actices for development, ensuring high-quality, scalable, and maintainable code.\n\n4. **Project Scaff"},"finish_reason":""}]}}} + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"olding and Code Samples**: Developers can quickly start their projects using provided templates and "},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"olding and Code Samples**: Developers can quickly start their projects using provided templates and "},"finish_reason":""}]}}} + +data: 
{"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"samples, accelerating the development process and reducing the learning curve.\n\n5. **Extensive Docum"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"samples, accelerating the development process and reducing the learning curve.\n\n5. **Extensive Docum"},"finish_reason":""}]}}} + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"entation and Community Support**: Ample documentation, tutorials, and an active community help devel"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"entation and Community Support**: Ample documentation, tutorials, and an active community help devel"},"finish_reason":""}]}}} + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"opers overcome challenges and adopt the SDK 
efficiently.\n\nOverall, the SAP Cloud SDK is an essential"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"opers overcome challenges and adopt the SDK efficiently.\n\nOverall, the SAP Cloud SDK is an essential"},"finish_reason":""}]}}} + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" tool for developers looking to build cloud-native applications and extensions that seamlessly integ"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" tool for developers looking to build cloud-native applications and extensions that seamlessly integ"},"finish_reason":""}]}}} + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"rate with SAP's enterprise solutions."},"finish_reason":"stop"}],"usage":{"completion_tokens":271,"prompt_tokens":17,"total_tokens":288}}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"rate with SAP's 
enterprise solutions."},"finish_reason":"stop"}],"usage":{"completion_tokens":271,"prompt_tokens":17,"total_tokens":288}}}} + +data: [DONE] \ No newline at end of file From 032f46f74777d8045442546497c1ac5c77108d3a Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Wed, 18 Dec 2024 16:27:16 +0100 Subject: [PATCH 16/53] update test --- ...ration-chat-completion-stream.test.ts.snap | 37 +++++++++++++ ...t-completion-stream-chunk-response.test.ts | 32 +++++------ ...chestration-chat-completion-stream.test.ts | 18 +++---- ...stration-chat-completion-stream-chunks.txt | 54 ++++++++++++------- ...tion-chat-completion-success-response.json | 53 ------------------ test-util/data/orchestration/test.json | 49 +++++++++++++++++ 6 files changed, 148 insertions(+), 95 deletions(-) create mode 100644 packages/orchestration/src/__snapshots__/orchestration-chat-completion-stream.test.ts.snap delete mode 100644 test-util/data/orchestration/orchestration-chat-completion-success-response.json create mode 100644 test-util/data/orchestration/test.json diff --git a/packages/orchestration/src/__snapshots__/orchestration-chat-completion-stream.test.ts.snap b/packages/orchestration/src/__snapshots__/orchestration-chat-completion-stream.test.ts.snap new file mode 100644 index 00000000..6048c4ef --- /dev/null +++ b/packages/orchestration/src/__snapshots__/orchestration-chat-completion-stream.test.ts.snap @@ -0,0 +1,37 @@ +// Jest Snapshot v1, https://goo.gl/fbAQLP + +exports[`Orchestration chat completion stream should transform the original stream to string stream 1`] = ` +"The SAP Cloud SDK is a comprehensive development toolkit designed to simplify and accelerate the creation of applications that integrate with SAP solutions, particularly those built on the SAP Business Technology Platform (BTP). It provides developers with libraries, tools, and best practices that streamline the process of connecting to SAP systems, such as S/4HANA and other services available on the SAP Cloud Platform. 
+ +Key features of the SAP Cloud SDK include: + +1. **Simplified Connectivity**: The SDK offers pre-built libraries to easily interact with SAP services, providing capabilities for authentication, service consumption, and OData/REST client generation. + +2. **Multi-cloud Support**: It supports multiple cloud environments, ensuring that applications remain flexible and can be deployed across various cloud providers. + +3. **Best Practices and Guidelines**: The SDK includes best practices for development, ensuring high-quality, scalable, and maintainable code. + +4. **Project Scaffolding and Code Samples**: Developers can quickly start their projects using provided templates and samples, accelerating the development process and reducing the learning curve. + +5. **Extensive Documentation and Community Support**: Ample documentation, tutorials, and an active community help developers overcome challenges and adopt the SDK efficiently. + +Overall, the SAP Cloud SDK is an essential tool for developers looking to build cloud-native applications and extensions that seamlessly integrate with SAP's enterprise solutions." +`; + +exports[`Orchestration chat completion stream should wrap the raw chunk 1`] = ` +"The SAP Cloud SDK is a comprehensive development toolkit designed to simplify and accelerate the creation of applications that integrate with SAP solutions, particularly those built on the SAP Business Technology Platform (BTP). It provides developers with libraries, tools, and best practices that streamline the process of connecting to SAP systems, such as S/4HANA and other services available on the SAP Cloud Platform. + +Key features of the SAP Cloud SDK include: + +1. **Simplified Connectivity**: The SDK offers pre-built libraries to easily interact with SAP services, providing capabilities for authentication, service consumption, and OData/REST client generation. + +2. 
**Multi-cloud Support**: It supports multiple cloud environments, ensuring that applications remain flexible and can be deployed across various cloud providers. + +3. **Best Practices and Guidelines**: The SDK includes best practices for development, ensuring high-quality, scalable, and maintainable code. + +4. **Project Scaffolding and Code Samples**: Developers can quickly start their projects using provided templates and samples, accelerating the development process and reducing the learning curve. + +5. **Extensive Documentation and Community Support**: Ample documentation, tutorials, and an active community help developers overcome challenges and adopt the SDK efficiently. + +Overall, the SAP Cloud SDK is an essential tool for developers looking to build cloud-native applications and extensions that seamlessly integrate with SAP's enterprise solutions." +`; diff --git a/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.test.ts b/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.test.ts index 6556f9b3..3a4bc4da 100644 --- a/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.test.ts +++ b/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.test.ts @@ -7,7 +7,7 @@ describe('OpenAI chat completion stream chunk response', () => { finishReasonResponse: any; deltaContentResponse: any; }; - let azureOpenAiChatCompletionStreamChunkResponses: { + let orchestrationChatCompletionStreamChunkResponses: { tokenUsageResponse: OrchestrationChatCompletionStreamChunkResponse; finishReasonResponse: OrchestrationChatCompletionStreamChunkResponse; deltaContentResponse: OrchestrationChatCompletionStreamChunkResponse; @@ -16,19 +16,19 @@ describe('OpenAI chat completion stream chunk response', () => { beforeAll(async () => { mockResponses = { tokenUsageResponse: await parseMockResponse( - 'foundation-models', - 
'azure-openai-chat-completion-stream-chunk-response-token-usage.json' + 'orchestration', + 'orchestration-chat-completion-stream-chunk-response-token-usage.json' ), finishReasonResponse: await parseMockResponse( - 'foundation-models', - 'azure-openai-chat-completion-stream-chunk-response-finish-reason.json' + 'orchestration', + 'orchestration-chat-completion-stream-chunk-response-finish-reason.json' ), deltaContentResponse: await parseMockResponse( - 'foundation-models', - 'azure-openai-chat-completion-stream-chunk-response-delta-content.json' + 'orchestration', + 'orchestration-chat-completion-stream-chunk-response-delta-content.json' ) }; - azureOpenAiChatCompletionStreamChunkResponses = { + orchestrationChatCompletionStreamChunkResponses = { tokenUsageResponse: new OrchestrationChatCompletionStreamChunkResponse( mockResponses.tokenUsageResponse ), @@ -43,19 +43,19 @@ describe('OpenAI chat completion stream chunk response', () => { it('should return the chat completion stream chunk response', () => { expect( - azureOpenAiChatCompletionStreamChunkResponses.tokenUsageResponse.data + orchestrationChatCompletionStreamChunkResponses.tokenUsageResponse.data ).toStrictEqual(mockResponses.tokenUsageResponse); expect( - azureOpenAiChatCompletionStreamChunkResponses.finishReasonResponse.data + orchestrationChatCompletionStreamChunkResponses.finishReasonResponse.data ).toStrictEqual(mockResponses.finishReasonResponse); expect( - azureOpenAiChatCompletionStreamChunkResponses.deltaContentResponse.data + orchestrationChatCompletionStreamChunkResponses.deltaContentResponse.data ).toStrictEqual(mockResponses.deltaContentResponse); }); it('should get token usage', () => { expect( - azureOpenAiChatCompletionStreamChunkResponses.tokenUsageResponse.getTokenUsage() + orchestrationChatCompletionStreamChunkResponses.tokenUsageResponse.getTokenUsage() ).toMatchObject({ completion_tokens: expect.any(Number), prompt_tokens: expect.any(Number), @@ -65,13 +65,15 @@ describe('OpenAI chat 
completion stream chunk response', () => { it('should return finish reason', () => { expect( - azureOpenAiChatCompletionStreamChunkResponses.finishReasonResponse.getFinishReason() + orchestrationChatCompletionStreamChunkResponses.finishReasonResponse.getFinishReason() ).toBe('stop'); }); it('should return delta content with default index 0', () => { expect( - azureOpenAiChatCompletionStreamChunkResponses.deltaContentResponse.getDeltaContent() - ).toBe(' is'); + orchestrationChatCompletionStreamChunkResponses.deltaContentResponse.getDeltaContent() + ).toMatchInlineSnapshot( + '"rimarily focusing on Java and JavaScript/Node.js environments, allowing developers to work in their "' + ); }); }); diff --git a/packages/orchestration/src/orchestration-chat-completion-stream.test.ts b/packages/orchestration/src/orchestration-chat-completion-stream.test.ts index 592eca7b..73613a9f 100644 --- a/packages/orchestration/src/orchestration-chat-completion-stream.test.ts +++ b/packages/orchestration/src/orchestration-chat-completion-stream.test.ts @@ -5,14 +5,14 @@ import { parseFileToString } from '../../../test-util/mock-http.js'; import { OrchestrationChatCompletionStream } from './orchestration-chat-completion-stream.js'; import type { CompletionPostResponseStreaming } from './client/api/schema/index.js'; -describe('OpenAI chat completion stream', () => { +describe('Orchestration chat completion stream', () => { let sseChunks: string[]; let originalChatCompletionStream: OrchestrationChatCompletionStream; beforeEach(async () => { const rawChunksString = await parseFileToString( - 'foundation-models', - 'azure-openai-chat-completion-stream-chunks.txt' + 'orchestration', + 'orchestration-chat-completion-stream-chunks.txt' ); const lineDecoder = new LineDecoder(); const sseDecoder = new SSEDecoder(); @@ -46,13 +46,13 @@ describe('OpenAI chat completion stream', () => { expect(chunk).toBeDefined(); chunk.getDeltaContent() ? 
(output += chunk.getDeltaContent()) : null; } - expect(output).toEqual('The capital of France is Paris.'); + expect(output).toMatchSnapshot(); }); it('should process the finish reasons', async () => { const logger = createLogger({ - package: 'foundation-models', - messageContext: 'azure-openai-chat-completion-stream' + package: 'orchestration', + messageContext: 'orchestration-chat-completion-stream' }); const debugSpy = jest.spyOn(logger, 'debug'); const asyncGeneratorChunk = OrchestrationChatCompletionStream._processChunk( @@ -74,8 +74,8 @@ describe('OpenAI chat completion stream', () => { it('should process the token usage', async () => { const logger = createLogger({ - package: 'foundation-models', - messageContext: 'azure-openai-chat-completion-stream' + package: 'orchestration', + messageContext: 'orchestration-chat-completion-stream' }); const debugSpy = jest.spyOn(logger, 'debug'); const asyncGeneratorChunk = OrchestrationChatCompletionStream._processChunk( @@ -111,6 +111,6 @@ describe('OpenAI chat completion stream', () => { expect(typeof chunk).toBe('string'); output += chunk; } - expect(output).toEqual('The capital of France is Paris.'); + expect(output).toMatchSnapshot(); }); }); diff --git a/test-util/data/orchestration/orchestration-chat-completion-stream-chunks.txt b/test-util/data/orchestration/orchestration-chat-completion-stream-chunks.txt index 8af9a41b..d139b84d 100644 --- a/test-util/data/orchestration/orchestration-chat-completion-stream-chunks.txt +++ b/test-util/data/orchestration/orchestration-chat-completion-stream-chunks.txt @@ -1,35 +1,53 @@ -data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"templating":[{"role":"user","content":"Give me a short introduction of SAP Cloud SDK."}]},"orchestration_result":{"id":"","object":"","created":0,"model":"","system_fingerprint":"","choices":[{"index":0,"delta":{"role":"","content":""},"finish_reason":""}]}}} +data: 
{"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"templating":[{"role":"user","content":"Give me a short introduction of SAP Cloud SDK."}]},"orchestration_result":{"id":"","object":"","created":0,"model":"","system_fingerprint":"","choices":[{"index":0,"delta":{"role":"","content":""},"finish_reason":""}]}} -data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"The SAP Cloud SDK is a comprehensive development toolkit designed to simplify and accelerate the cre"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"The SAP Cloud SDK is a comprehensive development toolkit designed to simplify and accelerate the cre"},"finish_reason":""}]}}} -data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"ation of applications that integrate with SAP solutions, particularly those built on the SAP Busines"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"ation of applications that integrate with SAP solutions, particularly those built on the SAP Busines"},"finish_reason":""}]}}} +data: 
{"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"The SAP Cloud SDK is a comprehensive development toolkit designed to simplify and accelerate the cre"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"The SAP Cloud SDK is a comprehensive development toolkit designed to simplify and accelerate the cre"},"finish_reason":""}]}} -data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"s Technology Platform (BTP). It provides developers with libraries, tools, and best practices that s"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"s Technology Platform (BTP). 
It provides developers with libraries, tools, and best practices that s"},"finish_reason":""}]}}} -data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"treamline the process of connecting to SAP systems, such as S/4HANA and other services available on "},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"treamline the process of connecting to SAP systems, such as S/4HANA and other services available on "},"finish_reason":""}]}}} +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"ation of applications that integrate with SAP solutions, particularly those built on the SAP Busines"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"ation of applications that integrate with SAP solutions, particularly those built on the SAP Busines"},"finish_reason":""}]}} -data: 
{"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"the SAP Cloud Platform.\n\nKey features of the SAP Cloud SDK include:\n\n1. **Simplified Connectivity**:"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"the SAP Cloud Platform.\n\nKey features of the SAP Cloud SDK include:\n\n1. **Simplified Connectivity**:"},"finish_reason":""}]}}} -data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" The SDK offers pre-built libraries to easily interact with SAP services, providing capabilities for"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" The SDK offers pre-built libraries to easily interact with SAP services, providing capabilities for"},"finish_reason":""}]}}} +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"s Technology Platform (BTP). 
It provides developers with libraries, tools, and best practices that s"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"s Technology Platform (BTP). It provides developers with libraries, tools, and best practices that s"},"finish_reason":""}]}} -data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" authentication, service consumption, and OData/REST client generation.\n\n2. **Multi-cloud Support**:"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" authentication, service consumption, and OData/REST client generation.\n\n2. 
**Multi-cloud Support**:"},"finish_reason":""}]}}} -data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" It supports multiple cloud environments, ensuring that applications remain flexible and can be depl"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" It supports multiple cloud environments, ensuring that applications remain flexible and can be depl"},"finish_reason":""}]}}} +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"treamline the process of connecting to SAP systems, such as S/4HANA and other services available on "},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"treamline the process of connecting to SAP systems, such as S/4HANA and other services available on "},"finish_reason":""}]}} -data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"oyed 
across various cloud providers.\n\n3. **Best Practices and Guidelines**: The SDK includes best pr"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"oyed across various cloud providers.\n\n3. **Best Practices and Guidelines**: The SDK includes best pr"},"finish_reason":""}]}}} -data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"actices for development, ensuring high-quality, scalable, and maintainable code.\n\n4. **Project Scaff"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"actices for development, ensuring high-quality, scalable, and maintainable code.\n\n4. **Project Scaff"},"finish_reason":""}]}}} +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"the SAP Cloud Platform.\n\nKey features of the SAP Cloud SDK include:\n\n1. 
**Simplified Connectivity**:"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"the SAP Cloud Platform.\n\nKey features of the SAP Cloud SDK include:\n\n1. **Simplified Connectivity**:"},"finish_reason":""}]}} -data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"olding and Code Samples**: Developers can quickly start their projects using provided templates and "},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"olding and Code Samples**: Developers can quickly start their projects using provided templates and "},"finish_reason":""}]}}} -data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"samples, accelerating the development process and reducing the learning curve.\n\n5. 
**Extensive Docum"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"samples, accelerating the development process and reducing the learning curve.\n\n5. **Extensive Docum"},"finish_reason":""}]}}} +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" The SDK offers pre-built libraries to easily interact with SAP services, providing capabilities for"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" The SDK offers pre-built libraries to easily interact with SAP services, providing capabilities for"},"finish_reason":""}]}} -data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"entation and Community Support**: Ample documentation, tutorials, and an active community help devel"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"entation and Community Support**: Ample documentation, tutorials, and an 
active community help devel"},"finish_reason":""}]}}} -data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"opers overcome challenges and adopt the SDK efficiently.\n\nOverall, the SAP Cloud SDK is an essential"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"opers overcome challenges and adopt the SDK efficiently.\n\nOverall, the SAP Cloud SDK is an essential"},"finish_reason":""}]}}} +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" authentication, service consumption, and OData/REST client generation.\n\n2. **Multi-cloud Support**:"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" authentication, service consumption, and OData/REST client generation.\n\n2. 
**Multi-cloud Support**:"},"finish_reason":""}]}} -data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" tool for developers looking to build cloud-native applications and extensions that seamlessly integ"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" tool for developers looking to build cloud-native applications and extensions that seamlessly integ"},"finish_reason":""}]}}} -data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"rate with SAP's enterprise solutions."},"finish_reason":"stop"}],"usage":{"completion_tokens":271,"prompt_tokens":17,"total_tokens":288}}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"rate with SAP's enterprise solutions."},"finish_reason":"stop"}],"usage":{"completion_tokens":271,"prompt_tokens":17,"total_tokens":288}}}} +data: 
{"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" It supports multiple cloud environments, ensuring that applications remain flexible and can be depl"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" It supports multiple cloud environments, ensuring that applications remain flexible and can be depl"},"finish_reason":""}]}} + + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"oyed across various cloud providers.\n\n3. **Best Practices and Guidelines**: The SDK includes best pr"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"oyed across various cloud providers.\n\n3. 
**Best Practices and Guidelines**: The SDK includes best pr"},"finish_reason":""}]}} + + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"actices for development, ensuring high-quality, scalable, and maintainable code.\n\n4. **Project Scaff"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"actices for development, ensuring high-quality, scalable, and maintainable code.\n\n4. **Project Scaff"},"finish_reason":""}]}} + + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"olding and Code Samples**: Developers can quickly start their projects using provided templates and "},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"olding and Code Samples**: Developers can quickly start their projects using provided templates and "},"finish_reason":""}]}} + + +data: 
{"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"samples, accelerating the development process and reducing the learning curve.\n\n5. **Extensive Docum"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"samples, accelerating the development process and reducing the learning curve.\n\n5. **Extensive Docum"},"finish_reason":""}]}} + + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"entation and Community Support**: Ample documentation, tutorials, and an active community help devel"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"entation and Community Support**: Ample documentation, tutorials, and an active community help devel"},"finish_reason":""}]}} + + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"opers overcome challenges and adopt the SDK 
efficiently.\n\nOverall, the SAP Cloud SDK is an essential"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"opers overcome challenges and adopt the SDK efficiently.\n\nOverall, the SAP Cloud SDK is an essential"},"finish_reason":""}]}} + + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" tool for developers looking to build cloud-native applications and extensions that seamlessly integ"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" tool for developers looking to build cloud-native applications and extensions that seamlessly integ"},"finish_reason":""}]}} + + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"rate with SAP's enterprise solutions."},"finish_reason":"stop"}],"usage":{"completion_tokens":271,"prompt_tokens":17,"total_tokens":288}}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"rate with SAP's 
enterprise solutions."},"finish_reason":"stop"}],"usage":{"completion_tokens":271,"prompt_tokens":17,"total_tokens":288}}} + + +data: [DONE] -data: [DONE] \ No newline at end of file diff --git a/test-util/data/orchestration/orchestration-chat-completion-success-response.json b/test-util/data/orchestration/orchestration-chat-completion-success-response.json deleted file mode 100644 index 884a19f5..00000000 --- a/test-util/data/orchestration/orchestration-chat-completion-success-response.json +++ /dev/null @@ -1,53 +0,0 @@ -{ - "request_id": "request-id", - "module_results": { - "templating": [ - { - "role": "user", - "content": "Hello!" - } - ], - "llm": { - "id": "llm-id", - "object": "chat.completion", - "created": 172, - "model": "gpt-35-turbo", - "choices": [ - { - "index": 0, - "message": { - "role": "assistant", - "content": "Hello! How can I assist you today?" - }, - "finish_reason": "stop" - } - ], - "usage": { - "completion_tokens": 9, - "prompt_tokens": 9, - "total_tokens": 18 - } - } - }, - "orchestration_result": { - "id": "orchestration-id", - "object": "chat.completion", - "created": 172, - "model": "gpt-35-turbo", - "choices": [ - { - "index": 0, - "message": { - "role": "assistant", - "content": "Hello! How can I assist you today?" 
- }, - "finish_reason": "stop" - } - ], - "usage": { - "completion_tokens": 9, - "prompt_tokens": 9, - "total_tokens": 18 - } - } -} diff --git a/test-util/data/orchestration/test.json b/test-util/data/orchestration/test.json new file mode 100644 index 00000000..6da1a7ab --- /dev/null +++ b/test-util/data/orchestration/test.json @@ -0,0 +1,49 @@ +{ + "request_id": "66172762-8c47-4438-89e7-2689be8f370b", + "module_results": { + "llm": { + "id": "chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp", + "object": "chat.completion.chunk", + "created": 1734524005, + "model": "gpt-4o-2024-08-06", + "system_fingerprint": "fp_4e924a4b48", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "content": "rate with SAP's enterprise solutions." + }, + "finish_reason": "stop" + } + ], + "usage": { + "completion_tokens": 271, + "prompt_tokens": 17, + "total_tokens": 288 + } + } + }, + "orchestration_result": { + "id": "chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp", + "object": "chat.completion.chunk", + "created": 1734524005, + "model": "gpt-4o-2024-08-06", + "system_fingerprint": "fp_4e924a4b48", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "content": "rate with SAP's enterprise solutions." 
+ }, + "finish_reason": "stop" + } + ], + "usage": { + "completion_tokens": 271, + "prompt_tokens": 17, + "total_tokens": 288 + } + } +} \ No newline at end of file From 728ff82f37542f14525e8d112cc61e3829d3df54 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Wed, 18 Dec 2024 16:27:37 +0100 Subject: [PATCH 17/53] update test --- test-util/data/orchestration/test.json | 49 -------------------------- 1 file changed, 49 deletions(-) delete mode 100644 test-util/data/orchestration/test.json diff --git a/test-util/data/orchestration/test.json b/test-util/data/orchestration/test.json deleted file mode 100644 index 6da1a7ab..00000000 --- a/test-util/data/orchestration/test.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "request_id": "66172762-8c47-4438-89e7-2689be8f370b", - "module_results": { - "llm": { - "id": "chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp", - "object": "chat.completion.chunk", - "created": 1734524005, - "model": "gpt-4o-2024-08-06", - "system_fingerprint": "fp_4e924a4b48", - "choices": [ - { - "index": 0, - "delta": { - "role": "assistant", - "content": "rate with SAP's enterprise solutions." - }, - "finish_reason": "stop" - } - ], - "usage": { - "completion_tokens": 271, - "prompt_tokens": 17, - "total_tokens": 288 - } - } - }, - "orchestration_result": { - "id": "chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp", - "object": "chat.completion.chunk", - "created": 1734524005, - "model": "gpt-4o-2024-08-06", - "system_fingerprint": "fp_4e924a4b48", - "choices": [ - { - "index": 0, - "delta": { - "role": "assistant", - "content": "rate with SAP's enterprise solutions." 
- }, - "finish_reason": "stop" - } - ], - "usage": { - "completion_tokens": 271, - "prompt_tokens": 17, - "total_tokens": 288 - } - } -} \ No newline at end of file From 4180d545519a40790bbe34c1f635f5806b268527 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Wed, 18 Dec 2024 16:57:29 +0100 Subject: [PATCH 18/53] add last test --- .../src/orchestration-client.test.ts | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/packages/orchestration/src/orchestration-client.test.ts b/packages/orchestration/src/orchestration-client.test.ts index 4dd6b60d..512f8804 100644 --- a/packages/orchestration/src/orchestration-client.test.ts +++ b/packages/orchestration/src/orchestration-client.test.ts @@ -3,6 +3,7 @@ import { mockClientCredentialsGrantCall, mockDeploymentsList, mockInference, + parseFileToString, parseMockResponse } from '../../../test-util/mock-http.js'; import { @@ -337,4 +338,45 @@ describe('orchestration service client', () => { const response = await clientWithResourceGroup.chatCompletion(prompt); expect(response.data).toEqual(mockResponse); }); + + it('executes a streaming request with correct chunk response', async () => { + const config: OrchestrationModuleConfig = { + llm: { + model_name: 'gpt-4o', + model_params: {} + }, + templating: { + template: [{ role: 'user', content: 'Give me a short introduction of SAP Cloud SDK.' 
}] + } + }; + + const mockResponse = await parseFileToString( + 'orchestration', + 'orchestration-chat-completion-stream-chunks.txt' + ); + + mockInference( + { + data: constructCompletionPostRequest(config, undefined, true) + }, + { + data: mockResponse, + status: 200 + }, + { + url: 'inference/deployments/1234/completion' + } + ); + const response = await new OrchestrationClient(config).stream(); + + const initialResponse = await parseFileToString( + 'orchestration', + 'orchestration-chat-completion-stream-chunk-response-initial.json' + ); + + for await (const chunk of response.stream) { + expect(JSON.stringify(chunk.data)).toEqual(initialResponse); + break; + } + }); }); From 94d9c35f008eca124fd2b0e54f6be51bb844aee6 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Thu, 19 Dec 2024 13:41:03 +0100 Subject: [PATCH 19/53] fix tests --- .../azure-openai-chat-client.test.ts | 2 +- .../src/orchestration-client.test.ts | 2 +- ...hestration-completion-post-request.test.ts | 10 +++- sample-code/src/orchestration.ts | 2 +- ...pletion-stream-chunk-response-initial.json | 2 +- ...tion-chat-completion-success-response.json | 53 +++++++++++++++++++ 6 files changed, 66 insertions(+), 5 deletions(-) create mode 100644 test-util/data/orchestration/orchestration-chat-completion-success-response.json diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.test.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.test.ts index 1310254d..ebfae76a 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.test.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.test.ts @@ -198,7 +198,7 @@ describe('Azure OpenAI chat client', () => { const response = await client.stream(prompt); for await (const chunk of response.stream) { - expect(JSON.stringify(chunk.data)).toEqual(initialResponse); + expect(chunk.data).toEqual(JSON.parse(initialResponse)); break; } }); diff --git 
a/packages/orchestration/src/orchestration-client.test.ts b/packages/orchestration/src/orchestration-client.test.ts index 512f8804..c7e10a82 100644 --- a/packages/orchestration/src/orchestration-client.test.ts +++ b/packages/orchestration/src/orchestration-client.test.ts @@ -375,7 +375,7 @@ describe('orchestration service client', () => { ); for await (const chunk of response.stream) { - expect(JSON.stringify(chunk.data)).toEqual(initialResponse); + expect(chunk.data).toEqual(JSON.parse(initialResponse)); break; } }); diff --git a/packages/orchestration/src/orchestration-completion-post-request.test.ts b/packages/orchestration/src/orchestration-completion-post-request.test.ts index d3a1f8fa..8584c795 100644 --- a/packages/orchestration/src/orchestration-completion-post-request.test.ts +++ b/packages/orchestration/src/orchestration-completion-post-request.test.ts @@ -1,6 +1,6 @@ import { constructCompletionPostRequest } from './orchestration-client.js'; import { buildAzureContentFilter } from './orchestration-filter-utility.js'; -import type { CompletionPostRequest } from './client/api/schema'; +import type { CompletionPostRequest } from './client/api/schema/index.js'; import type { OrchestrationModuleConfig } from './orchestration-types.js'; describe('constructCompletionPostRequest()', () => { @@ -17,6 +17,7 @@ describe('constructCompletionPostRequest()', () => { it('with model configuration and prompt template', async () => { const expectedCompletionPostRequest: CompletionPostRequest = { orchestration_config: { + stream: false, module_configurations: { templating_module_config: defaultConfig.templating, llm_module_config: defaultConfig.llm @@ -36,6 +37,7 @@ describe('constructCompletionPostRequest()', () => { }; const expectedCompletionPostRequest: CompletionPostRequest = { orchestration_config: { + stream: false, module_configurations: { templating_module_config: config.templating, llm_module_config: config.llm @@ -62,6 +64,7 @@ 
describe('constructCompletionPostRequest()', () => { const inputParams = { phrase: 'I hate you.', number: '3' }; const expectedCompletionPostRequest: CompletionPostRequest = { orchestration_config: { + stream: false, module_configurations: { templating_module_config: config.templating, llm_module_config: config.llm @@ -89,6 +92,7 @@ describe('constructCompletionPostRequest()', () => { const inputParams = {}; const expectedCompletionPostRequest: CompletionPostRequest = { orchestration_config: { + stream: false, module_configurations: { templating_module_config: config.templating, llm_module_config: config.llm @@ -112,6 +116,7 @@ describe('constructCompletionPostRequest()', () => { }; const expectedCompletionPostRequest: CompletionPostRequest = { orchestration_config: { + stream: false, module_configurations: { templating_module_config: config.templating, llm_module_config: config.llm @@ -148,6 +153,7 @@ describe('constructCompletionPostRequest()', () => { ]; const expectedCompletionPostRequest: CompletionPostRequest = { orchestration_config: { + stream: false, module_configurations: { templating_module_config: config.templating, llm_module_config: config.llm @@ -169,6 +175,7 @@ describe('constructCompletionPostRequest()', () => { }; const expectedCompletionPostRequest: CompletionPostRequest = { orchestration_config: { + stream: false, module_configurations: { templating_module_config: config.templating, llm_module_config: config.llm, @@ -189,6 +196,7 @@ describe('constructCompletionPostRequest()', () => { }; const expectedCompletionPostRequest: CompletionPostRequest = { orchestration_config: { + stream: false, module_configurations: { templating_module_config: config.templating, llm_module_config: config.llm diff --git a/sample-code/src/orchestration.ts b/sample-code/src/orchestration.ts index 90b7113c..45b2dc04 100644 --- a/sample-code/src/orchestration.ts +++ b/sample-code/src/orchestration.ts @@ -54,7 +54,7 @@ export async function chatCompletionStream( const 
orchestrationClient = new OrchestrationClient({ // define the language model to be used llm: { - model_name: 'gpt-4o', + model_name: 'gpt-35-turbo', model_params: {} }, // define the prompt diff --git a/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-initial.json b/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-initial.json index 3798f75b..11430fbf 100644 --- a/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-initial.json +++ b/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-initial.json @@ -1,5 +1,5 @@ { - "request_id": "ceaa358c-48b8-4ce1-8a62-b0c47675fc9c", + "request_id": "66172762-8c47-4438-89e7-2689be8f370b", "module_results": { "templating": [ { diff --git a/test-util/data/orchestration/orchestration-chat-completion-success-response.json b/test-util/data/orchestration/orchestration-chat-completion-success-response.json new file mode 100644 index 00000000..ca4709b5 --- /dev/null +++ b/test-util/data/orchestration/orchestration-chat-completion-success-response.json @@ -0,0 +1,53 @@ +{ + "request_id": "request-id", + "module_results": { + "templating": [ + { + "role": "user", + "content": "Hello!" + } + ], + "llm": { + "id": "llm-id", + "object": "chat.completion", + "created": 172, + "model": "gpt-35-turbo", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Hello! How can I assist you today?" + }, + "finish_reason": "stop" + } + ], + "usage": { + "completion_tokens": 9, + "prompt_tokens": 9, + "total_tokens": 18 + } + } + }, + "orchestration_result": { + "id": "orchestration-id", + "object": "chat.completion", + "created": 172, + "model": "gpt-35-turbo", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Hello! How can I assist you today?" 
+ }, + "finish_reason": "stop" + } + ], + "usage": { + "completion_tokens": 9, + "prompt_tokens": 9, + "total_tokens": 18 + } + } +} \ No newline at end of file From 48b55fe4a96b2ea6bc24a7bda2fdfc91fcb027cf Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Thu, 19 Dec 2024 13:44:23 +0100 Subject: [PATCH 20/53] lint:fix --- ...n-chat-completion-stream-chunk-response.ts | 5 +- ...chestration-chat-completion-stream.test.ts | 4 +- .../orchestration-chat-completion-stream.ts | 67 +++++++++------- .../src/orchestration-client.test.ts | 77 ++++++++++--------- sample-code/src/orchestration.ts | 7 +- 5 files changed, 91 insertions(+), 69 deletions(-) diff --git a/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.ts b/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.ts index 306f020a..545284c7 100644 --- a/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.ts +++ b/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.ts @@ -1,4 +1,7 @@ -import type { CompletionPostResponseStreaming, LLMChoiceStreaming } from './client/api/schema/index.js'; +import type { + CompletionPostResponseStreaming, + LLMChoiceStreaming +} from './client/api/schema/index.js'; import type { TokenUsage } from './index.js'; /** diff --git a/packages/orchestration/src/orchestration-chat-completion-stream.test.ts b/packages/orchestration/src/orchestration-chat-completion-stream.test.ts index 73613a9f..e67d511f 100644 --- a/packages/orchestration/src/orchestration-chat-completion-stream.test.ts +++ b/packages/orchestration/src/orchestration-chat-completion-stream.test.ts @@ -59,7 +59,7 @@ describe('Orchestration chat completion stream', () => { originalChatCompletionStream ); const asyncGeneratorFinishReason = - OrchestrationChatCompletionStream._processFinishReason( + OrchestrationChatCompletionStream._processFinishReason( new OrchestrationChatCompletionStream( () => asyncGeneratorChunk, new 
AbortController() @@ -82,7 +82,7 @@ describe('Orchestration chat completion stream', () => { originalChatCompletionStream ); const asyncGeneratorTokenUsage = - OrchestrationChatCompletionStream._processTokenUsage( + OrchestrationChatCompletionStream._processTokenUsage( new OrchestrationChatCompletionStream( () => asyncGeneratorChunk, new AbortController() diff --git a/packages/orchestration/src/orchestration-chat-completion-stream.ts b/packages/orchestration/src/orchestration-chat-completion-stream.ts index eeff6187..c172928f 100644 --- a/packages/orchestration/src/orchestration-chat-completion-stream.ts +++ b/packages/orchestration/src/orchestration-chat-completion-stream.ts @@ -1,7 +1,10 @@ import { createLogger } from '@sap-cloud-sdk/util'; import { SseStream } from '@sap-ai-sdk/core'; import { OrchestrationChatCompletionStreamChunkResponse } from './orchestration-chat-completion-stream-chunk-response.js'; -import type { CompletionPostResponseStreaming, LLMChoiceStreaming } from './client/api/schema/index.js'; +import type { + CompletionPostResponseStreaming, + LLMChoiceStreaming +} from './client/api/schema/index.js'; import type { HttpResponse } from '@sap-cloud-sdk/http-client'; import type { OrchestrationChatCompletionStreamResponse } from './orchestration-chat-completion-stream-response.js'; @@ -24,7 +27,11 @@ export class OrchestrationChatCompletionStream extends SseStream { response: HttpResponse, controller: AbortController ): OrchestrationChatCompletionStream { - const stream = SseStream.transformToSseStream(response, controller); + const stream = + SseStream.transformToSseStream( + response, + controller + ); return new OrchestrationChatCompletionStream(stream.iterator, controller); } @@ -49,36 +56,38 @@ export class OrchestrationChatCompletionStream extends SseStream { response?: OrchestrationChatCompletionStreamResponse ): AsyncGenerator { for await (const chunk of stream) { - chunk.data.orchestration_result?.choices.forEach((choice: 
LLMChoiceStreaming) => { - const choiceIndex = choice.index; - if (choiceIndex >= 0) { - const finishReason = chunk.getFinishReason(choiceIndex); - if (finishReason) { - if (response) { - response._getFinishReasons().set(choiceIndex, finishReason); - } - switch (finishReason) { - case 'content_filter': - logger.error( - `Choice ${choiceIndex}: Stream finished with content filter hit.` - ); - break; - case 'length': - logger.error( - `Choice ${choiceIndex}: Stream finished with token length exceeded.` - ); - break; - case 'stop': - logger.debug(`Choice ${choiceIndex}: Stream finished.`); - break; - default: - logger.error( - `Choice ${choiceIndex}: Stream finished with unknown reason '${finishReason}'.` - ); + chunk.data.orchestration_result?.choices.forEach( + (choice: LLMChoiceStreaming) => { + const choiceIndex = choice.index; + if (choiceIndex >= 0) { + const finishReason = chunk.getFinishReason(choiceIndex); + if (finishReason) { + if (response) { + response._getFinishReasons().set(choiceIndex, finishReason); + } + switch (finishReason) { + case 'content_filter': + logger.error( + `Choice ${choiceIndex}: Stream finished with content filter hit.` + ); + break; + case 'length': + logger.error( + `Choice ${choiceIndex}: Stream finished with token length exceeded.` + ); + break; + case 'stop': + logger.debug(`Choice ${choiceIndex}: Stream finished.`); + break; + default: + logger.error( + `Choice ${choiceIndex}: Stream finished with unknown reason '${finishReason}'.` + ); + } } } } - }); + ); yield chunk; } } diff --git a/packages/orchestration/src/orchestration-client.test.ts b/packages/orchestration/src/orchestration-client.test.ts index c7e10a82..81e7f71c 100644 --- a/packages/orchestration/src/orchestration-client.test.ts +++ b/packages/orchestration/src/orchestration-client.test.ts @@ -339,44 +339,49 @@ describe('orchestration service client', () => { expect(response.data).toEqual(mockResponse); }); - it('executes a streaming request with correct chunk 
response', async () => { - const config: OrchestrationModuleConfig = { - llm: { - model_name: 'gpt-4o', - model_params: {} - }, - templating: { - template: [{ role: 'user', content: 'Give me a short introduction of SAP Cloud SDK.' }] - } - }; + it('executes a streaming request with correct chunk response', async () => { + const config: OrchestrationModuleConfig = { + llm: { + model_name: 'gpt-4o', + model_params: {} + }, + templating: { + template: [ + { + role: 'user', + content: 'Give me a short introduction of SAP Cloud SDK.' + } + ] + } + }; - const mockResponse = await parseFileToString( - 'orchestration', - 'orchestration-chat-completion-stream-chunks.txt' - ); + const mockResponse = await parseFileToString( + 'orchestration', + 'orchestration-chat-completion-stream-chunks.txt' + ); - mockInference( - { - data: constructCompletionPostRequest(config, undefined, true) - }, - { - data: mockResponse, - status: 200 - }, - { - url: 'inference/deployments/1234/completion' - } - ); - const response = await new OrchestrationClient(config).stream(); + mockInference( + { + data: constructCompletionPostRequest(config, undefined, true) + }, + { + data: mockResponse, + status: 200 + }, + { + url: 'inference/deployments/1234/completion' + } + ); + const response = await new OrchestrationClient(config).stream(); - const initialResponse = await parseFileToString( - 'orchestration', - 'orchestration-chat-completion-stream-chunk-response-initial.json' - ); + const initialResponse = await parseFileToString( + 'orchestration', + 'orchestration-chat-completion-stream-chunk-response-initial.json' + ); - for await (const chunk of response.stream) { - expect(chunk.data).toEqual(JSON.parse(initialResponse)); - break; - } - }); + for await (const chunk of response.stream) { + expect(chunk.data).toEqual(JSON.parse(initialResponse)); + break; + } + }); }); diff --git a/sample-code/src/orchestration.ts b/sample-code/src/orchestration.ts index 45b2dc04..e4c7b40d 100644 --- 
a/sample-code/src/orchestration.ts +++ b/sample-code/src/orchestration.ts @@ -59,7 +59,12 @@ export async function chatCompletionStream( }, // define the prompt templating: { - template: [{ role: 'user', content: 'Give me a short introduction of SAP Cloud SDK.' }] + template: [ + { + role: 'user', + content: 'Give me a short introduction of SAP Cloud SDK.' + } + ] } }); From f6735f97ba0fe56c116719619aba88d0b99ecdd9 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Thu, 19 Dec 2024 14:49:14 +0100 Subject: [PATCH 21/53] remove internal from core --- packages/core/src/stream/line-decoder.ts | 1 - packages/core/src/stream/sse-decoder.ts | 1 - packages/core/src/stream/sse-stream.ts | 1 - 3 files changed, 3 deletions(-) diff --git a/packages/core/src/stream/line-decoder.ts b/packages/core/src/stream/line-decoder.ts index 676860c1..2a999024 100644 --- a/packages/core/src/stream/line-decoder.ts +++ b/packages/core/src/stream/line-decoder.ts @@ -5,7 +5,6 @@ type Bytes = string | ArrayBuffer | Uint8Array | Buffer | null | undefined; * reading lines from text. * * Https://github.com/encode/httpx/blob/920333ea98118e9cf617f246905d7b202510941c/httpx/_decoders.py#L258. - * @internal */ export class LineDecoder { // prettier-ignore diff --git a/packages/core/src/stream/sse-decoder.ts b/packages/core/src/stream/sse-decoder.ts index 046939ac..7c6f6b22 100644 --- a/packages/core/src/stream/sse-decoder.ts +++ b/packages/core/src/stream/sse-decoder.ts @@ -19,7 +19,6 @@ export interface ServerSentEvent { /** * Server-Sent Event decoder. - * @internal */ export class SSEDecoder { private data: string[]; diff --git a/packages/core/src/stream/sse-stream.ts b/packages/core/src/stream/sse-stream.ts index f36f9dc6..8af138b0 100644 --- a/packages/core/src/stream/sse-stream.ts +++ b/packages/core/src/stream/sse-stream.ts @@ -13,7 +13,6 @@ type Bytes = string | ArrayBuffer | Uint8Array | Buffer | null | undefined; /** * Stream implemented as an async iterable. 
- * @internal */ export class SseStream implements AsyncIterable { protected static transformToSseStream( From 80efcd87ca149eb98a649ed4b6e975444db875fb Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Thu, 19 Dec 2024 14:56:12 +0100 Subject: [PATCH 22/53] expose stream class --- packages/orchestration/src/index.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/orchestration/src/index.ts b/packages/orchestration/src/index.ts index 40f75ea2..d2565182 100644 --- a/packages/orchestration/src/index.ts +++ b/packages/orchestration/src/index.ts @@ -44,6 +44,8 @@ export { OrchestrationChatCompletionStreamResponse } from './orchestration-chat- export { OrchestrationChatCompletionStreamChunkResponse } from './orchestration-chat-completion-stream-chunk-response.js'; +export { OrchestrationChatCompletionStream } from './orchestration-chat-completion-stream.js'; + export { OrchestrationClient } from './orchestration-client.js'; export { buildAzureContentFilter } from './orchestration-filter-utility.js'; From 87c9557d3c9673bbba44db1a14022527464d3851 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Thu, 19 Dec 2024 15:04:55 +0100 Subject: [PATCH 23/53] add dev dependencies --- packages/orchestration/package.json | 3 +++ pnpm-lock.yaml | 10 +++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/packages/orchestration/package.json b/packages/orchestration/package.json index 3d5fc8f1..cf28acf4 100644 --- a/packages/orchestration/package.json +++ b/packages/orchestration/package.json @@ -34,5 +34,8 @@ "@sap-ai-sdk/ai-api": "workspace:^", "@sap-cloud-sdk/http-client": "^3.24.0", "@sap-cloud-sdk/connectivity": "^3.24.0" + }, + "devDependencies": { + "@sap-cloud-sdk/util": "^3.24.0" } } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 4bb39a51..199dc7dc 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -178,6 +178,10 @@ importers: '@sap-cloud-sdk/http-client': specifier: ^3.24.0 version: 3.24.0 + devDependencies: + '@sap-cloud-sdk/util': + specifier: 
^3.24.0 + version: 3.24.0 sample-cap: dependencies: @@ -5613,7 +5617,7 @@ snapshots: eslint: 9.17.0 eslint-config-prettier: 9.1.0(eslint@9.17.0) eslint-import-resolver-typescript: 3.6.3(@typescript-eslint/parser@8.16.0(eslint@9.17.0)(typescript@5.7.2))(eslint-plugin-import@2.31.0(@typescript-eslint/parser@8.16.0(eslint@9.17.0)(typescript@5.7.2))(eslint@9.17.0))(eslint@9.17.0) - eslint-plugin-import: 2.31.0(@typescript-eslint/parser@8.16.0(eslint@9.17.0)(typescript@5.7.2))(eslint-import-resolver-typescript@3.6.3)(eslint@9.17.0) + eslint-plugin-import: 2.31.0(@typescript-eslint/parser@8.16.0(eslint@9.17.0)(typescript@5.7.2))(eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@8.16.0(eslint@9.17.0)(typescript@5.7.2))(eslint-plugin-import@2.31.0(@typescript-eslint/parser@8.16.0(eslint@9.17.0)(typescript@5.7.2))(eslint@9.17.0))(eslint@9.17.0))(eslint@9.17.0) eslint-plugin-jsdoc: 50.6.0(eslint@9.17.0) eslint-plugin-prettier: 5.2.1(@types/eslint@8.56.10)(eslint-config-prettier@9.1.0(eslint@9.17.0))(eslint@9.17.0)(prettier@3.4.2) eslint-plugin-regex: 1.10.0(eslint@9.17.0) @@ -7012,7 +7016,7 @@ snapshots: is-bun-module: 1.3.0 is-glob: 4.0.3 optionalDependencies: - eslint-plugin-import: 2.31.0(@typescript-eslint/parser@8.16.0(eslint@9.17.0)(typescript@5.7.2))(eslint-import-resolver-typescript@3.6.3)(eslint@9.17.0) + eslint-plugin-import: 2.31.0(@typescript-eslint/parser@8.16.0(eslint@9.17.0)(typescript@5.7.2))(eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@8.16.0(eslint@9.17.0)(typescript@5.7.2))(eslint-plugin-import@2.31.0(@typescript-eslint/parser@8.16.0(eslint@9.17.0)(typescript@5.7.2))(eslint@9.17.0))(eslint@9.17.0))(eslint@9.17.0) transitivePeerDependencies: - '@typescript-eslint/parser' - eslint-import-resolver-node @@ -7030,7 +7034,7 @@ snapshots: transitivePeerDependencies: - supports-color - 
eslint-plugin-import@2.31.0(@typescript-eslint/parser@8.16.0(eslint@9.17.0)(typescript@5.7.2))(eslint-import-resolver-typescript@3.6.3)(eslint@9.17.0): + eslint-plugin-import@2.31.0(@typescript-eslint/parser@8.16.0(eslint@9.17.0)(typescript@5.7.2))(eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@8.16.0(eslint@9.17.0)(typescript@5.7.2))(eslint-plugin-import@2.31.0(@typescript-eslint/parser@8.16.0(eslint@9.17.0)(typescript@5.7.2))(eslint@9.17.0))(eslint@9.17.0))(eslint@9.17.0): dependencies: '@rtsao/scc': 1.1.0 array-includes: 3.1.8 From a2f23874ae0c7d9a5593dac2c22e13df19821a4b Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Thu, 19 Dec 2024 15:59:31 +0100 Subject: [PATCH 24/53] add docs --- packages/foundation-models/README.md | 2 +- packages/orchestration/README.md | 88 ++++++++++++++++++++++++++++ sample-code/README.md | 14 +++++ 3 files changed, 103 insertions(+), 1 deletion(-) diff --git a/packages/foundation-models/README.md b/packages/foundation-models/README.md index 6b621f89..eb024138 100644 --- a/packages/foundation-models/README.md +++ b/packages/foundation-models/README.md @@ -198,7 +198,7 @@ Additionally, it can be aborted manually by calling the `stream()` method with a ```ts const chatClient = new AzureOpenAiChatClient('gpt-4o'); const controller = new AbortController(); -const response = await new AzureOpenAiChatClient('gpt-35-turbo').stream( +const response = await chatClient.stream( { messages: [ { diff --git a/packages/orchestration/README.md b/packages/orchestration/README.md index f6032371..e36a712d 100644 --- a/packages/orchestration/README.md +++ b/packages/orchestration/README.md @@ -84,6 +84,94 @@ The client allows you to combine various modules, such as templating and content In addition to the examples below, you can find more **sample code** [here](https://github.com/SAP/ai-sdk-js/blob/main/sample-code/src/orchestration.ts). 
+### Streaming + +The `OrchestrationClient` supports streaming response for chat completion requests based on the [Server-sent events](https://html.spec.whatwg.org/multipage/server-sent-events.html#server-sent-events) standard. + +Use the `stream()` method to receive a stream of chunk responses from the model. +After consuming the stream, call the helper methods to get the finish reason and token usage information respectively. + +```ts +const orchestrationClient = new OrchestrationClient({ + llm: { + model_name: 'gpt-4o', + model_params: { max_tokens: 50, temperature: 0.1 } + }, + templating: { + template: [ + { role: 'user', content: 'What is the capital of {{?country}}?' } + ] + } +}); + +const response = await orchestrationClient.stream({ + inputParams: { country: 'France' } +}); + +for await (const chunk of response.stream) { + console.log(JSON.stringify(chunk)); +} + +const finishReason = response.getFinishReason(); +const tokenUsage = response.getTokenUsage(); + +console.log(`Finish reason: ${finishReason}\n`); +console.log(`Token usage: ${JSON.stringify(tokenUsage)}\n`); +``` + +#### Streaming the Delta Content + +The client provides a helper method to extract delta content and stream string directly. + +```ts +for await (const chunk of response.stream.toContentStream()) { + console.log(chunk); // will log the delta content +} +``` + +Each chunk will be a defined string containing the delta content. +Set `choiceIndex` parameter for `toContentStream()` method to stream a specific choice. + +#### Streaming with Abort Controller + +Streaming request can be aborted using the `AbortController` API. +In case of an error, the SAP Cloud SDK for AI will automatically close the stream. +Additionally, it can be aborted manually by calling the `stream()` method with an `AbortController` object. 
+ +```ts +const orchestrationClient = new OrchestrationClient({ + llm: { + model_name: 'gpt-4o', + model_params: { max_tokens: 50, temperature: 0.1 } + }, + templating: { + template: [ + { role: 'user', content: 'What is the capital of {{?country}}?' } + ] + } +}); + +const controller = new AbortController(); +const response = await orchestrationClient.stream( + { + inputParams: { country: 'France' } + }, + controller +); + +// Abort the streaming request after one second +setTimeout(() => { + controller.abort(); +}, 1000); + +for await (const chunk of response.stream) { + console.log(JSON.stringify(chunk)); +} +``` + +In this example, streaming request will be aborted after one second. +Abort controller can be useful, e.g., when end-user wants to stop the stream or refreshes the page. + ### Templating Use the orchestration client with templating to pass a prompt containing placeholders that will be replaced with input parameters during a chat completion request. diff --git a/sample-code/README.md b/sample-code/README.md index 70d19cdd..4b9b2308 100644 --- a/sample-code/README.md +++ b/sample-code/README.md @@ -153,6 +153,20 @@ Use `buildAzureContentFilter()` to build the content filter. Send chat completion request with a custom header as the custom request configuration. +#### Chat Completion Streaming + +`GET /orchestration-stream/chat-completion-stream` + +Get chat completion response with streaming. + +The response header is set with `Content-Type: text/event-stream` to stream the text. + +`AbortController` is used to cancel the request in case user closes or refreshes the page, or there is an error. + +The `toContentStream()` method is called to extract the content of the chunk for convenience. + +Once the streaming is done, finish reason and token usage are printed out. 
+ ### Langchain #### Invoke with a Simple Input From 460d223b7c514258d1c083732c1e2ca104aef2a9 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Wed, 1 Jan 2025 13:20:54 +0100 Subject: [PATCH 25/53] refactor --- packages/foundation-models/README.md | 2 +- packages/orchestration/README.md | 2 +- ...snap => orchestration-stream.test.ts.snap} | 0 packages/orchestration/src/index.ts | 6 +-- .../orchestration/src/orchestration-client.ts | 22 ++++---- ...chestration-stream-chunk-response.test.ts} | 32 ++++++------ ...=> orchestration-stream-chunk-response.ts} | 6 +-- ...se.ts => orchestration-stream-response.ts} | 12 ++--- ...m.test.ts => orchestration-stream.test.ts} | 24 ++++----- ...tion-stream.ts => orchestration-stream.ts} | 52 +++++++++---------- sample-code/src/orchestration.ts | 6 +-- 11 files changed, 82 insertions(+), 82 deletions(-) rename packages/orchestration/src/__snapshots__/{orchestration-chat-completion-stream.test.ts.snap => orchestration-stream.test.ts.snap} (100%) rename packages/orchestration/src/{orchestration-chat-completion-stream-chunk-response.test.ts => orchestration-stream-chunk-response.test.ts} (57%) rename packages/orchestration/src/{orchestration-chat-completion-stream-chunk-response.ts => orchestration-stream-chunk-response.ts} (90%) rename packages/orchestration/src/{orchestration-chat-completion-stream-response.ts => orchestration-stream-response.ts} (69%) rename packages/orchestration/src/{orchestration-chat-completion-stream.test.ts => orchestration-stream.test.ts} (77%) rename packages/orchestration/src/{orchestration-chat-completion-stream.ts => orchestration-stream.ts} (65%) diff --git a/packages/foundation-models/README.md b/packages/foundation-models/README.md index eb024138..82649fae 100644 --- a/packages/foundation-models/README.md +++ b/packages/foundation-models/README.md @@ -152,7 +152,7 @@ Refer to `AzureOpenAiChatCompletionParameters` interface for other parameters th The `AzureOpenAiChatClient` supports streaming response 
for chat completion requests based on the [Server-sent events](https://html.spec.whatwg.org/multipage/server-sent-events.html#server-sent-events) standard. Use the `stream()` method to receive a stream of chunk responses from the model. -After consuming the stream, call the helper methods to get the finish reason and token usage information respectively. +After consuming the stream, call the helper methods to get the finish reason and token usage information. ```ts const chatClient = new AzureOpenAiChatClient('gpt-4o'); diff --git a/packages/orchestration/README.md b/packages/orchestration/README.md index a6928c23..179e3bd4 100644 --- a/packages/orchestration/README.md +++ b/packages/orchestration/README.md @@ -90,7 +90,7 @@ In addition to the examples below, you can find more **sample code** [here](http The `OrchestrationClient` supports streaming response for chat completion requests based on the [Server-sent events](https://html.spec.whatwg.org/multipage/server-sent-events.html#server-sent-events) standard. Use the `stream()` method to receive a stream of chunk responses from the model. -After consuming the stream, call the helper methods to get the finish reason and token usage information respectively. +After consuming the stream, call the helper methods to get the finish reason and token usage information. 
```ts const orchestrationClient = new OrchestrationClient({ diff --git a/packages/orchestration/src/__snapshots__/orchestration-chat-completion-stream.test.ts.snap b/packages/orchestration/src/__snapshots__/orchestration-stream.test.ts.snap similarity index 100% rename from packages/orchestration/src/__snapshots__/orchestration-chat-completion-stream.test.ts.snap rename to packages/orchestration/src/__snapshots__/orchestration-stream.test.ts.snap diff --git a/packages/orchestration/src/index.ts b/packages/orchestration/src/index.ts index d2565182..970c38e2 100644 --- a/packages/orchestration/src/index.ts +++ b/packages/orchestration/src/index.ts @@ -40,11 +40,11 @@ export type { Prompt } from './orchestration-types.js'; -export { OrchestrationChatCompletionStreamResponse } from './orchestration-chat-completion-stream-response.js'; +export { OrchestrationStreamResponse } from './orchestration-stream-response.js'; -export { OrchestrationChatCompletionStreamChunkResponse } from './orchestration-chat-completion-stream-chunk-response.js'; +export { OrchestrationStreamChunkResponse } from './orchestration-stream-chunk-response.js'; -export { OrchestrationChatCompletionStream } from './orchestration-chat-completion-stream.js'; +export { OrchestrationStream } from './orchestration-stream.js'; export { OrchestrationClient } from './orchestration-client.js'; diff --git a/packages/orchestration/src/orchestration-client.ts b/packages/orchestration/src/orchestration-client.ts index e6f2721d..b3650885 100644 --- a/packages/orchestration/src/orchestration-client.ts +++ b/packages/orchestration/src/orchestration-client.ts @@ -1,8 +1,8 @@ import { executeRequest } from '@sap-ai-sdk/core'; import { resolveDeploymentId } from '@sap-ai-sdk/ai-api/internal.js'; import { OrchestrationResponse } from './orchestration-response.js'; -import { OrchestrationChatCompletionStream } from './orchestration-chat-completion-stream.js'; -import { OrchestrationChatCompletionStreamResponse } from 
'./orchestration-chat-completion-stream-response.js'; +import { OrchestrationStream } from './orchestration-stream.js'; +import { OrchestrationStreamResponse } from './orchestration-stream-response.js'; import type { HttpResponse, CustomRequestConfig @@ -14,7 +14,7 @@ import type { OrchestrationModuleConfig, Prompt } from './orchestration-types.js'; -import type { OrchestrationChatCompletionStreamChunkResponse } from './orchestration-chat-completion-stream-chunk-response.js'; +import type { OrchestrationStreamChunkResponse } from './orchestration-stream-chunk-response.js'; import type { HttpDestinationOrFetchOptions } from '@sap-cloud-sdk/connectivity'; interface RequestOptions { @@ -56,7 +56,7 @@ export class OrchestrationClient { destination?: HttpDestinationOrFetchOptions, requestConfig?: CustomRequestConfig ): Promise< - OrchestrationChatCompletionStreamResponse + OrchestrationStreamResponse > { return OrchestrationClient.createStreamResponse( { @@ -107,10 +107,10 @@ export class OrchestrationClient { options: RequestOptions, controller: AbortController ): Promise< - OrchestrationChatCompletionStreamResponse + OrchestrationStreamResponse > { const response = - new OrchestrationChatCompletionStreamResponse(); + new OrchestrationStreamResponse(); const streamResponse = await OrchestrationClient.executeRequest({ ...options, @@ -121,14 +121,14 @@ export class OrchestrationClient { } }); - const stream = await OrchestrationChatCompletionStream._create( + const stream = OrchestrationStream._create( streamResponse, controller ); response.stream = stream - ._pipe(OrchestrationChatCompletionStream._processChunk) - ._pipe(OrchestrationChatCompletionStream._processFinishReason, response) - ._pipe(OrchestrationChatCompletionStream._processTokenUsage, response); + ._pipe(OrchestrationStream._processChunk) + ._pipe(OrchestrationStream._processFinishReason, response) + ._pipe(OrchestrationStream._processTokenUsage, response); return response; } @@ -158,7 +158,7 @@ export 
class OrchestrationClient { controller = new AbortController(), requestConfig?: CustomRequestConfig ): Promise< - OrchestrationChatCompletionStreamResponse + OrchestrationStreamResponse > { return OrchestrationClient.createStreamResponse( { diff --git a/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.test.ts b/packages/orchestration/src/orchestration-stream-chunk-response.test.ts similarity index 57% rename from packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.test.ts rename to packages/orchestration/src/orchestration-stream-chunk-response.test.ts index 3a4bc4da..d293eb66 100644 --- a/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.test.ts +++ b/packages/orchestration/src/orchestration-stream-chunk-response.test.ts @@ -1,16 +1,16 @@ import { parseMockResponse } from '../../../test-util/mock-http.js'; -import { OrchestrationChatCompletionStreamChunkResponse } from './orchestration-chat-completion-stream-chunk-response.js'; +import { OrchestrationStreamChunkResponse } from './orchestration-stream-chunk-response.js'; -describe('OpenAI chat completion stream chunk response', () => { +describe('Orchestration chat completion stream chunk response', () => { let mockResponses: { tokenUsageResponse: any; finishReasonResponse: any; deltaContentResponse: any; }; - let orchestrationChatCompletionStreamChunkResponses: { - tokenUsageResponse: OrchestrationChatCompletionStreamChunkResponse; - finishReasonResponse: OrchestrationChatCompletionStreamChunkResponse; - deltaContentResponse: OrchestrationChatCompletionStreamChunkResponse; + let orchestrationStreamChunkResponses: { + tokenUsageResponse: OrchestrationStreamChunkResponse; + finishReasonResponse: OrchestrationStreamChunkResponse; + deltaContentResponse: OrchestrationStreamChunkResponse; }; beforeAll(async () => { @@ -28,14 +28,14 @@ describe('OpenAI chat completion stream chunk response', () => { 
'orchestration-chat-completion-stream-chunk-response-delta-content.json' ) }; - orchestrationChatCompletionStreamChunkResponses = { - tokenUsageResponse: new OrchestrationChatCompletionStreamChunkResponse( + orchestrationStreamChunkResponses = { + tokenUsageResponse: new OrchestrationStreamChunkResponse( mockResponses.tokenUsageResponse ), - finishReasonResponse: new OrchestrationChatCompletionStreamChunkResponse( + finishReasonResponse: new OrchestrationStreamChunkResponse( mockResponses.finishReasonResponse ), - deltaContentResponse: new OrchestrationChatCompletionStreamChunkResponse( + deltaContentResponse: new OrchestrationStreamChunkResponse( mockResponses.deltaContentResponse ) }; @@ -43,19 +43,19 @@ describe('OpenAI chat completion stream chunk response', () => { it('should return the chat completion stream chunk response', () => { expect( - orchestrationChatCompletionStreamChunkResponses.tokenUsageResponse.data + orchestrationStreamChunkResponses.tokenUsageResponse.data ).toStrictEqual(mockResponses.tokenUsageResponse); expect( - orchestrationChatCompletionStreamChunkResponses.finishReasonResponse.data + orchestrationStreamChunkResponses.finishReasonResponse.data ).toStrictEqual(mockResponses.finishReasonResponse); expect( - orchestrationChatCompletionStreamChunkResponses.deltaContentResponse.data + orchestrationStreamChunkResponses.deltaContentResponse.data ).toStrictEqual(mockResponses.deltaContentResponse); }); it('should get token usage', () => { expect( - orchestrationChatCompletionStreamChunkResponses.tokenUsageResponse.getTokenUsage() + orchestrationStreamChunkResponses.tokenUsageResponse.getTokenUsage() ).toMatchObject({ completion_tokens: expect.any(Number), prompt_tokens: expect.any(Number), @@ -65,13 +65,13 @@ describe('OpenAI chat completion stream chunk response', () => { it('should return finish reason', () => { expect( - orchestrationChatCompletionStreamChunkResponses.finishReasonResponse.getFinishReason() + 
orchestrationStreamChunkResponses.finishReasonResponse.getFinishReason() ).toBe('stop'); }); it('should return delta content with default index 0', () => { expect( - orchestrationChatCompletionStreamChunkResponses.deltaContentResponse.getDeltaContent() + orchestrationStreamChunkResponses.deltaContentResponse.getDeltaContent() ).toMatchInlineSnapshot( '"rimarily focusing on Java and JavaScript/Node.js environments, allowing developers to work in their "' ); diff --git a/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.ts b/packages/orchestration/src/orchestration-stream-chunk-response.ts similarity index 90% rename from packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.ts rename to packages/orchestration/src/orchestration-stream-chunk-response.ts index 545284c7..78f2f834 100644 --- a/packages/orchestration/src/orchestration-chat-completion-stream-chunk-response.ts +++ b/packages/orchestration/src/orchestration-stream-chunk-response.ts @@ -1,13 +1,13 @@ import type { CompletionPostResponseStreaming, - LLMChoiceStreaming + LLMChoiceStreaming, + TokenUsage } from './client/api/schema/index.js'; -import type { TokenUsage } from './index.js'; /** * Orchestration chat completion stream chunk response. 
*/ -export class OrchestrationChatCompletionStreamChunkResponse { +export class OrchestrationStreamChunkResponse { constructor(public readonly data: CompletionPostResponseStreaming) { this.data = data; } diff --git a/packages/orchestration/src/orchestration-chat-completion-stream-response.ts b/packages/orchestration/src/orchestration-stream-response.ts similarity index 69% rename from packages/orchestration/src/orchestration-chat-completion-stream-response.ts rename to packages/orchestration/src/orchestration-stream-response.ts index 3276b8ac..ce03cf75 100644 --- a/packages/orchestration/src/orchestration-chat-completion-stream-response.ts +++ b/packages/orchestration/src/orchestration-stream-response.ts @@ -1,16 +1,16 @@ import type { TokenUsage } from './client/api/schema/index.js'; -import type { OrchestrationChatCompletionStream } from './orchestration-chat-completion-stream.js'; +import type { OrchestrationStream } from './orchestration-stream.js'; /** * Orchestration chat completion stream response. */ -export class OrchestrationChatCompletionStreamResponse { +export class OrchestrationStreamResponse { private _usage: TokenUsage | undefined; /** * Finish reasons for all choices. 
*/ private _finishReasons: Map = new Map(); - private _stream: OrchestrationChatCompletionStream | undefined; + private _stream: OrchestrationStream | undefined; public getTokenUsage(): TokenUsage | undefined { return this._usage; @@ -23,7 +23,7 @@ export class OrchestrationChatCompletionStreamResponse { this._usage = usage; } - public getFinishReason(choiceIndex = 0): string | undefined | null { + public getFinishReason(choiceIndex = 0): string | undefined { return this._finishReasons.get(choiceIndex); } @@ -41,7 +41,7 @@ export class OrchestrationChatCompletionStreamResponse { this._finishReasons = finishReasons; } - get stream(): OrchestrationChatCompletionStream { + get stream(): OrchestrationStream { if (!this._stream) { throw new Error('Response stream is undefined.'); } @@ -51,7 +51,7 @@ export class OrchestrationChatCompletionStreamResponse { /** * @internal */ - set stream(stream: OrchestrationChatCompletionStream) { + set stream(stream: OrchestrationStream) { this._stream = stream; } } diff --git a/packages/orchestration/src/orchestration-chat-completion-stream.test.ts b/packages/orchestration/src/orchestration-stream.test.ts similarity index 77% rename from packages/orchestration/src/orchestration-chat-completion-stream.test.ts rename to packages/orchestration/src/orchestration-stream.test.ts index e67d511f..4f31a77a 100644 --- a/packages/orchestration/src/orchestration-chat-completion-stream.test.ts +++ b/packages/orchestration/src/orchestration-stream.test.ts @@ -2,12 +2,12 @@ import { createLogger } from '@sap-cloud-sdk/util'; import { jest } from '@jest/globals'; import { LineDecoder, SSEDecoder } from '@sap-ai-sdk/core'; import { parseFileToString } from '../../../test-util/mock-http.js'; -import { OrchestrationChatCompletionStream } from './orchestration-chat-completion-stream.js'; +import { OrchestrationStream } from './orchestration-stream.js'; import type { CompletionPostResponseStreaming } from './client/api/schema/index.js'; 
describe('Orchestration chat completion stream', () => { let sseChunks: string[]; - let originalChatCompletionStream: OrchestrationChatCompletionStream; + let originalChatCompletionStream: OrchestrationStream; beforeEach(async () => { const rawChunksString = await parseFileToString( @@ -31,7 +31,7 @@ describe('Orchestration chat completion stream', () => { yield sseChunk; } } - originalChatCompletionStream = new OrchestrationChatCompletionStream( + originalChatCompletionStream = new OrchestrationStream( iterator, new AbortController() ); @@ -39,7 +39,7 @@ describe('Orchestration chat completion stream', () => { it('should wrap the raw chunk', async () => { let output = ''; - const asnycGenerator = OrchestrationChatCompletionStream._processChunk( + const asnycGenerator = OrchestrationStream._processChunk( originalChatCompletionStream ); for await (const chunk of asnycGenerator) { @@ -55,12 +55,12 @@ describe('Orchestration chat completion stream', () => { messageContext: 'orchestration-chat-completion-stream' }); const debugSpy = jest.spyOn(logger, 'debug'); - const asyncGeneratorChunk = OrchestrationChatCompletionStream._processChunk( + const asyncGeneratorChunk = OrchestrationStream._processChunk( originalChatCompletionStream ); const asyncGeneratorFinishReason = - OrchestrationChatCompletionStream._processFinishReason( - new OrchestrationChatCompletionStream( + OrchestrationStream._processFinishReason( + new OrchestrationStream( () => asyncGeneratorChunk, new AbortController() ) @@ -78,12 +78,12 @@ describe('Orchestration chat completion stream', () => { messageContext: 'orchestration-chat-completion-stream' }); const debugSpy = jest.spyOn(logger, 'debug'); - const asyncGeneratorChunk = OrchestrationChatCompletionStream._processChunk( + const asyncGeneratorChunk = OrchestrationStream._processChunk( originalChatCompletionStream ); const asyncGeneratorTokenUsage = - OrchestrationChatCompletionStream._processTokenUsage( - new OrchestrationChatCompletionStream( + 
OrchestrationStream._processTokenUsage( + new OrchestrationStream( () => asyncGeneratorChunk, new AbortController() ) @@ -98,10 +98,10 @@ describe('Orchestration chat completion stream', () => { }); it('should transform the original stream to string stream', async () => { - const asyncGeneratorChunk = OrchestrationChatCompletionStream._processChunk( + const asyncGeneratorChunk = OrchestrationStream._processChunk( originalChatCompletionStream ); - const chunkStream = new OrchestrationChatCompletionStream( + const chunkStream = new OrchestrationStream( () => asyncGeneratorChunk, new AbortController() ); diff --git a/packages/orchestration/src/orchestration-chat-completion-stream.ts b/packages/orchestration/src/orchestration-stream.ts similarity index 65% rename from packages/orchestration/src/orchestration-chat-completion-stream.ts rename to packages/orchestration/src/orchestration-stream.ts index c172928f..9067df23 100644 --- a/packages/orchestration/src/orchestration-chat-completion-stream.ts +++ b/packages/orchestration/src/orchestration-stream.ts @@ -1,12 +1,12 @@ import { createLogger } from '@sap-cloud-sdk/util'; import { SseStream } from '@sap-ai-sdk/core'; -import { OrchestrationChatCompletionStreamChunkResponse } from './orchestration-chat-completion-stream-chunk-response.js'; +import { OrchestrationStreamChunkResponse } from './orchestration-stream-chunk-response.js'; import type { CompletionPostResponseStreaming, LLMChoiceStreaming } from './client/api/schema/index.js'; import type { HttpResponse } from '@sap-cloud-sdk/http-client'; -import type { OrchestrationChatCompletionStreamResponse } from './orchestration-chat-completion-stream-response.js'; +import type { OrchestrationStreamResponse } from './orchestration-stream-response.js'; const logger = createLogger({ package: 'orchestration', @@ -16,7 +16,7 @@ const logger = createLogger({ /** * Chat completion stream containing post-processing functions. 
*/ -export class OrchestrationChatCompletionStream extends SseStream { +export class OrchestrationStream extends SseStream { /** * Create a chat completion stream based on the http response. * @param response - Http response. @@ -26,13 +26,13 @@ export class OrchestrationChatCompletionStream extends SseStream { public static _create( response: HttpResponse, controller: AbortController - ): OrchestrationChatCompletionStream { + ): OrchestrationStream { const stream = SseStream.transformToSseStream( response, controller ); - return new OrchestrationChatCompletionStream(stream.iterator, controller); + return new OrchestrationStream(stream.iterator, controller); } /** @@ -41,10 +41,10 @@ export class OrchestrationChatCompletionStream extends SseStream { * @internal */ static async *_processChunk( - stream: OrchestrationChatCompletionStream - ): AsyncGenerator { + stream: OrchestrationStream + ): AsyncGenerator { for await (const chunk of stream) { - yield new OrchestrationChatCompletionStreamChunkResponse(chunk); + yield new OrchestrationStreamChunkResponse(chunk); } } @@ -52,9 +52,9 @@ export class OrchestrationChatCompletionStream extends SseStream { * @internal */ static async *_processFinishReason( - stream: OrchestrationChatCompletionStream, - response?: OrchestrationChatCompletionStreamResponse - ): AsyncGenerator { + stream: OrchestrationStream, + response?: OrchestrationStreamResponse + ): AsyncGenerator { for await (const chunk of stream) { chunk.data.orchestration_result?.choices.forEach( (choice: LLMChoiceStreaming) => { @@ -96,9 +96,9 @@ export class OrchestrationChatCompletionStream extends SseStream { * @internal */ static async *_processTokenUsage( - stream: OrchestrationChatCompletionStream, - response?: OrchestrationChatCompletionStreamResponse - ): AsyncGenerator { + stream: OrchestrationStream, + response?: OrchestrationStreamResponse + ): AsyncGenerator { for await (const chunk of stream) { const usage = chunk.getTokenUsage(); if (usage) { @@ -118,7 
+118,7 @@ export class OrchestrationChatCompletionStream extends SseStream { * @internal */ static async *_processContentStream( - stream: OrchestrationChatCompletionStream, + stream: OrchestrationStream, choiceIndex = 0 ): AsyncGenerator { for await (const chunk of stream) { @@ -140,36 +140,36 @@ export class OrchestrationChatCompletionStream extends SseStream { /** * Pipe the stream through a processing function. * @param processFn - The function to process the input stream. - * @param response - The `OrchestrationChatCompletionStreamResponse` object for process function to store finish reason, token usage, etc. + * @param response - The `OrchestrationStreamResponse` object for process function to store finish reason, token usage, etc. * @returns The output stream containing processed items. * @internal */ _pipe( processFn: ( - stream: OrchestrationChatCompletionStream, - response?: OrchestrationChatCompletionStreamResponse + stream: OrchestrationStream, + response?: OrchestrationStreamResponse ) => AsyncIterator, - response?: OrchestrationChatCompletionStreamResponse - ): OrchestrationChatCompletionStream { + response?: OrchestrationStreamResponse + ): OrchestrationStream { if (response) { - return new OrchestrationChatCompletionStream( + return new OrchestrationStream( () => processFn(this, response), this.controller ); } - return new OrchestrationChatCompletionStream( + return new OrchestrationStream( () => processFn(this), this.controller ); } public toContentStream( - this: OrchestrationChatCompletionStream, + this: OrchestrationStream, choiceIndex?: number - ): OrchestrationChatCompletionStream { - return new OrchestrationChatCompletionStream( + ): OrchestrationStream { + return new OrchestrationStream( () => - OrchestrationChatCompletionStream._processContentStream( + OrchestrationStream._processContentStream( this, choiceIndex ), diff --git a/sample-code/src/orchestration.ts b/sample-code/src/orchestration.ts index de40a953..43def248 100644 --- 
a/sample-code/src/orchestration.ts +++ b/sample-code/src/orchestration.ts @@ -6,8 +6,8 @@ import { import { createLogger } from '@sap-cloud-sdk/util'; import type { LlmModuleConfig, - OrchestrationChatCompletionStreamChunkResponse, - OrchestrationChatCompletionStreamResponse, + OrchestrationStreamChunkResponse, + OrchestrationStreamResponse, OrchestrationResponse } from '@sap-ai-sdk/orchestration'; @@ -50,7 +50,7 @@ export async function orchestrationChatCompletion(): Promise + OrchestrationStreamResponse > { const orchestrationClient = new OrchestrationClient({ // define the language model to be used From 45d1a5abe479db91bd5b31d303296d2b99a93979 Mon Sep 17 00:00:00 2001 From: Tom Frenken <54979414+tomfrenken@users.noreply.github.com> Date: Thu, 2 Jan 2025 11:25:46 +0100 Subject: [PATCH 26/53] Apply suggestions from code review Co-authored-by: KavithaSiva <32287936+KavithaSiva@users.noreply.github.com> --- packages/orchestration/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/orchestration/README.md b/packages/orchestration/README.md index 179e3bd4..6d6e8c00 100644 --- a/packages/orchestration/README.md +++ b/packages/orchestration/README.md @@ -100,7 +100,7 @@ const orchestrationClient = new OrchestrationClient({ }, templating: { template: [ - { role: 'user', content: 'What is the capital of {{?country}}?' } + { role: 'user', content: 'Give a long history of {{?country}}?' } ] } }); @@ -147,7 +147,7 @@ const orchestrationClient = new OrchestrationClient({ }, templating: { template: [ - { role: 'user', content: 'What is the capital of {{?country}}?' } + { role: 'user', content: 'Give a long history of {{?country}}?' 
} ] } }); From 08f3e9133ac3d0bca57a60dc8d6d5309a1785bc3 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Fri, 3 Jan 2025 20:33:34 +0100 Subject: [PATCH 27/53] refactor all suggestions --- .../src/orchestration-client.test.ts | 6 +- .../orchestration/src/orchestration-client.ts | 156 +++++++++++------- ...-completion-post-request-from-json.test.ts | 8 +- .../src/orchestration-stream.test.ts | 20 +-- .../orchestration/src/orchestration-stream.ts | 11 +- sample-code/src/orchestration.ts | 7 +- 6 files changed, 113 insertions(+), 95 deletions(-) diff --git a/packages/orchestration/src/orchestration-client.test.ts b/packages/orchestration/src/orchestration-client.test.ts index b4b7dae9..9bbeca40 100644 --- a/packages/orchestration/src/orchestration-client.test.ts +++ b/packages/orchestration/src/orchestration-client.test.ts @@ -7,7 +7,7 @@ import { parseMockResponse } from '../../../test-util/mock-http.js'; import { - constructCompletionPostRequestFromJson, + constructCompletionPostRequestFromJsonModuleConfig, constructCompletionPostRequest, OrchestrationClient } from './orchestration-client.js'; @@ -89,7 +89,7 @@ describe('orchestration service client', () => { mockInference( { - data: constructCompletionPostRequestFromJson(jsonConfig) + data: constructCompletionPostRequestFromJsonModuleConfig(jsonConfig) }, { data: mockResponse, @@ -101,7 +101,7 @@ describe('orchestration service client', () => { ); const response = - await OrchestrationClient.chatCompletionWithJson(jsonConfig); + await OrchestrationClient.chatCompletionWithJsonModuleConfig(jsonConfig); expect(response).toBeInstanceOf(OrchestrationResponse); expect(response.data).toEqual(mockResponse); diff --git a/packages/orchestration/src/orchestration-client.ts b/packages/orchestration/src/orchestration-client.ts index b3650885..577405d4 100644 --- a/packages/orchestration/src/orchestration-client.ts +++ b/packages/orchestration/src/orchestration-client.ts @@ -8,9 +8,12 @@ import type { CustomRequestConfig } from 
'@sap-cloud-sdk/http-client'; import type { ResourceGroupConfig } from '@sap-ai-sdk/ai-api/internal.js'; -import type { CompletionPostRequest } from './client/api/schema/index.js'; import type { - LlmModuleConfig, + CompletionPostRequest, + ModuleConfigs, + OrchestrationConfig +} from './client/api/schema/index.js'; +import type { OrchestrationModuleConfig, Prompt } from './orchestration-types.js'; @@ -20,52 +23,58 @@ import type { HttpDestinationOrFetchOptions } from '@sap-cloud-sdk/connectivity' interface RequestOptions { prompt?: Prompt; requestConfig?: CustomRequestConfig; - streaming?: boolean; - jsonConfig?: string; + stream?: boolean; + streamOptions?: StreamOptions; + jsonModuleConfig?: string; deploymentConfig?: ResourceGroupConfig; destination?: HttpDestinationOrFetchOptions; config?: OrchestrationModuleConfig; } +interface ConfigurationOptions { + deploymentConfig?: ResourceGroupConfig; + destination?: HttpDestinationOrFetchOptions; + requestConfig?: CustomRequestConfig; +} + +interface StreamOptions { + chunkSize?: number; + llm?: { includeUsage?: boolean; [key: string]: any }; + // Add more options as they are implemented + // masking?: { }; + // grounding?: { }; + // inputFiltering?: { }; + outputFiltering?: { overlap?: number }; +} + /** * Get the orchestration client. 
*/ export class OrchestrationClient { - static async chatCompletionWithJson( - jsonConfig: string, + static async chatCompletionWithJsonModuleConfig( + jsonModuleConfig: string, prompt?: Prompt, - deploymentConfig?: ResourceGroupConfig, - destination?: HttpDestinationOrFetchOptions, - requestConfig?: CustomRequestConfig + options?: ConfigurationOptions ): Promise { const response = await OrchestrationClient.executeRequest({ + jsonModuleConfig, prompt, - requestConfig, - jsonConfig, - deploymentConfig, - destination + ...options }); return new OrchestrationResponse(response); } - static async streamWithJson( - jsonConfig: string, + static async streamWithJsonModuleConfig( + jsonModuleConfig: string, prompt?: Prompt, controller = new AbortController(), - deploymentConfig?: ResourceGroupConfig, - destination?: HttpDestinationOrFetchOptions, - requestConfig?: CustomRequestConfig - ): Promise< - OrchestrationStreamResponse - > { + options?: ConfigurationOptions + ): Promise> { return OrchestrationClient.createStreamResponse( { + jsonModuleConfig, prompt, - requestConfig, - streaming: true, - jsonConfig, - deploymentConfig, - destination + ...options }, controller ); @@ -77,25 +86,30 @@ export class OrchestrationClient { const { prompt, requestConfig, - streaming = false, - jsonConfig, - deploymentConfig: { resourceGroup } = {}, - destination + stream, + streamOptions, + jsonModuleConfig, + deploymentConfig, + destination, + config } = options; - const body = jsonConfig - ? constructCompletionPostRequestFromJson(jsonConfig, prompt) - : constructCompletionPostRequest(options.config!, prompt, streaming); + const body = jsonModuleConfig + ? 
constructCompletionPostRequestFromJsonModuleConfig( + jsonModuleConfig, + prompt + ) + : constructCompletionPostRequest(config!, prompt, stream, streamOptions); const deploymentId = await resolveDeploymentId({ scenarioId: 'orchestration', - resourceGroup + ...deploymentConfig }); return executeRequest( { url: `/inference/deployments/${deploymentId}/completion`, - resourceGroup + ...deploymentConfig }, body, requestConfig, @@ -106,9 +120,7 @@ export class OrchestrationClient { private static async createStreamResponse( options: RequestOptions, controller: AbortController - ): Promise< - OrchestrationStreamResponse - > { + ): Promise> { const response = new OrchestrationStreamResponse(); @@ -121,10 +133,7 @@ export class OrchestrationClient { } }); - const stream = OrchestrationStream._create( - streamResponse, - controller - ); + const stream = OrchestrationStream._create(streamResponse, controller); response.stream = stream ._pipe(OrchestrationStream._processChunk) ._pipe(OrchestrationStream._processFinishReason, response) @@ -146,6 +155,7 @@ export class OrchestrationClient { const response = await OrchestrationClient.executeRequest({ prompt, requestConfig, + stream: false, config: this.config, deploymentConfig: this.deploymentConfig, destination: this.destination @@ -156,15 +166,15 @@ export class OrchestrationClient { async stream( prompt?: Prompt, controller = new AbortController(), + options?: StreamOptions, requestConfig?: CustomRequestConfig - ): Promise< - OrchestrationStreamResponse - > { + ): Promise> { return OrchestrationClient.createStreamResponse( { prompt, requestConfig, - streaming: true, + streamOptions: options, + stream: true, config: this.config, deploymentConfig: this.deploymentConfig, destination: this.destination @@ -177,7 +187,7 @@ export class OrchestrationClient { /** * @internal */ -export function constructCompletionPostRequestFromJson( +export function constructCompletionPostRequestFromJsonModuleConfig( config: string, prompt?: Prompt 
): Record { @@ -192,14 +202,38 @@ export function constructCompletionPostRequestFromJson( } } -function configureLLM(llm: LlmModuleConfig, stream: boolean): LlmModuleConfig { - if (stream) { - llm.model_params = { - ...llm.model_params, - stream_options: { include_usage: true } - }; - } - return llm; +function addStreamOptions( + moduleConfigs: ModuleConfigs, + streamOptions?: StreamOptions +): OrchestrationConfig { + const { llm, outputFiltering, chunkSize } = streamOptions; + + return { + stream: true, + stream_options: { + chunk_size: chunkSize + }, + module_configurations: { + ...moduleConfigs, + llm_module_config: { + ...moduleConfigs.llm_module_config, + stream_options: { + include_usage: llm?.includeUsage ?? true, + ...llm + } + }, + ...(outputFiltering && + Object.keys(outputFiltering).length && { + filtering_module_config: { + ...moduleConfigs.filtering_module_config, + output: { + ...(moduleConfigs.filtering_module_config?.output || {}), + stream_options: outputFiltering + } + } + }) + } + } as OrchestrationConfig; } /** @@ -208,13 +242,14 @@ function configureLLM(llm: LlmModuleConfig, stream: boolean): LlmModuleConfig { export function constructCompletionPostRequest( config: OrchestrationModuleConfig, prompt?: Prompt, - stream = false + stream?: boolean, + streamOptions?: StreamOptions ): CompletionPostRequest { const moduleConfigurations = { templating_module_config: { template: config.templating.template }, - llm_module_config: configureLLM(config.llm, stream), + llm_module_config: config.llm, ...(config?.filtering && Object.keys(config.filtering).length && { filtering_module_config: config.filtering @@ -230,10 +265,9 @@ export function constructCompletionPostRequest( }; return { - orchestration_config: { - stream, - module_configurations: moduleConfigurations - }, + orchestration_config: stream + ? 
addStreamOptions(moduleConfigurations, streamOptions) + : { module_configurations: moduleConfigurations, stream: false }, ...(prompt?.inputParams && { input_params: prompt.inputParams }), diff --git a/packages/orchestration/src/orchestration-completion-post-request-from-json.test.ts b/packages/orchestration/src/orchestration-completion-post-request-from-json.test.ts index 823e0930..49f2822b 100644 --- a/packages/orchestration/src/orchestration-completion-post-request-from-json.test.ts +++ b/packages/orchestration/src/orchestration-completion-post-request-from-json.test.ts @@ -1,11 +1,11 @@ -import { constructCompletionPostRequestFromJson } from './orchestration-client.js'; +import { constructCompletionPostRequestFromJsonModuleConfig } from './orchestration-client.js'; describe('construct completion post request from JSON', () => { it('should throw an error when invalid JSON is provided', () => { const invalidJsonConfig = '{ "module_configurations": {}, '; expect(() => - constructCompletionPostRequestFromJson(invalidJsonConfig) + constructCompletionPostRequestFromJsonModuleConfig(invalidJsonConfig) ).toThrow('Could not parse JSON'); }); @@ -32,7 +32,7 @@ describe('construct completion post request from JSON', () => { }; const completionPostRequestFromJson: Record = - constructCompletionPostRequestFromJson(jsonConfig); + constructCompletionPostRequestFromJsonModuleConfig(jsonConfig); expect(expectedCompletionPostRequestFromJson).toEqual( completionPostRequestFromJson @@ -85,7 +85,7 @@ describe('construct completion post request from JSON', () => { }; const completionPostRequestFromJson: Record = - constructCompletionPostRequestFromJson(jsonConfig, { + constructCompletionPostRequestFromJsonModuleConfig(jsonConfig, { inputParams, messagesHistory }); diff --git a/packages/orchestration/src/orchestration-stream.test.ts b/packages/orchestration/src/orchestration-stream.test.ts index 4f31a77a..1510cfd1 100644 --- a/packages/orchestration/src/orchestration-stream.test.ts 
+++ b/packages/orchestration/src/orchestration-stream.test.ts @@ -58,13 +58,9 @@ describe('Orchestration chat completion stream', () => { const asyncGeneratorChunk = OrchestrationStream._processChunk( originalChatCompletionStream ); - const asyncGeneratorFinishReason = - OrchestrationStream._processFinishReason( - new OrchestrationStream( - () => asyncGeneratorChunk, - new AbortController() - ) - ); + const asyncGeneratorFinishReason = OrchestrationStream._processFinishReason( + new OrchestrationStream(() => asyncGeneratorChunk, new AbortController()) + ); for await (const chunk of asyncGeneratorFinishReason) { expect(chunk).toBeDefined(); @@ -81,13 +77,9 @@ describe('Orchestration chat completion stream', () => { const asyncGeneratorChunk = OrchestrationStream._processChunk( originalChatCompletionStream ); - const asyncGeneratorTokenUsage = - OrchestrationStream._processTokenUsage( - new OrchestrationStream( - () => asyncGeneratorChunk, - new AbortController() - ) - ); + const asyncGeneratorTokenUsage = OrchestrationStream._processTokenUsage( + new OrchestrationStream(() => asyncGeneratorChunk, new AbortController()) + ); for await (const chunk of asyncGeneratorTokenUsage) { expect(chunk).toBeDefined(); diff --git a/packages/orchestration/src/orchestration-stream.ts b/packages/orchestration/src/orchestration-stream.ts index 9067df23..0ff6d92c 100644 --- a/packages/orchestration/src/orchestration-stream.ts +++ b/packages/orchestration/src/orchestration-stream.ts @@ -157,10 +157,7 @@ export class OrchestrationStream extends SseStream { this.controller ); } - return new OrchestrationStream( - () => processFn(this), - this.controller - ); + return new OrchestrationStream(() => processFn(this), this.controller); } public toContentStream( @@ -168,11 +165,7 @@ export class OrchestrationStream extends SseStream { choiceIndex?: number ): OrchestrationStream { return new OrchestrationStream( - () => - OrchestrationStream._processContentStream( - this, - choiceIndex - ), + 
() => OrchestrationStream._processContentStream(this, choiceIndex), this.controller ); } diff --git a/sample-code/src/orchestration.ts b/sample-code/src/orchestration.ts index 43def248..c6b24f71 100644 --- a/sample-code/src/orchestration.ts +++ b/sample-code/src/orchestration.ts @@ -49,9 +49,7 @@ export async function orchestrationChatCompletion(): Promise -> { +): Promise> { const orchestrationClient = new OrchestrationClient({ // define the language model to be used llm: { @@ -255,7 +253,8 @@ export async function orchestrationFromJSON(): Promise< './src/model-orchestration-config.json', 'utf-8' ); - const response = await OrchestrationClient.chatCompletionWithJson(jsonConfig); + const response = + await OrchestrationClient.chatCompletionWithJsonModuleConfig(jsonConfig); logger.info(response.getContent()); return response; From 495c7f64822119e20509cfc7d9226349288d7345 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Fri, 3 Jan 2025 20:41:20 +0100 Subject: [PATCH 28/53] refactor all suggestions4 --- .../orchestration/src/orchestration-client.ts | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/packages/orchestration/src/orchestration-client.ts b/packages/orchestration/src/orchestration-client.ts index 577405d4..4f595c44 100644 --- a/packages/orchestration/src/orchestration-client.ts +++ b/packages/orchestration/src/orchestration-client.ts @@ -206,7 +206,9 @@ function addStreamOptions( moduleConfigs: ModuleConfigs, streamOptions?: StreamOptions ): OrchestrationConfig { - const { llm, outputFiltering, chunkSize } = streamOptions; + const llm = streamOptions?.llm ?? {}; + const outputFiltering = streamOptions?.outputFiltering ?? {}; + const chunkSize = streamOptions?.chunkSize; return { stream: true, @@ -218,22 +220,21 @@ function addStreamOptions( llm_module_config: { ...moduleConfigs.llm_module_config, stream_options: { - include_usage: llm?.includeUsage ?? true, + include_usage: llm.includeUsage ?? 
true, ...llm } }, - ...(outputFiltering && - Object.keys(outputFiltering).length && { - filtering_module_config: { - ...moduleConfigs.filtering_module_config, - output: { - ...(moduleConfigs.filtering_module_config?.output || {}), - stream_options: outputFiltering - } + ...(Object.keys(outputFiltering).length && { + filtering_module_config: { + ...moduleConfigs.filtering_module_config, + output: { + ...(moduleConfigs.filtering_module_config?.output || {}), + stream_options: outputFiltering } - }) + } + }) } - } as OrchestrationConfig; + } as OrchestrationConfig; // TODO: Remove typecast when types are re-generated; } /** From 87201de59d8679bd896ca0a0a9a01574c368dcf8 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Sun, 5 Jan 2025 20:47:19 +0100 Subject: [PATCH 29/53] fix type tests --- tests/type-tests/test/orchestration.test-d.ts | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/type-tests/test/orchestration.test-d.ts b/tests/type-tests/test/orchestration.test-d.ts index 2583e2d1..12ff4cf4 100644 --- a/tests/type-tests/test/orchestration.test-d.ts +++ b/tests/type-tests/test/orchestration.test-d.ts @@ -1,6 +1,8 @@ import { expectError, expectType } from 'tsd'; import { - OrchestrationClient, + OrchestrationClient +} from '@sap-ai-sdk/orchestration'; +import type { CompletionPostResponse, OrchestrationResponse, TokenUsage, @@ -168,22 +170,20 @@ expectType>( * Chat Completion with JSON configuration. */ expectType>( - new OrchestrationClient( - `{ - "module_configurations": { - "llm_module_config": { - "model_name": "gpt-35-turbo-16k", - "model_params": { - "max_tokens": 50, - "temperature": 0.1 - } - }, - "templating_module_config": { - "template": [{ "role": "user", "content": "Hello!" 
}] + OrchestrationClient.chatCompletionWithJsonModuleConfig(`{ + "module_configurations": { + "llm_module_config": { + "model_name": "gpt-35-turbo-16k", + "model_params": { + "max_tokens": 50, + "temperature": 0.1 } + }, + "templating_module_config": { + "template": [{ "role": "user", "content": "Hello!" }] } - }` - ).chatCompletion() + } + }`) ); /** From c18265252af1c274b6160ce18517f6ce68126bfe Mon Sep 17 00:00:00 2001 From: cloud-sdk-js Date: Sun, 5 Jan 2025 20:00:17 +0000 Subject: [PATCH 30/53] fix: Changes from lint --- tests/type-tests/test/orchestration.test-d.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/type-tests/test/orchestration.test-d.ts b/tests/type-tests/test/orchestration.test-d.ts index 12ff4cf4..c4bbeada 100644 --- a/tests/type-tests/test/orchestration.test-d.ts +++ b/tests/type-tests/test/orchestration.test-d.ts @@ -1,7 +1,5 @@ import { expectError, expectType } from 'tsd'; -import { - OrchestrationClient -} from '@sap-ai-sdk/orchestration'; +import { OrchestrationClient } from '@sap-ai-sdk/orchestration'; import type { CompletionPostResponse, OrchestrationResponse, From 5c94d41d5b992a930474e97132c1f9028599de8e Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Mon, 6 Jan 2025 16:48:59 +0100 Subject: [PATCH 31/53] refactor everything, again --- .../orchestration/src/orchestration-client.ts | 267 ++++-------------- .../orchestration/src/orchestration-types.ts | 39 +++ .../orchestration/src/orchestration-utils.ts | 155 ++++++++++ 3 files changed, 256 insertions(+), 205 deletions(-) create mode 100644 packages/orchestration/src/orchestration-utils.ts diff --git a/packages/orchestration/src/orchestration-client.ts b/packages/orchestration/src/orchestration-client.ts index 4f595c44..e03c74fa 100644 --- a/packages/orchestration/src/orchestration-client.ts +++ b/packages/orchestration/src/orchestration-client.ts @@ -1,130 +1,122 @@ import { executeRequest } from '@sap-ai-sdk/core'; import { resolveDeploymentId } from 
'@sap-ai-sdk/ai-api/internal.js'; -import { OrchestrationResponse } from './orchestration-response.js'; +import { createLogger } from '@sap-cloud-sdk/util'; import { OrchestrationStream } from './orchestration-stream.js'; import { OrchestrationStreamResponse } from './orchestration-stream-response.js'; +import { OrchestrationResponse } from './orchestration-response.js'; +import { + constructCompletionPostRequest, + constructCompletionPostRequestFromJsonModuleConfig +} from './orchestration-utils.js'; import type { HttpResponse, CustomRequestConfig } from '@sap-cloud-sdk/http-client'; import type { ResourceGroupConfig } from '@sap-ai-sdk/ai-api/internal.js'; -import type { - CompletionPostRequest, - ModuleConfigs, - OrchestrationConfig -} from './client/api/schema/index.js'; import type { OrchestrationModuleConfig, - Prompt + Prompt, + RequestOptions, + StreamOptions } from './orchestration-types.js'; import type { OrchestrationStreamChunkResponse } from './orchestration-stream-chunk-response.js'; import type { HttpDestinationOrFetchOptions } from '@sap-cloud-sdk/connectivity'; -interface RequestOptions { - prompt?: Prompt; - requestConfig?: CustomRequestConfig; - stream?: boolean; - streamOptions?: StreamOptions; - jsonModuleConfig?: string; - deploymentConfig?: ResourceGroupConfig; - destination?: HttpDestinationOrFetchOptions; - config?: OrchestrationModuleConfig; -} - -interface ConfigurationOptions { - deploymentConfig?: ResourceGroupConfig; - destination?: HttpDestinationOrFetchOptions; - requestConfig?: CustomRequestConfig; -} - -interface StreamOptions { - chunkSize?: number; - llm?: { includeUsage?: boolean; [key: string]: any }; - // Add more options as they are implemented - // masking?: { }; - // grounding?: { }; - // inputFiltering?: { }; - outputFiltering?: { overlap?: number }; -} +const logger = createLogger({ messageContext: 'orchestration-client' }); /** * Get the orchestration client. 
*/ export class OrchestrationClient { - static async chatCompletionWithJsonModuleConfig( - jsonModuleConfig: string, + constructor( + private config: OrchestrationModuleConfig | string, + private deploymentConfig?: ResourceGroupConfig, + private destination?: HttpDestinationOrFetchOptions + ) { + try { + if (typeof config === 'string') { + JSON.parse(config); + } + } catch (error) { + throw new Error(`Could not parse JSON: ${error}`); + } + } + + async chatCompletion( prompt?: Prompt, - options?: ConfigurationOptions + requestConfig?: CustomRequestConfig ): Promise { - const response = await OrchestrationClient.executeRequest({ - jsonModuleConfig, + const response = await this.executeRequest({ prompt, - ...options + requestConfig, + stream: false }); return new OrchestrationResponse(response); } - static async streamWithJsonModuleConfig( - jsonModuleConfig: string, + async stream( prompt?: Prompt, controller = new AbortController(), - options?: ConfigurationOptions + options?: StreamOptions, + requestConfig?: CustomRequestConfig ): Promise> { - return OrchestrationClient.createStreamResponse( + if (typeof this.config === 'string' && options) { + logger.warn( + 'Stream options are not supported when using a JSON module config.' + ); + } + + return this.createStreamResponse( { - jsonModuleConfig, prompt, - ...options + requestConfig, + streamOptions: options }, controller ); } - private static async executeRequest( - options: RequestOptions - ): Promise { - const { - prompt, - requestConfig, - stream, - streamOptions, - jsonModuleConfig, - deploymentConfig, - destination, - config - } = options; - - const body = jsonModuleConfig - ? 
constructCompletionPostRequestFromJsonModuleConfig( - jsonModuleConfig, - prompt - ) - : constructCompletionPostRequest(config!, prompt, stream, streamOptions); + private async executeRequest(options: RequestOptions): Promise { + const { prompt, requestConfig, stream, streamOptions } = options; + + const body = + typeof this.config === 'string' + ? constructCompletionPostRequestFromJsonModuleConfig( + JSON.parse(this.config), + prompt, + stream + ) + : constructCompletionPostRequest( + this.config, + prompt, + stream, + streamOptions + ); const deploymentId = await resolveDeploymentId({ scenarioId: 'orchestration', - ...deploymentConfig + ...(this.deploymentConfig || {}) }); return executeRequest( { url: `/inference/deployments/${deploymentId}/completion`, - ...deploymentConfig + ...(this.deploymentConfig || {}) }, body, requestConfig, - destination + this.destination ); } - private static async createStreamResponse( + private async createStreamResponse( options: RequestOptions, controller: AbortController ): Promise> { const response = new OrchestrationStreamResponse(); - const streamResponse = await OrchestrationClient.executeRequest({ + const streamResponse = await this.executeRequest({ ...options, requestConfig: { ...options.requestConfig, @@ -141,139 +133,4 @@ export class OrchestrationClient { return response; } - - constructor( - private config: OrchestrationModuleConfig, - private deploymentConfig?: ResourceGroupConfig, - private destination?: HttpDestinationOrFetchOptions - ) {} - - async chatCompletion( - prompt?: Prompt, - requestConfig?: CustomRequestConfig - ): Promise { - const response = await OrchestrationClient.executeRequest({ - prompt, - requestConfig, - stream: false, - config: this.config, - deploymentConfig: this.deploymentConfig, - destination: this.destination - }); - return new OrchestrationResponse(response); - } - - async stream( - prompt?: Prompt, - controller = new AbortController(), - options?: StreamOptions, - requestConfig?: 
CustomRequestConfig - ): Promise> { - return OrchestrationClient.createStreamResponse( - { - prompt, - requestConfig, - streamOptions: options, - stream: true, - config: this.config, - deploymentConfig: this.deploymentConfig, - destination: this.destination - }, - controller - ); - } -} - -/** - * @internal - */ -export function constructCompletionPostRequestFromJsonModuleConfig( - config: string, - prompt?: Prompt -): Record { - try { - return { - messages_history: prompt?.messagesHistory || [], - input_params: prompt?.inputParams || {}, - orchestration_config: JSON.parse(config) - }; - } catch (error) { - throw new Error(`Could not parse JSON: ${error}`); - } -} - -function addStreamOptions( - moduleConfigs: ModuleConfigs, - streamOptions?: StreamOptions -): OrchestrationConfig { - const llm = streamOptions?.llm ?? {}; - const outputFiltering = streamOptions?.outputFiltering ?? {}; - const chunkSize = streamOptions?.chunkSize; - - return { - stream: true, - stream_options: { - chunk_size: chunkSize - }, - module_configurations: { - ...moduleConfigs, - llm_module_config: { - ...moduleConfigs.llm_module_config, - stream_options: { - include_usage: llm.includeUsage ?? 
true, - ...llm - } - }, - ...(Object.keys(outputFiltering).length && { - filtering_module_config: { - ...moduleConfigs.filtering_module_config, - output: { - ...(moduleConfigs.filtering_module_config?.output || {}), - stream_options: outputFiltering - } - } - }) - } - } as OrchestrationConfig; // TODO: Remove typecast when types are re-generated; -} - -/** - * @internal - */ -export function constructCompletionPostRequest( - config: OrchestrationModuleConfig, - prompt?: Prompt, - stream?: boolean, - streamOptions?: StreamOptions -): CompletionPostRequest { - const moduleConfigurations = { - templating_module_config: { - template: config.templating.template - }, - llm_module_config: config.llm, - ...(config?.filtering && - Object.keys(config.filtering).length && { - filtering_module_config: config.filtering - }), - ...(config?.masking && - Object.keys(config.masking).length && { - masking_module_config: config.masking - }), - ...(config?.grounding && - Object.keys(config.grounding).length && { - grounding_module_config: config.grounding - }) - }; - - return { - orchestration_config: stream - ? 
addStreamOptions(moduleConfigurations, streamOptions) - : { module_configurations: moduleConfigurations, stream: false }, - ...(prompt?.inputParams && { - input_params: prompt.inputParams - }), - ...(prompt?.messagesHistory && { - messages_history: prompt.messagesHistory - }) - }; } diff --git a/packages/orchestration/src/orchestration-types.ts b/packages/orchestration/src/orchestration-types.ts index 82ceaf77..40c436b3 100644 --- a/packages/orchestration/src/orchestration-types.ts +++ b/packages/orchestration/src/orchestration-types.ts @@ -1,7 +1,10 @@ +import type { CustomRequestConfig } from '@sap-cloud-sdk/http-client'; import type { ChatModel } from './model-types.js'; import type { ChatMessages, FilteringModuleConfig, + FilteringStreamOptions, + GlobalStreamOptions, GroundingModuleConfig, MaskingModuleConfig, LlmModuleConfig as OriginalLlmModuleConfig, @@ -56,3 +59,39 @@ export interface OrchestrationModuleConfig { */ grounding?: GroundingModuleConfig; } + +/** + * Request options for orchestration. + */ +export interface RequestOptions { + /** + * Prompt configuration. + */ + prompt?: Prompt; + /** + * Custom request configuration. + */ + requestConfig?: CustomRequestConfig; + /** + * Whether to stream the response. + */ + stream?: boolean; + /** + * Options for the stream. + */ + streamOptions?: StreamOptions; +} + +/** + * Options for the stream. + */ +export interface StreamOptions extends GlobalStreamOptions { + /** + * LLM specific stream options. + */ + llm?: { includeUsage?: boolean; [key: string]: any }; + /** + * Output filtering stream options. 
+ */ + outputFiltering?: FilteringStreamOptions; +} diff --git a/packages/orchestration/src/orchestration-utils.ts b/packages/orchestration/src/orchestration-utils.ts new file mode 100644 index 00000000..f10b8867 --- /dev/null +++ b/packages/orchestration/src/orchestration-utils.ts @@ -0,0 +1,155 @@ +import { createLogger } from '@sap-cloud-sdk/util'; +import type { + CompletionPostRequest, + FilteringStreamOptions, + ModuleConfigs, + OrchestrationConfig, + OutputFilteringConfig +} from './client/api/schema/index.js'; +import type { + Prompt, + StreamOptions, + LlmModuleConfig, + OrchestrationModuleConfig +} from './orchestration-types.js'; + +const logger = createLogger({ messageContext: 'orchestration-utils' }); + +/** + * @internal + */ +export function constructCompletionPostRequestFromJsonModuleConfig( + config: Record, + prompt?: Prompt, + stream?: boolean +): Record { + return { + messages_history: prompt?.messagesHistory || [], + input_params: prompt?.inputParams || {}, + orchestration_config: stream + ? 
{ ...config, stream: true } + : { ...config, stream: false } + }; +} + +/** + * @internal + */ +export function addStreamOptionsToLlmModuleConfig( + llmModuleConfig: LlmModuleConfig, + streamOptions?: StreamOptions +): LlmModuleConfig { + if (!streamOptions?.llm) { + return llmModuleConfig; + } + return { + ...llmModuleConfig, + model_params: { + ...llmModuleConfig.model_params, + stream_options: { + include_usage: true, + ...(llmModuleConfig.model_params.stream_options || {}), + ...streamOptions.llm + } + } + }; +} + +/** + * @internal + */ +export function addStreamOptionsToOutputFilteringConfig( + outputFilteringConfig: OutputFilteringConfig, + filteringStreamOptions: FilteringStreamOptions +): OutputFilteringConfig { + return { + ...outputFilteringConfig, + stream_options: { + ...(outputFilteringConfig.stream_options || {}), + ...filteringStreamOptions + } + }; +} + +/** + * @internal + */ +export function addStreamOptions( + moduleConfigs: ModuleConfigs, + streamOptions?: StreamOptions +): OrchestrationConfig { + const { llm_module_config, filtering_module_config } = moduleConfigs; + const outputFiltering = streamOptions?.outputFiltering; + const chunkSize = streamOptions?.chunk_size; + + if (!moduleConfigs?.filtering_module_config?.output && outputFiltering) { + logger.warn( + 'Output filter stream options are not applied because filtering module is not configured.' 
+ ); + } + + return { + stream: true, + stream_options: { + chunk_size: chunkSize + }, + module_configurations: { + ...moduleConfigs, + llm_module_config: addStreamOptionsToLlmModuleConfig( + llm_module_config, + streamOptions + ), + ...(outputFiltering && + filtering_module_config?.output && { + filtering_module_config: { + ...filtering_module_config, + output: addStreamOptionsToOutputFilteringConfig( + filtering_module_config.output, + outputFiltering + ) + } + }) + } + }; +} + +/** + * @internal + */ +export function constructCompletionPostRequest( + config: OrchestrationModuleConfig, + prompt?: Prompt, + stream?: boolean, + streamOptions?: StreamOptions +): CompletionPostRequest { + const moduleConfigurations = { + templating_module_config: { + template: config.templating.template + }, + llm_module_config: config.llm, + ...(config?.filtering && + Object.keys(config.filtering).length && { + filtering_module_config: config.filtering + }), + ...(config?.masking && + Object.keys(config.masking).length && { + masking_module_config: config.masking + }), + ...(config?.grounding && + Object.keys(config.grounding).length && { + grounding_module_config: config.grounding + }) + }; + + return { + orchestration_config: stream + ? 
addStreamOptions(moduleConfigurations, streamOptions) + : { module_configurations: moduleConfigurations, stream }, + ...(prompt?.inputParams && { + input_params: prompt.inputParams + }), + ...(prompt?.messagesHistory && { + messages_history: prompt.messagesHistory + }) + }; +} From f0bae6e5ffa84dc14e26c3686b190015d7401eb8 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Mon, 6 Jan 2025 16:51:42 +0100 Subject: [PATCH 32/53] merge main --- sample-code/src/orchestration.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sample-code/src/orchestration.ts b/sample-code/src/orchestration.ts index c6b24f71..134a2928 100644 --- a/sample-code/src/orchestration.ts +++ b/sample-code/src/orchestration.ts @@ -254,7 +254,7 @@ export async function orchestrationFromJSON(): Promise< 'utf-8' ); const response = - await OrchestrationClient.chatCompletionWithJsonModuleConfig(jsonConfig); + await new OrchestrationClient(jsonConfig).chatCompletion(); logger.info(response.getContent()); return response; From 3018ad540078068ffe819064a4e5bb35d2647452 Mon Sep 17 00:00:00 2001 From: cloud-sdk-js Date: Mon, 6 Jan 2025 15:55:18 +0000 Subject: [PATCH 33/53] fix: Changes from lint --- sample-code/src/orchestration.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sample-code/src/orchestration.ts b/sample-code/src/orchestration.ts index 134a2928..2a0547d6 100644 --- a/sample-code/src/orchestration.ts +++ b/sample-code/src/orchestration.ts @@ -253,8 +253,7 @@ export async function orchestrationFromJSON(): Promise< './src/model-orchestration-config.json', 'utf-8' ); - const response = - await new OrchestrationClient(jsonConfig).chatCompletion(); + const response = await new OrchestrationClient(jsonConfig).chatCompletion(); logger.info(response.getContent()); return response; From 2b03915664f4425395b7b68820a77ecf70491369 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Tue, 7 Jan 2025 17:44:32 +0100 Subject: [PATCH 34/53] adjust tests & logic --- 
...zure-openai-chat-completion-stream.test.ts | 6 ++--- .../src/orchestration-client.test.ts | 25 +++++++++++++------ .../orchestration/src/orchestration-client.ts | 4 +-- ...-completion-post-request-from-json.test.ts | 10 +------- ...hestration-completion-post-request.test.ts | 2 +- .../src/orchestration-filter-utility.test.ts | 2 +- .../src/orchestration-stream.test.ts | 6 ++--- .../orchestration/src/orchestration-types.ts | 6 ++++- .../orchestration/src/orchestration-utils.ts | 6 ++--- sample-code/src/orchestration.ts | 3 +-- tests/type-tests/test/orchestration.test-d.ts | 4 +-- 11 files changed, 38 insertions(+), 36 deletions(-) diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts index 8ded3b79..e2b249a8 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts @@ -38,12 +38,12 @@ describe('OpenAI chat completion stream', () => { it('should wrap the raw chunk', async () => { let output = ''; - const asnycGenerator = AzureOpenAiChatCompletionStream._processChunk( + const asyncGenerator = AzureOpenAiChatCompletionStream._processChunk( originalChatCompletionStream ); - for await (const chunk of asnycGenerator) { + for await (const chunk of asyncGenerator) { expect(chunk).toBeDefined(); - chunk.getDeltaContent() ? (output += chunk.getDeltaContent()) : null; + output += chunk.getDeltaContent() ?? 
''; } expect(output).toEqual('The capital of France is Paris.'); }); diff --git a/packages/orchestration/src/orchestration-client.test.ts b/packages/orchestration/src/orchestration-client.test.ts index 9bbeca40..997c0ed3 100644 --- a/packages/orchestration/src/orchestration-client.test.ts +++ b/packages/orchestration/src/orchestration-client.test.ts @@ -6,13 +6,13 @@ import { parseFileToString, parseMockResponse } from '../../../test-util/mock-http.js'; -import { - constructCompletionPostRequestFromJsonModuleConfig, - constructCompletionPostRequest, - OrchestrationClient -} from './orchestration-client.js'; +import { OrchestrationClient } from './orchestration-client.js'; import { buildAzureContentFilter } from './orchestration-filter-utility.js'; import { OrchestrationResponse } from './orchestration-response.js'; +import { + constructCompletionPostRequest, + constructCompletionPostRequestFromJsonModuleConfig +} from './orchestration-utils.js'; import type { CompletionPostResponse } from './client/api/schema/index.js'; import type { OrchestrationModuleConfig, @@ -89,7 +89,9 @@ describe('orchestration service client', () => { mockInference( { - data: constructCompletionPostRequestFromJsonModuleConfig(jsonConfig) + data: constructCompletionPostRequestFromJsonModuleConfig( + JSON.parse(jsonConfig) + ) }, { data: mockResponse, @@ -100,8 +102,7 @@ describe('orchestration service client', () => { } ); - const response = - await OrchestrationClient.chatCompletionWithJsonModuleConfig(jsonConfig); + const response = await new OrchestrationClient(jsonConfig).chatCompletion(); expect(response).toBeInstanceOf(OrchestrationResponse); expect(response.data).toEqual(mockResponse); @@ -426,4 +427,12 @@ describe('orchestration service client', () => { break; } }); + + it('should throw an error when invalid JSON is provided', () => { + const invalidJsonConfig = '{ "module_configurations": {}, '; + + expect(() => + new OrchestrationClient(invalidJsonConfig) + ).toThrow('Could not 
parse JSON'); + }); }); diff --git a/packages/orchestration/src/orchestration-client.ts b/packages/orchestration/src/orchestration-client.ts index e03c74fa..0b2f86b6 100644 --- a/packages/orchestration/src/orchestration-client.ts +++ b/packages/orchestration/src/orchestration-client.ts @@ -95,13 +95,13 @@ export class OrchestrationClient { const deploymentId = await resolveDeploymentId({ scenarioId: 'orchestration', - ...(this.deploymentConfig || {}) + ...(this.deploymentConfig ?? {}) }); return executeRequest( { url: `/inference/deployments/${deploymentId}/completion`, - ...(this.deploymentConfig || {}) + ...(this.deploymentConfig ?? {}) }, body, requestConfig, diff --git a/packages/orchestration/src/orchestration-completion-post-request-from-json.test.ts b/packages/orchestration/src/orchestration-completion-post-request-from-json.test.ts index 49f2822b..c9614686 100644 --- a/packages/orchestration/src/orchestration-completion-post-request-from-json.test.ts +++ b/packages/orchestration/src/orchestration-completion-post-request-from-json.test.ts @@ -1,14 +1,6 @@ -import { constructCompletionPostRequestFromJsonModuleConfig } from './orchestration-client.js'; +import { constructCompletionPostRequestFromJsonModuleConfig } from './orchestration-utils.js'; describe('construct completion post request from JSON', () => { - it('should throw an error when invalid JSON is provided', () => { - const invalidJsonConfig = '{ "module_configurations": {}, '; - - expect(() => - constructCompletionPostRequestFromJsonModuleConfig(invalidJsonConfig) - ).toThrow('Could not parse JSON'); - }); - it('should construct completion post request from JSON', () => { const jsonConfig = `{ "module_configurations": { diff --git a/packages/orchestration/src/orchestration-completion-post-request.test.ts b/packages/orchestration/src/orchestration-completion-post-request.test.ts index 1bb19b56..7d6a7207 100644 --- a/packages/orchestration/src/orchestration-completion-post-request.test.ts +++ 
b/packages/orchestration/src/orchestration-completion-post-request.test.ts @@ -1,4 +1,4 @@ -import { constructCompletionPostRequest } from './orchestration-client.js'; +import { constructCompletionPostRequest } from './orchestration-utils.js'; import { buildAzureContentFilter } from './orchestration-filter-utility.js'; import type { CompletionPostRequest } from './client/api/schema/index.js'; import type { OrchestrationModuleConfig } from './orchestration-types.js'; diff --git a/packages/orchestration/src/orchestration-filter-utility.test.ts b/packages/orchestration/src/orchestration-filter-utility.test.ts index e0c426c8..5fe7243f 100644 --- a/packages/orchestration/src/orchestration-filter-utility.test.ts +++ b/packages/orchestration/src/orchestration-filter-utility.test.ts @@ -1,5 +1,5 @@ -import { constructCompletionPostRequest } from './orchestration-client.js'; import { buildAzureContentFilter } from './orchestration-filter-utility.js'; +import { constructCompletionPostRequest } from './orchestration-utils.js'; import type { CompletionPostRequest, FilteringModuleConfig diff --git a/packages/orchestration/src/orchestration-stream.test.ts b/packages/orchestration/src/orchestration-stream.test.ts index 1510cfd1..5305f080 100644 --- a/packages/orchestration/src/orchestration-stream.test.ts +++ b/packages/orchestration/src/orchestration-stream.test.ts @@ -39,12 +39,12 @@ describe('Orchestration chat completion stream', () => { it('should wrap the raw chunk', async () => { let output = ''; - const asnycGenerator = OrchestrationStream._processChunk( + const asyncGenerator = OrchestrationStream._processChunk( originalChatCompletionStream ); - for await (const chunk of asnycGenerator) { + for await (const chunk of asyncGenerator) { expect(chunk).toBeDefined(); - chunk.getDeltaContent() ? (output += chunk.getDeltaContent()) : null; + output += chunk.getDeltaContent() ?? 
''; } expect(output).toMatchSnapshot(); }); diff --git a/packages/orchestration/src/orchestration-types.ts b/packages/orchestration/src/orchestration-types.ts index 40c436b3..cbedda2f 100644 --- a/packages/orchestration/src/orchestration-types.ts +++ b/packages/orchestration/src/orchestration-types.ts @@ -85,7 +85,7 @@ export interface RequestOptions { /** * Options for the stream. */ -export interface StreamOptions extends GlobalStreamOptions { +export interface StreamOptions { /** * LLM specific stream options. */ @@ -94,4 +94,8 @@ export interface StreamOptions extends GlobalStreamOptions { * Output filtering stream options. */ outputFiltering?: FilteringStreamOptions; + /** + * Global stream options. + */ + global?: GlobalStreamOptions; } diff --git a/packages/orchestration/src/orchestration-utils.ts b/packages/orchestration/src/orchestration-utils.ts index f10b8867..b7a69715 100644 --- a/packages/orchestration/src/orchestration-utils.ts +++ b/packages/orchestration/src/orchestration-utils.ts @@ -80,7 +80,7 @@ export function addStreamOptions( ): OrchestrationConfig { const { llm_module_config, filtering_module_config } = moduleConfigs; const outputFiltering = streamOptions?.outputFiltering; - const chunkSize = streamOptions?.chunk_size; + const globalOptions = streamOptions?.global; if (!moduleConfigs?.filtering_module_config?.output && outputFiltering) { logger.warn( @@ -90,9 +90,7 @@ export function addStreamOptions( return { stream: true, - stream_options: { - chunk_size: chunkSize - }, + stream_options: globalOptions, module_configurations: { ...moduleConfigs, llm_module_config: addStreamOptionsToLlmModuleConfig( diff --git a/sample-code/src/orchestration.ts b/sample-code/src/orchestration.ts index 134a2928..2a0547d6 100644 --- a/sample-code/src/orchestration.ts +++ b/sample-code/src/orchestration.ts @@ -253,8 +253,7 @@ export async function orchestrationFromJSON(): Promise< './src/model-orchestration-config.json', 'utf-8' ); - const response = - await new 
OrchestrationClient(jsonConfig).chatCompletion(); + const response = await new OrchestrationClient(jsonConfig).chatCompletion(); logger.info(response.getContent()); return response; diff --git a/tests/type-tests/test/orchestration.test-d.ts b/tests/type-tests/test/orchestration.test-d.ts index c4bbeada..8a3a2065 100644 --- a/tests/type-tests/test/orchestration.test-d.ts +++ b/tests/type-tests/test/orchestration.test-d.ts @@ -168,7 +168,7 @@ expectType>( * Chat Completion with JSON configuration. */ expectType>( - OrchestrationClient.chatCompletionWithJsonModuleConfig(`{ + new OrchestrationClient(`{ "module_configurations": { "llm_module_config": { "model_name": "gpt-35-turbo-16k", @@ -181,7 +181,7 @@ expectType>( "template": [{ "role": "user", "content": "Hello!" }] } } - }`) + }`).chatCompletion() ); /** From 5dcbf068e3c3f9cb7675cd077f7d1e5898f26f31 Mon Sep 17 00:00:00 2001 From: cloud-sdk-js Date: Tue, 7 Jan 2025 16:46:14 +0000 Subject: [PATCH 35/53] fix: Changes from lint --- packages/orchestration/src/orchestration-client.test.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/orchestration/src/orchestration-client.test.ts b/packages/orchestration/src/orchestration-client.test.ts index 997c0ed3..016b3a5c 100644 --- a/packages/orchestration/src/orchestration-client.test.ts +++ b/packages/orchestration/src/orchestration-client.test.ts @@ -431,8 +431,8 @@ describe('orchestration service client', () => { it('should throw an error when invalid JSON is provided', () => { const invalidJsonConfig = '{ "module_configurations": {}, '; - expect(() => - new OrchestrationClient(invalidJsonConfig) - ).toThrow('Could not parse JSON'); + expect(() => new OrchestrationClient(invalidJsonConfig)).toThrow( + 'Could not parse JSON' + ); }); }); From cb35f424b26c6206e10ad1ca269ddcf168cd306a Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Tue, 7 Jan 2025 18:28:15 +0100 Subject: [PATCH 36/53] final logic fixes --- 
.../orchestration/src/orchestration-client.ts | 1 + ...tion-completion-post-request-from-json.test.ts | 15 ++++++++++----- .../orchestration-completion-post-request.test.ts | 8 -------- packages/orchestration/src/orchestration-utils.ts | 13 +++++++++---- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/packages/orchestration/src/orchestration-client.ts b/packages/orchestration/src/orchestration-client.ts index 0b2f86b6..659001ef 100644 --- a/packages/orchestration/src/orchestration-client.ts +++ b/packages/orchestration/src/orchestration-client.ts @@ -70,6 +70,7 @@ export class OrchestrationClient { { prompt, requestConfig, + stream: true, streamOptions: options }, controller diff --git a/packages/orchestration/src/orchestration-completion-post-request-from-json.test.ts b/packages/orchestration/src/orchestration-completion-post-request-from-json.test.ts index c9614686..d58e7985 100644 --- a/packages/orchestration/src/orchestration-completion-post-request-from-json.test.ts +++ b/packages/orchestration/src/orchestration-completion-post-request-from-json.test.ts @@ -24,7 +24,9 @@ describe('construct completion post request from JSON', () => { }; const completionPostRequestFromJson: Record = - constructCompletionPostRequestFromJsonModuleConfig(jsonConfig); + constructCompletionPostRequestFromJsonModuleConfig( + JSON.parse(jsonConfig) + ); expect(expectedCompletionPostRequestFromJson).toEqual( completionPostRequestFromJson @@ -77,10 +79,13 @@ describe('construct completion post request from JSON', () => { }; const completionPostRequestFromJson: Record = - constructCompletionPostRequestFromJsonModuleConfig(jsonConfig, { - inputParams, - messagesHistory - }); + constructCompletionPostRequestFromJsonModuleConfig( + JSON.parse(jsonConfig), + { + inputParams, + messagesHistory + } + ); expect(expectedCompletionPostRequestFromJson).toEqual( completionPostRequestFromJson diff --git a/packages/orchestration/src/orchestration-completion-post-request.test.ts 
b/packages/orchestration/src/orchestration-completion-post-request.test.ts index 7d6a7207..8c0c64c4 100644 --- a/packages/orchestration/src/orchestration-completion-post-request.test.ts +++ b/packages/orchestration/src/orchestration-completion-post-request.test.ts @@ -17,7 +17,6 @@ describe('construct completion post request', () => { it('should construct completion post request with llm and templating module', async () => { const expectedCompletionPostRequest: CompletionPostRequest = { orchestration_config: { - stream: false, module_configurations: { templating_module_config: defaultConfig.templating, llm_module_config: defaultConfig.llm @@ -37,7 +36,6 @@ describe('construct completion post request', () => { }; const expectedCompletionPostRequest: CompletionPostRequest = { orchestration_config: { - stream: false, module_configurations: { templating_module_config: config.templating, llm_module_config: config.llm @@ -64,7 +62,6 @@ describe('construct completion post request', () => { const inputParams = { phrase: 'I hate you.', number: '3' }; const expectedCompletionPostRequest: CompletionPostRequest = { orchestration_config: { - stream: false, module_configurations: { templating_module_config: config.templating, llm_module_config: config.llm @@ -92,7 +89,6 @@ describe('construct completion post request', () => { const inputParams = {}; const expectedCompletionPostRequest: CompletionPostRequest = { orchestration_config: { - stream: false, module_configurations: { templating_module_config: config.templating, llm_module_config: config.llm @@ -116,7 +112,6 @@ describe('construct completion post request', () => { }; const expectedCompletionPostRequest: CompletionPostRequest = { orchestration_config: { - stream: false, module_configurations: { templating_module_config: config.templating, llm_module_config: config.llm @@ -153,7 +148,6 @@ describe('construct completion post request', () => { ]; const expectedCompletionPostRequest: CompletionPostRequest = { 
orchestration_config: { - stream: false, module_configurations: { templating_module_config: config.templating, llm_module_config: config.llm @@ -175,7 +169,6 @@ describe('construct completion post request', () => { }; const expectedCompletionPostRequest: CompletionPostRequest = { orchestration_config: { - stream: false, module_configurations: { templating_module_config: config.templating, llm_module_config: config.llm, @@ -196,7 +189,6 @@ describe('construct completion post request', () => { }; const expectedCompletionPostRequest: CompletionPostRequest = { orchestration_config: { - stream: false, module_configurations: { templating_module_config: config.templating, llm_module_config: config.llm diff --git a/packages/orchestration/src/orchestration-utils.ts b/packages/orchestration/src/orchestration-utils.ts index b7a69715..ec832e2c 100644 --- a/packages/orchestration/src/orchestration-utils.ts +++ b/packages/orchestration/src/orchestration-utils.ts @@ -23,12 +23,17 @@ export function constructCompletionPostRequestFromJsonModuleConfig( prompt?: Prompt, stream?: boolean ): Record { + const orchestration_config = { ...config }; + if (stream) { + orchestration_config.stream = true; + } else { + delete orchestration_config.stream; + } + return { messages_history: prompt?.messagesHistory || [], input_params: prompt?.inputParams || {}, - orchestration_config: stream - ? { ...config, stream: true } - : { ...config, stream: false } + orchestration_config }; } @@ -142,7 +147,7 @@ export function constructCompletionPostRequest( return { orchestration_config: stream ? 
addStreamOptions(moduleConfigurations, streamOptions) - : { module_configurations: moduleConfigurations, stream }, + : { module_configurations: moduleConfigurations }, ...(prompt?.inputParams && { input_params: prompt.inputParams }), From 15bd1cd4784cb20399f81cc43e6699e4a23588bd Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Wed, 8 Jan 2025 10:45:59 +0100 Subject: [PATCH 37/53] Merge main --- packages/orchestration/src/orchestration-client.ts | 6 ++++++ sample-code/src/orchestration.ts | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/packages/orchestration/src/orchestration-client.ts b/packages/orchestration/src/orchestration-client.ts index 659001ef..4ea9f6e1 100644 --- a/packages/orchestration/src/orchestration-client.ts +++ b/packages/orchestration/src/orchestration-client.ts @@ -28,6 +28,12 @@ const logger = createLogger({ messageContext: 'orchestration-client' }); * Get the orchestration client. */ export class OrchestrationClient { + /** + * Creates an instance of the orchestration client. + * @param config - Orchestration module configuration. This can either be an `OrchestrationModuleConfig` object or a JSON string obtained from AI Launchpad. + * @param deploymentConfig - Deployment configuration. + * @param destination - The destination to use for the request. 
+ */ constructor( private config: OrchestrationModuleConfig | string, private deploymentConfig?: ResourceGroupConfig, diff --git a/sample-code/src/orchestration.ts b/sample-code/src/orchestration.ts index 2a0547d6..9bce462a 100644 --- a/sample-code/src/orchestration.ts +++ b/sample-code/src/orchestration.ts @@ -33,7 +33,7 @@ export async function orchestrationChatCompletion(): Promise Date: Wed, 8 Jan 2025 11:26:39 +0100 Subject: [PATCH 38/53] Add ability to remove options --- packages/orchestration/src/orchestration-client.ts | 2 +- packages/orchestration/src/orchestration-types.ts | 2 +- packages/orchestration/src/orchestration-utils.ts | 12 +++++++----- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/packages/orchestration/src/orchestration-client.ts b/packages/orchestration/src/orchestration-client.ts index 4ea9f6e1..c1f275ac 100644 --- a/packages/orchestration/src/orchestration-client.ts +++ b/packages/orchestration/src/orchestration-client.ts @@ -63,7 +63,7 @@ export class OrchestrationClient { async stream( prompt?: Prompt, controller = new AbortController(), - options?: StreamOptions, + options: StreamOptions = {}, requestConfig?: CustomRequestConfig ): Promise> { if (typeof this.config === 'string' && options) { diff --git a/packages/orchestration/src/orchestration-types.ts b/packages/orchestration/src/orchestration-types.ts index cbedda2f..fc04faba 100644 --- a/packages/orchestration/src/orchestration-types.ts +++ b/packages/orchestration/src/orchestration-types.ts @@ -89,7 +89,7 @@ export interface StreamOptions { /** * LLM specific stream options. */ - llm?: { includeUsage?: boolean; [key: string]: any }; + llm?: { include_usage?: boolean; [key: string]: any }; /** * Output filtering stream options. 
*/ diff --git a/packages/orchestration/src/orchestration-utils.ts b/packages/orchestration/src/orchestration-utils.ts index ec832e2c..198e3080 100644 --- a/packages/orchestration/src/orchestration-utils.ts +++ b/packages/orchestration/src/orchestration-utils.ts @@ -51,11 +51,13 @@ export function addStreamOptionsToLlmModuleConfig( ...llmModuleConfig, model_params: { ...llmModuleConfig.model_params, - stream_options: { - include_usage: true, - ...(llmModuleConfig.model_params.stream_options || {}), - ...streamOptions.llm - } + ...((streamOptions || llmModuleConfig.model_params.stream_options) && { + stream_options: { + include_usage: true, + ...(llmModuleConfig.model_params.stream_options || {}), + ...streamOptions.llm + } + }) } }; } From ee5b88b8e8e781ac9063459097b2ac3291f8948a Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Wed, 8 Jan 2025 11:45:22 +0100 Subject: [PATCH 39/53] last exception --- packages/orchestration/src/orchestration-utils.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/orchestration/src/orchestration-utils.ts b/packages/orchestration/src/orchestration-utils.ts index 21b912e9..4a288fe0 100644 --- a/packages/orchestration/src/orchestration-utils.ts +++ b/packages/orchestration/src/orchestration-utils.ts @@ -51,7 +51,7 @@ export function addStreamOptionsToLlmModuleConfig( ...llmModuleConfig, model_params: { ...llmModuleConfig.model_params, - ...((streamOptions || llmModuleConfig.model_params.stream_options) && { + ...((streamOptions && streamOptions?.llm !== null) || llmModuleConfig.model_params.stream_options) && { stream_options: { include_usage: true, ...(llmModuleConfig.model_params.stream_options || {}), From d45494170af7619946c0d4cc86ce4dfd84742363 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Wed, 8 Jan 2025 15:54:38 +0100 Subject: [PATCH 40/53] add docs --- packages/foundation-models/README.md | 2 +- packages/orchestration/README.md | 35 ++++++++++++++++--- .../orchestration-stream-chunk-response.ts | 2 +- 
.../src/orchestration-stream-response.ts | 2 +- .../orchestration/src/orchestration-stream.ts | 23 ++++++------ .../orchestration/src/orchestration-utils.ts | 4 +-- sample-code/src/index.ts | 2 +- sample-code/src/orchestration.ts | 6 ++-- sample-code/src/server.ts | 4 +-- tests/type-tests/test/azure-openai.test-d.ts | 8 +++-- 10 files changed, 60 insertions(+), 28 deletions(-) diff --git a/packages/foundation-models/README.md b/packages/foundation-models/README.md index 82649fae..0bbb0b15 100644 --- a/packages/foundation-models/README.md +++ b/packages/foundation-models/README.md @@ -178,7 +178,7 @@ console.log(`Token usage: ${JSON.stringify(tokenUsage)}\n`); ##### Streaming the Delta Content -The client provides a helper method to extract delta content and stream string directly. +The client provides a helper method to extract the text chunks as strings: ```ts for await (const chunk of response.stream.toContentStream()) { diff --git a/packages/orchestration/README.md b/packages/orchestration/README.md index 6d6e8c00..34926054 100644 --- a/packages/orchestration/README.md +++ b/packages/orchestration/README.md @@ -87,7 +87,7 @@ In addition to the examples below, you can find more **sample code** [here](http ### Streaming -The `OrchestrationClient` supports streaming response for chat completion requests based on the [Server-sent events](https://html.spec.whatwg.org/multipage/server-sent-events.html#server-sent-events) standard. +The `OrchestrationClient` supports streaming responses for chat completion requests based on the [Server-sent events](https://html.spec.whatwg.org/multipage/server-sent-events.html#server-sent-events) standard. Use the `stream()` method to receive a stream of chunk responses from the model. After consuming the stream, call the helper methods to get the finish reason and token usage information. 
@@ -122,7 +122,7 @@ console.log(`Token usage: ${JSON.stringify(tokenUsage)}\n`); #### Streaming the Delta Content -The client provides a helper method to extract delta content and stream string directly. +The client provides a helper method to extract the text chunks as strings: ```ts for await (const chunk of response.stream.toContentStream()) { @@ -130,8 +130,7 @@ for await (const chunk of response.stream.toContentStream()) { } ``` -Each chunk will be a defined string containing the delta content. -Set `choiceIndex` parameter for `toContentStream()` method to stream a specific choice. +Each chunk will be a string containing the delta content. #### Streaming with Abort Controller @@ -173,6 +172,34 @@ for await (const chunk of response.stream) { In this example, streaming request will be aborted after one second. Abort controller can be useful, e.g., when end-user wants to stop the stream or refreshes the page. +#### Stream Options +The orchestration service offers multiple streaming options, which you can configure in addition to the llm's streaming options. There are two ways to add specific streaming options to your client, either at initalization, or dynamically when calling the stream API. + +Dynamically setting these options after client initialization is particularly helpful when you've initialized a client with a config ment for regular chat completion and now want to switch to using streaming. + +You can check the list of available stream options in the [orchestration service's documentation](https://help.sap.com/docs/sap-ai-core/sap-ai-core-service-guide/streaming). 
+ +An example for setting the streaming options when calling the stream API looks like the following: +```ts +const response = orchestrationClient.stream( + { + inputParams: { country: 'France' } + }, + controller, + { + llm: { include_usage: false }, + global: { chunk_size: 10 }, + outputFiltering: { overlap: 200 } + } +) +``` + +Usage metrics are collected by default, if you do not want to receive them, set include_usage to false. +If you don't want any streaming options as part of your call to the LLM, set options.llm = null. + +> [!NOTE] +> When initalizing a client with a JSON module config, providing streaming options is not possible. + ### Templating Use the orchestration client with templating to pass a prompt containing placeholders that will be replaced with input parameters during a chat completion request. diff --git a/packages/orchestration/src/orchestration-stream-chunk-response.ts b/packages/orchestration/src/orchestration-stream-chunk-response.ts index 78f2f834..a180938e 100644 --- a/packages/orchestration/src/orchestration-stream-chunk-response.ts +++ b/packages/orchestration/src/orchestration-stream-chunk-response.ts @@ -5,7 +5,7 @@ import type { } from './client/api/schema/index.js'; /** - * Orchestration chat completion stream chunk response. + * Orchestration stream chunk response. */ export class OrchestrationStreamChunkResponse { constructor(public readonly data: CompletionPostResponseStreaming) { diff --git a/packages/orchestration/src/orchestration-stream-response.ts b/packages/orchestration/src/orchestration-stream-response.ts index ce03cf75..ed2f3290 100644 --- a/packages/orchestration/src/orchestration-stream-response.ts +++ b/packages/orchestration/src/orchestration-stream-response.ts @@ -2,7 +2,7 @@ import type { TokenUsage } from './client/api/schema/index.js'; import type { OrchestrationStream } from './orchestration-stream.js'; /** - * Orchestration chat completion stream response. + * Orchestration stream response. 
*/ export class OrchestrationStreamResponse { private _usage: TokenUsage | undefined; diff --git a/packages/orchestration/src/orchestration-stream.ts b/packages/orchestration/src/orchestration-stream.ts index 0ff6d92c..a889bacd 100644 --- a/packages/orchestration/src/orchestration-stream.ts +++ b/packages/orchestration/src/orchestration-stream.ts @@ -14,13 +14,13 @@ const logger = createLogger({ }); /** - * Chat completion stream containing post-processing functions. + * Orchestration stream containing post-processing functions. */ export class OrchestrationStream extends SseStream { /** - * Create a chat completion stream based on the http response. + * Create an orchestration stream based on the http response. * @param response - Http response. - * @returns Chat completion stream. + * @returns An orchestration stream. * @internal */ public static _create( @@ -37,7 +37,7 @@ export class OrchestrationStream extends SseStream { /** * Wrap raw chunk data with chunk response class to provide helper functions. - * @param stream - Chat completion stream. + * @param stream - Orchestration stream. * @internal */ static async *_processChunk( @@ -113,16 +113,15 @@ export class OrchestrationStream extends SseStream { /** * Transform a stream of chunks into a stream of content strings. - * @param stream - Chat completion stream. + * @param stream - Orchestration stream. * @param choiceIndex - The index of the choice to parse. * @internal */ static async *_processContentStream( stream: OrchestrationStream, - choiceIndex = 0 ): AsyncGenerator { for await (const chunk of stream) { - const deltaContent = chunk.getDeltaContent(choiceIndex); + const deltaContent = chunk.getDeltaContent(); if (!deltaContent) { continue; } @@ -160,12 +159,16 @@ export class OrchestrationStream extends SseStream { return new OrchestrationStream(() => processFn(this), this.controller); } + /** + * Transform the stream of chunks into a stream of content strings. + * @param this - Orchestration stream. 
+ * @returns A stream of content strings. + */ public toContentStream( - this: OrchestrationStream, - choiceIndex?: number + this: OrchestrationStream ): OrchestrationStream { return new OrchestrationStream( - () => OrchestrationStream._processContentStream(this, choiceIndex), + () => OrchestrationStream._processContentStream(this), this.controller ); } diff --git a/packages/orchestration/src/orchestration-utils.ts b/packages/orchestration/src/orchestration-utils.ts index 4a288fe0..3f753c4b 100644 --- a/packages/orchestration/src/orchestration-utils.ts +++ b/packages/orchestration/src/orchestration-utils.ts @@ -44,14 +44,14 @@ export function addStreamOptionsToLlmModuleConfig( llmModuleConfig: LlmModuleConfig, streamOptions?: StreamOptions ): LlmModuleConfig { - if (!streamOptions?.llm) { + if (streamOptions?.llm === undefined) { return llmModuleConfig; } return { ...llmModuleConfig, model_params: { ...llmModuleConfig.model_params, - ...((streamOptions && streamOptions?.llm !== null) || llmModuleConfig.model_params.stream_options) && { + ...(streamOptions?.llm !== null && { stream_options: { include_usage: true, ...(llmModuleConfig.model_params.stream_options || {}), diff --git a/sample-code/src/index.ts b/sample-code/src/index.ts index cc0f5bbc..aca6b195 100644 --- a/sample-code/src/index.ts +++ b/sample-code/src/index.ts @@ -12,7 +12,7 @@ export { orchestrationOutputFiltering, orchestrationRequestConfig, orchestrationCompletionMasking, - orchestrationFromJSON, + orchestrationFromJson, orchestrationGrounding } from './orchestration.js'; export { diff --git a/sample-code/src/orchestration.ts b/sample-code/src/orchestration.ts index 9bce462a..e03cedc4 100644 --- a/sample-code/src/orchestration.ts +++ b/sample-code/src/orchestration.ts @@ -61,13 +61,13 @@ export async function chatCompletionStream( template: [ { role: 'user', - content: 'Give me a short introduction of SAP Cloud SDK.' 
+ content: 'Give me a long introduction of {{?input}}' } ] } }); - const response = await orchestrationClient.stream(undefined, controller); + const response = await orchestrationClient.stream({ inputParams: { input: 'SAP Cloud SDK' } }, controller); return response; } @@ -245,7 +245,7 @@ export async function orchestrationRequestConfig(): Promise { // You can also provide the JSON configuration as a plain string in the code directly instead. diff --git a/sample-code/src/server.ts b/sample-code/src/server.ts index 35fec453..b7b54840 100644 --- a/sample-code/src/server.ts +++ b/sample-code/src/server.ts @@ -14,7 +14,7 @@ import { orchestrationOutputFiltering, orchestrationRequestConfig, chatCompletionStream as orchestrationChatCompletionStream, - orchestrationFromJSON, + orchestrationFromJson, orchestrationGrounding } from './orchestration.js'; import { @@ -252,7 +252,7 @@ app.get('/orchestration/:sampleCase', async (req, res) => { inputFiltering: orchestrationInputFiltering, outputFiltering: orchestrationOutputFiltering, requestConfig: orchestrationRequestConfig, - fromJSON: orchestrationFromJSON + fromJson: orchestrationFromJson }[sampleCase] || orchestrationChatCompletion; try { diff --git a/tests/type-tests/test/azure-openai.test-d.ts b/tests/type-tests/test/azure-openai.test-d.ts index a7f93abe..f80a2b0d 100644 --- a/tests/type-tests/test/azure-openai.test-d.ts +++ b/tests/type-tests/test/azure-openai.test-d.ts @@ -1,12 +1,14 @@ import { expectType } from 'tsd'; import { - type AzureOpenAiChatModel, - AzureOpenAiEmbeddingResponse, AzureOpenAiChatClient, - AzureOpenAiEmbeddingClient, + AzureOpenAiEmbeddingClient +} from '@sap-ai-sdk/foundation-models'; +import type { + AzureOpenAiEmbeddingResponse, AzureOpenAiChatCompletionResponse, AzureOpenAiCreateChatCompletionResponse, AzureOpenAiCompletionUsage, + AzureOpenAiChatModel, AzureOpenAiChatCompletionStreamResponse, AzureOpenAiChatCompletionStreamChunkResponse, AzureOpenAiChatCompletionStream From 
b73ad7d97f8bb614763dc2a48206a330dd1d7d3d Mon Sep 17 00:00:00 2001 From: cloud-sdk-js Date: Wed, 8 Jan 2025 14:55:45 +0000 Subject: [PATCH 41/53] fix: Changes from lint --- packages/orchestration/README.md | 4 +++- packages/orchestration/src/orchestration-stream.ts | 2 +- sample-code/src/orchestration.ts | 5 ++++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/packages/orchestration/README.md b/packages/orchestration/README.md index 34926054..cfed8220 100644 --- a/packages/orchestration/README.md +++ b/packages/orchestration/README.md @@ -173,6 +173,7 @@ In this example, streaming request will be aborted after one second. Abort controller can be useful, e.g., when end-user wants to stop the stream or refreshes the page. #### Stream Options + The orchestration service offers multiple streaming options, which you can configure in addition to the llm's streaming options. There are two ways to add specific streaming options to your client, either at initalization, or dynamically when calling the stream API. Dynamically setting these options after client initialization is particularly helpful when you've initialized a client with a config ment for regular chat completion and now want to switch to using streaming. @@ -180,6 +181,7 @@ Dynamically setting these options after client initialization is particularly he You can check the list of available stream options in the [orchestration service's documentation](https://help.sap.com/docs/sap-ai-core/sap-ai-core-service-guide/streaming). An example for setting the streaming options when calling the stream API looks like the following: + ```ts const response = orchestrationClient.stream( { @@ -191,7 +193,7 @@ const response = orchestrationClient.stream( global: { chunk_size: 10 }, outputFiltering: { overlap: 200 } } -) +); ``` Usage metrics are collected by default, if you do not want to receive them, set include_usage to false. 
diff --git a/packages/orchestration/src/orchestration-stream.ts b/packages/orchestration/src/orchestration-stream.ts index a889bacd..89623a20 100644 --- a/packages/orchestration/src/orchestration-stream.ts +++ b/packages/orchestration/src/orchestration-stream.ts @@ -118,7 +118,7 @@ export class OrchestrationStream extends SseStream { * @internal */ static async *_processContentStream( - stream: OrchestrationStream, + stream: OrchestrationStream ): AsyncGenerator { for await (const chunk of stream) { const deltaContent = chunk.getDeltaContent(); diff --git a/sample-code/src/orchestration.ts b/sample-code/src/orchestration.ts index e03cedc4..ce524761 100644 --- a/sample-code/src/orchestration.ts +++ b/sample-code/src/orchestration.ts @@ -67,7 +67,10 @@ export async function chatCompletionStream( } }); - const response = await orchestrationClient.stream({ inputParams: { input: 'SAP Cloud SDK' } }, controller); + const response = await orchestrationClient.stream( + { inputParams: { input: 'SAP Cloud SDK' } }, + controller + ); return response; } From 1eb65d7e134969aa880197a07779a45fc084eb50 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Wed, 8 Jan 2025 16:08:01 +0100 Subject: [PATCH 42/53] vale --- packages/orchestration/README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/orchestration/README.md b/packages/orchestration/README.md index cfed8220..be2494d3 100644 --- a/packages/orchestration/README.md +++ b/packages/orchestration/README.md @@ -174,9 +174,10 @@ Abort controller can be useful, e.g., when end-user wants to stop the stream or #### Stream Options -The orchestration service offers multiple streaming options, which you can configure in addition to the llm's streaming options. There are two ways to add specific streaming options to your client, either at initalization, or dynamically when calling the stream API. 
+The orchestration service offers multiple streaming options, which you can configure in addition to the LLM's streaming options. +There are two ways to add specific streaming options to your client, either at initalization, or dynamically when calling the stream API. -Dynamically setting these options after client initialization is particularly helpful when you've initialized a client with a config ment for regular chat completion and now want to switch to using streaming. +Dynamically setting these options after client initialization is particularly helpful when you've initialized a client with a config meant for regular chat completion and now want to switch to using streaming. You can check the list of available stream options in the [orchestration service's documentation](https://help.sap.com/docs/sap-ai-core/sap-ai-core-service-guide/streaming). From 27a335056eea5069c593088e5c7b4fe77665f441 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Wed, 8 Jan 2025 16:14:13 +0100 Subject: [PATCH 43/53] allow-list llm --- styles/config/vocabularies/SAP/accept.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/styles/config/vocabularies/SAP/accept.txt b/styles/config/vocabularies/SAP/accept.txt index 35b4cc00..5c717847 100644 --- a/styles/config/vocabularies/SAP/accept.txt +++ b/styles/config/vocabularies/SAP/accept.txt @@ -137,3 +137,6 @@ seldomly lookups CDS + +llm's +[Ll][Ll][Mm]'s \ No newline at end of file From 5351267a11db11559eaadb3d609cbe8b15903dbe Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Thu, 9 Jan 2025 17:24:35 +0100 Subject: [PATCH 44/53] add tests and docs --- packages/orchestration/README.md | 9 ++-- .../src/orchestration-client.test.ts | 2 + ...rchestration-stream-chunk-response.test.ts | 19 +++---- .../src/orchestration-utils.temp.ts | 4 ++ packages/orchestration/tsconfig.json | 2 +- ...sponse-token-usage-and-finish-reason.json} | 0 ...ion-stream-chunk-response-token-usage.json | 49 ------------------- 7 files changed, 19 insertions(+), 66 
deletions(-) create mode 100644 packages/orchestration/src/orchestration-utils.temp.ts rename test-util/data/orchestration/{orchestration-chat-completion-stream-chunk-response-finish-reason.json => orchestration-chat-completion-stream-chunk-response-token-usage-and-finish-reason.json} (100%) delete mode 100644 test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-token-usage.json diff --git a/packages/orchestration/README.md b/packages/orchestration/README.md index be2494d3..7bcf268f 100644 --- a/packages/orchestration/README.md +++ b/packages/orchestration/README.md @@ -175,9 +175,10 @@ Abort controller can be useful, e.g., when end-user wants to stop the stream or #### Stream Options The orchestration service offers multiple streaming options, which you can configure in addition to the LLM's streaming options. -There are two ways to add specific streaming options to your client, either at initalization, or dynamically when calling the stream API. +These include options like defining the maximum number of characters per chunk or modifying the output filter behavior. +There are two ways to add specific streaming options to your client, either at initialization of orchestration client, or when calling the stream API. -Dynamically setting these options after client initialization is particularly helpful when you've initialized a client with a config meant for regular chat completion and now want to switch to using streaming. +Setting streaming options dynamically could be useful if an initialized orchestration client will also be used for streaming. You can check the list of available stream options in the [orchestration service's documentation](https://help.sap.com/docs/sap-ai-core/sap-ai-core-service-guide/streaming). @@ -197,8 +198,8 @@ const response = orchestrationClient.stream( ); ``` -Usage metrics are collected by default, if you do not want to receive them, set include_usage to false.
-If you don't want any streaming options as part of your call to the LLM, set options.llm = null. +Usage metrics are collected by default, if you do not want to receive them, set `include_usage` to `false`. +If you don't want any streaming options as part of your call to the LLM, set `streamOptions.llm` to `null`. > [!NOTE] > When initalizing a client with a JSON module config, providing streaming options is not possible. diff --git a/packages/orchestration/src/orchestration-client.test.ts b/packages/orchestration/src/orchestration-client.test.ts index 016b3a5c..731be1eb 100644 --- a/packages/orchestration/src/orchestration-client.test.ts +++ b/packages/orchestration/src/orchestration-client.test.ts @@ -435,4 +435,6 @@ describe('orchestration service client', () => { 'Could not parse JSON' ); }); + + // add test for executing streaming with options with a JSON client, check for warning log { let mockResponses: { - tokenUsageResponse: any; - finishReasonResponse: any; + tokenUsageAndFinishReasonResponse: any; deltaContentResponse: any; }; let orchestrationStreamChunkResponses: { @@ -15,13 +14,9 @@ describe('Orchestration chat completion stream chunk response', () => { beforeAll(async () => { mockResponses = { - tokenUsageResponse: await parseMockResponse( + tokenUsageAndFinishReasonResponse: await parseMockResponse( 'orchestration', - 'orchestration-chat-completion-stream-chunk-response-token-usage.json' - ), - finishReasonResponse: await parseMockResponse( - 'orchestration', - 'orchestration-chat-completion-stream-chunk-response-finish-reason.json' + 'orchestration-chat-completion-stream-chunk-response-token-usage-and-finish-reason.json' ), deltaContentResponse: await parseMockResponse( 'orchestration', @@ -30,10 +25,10 @@ describe('Orchestration chat completion stream chunk response', () => { }; orchestrationStreamChunkResponses = { tokenUsageResponse: new OrchestrationStreamChunkResponse( - mockResponses.tokenUsageResponse + 
mockResponses.tokenUsageAndFinishReasonResponse ), finishReasonResponse: new OrchestrationStreamChunkResponse( - mockResponses.finishReasonResponse + mockResponses.tokenUsageAndFinishReasonResponse ), deltaContentResponse: new OrchestrationStreamChunkResponse( mockResponses.deltaContentResponse @@ -44,10 +39,10 @@ describe('Orchestration chat completion stream chunk response', () => { it('should return the chat completion stream chunk response', () => { expect( orchestrationStreamChunkResponses.tokenUsageResponse.data - ).toStrictEqual(mockResponses.tokenUsageResponse); + ).toStrictEqual(mockResponses.tokenUsageAndFinishReasonResponse); expect( orchestrationStreamChunkResponses.finishReasonResponse.data - ).toStrictEqual(mockResponses.finishReasonResponse); + ).toStrictEqual(mockResponses.tokenUsageAndFinishReasonResponse); expect( orchestrationStreamChunkResponses.deltaContentResponse.data ).toStrictEqual(mockResponses.deltaContentResponse); diff --git a/packages/orchestration/src/orchestration-utils.temp.ts b/packages/orchestration/src/orchestration-utils.temp.ts new file mode 100644 index 00000000..bcc1a03c --- /dev/null +++ b/packages/orchestration/src/orchestration-utils.temp.ts @@ -0,0 +1,4 @@ +// create complete config +// test with config + addStreamOptionsToLlmModuleConfig +// test with config + addStreamOptionsToOutputFilteringConfig +// test complete flo with addStreamOptions diff --git a/packages/orchestration/tsconfig.json b/packages/orchestration/tsconfig.json index 2caf7a3c..0cb2f59d 100644 --- a/packages/orchestration/tsconfig.json +++ b/packages/orchestration/tsconfig.json @@ -6,7 +6,7 @@ "tsBuildInfoFile": "./dist/.tsbuildinfo", "composite": true }, - "include": ["src/**/*.ts"], + "include": ["src/**/*.ts", "src/orchestration-utils.temp.ts"], "exclude": ["dist/**/*", "test/**/*", "**/*.test.ts", "node_modules/**/*"], "references": [{ "path": "../core" }, { "path": "../ai-api" }] } diff --git 
a/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-finish-reason.json b/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-token-usage-and-finish-reason.json similarity index 100% rename from test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-finish-reason.json rename to test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-token-usage-and-finish-reason.json diff --git a/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-token-usage.json b/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-token-usage.json deleted file mode 100644 index 6da1a7ab..00000000 --- a/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-token-usage.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "request_id": "66172762-8c47-4438-89e7-2689be8f370b", - "module_results": { - "llm": { - "id": "chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp", - "object": "chat.completion.chunk", - "created": 1734524005, - "model": "gpt-4o-2024-08-06", - "system_fingerprint": "fp_4e924a4b48", - "choices": [ - { - "index": 0, - "delta": { - "role": "assistant", - "content": "rate with SAP's enterprise solutions." - }, - "finish_reason": "stop" - } - ], - "usage": { - "completion_tokens": 271, - "prompt_tokens": 17, - "total_tokens": 288 - } - } - }, - "orchestration_result": { - "id": "chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp", - "object": "chat.completion.chunk", - "created": 1734524005, - "model": "gpt-4o-2024-08-06", - "system_fingerprint": "fp_4e924a4b48", - "choices": [ - { - "index": 0, - "delta": { - "role": "assistant", - "content": "rate with SAP's enterprise solutions." 
- }, - "finish_reason": "stop" - } - ], - "usage": { - "completion_tokens": 271, - "prompt_tokens": 17, - "total_tokens": 288 - } - } -} \ No newline at end of file From 674657737b40216007b9ebe50c01469e633bc454 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Thu, 9 Jan 2025 17:40:52 +0100 Subject: [PATCH 45/53] change orchestration exports --- packages/orchestration/src/index.ts | 40 +++-------------------------- 1 file changed, 4 insertions(+), 36 deletions(-) diff --git a/packages/orchestration/src/index.ts b/packages/orchestration/src/index.ts index 970c38e2..d1319d2e 100644 --- a/packages/orchestration/src/index.ts +++ b/packages/orchestration/src/index.ts @@ -1,43 +1,11 @@ -export type { - CompletionPostResponse, - ChatMessages, - TokenUsage, - TemplatingModuleConfig, - OrchestrationConfig, - ModuleResults, - ModuleConfigs, - MaskingModuleConfig, - MaskingProviderConfig, - GroundingModuleConfig, - DocumentGroundingFilter, - GroundingFilterId, - GroundingFilterSearchConfiguration, - DataRepositoryType, - KeyValueListPair, - SearchDocumentKeyValueListPair, - SearchSelectOptionEnum, - LlmModuleResult, - LlmChoice, - GenericModuleResult, - FilteringModuleConfig, - InputFilteringConfig, - OutputFilteringConfig, - FilterConfig, - ErrorResponse, - DpiEntities, - DpiEntityConfig, - DpiConfig, - CompletionPostRequest, - ChatMessage, - AzureThreshold, - AzureContentSafety, - AzureContentSafetyFilterConfig -} from './client/api/schema/index.js'; +export * from './client/api/schema/index.js'; export type { OrchestrationModuleConfig, LlmModuleConfig, - Prompt + Prompt, + RequestOptions, + StreamOptions } from './orchestration-types.js'; export { OrchestrationStreamResponse } from './orchestration-stream-response.js'; From a5d65efcf56e6e26c02359c9a51024bbc1c1565a Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Fri, 10 Jan 2025 14:27:12 +0100 Subject: [PATCH 46/53] add a billion tests --- .../src/orchestration-client.test.ts | 95 +++++++++--- 
.../orchestration/src/orchestration-client.ts | 5 +- ...hestration-completion-post-request.test.ts | 51 ++++++- .../orchestration/src/orchestration-types.ts | 2 +- .../src/orchestration-utils.temp.ts | 4 - .../src/orchestration-utils.test.ts | 135 ++++++++++++++++++ .../orchestration/src/orchestration-utils.ts | 11 +- packages/orchestration/tsconfig.json | 2 +- tests/e2e-tests/src/orchestration.test.ts | 3 + 9 files changed, 275 insertions(+), 33 deletions(-) delete mode 100644 packages/orchestration/src/orchestration-utils.temp.ts create mode 100644 packages/orchestration/src/orchestration-utils.test.ts diff --git a/packages/orchestration/src/orchestration-client.test.ts b/packages/orchestration/src/orchestration-client.test.ts index 731be1eb..53fcbd77 100644 --- a/packages/orchestration/src/orchestration-client.test.ts +++ b/packages/orchestration/src/orchestration-client.test.ts @@ -1,4 +1,6 @@ import nock from 'nock'; +import { jest } from '@jest/globals'; +import { createLogger } from '@sap-cloud-sdk/util'; import { mockClientCredentialsGrantCall, mockDeploymentsList, @@ -29,6 +31,21 @@ describe('orchestration service client', () => { nock.cleanAll(); }); + const jsonConfig = `{ + "module_configurations": { + "llm_module_config": { + "model_name": "gpt-35-turbo-16k", + "model_params": { + "max_tokens": 50, + "temperature": 0.1 + } + }, + "templating_module_config": { + "template": [{ "role": "user", "content": "What is the capital of France?" 
}] + } + } + }`; + it('calls chatCompletion with minimum configuration', async () => { const config: OrchestrationModuleConfig = { llm: { @@ -66,22 +83,15 @@ describe('orchestration service client', () => { expect(response.getTokenUsage().completion_tokens).toEqual(9); }); - it('calls chatCompletion with valid JSON configuration', async () => { - const jsonConfig = `{ - "module_configurations": { - "llm_module_config": { - "model_name": "gpt-35-turbo-16k", - "model_params": { - "max_tokens": 50, - "temperature": 0.1 - } - }, - "templating_module_config": { - "template": [{ "role": "user", "content": "What is the capital of France?" }] - } - } - }`; + it('should throw an error when invalid JSON is provided', () => { + const invalidJsonConfig = '{ "module_configurations": {}, '; + expect(() => new OrchestrationClient(invalidJsonConfig)).toThrow( + 'Could not parse JSON' + ); + }); + + it('calls chatCompletion with valid JSON configuration', async () => { const mockResponse = await parseMockResponse( 'orchestration', 'orchestration-chat-completion-success-response.json' @@ -428,13 +438,56 @@ describe('orchestration service client', () => { } }); - it('should throw an error when invalid JSON is provided', () => { - const invalidJsonConfig = '{ "module_configurations": {}, '; + it('executes a streaming request with JSON config and logs warning for stream options', async () => { + const mockResponse = await parseFileToString( + 'orchestration', + 'orchestration-chat-completion-stream-chunks.txt' + ); - expect(() => new OrchestrationClient(invalidJsonConfig)).toThrow( - 'Could not parse JSON' + mockInference( + { + data: constructCompletionPostRequestFromJsonModuleConfig( + JSON.parse(jsonConfig), + undefined, + true + ) + }, + { + data: mockResponse, + status: 200 + }, + { + url: 'inference/deployments/1234/completion' + } ); - }); - // add test for executing streaming with options with a JSON client, check for warning log { const defaultConfig: OrchestrationModuleConfig 
= { @@ -199,4 +202,50 @@ describe('construct completion post request', () => { constructCompletionPostRequest(config); expect(completionPostRequest).toEqual(expectedCompletionPostRequest); }); + + it('should construct completion post request with stream options', async () => { + const config: OrchestrationModuleConfig = { + ...defaultConfig, + filtering: { + output: buildAzureContentFilter({ Hate: 4, SelfHarm: 0 }) + } + }; + + const streamOptions: StreamOptions = { + global: { chunk_size: 100 }, + outputFiltering: { overlap: 100 } + }; + + const expectedCompletionPostRequest: CompletionPostRequest = { + orchestration_config: { + stream: true, + stream_options: streamOptions.global, + module_configurations: { + templating_module_config: config.templating, + llm_module_config: { + ...config.llm, + model_params: { + ...config.llm.model_params, + stream_options: { include_usage: true } + } + }, + filtering_module_config: { + output: { + ...config.filtering!.output!, + stream_options: streamOptions.outputFiltering + } + } + } + }, + input_params: { phrase: 'I hate you.' } + }; + const completionPostRequest: CompletionPostRequest = + constructCompletionPostRequest( + config, + { inputParams: { phrase: 'I hate you.' } }, + true, + streamOptions + ); + expect(completionPostRequest).toEqual(expectedCompletionPostRequest); + }); }); diff --git a/packages/orchestration/src/orchestration-types.ts b/packages/orchestration/src/orchestration-types.ts index fc04faba..1a3dbb55 100644 --- a/packages/orchestration/src/orchestration-types.ts +++ b/packages/orchestration/src/orchestration-types.ts @@ -89,7 +89,7 @@ export interface StreamOptions { /** * LLM specific stream options. */ - llm?: { include_usage?: boolean; [key: string]: any }; + llm?: { include_usage?: boolean; [key: string]: any } | null; /** * Output filtering stream options. 
*/ diff --git a/packages/orchestration/src/orchestration-utils.temp.ts b/packages/orchestration/src/orchestration-utils.temp.ts deleted file mode 100644 index bcc1a03c..00000000 --- a/packages/orchestration/src/orchestration-utils.temp.ts +++ /dev/null @@ -1,4 +0,0 @@ -// create complete config -// test with config + addStreamOptionsToLlmModuleConfig -// test with config + addStreamOptionsToOutputFilteringConfig -// test complete flo with addStreamOptions diff --git a/packages/orchestration/src/orchestration-utils.test.ts b/packages/orchestration/src/orchestration-utils.test.ts new file mode 100644 index 00000000..4abbe7d8 --- /dev/null +++ b/packages/orchestration/src/orchestration-utils.test.ts @@ -0,0 +1,135 @@ +import { createLogger } from '@sap-cloud-sdk/util'; +import { jest } from '@jest/globals'; +import { + addStreamOptions, + addStreamOptionsToLlmModuleConfig, + addStreamOptionsToOutputFilteringConfig +} from './orchestration-utils.js'; +import { buildAzureContentFilter } from './orchestration-filter-utility.js'; +import type { + OrchestrationModuleConfig, + StreamOptions +} from './orchestration-types.js'; +import type { + ModuleConfigs, + OrchestrationConfig +} from './client/api/schema/index.js'; + +describe('construct completion post request', () => { + const defaultConfig: OrchestrationModuleConfig = { + llm: { + model_name: 'gpt-35-turbo-16k', + model_params: { max_tokens: 50, temperature: 0.1 } + }, + templating: { + template: [{ role: 'user', content: 'Create paraphrases of {{?phrase}}' }] + } + }; + + const defaultModuleConfigs: ModuleConfigs = { + llm_module_config: defaultConfig.llm, + templating_module_config: defaultConfig.templating + }; + + const defaultStreamOptions: StreamOptions = { + global: { chunk_size: 100 }, + llm: { include_usage: false }, + outputFiltering: { overlap: 100 } + }; + + it('should add include_usage to llm module config', () => { + const llmConfig = addStreamOptionsToLlmModuleConfig(defaultConfig.llm); + 
expect(llmConfig.model_params.stream_options).toEqual({ + include_usage: true + }); + }); + + it('should set include_usage to false in llm module config', () => { + const llmConfig = addStreamOptionsToLlmModuleConfig( + defaultConfig.llm, + defaultStreamOptions + ); + expect(llmConfig.model_params.stream_options).toEqual({ + include_usage: false + }); + }); + + it('should not add any stream options to llm module config', () => { + const llmConfig = addStreamOptionsToLlmModuleConfig(defaultConfig.llm, { + llm: null + }); + expect( + Object.keys(llmConfig.model_params).every(key => key !== 'stream_options') + ).toBe(true); + }); + + it('should add stream options to output filtering config', () => { + const config: OrchestrationModuleConfig = { + ...defaultConfig, + filtering: { + output: buildAzureContentFilter({ Hate: 4, SelfHarm: 0 }) + } + }; + const filteringConfig = addStreamOptionsToOutputFilteringConfig( + config.filtering!.output!, + defaultStreamOptions.outputFiltering! + ); + expect(filteringConfig.filters).toEqual(config.filtering?.output?.filters); + expect(filteringConfig.stream_options).toEqual({ + overlap: 100 + }); + }); + + it('should add stream options to orchestration config', () => { + const config: ModuleConfigs = { + ...defaultModuleConfigs, + filtering_module_config: { + output: buildAzureContentFilter({ Hate: 4, SelfHarm: 0 }) + } + }; + + // eslint-disable-next-line @typescript-eslint/no-unused-vars + const { llm, ...streamOptions } = defaultStreamOptions; + + const expectedOrchestrationConfig: OrchestrationConfig = { + stream: true, + stream_options: streamOptions.global, + module_configurations: { + ...config, + llm_module_config: { + ...config.llm_module_config, + model_params: { + ...config.llm_module_config.model_params, + stream_options: { include_usage: true } + } + }, + filtering_module_config: { + output: { + ...config.filtering_module_config!.output!, + stream_options: streamOptions.outputFiltering + } + } + } + }; + const 
orchestrationConfig = addStreamOptions(config, streamOptions); + expect(orchestrationConfig).toEqual(expectedOrchestrationConfig); + }); + + it('should warn if no filter config was set, but streaming options were set', () => { + const logger = createLogger({ + package: 'orchestration', + messageContext: 'orchestration-utils' + }); + + const warnSpy = jest.spyOn(logger, 'warn'); + + const config = addStreamOptions(defaultModuleConfigs, defaultStreamOptions); + + expect(warnSpy).toHaveBeenCalledWith( + 'Output filter stream options are not applied because filtering module is not configured.' + ); + expect( + config.module_configurations.filtering_module_config + ).toBeUndefined(); + }); +}); diff --git a/packages/orchestration/src/orchestration-utils.ts b/packages/orchestration/src/orchestration-utils.ts index 3f753c4b..0e27e2d7 100644 --- a/packages/orchestration/src/orchestration-utils.ts +++ b/packages/orchestration/src/orchestration-utils.ts @@ -13,7 +13,10 @@ import type { OrchestrationModuleConfig } from './orchestration-types.js'; -const logger = createLogger({ messageContext: 'orchestration-utils' }); +const logger = createLogger({ + package: 'orchestration', + messageContext: 'orchestration-utils' +}); /** * @internal @@ -44,7 +47,7 @@ export function addStreamOptionsToLlmModuleConfig( llmModuleConfig: LlmModuleConfig, streamOptions?: StreamOptions ): LlmModuleConfig { - if (streamOptions?.llm === undefined) { + if (streamOptions?.llm === null) { return llmModuleConfig; } return { @@ -55,7 +58,7 @@ export function addStreamOptionsToLlmModuleConfig( stream_options: { include_usage: true, ...(llmModuleConfig.model_params.stream_options || {}), - ...streamOptions.llm + ...(streamOptions?.llm || {}) } }) } @@ -97,7 +100,7 @@ export function addStreamOptions( return { stream: true, - stream_options: globalOptions, + ...(globalOptions && { stream_options: globalOptions }), module_configurations: { ...moduleConfigs, llm_module_config: 
addStreamOptionsToLlmModuleConfig( diff --git a/packages/orchestration/tsconfig.json b/packages/orchestration/tsconfig.json index 0cb2f59d..78a9a5e2 100644 --- a/packages/orchestration/tsconfig.json +++ b/packages/orchestration/tsconfig.json @@ -6,7 +6,7 @@ "tsBuildInfoFile": "./dist/.tsbuildinfo", "composite": true }, - "include": ["src/**/*.ts", "src/orchestration-utils.temp.ts"], + "include": ["src/**/*.ts", "src/orchestration-utils.test.ts"], "exclude": ["dist/**/*", "test/**/*", "**/*.test.ts", "node_modules/**/*"], "references": [{ "path": "../core" }, { "path": "../ai-api" }] } diff --git a/tests/e2e-tests/src/orchestration.test.ts b/tests/e2e-tests/src/orchestration.test.ts index d6b8008c..30c89671 100644 --- a/tests/e2e-tests/src/orchestration.test.ts +++ b/tests/e2e-tests/src/orchestration.test.ts @@ -55,4 +55,7 @@ describe('orchestration', () => { const result = await orchestrationCompletionMasking(); expect(result).toEqual(expect.any(String)); }); + + // add e2e test + // java tests only that there is any response }); From 4d78c6a59c7af8f8ae0134799743b644f6734bbe Mon Sep 17 00:00:00 2001 From: cloud-sdk-js Date: Fri, 10 Jan 2025 14:04:31 +0000 Subject: [PATCH 47/53] fix: Changes from lint --- .../src/orchestration-client.test.ts | 3 ++- ...hestration-completion-post-request.test.ts | 5 +++- .../src/orchestration-utils.test.ts | 27 ++++++++++++------- .../orchestration/src/orchestration-utils.ts | 1 - tests/type-tests/test/orchestration.test-d.ts | 4 +-- 5 files changed, 24 insertions(+), 16 deletions(-) diff --git a/packages/orchestration/src/orchestration-client.test.ts b/packages/orchestration/src/orchestration-client.test.ts index c64bab08..f3eef6fb 100644 --- a/packages/orchestration/src/orchestration-client.test.ts +++ b/packages/orchestration/src/orchestration-client.test.ts @@ -9,7 +9,8 @@ import { parseMockResponse } from '../../../test-util/mock-http.js'; import { OrchestrationClient } from './orchestration-client.js'; -import { 
buildAzureContentFilter, +import { + buildAzureContentFilter, constructCompletionPostRequest, constructCompletionPostRequestFromJsonModuleConfig } from './orchestration-utils.js'; diff --git a/packages/orchestration/src/orchestration-completion-post-request.test.ts b/packages/orchestration/src/orchestration-completion-post-request.test.ts index 932d335e..d3d8bf52 100644 --- a/packages/orchestration/src/orchestration-completion-post-request.test.ts +++ b/packages/orchestration/src/orchestration-completion-post-request.test.ts @@ -1,4 +1,7 @@ -import { constructCompletionPostRequest, buildAzureContentFilter } from './orchestration-utils.js'; +import { + constructCompletionPostRequest, + buildAzureContentFilter +} from './orchestration-utils.js'; import type { CompletionPostRequest } from './client/api/schema/index.js'; import type { OrchestrationModuleConfig, diff --git a/packages/orchestration/src/orchestration-utils.test.ts b/packages/orchestration/src/orchestration-utils.test.ts index dd5085f2..9c5a24f5 100644 --- a/packages/orchestration/src/orchestration-utils.test.ts +++ b/packages/orchestration/src/orchestration-utils.test.ts @@ -3,17 +3,17 @@ import { jest } from '@jest/globals'; import { addStreamOptions, addStreamOptionsToLlmModuleConfig, - addStreamOptionsToOutputFilteringConfig -, buildAzureContentFilter, -constructCompletionPostRequest } from './orchestration-utils.js'; + addStreamOptionsToOutputFilteringConfig, + buildAzureContentFilter, + constructCompletionPostRequest +} from './orchestration-utils.js'; import type { OrchestrationModuleConfig, StreamOptions } from './orchestration-types.js'; import type { ModuleConfigs, - OrchestrationConfig -, + OrchestrationConfig, CompletionPostRequest, FilteringModuleConfig } from './client/api/schema/index.js'; @@ -41,7 +41,9 @@ describe('construct completion post request', () => { }; it('should add include_usage to llm module config', () => { - const llmConfig = 
addStreamOptionsToLlmModuleConfig(defaultOrchestrationModuleConfig.llm); + const llmConfig = addStreamOptionsToLlmModuleConfig( + defaultOrchestrationModuleConfig.llm + ); expect(llmConfig.model_params?.stream_options).toEqual({ include_usage: true }); @@ -58,11 +60,16 @@ describe('construct completion post request', () => { }); it('should not add any stream options to llm module config', () => { - const llmConfig = addStreamOptionsToLlmModuleConfig(defaultOrchestrationModuleConfig.llm, { - llm: null - }); + const llmConfig = addStreamOptionsToLlmModuleConfig( + defaultOrchestrationModuleConfig.llm, + { + llm: null + } + ); expect( - Object.keys((llmConfig.model_params ?? {})).every(key => key !== 'stream_options') + Object.keys(llmConfig.model_params ?? {}).every( + key => key !== 'stream_options' + ) ).toBe(true); }); diff --git a/packages/orchestration/src/orchestration-utils.ts b/packages/orchestration/src/orchestration-utils.ts index 5f3df2a8..b0f9b359 100644 --- a/packages/orchestration/src/orchestration-utils.ts +++ b/packages/orchestration/src/orchestration-utils.ts @@ -2,7 +2,6 @@ import { createLogger } from '@sap-cloud-sdk/util'; import type { AzureContentSafety, InputFilteringConfig, - CompletionPostRequest, FilteringStreamOptions, ModuleConfigs, diff --git a/tests/type-tests/test/orchestration.test-d.ts b/tests/type-tests/test/orchestration.test-d.ts index 9a337224..5047c527 100644 --- a/tests/type-tests/test/orchestration.test-d.ts +++ b/tests/type-tests/test/orchestration.test-d.ts @@ -1,7 +1,5 @@ import { expectError, expectType, expectAssignable } from 'tsd'; -import { - OrchestrationClient -} from '@sap-ai-sdk/orchestration'; +import { OrchestrationClient } from '@sap-ai-sdk/orchestration'; import type { CompletionPostResponse, OrchestrationResponse, From 5663d4e6867c05cb7da09fa9619a8e06dbdfcd66 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Fri, 10 Jan 2025 16:52:51 +0100 Subject: [PATCH 48/53] add sample code and e2e tests --- 
sample-code/src/index.ts | 4 +- sample-code/src/orchestration.ts | 45 ++++++- sample-code/src/server.ts | 138 ++++++++++++++++------ tests/e2e-tests/src/orchestration.test.ts | 27 ++++- 4 files changed, 166 insertions(+), 48 deletions(-) diff --git a/sample-code/src/index.ts b/sample-code/src/index.ts index 73b51c0a..5135ea80 100644 --- a/sample-code/src/index.ts +++ b/sample-code/src/index.ts @@ -14,7 +14,9 @@ export { orchestrationCompletionMasking, orchestrationFromJson, orchestrationGrounding, - orchestrationChatCompletionImage + orchestrationChatCompletionImage, + chatCompletionStreamWithJsonModuleConfig, + chatCompletionStream } from './orchestration.js'; export { invoke, diff --git a/sample-code/src/orchestration.ts b/sample-code/src/orchestration.ts index 7fb3b9be..a97df50b 100644 --- a/sample-code/src/orchestration.ts +++ b/sample-code/src/orchestration.ts @@ -8,7 +8,8 @@ import type { LlmModuleConfig, OrchestrationStreamChunkResponse, OrchestrationStreamResponse, - OrchestrationResponse + OrchestrationResponse, + StreamOptions } from '@sap-ai-sdk/orchestration'; const logger = createLogger({ @@ -44,10 +45,12 @@ export async function orchestrationChatCompletion(): Promise> { const orchestrationClient = new OrchestrationClient({ // define the language model to be used @@ -66,11 +69,43 @@ export async function chatCompletionStream( } }); - const response = await orchestrationClient.stream( + return orchestrationClient.stream( + { inputParams: { input: 'SAP Cloud SDK' } }, + controller, + streamOptions + ); +} + +/** + * Ask ChatGPT through the orchestration service about SAP Cloud SDK with streaming and JSON module configuration. + * @param controller - The abort controller. + * @returns The response from the orchestration service containing the response content. 
+ */ +export async function chatCompletionStreamWithJsonModuleConfig( + controller: AbortController +): Promise> { + const jsonConfig = `{ + "module_configurations": { + "llm_module_config": { + "model_name": "gpt-35-turbo", + "model_params": { + "stream_options": { + "include_usage": true + } + } + }, + "templating_module_config": { + "template": [{ "role": "user", "content": "Give me a long introduction of {{?input}}" }] + } + } + }`; + + const orchestrationClient = new OrchestrationClient(jsonConfig); + + return orchestrationClient.stream( { inputParams: { input: 'SAP Cloud SDK' } }, controller ); - return response; } const llm: LlmModuleConfig = { diff --git a/sample-code/src/server.ts b/sample-code/src/server.ts index 46123224..a768e1a2 100644 --- a/sample-code/src/server.ts +++ b/sample-code/src/server.ts @@ -16,7 +16,8 @@ import { chatCompletionStream as orchestrationChatCompletionStream, orchestrationFromJson, orchestrationGrounding, - orchestrationChatCompletionImage + orchestrationChatCompletionImage, + chatCompletionStreamWithJsonModuleConfig as orchestrationChatCompletionStreamWithJsonModuleConfig } from './orchestration.js'; import { getDeployments, @@ -276,53 +277,112 @@ app.get('/orchestration/:sampleCase', async (req, res) => { } }); -app.get('/orchestration-stream/chat-completion-stream', async (req, res) => { - const controller = new AbortController(); - try { - const response = await orchestrationChatCompletionStream(controller); +app.post( + '/orchestration-stream/chat-completion-stream', + express.json(), + async (req, res) => { + const controller = new AbortController(); + try { + const response = await orchestrationChatCompletionStream( + controller, + req.body + ); - // Set headers for event stream. - res.setHeader('Content-Type', 'text/event-stream'); - res.setHeader('Connection', 'keep-alive'); - res.flushHeaders(); + // Set headers for event stream. 
+ res.setHeader('Content-Type', 'text/event-stream'); + res.setHeader('Connection', 'keep-alive'); + res.flushHeaders(); - let connectionAlive = true; + let connectionAlive = true; + + // Abort the stream if the client connection is closed. + res.on('close', () => { + controller.abort(); + connectionAlive = false; + res.end(); + }); + + // Stream the delta content. + for await (const chunk of response.stream) { + if (!connectionAlive) { + break; + } + res.write(chunk.getDeltaContent() + '\n'); + } - // Abort the stream if the client connection is closed. - res.on('close', () => { - controller.abort(); - connectionAlive = false; + // Write the finish reason and token usage after the stream ends. + if (connectionAlive) { + const finishReason = response.getFinishReason(); + const tokenUsage = response.getTokenUsage(); + res.write('\n\n---------------------------\n'); + res.write(`Finish reason: ${finishReason}\n`); + res.write('Token usage:\n'); + res.write(` - Completion tokens: ${tokenUsage?.completion_tokens}\n`); + res.write(` - Prompt tokens: ${tokenUsage?.prompt_tokens}\n`); + res.write(` - Total tokens: ${tokenUsage?.total_tokens}\n`); + } + } catch (error: any) { + console.error(error); + res + .status(500) + .send('Yikes, vibes are off apparently 😬 -> ' + error.message); + } finally { res.end(); - }); + } + } +); - // Stream the delta content. - for await (const chunk of response.stream) { - if (!connectionAlive) { - break; +app.get( + '/orchestration-stream/chat-completion-stream-json', + async (req, res) => { + const controller = new AbortController(); + try { + const response = + await orchestrationChatCompletionStreamWithJsonModuleConfig(controller); + + // Set headers for event stream. + res.setHeader('Content-Type', 'text/event-stream'); + res.setHeader('Connection', 'keep-alive'); + res.flushHeaders(); + + let connectionAlive = true; + + // Abort the stream if the client connection is closed. 
+ res.on('close', () => { + controller.abort(); + connectionAlive = false; + res.end(); + }); + + // Stream the delta content. + for await (const chunk of response.stream) { + if (!connectionAlive) { + break; + } + res.write(chunk.getDeltaContent() + '\n'); } - res.write(chunk.getDeltaContent() + '\n'); - } - // Write the finish reason and token usage after the stream ends. - if (connectionAlive) { - const finishReason = response.getFinishReason(); - const tokenUsage = response.getTokenUsage(); - res.write('\n\n---------------------------\n'); - res.write(`Finish reason: ${finishReason}\n`); - res.write('Token usage:\n'); - res.write(` - Completion tokens: ${tokenUsage?.completion_tokens}\n`); - res.write(` - Prompt tokens: ${tokenUsage?.prompt_tokens}\n`); - res.write(` - Total tokens: ${tokenUsage?.total_tokens}\n`); + // Write the finish reason and token usage after the stream ends. + if (connectionAlive) { + const finishReason = response.getFinishReason(); + const tokenUsage = response.getTokenUsage(); + res.write('\n\n---------------------------\n'); + res.write(`Finish reason: ${finishReason}\n`); + res.write('Token usage:\n'); + res.write(` - Completion tokens: ${tokenUsage?.completion_tokens}\n`); + res.write(` - Prompt tokens: ${tokenUsage?.prompt_tokens}\n`); + res.write(` - Total tokens: ${tokenUsage?.total_tokens}\n`); + } + } catch (error: any) { + console.error(error); + res + .status(500) + .send('Yikes, vibes are off apparently 😬 -> ' + error.message); + } finally { + res.end(); } - } catch (error: any) { - console.error(error); - res - .status(500) - .send('Yikes, vibes are off apparently 😬 -> ' + error.message); - } finally { - res.end(); } -}); +); /* Langchain */ app.get('/langchain/invoke', async (req, res) => { diff --git a/tests/e2e-tests/src/orchestration.test.ts b/tests/e2e-tests/src/orchestration.test.ts index 61c96e5e..4cddd28f 100644 --- a/tests/e2e-tests/src/orchestration.test.ts +++ b/tests/e2e-tests/src/orchestration.test.ts @@ -5,7 
+5,9 @@ import { orchestrationOutputFiltering, orchestrationRequestConfig, orchestrationCompletionMasking, - orchestrationChatCompletionImage + orchestrationChatCompletionImage, + chatCompletionStreamWithJsonModuleConfig, + chatCompletionStream } from '@sap-ai-sdk/sample-code'; import { loadEnv } from './utils/load-env.js'; import type { OrchestrationResponse } from '@sap-ai-sdk/orchestration'; @@ -63,6 +65,25 @@ describe('orchestration', () => { expect(response.getContent()?.includes('logo')).toBe(true); }); - // add e2e test - // java tests only that there is any response + it('should return stream of orchestration responses', async () => { + const response = await chatCompletionStream(new AbortController()); + + for await (const chunk of response.stream) { + expect(chunk).toBeDefined(); + } + expect(response.getFinishReason()).toEqual('stop'); + expect(response.getTokenUsage()).toBeDefined(); + }); + + it('should return stream of orchestration responses, using a JSON client', async () => { + const response = await chatCompletionStreamWithJsonModuleConfig( + new AbortController() + ); + + for await (const chunk of response.stream) { + expect(chunk).toBeDefined(); + } + expect(response.getFinishReason()).toEqual('stop'); + expect(response.getTokenUsage()).toBeDefined(); + }); }); From 77f08d47a17f508a3c4b16527f3bc99229a6cb74 Mon Sep 17 00:00:00 2001 From: cloud-sdk-js Date: Mon, 13 Jan 2025 13:07:07 +0000 Subject: [PATCH 49/53] fix: Changes from lint --- .../orchestration/src/orchestration-utils.test.ts | 13 ++++++++++--- packages/orchestration/src/orchestration-utils.ts | 3 ++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/packages/orchestration/src/orchestration-utils.test.ts b/packages/orchestration/src/orchestration-utils.test.ts index 6e95f93c..353cda28 100644 --- a/packages/orchestration/src/orchestration-utils.test.ts +++ b/packages/orchestration/src/orchestration-utils.test.ts @@ -28,7 +28,9 @@ describe('orchestration utils', () => { 
model_params: { max_tokens: 50, temperature: 0.1 } }, templating: { - template: [{ role: 'user', content: 'Create paraphrases of {{?phrase}}' }] + template: [ + { role: 'user', content: 'Create paraphrases of {{?phrase}}' } + ] } }; @@ -87,7 +89,9 @@ describe('orchestration utils', () => { config.filtering!.output!, defaultStreamOptions.outputFiltering! ); - expect(filteringConfig.filters).toEqual(config.filtering?.output?.filters); + expect(filteringConfig.filters).toEqual( + config.filtering?.output?.filters + ); expect(filteringConfig.stream_options).toEqual({ overlap: 100 }); @@ -136,7 +140,10 @@ describe('orchestration utils', () => { const warnSpy = jest.spyOn(logger, 'warn'); - const config = addStreamOptions(defaultModuleConfigs, defaultStreamOptions); + const config = addStreamOptions( + defaultModuleConfigs, + defaultStreamOptions + ); expect(warnSpy).toHaveBeenCalledWith( 'Output filter stream options are not applied because filtering module is not configured.' diff --git a/packages/orchestration/src/orchestration-utils.ts b/packages/orchestration/src/orchestration-utils.ts index a500becc..966fcd6a 100644 --- a/packages/orchestration/src/orchestration-utils.ts +++ b/packages/orchestration/src/orchestration-utils.ts @@ -1,5 +1,6 @@ import { createLogger } from '@sap-cloud-sdk/util'; -import type { DocumentGroundingServiceConfig , +import type { + DocumentGroundingServiceConfig, Prompt, StreamOptions, LlmModuleConfig, From 8aadbac889aafc137706758e9c02346130dce559 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Mon, 13 Jan 2025 14:21:22 +0100 Subject: [PATCH 50/53] add changelog, adjust docs --- .changeset/hip-melons-destroy.md | 5 +++++ sample-code/README.md | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 35 insertions(+), 2 deletions(-) create mode 100644 .changeset/hip-melons-destroy.md diff --git a/.changeset/hip-melons-destroy.md b/.changeset/hip-melons-destroy.md new file mode 100644 index 00000000..01ac9480 --- /dev/null +++ 
b/.changeset/hip-melons-destroy.md @@ -0,0 +1,5 @@ +--- +'@sap-ai-sdk/orchestration': minor +--- + +[New Funcionality] Add support for streaming in the orchestration client. diff --git a/sample-code/README.md b/sample-code/README.md index f7913972..f9066cff 100644 --- a/sample-code/README.md +++ b/sample-code/README.md @@ -164,9 +164,37 @@ Send chat completion request with a custom header as the custom request configur #### Chat Completion Streaming -`GET /orchestration-stream/chat-completion-stream` +`POST /orchestration-stream/chat-completion-stream` -Get chat completion response with streaming. +Get a chat completion response with streaming. + +You can set the streaming options in the body of the request. + +An example for setting the chunk size would look like this: + +``` +curl -X POST http://localhost:8080/orchestration-stream/chat-completion-stream \ +-H "Content-Type: application/json" \ +-d '{ + "global": { + "chunk_size": 10 + } +}' +``` + +The response header is set with `Content-Type: text/event-stream` to stream the text. + +`AbortController` is used to cancel the request in case user closes or refreshes the page, or there is an error. + +The `toContentStream()` method is called to extract the content of the chunk for convenience. + +Once the streaming is done, finish reason and token usage are printed out. + +#### Chat Completion Streaming With JSON Module Config + +`GET /orchestration-stream/chat-completion-stream-json` + +Get a chat completion response with streaming with a JSON Module Config initalized client. The response header is set with `Content-Type: text/event-stream` to stream the text. 
From fba722d8a92a60362473d62a507ea5e4e8db6a13 Mon Sep 17 00:00:00 2001 From: tomfrenken Date: Mon, 13 Jan 2025 14:23:17 +0100 Subject: [PATCH 51/53] adjust type test imports --- tests/type-tests/test/orchestration.test-d.ts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/type-tests/test/orchestration.test-d.ts b/tests/type-tests/test/orchestration.test-d.ts index 75b6b32c..47356e5d 100644 --- a/tests/type-tests/test/orchestration.test-d.ts +++ b/tests/type-tests/test/orchestration.test-d.ts @@ -1,12 +1,13 @@ import { expectError, expectType, expectAssignable } from 'tsd'; -import { OrchestrationClient } from '@sap-ai-sdk/orchestration'; +import { OrchestrationClient , + buildDocumentGroundingConfig +} from '@sap-ai-sdk/orchestration'; import type { CompletionPostResponse, OrchestrationResponse, TokenUsage, ChatModel, GroundingModuleConfig, - buildDocumentGroundingConfig, LlmModelParams } from '@sap-ai-sdk/orchestration'; @@ -243,7 +244,7 @@ expect('custom-model'); expect('gemini-1.0-pro'); /** - * Grounding util + * Grounding util. 
*/ expectType( buildDocumentGroundingConfig({ From 50c0dc0387500a638950852d0996ef90edd3a7a3 Mon Sep 17 00:00:00 2001 From: cloud-sdk-js Date: Mon, 13 Jan 2025 13:24:29 +0000 Subject: [PATCH 52/53] fix: Changes from lint --- tests/type-tests/test/orchestration.test-d.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/type-tests/test/orchestration.test-d.ts b/tests/type-tests/test/orchestration.test-d.ts index 47356e5d..d3accedf 100644 --- a/tests/type-tests/test/orchestration.test-d.ts +++ b/tests/type-tests/test/orchestration.test-d.ts @@ -1,5 +1,6 @@ import { expectError, expectType, expectAssignable } from 'tsd'; -import { OrchestrationClient , +import { + OrchestrationClient, buildDocumentGroundingConfig } from '@sap-ai-sdk/orchestration'; import type { From 5ed1be2d6a6f7596012c73f940ac072eaddbfe3d Mon Sep 17 00:00:00 2001 From: Zhongpin Wang Date: Mon, 13 Jan 2025 14:37:42 +0100 Subject: [PATCH 53/53] Update .changeset/hip-melons-destroy.md --- .changeset/hip-melons-destroy.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changeset/hip-melons-destroy.md b/.changeset/hip-melons-destroy.md index 01ac9480..14da6bf5 100644 --- a/.changeset/hip-melons-destroy.md +++ b/.changeset/hip-melons-destroy.md @@ -2,4 +2,4 @@ '@sap-ai-sdk/orchestration': minor --- -[New Funcionality] Add support for streaming in the orchestration client. +[New Functionality] Add support for streaming in the orchestration client.