diff --git a/.changeset/hip-melons-destroy.md b/.changeset/hip-melons-destroy.md new file mode 100644 index 00000000..14da6bf5 --- /dev/null +++ b/.changeset/hip-melons-destroy.md @@ -0,0 +1,5 @@ +--- +'@sap-ai-sdk/orchestration': minor +--- + +[New Functionality] Add support for streaming in the orchestration client. diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index aaf1e0af..0b6b336e 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -9,3 +9,4 @@ export { AwsBedrockChatModel, AiCoreOpenSourceChatModel } from './model-types.js'; +export { SseStream, LineDecoder, SSEDecoder } from './stream/index.js'; diff --git a/packages/core/src/stream/index.ts b/packages/core/src/stream/index.ts new file mode 100644 index 00000000..992c8e02 --- /dev/null +++ b/packages/core/src/stream/index.ts @@ -0,0 +1,3 @@ +export * from './sse-stream.js'; +export * from './sse-decoder.js'; +export * from './line-decoder.js'; diff --git a/packages/foundation-models/src/azure-openai/stream/line-decoder.ts b/packages/core/src/stream/line-decoder.ts similarity index 99% rename from packages/foundation-models/src/azure-openai/stream/line-decoder.ts rename to packages/core/src/stream/line-decoder.ts index 676860c1..2a999024 100644 --- a/packages/foundation-models/src/azure-openai/stream/line-decoder.ts +++ b/packages/core/src/stream/line-decoder.ts @@ -5,7 +5,6 @@ type Bytes = string | ArrayBuffer | Uint8Array | Buffer | null | undefined; * reading lines from text. * * Https://github.com/encode/httpx/blob/920333ea98118e9cf617f246905d7b202510941c/httpx/_decoders.py#L258. 
- * @internal */ export class LineDecoder { // prettier-ignore diff --git a/packages/foundation-models/src/azure-openai/stream/sse-decoder.ts b/packages/core/src/stream/sse-decoder.ts similarity index 99% rename from packages/foundation-models/src/azure-openai/stream/sse-decoder.ts rename to packages/core/src/stream/sse-decoder.ts index 046939ac..7c6f6b22 100644 --- a/packages/foundation-models/src/azure-openai/stream/sse-decoder.ts +++ b/packages/core/src/stream/sse-decoder.ts @@ -19,7 +19,6 @@ export interface ServerSentEvent { /** * Server-Sent Event decoder. - * @internal */ export class SSEDecoder { private data: string[]; diff --git a/packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts b/packages/core/src/stream/sse-stream.test.ts similarity index 100% rename from packages/foundation-models/src/azure-openai/stream/sse-stream.test.ts rename to packages/core/src/stream/sse-stream.test.ts diff --git a/packages/foundation-models/src/azure-openai/stream/sse-stream.ts b/packages/core/src/stream/sse-stream.ts similarity index 99% rename from packages/foundation-models/src/azure-openai/stream/sse-stream.ts rename to packages/core/src/stream/sse-stream.ts index f36f9dc6..8af138b0 100644 --- a/packages/foundation-models/src/azure-openai/stream/sse-stream.ts +++ b/packages/core/src/stream/sse-stream.ts @@ -13,7 +13,6 @@ type Bytes = string | ArrayBuffer | Uint8Array | Buffer | null | undefined; /** * Stream implemented as an async iterable. 
- * @internal */ export class SseStream implements AsyncIterable { protected static transformToSseStream( diff --git a/packages/foundation-models/README.md b/packages/foundation-models/README.md index 6b621f89..0bbb0b15 100644 --- a/packages/foundation-models/README.md +++ b/packages/foundation-models/README.md @@ -152,7 +152,7 @@ Refer to `AzureOpenAiChatCompletionParameters` interface for other parameters th The `AzureOpenAiChatClient` supports streaming response for chat completion requests based on the [Server-sent events](https://html.spec.whatwg.org/multipage/server-sent-events.html#server-sent-events) standard. Use the `stream()` method to receive a stream of chunk responses from the model. -After consuming the stream, call the helper methods to get the finish reason and token usage information respectively. +After consuming the stream, call the helper methods to get the finish reason and token usage information. ```ts const chatClient = new AzureOpenAiChatClient('gpt-4o'); @@ -178,7 +178,7 @@ console.log(`Token usage: ${JSON.stringify(tokenUsage)}\n`); ##### Streaming the Delta Content -The client provides a helper method to extract delta content and stream string directly. 
+The client provides a helper method to extract the text chunks as strings: ```ts for await (const chunk of response.stream.toContentStream()) { @@ -198,7 +198,7 @@ Additionally, it can be aborted manually by calling the `stream()` method with a ```ts const chatClient = new AzureOpenAiChatClient('gpt-4o'); const controller = new AbortController(); -const response = await new AzureOpenAiChatClient('gpt-35-turbo').stream( +const response = await chatClient.stream( { messages: [ { diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.test.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.test.ts index 1310254d..ebfae76a 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-client.test.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-client.test.ts @@ -198,7 +198,7 @@ describe('Azure OpenAI chat client', () => { const response = await client.stream(prompt); for await (const chunk of response.stream) { - expect(JSON.stringify(chunk.data)).toEqual(initialResponse); + expect(chunk.data).toEqual(JSON.parse(initialResponse)); break; } }); diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts index f4a058bc..e2b249a8 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.test.ts @@ -1,9 +1,8 @@ import { createLogger } from '@sap-cloud-sdk/util'; import { jest } from '@jest/globals'; +import { LineDecoder, SSEDecoder } from '@sap-ai-sdk/core'; import { parseFileToString } from '../../../../test-util/mock-http.js'; import { AzureOpenAiChatCompletionStream } from './azure-openai-chat-completion-stream.js'; -import { LineDecoder } from './stream/line-decoder.js'; -import { SSEDecoder } from './stream/sse-decoder.js'; 
describe('OpenAI chat completion stream', () => { let sseChunks: string[]; @@ -39,12 +38,12 @@ describe('OpenAI chat completion stream', () => { it('should wrap the raw chunk', async () => { let output = ''; - const asnycGenerator = AzureOpenAiChatCompletionStream._processChunk( + const asyncGenerator = AzureOpenAiChatCompletionStream._processChunk( originalChatCompletionStream ); - for await (const chunk of asnycGenerator) { + for await (const chunk of asyncGenerator) { expect(chunk).toBeDefined(); - chunk.getDeltaContent() ? (output += chunk.getDeltaContent()) : null; + output += chunk.getDeltaContent() ?? ''; } expect(output).toEqual('The capital of France is Paris.'); }); diff --git a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts index 70c699fe..f4ad8082 100644 --- a/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts +++ b/packages/foundation-models/src/azure-openai/azure-openai-chat-completion-stream.ts @@ -1,5 +1,5 @@ import { createLogger } from '@sap-cloud-sdk/util'; -import { SseStream } from './stream/index.js'; +import { SseStream } from '@sap-ai-sdk/core'; import { AzureOpenAiChatCompletionStreamChunkResponse } from './azure-openai-chat-completion-stream-chunk-response.js'; import type { HttpResponse } from '@sap-cloud-sdk/http-client'; import type { AzureOpenAiChatCompletionStreamResponse } from './azure-openai-chat-completion-stream-response.js'; diff --git a/packages/foundation-models/src/azure-openai/stream/index.ts b/packages/foundation-models/src/azure-openai/stream/index.ts deleted file mode 100644 index 4f2ea9e7..00000000 --- a/packages/foundation-models/src/azure-openai/stream/index.ts +++ /dev/null @@ -1 +0,0 @@ -export { SseStream } from './sse-stream.js'; diff --git a/packages/orchestration/README.md b/packages/orchestration/README.md index 14fb69c8..a5abcec0 100644 --- 
a/packages/orchestration/README.md +++ b/packages/orchestration/README.md @@ -86,6 +86,125 @@ The client allows you to combine various modules, such as templating and content In addition to the examples below, you can find more **sample code** [here](https://github.com/SAP/ai-sdk-js/blob/main/sample-code/src/orchestration.ts). +### Streaming + +The `OrchestrationClient` supports streaming responses for chat completion requests based on the [Server-sent events](https://html.spec.whatwg.org/multipage/server-sent-events.html#server-sent-events) standard. + +Use the `stream()` method to receive a stream of chunk responses from the model. +After consuming the stream, call the helper methods to get the finish reason and token usage information. + +```ts +const orchestrationClient = new OrchestrationClient({ + llm: { + model_name: 'gpt-4o', + model_params: { max_tokens: 50, temperature: 0.1 } + }, + templating: { + template: [ + { role: 'user', content: 'Give a long history of {{?country}}?' } + ] + } +}); + +const response = await orchestrationClient.stream({ + inputParams: { country: 'France' } +}); + +for await (const chunk of response.stream) { + console.log(JSON.stringify(chunk)); +} + +const finishReason = response.getFinishReason(); +const tokenUsage = response.getTokenUsage(); + +console.log(`Finish reason: ${finishReason}\n`); +console.log(`Token usage: ${JSON.stringify(tokenUsage)}\n`); +``` + +#### Streaming the Delta Content + +The client provides a helper method to extract the text chunks as strings: + +```ts +for await (const chunk of response.stream.toContentStream()) { + console.log(chunk); // will log the delta content +} +``` + +Each chunk will be a string containing the delta content. + +#### Streaming with Abort Controller + +Streaming request can be aborted using the `AbortController` API. +In case of an error, the SAP Cloud SDK for AI will automatically close the stream. 
+Additionally, it can be aborted manually by calling the `stream()` method with an `AbortController` object. + +```ts +const orchestrationClient = new OrchestrationClient({ + llm: { + model_name: 'gpt-4o', + model_params: { max_tokens: 50, temperature: 0.1 } + }, + templating: { + template: [ + { role: 'user', content: 'Give a long history of {{?country}}?' } + ] + } +}); + +const controller = new AbortController(); +const response = await orchestrationClient.stream( + { + inputParams: { country: 'France' } + }, + controller +); + +// Abort the streaming request after one second +setTimeout(() => { + controller.abort(); +}, 1000); + +for await (const chunk of response.stream) { + console.log(JSON.stringify(chunk)); +} +``` + +In this example, the streaming request will be aborted after one second. +An abort controller can be useful, e.g., when the end user wants to stop the stream or refreshes the page. + +#### Stream Options + +The orchestration service offers multiple streaming options, which you can configure in addition to the LLM's streaming options. +These include options like defining the maximum number of characters per chunk or modifying the output filter behavior. +There are two ways to add specific streaming options to your client, either at initialization of the orchestration client, or when calling the stream API. + +Setting streaming options dynamically could be useful if an initialized orchestration client will also be used for streaming. + +You can check the list of available stream options in the [orchestration service's documentation](https://help.sap.com/docs/sap-ai-core/sap-ai-core-service-guide/streaming). 
+ +An example for setting the streaming options when calling the stream API looks like the following: + +```ts +const response = orchestrationClient.stream( + { + inputParams: { country: 'France' } + }, + controller, + { + llm: { include_usage: false }, + global: { chunk_size: 10 }, + outputFiltering: { overlap: 200 } + } +); +``` + +Usage metrics are collected by default, if you do not want to receive them, set `include_usage` to `false`. +If you don't want any streaming options as part of your call to the LLM, set `streamOptions.llm` to `null`. + +> [!NOTE] +> When initalizing a client with a JSON module config, providing streaming options is not possible. + ### Templating Use the orchestration client with templating to pass a prompt containing placeholders that will be replaced with input parameters during a chat completion request. diff --git a/packages/orchestration/package.json b/packages/orchestration/package.json index 339626eb..893fe49e 100644 --- a/packages/orchestration/package.json +++ b/packages/orchestration/package.json @@ -32,6 +32,9 @@ "dependencies": { "@sap-ai-sdk/core": "workspace:^", "@sap-ai-sdk/ai-api": "workspace:^", + "@sap-cloud-sdk/util": "^3.25.0" + }, + "devDependencies": { "@sap-cloud-sdk/http-client": "^3.25.0", "@sap-cloud-sdk/connectivity": "^3.25.0" } diff --git a/packages/orchestration/src/__snapshots__/orchestration-stream.test.ts.snap b/packages/orchestration/src/__snapshots__/orchestration-stream.test.ts.snap new file mode 100644 index 00000000..6048c4ef --- /dev/null +++ b/packages/orchestration/src/__snapshots__/orchestration-stream.test.ts.snap @@ -0,0 +1,37 @@ +// Jest Snapshot v1, https://goo.gl/fbAQLP + +exports[`Orchestration chat completion stream should transform the original stream to string stream 1`] = ` +"The SAP Cloud SDK is a comprehensive development toolkit designed to simplify and accelerate the creation of applications that integrate with SAP solutions, particularly those built on the SAP Business Technology 
Platform (BTP). It provides developers with libraries, tools, and best practices that streamline the process of connecting to SAP systems, such as S/4HANA and other services available on the SAP Cloud Platform. + +Key features of the SAP Cloud SDK include: + +1. **Simplified Connectivity**: The SDK offers pre-built libraries to easily interact with SAP services, providing capabilities for authentication, service consumption, and OData/REST client generation. + +2. **Multi-cloud Support**: It supports multiple cloud environments, ensuring that applications remain flexible and can be deployed across various cloud providers. + +3. **Best Practices and Guidelines**: The SDK includes best practices for development, ensuring high-quality, scalable, and maintainable code. + +4. **Project Scaffolding and Code Samples**: Developers can quickly start their projects using provided templates and samples, accelerating the development process and reducing the learning curve. + +5. **Extensive Documentation and Community Support**: Ample documentation, tutorials, and an active community help developers overcome challenges and adopt the SDK efficiently. + +Overall, the SAP Cloud SDK is an essential tool for developers looking to build cloud-native applications and extensions that seamlessly integrate with SAP's enterprise solutions." +`; + +exports[`Orchestration chat completion stream should wrap the raw chunk 1`] = ` +"The SAP Cloud SDK is a comprehensive development toolkit designed to simplify and accelerate the creation of applications that integrate with SAP solutions, particularly those built on the SAP Business Technology Platform (BTP). It provides developers with libraries, tools, and best practices that streamline the process of connecting to SAP systems, such as S/4HANA and other services available on the SAP Cloud Platform. + +Key features of the SAP Cloud SDK include: + +1. 
**Simplified Connectivity**: The SDK offers pre-built libraries to easily interact with SAP services, providing capabilities for authentication, service consumption, and OData/REST client generation. + +2. **Multi-cloud Support**: It supports multiple cloud environments, ensuring that applications remain flexible and can be deployed across various cloud providers. + +3. **Best Practices and Guidelines**: The SDK includes best practices for development, ensuring high-quality, scalable, and maintainable code. + +4. **Project Scaffolding and Code Samples**: Developers can quickly start their projects using provided templates and samples, accelerating the development process and reducing the learning curve. + +5. **Extensive Documentation and Community Support**: Ample documentation, tutorials, and an active community help developers overcome challenges and adopt the SDK efficiently. + +Overall, the SAP Cloud SDK is an essential tool for developers looking to build cloud-native applications and extensions that seamlessly integrate with SAP's enterprise solutions." 
+`; diff --git a/packages/orchestration/src/index.ts b/packages/orchestration/src/index.ts index 57eae616..ef3e251b 100644 --- a/packages/orchestration/src/index.ts +++ b/packages/orchestration/src/index.ts @@ -1,52 +1,22 @@ -export type { - CompletionPostResponse, - ChatMessages, - TokenUsage, - TemplatingModuleConfig, - OrchestrationConfig, - ModuleResults, - ModuleConfigs, - MaskingModuleConfig, - MaskingProviderConfig, - GroundingModuleConfig, - DocumentGroundingFilter, - GroundingFilterId, - GroundingFilterSearchConfiguration, - DataRepositoryType, - KeyValueListPair, - SearchDocumentKeyValueListPair, - SearchSelectOptionEnum, - LlmModuleResult, - LlmChoice, - GenericModuleResult, - FilteringModuleConfig, - InputFilteringConfig, - OutputFilteringConfig, - FilterConfig, - ErrorResponse, - DpiEntities, - DpiEntityConfig, - DpiConfig, - CompletionPostRequest, - ChatMessage, - AzureThreshold, - AzureContentSafety, - AzureContentSafetyFilterConfig, - ImageContent, - TextContent, - MultiChatMessageContent, - MultiChatMessage -} from './client/api/schema/index.js'; +export * from './client/api/schema/index.js'; export type { OrchestrationModuleConfig, LlmModuleConfig, Prompt, + RequestOptions, + StreamOptions, DocumentGroundingServiceConfig, DocumentGroundingServiceFilter, LlmModelParams } from './orchestration-types.js'; +export { OrchestrationStreamResponse } from './orchestration-stream-response.js'; + +export { OrchestrationStreamChunkResponse } from './orchestration-stream-chunk-response.js'; + +export { OrchestrationStream } from './orchestration-stream.js'; + export { OrchestrationClient } from './orchestration-client.js'; export { diff --git a/packages/orchestration/src/orchestration-client.test.ts b/packages/orchestration/src/orchestration-client.test.ts index 56855687..f3eef6fb 100644 --- a/packages/orchestration/src/orchestration-client.test.ts +++ b/packages/orchestration/src/orchestration-client.test.ts @@ -1,16 +1,19 @@ import nock from 'nock'; +import 
{ jest } from '@jest/globals'; +import { createLogger } from '@sap-cloud-sdk/util'; import { mockClientCredentialsGrantCall, mockDeploymentsList, mockInference, + parseFileToString, parseMockResponse } from '../../../test-util/mock-http.js'; +import { OrchestrationClient } from './orchestration-client.js'; import { - constructCompletionPostRequestFromJson, + buildAzureContentFilter, constructCompletionPostRequest, - OrchestrationClient -} from './orchestration-client.js'; -import { buildAzureContentFilter } from './orchestration-utils.js'; + constructCompletionPostRequestFromJsonModuleConfig +} from './orchestration-utils.js'; import { OrchestrationResponse } from './orchestration-response.js'; import type { CompletionPostResponse } from './client/api/schema/index.js'; import type { @@ -28,6 +31,21 @@ describe('orchestration service client', () => { nock.cleanAll(); }); + const jsonConfig = `{ + "module_configurations": { + "llm_module_config": { + "model_name": "gpt-35-turbo-16k", + "model_params": { + "max_tokens": 50, + "temperature": 0.1 + } + }, + "templating_module_config": { + "template": [{ "role": "user", "content": "What is the capital of France?" }] + } + } + }`; + it('calls chatCompletion with minimum configuration', async () => { const config: OrchestrationModuleConfig = { llm: { @@ -65,22 +83,15 @@ describe('orchestration service client', () => { expect(response.getTokenUsage().completion_tokens).toEqual(9); }); - it('calls chatCompletion with valid JSON configuration', async () => { - const jsonConfig = `{ - "module_configurations": { - "llm_module_config": { - "model_name": "gpt-35-turbo-16k", - "model_params": { - "max_tokens": 50, - "temperature": 0.1 - } - }, - "templating_module_config": { - "template": [{ "role": "user", "content": "What is the capital of France?" 
}] - } - } - }`; + it('should throw an error when invalid JSON is provided', () => { + const invalidJsonConfig = '{ "module_configurations": {}, '; + expect(() => new OrchestrationClient(invalidJsonConfig)).toThrow( + 'Could not parse JSON' + ); + }); + + it('calls chatCompletion with valid JSON configuration', async () => { const mockResponse = await parseMockResponse( 'orchestration', 'orchestration-chat-completion-success-response.json' @@ -88,7 +99,9 @@ describe('orchestration service client', () => { mockInference( { - data: constructCompletionPostRequestFromJson(jsonConfig) + data: constructCompletionPostRequestFromJsonModuleConfig( + JSON.parse(jsonConfig) + ) }, { data: mockResponse, @@ -376,4 +389,103 @@ describe('orchestration service client', () => { const response = await clientWithResourceGroup.chatCompletion(prompt); expect(response.data).toEqual(mockResponse); }); + + it('executes a streaming request with correct chunk response', async () => { + const config: OrchestrationModuleConfig = { + llm: { + model_name: 'gpt-4o', + model_params: {} + }, + templating: { + template: [ + { + role: 'user', + content: 'Give me a short introduction of SAP Cloud SDK.' 
+ } + ] + } + }; + + const mockResponse = await parseFileToString( + 'orchestration', + 'orchestration-chat-completion-stream-chunks.txt' + ); + + mockInference( + { + data: constructCompletionPostRequest(config, undefined, true) + }, + { + data: mockResponse, + status: 200 + }, + { + url: 'inference/deployments/1234/completion' + } + ); + const response = await new OrchestrationClient(config).stream(); + + const initialResponse = await parseFileToString( + 'orchestration', + 'orchestration-chat-completion-stream-chunk-response-initial.json' + ); + + for await (const chunk of response.stream) { + expect(chunk.data).toEqual(JSON.parse(initialResponse)); + break; + } + }); + + it('executes a streaming request with JSON config and logs warning for stream options', async () => { + const mockResponse = await parseFileToString( + 'orchestration', + 'orchestration-chat-completion-stream-chunks.txt' + ); + + mockInference( + { + data: constructCompletionPostRequestFromJsonModuleConfig( + JSON.parse(jsonConfig), + undefined, + true + ) + }, + { + data: mockResponse, + status: 200 + }, + { + url: 'inference/deployments/1234/completion' + } + ); + + const logger = createLogger({ + package: 'orchestration', + messageContext: 'orchestration-client' + }); + + const warnSpy = jest.spyOn(logger, 'warn'); + + const response = await new OrchestrationClient(jsonConfig).stream( + undefined, + undefined, + { + outputFiltering: { overlap: 100 } + } + ); + + expect(warnSpy).toHaveBeenCalledWith( + 'Stream options are not supported when using a JSON module config.' 
+ ); + + const initialResponse = await parseFileToString( + 'orchestration', + 'orchestration-chat-completion-stream-chunk-response-initial.json' + ); + + for await (const chunk of response.stream) { + expect(chunk.data).toEqual(JSON.parse(initialResponse)); + break; + } + }); }); diff --git a/packages/orchestration/src/orchestration-client.ts b/packages/orchestration/src/orchestration-client.ts index 5da9ecc3..17c441e9 100644 --- a/packages/orchestration/src/orchestration-client.ts +++ b/packages/orchestration/src/orchestration-client.ts @@ -1,15 +1,32 @@ import { executeRequest } from '@sap-ai-sdk/core'; import { resolveDeploymentId } from '@sap-ai-sdk/ai-api/internal.js'; +import { createLogger } from '@sap-cloud-sdk/util'; +import { OrchestrationStream } from './orchestration-stream.js'; +import { OrchestrationStreamResponse } from './orchestration-stream-response.js'; import { OrchestrationResponse } from './orchestration-response.js'; -import type { CustomRequestConfig } from '@sap-ai-sdk/core'; +import { + constructCompletionPostRequest, + constructCompletionPostRequestFromJsonModuleConfig +} from './orchestration-utils.js'; +import type { + HttpResponse, + CustomRequestConfig +} from '@sap-cloud-sdk/http-client'; import type { ResourceGroupConfig } from '@sap-ai-sdk/ai-api/internal.js'; -import type { CompletionPostRequest } from './client/api/schema/index.js'; import type { OrchestrationModuleConfig, - Prompt + Prompt, + RequestOptions, + StreamOptions } from './orchestration-types.js'; +import type { OrchestrationStreamChunkResponse } from './orchestration-stream-chunk-response.js'; import type { HttpDestinationOrFetchOptions } from '@sap-cloud-sdk/connectivity'; +const logger = createLogger({ + package: 'orchestration', + messageContext: 'orchestration-client' +}); + /** * Get the orchestration client. 
*/ @@ -24,88 +41,106 @@ export class OrchestrationClient { private config: OrchestrationModuleConfig | string, private deploymentConfig?: ResourceGroupConfig, private destination?: HttpDestinationOrFetchOptions - ) {} + ) { + try { + if (typeof config === 'string') { + JSON.parse(config); + } + } catch (error) { + throw new Error(`Could not parse JSON: ${error}`); + } + } - /** - * Creates a completion for the chat messages. - * @param prompt - Prompt configuration. - * @param requestConfig - Request configuration. - * @returns The completion result. - */ async chatCompletion( prompt?: Prompt, requestConfig?: CustomRequestConfig ): Promise { + const response = await this.executeRequest({ + prompt, + requestConfig, + stream: false + }); + return new OrchestrationResponse(response); + } + + async stream( + prompt?: Prompt, + controller = new AbortController(), + options: StreamOptions = {}, + requestConfig?: CustomRequestConfig + ): Promise> { + if (typeof this.config === 'string' && options) { + logger.warn( + 'Stream options are not supported when using a JSON module config.' + ); + } + + return this.createStreamResponse( + { + prompt, + requestConfig, + stream: true, + streamOptions: options + }, + controller + ); + } + + private async executeRequest(options: RequestOptions): Promise { + const { prompt, requestConfig, stream, streamOptions } = options; + const body = typeof this.config === 'string' - ? constructCompletionPostRequestFromJson(this.config, prompt) - : constructCompletionPostRequest(this.config, prompt); + ? constructCompletionPostRequestFromJsonModuleConfig( + JSON.parse(this.config), + prompt, + stream + ) + : constructCompletionPostRequest( + this.config, + prompt, + stream, + streamOptions + ); const deploymentId = await resolveDeploymentId({ scenarioId: 'orchestration', - resourceGroup: this.deploymentConfig?.resourceGroup + ...(this.deploymentConfig ?? 
{}) }); - const response = await executeRequest( + return executeRequest( { url: `/inference/deployments/${deploymentId}/completion`, - resourceGroup: this.deploymentConfig?.resourceGroup + ...(this.deploymentConfig ?? {}) }, body, requestConfig, this.destination ); - - return new OrchestrationResponse(response); } -} -/** - * @internal - */ -export function constructCompletionPostRequestFromJson( - config: string, - prompt?: Prompt -): Record { - try { - return { - messages_history: prompt?.messagesHistory || [], - input_params: prompt?.inputParams || {}, - orchestration_config: JSON.parse(config) - }; - } catch (error) { - throw new Error(`Could not parse JSON: ${error}`); - } -} + private async createStreamResponse( + options: RequestOptions, + controller: AbortController + ): Promise> { + const response = + new OrchestrationStreamResponse(); -/** - * @internal - */ -export function constructCompletionPostRequest( - config: OrchestrationModuleConfig, - prompt?: Prompt -): CompletionPostRequest { - return { - orchestration_config: { - module_configurations: { - templating_module_config: config.templating, - llm_module_config: config.llm, - ...(Object.keys(config?.filtering || {}).length && { - filtering_module_config: config.filtering - }), - ...(Object.keys(config?.masking || {}).length && { - masking_module_config: config.masking - }), - ...(Object.keys(config?.grounding || {}).length && { - grounding_module_config: config.grounding - }) + const streamResponse = await this.executeRequest({ + ...options, + requestConfig: { + ...options.requestConfig, + responseType: 'stream', + signal: controller.signal } - }, - ...(prompt?.inputParams && { - input_params: prompt.inputParams - }), - ...(prompt?.messagesHistory && { - messages_history: prompt.messagesHistory - }) - }; + }); + + const stream = OrchestrationStream._create(streamResponse, controller); + response.stream = stream + ._pipe(OrchestrationStream._processChunk) + 
._pipe(OrchestrationStream._processFinishReason, response) + ._pipe(OrchestrationStream._processTokenUsage, response); + + return response; + } } diff --git a/packages/orchestration/src/orchestration-completion-post-request-from-json.test.ts b/packages/orchestration/src/orchestration-completion-post-request-from-json.test.ts index 823e0930..d58e7985 100644 --- a/packages/orchestration/src/orchestration-completion-post-request-from-json.test.ts +++ b/packages/orchestration/src/orchestration-completion-post-request-from-json.test.ts @@ -1,14 +1,6 @@ -import { constructCompletionPostRequestFromJson } from './orchestration-client.js'; +import { constructCompletionPostRequestFromJsonModuleConfig } from './orchestration-utils.js'; describe('construct completion post request from JSON', () => { - it('should throw an error when invalid JSON is provided', () => { - const invalidJsonConfig = '{ "module_configurations": {}, '; - - expect(() => - constructCompletionPostRequestFromJson(invalidJsonConfig) - ).toThrow('Could not parse JSON'); - }); - it('should construct completion post request from JSON', () => { const jsonConfig = `{ "module_configurations": { @@ -32,7 +24,9 @@ describe('construct completion post request from JSON', () => { }; const completionPostRequestFromJson: Record = - constructCompletionPostRequestFromJson(jsonConfig); + constructCompletionPostRequestFromJsonModuleConfig( + JSON.parse(jsonConfig) + ); expect(expectedCompletionPostRequestFromJson).toEqual( completionPostRequestFromJson @@ -85,10 +79,13 @@ describe('construct completion post request from JSON', () => { }; const completionPostRequestFromJson: Record = - constructCompletionPostRequestFromJson(jsonConfig, { - inputParams, - messagesHistory - }); + constructCompletionPostRequestFromJsonModuleConfig( + JSON.parse(jsonConfig), + { + inputParams, + messagesHistory + } + ); expect(expectedCompletionPostRequestFromJson).toEqual( completionPostRequestFromJson diff --git 
a/packages/orchestration/src/orchestration-completion-post-request.test.ts b/packages/orchestration/src/orchestration-completion-post-request.test.ts index c7b19fcb..d3d8bf52 100644 --- a/packages/orchestration/src/orchestration-completion-post-request.test.ts +++ b/packages/orchestration/src/orchestration-completion-post-request.test.ts @@ -1,7 +1,12 @@ -import { constructCompletionPostRequest } from './orchestration-client.js'; -import { buildAzureContentFilter } from './orchestration-utils.js'; +import { + constructCompletionPostRequest, + buildAzureContentFilter +} from './orchestration-utils.js'; import type { CompletionPostRequest } from './client/api/schema/index.js'; -import type { OrchestrationModuleConfig } from './orchestration-types.js'; +import type { + OrchestrationModuleConfig, + StreamOptions +} from './orchestration-types.js'; describe('construct completion post request', () => { const defaultConfig: OrchestrationModuleConfig = { @@ -199,4 +204,50 @@ describe('construct completion post request', () => { constructCompletionPostRequest(config); expect(completionPostRequest).toEqual(expectedCompletionPostRequest); }); + + it('should construct completion post request with stream options', async () => { + const config: OrchestrationModuleConfig = { + ...defaultConfig, + filtering: { + output: buildAzureContentFilter({ Hate: 4, SelfHarm: 0 }) + } + }; + + const streamOptions: StreamOptions = { + global: { chunk_size: 100 }, + outputFiltering: { overlap: 100 } + }; + + const expectedCompletionPostRequest: CompletionPostRequest = { + orchestration_config: { + stream: true, + stream_options: streamOptions.global, + module_configurations: { + templating_module_config: config.templating, + llm_module_config: { + ...config.llm, + model_params: { + ...config.llm.model_params, + stream_options: { include_usage: true } + } + }, + filtering_module_config: { + output: { + ...config.filtering!.output!, + stream_options: streamOptions.outputFiltering + } + } + } + }, 
+ input_params: { phrase: 'I hate you.' } + }; + const completionPostRequest: CompletionPostRequest = + constructCompletionPostRequest( + config, + { inputParams: { phrase: 'I hate you.' } }, + true, + streamOptions + ); + expect(completionPostRequest).toEqual(expectedCompletionPostRequest); + }); }); diff --git a/packages/orchestration/src/orchestration-stream-chunk-response.test.ts b/packages/orchestration/src/orchestration-stream-chunk-response.test.ts new file mode 100644 index 00000000..703a2735 --- /dev/null +++ b/packages/orchestration/src/orchestration-stream-chunk-response.test.ts @@ -0,0 +1,74 @@ +import { parseMockResponse } from '../../../test-util/mock-http.js'; +import { OrchestrationStreamChunkResponse } from './orchestration-stream-chunk-response.js'; + +describe('Orchestration chat completion stream chunk response', () => { + let mockResponses: { + tokenUsageAndFinishReasonResponse: any; + deltaContentResponse: any; + }; + let orchestrationStreamChunkResponses: { + tokenUsageResponse: OrchestrationStreamChunkResponse; + finishReasonResponse: OrchestrationStreamChunkResponse; + deltaContentResponse: OrchestrationStreamChunkResponse; + }; + + beforeAll(async () => { + mockResponses = { + tokenUsageAndFinishReasonResponse: await parseMockResponse( + 'orchestration', + 'orchestration-chat-completion-stream-chunk-response-token-usage-and-finish-reason.json' + ), + deltaContentResponse: await parseMockResponse( + 'orchestration', + 'orchestration-chat-completion-stream-chunk-response-delta-content.json' + ) + }; + orchestrationStreamChunkResponses = { + tokenUsageResponse: new OrchestrationStreamChunkResponse( + mockResponses.tokenUsageAndFinishReasonResponse + ), + finishReasonResponse: new OrchestrationStreamChunkResponse( + mockResponses.tokenUsageAndFinishReasonResponse + ), + deltaContentResponse: new OrchestrationStreamChunkResponse( + mockResponses.deltaContentResponse + ) + }; + }); + + it('should return the chat completion stream chunk 
response', () => { + expect( + orchestrationStreamChunkResponses.tokenUsageResponse.data + ).toStrictEqual(mockResponses.tokenUsageAndFinishReasonResponse); + expect( + orchestrationStreamChunkResponses.finishReasonResponse.data + ).toStrictEqual(mockResponses.tokenUsageAndFinishReasonResponse); + expect( + orchestrationStreamChunkResponses.deltaContentResponse.data + ).toStrictEqual(mockResponses.deltaContentResponse); + }); + + it('should get token usage', () => { + expect( + orchestrationStreamChunkResponses.tokenUsageResponse.getTokenUsage() + ).toMatchObject({ + completion_tokens: expect.any(Number), + prompt_tokens: expect.any(Number), + total_tokens: expect.any(Number) + }); + }); + + it('should return finish reason', () => { + expect( + orchestrationStreamChunkResponses.finishReasonResponse.getFinishReason() + ).toBe('stop'); + }); + + it('should return delta content with default index 0', () => { + expect( + orchestrationStreamChunkResponses.deltaContentResponse.getDeltaContent() + ).toMatchInlineSnapshot( + '"rimarily focusing on Java and JavaScript/Node.js environments, allowing developers to work in their "' + ); + }); +}); diff --git a/packages/orchestration/src/orchestration-stream-chunk-response.ts b/packages/orchestration/src/orchestration-stream-chunk-response.ts new file mode 100644 index 00000000..a180938e --- /dev/null +++ b/packages/orchestration/src/orchestration-stream-chunk-response.ts @@ -0,0 +1,48 @@ +import type { + CompletionPostResponseStreaming, + LLMChoiceStreaming, + TokenUsage +} from './client/api/schema/index.js'; + +/** + * Orchestration stream chunk response. + */ +export class OrchestrationStreamChunkResponse { + constructor(public readonly data: CompletionPostResponseStreaming) { + this.data = data; + } + + /** + * Usage of tokens in the chunk response. + * @returns Token usage. 
+ */ + getTokenUsage(): TokenUsage | undefined { + return this.data.orchestration_result?.usage; + } + + /** + * Reason for stopping the completion stream chunk. + * @param choiceIndex - The index of the choice to parse. + * @returns The finish reason. + */ + getFinishReason(choiceIndex = 0): string | undefined { + return this.getChoices()?.find( + (c: LLMChoiceStreaming) => c.index === choiceIndex + )?.finish_reason; + } + + /** + * Parses the chunk response and returns the delta content. + * @param choiceIndex - The index of the choice to parse. + * @returns The message delta content. + */ + getDeltaContent(choiceIndex = 0): string | undefined { + return this.getChoices()?.find( + (c: LLMChoiceStreaming) => c.index === choiceIndex + )?.delta.content; + } + + private getChoices(): LLMChoiceStreaming[] | undefined { + return this.data.orchestration_result?.choices; + } +} diff --git a/packages/orchestration/src/orchestration-stream-response.ts b/packages/orchestration/src/orchestration-stream-response.ts new file mode 100644 index 00000000..ed2f3290 --- /dev/null +++ b/packages/orchestration/src/orchestration-stream-response.ts @@ -0,0 +1,57 @@ +import type { TokenUsage } from './client/api/schema/index.js'; +import type { OrchestrationStream } from './orchestration-stream.js'; + +/** + * Orchestration stream response. + */ +export class OrchestrationStreamResponse { + private _usage: TokenUsage | undefined; + /** + * Finish reasons for all choices. 
+ */ + private _finishReasons: Map = new Map(); + private _stream: OrchestrationStream | undefined; + + public getTokenUsage(): TokenUsage | undefined { + return this._usage; + } + + /** + * @internal + */ + _setTokenUsage(usage: TokenUsage): void { + this._usage = usage; + } + + public getFinishReason(choiceIndex = 0): string | undefined { + return this._finishReasons.get(choiceIndex); + } + + /** + * @internal + */ + _getFinishReasons(): Map { + return this._finishReasons; + } + + /** + * @internal + */ + _setFinishReasons(finishReasons: Map): void { + this._finishReasons = finishReasons; + } + + get stream(): OrchestrationStream { + if (!this._stream) { + throw new Error('Response stream is undefined.'); + } + return this._stream; + } + + /** + * @internal + */ + set stream(stream: OrchestrationStream) { + this._stream = stream; + } +} diff --git a/packages/orchestration/src/orchestration-stream.test.ts b/packages/orchestration/src/orchestration-stream.test.ts new file mode 100644 index 00000000..5305f080 --- /dev/null +++ b/packages/orchestration/src/orchestration-stream.test.ts @@ -0,0 +1,108 @@ +import { createLogger } from '@sap-cloud-sdk/util'; +import { jest } from '@jest/globals'; +import { LineDecoder, SSEDecoder } from '@sap-ai-sdk/core'; +import { parseFileToString } from '../../../test-util/mock-http.js'; +import { OrchestrationStream } from './orchestration-stream.js'; +import type { CompletionPostResponseStreaming } from './client/api/schema/index.js'; + +describe('Orchestration chat completion stream', () => { + let sseChunks: string[]; + let originalChatCompletionStream: OrchestrationStream; + + beforeEach(async () => { + const rawChunksString = await parseFileToString( + 'orchestration', + 'orchestration-chat-completion-stream-chunks.txt' + ); + const lineDecoder = new LineDecoder(); + const sseDecoder = new SSEDecoder(); + const rawLines: string[] = lineDecoder.decode( + Buffer.from(rawChunksString, 'utf-8') + ); + + sseChunks = rawLines + 
.map(chunk => sseDecoder.decode(chunk)) + .filter(sse => sse !== null) + .filter(sse => !sse.data.startsWith('[DONE]')) + .map(sse => JSON.parse(sse.data)); + + async function* iterator(): AsyncGenerator { + for (const sseChunk of sseChunks) { + yield sseChunk; + } + } + originalChatCompletionStream = new OrchestrationStream( + iterator, + new AbortController() + ); + }); + + it('should wrap the raw chunk', async () => { + let output = ''; + const asyncGenerator = OrchestrationStream._processChunk( + originalChatCompletionStream + ); + for await (const chunk of asyncGenerator) { + expect(chunk).toBeDefined(); + output += chunk.getDeltaContent() ?? ''; + } + expect(output).toMatchSnapshot(); + }); + + it('should process the finish reasons', async () => { + const logger = createLogger({ + package: 'orchestration', + messageContext: 'orchestration-chat-completion-stream' + }); + const debugSpy = jest.spyOn(logger, 'debug'); + const asyncGeneratorChunk = OrchestrationStream._processChunk( + originalChatCompletionStream + ); + const asyncGeneratorFinishReason = OrchestrationStream._processFinishReason( + new OrchestrationStream(() => asyncGeneratorChunk, new AbortController()) + ); + + for await (const chunk of asyncGeneratorFinishReason) { + expect(chunk).toBeDefined(); + } + expect(debugSpy).toHaveBeenCalledWith('Choice 0: Stream finished.'); + }); + + it('should process the token usage', async () => { + const logger = createLogger({ + package: 'orchestration', + messageContext: 'orchestration-chat-completion-stream' + }); + const debugSpy = jest.spyOn(logger, 'debug'); + const asyncGeneratorChunk = OrchestrationStream._processChunk( + originalChatCompletionStream + ); + const asyncGeneratorTokenUsage = OrchestrationStream._processTokenUsage( + new OrchestrationStream(() => asyncGeneratorChunk, new AbortController()) + ); + + for await (const chunk of asyncGeneratorTokenUsage) { + expect(chunk).toBeDefined(); + } + expect(debugSpy).toHaveBeenCalledWith( + 
expect.stringContaining('Token usage:') + ); + }); + + it('should transform the original stream to string stream', async () => { + const asyncGeneratorChunk = OrchestrationStream._processChunk( + originalChatCompletionStream + ); + const chunkStream = new OrchestrationStream( + () => asyncGeneratorChunk, + new AbortController() + ); + + let output = ''; + for await (const chunk of chunkStream.toContentStream()) { + expect(typeof chunk).toBe('string'); + output += chunk; + } + expect(output).toMatchSnapshot(); + }); +}); diff --git a/packages/orchestration/src/orchestration-stream.ts b/packages/orchestration/src/orchestration-stream.ts new file mode 100644 index 00000000..89623a20 --- /dev/null +++ b/packages/orchestration/src/orchestration-stream.ts @@ -0,0 +1,175 @@ +import { createLogger } from '@sap-cloud-sdk/util'; +import { SseStream } from '@sap-ai-sdk/core'; +import { OrchestrationStreamChunkResponse } from './orchestration-stream-chunk-response.js'; +import type { + CompletionPostResponseStreaming, + LLMChoiceStreaming +} from './client/api/schema/index.js'; +import type { HttpResponse } from '@sap-cloud-sdk/http-client'; +import type { OrchestrationStreamResponse } from './orchestration-stream-response.js'; + +const logger = createLogger({ + package: 'orchestration', + messageContext: 'orchestration-chat-completion-stream' +}); + +/** + * Orchestration stream containing post-processing functions. + */ +export class OrchestrationStream extends SseStream { + /** + * Create an orchestration stream based on the http response. + * @param response - Http response. + * @returns An orchestration stream. + * @internal + */ + public static _create( + response: HttpResponse, + controller: AbortController + ): OrchestrationStream { + const stream = + SseStream.transformToSseStream( + response, + controller + ); + return new OrchestrationStream(stream.iterator, controller); + } + + /** + * Wrap raw chunk data with chunk response class to provide helper functions. 
+ * @param stream - Orchestration stream. + * @internal + */ + static async *_processChunk( + stream: OrchestrationStream + ): AsyncGenerator { + for await (const chunk of stream) { + yield new OrchestrationStreamChunkResponse(chunk); + } + } + + /** + * @internal + */ + static async *_processFinishReason( + stream: OrchestrationStream, + response?: OrchestrationStreamResponse + ): AsyncGenerator { + for await (const chunk of stream) { + chunk.data.orchestration_result?.choices.forEach( + (choice: LLMChoiceStreaming) => { + const choiceIndex = choice.index; + if (choiceIndex >= 0) { + const finishReason = chunk.getFinishReason(choiceIndex); + if (finishReason) { + if (response) { + response._getFinishReasons().set(choiceIndex, finishReason); + } + switch (finishReason) { + case 'content_filter': + logger.error( + `Choice ${choiceIndex}: Stream finished with content filter hit.` + ); + break; + case 'length': + logger.error( + `Choice ${choiceIndex}: Stream finished with token length exceeded.` + ); + break; + case 'stop': + logger.debug(`Choice ${choiceIndex}: Stream finished.`); + break; + default: + logger.error( + `Choice ${choiceIndex}: Stream finished with unknown reason '${finishReason}'.` + ); + } + } + } + } + ); + yield chunk; + } + } + + /** + * @internal + */ + static async *_processTokenUsage( + stream: OrchestrationStream, + response?: OrchestrationStreamResponse + ): AsyncGenerator { + for await (const chunk of stream) { + const usage = chunk.getTokenUsage(); + if (usage) { + if (response) { + response._setTokenUsage(usage); + } + logger.debug(`Token usage: ${JSON.stringify(usage)}`); + } + yield chunk; + } + } + + /** + * Transform a stream of chunks into a stream of content strings. + * @param stream - Orchestration stream. + * @param choiceIndex - The index of the choice to parse. 
+ * @internal + */ + static async *_processContentStream( + stream: OrchestrationStream + ): AsyncGenerator { + for await (const chunk of stream) { + const deltaContent = chunk.getDeltaContent(); + if (!deltaContent) { + continue; + } + yield deltaContent; + } + } + + constructor( + public iterator: () => AsyncIterator, + controller: AbortController + ) { + super(iterator, controller); + } + + /** + * Pipe the stream through a processing function. + * @param processFn - The function to process the input stream. + * @param response - The `OrchestrationStreamResponse` object for process function to store finish reason, token usage, etc. + * @returns The output stream containing processed items. + * @internal + */ + _pipe( + processFn: ( + stream: OrchestrationStream, + response?: OrchestrationStreamResponse + ) => AsyncIterator, + response?: OrchestrationStreamResponse + ): OrchestrationStream { + if (response) { + return new OrchestrationStream( + () => processFn(this, response), + this.controller + ); + } + return new OrchestrationStream(() => processFn(this), this.controller); + } + + /** + * Transform the stream of chunks into a stream of content strings. + * @param this - Orchestration stream. + * @returns A stream of content strings. 
+ */ + public toContentStream( + this: OrchestrationStream + ): OrchestrationStream { + return new OrchestrationStream( + () => OrchestrationStream._processContentStream(this), + this.controller + ); + } +} diff --git a/packages/orchestration/src/orchestration-types.ts b/packages/orchestration/src/orchestration-types.ts index 8edf616b..89a1c52b 100644 --- a/packages/orchestration/src/orchestration-types.ts +++ b/packages/orchestration/src/orchestration-types.ts @@ -1,9 +1,12 @@ +import type { CustomRequestConfig } from '@sap-cloud-sdk/http-client'; import type { ChatModel } from './model-types.js'; import type { ChatMessages, DataRepositoryType, DocumentGroundingFilter, FilteringModuleConfig, + FilteringStreamOptions, + GlobalStreamOptions, GroundingModuleConfig, MaskingModuleConfig, LlmModuleConfig as OriginalLlmModuleConfig, @@ -72,6 +75,46 @@ export interface OrchestrationModuleConfig { grounding?: GroundingModuleConfig; } +/** + * Request options for orchestration. + */ +export interface RequestOptions { + /** + * Prompt configuration. + */ + prompt?: Prompt; + /** + * Custom request configuration. + */ + requestConfig?: CustomRequestConfig; + /** + * Whether to stream the response. + */ + stream?: boolean; + /** + * Options for the stream. + */ + streamOptions?: StreamOptions; +} + +/** + * Options for the stream. + */ +export interface StreamOptions { + /** + * LLM specific stream options. + */ + llm?: { include_usage?: boolean; [key: string]: any } | null; + /** + * Output filtering stream options. + */ + outputFiltering?: FilteringStreamOptions; + /** + * Global stream options. + */ + global?: GlobalStreamOptions; +} + /** * Represents a filter configuration for the Document Grounding Service. 
*/ diff --git a/packages/orchestration/src/orchestration-utils.test.ts b/packages/orchestration/src/orchestration-utils.test.ts index 70d584b0..353cda28 100644 --- a/packages/orchestration/src/orchestration-utils.test.ts +++ b/packages/orchestration/src/orchestration-utils.test.ts @@ -1,18 +1,159 @@ -import { constructCompletionPostRequest } from './orchestration-client.js'; +import { createLogger } from '@sap-cloud-sdk/util'; +import { jest } from '@jest/globals'; import { + addStreamOptions, + addStreamOptionsToLlmModuleConfig, + addStreamOptionsToOutputFilteringConfig, buildAzureContentFilter, - buildDocumentGroundingConfig + buildDocumentGroundingConfig, + constructCompletionPostRequest } from './orchestration-utils.js'; import type { CompletionPostRequest, - FilteringModuleConfig + FilteringModuleConfig, + ModuleConfigs, + OrchestrationConfig } from './client/api/schema/index.js'; import type { OrchestrationModuleConfig, - DocumentGroundingServiceConfig + DocumentGroundingServiceConfig, + StreamOptions } from './orchestration-types.js'; describe('orchestration utils', () => { + describe('stream util tests', () => { + const defaultOrchestrationModuleConfig: OrchestrationModuleConfig = { + llm: { + model_name: 'gpt-35-turbo-16k', + model_params: { max_tokens: 50, temperature: 0.1 } + }, + templating: { + template: [ + { role: 'user', content: 'Create paraphrases of {{?phrase}}' } + ] + } + }; + + const defaultModuleConfigs: ModuleConfigs = { + llm_module_config: defaultOrchestrationModuleConfig.llm, + templating_module_config: defaultOrchestrationModuleConfig.templating + }; + + const defaultStreamOptions: StreamOptions = { + global: { chunk_size: 100 }, + llm: { include_usage: false }, + outputFiltering: { overlap: 100 } + }; + + it('should add include_usage to llm module config', () => { + const llmConfig = addStreamOptionsToLlmModuleConfig( + defaultOrchestrationModuleConfig.llm + ); + expect(llmConfig.model_params?.stream_options).toEqual({ + include_usage: 
true + }); + }); + + it('should set include_usage to false in llm module config', () => { + const llmConfig = addStreamOptionsToLlmModuleConfig( + defaultOrchestrationModuleConfig.llm, + defaultStreamOptions + ); + expect(llmConfig.model_params?.stream_options).toEqual({ + include_usage: false + }); + }); + + it('should not add any stream options to llm module config', () => { + const llmConfig = addStreamOptionsToLlmModuleConfig( + defaultOrchestrationModuleConfig.llm, + { + llm: null + } + ); + expect( + Object.keys(llmConfig.model_params ?? {}).every( + key => key !== 'stream_options' + ) + ).toBe(true); + }); + + it('should add stream options to output filtering config', () => { + const config: OrchestrationModuleConfig = { + ...defaultOrchestrationModuleConfig, + filtering: { + output: buildAzureContentFilter({ Hate: 4, SelfHarm: 0 }) + } + }; + const filteringConfig = addStreamOptionsToOutputFilteringConfig( + config.filtering!.output!, + defaultStreamOptions.outputFiltering! + ); + expect(filteringConfig.filters).toEqual( + config.filtering?.output?.filters + ); + expect(filteringConfig.stream_options).toEqual({ + overlap: 100 + }); + }); + + it('should add stream options to orchestration config', () => { + const config: ModuleConfigs = { + ...defaultModuleConfigs, + filtering_module_config: { + output: buildAzureContentFilter({ Hate: 4, SelfHarm: 0 }) + } + }; + + // eslint-disable-next-line @typescript-eslint/no-unused-vars + const { llm, ...streamOptions } = defaultStreamOptions; + + const expectedOrchestrationConfig: OrchestrationConfig = { + stream: true, + stream_options: streamOptions.global, + module_configurations: { + ...config, + llm_module_config: { + ...config.llm_module_config, + model_params: { + ...config.llm_module_config.model_params, + stream_options: { include_usage: true } + } + }, + filtering_module_config: { + output: { + ...config.filtering_module_config!.output!, + stream_options: streamOptions.outputFiltering + } + } + } + }; + 
const orchestrationConfig = addStreamOptions(config, streamOptions); + expect(orchestrationConfig).toEqual(expectedOrchestrationConfig); + }); + + it('should warn if no filter config was set, but streaming options were set', () => { + const logger = createLogger({ + package: 'orchestration', + messageContext: 'orchestration-utils' + }); + + const warnSpy = jest.spyOn(logger, 'warn'); + + const config = addStreamOptions( + defaultModuleConfigs, + defaultStreamOptions + ); + + expect(warnSpy).toHaveBeenCalledWith( + 'Output filter stream options are not applied because filtering module is not configured.' + ); + expect( + config.module_configurations.filtering_module_config + ).toBeUndefined(); + }); + }); + describe('azure filter', () => { const config: OrchestrationModuleConfig = { llm: { diff --git a/packages/orchestration/src/orchestration-utils.ts b/packages/orchestration/src/orchestration-utils.ts index 7f553f28..966fcd6a 100644 --- a/packages/orchestration/src/orchestration-utils.ts +++ b/packages/orchestration/src/orchestration-utils.ts @@ -1,11 +1,169 @@ -import type { DocumentGroundingServiceConfig } from './orchestration-types.js'; +import { createLogger } from '@sap-cloud-sdk/util'; +import type { + DocumentGroundingServiceConfig, + Prompt, + StreamOptions, + LlmModuleConfig, + OrchestrationModuleConfig +} from './orchestration-types.js'; import type { AzureContentSafety, GroundingModuleConfig, InputFilteringConfig, + CompletionPostRequest, + FilteringStreamOptions, + ModuleConfigs, + OrchestrationConfig, OutputFilteringConfig } from './client/api/schema/index.js'; +const logger = createLogger({ + package: 'orchestration', + messageContext: 'orchestration-utils' +}); + +/** + * @internal + */ +export function constructCompletionPostRequestFromJsonModuleConfig( + config: Record, + prompt?: Prompt, + stream?: boolean +): Record { + const orchestration_config = { ...config }; + if (stream) { + orchestration_config.stream = true; + } else { + delete 
orchestration_config.stream; + } + + return { + messages_history: prompt?.messagesHistory || [], + input_params: prompt?.inputParams || {}, + orchestration_config + }; +} + +/** + * @internal + */ +export function addStreamOptionsToLlmModuleConfig( + llmModuleConfig: LlmModuleConfig, + streamOptions?: StreamOptions +): LlmModuleConfig { + if (streamOptions?.llm === null) { + return llmModuleConfig; + } + return { + ...llmModuleConfig, + model_params: { + ...llmModuleConfig.model_params, + ...(streamOptions?.llm !== null && { + stream_options: { + include_usage: true, + ...(llmModuleConfig.model_params?.stream_options || {}), + ...(streamOptions?.llm || {}) + } + }) + } + }; +} + +/** + * @internal + */ +export function addStreamOptionsToOutputFilteringConfig( + outputFilteringConfig: OutputFilteringConfig, + filteringStreamOptions: FilteringStreamOptions +): OutputFilteringConfig { + return { + ...outputFilteringConfig, + stream_options: { + ...(outputFilteringConfig.stream_options || {}), + ...filteringStreamOptions + } + }; +} + +/** + * @internal + */ +export function addStreamOptions( + moduleConfigs: ModuleConfigs, + streamOptions?: StreamOptions +): OrchestrationConfig { + const { llm_module_config, filtering_module_config } = moduleConfigs; + const outputFiltering = streamOptions?.outputFiltering; + const globalOptions = streamOptions?.global; + + if (!moduleConfigs?.filtering_module_config?.output && outputFiltering) { + logger.warn( + 'Output filter stream options are not applied because filtering module is not configured.' 
+ ); + } + + return { + stream: true, + ...(globalOptions && { stream_options: globalOptions }), + module_configurations: { + ...moduleConfigs, + llm_module_config: addStreamOptionsToLlmModuleConfig( + llm_module_config, + streamOptions + ), + ...(outputFiltering && + filtering_module_config?.output && { + filtering_module_config: { + ...filtering_module_config, + output: addStreamOptionsToOutputFilteringConfig( + filtering_module_config.output, + outputFiltering + ) + } + }) + } + }; +} + +/** + * @internal + */ +export function constructCompletionPostRequest( + config: OrchestrationModuleConfig, + prompt?: Prompt, + stream?: boolean, + streamOptions?: StreamOptions +): CompletionPostRequest { + const moduleConfigurations = { + templating_module_config: config.templating, + llm_module_config: config.llm, + ...(config?.filtering && + Object.keys(config.filtering).length && { + filtering_module_config: config.filtering + }), + ...(config?.masking && + Object.keys(config.masking).length && { + masking_module_config: config.masking + }), + ...(config?.grounding && + Object.keys(config.grounding).length && { + grounding_module_config: config.grounding + }) + }; + + return { + orchestration_config: stream + ? addStreamOptions(moduleConfigurations, streamOptions) + : { module_configurations: moduleConfigurations }, + ...(prompt?.inputParams && { + input_params: prompt.inputParams + }), + ...(prompt?.messagesHistory && { + messages_history: prompt.messagesHistory + }) + }; +} + /** * Convenience function to create Azure content filters. * @param filter - Filtering configuration for Azure filter. If skipped, the default Azure content filter configuration is used. 
diff --git a/packages/orchestration/tsconfig.json b/packages/orchestration/tsconfig.json index 2caf7a3c..78a9a5e2 100644 --- a/packages/orchestration/tsconfig.json +++ b/packages/orchestration/tsconfig.json @@ -6,7 +6,7 @@ "tsBuildInfoFile": "./dist/.tsbuildinfo", "composite": true }, - "include": ["src/**/*.ts"], + "include": ["src/**/*.ts", "src/orchestration-utils.test.ts"], "exclude": ["dist/**/*", "test/**/*", "**/*.test.ts", "node_modules/**/*"], "references": [{ "path": "../core" }, { "path": "../ai-api" }] } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 9be8db3b..c23fcfbf 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -172,6 +172,10 @@ importers: '@sap-ai-sdk/core': specifier: workspace:^ version: link:../core + '@sap-cloud-sdk/util': + specifier: ^3.25.0 + version: 3.25.0 + devDependencies: '@sap-cloud-sdk/connectivity': specifier: ^3.25.0 version: 3.25.0 diff --git a/sample-code/README.md b/sample-code/README.md index f8e8335f..f9066cff 100644 --- a/sample-code/README.md +++ b/sample-code/README.md @@ -162,6 +162,48 @@ Use `buildAzureContentFilter()` to build the content filter. Send chat completion request with a custom header as the custom request configuration. +#### Chat Completion Streaming + +`POST /orchestration-stream/chat-completion-stream` + +Get a chat completion response with streaming. + +You can set the streaming options in the body of the request. + +An example for setting the chunk size would look like this: + +``` +curl -X POST http://localhost:8080/orchestration-stream/chat-completion-stream \ +-H "Content-Type: application/json" \ +-d '{ + "global": { + "chunk_size": 10 + } +}' +``` + +The response header is set with `Content-Type: text/event-stream` to stream the text. + +`AbortController` is used to cancel the request in case user closes or refreshes the page, or there is an error. + +The `toContentStream()` method is called to extract the content of the chunk for convenience. 
+ +Once the streaming is done, finish reason and token usage are printed out. + +#### Chat Completion Streaming With JSON Module Config + +`GET /orchestration-stream/chat-completion-stream-json` + +Get a chat completion response with streaming with a JSON Module Config initialized client. + +The response header is set with `Content-Type: text/event-stream` to stream the text. + +`AbortController` is used to cancel the request in case user closes or refreshes the page, or there is an error. + +The `toContentStream()` method is called to extract the content of the chunk for convenience. + +Once the streaming is done, finish reason and token usage are printed out. + ### Langchain #### Invoke with a Simple Input diff --git a/sample-code/src/index.ts b/sample-code/src/index.ts index 7943fab8..5135ea80 100644 --- a/sample-code/src/index.ts +++ b/sample-code/src/index.ts @@ -12,9 +12,11 @@ export { orchestrationOutputFiltering, orchestrationRequestConfig, orchestrationCompletionMasking, - orchestrationFromJSON, + orchestrationFromJson, orchestrationGrounding, - orchestrationChatCompletionImage + orchestrationChatCompletionImage, + chatCompletionStreamWithJsonModuleConfig, + chatCompletionStream } from './orchestration.js'; export { invoke, diff --git a/sample-code/src/orchestration.ts b/sample-code/src/orchestration.ts index beb7ec77..094f277b 100644 --- a/sample-code/src/orchestration.ts +++ b/sample-code/src/orchestration.ts @@ -7,7 +7,10 @@ import { import { createLogger } from '@sap-cloud-sdk/util'; import type { LlmModuleConfig, - OrchestrationResponse + OrchestrationStreamChunkResponse, + OrchestrationStreamResponse, + OrchestrationResponse, + StreamOptions } from '@sap-ai-sdk/orchestration'; const logger = createLogger({ @@ -40,6 +43,72 @@ export async function orchestrationChatCompletion(): Promise> { + const orchestrationClient = new OrchestrationClient({ + // define the language model to be used + llm: { + model_name: 'gpt-35-turbo', + model_params: {} + }, + //
define the prompt + templating: { + template: [ + { + role: 'user', + content: 'Give me a long introduction of {{?input}}' + } + ] + } + }); + + return orchestrationClient.stream( + { inputParams: { input: 'SAP Cloud SDK' } }, + controller, + streamOptions + ); +} + +/** + * Ask ChatGPT through the orchestration service about SAP Cloud SDK with streaming and JSON module configuration. + * @param controller - The abort controller. + * @returns The response from the orchestration service containing the response content. + */ +export async function chatCompletionStreamWithJsonModuleConfig( + controller: AbortController +): Promise> { + const jsonConfig = `{ + "module_configurations": { + "llm_module_config": { + "model_name": "gpt-35-turbo", + "model_params": { + "stream_options": { + "include_usage": true + } + } + }, + "templating_module_config": { + "template": [{ "role": "user", "content": "Give me a long introduction of {{?input}}" }] + } + } + }`; + + const orchestrationClient = new OrchestrationClient(jsonConfig); + + return orchestrationClient.stream( + { inputParams: { input: 'SAP Cloud SDK' } }, + controller + ); +} + const llm: LlmModuleConfig = { model_name: 'gpt-4o' }; @@ -212,7 +281,7 @@ export async function orchestrationRequestConfig(): Promise { // You can also provide the JSON configuration as a plain string in the code directly instead. 
diff --git a/sample-code/src/server.ts b/sample-code/src/server.ts index 017343cd..a768e1a2 100644 --- a/sample-code/src/server.ts +++ b/sample-code/src/server.ts @@ -2,7 +2,7 @@ import express from 'express'; import { chatCompletion, - chatCompletionStream, + chatCompletionStream as azureChatCompletionStream, chatCompletionWithDestination, computeEmbedding // eslint-disable-next-line import/no-internal-modules @@ -13,9 +13,11 @@ import { orchestrationInputFiltering, orchestrationOutputFiltering, orchestrationRequestConfig, - orchestrationFromJSON, + chatCompletionStream as orchestrationChatCompletionStream, + orchestrationFromJson, orchestrationGrounding, - orchestrationChatCompletionImage + orchestrationChatCompletionImage, + chatCompletionStreamWithJsonModuleConfig as orchestrationChatCompletionStreamWithJsonModuleConfig } from './orchestration.js'; import { getDeployments, @@ -180,7 +182,7 @@ app.get('/azure-openai/chat-completion-with-destination', async (req, res) => { app.get('/azure-openai/chat-completion-stream', async (req, res) => { const controller = new AbortController(); try { - const response = await chatCompletionStream(controller); + const response = await azureChatCompletionStream(controller); // Set headers for event stream. 
res.setHeader('Content-Type', 'text/event-stream'); @@ -252,7 +254,7 @@ app.get('/orchestration/:sampleCase', async (req, res) => { inputFiltering: orchestrationInputFiltering, outputFiltering: orchestrationOutputFiltering, requestConfig: orchestrationRequestConfig, - fromJSON: orchestrationFromJSON, + fromJson: orchestrationFromJson, image: orchestrationChatCompletionImage }[sampleCase] || orchestrationChatCompletion; @@ -275,6 +277,113 @@ app.get('/orchestration/:sampleCase', async (req, res) => { } }); +app.post( + '/orchestration-stream/chat-completion-stream', + express.json(), + async (req, res) => { + const controller = new AbortController(); + try { + const response = await orchestrationChatCompletionStream( + controller, + req.body + ); + + // Set headers for event stream. + res.setHeader('Content-Type', 'text/event-stream'); + res.setHeader('Connection', 'keep-alive'); + res.flushHeaders(); + + let connectionAlive = true; + + // Abort the stream if the client connection is closed. + res.on('close', () => { + controller.abort(); + connectionAlive = false; + res.end(); + }); + + // Stream the delta content. + for await (const chunk of response.stream) { + if (!connectionAlive) { + break; + } + res.write(chunk.getDeltaContent() + '\n'); + } + + // Write the finish reason and token usage after the stream ends. 
+ if (connectionAlive) { + const finishReason = response.getFinishReason(); + const tokenUsage = response.getTokenUsage(); + res.write('\n\n---------------------------\n'); + res.write(`Finish reason: ${finishReason}\n`); + res.write('Token usage:\n'); + res.write(` - Completion tokens: ${tokenUsage?.completion_tokens}\n`); + res.write(` - Prompt tokens: ${tokenUsage?.prompt_tokens}\n`); + res.write(` - Total tokens: ${tokenUsage?.total_tokens}\n`); + } + } catch (error: any) { + console.error(error); + res + .status(500) + .send('Yikes, vibes are off apparently 😬 -> ' + error.message); + } finally { + res.end(); + } + } +); + +app.get( + '/orchestration-stream/chat-completion-stream-json', + async (req, res) => { + const controller = new AbortController(); + try { + const response = + await orchestrationChatCompletionStreamWithJsonModuleConfig(controller); + + // Set headers for event stream. + res.setHeader('Content-Type', 'text/event-stream'); + res.setHeader('Connection', 'keep-alive'); + res.flushHeaders(); + + let connectionAlive = true; + + // Abort the stream if the client connection is closed. + res.on('close', () => { + controller.abort(); + connectionAlive = false; + res.end(); + }); + + // Stream the delta content. + for await (const chunk of response.stream) { + if (!connectionAlive) { + break; + } + res.write(chunk.getDeltaContent() + '\n'); + } + + // Write the finish reason and token usage after the stream ends. 
+ if (connectionAlive) { + const finishReason = response.getFinishReason(); + const tokenUsage = response.getTokenUsage(); + res.write('\n\n---------------------------\n'); + res.write(`Finish reason: ${finishReason}\n`); + res.write('Token usage:\n'); + res.write(` - Completion tokens: ${tokenUsage?.completion_tokens}\n`); + res.write(` - Prompt tokens: ${tokenUsage?.prompt_tokens}\n`); + res.write(` - Total tokens: ${tokenUsage?.total_tokens}\n`); + } + } catch (error: any) { + console.error(error); + res + .status(500) + .send('Yikes, vibes are off apparently 😬 -> ' + error.message); + } finally { + res.end(); + } + } +); + /* Langchain */ app.get('/langchain/invoke', async (req, res) => { try { diff --git a/styles/config/vocabularies/SAP/accept.txt b/styles/config/vocabularies/SAP/accept.txt index 35b4cc00..5c717847 100644 --- a/styles/config/vocabularies/SAP/accept.txt +++ b/styles/config/vocabularies/SAP/accept.txt @@ -137,3 +137,6 @@ seldomly lookups CDS + +llm's +[Ll][Ll][Mm]'s \ No newline at end of file diff --git a/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-delta-content.json b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-delta-content.json index 4ff90148..90efd5ba 100644 --- a/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-delta-content.json +++ b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-delta-content.json @@ -1 +1,36 @@ -{"choices":[{"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}},"delta":{"content":" is"},"finish_reason":null,"index":0,"logprobs":null}],"created":1730125149,"id":"chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I","model":"gpt-35-turbo","object":"chat.completion.chunk","system_fingerprint":"fp_808245b034","usage":null} \ No 
newline at end of file +{ + "choices": [ + { + "content_filter_results": { + "hate": { + "filtered": false, + "severity": "safe" + }, + "self_harm": { + "filtered": false, + "severity": "safe" + }, + "sexual": { + "filtered": false, + "severity": "safe" + }, + "violence": { + "filtered": false, + "severity": "safe" + } + }, + "delta": { + "content": " is" + }, + "finish_reason": null, + "index": 0, + "logprobs": null + } + ], + "created": 1730125149, + "id": "chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I", + "model": "gpt-35-turbo", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_808245b034", + "usage": null +} \ No newline at end of file diff --git a/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-finish-reason.json b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-finish-reason.json index 4aeae959..78d3dbfc 100644 --- a/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-finish-reason.json +++ b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-finish-reason.json @@ -1 +1,17 @@ -{"choices":[{"content_filter_results":{},"delta":{},"finish_reason":"stop","index":0,"logprobs":null}],"created":1730125149,"id":"chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I","model":"gpt-35-turbo","object":"chat.completion.chunk","system_fingerprint":"fp_808245b034","usage":null} \ No newline at end of file +{ + "choices": [ + { + "content_filter_results": {}, + "delta": {}, + "finish_reason": "stop", + "index": 0, + "logprobs": null + } + ], + "created": 1730125149, + "id": "chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I", + "model": "gpt-35-turbo", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_808245b034", + "usage": null +} \ No newline at end of file diff --git a/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-initial.json 
b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-initial.json index af38e868..e7704f46 100644 --- a/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-initial.json +++ b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-initial.json @@ -1 +1,30 @@ -{"choices":[],"created":0,"id":"","model":"","object":"","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}]} \ No newline at end of file +{ + "choices": [], + "created": 0, + "id": "", + "model": "", + "object": "", + "prompt_filter_results": [ + { + "prompt_index": 0, + "content_filter_results": { + "hate": { + "filtered": false, + "severity": "safe" + }, + "self_harm": { + "filtered": false, + "severity": "safe" + }, + "sexual": { + "filtered": false, + "severity": "safe" + }, + "violence": { + "filtered": false, + "severity": "safe" + } + } + } + ] +} \ No newline at end of file diff --git a/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-token-usage.json b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-token-usage.json index 558fe0c5..3c384c75 100644 --- a/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-token-usage.json +++ b/test-util/data/foundation-models/azure-openai-chat-completion-stream-chunk-response-token-usage.json @@ -1 +1,13 @@ -{"choices":[],"created":1730125149,"id":"chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I","model":"gpt-35-turbo","object":"chat.completion.chunk","system_fingerprint":"fp_808245b034","usage":{"completion_tokens":7,"prompt_tokens":14,"total_tokens":21}} \ No newline at end of file +{ + "choices": [], + "created": 1730125149, + "id": "chatcmpl-ANKsHIdjvozwuOGpGI6rygvwSJH0I", + 
"model": "gpt-35-turbo", + "object": "chat.completion.chunk", + "system_fingerprint": "fp_808245b034", + "usage": { + "completion_tokens": 7, + "prompt_tokens": 14, + "total_tokens": 21 + } +} \ No newline at end of file diff --git a/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-delta-content.json b/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-delta-content.json new file mode 100644 index 00000000..27f0166b --- /dev/null +++ b/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-delta-content.json @@ -0,0 +1,39 @@ +{ + "request_id": "ceaa358c-48b8-4ce1-8a62-b0c47675fc9c", + "module_results": { + "llm": { + "id": "chatcmpl-AfmsPYkaH9uHogKZusAaVPC3zSNys", + "object": "chat.completion.chunk", + "created": 1734522693, + "model": "gpt-4o-2024-08-06", + "system_fingerprint": "fp_4e924a4b48", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "content": "rimarily focusing on Java and JavaScript/Node.js environments, allowing developers to work in their " + }, + "finish_reason": "" + } + ] + } + }, + "orchestration_result": { + "id": "chatcmpl-AfmsPYkaH9uHogKZusAaVPC3zSNys", + "object": "chat.completion.chunk", + "created": 1734522693, + "model": "gpt-4o-2024-08-06", + "system_fingerprint": "fp_4e924a4b48", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "content": "rimarily focusing on Java and JavaScript/Node.js environments, allowing developers to work in their " + }, + "finish_reason": "" + } + ] + } +} \ No newline at end of file diff --git a/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-initial.json b/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-initial.json new file mode 100644 index 00000000..11430fbf --- /dev/null +++ b/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-initial.json @@ -0,0 +1,28 @@ +{ + "request_id": 
"66172762-8c47-4438-89e7-2689be8f370b", + "module_results": { + "templating": [ + { + "role": "user", + "content": "Give me a short introduction of SAP Cloud SDK." + } + ] + }, + "orchestration_result": { + "id": "", + "object": "", + "created": 0, + "model": "", + "system_fingerprint": "", + "choices": [ + { + "index": 0, + "delta": { + "role": "", + "content": "" + }, + "finish_reason": "" + } + ] + } +} \ No newline at end of file diff --git a/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-token-usage-and-finish-reason.json b/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-token-usage-and-finish-reason.json new file mode 100644 index 00000000..6da1a7ab --- /dev/null +++ b/test-util/data/orchestration/orchestration-chat-completion-stream-chunk-response-token-usage-and-finish-reason.json @@ -0,0 +1,49 @@ +{ + "request_id": "66172762-8c47-4438-89e7-2689be8f370b", + "module_results": { + "llm": { + "id": "chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp", + "object": "chat.completion.chunk", + "created": 1734524005, + "model": "gpt-4o-2024-08-06", + "system_fingerprint": "fp_4e924a4b48", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "content": "rate with SAP's enterprise solutions." + }, + "finish_reason": "stop" + } + ], + "usage": { + "completion_tokens": 271, + "prompt_tokens": 17, + "total_tokens": 288 + } + } + }, + "orchestration_result": { + "id": "chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp", + "object": "chat.completion.chunk", + "created": 1734524005, + "model": "gpt-4o-2024-08-06", + "system_fingerprint": "fp_4e924a4b48", + "choices": [ + { + "index": 0, + "delta": { + "role": "assistant", + "content": "rate with SAP's enterprise solutions." 
+ }, + "finish_reason": "stop" + } + ], + "usage": { + "completion_tokens": 271, + "prompt_tokens": 17, + "total_tokens": 288 + } + } +} \ No newline at end of file diff --git a/test-util/data/orchestration/orchestration-chat-completion-stream-chunks.txt b/test-util/data/orchestration/orchestration-chat-completion-stream-chunks.txt new file mode 100644 index 00000000..d139b84d --- /dev/null +++ b/test-util/data/orchestration/orchestration-chat-completion-stream-chunks.txt @@ -0,0 +1,53 @@ +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"templating":[{"role":"user","content":"Give me a short introduction of SAP Cloud SDK."}]},"orchestration_result":{"id":"","object":"","created":0,"model":"","system_fingerprint":"","choices":[{"index":0,"delta":{"role":"","content":""},"finish_reason":""}]}} + + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"The SAP Cloud SDK is a comprehensive development toolkit designed to simplify and accelerate the cre"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"The SAP Cloud SDK is a comprehensive development toolkit designed to simplify and accelerate the cre"},"finish_reason":""}]}} + + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"ation of applications that 
integrate with SAP solutions, particularly those built on the SAP Busines"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"ation of applications that integrate with SAP solutions, particularly those built on the SAP Busines"},"finish_reason":""}]}} + + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"s Technology Platform (BTP). It provides developers with libraries, tools, and best practices that s"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"s Technology Platform (BTP). 
It provides developers with libraries, tools, and best practices that s"},"finish_reason":""}]}} + + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"treamline the process of connecting to SAP systems, such as S/4HANA and other services available on "},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"treamline the process of connecting to SAP systems, such as S/4HANA and other services available on "},"finish_reason":""}]}} + + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"the SAP Cloud Platform.\n\nKey features of the SAP Cloud SDK include:\n\n1. **Simplified Connectivity**:"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"the SAP Cloud Platform.\n\nKey features of the SAP Cloud SDK include:\n\n1. 
**Simplified Connectivity**:"},"finish_reason":""}]}} + + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" The SDK offers pre-built libraries to easily interact with SAP services, providing capabilities for"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" The SDK offers pre-built libraries to easily interact with SAP services, providing capabilities for"},"finish_reason":""}]}} + + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" authentication, service consumption, and OData/REST client generation.\n\n2. **Multi-cloud Support**:"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" authentication, service consumption, and OData/REST client generation.\n\n2. 
**Multi-cloud Support**:"},"finish_reason":""}]}} + + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" It supports multiple cloud environments, ensuring that applications remain flexible and can be depl"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" It supports multiple cloud environments, ensuring that applications remain flexible and can be depl"},"finish_reason":""}]}} + + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"oyed across various cloud providers.\n\n3. **Best Practices and Guidelines**: The SDK includes best pr"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"oyed across various cloud providers.\n\n3. 
**Best Practices and Guidelines**: The SDK includes best pr"},"finish_reason":""}]}} + + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"actices for development, ensuring high-quality, scalable, and maintainable code.\n\n4. **Project Scaff"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"actices for development, ensuring high-quality, scalable, and maintainable code.\n\n4. **Project Scaff"},"finish_reason":""}]}} + + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"olding and Code Samples**: Developers can quickly start their projects using provided templates and "},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"olding and Code Samples**: Developers can quickly start their projects using provided templates and "},"finish_reason":""}]}} + + +data: 
{"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"samples, accelerating the development process and reducing the learning curve.\n\n5. **Extensive Docum"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"samples, accelerating the development process and reducing the learning curve.\n\n5. **Extensive Docum"},"finish_reason":""}]}} + + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"entation and Community Support**: Ample documentation, tutorials, and an active community help devel"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"entation and Community Support**: Ample documentation, tutorials, and an active community help devel"},"finish_reason":""}]}} + + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"opers overcome challenges and adopt the SDK 
efficiently.\n\nOverall, the SAP Cloud SDK is an essential"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"opers overcome challenges and adopt the SDK efficiently.\n\nOverall, the SAP Cloud SDK is an essential"},"finish_reason":""}]}} + + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" tool for developers looking to build cloud-native applications and extensions that seamlessly integ"},"finish_reason":""}]}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":" tool for developers looking to build cloud-native applications and extensions that seamlessly integ"},"finish_reason":""}]}} + + +data: {"request_id":"66172762-8c47-4438-89e7-2689be8f370b","module_results":{"llm":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"rate with SAP's enterprise solutions."},"finish_reason":"stop"}],"usage":{"completion_tokens":271,"prompt_tokens":17,"total_tokens":288}}},"orchestration_result":{"id":"chatcmpl-AfnDZfYvuE4SDplaLGF9v0PJjB0wp","object":"chat.completion.chunk","created":1734524005,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_4e924a4b48","choices":[{"index":0,"delta":{"role":"assistant","content":"rate with SAP's 
enterprise solutions."},"finish_reason":"stop"}],"usage":{"completion_tokens":271,"prompt_tokens":17,"total_tokens":288}}} + + +data: [DONE] + diff --git a/test-util/data/orchestration/orchestration-chat-completion-success-response.json b/test-util/data/orchestration/orchestration-chat-completion-success-response.json index 884a19f5..ca4709b5 100644 --- a/test-util/data/orchestration/orchestration-chat-completion-success-response.json +++ b/test-util/data/orchestration/orchestration-chat-completion-success-response.json @@ -1,53 +1,53 @@ { - "request_id": "request-id", - "module_results": { - "templating": [ - { - "role": "user", - "content": "Hello!" - } - ], - "llm": { - "id": "llm-id", - "object": "chat.completion", - "created": 172, - "model": "gpt-35-turbo", - "choices": [ - { - "index": 0, - "message": { - "role": "assistant", - "content": "Hello! How can I assist you today?" - }, - "finish_reason": "stop" + "request_id": "request-id", + "module_results": { + "templating": [ + { + "role": "user", + "content": "Hello!" + } + ], + "llm": { + "id": "llm-id", + "object": "chat.completion", + "created": 172, + "model": "gpt-35-turbo", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Hello! How can I assist you today?" + }, + "finish_reason": "stop" + } + ], + "usage": { + "completion_tokens": 9, + "prompt_tokens": 9, + "total_tokens": 18 + } + } + }, + "orchestration_result": { + "id": "orchestration-id", + "object": "chat.completion", + "created": 172, + "model": "gpt-35-turbo", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Hello! How can I assist you today?" 
+ }, + "finish_reason": "stop" + } + ], + "usage": { + "completion_tokens": 9, + "prompt_tokens": 9, + "total_tokens": 18 } - ], - "usage": { - "completion_tokens": 9, - "prompt_tokens": 9, - "total_tokens": 18 - } - } - }, - "orchestration_result": { - "id": "orchestration-id", - "object": "chat.completion", - "created": 172, - "model": "gpt-35-turbo", - "choices": [ - { - "index": 0, - "message": { - "role": "assistant", - "content": "Hello! How can I assist you today?" - }, - "finish_reason": "stop" - } - ], - "usage": { - "completion_tokens": 9, - "prompt_tokens": 9, - "total_tokens": 18 } - } -} +} \ No newline at end of file diff --git a/tests/e2e-tests/src/orchestration.test.ts b/tests/e2e-tests/src/orchestration.test.ts index 3027cf75..4cddd28f 100644 --- a/tests/e2e-tests/src/orchestration.test.ts +++ b/tests/e2e-tests/src/orchestration.test.ts @@ -5,7 +5,9 @@ import { orchestrationOutputFiltering, orchestrationRequestConfig, orchestrationCompletionMasking, - orchestrationChatCompletionImage + orchestrationChatCompletionImage, + chatCompletionStreamWithJsonModuleConfig, + chatCompletionStream } from '@sap-ai-sdk/sample-code'; import { loadEnv } from './utils/load-env.js'; import type { OrchestrationResponse } from '@sap-ai-sdk/orchestration'; @@ -62,4 +64,26 @@ describe('orchestration', () => { expect(response.getContent()?.includes('SAP')).toBe(true); expect(response.getContent()?.includes('logo')).toBe(true); }); + + it('should return stream of orchestration responses', async () => { + const response = await chatCompletionStream(new AbortController()); + + for await (const chunk of response.stream) { + expect(chunk).toBeDefined(); + } + expect(response.getFinishReason()).toEqual('stop'); + expect(response.getTokenUsage()).toBeDefined(); + }); + + it('should return stream of orchestration responses, using a JSON client', async () => { + const response = await chatCompletionStreamWithJsonModuleConfig( + new AbortController() + ); + + for await (const chunk 
of response.stream) { + expect(chunk).toBeDefined(); + } + expect(response.getFinishReason()).toEqual('stop'); + expect(response.getTokenUsage()).toBeDefined(); + }); }); diff --git a/tests/type-tests/test/azure-openai.test-d.ts b/tests/type-tests/test/azure-openai.test-d.ts index a7f93abe..f80a2b0d 100644 --- a/tests/type-tests/test/azure-openai.test-d.ts +++ b/tests/type-tests/test/azure-openai.test-d.ts @@ -1,12 +1,14 @@ import { expectType } from 'tsd'; import { - type AzureOpenAiChatModel, - AzureOpenAiEmbeddingResponse, AzureOpenAiChatClient, - AzureOpenAiEmbeddingClient, + AzureOpenAiEmbeddingClient +} from '@sap-ai-sdk/foundation-models'; +import type { + AzureOpenAiEmbeddingResponse, AzureOpenAiChatCompletionResponse, AzureOpenAiCreateChatCompletionResponse, AzureOpenAiCompletionUsage, + AzureOpenAiChatModel, AzureOpenAiChatCompletionStreamResponse, AzureOpenAiChatCompletionStreamChunkResponse, AzureOpenAiChatCompletionStream diff --git a/tests/type-tests/test/orchestration.test-d.ts b/tests/type-tests/test/orchestration.test-d.ts index 180afade..d3accedf 100644 --- a/tests/type-tests/test/orchestration.test-d.ts +++ b/tests/type-tests/test/orchestration.test-d.ts @@ -1,12 +1,14 @@ import { expectError, expectType, expectAssignable } from 'tsd'; import { OrchestrationClient, + buildDocumentGroundingConfig +} from '@sap-ai-sdk/orchestration'; +import type { CompletionPostResponse, OrchestrationResponse, TokenUsage, ChatModel, GroundingModuleConfig, - buildDocumentGroundingConfig, LlmModelParams } from '@sap-ai-sdk/orchestration'; @@ -165,22 +167,20 @@ expectType>( * Chat Completion with JSON configuration. */ expectType>( - new OrchestrationClient( - `{ - "module_configurations": { - "llm_module_config": { - "model_name": "gpt-35-turbo-16k", - "model_params": { - "max_tokens": 50, - "temperature": 0.1 - } - }, - "templating_module_config": { - "template": [{ "role": "user", "content": "Hello!" 
}] + new OrchestrationClient(`{ + "module_configurations": { + "llm_module_config": { + "model_name": "gpt-35-turbo-16k", + "model_params": { + "max_tokens": 50, + "temperature": 0.1 } + }, + "templating_module_config": { + "template": [{ "role": "user", "content": "Hello!" }] } - }` - ).chatCompletion() + } + }`).chatCompletion() ); /** @@ -245,7 +245,7 @@ expect('custom-model'); expect('gemini-1.0-pro'); /** - * Grounding util + * Grounding util. */ expectType( buildDocumentGroundingConfig({