
Commit 045845f

Fix local response bug and update Readme (#19)
* Fix ResponseUtil and bump SDK version
  - Fix ResponseUtil
  - bump SDK version
* Delete invalid tests due to codegen issue
* Update README.md
* Update README.md
1 parent 9e8a665 commit 045845f

File tree

6 files changed: +57 -156 lines changed

README.md

Lines changed: 41 additions & 47 deletions
@@ -8,7 +8,7 @@ Features:
 - Remote Inferencing: Perform inferencing tasks remotely with Llama models hosted on a remote connection (or serverless localhost).
 - Simple Integration: With easy-to-use APIs, a developer can quickly integrate Llama Stack in their Android app. The difference with local vs remote inferencing is also minimal.
 
-Latest Release Notes: [v0.0.58](https://github.com/meta-llama/llama-stack-client-kotlin/releases/tag/v0.0.58)
+Latest Release Notes: [v0.1.0](https://github.com/meta-llama/llama-stack-client-kotlin/releases/tag/v0.1.0)
 
 *Tagged releases are stable versions of the project. While we strive to maintain a stable main branch, it's not guaranteed to be free of bugs or issues.*
 
@@ -24,7 +24,7 @@ The key files in the app are `ExampleLlamaStackLocalInference.kt`, `ExampleLlama
 Add the following dependency in your `build.gradle.kts` file:
 ```
 dependencies {
-    implementation("com.llama.llamastack:llama-stack-client-kotlin:0.0.58")
+    implementation("com.llama.llamastack:llama-stack-client-kotlin:0.1.0")
 }
 ```
 This will download jar files in your gradle cache in a directory like `~/.gradle/caches/modules-2/files-2.1/com.llama.llamastack/`
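
For the new artifact to resolve, Gradle needs a repository that hosts it. A minimal sketch, assuming the project pulls from Maven Central and has no repository declared yet:

```kotlin
// build.gradle.kts (or settings.gradle.kts under dependencyResolutionManagement)
repositories {
    mavenCentral()
}
```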
@@ -60,7 +60,7 @@ Start a Llama Stack server on localhost. Here is an example of how you can do th
 ```
 conda create -n stack-fireworks python=3.10
 conda activate stack-fireworks
-pip install llama-stack=0.0.58
+pip install llama-stack==0.1.0
 llama stack build --template fireworks --image-type conda
 export FIREWORKS_API_KEY=<SOME_KEY>
 llama stack run /Users/<your_username>/.llama/distributions/llamastack-fireworks/fireworks-run.yaml --port=5050
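
Before wiring up the Kotlin client, it can help to confirm the server is actually listening on the chosen port. A minimal reachability sketch in plain Kotlin; the `/v1/health` route is an assumption and may differ across llama-stack versions:

```kotlin
import java.net.HttpURLConnection
import java.net.URL

// Returns true if the stack answers on its (assumed) health route.
fun isStackReachable(baseUrl: String = "http://localhost:5050"): Boolean =
    try {
        val conn = URL("$baseUrl/v1/health").openConnection() as HttpURLConnection
        conn.connectTimeout = 2_000
        conn.readTimeout = 2_000
        conn.requestMethod = "GET"
        val reachable = conn.responseCode in 200..299
        conn.disconnect()
        reachable
    } catch (e: Exception) {
        false
    }
```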
@@ -98,7 +98,8 @@ client = LlamaStackClientLocalClient
 // remoteURL is a string like "http://localhost:5050"
 client = LlamaStackClientOkHttpClient
     .builder()
-    .baseUrl(remoteURL)
+    .baseUrl(remoteURL)
+    .headers(mapOf("x-llamastack-client-version" to listOf("0.1.0")))
     .build()
 ```
 </td>
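
The added `x-llamastack-client-version` header reports the SDK version to the server. Whichever client is built, it is exercised the same way afterwards. A quick round-trip sketch (imports omitted as in the snippets above; `InferenceChatCompletionParams` and its accessors are assumed to follow the builder pattern shown elsewhere in this README, not verbatim SDK API):

```kotlin
// Sketch: one non-streaming chat completion against the client built above.
val response = client!!.inference().chatCompletion(
    InferenceChatCompletionParams.builder()
        .modelId("meta-llama/Llama-3.1-8B-Instruct")
        .messages(
            listOf(
                InferenceChatCompletionParams.Message.ofUser(
                    UserMessage.builder()
                        .content(InterleavedContent.ofString("Hello!"))
                        .build()
                )
            )
        )
        .build()
)
```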
@@ -114,22 +115,18 @@ Create the agent configuration:
 val agentConfig =
     AgentConfig.builder()
         .enableSessionPersistence(false)
-        .instructions("You are a helpful assistant")
+        .instructions("You're a helpful assistant")
         .maxInferIters(100)
-        .model("meta-llama/Llama-3.2-3B-Instruct")
+        .model("meta-llama/Llama-3.1-8B-Instruct")
         .samplingParams(
             SamplingParams.builder()
                 .strategy(
-                    SamplingParams.Strategy.ofGreedySamplingStrategy(
-                        SamplingParams.Strategy.GreedySamplingStrategy.builder()
-                            .type(SamplingParams.Strategy.GreedySamplingStrategy.Type.GREEDY)
-                            .build()
-                    )
+                    SamplingParams.Strategy.ofGreedySampling()
                 )
                 .build()
         )
         .toolChoice(AgentConfig.ToolChoice.AUTO)
-        .toolPromptFormat(AgentConfig.ToolPromptFormat.PYTHON_LIST)
+        .toolPromptFormat(AgentConfig.ToolPromptFormat.JSON)
         .clientTools(
             listOf(
                 CustomTools.getCreateCalendarEventTool() // Custom local tools
@@ -140,7 +137,7 @@ Create the agent configuration:
 
 Create the agent:
 ```
-val agentService = client!!.agents() #LlamaStackClientLocalClient
+val agentService = client!!.agents()
 val agentCreateResponse = agentService.create(
     AgentCreateParams.builder()
         .agentConfig(agentConfig)
@@ -170,10 +167,9 @@ Create a turn:
     .agentId(agentId)
     .messages(
         listOf(
-            AgentTurnCreateParams.Message.ofUserMessage(
+            AgentTurnCreateParams.Message.ofUser(
                 UserMessage.builder()
                     .content(InterleavedContent.ofString("What is the capital of France?"))
-                    .role(UserMessage.Role.USER)
                     .build()
             )
         )
@@ -185,30 +181,32 @@ Create a turn:
 Handle the stream chunk callback:
 ```
 agentTurnCreateResponseStream.use {
-    agentTurnCreateResponseStream.asSequence().forEach {
-        val agentResponsePayload = it.agentTurnResponseStreamChunk()?.event()?.payload()
-        if (agentResponsePayload != null) {
-            when {
-                agentResponsePayload.isTurnStart() -> {
-                    // Handle Turn Start Payload
-                }
-                agentResponsePayload.isStepStart() -> {
-                    // Handle Step Start Payload
-                }
-                agentResponsePayload.isStepProgress() -> {
-                    // Handle Step Progress Payload
-                }
-                agentResponsePayload.isStepComplete() -> {
-                    // Handle Step Complete Payload
-                }
-                agentResponsePayload.isTurnComplete() -> {
-                    // Handle Turn Complete Payload
-                }
-            }
+    agentTurnCreateResponseStream.asSequence().forEach {
+        val agentResponsePayload = it.responseStreamChunk()?.event()?.payload()
+        if (agentResponsePayload != null) {
+            when {
+                agentResponsePayload.isAgentTurnResponseTurnStart() -> {
+                    // Handle Turn Start Payload
+                }
+                agentResponsePayload.isAgentTurnResponseStepStart() -> {
+                    // Handle Step Start Payload
+                }
+                agentResponsePayload.isAgentTurnResponseStepProgress() -> {
+                    // Handle Step Progress Payload
+                }
+                agentResponsePayload.isAgentTurnResponseStepComplete() -> {
+                    // Handle Step Complete Payload
+                }
+                agentResponsePayload.isAgentTurnResponseTurnComplete() -> {
+                    // Handle Turn Complete Payload
                 }
+            }
+        }
+    }
+}
 ```
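
The renamed `isAgentTurnResponse*` checks only identify the payload type; the step-progress branch is the one that carries incremental text. A sketch of filling in that branch, assuming `agentTurnResponseStepProgress()` exposes the payload body and its `ContentDelta` offers `textDelta`-style accessors (names are assumptions, not confirmed SDK API):

```kotlin
// Inside the when block above: accumulate streamed text chunk by chunk.
agentResponsePayload.isAgentTurnResponseStepProgress() -> {
    val delta = agentResponsePayload.agentTurnResponseStepProgress()?.delta()
    // textBuffer is a StringBuilder owned by the caller
    delta?.textDelta()?.let { textBuffer.append(it.text()) }
}
```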
 
-More examples can be found in our demo app (TO-ADD Agent section)
+More examples can be found in our [demo app](https://github.com/meta-llama/llama-stack-apps/tree/main/examples/android_app)
 
 
 ### Run Image Reasoning
@@ -223,21 +221,19 @@ Create an image inference with agent:
     .agentId(agentId)
     .messages(
         listOf(
-            AgentTurnCreateParams.Message.ofUserMessage(
+            AgentTurnCreateParams.Message.ofUser(
                 UserMessage.builder()
                     .content(InterleavedContent.ofString("What is in the image?"))
-                    .role(UserMessage.Role.USER)
                     .build()
             ),
-            AgentTurnCreateParams.Message.ofUserMessage(
+            AgentTurnCreateParams.Message.ofUser(
                 UserMessage.builder()
                     .content(InterleavedContent.ofImageContentItem(
                         InterleavedContent.ImageContentItem.builder()
-                            .image(imageUrl)
-                            .type(InterleavedContent.ImageContentItem.Type.IMAGE)
+                            .image(image)
+                            .type(JsonValue.from("image"))
                             .build()
                     ))
-                    .role(UserMessage.Role.USER)
                     .build()
             )
         )
@@ -247,7 +243,7 @@ Create an image inference with agent:
     )
 ```
 
-Note that image captured on device needs to be encoded with Base64 before sending it to the model. Check out our demo app example here (TO-ADD Image Reasoning section)
+Note that an image captured on device needs to be encoded with Base64 before sending it to the model. Check out our demo app example [here](https://github.com/meta-llama/llama-stack-apps/tree/main/examples/android_app)
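
A minimal encoding sketch in plain Kotlin for the Base64 step; the data-URL wrapping is an assumption about how the encoded image is commonly passed, not confirmed SDK behavior:

```kotlin
import java.io.File
import java.util.Base64

// Encode an on-device image file to Base64 before handing it to the model.
// Note: java.util.Base64 needs Android API 26+; use android.util.Base64 below that.
fun encodeImage(path: String): String =
    Base64.getEncoder().encodeToString(File(path).readBytes())

// Hypothetical data-URL wrapper, if the endpoint expects one:
fun asDataUrl(base64: String, mime: String = "image/jpeg"): String =
    "data:$mime;base64,$base64"
```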
 
 
 ### Run Simple Inference
@@ -290,7 +286,7 @@ The purpose of this section is to share more details with users that would like
 ### Prerequisite
 
 You must complete the following steps:
-1. Clone the repo (`git clone https://github.com/meta-llama/llama-stack-client-kotlin.git -b release/0.0.58`)
+1. Clone the repo (`git clone https://github.com/meta-llama/llama-stack-client-kotlin.git -b release/0.1.0`)
 2. Port the appropriate ExecuTorch libraries over into your Llama Stack Kotlin library environment.
 ```
 cd llama-stack-client-kotlin-client-local
@@ -396,9 +392,7 @@ If you encountered any bugs or issues following this guide please file a bug/iss
 
 ## Known Issues
 We're aware of the following issues and are working to resolve them:
-1. Streaming response is a work-in-progress for local and remote inference
-2. Due to #1, agents are not supported at the time. LS agents only work in streaming mode
-3. Changing to another model is a work in progress for local and remote platforms
+- Because of differing model behavior when handling function calls and special tags such as "ipython", Llama Stack currently returns the streaming event payload for Llama 3.2 1B/3B models as a textDelta object rather than a toolCallDelta object when a tool call is made. At StepComplete, Llama Stack will still return the entire toolCall detail.
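
Until that is resolved, a client on the Llama 3.2 1B/3B path can buffer streamed text but should treat the StepComplete payload as the authoritative source of the tool call. A rough shape of that workaround, with stand-in accessors (the SDK payload getters are not spelled out here):

```kotlin
// Workaround sketch: collect streamed text, but prefer the structured
// tool call that still arrives with the StepComplete event.
class ToolCallCollector {
    private val streamed = StringBuilder()

    fun onTextDelta(text: String) {
        // On 3.2 1B/3B this text may actually be a serialized tool call.
        streamed.append(text)
    }

    fun onStepComplete(completeToolCall: String?): String =
        completeToolCall ?: streamed.toString() // fall back only if the detail is absent
}
```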
 
 ## Thanks
 We'd like to extend our thanks to the ExecuTorch team for providing their support as we integrated ExecuTorch as one of the local inference distributors for Llama Stack. Check out the [ExecuTorch Github repo](https://github.com/pytorch/executorch/tree/main) for more information.

build.gradle.kts

Lines changed: 1 addition & 1 deletion
@@ -4,5 +4,5 @@ plugins {
 
 allprojects {
     group = "com.llama.llamastack"
-    version = "0.1.0.rc14.manual-patch"
+    version = "0.1.0"
 }

llama-stack-client-kotlin-client-local/src/main/kotlin/com/llama/llamastack/client/local/util/ResponseUtil.kt

Lines changed: 15 additions & 4 deletions
@@ -20,14 +20,12 @@ fun buildInferenceChatCompletionResponse(
         CompletionMessage.builder()
             .toolCalls(createCustomToolCalls(response))
             .content(InterleavedContent.ofString(""))
-            // .role(CompletionMessage.Role.ASSISTANT)
             .stopReason(mapStopTokenToReason(stopToken))
             .build()
     } else {
         CompletionMessage.builder()
             .toolCalls(listOf())
             .content(InterleavedContent.ofString(response))
-            // .role(CompletionMessage.Role.ASSISTANT)
             .stopReason(mapStopTokenToReason(stopToken))
             .build()
     }
@@ -89,8 +87,21 @@ fun buildInferenceChatCompletionResponseForCustomToolCallStream(
     stopToken: String,
     stats: Float
 ): InferenceChatCompletionResponse {
-    // Convert ToolCall to ToolCallDelta
-    val delta = ContentDelta.ToolCallDelta.builder().toolCall(toolCall.toString()).build()
+    val delta =
+        ContentDelta.ToolCallDelta.builder()
+            .parseStatus(ContentDelta.ToolCallDelta.ParseStatus.SUCCEEDED)
+            .toolCall(
+                ContentDelta.ToolCallDelta.ToolCall.InnerToolCall.builder()
+                    .toolName(toolCall.toolName().toString())
+                    .arguments(
+                        ContentDelta.ToolCallDelta.ToolCall.InnerToolCall.Arguments.builder()
+                            .additionalProperties(toolCall.arguments()._additionalProperties())
+                            .build()
+                    )
+                    .callId(toolCall.callId())
+                    .build()
+            )
+            .build()
     return InferenceChatCompletionResponse.ofChatCompletionResponseStreamChunk(
         InferenceChatCompletionResponse.ChatCompletionResponseStreamChunk.builder()
             .event(
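
The bug this hunk fixes: the old code collapsed the whole `ToolCall` into `toolCall.toString()`, so stream consumers received one opaque display string instead of addressable `toolName`/`arguments`/`callId` fields; the new builder chain copies each field into the structured `InnerToolCall`. Illustrated with stand-in types (not the SDK's real classes):

```kotlin
// Stand-in to show why the stringified delta was lossy.
data class InnerToolCall(val toolName: String, val arguments: Map<String, Any?>, val callId: String)

fun main() {
    val call = InnerToolCall("createCalendarEvent", mapOf("title" to "sync"), "call-0")

    val lossy = call.toString() // one opaque string; fields can't be read back reliably
    val structured = call       // field-by-field copy, as the + lines above now do

    println(lossy)                         // InnerToolCall(toolName=createCalendarEvent, ...)
    println(structured.arguments["title"]) // sync
}
```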

llama-stack-client-kotlin-core/src/test/kotlin/com/llama/llamastack/models/TelemetryQuerySpansParamsTest.kt

Lines changed: 0 additions & 55 deletions
@@ -23,59 +23,4 @@ class TelemetryQuerySpansParamsTest {
             .build()
     }
 
-    // @Test
-    // fun getQueryParams() {
-    //     val params =
-    //         TelemetryQuerySpansParams.builder()
-    //             .addAttributeFilter(
-    //                 QueryCondition.builder()
-    //                     .key("key")
-    //                     .op(QueryCondition.Op.EQ)
-    //                     .value(QueryCondition.Value.ofBoolean(true))
-    //                     .build()
-    //             )
-    //             .addAttributesToReturn("string")
-    //             .maxDepth(0L)
-    //             .xLlamaStackClientVersion("X-LlamaStack-Client-Version")
-    //             .xLlamaStackProviderData("X-LlamaStack-Provider-Data")
-    //             .build()
-    //     val expected = QueryParams.builder()
-    //     expected.put(
-    //         "attribute_filters",
-    //         QueryCondition.builder()
-    //             .key("key")
-    //             .op(QueryCondition.Op.EQ.toString())
-    //             .value(QueryCondition.Value.ofBoolean("true").toString())
-    //             .build()
-    //     )
-    //     expected.put("attributes_to_return", "string")
-    //     expected.put("max_depth", "0")
-    //     assertThat(params.getQueryParams()).isEqualTo(expected.build())
-    // }
-
-    // @Test
-    // fun getQueryParamsWithoutOptionalFields() {
-    //     val params =
-    //         TelemetryQuerySpansParams.builder()
-    //             .addAttributeFilter(
-    //                 QueryCondition.builder()
-    //                     .key("key")
-    //                     .op(QueryCondition.Op.EQ)
-    //                     .value(QueryCondition.Value.ofBoolean(true))
-    //                     .build()
-    //             )
-    //             .addAttributesToReturn("string")
-    //             .build()
-    //     val expected = QueryParams.builder()
-    //     expected.put(
-    //         "attribute_filters",
-    //         QueryCondition.builder()
-    //             .key("key")
-    //             .op(QueryCondition.Op.EQ.toString())
-    //             .value(QueryCondition.Value.ofBoolean("true").toString())
-    //             .build()
-    //     )
-    //     expected.put("attributes_to_return", "string")
-    //     assertThat(params.getQueryParams()).isEqualTo(expected.build())
-    // }
 }

llama-stack-client-kotlin-core/src/test/kotlin/com/llama/llamastack/models/TelemetryQueryTracesParamsTest.kt

Lines changed: 0 additions & 32 deletions
@@ -26,38 +26,6 @@ class TelemetryQueryTracesParamsTest {
             .build()
     }
 
-    // @Test
-    // fun getQueryParams() {
-    //     val params =
-    //         TelemetryQueryTracesParams.builder()
-    //             .addAttributeFilter(
-    //                 QueryCondition.builder()
-    //                     .key("key")
-    //                     .op(QueryCondition.Op.EQ)
-    //                     .value(QueryCondition.Value.ofBoolean(true))
-    //                     .build()
-    //             )
-    //             .limit(0L)
-    //             .offset(0L)
-    //             .addOrderBy("string")
-    //             .xLlamaStackClientVersion("X-LlamaStack-Client-Version")
-    //             .xLlamaStackProviderData("X-LlamaStack-Provider-Data")
-    //             .build()
-    //     val expected = QueryParams.builder()
-    //     expected.put(
-    //         "attribute_filters",
-    //         QueryCondition.builder()
-    //             .key("key")
-    //             .op(QueryCondition.Op.EQ.toString())
-    //             .value(QueryCondition.Value.ofBoolean("true").toString())
-    //             .build()
-    //     )
-    //     expected.put("limit", "0")
-    //     expected.put("offset", "0")
-    //     expected.put("order_by", "string")
-    //     assertThat(params.getQueryParams()).isEqualTo(expected.build())
-    // }
-
     @Test
     fun getQueryParamsWithoutOptionalFields() {
         val params = TelemetryQueryTracesParams.builder().build()

llama-stack-client-kotlin-core/src/test/kotlin/com/llama/llamastack/models/ToolRuntimeListToolsParamsTest.kt

Lines changed: 0 additions & 17 deletions
@@ -18,23 +18,6 @@ class ToolRuntimeListToolsParamsTest {
             .build()
     }
 
-    // @Test
-    // fun getQueryParams() {
-    //     val params =
-    //         ToolRuntimeListToolsParams.builder()
-    //             .mcpEndpoint(Url.builder().uri("uri").build())
-    //             .toolGroupId("tool_group_id")
-    //             .xLlamaStackClientVersion("X-LlamaStack-Client-Version")
-    //             .xLlamaStackProviderData("X-LlamaStack-Provider-Data")
-    //             .build()
-    //     val expected = QueryParams.builder()
-    //     Url.builder().uri("uri").build().forEachQueryParam { key, values ->
-    //         expected.put("mcp_endpoint[$key]", values)
-    //     }
-    //     expected.put("tool_group_id", "tool_group_id")
-    //     assertThat(params.getQueryParams()).isEqualTo(expected.build())
-    // }
-
     @Test
     fun getQueryParamsWithoutOptionalFields() {
         val params = ToolRuntimeListToolsParams.builder().build()
