
Commit 045845f

Fix local response bug and update Readme (#19)
* Fix ResponseUtil and bump SDK version
  - Fix ResponseUtil
  - bump SDK version
* Delete invalid tests due to codegen issue
* Update README.md
* Update README.md
1 parent 9e8a665 commit 045845f

File tree

6 files changed: +57 -156 lines changed

README.md

Lines changed: 41 additions & 47 deletions
@@ -8,7 +8,7 @@ Features:
 - Remote Inferencing: Perform inferencing tasks remotely with Llama models hosted on a remote connection (or serverless localhost).
 - Simple Integration: With easy-to-use APIs, a developer can quickly integrate Llama Stack in their Android app. The difference with local vs remote inferencing is also minimal.
 
-Latest Release Notes: [v0.0.58](https://github.com/meta-llama/llama-stack-client-kotlin/releases/tag/v0.0.58)
+Latest Release Notes: [v0.1.0](https://github.com/meta-llama/llama-stack-client-kotlin/releases/tag/v0.1.0)
 
 *Tagged releases are stable versions of the project. While we strive to maintain a stable main branch, it's not guaranteed to be free of bugs or issues.*
 
@@ -24,7 +24,7 @@ The key files in the app are `ExampleLlamaStackLocalInference.kt`, `ExampleLlama
 Add the following dependency in your `build.gradle.kts` file:
 ```
 dependencies {
-    implementation("com.llama.llamastack:llama-stack-client-kotlin:0.0.58")
+    implementation("com.llama.llamastack:llama-stack-client-kotlin:0.1.0")
 }
 ```
 This will download jar files in your gradle cache in a directory like `~/.gradle/caches/modules-2/files-2.1/com.llama.llamastack/`
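
For the new artifact to resolve, Gradle needs a repository that hosts it. A minimal sketch, assuming the project pulls from Maven Central and has no repository declared yet:

```kotlin
// build.gradle.kts (or settings.gradle.kts under dependencyResolutionManagement)
repositories {
    mavenCentral()
}
```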
@@ -60,7 +60,7 @@ Start a Llama Stack server on localhost. Here is an example of how you can do th
 ```
 conda create -n stack-fireworks python=3.10
 conda activate stack-fireworks
-pip install llama-stack=0.0.58
+pip install llama-stack==0.1.0
 llama stack build --template fireworks --image-type conda
 export FIREWORKS_API_KEY=<SOME_KEY>
 llama stack run /Users/<your_username>/.llama/distributions/llamastack-fireworks/fireworks-run.yaml --port=5050
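
Before wiring up the Kotlin client, it can help to confirm the server is actually listening on the chosen port. A minimal reachability sketch in plain Kotlin; the `/v1/health` route is an assumption and may differ across llama-stack versions:

```kotlin
import java.net.HttpURLConnection
import java.net.URL

// Returns true if the stack answers on its (assumed) health route.
fun isStackReachable(baseUrl: String = "http://localhost:5050"): Boolean =
    try {
        val conn = URL("$baseUrl/v1/health").openConnection() as HttpURLConnection
        conn.connectTimeout = 2_000
        conn.readTimeout = 2_000
        conn.requestMethod = "GET"
        val reachable = conn.responseCode in 200..299
        conn.disconnect()
        reachable
    } catch (e: Exception) {
        false
    }
```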
@@ -98,7 +98,8 @@ client = LlamaStackClientLocalClient
 // remoteURL is a string like "http://localhost:5050"
 client = LlamaStackClientOkHttpClient
     .builder()
-    .baseUrl(remoteURL)
+    .baseUrl(remoteURL)
+    .headers(mapOf("x-llamastack-client-version" to listOf("0.1.0")))
     .build()
 ```
 </td>
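
The added `x-llamastack-client-version` header reports the SDK version to the server. Whichever client is built, it is exercised the same way afterwards. A quick round-trip sketch (imports omitted as in the snippets above; `InferenceChatCompletionParams` and its accessors are assumed to follow the builder pattern shown elsewhere in this README, not verbatim SDK API):

```kotlin
// Sketch: one non-streaming chat completion against the client built above.
val response = client!!.inference().chatCompletion(
    InferenceChatCompletionParams.builder()
        .modelId("meta-llama/Llama-3.1-8B-Instruct")
        .messages(
            listOf(
                InferenceChatCompletionParams.Message.ofUser(
                    UserMessage.builder()
                        .content(InterleavedContent.ofString("Hello!"))
                        .build()
                )
            )
        )
        .build()
)
```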
@@ -114,22 +115,18 @@ Create the agent configuration:
 val agentConfig =
     AgentConfig.builder()
         .enableSessionPersistence(false)
-        .instructions("You are a helpful assistant")
+        .instructions("You're a helpful assistant")
         .maxInferIters(100)
-        .model("meta-llama/Llama-3.2-3B-Instruct")
+        .model("meta-llama/Llama-3.1-8B-Instruct")
         .samplingParams(
             SamplingParams.builder()
                 .strategy(
-                    SamplingParams.Strategy.ofGreedySamplingStrategy(
-                        SamplingParams.Strategy.GreedySamplingStrategy.builder()
-                            .type(SamplingParams.Strategy.GreedySamplingStrategy.Type.GREEDY)
-                            .build()
-                    )
+                    SamplingParams.Strategy.ofGreedySampling()
                 )
                 .build()
         )
         .toolChoice(AgentConfig.ToolChoice.AUTO)
-        .toolPromptFormat(AgentConfig.ToolPromptFormat.PYTHON_LIST)
+        .toolPromptFormat(AgentConfig.ToolPromptFormat.JSON)
         .clientTools(
             listOf(
                 CustomTools.getCreateCalendarEventTool() // Custom local tools
@@ -140,7 +137,7 @@ Create the agent configuration:
 
 Create the agent:
 ```
-val agentService = client!!.agents() #LlamaStackClientLocalClient
+val agentService = client!!.agents()
 val agentCreateResponse = agentService.create(
     AgentCreateParams.builder()
         .agentConfig(agentConfig)
@@ -170,10 +167,9 @@ Create a turn:
     .agentId(agentId)
     .messages(
         listOf(
-            AgentTurnCreateParams.Message.ofUserMessage(
+            AgentTurnCreateParams.Message.ofUser(
                 UserMessage.builder()
                     .content(InterleavedContent.ofString("What is the capital of France?"))
-                    .role(UserMessage.Role.USER)
                     .build()
             )
         )
@@ -185,30 +181,32 @@ Create a turn:
 Handle the stream chunk callback:
 ```
 agentTurnCreateResponseStream.use {
-    agentTurnCreateResponseStream.asSequence().forEach {
-        val agentResponsePayload = it.agentTurnResponseStreamChunk()?.event()?.payload()
-        if (agentResponsePayload != null) {
-            when {
-                agentResponsePayload.isTurnStart() -> {
-                    // Handle Turn Start Payload
-                }
-                agentResponsePayload.isStepStart() -> {
-                    // Handle Step Start Payload
-                }
-                agentResponsePayload.isStepProgress() -> {
-                    // Handle Step Progress Payload
-                }
-                agentResponsePayload.isStepComplete() -> {
-                    // Handle Step Complete Payload
-                }
-                agentResponsePayload.isTurnComplete() -> {
-                    // Handle Turn Complete Payload
-                }
-            }
+    agentTurnCreateResponseStream.asSequence().forEach {
+        val agentResponsePayload = it.responseStreamChunk()?.event()?.payload()
+        if (agentResponsePayload != null) {
+            when {
+                agentResponsePayload.isAgentTurnResponseTurnStart() -> {
+                    // Handle Turn Start Payload
+                }
+                agentResponsePayload.isAgentTurnResponseStepStart() -> {
+                    // Handle Step Start Payload
+                }
+                agentResponsePayload.isAgentTurnResponseStepProgress() -> {
+                    // Handle Step Progress Payload
+                }
+                agentResponsePayload.isAgentTurnResponseStepComplete() -> {
+                    // Handle Step Complete Payload
+                }
+                agentResponsePayload.isAgentTurnResponseTurnComplete() -> {
+                    // Handle Turn Complete Payload
                 }
+            }
+        }
+    }
+}
 ```
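
The renamed `isAgentTurnResponse*` checks only identify the payload type; the step-progress branch is the one that carries incremental text. A sketch of filling in that branch, assuming `agentTurnResponseStepProgress()` exposes the payload body and its `ContentDelta` offers `textDelta`-style accessors (names are assumptions, not confirmed SDK API):

```kotlin
// Inside the when block above: accumulate streamed text chunk by chunk.
agentResponsePayload.isAgentTurnResponseStepProgress() -> {
    val delta = agentResponsePayload.agentTurnResponseStepProgress()?.delta()
    // textBuffer is a StringBuilder owned by the caller
    delta?.textDelta()?.let { textBuffer.append(it.text()) }
}
```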
 
-More examples can be found in our demo app (TO-ADD Agent section)
+More examples can be found in our [demo app](https://github.com/meta-llama/llama-stack-apps/tree/main/examples/android_app)
 
 
 ### Run Image Reasoning
@@ -223,21 +221,19 @@ Create an image inference with agent:
     .agentId(agentId)
     .messages(
         listOf(
-            AgentTurnCreateParams.Message.ofUserMessage(
+            AgentTurnCreateParams.Message.ofUser(
                 UserMessage.builder()
                     .content(InterleavedContent.ofString("What is in the image?"))
-                    .role(UserMessage.Role.USER)
                     .build()
             ),
-            AgentTurnCreateParams.Message.ofUserMessage(
+            AgentTurnCreateParams.Message.ofUser(
                 UserMessage.builder()
                     .content(InterleavedContent.ofImageContentItem(
                         InterleavedContent.ImageContentItem.builder()
-                            .image(imageUrl)
-                            .type(InterleavedContent.ImageContentItem.Type.IMAGE)
+                            .image(image)
+                            .type(JsonValue.from("image"))
                             .build()
                     ))
-                    .role(UserMessage.Role.USER)
                     .build()
             )
         )
@@ -247,7 +243,7 @@ Create an image inference with agent:
     )
 ```
 
-Note that image captured on device needs to be encoded with Base64 before sending it to the model. Check out our demo app example here (TO-ADD Image Reasoning section)
+Note that an image captured on device needs to be encoded with Base64 before sending it to the model. Check out our demo app example [here](https://github.com/meta-llama/llama-stack-apps/tree/main/examples/android_app)
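
A minimal encoding sketch in plain Kotlin for the Base64 step; the data-URL wrapping is an assumption about how the encoded image is commonly passed, not confirmed SDK behavior:

```kotlin
import java.io.File
import java.util.Base64

// Encode an on-device image file to Base64 before handing it to the model.
// Note: java.util.Base64 needs Android API 26+; use android.util.Base64 below that.
fun encodeImage(path: String): String =
    Base64.getEncoder().encodeToString(File(path).readBytes())

// Hypothetical data-URL wrapper, if the endpoint expects one:
fun asDataUrl(base64: String, mime: String = "image/jpeg"): String =
    "data:$mime;base64,$base64"
```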
 
 
 ### Run Simple Inference
@@ -290,7 +286,7 @@ The purpose of this section is to share more details with users that would like
 ### Prerequisite
 
 You must complete the following steps:
-1. Clone the repo (`git clone https://github.com/meta-llama/llama-stack-client-kotlin.git -b release/0.0.58`)
+1. Clone the repo (`git clone https://github.com/meta-llama/llama-stack-client-kotlin.git -b release/0.1.0`)
 2. Port the appropriate ExecuTorch libraries over into your Llama Stack Kotlin library environment.
 ```
 cd llama-stack-client-kotlin-client-local
@@ -396,9 +392,7 @@ If you encountered any bugs or issues following this guide please file a bug/iss
 
 ## Known Issues
 We're aware of the following issues and are working to resolve them:
-1. Streaming response is a work-in-progress for local and remote inference
-2. Due to #1, agents are not supported at the time. LS agents only work in streaming mode
-3. Changing to another model is a work in progress for local and remote platforms
+- Because of differing model behavior when handling function calls and special tags such as "ipython", Llama Stack currently returns the streaming event payload for Llama 3.2 1B/3B models as a textDelta object rather than a toolCallDelta object when a tool call is made. At StepComplete, Llama Stack will still return the entire toolCall detail.
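
Until that is resolved, a client on the Llama 3.2 1B/3B path can buffer streamed text but should treat the StepComplete payload as the authoritative source of the tool call. A rough shape of that workaround, with stand-in accessors (the SDK payload getters are not spelled out here):

```kotlin
// Workaround sketch: collect streamed text, but prefer the structured
// tool call that still arrives with the StepComplete event.
class ToolCallCollector {
    private val streamed = StringBuilder()

    fun onTextDelta(text: String) {
        // On 3.2 1B/3B this text may actually be a serialized tool call.
        streamed.append(text)
    }

    fun onStepComplete(completeToolCall: String?): String =
        completeToolCall ?: streamed.toString() // fall back only if the detail is absent
}
```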
 
 ## Thanks
 We'd like to extend our thanks to the ExecuTorch team for providing their support as we integrated ExecuTorch as one of the local inference distributors for Llama Stack. Check out the [ExecuTorch Github repo](https://github.com/pytorch/executorch/tree/main) for more information.

build.gradle.kts

Lines changed: 1 addition & 1 deletion
@@ -4,5 +4,5 @@ plugins {
 
 allprojects {
     group = "com.llama.llamastack"
-    version = "0.1.0.rc14.manual-patch"
+    version = "0.1.0"
 }

llama-stack-client-kotlin-client-local/src/main/kotlin/com/llama/llamastack/client/local/util/ResponseUtil.kt

Lines changed: 15 additions & 4 deletions
@@ -20,14 +20,12 @@ fun buildInferenceChatCompletionResponse(
         CompletionMessage.builder()
             .toolCalls(createCustomToolCalls(response))
             .content(InterleavedContent.ofString(""))
-            // .role(CompletionMessage.Role.ASSISTANT)
             .stopReason(mapStopTokenToReason(stopToken))
             .build()
     } else {
         CompletionMessage.builder()
             .toolCalls(listOf())
             .content(InterleavedContent.ofString(response))
-            // .role(CompletionMessage.Role.ASSISTANT)
             .stopReason(mapStopTokenToReason(stopToken))
             .build()
     }
@@ -89,8 +87,21 @@ fun buildInferenceChatCompletionResponseForCustomToolCallStream(
     stopToken: String,
     stats: Float
 ): InferenceChatCompletionResponse {
-    // Convert ToolCall to ToolCallDelta
-    val delta = ContentDelta.ToolCallDelta.builder().toolCall(toolCall.toString()).build()
+    val delta =
+        ContentDelta.ToolCallDelta.builder()
+            .parseStatus(ContentDelta.ToolCallDelta.ParseStatus.SUCCEEDED)
+            .toolCall(
+                ContentDelta.ToolCallDelta.ToolCall.InnerToolCall.builder()
+                    .toolName(toolCall.toolName().toString())
+                    .arguments(
+                        ContentDelta.ToolCallDelta.ToolCall.InnerToolCall.Arguments.builder()
+                            .additionalProperties(toolCall.arguments()._additionalProperties())
+                            .build()
+                    )
+                    .callId(toolCall.callId())
+                    .build()
+            )
+            .build()
     return InferenceChatCompletionResponse.ofChatCompletionResponseStreamChunk(
         InferenceChatCompletionResponse.ChatCompletionResponseStreamChunk.builder()
             .event(
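
The bug this hunk fixes: the old code collapsed the whole `ToolCall` into `toolCall.toString()`, so stream consumers received one opaque display string instead of addressable `toolName`/`arguments`/`callId` fields; the new builder chain copies each field into the structured `InnerToolCall`. Illustrated with stand-in types (not the SDK's real classes):

```kotlin
// Stand-in to show why the stringified delta was lossy.
data class InnerToolCall(val toolName: String, val arguments: Map<String, Any?>, val callId: String)

fun main() {
    val call = InnerToolCall("createCalendarEvent", mapOf("title" to "sync"), "call-0")

    val lossy = call.toString() // one opaque string; fields can't be read back reliably
    val structured = call       // field-by-field copy, as the + lines above now do

    println(lossy)                         // InnerToolCall(toolName=createCalendarEvent, ...)
    println(structured.arguments["title"]) // sync
}
```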

llama-stack-client-kotlin-core/src/test/kotlin/com/llama/llamastack/models/TelemetryQuerySpansParamsTest.kt

Lines changed: 0 additions & 55 deletions
@@ -23,59 +23,4 @@ class TelemetryQuerySpansParamsTest {
             .build()
     }
 
-    // @Test
-    // fun getQueryParams() {
-    //     val params =
-    //         TelemetryQuerySpansParams.builder()
-    //             .addAttributeFilter(
-    //                 QueryCondition.builder()
-    //                     .key("key")
-    //                     .op(QueryCondition.Op.EQ)
-    //                     .value(QueryCondition.Value.ofBoolean(true))
-    //                     .build()
-    //             )
-    //             .addAttributesToReturn("string")
-    //             .maxDepth(0L)
-    //             .xLlamaStackClientVersion("X-LlamaStack-Client-Version")
-    //             .xLlamaStackProviderData("X-LlamaStack-Provider-Data")
-    //             .build()
-    //     val expected = QueryParams.builder()
-    //     expected.put(
-    //         "attribute_filters",
-    //         QueryCondition.builder()
-    //             .key("key")
-    //             .op(QueryCondition.Op.EQ.toString())
-    //             .value(QueryCondition.Value.ofBoolean("true").toString())
-    //             .build()
-    //     )
-    //     expected.put("attributes_to_return", "string")
-    //     expected.put("max_depth", "0")
-    //     assertThat(params.getQueryParams()).isEqualTo(expected.build())
-    // }
-
-    // @Test
-    // fun getQueryParamsWithoutOptionalFields() {
-    //     val params =
-    //         TelemetryQuerySpansParams.builder()
-    //             .addAttributeFilter(
-    //                 QueryCondition.builder()
-    //                     .key("key")
-    //                     .op(QueryCondition.Op.EQ)
-    //                     .value(QueryCondition.Value.ofBoolean(true))
-    //                     .build()
-    //             )
-    //             .addAttributesToReturn("string")
-    //             .build()
-    //     val expected = QueryParams.builder()
-    //     expected.put(
-    //         "attribute_filters",
-    //         QueryCondition.builder()
-    //             .key("key")
-    //             .op(QueryCondition.Op.EQ.toString())
-    //             .value(QueryCondition.Value.ofBoolean("true").toString())
-    //             .build()
-    //     )
-    //     expected.put("attributes_to_return", "string")
-    //     assertThat(params.getQueryParams()).isEqualTo(expected.build())
-    // }
 }

llama-stack-client-kotlin-core/src/test/kotlin/com/llama/llamastack/models/TelemetryQueryTracesParamsTest.kt

Lines changed: 0 additions & 32 deletions
@@ -26,38 +26,6 @@ class TelemetryQueryTracesParamsTest {
             .build()
     }
 
-    // @Test
-    // fun getQueryParams() {
-    //     val params =
-    //         TelemetryQueryTracesParams.builder()
-    //             .addAttributeFilter(
-    //                 QueryCondition.builder()
-    //                     .key("key")
-    //                     .op(QueryCondition.Op.EQ)
-    //                     .value(QueryCondition.Value.ofBoolean(true))
-    //                     .build()
-    //             )
-    //             .limit(0L)
-    //             .offset(0L)
-    //             .addOrderBy("string")
-    //             .xLlamaStackClientVersion("X-LlamaStack-Client-Version")
-    //             .xLlamaStackProviderData("X-LlamaStack-Provider-Data")
-    //             .build()
-    //     val expected = QueryParams.builder()
-    //     expected.put(
-    //         "attribute_filters",
-    //         QueryCondition.builder()
-    //             .key("key")
-    //             .op(QueryCondition.Op.EQ.toString())
-    //             .value(QueryCondition.Value.ofBoolean("true").toString())
-    //             .build()
-    //     )
-    //     expected.put("limit", "0")
-    //     expected.put("offset", "0")
-    //     expected.put("order_by", "string")
-    //     assertThat(params.getQueryParams()).isEqualTo(expected.build())
-    // }
-
     @Test
     fun getQueryParamsWithoutOptionalFields() {
         val params = TelemetryQueryTracesParams.builder().build()

llama-stack-client-kotlin-core/src/test/kotlin/com/llama/llamastack/models/ToolRuntimeListToolsParamsTest.kt

Lines changed: 0 additions & 17 deletions
@@ -18,23 +18,6 @@ class ToolRuntimeListToolsParamsTest {
             .build()
     }
 
-    // @Test
-    // fun getQueryParams() {
-    //     val params =
-    //         ToolRuntimeListToolsParams.builder()
-    //             .mcpEndpoint(Url.builder().uri("uri").build())
-    //             .toolGroupId("tool_group_id")
-    //             .xLlamaStackClientVersion("X-LlamaStack-Client-Version")
-    //             .xLlamaStackProviderData("X-LlamaStack-Provider-Data")
-    //             .build()
-    //     val expected = QueryParams.builder()
-    //     Url.builder().uri("uri").build().forEachQueryParam { key, values ->
-    //         expected.put("mcp_endpoint[$key]", values)
-    //     }
-    //     expected.put("tool_group_id", "tool_group_id")
-    //     assertThat(params.getQueryParams()).isEqualTo(expected.build())
-    // }
-
     @Test
     fun getQueryParamsWithoutOptionalFields() {
         val params = ToolRuntimeListToolsParams.builder().build()
