
Commit 833ad3e

Upgrade sdk to 0.1.4 (#21)
* Upgrade SDK stainless commit: d1e0a4cca87ede7f481c21dc9952c87d5ed80cf4
  - Apply spotless on local module
  - Fix compiler error on deprecated methods
  - Upgrade dependency versions during build/publishing
* Upgrade to 0.1.14 - stainless commit 54e2da8f4266be1059f5e989fec3c05fc7c32293
  - Manual patch on validate() on TelemetryGetSpanTreeParams and TelemetryQueryTracesParams
  - Update local module to reflect the latest changes
* Fix dependency compilation issue
  - Add back local dependency that was accidentally removed
  - Use 1.9.22 kotlin jvm for wider compatibility support
* Update README.md
* Fix validate() error
* Fix version number
1 parent def17d0 commit 833ad3e

File tree: 351 files changed (+8540 / −3402 lines)


README.md

Lines changed: 5 additions & 5 deletions
@@ -8,7 +8,7 @@ Features:
 - Remote Inferencing: Perform inferencing tasks remotely with Llama models hosted on a remote connection (or serverless localhost).
 - Simple Integration: With easy-to-use APIs, a developer can quickly integrate Llama Stack in their Android app. The difference with local vs remote inferencing is also minimal.
 
-Latest Release Notes: [v0.1.2](https://github.com/meta-llama/llama-stack-client-kotlin/releases/tag/v0.1.2)
+Latest Release Notes: [v0.1.4](https://github.com/meta-llama/llama-stack-client-kotlin/releases/tag/v0.1.4)
 
 *Tagged releases are stable versions of the project. While we strive to maintain a stable main branch, it's not guaranteed to be free of bugs or issues.*
 
@@ -24,7 +24,7 @@ The key files in the app are `ExampleLlamaStackLocalInference.kt`, `ExampleLlama
 Add the following dependency in your `build.gradle.kts` file:
 ```
 dependencies {
-    implementation("com.llama.llamastack:llama-stack-client-kotlin:0.1.2")
+    implementation("com.llama.llamastack:llama-stack-client-kotlin:0.1.4")
 }
 ```
 This will download jar files in your gradle cache in a directory like `~/.gradle/caches/modules-2/files-2.1/com.llama.llamastack/`
@@ -60,7 +60,7 @@ Start a Llama Stack server on localhost. Here is an example of how you can do th
 ```
 conda create -n stack-fireworks python=3.10
 conda activate stack-fireworks
-pip install llama-stack=0.1.2
+pip install llama-stack=0.1.4
 llama stack build --template fireworks --image-type conda
 export FIREWORKS_API_KEY=<SOME_KEY>
 llama stack run /Users/<your_username>/.llama/distributions/llamastack-fireworks/fireworks-run.yaml --port=5050
@@ -99,7 +99,7 @@ client = LlamaStackClientLocalClient
 client = LlamaStackClientOkHttpClient
     .builder()
     .baseUrl(remoteURL)
-    .headers(mapOf("x-llamastack-client-version" to listOf("0.1.2")))
+    .headers(mapOf("x-llamastack-client-version" to listOf("0.1.4")))
     .build()
 ```
 </td>
@@ -286,7 +286,7 @@ The purpose of this section is to share more details with users that would like
 ### Prerequisite
 
 You must complete the following steps:
-1. Clone the repo (`git clone https://github.com/meta-llama/llama-stack-client-kotlin.git -b release/0.1.2`)
+1. Clone the repo (`git clone https://github.com/meta-llama/llama-stack-client-kotlin.git -b release/0.1.4`)
 2. Port the appropriate ExecuTorch libraries over into your Llama Stack Kotlin library environment.
 ```
 cd llama-stack-client-kotlin-client-local
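For context, the remote-client snippet touched above can be read end to end as a minimal sketch. The builder and header call come from the README hunk; the `remoteURL` value and the import location of `LlamaStackClientOkHttpClient` are illustrative assumptions, not part of this commit.

```
import com.llama.llamastack.client.okhttp.LlamaStackClientOkHttpClient

// Minimal sketch: point the client at a running Llama Stack server.
// The URL below is a placeholder; use your own host and port.
val remoteURL = "http://localhost:5050"

val client =
    LlamaStackClientOkHttpClient.builder()
        .baseUrl(remoteURL)
        // Advertise the client version to the server; this commit bumps it to 0.1.4.
        .headers(mapOf("x-llamastack-client-version" to listOf("0.1.4")))
        .build()
```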

build.gradle.kts

Lines changed: 1 addition & 1 deletion
@@ -4,5 +4,5 @@ plugins {
 
 allprojects {
     group = "com.llama.llamastack"
-    version = "0.1.2"
+    version = "0.1.4"
 }

buildSrc/build.gradle.kts

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@ repositories {
 }
 
 dependencies {
-    implementation("com.diffplug.spotless:spotless-plugin-gradle:6.25.0")
+    implementation("com.diffplug.spotless:spotless-plugin-gradle:7.0.2")
     implementation("org.jetbrains.kotlin:kotlin-gradle-plugin:1.9.23")
     implementation("com.vanniktech:gradle-maven-publish-plugin:0.28.0")
 }
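For orientation, a minimal sketch of how a buildSrc convention script can apply Spotless once the plugin is on the classpath, as the dependency above provides. The `ktfmt()` formatter and the target pattern are assumptions for illustration; the repository's actual rules live in `llama-stack-client.kotlin.gradle.kts` below.

```
import com.diffplug.gradle.spotless.SpotlessExtension

plugins {
    // Resolves from the buildSrc classpath declared above; no version needed here.
    id("com.diffplug.spotless")
}

configure<SpotlessExtension> {
    kotlin {
        // Assumed formatter setup, for illustration only.
        target("src/**/*.kt")
        ktfmt()
    }
}
```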

buildSrc/src/main/kotlin/llama-stack-client.kotlin.gradle.kts

Lines changed: 16 additions & 6 deletions
@@ -1,6 +1,6 @@
 import com.diffplug.gradle.spotless.SpotlessExtension
+import org.jetbrains.kotlin.gradle.dsl.JvmTarget
 import org.jetbrains.kotlin.gradle.tasks.KotlinCompile
-import com.vanniktech.maven.publish.*
 
 plugins {
     id("llama-stack-client.java")
@@ -21,9 +21,19 @@ configure<SpotlessExtension> {
 }
 
 tasks.withType<KotlinCompile>().configureEach {
-    kotlinOptions {
-        allWarningsAsErrors = true
-        freeCompilerArgs = listOf("-Xjvm-default=all", "-Xjdk-release=1.8")
-        jvmTarget = "1.8"
+    compilerOptions {
+        freeCompilerArgs = listOf(
+            "-Xjvm-default=all",
+            "-Xjdk-release=1.8",
+            // Suppress deprecation warnings because we may still reference and test deprecated members.
+            "-Xsuppress-warning=DEPRECATION"
+        )
+        jvmTarget.set(JvmTarget.JVM_1_8)
     }
-}
+}
+
+// Run tests in parallel to some degree.
+tasks.withType<Test>().configureEach {
+    maxParallelForks = (Runtime.getRuntime().availableProcessors() / 2).coerceAtLeast(1)
+    forkEvery = 100
+}

llama-stack-client-kotlin-client-local/src/main/kotlin/com/llama/llamastack/client/local/InferenceServiceLocalImpl.kt

Lines changed: 7 additions & 9 deletions
@@ -18,10 +18,8 @@ import com.llama.llamastack.models.InferenceEmbeddingsParams
 import com.llama.llamastack.services.blocking.InferenceService
 import org.pytorch.executorch.LlamaCallback
 
-class InferenceServiceLocalImpl
-constructor(
-    private val clientOptions: LocalClientOptions,
-) : InferenceService, LlamaCallback {
+class InferenceServiceLocalImpl constructor(private val clientOptions: LocalClientOptions) :
+    InferenceService, LlamaCallback {
 
     private var resultMessage: String = ""
     private var onResultComplete: Boolean = false
@@ -69,7 +67,7 @@ constructor(
 
     override fun chatCompletion(
         params: InferenceChatCompletionParams,
-        requestOptions: RequestOptions
+        requestOptions: RequestOptions,
     ): ChatCompletionResponse {
         isStreaming = false
         clearElements()
@@ -132,7 +130,7 @@ constructor(
 
     override fun chatCompletionStreaming(
         params: InferenceChatCompletionParams,
-        requestOptions: RequestOptions
+        requestOptions: RequestOptions,
     ): StreamResponse<ChatCompletionResponseStreamChunk> {
         isStreaming = true
         streamingResponseList.clear()
@@ -156,21 +154,21 @@ constructor(
 
     override fun completion(
         params: InferenceCompletionParams,
-        requestOptions: RequestOptions
+        requestOptions: RequestOptions,
     ): CompletionResponse {
         TODO("Not yet implemented")
     }
 
     override fun completionStreaming(
         params: InferenceCompletionParams,
-        requestOptions: RequestOptions
+        requestOptions: RequestOptions,
     ): StreamResponse<CompletionResponse> {
         TODO("Not yet implemented")
     }
 
     override fun embeddings(
         params: InferenceEmbeddingsParams,
-        requestOptions: RequestOptions
+        requestOptions: RequestOptions,
     ): EmbeddingsResponse {
         TODO("Not yet implemented")
     }

llama-stack-client-kotlin-client-local/src/main/kotlin/com/llama/llamastack/client/local/LlamaStackClientClientLocalImpl.kt

Lines changed: 3 additions & 5 deletions
@@ -7,10 +7,8 @@ import com.llama.llamastack.client.LlamaStackClientClientAsync
 import com.llama.llamastack.models.*
 import com.llama.llamastack.services.blocking.*
 
-class LlamaStackClientClientLocalImpl
-constructor(
-    private val clientOptions: LocalClientOptions,
-) : LlamaStackClientClient {
+class LlamaStackClientClientLocalImpl constructor(private val clientOptions: LocalClientOptions) :
+    LlamaStackClientClient {
 
     private val inference: InferenceService by lazy { InferenceServiceLocalImpl(clientOptions) }
 
@@ -56,7 +54,7 @@ constructor(
         TODO("Not yet implemented")
     }
 
-    override fun evalTasks(): EvalTaskService {
+    override fun benchmarks(): BenchmarkService {
         TODO("Not yet implemented")
     }
 

llama-stack-client-kotlin-client-local/src/main/kotlin/com/llama/llamastack/client/local/LocalClientOptions.kt

Lines changed: 2 additions & 2 deletions
@@ -10,7 +10,7 @@ private constructor(
     val modelPath: String,
     val tokenizerPath: String,
     val temperature: Float,
-    val llamaModule: LlamaModule
+    val llamaModule: LlamaModule,
 ) {
 
     companion object {
@@ -49,7 +49,7 @@ private constructor(
                     "ExecuTorch AAR file needs to be included in the libs/ for your app. " +
                         "Please see the README for more details: " +
                         "https://github.com/meta-llama/llama-stack-client-kotlin/tree/main",
-                    e
+                    e,
                 )
             }
         }

llama-stack-client-kotlin-client-local/src/main/kotlin/com/llama/llamastack/client/local/util/ResponseUtil.kt

Lines changed: 5 additions & 5 deletions
@@ -12,7 +12,7 @@ import java.util.UUID
 fun buildInferenceChatCompletionResponse(
     response: String,
     stats: Float,
-    stopToken: String
+    stopToken: String,
 ): ChatCompletionResponse {
     // check for prefix [ and suffix ] if so then tool call.
     // parse for "toolName", "additionalProperties"
@@ -41,7 +41,7 @@ fun buildInferenceChatCompletionResponse(
 }
 
 fun buildInferenceChatCompletionResponseFromStream(
-    response: String,
+    response: String
 ): ChatCompletionResponseStreamChunk {
     return ChatCompletionResponseStreamChunk.builder()
         .event(
@@ -66,7 +66,7 @@ fun buildLastInferenceChatCompletionResponsesFromStream(
             buildInferenceChatCompletionResponseForCustomToolCallStream(
                 toolCall,
                 stopToken,
-                stats
+                stats,
             )
         )
     }
@@ -79,7 +79,7 @@ fun buildLastInferenceChatCompletionResponsesFromStream(
 fun buildInferenceChatCompletionResponseForCustomToolCallStream(
     toolCall: ToolCall,
     stopToken: String,
-    stats: Float
+    stats: Float,
 ): ChatCompletionResponseStreamChunk {
     val delta =
         ContentDelta.ToolCallDelta.builder()
@@ -101,7 +101,7 @@ fun buildInferenceChatCompletionResponseForCustomToolCallStream(
 fun buildInferenceChatCompletionResponseForStringStream(
     str: String,
     stopToken: String,
-    stats: Float
+    stats: Float,
 ): ChatCompletionResponseStreamChunk {
 
     return ChatCompletionResponseStreamChunk.builder()

llama-stack-client-kotlin-client-okhttp/src/main/kotlin/com/llama/llamastack/client/okhttp/OkHttpClient.kt

Lines changed: 4 additions & 7 deletions
@@ -31,10 +31,7 @@ class OkHttpClient
 private constructor(private val okHttpClient: okhttp3.OkHttpClient, private val baseUrl: HttpUrl) :
     HttpClient {
 
-    override fun execute(
-        request: HttpRequest,
-        requestOptions: RequestOptions,
-    ): HttpResponse {
+    override fun execute(request: HttpRequest, requestOptions: RequestOptions): HttpResponse {
         val call = newCall(request, requestOptions)
 
         return try {
@@ -71,7 +68,7 @@ private constructor(private val okHttpClient: okhttp3.OkHttpClient, private val
         val clientBuilder = okHttpClient.newBuilder()
 
         val logLevel =
-            when (System.getenv("LLAMA_STACK_CLIENT_LOG")?.lowercase()) {
+            when (System.getenv("LLAMA_STACK_LOG")?.lowercase()) {
                 "info" -> HttpLoggingInterceptor.Level.BASIC
                 "debug" -> HttpLoggingInterceptor.Level.BODY
                 else -> null
@@ -128,13 +125,13 @@ private constructor(private val okHttpClient: okhttp3.OkHttpClient, private val
         ) {
             builder.header(
                 "X-Stainless-Read-Timeout",
-                Duration.ofMillis(client.readTimeoutMillis.toLong()).seconds.toString()
+                Duration.ofMillis(client.readTimeoutMillis.toLong()).seconds.toString(),
             )
         }
         if (!headers.names().contains("X-Stainless-Timeout") && client.callTimeoutMillis != 0) {
            builder.header(
                 "X-Stainless-Timeout",
-                Duration.ofMillis(client.callTimeoutMillis.toLong()).seconds.toString()
+                Duration.ofMillis(client.callTimeoutMillis.toLong()).seconds.toString(),
             )
         }
 
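As a usage note, the renamed logging switch behaves as sketched below; the helper name is hypothetical and simply mirrors the `when` mapping in the hunk above.

```
import okhttp3.logging.HttpLoggingInterceptor

// Hypothetical helper mirroring the mapping above: the client now reads
// LLAMA_STACK_LOG (previously LLAMA_STACK_CLIENT_LOG). Any other value,
// or an unset variable, leaves HTTP logging disabled.
fun resolveLogLevel(): HttpLoggingInterceptor.Level? =
    when (System.getenv("LLAMA_STACK_LOG")?.lowercase()) {
        "info" -> HttpLoggingInterceptor.Level.BASIC
        "debug" -> HttpLoggingInterceptor.Level.BODY
        else -> null
    }
```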

llama-stack-client-kotlin-core/src/main/kotlin/com/llama/llamastack/client/LlamaStackClientClient.kt

Lines changed: 2 additions & 2 deletions
@@ -4,10 +4,10 @@ package com.llama.llamastack.client
 
 import com.llama.llamastack.services.blocking.AgentService
 import com.llama.llamastack.services.blocking.BatchInferenceService
+import com.llama.llamastack.services.blocking.BenchmarkService
 import com.llama.llamastack.services.blocking.DatasetService
 import com.llama.llamastack.services.blocking.DatasetioService
 import com.llama.llamastack.services.blocking.EvalService
-import com.llama.llamastack.services.blocking.EvalTaskService
 import com.llama.llamastack.services.blocking.InferenceService
 import com.llama.llamastack.services.blocking.InspectService
 import com.llama.llamastack.services.blocking.ModelService
@@ -94,7 +94,7 @@ interface LlamaStackClientClient {
 
     fun scoringFunctions(): ScoringFunctionService
 
-    fun evalTasks(): EvalTaskService
+    fun benchmarks(): BenchmarkService
 
     /**
      * Closes this client, relinquishing any underlying resources.
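Callers migrate by swapping the accessor, as in this hedged sketch; the helper function is hypothetical, while both imports come from the hunk above.

```
import com.llama.llamastack.client.LlamaStackClientClient
import com.llama.llamastack.services.blocking.BenchmarkService

// Hypothetical migration helper: code that previously resolved the eval-task
// service now asks the client for benchmarks() instead.
fun resolveBenchmarks(client: LlamaStackClientClient): BenchmarkService =
    client.benchmarks() // was: client.evalTasks()
```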
