Skip to content

Commit

Permalink
new: implemented nvidia nim support
Browse files Browse the repository at this point in the history
  • Loading branch information
evilsocket committed Dec 3, 2024
1 parent 60fb40c commit bd73b0c
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 6 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ The project's main goal and core difference with other tools is to allow the use

## LLM Support

Nerve features integrations for any model accessible via the [ollama](https://github.com/ollama/ollama), [groq](https://groq.com), [OpenAI](https://openai.com/index/openai-api/), [Anthropic](https://www.anthropic.com/), [Fireworks](https://fireworks.ai/), [Huggingface](https://huggingface.co/blog/tgi-messages-api#using-inference-endpoints-with-openai-client-libraries) and [NovitaAI](https://novita.ai/model-api/product/llm-api) APIs.
Nerve features integrations for any model accessible via the [ollama](https://github.com/ollama/ollama), [groq](https://groq.com), [OpenAI](https://openai.com/index/openai-api/), [Anthropic](https://www.anthropic.com/), [Fireworks](https://fireworks.ai/), [Huggingface](https://huggingface.co/blog/tgi-messages-api#using-inference-endpoints-with-openai-client-libraries), [Nvidia NIM](https://www.nvidia.com/en-us/ai/) and [NovitaAI](https://novita.ai/model-api/product/llm-api) APIs.

**The tool will automatically detect if the selected model natively supports function calling. If not, it will provide a compatibility layer that empowers older models to perform function calling anyway.**

Expand Down Expand Up @@ -67,6 +67,12 @@ For **Anthropic**:
ANTHROPIC_API_KEY=your-api-key nerve -G "anthropic://claude" ...
```

For **Nvidia NIM**:

```sh
NIM_API_KEY=your-api-key nerve -G "nim://nvidia/nemotron-4-340b-instruct" ...
```

For **Novita**:

```sh
Expand Down
10 changes: 7 additions & 3 deletions crates/openai_api_rust/src/requests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,12 +71,16 @@ fn deal_response(response: Result<ureq::Response, ureq::Error>, sub_url: &str) -
},
Err(err) => match err {
ureq::Error::Status(status, response) => {
let error_msg = response.into_json::<Json>().unwrap();
error!("<== ❌\n\tError api: {sub_url}, status: {status}, error: {error_msg}");
let raw = response.status_text().to_string();
let error_msg = match response.into_json::<Json>() {
Ok(json) => json.to_string(),
Err(_) => raw,
};
error!("api: {sub_url}, status: {status}, error: {error_msg}");
Err(Error::ApiError(format!("{error_msg}")))
},
ureq::Error::Transport(e) => {
error!("<== ❌\n\tError api: {sub_url}, error: {:?}", e.to_string());
error!("api: {sub_url}, error: {:?}", e.to_string());
Err(Error::RequestError(e.to_string()))
},
},
Expand Down
9 changes: 9 additions & 0 deletions nerve-core/src/agent/generator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ mod fireworks;
mod groq;
#[cfg(feature = "hf")]
mod huggingface;
#[cfg(feature = "nim")]
mod nim;
#[cfg(feature = "novita")]
mod novita;
#[cfg(feature = "ollama")]
Expand Down Expand Up @@ -202,6 +204,13 @@ macro_rules! factory_body {
$model_name,
$context_window,
)?)),
#[cfg(feature = "nim")]
"nim" | "nvidia" => Ok(Box::new(nim::NvidiaNIMClient::new(
$url,
$port,
$model_name,
$context_window,
)?)),
"http" => Ok(Box::new(openai_compatible::OpenAiCompatibleClient::new(
$url,
$port,
Expand Down
51 changes: 51 additions & 0 deletions nerve-core/src/agent/generator/nim.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
use anyhow::Result;
use async_trait::async_trait;

use crate::agent::state::SharedState;

use super::{openai::OpenAIClient, ChatOptions, ChatResponse, Client};

/// Client for NVIDIA NIM (https://integrate.api.nvidia.com), implemented as a
/// thin wrapper around the OpenAI-compatible client with a custom base URL.
pub struct NvidiaNIMClient {
    // underlying OpenAI-compatible client that all calls delegate to
    client: OpenAIClient,
}

#[async_trait]
impl Client for NvidiaNIMClient {
    /// Creates a NIM client. The url, port and context-window arguments are
    /// ignored: requests always go to the hosted NIM endpoint, authenticated
    /// via the NIM_API_KEY environment variable (read by OpenAIClient::custom).
    fn new(_: &str, _: u16, model_name: &str, _: u32) -> anyhow::Result<Self>
    where
        Self: Sized,
    {
        // NIM model identifiers are namespaced (e.g. "nvidia/nemotron-4-340b-instruct");
        // default to the "nvidia" namespace when the caller omits one.
        // Build an owned String instead of borrowing a temporary `format!`
        // result from an if/else branch (fragile, clippy-flagged pattern).
        let model_name = if model_name.contains('/') {
            model_name.to_owned()
        } else {
            format!("nvidia/{model_name}")
        };

        let client = OpenAIClient::custom(
            &model_name,
            "NIM_API_KEY",
            "https://integrate.api.nvidia.com/v1/",
        )?;

        Ok(Self { client })
    }

    /// Delegates the native function-calling capability probe to the
    /// underlying OpenAI-compatible client.
    async fn check_native_tools_support(&self) -> Result<bool> {
        self.client.check_native_tools_support().await
    }

    /// Delegates chat completion to the underlying OpenAI-compatible client.
    async fn chat(
        &self,
        state: SharedState,
        options: &ChatOptions,
    ) -> anyhow::Result<ChatResponse> {
        self.client.chat(state, options).await
    }
}

#[async_trait]
impl mini_rag::Embedder for NvidiaNIMClient {
    /// Computes embeddings for `text` by delegating to the underlying
    /// OpenAI-compatible client (and therefore to the NIM embeddings API).
    async fn embed(&self, text: &str) -> Result<mini_rag::Embeddings> {
        self.client.embed(text).await
    }
}
4 changes: 4 additions & 0 deletions nerve-core/src/agent/generator/openai_compatible.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ impl Client for OpenAiCompatibleClient {
Ok(Self { client })
}

    /// Probes whether the remote model natively supports function/tool
    /// calling by delegating to the wrapped OpenAI client.
    async fn check_native_tools_support(&self) -> Result<bool> {
        self.client.check_native_tools_support().await
    }

async fn chat(
&self,
state: SharedState,
Expand Down

0 comments on commit bd73b0c

Please sign in to comment.