feat(weave): Add autopatching of ChatNVIDIA in langchain (#3264)
* feat(langchain): Add autopatching of ChatNVIDIA

* include langchain-nvidia-ai-endpoints as dependency

* add input and output processors

* fix accumulator

* change docs title

* address all comments

* Remove extra #
abraham-leal authored Dec 31, 2024
1 parent 709e145 commit bdc9a1d
Showing 22 changed files with 1,638 additions and 0 deletions.
2 changes: 2 additions & 0 deletions docs/docs/guides/integrations/index.md
@@ -20,6 +20,8 @@ LLM providers are the vendors that offer access to large language models for gen
- **[Groq](/guides/integrations/groq)**
- **[Open Router](/guides/integrations/openrouter)**
- **[LiteLLM](/guides/integrations/litellm)**
- **[NVIDIA NIM](/guides/integrations/nvidia_nim)**



**[Local Models](/guides/integrations/local_models)**: For when you're running models on your own infrastructure.
176 changes: 176 additions & 0 deletions docs/docs/guides/integrations/nvidia_nim.md
@@ -0,0 +1,176 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';

# NVIDIA NIM

Weave automatically tracks and logs LLM calls made via the [ChatNVIDIA](https://python.langchain.com/docs/integrations/chat/nvidia_ai_endpoints/) library, after `weave.init()` is called.

## Tracing

It’s important to store traces of LLM applications in a central database, both during development and in production. You’ll use these traces for debugging and to help build a dataset of tricky examples to evaluate against while improving your application.

<Tabs groupId="programming-language">
<TabItem value="python" label="Python" default>
Weave can automatically capture traces for the [ChatNVIDIA python library](https://python.langchain.com/docs/integrations/chat/nvidia_ai_endpoints/).

Start capturing by calling `weave.init(<project-name>)` with a project name of your choice.

```python
from langchain_nvidia_ai_endpoints import ChatNVIDIA
import weave
client = ChatNVIDIA(model="mistralai/mixtral-8x7b-instruct-v0.1", temperature=0.8, max_tokens=64, top_p=1)
# highlight-next-line
weave.init('emoji-bot')

messages = [
    {
        "role": "system",
        "content": "You are AGI. You will be provided with a message, and your task is to respond using emojis only.",
    }
]

response = client.invoke(messages)
```

</TabItem>
<TabItem value="typescript" label="TypeScript">
```plaintext
This feature is not available in TypeScript yet, as the ChatNVIDIA library is Python-only.
```
</TabItem>
</Tabs>

![chatnvidia_trace.png](imgs/chatnvidia_trace.png)

## Track your own ops

<Tabs groupId="programming-language">
<TabItem value="python" label="Python" default>
Wrapping a function with `@weave.op` starts capturing inputs, outputs and app logic so you can debug how data flows through your app. You can deeply nest ops and build a tree of functions that you want to track. This also starts automatically versioning code as you experiment to capture ad-hoc details that haven't been committed to git.

Simply create a function decorated with [`@weave.op`](/guides/tracking/ops) that calls into [ChatNVIDIA python library](https://python.langchain.com/docs/integrations/chat/nvidia_ai_endpoints/).

In the example below, we have two functions wrapped with `@weave.op`. This helps us see how intermediate steps, like the retrieval step in a RAG app, affect how our app behaves.

```python
# highlight-next-line
import weave
from langchain_nvidia_ai_endpoints import ChatNVIDIA
import requests, random

PROMPT = """Emulate the Pokedex from early Pokémon episodes. State the name of the Pokemon and then describe it.
Your tone is informative yet sassy, blending factual details with a touch of dry humor. Be concise, no more than 3 sentences. """
POKEMON = ['pikachu', 'charmander', 'squirtle', 'bulbasaur', 'jigglypuff', 'meowth', 'eevee']
client = ChatNVIDIA(model="mistralai/mixtral-8x7b-instruct-v0.1", temperature=0.7, max_tokens=100, top_p=1)

# highlight-next-line
@weave.op
def get_pokemon_data(pokemon_name):
    # highlight-next-line
    # This is a step within your application, like the retrieval step within a RAG app
    url = f"https://pokeapi.co/api/v2/pokemon/{pokemon_name}"
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        name = data["name"]
        types = [t["type"]["name"] for t in data["types"]]
        species_url = data["species"]["url"]
        species_response = requests.get(species_url)
        evolved_from = "Unknown"
        if species_response.status_code == 200:
            species_data = species_response.json()
            if species_data["evolves_from_species"]:
                evolved_from = species_data["evolves_from_species"]["name"]
        return {"name": name, "types": types, "evolved_from": evolved_from}
    else:
        return None

# highlight-next-line
@weave.op
def pokedex(name: str, prompt: str) -> str:
    # highlight-next-line
    # This is your root op that calls out to other ops
    # highlight-next-line
    data = get_pokemon_data(name)
    if not data: return "Error: Unable to fetch data"

    messages = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": str(data)},
    ]

    response = client.invoke(messages)
    return response.content

# highlight-next-line
weave.init('pokedex-nvidia')
# Get data for a specific Pokémon
pokemon_data = pokedex(random.choice(POKEMON), PROMPT)
```

Navigate to Weave and click `get_pokemon_data` in the UI to see the inputs and outputs of that step.
</TabItem>
<TabItem value="typescript" label="TypeScript">
```plaintext
This feature is not available in TypeScript yet, as the ChatNVIDIA library is Python-only.
```
</TabItem>
</Tabs>

![nvidia_pokedex.png](imgs/nvidia_pokedex.png)

## Create a `Model` for easier experimentation

<Tabs groupId="programming-language">
<TabItem value="python" label="Python" default>
Organizing experimentation is difficult when there are many moving pieces. By using the [`Model`](/guides/core-types/models) class, you can capture and organize the experimental details of your app like your system prompt or the model you're using. This helps organize and compare different iterations of your app.

In addition to versioning code and capturing inputs/outputs, [`Model`](/guides/core-types/models)s capture structured parameters that control your application’s behavior, making it easy to find what parameters worked best. You can also use Weave Models with `serve` and [`Evaluation`](/guides/core-types/evaluations)s; a minimal evaluation sketch follows the example below.

In the example below, you can experiment with `model` and `system_message`. Every time you change one of these, you'll get a new _version_ of `GrammarCorrectorModel`.

```python
import weave
from langchain_nvidia_ai_endpoints import ChatNVIDIA

weave.init('grammar-nvidia')

class GrammarCorrectorModel(weave.Model): # Change to `weave.Model`
    system_message: str

    @weave.op()
    def predict(self, user_input): # Change to `predict`
        client = ChatNVIDIA(model="mistralai/mixtral-8x7b-instruct-v0.1", temperature=0, max_tokens=100, top_p=1)

        messages = [
            {
                "role": "system",
                "content": self.system_message,
            },
            {
                "role": "user",
                "content": user_input,
            },
        ]

        response = client.invoke(messages)
        return response.content


corrector = GrammarCorrectorModel(
    system_message="You are a grammar checker, correct the following user input.")
result = corrector.predict("That was so easy, it was a piece of pie!")
print(result)
```
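
Because `Model`s plug into [`Evaluation`](/guides/core-types/evaluations)s, you can score each version of the grammar corrector against a small dataset. The snippet below is a minimal sketch rather than part of this integration: the example rows and the `contains_expected` scorer are hypothetical, and it assumes a Weave version in which scorers receive the model's result via an `output` parameter.

```python
import asyncio
import weave

# Hypothetical dataset; each row's keys map to `predict` arguments and scorer arguments.
examples = [
    {"user_input": "That was so easy, it was a piece of pie!", "expected": "cake"},
    {"user_input": "Me and him goes to the store.", "expected": "go"},
]

# Hypothetical scorer: checks that the corrected text contains the expected word.
@weave.op
def contains_expected(expected: str, output: str) -> dict:
    return {"contains_expected": expected.lower() in output.lower()}

evaluation = weave.Evaluation(dataset=examples, scorers=[contains_expected])
asyncio.run(evaluation.evaluate(corrector))
```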
</TabItem>
<TabItem value="typescript" label="TypeScript">
```plaintext
This feature is not available in TypeScript yet, as the ChatNVIDIA library is Python-only.
```
</TabItem>
</Tabs>

![chatnvidia_model.png](imgs/chatnvidia_model.png)

## Usage Info

The ChatNVIDIA integration supports `invoke`, `stream`, and their async variants, and it supports tool use.
As ChatNVIDIA is meant to be used with many types of models, it does not have function calling support.
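
Below is a rough sketch of those call styles. These are standard LangChain chat-model methods; the project name and prompt are illustrative, not from this commit.

```python
import asyncio
import weave
from langchain_nvidia_ai_endpoints import ChatNVIDIA

weave.init('nvidia-usage-demo')  # hypothetical project name
client = ChatNVIDIA(model="mistralai/mixtral-8x7b-instruct-v0.1", temperature=0.8, max_tokens=64, top_p=1)
messages = [{"role": "user", "content": "Write a haiku about tracing."}]

# Synchronous streaming: the yielded chunks are traced as one call.
for chunk in client.stream(messages):
    print(chunk.content, end="", flush=True)

# Async variant of the same call.
async def main():
    response = await client.ainvoke(messages)
    print(response.content)

asyncio.run(main())
```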
1 change: 1 addition & 0 deletions docs/sidebars.ts
@@ -101,6 +101,7 @@ const sidebars: SidebarsConfig = {
"guides/integrations/groq",
"guides/integrations/openrouter",
"guides/integrations/litellm",
"guides/integrations/nvidia_nim",
],
},
"guides/integrations/local_models",
6 changes: 6 additions & 0 deletions noxfile.py
@@ -8,6 +8,7 @@
"cohere",
"dspy",
"langchain",
"langchain_nvidia_ai_endpoints",
"litellm",
"notdiamond",
"google_ai_studio",
@@ -40,6 +41,7 @@ def lint(session):
"google_ai_studio",
"groq",
"instructor",
"langchain_nvidia_ai_endpoints",
"langchain",
"litellm",
"llamaindex",
@@ -73,6 +75,10 @@ def tests(session, shard):
    if shard == "google_ai_studio":
        env["GOOGLE_API_KEY"] = session.env.get("GOOGLE_API_KEY")

    # Add the NVIDIA_API_KEY environment variable for the "langchain_nvidia_ai_endpoints" shard
    if shard == "langchain_nvidia_ai_endpoints":
        env["NVIDIA_API_KEY"] = session.env.get("NVIDIA_API_KEY")

    # we are doing some integration test in test_llm_integrations.py that requires
    # setting some environment variables for the LLM providers
    if shard == "scorers_tests":
4 changes: 4 additions & 0 deletions pyproject.toml
@@ -66,6 +66,10 @@ langchain = [
"pysqlite3",
"opentelemetry-exporter-otlp",
]
langchain_nvidia_ai_endpoints = [
"langchain-core>=0.2.1",
"langchain-nvidia-ai-endpoints",
]
litellm = ["litellm>=1.36.1"]
llamaindex = ["llama-index>=0.10.35"]
mistral0 = ["mistralai>=0.1.8,<1.0.0"]
Empty file added tests/integrations/__init__.py
Empty file.
Empty file.
@@ -0,0 +1,50 @@
interactions:
- request:
    body: '{"messages": [{"role": "user", "content": "Hello!"}], "model": "meta/llama-3.1-8b-instruct",
      "temperature": 0.0, "max_tokens": 64, "top_p": 1.0, "stream": false}'
    headers:
      Accept:
      - application/json
      Accept-Encoding:
      - gzip, deflate, zstd
      Connection:
      - keep-alive
      Content-Length:
      - '161'
      Content-Type:
      - application/json
      User-Agent:
      - langchain-nvidia-ai-endpoints
    method: POST
    uri: https://integrate.api.nvidia.com/v1/chat/completions
  response:
    body:
      string: '{"id":"chat-8bfccc9544b64c70b47605a647b69b8a","object":"chat.completion","created":1734992505,"model":"meta/llama-3.1-8b-instruct","choices":[{"index":0,"message":{"role":"assistant","content":"Hello!
        It''s nice to meet you. Is there something I can help you with or would you
        like to chat?"},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":12,"total_tokens":36,"completion_tokens":24},"prompt_logprobs":null}'
    headers:
      Access-Control-Allow-Credentials:
      - 'true'
      Access-Control-Expose-Headers:
      - nvcf-reqid
      Connection:
      - keep-alive
      Content-Length:
      - '445'
      Content-Type:
      - application/json
      Date:
      - Mon, 23 Dec 2024 22:21:45 GMT
      Nvcf-Reqid:
      - 704f40c5-4d25-46fb-8d76-66364bc9e156
      Nvcf-Status:
      - fulfilled
      Server:
      - uvicorn
      Vary:
      - Origin
      - origin, access-control-request-method, access-control-request-headers
    status:
      code: 200
      message: OK
version: 1
