From 61e285fc87c6ea4311a84588a673b1fae6b7e476 Mon Sep 17 00:00:00 2001
From: Adnaan Sachidanandan <adnaan@vocode.dev>
Date: Tue, 18 Jun 2024 17:01:56 -0700
Subject: [PATCH] add langchain agent docs (#564)

---
 docs/mint.json                            |   5 +-
 docs/open-source/langchain-agent-dep.mdx  | 209 ++++++++++++++++
 docs/open-source/langchain-agent.mdx      | 285 ++++++++--------------
 vocode/streaming/agent/langchain_agent.py |  32 ++-
 4 files changed, 340 insertions(+), 191 deletions(-)
 create mode 100644 docs/open-source/langchain-agent-dep.mdx
diff --git a/docs/mint.json b/docs/mint.json
index f2a629542..1271ee8c0 100644
--- a/docs/mint.json
+++ b/docs/mint.json
@@ -82,13 +82,14 @@
         "open-source/sentry",
         "open-source/logging-with-loguru",
         "open-source/turn-based-conversation",
-        "open-source/language-support"
+        "open-source/language-support",
+        "open-source/langchain-agent"
       ]
     },
     {
       "group": "Legacy (0.0.111) Guides",
       "pages": [
-        "open-source/langchain-agent",
+        "open-source/langchain-agent-dep",
         "open-source/local-conversation"
       ]
     },
diff --git a/docs/open-source/langchain-agent-dep.mdx b/docs/open-source/langchain-agent-dep.mdx
new file mode 100644
index 000000000..a5cafccb3
--- /dev/null
+++ b/docs/open-source/langchain-agent-dep.mdx
@@ -0,0 +1,209 @@
+---
+title: "(Deprecated) Langchain Agent"
+description: "Empower Langchain agents to interact with the real world via phone calls"
+---
+
+# Introduction
+
+This example shows how to use Vocode as a tool augmenting the abilities of a Langchain
+agent. By providing it with access to Vocode, a [Langchain Agent](https://python.langchain.com/en/latest/modules/agents.html)
+can now make autonomous phone calls and take action based on the outcome of the calls.
+
+Our demo will walk through how to instruct the agent to look up a phone number and make the
+appropriate call.
+
+<div>
+  <iframe
+    style={{
+      width: "100%",
+      height: "300px",
+      paddingLeft: "75px",
+      paddingRight: "75px",
+    }}
+    src="https://www.loom.com/embed/ba80f55524944ccc8c3b437b7a909270"
+  ></iframe>
+</div>
+<br />
+
+# How to run it
+
+## Requirements
+
+1. Install [Ngrok](https://ngrok.com/)
+2. Install [Redis](https://redis.com/)
+3. Install [Poetry](https://python-poetry.org/)
+
+## Run the example
+
+Note: `gpt-4` is required for this to work. `gpt-3.5-turbo` or older models are not smart enough to parse JSON responses in Langchain agents reliably out-of-the-box.
+
+To get started, clone the Vocode repo or copy the [Langchain agent app](https://github.com/vocodedev/vocode-python/tree/main/apps/langchain_agent) directory.
+
+```bash
+git clone https://github.com/vocodedev/vocode-python.git
+```
+
+### Environment
+
+1. Copy the `.env.template` and fill in your API keys. You'll need:
+
+- [Deepgram](https://deepgram.com) (for speech transcription)
+- [OpenAI](https://platform.openai.com) (for the underlying agent)
+- [Azure](https://azure.microsoft.com/en-us/products/cognitive-services/text-to-speech/) (for speech synthesis)
+- [Twilio](https://console.twilio.com/) (for telephony)
+
+2. Tunnel port 3000 to ngrok by running:
+
+```
+ngrok http 3000
+```
+
+Fill in the `TELEPHONY_SERVER_BASE_URL` environment variable with your ngrok base URL. Don't include `https://`, so it should look something like:
+
+```
+TELEPHONY_SERVER_BASE_URL=asdf1234.ngrok.app
+```
+
+3. Buy a phone number on Twilio or [verify your caller ID](https://support.twilio.com/hc/en-us/articles/223180048-Adding-a-Verified-Phone-Number-or-Caller-ID-with-Twilio) to use as the outbound phone number.
+   Set this phone number as the `OUTBOUND_CALLER_NUMBER` environment variable. Include `+` and the area code, so for a US phone number, it would look something like:
+
+```
+OUTBOUND_CALLER_NUMBER=+15555555555
+```
+
+### Set up self-hosted telephony server
+
+Run the following setup steps from the `langchain_agent` directory.
+
+#### Running with Docker
+
+1. Build the telephony server Docker image
+
+```
+docker build -t vocode-langchain-agent-telephony-app .
+```
+
+2. Run the service using `docker-compose`
+
+```
+docker-compose up
+```
+
+#### Running with Python
+
+1. (optional) Set up a Python environment: we recommend `virtualenv`
+
+```
+python3 -m venv venv
+source venv/bin/activate
+```
+
+2. Install requirements
+
+```bash
+poetry install
+```
+
+3. Run an instance of Redis at http://localhost:6379. With Docker, this can be done with:
+
+```
+docker run -dp 6379:6379 -it redis/redis-stack:latest
+```
+
+4. Run the `TelephonyServer`:
+
+```bash
+uvicorn telephony_app:app --reload --port 3000
+```
+
+### Set up the Langchain agent
+
+With the self-hosted telephony server running:
+
+1. Update the phone numbers in the contact book in `tools/contacts.py`
+
+```python
+CONTACTS = [{"name": "Kian", "phone": "+123456789"}]
+```
+
+2. Run `main.py`
+
+```bash
+poetry install
+poetry run python main.py
+```
+
+# Code explanation
+The Langchain agent is implemented in `main.py`. It uses the Langchain library to initialize an agent that can have a conversation.
+
+## Langchain Agent
+`main.py` instantiates the langchain agent and relevant tools. It sets an objective, initializes a Langchain agent, and runs the conversation.
+```python
+agent = initialize_agent(
+    tools=[get_all_contacts, call_phone_number, word_of_the_day],
+    llm=llm,
+    agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
+    verbose=verbose,
+    memory=memory,
+)
+```
+
+## Langchain tools
+
+### Tool to get all contacts
+```python
+@tool("get_all_contacts")
+def get_all_contacts(placeholder: str) -> List[dict]:
+    """Get contacts."""
+    return CONTACTS
+```
+
+### Tool to call phone number
+`tools/vocode.py` makes use of the `OutboundCall` class to initiate a phone call
+```python
+@tool("call phone number")
+def call_phone_number(input: str) -> str:
+    """calls a phone number as a bot and returns a transcript of the conversation.
+    the input to this tool is a pipe separated list of a phone number, a prompt, and the first thing the bot should say.
+    The prompt should instruct the bot with what to do on the call and be in the 3rd person,
+    like 'the assistant is performing this task' instead of 'perform this task'.
+
+    should only use this tool once it has found an adequate phone number to call.
+
+    for example, `+15555555555|the assistant is explaining the meaning of life|i'm going to tell you the meaning of life` will call +15555555555, say 'i'm going to tell you the meaning of life', and instruct the assistant to tell the human what the meaning of life is.
+    """
+    phone_number, prompt, initial_message = input.split("|", 2)
+    call = OutboundCall(
+        base_url=os.environ["TELEPHONY_SERVER_BASE_URL"],
+        to_phone=phone_number,
+        from_phone=os.environ["OUTBOUND_CALLER_NUMBER"],
+        config_manager=RedisConfigManager(),
+        agent_config=ChatGPTAgentConfig(
+            prompt_preamble=prompt,
+            initial_message=BaseMessage(text=initial_message),
+        ),
+        logger=logging.Logger("call_phone_number"),
+    )
+    LOOP.run_until_complete(call.start())
+    while True:
+        maybe_transcript = get_transcript(call.conversation_id)
+        if maybe_transcript:
+            delete_transcript(call.conversation_id)
+            return maybe_transcript
+        else:
+            time.sleep(1)
+```
+
+## TelephonyServer
+`telephony_app.py` instantiates a `TelephonyServer` object to manage the phone call initiated by `OutboundCall`
+```python
+telephony_server = TelephonyServer(
+    base_url=BASE_URL,
+    config_manager=config_manager,
+    inbound_call_configs=[],
+    events_manager=EventsManager(),
+    logger=logger,
+)
+
+app.include_router(telephony_server.get_router())
+```
\ No newline at end of file
diff --git a/docs/open-source/langchain-agent.mdx b/docs/open-source/langchain-agent.mdx
index d2ceb2efb..dbb1e79f5 100644
--- a/docs/open-source/langchain-agent.mdx
+++ b/docs/open-source/langchain-agent.mdx
@@ -1,209 +1,136 @@
 ---
-title: "Langchain agent"
-description: "Empower Langchain agents to interact with the real world via phone calls"
+title: "Langchain Agent"
+description: "Use Langchain to determine your agent's responses."
 ---
 
-# Introduction
+## Overview
 
-This example shows how to use Vocode as a tool augmenting the abilities of a Langchain
-agent. By providing it with access to Vocode, a [Langchain Agent](https://python.langchain.com/en/latest/modules/agents.html)
-can now make autonomous phone calls and take action based on the outcome of the calls.
+[Langchain](https://python.langchain.com/v0.2/docs/introduction/) offers tooling to create custom LLM pipelines for complex decision-making. 
+Through Langchain, you can manage your LLM and prompts, and combine them with advanced techniques like RAG, multi-stage prompting, and sub-chains.
+The library also offers components for output parsing, complex document loading, and callbacks.
 
-Our demo will walk through how to instruct the agent to lookup a phone number and make the
-appropriate call.
-
-<div>
-  <iframe
-    style={{
-      width: "100%",
-      height: "300px",
-      paddingLeft: "75px",
-      paddingRight: "75px",
-    }}
-    src="https://www.loom.com/embed/ba80f55524944ccc8c3b437b7a909270"
-  ></iframe>
-</div>
-<br />
-
-# How to run it
-
-## Requirements
-
-1. Install [Ngrok](https://ngrok.com/)
-2. Install [Redis](https://redis.com/)
-3. Install [Poetry](https://python-poetry.org/)
-
-## Run the example
-
-Note: `gpt-4` is required for this to work. `gpt-3.5-turbo` or older models are not smart enough to parse JSON responses in Langchain agents reliably out-of-the-box.
-
-To get started, clone the Vocode repo or copy the [Langchain agent app](https://github.com/vocodedev/vocode-python/tree/main/apps/langchain_agent) directory.
-
-```bash
-git clone https://github.com/vocodedev/vocode-python.git
-```
-
-### Environment
-
-1. Copy the `.env.template` and fill in your API keys. You'll need:
-
-- [Deepgram](https://deepgram.com) (for speech transcription)
-- [OpenAI](https://platform.openai.com) (for the underlying agent)
-- [Azure](https://azure.microsoft.com/en-us/products/cognitive-services/text-to-speech/) (for speech synthesis)
-- [Twilio](https://console.twilio.com/) (for telephony)
-
-2. Tunnel port 3000 to ngrok by running:
-
-```
-ngrok http 3000
-```
-
-Fill in the `TELEPHONY_SERVER_BASE_URL` environment variable with your ngrok base URL: don't include `https://` so should be something like:
-
-```
-TELEPHONY_SERVER_BASE_URL=asdf1234.ngrok.app
-```
-
-3. Buy a phone number on Twilio or [verify your caller ID](https://support.twilio.com/hc/en-us/articles/223180048-Adding-a-Verified-Phone-Number-or-Caller-ID-with-Twilio) to use as the outbound phone number.
-   Set this phone number as the `OUTBOUND_CALLER_NUMBER` environment variable. Include `+` and the area code, so for a US phone number, it would look something like.
-
-```
-OUTBOUND_CALLER_NUMBER=+15555555555
-```
-
-### Set up self-hosted telephony server
-
-Run the following setups from the `langchain_agent` directory.
-
-#### Running with Docker
-
-1. Build the telephony server Docker image
+**Note:** Vocode does not support actions with Langchain agents.
 
+### Installation
+Make sure to install the langchain optional dependencies by running
 ```
-docker build -t vocode-langchain-agent-telephony-app .
+poetry install -E langchain -E langchain-extras
 ```
-
-2. Run the service using `docker-compose`
-
-```
-docker-compose up
-```
-
-#### Running with Python
-
-1. (optional) Set up a Python environment: we recommend `virtualenv`
-
-```
-python3 -m venv venv
-source venv/bin/activate
-```
-
-2. Install requirements
-
-```bash
-poetry install
-```
-
-3. Run an instance of Redis at http://localhost:6379. With Docker, this can be done with:
-
+or
 ```
-docker run -dp 6379:6379 -it redis/redis-stack:latest
+poetry install -E all
 ```
 
-4. Run the `TelephonyServer`:
-
-```bash
-uvicorn telephony_app:app --reload --port 3000
-```
-
-### Set up the Langchain agent
-
-With the self-hosted telephony server running:
-
-1. Update the phone numbers in the contact book in `tools/contacts.py`
+## Default Chain
+Vocode Core's Langchain agent defaults to using the `init_chat_model()` method described [here](https://python.langchain.com/v0.2/docs/how_to/chat_models_universal_init/).
+This implementation allows users to create a Langchain agent using a variety of [different model providers](https://api.python.langchain.com/en/latest/chat_models/langchain.chat_models.base.init_chat_model.html)
+by passing in the relevant `model_name` and `provider` params into the `LangchainAgentConfig`. For example, to use an OpenAI model, you would pass in an agent config like:
 
 ```python
-CONTACTS = [{"name": "Kian", "phone": "+123456789"}]
+from vocode.streaming.models.agent import LangchainAgentConfig
+
+agent_config = LangchainAgentConfig(
+    ...
+    model_name = "gpt-4o",
+    provider = "openai",
+    ...
+)
 ```
 
-2. Run `main.py`
+**Note:** Vocode Core includes the OpenAI, Anthropic, and Google VertexAI Langchain packages when you install the langchain extras in Poetry. If you want to use other LLM providers
+like AWS Bedrock, Cohere, Mistral, etc, you will need to manually install their Langchain integration packages.
 
-```bash
-poetry install
-poetry run python main.py
-```
+## Using Custom Chains
+Our Langchain Agent is designed to make it easy to plug in your own custom Langchain chains. You can either:
+1. Manually pass in a chain to the Langchain Agent
+2. Subclass the Langchain Agent and build custom processing to create a chain based off a `LangchainAgentConfig`
 
-# Code explanation
-The Langchain agent is implemented in `main.py`. It uses the Langchain library to initialize an agent that can have a conversation.
+### Manually pass in a chain
+The `LangchainAgent` constructor has a `chain` parameter where you can directly pass your chain. So, to use this in a conversation, you can create a custom `AgentFactory` that builds
+your chain when initializing the langchain agent.
 
-## Langchain Agent
-`main.py` instantiates the langchain agent and relevant tools. It sets an objective, initializes a Langchain agent, and runs the conversation.
+For example, we will design a factory which makes a custom chain querying Anthropic Claude Opus to make a poem at each agent turn:
 ```python
-agent = initialize_agent(
-    tools=[get_all_contacts, call_phone_number, word_of_the_day],
-    llm=llm,
-    agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
-    verbose=verbose,
-    memory=memory,
-)
+from vocode.streaming.agent.abstract_factory import AbstractAgentFactory
+from vocode.streaming.models.agent import LangchainAgentConfig
+from vocode.streaming.agent.langchain_agent import LangchainAgent
+from langchain_anthropic import ChatAnthropic
+from langchain_core.prompts import ChatPromptTemplate
+
+class PoemAgentFactory(AbstractAgentFactory):
+    def create_agent(
+        self, agent_config: AgentConfig, logger: Optional[logging.Logger] = None
+    ) -> BaseAgent:
+        if isinstance(agent_config, LangchainAgentConfig):
+            prompt_template = ChatPromptTemplate.from_template("Make a random poem")
+            model = ChatAnthropic(model='claude-3-opus-20240229')
+            chain = prompt_template | model
+            return LangchainAgent(
+                agent_config = agent_config,
+                chain = chain,
+            )
+        else:
+            raise Exception("Invalid agent config")
 ```
 
-## Langchain tools
+### Creating custom chains from `LangchainAgentConfig`
+In some scenarios, you may want to create a complex chain from a config, where you can have different models and providers. For these cases, we recommend creating a subclass of the `LangchainAgent`
+and overriding the `create_chain()` method. This method is called when a `LangchainAgent` is initialized without a `chain` manually passed into the constructor.
+Within this method, you can directly access the agent config at `self.agent_config` and build your own chain using its fields.
 
-### Tool to get all contacts
-```python
-@tool("get_all_contacts")
-def get_all_contacts(placeholder: str) -> List[dict]:
-    """Get contacts."""
-    return CONTACTS
-```
+In the example below, we will design an agent that builds a custom chain to query a Gemini LLM to generate a poem on a topic.
+The topic and LLM setup (provider and model name) will all be passed in via the config, allowing for strong customization.
+As a further example of this customizability, we will confirm the LLM provider is set to Google GenAI and raise an error otherwise.
 
-### Tool to call phone number
-`tools/vocode.py` makes use of the `OutboundCall` class to initiate a phone call
 ```python
-@tool("call phone number")
-def call_phone_number(input: str) -> str:
-    """calls a phone number as a bot and returns a transcript of the conversation.
-    the input to this tool is a pipe separated list of a phone number, a prompt, and the first thing the bot should say.
-    The prompt should instruct the bot with what to do on the call and be in the 3rd person,
-    like 'the assistant is performing this task' instead of 'perform this task'.
-
-    should only use this tool once it has found an adequate phone number to call.
-
-    for example, `+15555555555|the assistant is explaining the meaning of life|i'm going to tell you the meaning of life` will call +15555555555, say 'i'm going to tell you the meaning of life', and instruct the assistant to tell the human what the meaning of life is.
-    """
-    phone_number, prompt, initial_message = input.split("|", 2)
-    call = OutboundCall(
-        base_url=os.environ["TELEPHONY_SERVER_BASE_URL"],
-        to_phone=phone_number,
-        from_phone=os.environ["OUTBOUND_CALLER_NUMBER"],
-        config_manager=RedisConfigManager(),
-        agent_config=ChatGPTAgentConfig(
-            prompt_preamble=prompt,
-            initial_message=BaseMessage(text=initial_message),
-        ),
-        logger=logging.Logger("call_phone_number"),
-    )
-    LOOP.run_until_complete(call.start())
-    while True:
-        maybe_transcript = get_transcript(call.conversation_id)
-        if maybe_transcript:
-            delete_transcript(call.conversation_id)
-            return maybe_transcript
+from vocode.streaming.agent.abstract_factory import AbstractAgentFactory
+from vocode.streaming.models.agent import LangchainAgentConfig
+from vocode.streaming.agent.langchain_agent import LangchainAgent
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_core.prompts import ChatPromptTemplate
+
+class PoemLangchainAgentConfig(LangchainAgentConfig):
+    poem_topic: str
+
+class PoemLangchainAgent(LangchainAgent):
+    def create_chain(self):
+        if self.agent_config.provider != "google_genai":
+            raise Exception("PoemLangchainAgent only supports Google Generative AI models")
+        
+        prompt_template = ChatPromptTemplate.from_template(f"Make a random poem about {self.agent_config.poem_topic}")
+        model = ChatGoogleGenerativeAI(
+            model=self.agent_config.model_name, 
+            temperature=self.agent_config.temperature,
+            max_output_tokens=self.agent_config.max_tokens
+        )
+        chain = prompt_template | model
+        return chain
+
+
+class MyAgentFactory(AbstractAgentFactory):
+    def create_agent(
+        self, agent_config: AgentConfig, logger: Optional[logging.Logger] = None
+    ) -> BaseAgent:
+        if isinstance(agent_config, PoemLangchainAgentConfig):
+            return PoemLangchainAgent(
+                agent_config=agent_config
+            )
+        elif isinstance(agent_config, LangchainAgentConfig):
+            return LangchainAgent(
+                agent_config=agent_config,
+            )
         else:
-            time.sleep(1)
+            raise Exception("Invalid agent config")
 ```
 
-## TelephonyServer
-`telephony_app.py` instantiates a `TelephonyServer` object to manage the phone call initiated by `OutboundCall`
+Then, we can use the following agent config in conversations to make poems about Vocode!
 ```python
-telephony_server = TelephonyServer(
-    base_url=BASE_URL,
-    config_manager=config_manager,
-    inbound_call_configs=[],
-    events_manager=EventsManager(),
-    logger=logger,
-)
+from vocode.streaming.models.agent import LangchainAgentConfig
 
-app.include_router(telephony_server.get_router())
+agent_config = PoemLangchainAgentConfig(
+    ...
+    poem_topic = "Vocode",
+    provider = "google_genai",
+    ...
+)
 ```
\ No newline at end of file
diff --git a/vocode/streaming/agent/langchain_agent.py b/vocode/streaming/agent/langchain_agent.py
index 9daa426ca..77045cb5e 100644
--- a/vocode/streaming/agent/langchain_agent.py
+++ b/vocode/streaming/agent/langchain_agent.py
@@ -37,10 +37,13 @@ def __init__(
         self.chain = chain if chain else self.create_chain()
 
     def create_chain(self):
-        model = init_chat_model(model = self.agent_config.model_name, model_provider=self.agent_config.provider, temperature=self.agent_config.temperature, max_tokens=self.agent_config.max_tokens)
-        messages_for_prompt_template = [
-            ("placeholder", "{chat_history}")
-        ]
+        model = init_chat_model(
+            model=self.agent_config.model_name,
+            model_provider=self.agent_config.provider,
+            temperature=self.agent_config.temperature,
+            max_tokens=self.agent_config.max_tokens,
+        )
+        messages_for_prompt_template = [("placeholder", "{chat_history}")]
         if self.agent_config.prompt_preamble:
             messages_for_prompt_template.insert(0, ("system", self.agent_config.prompt_preamble))
         prompt_template = ChatPromptTemplate.from_messages(messages_for_prompt_template)
@@ -55,7 +58,9 @@ async def token_generator(
             if isinstance(chunk.content, str):
                 yield chunk.content
             else:
-                raise ValueError(f"Received unexpected message type {type(chunk)} from Langchain. Expected str.")
+                raise ValueError(
+                    f"Received unexpected message type {type(chunk)} from Langchain. Expected str."
+                )
 
     def format_langchain_messages_from_transcript(self) -> list[tuple]:
         if not self.transcript:
@@ -64,14 +69,19 @@ def format_langchain_messages_from_transcript(self) -> list[tuple]:
         for event_log in self.transcript.event_logs:
             if isinstance(event_log, Message):
                 messages.append(
-                    ("ai" if event_log.sender == Sender.BOT else "human", event_log.to_string(include_sender=False))
+                    (
+                        "ai" if event_log.sender == Sender.BOT else "human",
+                        event_log.to_string(include_sender=False),
+                    )
                 )
             else:
-                raise ValueError(f"Invalid event log type {type(event_log)}. Langchain currently only supports human and bot messages")
-            
+                raise ValueError(
+                    f"Invalid event log type {type(event_log)}. Langchain currently only supports human and bot messages"
+                )
+
         if self.agent_config.provider == "anthropic":
             messages = merge_bot_messages_for_langchain(messages)
-        
+
         return messages
 
     async def generate_response(
@@ -91,7 +101,9 @@ async def generate_response(
             ttft_span = sentry_create_span(
                 sentry_callable=sentry_sdk.start_span, op=CustomSentrySpans.TIME_TO_FIRST_TOKEN
             )
-            stream = self.chain.astream({"chat_history": self.format_langchain_messages_from_transcript()})
+            stream = self.chain.astream(
+                {"chat_history": self.format_langchain_messages_from_transcript()}
+            )
         except Exception as e:
             logger.error(
                 f"Error while hitting Langchain",