From 61e285fc87c6ea4311a84588a673b1fae6b7e476 Mon Sep 17 00:00:00 2001 From: Adnaan Sachidanandan Date: Tue, 18 Jun 2024 17:01:56 -0700 Subject: [PATCH] add langchain agent docs (#564) --- docs/mint.json | 5 +- docs/open-source/langchain-agent-dep.mdx | 209 ++++++++++++++++ docs/open-source/langchain-agent.mdx | 285 ++++++++-------------- vocode/streaming/agent/langchain_agent.py | 32 ++- 4 files changed, 340 insertions(+), 191 deletions(-) create mode 100644 docs/open-source/langchain-agent-dep.mdx diff --git a/docs/mint.json b/docs/mint.json index f2a629542..1271ee8c0 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -82,13 +82,14 @@ "open-source/sentry", "open-source/logging-with-loguru", "open-source/turn-based-conversation", - "open-source/language-support" + "open-source/language-support", + "open-source/langchain-agent" ] }, { "group": "Legacy (0.0.111) Guides", "pages": [ - "open-source/langchain-agent", + "open-source/langchain-agent-dep", "open-source/local-conversation" ] }, diff --git a/docs/open-source/langchain-agent-dep.mdx b/docs/open-source/langchain-agent-dep.mdx new file mode 100644 index 000000000..a5cafccb3 --- /dev/null +++ b/docs/open-source/langchain-agent-dep.mdx @@ -0,0 +1,209 @@ +--- +title: "(Deprecated) Langchain Agent" +description: "Empower Langchain agents to interact with the real world via phone calls" +--- + +# Introduction + +This example shows how to use Vocode as a tool augmenting the abilities of a Langchain +agent. By providing it with access to Vocode, a [Langchain Agent](https://python.langchain.com/en/latest/modules/agents.html) +can now make autonomous phone calls and take action based on the outcome of the calls. + +Our demo will walk through how to instruct the agent to lookup a phone number and make the +appropriate call. + +
+ +
+
+ +# How to run it + +## Requirements + +1. Install [Ngrok](https://ngrok.com/) +2. Install [Redis](https://redis.com/) +3. Install [Poetry](https://python-poetry.org/) + +## Run the example + +Note: `gpt-4` is required for this to work. `gpt-3.5-turbo` or older models are not smart enough to parse JSON responses in Langchain agents reliably out-of-the-box. + +To get started, clone the Vocode repo or copy the [Langchain agent app](https://github.com/vocodedev/vocode-python/tree/main/apps/langchain_agent) directory. + +```bash +git clone https://github.com/vocodedev/vocode-python.git +``` + +### Environment + +1. Copy the `.env.template` and fill in your API keys. You'll need: + +- [Deepgram](https://deepgram.com) (for speech transcription) +- [OpenAI](https://platform.openai.com) (for the underlying agent) +- [Azure](https://azure.microsoft.com/en-us/products/cognitive-services/text-to-speech/) (for speech synthesis) +- [Twilio](https://console.twilio.com/) (for telephony) + +2. Tunnel port 3000 to ngrok by running: + +``` +ngrok http 3000 +``` + +Fill in the `TELEPHONY_SERVER_BASE_URL` environment variable with your ngrok base URL: don't include `https://` so should be something like: + +``` +TELEPHONY_SERVER_BASE_URL=asdf1234.ngrok.app +``` + +3. Buy a phone number on Twilio or [verify your caller ID](https://support.twilio.com/hc/en-us/articles/223180048-Adding-a-Verified-Phone-Number-or-Caller-ID-with-Twilio) to use as the outbound phone number. + Set this phone number as the `OUTBOUND_CALLER_NUMBER` environment variable. Include `+` and the area code, so for a US phone number, it would look something like. + +``` +OUTBOUND_CALLER_NUMBER=+15555555555 +``` + +### Set up self-hosted telephony server + +Run the following setups from the `langchain_agent` directory. + +#### Running with Docker + +1. Build the telephony server Docker image + +``` +docker build -t vocode-langchain-agent-telephony-app . +``` + +2. 
Run the service using `docker-compose` + +``` +docker-compose up +``` + +#### Running with Python + +1. (optional) Set up a Python environment: we recommend `virtualenv` + +``` +python3 -m venv venv +source venv/bin/activate +``` + +2. Install requirements + +```bash +poetry install +``` + +3. Run an instance of Redis at http://localhost:6379. With Docker, this can be done with: + +``` +docker run -dp 6379:6379 -it redis/redis-stack:latest +``` + +4. Run the `TelephonyServer`: + +```bash +uvicorn telephony_app:app --reload --port 3000 +``` + +### Set up the Langchain agent + +With the self-hosted telephony server running: + +1. Update the phone numbers in the contact book in `tools/contacts.py` + +```python +CONTACTS = [{"name": "Kian", "phone": "+123456789"}] +``` + +2. Run `main.py` + +```bash +poetry install +poetry run python main.py +``` + +# Code explanation +The Langchain agent is implemented in `main.py`. It uses the Langchain library to initialize an agent that can have a conversation. + +## Langchain Agent +`main.py` instantiates the langchain agent and relevant tools. It sets an objective, initializes a Langchain agent, and runs the conversation. +```python +agent = initialize_agent( + tools=[get_all_contacts, call_phone_number, word_of_the_day], + llm=llm, + agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION, + verbose=verbose, + memory=memory, +) +``` + +## Langchain tools + +### Tool to get all contacts +```python +@tool("get_all_contacts") +def get_all_contacts(placeholder: str) -> List[dict]: + """Get contacts.""" + return CONTACTS +``` + +### Tool to call phone number +`tools/vocode.py` makes use of the `OutboundCall` class to initiate a phone call +```python +@tool("call phone number") +def call_phone_number(input: str) -> str: + """calls a phone number as a bot and returns a transcript of the conversation. + the input to this tool is a pipe separated list of a phone number, a prompt, and the first thing the bot should say. 
+ The prompt should instruct the bot with what to do on the call and be in the 3rd person, + like 'the assistant is performing this task' instead of 'perform this task'. + + should only use this tool once it has found an adequate phone number to call. + + for example, `+15555555555|the assistant is explaining the meaning of life|i'm going to tell you the meaning of life` will call +15555555555, say 'i'm going to tell you the meaning of life', and instruct the assistant to tell the human what the meaning of life is. + """ + phone_number, prompt, initial_message = input.split("|", 2) + call = OutboundCall( + base_url=os.environ["TELEPHONY_SERVER_BASE_URL"], + to_phone=phone_number, + from_phone=os.environ["OUTBOUND_CALLER_NUMBER"], + config_manager=RedisConfigManager(), + agent_config=ChatGPTAgentConfig( + prompt_preamble=prompt, + initial_message=BaseMessage(text=initial_message), + ), + logger=logging.Logger("call_phone_number"), + ) + LOOP.run_until_complete(call.start()) + while True: + maybe_transcript = get_transcript(call.conversation_id) + if maybe_transcript: + delete_transcript(call.conversation_id) + return maybe_transcript + else: + time.sleep(1) +``` + +## TelephonyServer +`telephony_app.py` instantiates a `TelephonyServer` object to manage the phone call initiated by `OutboundCall` +```python +telephony_server = TelephonyServer( + base_url=BASE_URL, + config_manager=config_manager, + inbound_call_configs=[], + events_manager=EventsManager(), + logger=logger, +) + +app.include_router(telephony_server.get_router()) +``` \ No newline at end of file diff --git a/docs/open-source/langchain-agent.mdx b/docs/open-source/langchain-agent.mdx index d2ceb2efb..dbb1e79f5 100644 --- a/docs/open-source/langchain-agent.mdx +++ b/docs/open-source/langchain-agent.mdx @@ -1,209 +1,136 @@ --- -title: "Langchain agent" -description: "Empower Langchain agents to interact with the real world via phone calls" +title: "Langchain Agent" +description: "Use Langchain to determine 
your agent's responses." --- -# Introduction +## Overview -This example shows how to use Vocode as a tool augmenting the abilities of a Langchain -agent. By providing it with access to Vocode, a [Langchain Agent](https://python.langchain.com/en/latest/modules/agents.html) -can now make autonomous phone calls and take action based on the outcome of the calls. +[Langchain](https://python.langchain.com/v0.2/docs/introduction/) offers tooling to create custom LLM pipelines for complex decision-making. +Through Langchain, you can manage your LLM and prompts, and combine them with advanced techniques like RAG, multi-stage prompting, and sub-chains. +The library also offers components for output parsing, complex document loading, and callbacks. -Our demo will walk through how to instruct the agent to lookup a phone number and make the -appropriate call. - -
- -
-
- -# How to run it - -## Requirements - -1. Install [Ngrok](https://ngrok.com/) -2. Install [Redis](https://redis.com/) -3. Install [Poetry](https://python-poetry.org/) - -## Run the example - -Note: `gpt-4` is required for this to work. `gpt-3.5-turbo` or older models are not smart enough to parse JSON responses in Langchain agents reliably out-of-the-box. - -To get started, clone the Vocode repo or copy the [Langchain agent app](https://github.com/vocodedev/vocode-python/tree/main/apps/langchain_agent) directory. - -```bash -git clone https://github.com/vocodedev/vocode-python.git -``` - -### Environment - -1. Copy the `.env.template` and fill in your API keys. You'll need: - -- [Deepgram](https://deepgram.com) (for speech transcription) -- [OpenAI](https://platform.openai.com) (for the underlying agent) -- [Azure](https://azure.microsoft.com/en-us/products/cognitive-services/text-to-speech/) (for speech synthesis) -- [Twilio](https://console.twilio.com/) (for telephony) - -2. Tunnel port 3000 to ngrok by running: - -``` -ngrok http 3000 -``` - -Fill in the `TELEPHONY_SERVER_BASE_URL` environment variable with your ngrok base URL: don't include `https://` so should be something like: - -``` -TELEPHONY_SERVER_BASE_URL=asdf1234.ngrok.app -``` - -3. Buy a phone number on Twilio or [verify your caller ID](https://support.twilio.com/hc/en-us/articles/223180048-Adding-a-Verified-Phone-Number-or-Caller-ID-with-Twilio) to use as the outbound phone number. - Set this phone number as the `OUTBOUND_CALLER_NUMBER` environment variable. Include `+` and the area code, so for a US phone number, it would look something like. - -``` -OUTBOUND_CALLER_NUMBER=+15555555555 -``` - -### Set up self-hosted telephony server - -Run the following setups from the `langchain_agent` directory. - -#### Running with Docker - -1. 
Build the telephony server Docker image +**Note:** Vocode does not support actions with Langchain agents. +### Installation +Make sure to install the langchain optional dependencies by running ``` -docker build -t vocode-langchain-agent-telephony-app . +poetry install -E langchain -E langchain-extras ``` - -2. Run the service using `docker-compose` - -``` -docker-compose up -``` - -#### Running with Python - -1. (optional) Set up a Python environment: we recommend `virtualenv` - -``` -python3 -m venv venv -source venv/bin/activate -``` - -2. Install requirements - -```bash -poetry install -``` - -3. Run an instance of Redis at http://localhost:6379. With Docker, this can be done with: - +or ``` -docker run -dp 6379:6379 -it redis/redis-stack:latest +poetry install -E all ``` -4. Run the `TelephonyServer`: - -```bash -uvicorn telephony_app:app --reload --port 3000 -``` - -### Set up the Langchain agent - -With the self-hosted telephony server running: - -1. Update the phone numbers in the contact book in `tools/contacts.py` +## Default Chain +Vocode Core's Langchain agent defaults to using the `init_chat_model()` method described [here](https://python.langchain.com/v0.2/docs/how_to/chat_models_universal_init/). +This implementation allows users to create a Langchain agent using a variety of [different model providers](https://api.python.langchain.com/en/latest/chat_models/langchain.chat_models.base.init_chat_model.html) +by passing in the relevant `model_name` and `provider` params into the `LangchainAgentConfig`. For example, if I want to use an OpenAI agent, I would pass in an agent config like: ```python -CONTACTS = [{"name": "Kian", "phone": "+123456789"}] +from vocode.streaming.models.agent import LangchainAgentConfig + +agent_config = LangchainAgentConfig( + ... + model_name = "gpt-4o" + provider = "openai" + ... +) ``` -2. 
Run `main.py` +**Note:** Vocode Core includes the OpenAI, Anthropic, and Google VertexAI Langchain packages when you install the langchain extras in Poetry. If you want to use other LLM providers +like AWS Bedrock, Cohere, Mistral, etc., you will need to manually install their Langchain integration packages. -```bash -poetry install -poetry run python main.py -``` +## Using Custom Chains +Our Langchain Agent is designed to make it easy to plug in your own custom Langchain chains. You can either: +1. Manually pass in a chain to the Langchain Agent +2. Subclass the Langchain Agent and build custom processing to create a chain based off a `LangchainAgentConfig` -# Code explanation -The Langchain agent is implemented in `main.py`. It uses the Langchain library to initialize an agent that can have a conversation. +### Manually pass in a chain +The `LangchainAgent` constructor has a `chain` parameter where you can directly pass your chain. So, to use this in a conversation, you can create a custom `AgentFactory` that builds +your chain when initializing the langchain agent. -## Langchain Agent -`main.py` instantiates the langchain agent and relevant tools. It sets an objective, initializes a Langchain agent, and runs the conversation. 
+For example, we will design a factory which makes a custom chain querying Anthropic Claude Opus to make a poem at each agent turn: ```python -agent = initialize_agent( - tools=[get_all_contacts, call_phone_number, word_of_the_day], - llm=llm, - agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION, - verbose=verbose, - memory=memory, -) +from vocode.streaming.agent.abstract_factory import AbstractAgentFactory +from vocode.streaming.models.agent import LangchainAgentConfig +from vocode.streaming.agent.langchain_agent import LangchainAgent +from langchain_anthropic import ChatAnthropic +from langchain_core.prompts import ChatPromptTemplate + +class PoemAgentFactory(AbstractAgentFactory): + def create_agent( + self, agent_config: AgentConfig, logger: Optional[logging.Logger] = None + ) -> BaseAgent: + if isinstance(agent_config, LangchainAgentConfig): + prompt_template = ChatPromptTemplate.from_template("Make a random poem") + model = ChatAnthropic(model='claude-3-opus-20240229') + chain = prompt_template | model + return LangchainAgent( + agent_config = agent_config, + chain = chain, + ) + else: + raise Exception("Invalid agent config") ``` -## Langchain tools +### Creating custom chains from `LangchainAgentConfig` +In some scenarios, you may want to create a complex chain from a config, where you can have different models and providers. For these cases, we recommend creating a subclass of the `LangchainAgent` +and overriding the `self.create_chain()` method. This method is called when a `LangchainAgent` is initialized without a `chain` manually passed into the constructor. +Within this method, you can directly access the agent config at `self.agent_config` and build your own chain using its fields. 
-### Tool to get all contacts -```python -@tool("get_all_contacts") -def get_all_contacts(placeholder: str) -> List[dict]: - """Get contacts.""" - return CONTACTS -``` +In the example below, we will design an agent that builds a custom chain to query a Gemini LLM to generate a poem on a topic. +The topic and LLM setup (provider and model name) will all be passed in via the config, allowing for strong customization. +As a further example of this customizability, we will confirm the LLM provider is set to Google GenAI and raise an error otherwise. -### Tool to call phone number -`tools/vocode.py` makes use of the `OutboundCall` class to initiate a phone call ```python -@tool("call phone number") -def call_phone_number(input: str) -> str: - """calls a phone number as a bot and returns a transcript of the conversation. - the input to this tool is a pipe separated list of a phone number, a prompt, and the first thing the bot should say. - The prompt should instruct the bot with what to do on the call and be in the 3rd person, - like 'the assistant is performing this task' instead of 'perform this task'. - - should only use this tool once it has found an adequate phone number to call. - - for example, `+15555555555|the assistant is explaining the meaning of life|i'm going to tell you the meaning of life` will call +15555555555, say 'i'm going to tell you the meaning of life', and instruct the assistant to tell the human what the meaning of life is. 
- """ - phone_number, prompt, initial_message = input.split("|", 2) - call = OutboundCall( - base_url=os.environ["TELEPHONY_SERVER_BASE_URL"], - to_phone=phone_number, - from_phone=os.environ["OUTBOUND_CALLER_NUMBER"], - config_manager=RedisConfigManager(), - agent_config=ChatGPTAgentConfig( - prompt_preamble=prompt, - initial_message=BaseMessage(text=initial_message), - ), - logger=logging.Logger("call_phone_number"), - ) - LOOP.run_until_complete(call.start()) - while True: - maybe_transcript = get_transcript(call.conversation_id) - if maybe_transcript: - delete_transcript(call.conversation_id) - return maybe_transcript +from vocode.streaming.agent.abstract_factory import AbstractAgentFactory +from vocode.streaming.models.agent import LangchainAgentConfig +from vocode.streaming.agent.langchain_agent import LangchainAgent +from langchain_google_genai import ChatGoogleGenerativeAI +from langchain_core.prompts import ChatPromptTemplate + +class PoemLangchainAgentConfig(LangchainAgentConfig): + poem_topic: str + +class PoemLangchainAgent(LangchainAgent): + def create_chain(self): + if self.agent_config.provider != "google_genai": + raise Exception("PoemLangchainAgent only supports Google Generative AI models") + + prompt_template = ChatPromptTemplate.from_template(f"Make a random poem about {self.agent_config.poem_topic}") + model = ChatGoogleGenerativeAI( + model=self.agent_config.model_name, + temperature=self.agent_config.temperature, + max_output_tokens=self.agent_config.max_tokens + ) + chain = prompt_template | model + return chain + + +class MyAgentFactory(AbstractAgentFactory): + def create_agent( + self, agent_config: AgentConfig, logger: Optional[logging.Logger] = None + ) -> BaseAgent: + if isinstance(agent_config, PoemLangchainAgentConfig): + return PoemLangchainAgent( + agent_config=agent_config + ) + elif isinstance(agent_config, LangchainAgentConfig): + return LangchainAgent( + agent_config=agent_config, + ) else: - time.sleep(1) + raise 
Exception("Invalid agent config") ``` -## TelephonyServer -`telephony_app.py` instantiates a `TelephonyServer` object to manage the phone call initiated by `OutboundCall` +Then, we can use the following agent config in conversations to make poems about Vocode! ```python -telephony_server = TelephonyServer( - base_url=BASE_URL, - config_manager=config_manager, - inbound_call_configs=[], - events_manager=EventsManager(), - logger=logger, -) +from vocode.streaming.models.agent import LangchainAgentConfig -app.include_router(telephony_server.get_router()) +agent_config = LangchainAgentConfig( + ... + model_name = "Vocode" + provider = "poem" + ... +) ``` \ No newline at end of file diff --git a/vocode/streaming/agent/langchain_agent.py b/vocode/streaming/agent/langchain_agent.py index 9daa426ca..77045cb5e 100644 --- a/vocode/streaming/agent/langchain_agent.py +++ b/vocode/streaming/agent/langchain_agent.py @@ -37,10 +37,13 @@ def __init__( self.chain = chain if chain else self.create_chain() def create_chain(self): - model = init_chat_model(model = self.agent_config.model_name, model_provider=self.agent_config.provider, temperature=self.agent_config.temperature, max_tokens=self.agent_config.max_tokens) - messages_for_prompt_template = [ - ("placeholder", "{chat_history}") - ] + model = init_chat_model( + model=self.agent_config.model_name, + model_provider=self.agent_config.provider, + temperature=self.agent_config.temperature, + max_tokens=self.agent_config.max_tokens, + ) + messages_for_prompt_template = [("placeholder", "{chat_history}")] if self.agent_config.prompt_preamble: messages_for_prompt_template.insert(0, ("system", self.agent_config.prompt_preamble)) prompt_template = ChatPromptTemplate.from_messages(messages_for_prompt_template) @@ -55,7 +58,9 @@ async def token_generator( if isinstance(chunk.content, str): yield chunk.content else: - raise ValueError(f"Received unexpected message type {type(chunk)} from Langchain. 
Expected str.") + raise ValueError( + f"Received unexpected message type {type(chunk)} from Langchain. Expected str." + ) def format_langchain_messages_from_transcript(self) -> list[tuple]: if not self.transcript: @@ -64,14 +69,19 @@ def format_langchain_messages_from_transcript(self) -> list[tuple]: for event_log in self.transcript.event_logs: if isinstance(event_log, Message): messages.append( - ("ai" if event_log.sender == Sender.BOT else "human", event_log.to_string(include_sender=False)) + ( + "ai" if event_log.sender == Sender.BOT else "human", + event_log.to_string(include_sender=False), + ) ) else: - raise ValueError(f"Invalid event log type {type(event_log)}. Langchain currently only supports human and bot messages") - + raise ValueError( + f"Invalid event log type {type(event_log)}. Langchain currently only supports human and bot messages" + ) + if self.agent_config.provider == "anthropic": messages = merge_bot_messages_for_langchain(messages) - + return messages async def generate_response( @@ -91,7 +101,9 @@ async def generate_response( ttft_span = sentry_create_span( sentry_callable=sentry_sdk.start_span, op=CustomSentrySpans.TIME_TO_FIRST_TOKEN ) - stream = self.chain.astream({"chat_history": self.format_langchain_messages_from_transcript()}) + stream = self.chain.astream( + {"chat_history": self.format_langchain_messages_from_transcript()} + ) except Exception as e: logger.error( f"Error while hitting Langchain",