
add langchain RAG agent to integrations
zmezei committed Feb 15, 2024
1 parent 8257a9e commit f9a7e38
Showing 8 changed files with 3,210 additions and 0 deletions.
69 changes: 69 additions & 0 deletions integrations/langchain-rag/README.md
@@ -0,0 +1,69 @@
# Langchain RAG integration

The Langchain RAG integration example is a guide to setting up and using RAG (retrieval augmented generation) in a uAgent. It shows how to create a RAG application that can answer questions based on an online document.
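
Under the hood, the agent loads the page(s) at the given URL, indexes them in a FAISS vector store with OpenAI embeddings, reranks the retrieved chunks with Cohere, and asks an OpenAI chat model to answer from that context. Below is a condensed sketch of that pipeline, based on the code in `src/agents/langchain_rag_agent.py` (the URL and question are just the example defaults, and both API keys must already be set in the environment):

```python
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CohereRerank

# Load and split the target page, index it, and wrap the retriever with a Cohere reranker
docs = UnstructuredURLLoader(urls=["https://fetch.ai/docs/guides/agents/installing-uagent"]).load_and_split()
db = FAISS.from_documents(docs, OpenAIEmbeddings())
retriever = ContextualCompressionRetriever(
    base_compressor=CohereRerank(), base_retriever=db.as_retriever()
)

# Retrieve the most relevant chunks and answer only from them
question = "How to install uagents using pip"
context = "\n\n---\n\n".join(doc.page_content for doc in retriever.get_relevant_documents(question))
answer = ChatOpenAI(model="gpt-3.5-turbo-1106").predict(
    f"Answer the question based only on the following context:\n{context}\n---\n{question}"
)
print(answer)
```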

## Prerequisites

- Python (v3.10+ recommended)
- Poetry (a Python packaging and dependency management tool)

## Setup

1. For the demo to work, you need to get some API keys:

- Visit the [Cohere website](https://dashboard.cohere.com/).
- Sign up or log in.
- Navigate to `API Keys`.
- Copy an existing key or create a new one.

- Visit the [OpenAI website](https://openai.com/).
- Sign up or log in.
- Navigate to the API section to obtain your API key.

Note that if you’ve run out of OpenAI credits, you will not be able to get results for this example.

2. In the `langchain-rag/src` directory, create a `.env` file and set your API keys and agent seed:

```
export COHERE_API_KEY="{GET THE API KEY}"
export OPENAI_API_KEY="{GET THE API KEY}"
export LANGCHAIN_RAG_SEED="{PUT ANY RANDOM STRING HERE}"
```
3. In the `langchain-rag` directory, install all dependencies:
```bash
poetry install
```
4. To load the environment variables from `.env` (see the sketch after this list for how the agent consumes them):
```bash
cd src
source .env
```
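
For reference, this is how the agent picks up these values at startup (excerpted from `src/agents/langchain_rag_agent.py`); the seed can be any stable string you choose:

```python
import os
from uagents import Agent

# COHERE_API_KEY and OPENAI_API_KEY are read directly by the Cohere and OpenAI clients;
# the seed is read explicitly and deterministically derives the agent's address.
LANGCHAIN_RAG_SEED = os.getenv("LANGCHAIN_RAG_SEED", "")
assert (
    LANGCHAIN_RAG_SEED
), "LANGCHAIN_RAG_SEED environment variable is missing from .env"

agent = Agent(
    name="langchain_rag_agent",
    seed=LANGCHAIN_RAG_SEED,
    port=8001,
    endpoint=["http://127.0.0.1:8001/submit"],
)
```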
## Running The Main Script
To run the project, use the command:
```
poetry run python main.py
```
After running the command, a request is sent to the agent every minute and the results are printed to the console. Look for the following output in the logs:
```
Adding RAG agent to Bureau: {agent_address}
```
Copy the `{agent_address}` value and replace `RAG_AGENT_ADDRESS` with it in `src/agents/langchain_rag_user.py` (see the snippet after the defaults below).
The same file defines the variables `QUESTION`, `URL` and `DEEP_READ`; change their values to customize the question you want answered. The default values are:
```
QUESTION = "How to install uagents using pip"
URL = "https://fetch.ai/docs/guides/agents/installing-uagent"
DEEP_READ = "no" # it means nested pages at the URL won't be parsed, just the actual URL
```
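
Pointing the user agent at your own RAG agent is then a one-line change; the address below is the default shipped with the example, so replace it with the one printed in your logs:

```python
# src/agents/langchain_rag_user.py
RAG_AGENT_ADDRESS = "agent1q0yu4450vryngsxv6un8t5x8hwrprkznay2f49a5y4384jn0tgxj62jf3h8"  # replace with your agent's address
```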
Now you can enjoy answering questions with the Langchain RAG agent!
2,913 changes: 2,913 additions & 0 deletions integrations/langchain-rag/poetry.lock

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions integrations/langchain-rag/project.json
@@ -0,0 +1,5 @@
{
"title": "Langchain RAG",
"description": "Langchain RAG. RAG (retrieval augmentred generation) enables developers to improve the quality of LLM-generated responses by grounding the model on external sources of knowledge.",
"categories": ["Text Generation", "LLM", "OpenAI", "RAG"]
}
19 changes: 19 additions & 0 deletions integrations/langchain-rag/pyproject.toml
@@ -0,0 +1,19 @@
[tool.poetry]
name = "langchain-rag"
version = "0.1.0"
description = "langchain-rag-uagent-integration"
authors = ["zmezei <[email protected]>"]

[tool.poetry.dependencies]
python = ">=3.10,<3.12"
uagents = "*"
requests = "^2.31.0"
langchain = "^0.1.7"
openai = "^1.12.0"
langchain-openai = "^0.0.6"
tiktoken = "^0.6.0"
cohere = "^4.47"
faiss-cpu = "^1.7.4"
validators = "^0.22.0"
uagents-ai-engine = "^0.1.2"
unstructured = "^0.12.4"
137 changes: 137 additions & 0 deletions integrations/langchain-rag/src/agents/langchain_rag_agent.py
@@ -0,0 +1,137 @@
import traceback
from uagents import Agent, Context, Protocol
import validators
from messages.requests import RagRequest
import os
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain_community.document_loaders import UnstructuredURLLoader
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CohereRerank
from ai_engine import UAgentResponse, UAgentResponseType
import nltk

nltk.download("punkt")
nltk.download("averaged_perceptron_tagger")


LANGCHAIN_RAG_SEED = os.getenv("LANGCHAIN_RAG_SEED", "")
assert (
    LANGCHAIN_RAG_SEED
), "LANGCHAIN_RAG_SEED environment variable is missing from .env"

agent = Agent(
    name="langchain_rag_agent",
    seed=LANGCHAIN_RAG_SEED,
    port=8001,
    endpoint=["http://127.0.0.1:8001/submit"],
)

docs_bot_protocol = Protocol("DocsBot")


PROMPT_TEMPLATE = """
Answer the question based only on the following context:
{context}
---
Answer the question based on the above context: {question}
"""


def create_retriever(
    ctx: Context, url: str, deep_read: bool
) -> ContextualCompressionRetriever:
    # Recursively collect links under the starting URL (only used when deep_read is enabled).
    def scrape(site: str):
        if not validators.url(site):
            ctx.logger.info(f"Url {site} is not valid")
            return

        r = requests.get(site)
        soup = BeautifulSoup(r.text, "html.parser")

        parsed_url = urlparse(url)
        base_domain = parsed_url.scheme + "://" + parsed_url.netloc

        link_array = soup.find_all("a")
        for link in link_array:
            href: str = link.get("href", "")
            if len(href) == 0:
                continue
            current_site = f"{base_domain}{href}" if href.startswith("/") else href
            if (
                ".php" in current_site
                or "#" in current_site
                or not current_site.startswith(url)
                or current_site in urls
            ):
                continue
            urls.append(current_site)
            scrape(current_site)

    urls = [url]
    if deep_read:
        scrape(url)
        ctx.logger.info(f"After deep scraping - urls to parse: {urls}")

    try:
        # Index the collected pages in FAISS and rerank retrieved chunks with Cohere.
        loader = UnstructuredURLLoader(urls=urls)
        docs = loader.load_and_split()
        db = FAISS.from_documents(docs, OpenAIEmbeddings())
        compression_retriever = ContextualCompressionRetriever(
            base_compressor=CohereRerank(), base_retriever=db.as_retriever()
        )
        return compression_retriever
    except Exception as exc:
        ctx.logger.error(f"Error happened: {exc}")
        ctx.logger.error("".join(traceback.format_exception(exc)))
        return None


@docs_bot_protocol.on_message(model=RagRequest, replies={UAgentResponse})
async def answer_question(ctx: Context, sender: str, msg: RagRequest):
    ctx.logger.info(f"Received message from {sender}, session: {ctx.session}")
    ctx.logger.info(
        f"input url: {msg.url}, question: {msg.question}, is deep scraping: {msg.deep_read}"
    )

    parsed_url = urlparse(msg.url)
    if not parsed_url.scheme or not parsed_url.netloc:
        ctx.logger.error("invalid input url")
        await ctx.send(
            sender,
            UAgentResponse(
                message="Input url is not valid",
                type=UAgentResponseType.FINAL,
            ),
        )
        return
    base_domain = parsed_url.scheme + "://" + parsed_url.netloc
    ctx.logger.info(f"Base domain: {base_domain}")

    retriever = create_retriever(ctx, url=msg.url, deep_read=msg.deep_read == "yes")
    if retriever is None:
        await ctx.send(
            sender,
            UAgentResponse(
                message="Failed to load the document at the given URL",
                type=UAgentResponseType.FINAL,
            ),
        )
        return

    compressed_docs = retriever.get_relevant_documents(msg.question)
    context_text = "\n\n---\n\n".join([doc.page_content for doc in compressed_docs])
    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = prompt_template.format(context=context_text, question=msg.question)

    model = ChatOpenAI(model="gpt-3.5-turbo-1106")
    response = model.predict(prompt)
    ctx.logger.info(f"Response: {response}")
    await ctx.send(
        sender, UAgentResponse(message=response, type=UAgentResponseType.FINAL)
    )


agent.include(docs_bot_protocol, publish_manifest=True)


if __name__ == "__main__":
    agent.run()
41 changes: 41 additions & 0 deletions integrations/langchain-rag/src/agents/langchain_rag_user.py
@@ -0,0 +1,41 @@
from uagents import Agent, Context, Protocol
from messages.requests import RagRequest
from ai_engine import UAgentResponse


QUESTION = "How to install uagents using pip"
URL = "https://fetch.ai/docs/guides/agents/installing-uagent"
DEEP_READ = (
    "no"  # it means nested pages at the URL won't be parsed, just the actual URL
)

RAG_AGENT_ADDRESS = "agent1q0yu4450vryngsxv6un8t5x8hwrprkznay2f49a5y4384jn0tgxj62jf3h8"

user = Agent(
    name="langchain_rag_user",
    port=8000,
    endpoint=["http://127.0.0.1:8000/submit"],
)

langchain_rag_user = Protocol("Langchain RAG user")


@langchain_rag_user.on_interval(60, messages=RagRequest)
async def ask_question(ctx: Context):
    ctx.logger.info(
        f"Asking RAG agent to answer {QUESTION} based on document located at {URL}, reading nested pages too: {DEEP_READ}"
    )
    await ctx.send(
        RAG_AGENT_ADDRESS, RagRequest(question=QUESTION, url=URL, deep_read=DEEP_READ)
    )


@langchain_rag_user.on_message(model=UAgentResponse)
async def handle_data(ctx: Context, sender: str, data: UAgentResponse):
ctx.logger.info(f"Got response from RAG agent: {data.message}")


user.include(langchain_rag_user)

if __name__ == "__main__":
    user.run()
12 changes: 12 additions & 0 deletions integrations/langchain-rag/src/main.py
@@ -0,0 +1,12 @@
from uagents import Bureau
from agents.langchain_rag_agent import agent
from agents.langchain_rag_user import user


if __name__ == "__main__":
    bureau = Bureau(endpoint="http://127.0.0.1:8000/submit", port=8000)
    print(f"Adding RAG agent to Bureau: {agent.address}")
    bureau.add(agent)
    print(f"Adding user agent to Bureau: {user.address}")
    bureau.add(user)
    bureau.run()
14 changes: 14 additions & 0 deletions integrations/langchain-rag/src/messages/requests.py
@@ -0,0 +1,14 @@
from typing import Optional
from pydantic import Field
from uagents import Model


class RagRequest(Model):
    question: str = Field(
        description="The question that the user wants to have an answer for."
    )
    url: str = Field(description="The url of the docs where the answer is.")
    deep_read: Optional[str] = Field(
        description="Specifies whether all nested pages referenced from the starting URL should be read or not. The value should be yes or no.",
        default="no",
    )
