
add langchain RAG agent to integrations
zmezei committed Feb 15, 2024
1 parent 8257a9e commit f9a7e38
Showing 8 changed files with 3,210 additions and 0 deletions.
69 changes: 69 additions & 0 deletions integrations/langchain-rag/README.md
@@ -0,0 +1,69 @@
# Langchain RAG integration

The Langchain RAG integration example is a guide to setting up and using RAG (retrieval augmented generation) in a uAgent. It shows how to create a RAG application that can answer questions based on an online document.
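
Under the hood, the agent loads the page(s) at the given URL, indexes them in a FAISS vector store with OpenAI embeddings, reranks the retrieved chunks with Cohere, and asks an OpenAI chat model to answer from that context. Below is a condensed sketch of that pipeline, based on the code in `src/agents/langchain_rag_agent.py` (the URL and question are just the example defaults, and both API keys must already be set in the environment):

```python
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CohereRerank

# Load and split the target page, index it, and wrap the retriever with a Cohere reranker
docs = UnstructuredURLLoader(urls=["https://fetch.ai/docs/guides/agents/installing-uagent"]).load_and_split()
db = FAISS.from_documents(docs, OpenAIEmbeddings())
retriever = ContextualCompressionRetriever(
    base_compressor=CohereRerank(), base_retriever=db.as_retriever()
)

# Retrieve the most relevant chunks and answer only from them
question = "How to install uagents using pip"
context = "\n\n---\n\n".join(doc.page_content for doc in retriever.get_relevant_documents(question))
answer = ChatOpenAI(model="gpt-3.5-turbo-1106").predict(
    f"Answer the question based only on the following context:\n{context}\n---\n{question}"
)
print(answer)
```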

## Prerequisites

- Python (v3.10+ recommended)
- Poetry (a Python packaging and dependency management tool)

## Setup

1. For the demo to work, you need to get some API keys:

- Visit the [Cohere website](https://dashboard.cohere.com/).
- Sign up or log in.
- Navigate to `API Keys`.
- Copy an existing key or create a new one.

- Visit the [OpenAI website](https://openai.com/).
- Sign up or log in.
- Navigate to the API section to obtain your API key.

Note that if you’ve run out of OpenAI credits, you will not be able to get results for this example.

2. In the `langchain-rag/src` directory, create a `.env` file and set your API keys and agent seed:

```
export COHERE_API_KEY="{GET THE API KEY}"
export OPENAI_API_KEY="{GET THE API KEY}"
export LANGCHAIN_RAG_SEED="{PUT ANY RANDOM STRING HERE}"
```
3. In the `langchain-rag` directory, install all dependencies:
```bash
poetry install
```
4. To load the environment variables from `.env` (see the sketch after this list for how the agent consumes them):
```bash
cd src
source .env
```
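
For reference, this is how the agent picks up these values at startup (excerpted from `src/agents/langchain_rag_agent.py`); the seed can be any stable string you choose:

```python
import os
from uagents import Agent

# COHERE_API_KEY and OPENAI_API_KEY are read directly by the Cohere and OpenAI clients;
# the seed is read explicitly and deterministically derives the agent's address.
LANGCHAIN_RAG_SEED = os.getenv("LANGCHAIN_RAG_SEED", "")
assert (
    LANGCHAIN_RAG_SEED
), "LANGCHAIN_RAG_SEED environment variable is missing from .env"

agent = Agent(
    name="langchain_rag_agent",
    seed=LANGCHAIN_RAG_SEED,
    port=8001,
    endpoint=["http://127.0.0.1:8001/submit"],
)
```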
## Running The Main Script
To run the project, use the command:
```
poetry run python main.py
```
After running the command, a request is sent to the agent every minute and the results are printed to the console. Look for the following output in the logs:
```
Adding RAG agent to Bureau: {agent_address}
```
Copy the `{agent_address}` value and replace `RAG_AGENT_ADDRESS` with it in `src/agents/langchain_rag_user.py` (see the snippet after the defaults below).
The same file defines the variables `QUESTION`, `URL` and `DEEP_READ`; change their values to customize the question you want answered. The default values are:
```
QUESTION = "How to install uagents using pip"
URL = "https://fetch.ai/docs/guides/agents/installing-uagent"
DEEP_READ = "no" # it means nested pages at the URL won't be parsed, just the actual URL
```
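
Pointing the user agent at your own RAG agent is then a one-line change; the address below is the default shipped with the example, so replace it with the one printed in your logs:

```python
# src/agents/langchain_rag_user.py
RAG_AGENT_ADDRESS = "agent1q0yu4450vryngsxv6un8t5x8hwrprkznay2f49a5y4384jn0tgxj62jf3h8"  # replace with your agent's address
```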
Now you can enjoy answering questions with the Langchain RAG agent!
2,913 changes: 2,913 additions & 0 deletions integrations/langchain-rag/poetry.lock

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions integrations/langchain-rag/project.json
@@ -0,0 +1,5 @@
{
"title": "Langchain RAG",
"description": "Langchain RAG. RAG (retrieval augmentred generation) enables developers to improve the quality of LLM-generated responses by grounding the model on external sources of knowledge.",
"categories": ["Text Generation", "LLM", "OpenAI", "RAG"]
}
19 changes: 19 additions & 0 deletions integrations/langchain-rag/pyproject.toml
@@ -0,0 +1,19 @@
[tool.poetry]
name = "langchain-rag"
version = "0.1.0"
description = "langchain-rag-uagent-integration"
authors = ["zmezei <[email protected]>"]

[tool.poetry.dependencies]
python = ">=3.10,<3.12"
uagents = "*"
requests = "^2.31.0"
langchain = "^0.1.7"
openai = "^1.12.0"
langchain-openai = "^0.0.6"
tiktoken = "^0.6.0"
cohere = "^4.47"
faiss-cpu = "^1.7.4"
validators = "^0.22.0"
uagents-ai-engine = "^0.1.2"
unstructured = "^0.12.4"
137 changes: 137 additions & 0 deletions integrations/langchain-rag/src/agents/langchain_rag_agent.py
@@ -0,0 +1,137 @@
import traceback
from uagents import Agent, Context, Protocol
import validators
from messages.requests import RagRequest
import os
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain_community.document_loaders import UnstructuredURLLoader
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CohereRerank
from ai_engine import UAgentResponse, UAgentResponseType
import nltk

nltk.download("punkt")
nltk.download("averaged_perceptron_tagger")


LANGCHAIN_RAG_SEED = os.getenv("LANGCHAIN_RAG_SEED", "")
assert (
    LANGCHAIN_RAG_SEED
), "LANGCHAIN_RAG_SEED environment variable is missing from .env"

agent = Agent(
    name="langchain_rag_agent",
    seed=LANGCHAIN_RAG_SEED,
    port=8001,
    endpoint=["http://127.0.0.1:8001/submit"],
)

docs_bot_protocol = Protocol("DocsBot")


PROMPT_TEMPLATE = """
Answer the question based only on the following context:
{context}
---
Answer the question based on the above context: {question}
"""


def create_retriever(
    ctx: Context, url: str, deep_read: bool
) -> ContextualCompressionRetriever:
    # Recursively collect links under the starting URL (only used when deep_read is enabled).
    def scrape(site: str):
        if not validators.url(site):
            ctx.logger.info(f"Url {site} is not valid")
            return

        r = requests.get(site)
        soup = BeautifulSoup(r.text, "html.parser")

        parsed_url = urlparse(url)
        base_domain = parsed_url.scheme + "://" + parsed_url.netloc

        link_array = soup.find_all("a")
        for link in link_array:
            href: str = link.get("href", "")
            if len(href) == 0:
                continue
            current_site = f"{base_domain}{href}" if href.startswith("/") else href
            if (
                ".php" in current_site
                or "#" in current_site
                or not current_site.startswith(url)
                or current_site in urls
            ):
                continue
            urls.append(current_site)
            scrape(current_site)

    urls = [url]
    if deep_read:
        scrape(url)
        ctx.logger.info(f"After deep scraping - urls to parse: {urls}")

    try:
        # Index the collected pages in FAISS and rerank retrieved chunks with Cohere.
        loader = UnstructuredURLLoader(urls=urls)
        docs = loader.load_and_split()
        db = FAISS.from_documents(docs, OpenAIEmbeddings())
        compression_retriever = ContextualCompressionRetriever(
            base_compressor=CohereRerank(), base_retriever=db.as_retriever()
        )
        return compression_retriever
    except Exception as exc:
        ctx.logger.error(f"Error happened: {exc}")
        ctx.logger.error("".join(traceback.format_exception(exc)))
        return None


@docs_bot_protocol.on_message(model=RagRequest, replies={UAgentResponse})
async def answer_question(ctx: Context, sender: str, msg: RagRequest):
    ctx.logger.info(f"Received message from {sender}, session: {ctx.session}")
    ctx.logger.info(
        f"input url: {msg.url}, question: {msg.question}, is deep scraping: {msg.deep_read}"
    )

    parsed_url = urlparse(msg.url)
    if not parsed_url.scheme or not parsed_url.netloc:
        ctx.logger.error("invalid input url")
        await ctx.send(
            sender,
            UAgentResponse(
                message="Input url is not valid",
                type=UAgentResponseType.FINAL,
            ),
        )
        return
    base_domain = parsed_url.scheme + "://" + parsed_url.netloc
    ctx.logger.info(f"Base domain: {base_domain}")

    retriever = create_retriever(ctx, url=msg.url, deep_read=msg.deep_read == "yes")
    if retriever is None:
        await ctx.send(
            sender,
            UAgentResponse(
                message="Failed to load the document at the given URL",
                type=UAgentResponseType.FINAL,
            ),
        )
        return

    compressed_docs = retriever.get_relevant_documents(msg.question)
    context_text = "\n\n---\n\n".join([doc.page_content for doc in compressed_docs])
    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = prompt_template.format(context=context_text, question=msg.question)

    model = ChatOpenAI(model="gpt-3.5-turbo-1106")
    response = model.predict(prompt)
    ctx.logger.info(f"Response: {response}")
    await ctx.send(
        sender, UAgentResponse(message=response, type=UAgentResponseType.FINAL)
    )


agent.include(docs_bot_protocol, publish_manifest=True)


if __name__ == "__main__":
    agent.run()
41 changes: 41 additions & 0 deletions integrations/langchain-rag/src/agents/langchain_rag_user.py
@@ -0,0 +1,41 @@
from uagents import Agent, Context, Protocol
from messages.requests import RagRequest
from ai_engine import UAgentResponse


QUESTION = "How to install uagents using pip"
URL = "https://fetch.ai/docs/guides/agents/installing-uagent"
DEEP_READ = (
    "no"  # it means nested pages at the URL won't be parsed, just the actual URL
)

RAG_AGENT_ADDRESS = "agent1q0yu4450vryngsxv6un8t5x8hwrprkznay2f49a5y4384jn0tgxj62jf3h8"

user = Agent(
    name="langchain_rag_user",
    port=8000,
    endpoint=["http://127.0.0.1:8000/submit"],
)

langchain_rag_user = Protocol("Langchain RAG user")


@langchain_rag_user.on_interval(60, messages=RagRequest)
async def ask_question(ctx: Context):
    ctx.logger.info(
        f"Asking RAG agent to answer {QUESTION} based on document located at {URL}, reading nested pages too: {DEEP_READ}"
    )
    await ctx.send(
        RAG_AGENT_ADDRESS, RagRequest(question=QUESTION, url=URL, deep_read=DEEP_READ)
    )


@langchain_rag_user.on_message(model=UAgentResponse)
async def handle_data(ctx: Context, sender: str, data: UAgentResponse):
ctx.logger.info(f"Got response from RAG agent: {data.message}")


user.include(langchain_rag_user)

if __name__ == "__main__":
    user.run()
12 changes: 12 additions & 0 deletions integrations/langchain-rag/src/main.py
@@ -0,0 +1,12 @@
from uagents import Bureau
from agents.langchain_rag_agent import agent
from agents.langchain_rag_user import user


if __name__ == "__main__":
    bureau = Bureau(endpoint="http://127.0.0.1:8000/submit", port=8000)
    print(f"Adding RAG agent to Bureau: {agent.address}")
    bureau.add(agent)
    print(f"Adding user agent to Bureau: {user.address}")
    bureau.add(user)
    bureau.run()
14 changes: 14 additions & 0 deletions integrations/langchain-rag/src/messages/requests.py
@@ -0,0 +1,14 @@
from typing import Optional
from pydantic import Field
from uagents import Model


class RagRequest(Model):
    question: str = Field(
        description="The question that the user wants to have an answer for."
    )
    url: str = Field(description="The url of the docs where the answer is.")
    deep_read: Optional[str] = Field(
        description="Specifies whether all nested pages referenced from the starting URL should be read or not. The value should be yes or no.",
        default="no",
    )
