Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(integration): multilingual-agent #294

Merged
merged 1 commit into from
Apr 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,899 changes: 1,899 additions & 0 deletions integrations/multilingual-agent/poetry.lock

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions integrations/multilingual-agent/project.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"title": "Multilingual-agent",
"description": "Agent with multilingual capabilities to summarize a YouTube video, translate the summary to a different language, and generate a text response in the target language.",
"categories": ["Text Summarization", "Hugging Face"],
"deltav": false
}
18 changes: 18 additions & 0 deletions integrations/multilingual-agent/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[tool.poetry]
name = "multi-lingual-video-agent"
version = "0.1.0"
description = "Agent that can summarize an English-language video in other languages"
authors = ["Lenin Kennedy <[email protected]>", "Samuela Abigail Mathew <[email protected]>"]
license = "MIT License"
readme = "README.md"

[tool.poetry.dependencies]
python = ">=3.10,<3.12"
uagents = "0.11.0"
youtube-transcript-api = "^0.6.2"
langchain = "^0.1.12"


[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# Necessary imports: uagents for agent creation and message handling,
# os and requests for managing API calls
from uagents import Agent, Context, Protocol, Model
from pydantic import Field
from ai_engine import UAgentResponse, UAgentResponseType
from messages.basic import UAResponse, UARequest, Error
from uagents.setup import fund_agent_if_low
import os
import requests
from utils.functions import get_video_script, summarize_transcript

# The access token and URL for the SAMSUM BART model, served by Hugging Face.
# The token is read from the environment; the fallback string is only a
# placeholder value.
HUGGING_FACE_ACCESS_TOKEN = os.getenv(
    "HUGGING_FACE_ACCESS_TOKEN", "HUGGING FACE secret phrase :)")
SAMSUM_BART_URL = "https://api-inference.huggingface.co/models/Samuela39/my-samsum-model"

# Setting the headers for the API call
HEADERS = {
    "Authorization": f"Bearer {HUGGING_FACE_ACCESS_TOKEN}"
}

# Seed used to create the agent. It can be set independently through the
# AGENT_SEED environment variable so the agent's address does not have to
# change whenever the Hugging Face token does. When AGENT_SEED is unset the
# token is used, exactly as before.
SEED = os.getenv("AGENT_SEED", HUGGING_FACE_ACCESS_TOKEN)


# Copy the address shown below.
# This is printed *before* the mailbox key is checked so the address is
# available for registration even when no mailbox key exists yet.
print(f"Your agent's address is: {Agent(seed=SEED).address}")


# Then go to https://agentverse.ai, register your agent in the Mailroom
# and copy the agent's mailbox key
AGENT_MAILBOX_KEY = os.getenv("AGENT_MAILBOX_KEY", None)
# Explicit check instead of `assert`: assertions are removed when Python runs
# with -O, which would let a missing key through into the mailbox string.
if not AGENT_MAILBOX_KEY:
    raise RuntimeError(
        "AGENT_MAILBOX_KEY environment variable is not set! "
        "Please set it to your agent's mailbox key!"
    )

# Now your agent is ready to join the agentverse!
agent = Agent(
    name="multilingual-agent",
    seed=SEED,
    mailbox=f"{AGENT_MAILBOX_KEY}@https://agentverse.ai",
)

# # Creating the agent and funding it if necessary
# agent = Agent(
# name="multilingual-agent",
# seed=SEED,
# port=8001,
# endpoint=["http://127.0.0.1:8001/submit"],
# )
# fund_agent_if_low(agent.wallet.address())

# Message model with a single list-valued field, ``response``.
# NOTE(review): a class named UAResponse is also imported from messages.basic
# at the top of this file; this definition rebinds the name and shadows that
# import. The two definitions look identical — confirm whether this one can go.
class UAResponse(Model):
    response: list

# Incoming request model: carries the URL of the video to summarize in its
# only field, ``url``.
class SummarizationRequest(Model):
    url: str = Field(description="URL of the video you want to summarize")

# Protocol declaration for summarization requests
multilingual_agent = Protocol("summary-request", "0.1.0")


# Declaration of a message event handler for SummarizationRequest messages.
# Every reply sent below is a UAgentResponse, so that is the declared reply type.
@multilingual_agent.on_message(model=SummarizationRequest, replies={UAgentResponse})
async def handle_request(ctx: Context, sender: str, msg: SummarizationRequest):
    """Summarize the video referenced by ``msg.url`` and reply to ``sender``.

    Args:
        ctx: Agent context, used for logging and for sending the reply.
        sender: Address of the agent that sent the request.
        msg: The request; its ``url`` field holds the video URL.

    The sender always receives a ``UAgentResponse``: type ``FINAL`` with the
    summary on success, type ``ERROR`` when the transcript cannot be fetched
    or the summarization fails.
    """
    # Logging the request information
    ctx.logger.info(
        f"Got request from {sender} for summarization : {msg.url}")

    try:
        # The request model exposes the link as ``url``.
        transcript = get_video_script(msg.url)
    except Exception as e:
        await ctx.send(
            sender,
            UAgentResponse(
                message=f"No transcript found for video! Error: {e}",
                type=UAgentResponseType.ERROR,
            ),
        )
        return

    try:
        # summarize_transcript is a plain (non-async) function, so it is
        # called directly rather than awaited; default chunking is used.
        summary = summarize_transcript(transcript)
    except Exception as e:
        # Report the failure instead of leaving the sender without a reply.
        ctx.logger.exception("Summarization failed")
        await ctx.send(
            sender,
            UAgentResponse(
                message=f"Could not summarize video! Error: {e}",
                type=UAgentResponseType.ERROR,
            ),
        )
        return

    ctx.logger.info("Sending short article")
    await ctx.send(
        sender,
        UAgentResponse(
            message=f"Summary:\n{summary}",
            type=UAgentResponseType.FINAL,
        ),
    )


# Include protocol to the agent (publish_manifest=True is passed so the
# protocol manifest is published as part of registration)
agent.include(multilingual_agent, publish_manifest=True)

# Start the agent only when this file is executed as a script
if __name__ == "__main__":
    agent.run()
Empty file.
13 changes: 13 additions & 0 deletions integrations/multilingual-agent/src/messages/basic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from uagents import Model


# Request message with a single string field, ``text``.
class UARequest(Model):
    text: str


# Error message with a single string field, ``error``.
class Error(Model):
    error: str


# Response message with a single list-valued field, ``response``.
class UAResponse(Model):
    response: list
Empty file.
91 changes: 91 additions & 0 deletions integrations/multilingual-agent/src/utils/functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
from urllib.parse import urlparse, parse_qs
from youtube_transcript_api import YouTubeTranscriptApi
from langchain_text_splitters import RecursiveCharacterTextSplitter
import os
import requests

# The access token and URL for the SAMSUM BART model, served by Hugging Face.
# The token is read from the HUGGING_FACE_ACCESS_TOKEN environment variable;
# when that variable is unset the literal placeholder string below is used.
HUGGING_FACE_ACCESS_TOKEN = os.getenv(
    "HUGGING_FACE_ACCESS_TOKEN", "HUGGING FACE secret phrase :)")
SAMSUM_BART_URL = "https://api-inference.huggingface.co/models/Samuela39/my-samsum-model"

# Setting the headers for the API call: the token is sent as a Bearer
# credential in the Authorization header
HEADERS = {
    "Authorization": f"Bearer {HUGGING_FACE_ACCESS_TOKEN}"
}


def _extract_video_id(url: str) -> str:
    """Return the video id contained in a YouTube URL.

    Supports ``...watch?v=<id>`` style URLs (id in the ``v`` query parameter)
    and ``youtu.be/<id>`` short links (id is the first path segment).

    Raises:
        ValueError: If no video id can be found in ``url``.
    """
    parsed = urlparse(url)
    host = (parsed.hostname or "").lower()

    if host in ("youtu.be", "www.youtu.be"):
        video_id = parsed.path.strip("/").split("/")[0]
    else:
        # parse_qs drops blank values, so "v=" ends up as a missing key.
        video_id = parse_qs(parsed.query).get("v", [""])[0]

    if not video_id:
        raise ValueError("Invalid YouTube video url")
    return video_id


def get_video_script(url: str) -> list:
    """
    Get the script of a YouTube video by its URL and return it as a list of strings

    Args:
        url: URL of the YouTube video.

    Returns:
        The non-empty ``text`` entries of the English transcript, in order.

    Raises:
        ValueError: If ``url`` is empty or contains no video id.
    """
    if not url:
        raise ValueError("YouTube video url is required")

    # Parse the URL supplied by the caller.
    video_id = _extract_video_id(url)

    video_script = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])

    return [segment["text"] for segment in video_script if segment["text"]]

def chunk_text(text: list, chunk_size: int = 1000, chunk_overlap: int = 100) -> list:
    """
    Split a list of strings into chunks of strings

    The input strings are joined with single spaces into one text, which is
    then split so that short pieces (such as individual transcript lines) are
    merged into chunks of up to ``chunk_size`` characters.

    Args:
        text: The strings to chunk. A single string is accepted as well.
        chunk_size: Maximum length of a chunk, measured with ``len``.
        chunk_overlap: Number of characters adjacent chunks may share.

    Returns:
        A list of plain strings; empty when there is no text to split.
    """
    if isinstance(text, str):
        text = [text]

    joined = " ".join(piece for piece in (text or []) if piece).strip()
    if not joined:
        # Nothing to split: skip building the splitter altogether.
        return []

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )

    # split_text returns plain strings, which can be sent as JSON downstream.
    return text_splitter.split_text(joined)

def get_summarization(text: str, timeout: float = 60.0) -> str:
    """
    Summarize a string

    Posts ``text`` to the model at ``SAMSUM_BART_URL`` and returns the
    ``summary_text`` of the first item in the JSON response.

    Args:
        text: The text to summarize.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The summary produced by the model.

    Raises:
        RuntimeError: If the request is answered with an error status or the
            response body does not have the expected shape.
    """
    data = {
        "inputs": text
    }

    # A timeout keeps a stalled request from hanging the caller forever.
    response = requests.post(
        SAMSUM_BART_URL, headers=HEADERS, json=data, timeout=timeout)

    if not response.ok:
        raise RuntimeError(
            f"Summarization request failed with status "
            f"{response.status_code}: {response.text}"
        )

    payload = response.json()
    try:
        return payload[0]["summary_text"]
    except (KeyError, IndexError, TypeError) as err:
        raise RuntimeError(
            f"Unexpected response from summarization model: {payload!r}"
        ) from err

def summarize_transcript(text: list, chunk_size: int = 1000, chunk_overlap: int = 100) -> str:
    """
    Summarize a list of strings

    The text is chunked with ``chunk_text``, each chunk is summarized, and
    each of those summaries is summarized once more. The second-pass results
    are joined with single spaces.

    Args:
        text: The strings to summarize (e.g. transcript lines).
        chunk_size: Maximum chunk length passed to ``chunk_text``.
        chunk_overlap: Chunk overlap passed to ``chunk_text``.

    Returns:
        The combined summary as one string; empty when there are no chunks.
    """
    chunks = chunk_text(text, chunk_size, chunk_overlap)

    # Chunks may be plain strings or objects exposing ``page_content``;
    # only the text itself is sent to the model.
    summarized_chunks = [
        get_summarization(getattr(chunk, "page_content", chunk))
        for chunk in chunks
    ]

    # Join with a space so consecutive summaries do not run together.
    return " ".join(
        get_summarization(summary) for summary in summarized_chunks if summary
    )
Loading