Skip to content

Commit

Permalink
feat(integration): multilingual-agent (#294)
Browse files Browse the repository at this point in the history
  • Loading branch information
lem0n4id authored Apr 16, 2024
1 parent c61819b commit 3c6b163
Show file tree
Hide file tree
Showing 9 changed files with 2,124 additions and 0 deletions.
1,899 changes: 1,899 additions & 0 deletions integrations/multilingual-agent/poetry.lock

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions integrations/multilingual-agent/project.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"title": "Multilingual-agent",
"description": "Agent with multilingual capabilities to summarize a youtube video, translate the summary to a different language, and generate a text response in the target language.",
"categories": ["Text Summarization", "Hugging Face"],
"deltav": false
}
18 changes: 18 additions & 0 deletions integrations/multilingual-agent/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[tool.poetry]
name = "multi-lingual-video-agent"
version = "0.1.0"
description = "Agent that summarizes English-language YouTube videos and translates the summary into other languages"
authors = ["Lenin Kennedy <[email protected]>", "Samuela Abigail Mathew <[email protected]>"]
license = "MIT License"
readme = "README.md"

[tool.poetry.dependencies]
python = ">=3.10,<3.12"
uagents = "0.11.0"
youtube-transcript-api = "^0.6.2"
langchain = "^0.1.12"


[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
Empty file.
97 changes: 97 additions & 0 deletions integrations/multilingual-agent/src/agents/multilingual_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# Necessary imports: uagents for agent creation and message handling,
# os and requests for managing API calls
from uagents import Agent, Context, Protocol, Model
from pydantic import Field
from ai_engine import UAgentResponse, UAgentResponseType
from messages.basic import UAResponse, UARequest, Error
from uagents.setup import fund_agent_if_low
import os
import requests
from utils.functions import get_video_script, summarize_transcript

# The access token and URL for the SAMSUM BART model, served by Hugging Face.
# The fallback string is only a placeholder: set the HUGGING_FACE_ACCESS_TOKEN
# environment variable to a real token before running the agent.
HUGGING_FACE_ACCESS_TOKEN = os.getenv(
    "HUGGING_FACE_ACCESS_TOKEN", "HUGGING FACE secret phrase :)"
)
SAMSUM_BART_URL = "https://api-inference.huggingface.co/models/Samuela39/my-samsum-model"

# Setting the headers for the API call
HEADERS = {
    "Authorization": f"Bearer {HUGGING_FACE_ACCESS_TOKEN}",
}

# Seed phrase passed to Agent(seed=...); the agent's address is derived from it.
# Set AGENT_SEED to give the agent an identity that is independent of the
# Hugging Face credential. When AGENT_SEED is unset the access token is used,
# which keeps the previously generated address unchanged.
SEED = os.getenv("AGENT_SEED", HUGGING_FACE_ACCESS_TOKEN)


# Copy the address shown below. A throwaway Agent is built from the seed here
# because the address has to be known before a mailbox key can be obtained.
print(f"Your agent's address is: {Agent(seed=SEED).address}")


# Then go to https://agentverse.ai, register your agent in the Mailroom
# and copy the agent's mailbox key
AGENT_MAILBOX_KEY = os.getenv("AGENT_MAILBOX_KEY")
if not AGENT_MAILBOX_KEY:
    # Raise explicitly instead of using `assert`: assertions are stripped when
    # Python runs with -O, and an empty key is just as unusable as a missing one.
    raise RuntimeError(
        "AGENT_MAILBOX_KEY environment variable is not set! "
        "Please set it to your agent's mailbox key!"
    )

# Now your agent is ready to join the agentverse!
agent = Agent(
    name="multilingual-agent",
    seed=SEED,
    mailbox=f"{AGENT_MAILBOX_KEY}@https://agentverse.ai",
)

# # Creating the agent and funding it if necessary
# agent = Agent(
# name="multilingual-agent",
# seed=SEED,
# port=8001,
# endpoint=["http://127.0.0.1:8001/submit"],
# )
# fund_agent_if_low(agent.wallet.address())

# Message model with a single list-valued `response` field.
# NOTE(review): this definition rebinds the `UAResponse` name already imported
# from messages.basic at the top of this module - confirm the duplicate is
# intended.
class UAResponse(Model):
    response: list

# Incoming request model handled by this agent: carries the URL of the video
# to summarize.
class SummarizationRequest(Model):
    # The human-readable description is attached to the field via Field().
    url: str = Field(description="URL of the video you want to summarize")

# Protocol (name "summary-request", version "0.1.0") on which the
# SummarizationRequest message handler is registered.
multilingual_agent = Protocol(name="summary-request", version="0.1.0")

# Message handler for SummarizationRequest messages. The declared reply type
# matches what the handler actually sends (UAgentResponse).
@multilingual_agent.on_message(model=SummarizationRequest, replies={UAgentResponse})
async def handle_request(ctx: Context, sender: str, msg: SummarizationRequest):
    """
    Summarize the video referenced by ``msg.url`` and send the result to ``sender``.

    Args:
        ctx: Agent context used for logging and for sending the reply.
        sender: Address of the agent that sent the request.
        msg: The request; only its ``url`` field is read.

    Sends a ``UAgentResponse`` of type ``FINAL`` containing the summary, or of
    type ``ERROR`` when the transcript cannot be fetched or summarized.
    """
    # Local import keeps this block self-contained.
    import asyncio

    # Logging the request information
    ctx.logger.info(
        f"Got request from {sender} for summarization : {msg.url}")

    try:
        # get_video_script performs blocking network I/O, so it runs in a
        # worker thread to keep the agent's event loop responsive.
        transcript = await asyncio.to_thread(get_video_script, msg.url)
    except Exception as e:
        await ctx.send(
            sender,
            UAgentResponse(
                message=f"No transcript found for video! Error: {e}",
                type=UAgentResponseType.ERROR,
            ),
        )
        return

    try:
        # summarize_transcript is a plain (synchronous) function; it must not be
        # awaited directly. The request model has no model/chunk options, so the
        # default chunking parameters are used.
        summary = await asyncio.to_thread(summarize_transcript, transcript)
    except Exception as e:
        ctx.logger.exception("Summarization failed")
        await ctx.send(
            sender,
            UAgentResponse(
                message=f"Could not summarize video! Error: {e}",
                type=UAgentResponseType.ERROR,
            ),
        )
        return

    ctx.logger.info("Sending short article")
    await ctx.send(
        sender,
        UAgentResponse(
            message=f"Summary:\n{summary}",
            type=UAgentResponseType.FINAL,
        ),
    )


# Attach the summarization protocol to the agent; publish_manifest=True is
# passed so the protocol manifest is published as well.
agent.include(multilingual_agent, publish_manifest=True)

# Start the agent only when this module is executed as a script.
if __name__ == "__main__":
    agent.run()
Empty file.
13 changes: 13 additions & 0 deletions integrations/multilingual-agent/src/messages/basic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from uagents import Model


# Request message model with a single string field, `text`.
class UARequest(Model):
    text: str


# Error message model with a single string field, `error`.
class Error(Model):
    error: str


# Response message model with a single list-valued field, `response`.
class UAResponse(Model):
    response: list
Empty file.
91 changes: 91 additions & 0 deletions integrations/multilingual-agent/src/utils/functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
from urllib.parse import urlparse, parse_qs
from youtube_transcript_api import YouTubeTranscriptApi
from langchain_text_splitters import RecursiveCharacterTextSplitter
import os
import requests

# Hugging Face credentials and endpoint for the SAMSUM BART summarization model.
# The token is read from the environment; the second argument is the fallback
# used when HUGGING_FACE_ACCESS_TOKEN is not set.
HUGGING_FACE_ACCESS_TOKEN = os.environ.get(
    "HUGGING_FACE_ACCESS_TOKEN",
    "HUGGING FACE secret phrase :)",
)
SAMSUM_BART_URL = (
    "https://api-inference.huggingface.co/models/Samuela39/my-samsum-model"
)

# HTTP headers attached to every inference request (bearer-token auth).
HEADERS = {"Authorization": "Bearer " + HUGGING_FACE_ACCESS_TOKEN}


def get_video_script(url: str) -> list:
    """
    Get the script of a YouTube video by its URL and return it as a list of strings

    The video id is taken from the ``v`` query parameter
    (``https://www.youtube.com/watch?v=<id>``) or, for short links, from the
    path of a ``youtu.be/<id>`` URL.

    Args:
        url: URL of the YouTube video.

    Returns:
        The non-empty ``text`` values of the English transcript segments, in
        the order the transcript API returns them.

    Raises:
        ValueError: If ``url`` is empty or no video id can be extracted from it.
    """
    if not url:
        raise ValueError("YouTube video url is required")

    # Parse the URL supplied by the caller (not a fixed sample video).
    parsed_url = urlparse(url)
    id_candidates = parse_qs(parsed_url.query).get("v", [])
    video_id = id_candidates[0] if id_candidates else None

    # Short links carry the id in the path instead of the query string.
    hostname = (parsed_url.hostname or "").lower()
    if video_id is None and hostname in ("youtu.be", "www.youtu.be"):
        video_id = parsed_url.path.strip("/").split("/")[0] or None

    if not video_id:
        raise ValueError("Invalid YouTube video url")

    video_script = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])

    return [segment["text"] for segment in video_script if segment["text"]]

def chunk_text(text: list, chunk_size: int = 1000, chunk_overlap: int = 100) -> list:
    """
    Split every string in ``text`` into chunks with a recursive character splitter.

    Args:
        text: The strings to split.
        chunk_size: Maximum chunk length, measured with ``len``.
        chunk_overlap: Overlap between consecutive chunks, measured with ``len``.

    Returns:
        The list produced by the splitter's ``create_documents`` call
        (presumably langchain ``Document`` objects rather than plain strings -
        verify against the langchain API).
    """
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
        "length_function": len,
    }
    return RecursiveCharacterTextSplitter(**splitter_options).create_documents(text)

def get_summarization(text: str, timeout: float = 60.0) -> str:
    """
    Summarize a string

    Posts ``text`` to the summarization model at ``SAMSUM_BART_URL`` and returns
    the ``summary_text`` of the first result.

    Args:
        text: The text to summarize.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot hang the caller forever.

    Returns:
        The summary produced by the model.

    Raises:
        RuntimeError: If the service answers with an error status or with a
            payload that does not contain a summary.
        requests.RequestException: On connection problems or timeouts.
    """
    response = requests.post(
        SAMSUM_BART_URL,
        headers=HEADERS,
        json={"inputs": text},
        timeout=timeout,
    )

    if not response.ok:
        # Include the body: it carries the service's own error description.
        raise RuntimeError(
            f"Summarization request failed with status {response.status_code}: "
            f"{response.text}"
        )

    payload = response.json()

    # A successful answer is expected to be a list whose first item holds the
    # summary; anything else is reported instead of surfacing as a KeyError.
    if (
        isinstance(payload, list)
        and payload
        and isinstance(payload[0], dict)
        and "summary_text" in payload[0]
    ):
        return payload[0]["summary_text"]

    raise RuntimeError(f"Unexpected response from summarization model: {payload!r}")

def summarize_transcript(text: list, chunk_size: int = 1000, chunk_overlap: int = 100) -> str:
    """
    Summarize a list of strings

    The segments are joined into one text, split into chunks, each chunk is
    summarized, and every chunk summary is summarized once more before the
    results are joined into the final text.

    Args:
        text: Transcript segments (a single string is also accepted).
        chunk_size: Maximum chunk length passed to ``chunk_text``.
        chunk_overlap: Chunk overlap passed to ``chunk_text``.

    Returns:
        The combined summary as a single string; ``""`` when there is no text
        to summarize.
    """
    if isinstance(text, str):
        text = [text]

    # Join the segments first: the splitter handles each input string on its
    # own, so passing short caption lines separately would produce one chunk
    # (and one model call) per line regardless of chunk_size.
    transcript = " ".join(segment for segment in text if segment).strip()
    if not transcript:
        return ""

    chunks = chunk_text([transcript], chunk_size, chunk_overlap)

    # chunk_text may return document objects exposing the text as
    # `page_content`; fall back to the chunk itself when it is a plain string.
    chunk_summaries = [
        get_summarization(getattr(chunk, "page_content", chunk))
        for chunk in chunks
    ]

    # Second pass over each chunk summary, joined with spaces so sentences
    # from neighbouring chunks do not run together.
    return " ".join(get_summarization(summary) for summary in chunk_summaries)

0 comments on commit 3c6b163

Please sign in to comment.