-
Notifications
You must be signed in to change notification settings - Fork 257
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
2,124 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
{ | ||
"title": "Multilingual-agent", | ||
"description": "Agent with multilingual capabilities to summarize a youtube video, translate the summary to a different language, and generate a text response in the target language.", | ||
"categories": ["Text Summarization", "Hugging Face"], | ||
"deltav": false | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
[tool.poetry] | ||
name = "multi-lingual-video-agent" | ||
version = "0.1.0" | ||
description = "Agent that can summarize an English-language video in other languages" | ||
authors = ["Lenin Kennedy <[email protected]>", "Samuela Abigail Mathew <[email protected]>"] | ||
license = "MIT License" | ||
readme = "README.md" | ||
|
||
[tool.poetry.dependencies] | ||
python = ">=3.10,<3.12" | ||
uagents = "0.11.0" | ||
youtube-transcript-api = "^0.6.2" | ||
langchain = "^0.1.12" | ||
|
||
|
||
[build-system] | ||
requires = ["poetry-core"] | ||
build-backend = "poetry.core.masonry.api" |
Empty file.
97 changes: 97 additions & 0 deletions
97
integrations/multilingual-agent/src/agents/multilingual_agent.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
# Necessary imports: uagents for agent creation and message handling, | ||
# os and requests for managing API calls | ||
from uagents import Agent, Context, Protocol, Model | ||
from pydantic import Field | ||
from ai_engine import UAgentResponse, UAgentResponseType | ||
from messages.basic import UAResponse, UARequest, Error | ||
from uagents.setup import fund_agent_if_low | ||
import os | ||
import requests | ||
from utils.functions import get_video_script, summarize_transcript | ||
|
||
# Access token and endpoint for the SAMSUM BART summarization model served by
# the Hugging Face Inference API. The token is read from the environment; the
# fallback value is only a placeholder string.
HUGGING_FACE_ACCESS_TOKEN = os.getenv(
    "HUGGING_FACE_ACCESS_TOKEN", "HUGGING FACE secret phrase :)")
SAMSUM_BART_URL = "https://api-inference.huggingface.co/models/Samuela39/my-samsum-model"

# Setting the headers for the API call
HEADERS = {
    "Authorization": f"Bearer {HUGGING_FACE_ACCESS_TOKEN}"
}

# NOTE(review): the agent seed is the Hugging Face token itself, so the agent's
# identity is tied to that secret. Kept unchanged so the printed address stays
# the same; consider a dedicated seed variable.
SEED = HUGGING_FACE_ACCESS_TOKEN

# Copy the address shown below
print(f"Your agent's address is: {Agent(seed=SEED).address}")

# Then go to https://agentverse.ai, register your agent in the Mailroom
# and copy the agent's mailbox key
AGENT_MAILBOX_KEY = os.getenv("AGENT_MAILBOX_KEY")
# Explicit check instead of `assert`: assertions are stripped under `python -O`,
# and an empty key would otherwise produce a malformed mailbox string below.
if not AGENT_MAILBOX_KEY:
    raise RuntimeError(
        "AGENT_MAILBOX_KEY environment variable is not set! "
        "Please set it to your agent's mailbox key!"
    )

# Now your agent is ready to join the agentverse!
agent = Agent(
    name="multilingual-agent",
    seed=SEED,
    mailbox=f"{AGENT_MAILBOX_KEY}@https://agentverse.ai",
)
|
||
# # Creating the agent and funding it if necessary | ||
# agent = Agent( | ||
# name="multilingual-agent", | ||
# seed=SEED, | ||
# port=8001, | ||
# endpoint=["http://127.0.0.1:8001/submit"], | ||
# ) | ||
# fund_agent_if_low(agent.wallet.address()) | ||
|
||
class UAResponse(Model):
    # NOTE(review): a UAResponse with the same single field is already imported
    # from messages.basic at the top of this module; this definition rebinds
    # that name. Documented with comments rather than a docstring so the
    # model definition itself is untouched.
    # List payload of the response; the element type is not constrained.
    response: list
|
||
class SummarizationRequest(Model):
    # Incoming request message: its only field is the URL of the video to
    # summarize (the description text is attached to the field itself).
    url: str = Field(description="URL of the video you want to summarize")
|
||
# Protocol (name "summary-request", version "0.1.0") on which the
# SummarizationRequest message handler is registered.
multilingual_agent = Protocol("summary-request", "0.1.0")
|
||
# Declaration of a message event handler for handling UARequests and send respective response. | ||
# The handler only ever replies with UAgentResponse, so that is the model
# declared in `replies` (the previous set listed models that were never sent).
@multilingual_agent.on_message(model=SummarizationRequest, replies={UAgentResponse})
async def handle_request(ctx: Context, sender: str, msg: SummarizationRequest):
    """
    Handle a SummarizationRequest: fetch the video transcript, summarize it and
    send the result back to the sender.

    Args:
        ctx: agent context used for logging and for sending the reply.
        sender: address of the requesting agent; the reply goes back to it.
        msg: request whose ``url`` field points at the video to summarize.

    Exactly one UAgentResponse is sent per request: type ERROR when the
    transcript cannot be fetched or the summarization fails, type FINAL with
    the summary text otherwise.
    """
    # Logging the request information
    ctx.logger.info(
        f"Got request from {sender} for summarization : {msg.url}")

    # SummarizationRequest only defines `url`, so that is the field to read.
    try:
        transcript = get_video_script(msg.url)
    except Exception as e:
        await ctx.send(
            sender,
            UAgentResponse(
                message=f"No transcript found for video! Error: {e}",
                type=UAgentResponseType.ERROR,
            ),
        )
        return

    # summarize_transcript is a plain (synchronous) function, so it must not be
    # awaited; the default chunking parameters are used.
    try:
        summary = summarize_transcript(transcript)
    except Exception as e:
        ctx.logger.exception("Summarization failed")
        await ctx.send(
            sender,
            UAgentResponse(
                message=f"Could not summarize video! Error: {e}",
                type=UAgentResponseType.ERROR,
            ),
        )
        return

    ctx.logger.info("Sending short article")
    await ctx.send(
        sender,
        UAgentResponse(
            message=f"Summary:\n{summary}",
            type=UAgentResponseType.FINAL,
        ),
    )
|
||
|
||
# Attach the summarization protocol to the agent, asking for its manifest to
# be published (publish_manifest=True).
agent.include(multilingual_agent, publish_manifest=True)

# Start the agent only when this module is executed as a script.
if __name__ == "__main__":
    agent.run()
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
from uagents import Model | ||
|
||
|
||
class UARequest(Model):
    # Request message with a single string field.
    text: str
|
||
|
||
class Error(Model):
    # Error message with a single string field; presumably a human-readable
    # description of what went wrong.
    error: str
|
||
|
||
class UAResponse(Model):
    # Response message with a single list field; the element type is not
    # constrained by the annotation.
    response: list
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
from urllib.parse import urlparse, parse_qs | ||
from youtube_transcript_api import YouTubeTranscriptApi | ||
from langchain_text_splitters import RecursiveCharacterTextSplitter | ||
import os | ||
import requests | ||
|
||
# Hugging Face Inference API settings for the SAMSUM BART summarization model.
# The token comes from the environment, with a placeholder string as fallback.
SAMSUM_BART_URL = "https://api-inference.huggingface.co/models/Samuela39/my-samsum-model"
HUGGING_FACE_ACCESS_TOKEN = os.getenv(
    "HUGGING_FACE_ACCESS_TOKEN",
    "HUGGING FACE secret phrase :)",
)

# Bearer-token authorization header used for the API call.
HEADERS = {"Authorization": f"Bearer {HUGGING_FACE_ACCESS_TOKEN}"}
|
||
|
||
def get_video_script(url: str) -> list:
    """
    Get the script of a YouTube video by its URL and return it as a list of strings

    The video id is taken from the ``v`` query parameter
    (``https://www.youtube.com/watch?v=<id>``) or, for ``youtu.be`` short
    links, from the first path segment. Only the English transcript is
    requested, and segments with empty text are dropped.

    Args:
        url: address of the YouTube video.

    Returns:
        The transcript as a list of non-empty text segments, in order.

    Raises:
        ValueError: if ``url`` is empty or no video id can be extracted from it.
        Errors raised by ``YouTubeTranscriptApi.get_transcript`` propagate
        unchanged.
    """
    if not url:
        raise ValueError("YouTube video url is required")

    # Parse the URL that was actually passed in (not a fixed sample video).
    parsed_url = urlparse(url.strip())
    video_id = parse_qs(parsed_url.query).get("v", [""])[0]

    # Short links carry the id in the path instead of the query string.
    if not video_id and parsed_url.hostname in ("youtu.be", "www.youtu.be"):
        video_id = parsed_url.path.strip("/").split("/")[0]

    if not video_id:
        raise ValueError("Invalid YouTube video url")

    video_script = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])

    return [segment["text"] for segment in video_script if segment["text"]]
|
||
def chunk_text(text: list, chunk_size: int = 1000, chunk_overlap: int = 100) -> list:
    """
    Split a list of strings with a RecursiveCharacterTextSplitter.

    ``chunk_size`` and ``chunk_overlap`` are measured with ``len`` (characters).
    The return value is whatever the splitter's ``create_documents`` produces
    for ``text``.
    """
    splitter = RecursiveCharacterTextSplitter(
        length_function=len,
        chunk_overlap=chunk_overlap,
        chunk_size=chunk_size,
    )
    return splitter.create_documents(text)
|
||
def get_summarization(text: str) -> str:
    """
    Summarize a string

    Posts ``{"inputs": text}`` to ``SAMSUM_BART_URL`` with ``HEADERS`` and
    returns the ``summary_text`` of the first element of the JSON response.

    Args:
        text: the text to summarize.

    Returns:
        The summary produced by the model.

    Raises:
        requests.HTTPError: if the API answers with an error status code.
        requests.Timeout: if the API does not answer within the timeout.
        ValueError: if the response body does not have the expected
            ``[{"summary_text": ...}]`` shape.
    """
    response = requests.post(
        SAMSUM_BART_URL,
        headers=HEADERS,
        json={"inputs": text},
        timeout=60,  # never hang forever on an unresponsive endpoint
    )
    # Surface HTTP failures explicitly instead of failing later on the payload.
    response.raise_for_status()

    payload = response.json()
    try:
        return payload[0]["summary_text"]
    except (KeyError, IndexError, TypeError) as err:
        raise ValueError(
            f"Unexpected response from summarization model: {payload!r}"
        ) from err
|
||
def summarize_transcript(text: list, chunk_size: int = 1000, chunk_overlap: int = 100) -> str:
    """
    Summarize a list of strings

    The segments are joined into one text, split into chunks with
    ``chunk_text``, and every chunk is summarized with ``get_summarization``.
    Each chunk summary is then summarized once more, and the results are joined
    with single spaces.

    Args:
        text: transcript segments (a single string is also accepted).
        chunk_size: maximum chunk length passed to ``chunk_text``.
        chunk_overlap: chunk overlap passed to ``chunk_text``.

    Returns:
        The combined summary, or an empty string when there is no text.
    """
    if isinstance(text, str):
        text = [text]

    # Join the segments first: chunking them one by one would make every
    # caption line its own chunk, so chunk_size would have no effect.
    transcript = " ".join(
        segment.strip() for segment in text if segment and segment.strip()
    )
    if not transcript:
        return ""

    chunks = chunk_text([transcript], chunk_size, chunk_overlap)

    # The splitter yields document objects; the API needs their plain text.
    # getattr keeps this working if the chunks are already strings.
    chunk_summaries = [
        get_summarization(getattr(chunk, "page_content", chunk))
        for chunk in chunks
    ]

    # Second pass over the per-chunk summaries, as in the original flow.
    return " ".join(get_summarization(summary) for summary in chunk_summaries)