Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add image support in file upload #2007

Merged
merged 20 commits into from
Feb 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cookbook/models/perplexity/basic_stream.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Iterator # noqa
from agno.agent import Agent, RunResponse # noqa
from agno.models.perplexity import Perplexity

agent = Agent(model=Perplexity(id="sonar"), markdown=True)

# Get the response in a variable
Expand Down
1 change: 0 additions & 1 deletion libs/agno/agno/models/perplexity/perplexity.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ class Perplexity(OpenAILike):
max_tokens (int): The maximum number of tokens. Defaults to 1024.
"""


id: str = "sonar"
name: str = "Perplexity"
provider: str = "Perplexity: " + id
Expand Down
122 changes: 64 additions & 58 deletions libs/agno/agno/playground/async_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,24 +125,20 @@ async def create_agent_run(
session_id: Optional[str] = Form(None),
user_id: Optional[str] = Form(None),
files: Optional[List[UploadFile]] = File(None),
image: Optional[UploadFile] = File(None),
):
logger.debug(f"AgentRunRequest: {message} {session_id} {user_id} {agent_id}")
agent = get_agent_by_id(agent_id, agents)
if agent is None:
raise HTTPException(status_code=404, detail="Agent not found")

if files:
if agent.knowledge is None:
raise HTTPException(status_code=404, detail="KnowledgeBase not found")

if session_id is not None:
logger.debug(f"Continuing session: {session_id}")
else:
logger.debug("Creating new session")

# Create a new instance of this agent
new_agent_instance = agent.deep_copy(update={"session_id": session_id})
new_agent_instance.session_name = None
if user_id is not None:
new_agent_instance.user_id = user_id

Expand All @@ -151,72 +147,82 @@ async def create_agent_run(
else:
new_agent_instance.monitoring = False

base64_image: Optional[Image] = None
if image:
base64_image = await process_image(image)
base64_images: List[Image] = []

if files:
for file in files:
if file.content_type == "application/pdf":
from agno.document.reader.pdf_reader import PDFReader

contents = await file.read()
pdf_file = BytesIO(contents)
pdf_file.name = file.filename
file_content = PDFReader().read(pdf_file)
if agent.knowledge is not None:
agent.knowledge.load_documents(file_content)
elif file.content_type == "text/csv":
from agno.document.reader.csv_reader import CSVReader

contents = await file.read()
csv_file = BytesIO(contents)
csv_file.name = file.filename
file_content = CSVReader().read(csv_file)
if agent.knowledge is not None:
agent.knowledge.load_documents(file_content)
elif file.content_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
from agno.document.reader.docx_reader import DocxReader

contents = await file.read()
docx_file = BytesIO(contents)
docx_file.name = file.filename
file_content = DocxReader().read(docx_file)
if agent.knowledge is not None:
agent.knowledge.load_documents(file_content)
elif file.content_type == "text/plain":
from agno.document.reader.text_reader import TextReader

contents = await file.read()
text_file = BytesIO(contents)
text_file.name = file.filename
file_content = TextReader().read(text_file)
if agent.knowledge is not None:
agent.knowledge.load_documents(file_content)

elif file.content_type == "application/json":
from agno.document.reader.json_reader import JSONReader

contents = await file.read()
json_file = BytesIO(contents)
json_file.name = file.filename
file_content = JSONReader().read(json_file)
if agent.knowledge is not None:
agent.knowledge.load_documents(file_content)
if file.content_type in ["image/png", "image/jpeg", "image/jpg", "image/webp"]:
try:
base64_image = await process_image(file)
base64_images.append(base64_image)
except Exception as e:
logger.error(f"Error processing image {file.filename}: {e}")
continue
else:
raise HTTPException(status_code=400, detail="Unsupported file type")
# Check for knowledge base before processing documents
if new_agent_instance.knowledge is None:
raise HTTPException(status_code=404, detail="KnowledgeBase not found")

if file.content_type == "application/pdf":
from agno.document.reader.pdf_reader import PDFReader

contents = await file.read()
pdf_file = BytesIO(contents)
pdf_file.name = file.filename
file_content = PDFReader().read(pdf_file)
if new_agent_instance.knowledge is not None:
new_agent_instance.knowledge.load_documents(file_content)
elif file.content_type == "text/csv":
from agno.document.reader.csv_reader import CSVReader

contents = await file.read()
csv_file = BytesIO(contents)
csv_file.name = file.filename
file_content = CSVReader().read(csv_file)
if new_agent_instance.knowledge is not None:
new_agent_instance.knowledge.load_documents(file_content)
elif file.content_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
from agno.document.reader.docx_reader import DocxReader

contents = await file.read()
docx_file = BytesIO(contents)
docx_file.name = file.filename
file_content = DocxReader().read(docx_file)
if new_agent_instance.knowledge is not None:
new_agent_instance.knowledge.load_documents(file_content)
elif file.content_type == "text/plain":
from agno.document.reader.text_reader import TextReader

contents = await file.read()
text_file = BytesIO(contents)
text_file.name = file.filename
file_content = TextReader().read(text_file)
if new_agent_instance.knowledge is not None:
new_agent_instance.knowledge.load_documents(file_content)

elif file.content_type == "application/json":
from agno.document.reader.json_reader import JSONReader

contents = await file.read()
json_file = BytesIO(contents)
json_file.name = file.filename
file_content = JSONReader().read(json_file)
if new_agent_instance.knowledge is not None:
new_agent_instance.knowledge.load_documents(file_content)
else:
raise HTTPException(status_code=400, detail="Unsupported file type")

if stream:
return StreamingResponse(
chat_response_streamer(new_agent_instance, message, images=[base64_image] if base64_image else None),
chat_response_streamer(new_agent_instance, message, images=base64_images if base64_images else None),
media_type="text/event-stream",
)
else:
run_response = cast(
RunResponse,
await new_agent_instance.arun(
message,
images=[base64_image] if base64_image else None,
message=message,
images=base64_images if base64_images else None,
stream=False,
),
)
Expand Down
124 changes: 64 additions & 60 deletions libs/agno/agno/playground/sync_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ def chat_response_streamer(agent: Agent, message: str, images: Optional[List[Ima

def process_image(file: UploadFile) -> Image:
    """Read an uploaded file and wrap its bytes in an ``Image``.

    Args:
        file: The upload whose underlying ``file`` object is read.

    Returns:
        An ``Image`` constructed from the bytes that were read.

    Raises:
        HTTPException: With status 400 when the read returns no data.
    """
    raw_bytes = file.file.read()
    if raw_bytes:
        return Image(content=raw_bytes)
    # Nothing was read from the upload, so reject it instead of building an empty image.
    raise HTTPException(status_code=400, detail="Empty file")

@playground_router.post("/agents/{agent_id}/runs")
Expand All @@ -107,17 +108,12 @@ def create_agent_run(
session_id: Optional[str] = Form(None),
user_id: Optional[str] = Form(None),
files: Optional[List[UploadFile]] = File(None),
image: Optional[UploadFile] = File(None),
):
logger.debug(f"AgentRunRequest: {message} {agent_id} {stream} {monitor} {session_id} {user_id} {files}")
agent = get_agent_by_id(agent_id, agents)
if agent is None:
raise HTTPException(status_code=404, detail="Agent not found")

if files:
if agent.knowledge is None:
raise HTTPException(status_code=404, detail="KnowledgeBase not found")

if session_id is not None:
logger.debug(f"Continuing session: {session_id}")
else:
Expand All @@ -135,73 +131,81 @@ def create_agent_run(
else:
new_agent_instance.monitoring = False

base64_image: Optional[Image] = None
if image:
base64_image = process_image(image)
base64_images: List[Image] = []

if files:
for file in files:
if file.content_type == "application/pdf":
from agno.document.reader.pdf_reader import PDFReader

contents = file.file.read()
pdf_file = BytesIO(contents)
pdf_file.name = file.filename
file_content = PDFReader().read(pdf_file)
if agent.knowledge is not None:
agent.knowledge.load_documents(file_content)
elif file.content_type == "text/csv":
from agno.document.reader.csv_reader import CSVReader

contents = file.file.read()
csv_file = BytesIO(contents)
csv_file.name = file.filename
file_content = CSVReader().read(csv_file)
if agent.knowledge is not None:
agent.knowledge.load_documents(file_content)
elif file.content_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
from agno.document.reader.docx_reader import DocxReader

contents = file.file.read()
docx_file = BytesIO(contents)
docx_file.name = file.filename
file_content = DocxReader().read(docx_file)
if agent.knowledge is not None:
agent.knowledge.load_documents(file_content)
elif file.content_type == "text/plain":
from agno.document.reader.text_reader import TextReader

contents = file.file.read()
text_file = BytesIO(contents)
text_file.name = file.filename
file_content = TextReader().read(text_file)
if agent.knowledge is not None:
agent.knowledge.load_documents(file_content)

elif file.content_type == "application/json":
from agno.document.reader.json_reader import JSONReader

content = file.read()
json_file = BytesIO(content)
json_file.name = file.filename
file_content = JSONReader().read(json_file)
if agent.knowledge is not None:
agent.knowledge.load_documents(file_content)

if file.content_type in ["image/png", "image/jpeg", "image/jpg", "image/webp"]:
try:
base64_image = process_image(file)
base64_images.append(base64_image)
except Exception as e:
logger.error(f"Error processing image {file.filename}: {e}")
continue
else:
raise HTTPException(status_code=400, detail="Unsupported file type")
# Check for knowledge base before processing documents
if new_agent_instance.knowledge is None:
raise HTTPException(status_code=404, detail="KnowledgeBase not found")

if file.content_type == "application/pdf":
from agno.document.reader.pdf_reader import PDFReader

contents = file.file.read()
pdf_file = BytesIO(contents)
pdf_file.name = file.filename
file_content = PDFReader().read(pdf_file)
if new_agent_instance.knowledge is not None:
new_agent_instance.knowledge.load_documents(file_content)
elif file.content_type == "text/csv":
from agno.document.reader.csv_reader import CSVReader

contents = file.file.read()
csv_file = BytesIO(contents)
csv_file.name = file.filename
file_content = CSVReader().read(csv_file)
if new_agent_instance.knowledge is not None:
new_agent_instance.knowledge.load_documents(file_content)
elif file.content_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
from agno.document.reader.docx_reader import DocxReader

contents = file.file.read()
docx_file = BytesIO(contents)
docx_file.name = file.filename
file_content = DocxReader().read(docx_file)
if new_agent_instance.knowledge is not None:
new_agent_instance.knowledge.load_documents(file_content)
elif file.content_type == "text/plain":
from agno.document.reader.text_reader import TextReader

contents = file.file.read()
text_file = BytesIO(contents)
text_file.name = file.filename
file_content = TextReader().read(text_file)
if new_agent_instance.knowledge is not None:
new_agent_instance.knowledge.load_documents(file_content)
elif file.content_type == "application/json":
from agno.document.reader.json_reader import JSONReader

contents = file.file.read()
json_file = BytesIO(contents)
json_file.name = file.filename
file_content = JSONReader().read(json_file)
if new_agent_instance.knowledge is not None:
new_agent_instance.knowledge.load_documents(file_content)
else:
raise HTTPException(status_code=400, detail="Unsupported file type")

if stream:
return StreamingResponse(
chat_response_streamer(new_agent_instance, message, images=[base64_image] if base64_image else None),
chat_response_streamer(new_agent_instance, message, images=base64_images if base64_images else None),
media_type="text/event-stream",
)
else:
run_response = cast(
RunResponse,
new_agent_instance.run(
message,
images=[base64_image] if base64_image else None,
message=message,
images=base64_images if base64_images else None,
stream=False,
),
)
Expand Down
Empty file.
Loading