Skip to content

Commit

Permalink
feat: image to image agent
Browse files Browse the repository at this point in the history
  • Loading branch information
anuragts committed Dec 24, 2024
1 parent b65c7a7 commit b00921b
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 4 deletions.
21 changes: 21 additions & 0 deletions cookbook/agents/47_image_to_image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from phi.agent import Agent
from phi.model.openai import OpenAIChat
from phi.tools.fal_tools import FalTools

# Chat model that drives the agent.
chat_model = OpenAIChat(id="gpt-4o")

# Fal toolkit configured with the source image used by the `image_to_image` tool.
fal_toolkit = FalTools(image_url="https://fal.media/files/koala/Chls9L2ZnvuipUTEwlnJC.png")

# Instructions handed to the agent, in the order they should be read.
agent_instructions = [
    "You have to use the `image_to_image` tool to generate the image.",
    "You are an AI agent that can generate images using the Fal AI API.",
    "You will be given a prompt and an image URL.",
    "You have to return the image URL as provided, don't convert it to markdown or anything else.",
]

agent = Agent(
    model=chat_model,
    agent_id="image-to-image",
    name="Image to Image Agent",
    tools=[fal_toolkit],
    markdown=True,
    debug=True,
    show_tool_calls=True,
    instructions=agent_instructions,
)

# Run the agent on a sample prompt and stream the response to the console.
agent.print_response("a cat dressed as a wizard with a background of a mystic forest", stream=True)
22 changes: 19 additions & 3 deletions cookbook/playground/multimodal_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,26 @@
storage=SqlAgentStorage(table_name="audio_agent", db_file=image_agent_storage_file),
)


app = Playground(agents=[image_agent, ml_gif_agent, ml_video_agent, fal_agent, gif_agent, audio_agent]).get_app(
use_async=False
# Agent that transforms a fixed source image according to the user's prompt via the
# Fal `image_to_image` tool. Sessions are persisted in the shared image-agent storage file.
image_to_image_agent = Agent(
    name="Image to Image Agent",
    agent_id="image_to_image_agent",
    model=OpenAIChat(id="gpt-4o"),
    tools=[FalTools(image_url="https://fal.media/files/koala/Chls9L2ZnvuipUTEwlnJC.png")],
    markdown=True,
    debug=True,
    show_tool_calls=True,
    instructions=[
        "You have to use the `image_to_image` tool to generate the image.",
        "You are an AI agent that can generate images using the Fal AI API.",
        "You will be given a prompt and an image URL.",
        # This agent produces images, not audio; the wording was copied from the audio agent.
        "Don't return file name or file url in your response or markdown just tell the image was created successfully.",
    ],
    storage=SqlAgentStorage(table_name="image_to_image_agent", db_file=image_agent_storage_file),
)

# Agents handed to the Playground, including the image-to-image agent.
playground_agents = [
    image_agent,
    ml_gif_agent,
    ml_video_agent,
    fal_agent,
    gif_agent,
    audio_agent,
    image_to_image_agent,
]
app = Playground(agents=playground_agents).get_app(use_async=False)

if __name__ == "__main__":
    # The string names this module and its `app` attribute.
    serve_playground_app("multimodal_agent:app", reload=True)
2 changes: 1 addition & 1 deletion cookbook/workflows/startup_idea_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def run(self, startup_idea: str) -> Iterator[RunResponse]:
table_name="validate_startup_ideas_workflow",
db_file="tmp/workflows.db",
),
debug_mode=True
debug_mode=True,
)

final_report: Iterator[RunResponse] = startup_idea_validator.run(startup_idea=idea)
Expand Down
39 changes: 39 additions & 0 deletions phi/tools/fal_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,18 @@ def __init__(
self,
api_key: Optional[str] = None,
model: str = "fal-ai/hunyuan-video",
image_url: Optional[str] = None,
):
super().__init__(name="fal")

self.api_key = api_key or getenv("FAL_KEY")
if not self.api_key:
logger.error("FAL_KEY not set. Please set the FAL_KEY environment variable.")
self.model = model
self.image_url = image_url
self.seen_logs: set[str] = set()
self.register(self.generate_media)
self.register(self.image_to_image)

def on_queue_update(self, update):
if isinstance(update, fal_client.InProgress) and update.logs:
Expand Down Expand Up @@ -86,3 +89,39 @@ def generate_media(self, agent: Agent, prompt: str) -> str:
except Exception as e:
logger.error(f"Failed to run model: {e}")
return f"Error: {e}"

def image_to_image(self, agent: Agent, prompt: str) -> str:
    """
    Use this function to generate a new image from the configured source image using the Fal AI API.

    The source image is the `image_url` stored on this toolkit; it is not a call argument.

    Args:
        prompt (str): A text description of the task.
    Returns:
        str: A success message containing the generated image URL, or a message
            starting with "Error:" if the generation failed.
    Raises:
        ValueError: If no `image_url` is set on the toolkit.
    """
    if not self.image_url:
        raise ValueError("Image URL is required but not provided.")

    try:
        result = fal_client.subscribe(
            "fal-ai/flux/dev/image-to-image",
            arguments={"image_url": self.image_url, "prompt": prompt},
            with_logs=True,
            on_queue_update=self.on_queue_update,
        )
        # Log the raw result through the module logger instead of printing to stdout.
        logger.debug(f"Fal image-to-image result: {result}")

        # `images` may be missing or an empty list; fall back to an empty entry
        # so the lookup below cannot raise IndexError.
        images = result.get("images") or [{}]
        url = images[0].get("url", "")
        if not url:
            # Do not register an image with an empty URL or report a false success.
            logger.error("Failed to generate image: no image URL in the response.")
            return "Error: no image URL was returned by the model."

        agent.add_image(Image(id=str(uuid4()), url=url))
        return f"Image generated successfully at {url}"

    except Exception as e:
        logger.error(f"Failed to generate image: {e}")
        return f"Error: {e}"

0 comments on commit b00921b

Please sign in to comment.