Skip to content

Commit

Permalink
Image to image agent (#1628)
Browse files Browse the repository at this point in the history
## Description

Adds an image-to-image agent that uses the Fal tools.
  • Loading branch information
anuragts authored Jan 3, 2025
1 parent 85ea7e2 commit 0d4725f
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 1 deletion.
21 changes: 21 additions & 0 deletions cookbook/agents/47_image_to_image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from phi.agent import Agent
from phi.model.openai import OpenAIChat
from phi.tools.fal_tools import FalTools

# Cookbook example: an agent equipped with FalTools that is instructed to call
# the `image_to_image` tool and to hand back the resulting image URL untouched.
agent = Agent(
    name="Image to Image Agent",
    agent_id="image-to-image",
    model=OpenAIChat(id="gpt-4o"),
    tools=[FalTools()],
    instructions=[
        "You have to use the `image_to_image` tool to generate the image.",
        "You are an AI agent that can generate images using the Fal AI API.",
        "You will be given a prompt and an image URL.",
        "You have to return the image URL as provided, don't convert it to markdown or anything else.",
    ],
    show_tool_calls=True,
    markdown=True,
    debug=True,
)

# The request carries both the text description and the reference image URL.
# Adjacent literals are concatenated at compile time into one prompt string.
wizard_cat_request = (
    "a cat dressed as a wizard with a background of a mystic forest. "
    "Make it look like 'https://fal.media/files/koala/Chls9L2ZnvuipUTEwlnJC.png'"
)

agent.print_response(wizard_cat_request, stream=True)
19 changes: 18 additions & 1 deletion cookbook/playground/multimodal_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,23 @@
storage=SqlAgentStorage(table_name="audio_agent", db_file=image_agent_storage_file),
)

# Playground agent that is instructed to call the Fal `image_to_image` tool and
# to describe the generated image instead of echoing its URL.
image_to_image_agent = Agent(
    # Identity
    agent_id="image_to_image_agent",
    name="Image to Image Agent",
    # Model and tools
    model=OpenAIChat(id="gpt-4o"),
    tools=[FalTools()],
    # Behaviour
    instructions=[
        "You have to use the `image_to_image` tool to generate the image.",
        "You are an AI agent that can generate images using the Fal AI API.",
        "You will be given a prompt and an image URL.",
        "Don't provide the URL of the image in the response. Only describe what image was generated.",
    ],
    # Persistence: own table inside the shared image-agent storage file
    storage=SqlAgentStorage(
        table_name="image_to_image_agent",
        db_file=image_agent_storage_file,
    ),
    # Output / diagnostics
    show_tool_calls=True,
    markdown=True,
    debug=True,
)

hindi_audio_agent = Agent(
name="Hindi Audio Generator Agent",
agent_id="hindi_audio_agent",
Expand All @@ -152,7 +169,7 @@


# Build the Playground app from the agents listed below. Only one `agents=`
# argument may be passed; the list includes the image-to-image agent.
app = Playground(
    agents=[
        image_agent,
        ml_gif_agent,
        ml_video_agent,
        fal_agent,
        gif_agent,
        audio_agent,
        hindi_audio_agent,
        image_to_image_agent,
    ]
).get_app(use_async=False)

if __name__ == "__main__":
Expand Down
38 changes: 38 additions & 0 deletions phi/tools/fal_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def __init__(
self.model = model
self.seen_logs: set[str] = set()
self.register(self.generate_media)
self.register(self.image_to_image)

def on_queue_update(self, update):
if isinstance(update, fal_client.InProgress) and update.logs:
Expand Down Expand Up @@ -86,3 +87,40 @@ def generate_media(self, agent: Agent, prompt: str) -> str:
except Exception as e:
logger.error(f"Failed to run model: {e}")
return f"Error: {e}"

def image_to_image(self, agent: Agent, prompt: str, image_url: Optional[str] = None) -> str:
    """
    Use this function to transform an input image based on a text prompt using the Fal AI image-to-image model.
    The model takes an existing image and generates a new version modified according to your prompt.
    See https://fal.ai/models/fal-ai/flux/dev/image-to-image/api for more details about the image-to-image capabilities.

    Args:
        prompt (str): A text description of the task.
        image_url (str): The URL of the image to use for the generation. Required.

    Returns:
        str: A success message containing the generated image URL, or a message starting with "Error:" on failure.
    """
    # The signature keeps `image_url` optional for backward compatibility, but
    # the request is meaningless without a source image, so fail fast instead
    # of sending `None` to the API.
    if not image_url:
        return "Error: image_url is required for image-to-image generation"

    try:
        result = fal_client.subscribe(
            "fal-ai/flux/dev/image-to-image",
            arguments={"image_url": image_url, "prompt": prompt},
            with_logs=True,
            on_queue_update=self.on_queue_update,
        )

        # Tolerate a missing/empty "images" list or an entry without a URL:
        # never register an Image with an empty URL or report a false success.
        images = result.get("images") or []
        url = images[0].get("url", "") if images else ""
        if not url:
            logger.error("Failed to generate image: no image URL in the model response")
            return "Error: no image URL in the model response"

        # Attach the generated image to the agent under a fresh unique id.
        agent.add_image(Image(id=str(uuid4()), url=url))

        return f"Image generated successfully at {url}"

    except Exception as e:
        # Mirror the error-reporting style of the sibling tool: log, then hand
        # the error text back to the caller as the tool result.
        logger.error(f"Failed to generate image: {e}")
        return f"Error: {e}"

0 comments on commit 0d4725f

Please sign in to comment.