Image to image agent (#1628)

## Description

Image-to-image agent that uses the Fal tools.
phidatahq · Jan 3, 2025 · 0d4725f · 0d4725f
1 parent 85ea7e2
commit 0d4725f
Show file tree

Hide file tree

Showing 3 changed files with 77 additions and 1 deletion.
diff --git a/cookbook/agents/47_image_to_image.py b/cookbook/agents/47_image_to_image.py
@@ -0,0 +1,21 @@
+from phi.agent import Agent
+from phi.model.openai import OpenAIChat
+from phi.tools.fal_tools import FalTools
+
+agent = Agent(
+    agent_id="image-to-image",
+    name="Image to Image Agent",
+    model=OpenAIChat(id="gpt-4o"),
+    tools=[FalTools()],
+    instructions=[
+        "You have to use the `image_to_image` tool to generate the image.",
+        "You are an AI agent that can generate images using the Fal AI API.",
+        "You will be given a prompt and an image URL.",
+        "You have to return the image URL as provided, don't convert it to markdown or anything else.",
+    ],
+    show_tool_calls=True,
+    markdown=True,
+    debug=True,
+)
+# Run once: the reference image URL is embedded directly in the prompt text.
+agent.print_response("a cat dressed as a wizard with a background of a mystic forest. Make it look like 'https://fal.media/files/koala/Chls9L2ZnvuipUTEwlnJC.png'", stream=True)
diff --git a/cookbook/playground/multimodal_agent.py b/cookbook/playground/multimodal_agent.py
@@ -129,6 +129,23 @@
     storage=SqlAgentStorage(table_name="audio_agent", db_file=image_agent_storage_file),
 )
 
+image_to_image_agent = Agent(
+    agent_id="image_to_image_agent",
+    name="Image to Image Agent",
+    model=OpenAIChat(id="gpt-4o"),
+    tools=[FalTools()],
+    storage=SqlAgentStorage(table_name="image_to_image_agent", db_file=image_agent_storage_file),
+    instructions=[
+        "You have to use the `image_to_image` tool to generate the image.",
+        "You are an AI agent that can generate images using the Fal AI API.",
+        "You will be given a prompt and an image URL.",
+        "Don't provide the URL of the image in the response. Only describe what image was generated.",
+    ],
+    show_tool_calls=True,
+    markdown=True,
+    debug=True,
+)
+
 hindi_audio_agent = Agent(
     name="Hindi Audio Generator Agent",
     agent_id="hindi_audio_agent",
@@ -152,7 +169,7 @@
 
 
 app = Playground(
-    agents=[image_agent, ml_gif_agent, ml_video_agent, fal_agent, gif_agent, audio_agent, hindi_audio_agent]
+    agents=[image_agent, ml_gif_agent, ml_video_agent, fal_agent, gif_agent, audio_agent, hindi_audio_agent, image_to_image_agent]
 ).get_app(use_async=False)
 
 if __name__ == "__main__":

diff --git a/phi/tools/fal_tools.py b/phi/tools/fal_tools.py
@@ -32,6 +32,7 @@ def __init__(
         self.model = model
         self.seen_logs: set[str] = set()
         self.register(self.generate_media)
+        self.register(self.image_to_image)
 
     def on_queue_update(self, update):
         if isinstance(update, fal_client.InProgress) and update.logs:
@@ -86,3 +87,40 @@ def generate_media(self, agent: Agent, prompt: str) -> str:
         except Exception as e:
             logger.error(f"Failed to run model: {e}")
             return f"Error: {e}"
+
+    def image_to_image(self, agent: Agent, prompt: str, image_url: Optional[str] = None) -> str:
+        """
+        Use this function to transform an input image based on a text prompt using the Fal AI image-to-image model.
+        The model takes an existing image and generates a new version modified according to your prompt.
+        See https://fal.ai/models/fal-ai/flux/dev/image-to-image/api for more details about the image-to-image capabilities.
+
+        Args:
+            prompt (str): A text description of how the input image should be transformed.
+            image_url (str): The URL of the source image. Required; an error message is returned if it is missing.
+
+        Returns:
+            str: A success message containing the generated image URL, or a message starting with "Error:".
+        """
+        if not image_url:
+            # The None default only keeps the signature backward-compatible; a source image is required.
+            return "Error: image_url is required for image-to-image generation."
+
+        try:
+            result = fal_client.subscribe(
+                "fal-ai/flux/dev/image-to-image",
+                arguments={"image_url": image_url, "prompt": prompt},
+                with_logs=True,
+                on_queue_update=self.on_queue_update,
+            )
+            # `images` may be missing or empty, so never index into an empty list.
+            images = result.get("images") or [{}]
+            url = images[0].get("url", "")
+            if not url:
+                return "Error: the model did not return an image URL."
+
+            agent.add_image(Image(id=str(uuid4()), url=url))
+            return f"Image generated successfully at {url}"
+
+        except Exception as e:
+            logger.error(f"Failed to generate image: {e}")
+            return f"Error: {e}"