feat: image to image agent

phidatahq · Dec 24, 2024 · b00921b · b00921b
1 parent b65c7a7
commit b00921b
Show file tree

Hide file tree

Showing 4 changed files with 80 additions and 4 deletions.
diff --git a/cookbook/agents/47_image_to_image.py b/cookbook/agents/47_image_to_image.py
@@ -0,0 +1,21 @@
+from phi.agent import Agent
+from phi.model.openai import OpenAIChat
+from phi.tools.fal_tools import FalTools
+
+# Ask the agent to restyle the fixed source image below according to a text prompt.
+agent = Agent(
+    name="Image to Image Agent",
+    agent_id="image-to-image",
+    model=OpenAIChat(id="gpt-4o"),
+    tools=[FalTools(image_url="https://fal.media/files/koala/Chls9L2ZnvuipUTEwlnJC.png")],
+    instructions=[
+        "You have to use the `image_to_image` tool to generate the image.",
+        "You are an AI agent that can generate images using the Fal AI API.",
+        "You will be given a prompt and an image URL.",
+        "You have to return the image URL as provided, don't convert it to markdown or anything else.",
+    ],
+    show_tool_calls=True,
+    markdown=True,
+    debug=True,
+)
+agent.print_response("a cat dressed as a wizard with a background of a mystic forest", stream=True)
diff --git a/cookbook/playground/multimodal_agent.py b/cookbook/playground/multimodal_agent.py
@@ -128,10 +128,26 @@
     storage=SqlAgentStorage(table_name="audio_agent", db_file=image_agent_storage_file),
 )
 
-
-app = Playground(agents=[image_agent, ml_gif_agent, ml_video_agent, fal_agent, gif_agent, audio_agent]).get_app(
-    use_async=False
+image_to_image_agent = Agent(  # transforms the fixed source image below via FalTools
+    name="Image to Image Agent",
+    agent_id="image_to_image_agent",
+    model=OpenAIChat(id="gpt-4o"),
+    tools=[FalTools(image_url="https://fal.media/files/koala/Chls9L2ZnvuipUTEwlnJC.png")],
+    markdown=True,
+    debug=True,
+    show_tool_calls=True,
+    instructions=[
+        "You have to use the `image_to_image` tool to generate the image.",
+        "You are an AI agent that can generate images using the Fal AI API.",
+        "You will be given a prompt and an image URL.",
+        "Don't return file name or file url in your response or markdown just tell the image was created successfully.",
+    ],
+    storage=SqlAgentStorage(table_name="image_to_image_agent", db_file=image_agent_storage_file),
 )
 
+# Register every agent, including the new image-to-image agent, with a single Playground app.
+_agents = [image_agent, ml_gif_agent, ml_video_agent, fal_agent, gif_agent, audio_agent, image_to_image_agent]
+app = Playground(agents=_agents).get_app(use_async=False)
+
 if __name__ == "__main__":
     serve_playground_app("multimodal_agent:app", reload=True)
diff --git a/cookbook/workflows/startup_idea_validator.py b/cookbook/workflows/startup_idea_validator.py
@@ -205,7 +205,7 @@ def run(self, startup_idea: str) -> Iterator[RunResponse]:
             table_name="validate_startup_ideas_workflow",
             db_file="tmp/workflows.db",
         ),
-        debug_mode=True
+        debug_mode=True,
     )
 
     final_report: Iterator[RunResponse] = startup_idea_validator.run(startup_idea=idea)

diff --git a/phi/tools/fal_tools.py b/phi/tools/fal_tools.py
@@ -23,15 +23,18 @@ def __init__(
         self,
         api_key: Optional[str] = None,
         model: str = "fal-ai/hunyuan-video",
+        image_url: Optional[str] = None,
     ):
         super().__init__(name="fal")
 
         self.api_key = api_key or getenv("FAL_KEY")
         if not self.api_key:
             logger.error("FAL_KEY not set. Please set the FAL_KEY environment variable.")
         self.model = model
+        self.image_url = image_url
         self.seen_logs: set[str] = set()
         self.register(self.generate_media)
+        self.register(self.image_to_image)
 
     def on_queue_update(self, update):
         if isinstance(update, fal_client.InProgress) and update.logs:
@@ -86,3 +89,39 @@ def generate_media(self, agent: Agent, prompt: str) -> str:
         except Exception as e:
             logger.error(f"Failed to run model: {e}")
             return f"Error: {e}"
+
+    def image_to_image(self, agent: Agent, prompt: str) -> str:
+        """
+        Use this function to generate an image from a given image using the Fal AI API.
+        The source image is the `image_url` configured on this toolkit, not an argument.
+
+        Args:
+            prompt (str): A text description of the task.
+        Returns:
+            str: A success message containing the generated image URL, or an
+                "Error: ..." message when the request fails or yields no image.
+        Raises:
+            ValueError: If the toolkit was created without an `image_url`.
+        """
+        if not self.image_url:
+            raise ValueError("Image URL is required but not provided.")
+
+        try:
+            result = fal_client.subscribe(
+                "fal-ai/flux/dev/image-to-image",
+                arguments={"image_url": self.image_url, "prompt": prompt},
+                with_logs=True,
+                on_queue_update=self.on_queue_update,
+            )
+            logger.debug(f"image_to_image result: {result}")
+            # An absent or empty "images" list must not be reported as a success.
+            images = result.get("images") or [{}]
+            url = images[0].get("url", "")
+            if not url:
+                logger.error("Failed to generate image: no image URL in response")
+                return "Error: no image URL returned by the model"
+            agent.add_image(Image(id=str(uuid4()), url=url))
+            return f"Image generated successfully at {url}"
+        except Exception as e:
+            logger.error(f"Failed to generate image: {e}")
+            return f"Error: {e}"