diff --git a/cookbook/agents/47_image_to_image.py b/cookbook/agents/47_image_to_image.py
new file mode 100644
index 0000000000..2f921ae4c0
--- /dev/null
+++ b/cookbook/agents/47_image_to_image.py
@@ -0,0 +1,22 @@
+from phi.agent import Agent
+from phi.model.openai import OpenAIChat
+from phi.tools.fal_tools import FalTools
+
+# The source image is fixed on the toolkit via `image_url`; the prompt passed below only describes the edit.
+agent = Agent(
+    model=OpenAIChat(id="gpt-4o"),
+    agent_id="image-to-image",
+    name="Image to Image Agent",
+    tools=[FalTools(image_url="https://fal.media/files/koala/Chls9L2ZnvuipUTEwlnJC.png")],
+    markdown=True,
+    debug=True,
+    show_tool_calls=True,
+    instructions=[
+        "You have to use the `image_to_image` tool to generate the image.",
+        "You are an AI agent that can generate images using the Fal AI API.",
+        "You will be given a prompt and an image URL.",
+        "You have to return the image URL as provided, don't convert it to markdown or anything else.",
+    ],
+)
+
+agent.print_response("a cat dressed as a wizard with a background of a mystic forest", stream=True)
diff --git a/cookbook/playground/multimodal_agent.py b/cookbook/playground/multimodal_agent.py
index 412168ec52..31d6ca5eee 100644
--- a/cookbook/playground/multimodal_agent.py
+++ b/cookbook/playground/multimodal_agent.py
@@ -128,10 +128,26 @@
     storage=SqlAgentStorage(table_name="audio_agent", db_file=image_agent_storage_file),
 )
 
-
-app = Playground(agents=[image_agent, ml_gif_agent, ml_video_agent, fal_agent, gif_agent, audio_agent]).get_app(
-    use_async=False
+image_to_image_agent = Agent(
+    name="Image to Image Agent",
+    agent_id="image_to_image_agent",
+    model=OpenAIChat(id="gpt-4o"),
+    tools=[FalTools(image_url="https://fal.media/files/koala/Chls9L2ZnvuipUTEwlnJC.png")],
+    markdown=True,
+    debug=True,
+    show_tool_calls=True,
+    instructions=[
+        "You have to use the `image_to_image` tool to generate the image.",
+        "You are an AI agent that can generate images using the Fal AI API.",
+        "You will be given a prompt and an image URL.",
+        "Don't return file name or file url in your response or markdown just tell the image was created successfully.",
+    ],
+    storage=SqlAgentStorage(table_name="image_to_image_agent", db_file=image_agent_storage_file),
 )
 
+app = Playground(
+    agents=[image_agent, ml_gif_agent, ml_video_agent, fal_agent, gif_agent, audio_agent, image_to_image_agent]
+).get_app(use_async=False)
+
 if __name__ == "__main__":
     serve_playground_app("multimodal_agent:app", reload=True)
diff --git a/cookbook/workflows/startup_idea_validator.py b/cookbook/workflows/startup_idea_validator.py
index c4070f6227..99bc89dfc9 100644
--- a/cookbook/workflows/startup_idea_validator.py
+++ b/cookbook/workflows/startup_idea_validator.py
@@ -205,7 +205,7 @@ def run(self, startup_idea: str) -> Iterator[RunResponse]:
             table_name="validate_startup_ideas_workflow",
             db_file="tmp/workflows.db",
         ),
-        debug_mode=True
+        debug_mode=True,
     )
 
     final_report: Iterator[RunResponse] = startup_idea_validator.run(startup_idea=idea)
diff --git a/phi/tools/fal_tools.py b/phi/tools/fal_tools.py
index 8be8d9e7fc..ca40aa3120 100644
--- a/phi/tools/fal_tools.py
+++ b/phi/tools/fal_tools.py
@@ -23,6 +23,7 @@ def __init__(
         self,
         api_key: Optional[str] = None,
         model: str = "fal-ai/hunyuan-video",
+        image_url: Optional[str] = None,
     ):
         super().__init__(name="fal")
 
@@ -30,8 +31,10 @@ def __init__(
         if not self.api_key:
             logger.error("FAL_KEY not set. Please set the FAL_KEY environment variable.")
         self.model = model
+        self.image_url = image_url
         self.seen_logs: set[str] = set()
         self.register(self.generate_media)
+        self.register(self.image_to_image)
 
     def on_queue_update(self, update):
         if isinstance(update, fal_client.InProgress) and update.logs:
@@ -86,3 +89,41 @@ def generate_media(self, agent: Agent, prompt: str) -> str:
         except Exception as e:
             logger.error(f"Failed to run model: {e}")
             return f"Error: {e}"
+
+    def image_to_image(self, agent: Agent, prompt: str) -> str:
+        """
+        Use this function to generate a new image from the configured source image using the Fal AI API.
+
+        The source image is the `image_url` given when the toolkit was created; it is not a tool argument.
+
+        Args:
+            prompt (str): A text description of the task.
+        Returns:
+            str: A success message with the generated image URL, or a message starting with "Error:".
+        Raises:
+            ValueError: If the toolkit was created without an `image_url`.
+        """
+        if not self.image_url:
+            raise ValueError("Image URL is required but not provided.")
+
+        try:
+            result = fal_client.subscribe(
+                "fal-ai/flux/dev/image-to-image",
+                arguments={"image_url": self.image_url, "prompt": prompt},
+                with_logs=True,
+                on_queue_update=self.on_queue_update,
+            )
+            logger.debug(f"Fal image-to-image result: {result}")
+
+            # An empty or missing "images" list must not be reported as a success with a blank URL.
+            images = result.get("images") or []
+            url = images[0].get("url", "") if images else ""
+            if not url:
+                return "Error: Fal AI returned no image URL."
+
+            agent.add_image(Image(id=str(uuid4()), url=url))
+            return f"Image generated successfully at {url}"
+
+        except Exception as e:
+            logger.error(f"Failed to generate image: {e}")
+            return f"Error: {e}"