[wip] adding workflow tool #17237

Draft · wants to merge 13 commits into base: main
250 changes: 250 additions & 0 deletions docs/docs/understanding/agent/multi_agents.md
@@ -0,0 +1,250 @@
# Multi-Agent Workflows

The `MultiAgentWorkflow` lets you build a system of multiple agents that collaborate and hand off tasks to one another based on their specialized capabilities. This enables more complex agent systems where different agents handle different aspects of a task.

## Quick Start

Here's a simple example of setting up a multi-agent workflow with a calculator agent and a retriever agent:

```python
from llama_index.core.agent.multi_agent import (
    MultiAgentWorkflow,
    AgentConfig,
    AgentMode,
)
from llama_index.core.tools import FunctionTool
from llama_index.llms.openai import OpenAI


# Define some tools
def add(a: int, b: int) -> int:
    """Add two numbers."""
    return a + b


def subtract(a: int, b: int) -> int:
    """Subtract two numbers."""
    return a - b


# Create agent configs
calculator_agent = AgentConfig(
    name="calculator",
    description="Performs basic arithmetic operations",
    system_prompt="You are a calculator assistant.",
    mode=AgentMode.REACT,
    tools=[
        FunctionTool.from_defaults(fn=add),
        FunctionTool.from_defaults(fn=subtract),
    ],
    llm=OpenAI(model="gpt-4"),
)

retriever_agent = AgentConfig(
    name="retriever",
    description="Manages data retrieval",
    system_prompt="You are a retrieval assistant.",
    mode=AgentMode.FUNCTION,
    is_entrypoint_agent=True,
    llm=OpenAI(model="gpt-4"),
)

# Create and run the workflow
workflow = MultiAgentWorkflow(
    agent_configs=[calculator_agent, retriever_agent]
)

# Run the system
response = await workflow.run(user_msg="Can you add 5 and 3?")

# Or stream the events
handler = workflow.run(user_msg="Can you add 5 and 3?")
async for event in handler.stream_events():
    if hasattr(event, "delta"):
        print(event.delta, end="", flush=True)
```

## How It Works

The `MultiAgentWorkflow` manages a collection of agents, each with its own specialized capabilities. One agent must be designated as the entry point agent (`is_entrypoint_agent=True`).

When a user message comes in, it is first routed to the entry point agent. Each agent can then:

1. Handle the request directly using its tools
2. Hand off to another agent better suited for the task
3. Return a response to the user

Agents can be configured in two modes:

- `REACT`: Uses ReAct prompting for reasoning about tool usage
- `FUNCTION`: Uses the OpenAI-style function calling API for tool usage

If no mode is set, the workflow resolves it automatically: `FUNCTION` when the agent's LLM supports function calling, and `REACT` otherwise.
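Handoffs can also be restricted per agent with `can_handoff_to`. A minimal sketch reusing the quick-start configs (the agent names here are illustrative):

```python
# Restrict which agents the calculator may hand off to.
# Assumes an agent named "retriever" exists in the same workflow.
calculator_agent = AgentConfig(
    name="calculator",
    description="Performs basic arithmetic operations",
    system_prompt="You are a calculator assistant.",
    tools=[FunctionTool.from_defaults(fn=add)],
    llm=OpenAI(model="gpt-4"),
    can_handoff_to=["retriever"],
)
```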

## Configuration Options

### Agent Config

Each agent is configured with an `AgentConfig`:

```python
AgentConfig(
    # Unique name for the agent (str)
    name="name",
    # Description of the agent's capabilities (str)
    description="description",
    # System prompt for the agent (str)
    system_prompt="system_prompt",
    # "react" or "function" -- defaults to "function" when the LLM supports it (str)
    mode="function",
    # Tools available to this agent (List[BaseTool])
    tools=[...],
    # Optional retriever to fetch tools dynamically (Optional[ObjectRetriever])
    tool_retriever=None,
    # LLM to use for this agent (BaseLLM)
    llm=OpenAI(model="gpt-4"),
    # Whether this is the entry point agent (bool)
    is_entrypoint_agent=True,
    # List of agents this one can hand off to. Defaults to all agents (List[str])
    can_handoff_to=[...],
)
```

### Workflow Options

The MultiAgentWorkflow constructor accepts:

```python
MultiAgentWorkflow(
    # List of agent configs (List[AgentConfig])
    agent_configs=[...],
    # Initial state dict (Optional[dict])
    initial_state=None,
    # Custom prompt for handoffs. Should contain the `agent_info` string variable (Optional[str])
    handoff_prompt=None,
    # Custom prompt for state. Should contain the `state` and `msg` string variables (Optional[str])
    state_prompt=None,
)
```
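For example, a custom handoff prompt might look like the following. The template text is illustrative; the `{agent_info}` variable is filled in with the names and descriptions of the available agents:

```python
workflow = MultiAgentWorkflow(
    agent_configs=[...],
    handoff_prompt=(
        "You can hand off the conversation to one of the following agents:\n"
        "{agent_info}\n"
        "Only hand off when another agent is clearly better suited."
    ),
)
```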

### State Management

You can provide an initial state dict that will be available to all agents:

```python
workflow = MultiAgentWorkflow(
    agent_configs=[...],
    initial_state={"counter": 0},
    state_prompt="Current state: {state}. User message: {msg}",
)
```

The state is stored in the `state` key of the workflow context.
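Context-aware tools (see `FunctionToolWithContext` below) can read and update this dict. A minimal sketch, assuming the `initial_state` above:

```python
from llama_index.core.workflow import Context


async def increment_counter(ctx: Context) -> int:
    """Increment the shared counter and return the new value."""
    # The shared dict lives under the "state" key of the workflow context
    state = await ctx.get("state")
    state["counter"] += 1
    await ctx.set("state", state)
    return state["counter"]
```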

To persist state between runs, pass in the context from the previous run:

```python
workflow = MultiAgentWorkflow(...)

# Run the workflow
handler = workflow.run(user_msg="Can you add 5 and 3?")
response = await handler

# Pass in the context from the previous run
response = await workflow.run(ctx=handler.ctx, user_msg="Can you add 5 and 3?")
```

As with normal workflows, the context is serializable:

```python
from llama_index.core.workflow import (
    Context,
    JsonSerializer,
    JsonPickleSerializer,
)

# the default serializer is JsonSerializer for safety
ctx_dict = handler.ctx.to_dict(serializer=JsonSerializer())

# then you can rehydrate the context
ctx = Context.from_dict(ctx_dict, serializer=JsonSerializer())
```
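The rehydrated context can then be passed back into a new run to pick up where the previous one left off:

```python
# Resume the conversation with the restored context
response = await workflow.run(ctx=ctx, user_msg="What did I ask you before?")
```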

## Streaming Events

The workflow emits various events during execution that you can stream:

```python
# AgentInput, AgentStream, AgentOutput, ToolCall, and ToolCallResult are the
# event classes emitted by the workflow (defined alongside MultiAgentWorkflow)
async for event in workflow.run(...).stream_events():
    if isinstance(event, AgentInput):
        print(event.input)
        print(event.current_agent)
    elif isinstance(event, AgentStream):
        # Agent thinking/tool calling response stream
        print(event.delta)
        print(event.current_agent)
    elif isinstance(event, AgentOutput):
        print(event.response)
        print(event.tool_calls)
        print(event.raw_response)
        print(event.current_agent)
    elif isinstance(event, ToolCall):
        # Tool being called
        print(event.tool_name)
        print(event.tool_kwargs)
    elif isinstance(event, ToolCallResult):
        # Result of tool call
        print(event.tool_output)
```

## Accessing Context in Tools

The `FunctionToolWithContext` allows tools to access the workflow context:

```python
from llama_index.core.workflow import Context, FunctionToolWithContext


async def get_counter(ctx: Context) -> int:
    """Get the current counter value."""
    return await ctx.get("counter", default=0)


counter_tool = FunctionToolWithContext.from_defaults(
    async_fn=get_counter, description="Get the current counter value"
)
```
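The resulting tool can be given to an agent like any other tool. A short sketch, in the same illustrative style as the quick start:

```python
counter_agent = AgentConfig(
    name="counter",
    description="Manages a counter value",
    system_prompt="You are a counting assistant.",
    tools=[counter_tool],
    llm=OpenAI(model="gpt-4"),
)
```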

### Human in the Loop

Using the context, you can implement a human-in-the-loop pattern in your tools:

```python
from llama_index.core.workflow import Context, Event


class AskForConfirmationEvent(Event):
    """Ask for confirmation event."""

    confirmation_id: str


class ConfirmationEvent(Event):
    """Confirmation event."""

    confirmation: bool
    confirmation_id: str


async def ask_for_confirmation(ctx: Context) -> bool:
    """Ask the user for confirmation."""
    ctx.write_event_to_stream(AskForConfirmationEvent(confirmation_id="1234"))

    result = await ctx.wait_for_event(
        ConfirmationEvent, requirements={"confirmation_id": "1234"}
    )
    return result.confirmation
```

When this function is called, it will block the workflow execution until the user sends the required confirmation event.

```python
handler.ctx.send_event(
    ConfirmationEvent(confirmation=True, confirmation_id="1234")
)
```
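Putting the two pieces together, a driver loop can watch the event stream for the confirmation request and reply with the user's answer. A minimal sketch; the `input()` prompt stands in for whatever UI you have:

```python
handler = workflow.run(user_msg="Please delete the old records")
async for event in handler.stream_events():
    if isinstance(event, AskForConfirmationEvent):
        # In a real app this would be a UI prompt rather than input()
        answer = input("Proceed? (y/n): ").strip().lower() == "y"
        handler.ctx.send_event(
            ConfirmationEvent(
                confirmation=answer, confirmation_id=event.confirmation_id
            )
        )

response = await handler
```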
1 change: 1 addition & 0 deletions llama-index-core/llama_index/core/agent/multi_agent/BUILD
@@ -0,0 +1 @@
python_sources()
Empty file.
@@ -0,0 +1,43 @@
from enum import Enum
from typing import List, Optional

from llama_index.core.bridge.pydantic import BaseModel, Field, ConfigDict
from llama_index.core.llms import LLM
from llama_index.core.objects import ObjectRetriever
from llama_index.core.settings import Settings
from llama_index.core.tools import BaseTool


class AgentMode(str, Enum):
    """Agent mode."""

    DEFAULT = "default"
    REACT = "react"
    FUNCTION = "function"


class AgentConfig(BaseModel):
    """Configuration for a single agent in the multi-agent system."""

    model_config = ConfigDict(arbitrary_types_allowed=True)

    name: str
    description: str
    system_prompt: Optional[str] = None
    tools: Optional[List[BaseTool]] = None
    tool_retriever: Optional[ObjectRetriever] = None
    can_handoff_to: Optional[List[str]] = Field(default=None)
    handoff_prompt_template: Optional[str] = None
    llm: Optional[LLM] = None
    is_entrypoint_agent: bool = False
    mode: AgentMode = AgentMode.DEFAULT

    def get_mode(self) -> AgentMode:
        """Resolve the mode of the agent."""
        if self.mode == AgentMode.DEFAULT:
            # Fall back to the global default LLM when none is set on the
            # config, so the metadata access below cannot hit a None llm
            llm = self.llm or Settings.llm
            return (
                AgentMode.FUNCTION
                if llm.metadata.is_function_calling_model
                else AgentMode.REACT
            )

        return self.mode