diff --git a/.gitignore b/.gitignore
index b0760ca5..cb5b0650 100644
--- a/.gitignore
+++ b/.gitignore
@@ -174,3 +174,4 @@ cython_debug/
 src/controlflow/_version.py
 all_code.md
 all_docs.md
+llm_guides.md
\ No newline at end of file
diff --git a/docs/examples/examples-guide.md b/docs/llm-guides/examples-guide.md
similarity index 100%
rename from docs/examples/examples-guide.md
rename to docs/llm-guides/examples-guide.md
diff --git a/docs/llm-guides/llm-guide.md b/docs/llm-guides/llm-guide.md
new file mode 100644
index 00000000..36b706af
--- /dev/null
+++ b/docs/llm-guides/llm-guide.md
@@ -0,0 +1,427 @@
# Comprehensive ControlFlow Guide for LLMs: From Basics to Advanced Techniques

## Introduction

This guide is designed to provide a comprehensive understanding of ControlFlow, a Python framework for building agentic AI workflows. As an LLM, your goal is to become an expert in ControlFlow, capable of explaining its concepts, writing idiomatic code, and producing accurate documentation. This guide will give you the foundation to effectively query and utilize the complete ControlFlow documentation.

## Core Concepts

### 1. Tasks

Tasks are the fundamental building blocks of ControlFlow workflows. They represent discrete, observable units of work for AI agents to complete.

Key properties of tasks:
- `objective`: A string describing the task's goal
- `result_type`: The expected output type (e.g., `str`, `int`, `list`, or a Pydantic model)
- `instructions`: Detailed guidance for completing the task
- `context`: Additional information or data needed for the task
- `agents`: AI entities assigned to work on the task
- `tools`: Functions that extend the task's capabilities

Example of creating and running a basic task:

```python
import controlflow as cf

result = cf.run("Write a haiku about artificial intelligence")
print(result)
```

Tasks have a defined lifecycle with various states:
- PENDING: Initial state when a task is created
- RUNNING: Task is currently being executed by an agent
- SUCCESSFUL: Task has completed successfully
- FAILED: Task has failed to complete
- SKIPPED: Task was skipped (usually due to workflow logic)

Example of checking task states:

```python
task = cf.Task("Perform complex calculation")
print(task.status)  # TaskStatus.PENDING

task.run()

if task.is_successful():
    print("Task completed successfully")
elif task.is_failed():
    print(f"Task failed: {task.result}")
```

### 2. Agents

Agents are AI entities that execute tasks. They can be configured with specific LLM models, instructions, and tools.

Example of creating a specialized agent:

```python
data_analyst = cf.Agent(
    name="Data Analyst",
    model="openai/gpt-4o",
    instructions="""
        You are a data analysis expert.
        Always provide detailed statistical insights.
        Use Python for data manipulation when necessary.
        """,
    tools=[cf.tools.code.python, custom_data_tool],
    interactive=True
)

result = cf.run(
    "Analyze the provided dataset",
    agents=[data_analyst],
    context={"dataset": large_dataset}
)
```

### 3. Flows

Flows are high-level containers that orchestrate tasks and agents. They provide shared context and history for all components within a workflow.
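
Flows are usually created with the `@cf.flow` decorator, as in the example below; the nested-flows section later in this guide also uses `cf.Flow()` directly as a context manager. A minimal sketch of that context-manager form (the objectives here are illustrative, not part of the ControlFlow API):

```python
import controlflow as cf

# Tasks created inside the `with` block share this flow's context and history.
with cf.Flow():
    summary = cf.run("Summarize today's team updates")
    actions = cf.run(
        "List follow-up actions based on the summary",
        context={"summary": summary},
    )
```
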
+ +Example of a basic flow: + +```python +@cf.flow +def simple_analysis_flow(data: dict): + cleaned_data = cf.run("Clean and preprocess the data", context={"data": data}) + analysis_result = cf.run("Perform data analysis", depends_on=[cleaned_data]) + return cf.run("Generate report", depends_on=[analysis_result]) + +result = simple_analysis_flow(my_data) +``` + +### 4. Tools + +Tools are functions that extend agent capabilities, enabling interactions with external systems or complex computations. + +Example of defining and using a custom tool: + +```python +import controlflow as cf + +def fetch_stock_price(symbol: str) -> float: + # Simulating API call + import random + return random.uniform(10, 1000) + +@cf.flow +def stock_analysis_flow(symbols: list[str]): + prices = cf.run( + "Fetch current stock prices", + tools=[fetch_stock_price], + context={"symbols": symbols} + ) + return cf.run("Analyze stock prices", context={"prices": prices}) + +result = stock_analysis_flow(["AAPL", "GOOGL", "MSFT"]) +``` + +## Advanced Features + +### 1. Structured Results + +ControlFlow supports structured outputs using Pydantic models or Python types: + +```python +from pydantic import BaseModel + +class StockAnalysis(BaseModel): + symbol: str + current_price: float + recommendation: str + +result = cf.run( + "Analyze Apple stock", + result_type=StockAnalysis, + context={"symbol": "AAPL"} +) +print(f"Recommendation for {result.symbol}: {result.recommendation}") +``` + +### 2. Multi-Agent Collaboration + +Assign multiple agents to tasks for complex problem-solving: + +```python +researcher = cf.Agent(name="Researcher", instructions="Conduct thorough research on topics") +writer = cf.Agent(name="Writer", instructions="Write clear and engaging content") +editor = cf.Agent(name="Editor", instructions="Review and improve written content") + +@cf.flow +def collaborative_writing_flow(topic: str): + research = cf.run("Research the topic", agents=[researcher], context={"topic": topic}) + draft = cf.run("Write initial draft", agents=[writer], depends_on=[research]) + final = cf.run("Edit and finalize the content", agents=[editor], depends_on=[draft]) + return final + +article = collaborative_writing_flow("Impact of AI on healthcare") +``` + +### 3. Dependency Management + +Define relationships between tasks for logical execution order: + +```python +@cf.flow +def data_pipeline_flow(raw_data: dict): + cleaned_data = cf.Task("Clean raw data", context={"data": raw_data}) + transformed_data = cf.Task("Transform cleaned data", depends_on=[cleaned_data]) + analyzed_data = cf.Task("Analyze transformed data", depends_on=[transformed_data]) + visualization = cf.Task("Create data visualizations", depends_on=[analyzed_data]) + report = cf.Task("Generate final report", depends_on=[analyzed_data, visualization]) + + return report.run() +``` + +### 4. Context Sharing + +ControlFlow automatically shares context between related tasks and flows: + +```python +@cf.flow +def context_sharing_flow(initial_data: dict): + task1 = cf.run("Process initial data", context={"data": initial_data}) + # task2 automatically has access to task1's result + task2 = cf.run("Further process data", depends_on=[task1]) + # task3 has access to results from both task1 and task2 + return cf.run("Synthesize results", depends_on=[task1, task2]) +``` + +### 5. 
Flexible Control + +Balance between AI autonomy and developer-defined structure: + +```python +@cf.flow +def flexible_analysis_flow(data: dict, analysis_type: str): + if analysis_type == "quick": + return cf.run("Perform quick analysis", context={"data": data}) + elif analysis_type == "deep": + subtask1 = cf.run("Perform in-depth analysis part 1", context={"data": data}) + subtask2 = cf.run("Perform in-depth analysis part 2", depends_on=[subtask1]) + return cf.run("Synthesize in-depth analysis", depends_on=[subtask2]) + else: + return cf.run("Determine and perform appropriate analysis", context={"data": data, "type": analysis_type}) +``` + +### 6. Interactivity + +Enable user interactions within workflows: + +```python +@cf.flow +def interactive_research_flow(): + topic = cf.run( + "Determine the research topic with user input", + interactive=True, + result_type=str + ) + + outline = cf.run( + "Create a research outline", + context={"topic": topic}, + result_type=list[str] + ) + + for section in outline: + content = cf.run( + f"Write content for section: {section}", + interactive=True, + result_type=str + ) + cf.run( + "Review and improve section content", + context={"section": section, "content": content}, + interactive=True + ) + + return cf.run( + "Compile final research document", + context={"outline": outline}, + result_type=str + ) + +result = interactive_research_flow() +``` + +### 7. Nested Flows and Private Contexts + +Create sub-flows for modular workflow design and data privacy: + +```python +@cf.flow +def main_workflow(sensitive_data: str): + print("Starting main workflow") + + with cf.Flow() as private_flow: + # This task runs in an isolated context + processed_data = cf.run( + "Process sensitive data", + context={"sensitive_data": sensitive_data}, + result_type=str + ) + + # Use processed data without exposing sensitive information + return cf.run( + "Generate report based on processed data", + context={"processed_data": processed_data}, + result_type=str + ) + +result = main_workflow("CONFIDENTIAL_INFO") +``` + +### 8. Dynamic Task Generation + +Use AI to create new tasks based on intermediate results: + +```python +@cf.flow +def dynamic_workflow(): + initial_task = cf.run("Analyze the problem and identify necessary subtasks") + subtasks = cf.run( + "Generate a list of subtasks based on the analysis", + depends_on=[initial_task], + result_type=list[str] + ) + + results = [] + for subtask in subtasks: + result = cf.run(subtask) + results.append(result) + + return cf.run( + "Synthesize results from all subtasks", + context={"subtask_results": results}, + result_type=str + ) + +result = dynamic_workflow() +``` + +### 9. Error Handling and Retries + +Implement robust error handling and retry mechanisms: + +```python +import controlflow as cf +from tenacity import retry, stop_after_attempt, wait_exponential + +@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) +def unreliable_external_api(): + # Simulating an unreliable API call + import random + if random.random() < 0.7: + raise ConnectionError("API request failed") + return "API request successful" + +@cf.flow +def robust_workflow(): + try: + api_result = cf.run( + "Call external API", + tools=[unreliable_external_api], + result_type=str + ) + return cf.run("Process API result", context={"api_result": api_result}) + except Exception as e: + return cf.run("Handle API failure", context={"error": str(e)}) + +result = robust_workflow() +``` + +### 10. 
Customizing LLM Behavior

Fine-tune LLM behavior through various settings:

```python
import controlflow as cf
from langchain_openai import ChatOpenAI

# Customizing the default model
cf.defaults.model = "anthropic/claude-3-opus-20240229"

# Creating an agent with specific LLM settings via a LangChain model instance
precise_agent = cf.Agent(
    name="Precise Analyst",
    model=ChatOpenAI(model="gpt-4o", temperature=0.2),
    instructions="Provide concise, fact-based responses."
)

result = cf.run("Analyze market trends", agents=[precise_agent])
```

### 11. Parallel Task Execution

Execute independent tasks concurrently for improved performance:

```python
import controlflow as cf
import asyncio

@cf.flow
async def parallel_workflow():
    task1 = cf.Task("Perform analysis on dataset A")
    task2 = cf.Task("Perform analysis on dataset B")
    task3 = cf.Task("Perform analysis on dataset C")

    results = await asyncio.gather(
        task1.run_async(),
        task2.run_async(),
        task3.run_async()
    )

    return cf.run(
        "Synthesize results from parallel analyses",
        context={"parallel_results": results},
        result_type=str
    )

result = asyncio.run(parallel_workflow())
```

## Best Practices

1. Design tasks with clear, specific objectives.
2. Use appropriate result types to ensure type safety and validation.
3. Leverage agent specialization for complex workflows.
4. Implement proper error handling and logging for robust applications.
5. Optimize LLM usage by using the right model for each task.
6. Utilize tools to extend agent capabilities when needed.
7. Design flows to maximize reusability and modularity.
8. Use structured outputs (Pydantic models) for complex data structures.
9. Implement retry mechanisms for unreliable operations.
10. Leverage parallel execution for independent tasks to improve performance.

## Documentation Tips

1. Always mention the `import controlflow as cf` statement at the beginning of code examples.
2. Use type hints in function definitions and variable assignments for clarity.
3. Provide clear explanations for each task's objective and expected output.
4. When describing agents, include their name, model, and any special instructions or tools.
5. Explain the purpose and structure of flows, emphasizing their role in orchestration.
6. Highlight the use of structured outputs and their benefits for integrating with traditional software.
7. Demonstrate how to use tools and explain their impact on agent capabilities.
8. Include examples of error handling and best practices for robust workflows.
9. Showcase advanced features like nested flows, dynamic task generation, and parallel execution.
10. Provide context and use cases for when to apply specific ControlFlow features.

## Querying for More Information

To access more detailed information about specific aspects of ControlFlow, you can query for relevant documents. Here are some example queries:

- "Show me the documentation for the Task class in ControlFlow"
- "Provide examples of using tools in ControlFlow"
- "Explain the concept of flows in ControlFlow"
- "How do I implement error handling in ControlFlow?"
- "What are the best practices for designing efficient workflows in ControlFlow?"
- "Show me advanced examples of multi-agent collaboration in ControlFlow"
- "Explain how to use custom LLM models with ControlFlow"
- "Provide documentation on ControlFlow's asyncio integration"
- "How can I optimize performance in large ControlFlow workflows?"
+- "Show me examples of integrating external APIs in ControlFlow tasks" + +By using these types of queries, you can retrieve specific details, examples, and best practices from the available documentation to supplement your understanding and provide more accurate and detailed information about ControlFlow. + +## Conclusion + +This comprehensive guide provides a strong foundation for understanding and utilizing ControlFlow. As an LLM, you now have the knowledge to effectively explain ControlFlow concepts, write idiomatic code, and produce accurate documentation. Remember to leverage the querying capability to access more detailed information when needed, allowing you to provide in-depth explanations and examples for specific ControlFlow features and use cases. \ No newline at end of file diff --git a/docs/style_guide.mdx b/docs/llm-guides/style-guide.md similarity index 100% rename from docs/style_guide.mdx rename to docs/llm-guides/style-guide.md diff --git a/src/controlflow/cli/dev.py b/src/controlflow/cli/dev.py index 896d4034..8e715d2c 100644 --- a/src/controlflow/cli/dev.py +++ b/src/controlflow/cli/dev.py @@ -17,14 +17,15 @@ def ai_files( ), ): """ - Generates two markdown files that contain all of ControlFlow's source code and documentation, - which can be used to provide context to an AI. + Generates three markdown files that contain all of ControlFlow's source code, documentation, + and LLM guides, which can be used to provide context to an AI. """ try: # Get the absolute path of the ControlFlow main repo repo_root = Path(__file__).resolve().parents[3] src_path = repo_root / "src" docs_path = repo_root / "docs" + llm_guides_path = docs_path / "llm-guides" output_dir = Path(output_path).resolve() def generate_file_content(file_paths, output_file): @@ -40,11 +41,17 @@ def generate_file_content(file_paths, output_file): + list(docs_path.glob("*.md")) + [docs_path / "mint.json", repo_root / "README.md"] ) + llm_guide_files = list(llm_guides_path.glob("*.md")) + list( + llm_guides_path.glob("*.mdx") + ) generate_file_content(code_files, "all_code.md") generate_file_content(doc_files, "all_docs.md") + generate_file_content(llm_guide_files, "llm_guides.md") - typer.echo(f"Generated all_code.md and all_docs.md in {output_dir}") + typer.echo( + f"Generated all_code.md, all_docs.md, and llm-guides.md in {output_dir}" + ) except Exception as e: typer.echo(f"An error occurred: {str(e)}", err=True) raise typer.Exit(code=1)