Utilizes ArXiv and Exa to write a detailed blog on a research topic that helps the user understand the topic, with supporting articles and papers.
Commit 0398626 (parent a4cf57f)

Showing 4 changed files with 343 additions and 0 deletions.
**cookbook/examples/agents/research_agent_app/README.md** (31 additions, 0 deletions)
# AI Research Workflow

We've created a constrained AI Research Workflow that uses Agents to write a detailed blog on a topic by utilizing LLM models and external tools:

- Exa
- ArXiv

### 1. Create a virtual environment

```shell
python3 -m venv ~/.venvs/aienv
source ~/.venvs/aienv/bin/activate
```

### 2. Install requirements

```shell
pip install -r cookbook/examples/agents/research_agent_app/requirements.txt
```

### 3. Export `OPENAI_API_KEY` and `EXA_API_KEY`

```shell
export OPENAI_API_KEY=sk-***
export EXA_API_KEY=***
```
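Optionally, confirm both keys are visible to your current shell before launching the app. This is just a sanity check on the exports above, not part of the app itself:

```shell
# Prints "set" for each key the shell can see, without revealing its value
echo "OPENAI_API_KEY: ${OPENAI_API_KEY:+set}"
echo "EXA_API_KEY: ${EXA_API_KEY:+set}"
```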
### 4. Run Streamlit App

```shell
streamlit run cookbook/examples/agents/research_agent_app/app.py
```
**cookbook/examples/agents/research_agent_app/ai_research_agent.py** (139 additions, 0 deletions)
```python
import os
from pathlib import Path
from typing import List
from pydantic import BaseModel, Field
from phi.agent import Agent
from dotenv import load_dotenv
from phi.model.openai import OpenAIChat
from phi.tools.arxiv_toolkit import ArxivToolkit
from phi.tools.exa import ExaTools

load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")


# Define data models
class SearchTerms(BaseModel):
    terms: List[str] = Field(..., description="List of search terms related to a topic.")


class ArxivSearchResult(BaseModel):
    title: str = Field(..., description="Title of the article.")
    id: str = Field(..., description="The ID of the article.")
    authors: List[str] = Field(..., description="Authors of the article.")
    summary: str = Field(..., description="Summary of the article.")
    pdf_url: str = Field(..., description="URL of the PDF of the article.")
    links: List[str] = Field(..., description="Links related to the article.")
    reasoning: str = Field(..., description="Reason for selecting this article.")


class ArxivSearchResults(BaseModel):
    results: List[ArxivSearchResult] = Field(..., description="List of top search results.")


class WebSearchResult(BaseModel):
    title: str = Field(..., description="Title of the article.")
    summary: str = Field(..., description="Summary of the article.")
    links: List[str] = Field(..., description="Links related to the article.")
    reasoning: str = Field(..., description="Reason for selecting this article.")


class WebSearchResults(BaseModel):
    results: List[WebSearchResult] = Field(..., description="List of top search results.")


# Initialize tools
arxiv_toolkit = ArxivToolkit(download_dir=Path(__file__).parent.parent.parent.parent.joinpath("wip", "arxiv_pdfs"))
exa_tools = ExaTools()

# Initialize agents
search_term_generator = Agent(
    model=OpenAIChat(id="gpt-4o"),
    description="""
    You are an expert research strategist. Generate 2 specific and distinct search terms that will capture different key aspects of the given topic.
    Focus on terms that:
    - Are specific enough to yield relevant results
    - Cover both technical and practical aspects of the topic
    - Are relevant to current developments
    - Are optimized for searching academic and web resources effectively
    Provide the search terms as a list of strings like ["xyz", "abc", ...]
    """,
    response_model=SearchTerms,
    structured_output=True,
)

arxiv_search_agent = Agent(
    model=OpenAIChat(id="gpt-4o"),
    description="""
    You are an expert in academic research with access to ArXiv's database.
    Your task is to:
    1. Search ArXiv for the top 10 papers related to the provided search term.
    2. Select the 3 most relevant research papers based on:
       - Direct relevance to the search term.
       - Scientific impact (e.g., citations, journal reputation).
       - Recency of publication.
    For each selected paper, the output should be a JSON structure with these details:
    - title
    - id
    - authors
    - a concise summary
    - the PDF link of the research paper
    - links related to the research paper
    - reasoning for why the paper was chosen
    Ensure the selected research papers directly address the topic and offer valuable insights.
    """,
    tools=[arxiv_toolkit],
    response_model=ArxivSearchResults,
    structured_output=True,
)

exa_search_agent = Agent(
    model=OpenAIChat(id="gpt-4o"),
    description="""
    You are a web search expert specializing in extracting high-quality information.
    Your task is to:
    1. Given a topic, search Exa for the top 10 articles about that topic.
    2. Select the 3 most relevant articles based on:
       - Source credibility.
       - Content depth and relevance.
    For each selected article, the output should have:
    - title
    - a concise summary
    - related links to the article
    - reasoning for why the article was chosen and how it contributes to understanding the topic.
    Ensure the selected articles are credible, relevant, and provide significant insights into the topic.
    """,
    tools=[exa_tools],
    response_model=WebSearchResults,
    structured_output=True,
)

research_editor = Agent(
    model=OpenAIChat(id="gpt-4o"),
    description="""
    You are a senior research editor specializing in breaking complex topics down into understandable, engaging, high-quality blogs.
    Your task is to:
    1. Create a detailed blog within 1000 words based on the given topic.
    2. Keep the blog to at most 7-8 paragraphs, understandable and intuitive, making things easy for the reader to follow.
    3. Highlight key findings and provide a clear, high-level overview of the topic.
    4. At the end, add links to the supporting articles, papers, or any other findings you think are necessary to include.
    The blog should help the reader get a decent understanding of the topic.
    The blog should be in markdown format.
    """,
    instructions=[
        "Analyze all materials before writing.",
        "Build a clear narrative structure.",
        "Balance technical accuracy with explainability.",
    ],
    markdown=True,
)
```
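The Streamlit app below drives these agents interactively, but they can also be chained directly from a script. Here is a minimal sketch mirroring how `app.py` calls them; it assumes, as the app's usage suggests, that `Agent.run` accepts a message string and returns a response object exposing `model_dump_json()` and a `.content` payload. The topic string is just an example:

```python
import json

from cookbook.examples.agents.research_agent_app.ai_research_agent import (
    exa_search_agent,
    research_editor,
    search_term_generator,
)

topic = "LLM evals in multi-agentic space"  # example topic, same default as the app

# 1. Generate search terms for the topic (same input shape app.py uses).
search_terms = search_term_generator.run(json.dumps({"topic": topic, "num_terms": 2}))

# 2. Run the Exa search agent over the generated terms.
exa_results = exa_search_agent.run(search_terms.model_dump_json(indent=4))

# 3. Assemble the editor's input and generate the final blog.
report_input = f"# Topic: {topic}\n\n<exa_content>\n{exa_results.model_dump_json(indent=4)}\n</exa_content>\n"
report = research_editor.run(report_input)
print(report.content)
```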
**cookbook/examples/agents/research_agent_app/app.py** (168 additions, 0 deletions)
```python
import json
from typing import Optional
import streamlit as st
import pandas as pd
from cookbook.examples.agents.research_agent_app.ai_research_agent import (
    SearchTerms,
    search_term_generator,
    arxiv_search_agent,
    exa_search_agent,
    research_editor,
    arxiv_toolkit,
)

# Streamlit App Configuration
st.set_page_config(
    page_title="Research Workflow",
    page_icon=":orange_heart:",
)
st.title("AI Research Workflow")
st.markdown("##### :orange_heart: built by [phidata](https://github.com/phidatahq/phidata)")


def main() -> None:
    # Get topic for report
    input_topic = st.sidebar.text_input(
        ":female-scientist: Enter a topic",
        value="LLM evals in multi-agentic space",
    )
    # Button to generate report
    generate_report = st.sidebar.button("Generate Report")
    if generate_report:
        st.session_state["topic"] = input_topic

    # Checkboxes for search
    st.sidebar.markdown("## Assistants")
    search_exa = st.sidebar.checkbox("Exa Search", value=True)
    search_arxiv = st.sidebar.checkbox("ArXiv Search", value=False)
    search_pubmed = st.sidebar.checkbox("PubMed Search", disabled=True)  # noqa
    search_google_scholar = st.sidebar.checkbox("Google Scholar Search", disabled=True)  # noqa
    use_cache = st.sidebar.toggle("Use Cache", value=False, disabled=True)  # noqa
    num_search_terms = st.sidebar.number_input(
        "Number of Search Terms", value=1, min_value=1, max_value=3, help="This will increase latency."
    )

    st.sidebar.markdown("---")
    st.sidebar.markdown("## Trending Topics")
    topic = "Humanoid and Autonomous Agents"
    if st.sidebar.button(topic):
        st.session_state["topic"] = topic

    topic = "Gene Editing for Disease Treatment"
    if st.sidebar.button(topic):
        st.session_state["topic"] = topic

    topic = "Multimodal AI in healthcare"
    if st.sidebar.button(topic):
        st.session_state["topic"] = topic

    topic = "Brain Aging and Neurodegenerative Diseases"
    if st.sidebar.button(topic):
        st.session_state["topic"] = topic

    if "topic" in st.session_state:
        report_topic = st.session_state["topic"]

        search_terms: Optional[SearchTerms] = None
        with st.status("Generating Search Terms", expanded=True) as status:
            with st.container():
                search_terms_container = st.empty()
                search_generator_input = {"topic": report_topic, "num_terms": num_search_terms}
                search_terms = search_term_generator.run(json.dumps(search_generator_input))
                if search_terms:
                    search_terms_container.json(search_terms.model_dump())
            status.update(label="Search Terms Generated", state="complete", expanded=False)

        if not search_terms:
            st.write("Sorry, report generation failed. Please try again.")
            return

        exa_content: Optional[str] = None
        arxiv_content: Optional[str] = None

        if search_exa:
            with st.status("Searching Exa", expanded=True) as status:
                with st.container():
                    exa_container = st.empty()
                    try:
                        exa_search_results = exa_search_agent.run(search_terms.model_dump_json(indent=4))
                        if isinstance(exa_search_results, str):
                            raise ValueError("Unexpected string response from exa_search_agent")
                        if exa_search_results and len(exa_search_results.content.results) > 0:
                            exa_content = exa_search_results.model_dump_json(indent=4)
                            exa_container.json(exa_search_results.content.results)
                        status.update(label="Exa Search Complete", state="complete", expanded=False)
                    except Exception as e:
                        st.error(f"An error occurred during Exa search: {e}")
                        status.update(label="Exa Search Failed", state="error", expanded=True)
                        exa_content = None

        if search_arxiv:
            with st.status("Searching ArXiv (this takes a while)", expanded=True) as status:
                with st.container():
                    arxiv_container = st.empty()
                    arxiv_search_results = arxiv_search_agent.run(search_terms.model_dump_json(indent=4))
                    if arxiv_search_results and arxiv_search_results.content.results:
                        arxiv_container.json([result.model_dump() for result in arxiv_search_results.content.results])
                status.update(label="ArXiv Search Complete", state="complete", expanded=False)

            if arxiv_search_results and arxiv_search_results.content.results:
                paper_summaries = []
                for result in arxiv_search_results.content.results:
                    summary = {
                        "ID": result.id,
                        "Title": result.title,
                        "Authors": ", ".join(result.authors) if result.authors else "No authors available",
                        "Summary": result.summary[:200] + "..." if len(result.summary) > 200 else result.summary,
                    }
                    paper_summaries.append(summary)

                if paper_summaries:
                    with st.status("Displaying ArXiv Paper Summaries", expanded=True) as status:
                        with st.container():
                            st.subheader("ArXiv Paper Summaries")
                            df = pd.DataFrame(paper_summaries)
                            st.dataframe(df, use_container_width=True)
                        status.update(label="ArXiv Paper Summaries Displayed", state="complete", expanded=False)

                arxiv_paper_ids = [summary["ID"] for summary in paper_summaries]
                if arxiv_paper_ids:
                    with st.status("Reading ArXiv Papers", expanded=True) as status:
                        with st.container():
                            arxiv_content = arxiv_toolkit.read_arxiv_papers(arxiv_paper_ids, pages_to_read=2)
                            st.write(f"Read {len(arxiv_paper_ids)} ArXiv papers")
                        status.update(label="Reading ArXiv Papers Complete", state="complete", expanded=False)

        report_input = ""
        report_input += f"# Topic: {report_topic}\n\n"
        report_input += "## Search Terms\n\n"
        report_input += f"{search_terms}\n\n"
        if arxiv_content:
            report_input += "## ArXiv Papers\n\n"
            report_input += "<arxiv_papers>\n\n"
            report_input += f"{arxiv_content}\n\n"
            report_input += "</arxiv_papers>\n\n"
        if exa_content:
            report_input += "## Web Search Content from Exa\n\n"
            report_input += "<exa_content>\n\n"
            report_input += f"{exa_content}\n\n"
            report_input += "</exa_content>\n\n"

        # Only generate the report if we have content
        if arxiv_content or exa_content:
            with st.spinner("Generating Report"):
                final_report_container = st.empty()
                research_report = research_editor.run(report_input)
                final_report_container.markdown(research_report.content)
        else:
            st.error(
                "Report generation cancelled due to search failure. Please try again or select another search option."
            )

    st.sidebar.markdown("---")
    if st.sidebar.button("Restart"):
        st.rerun()


main()
```
**cookbook/examples/agents/research_agent_app/requirements.txt** (5 additions, 0 deletions)
```
phidata
openai
streamlit
exa_py
arxiv
```