Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Agent that thinks more thoroughly about question and considers possible outcomes #47

Merged
merged 21 commits into from
Apr 11, 2024
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2,119 changes: 2,119 additions & 0 deletions agent_thinks_more.ipynb

Large diffs are not rendered by default.

1,002 changes: 1,002 additions & 0 deletions crewai_multiple_agent.ipynb

Large diffs are not rendered by default.

583 changes: 582 additions & 1 deletion poetry.lock

Large diffs are not rendered by default.

117 changes: 117 additions & 0 deletions prediction_market_agent/agents/crewai_subsequential_agent/benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import typing as t
from datetime import datetime

import typer
from loguru import logger
from prediction_market_agent_tooling.benchmark.agents import (
AbstractBenchmarkedAgent,
FixedAgent,
RandomAgent,
)
from prediction_market_agent_tooling.benchmark.benchmark import Benchmarker
from prediction_market_agent_tooling.benchmark.utils import (
OutcomePrediction,
Prediction,
)
from prediction_market_agent_tooling.gtypes import Probability
from prediction_market_agent_tooling.markets.agent_market import (
AgentMarket,
FilterBy,
SortBy,
)
from prediction_market_agent_tooling.markets.markets import (
MarketType,
get_binary_markets,
)

from prediction_market_agent.agents.crewai_subsequential_agent.crewai_agent_subquestions import (
CrewAIAgentSubquestions,
)


def build_binary_agent_market_from_question(question: str) -> AgentMarket:
    """Wrap a bare question string in a placeholder binary `AgentMarket`.

    The question text is reused as the market's id and url. The market gets
    a 0.5 `p_yes`, a fixed creation time of 2024-01-01, YES/NO outcomes, and
    no close time, volume, or resolution.
    """
    market_fields = {
        # The question is the only real information available, so it also
        # serves as identifier and url.
        "id": question,
        "url": question,
        "close_time": None,
        "volume": None,
        "question": question,
        "p_yes": Probability(0.5),
        "created_time": datetime(2024, 1, 1),
        "resolution": None,
        "outcomes": ["YES", "NO"],
    }
    return AgentMarket(**market_fields)


class CrewAIAgentSubquestionsBenchmark(AbstractBenchmarkedAgent):
    """Benchmark adapter that answers questions with `CrewAIAgentSubquestions`."""

    def __init__(
        self,
        max_workers: int,
        agent_name: str,
    ) -> None:
        """Create the wrapped CrewAI agent and initialise the benchmark base class.

        Args:
            max_workers: Forwarded unchanged to `AbstractBenchmarkedAgent`.
            agent_name: Forwarded unchanged to `AbstractBenchmarkedAgent`.
        """
        # One agent instance is built here and reused by every `predict` call.
        self.agent = CrewAIAgentSubquestions()
        super().__init__(agent_name=agent_name, max_workers=max_workers)

    def predict(self, market_question: str) -> Prediction:
        """Run the CrewAI agent on one question and wrap its answer.

        Args:
            market_question: Text of the binary market question.

        Returns:
            A `Prediction` carrying the agent's `p_yes` and `confidence`;
            `info_utility` is always `None`.
        """
        result = self.agent.answer_binary_market(market_question)
        return Prediction(
            outcome_prediction=OutcomePrediction(
                p_yes=result.p_yes, confidence=result.confidence, info_utility=None
            )
        )


def main(
    n: int = 50,
    output: str = "./benchmark_report_50markets.md",
    reference: MarketType = MarketType.MANIFOLD,
    filter: FilterBy = FilterBy.OPEN,
    sort: SortBy = SortBy.NONE,
    max_workers: int = 1,
    cache_path: t.Optional[str] = "predictions_cache.json",
    only_cached: bool = False,
) -> None:
    """
    Benchmark the CrewAI sub-questions agent against random and fixed baselines
    and write the Markdown report to `output`.

    Polymarket usually contains higher quality questions,
    but on Manifold, in addition to filtering by resolved markets, you can also sort by newest.
    """
    # NOTE: `filter` shadows the builtin, but the parameter names are part of
    # this entry point's interface, so it is kept as is.
    markets = get_binary_markets(n, reference, filter_by=filter, sort_by=sort)

    # Keying by question text keeps a single market per question; when a
    # question repeats, the last market seen is the one retained.
    markets_deduplicated = list({m.question: m for m in markets}.values())
    if len(markets) != len(markets_deduplicated):
        logger.debug(
            f"Warning: Deduplicated markets from {len(markets)} to {len(markets_deduplicated)}."
        )

    logger.debug(f"Found {len(markets_deduplicated)} markets.")

    benchmarker = Benchmarker(
        markets=markets_deduplicated,
        agents=[
            CrewAIAgentSubquestionsBenchmark(
                agent_name="subsequential-questions-crewai",
                max_workers=max_workers,
            ),
            RandomAgent(agent_name="random", max_workers=max_workers),
            FixedAgent(
                fixed_answer=False, agent_name="fixed-no", max_workers=max_workers
            ),
            FixedAgent(
                fixed_answer=True, agent_name="fixed-yes", max_workers=max_workers
            ),
        ],
        cache_path=cache_path,
        only_cached=only_cached,
    )

    benchmarker.run_agents(
        enable_timing=False
    )  # Caching of search etc. can distort timings
    md = benchmarker.generate_markdown_report()

    # Explicit UTF-8: market questions may contain non-ASCII characters, and
    # the platform default encoding is not guaranteed to be able to encode them.
    with open(output, "w", encoding="utf-8") as f:
        logger.info(f"Writing benchmark report to: {output}")
        f.write(md)


# Script entry point: expose `main` as a command-line interface through typer.
if __name__ == "__main__":
    typer.run(main)
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
import typing as t

from crewai import Agent, Crew, Process, Task
from langchain_core.language_models import BaseChatModel
from langchain_openai import ChatOpenAI
from loguru import logger
from pydantic import BaseModel

from prediction_market_agent.agents.crewai_subsequential_agent.prompts import (
CREATE_OUTCOMES_FROM_SCENARIO_OUTPUT,
CREATE_OUTCOMES_FROM_SCENARIO_PROMPT,
FINAL_DECISION_PROMPT,
PROBABILITY_CLASS_OUTPUT,
PROBABILITY_FOR_ONE_OUTCOME_PROMPT,
RESEARCH_OUTCOME_OUTPUT,
RESEARCH_OUTCOME_PROMPT,
)
from prediction_market_agent.tools.crewai_tools import TavilyDevTool
from prediction_market_agent.utils import APIKeys

# Tavily search tool, instantiated once at import time and handed to the
# researcher agent via `tools=[tavily_search]`.
tavily_search = TavilyDevTool()


# JSON schema for the "split the scenario into outcomes" task: it is passed as
# `output_json` to that task and used to validate the crew's result.
# (Documented with `#` comments rather than a docstring on purpose: a docstring
# on a pydantic model becomes part of its generated schema.)
class Outcomes(BaseModel):
    # One text entry per outcome; each entry is later used as the `sentence`
    # when building the research task for that outcome.
    outcomes: list[str]


# JSON schema for probability estimates: passed as `output_json` to the
# per-outcome probability task and to the final-decision task.
# (Documented with `#` comments rather than a docstring on purpose: a docstring
# on a pydantic model becomes part of its generated schema.)
class ProbabilityOutput(BaseModel):
    # Decision label; the deployable agent compares it against "y" and "n".
    decision: str
    # Probability of "yes" — presumably in [0, 1]; confirm against the prompt.
    p_yes: float
    # Probability of "no" — presumably in [0, 1]; confirm against the prompt.
    p_no: float
    # Confidence in the estimate — scale not visible here; confirm against the prompt.
    confidence: float


class CrewAIAgentSubquestions:
    """Answer a binary market question by reasoning over its possible outcomes.

    Two CrewAI agents are used: a researcher (equipped with the Tavily search
    tool) and a predictor (no tools). `answer_binary_market` splits the
    question into outcomes, researches and scores each outcome, then asks the
    predictor for a final decision.
    """

    def __init__(self) -> None:
        """Build the shared LLM and the researcher / predictor agents."""
        llm = self._build_llm()
        self.researcher = Agent(
            role="Research Analyst",
            goal="Research and report on some future event, giving high quality and nuanced analysis",
            backstory="You are a senior research analyst who is adept at researching and reporting on future events.",
            verbose=True,
            allow_delegation=False,
            tools=[tavily_search],
            llm=llm,
        )

        self.predictor = Agent(
            role="Professional Gambler",
            goal="Predict, based on some research you are presented with, whether or not a given event will occur",
            backstory="You are a professional gambler who is adept at predicting and betting on the outcomes of future events.",
            verbose=True,
            allow_delegation=False,
            llm=llm,
        )

    def _build_llm(self) -> BaseChatModel:
        """Return a `ChatOpenAI` client for gpt-3.5-turbo-0125 using the key from `APIKeys`."""
        keys = APIKeys()
        llm = ChatOpenAI(
            model="gpt-3.5-turbo-0125",
            api_key=keys.openai_api_key.get_secret_value(),
        )
        return llm

    def split_research_into_outcomes(self, question: str) -> Outcomes:
        """Ask the researcher to enumerate outcomes for `question`.

        Args:
            question: The market question, passed to the crew as `scenario`.

        Returns:
            The crew's result parsed as `Outcomes`.
        """
        create_outcomes_task = Task(
            description=CREATE_OUTCOMES_FROM_SCENARIO_PROMPT,
            expected_output=CREATE_OUTCOMES_FROM_SCENARIO_OUTPUT,
            output_json=Outcomes,
            agent=self.researcher,
        )

        report_crew = Crew(
            agents=[self.researcher],
            tasks=[create_outcomes_task],
        )
        result = report_crew.kickoff(inputs={"scenario": question})
        return Outcomes.model_validate_json(result)

    def build_tasks_for_outcome(
        self, input_dict: t.Optional[dict[str, t.Any]] = None
    ) -> list[Task]:
        """Create the (research, probability) task pair for one outcome.

        Args:
            input_dict: Values substituted into `RESEARCH_OUTCOME_PROMPT` via
                `str.format`. `None` is treated as an empty mapping.

        Returns:
            A two-element list: the asynchronous research task followed by the
            probability task, which takes the research task as its context.
        """
        # `None` instead of a `{}` default avoids sharing one dict across calls.
        format_args = {} if input_dict is None else input_dict
        task_research_one_outcome = Task(
            description=RESEARCH_OUTCOME_PROMPT.format(**format_args),
            agent=self.researcher,
            expected_output=RESEARCH_OUTCOME_OUTPUT,
            async_execution=True,
        )
        task_create_probability_for_one_outcome = Task(
            description=PROBABILITY_FOR_ONE_OUTCOME_PROMPT,
            expected_output=PROBABILITY_CLASS_OUTPUT,
            agent=self.predictor,
            output_json=ProbabilityOutput,
            async_execution=False,
            context=[task_research_one_outcome],
        )

        return [task_research_one_outcome, task_create_probability_for_one_outcome]

    def generate_prediction_for_one_outcome(self, sentence: str) -> ProbabilityOutput:
        """Research a single outcome and estimate its probability in one crew run.

        Args:
            sentence: The outcome text, passed to the crew as `sentence`.

        Returns:
            The crew's result parsed as `ProbabilityOutput`.
        """
        task_research_one_outcome = Task(
            description=RESEARCH_OUTCOME_PROMPT,
            agent=self.researcher,
            expected_output=RESEARCH_OUTCOME_OUTPUT,
        )
        task_create_probability_for_one_outcome = Task(
            description=PROBABILITY_FOR_ONE_OUTCOME_PROMPT,
            expected_output=PROBABILITY_CLASS_OUTPUT,
            agent=self.predictor,
            output_json=ProbabilityOutput,
            context=[task_research_one_outcome],
        )
        crew = Crew(
            agents=[self.researcher, self.predictor],
            tasks=[task_research_one_outcome, task_create_probability_for_one_outcome],
            verbose=2,
            process=Process.sequential,
        )

        result = crew.kickoff(inputs={"sentence": sentence})
        return ProbabilityOutput.model_validate_json(result)

    def generate_final_decision(
        self, outcomes_with_probabilities: list[t.Tuple[str, ProbabilityOutput]]
    ) -> ProbabilityOutput:
        """Ask the predictor for a final estimate given all per-outcome estimates.

        Args:
            outcomes_with_probabilities: `(outcome, estimate)` pairs. The first
                pair's outcome is the one the predictor is asked to assess.

        Returns:
            The final-decision task's raw output parsed as `ProbabilityOutput`.

        Raises:
            IndexError: If `outcomes_with_probabilities` is empty.
        """
        task_final_decision = Task(
            description=FINAL_DECISION_PROMPT,
            agent=self.predictor,
            expected_output=PROBABILITY_CLASS_OUTPUT,
            output_json=ProbabilityOutput,
        )

        crew = Crew(
            agents=[self.predictor],
            tasks=[task_final_decision],
            verbose=2,
        )

        crew.kickoff(
            inputs={
                # `model_dump` is the pydantic-v2 spelling of `.dict()`, in
                # line with the `model_validate_json` calls used elsewhere.
                "outcomes_with_probabilities": [
                    (outcome, estimate.model_dump())
                    for outcome, estimate in outcomes_with_probabilities
                ],
                "number_of_outcomes": len(outcomes_with_probabilities),
                "outcome_to_assess": outcomes_with_probabilities[0][0],
            }
        )
        return ProbabilityOutput.model_validate_json(
            task_final_decision.output.raw_output
        )

    def answer_binary_market(self, question: str) -> ProbabilityOutput:
        """Produce a final probability estimate for a binary market question.

        The question is split into outcomes, every outcome gets a research
        task and a probability task, all tasks run in a single sequential
        crew, and the per-outcome estimates are handed to
        `generate_final_decision`.

        Args:
            question: The market question.

        Returns:
            The predictor's final `ProbabilityOutput`.
        """
        outcomes = self.split_research_into_outcomes(question)
        # loguru formats with `str.format`, so the value needs a `{}` placeholder.
        logger.debug("outcomes {}", outcomes)

        # Keyed by outcome text, so repeated outcomes collapse into one entry.
        task_map = {
            outcome: self.build_tasks_for_outcome(input_dict={"sentence": outcome})
            for outcome in outcomes.outcomes
        }

        # flatten nested list
        all_tasks = [task for tasks in task_map.values() for task in tasks]
        crew = Crew(
            agents=[self.researcher, self.predictor],
            tasks=all_tasks,
            verbose=2,
            process=Process.sequential,
        )

        # crew.kickoff doesn't finish all async tasks when done.
        crew.kickoff()

        # We parse individual task results to build outcomes_with_probs
        outcomes_with_probs = []
        for outcome, tasks in task_map.items():
            try:
                # tasks[1] is the probability task of the (research, probability) pair.
                prediction_result = ProbabilityOutput.model_validate_json(
                    tasks[1].output.raw_output
                )
            except Exception as e:
                # Best effort: one unparsable estimate must not abort the whole
                # run, so fall back to a neutral, zero-confidence estimate.
                logger.error("Could not parse result as ProbabilityOutput {}", e)
                prediction_result = ProbabilityOutput(
                    p_yes=0.5, p_no=0.5, confidence=0, decision=""
                )

            outcomes_with_probs.append((outcome, prediction_result))

        final_answer = self.generate_final_decision(outcomes_with_probs)
        return final_answer
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import random

from prediction_market_agent_tooling.deploy.agent import DeployableAgent
from prediction_market_agent_tooling.markets.agent_market import AgentMarket
from prediction_market_agent_tooling.markets.markets import MarketType
from prediction_market_agent_tooling.tools.utils import should_not_happen

from prediction_market_agent.agents.crewai_subsequential_agent.crewai_agent_subquestions import (
CrewAIAgentSubquestions,
)
from prediction_market_agent.agents.utils import market_is_saturated


class DeployableThinkThoroughlyAgent(DeployableAgent):
    """Deployable agent that answers markets with `CrewAIAgentSubquestions`."""

    def __init__(self) -> None:
        super().__init__()

    def pick_markets(self, markets: list[AgentMarket]) -> list[AgentMarket]:
        """Pick up to 5 random markets that are not saturated.

        Args:
            markets: Candidate markets; the list itself is left untouched.

        Returns:
            At most 5 markets, in random order, for which
            `market_is_saturated` is false.
        """
        # Shuffle a copy so the caller's list is not reordered as a side effect.
        shuffled_markets = list(markets)
        random.shuffle(shuffled_markets)

        picked_markets: list[AgentMarket] = []
        for market in shuffled_markets:
            # Assume very high probability markets are already known, and have
            # been correctly bet on, and therefore the value of betting on them
            # is low.
            if market_is_saturated(market=market):
                continue
            picked_markets.append(market)
            if len(picked_markets) == 5:
                break

        return picked_markets

    def answer_binary_market(self, market: AgentMarket) -> bool:
        """Answer the market by running the CrewAI sub-questions agent on it.

        Args:
            market: The market whose `question` is answered.

        Returns:
            `True` when the agent's decision is "y", `False` when it is "n".
            Any other decision is routed to `should_not_happen()`.
        """
        # A fresh CrewAI agent is built for every market.
        result = CrewAIAgentSubquestions().answer_binary_market(market.question)
        if result.decision == "y":
            return True
        if result.decision == "n":
            return False
        return should_not_happen()


# Script entry point: run the agent locally against Omen markets.
# `place_bet=False` — presumably a dry run that places no bets; confirm against
# `DeployableAgent.deploy_local`. Units of `sleep_time` / `timeout` are not
# visible here (presumably seconds — TODO confirm).
if __name__ == "__main__":
    agent = DeployableThinkThoroughlyAgent()
    agent.deploy_local(
        market_type=MarketType.OMEN, sleep_time=540, timeout=180, place_bet=False
    )
Loading