Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Agent that thinks more thoroughly about question and considers possible outcomes #47

Merged
merged 21 commits into from
Apr 11, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2,119 changes: 2,119 additions & 0 deletions agent_thinks_more.ipynb

Large diffs are not rendered by default.

1,002 changes: 1,002 additions & 0 deletions crewai_multiple_agent.ipynb

Large diffs are not rendered by default.

2,346 changes: 2,262 additions & 84 deletions poetry.lock

Large diffs are not rendered by default.

159 changes: 159 additions & 0 deletions prediction_market_agent/agents/crewai_subsequential_agent/benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import time
from datetime import timedelta, datetime

from dotenv import load_dotenv
from prediction_market_agent_tooling.benchmark.agents import AbstractBenchmarkedAgent
from prediction_market_agent_tooling.benchmark.benchmark import Benchmarker
from prediction_market_agent_tooling.benchmark.utils import (
OutcomePrediction,
Prediction,
)
from prediction_market_agent_tooling.gtypes import Probability, DatetimeWithTimezone
from prediction_market_agent_tooling.markets.markets import AgentMarket
from prediction_market_agent_tooling.tools.utils import utcnow
from pydantic import BaseModel

from prediction_market_agent.agents.crewai_subsequential_agent.crewai_agent_subquestions import CrewAIAgentSubquestions
from prediction_market_agent.agents.known_outcome_agent.known_outcome_agent import (
Result,
get_known_outcome,
)


def build_market_from_question_without_validation(question: str) -> Market:
    """Wrap a bare question string in a placeholder ``Market`` for benchmarking.

    The question text is used both as the market's question and as its URL;
    every other field is a fixed placeholder value.

    Args:
        question: The market question text.

    Returns:
        A ``Market`` built from ``question`` and hard-coded placeholder fields.
    """
    # NOTE(review): `Market` and `MarketSource` are not among the imports
    # visible at the top of this file -- confirm they are in scope.
    created = DatetimeWithTimezone(datetime(2024, 1, 1))
    closes = DatetimeWithTimezone(datetime(2024, 3, 15))
    return Market(
        url=question,
        question=question,
        p_yes=0.5,
        source=MarketSource.MANIFOLD,
        volume=0,
        created_time=created,
        close_time=closes,
    )


def build_binary_agent_market_from_question(question: str) -> AgentMarket:
    """Build an unresolved YES/NO ``AgentMarket`` from a question string.

    Args:
        question: The market question text; it is also used as the market id.

    Returns:
        An ``AgentMarket`` with a 0.5 ``p_yes``, no resolution and the two
        outcomes ``"YES"`` and ``"NO"``.
    """
    binary_outcomes = ["YES", "NO"]
    even_odds = Probability(0.5)
    placeholder_created_time = datetime(2024, 1, 1)
    return AgentMarket(
        id=question,
        question=question,
        p_yes=even_odds,
        created_time=placeholder_created_time,
        resolution=None,
        outcomes=binary_outcomes,
    )


class QuestionAndAnswer(BaseModel):
    """A benchmark question together with its recorded result."""

    # The market question text.
    question: str
    # The recorded result for the question (a `Result` value).
    result: Result
    # Flag stored alongside each example question.
    # NOTE(review): presumably whether the earlier bet was right -- confirm.
    bet_correct: bool


class CrewAIAgentSubquestionsBenchmark(AbstractBenchmarkedAgent):
    """Benchmark adapter that exposes ``CrewAIAgentSubquestions`` to the Benchmarker."""

    def __init__(
        self,
        agent_name: str,
        max_workers: int,
        model: str,
        max_tries: int,
    ) -> None:
        """Create the wrapped agent and register with the benchmark base class.

        Args:
            agent_name: Name passed on to ``AbstractBenchmarkedAgent``.
            max_workers: Worker count passed on to ``AbstractBenchmarkedAgent``.
            model: Stored on the instance; kept for interface compatibility.
            max_tries: Stored on the instance; kept for interface compatibility.
        """
        self.model = model
        self.max_tries = max_tries
        self.agent = CrewAIAgentSubquestions()
        super().__init__(agent_name=agent_name, max_workers=max_workers)

    def predict(self, market_question: str) -> Prediction:
        """Answer ``market_question`` with the CrewAI agent and wrap the answer.

        The previous implementation ran the CrewAI agent, discarded its answer
        and reported the output of ``get_known_outcome`` instead, so the
        benchmark never measured the agent it is named after.

        Args:
            market_question: The question text of the market to predict.

        Returns:
            A ``Prediction`` whose ``p_yes`` is 1.0 when the agent answered
            yes and 0.0 otherwise.
        """
        market = build_binary_agent_market_from_question(market_question)
        answered_yes = self.agent.answer_binary_market(market)
        print(f"Answered {market_question=} with {answered_yes=}")

        # The agent only yields a boolean, so the prediction is expressed as a
        # hard 0/1 probability.
        return Prediction(
            is_predictable=True,
            outcome_prediction=OutcomePrediction(
                p_yes=Probability(1.0 if answered_yes else 0.0),
                confidence=1.0,
                info_utility=None,
            ),
        )


if __name__ == "__main__":
    # Script entry point: benchmark the sub-questions agent on example
    # questions, write a markdown report and check the resulting error metric.
    load_dotenv()

    # Fetch example questions which our agents answered in the past.
    questions = [
        QuestionAndAnswer(
            question="Will the stock price of Donald Trump's media company exceed $100 on 1 April 2024?",
            result=Result.NO,
            bet_correct=True,
        ),
        QuestionAndAnswer(
            question="Will Andy Murray return to professional tennis from his ankle injury on or before 31 March 2024?",
            result=Result.NO,
            bet_correct=True,
        ),
        QuestionAndAnswer(
            question="Will any legislation be signed by President Biden that could potentially lead to the ban of TikTok by 1 April 2024?",
            result=Result.YES,
            bet_correct=False,
        ),
        QuestionAndAnswer(
            question="Will the United States v. Apple case have a verdict by 1 April 2024?",
            result=Result.NO,
            bet_correct=True,
        ),
        QuestionAndAnswer(
            question="Will Microsoft Teams launch the announced Copilot AI features by 1 April 2024?",
            result=Result.YES,
            bet_correct=True,
        ),
        QuestionAndAnswer(
            question="Will the Francis Scott Key Bridge in Baltimore be fully rebuilt by 2 April 2024?",
            result=Result.NO,
            bet_correct=True,
        ),
        QuestionAndAnswer(
            question="Will iOS 18 break the iPhone's iconic app grid by 1 April 2024?",
            result=Result.YES,
            bet_correct=False,
        ),
        QuestionAndAnswer(
            question="Will a winner of the Mega Millions jackpot be announced by 26 March 2024?",
            result=Result.YES,
            bet_correct=False,
        ),
    ]

    # Only the first question is benchmarked; slice before building so no
    # throwaway markets are constructed.
    markets = [
        build_market_from_question_without_validation(q.question)
        for q in questions[:1]
    ]

    benchmarker = Benchmarker(
        markets=markets,
        agents=[
            CrewAIAgentSubquestionsBenchmark(
                agent_name="subsequential_questions",
                model="gpt-3.5-turbo-0125",
                max_tries=3,
                max_workers=1,
            ),
        ],
    )
    benchmarker.run_agents()
    md = benchmarker.generate_markdown_report()

    # The timestamp in the file name keeps successive runs from overwriting
    # each other; the encoding is explicit so the report does not depend on
    # the platform default.
    output = f"./subsequential_questions_agent_benchmark_report.{int(time.time())}.md"
    with open(output, "w", encoding="utf-8") as f:
        print(f"Writing benchmark report to: {output}")
        f.write(md)

    # Check all predictions are correct, i.e. mean-squared-error == 0
    metrics = benchmarker.compute_metrics()
    assert metrics["MSE for `p_yes`"][0] == 0.0
gabrielfior marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
import typing as t

from crewai import Agent, Task, Process, Crew
from crewai_tools import SerperDevTool
from prediction_market_agent_tooling.markets.agent_market import AgentMarket
from pydantic import BaseModel

from prediction_market_agent.agents.abstract import AbstractAgent
from prediction_market_agent.agents.crewai_subsequential_agent.prompts import *
gabrielfior marked this conversation as resolved.
Show resolved Hide resolved

# Search tool instance created once at import time; it is handed to the
# researcher agent through its `tools` list.
search_tool = SerperDevTool()


class Outcomes(BaseModel):
    """JSON schema for the list of outcomes derived from a scenario."""

    # One string per outcome; each one is researched separately later on.
    outcomes: list[str]


class ProbabilityOutput(BaseModel):
    """JSON schema for a probability assessment produced by the predictor agent."""

    # Textual decision; the agent treats the value "y" as a yes answer.
    decision: str
    # Probability assigned to the outcome happening.
    p_yes: float
    # Probability assigned to the outcome not happening.
    p_no: float
    # Confidence reported with the assessment.
    # NOTE(review): expected range is not enforced here -- confirm against the prompt.
    confidence: float


class CrewAIAgentSubquestions(AbstractAgent):
    """Agent that splits a question into outcomes and assesses each one.

    A researcher agent (equipped with the search tool) and a predictor agent
    are combined into crews that first enumerate possible outcomes, then
    research and score each outcome, and finally produce one overall decision.
    """

    def __init__(self) -> None:
        """Create the researcher and predictor agents used by every crew."""
        self.researcher = Agent(
            role="Research Analyst",
            goal="Research and report on some future event, giving high quality and nuanced analysis",
            backstory="You are a senior research analyst who is adept at researching and reporting on future events.",
            verbose=True,
            allow_delegation=False,
            tools=[search_tool],
        )

        self.predictor = Agent(
            role="Professional Gambler",
            goal="Predict, based on some research you are presented with, whether or not a given event will occur",
            backstory="You are a professional gambler who is adept at predicting and betting on the outcomes of future events.",
            verbose=True,
            allow_delegation=False,
        )

    def split_research_into_outcomes(self, question: str) -> Outcomes:
        """Ask the researcher to enumerate outcomes for ``question``.

        Args:
            question: The scenario text, passed to the crew as ``scenario``.

        Returns:
            The crew result parsed as ``Outcomes``.
        """
        create_outcomes_task = Task(
            description=CREATE_OUTCOMES_FROM_SCENARIO_PROMPT,
            expected_output=CREATE_OUTCOMES_FROM_SCENARIO_OUTPUT,
            output_json=Outcomes,
            agent=self.researcher,
        )

        report_crew = Crew(
            agents=[self.researcher],
            tasks=[create_outcomes_task],
        )
        result = report_crew.kickoff(inputs={"scenario": question})
        return Outcomes.model_validate_json(result)

    def build_tasks_for_outcome(
        self, input_dict: t.Optional[dict[str, t.Any]] = None
    ) -> list[Task]:
        """Build the research task and the probability task for one outcome.

        Args:
            input_dict: Values substituted into ``RESEARCH_OUTCOME_PROMPT``.
                Defaults to no substitutions.

        Returns:
            ``[research_task, probability_task]``; the probability task uses
            the research task as its context.
        """
        # A `None` default avoids sharing one mutable dict across calls.
        prompt_values = {} if input_dict is None else input_dict

        task_research_one_outcome = Task(
            description=RESEARCH_OUTCOME_PROMPT.format(**prompt_values),
            agent=self.researcher,
            expected_output=RESEARCH_OUTCOME_OUTPUT,
            async_execution=True,
        )
        task_create_probability_for_one_outcome = Task(
            description=PROBABILITY_FOR_ONE_OUTCOME_PROMPT,
            expected_output=PROBABILITY_CLASS_OUTPUT,
            agent=self.predictor,
            output_json=ProbabilityOutput,
            async_execution=True,
            context=[task_research_one_outcome],
        )

        return [task_research_one_outcome, task_create_probability_for_one_outcome]

    def generate_prediction_for_one_outcome(self, sentence: str) -> ProbabilityOutput:
        """Research a single outcome and score it in one sequential crew run.

        Args:
            sentence: The outcome text, passed to the crew as ``sentence``.

        Returns:
            The crew result parsed as ``ProbabilityOutput``.
        """
        task_research_one_outcome = Task(
            description=RESEARCH_OUTCOME_PROMPT,
            agent=self.researcher,
            expected_output=RESEARCH_OUTCOME_OUTPUT,
        )
        task_create_probability_for_one_outcome = Task(
            description=PROBABILITY_FOR_ONE_OUTCOME_PROMPT,
            expected_output=PROBABILITY_CLASS_OUTPUT,
            agent=self.predictor,
            output_json=ProbabilityOutput,
            context=[task_research_one_outcome],
        )
        crew = Crew(
            agents=[self.researcher, self.predictor],
            tasks=[task_research_one_outcome, task_create_probability_for_one_outcome],
            verbose=2,
            process=Process.sequential,
        )

        result = crew.kickoff(inputs={"sentence": sentence})
        return ProbabilityOutput.model_validate_json(result)

    def generate_final_decision(
        self, outcomes_with_probabilities: list[tuple[str, ProbabilityOutput]]
    ) -> ProbabilityOutput:
        """Combine the per-outcome assessments into one final assessment.

        Args:
            outcomes_with_probabilities: ``(outcome, assessment)`` pairs. The
                first pair's outcome is the one the predictor is asked to
                assess.

        Returns:
            The crew result parsed as ``ProbabilityOutput``.

        Raises:
            ValueError: If ``outcomes_with_probabilities`` is empty.
        """
        if not outcomes_with_probabilities:
            # Fail with a clear message instead of an IndexError further down.
            raise ValueError("Cannot generate a final decision without any outcomes.")

        task_final_decision = Task(
            description=FINAL_DECISION_PROMPT,
            agent=self.predictor,
            expected_output=PROBABILITY_CLASS_OUTPUT,
            output_json=ProbabilityOutput,
        )

        crew = Crew(
            agents=[self.predictor],
            tasks=[task_final_decision],
            verbose=2,
        )

        # `model_dump` is the pydantic v2 spelling of the deprecated `.dict()`;
        # this class already relies on v2 through `model_validate_json`.
        serialised = [
            (outcome, assessment.model_dump())
            for outcome, assessment in outcomes_with_probabilities
        ]
        result = crew.kickoff(
            inputs={
                "outcomes_with_probabilities": serialised,
                "number_of_outcomes": len(outcomes_with_probabilities),
                "outcome_to_assess": outcomes_with_probabilities[0][0],
            }
        )
        return ProbabilityOutput.model_validate_json(result)

    def answer_binary_market(self, market: AgentMarket) -> bool:
        """Answer a binary market by assessing each possible outcome.

        Args:
            market: The market whose ``question`` is analysed.

        Returns:
            ``True`` when the final decision equals ``"y"``, else ``False``.
        """
        outcomes = self.split_research_into_outcomes(market.question)
        print("outcomes ", outcomes)

        # Map each outcome to its [research, probability] task pair.
        task_map = {
            outcome: self.build_tasks_for_outcome(input_dict={"sentence": outcome})
            for outcome in outcomes.outcomes
        }

        # Flatten the per-outcome task pairs into one list for the crew.
        all_tasks = [task for tasks in task_map.values() for task in tasks]
        crew = Crew(
            agents=[self.researcher, self.predictor],
            tasks=all_tasks,
            verbose=2,
        )

        crew.kickoff()

        # We parse individual task results to build outcomes_with_probs
        outcomes_with_probs = []
        for outcome, tasks in task_map.items():
            try:
                prediction_result = ProbabilityOutput.model_validate_json(
                    tasks[1].output.raw_output
                )
            except Exception as e:
                # Deliberately best-effort: a missing or malformed task output
                # falls back to a neutral, zero-confidence assessment.
                print("Could not parse result as ProbabilityOutput ", e)
                prediction_result = ProbabilityOutput(
                    p_yes=0.5, p_no=0.5, confidence=0, decision=""
                )

            outcomes_with_probs.append((outcome, prediction_result))

        final_answer = self.generate_final_decision(outcomes_with_probs)
        return final_answer.decision == "y"
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from decimal import Decimal
import random
from prediction_market_agent_tooling.deploy.agent import DeployableAgent
from prediction_market_agent_tooling.markets.agent_market import AgentMarket
from prediction_market_agent_tooling.markets.data_models import BetAmount, Currency
from prediction_market_agent_tooling.markets.markets import MarketType

from prediction_market_agent.agents.crewai_subsequential_agent.crewai_agent_subquestions import CrewAIAgentSubquestions
from prediction_market_agent.agents.known_outcome_agent.known_outcome_agent import (
Result,
)
from prediction_market_agent.agents.utils import market_is_saturated


class DeployableThinkThoroughlyAgent(DeployableAgent):
    """Deployable agent that answers markets with ``CrewAIAgentSubquestions``."""

    # For cheaper credits at this experimental stage
    model = "gpt-3.5-turbo"

    def load(self) -> None:
        """Initialise per-deployment state."""
        # NOTE(review): nothing in this class reads this mapping -- confirm
        # whether it is still needed.
        self.markets_with_known_outcomes: dict[str, Result] = {}

    def pick_markets(self, markets: list[AgentMarket]) -> list[AgentMarket]:
        """Pick up to five random, non-saturated markets to bet on.

        Args:
            markets: Candidate markets. The list is not modified.

        Returns:
            At most five markets for which ``market_is_saturated`` is false,
            in random order.
        """
        picked_markets: list[AgentMarket] = []
        # `random.sample` returns a shuffled copy, so the caller's list keeps
        # its order (the previous in-place shuffle mutated the argument).
        for market in random.sample(markets, len(markets)):
            # Assume very high probability markets are already known, and have
            # been correctly bet on, and therefore the value of betting on them
            # is low.
            if market_is_saturated(market=market):
                continue
            picked_markets.append(market)
            if len(picked_markets) == 5:
                break

        return picked_markets

    def answer_binary_market(self, market: AgentMarket) -> bool:
        """Answer ``market`` by running a fresh ``CrewAIAgentSubquestions``.

        Args:
            market: The market to answer.

        Returns:
            The boolean answer produced by the CrewAI agent.
        """
        agent = CrewAIAgentSubquestions()
        return agent.answer_binary_market(market)

    def calculate_bet_amount(self, answer: bool, market: AgentMarket) -> BetAmount:
        """Return the fixed bet amount for xDai markets.

        Args:
            answer: The agent's answer; not used to size the bet.
            market: The market being bet on.

        Returns:
            A ``BetAmount`` of 0.1 xDai.

        Raises:
            NotImplementedError: If the market's currency is not xDai.
        """
        if market.currency != Currency.xDai:
            raise NotImplementedError("This agent only supports xDai markets")
        # Build the Decimal from a string: Decimal(0.1) would carry the binary
        # floating-point error of the float literal into the bet amount.
        return BetAmount(amount=Decimal("0.1"), currency=Currency.xDai)
gabrielfior marked this conversation as resolved.
Show resolved Hide resolved


if __name__ == "__main__":
    # Script entry point: run the agent locally against Omen markets without
    # placing any bets.
    agent = DeployableThinkThoroughlyAgent()
    local_run_options = {
        "market_type": MarketType.OMEN,
        "sleep_time": 540,
        "timeout": 180,
        "place_bet": False,
    }
    agent.deploy_local(**local_run_options)

    # GCP deployment, kept disabled for reference:
    # agent.deploy_gcp(
    #     repository=f"git+{get_current_git_url()}@{get_current_git_commit_sha()}",
    #     market_type=MarketType.OMEN,
    #     labels={OWNER_KEY: getpass.getuser()},
    #     secrets={
    #         "TAVILY_API_KEY": "GNOSIS_AI_TAVILY_API_KEY:latest",
    #     },
    #     memory=1024,
    #     api_keys=APIKeys(
    #         BET_FROM_ADDRESS=verify_address(
    #             "0xb611A9f02B318339049264c7a66ac3401281cc3c"
    #         ),
    #         BET_FROM_PRIVATE_KEY=private_key_type("EVAN_OMEN_BETTER_0_PKEY:latest"),
    #         OPENAI_API_KEY=SecretStr("EVAN_OPENAI_API_KEY:latest"),
    #         MANIFOLD_API_KEY=None,
    #     ),
    #     cron_schedule="0 */12 * * *",
    #     timeout=540,
    # )
Loading
Loading