
Agent that thinks more thoroughly about question and considers possible outcomes #47

Merged · 21 commits · Apr 11, 2024

2,119 changes: 2,119 additions & 0 deletions agent_thinks_more.ipynb

Large diffs are not rendered by default.

1,002 changes: 1,002 additions & 0 deletions crewai_multiple_agent.ipynb

Large diffs are not rendered by default.

583 changes: 582 additions & 1 deletion poetry.lock

Large diffs are not rendered by default.

124 changes: 124 additions & 0 deletions prediction_market_agent/agents/crewai_subsequential_agent/benchmark.py
@@ -0,0 +1,124 @@
import typing as t
from datetime import datetime

import typer
from dotenv import load_dotenv
from prediction_market_agent_tooling.benchmark.agents import (
    AbstractBenchmarkedAgent,
    FixedAgent,
    RandomAgent,
)
from prediction_market_agent_tooling.benchmark.benchmark import Benchmarker
from prediction_market_agent_tooling.benchmark.utils import (
    OutcomePrediction,
    Prediction,
)
from prediction_market_agent_tooling.gtypes import Probability
from prediction_market_agent_tooling.markets.agent_market import (
    AgentMarket,
    FilterBy,
    SortBy,
)
from prediction_market_agent_tooling.markets.markets import (
    MarketType,
    get_binary_markets,
)

from prediction_market_agent.agents.crewai_subsequential_agent.crewai_agent_subquestions import (
    CrewAIAgentSubquestions,
)


def build_binary_agent_market_from_question(question: str) -> AgentMarket:
    return AgentMarket(
        id=question,
        url=question,
        close_time=None,
        volume=None,
        question=question,
        p_yes=Probability(0.5),
        created_time=datetime(2024, 1, 1),
        resolution=None,
        outcomes=["YES", "NO"],
    )


class CrewAIAgentSubquestionsBenchmark(AbstractBenchmarkedAgent):
    def __init__(
        self,
        agent_name: str,
        max_workers: int,
        model: str,
        max_tries: int,
    ) -> None:
        self.model = model
        self.max_tries = max_tries
        self.agent = CrewAIAgentSubquestions()
        super().__init__(agent_name=agent_name, max_workers=max_workers)

    def predict(self, market_question: str) -> Prediction:
        result = self.agent.answer_binary_market(market_question)
        return Prediction(
            outcome_prediction=OutcomePrediction(
                p_yes=result.p_yes, confidence=result.confidence, info_utility=None
            )
        )


def main(
    n: int = 5,
    output: str = "./benchmark_report.md",
    reference: MarketType = MarketType.MANIFOLD,
    filter: FilterBy = FilterBy.OPEN,
    sort: SortBy = SortBy.NONE,
    max_workers: int = 1,
    cache_path: t.Optional[str] = "predictions_cache.json",
    only_cached: bool = False,
) -> None:
    """
    Polymarket usually contains higher-quality questions,
    but on Manifold, in addition to filtering by MarketFilter.resolved, you can sort by MarketSort.newest.
    """
    load_dotenv()
    markets = get_binary_markets(n, reference, filter_by=filter, sort_by=sort)
    markets_deduplicated = list({m.question: m for m in markets}.values())
    if len(markets) != len(markets_deduplicated):
        print(
            f"Warning: Deduplicated markets from {len(markets)} to {len(markets_deduplicated)}."
        )

    print(f"Found {len(markets_deduplicated)} markets.")

    benchmarker = Benchmarker(
        markets=markets_deduplicated,
        agents=[
            CrewAIAgentSubquestionsBenchmark(
                "subsequential-questions-crewai",
                max_workers=max_workers,
                max_tries=1,
                model="gpt-3.5-turbo-0125",
            ),
            RandomAgent(agent_name="random", max_workers=max_workers),
            FixedAgent(
                fixed_answer=False, agent_name="fixed-no", max_workers=max_workers
            ),
            FixedAgent(
                fixed_answer=True, agent_name="fixed-yes", max_workers=max_workers
            ),
        ],
        cache_path=cache_path,
        only_cached=only_cached,
    )

    benchmarker.run_agents(
        enable_timing=False
    )  # Caching of search etc. can distort timings.
    md = benchmarker.generate_markdown_report()

    with open(output, "w") as f:
        print(f"Writing benchmark report to: {output}")
        f.write(md)


if __name__ == "__main__":
    typer.run(main)
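
Since `main` is wrapped with typer, the CLI flags mirror its signature. A minimal sketch of driving the same benchmark programmatically (parameter values are illustrative; the run needs the same API keys a CLI invocation would):

from prediction_market_agent.agents.crewai_subsequential_agent.benchmark import main
from prediction_market_agent_tooling.markets.agent_market import FilterBy, SortBy
from prediction_market_agent_tooling.markets.markets import MarketType

# Benchmark against a handful of open Manifold markets and write the report.
main(
    n=10,
    output="./benchmark_report.md",
    reference=MarketType.MANIFOLD,
    filter=FilterBy.OPEN,
    sort=SortBy.NONE,
    max_workers=2,
)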
168 changes: 168 additions & 0 deletions prediction_market_agent/agents/crewai_subsequential_agent/crewai_agent_subquestions.py
@@ -0,0 +1,168 @@
import typing as t

from crewai import Agent, Crew, Process, Task
from pydantic import BaseModel

from prediction_market_agent.agents.crewai_subsequential_agent.prompts import (
    CREATE_OUTCOMES_FROM_SCENARIO_OUTPUT,
    CREATE_OUTCOMES_FROM_SCENARIO_PROMPT,
    FINAL_DECISION_PROMPT,
    PROBABILITY_CLASS_OUTPUT,
    PROBABILITY_FOR_ONE_OUTCOME_PROMPT,
    RESEARCH_OUTCOME_OUTPUT,
    RESEARCH_OUTCOME_PROMPT,
)
from prediction_market_agent.tools.crewai_tools import TavilyDevTool

# search_tool = SerperDevTool()
tavily_search = TavilyDevTool()


class Outcomes(BaseModel):
    outcomes: list[str]


class ProbabilityOutput(BaseModel):
    decision: str
    p_yes: float
    p_no: float
    confidence: float


class CrewAIAgentSubquestions:
    def __init__(self) -> None:
        # A model name string (e.g. via OPENAI_MODEL_NAME) is interpreted
        # automatically by CrewAI; otherwise create an LLM object explicitly.
        self.researcher = Agent(
            role="Research Analyst",
            goal="Research and report on some future event, giving high quality and nuanced analysis",
            backstory="You are a senior research analyst who is adept at researching and reporting on future events.",
            verbose=True,
            allow_delegation=False,
            tools=[tavily_search],
        )

        self.predictor = Agent(
            role="Professional Gambler",
            goal="Predict, based on some research you are presented with, whether or not a given event will occur",
            backstory="You are a professional gambler who is adept at predicting and betting on the outcomes of future events.",
            verbose=True,
            allow_delegation=False,
        )

    def split_research_into_outcomes(self, question: str) -> Outcomes:
        create_outcomes_task = Task(
            description=CREATE_OUTCOMES_FROM_SCENARIO_PROMPT,
            expected_output=CREATE_OUTCOMES_FROM_SCENARIO_OUTPUT,
            output_json=Outcomes,
            agent=self.researcher,
        )

        report_crew = Crew(
            agents=[self.researcher],
            tasks=[create_outcomes_task],
        )
        result = report_crew.kickoff(inputs={"scenario": question})
        return Outcomes.model_validate_json(result)

    def build_tasks_for_outcome(
        self, input_dict: t.Optional[dict[str, t.Any]] = None
    ) -> list[Task]:
        input_dict = input_dict or {}
        task_research_one_outcome = Task(
            description=RESEARCH_OUTCOME_PROMPT.format(**input_dict),
            agent=self.researcher,
            expected_output=RESEARCH_OUTCOME_OUTPUT,
            async_execution=True,
        )
        task_create_probability_for_one_outcome = Task(
            description=PROBABILITY_FOR_ONE_OUTCOME_PROMPT,
            expected_output=PROBABILITY_CLASS_OUTPUT,
            agent=self.predictor,
            output_json=ProbabilityOutput,
            async_execution=False,
            context=[task_research_one_outcome],
        )

        return [task_research_one_outcome, task_create_probability_for_one_outcome]

    def generate_prediction_for_one_outcome(self, sentence: str) -> ProbabilityOutput:
        task_research_one_outcome = Task(
            description=RESEARCH_OUTCOME_PROMPT,
            agent=self.researcher,
            expected_output=RESEARCH_OUTCOME_OUTPUT,
        )
        task_create_probability_for_one_outcome = Task(
            description=PROBABILITY_FOR_ONE_OUTCOME_PROMPT,
            expected_output=PROBABILITY_CLASS_OUTPUT,
            agent=self.predictor,
            output_json=ProbabilityOutput,
            context=[task_research_one_outcome],
        )
        crew = Crew(
            agents=[self.researcher, self.predictor],
            tasks=[task_research_one_outcome, task_create_probability_for_one_outcome],
            verbose=2,
            process=Process.sequential,
        )

        result = crew.kickoff(inputs={"sentence": sentence})
        return ProbabilityOutput.model_validate_json(result)

    def generate_final_decision(
        self, outcomes_with_probabilities: list[t.Tuple[str, ProbabilityOutput]]
    ) -> ProbabilityOutput:
        task_final_decision = Task(
            description=FINAL_DECISION_PROMPT,
            agent=self.predictor,
            expected_output=PROBABILITY_CLASS_OUTPUT,
            output_json=ProbabilityOutput,
        )

        crew = Crew(
            agents=[self.predictor],
            tasks=[task_final_decision],
            verbose=2,
        )

        crew.kickoff(
            inputs={
                "outcomes_with_probabilities": [
                    (i[0], i[1].model_dump()) for i in outcomes_with_probabilities
                ],
                "number_of_outcomes": len(outcomes_with_probabilities),
                "outcome_to_assess": outcomes_with_probabilities[0][0],
            }
        )
        return ProbabilityOutput.model_validate_json(
            task_final_decision.output.raw_output
        )

    def answer_binary_market(self, question: str) -> ProbabilityOutput:
        outcomes = self.split_research_into_outcomes(question)
        print("outcomes ", outcomes)

        outcomes_with_probs = []
        task_map = {}
        for outcome in outcomes.outcomes:
            tasks_for_outcome = self.build_tasks_for_outcome(
                input_dict={"sentence": outcome}
            )
            task_map[outcome] = tasks_for_outcome

        # Flatten the nested task lists.
        all_tasks = sum(task_map.values(), [])
        crew = Crew(
            agents=[self.researcher, self.predictor],
            tasks=all_tasks,
            verbose=2,
            process=Process.sequential,
        )

        # Note: crew.kickoff() can return before all async tasks have finished.
        crew.kickoff()

        # Parse the individual task results to build outcomes_with_probs.
        for outcome, tasks in task_map.items():
            try:
                prediction_result = ProbabilityOutput.model_validate_json(
                    tasks[1].output.raw_output
                )
            except Exception as e:
                print("Could not parse result as ProbabilityOutput ", e)
                prediction_result = ProbabilityOutput(
                    p_yes=0.5, p_no=0.5, confidence=0, decision=""
                )

            outcomes_with_probs.append((outcome, prediction_result))

        final_answer = self.generate_final_decision(outcomes_with_probs)
        return final_answer
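
A minimal usage sketch for the class above (the question is illustrative; CrewAI needs OPENAI_API_KEY set, and the Tavily tool needs TAVILY_API_KEY):

from prediction_market_agent.agents.crewai_subsequential_agent.crewai_agent_subquestions import (
    CrewAIAgentSubquestions,
)

agent = CrewAIAgentSubquestions()
# Splits the question into outcomes, researches each one, and aggregates the
# per-outcome probabilities into a single ProbabilityOutput.
answer = agent.answer_binary_market("Will event X happen by the end of 2024?")
print(answer.decision, answer.p_yes, answer.p_no, answer.confidence)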
@@ -0,0 +1,80 @@
import os
import random
from decimal import Decimal

from prediction_market_agent_tooling.deploy.agent import DeployableAgent
from prediction_market_agent_tooling.markets.agent_market import AgentMarket
from prediction_market_agent_tooling.markets.data_models import BetAmount, Currency
from prediction_market_agent_tooling.markets.markets import MarketType

from prediction_market_agent.agents.crewai_subsequential_agent.crewai_agent_subquestions import (
    CrewAIAgentSubquestions,
)
from prediction_market_agent.agents.known_outcome_agent.known_outcome_agent import (
    Result,
)
from prediction_market_agent.agents.utils import market_is_saturated


class DeployableThinkThoroughlyAgent(DeployableAgent):
    # For cheaper credits at this experimental stage
    model = "gpt-4-turbo-preview"

    def load(self) -> None:
        self.markets_with_known_outcomes: dict[str, Result] = {}

    def pick_markets(self, markets: list[AgentMarket]) -> list[AgentMarket]:
        # Pick up to 5 random markets that aren't already saturated.
        picked_markets: list[AgentMarket] = []
        random.shuffle(markets)
        for market in markets:
            # Assume very high probability markets are already known, and have
            # been correctly bet on, and therefore the value of betting on them
            # is low.
            if not market_is_saturated(market=market):
                picked_markets.append(market)
                if len(picked_markets) == 5:
                    break

        return picked_markets

    def answer_binary_market(self, market: AgentMarket) -> bool:
        # Delegate the prediction to the CrewAI subquestions agent.
        os.environ["OPENAI_MODEL_NAME"] = self.model
        agent = CrewAIAgentSubquestions()
        result = agent.answer_binary_market(market.question)
        return result.decision == "y"

    def calculate_bet_amount(self, answer: bool, market: AgentMarket) -> BetAmount:
        if market.currency == Currency.xDai:
            return BetAmount(amount=Decimal(0.1), currency=Currency.xDai)
        else:
            raise NotImplementedError("This agent only supports xDai markets")


if __name__ == "__main__":
    agent = DeployableThinkThoroughlyAgent()
    agent.deploy_local(
        market_type=MarketType.OMEN, sleep_time=540, timeout=180, place_bet=False
    )
    # agent.deploy_gcp(
    #     repository=f"git+{get_current_git_url()}@{get_current_git_commit_sha()}",
    #     market_type=MarketType.OMEN,
    #     labels={OWNER_KEY: getpass.getuser()},
    #     secrets={
    #         "TAVILY_API_KEY": "GNOSIS_AI_TAVILY_API_KEY:latest",
    #     },
    #     memory=1024,
    #     api_keys=APIKeys(
    #         BET_FROM_ADDRESS=verify_address(
    #             "0xb611A9f02B318339049264c7a66ac3401281cc3c"
    #         ),
    #         BET_FROM_PRIVATE_KEY=private_key_type("EVAN_OMEN_BETTER_0_PKEY:latest"),
    #         OPENAI_API_KEY=SecretStr("EVAN_OPENAI_API_KEY:latest"),
    #         MANIFOLD_API_KEY=None,
    #     ),
    #     cron_schedule="0 */12 * * *",
    #     timeout=540,
    # )
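
For a local run that actually places bets, the same entry point can be called with place_bet=True (a sketch using only the parameters shown above; bet sizing comes from calculate_bet_amount):

agent = DeployableThinkThoroughlyAgent()
# Identical to the dry run above, but bets 0.1 xDai per picked market.
agent.deploy_local(
    market_type=MarketType.OMEN, sleep_time=540, timeout=180, place_bet=True
)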