gnosis · gabrielfior · Apr 11, 2024 · Mar 28, 2024 · Apr 3, 2024 · Apr 4, 2024
diff --git a/agent_thinks_more.ipynb b/agent_thinks_more.ipynb
diff --git a/crewai_multiple_agent.ipynb b/crewai_multiple_agent.ipynb
diff --git a/poetry.lock b/poetry.lock
diff --git a/prediction_market_agent/agents/crewai_subsequential_agent/benchmark.py b/prediction_market_agent/agents/crewai_subsequential_agent/benchmark.py
@@ -0,0 +1,157 @@
+import time
+from datetime import timedelta, datetime
+
+from dotenv import load_dotenv
+from prediction_market_agent_tooling.benchmark.agents import AbstractBenchmarkedAgent
+from prediction_market_agent_tooling.benchmark.benchmark import Benchmarker
+from prediction_market_agent_tooling.benchmark.utils import (
+    OutcomePrediction,
+    Prediction, Market,
+)
+from prediction_market_agent_tooling.gtypes import Probability
+from prediction_market_agent_tooling.markets.markets import AgentMarket
+from prediction_market_agent_tooling.tools.utils import utcnow
+from pydantic import BaseModel
+
+from prediction_market_agent.agents.crewai_subsequential_agent.crewai_agent_subquestions import CrewAIAgentSubquestions
+from prediction_market_agent.agents.known_outcome_agent.known_outcome_agent import (
+    Result,
+    get_known_outcome,
+)
+
+
+def build_market_from_question_without_validation(question: str) -> Market:
+    """Build a benchmark `Market` for `question` without running validation.
+
+    The question text doubles as the market URL and `p_yes` is fixed at 0.5.
+    """
+    field_values = {"url": question, "question": question, "p_yes": 0.5}
+    return Market.model_construct(**field_values)
+
+
+def build_binary_agent_market_from_question(question: str) -> AgentMarket:
+    """Wrap a bare question string in a YES/NO `AgentMarket`.
+
+    The question text is also used as the market id. The URL is empty, `p_yes`
+    is 0.5, the creation time is fixed at 1 January 2024, and volume, close
+    time and resolution are all None.
+    """
+    placeholder_fields = dict(
+        url="",
+        id=question,
+        question=question,
+        p_yes=Probability(0.5),
+        volume=None,
+        created_time=datetime(2024, 1, 1),
+        close_time=None,
+        resolution=None,
+        outcomes=["YES", "NO"],
+    )
+    return AgentMarket(**placeholder_fields)
+
+# One example question together with its recorded result, used as benchmark input.
+class QuestionAndAnswer(BaseModel):
+    # Full text of the market question.
+    question: str
+    # Result recorded for the question - presumably the answer the agent gave; TODO confirm.
+    result: Result
+    # Presumably whether the bet placed on `result` turned out to be correct; TODO confirm.
+    bet_correct: bool
+
+
+
+class CrewAIAgentSubquestionsBenchmark(AbstractBenchmarkedAgent):
+    """Benchmark adapter that exposes `CrewAIAgentSubquestions` to the `Benchmarker`."""
+
+    def __init__(
+        self,
+        agent_name: str,
+        max_workers: int,
+        model: str,
+        max_tries: int,
+    ) -> None:
+        """Create the wrapped CrewAI agent and register with the benchmark base class.
+
+        Args:
+            agent_name: Name forwarded to the benchmark base class.
+            max_workers: Worker count forwarded to the benchmark base class.
+            model: Stored on the instance; kept so existing callers keep working.
+            max_tries: Stored on the instance; kept so existing callers keep working.
+        """
+        self.model = model
+        self.max_tries = max_tries
+        self.agent = CrewAIAgentSubquestions()
+        super().__init__(agent_name=agent_name, max_workers=max_workers)
+
+    def predict(self, market_question: str) -> Prediction:
+        """Predict the outcome of `market_question` with the CrewAI sub-question agent.
+
+        The agent's yes/no answer is reported as a `p_yes` of 1.0 or 0.0 with
+        full confidence. Previously the agent's answer was computed and then
+        thrown away in favour of a separate known-outcome lookup, so the
+        benchmark did not measure this agent at all.
+
+        Args:
+            market_question: Text of the binary market question.
+
+        Returns:
+            A predictable `Prediction` carrying the agent's answer.
+        """
+        market = build_binary_agent_market_from_question(market_question)
+        answer = self.agent.answer_binary_market(market)
+        print(f"Answered {market_question=} with {answer=}")
+        return Prediction(
+            is_predictable=True,
+            outcome_prediction=OutcomePrediction(
+                p_yes=Probability(1.0 if answer else 0.0),
+                confidence=1.0,
+                info_utility=None,
+            ),
+        )
+
+
+if __name__ == "__main__":
+    # Load environment variables from a local .env file before any agent is built.
+    load_dotenv()
+
+    # Fetch example questions which our agents answered in the past.
+    questions = [
+        QuestionAndAnswer(
+            question="Will the stock price of Donald Trump's media company exceed $100 on 1 April 2024?",
+            result=Result.NO,
+            bet_correct=True,
+        ),
+        QuestionAndAnswer(
+            question="Will Andy Murray return to professional tennis from his ankle injury on or before 31 March 2024?",
+            result=Result.NO,
+            bet_correct=True,
+        ),
+        QuestionAndAnswer(
+            question="Will any legislation be signed by President Biden that could potentially lead to the ban of TikTok by 1 April 2024?",
+            result=Result.YES,
+            bet_correct=False,
+        ),
+        QuestionAndAnswer(
+            question="Will the United States v. Apple case have a verdict by 1 April 2024?",
+            result=Result.NO,
+            bet_correct=True,
+        ),
+        QuestionAndAnswer(
+            question="Will Microsoft Teams launch the announced Copilot AI features by 1 April 2024?",
+            result=Result.YES,
+            bet_correct=True,
+        ),
+        QuestionAndAnswer(
+            question="Will the Francis Scott Key Bridge in Baltimore be fully rebuilt by 2 April 2024?",
+            result=Result.NO,
+            bet_correct=True,
+        ),
+        QuestionAndAnswer(
+            question="Will iOS 18 break the iPhone's iconic app grid by 1 April 2024?",
+            result=Result.YES,
+            bet_correct=False,
+        ),
+        QuestionAndAnswer(
+            question="Will a winner of the Mega Millions jackpot be announced by 26 March 2024?",
+            result=Result.YES,
+            bet_correct=False,
+        ),
+    ]
+
+    # NOTE(review): the `[:1]` slice restricts the run to the first question only,
+    # which looks like a debugging leftover given the "all predictions" check
+    # below - confirm before widening, since every market triggers paid LLM calls.
+    markets = [
+        build_market_from_question_without_validation(q.question) for q in questions
+    ][:1]
+    benchmarker = Benchmarker(
+        markets=markets,
+        agents=[
+            CrewAIAgentSubquestionsBenchmark(
+                agent_name="subsequential_questions",
+                model="gpt-3.5-turbo-0125",
+                max_tries=3,
+                max_workers=1,
+            ),
+        ],
+    )
+    benchmarker.run_agents()
+    md = benchmarker.generate_markdown_report()
+
+    # The timestamp in the file name keeps reports from successive runs apart.
+    output = f"./subsequential_questions_agent_benchmark_report.{int(time.time())}.md"
+    print(f"Writing benchmark report to: {output}")
+    # Explicit encoding so the report is written the same way on every platform.
+    with open(output, "w", encoding="utf-8") as f:
+        f.write(md)
+
+    # Check all predictions are correct, i.e. mean-squared-error == 0
+    metrics = benchmarker.compute_metrics()
+    assert metrics["MSE for `p_yes`"][0] == 0.0
diff --git a/prediction_market_agent/agents/crewai_subsequential_agent/crewai_agent_subquestions.py b/prediction_market_agent/agents/crewai_subsequential_agent/crewai_agent_subquestions.py
@@ -0,0 +1,156 @@
+from crewai import Agent, Task, Process, Crew
+from crewai_tools import SerperDevTool
+from langchain_community.callbacks.manager import get_openai_callback
+from prediction_market_agent_tooling.markets.agent_market import AgentMarket
+from pydantic import BaseModel
+from tqdm import tqdm
+import typing as t
+
+from prediction_market_agent.agents.abstract import AbstractAgent
+from prediction_market_agent.agents.crewai_subsequential_agent.prompts import *
+
+# Module-level search tool instance; it is handed to the researcher agent as its only tool.
+search_tool = SerperDevTool()
+
+
+# JSON output schema of the "create outcomes" task: the outcomes a question is split into.
+# (Documented with comments rather than a docstring so the model's JSON schema is unchanged.)
+class Outcomes(BaseModel):
+    # One text entry per outcome; each entry is later researched and scored on its own.
+    outcomes: list[str]
+
+
+# JSON output schema of the probability and final-decision tasks.
+# (Documented with comments rather than a docstring so the model's JSON schema is unchanged.)
+class ProbabilityOutput(BaseModel):
+    # Decision text; the final answer is treated as "yes" when this equals "y".
+    decision: str
+    # Probability assigned to "yes" - presumably in [0, 1]; TODO confirm against the prompt.
+    p_yes: float
+    # Probability assigned to "no" - presumably in [0, 1]; TODO confirm against the prompt.
+    p_no: float
+    # Confidence reported for the estimate; 0 is used for the fallback when parsing fails.
+    confidence: float
+
+
+# NOTE(review): the value returned here is discarded and never entered with `with`, so
+# this call looks like a no-op - confirm, then either remove it or wrap the crew
+# execution in `with get_openai_callback() as cb:` if usage tracking is wanted.
+get_openai_callback()
+
+
+class CrewAIAgentSubquestions(AbstractAgent):
+    """Answer a binary market by splitting it into outcomes and assessing each one.
+
+    Two CrewAI agents are used: a researcher (equipped with the search tool)
+    and a predictor. The question is split into outcomes, every outcome is
+    researched and given a probability, and a final task turns those
+    probabilities into a single decision.
+    """
+
+    def __init__(self) -> None:
+        """Create the researcher and predictor agents."""
+        self.researcher = Agent(
+            role="Research Analyst",
+            goal="Research and report on some future event, giving high quality and nuanced analysis",
+            backstory="You are a senior research analyst who is adept at researching and reporting on future events.",
+            verbose=True,
+            allow_delegation=False,
+            tools=[search_tool],
+        )
+
+        self.predictor = Agent(
+            role="Professional Gambler",
+            goal="Predict, based on some research you are presented with, whether or not a given event will occur",
+            backstory="You are a professional gambler who is adept at predicting and betting on the outcomes of future events.",
+            verbose=True,
+            allow_delegation=False,
+        )
+
+    def split_research_into_outcomes(self, question: str) -> Outcomes:
+        """Ask the researcher to split `question` into a list of outcomes.
+
+        Args:
+            question: Market question, passed to the crew as the `scenario` input.
+
+        Returns:
+            The crew result parsed as `Outcomes`.
+        """
+        create_outcomes_task = Task(
+            description=CREATE_OUTCOMES_FROM_SCENARIO_PROMPT,
+            expected_output=CREATE_OUTCOMES_FROM_SCENARIO_OUTPUT,
+            output_json=Outcomes,
+            agent=self.researcher,
+        )
+
+        report_crew = Crew(
+            agents=[self.researcher],
+            tasks=[create_outcomes_task],
+        )
+        result = report_crew.kickoff(inputs={"scenario": question})
+        return Outcomes.model_validate_json(result)
+
+    def build_tasks_for_outcome(
+        self, input_dict: dict[str, t.Any] | None = None
+    ) -> list[Task]:
+        """Build the research task and the probability task for one outcome.
+
+        Args:
+            input_dict: Values substituted into the research prompt. Defaults to
+                no values; `None` replaces the former shared mutable `{}` default.
+
+        Returns:
+            `[research_task, probability_task]`. Both are created with
+            `async_execution=True`, and the probability task receives the
+            research task as its context.
+        """
+        prompt_values = {} if input_dict is None else input_dict
+        task_research_one_outcome = Task(
+            description=RESEARCH_OUTCOME_PROMPT.format(**prompt_values),
+            agent=self.researcher,
+            expected_output=RESEARCH_OUTCOME_OUTPUT,
+            async_execution=True,
+        )
+        task_create_probability_for_one_outcome = Task(
+            description=PROBABILITY_FOR_ONE_OUTCOME_PROMPT,
+            expected_output=PROBABILITY_CLASS_OUTPUT,
+            agent=self.predictor,
+            output_json=ProbabilityOutput,
+            async_execution=True,
+            context=[task_research_one_outcome],
+        )
+
+        return [task_research_one_outcome, task_create_probability_for_one_outcome]
+
+    def generate_prediction_for_one_outcome(self, sentence: str) -> ProbabilityOutput:
+        """Research one outcome and estimate its probability in a sequential crew.
+
+        Args:
+            sentence: Outcome text, passed to the crew as the `sentence` input.
+
+        Returns:
+            The crew result parsed as `ProbabilityOutput`.
+        """
+        task_research_one_outcome = Task(
+            description=RESEARCH_OUTCOME_PROMPT,
+            agent=self.researcher,
+            expected_output=RESEARCH_OUTCOME_OUTPUT,
+        )
+        task_create_probability_for_one_outcome = Task(
+            description=PROBABILITY_FOR_ONE_OUTCOME_PROMPT,
+            expected_output=PROBABILITY_CLASS_OUTPUT,
+            agent=self.predictor,
+            output_json=ProbabilityOutput,
+            context=[task_research_one_outcome],
+        )
+        crew = Crew(
+            agents=[self.researcher, self.predictor],
+            tasks=[task_research_one_outcome, task_create_probability_for_one_outcome],
+            verbose=2,
+            process=Process.sequential,
+        )
+
+        result = crew.kickoff(inputs={"sentence": sentence})
+        return ProbabilityOutput.model_validate_json(result)
+
+    def generate_final_decision(
+        self, outcomes_with_probabilities: list[tuple[str, ProbabilityOutput]]
+    ) -> ProbabilityOutput:
+        """Combine the per-outcome probabilities into one final decision.
+
+        Args:
+            outcomes_with_probabilities: `(outcome text, probability)` pairs.
+                The first pair's outcome is passed as the outcome to assess.
+
+        Returns:
+            The crew result parsed as `ProbabilityOutput`.
+
+        Raises:
+            ValueError: If `outcomes_with_probabilities` is empty.
+        """
+        # Fail with a clear message instead of an IndexError further down.
+        if not outcomes_with_probabilities:
+            raise ValueError("Cannot generate a final decision without any outcomes.")
+
+        task_final_decision = Task(
+            description=FINAL_DECISION_PROMPT,
+            agent=self.predictor,
+            expected_output=PROBABILITY_CLASS_OUTPUT,
+            output_json=ProbabilityOutput,
+        )
+
+        crew = Crew(
+            agents=[self.predictor],
+            tasks=[task_final_decision],
+            verbose=2,
+        )
+
+        result = crew.kickoff(
+            inputs={
+                # `model_dump()` is the pydantic v2 replacement for the deprecated `.dict()`.
+                "outcomes_with_probabilities": [
+                    (item[0], item[1].model_dump())
+                    for item in outcomes_with_probabilities
+                ],
+                "number_of_outcomes": len(outcomes_with_probabilities),
+                "outcome_to_assess": outcomes_with_probabilities[0][0],
+            }
+        )
+        return ProbabilityOutput.model_validate_json(result)
+
+    def answer_binary_market(self, market: AgentMarket) -> bool:
+        """Answer `market` with True (yes) or False (no).
+
+        The question is split into outcomes, one crew runs the research and
+        probability tasks of all outcomes, and the per-outcome results feed the
+        final decision.
+
+        Args:
+            market: Market whose `question` is answered.
+
+        Returns:
+            True when the final decision is "y" (ignoring case and surrounding
+            whitespace), otherwise False.
+        """
+        outcomes = self.split_research_into_outcomes(market.question)
+        print("outcomes ", outcomes)
+
+        # Maps each outcome to its [research task, probability task] pair.
+        task_map = {
+            outcome: self.build_tasks_for_outcome(input_dict={"sentence": outcome})
+            for outcome in tqdm(outcomes.outcomes)
+        }
+
+        # Flatten the per-outcome task pairs into the single list the crew runs.
+        all_tasks = [task for tasks in task_map.values() for task in tasks]
+        crew = Crew(
+            agents=[self.researcher, self.predictor],
+            tasks=all_tasks,
+            verbose=2,
+        )
+
+        crew.kickoff()
+
+        # We parse individual task results to build outcomes_with_probs
+        outcomes_with_probs = []
+        for outcome, tasks in task_map.items():
+            try:
+                prediction_result = ProbabilityOutput.model_validate_json(
+                    tasks[1].output.raw_output
+                )
+            except Exception as e:
+                # Best effort: an unparsable result falls back to a neutral,
+                # zero-confidence estimate so the other outcomes still count.
+                print("Could not parse result as ProbabilityOutput ", e)
+                prediction_result = ProbabilityOutput(
+                    p_yes=0.5, p_no=0.5, confidence=0, decision=""
+                )
+
+            outcomes_with_probs.append((outcome, prediction_result))
+
+        final_answer = self.generate_final_decision(outcomes_with_probs)
+        # Normalise so that answers such as "Y" or "y " still count as yes.
+        return final_answer.decision.strip().lower() == "y"
diff --git a/prediction_market_agent/agents/crewai_subsequential_agent/deploy.py b/prediction_market_agent/agents/crewai_subsequential_agent/deploy.py
@@ -0,0 +1,75 @@
+from decimal import Decimal
+import random
+from prediction_market_agent_tooling.deploy.agent import DeployableAgent
+from prediction_market_agent_tooling.markets.agent_market import AgentMarket
+from prediction_market_agent_tooling.markets.data_models import BetAmount, Currency
+from prediction_market_agent_tooling.markets.markets import MarketType
+
+from prediction_market_agent.agents.crewai_subsequential_agent.crewai_agent_subquestions import CrewAIAgentSubquestions
+from prediction_market_agent.agents.known_outcome_agent.known_outcome_agent import (
+    Result,
+)
+from prediction_market_agent.agents.utils import market_is_saturated
+
+
+class DeployableThinkThoroughlyAgent(DeployableAgent):
+    """Deployable agent that answers binary markets with `CrewAIAgentSubquestions`."""
+
+    # For cheaper credits at this experimental stage
+    # NOTE(review): not referenced by any method in this class - confirm it is used elsewhere.
+    model = "gpt-3.5-turbo"
+
+    def load(self) -> None:
+        """Initialise per-instance state."""
+        # NOTE(review): never read in this class; looks like a leftover - confirm before removing.
+        self.markets_with_known_outcomes: dict[str, Result] = {}
+
+    def pick_markets(self, markets: list[AgentMarket]) -> list[AgentMarket]:
+        """Pick up to 5 random markets that are not saturated.
+
+        Args:
+            markets: Candidate markets; the list itself is left unmodified.
+
+        Returns:
+            At most 5 non-saturated markets in random order.
+        """
+        # Shuffle a copy so the caller's list keeps its original order.
+        candidates = list(markets)
+        random.shuffle(candidates)
+
+        picked_markets: list[AgentMarket] = []
+        for market in candidates:
+            # Assume very high probability markets are already known, and have
+            # been correctly bet on, and therefore the value of betting on them
+            # is low.
+            if market_is_saturated(market=market):
+                continue
+            picked_markets.append(market)
+            if len(picked_markets) == 5:
+                break
+
+        return picked_markets
+
+    def answer_binary_market(self, market: AgentMarket) -> bool:
+        """Answer `market` by running a freshly built `CrewAIAgentSubquestions`."""
+        agent = CrewAIAgentSubquestions()
+        return agent.answer_binary_market(market)
+
+    def calculate_bet_amount(self, answer: bool, market: AgentMarket) -> BetAmount:
+        """Return a fixed bet of 0.1 xDai, regardless of `answer`.
+
+        Raises:
+            NotImplementedError: If the market's currency is not xDai.
+        """
+        if market.currency != Currency.xDai:
+            raise NotImplementedError("This agent only supports xDai markets")
+        # Built from a string: Decimal(0.1) would carry the binary float error
+        # (0.1000000000000000055...) into the bet amount.
+        return BetAmount(amount=Decimal("0.1"), currency=Currency.xDai)
+
+
+if __name__ == "__main__":
+    agent = DeployableThinkThoroughlyAgent()
+    # Options for a local run against Omen markets; `place_bet=False` is passed,
+    # presumably so that no real bets are placed (confirm against `deploy_local`).
+    local_run_options = dict(
+        market_type=MarketType.OMEN,
+        sleep_time=540,
+        timeout=180,
+        place_bet=False,
+    )
+    agent.deploy_local(**local_run_options)
+    # agent.deploy_gcp(
+    #     repository=f"git+{get_current_git_url()}@{get_current_git_commit_sha()}",
+    #     market_type=MarketType.OMEN,
+    #     labels={OWNER_KEY: getpass.getuser()},
+    #     secrets={
+    #         "TAVILY_API_KEY": "GNOSIS_AI_TAVILY_API_KEY:latest",
+    #     },
+    #     memory=1024,
+    #     api_keys=APIKeys(
+    #         BET_FROM_ADDRESS=verify_address(
+    #             "0xb611A9f02B318339049264c7a66ac3401281cc3c"
+    #         ),
+    #         BET_FROM_PRIVATE_KEY=private_key_type("EVAN_OMEN_BETTER_0_PKEY:latest"),
+    #         OPENAI_API_KEY=SecretStr("EVAN_OPENAI_API_KEY:latest"),
+    #         MANIFOLD_API_KEY=None,
+    #     ),
+    #     cron_schedule="0 */12 * * *",
+    #     timeout=540,
+    # )