diff --git a/.github/workflows/openai.yml b/.github/workflows/openai.yml
index f074f2ffd4..b6e83d169e 100644
--- a/.github/workflows/openai.yml
+++ b/.github/workflows/openai.yml
@@ -34,6 +34,10 @@ jobs:
           pip install -e .[autogen,blendsearch]
           python -c "import flaml"
           pip install coverage pytest datasets
+      - name: Install packages for MathChat when needed
+        if: matrix.python-version != '3.11'
+        run: |
+          pip install -e .[mathchat]
       - name: Coverage
         if: matrix.python-version == '3.9'
         env:
@@ -49,6 +53,7 @@ jobs:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
           AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
+          WOLFRAM_ALPHA_APPID: ${{ secrets.WOLFRAM_ALPHA_APPID }}
         run: |
           pip install nbconvert nbformat ipykernel
           coverage run -a -m pytest test/autogen/oai/test_notebook.py
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 0cdfae4e1a..bc90024c19 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -29,7 +29,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, macos-latest, windows-2019]
-        python-version: ["3.7", "3.8", "3.9", "3.10"]
+        python-version: ["3.8", "3.9", "3.10"]
     steps:
       - uses: actions/checkout@v3
       - name: Set up Python ${{ matrix.python-version }}
@@ -82,17 +82,17 @@ jobs:
           # Uninstall pyspark to test env without pyspark
           pip uninstall -y pyspark
       - name: Test with pytest
-        if: (matrix.python-version != '3.7' || matrix.os == 'macos-latest') && matrix.python-version != '3.10'
+        if: matrix.python-version != '3.10'
         run: |
           pytest test
       - name: Coverage
-        if: (matrix.python-version == '3.7') && matrix.os != 'macos-latest' || matrix.python-version == '3.10'
+        if: matrix.python-version == '3.10'
         run: |
           pip install coverage
           coverage run -a -m pytest test
           coverage xml
       - name: Upload coverage to Codecov
-        if: (matrix.python-version == '3.7') && matrix.os != 'macos-latest' || matrix.python-version == '3.10'
+        if: matrix.python-version == '3.10'
         uses: codecov/codecov-action@v3
         with:
           file: ./coverage.xml
diff --git a/README.md b/README.md
index aa6a66afef..333769e56a 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 [![PyPI version](https://badge.fury.io/py/FLAML.svg)](https://badge.fury.io/py/FLAML)
 ![Conda version](https://img.shields.io/conda/vn/conda-forge/flaml)
 [![Build](https://github.com/microsoft/FLAML/actions/workflows/python-package.yml/badge.svg)](https://github.com/microsoft/FLAML/actions/workflows/python-package.yml)
-![Python Version](https://img.shields.io/badge/3.7%20%7C%203.8%20%7C%203.9%20%7C%203.10-blue)
+![Python Version](https://img.shields.io/badge/3.8%20%7C%203.9%20%7C%203.10-blue)
 [![Downloads](https://pepy.tech/badge/flaml)](https://pepy.tech/project/flaml)
 [![](https://img.shields.io/discord/1025786666260111483?logo=discord&style=flat)](https://discord.gg/Cppx2vSPVP)
 <!-- [![Join the chat at https://gitter.im/FLAMLer/community](https://badges.gitter.im/FLAMLer/community.svg)](https://gitter.im/FLAMLer/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) -->
diff --git a/flaml/autogen/agent/math_user_proxy_agent.py b/flaml/autogen/agent/math_user_proxy_agent.py
new file mode 100644
index 0000000000..c90f39b433
--- /dev/null
+++ b/flaml/autogen/agent/math_user_proxy_agent.py
@@ -0,0 +1,462 @@
+from .user_proxy_agent import UserProxyAgent
+from flaml.autogen.code_utils import UNKNOWN, extract_code, execute_code, infer_lang
+from flaml.autogen.math_utils import get_answer
+from collections import defaultdict
+import re
+import os
+from pydantic import BaseModel, Extra, root_validator
+from typing import Any, Dict, Optional
+from time import sleep
+
+
+PROMPTS = {
+    # default
+    "default": """Let's use Python to solve a math problem.
+
+Query requirements:
+You should always use the 'print' function for the output and use fractions/radical forms instead of decimals.
+You can use packages like sympy to help you.
+You must follow the formats below to write your code:
+```python
+# your code
+```
+
+First state the key idea to solve the problem. You may choose from three ways to solve the problem:
+Case 1: If the problem can be solved with Python code directly, please write a program to solve it. You can enumerate all possible arrangements if needed.
+Case 2: If the problem is mostly reasoning, you can solve it by yourself directly.
+Case 3: If the problem cannot be handled in the above two ways, please follow this process:
+1. Solve the problem step by step (do not over-divide the steps).
+2. Take out any queries that can be asked through Python (for example, any calculations or equations that can be calculated).
+3. Wait for me to give the results.
+4. Continue if you think the result is correct. If the result is invalid or unexpected, please correct your query or reasoning.
+
+After all the queries are run and you get the answer, put the answer in \\boxed{}.
+
+Problem:
+""",
+    # select python or wolfram
+    "two_tools": """Let's use two tools (Python and Wolfram alpha) to solve a math problem.
+
+Query requirements:
+You must follow the formats below to write your query:
+For Wolfram Alpha:
+```wolfram
+# one wolfram query
+```
+For Python:
+```python
+# your code
+```
+When using Python, you should always use the 'print' function for the output and use fractions/radical forms instead of decimals. You can use packages like sympy to help you.
+When using wolfram, give one query in each code block.
+
+Please follow this process:
+1. Solve the problem step by step (do not over-divide the steps).
+2. Take out any queries that can be asked through Python or Wolfram Alpha, select the most suitable tool to be used (for example, any calculations or equations that can be calculated).
+3. Wait for me to give the results.
+4. Continue if you think the result is correct. If the result is invalid or unexpected, please correct your query or reasoning.
+
+After all the queries are run and you get the answer, put the final answer in \\boxed{}.
+
+Problem: """,
+    # use python step by step
+    "python": """Let's use Python to solve a math problem.
+
+Query requirements:
+You should always use the 'print' function for the output and use fractions/radical forms instead of decimals.
+You can use packages like sympy to help you.
+You must follow the formats below to write your code:
+```python
+# your code
+```
+
+Please follow this process:
+1. Solve the problem step by step (do not over-divide the steps).
+2. Take out any queries that can be asked through Python (for example, any calculations or equations that can be calculated).
+3. Wait for me to give the results.
+4. Continue if you think the result is correct. If the result is invalid or unexpected, please correct your query or reasoning.
+
+After all the queries are run and you get the answer, put the answer in \\boxed{}.
+
+Problem: """,
+}
+
+
+def is_termination_msg(x):
+    """Check if a message is a termination message.
+
+    True only when `x` holds no python/wolfram code block and `get_answer(x)` is a non-empty answer.
+    """
+    if any(block[0] in ("python", "wolfram") for block in extract_code(x)):
+        return False  # a python/wolfram block was found, so this is not a final answer
+    answer = get_answer(x)  # evaluated once instead of twice
+    return answer is not None and answer != ""
+
+
+def add_print_to_last_line(s):
+    """Wrap the last line of `s` in print() unless the code already prints.
+
+    `s` is returned unchanged when it is empty, already contains "print(", or when
+    its last line contains a tab or "=" (presumably an indented statement or an
+    assignment, which cannot simply be wrapped in a call).
+    """
+    # 1. check if there is already a print statement
+    if "print(" in s:
+        return s
+    # 2. extract the last line; an empty string has no line to wrap
+    lines = s.splitlines()
+    if not lines or "\t" in lines[-1] or "=" in lines[-1]:
+        return s
+    # 3. enclose the last line in print() and join the lines back together
+    lines[-1] = "print(" + lines[-1] + ")"
+    return "\n".join(lines)
+
+
+def remove_print(s):
+    """Drop every line of `s` that starts with "print(" (indented print calls are kept)."""
+    lines = s.splitlines()
+    lines = [line for line in lines if not line.startswith("print(")]
+    return "\n".join(lines)
+
+
+class MathUserProxyAgent(UserProxyAgent):
+    """(Experimental) A MathChat agent that can handle math problems."""
+
+    MAX_CONSECUTIVE_AUTO_REPLY = 15  # maximum number of consecutive auto replies (subject to future change)
+
+    def __init__(
+        self,
+        name="MathChatAgent",  # default set to MathChatAgent
+        system_message="",
+        work_dir=None,
+        human_input_mode="NEVER",  # Fully automated
+        max_consecutive_auto_reply=None,
+        is_termination_msg=is_termination_msg,
+        use_docker=True,
+        max_invalid_q_per_step=3,  # a parameter needed in MathChat
+        **config,
+    ):
+        """
+        Args:
+            name (str): name of the agent
+            system_message (str): system message to be sent to the agent
+            work_dir (str): working directory for the agent
+            human_input_mode (str): whether to ask for human inputs every time a message is received.
+                Possible values are "ALWAYS", "TERMINATE", "NEVER".
+                (1) When "ALWAYS", the agent prompts for human input every time a message is received.
+                    Under this mode, the conversation stops when the human input is "exit",
+                    or when is_termination_msg is True and there is no human input.
+                (2) When "TERMINATE", the agent prompts for human input only when a termination message is received or
+                    the number of auto reply reaches the max_consecutive_auto_reply.
+                (3) When "NEVER", the agent will never prompt for human input. Under this mode, the conversation stops
+                    when the number of auto reply reaches the max_consecutive_auto_reply or when is_termination_msg is True.
+            max_consecutive_auto_reply (int): the maximum number of consecutive auto replies.
+                default to None (no limit provided, class attribute MAX_CONSECUTIVE_AUTO_REPLY will be used as the limit in this case).
+                The limit only plays a role when human_input_mode is not "ALWAYS".
+            is_termination_msg (function): a function that takes a message and returns a boolean value.
+                This function is used to determine if a received message is a termination message.
+            use_docker (bool): whether to use docker to execute the code.
+            max_invalid_q_per_step (int): how many replies containing a failed query are tolerated before auto_reply asks the assistant to revisit its reasoning.
+            **config (dict): other configurations.
+        """
+        super().__init__(
+            name=name,
+            system_message=system_message,
+            work_dir=work_dir,
+            human_input_mode=human_input_mode,
+            max_consecutive_auto_reply=max_consecutive_auto_reply,
+            is_termination_msg=is_termination_msg,
+            use_docker=use_docker,
+            **config,
+        )
+
+        # fixed for the lifetime of the agent
+        self._max_invalid_q_per_step = max_invalid_q_per_step
+
+        # per-problem state; restored to these values by _reset()
+        self._valid_q_count = 0
+        self._total_q_count = 0
+        self._accum_invalid_q_per_step = 0
+        self._previous_code = ""
+        self.last_reply = None
+
+    def generate_init_prompt(self, problem, prompt_type="default", customized_prompt=None):
+        """Generate a prompt for the assistant agent with the given problem and prompt.
+
+        Args:
+            problem (str): the problem to be solved.
+            prompt_type (str): the type of the prompt. Possible values are "default", "python", "two_tools".
+                (1) "default": the prompt that allows the agent to choose between 3 ways to solve a problem:
+                    1. write a python program to solve it directly.
+                    2. solve it directly without python.
+                    3. solve it step by step with python.
+                (2) "python":
+                    a simplified prompt from the third way of the "default" prompt, that asks the assistant
+                    to solve the problem step by step with python.
+                (3) "two_tools":
+                    a simplified prompt similar to the "python" prompt, but allows the model to choose between
+                    Python and Wolfram Alpha to solve the problem.
+            customized_prompt (str): a customized prompt to be used. If it is not None, the prompt_type will be ignored.
+
+        Returns:
+            str: the generated prompt ready to be sent to the assistant agent.
+        """
+        self._reset()  # every new problem starts from a clean per-problem state
+        if customized_prompt is not None:
+            return customized_prompt + problem
+        else:
+            return PROMPTS[prompt_type] + problem  # an unknown prompt_type raises KeyError
+
+    def _reset(self):  # restore the per-problem state to the values assigned in __init__
+        self._conversations.clear()  # `_conversations` is not assigned in this class's __init__
+        self._valid_q_count = 0
+        self._total_q_count = 0
+        self._accum_invalid_q_per_step = 0
+        self._previous_code = ""  # forget the code history of the previous problem
+        self.last_reply = None
+
+    def _execute_one_python_code(self, pycode):
+        """Execute python code blocks.
+
+        Previous python code will be saved and executed together with the new code.
+        the "print" function will also be added to the last line of the code if needed
+
+        Returns `(output, is_success)`: the text to report back and whether the run is counted as successful.
+        """
+        # Need to replace all "; " with "\n" to avoid syntax error when adding `print` to the last line
+        new_code = add_print_to_last_line(pycode.replace("; ", "\n").replace(";", "\n"))
+        # Keep the new code apart from the history so the history is not appended to itself below
+        pycode = self._previous_code + new_code
+
+        return_code, output, _ = execute_code(pycode, use_docker=self._use_docker, timeout=5)
+        is_success = return_code == 0
+
+        # Decode the output
+        if isinstance(output, bytes):
+            try:
+                output = output.decode("utf-8")
+            except Exception:
+                is_success = False
+                output = "The return cannot be decoded."
+
+        if not is_success:
+            # Remove the file information from the error string
+            pattern = r'File "/[^"]+\.py", line \d+, in .+\n'
+            if isinstance(output, str):
+                output = re.sub(pattern, "", output)
+            output = "Error: " + output
+        elif output == "" and "print" not in pycode:
+            output = "No output found. Make sure you print the results."
+            is_success = False
+        elif output == "":
+            output = "No output found."
+
+        if len(output) > 2000:
+            output = "Your requested query response is too long. You might have made a mistake. Please revise your reasoning and query."
+            is_success = False
+
+        # Candidate for the next history: the current history plus (part of) the NEW code only
+        tmp = self._previous_code + "\n"
+        if is_success:
+            # remove print and check if it still works
+            tmp += remove_print(new_code) + "\n"
+        else:
+            # only add imports and check if it works
+            for line in new_code.split("\n"):
+                if "import" in line:
+                    tmp += line + "\n"
+        rcode, _, _ = execute_code(tmp, use_docker=self._use_docker)
+
+        if rcode == 0:
+            self._previous_code = tmp
+        return output, is_success
+
+    def _execute_one_wolfram_query(self, query: str):
+        """
+        Run one wolfram query and return the output.
+        Returns:
+            output: string with the output of the query
+            is_success: boolean indicating whether the query was successful
+        """
+        # wolfram query handler; a new wrapper (and therefore a new client) is built for every query
+        wolfram = WolframAlphaAPIWrapper()
+        output, is_success = wolfram.run(query)
+        if output == "":  # an empty answer is reported to the assistant as an invalid query
+            output = "Error: The wolfram query is invalid."
+            is_success = False
+        return output, is_success
+
+    def auto_reply(self, message, sender, default_reply=""):
+        """Generate an auto reply and send it to `sender`.
+
+        Code blocks in `message` are executed and their outputs sent back; without code, `default_reply` is sent.
+        """
+        code_blocks = extract_code(message)
+
+        if len(code_blocks) == 1 and code_blocks[0][0] == UNKNOWN:
+            # no code block is found, lang should be `UNKNOWN``
+            if default_reply == "":
+                default_reply = "Continue. Please keep solving the problem until you need to query. (If you get to the answer, put it in \\boxed{}.)"
+            self._send(default_reply, sender)
+            return
+
+        all_success = True
+        reply = ""
+        for lang, code in code_blocks:
+            if not lang:
+                lang = infer_lang(code)
+            if lang == "python":
+                output, is_success = self._execute_one_python_code(code)
+            elif lang == "wolfram":
+                output, is_success = self._execute_one_wolfram_query(code)
+            else:  # any other language: report it instead of reusing a stale or unbound `output`
+                output, is_success = "Error: Unknown language.", False
+            reply += output + "\n"
+            if not is_success:
+                all_success = False
+                self._valid_q_count -= 1  # count invalid queries
+        reply = reply.strip()
+
+        if self.last_reply == reply:
+            # NOTE(review): was `return (hint, False)` without _send; assumes callers ignore the return value - confirm
+            reply += "\nYour query or result is same from the last, please try a new approach."
+        else:
+            self.last_reply = reply
+            if not all_success:
+                self._accum_invalid_q_per_step += 1
+                if self._accum_invalid_q_per_step > self._max_invalid_q_per_step:
+                    self._accum_invalid_q_per_step = 0
+                    reply = "Please revisit the problem statement and your reasoning. If you think this step is correct, solve it yourself and continue the next step. Otherwise, correct this step."
+        self._send(reply, sender)
+
+
+# Imported from langchain. Langchain is licensed under MIT License:
+# The MIT License
+
+# Copyright (c) Harrison Chase
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+
+def get_from_dict_or_env(data: Dict[str, Any], key: str, env_key: str, default: Optional[str] = None) -> str:
+    """Return `data[key]` if truthy, else `os.environ[env_key]` if non-empty, else `default`.
+
+    Raises ValueError when neither source has a value and `default` is None."""
+    found = data.get(key) or os.environ.get(env_key)
+    if found:
+        return found
+    if default is not None:
+        return default
+    raise ValueError(
+        f"Did not find {key}, please add an environment variable"
+        f" `{env_key}` which contains it, or pass"
+        f"  `{key}` as a named parameter."
+    )
+
+
+# Imported from langchain
+class WolframAlphaAPIWrapper(BaseModel):
+    """Wrapper for Wolfram Alpha.
+
+    Docs for using:
+
+    1. Go to wolfram alpha and sign up for a developer account
+    2. Create an app and get your APP ID
+    3. Save your APP ID into WOLFRAM_ALPHA_APPID env variable
+    4. pip install wolframalpha
+
+    """
+
+    wolfram_client: Any  #: :meta private:
+    wolfram_alpha_appid: Optional[str] = None
+
+    class Config:
+        """Configuration for this pydantic object."""
+
+        extra = Extra.forbid
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that the app id and the `wolframalpha` package exist, then build the client."""
+        wolfram_alpha_appid = get_from_dict_or_env(values, "wolfram_alpha_appid", "WOLFRAM_ALPHA_APPID")
+        values["wolfram_alpha_appid"] = wolfram_alpha_appid
+
+        try:
+            import wolframalpha  # imported here so this module can be imported without the package
+
+        except ImportError:
+            raise ImportError("wolframalpha is not installed. " "Please install it with `pip install wolframalpha`")
+        client = wolframalpha.Client(wolfram_alpha_appid)
+        values["wolfram_client"] = client  # stored on the model for use by run()
+
+        return values
+
+    def run(self, query: str) -> tuple:
+        """Run query through WolframAlpha and parse result.
+
+        Returns `(text, is_success)`: the answer or a failure message, and whether an answer was found.
+        """
+        from urllib.error import HTTPError
+
+        res = None
+        for _ in range(20):  # up to 20 attempts; only HTTP errors are retried (after a 1s pause)
+            try:
+                res = self.wolfram_client.query(query)
+                break
+            except HTTPError:
+                sleep(1)
+            except Exception:
+                return (
+                    "Wolfram Alpha wasn't able to answer it. Please try a new query for wolfram or use python.",
+                    False,
+                )
+        if res is None:
+            return (
+                "Wolfram Alpha wasn't able to answer it (may due to web error), you can try again or use python.",
+                False,
+            )
+
+        try:
+            # NOTE(review): assumes "@success" is a bool; a non-empty string such as "false" would skip this - confirm
+            if not res["@success"]:
+                return (
+                    "Your Wolfram query is invalid. Please try a new query for wolfram or use python.",
+                    False,
+                )
+            assumption = next(res.pods).text
+            answer = ""
+            for r in res["pod"]:
+                if r["@title"] == "Solution":
+                    answer = r["subpod"]["plaintext"]
+                if r["@title"] == "Results" or r["@title"] == "Solutions":
+                    for i, sub in enumerate(r["subpod"]):
+                        answer += f"ans {i}: " + sub["plaintext"] + "\n"
+                    break
+            if answer == "":
+                answer = next(res.results).text
+        except Exception:
+            return (
+                "Wolfram Alpha wasn't able to answer it. Please try a new query for wolfram or use python.",
+                False,
+            )
+
+        if answer is None or answer == "":
+            # We don't want to return the assumption alone if answer is empty
+            return "No good Wolfram Alpha Result was found", False
+        return f"Assumption: {assumption} \nAnswer: {answer}", True
diff --git a/flaml/autogen/agent/user_proxy_agent.py b/flaml/autogen/agent/user_proxy_agent.py
index dbc986064a..b97bc8aa75 100644
--- a/flaml/autogen/agent/user_proxy_agent.py
+++ b/flaml/autogen/agent/user_proxy_agent.py
@@ -32,7 +32,7 @@ def __init__(
                 (2) When "TERMINATE", the agent only prompts for human input only when a termination message is received or
                     the number of auto reply reaches the max_consecutive_auto_reply.
                 (3) When "NEVER", the agent will never prompt for human input. Under this mode, the conversation stops
-                    when the number of auto reply reaches the max_consecutive_auto_reply or or when is_termination_msg is True.
+                    when the number of auto reply reaches the max_consecutive_auto_reply or when is_termination_msg is True.
             max_consecutive_auto_reply (int): the maximum number of consecutive auto replies.
                 default to None (no limit provided, class attribute MAX_CONSECUTIVE_AUTO_REPLY will be used as the limit in this case).
                 The limit only plays a role when human_input_mode is not "ALWAYS".
diff --git a/notebook/autogen_agent_MathChat.ipynb b/notebook/autogen_agent_MathChat.ipynb
new file mode 100644
index 0000000000..3de628bfe7
--- /dev/null
+++ b/notebook/autogen_agent_MathChat.ipynb
@@ -0,0 +1,937 @@
+{
+    "cells": [
+     {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+       "<a href=\"https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/autogen_agent_MathChat.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+     },
+     {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+       "# Using MathChat to solve math problems\n",
+       "\n",
+       "MathChat is a conversational framework for math problem solving. In this notebook, we demonstrate how to use MathChat to solve math problems. MathChat uses the `AssistantAgent` and `MathUserProxyAgent`, which is similar to the usage of `AssistantAgent` and `UserProxyAgent` in other notebooks (More details in `autogen_agent_auto_feedback_from_code_execution.ipynb`). Essentially, `MathUserProxyAgent` implements a different auto reply mechanism corresponding to the MathChat prompts. The original implementation and experiments of MathChat are in this [branch](https://github.com/kevin666aa/FLAML/tree/gpt_math_solver/flaml/autogen/math), and you can find more details in our paper [An Empirical Study on Challenging Math Problem Solving with GPT-4](https://arxiv.org/abs/2306.01337).\n",
+       "\n",
+       "## Requirements\n",
+       "\n",
+       "FLAML requires `Python>=3.8`. To run this notebook example, please install flaml with the [mathchat] option.\n",
+       "```bash\n",
+       "pip install flaml[mathchat]\n",
+       "```"
+      ]
+     },
+     {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+       "# %pip install flaml[mathchat]==2.0.0rc1"
+      ]
+     },
+     {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+       "## Set your API Endpoint\n",
+       "\n",
+       "The [`config_list_gpt4_gpt35`](https://microsoft.github.io/FLAML/docs/reference/autogen/oai/openai_utils#config_list_gpt4_gpt35) function tries to create a list of gpt-4 and gpt-3.5 configurations using Azure OpenAI endpoints and OpenAI endpoints. It assumes the api keys and api bases are stored in the corresponding environment variables or local txt files:\n",
+       "\n",
+       "- OpenAI API key: os.environ[\"OPENAI_API_KEY\"] or `openai_api_key_file=\"key_openai.txt\"`.\n",
+       "- Azure OpenAI API key: os.environ[\"AZURE_OPENAI_API_KEY\"] or `aoai_api_key_file=\"key_aoai.txt\"`. Multiple keys can be stored, one per line.\n",
+       "- Azure OpenAI API base: os.environ[\"AZURE_OPENAI_API_BASE\"] or `aoai_api_base_file=\"base_aoai.txt\"`. Multiple bases can be stored, one per line.\n",
+       "\n",
+       "It's OK to have only the OpenAI API key, or only the Azure OpenAI API key + base.\n",
+       "\n",
+       "If you open this notebook in google colab, you can upload your files by clicking the file icon on the left panel and then choosing the \"upload file\" icon.\n"
+      ]
+     },
+     {
+      "cell_type": "code",
+      "execution_count": 1,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+       "from flaml import oai\n",
+       "\n",
+       "config_list = oai.config_list_gpt4_gpt35()"
+      ]
+     },
+     {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+       "The config list looks like the following:\n",
+       "```python\n",
+       "config_list = [\n",
+       "    {\n",
+       "        'model': 'gpt-4',\n",
+       "        'api_key': '<your OpenAI API key here>',\n",
+       "    },  # only if OpenAI API key is found\n",
+       "    {\n",
+       "        'model': 'gpt-4',\n",
+       "        'api_key': '<your first Azure OpenAI API key here>',\n",
+       "        'api_base': '<your first Azure OpenAI API base here>',\n",
+       "        'api_type': 'azure',\n",
+       "        'api_version': '2023-03-15-preview',\n",
+       "    },  # only if at least one Azure OpenAI API key is found\n",
+       "    {\n",
+       "        'model': 'gpt-4',\n",
+       "        'api_key': '<your second Azure OpenAI API key here>',\n",
+       "        'api_base': '<your second Azure OpenAI API base here>',\n",
+       "        'api_type': 'azure',\n",
+       "        'api_version': '2023-03-15-preview',\n",
+       "    },  # only if the second Azure OpenAI API key is found\n",
+       "]\n",
+       "```\n",
+       "\n",
+       "You can directly override it if the above function returns an empty list, i.e., it doesn't find the keys in the specified locations."
+      ]
+     },
+     {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+       "## Construct agents for MathChat\n",
+       "\n",
+       "We start by initializing the `AssistantAgent` and `MathUserProxyAgent`. The system message needs to be set to \"You are a helpful assistant.\" for MathChat. The detailed instructions are given in the user message. Later we will use the `MathUserProxyAgent.generate_init_prompt` to combine the instructions and a math problem for an initial prompt to be sent to the LLM assistant."
+      ]
+     },
+     {
+      "cell_type": "code",
+      "execution_count": 11,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+       "from flaml.autogen.agent.assistant_agent import AssistantAgent\n",
+       "from flaml.autogen.agent.math_user_proxy_agent import MathUserProxyAgent\n",
+       "\n",
+       "oai.ChatCompletion.start_logging()\n",
+       "\n",
+       "# 1. create an AssistantAgent instance named \"assistant\"\n",
+       "assistant = AssistantAgent(\n",
+       "    name=\"assistant\", \n",
+       "    system_message=\"You are a helpful assistant.\",\n",
+       "    request_timeout=600, \n",
+       "    seed=42, \n",
+       "    config_list=config_list)\n",
+       "\n",
+       "# 2. create the MathUserProxyAgent instance named \"mathproxyagent\"\n",
+       "# By default, the human_input_mode is \"NEVER\", which means the agent will not ask for human input.\n",
+       "mathproxyagent = MathUserProxyAgent(\n",
+       "    name=\"MathChatAgent\", \n",
+       "    human_input_mode=\"NEVER\",\n",
+       "    use_docker=False)"
+      ]
+     },
+     {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+       "### Example 1\n",
+       "\n",
+       "Problem: Find all $x$ that satisfy the inequality $(2x+10)(x+3)<(3x+9)(x+8)$. Express your answer in interval notation.\n",
+       "\n",
+       "Correct Solution: \n",
+       "We have \\begin{align*} (2x+10)(x+3)&<(3x+9)(x+8) \\quad \\Rightarrow\n",
+       "\\\\ 2(x+5)(x+3)&<3(x+3)(x+8) \\quad \\Rightarrow\n",
+       "\\\\ 2(x+5)(x+3)-3(x+3)(x+8)&<0 \\quad \\Rightarrow\n",
+       "\\\\ (2x+10-(3x+24))(x+3)&<0 \\quad \\Rightarrow\n",
+       "\\\\ (-x-14)(x+3)&<0 \\quad \\Rightarrow\n",
+       "\\\\ (x+14)(x+3)&>0.\n",
+       "\\end{align*} This inequality is satisfied if and only if $(x+14)$ and $(x+3)$ are either both positive or both negative.  Both factors are positive for $x>-3$ and both factors are negative for $x<-14$.  When $-14<x<-3$, one factor is positive and the other negative, so their product is negative.   Therefore, the range of $x$ that satisfies the inequality is $ \\boxed{(-\\infty, -14)\\cup(-3,\\infty)} $."
+      ]
+     },
+     {
+      "cell_type": "code",
+      "execution_count": 9,
+      "metadata": {},
+      "outputs": [
+       {
+        "name": "stdout",
+        "output_type": "stream",
+        "text": [
+         "\n",
+         " -------------------------------------------------------------------------------- \n",
+         "\n",
+         "MathChatAgent (to assistant):\n",
+         "Let's use Python to solve a math problem.\n",
+         "\n",
+         "Query requirements:\n",
+         "You should always use the 'print' function for the output and use fractions/radical forms instead of decimals.\n",
+         "You can use packages like sympy to help you.\n",
+         "You must follow the formats below to write your code:\n",
+         "```python\n",
+         "# your code\n",
+         "```\n",
+         "\n",
+         "First state the key idea to solve the problem. You may choose from three ways to solve the problem:\n",
+         "Case 1: If the problem can be solved with Python code directly, please write a program to solve it. You can enumerate all possible arrangements if needed.\n",
+         "Case 2: If the problem is mostly reasoning, you can solve it by yourself directly.\n",
+         "Case 3: If the problem cannot be handled in the above two ways, please follow this process:\n",
+         "1. Solve the problem step by step (do not over-divide the steps).\n",
+         "2. Take out any queries that can be asked through Python (for example, any calculations or equations that can be calculated).\n",
+         "3. Wait for me to give the results.\n",
+         "4. Continue if you think the result is correct. If the result is invalid or unexpected, please correct your query or reasoning.\n",
+         "\n",
+         "After all the queries are run and you get the answer, put the answer in \\boxed{}.\n",
+         "\n",
+         "Find all $x$ that satisfy the inequality $(2x+10)(x+3)<(3x+9)(x+8)$. Express your answer in interval notation.\n",
+         "\n",
+         " -------------------------------------------------------------------------------- \n",
+         "\n",
+         "assistant (to MathChatAgent):\n",
+         "The key idea to solve this problem is to find the critical points, which are the points where the inequality is equal. Then, test the intervals between the critical points to find the interval for which the inequality holds true.\n",
+         "\n",
+         "We start by finding the critical points:\n",
+         "\n",
+         "1. Set the given inequality to be equal: $(2x+10)(x+3)=(3x+9)(x+8)$\n",
+         "2. Expand both sides and simplify\n",
+         "3. Bring all terms to one side of the equation\n",
+         "4. Solve for `x`\n",
+         "\n",
+         "Then we'll test the intervals between the critical points.\n",
+         "  1. Test a point less than the smallest critical point\n",
+         "  2. Test a point between the critical points\n",
+         "  3. Test a point greater than the largest critical point\n",
+         "\n",
+         "Now we will find the critical points using Python:\n",
+         "```python\n",
+         "from sympy import symbols, Eq, solve\n",
+         "\n",
+         "x = symbols(\"x\")\n",
+         "lhs = (2*x + 10)*(x + 3)\n",
+         "rhs = (3*x + 9)*(x + 8)\n",
+         "equation = Eq(lhs, rhs)\n",
+         "\n",
+         "critical_points = solve(equation, x)\n",
+         "critical_points\n",
+         "```\n",
+         "\n",
+         "\n",
+         ">>>>>>>> NO HUMAN INPUT RECEIVED. USING AUTO REPLY FOR THE USER...\n",
+         "\n",
+         " -------------------------------------------------------------------------------- \n",
+         "\n",
+         "MathChatAgent (to assistant):\n",
+         "[-14, -3]\n",
+         "\n",
+         " -------------------------------------------------------------------------------- \n",
+         "\n",
+         "assistant (to MathChatAgent):\n",
+         "We found the critical points to be $x=-14$ and $x=-3$. Now we'll test the intervals between these critical points as well as points before and after the critical points.\n",
+         "\n",
+         "The intervals are:\n",
+         "\n",
+         "1. $x < -14$\n",
+         "2. $-14 < x < -3$\n",
+         "3. $x > -3$\n",
+         "\n",
+         "Next, we will test a point in each interval using Python and the original inequality:\n",
+         "\n",
+         "```python\n",
+         "from sympy import symbols\n",
+         "\n",
+         "x = symbols(\"x\")\n",
+         "inequality = (2*x + 10)*(x + 3) < (3*x + 9)*(x + 8)\n",
+         "\n",
+         "test_points = [-15, -5, 0]\n",
+         "intervals = []\n",
+         "\n",
+         "for point in test_points:\n",
+         "    if inequality.subs(x, point):\n",
+         "        intervals.append(point)\n",
+         "\n",
+         "intervals\n",
+         "```\n",
+         "\n",
+         "\n",
+         ">>>>>>>> NO HUMAN INPUT RECEIVED. USING AUTO REPLY FOR THE USER...\n",
+         "\n",
+         " -------------------------------------------------------------------------------- \n",
+         "\n",
+         "MathChatAgent (to assistant):\n",
+         "[-15, 0]\n",
+         "\n",
+         " -------------------------------------------------------------------------------- \n",
+         "\n",
+         "assistant (to MathChatAgent):\n",
+         "We have tested the intervals and found that the inequality holds true for:\n",
+         "\n",
+         "1. $x < -14$ (test point: $-15$)\n",
+         "3. $x > -3$ (test point: $0$)\n",
+         "\n",
+         "Therefore, the solution to the inequality $(2x+10)(x+3)<(3x+9)(x+8)$ is:\n",
+         "\n",
+         "$x \\in \\boxed{(-\\infty, -14) \\cup (-3, \\infty)}$\n"
+        ]
+       }
+      ],
+      "source": [
+       "# reset the assistant. Always reset the assistant before starting a new conversation.\n",
+       "assistant.reset()\n",
+       "\n",
+       "# given a math problem, we use the mathproxyagent to generate a prompt to be sent to the assistant as the initial message.\n",
+       "# the assistant receives the message and generates a response. The response will be sent back to the mathproxyagent for processing.\n",
+       "# The conversation continues until the termination condition is met; in MathChat, the termination condition is the detection of \"\\boxed{}\" in the response.\n",
+       "math_problem =  \"Find all $x$ that satisfy the inequality $(2x+10)(x+3)<(3x+9)(x+8)$. Express your answer in interval notation.\"\n",
+       "assistant.receive(\n",
+       "    message=mathproxyagent.generate_init_prompt(math_problem),\n",
+       "    sender=mathproxyagent,\n",
+       ")"
+      ]
+     },
+     {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+       "### Example 2\n",
+       "Problem: For what negative value of $k$ is there exactly one solution to the system of equations \\begin{align*}\n",
+       "y &= 2x^2 + kx + 6 \\\\\n",
+       "y &= -x + 4?\n",
+       "\\end{align*}\n",
+       "\n",
+       "Correct Solution: Setting the two expressions for $y$ equal to each other, it follows that $2x^2 + kx + 6 = -x + 4$. Re-arranging, $2x^2 + (k+1)x + 2 = 0$. For there to be exactly one solution for $x$, then the discriminant of the given quadratic must be equal to zero. Thus, $(k+1)^2 - 4 \\cdot 2 \\cdot 2 = (k+1)^2 - 16 = 0$, so $k+1 = \\pm 4$. Taking the negative value, $k = \\boxed{-5}$."
+      ]
+     },
+     {
+      "cell_type": "code",
+      "execution_count": 27,
+      "metadata": {},
+      "outputs": [
+       {
+        "name": "stdout",
+        "output_type": "stream",
+        "text": [
+         "\n",
+         "**** assistant received message from MathChatAgent ****\n",
+         "\n",
+         "\n",
+         "Let's use Python to solve a math problem.\n",
+         "\n",
+         "Query requirements:\n",
+         "You should always use the 'print' function for the output and use fractions/radical forms instead of decimals.\n",
+         "You can use packages like sympy to help you.\n",
+         "You must follow the formats below to write your code:\n",
+         "```python\n",
+         "# your code\n",
+         "```\n",
+         "\n",
+         "You may choose from three ways to solve the problem:\n",
+         "Case 1: If the problem can be solved with Python code directly, please write a program to solve it. You can enumerate all possible arrangements if needed.\n",
+         "Case 2: If the problem cannot be handled in the above two ways, please follow this process:\n",
+         "1. Solve the problem step by step (do not over-divide the steps).\n",
+         "2. Take out any queries that can be asked through Python (for example, any calculations or equations that can be calculated).\n",
+         "3. Wait for me to give the results.\n",
+         "4. Continue if you think the result is correct. If the result is invalid or unexpected, please correct your query or reasoning.\n",
+         "\n",
+         "After all the queries are run and you get the answer, put the answer in \\boxed{}.\n",
+         "\n",
+         "Problem: For what negative value of $k$ is there exactly one solution to the system of equations \\begin{align*}\n",
+         "y &= 2x^2 + kx + 6 \\\\\n",
+         "y &= -x + 4?\n",
+         "\\end{align*}\n",
+         "\n",
+         "**** MathChatAgent received message from assistant ****\n",
+         "\n",
+         "To find the value of k for which there is exactly one solution to the system, we need to solve the two equations for y and set them equal to each other. Then, we will find the discriminant and set it equal to 0. The discriminant will be equal to 0 if there is exactly one solution.\n",
+         "\n",
+         "Step 1: Set the equations equal to each other\n",
+         "$$2x^2 + kx + 6 = -x + 4$$\n",
+         "\n",
+         "Step 2: Rearrange the equation to get a quadratic equation\n",
+         "We can rearrange the equation to get a quadratic equation in terms of x:\n",
+         "$$2x^2 + (k+1)x + 2 = 0$$\n",
+         "\n",
+         "Step 3: Calculate the discriminant\n",
+         "The discriminant of a quadratic equation $ax^2 + bx + c = 0$ is given by $\\Delta = b^2 - 4ac$. In our case, $a=2$, $b=k+1$, and $c=2$.\n",
+         "\n",
+         "Now, let's compute the discriminant using Python:\n",
+         "\n",
+         "```python\n",
+         "from sympy import symbols, Eq, solve\n",
+         "\n",
+         "a, b, c, k = 2, k+1, 2, symbols('k')\n",
+         "delta = b**2 - 4*a*c\n",
+         "delta\n",
+         "```\n",
+         "\n",
+         "\n",
+         ">>>>>>>> NO HUMAN INPUT RECEIVED. USING AUTO REPLY FOR THE USER...\n",
+         "\n",
+         "**** assistant received message from MathChatAgent ****\n",
+         "\n",
+         "Error: Traceback (most recent call last):\n",
+         "      a, b, c, k = 2, k+1, 2, symbols('k')\n",
+         "NameError: name 'k' is not defined\n",
+         "\n",
+         "\n",
+         "**** MathChatAgent received message from assistant ****\n",
+         "\n",
+         "Apologies for the mistake. I made an error in the code. Let me correct it and compute the discriminant again. Here's the corrected code:\n",
+         "\n",
+         "```python\n",
+         "from sympy import symbols, Eq, solve\n",
+         "\n",
+         "k = symbols('k')\n",
+         "a, b, c = 2, k + 1, 2\n",
+         "delta = b**2 - 4*a*c\n",
+         "delta\n",
+         "```\n",
+         "\n",
+         ">>>>>>>> NO HUMAN INPUT RECEIVED. USING AUTO REPLY FOR THE USER...\n",
+         "\n",
+         "**** assistant received message from MathChatAgent ****\n",
+         "\n",
+         "(k + 1)**2 - 16\n",
+         "\n",
+         "\n",
+         "**** MathChatAgent received message from assistant ****\n",
+         "\n",
+         "Yes, that's correct. So the discriminant is $\\Delta = (k+1)^2 - 16$. Since we want exactly one solution, the discriminant must be equal to 0:\n",
+         "\n",
+         "$$(k+1)^2 - 16 = 0$$\n",
+         "\n",
+         "Now let's solve this equation for k using Python:\n",
+         "\n",
+         "```python\n",
+         "k_val = solve(delta, k)\n",
+         "k_val\n",
+         "```\n",
+         "\n",
+         ">>>>>>>> NO HUMAN INPUT RECEIVED. USING AUTO REPLY FOR THE USER...\n",
+         "\n",
+         "**** assistant received message from MathChatAgent ****\n",
+         "\n",
+         "[-5, 3]\n",
+         "\n",
+         "\n",
+         "**** MathChatAgent received message from assistant ****\n",
+         "\n",
+         "The solutions for k are $k=-5$ and $k=3$. However, we are looking for the negative value of k, so the answer is:\n",
+         "\n",
+         "$$\\boxed{k = -5}$$\n",
+         "\n",
+         "For k = -5, there is exactly one solution to the given system of equations.\n"
+        ]
+       }
+      ],
+      "source": [
+       "assistant.reset()\n",
+       "\n",
+       "math_problem =  \"For what negative value of $k$ is there exactly one solution to the system of equations \\\\begin{align*}\\ny &= 2x^2 + kx + 6 \\\\\\\\\\ny &= -x + 4?\\n\\\\end{align*}\"\n",
+       "assistant.receive(\n",
+       "    mathproxyagent.generate_init_prompt(math_problem),\n",
+       "    mathproxyagent,\n",
+       ")"
+      ]
+     },
+     {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+       "### Example 3\n",
+       "Problem: Find all positive integer values of $c$ such that the equation $x^2-7x+c=0$ only has roots that are real and rational. Express them in decreasing order, separated by commas.\n",
+       "\n",
+       "Correct Solution: For the roots to be real and rational, the discriminant must be a perfect square. Therefore, $(-7)^2-4 \\cdot 1 \\cdot c = 49-4c$ must be a perfect square. The only positive perfect squares less than 49 are $1$, $4$, $9$, $16$, $25$, and $36$. The perfect squares that give an integer value of $c$ are $1$, $9$, and $25$. Thus, we have the equations $49-4c=1$, $49-4c=9$, and $49-4c=25$. Solving, we get that the positive integer values of $c$ are $\\boxed{12, 10, 6}$."
+      ]
+     },
+     {
+      "cell_type": "code",
+      "execution_count": 30,
+      "metadata": {},
+      "outputs": [
+       {
+        "name": "stdout",
+        "output_type": "stream",
+        "text": [
+         "\n",
+         "**** assistant received message from MathChatAgent ****\n",
+         "\n",
+         "\n",
+         "Let's use Python to solve a math problem.\n",
+         "\n",
+         "Query requirements:\n",
+         "You should always use the 'print' function for the output and use fractions/radical forms instead of decimals.\n",
+         "You can use packages like sympy to help you.\n",
+         "You must follow the formats below to write your code:\n",
+         "```python\n",
+         "# your code\n",
+         "```\n",
+         "\n",
+         "You may choose from three ways to solve the problem:\n",
+         "Case 1: If the problem can be solved with Python code directly, please write a program to solve it. You can enumerate all possible arrangements if needed.\n",
+         "Case 2: If the problem cannot be handled in the above two ways, please follow this process:\n",
+         "1. Solve the problem step by step (do not over-divide the steps).\n",
+         "2. Take out any queries that can be asked through Python (for example, any calculations or equations that can be calculated).\n",
+         "3. Wait for me to give the results.\n",
+         "4. Continue if you think the result is correct. If the result is invalid or unexpected, please correct your query or reasoning.\n",
+         "\n",
+         "After all the queries are run and you get the answer, put the answer in \\boxed{}.\n",
+         "\n",
+         "Problem: Find all positive integer values of $c$ such that the equation $x^2-7x+c=0$ only has roots that are real and rational. Express them in decreasing order, separated by commas.\n",
+         "\n",
+         "**** MathChatAgent received message from assistant ****\n",
+         "\n",
+         "To solve this problem, we'll use the discriminant formula, which is $\\Delta = b^2 - 4ac$. For the roots to be real and rational, the discriminant must be non-negative and a perfect square. In our case, the equation is $x^2 - 7x + c = 0$, so $a = 1, b = -7, c = c$.\n",
+         "\n",
+         "The discriminant formula becomes $\\Delta = (-7)^2 - 4(1)(c) = 49 - 4c$. We want to find all possible positive integer values of $c$ such that $\\Delta \\geq 0$ and is a perfect square.\n",
+         "\n",
+         "```python\n",
+         "from sympy import *\n",
+         "\n",
+         "# List for storing c values that satisfy the conditions\n",
+         "c_values = []\n",
+         "\n",
+         "# Check for all c values from 1 to 49\n",
+         "for c in range(1, 50):\n",
+         "    delta = 49 - 4*c\n",
+         "    # Condition for the discriminant to be non-negative and a perfect square\n",
+         "    if delta >= 0 and sqrt(delta).is_integer():\n",
+         "        c_values.append(c)\n",
+         "\n",
+         "# Reverse the list to get decreasing order\n",
+         "c_values.reverse()\n",
+         "\n",
+         "# Printing the result\n",
+         "print(\"The values of c that make the roots real and rational are:\", \",\".join(map(str, c_values)))\n",
+         "```\n",
+         "\n",
+         "Let's run this code to find the valid values of $c$.\n",
+         "\n",
+         ">>>>>>>> NO HUMAN INPUT RECEIVED. USING AUTO REPLY FOR THE USER...\n",
+         "\n",
+         "**** assistant received message from MathChatAgent ****\n",
+         "\n",
+         "Error: Traceback (most recent call last):\n",
+         "      if delta >= 0 and sqrt(delta).is_integer():\n",
+         "TypeError: 'bool' object is not callable\n",
+         "\n",
+         "\n",
+         "**** MathChatAgent received message from assistant ****\n",
+         "\n",
+         "I apologize for the error in my code. I used the wrong function to check if the square root is an integer. I will correct the code and try again.\n",
+         "\n",
+         "```python\n",
+         "import math\n",
+         "\n",
+         "# List for storing c values that satisfy the conditions\n",
+         "c_values = []\n",
+         "\n",
+         "# Check for all c values from 1 to 49\n",
+         "for c in range(1, 50):\n",
+         "    delta = 49 - 4*c\n",
+         "    # Condition for the discriminant to be non-negative and a perfect square\n",
+         "    if delta >= 0 and (math.sqrt(delta) % 1 == 0):\n",
+         "        c_values.append(c)\n",
+         "\n",
+         "# Reverse the list to get decreasing order\n",
+         "c_values.reverse()\n",
+         "\n",
+         "# Printing the result\n",
+         "print(\"The values of c that make the roots real and rational are:\", \",\".join(map(str, c_values)))\n",
+         "```\n",
+         "\n",
+         "Let's run this corrected code to find the valid values of $c$.\n",
+         "\n",
+         ">>>>>>>> NO HUMAN INPUT RECEIVED. USING AUTO REPLY FOR THE USER...\n",
+         "\n",
+         "**** assistant received message from MathChatAgent ****\n",
+         "\n",
+         "The values of c that make the roots real and rational are: 12,10,6\n",
+         "\n",
+         "\n",
+         "**** MathChatAgent received message from assistant ****\n",
+         "\n",
+         "Great! So, the positive integer values of $c$ such that the equation $x^2 - 7x + c = 0$ only has roots that are real and rational are $\\boxed{12, 10, 6}$ in decreasing order.\n"
+        ]
+       }
+      ],
+      "source": [
+       "assistant.reset()\n",
+       "\n",
+       "math_problem =  \"Find all positive integer values of $c$ such that the equation $x^2-7x+c=0$ only has roots that are real and rational. Express them in decreasing order, separated by commas.\"\n",
+       "assistant.receive(\n",
+       "    mathproxyagent.generate_init_prompt(math_problem),\n",
+       "    mathproxyagent,\n",
+       ")"
+      ]
+     },
+     {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+       "------------------------------------\n",
+       "### Using other prompts\n",
+       "\n",
+       "MathChat allows different prompts that instruct the assistant to solve the problem.\n",
+       "\n",
+       "Check out `MathUserProxyAgent.generate_init_prompt(problem, prompt_type='default', customized_prompt=None)`:\n",
+       "- You may choose from `['default', 'python', 'two_tools']` for parameter `prompt_type`.  We include two more prompts in the paper: \n",
+       "    1. `'python'` is a simplified prompt from the default prompt that uses Python only. \n",
+       "    2. `'two_tools'` further allows the selection of Python or Wolfram Alpha based on this simplified `python` prompt. Note that this option requires a Wolfram Alpha API key, which should be put in `wolfram.txt`.\n",
+       "\n",
+       "- You can also input your customized prompt if needed: `mathproxyagent.generate_init_prompt(problem, customized_prompt=\"Your customized prompt\")`. Since this mathproxyagent detects '\\boxed{}' as termination, you need to have a similar termination sentence in the prompt: \"If you get the answer, put the answer in \\\\boxed{}.\". If a customized prompt is provided, the `prompt_type` will be ignored.\n",
+       "\n",
+       "\n",
+       "### Example 4 (Use the \"python\" prompt):\n",
+       "\n",
+       "Problem: If $725x + 727y = 1500$ and $729x+ 731y = 1508$, what is the value of $x - y$ ?\n",
+       "\n",
+       "Correct Solution: Subtracting the two equations gives: \n",
+       "\\begin{align*}\n",
+       "(729x+731y)-(725x+727y) &= 1508-1500\\\\\n",
+       "\\Rightarrow\\qquad 4x+4y &= 8\\\\\n",
+       "\\Rightarrow\\qquad x+y &= 2.\n",
+       "\\end{align*}\n",
+       "\n",
+       "Multiplying this equation by 725 and subtracting this equation from $725x+727y=1500$ gives \\begin{align*}\n",
+       "(725x+727y) - 725(x+y) &= 1500-725(x+y) \\implies \\\\\n",
+       "2y &= 50.\n",
+       "\\end{align*}So we can write $x-y$ as $(x+y) - 2y$, which equals  $2 - 50 = \\boxed{-48}$.\n"
+      ]
+     },
+     {
+      "cell_type": "code",
+      "execution_count": 13,
+      "metadata": {},
+      "outputs": [
+       {
+        "name": "stdout",
+        "output_type": "stream",
+        "text": [
+         "\n",
+         " -------------------------------------------------------------------------------- \n",
+         "\n",
+         "MathChatAgent (to assistant):\n",
+         "Let's use Python to solve a math problem.\n",
+         "\n",
+         "Query requirements:\n",
+         "You should always use the 'print' function for the output and use fractions/radical forms instead of decimals.\n",
+         "You can use packages like sympy to help you.\n",
+         "You must follow the formats below to write your code:\n",
+         "```python\n",
+         "# your code\n",
+         "```\n",
+         "\n",
+         "Please follow this process:\n",
+         "1. Solve the problem step by step (do not over-divide the steps).\n",
+         "2. Take out any queries that can be asked through Python (for example, any calculations or equations that can be calculated).\n",
+         "3. Wait for me to give the results.\n",
+         "4. Continue if you think the result is correct. If the result is invalid or unexpected, please correct your query or reasoning.\n",
+         "\n",
+         "After all the queries are run and you get the answer, put the answer in \\boxed{}.\n",
+         "\n",
+         "Problem: Problem: If $725x + 727y = 1500$ and $729x+ 731y = 1508$, what is the value of $x - y$ ?\n",
+         "\n",
+         " -------------------------------------------------------------------------------- \n",
+         "\n",
+         "assistant (to MathChatAgent):\n",
+         "First, let's solve these two linear equations step by step. We can start by eliminating one variable by subtracting one equation from the other.\n",
+         "\n",
+         "So, let's subtract the first equation from the second:\n",
+         "\n",
+         "(729x + 731y) - (725x + 727y) = (1508 - 1500)\n",
+         "\n",
+         "Now let's simplify this:\n",
+         "\n",
+         "```python\n",
+         "from sympy import symbols, Eq, simplify\n",
+         "\n",
+         "x, y = symbols('x y')\n",
+         "eq1 = Eq(729*x + 731*y - (725*x + 727*y), 1508 - 1500)\n",
+         "eq1_simplified = simplify(eq1)\n",
+         "eq1_simplified\n",
+         "```\n",
+         "\n",
+         ">>>>>>>> NO HUMAN INPUT RECEIVED. USING AUTO REPLY FOR THE USER...\n",
+         "\n",
+         " -------------------------------------------------------------------------------- \n",
+         "\n",
+         "MathChatAgent (to assistant):\n",
+         "Eq(x + y, 2)\n",
+         "\n",
+         " -------------------------------------------------------------------------------- \n",
+         "\n",
+         "assistant (to MathChatAgent):\n",
+         "Now we have the simplified equation:\n",
+         "\n",
+         "x + y = 2\n",
+         "\n",
+         "Next, we need to use this equation to eliminate either x or y from one of the given equations. Let's eliminate y by expressing y in terms of x, and then substitute it into the second given equation.\n",
+         "\n",
+         "y = 2 - x\n",
+         "\n",
+         "Now let's substitute this into the second equation:\n",
+         "\n",
+         "729x + 731(2 - x) = 1508\n",
+         "\n",
+         "Now let's simplify this equation:\n",
+         "\n",
+         "```python\n",
+         "eq2 = Eq(729*x + 731*(2 - x), 1508)\n",
+         "eq2_simplified = simplify(eq2)\n",
+         "eq2_simplified\n",
+         "```\n",
+         "\n",
+         ">>>>>>>> NO HUMAN INPUT RECEIVED. USING AUTO REPLY FOR THE USER...\n",
+         "\n",
+         " -------------------------------------------------------------------------------- \n",
+         "\n",
+         "MathChatAgent (to assistant):\n",
+         "Eq(x, -23)\n",
+         "\n",
+         " -------------------------------------------------------------------------------- \n",
+         "\n",
+         "assistant (to MathChatAgent):\n",
+         "Now we have found the value of x:\n",
+         "\n",
+         "x = -23\n",
+         "\n",
+         "We can now find the value of y using the simplified equation x + y = 2:\n",
+         "\n",
+         "y = 2 - x\n",
+         "\n",
+         "Let's calculate the value of y:\n",
+         "\n",
+         "```python\n",
+         "y_value = 2 - (-23)\n",
+         "y_value\n",
+         "```\n",
+         "\n",
+         ">>>>>>>> NO HUMAN INPUT RECEIVED. USING AUTO REPLY FOR THE USER...\n",
+         "\n",
+         " -------------------------------------------------------------------------------- \n",
+         "\n",
+         "MathChatAgent (to assistant):\n",
+         "25\n",
+         "\n",
+         " -------------------------------------------------------------------------------- \n",
+         "\n",
+         "assistant (to MathChatAgent):\n",
+         "Now we have found the value of y:\n",
+         "\n",
+         "y = 25\n",
+         "\n",
+         "Now we can find the value of x - y:\n",
+         "\n",
+         "x - y = -23 - 25\n",
+         "\n",
+         "Let's calculate the value of x - y:\n",
+         "\n",
+         "```python\n",
+         "x_minus_y = -23 - 25\n",
+         "x_minus_y\n",
+         "```\n",
+         "\n",
+         ">>>>>>>> NO HUMAN INPUT RECEIVED. USING AUTO REPLY FOR THE USER...\n",
+         "\n",
+         " -------------------------------------------------------------------------------- \n",
+         "\n",
+         "MathChatAgent (to assistant):\n",
+         "-48\n",
+         "\n",
+         " -------------------------------------------------------------------------------- \n",
+         "\n",
+         "assistant (to MathChatAgent):\n",
+         "So, the value of x - y is:\n",
+         "\n",
+         "x - y = -48\n",
+         "\n",
+         "Thus, the final answer is:\n",
+         "\n",
+         "$\\boxed{-48}$\n"
+        ]
+       }
+      ],
+      "source": [
+       "assistant.reset() # clear LLM assistant's message history\n",
+       "\n",
+       "# we set the prompt_type to \"python\", which is a simplified version of the default prompt.\n",
+       "math_problem =  \"Problem: If $725x + 727y = 1500$ and $729x+ 731y = 1508$, what is the value of $x - y$ ?\"\n",
+       "assistant.receive(\n",
+       "    mathproxyagent.generate_init_prompt(math_problem, prompt_type=\"python\"),\n",
+       "    mathproxyagent,\n",
+       ")"
+      ]
+     },
+     {
+      "attachments": {},
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+       "### Example 5 (Use the \"two_tools\" prompt):\n",
+       "\n",
+       "Problem: Find all numbers $a$ for which the graph of $y=x^2+a$ and the graph of $y=ax$ intersect. Express your answer in interval notation.\n",
+       "\n",
+       "\n",
+       "Correct Solution: If these two graphs intersect then the points of intersection occur when  \\[x^2+a=ax,\\] or  \\[x^2-ax+a=0.\\] This quadratic has solutions exactly when the discriminant is nonnegative: \\[(-a)^2-4\\cdot1\\cdot a\\geq0.\\] This simplifies to  \\[a(a-4)\\geq0.\\] This quadratic (in $a$) is nonnegative when $a$ and $a-4$ are either both $\\ge 0$ or both $\\le 0$. This is true for $a$ in $$(-\\infty,0]\\cup[4,\\infty).$$ Therefore the line and quadratic intersect exactly when $a$ is in $\\boxed{(-\\infty,0]\\cup[4,\\infty)}$.\n"
+      ]
+     },
+     {
+      "cell_type": "code",
+      "execution_count": 15,
+      "metadata": {},
+      "outputs": [
+       {
+        "name": "stdout",
+        "output_type": "stream",
+        "text": [
+         "\n",
+         " -------------------------------------------------------------------------------- \n",
+         "\n",
+         "MathChatAgent (to assistant):\n",
+         "Let's use two tools (Python and Wolfram alpha) to solve a math problem.\n",
+         "\n",
+         "Query requirements:\n",
+         "You must follow the formats below to write your query:\n",
+         "For Wolfram Alpha:\n",
+         "```wolfram\n",
+         "# one wolfram query\n",
+         "```\n",
+         "For Python:\n",
+         "```python\n",
+         "# your code\n",
+         "```\n",
+         "When using Python, you should always use the 'print' function for the output and use fractions/radical forms instead of decimals. You can use packages like sympy to help you.\n",
+         "When using wolfram, give one query in each code block.\n",
+         "\n",
+         "Please follow this process:\n",
+         "1. Solve the problem step by step (do not over-divide the steps).\n",
+         "2. Take out any queries that can be asked through Python or Wolfram Alpha, select the most suitable tool to be used (for example, any calculations or equations that can be calculated).\n",
+         "3. Wait for me to give the results.\n",
+         "4. Continue if you think the result is correct. If the result is invalid or unexpected, please correct your query or reasoning.\n",
+         "\n",
+         "After all the queries are run and you get the answer, put the final answer in \\boxed{}.\n",
+         "\n",
+         "Problem: Find all numbers $a$ for which the graph of $y=x^2+a$ and the graph of $y=ax$ intersect. Express your answer in interval notation.\n",
+         "\n",
+         " -------------------------------------------------------------------------------- \n",
+         "\n",
+         "assistant (to MathChatAgent):\n",
+         "First, we need to find the intersection points of the two given equations, i.e., when $y = x^2 + a$ and $y = ax$ are equal. So, we will set the two equations equal to each other and solve for $x$:\n",
+         "\n",
+         "$x^2 + a = ax$\n",
+         "\n",
+         "Now, let's rearrange the equation to get a quadratic equation in terms of $x$:\n",
+         "\n",
+         "$x^2 - ax + a = 0$\n",
+         "\n",
+         "We can use the quadratic formula to solve for $x$ in terms of $a$:\n",
+         "\n",
+         "$x = \\frac{-(-a) \\pm \\sqrt{(-a)^2 - 4(1)(a)}}{2(1)}$\n",
+         "\n",
+         "```wolfram\n",
+         "x = (a + sqrt(a^2 - 4*a)) / 2, x = (a - sqrt(a^2 - 4*a)) / 2\n",
+         "```\n",
+         "\n",
+         ">>>>>>>> NO HUMAN INPUT RECEIVED. USING AUTO REPLY FOR THE USER...\n",
+         "\n",
+         " -------------------------------------------------------------------------------- \n",
+         "\n",
+         "MathChatAgent (to assistant):\n",
+         "Assumption: {x = 1/2 (a + sqrt(a^2 - 4 a)), x = 1/2 (a - sqrt(a^2 - 4 a))} \n",
+         "Answer: ans 0: a = 0, x = 0\n",
+         "ans 1: a = 4, x = 2\n",
+         "\n",
+         " -------------------------------------------------------------------------------- \n",
+         "\n",
+         "assistant (to MathChatAgent):\n",
+         "We found two solutions for $x$ in terms of $a$: \n",
+         "\n",
+         "$x = \\frac{1}{2}(a + \\sqrt{a^2 - 4a})$\n",
+         "\n",
+         "$x = \\frac{1}{2}(a - \\sqrt{a^2 - 4a})$\n",
+         "\n",
+         "And we have two possible answers: \n",
+         "\n",
+         "1) When $a=0$, $x=0$.\n",
+         "\n",
+         "2) When $a=4$, $x=2$.\n",
+         "\n",
+         "Now, we need to find the interval of $a$ for which the two graphs intersect. As we know the roots of the quadratic equation, we can look for the conditions under which the discriminant is greater or equal to zero:\n",
+         "\n",
+         "$D = a^2 - 4a \\ge 0$\n",
+         "\n",
+         "Let's solve this inequality to find the interval of $a$:\n",
+         "\n",
+         "```wolfram\n",
+         "solve a^2 - 4a >= 0 for a\n",
+         "```\n",
+         "\n",
+         ">>>>>>>> NO HUMAN INPUT RECEIVED. USING AUTO REPLY FOR THE USER...\n",
+         "\n",
+         " -------------------------------------------------------------------------------- \n",
+         "\n",
+         "MathChatAgent (to assistant):\n",
+         "Assumption: solve a^2 - 4 a>=0 for a \n",
+         "Answer: ans 0: a<=0\n",
+         "ans 1: a>=4\n",
+         "\n",
+         " -------------------------------------------------------------------------------- \n",
+         "\n",
+         "assistant (to MathChatAgent):\n",
+         "We found the intervals for $a$ that satisfy the inequality $a^2 - 4a \\ge 0$:\n",
+         "\n",
+         "1) $a \\le 0$\n",
+         "\n",
+         "2) $a \\ge 4$\n",
+         "\n",
+         "So, the graph of $y=x^2+a$ and the graph of $y=ax$ intersect when $a$ is in these intervals. Therefore, the final answer is:\n",
+         "\n",
+         "$\\boxed{(-\\infty, 0] \\cup [4, \\infty)}$\n"
+        ]
+       }
+      ],
+      "source": [
+       "assistant.reset() # clear LLM assistant's message history\n",
+       "\n",
+       "# The wolfram alpha appid is required for this example (the assistant may choose to query Wolfram Alpha).\n",
+       "import os\n",
+       "if \"WOLFRAM_ALPHA_APPID\" not in os.environ:\n",
+       "    os.environ[\"WOLFRAM_ALPHA_APPID\"] = open(\"wolfram.txt\").read().strip()\n",
+       "\n",
+       "# we set the prompt_type to \"two_tools\", which allows the assistant to select wolfram alpha when necessary.\n",
+       "math_problem =  \"Find all numbers $a$ for which the graph of $y=x^2+a$ and the graph of $y=ax$ intersect. Express your answer in interval notation.\"\n",
+       "assistant.receive(\n",
+       "    mathproxyagent.generate_init_prompt(math_problem, prompt_type=\"two_tools\"),\n",
+       "    mathproxyagent,\n",
+       ")"
+      ]
+     }
+    ],
+    "metadata": {
+     "kernelspec": {
+      "display_name": "flaml_dev",
+      "language": "python",
+      "name": "python3"
+     },
+     "language_info": {
+      "codemirror_mode": {
+       "name": "ipython",
+       "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.9.16"
+     },
+     "orig_nbformat": 4
+    },
+    "nbformat": 4,
+    "nbformat_minor": 2
+   }
diff --git a/setup.py b/setup.py
index fd588084db..3f8d90e082 100644
--- a/setup.py
+++ b/setup.py
@@ -84,6 +84,9 @@
             "tensorboardX==2.6",  # test_forecast_panel
             "requests<2.29.0",  # https://github.com/docker/docker-py/issues/3113
             "packaging",
+            "pydantic",
+            "sympy",
+            "wolframalpha",
         ],
         "catboost": ["catboost>=0.26"],
         "blendsearch": ["optuna==2.8.0"],
@@ -132,6 +135,7 @@
         "benchmark": ["catboost>=0.26", "psutil==5.8.0", "xgboost==1.3.3", "pandas==1.1.4"],
         "openai": ["openai==0.27.8", "diskcache"],
         "autogen": ["openai==0.27.8", "diskcache", "docker"],
+        "mathchat": ["openai==0.27.8", "diskcache", "docker", "sympy", "pydantic", "wolframalpha"],
         "synapse": [
             "joblibspark>=0.5.0",
             "optuna==2.8.0",
diff --git a/test/autogen/oai/test_notebook.py b/test/autogen/oai/test_notebook.py
index 773520f624..1a4cd5b8f6 100644
--- a/test/autogen/oai/test_notebook.py
+++ b/test/autogen/oai/test_notebook.py
@@ -59,6 +59,14 @@ def test_autogen_openai_completion(save=False):
     run_notebook("autogen_openai_completion.ipynb", save=save)
 
 
+@pytest.mark.skipif(
+    skip or not sys.version.startswith("3.10"), reason="do not run if openai is not installed or py!=3.10"
+)
+def test_autogen_agent_MathChat(save=False):
+    """Execute the MathChat agent notebook via run_notebook, forwarding `save` unchanged."""
+    run_notebook("autogen_agent_MathChat.ipynb", save=save)
+
+
 @pytest.mark.skipif(
     skip or not sys.version.startswith("3.11"),
     reason="do not run if openai is not installed or py!=3.11",
@@ -70,3 +78,4 @@ def test_autogen_chatgpt_gpt4(save=False):
 if __name__ == "__main__":
     test_autogen_chatgpt_gpt4(save=True)
     test_autogen_openai_completion(save=True)
+    test_autogen_agent_MathChat(save=True)  # direct runs now also execute the MathChat notebook
diff --git a/test/autogen/test_math_user_proxy_agent.py b/test/autogen/test_math_user_proxy_agent.py
new file mode 100644
index 0000000000..a2c9267079
--- /dev/null
+++ b/test/autogen/test_math_user_proxy_agent.py
@@ -0,0 +1,111 @@
+from flaml import oai
+from flaml.autogen.agent.math_user_proxy_agent import MathUserProxyAgent, remove_print, add_print_to_last_line
+import pytest
+import sys
+
+KEY_LOC = "test/autogen"
+
+
+@pytest.mark.skipif(sys.platform in ("darwin", "win32"), reason="do not run on MacOS or windows")
+def test_math_user_proxy_agent():
+    """Start a MathChat conversation by sending the proxy's initial prompt to an AssistantAgent.
+
+    The test returns early (and is therefore reported as passed) when `openai`
+    cannot be imported. Otherwise it starts `oai.ChatCompletion` logging into a
+    dict and prints that dict; nothing is asserted about the model's answer.
+    """
+    try:
+        import openai  # noqa: F401  (availability probe only)
+    except ImportError:
+        return
+
+    # Imported lazily, after the openai availability check above.
+    from flaml.autogen.agent.assistant_agent import AssistantAgent
+
+    chat_log = {}
+    oai.ChatCompletion.start_logging(chat_log)
+
+    # The config list is built with key_file_path pointing at KEY_LOC.
+    solver = AssistantAgent(
+        "assistant",
+        system_message="You are a helpful assistant.",
+        request_timeout=600,
+        seed=42,
+        config_list=oai.config_list_openai_aoai(key_file_path=KEY_LOC),
+    )
+    math_proxy = MathUserProxyAgent(name="MathChatAgent", human_input_mode="NEVER")
+    solver.reset()
+
+    opening_prompt = math_proxy.generate_init_prompt("$x^3=125$. What is x?")
+    solver.receive(message=opening_prompt, sender=math_proxy)
+    print(chat_log)
+
+
+def test_add_remove_print():
+    """Check the `add_print_to_last_line` and `remove_print` code-rewriting helpers."""
+    # A bare trailing expression gets wrapped in print(...).
+    bare_tail = "a = 4\nb = 5\na,b"
+    assert add_print_to_last_line(bare_tail) == "a = 4\nb = 5\nprint(a,b)"
+
+    # Unindented print statements are dropped; the remaining line is kept.
+    with_prints = """print("hello")\na = 4*5\nprint("wolrld")"""
+    assert remove_print(with_prints) == "a = 4*5"
+
+    # An indented print (inside a block) is left untouched.
+    nested_print = "if 4 > 5:\n\tprint('True')"
+    assert remove_print(nested_print) == nested_print
+
+
+@pytest.mark.skipif(sys.platform in ("darwin", "win32"), reason="do not run on MacOS or windows")
+def test_execute_one_python_code():
+    """Exercise `MathUserProxyAgent._execute_one_python_code` on a series of snippets.
+
+    Only element [0] of each return value is inspected. All snippets run in order on
+    one agent instance; the `print(y)` check relies on variables set by the call before it.
+    """
+    agent = MathUserProxyAgent(name="MathChatAgent", human_input_mode="NEVER")
+    run = agent._execute_one_python_code
+
+    # No print statement at all: the reply asks for a print.
+    assert run("x=3")[0] == "No output found. Make sure you print the results."
+
+    # A print exists but its branch is never taken.
+    assert run("if 4 > 5:\n\tprint('True')")[0] == "No output found."
+
+    # Failing code is reported through an "Error:" message.
+    assert "Error:" in run("2+'2'")[0]
+
+    # Variables defined in one call are visible to the next call.
+    run("x=3\ny=x*2")
+    assert run("print(y)")[0].strip() == "6"
+
+    # Printing 2001 characters is answered with the "too long" message.
+    too_long_reply = run("print('*'*2001)")[0]
+    assert (
+        too_long_reply
+        == "Your requested query response is too long. You might have made a mistake. Please revise your reasoning and query."
+    )
+
+
+def test_execute_one_wolfram_query():
+    """Smoke-test one Wolfram Alpha query; on ValueError the test is skipped, not passed."""
+    mathproxyagent = MathUserProxyAgent(name="MathChatAgent", human_input_mode="NEVER")
+    try:
+        mathproxyagent._execute_one_wolfram_query("2x=3")[0]
+    except ValueError:
+        # Treated as "no Wolfram key configured" (per the original message) -- TODO confirm.
+        pytest.skip("Wolfram API key not found. Skip test.")
+
+
+def test_generate_prompt():
+    """A `customized_prompt` passed to `generate_init_prompt` must appear in the returned prompt."""
+    agent = MathUserProxyAgent(name="MathChatAgent", human_input_mode="NEVER")
+    prompt = agent.generate_init_prompt(problem="2x=4", prompt_type="python", customized_prompt="customized")
+    # Substring check only; the rest of the prompt text is not pinned here.
+    assert "customized" in prompt
+
+
+if __name__ == "__main__":  # direct run; test_execute_one_wolfram_query is not invoked here
+    test_add_remove_print()
+    test_execute_one_python_code()
+    test_generate_prompt()
+    test_math_user_proxy_agent()
diff --git a/website/docs/Research.md b/website/docs/Research.md
index a510d41fad..16f5bdc3c7 100644
--- a/website/docs/Research.md
+++ b/website/docs/Research.md
@@ -101,3 +101,14 @@ For technical details, please check our research publications.
     booktitle={ArXiv preprint arXiv:2303.04673},
 }
 ```
+
+* [An Empirical Study on Challenging Math Problem Solving with GPT-4](https://arxiv.org/abs/2306.01337). Yiran Wu, Feiran Jia, Shaokun Zhang, Hangyu Li, Erkang Zhu, Yue Wang, Yin Tat Lee, Richard Peng, Qingyun Wu, Chi Wang. ArXiv preprint arXiv:2306.01337 (2023).
+
+```bibtex
+@inproceedings{wu2023empirical,
+    title={An Empirical Study on Challenging Math Problem Solving with GPT-4},
+    author={Yiran Wu and Feiran Jia and Shaokun Zhang and Hangyu Li and Erkang Zhu and Yue Wang and Yin Tat Lee and Richard Peng and Qingyun Wu and Chi Wang},
+    year={2023},
+    booktitle={ArXiv preprint arXiv:2306.01337},
+}
+```