From fa5ccea86253fdb2e4e17531749d7d228c98d76c Mon Sep 17 00:00:00 2001
From: Chi Wang
Date: Sun, 23 Apr 2023 04:50:29 -0700
Subject: [PATCH] extract code from text; solve_problem; request_timeout in
 config; improve code (#999)

* extract code from text
* solve_problem; request_timeout in config
* improve
* move import statement
* improve code
* generate assertions
* constant
* configs for implement; voting
* doc
* execute code in docker
* success indicator of code execution in docker
* success indicator
* execute code
* strip n
* add cost in generate_code
* add docstr
* filename
* bytes
* check docker version
* print log
* python test
* remove api key address
* rename exit code
* success exit code
* datasets
* exit code
* recover openai tests
* cache and pattern match
* wait
* wait
* cache and test
* timeout test
* python image name and skip macos
* windows image
* docker images
* volume path and yaml
* win path -> posix
* extensions
* path
* path
* path
* path
* path
* path
* path
* path
* path
* path
* path
* skip windows
* path
* timeout in windows
* use_docker
* use_docker
* hot fix from #1000

---------

Co-authored-by: Qingyun Wu
---
 .github/workflows/openai.yml                |   4 +-
 README.md                                   |   2 +-
 flaml/autogen/__init__.py                   |   2 +
 flaml/autogen/code_utils.py                 | 316 +++++++++++++++++---
 flaml/autogen/extensions/__init__.py        |   0
 flaml/autogen/math_utils.py                 |  24 ++
 flaml/autogen/oai/completion.py             |  16 +-
 flaml/version.py                            |   2 +-
 notebook/autogen_chatgpt.ipynb              |   6 +-
 notebook/autogen_openai.ipynb               |  14 +-
 notebook/research/autogen_code.ipynb        |   8 +-
 notebook/research/math_level5counting.ipynb |   4 +-
 setup.py                                    |   3 +-
 test/openai/test_completion.py              | 134 ++++++++-
 website/docs/Examples/AutoGen-OpenAI.md     |   4 +-
 website/docs/Use-Cases/Auto-Generation.md   |  21 +-
 16 files changed, 488 insertions(+), 72 deletions(-)
 create mode 100644 flaml/autogen/extensions/__init__.py

diff --git a/.github/workflows/openai.yml b/.github/workflows/openai.yml
index 5eb442d971..3ec06f8479 100644
--- a/.github/workflows/openai.yml
+++ b/.github/workflows/openai.yml
@@ -29,10 +29,10 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install packages and dependencies
       run: |
+        docker --version
         python -m pip install --upgrade pip wheel
-        pip install -e .
+        pip install -e .[autogen,blendsearch]
         python -c "import flaml"
-        pip install -e .[openai]
     - name: Coverage
       env:
         OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
diff --git a/README.md b/README.md
index 6ade62fde7..a23a8e9d53 100644
--- a/README.md
+++ b/README.md
@@ -3,8 +3,8 @@
 [![Build](https://github.com/microsoft/FLAML/actions/workflows/python-package.yml/badge.svg)](https://github.com/microsoft/FLAML/actions/workflows/python-package.yml)
 ![Python Version](https://img.shields.io/badge/3.7%20%7C%203.8%20%7C%203.9%20%7C%203.10-blue)
 [![Downloads](https://pepy.tech/badge/flaml)](https://pepy.tech/project/flaml)
-[![](https://img.shields.io/discord/1025786666260111483?logo=discord&style=flat)](https://discord.gg/Cppx2vSPVP)
+

 # A Fast Library for Automated Machine Learning & Tuning
diff --git a/flaml/autogen/__init__.py b/flaml/autogen/__init__.py
index e69de29bb2..d13c36540e 100644
--- a/flaml/autogen/__init__.py
+++ b/flaml/autogen/__init__.py
@@ -0,0 +1,2 @@
+DEFAULT_MODEL = "gpt-4"
+FAST_MODEL = "gpt-3.5-turbo"
diff --git a/flaml/autogen/code_utils.py b/flaml/autogen/code_utils.py
index e7215100d6..18738e3de5 100644
--- a/flaml/autogen/code_utils.py
+++ b/flaml/autogen/code_utils.py
@@ -1,56 +1,287 @@
 import signal
 import subprocess
 import sys
+import os
+import pathlib
 from typing import List, Dict, Tuple, Optional, Union, Callable
-from flaml import oai
+import re
+import time
+from flaml.autogen import oai, DEFAULT_MODEL, FAST_MODEL
+
+# Regular expression for finding a code block
+CODE_BLOCK_PATTERN = r"```\w*\n(.*?)\n```"
+WORKING_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "extensions")
+
+
+def extract_code(text: str, pattern: str = CODE_BLOCK_PATTERN) -> str:
+    # Use a regular expression to find the code block
+    match = re.search(pattern, text, flags=re.DOTALL)
+    # If a match is found, return the code
+    if match:
+        return match.group(1)
+    # If no code block is found, return the whole text
+    return text
+
+
+def generate_code(pattern: str = CODE_BLOCK_PATTERN, **config) -> Tuple[str, float]:
+    """Generate code.
+
+    Args:
+        pattern (Optional, str): The regular expression pattern for finding the code block.
+            The default pattern is for finding a code block in a markdown file.
+        config (Optional, dict): The configuration for the API call.
+
+    Returns:
+        str: The generated code.
+        float: The cost of the generation.
+    """
+    response = oai.Completion.create(**config)
+    cost = oai.Completion.cost(config["model"], response)
+    return extract_code(oai.Completion.extract_text(response)[0], pattern), cost
+
+
+_IMPROVE_FUNCTION_CONFIG = {
+    "prompt": """Improve the function '{func_name}' to achieve the objective '{objective}'.
+The current implementation of the function is as follows:
+{file_string}""",
+    "model": DEFAULT_MODEL,
+    "request_timeout": 300,
+}
+
+
+def improve_function(file_name, func_name, objective, **config):
+    """(work in progress) Improve the function to achieve the objective."""
+    params = {**_IMPROVE_FUNCTION_CONFIG, **config}
+    # read the entire file into a str
+    with open(file_name, "r") as f:
+        file_string = f.read()
+    response = oai.Completion.create(
+        {"func_name": func_name, "objective": objective, "file_string": file_string}, **params
+    )
+    cost = oai.Completion.cost(params["model"], response)
+    return oai.Completion.extract_text(response)[0], cost
+
+
+_IMPROVE_CODE_CONFIG = {
+    "prompt": """Analyze the code in the following files and return a list of suggestions for improvement{followup}, to achieve the objective of '{objective}'.
+{code}
+""",
+    "model": DEFAULT_MODEL,
+    "request_timeout": 900,
+}
+
+
+def improve_code(files, objective, suggest_only=True, **config):
+    """Improve the code to achieve a given objective.
+
+    Args:
+        files (list): A list of file names containing the source code.
+        objective (str): The objective to achieve.
+        suggest_only (bool): Whether to return only the suggestions or the improved code.
+        config (Optional, dict): The configuration for the API call.
+
+    Returns:
+        str: The improved code if suggest_only=False; a list of suggestions if suggest_only=True (default).
+        float: The cost of the generation.
+    """
+    code = ""
+    for file_name in files:
+        # read the entire file into a string
+        with open(file_name, "r") as f:
+            file_string = f.read()
+        code += f"""{file_name}:
+{file_string}
+
+"""
+    params = {**_IMPROVE_CODE_CONFIG, **config}
+    followup = "" if suggest_only else " followed by the improved code"
+    response = oai.Completion.create({"objective": objective, "code": code, "followup": followup}, **params)
+    cost = oai.Completion.cost(params["model"], response)
+    return oai.Completion.extract_text(response)[0], cost
 
 
 def timeout_handler(signum, frame):
     raise TimeoutError("Timed out!")
 
 
-def execute_code(code: str, max_exec_time: Optional[int] = 3):
-    signal.signal(signal.SIGALRM, timeout_handler)
-    code = code.strip()
-    with open("codetest.py", "w") as fout:
-        fout.write(code)
-    try:
-        signal.alarm(max_exec_time)
-        result = subprocess.run(
-            [sys.executable, "codetest.py"],
-            stdout=subprocess.DEVNULL,
-            stderr=subprocess.PIPE,
-        )
-        signal.alarm(0)
-    except TimeoutError:
-        return 0
-    return int(result.returncode == 0)
-
-
-def generate_assertions(definition: str, model: Optional[str] = "gpt-3.5-turbo") -> Tuple[str, float]:
+def execute_code(
+    code: Optional[str] = None,
+    timeout: Optional[int] = 600,
+    filename: Optional[str] = None,
+    work_dir: Optional[str] = None,
+    use_docker: Optional[bool] = True,
+) -> Tuple[int, bytes]:
+    """Execute code in a docker container.
+    This function is not tested on MacOS.
+
+    Args:
+        code (Optional, str): The code to execute.
+            If None, the code from the file specified by filename will be executed.
+            Either code or filename must be provided.
+        timeout (Optional, int): The maximum execution time in seconds.
+        filename (Optional, str): The file name to save the code or where the code is stored when `code` is None.
+            If None, a file with a randomly generated name will be created.
+            The randomly generated file will be deleted after execution.
+            The file name must be a relative path, resolved against the working directory.
+        work_dir (Optional, str): The working directory for the code execution.
+            If None, a default working directory will be used.
+            The default working directory is the "extensions" directory under
+            "xxx/flaml/autogen", where "xxx" is the path to the flaml package.
+        use_docker (Optional, bool): Whether to use a docker container for code execution.
+            If True, the code will be executed in a docker container.
+            If False, the code will be executed in the current environment.
+            Default is True. If the code is executed in the current environment,
+            the code must be trusted.
+
+    Returns:
+        int: 0 if the code executes successfully.
+        bytes: The error message if the code fails to execute; the stdout otherwise.
+    """
+    assert code is not None or filename is not None, "Either code or filename must be provided."
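+
+    # remember whether the caller supplied a filename: files created under an
+    # auto-generated name are deleted again after execution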
+    original_filename = filename
+    if filename is None:
+        code_hash = hash(code)
+        # create a file with an automatically generated name
+        filename = f"tmp_code_{code_hash}.py"
+    if work_dir is None:
+        work_dir = WORKING_DIR
+    filepath = os.path.join(work_dir, filename)
+    file_dir = os.path.dirname(filepath)
+    os.makedirs(file_dir, exist_ok=True)
+
+    if code is not None:
+        code = code.strip()
+        with open(filepath, "w") as fout:
+            fout.write(code)
+    # check if already running in a docker container
+    in_docker_container = os.path.exists("/.dockerenv")
+    if not use_docker or in_docker_container:
+        # already running in a docker container
+        signal.signal(signal.SIGALRM, timeout_handler)
+        try:
+            signal.alarm(timeout)
+            # run the code in a subprocess in the current docker container in the working directory
+            result = subprocess.run(
+                [sys.executable, filename],
+                cwd=work_dir,
+                capture_output=True,
+            )
+            signal.alarm(0)
+        except TimeoutError:
+            if original_filename is None:
+                os.remove(filepath)
+            return 1, "Timeout"
+        if original_filename is None:
+            os.remove(filepath)
+        return result.returncode, result.stderr if result.returncode else result.stdout
+
+    import docker
+    from requests.exceptions import ReadTimeout, ConnectionError
+
+    # create a docker client
+    client = docker.from_env()
+    image_list = ["python:3-alpine", "python:3", "python:3-windowsservercore"]
+    for image in image_list:
+        # check if the image exists
+        try:
+            client.images.get(image)
+            break
+        except docker.errors.ImageNotFound:
+            # pull the image
+            print("Pulling image", image)
+            try:
+                client.images.pull(image)
+                break
+            except docker.errors.DockerException:
+                print("Failed to pull image", image)
+    # get a randomized str based on current time to wrap the exit code
+    exit_code_str = f"exitcode{time.time()}"
+    abs_path = pathlib.Path(work_dir).absolute()
+    # if sys.platform == "win32":
+    #     abs_path = str(abs_path).replace("\\", "/")
+    #     abs_path = f"/{abs_path[0].lower()}{abs_path[2:]}"
+    # create a docker container
+    container = client.containers.run(
+        image,
+        command=[
+            "sh",
+            "-c",
+            f"python {filename}; exit_code=$?; echo -n {exit_code_str}; echo -n $exit_code; echo {exit_code_str}",
+        ],
+        working_dir="/workspace",
+        detach=True,
+        # get absolute path to the working directory
+        volumes={abs_path: {"bind": "/workspace", "mode": "rw"}},
+    )
+    start_time = time.time()
+    while container.status != "exited" and time.time() - start_time < timeout:
+        # Reload the container object
+        container.reload()
+    if container.status != "exited":
+        container.stop()
+        container.remove()
+        if original_filename is None:
+            os.remove(filepath)
+        return 1, "Timeout"
+    # try:
+    #     container.wait(timeout=timeout)
+    # except (ReadTimeout, ConnectionError):
+    #     container.stop()
+    #     container.remove()
+    #     if original_filename is None:
+    #         os.remove(filepath)
+    #     return 1, "Timeout"
+    # get the container logs
+    logs = container.logs().decode("utf-8").rstrip()
+    # remove the container
+    container.remove()
+    # check if the code executed successfully
+    exit_code = container.attrs["State"]["ExitCode"]
+    if exit_code == 0:
+        # extract the exit code from the logs
+        pattern = re.compile(f"{exit_code_str}(\\d+){exit_code_str}")
+        match = pattern.search(logs)
+        exit_code = int(match.group(1))
+        # remove the exit code from the logs
+        logs = pattern.sub("", logs)
+
+    logs = bytes(logs, "utf-8")
+    if original_filename is None:
+        os.remove(filepath)
+    # return the exit code and logs
+    return exit_code, logs
+
+
+_GENERATE_ASSERTIONS_CONFIG = {
+    "prompt": """Given the signature and docstring, write the exactly same number of assertion(s) for the provided example(s) in the docstring, without assertion messages.
+
+func signature:
+{definition}
+assertions:""",
+    "model": FAST_MODEL,
+    "max_tokens": 256,
+    "stop": "\n\n",
+}
+
+
+def generate_assertions(definition: str, **config) -> Tuple[str, float]:
     """Generate assertions for a function.
 
     Args:
         definition (str): The function definition, including the signature and docstr.
-        model (str): The model used for generation.
+        config (Optional, dict): The configuration for the API call.
 
     Returns:
         str: The generated assertions.
         float: The cost of the generation.
     """
-    prompt = """Given the signature and docstring, write the exactly same number of assertion(s) for the provided example(s) in the docstring, without assertion messages.
-
-func signature:
-{definition}
-assertions:"""
+    params = {**_GENERATE_ASSERTIONS_CONFIG, **config}
     response = oai.Completion.create(
         {"definition": definition},
-        model=model,
-        prompt=prompt,
-        max_tokens=256,
-        stop="\n\n",
+        **params,
     )
-    cost = oai.Completion.cost(model, response)
+    cost = oai.Completion.cost(params["model"], response)
     assertions = oai.Completion.extract_text(response)[0]
     return assertions, cost
@@ -70,6 +301,8 @@ def eval_function_completions(
     test: Optional[str] = None,
     entry_point: Optional[str] = None,
     assertions: Optional[Union[str, Callable[[str], Tuple[str, float]]]] = None,
+    timeout: Optional[float] = 3,
+    use_docker: Optional[bool] = True,
 ) -> Dict:
     """Select a response from a list of responses for the function completion task (using generated assertions),
     and/or evaluate if the task is successful using a gold test.
@@ -80,6 +313,8 @@ def eval_function_completions(
         entry_point (Optional, str): The name of the function.
         assertions (Optional, str or Callable): The assertion code which serves as a filter of the responses, or an assertion generator.
             When provided, only the responses that pass the assertions will be considered for the actual test (if provided).
+        timeout (Optional, float): The timeout for executing the code.
+        use_docker (Optional, bool): Whether to use a docker container for the code execution.
 
     Returns:
         dict: The success metrics.
@@ -95,7 +329,7 @@ def eval_function_completions(
             if response.startswith("def")
             else f"{definition}{response}\n{test}\ncheck({entry_point})"
         )
-        success = execute_code(code)
+        success = execute_code(code, timeout=timeout, use_docker=use_docker)[0] == 0
         success_list.append(success)
     return {
         "expected_success": 1 - pow(1 - sum(success_list) / n, n),
@@ -112,7 +346,7 @@ def eval_function_completions(
         code = (
             f"{response}\n{assertions}" if response.startswith("def") else f"{definition}{response}\n{assertions}"
         )
-        succeed_assertions = execute_code(code)
+        succeed_assertions = execute_code(code, timeout=timeout, use_docker=use_docker)[0] == 0
         if succeed_assertions:
             break
     else:
@@ -132,7 +366,7 @@ def eval_function_completions(
         if response.startswith("def")
         else f"{definition}{response}\n{test}\ncheck({entry_point})"
     )
-    success = execute_code(code_test)
+    success = execute_code(code_test, timeout=timeout, use_docker=use_docker)[0] == 0
     return {
         "index_selected": i,
         "succeed_assertions": succeed_assertions,
@@ -142,9 +376,20 @@ def eval_function_completions(
     }
 
 
+_FUNC_COMPLETION_PROMPT = "# Python 3{definition}"
+_FUNC_COMPLETION_STOP = ["\nclass", "\ndef", "\nif", "\nprint"]
+_IMPLEMENT_CONFIGS = [
+    {"model": FAST_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "temperature": 0, "seed": 0},
+    {"model": FAST_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "stop": _FUNC_COMPLETION_STOP, "n": 7, "seed": 0},
+    {"model": DEFAULT_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "temperature": 0, "seed": 1},
+    {"model": DEFAULT_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "stop": _FUNC_COMPLETION_STOP, "n": 2, "seed": 2},
+    {"model": DEFAULT_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "stop": _FUNC_COMPLETION_STOP, "n": 1, "seed": 2},
+]
+
+
 def implement(
     definition: str,
-    configs: List[Dict],
+    configs: Optional[List[Dict]] = None,
     assertions: Optional[Union[str, Callable[[str], Tuple[str, float]]]] = generate_assertions,
 ) -> Tuple[str, float]:
     """Implement a function from a definition.
@@ -160,6 +405,7 @@ def implement(
         int: The index of the configuration which generates the implementation.
     """
     cost = 0
+    configs = configs or _IMPLEMENT_CONFIGS
     if len(configs) > 1 and callable(assertions):
         assertions, cost = assertions(definition)
     for i, config in enumerate(configs):
diff --git a/flaml/autogen/extensions/__init__.py b/flaml/autogen/extensions/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/flaml/autogen/math_utils.py b/flaml/autogen/math_utils.py
index b5e0807e7e..38752203f8 100644
--- a/flaml/autogen/math_utils.py
+++ b/flaml/autogen/math_utils.py
@@ -1,4 +1,28 @@
 from typing import Optional
+from flaml.autogen import oai, DEFAULT_MODEL
+
+_MATH_PROMPT = "{problem} Solve the problem carefully. Simplify your answer as much as possible. Put the final answer in \\boxed{{}}."
+_MATH_CONFIG = {
+    "model": DEFAULT_MODEL,
+    "prompt": _MATH_PROMPT,
+}
+
+
+def solve_problem(problem: str, **config) -> str:
+    """(work in progress) Solve the math problem.
+
+    Args:
+        problem (str): The problem statement.
+        config (Optional, dict): The configuration for the API call.
+
+    Returns:
+        str: The solution to the problem.
+        float: The cost of the generation.
+ """ + params = {**_MATH_CONFIG, **config} + response = oai.Completion.create({"problem": problem}, **params) + cost = oai.Completion.cost(params["model"], response) + results = eval_math_responses(oai.Completion.extract_text(response)) + return results.get("voted_answer"), cost def remove_boxed(string: str) -> Optional[str]: diff --git a/flaml/autogen/oai/completion.py b/flaml/autogen/oai/completion.py index 1d8f8844f1..6bce0e1717 100644 --- a/flaml/autogen/oai/completion.py +++ b/flaml/autogen/oai/completion.py @@ -145,9 +145,10 @@ def _get_response(cls, config: dict, eval_only=False, use_cache=True): request_timeout = cls.request_timeout while True: try: - response = openai_completion.create(request_timeout=request_timeout, **config) - cls._cache.set(key, response) - return response + if "request_timeout" in config: + response = openai_completion.create(**config) + else: + response = openai_completion.create(request_timeout=request_timeout, **config) except ( ServiceUnavailableError, APIError, @@ -170,6 +171,8 @@ def _get_response(cls, config: dict, eval_only=False, use_cache=True): else: break if isinstance(e, Timeout): + if "request_timeout" in config: + raise request_timeout <<= 1 request_timeout = min(request_timeout, time_left) sleep(cls.retry_time) @@ -180,11 +183,16 @@ def _get_response(cls, config: dict, eval_only=False, use_cache=True): config["engine"] = config.pop("model").replace("gpt-3.5-turbo", "gpt-35-turbo") else: raise + else: + if use_cache: + cls._cache.set(key, response) + return response logger.warning( f"Failed to get response from openai api due to getting RateLimitError or Timeout for {cls.retry_timeout} seconds." ) response = -1 - cls._cache.set(key, response) + if use_cache: + cls._cache.set(key, response) return response @classmethod diff --git a/flaml/version.py b/flaml/version.py index a955fdae12..bc86c944fe 100644 --- a/flaml/version.py +++ b/flaml/version.py @@ -1 +1 @@ -__version__ = "1.2.1" +__version__ = "1.2.2" diff --git a/notebook/autogen_chatgpt.ipynb b/notebook/autogen_chatgpt.ipynb index 6ac607077b..e0016922da 100644 --- a/notebook/autogen_chatgpt.ipynb +++ b/notebook/autogen_chatgpt.ipynb @@ -21,9 +21,9 @@ "\n", "## Requirements\n", "\n", - "FLAML requires `Python>=3.7`. To run this notebook example, please install flaml with the [openai] option:\n", + "FLAML requires `Python>=3.7`. To run this notebook example, please install flaml with the [openai,blendsearch] option:\n", "```bash\n", - "pip install flaml[openai]==1.2.0\n", + "pip install flaml[openai,blendsearch]==1.2.1\n", "```" ] }, @@ -40,7 +40,7 @@ }, "outputs": [], "source": [ - "# %pip install flaml[openai]==1.2.0 datasets" + "# %pip install flaml[openai,blendsearch]==1.2.1 datasets" ] }, { diff --git a/notebook/autogen_openai.ipynb b/notebook/autogen_openai.ipynb index 8d1c0877e2..58e87f4707 100644 --- a/notebook/autogen_openai.ipynb +++ b/notebook/autogen_openai.ipynb @@ -21,9 +21,9 @@ "\n", "## Requirements\n", "\n", - "FLAML requires `Python>=3.7`. To run this notebook example, please install flaml with the [openai] option:\n", + "FLAML requires `Python>=3.7`. 
     "```bash\n",
-    "pip install flaml[openai]==1.2.0\n",
+    "pip install flaml[autogen,blendsearch]==1.2.1\n",
     "```"
    ]
   },
@@ -40,7 +40,7 @@
    },
    "outputs": [],
    "source": [
-    "# %pip install flaml[openai]==1.2.0 datasets"
+    "# %pip install flaml[autogen,blendsearch]==1.2.1 datasets"
    ]
   },
   {
@@ -297,7 +297,13 @@
     "from functools import partial\n",
     "from flaml.autogen.code_utils import eval_function_completions, generate_assertions\n",
     "\n",
-    "eval_with_generated_assertions = partial(eval_function_completions, assertions=generate_assertions)"
+    "eval_with_generated_assertions = partial(\n",
+    "    eval_function_completions,\n",
+    "    assertions=generate_assertions,\n",
+    "    use_docker=False,\n",
+    "    # Please set use_docker=True if you have docker available to run the generated code.\n",
+    "    # Using docker is safer than running the generated code directly.\n",
+    ")\n"
    ]
   },
   {
diff --git a/notebook/research/autogen_code.ipynb b/notebook/research/autogen_code.ipynb
index a796761eb2..29b3f3ae4e 100644
--- a/notebook/research/autogen_code.ipynb
+++ b/notebook/research/autogen_code.ipynb
@@ -19,9 +19,9 @@
     "\n",
     "## Requirements\n",
     "\n",
-    "FLAML requires `Python>=3.7`. To run this notebook example, please install flaml with the [openai] option:\n",
+    "FLAML requires `Python>=3.7`. To run this notebook example, please install flaml with the [autogen] option:\n",
     "```bash\n",
-    "pip install flaml[openai]==1.2.0\n",
+    "pip install flaml[autogen]==1.2.1\n",
     "```"
    ]
   },
@@ -38,7 +38,7 @@
    },
    "outputs": [],
    "source": [
-    "# %pip install flaml[openai]==1.2.0 datasets"
+    "# %pip install flaml[autogen]==1.2.1 datasets"
    ]
   },
   {
@@ -381,7 +381,7 @@
     "success = 0\n",
     "for i, d in enumerate(data):\n",
     "    response, cost_i, j = implement(d[\"definition\"], configs)\n",
-    "    metrics = eval_function_completions(responses=[response], **d)\n",
+    "    metrics = eval_function_completions(responses=[response], use_docker=False, **d)\n",
     "    success += metrics[\"success\"]\n",
     "    cost += cost_i\n",
     "    print(f\"Example {i}, config {j}, success {success}\")\n",
diff --git a/notebook/research/math_level5counting.ipynb b/notebook/research/math_level5counting.ipynb
index 5f2ba0bca3..e7e7e04336 100644
--- a/notebook/research/math_level5counting.ipynb
+++ b/notebook/research/math_level5counting.ipynb
@@ -21,7 +21,7 @@
     "\n",
     "FLAML requires `Python>=3.7`. To run this notebook example, please install flaml with the [openai] option:\n",
     "```bash\n",
-    "pip install flaml[openai]==1.2.0\n",
+    "pip install flaml[openai]==1.2.1\n",
     "```"
    ]
   },
@@ -38,7 +38,7 @@
    },
    "outputs": [],
    "source": [
-    "# %pip install flaml[openai]==1.2.0 datasets"
+    "# %pip install flaml[openai]==1.2.1 datasets"
    ]
   },
   {
diff --git a/setup.py b/setup.py
index 8a518b7c49..da3207478c 100644
--- a/setup.py
+++ b/setup.py
@@ -120,7 +120,8 @@
             "pytorch-forecasting>=0.9.0",
         ],
         "benchmark": ["catboost>=0.26", "psutil==5.8.0", "xgboost==1.3.3"],
-        "openai": ["openai==0.27.4", "diskcache", "optuna==2.8.0"],
+        "openai": ["openai==0.27.4", "diskcache"],
+        "autogen": ["openai==0.27.4", "diskcache", "docker"],
         "synapse": ["joblibspark>=0.5.0", "optuna==2.8.0", "pyspark>=3.2.0"],
     },
     classifiers=[
diff --git a/test/openai/test_completion.py b/test/openai/test_completion.py
index 1299f537d9..6a86b218af 100644
--- a/test/openai/test_completion.py
+++ b/test/openai/test_completion.py
@@ -8,8 +8,70 @@
     eval_function_completions,
     generate_assertions,
     implement,
+    generate_code,
+    extract_code,
+    improve_function,
+    improve_code,
+    execute_code,
 )
-from flaml.autogen.math_utils import eval_math_responses
+from flaml.autogen.math_utils import eval_math_responses, solve_problem
+
+
+@pytest.mark.skipif(
+    sys.platform in ["darwin", "win32"],
+    reason="do not run on MacOS or windows",
+)
+def test_execute_code():
+    try:
+        import docker
+    except ImportError as exc:
+        print(exc)
+        return
+    exitcode, msg = execute_code("print('hello world')", filename="tmp/codetest.py")
+    assert exitcode == 0 and msg == b"hello world\n", msg
+    # read a file
+    print(execute_code("with open('tmp/codetest.py', 'r') as f: a=f.read()"))
+    # create a file
+    print(execute_code("with open('tmp/codetest.py', 'w') as f: f.write('b=1')", work_dir="test/openai/my_tmp"))
+    # execute code in a file
+    print(execute_code(filename="tmp/codetest.py"))
+    # execute code for assertion error
+    exit_code, msg = execute_code("assert 1==2")
+    assert exit_code, msg
+    # execute code which takes a long time
+    exit_code, error = execute_code("import time; time.sleep(2)", timeout=1)
+    assert exit_code and error == "Timeout"
+    exit_code, error = execute_code("import time; time.sleep(2)", timeout=1, use_docker=False)
+    assert exit_code and error == "Timeout"
+
+
+def test_improve():
+    try:
+        import openai
+        import diskcache
+    except ImportError as exc:
+        print(exc)
+        return
+    improved, _ = improve_function(
+        "flaml/autogen/math_utils.py",
+        "solve_problem",
+        "Solve math problems accurately, by avoiding calculation errors and reduce reasoning errors.",
+    )
+    with open("test/openai/math_utils.py.improved", "w") as f:
+        f.write(improved)
+    suggestion, _ = improve_code(
+        ["flaml/autogen/code_utils.py", "flaml/autogen/math_utils.py"],
+        "leverage generative AI smartly and cost-effectively",
+    )
+    print(suggestion)
+    improvement, cost = improve_code(
+        ["flaml/autogen/code_utils.py", "flaml/autogen/math_utils.py"],
+        "leverage generative AI smartly and cost-effectively",
+        suggest_only=False,
+    )
+    print(cost)
+    with open("test/openai/suggested_improvement.txt", "w") as f:
+        f.write(improvement)
 
 
 def test_nocontext():
@@ -19,8 +81,59 @@ def test_nocontext():
     except ImportError as exc:
         print(exc)
         return
-    response = oai.Completion.create(model="text-ada-001", prompt="1+1=", max_tokens=1)
+    response = oai.Completion.create(
+        model="text-ada-001", prompt="1+1=", max_tokens=1, use_cache=False, request_timeout=10
+    )
     print(response)
+    code, _ = generate_code(
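+        # the config below is forwarded to oai.Completion.create, so chat-style
+        # parameters such as `messages` can be used here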
+        model="gpt-3.5-turbo",
+        messages=[
+            {
+                "role": "system",
+                "content": "You want to become a better assistant by learning new skills and improving your existing ones.",
+            },
+            {
+                "role": "user",
+                "content": "Write reusable code to use web scraping to get information from websites.",
+            },
+        ],
+    )
+    print(code)
+    # test extract_code from markdown
+    code = extract_code(
+        """
+Example:
+```
+print("hello extract code")
+```
+"""
+    )
+    print(code)
+
+    code = extract_code(
+        """
+Example:
+```python
+def scrape(url):
+    import requests
+    from bs4 import BeautifulSoup
+    response = requests.get(url)
+    soup = BeautifulSoup(response.text, "html.parser")
+    title = soup.find("title").text
+    text = soup.find("div", {"id": "bodyContent"}).text
+    return title, text
+```
+Test:
+```python
+url = "https://en.wikipedia.org/wiki/Web_scraping"
+title, text = scrape(url)
+print(f"Title: {title}")
+print(f"Text: {text}")
+"""
+    )
+    print(code)
+    solution, cost = solve_problem("1+1=")
+    print(solution, cost)
 
 
 @pytest.mark.skipif(
@@ -102,6 +215,7 @@ def test_humaneval(num_samples=1):
         inference_budget=0.002,
         optimization_budget=2,
         num_samples=num_samples,
+        # logging_level=logging.INFO,
         prompt=[
             "{definition}",
             "# Python 3{definition}",
@@ -175,12 +289,10 @@ def test_math(num_samples=-1):
     }
     test_data_sample = test_data[0:3]
     result = oai.ChatCompletion.test(test_data_sample, vanilla_config, eval_math_responses)
-    test_data_sample = test_data[3:6]
     result = oai.ChatCompletion.test(
         test_data_sample,
         vanilla_config,
         eval_math_responses,
-        use_cache=False,
         agg_method="median",
     )
 
@@ -194,14 +306,12 @@ def my_average(results):
         test_data_sample,
         vanilla_config,
         eval_math_responses,
-        use_cache=False,
         agg_method=my_median,
     )
     result = oai.ChatCompletion.test(
         test_data_sample,
         vanilla_config,
         eval_math_responses,
-        use_cache=False,
         agg_method={
             "expected_success": my_median,
             "success": my_average,
@@ -231,9 +341,11 @@ def my_average(results):
 
 
 if __name__ == "__main__":
-    import openai
+    # import openai
 
-    openai.api_key_path = "test/openai/key.txt"
-    test_nocontext()
-    test_humaneval(1)
-    test_math(1)
+    # openai.api_key_path = "test/openai/key.txt"
+    test_execute_code()
+    # test_improve()
+    # test_nocontext()
+    # test_humaneval(1)
+    # test_math(1)
diff --git a/website/docs/Examples/AutoGen-OpenAI.md b/website/docs/Examples/AutoGen-OpenAI.md
index 037770eee4..4f83cd99fb 100644
--- a/website/docs/Examples/AutoGen-OpenAI.md
+++ b/website/docs/Examples/AutoGen-OpenAI.md
@@ -5,9 +5,9 @@ In this example, we will tune several hyperparameters for the OpenAI's completio
 
 ### Prerequisites
 
-Install the [openai] option. The OpenAI integration is in preview.
+Install the [autogen,blendsearch] option. The OpenAI integration is in preview.
 ```bash
-pip install "flaml[openai]==1.2.0"
+pip install "flaml[autogen,blendsearch]==1.2.1" datasets
 ```
 
 Setup your OpenAI key:
diff --git a/website/docs/Use-Cases/Auto-Generation.md b/website/docs/Use-Cases/Auto-Generation.md
index 56842e0dce..8f51a1fa33 100644
--- a/website/docs/Use-Cases/Auto-Generation.md
+++ b/website/docs/Use-Cases/Auto-Generation.md
@@ -126,12 +126,29 @@ response = oai.Completion.create(problme=problem, prompt="{problem} Solve the pr
 ```
 
 ## Other utilities
-`flaml.oai.Completion` also offers some additional utilities, such as:
+
+### Completion
+
+[`flaml.oai.Completion`](../reference/autogen/oai/completion) also offers some additional utilities, such as:
 - a [`cost`](../reference/autogen/oai/completion#cost) function to calculate the cost of an API call.
 - a [`test`](../reference/autogen/oai/completion#test) function to conveniently evaluate the configuration over test data.
 - an [`extract_text`](../reference/autogen/oai/completion#extract_text) function to extract the text from a completion or chat response.
 - a [`set_cache`](../reference/autogen/oai/completion#set_cache) function to set the seed and cache path. The caching is introduced in the section above, with the benefit of cost saving, reproducibility, and controlled randomness.
 
-Interested in trying it yourself? Please check the following notebook examples:
+### Code
+
+[`flaml.autogen.code_utils`](../reference/autogen/code_utils) offers code-related utilities, such as:
+- an [`improve_code`](../reference/autogen/code_utils#improve_code) function to improve code for a given objective.
+- a [`generate_assertions`](../reference/autogen/code_utils#generate_assertions) function to generate assertion statements from a function signature and docstring.
+- an [`implement`](../reference/autogen/code_utils#implement) function to implement a function from a definition.
+- an [`eval_function_completions`](../reference/autogen/code_utils#eval_function_completions) function to evaluate the success of a function completion task, or select a response from a list of responses using generated assertions.
+
+### Math
+
+[`flaml.autogen.math_utils`](../reference/autogen/math_utils) offers utilities for math problems, such as:
+- an [`eval_math_responses`](../reference/autogen/math_utils#eval_math_responses) function to select a response using voting, and check if the final answer is correct if the canonical solution is provided.
+
+
+*Interested in trying it yourself? Please check the following notebook examples:*
 * [Optimize for Code Gen](https://github.com/microsoft/FLAML/blob/main/notebook/autogen_openai.ipynb)
 * [Optimize for Math](https://github.com/microsoft/FLAML/blob/main/notebook/autogen_chatgpt.ipynb)
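+
+The following is a minimal sketch of how the new code utilities fit together. It is illustrative only: the task prompt and parameter values are examples, and an OpenAI key must be configured.
+
+```python
+from flaml.autogen.code_utils import generate_code, execute_code
+
+# ask a chat model for code; the config is forwarded to oai.Completion.create
+code, cost = generate_code(
+    model="gpt-3.5-turbo",
+    messages=[{"role": "user", "content": "Write a Python script that prints the sum of 1 to 100."}],
+)
+print(f"generation cost: ${cost}")
+
+# run the extracted code in a docker container; pass use_docker=False to run
+# locally, which should only be done with trusted code
+exit_code, logs = execute_code(code, timeout=60)
+print(exit_code, logs)
+```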