diff --git a/l2mac/core.py b/l2mac/core.py index 7c021a08..8cdf0501 100644 --- a/l2mac/core.py +++ b/l2mac/core.py @@ -9,11 +9,11 @@ from l2mac.l2mac import L2MAC from l2mac.llm_providers.general import setup_chat_rate_limiter from l2mac.llm_providers.rate_limiter import ChatRateLimiter +from l2mac.prompts.load_prompts import L2MACPrompts, get_l2mac_prompts from l2mac.utils.logging import create_logger_in_process, generate_log_file_path from l2mac.utils.run import ( DebuggingLevel, Domain, - get_l2mac_prompts, load_prompt_program, seed_all, to_dotdict, @@ -68,7 +68,7 @@ def run_l2mac( # Process inputs if prompt_program is not None: prompt_program = load_prompt_program(prompt_program) - get_l2mac_prompts(prompts_file_path) + l2mac_prompts = get_l2mac_prompts(prompts_file_path, domain) config = load_config() log_path = generate_log_file_path(__file__, log_folder=config.setup.log_dir, config=config) config.setup.log_path = log_path @@ -101,6 +101,7 @@ def run_l2mac( config, rate_limiter, wandb, + l2mac_prompts, logger, ) else: @@ -118,6 +119,7 @@ def run_l2mac( config, rate_limiter, wandb, + l2mac_prompts, logger, ) except Exception as e: @@ -144,6 +146,7 @@ def l2mac_internal( config: L2MACConfig, rate_limiter: ChatRateLimiter, wandb: Optional[WandbConfig], + l2mac_prompts: L2MACPrompts, logger=None, ): env = get_env(domain=domain, config=config, logger=logger, seed=config.setup.seed) @@ -155,6 +158,7 @@ def l2mac_internal( config=config, logger=logger, rate_limiter=rate_limiter, + l2mac_prompts=l2mac_prompts, run_tests=run_tests, project_name=project_name, prompt_program=prompt_program, @@ -192,9 +196,54 @@ def generate_codebase( ) -def generate_book(*args, **kwargs): - kwargs["domain"] = Domain.book - return run_l2mac(*args, **kwargs) +def generate_book( + prompt_task: str, + run_tests: bool = False, + project_name: Optional[str] = None, + steps: int = 10, + prompt_program: Optional[str] = None, + prompts_file_path: Optional[str] = None, + tools_enabled: Optional[str] = None, + debugging_level: DebuggingLevel = DebuggingLevel.info, + init_config: bool = False, +): + return run_l2mac( + prompt_task=prompt_task, + domain=Domain.book, + run_tests=run_tests, + project_name=project_name, + steps=steps, + prompt_program=prompt_program, + prompts_file_path=prompts_file_path, + tools_enabled=tools_enabled, + debugging_level=debugging_level, + init_config=init_config, + ) + + +def generate_custom( + prompt_task: str, + run_tests: bool = False, + project_name: Optional[str] = None, + steps: int = 10, + prompt_program: Optional[str] = None, + prompts_file_path: Optional[str] = None, + tools_enabled: Optional[str] = None, + debugging_level: DebuggingLevel = DebuggingLevel.info, + init_config: bool = False, +): + return run_l2mac( + prompt_task=prompt_task, + domain=Domain.custom, + run_tests=run_tests, + project_name=project_name, + steps=steps, + prompt_program=prompt_program, + prompts_file_path=prompts_file_path, + tools_enabled=tools_enabled, + debugging_level=debugging_level, + init_config=init_config, + ) if __name__ == "__main__": diff --git a/l2mac/envs/__init__.py b/l2mac/envs/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/l2mac/l2mac.py b/l2mac/l2mac.py index 5bfb473c..3946b308 100644 --- a/l2mac/l2mac.py +++ b/l2mac/l2mac.py @@ -9,13 +9,17 @@ import openai from openai import APIError +from l2mac.config import L2MACConfig +from l2mac.envs.general import Environment from l2mac.llm_providers.general import ( chat_completion_rl, get_llm_config, get_model_max_tokens, ) from l2mac.llm_providers.openai import num_tokens_consumed_by_chat_request +from l2mac.llm_providers.rate_limiter import ChatRateLimiter from l2mac.llm_providers.utils import pretty_print_chat_messages +from l2mac.prompts.load_prompts import L2MACPrompts from l2mac.tools.core import ( available_functions_factory, function_definition_list_factory, @@ -30,11 +34,12 @@ class L2MAC: def __init__( self, - prompt_task, - env, - config, - logger, - rate_limiter, + prompt_task: str, + env: Environment, + config: L2MACConfig, + logger: object, + rate_limiter: ChatRateLimiter, + l2mac_prompts: L2MACPrompts, run_tests: bool = True, project_name: Optional[str] = None, steps: int = 10, @@ -50,6 +55,7 @@ def __init__( self.seed_value = None self.logger = logger self.rate_limiter = rate_limiter + self.l2mac_prompts = l2mac_prompts self.name = "L2MAC" self.run_tests = run_tests self.project_name = project_name @@ -110,8 +116,7 @@ def reset(self): self.functions = [ tool for tool in self.functions if tool["function"]["name"] in self.tools_enabled.split(",") ] - if self.env.env_name: - system_message = "" + system_message = self.l2mac_prompts.system self.system_message = {"role": "system", "content": system_message} def print_dialog(self, messages, response_msg=False): diff --git a/l2mac/llm_providers/__init__.py b/l2mac/llm_providers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/l2mac/prompts/__init__.py b/l2mac/prompts/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/l2mac/prompts/codebase.yaml b/l2mac/prompts/codebase.yaml index 0ff068ce..ca20d720 100644 --- a/l2mac/prompts/codebase.yaml +++ b/l2mac/prompts/codebase.yaml @@ -1,56 +1,55 @@ -prompts: - system: | - Objective: Write code for a large system design task. - Please note that the code should be fully functional. No placeholders. - Only use the functions you have been provided with. - Only use the `write_files` to output code. - - You must act autonomously and you will receive no human input at any stage. You have to return as output the complete code for completing this task, and correctly incorporate it into the existing code base. - You always write out the whole file contents. You always indent code with tabs. - Please always view the files before writing to them, to make sure you are writing to the correct files. - When writing a test, make the filename start with the prefix 'test_'. - - Provide the minimal code necessary to achieve the task conditioned on the existing generated code---including changing the existing generated code. - - You cannot visualize any graphical output. You exist within a Actor Model machine, and when you list out steps, each step will be taken by a new separate sub-ChatGPT model. When you list out a sub-task steps, you can optionally specify the sub-task validation to check that it has been completed successfully. - - You cannot use any databases as none are setup in the local environment, instead mock a database with an in memory dictionary to store data. No data saved to disk will persist between steps or write operations. - - If a test is failing the error could be the code, or the test is incorrect, so feel free to overwrite and change the tests when they are incorrect, to make all tests pass. - - Use the functions provided. When calling functions only provide a RFC8259 compliant JSON request following this format without deviation. - first_message: | - You will get instructions for code to write. - First lay out the names of the core classes, functions, methods that will be necessary, As well as a quick comment on their purpose. - Do not comment on what every file does. Please note that the code should be fully functional. No placeholders. - - You will start with the "entrypoint" file, then go to the ones that are imported by that file, and so on. - Please note that the code should be fully functional. No placeholders. - - Follow a language and framework appropriate best practice file naming convention. - Make sure that files contain all imports, types etc. The code should be fully functional. Make sure that code in different files are compatible with each other. - When writing code if you are unsure, write a plausible implementation. - Include module dependency or package manager dependency definition file. - - Useful to know: - - For Python, you always create an appropriate requirements.txt file. - Always add a comment briefly describing the purpose of the function definition. - Add comments explaining very complex bits of logic. - Always follow the best practices for the requested languages for folder/file structure and how to package the project. - You can use any package and any other packages you wish to install. - You cannot use any databases as none are setup in the local environment, instead mock a database with an in memory dictionary to store data. No data saved to disk will persist between steps or write operations. - When writing a test, make the filename start with the prefix 'test_'. - When putting files in folders, always be sure to include a file called __init__.py where relevant, or put all files in the same working directory. Always prefer the most simplest approach. - Always add a readme on how to run the code, or a .sh file to run the code. - - Python toolbelt preferences: - - pytest - - dataclasses - - flask - - Objective:``` - {prompt_task} - ``` - - Understand the problem, by creating an extremely detailed step-by-step plan, where each step is long (multiple sentences) and in total includes every single feature requirement specified above, feel free to copy directly from it. Use no more than {steps} steps in the plan. Create additional tests, checks and evaluation at each step when applicable to help make an excellent code implementation, where all the code is fully functional. Use best software design practices, and you can output large amounts of code at once. Please include a last sentence to create and run tests when implementing or writing code in that same step. You will receive no human input at any stage, so you cannot use a human to test. Only create a detailed plan to begin with, which includes designing and running tests to check that they all pass. Please be sure to include all of the specified feature requirements in the following plan. \ No newline at end of file +system: | + Objective: Write code for a large system design task. + Please note that the code should be fully functional. No placeholders. + Only use the functions you have been provided with. + Only use the `write_files` to output code. + + You must act autonomously and you will receive no human input at any stage. You have to return as output the complete code for completing this task, and correctly incorporate it into the existing code base. + You always write out the whole file contents. You always indent code with tabs. + Please always view the files before writing to them, to make sure you are writing to the correct files. + When writing a test, make the filename start with the prefix 'test_'. + + Provide the minimal code necessary to achieve the task conditioned on the existing generated code---including changing the existing generated code. + + You cannot visualize any graphical output. You exist within a Actor Model machine, and when you list out steps, each step will be taken by a new separate sub-ChatGPT model. When you list out a sub-task steps, you can optionally specify the sub-task validation to check that it has been completed successfully. + + You cannot use any databases as none are setup in the local environment, instead mock a database with an in memory dictionary to store data. No data saved to disk will persist between steps or write operations. + + If a test is failing the error could be the code, or the test is incorrect, so feel free to overwrite and change the tests when they are incorrect, to make all tests pass. + + Use the functions provided. When calling functions only provide a RFC8259 compliant JSON request following this format without deviation. +first_message: | + You will get instructions for code to write. + First lay out the names of the core classes, functions, methods that will be necessary, As well as a quick comment on their purpose. + Do not comment on what every file does. Please note that the code should be fully functional. No placeholders. + + You will start with the "entrypoint" file, then go to the ones that are imported by that file, and so on. + Please note that the code should be fully functional. No placeholders. + + Follow a language and framework appropriate best practice file naming convention. + Make sure that files contain all imports, types etc. The code should be fully functional. Make sure that code in different files are compatible with each other. + When writing code if you are unsure, write a plausible implementation. + Include module dependency or package manager dependency definition file. + + Useful to know: + + For Python, you always create an appropriate requirements.txt file. + Always add a comment briefly describing the purpose of the function definition. + Add comments explaining very complex bits of logic. + Always follow the best practices for the requested languages for folder/file structure and how to package the project. + You can use any package and any other packages you wish to install. + You cannot use any databases as none are setup in the local environment, instead mock a database with an in memory dictionary to store data. No data saved to disk will persist between steps or write operations. + When writing a test, make the filename start with the prefix 'test_'. + When putting files in folders, always be sure to include a file called __init__.py where relevant, or put all files in the same working directory. Always prefer the most simplest approach. + Always add a readme on how to run the code, or a .sh file to run the code. + + Python toolbelt preferences: + - pytest + - dataclasses + - flask + + Objective:``` + {prompt_task} + ``` + + Understand the problem, by creating an extremely detailed step-by-step plan, where each step is long (multiple sentences) and in total includes every single feature requirement specified above, feel free to copy directly from it. Use no more than {steps} steps in the plan. Create additional tests, checks and evaluation at each step when applicable to help make an excellent code implementation, where all the code is fully functional. Use best software design practices, and you can output large amounts of code at once. Please include a last sentence to create and run tests when implementing or writing code in that same step. You will receive no human input at any stage, so you cannot use a human to test. Only create a detailed plan to begin with, which includes designing and running tests to check that they all pass. Please be sure to include all of the specified feature requirements in the following plan. \ No newline at end of file diff --git a/l2mac/prompts/load_prompts.py b/l2mac/prompts/load_prompts.py new file mode 100644 index 00000000..b4fe1cdf --- /dev/null +++ b/l2mac/prompts/load_prompts.py @@ -0,0 +1,49 @@ +import os +from importlib import resources +from typing import Optional + +import yaml +from pydantic import BaseModel, ValidationError + +from l2mac.utils.run import Domain + + +class L2MACPrompts(BaseModel): + system: str + first_message: str + + +def get_l2mac_prompts(prompts_file_path: Optional[str], domain: Domain) -> L2MACPrompts: + """ + Loads the L2MAC prompts from a given file path. + + Args: + prompts_file_path Optional(str): The path to the L2MAC prompts file. + + Returns: + list: The loaded L2MAC prompts as L2MAC prompt objects + """ + if prompts_file_path is not None: + if os.path.isfile(prompts_file_path): + with open(prompts_file_path, "r") as file: + prompts_data = yaml.safe_load(file) + try: + return L2MACPrompts(**prompts_data) + except ValidationError as e: + print(f"Invalid prompts file at `{prompts_file_path}`:", e) + raise e + else: + raise FileNotFoundError(f"File not found at `prompts_file_path` of {prompts_file_path}") + elif domain.codebase: + prompts_file = "codebase.yaml" + elif domain.book: + prompts_file = "book.yaml" + elif domain.custom: + prompts_file = "custom.yaml" + with resources.open_text("l2mac.prompts", prompts_file) as file: + prompts_data = yaml.safe_load(file) + try: + return L2MACPrompts(**prompts_data) + except ValidationError as e: + print(f"Invalid prompts file at `{prompts_file_path}`:", e) + raise e diff --git a/l2mac/utils/__init__.py b/l2mac/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/l2mac/utils/run.py b/l2mac/utils/run.py index a9304b73..7b3516ae 100644 --- a/l2mac/utils/run.py +++ b/l2mac/utils/run.py @@ -1,9 +1,7 @@ import ast -import os import random import time from enum import Enum -from typing import Optional import numpy as np from pydantic import BaseModel @@ -96,34 +94,3 @@ def load_prompt_program(input_string: str): except (SyntaxError, ValueError) as e: print(f"Error reading or parsing `prompt_program` string encoded of {input_string} | Error: {e}") raise e - - -def get_l2mac_prompts(prompts_file_path: Optional[str], domain: Domain): - """ - Loads the L2MAC prompts from a given file path. - - Args: - prompts_file_path Optional(str): The path to the L2MAC prompts file. - - Returns: - list: The loaded L2MAC prompts as L2MAC prompt objects - """ - if prompts_file_path is not None: - if os.path.isfile(prompts_file_path): - pass - # try: - # with open(prompts_file_path, "r", encoding="utf-8") as file: - # # Read the file content and attempt to parse it - # file_content = file.read() - # return ast.literal_eval(file_content) - # except (SyntaxError, ValueError, IOError) as e: - # print(f"Error reading or parsing `prompts_file_path` file path of {prompts_file_path} | Error: {e}") - # raise e - else: - raise FileNotFoundError(f"File not found at `prompts_file_path` of {prompts_file_path}") - elif domain.codebase: - prompts_file_path = "prompts/codebase.yaml" - elif domain.book: - prompts_file_path = "prompts/book.yaml" - elif domain.custom: - prompts_file_path = "prompts/custom.yaml"