Commit: Update

samholt committed Apr 17, 2024
1 parent 1b1672f commit 22c04d9
Showing 12 changed files with 216 additions and 104 deletions.
2 changes: 1 addition & 1 deletion examples/generate_codebase_simple_blackjack.py
@@ -1,5 +1,5 @@
 from l2mac import generate_codebase
 
-codebase: dict = generate_codebase("Create a cli blackjack game")
+codebase: dict = generate_codebase("Create a cli blackjack game", steps=2)
 
 print(codebase) # it will print the codebase (repo) complete with all the files as a dictionary
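Note: the new steps argument caps how many plan steps the agent may generate (default 10; see the prompt change in l2mac/l2mac.py below), so a small value keeps this demo short. A minimal sketch of inspecting the returned repo, assuming the dict-of-files shape described in the comment above:

    from l2mac import generate_codebase

    codebase: dict = generate_codebase("Create a cli blackjack game", steps=2)
    for path, contents in codebase.items():  # file path -> file contents
        print(path, len(contents))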
15 changes: 11 additions & 4 deletions l2mac/core.py
@@ -67,13 +67,20 @@ def l2mac_internal(prompt_task: str, domain: Domain, run_tests: bool, project_na
     env.set_seed(seed=config.setup.seed)
     env.reset()
     l2mac = L2MAC(prompt_task=prompt_task, env=env, config=config, logger=logger, rate_limiter=rate_limiter)
-    output_file_store = l2mac.run()
+    output_file_store = l2mac.run(steps=steps)
     return output_file_store
 
 
-def generate_codebase(*args, **kwargs):
-    kwargs['domain'] = Domain.codebase
-    return run_l2mac(*args, **kwargs)
+def generate_codebase(prompt_task: str,
+                      run_tests: bool = False,
+                      project_name: Optional[str] = None,
+                      steps: int = 10,
+                      prompt_program: Optional[str] = None,
+                      prompts_file_path: Optional[str] = None,
+                      tools_enabled: Optional[str] = None,
+                      debugging_level: DebuggingLevel = DebuggingLevel.info,
+                      init_config: bool = False):
+    return run_l2mac(prompt_task=prompt_task, domain=Domain.codebase, run_tests=run_tests, project_name=project_name, steps=steps, prompt_program=prompt_program, prompts_file_path=prompts_file_path, tools_enabled=tools_enabled, debugging_level=debugging_level, init_config=init_config)
 
 def generate_book(*args, **kwargs):
     kwargs['domain'] = Domain.book
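Note: generate_codebase now spells out its options instead of forwarding *args/**kwargs, so the parameters are visible to IDEs and type checkers, and domain is always Domain.codebase rather than caller-overridable. An illustrative call using only parameters from the new signature:

    from l2mac import generate_codebase

    codebase = generate_codebase(
        "Create a cli snake game",  # prompt_task
        steps=4,                    # cap the plan at 4 steps
        run_tests=False,
    )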
63 changes: 30 additions & 33 deletions l2mac/l2mac.py
@@ -8,7 +8,7 @@
 import openai
 from l2mac.llm_providers.general import get_llm_config, get_model_max_tokens, chat_completion_rl
 from l2mac.utils.l2mac import hash_messages, clean_string, detect_cycles
-from l2mac.tools.write import write_files_from_dict
+from l2mac.tools.utils import write_files_from_dict
 from l2mac.tools.core import function_definition_list_factory, process_functions_into_function_names, available_functions_factory
 from l2mac.llm_providers.openai import num_tokens_consumed_by_chat_request
 from l2mac.llm_providers.utils import pretty_print_chat_messages
@@ -22,6 +22,7 @@ def __init__(self, prompt_task, env, config, logger, rate_limiter):
         self.seed_value = None
         self.logger = logger
         self.rate_limiter = rate_limiter
+        self.name = 'L2MAC'
         self.reset()
 
     def seed(self, seed_value):
@@ -34,7 +35,6 @@ def get_llm_config(self):
 
 
     def reset(self):
-        self.name = 'L2MAC'
         self.load_from_checkpoint = ''
         self.replay_llm_responses_path = ''
         self.replay_llm_responses_path_index = 0
@@ -117,7 +117,7 @@ def save_agent_state(self, messages, beginning_step=''):
         with open(path, 'w') as f:
             json.dump(data_to_save, f)
 
-    def get_llm_response(self, messages, max_tokens=None):
+    def get_llm_response(self, messages, max_tokens=None, tool_choice='auto'):
         self.print_dialog(messages)
         self.save_agent_state(messages)
         llm_config = self.get_llm_config()
@@ -147,10 +147,11 @@ def get_llm_response(self, messages, max_tokens=None):
         # else:
         # self.message_hash_same_increase_temperature = 0
         # self.message_hash = message_hash
-        llm_config['functions'] = self.functions
-        if messages[-1].get('function_call'):
-            llm_config['function_call'] = messages[-1]['function_call']
-            del(messages[-1]['function_call'])
+        llm_config['tools'] = self.functions
+        if tool_choice is not None:
+            llm_config['tool_choice'] = {"type": "function", "function": {"name": tool_choice}}
+        else:
+            llm_config['tool_choice'] = 'none'
         if self.replay_llm_responses_path:
             with open(self.replay_llm_responses_path, 'r') as f:
                 responses = json.load(f)
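Note: the request get_llm_response now assembles follows the OpenAI tools API (openai-python v1 client). A standalone sketch, with an illustrative model name and a tool schema paraphrased rather than copied from this repo:

    from openai import OpenAI

    client = OpenAI()
    tools = [{
        "type": "function",
        "function": {
            "name": "provide_detailed_sub_task_steps_for_sub_agents",
            "description": "Return the step-by-step plan as a list of strings.",  # paraphrased
            "parameters": {
                "type": "object",
                "properties": {"steps": {"type": "array", "items": {"type": "string"}}},
                "required": ["steps"],
            },
        },
    }]
    response = client.chat.completions.create(
        model="gpt-4-0613",  # illustrative
        messages=[{"role": "user", "content": "Plan a cli blackjack game."}],
        tools=tools,
        # Mirrors llm_config['tool_choice'] above: force this specific tool.
        tool_choice={"type": "function", "function": {"name": "provide_detailed_sub_task_steps_for_sub_agents"}},
    )
    print(response.choices[0].message.tool_calls[0].function.arguments)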
@@ -175,8 +176,8 @@ def get_llm_response(self, messages, max_tokens=None):
             self.logger.info("Error:", e.__dict__)  # or use a logging framework
             raise e
         message_response = response["choices"][0]["message"]
-        if not message_response.get('content'):
-            message_response['content'] = None
+        # if not message_response.get('content'):
+        #     message_response['content'] = None
         self.print_dialog([message_response], response_msg=True)
         return message_response
 
@@ -185,10 +186,10 @@ def _run(self, state=''):
         return ', '.join([f'`{fn}`' for fn in fns])
 
 
-    def run(self, state=''):
-        return self._run(state)
+    def run(self, steps: int = 10):
+        return self._run(steps=steps)
 
-    def _run(self, state=''):
+    def _run(self, steps: int = 10):
         self.reset()
         if not self.load_from_checkpoint:
             self.meta_messages = [self.system_message]
@@ -225,37 +226,38 @@ def _run(self, state=''):
 {task_description}
 ```
-Understand the problem, by creating an extremely detailed step-by-step plan, where each step is long (multiple sentences) and in total includes every single feature requirement specified above, feel free to copy directly from it. Use no more than 10 steps in the plan. Create additional tests, checks and evaluation at each step when applicable to help make an excellent code implementation, where all the code is fully functional. Use best software design practices, and you can output large amounts of code at once. Please include a last sentence to create and run tests when implementing or writing code in that same step. You will receive no human input at any stage, so you cannot use a human to test. Only create a detailed plan to begin with, which includes designing and running tests to check that they all pass. Please be sure to include all of the specified feature requirements in the following plan.
+Understand the problem, by creating an extremely detailed step-by-step plan, where each step is long (multiple sentences) and in total includes every single feature requirement specified above, feel free to copy directly from it. Use no more than {steps} steps in the plan. Create additional tests, checks and evaluation at each step when applicable to help make an excellent code implementation, where all the code is fully functional. Use best software design practices, and you can output large amounts of code at once. Please include a last sentence to create and run tests when implementing or writing code in that same step. You will receive no human input at any stage, so you cannot use a human to test. Only create a detailed plan to begin with, which includes designing and running tests to check that they all pass. Please be sure to include all of the specified feature requirements in the following plan.
 """
-        self.meta_messages.append({"role": "user", "content": first_message, "function_call": {"name": "provide_detailed_sub_task_steps_for_sub_agents"}})
+        self.meta_messages.append({"role": "user", "content": first_message})
         steps = []
         # Loop until we get a multi-step plan, as sometimes the first plan is not multi-step, and only a single step.
         max_reflections = 1
         current_reflection = 0
         current_dialog = deepcopy(self.meta_messages)
         while len(steps) <= 50 and current_reflection < max_reflections:
             current_reflection += 1
-            initial_response_message = self.get_llm_response(current_dialog)
+            initial_response_message = self.get_llm_response(current_dialog, tool_choice='provide_detailed_sub_task_steps_for_sub_agents')
             current_dialog.append(initial_response_message)
             current_dialog.append({"role": "user", "content": f"""
 Please reflect on the plan, and increase the number of generated steps to that of 100 or so very detailed steps that include all the feature requirements.
 """})
             # Could reflect and improve plan etc a few times here.
-            function_name = initial_response_message["function_call"]["name"]
+            function_response = initial_response_message['tool_calls'][0]["function"]
+            function_name = function_response["name"]
             try:
-                function_args = json.loads(initial_response_message["function_call"]["arguments"])
+                function_args = json.loads(function_response["arguments"])
             except json.decoder.JSONDecodeError:
                 try:
-                    function_args = json.loads(initial_response_message["function_call"]["arguments"].replace('\n', ''))
+                    function_args = json.loads(function_response["arguments"].replace('\n', ''))
                 except json.decoder.JSONDecodeError:
                     try:
-                        function_args = json.loads(initial_response_message["function_call"]["arguments"] + '"]}')
+                        function_args = json.loads(function_response["arguments"] + '"]}')
                     except json.decoder.JSONDecodeError:
                         try:
-                            function_args = json.loads(initial_response_message["function_call"]["arguments"] + '"]}')
+                            function_args = json.loads(function_response["arguments"] + '"]}')
                         except json.decoder.JSONDecodeError:
                             try:
-                                function_args = json.loads(initial_response_message["function_call"]["arguments"] + ']}')
+                                function_args = json.loads(function_response["arguments"] + ']}')
                             except Exception as e:
                                 print(e)
             fuction_to_call = available_functions_factory()[function_name]
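Note: the nested try/except above progressively repairs truncated JSON in the tool-call arguments (the third and fourth attempts append the same suffix, so one of them is redundant). The same idea as a condensed sketch, not part of the commit:

    import json

    def parse_tool_arguments(raw: str) -> dict:
        """Best-effort parse of a possibly truncated JSON arguments string."""
        for candidate in (raw, raw.replace('\n', ''), raw + '"]}', raw + ']}'):
            try:
                return json.loads(candidate)
            except json.JSONDecodeError:
                continue
        raise ValueError("could not repair tool-call arguments")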
@@ -264,7 +266,7 @@ def _run(self, state=''):
         # self.base_dialog = deepcopy(current_dialog)
         self.base_dialog = deepcopy([self.system_message, {"role": "user", "content": first_message}])
         # Remove provide_detailed_sub_task_steps_for_sub_agents function from functions list
-        self.functions = [function for function in self.functions if function['name'] != 'provide_detailed_sub_task_steps_for_sub_agents']
+        self.functions = [tool for tool in self.functions if tool['function']['name'] != 'provide_detailed_sub_task_steps_for_sub_agents']
         previous_step_output_summary = ""
         # errors_df_l = []
         # count_change_times = 0
@@ -295,10 +297,10 @@ def _run(self, state=''):
                     # self.sub_messages = self.sub_messages[:-1]
                     self.sub_messages.pop(3)
                     self.sub_messages.append({"role": "user", "content": f"""
-You have exhausted your context window. Reflect on your progress. Provide a short concise response, of two sentences maximum, this will be used to restart this step from the beginning without the previous messages.""", "function_call": 'none'})
+You have exhausted your context window. Reflect on your progress. Provide a short concise response, of two sentences maximum, this will be used to restart this step from the beginning without the previous messages."""})
                     # self.sub_messages.append({"role": "user", "content": f"""
                     # You have exhausted your context window. Please state only which files are necessary to view to complete this task, i.e. those files which the newly written files import from. Also reflect on your progress. Provide a short concise response, of two sentences maximum, this will be used to restart this step from the beginning without the previous messages.""", "function_call": 'none'})
-                    response_message = self.get_llm_response(self.sub_messages)
+                    response_message = self.get_llm_response(self.sub_messages, tool_choice=None)
                     summary_step_message = response_message['content']
                     # if 'maximum context' in e.args[0]:
                     self.re_tries += 1
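Note: this overflow path relies on the tool_choice=None branch added to get_llm_response, which sets llm_config['tool_choice'] = 'none' so the model must answer in plain text instead of emitting another tool call; only that short summary is carried into the restarted step. A sketch of the flow as a hypothetical standalone helper (not repo API):

    def recover_from_context_overflow(agent, sub_messages: list) -> str:
        sub_messages.append({
            "role": "user",
            "content": "You have exhausted your context window. Reflect on "
                       "your progress in at most two sentences.",
        })
        # tool_choice=None disables tools, forcing a plain-text summary.
        return agent.get_llm_response(sub_messages, tool_choice=None)["content"]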
@@ -321,7 +323,7 @@ def _run(self, state=''):
                 if 'status' in json.loads(function_return_message['content']) and json.loads(function_return_message['content'])['status'] == 'TASK_STEP_COMPLETE':
                     task_step_complete = True
                     self.sub_messages.append({"role": "user", "content": f"""
-Please provide a one or two sentence summary of the output of this step, which is useful for the next step. Your response will be used when starting the next step without any of the previous messages.""", "function_call": 'none'})
+Please provide a one or two sentence summary of the output of this step, which is useful for the next step. Your response will be used when starting the next step without any of the previous messages."""})
                     continue
                 self.sub_messages.append(function_return_message)
                 if 'name' in function_return_message and function_return_message['name'] == 'sub_task_step_complete' and json.loads(function_return_message['content'])['status'] == 'error':
@@ -346,11 +348,6 @@ def _run(self, state=''):
         self.logger.info('[STEP COMPLETE] sub step completed')
         self.logger.info('[TASK COMPLETE SUCCESSFULLY!!] All steps complete')
         self.logger.info('')
-        if self.env.env_task_id == 'HumanEval':
-            benchmark_task_id = state['task_id'].split('/')[1]
-            write_files_from_dict(self.file_dict, base_dir=f'{self.folder_path}{self.name}/{benchmark_task_id}')
-            return self.file_dict
-        else:
-            write_files_from_dict(self.file_dict, base_dir=f'{self.folder_path}{self.name}')
-            self.save_agent_state(self.sub_messages)
-            return f'{self.folder_path}/{self.name}'
+        write_files_from_dict(self.file_dict, base_dir=f'{self.folder_path}{self.name}')
+        self.save_agent_state(self.sub_messages)
+        return f'{self.folder_path}/{self.name}'
3 changes: 3 additions & 0 deletions l2mac/llm_providers/general.py
@@ -231,15 +231,18 @@ def chat_completion_rl_inner(**kwargs):
     kwargs.pop('_rate_limiter', None)
     kwargs.pop('_rate_limiter', None)
     kwargs.pop('stream', None)
+
     t0 = perf_counter()
     # if logger:
     #     logger.info(f"[{name}][OpenAI API Request] {kwargs}")
     # pretty_print_chat_messages(kwargs['messages'])
 
     if rate_limiter:
         rate_limiter.consume(**kwargs)
+        print(kwargs)
         response = client.chat.completions.create(**kwargs)
     else:
+        print(kwargs)
         response = client.chat.completions.create(**kwargs)
     # if logger:
     #     logger.info(f"[{name}][OpenAI API Returned] Elapsed request time: {perf_counter() - t0}s | response: {response}")
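Note: the added print(kwargs) lines echo the full request payload to stdout before each API call, presumably for debugging. The surrounding gating pattern in isolation, as a sketch assuming a rate_limiter object with a blocking consume() method as used above:

    def create_with_rate_limit(client, rate_limiter, **kwargs):
        # Block until the limiter grants capacity, then issue the request.
        if rate_limiter:
            rate_limiter.consume(**kwargs)
        return client.chat.completions.create(**kwargs)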
12 changes: 6 additions & 6 deletions l2mac/llm_providers/openai.py
@@ -451,15 +451,15 @@ def num_tokens_from_messages(messages, model="gpt-4-0613"):
     return num_tokens
 
 
-def num_tokens_from_functions(functions, model="gpt-3.5-turbo-0613"):
+def num_tokens_from_functions(tools, model="gpt-3.5-turbo-0613"):
     """Return the number of tokens used by a list of functions."""
     num_tokens = 0
-    for function in functions:
-        function_tokens = len(CL100K_ENCODER.encode(function['name']))
-        function_tokens += len(CL100K_ENCODER.encode(function['description']))
+    for tool in tools:
+        function_tokens = len(CL100K_ENCODER.encode(tool['function']['name']))
+        function_tokens += len(CL100K_ENCODER.encode(tool['function']['description']))
 
-        if 'parameters' in function:
-            parameters = function['parameters']
+        if 'parameters' in tool['function']:
+            parameters = tool['function']['parameters']
             if 'properties' in parameters:
                 for propertiesKey in parameters['properties']:
                     function_tokens += len(CL100K_ENCODER.encode(propertiesKey))
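Note: callers must now pass the wrapped tools list rather than bare function schemas. An illustrative call, assuming the helper is importable from this module and using a made-up tool definition:

    from l2mac.llm_providers.openai import num_tokens_from_functions

    tools = [{
        "type": "function",
        "function": {
            "name": "list_files",
            "description": "List all files in the current repository.",  # illustrative
            "parameters": {"type": "object", "properties": {}},
        },
    }]
    print(num_tokens_from_functions(tools, model="gpt-3.5-turbo-0613"))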
9 changes: 5 additions & 4 deletions l2mac/llm_providers/utils.py
@@ -37,11 +37,12 @@ def pretty_print_chat_messages(messages, num_tokens=None, max_tokens=None, logge
         color = COLORS.get(role, COLORS["system"])  # Default to system color if role not found
         formatted_role = role.capitalize()
         content = msg['content']
-        if role == "assistant" and 'function_call' in msg:
+        if role == "assistant" and msg['tool_calls']:
             formatted_role = "Function Call"
-            print(f"{color}[{formatted_role}] [{msg['function_call']['name']}] {msg['function_call']['arguments']}\033[0m")  # Reset color at the end
-            if logger:
-                logger.info(f"[{formatted_role}] [{msg['function_call']['name']}] {msg['function_call']['arguments']}")
+            for tool_call in msg['tool_calls']:
+                print(f"{color}[{formatted_role}] [{tool_call['function']['name']}] {tool_call['function']['arguments']}\033[0m")  # Reset color at the end
+                if logger:
+                    logger.info(f"[{formatted_role}] [{tool_call['function']['name']}] {tool_call['function']['arguments']}")
         else:
             print(f"{color}[{formatted_role}] {content}\033[0m")  # Reset color at the end
             if logger:
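Note: an assistant message in the new tool_calls shape that the branch above iterates over looks roughly like this (id and arguments are illustrative); also note the new condition reads msg['tool_calls'] directly, so it assumes every assistant message carries that key, possibly set to None:

    msg = {
        "role": "assistant",
        "content": None,
        "tool_calls": [{
            "id": "call_abc123",  # illustrative
            "type": "function",
            "function": {"name": "write_files", "arguments": "{}"},
        }],
    }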
8 changes: 7 additions & 1 deletion l2mac/tools/run_code.py → l2mac/tools/code_analysis.py
@@ -7,7 +7,8 @@
 from timeout_decorator import timeout
 import xml.etree.ElementTree as ET
 from pathlib import Path
-from l2mac.tools.write import write_files_from_dict
+import re
+from l2mac.tools.utils import write_files_from_dict
 from l2mac.tools.read import load_code_files_into_dict
 
 def check_syntax_with_timeout(file_dict):
@@ -116,6 +117,11 @@ def pytest_code_base(file_dict, files_to_test=None):
     print('')
     return captured_output
 
+def count_errors_in_syntax(syntax_output: str):
+    pattern = r".+:\d+:\d+: [E]\d+: .+"
+    errors = re.findall(pattern, syntax_output)
+    return len(errors)
+
 
 def parse_and_print_junit_xml(file_path):
     tree = ET.parse(file_path)
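Note: the new count_errors_in_syntax helper counts pylint-style diagnostic lines of the form path:line:col: Exxxx: message, ignoring warning codes. For example:

    from l2mac.tools.code_analysis import count_errors_in_syntax

    report = ("main.py:10:5: E0001: invalid syntax (<unknown>, line 10)\n"
              "main.py:12:1: W0611: unused import os")
    print(count_errors_in_syntax(report))  # -> 1 (only E-codes match)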
2 changes: 1 addition & 1 deletion l2mac/tools/control_unit.py
@@ -1,6 +1,6 @@
 from typing import List
 import json
-from l2mac.tools.run_code import check_syntax_with_timeout, check_pytest_with_timeout, count_errors_in_syntax
+from l2mac.tools.code_analysis import check_syntax_with_timeout, check_pytest_with_timeout, count_errors_in_syntax
 
 def provide_detailed_sub_task_steps_for_sub_agents(steps: List[str] = []):
     return steps
24 changes: 17 additions & 7 deletions l2mac/tools/core.py
@@ -4,7 +4,7 @@
 from l2mac.tools.control_unit import provide_detailed_sub_task_steps_for_sub_agents, check_sub_task_step_complete
 from l2mac.tools.read import view_files, list_files
 from l2mac.tools.write import write_files, delete_files
-from l2mac.tools.run_code import run_python_file, pytest_files
+from l2mac.tools.code_analysis import run_python_file, pytest_files
 
 
 
@@ -23,6 +23,16 @@ def available_functions_factory():
     return available_functions
 
 def function_definition_list_factory():
+    # Following OpenAI's updated format for function definitions
+    functions = function_definition_list_factory_internal()
+    tools = []
+    for function in functions:
+        tools.append({"type": "function",
+                      "function": function})
+    return tools
+
+
+def function_definition_list_factory_internal():
     functions = [
         {
             "name": "provide_detailed_sub_task_steps_for_sub_agents",
@@ -163,17 +173,17 @@ def function_definition_list_factory():
     },]
     return functions
 
-def process_functions_into_function_names(functions: List[dict] = []):
+def process_functions_into_function_names(tools: List[dict] = []):
     function_names = []
-    for function in functions:
-        function_names.append(function['name'])
+    for tool in tools:
+        function_names.append(tool['function']['name'])
     return function_names
 
 
-def process_function_call_and_return_message(message_function_call: dict, file_dict: dict, logger=None, functions=[], enable_tests=True):
+def process_function_call_and_return_message(message_function_call: dict, file_dict: dict, logger=None, tools=[], enable_tests=True):
     function_name = ''
-    if len(functions) >= 1:
-        functions_available_keys = process_functions_into_function_names(functions)
+    if len(tools) >= 1:
+        functions_available_keys = process_functions_into_function_names(tools)
     else:
         functions_available_keys = list(available_functions_factory().keys())
     try:
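Note: the factory change in brief: each legacy function schema is wrapped in the tools envelope the chat completions API now expects. An equivalent sketch, assuming the internal factory is importable from l2mac.tools.core:

    from l2mac.tools.core import function_definition_list_factory_internal

    functions = function_definition_list_factory_internal()
    tools = [{"type": "function", "function": fn} for fn in functions]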
(Diffs for the remaining 3 changed files did not load and are not shown.)
