Fix MOA committing issues and integrate with benchmark harness
gembancud committed Dec 18, 2024
1 parent f05e021 commit 49eb1d2
Showing 2 changed files with 36 additions and 6 deletions.
11 changes: 10 additions & 1 deletion aider/coders/mixture_of_architects_coder.py
@@ -52,7 +52,7 @@ def get_architect_response(self, architect, current_user_message):
             repo=self.repo,
             map_tokens=self.repo_map.max_map_tokens if self.repo_map else 0,
             summarize_from_coder=False,
-            stream=True,
+            stream=self.stream,
         )
         ask_coder.auto_commits = self.auto_commits
         ask_coder.gpt_prompts = MixturePrompts()
@@ -195,6 +195,10 @@ def run_discussion_round(self, user_message):
             self.io.rule()
         finally:
             self.io.tool_output("Discussion round complete.")
+            # io.yes is a proxy for auto-running code, used here as a proxy for benchmarking
+            # TODO: Replace with a better testing strategy
+            if self.io.yes:
+                self.run_coding_phase(user_message)
 
     def preproc_user_input(self, inp):
         if not inp:
@@ -308,14 +312,19 @@ def run_coding_phase(self, message):
kwargs["cache_prompts"] = False
kwargs["num_cache_warming_pings"] = 0
kwargs["summarize_from_coder"] = False
kwargs["stream"] = self.stream
kwargs["auto_commits"] = self.auto_commits

new_kwargs = dict(io=self.io)
new_kwargs.update(kwargs)

editor_coder = Coder.create(**new_kwargs)
editor_coder.abs_fnames = set(self.abs_fnames)
editor_coder.abs_read_only_fnames = set(self.abs_read_only_fnames)
editor_coder.auto_commits = self.auto_commits
editor_coder.cur_messages = []
editor_coder.done_messages = []
editor_coder.repo = self.repo

if self.verbose:
editor_coder.show_announcements()
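Stepping back from the diff: the recurring fix in this file is that the mixture-of-architects coder spawns helper coders (an ask_coder per architect, an editor_coder for the coding phase) and must forward its own stream and auto_commits settings instead of hard-coding them. A minimal sketch of that forwarding pattern, using simplified stand-in classes rather than aider's real Coder API:

# Minimal sketch (stand-in classes, not aider's real API): a parent coder
# that spawns sub-coders must pass along its own stream/auto_commits flags,
# otherwise user settings like --no-stream or --no-auto-commits are
# silently ignored by the child -- the committing issue this commit fixes.
class SubCoder:
    def __init__(self, stream=True, auto_commits=True):
        self.stream = stream
        self.auto_commits = auto_commits

class ParentCoder(SubCoder):
    def spawn(self):
        # Before: SubCoder(stream=True) always streamed and auto-committed.
        # After: inherit the parent's configuration.
        return SubCoder(stream=self.stream, auto_commits=self.auto_commits)

parent = ParentCoder(stream=False, auto_commits=False)
child = parent.spawn()
assert (child.stream, child.auto_commits) == (False, False)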
31 changes: 26 additions & 5 deletions benchmark/benchmark.py
@@ -195,6 +195,9 @@ def main(
     num_ctx: Optional[int] = typer.Option(
         None, "--num-ctx", help="Override model context window size"
     ),
+    moa: Optional[List[str]] = typer.Option(
+        None, "--moa", help="List of additional architect models"
+    ),
     exercises_dir: str = typer.Option(
         EXERCISES_DIR_DEFAULT, "--exercises-dir", help="Directory with exercise files"
     ),
@@ -294,6 +297,7 @@ def main(
                 editor_edit_format,
                 num_ctx,
                 sleep,
+                moa,
             )
 
             all_results.append(results)
@@ -317,6 +321,9 @@ def main(
                 max_apply_update_errors,
                 editor_model,
                 editor_edit_format,
+                num_ctx,
+                sleep,
+                moa,
             )
         all_results = run_test_threaded.gather(tqdm=True)
 
@@ -577,6 +584,7 @@ def run_test_real(
     editor_edit_format,
     num_ctx=None,
     sleep=0,
+    moa=None,
 ):
     if not os.path.isdir(testdir):
         print("Not a dir:", testdir)
@@ -651,10 +659,10 @@ def run_test_real(
     show_fnames = ",".join(map(str, fnames))
     print("fnames:", show_fnames)
 
-    coder = Coder.create(
-        main_model,
-        edit_format,
-        io,
+    coder_kwargs = dict(
+        main_model=main_model,
+        edit_format=edit_format,
+        io=io,
         fnames=fnames,
         use_git=False,
         stream=False,
@@ -663,6 +671,14 @@ def run_test_real(
         cache_prompts=True,
         suggest_shell_commands=False,
     )
+
+    # Add architect_models if the moa parameter was provided
+    if moa:
+        # moa is already a list of model names
+        architect_models = [models.Model(m) for m in moa]
+        coder_kwargs["architect_models"] = architect_models
+
+    coder = Coder.create(**coder_kwargs)
     coder.max_apply_update_errors = max_apply_update_errors
     coder.show_announcements()
 
@@ -730,10 +746,15 @@ def run_test_real(
             instructions = errors
             instructions += prompts.test_failures.format(file_list=file_list)
 
+    # For MOA benchmark runs, append the MOA models to the model name
+    model_name = main_model.name
+    if moa:
+        model_name = f"{model_name}, {', '.join(moa)}"
+
     results = dict(
         testdir=str(testdir),
         testcase=testdir.name,
-        model=main_model.name,
+        model=model_name,
         edit_format=edit_format,
         tests_outcomes=test_outcomes,
         cost=coder.total_cost,
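On the benchmark side, moa is declared as Optional[List[str]], so typer collects repeated --moa flags into a single list. A hypothetical invocation (placeholder model names; the exact CLI entry point may differ) and the results-row label it produces:

# Hypothetical invocation (placeholder names):
#
#   ./benchmark/benchmark.py my-run --model claude-3-5-sonnet \
#       --moa gpt-4o --moa deepseek-chat
#
# run_test_real then receives moa == ["gpt-4o", "deepseek-chat"] and labels
# the results row with the whole ensemble rather than only the main model:
moa = ["gpt-4o", "deepseek-chat"]
model_name = "claude-3-5-sonnet"
if moa:
    model_name = f"{model_name}, {', '.join(moa)}"
print(model_name)  # claude-3-5-sonnet, gpt-4o, deepseek-chat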
