Fix MOA committing issues and integrate with benchmark harness
gembancud committed Dec 18, 2024
1 parent f05e021 commit 49eb1d2
Showing 2 changed files with 36 additions and 6 deletions.
11 changes: 10 additions & 1 deletion aider/coders/mixture_of_architects_coder.py
@@ -52,7 +52,7 @@ def get_architect_response(self, architect, current_user_message):
             repo=self.repo,
             map_tokens=self.repo_map.max_map_tokens if self.repo_map else 0,
             summarize_from_coder=False,
-            stream=True,
+            stream=self.stream,
         )
         ask_coder.auto_commits = self.auto_commits
         ask_coder.gpt_prompts = MixturePrompts()
@@ -195,6 +195,10 @@ def run_discussion_round(self, user_message):
             self.io.rule()
         finally:
             self.io.tool_output("Discussion round complete.")
+            # io.yes is a proxy for auto-running code, used here as a proxy for benchmarking
+            # TODO: Replace with a better testing strategy
+            if self.io.yes:
+                self.run_coding_phase(user_message)
 
     def preproc_user_input(self, inp):
         if not inp:
@@ -308,14 +312,19 @@ def run_coding_phase(self, message):
kwargs["cache_prompts"] = False
kwargs["num_cache_warming_pings"] = 0
kwargs["summarize_from_coder"] = False
kwargs["stream"] = self.stream
kwargs["auto_commits"] = self.auto_commits

new_kwargs = dict(io=self.io)
new_kwargs.update(kwargs)

editor_coder = Coder.create(**new_kwargs)
editor_coder.abs_fnames = set(self.abs_fnames)
editor_coder.abs_read_only_fnames = set(self.abs_read_only_fnames)
editor_coder.auto_commits = self.auto_commits
editor_coder.cur_messages = []
editor_coder.done_messages = []
editor_coder.repo = self.repo

if self.verbose:
editor_coder.show_announcements()
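Stepping back from the diff: the recurring fix in this file is that the mixture-of-architects coder spawns helper coders (an ask_coder per architect, an editor_coder for the coding phase) and must forward its own stream and auto_commits settings instead of hard-coding them. A minimal sketch of that forwarding pattern, using simplified stand-in classes rather than aider's real Coder API:

# Minimal sketch (stand-in classes, not aider's real API): a parent coder
# that spawns sub-coders must pass along its own stream/auto_commits flags,
# otherwise user settings like --no-stream or --no-auto-commits are
# silently ignored by the child -- the committing issue this commit fixes.
class SubCoder:
    def __init__(self, stream=True, auto_commits=True):
        self.stream = stream
        self.auto_commits = auto_commits

class ParentCoder(SubCoder):
    def spawn(self):
        # Before: SubCoder(stream=True) always streamed and auto-committed.
        # After: inherit the parent's configuration.
        return SubCoder(stream=self.stream, auto_commits=self.auto_commits)

parent = ParentCoder(stream=False, auto_commits=False)
child = parent.spawn()
assert (child.stream, child.auto_commits) == (False, False)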
31 changes: 26 additions & 5 deletions benchmark/benchmark.py
@@ -195,6 +195,9 @@ def main(
     num_ctx: Optional[int] = typer.Option(
         None, "--num-ctx", help="Override model context window size"
     ),
+    moa: Optional[List[str]] = typer.Option(
+        None, "--moa", help="List of additional architect models"
+    ),
     exercises_dir: str = typer.Option(
         EXERCISES_DIR_DEFAULT, "--exercises-dir", help="Directory with exercise files"
     ),
@@ -294,6 +297,7 @@ def main(
                 editor_edit_format,
                 num_ctx,
                 sleep,
+                moa,
             )
 
             all_results.append(results)
@@ -317,6 +321,9 @@ def main(
                 max_apply_update_errors,
                 editor_model,
                 editor_edit_format,
+                num_ctx,
+                sleep,
+                moa,
             )
         all_results = run_test_threaded.gather(tqdm=True)
 
@@ -577,6 +584,7 @@ def run_test_real(
     editor_edit_format,
     num_ctx=None,
     sleep=0,
+    moa=None,
 ):
     if not os.path.isdir(testdir):
         print("Not a dir:", testdir)
@@ -651,10 +659,10 @@ def run_test_real(
     show_fnames = ",".join(map(str, fnames))
     print("fnames:", show_fnames)
 
-    coder = Coder.create(
-        main_model,
-        edit_format,
-        io,
+    coder_kwargs = dict(
+        main_model=main_model,
+        edit_format=edit_format,
+        io=io,
         fnames=fnames,
         use_git=False,
         stream=False,
@@ -663,6 +671,14 @@ def run_test_real(
         cache_prompts=True,
         suggest_shell_commands=False,
     )
+
+    # Add architect_models if the moa parameter was provided
+    if moa:
+        # moa is already a list of model names
+        architect_models = [models.Model(m) for m in moa]
+        coder_kwargs["architect_models"] = architect_models
+
+    coder = Coder.create(**coder_kwargs)
     coder.max_apply_update_errors = max_apply_update_errors
     coder.show_announcements()
 
@@ -730,10 +746,15 @@ def run_test_real(
             instructions = errors
             instructions += prompts.test_failures.format(file_list=file_list)
 
+    # For MOA benchmark runs, append the MOA models to the model name
+    model_name = main_model.name
+    if moa:
+        model_name = f"{model_name}, {', '.join(moa)}"
+
     results = dict(
         testdir=str(testdir),
         testcase=testdir.name,
-        model=main_model.name,
+        model=model_name,
         edit_format=edit_format,
         tests_outcomes=test_outcomes,
         cost=coder.total_cost,
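On the benchmark side, moa is declared as Optional[List[str]], so typer collects repeated --moa flags into a single list. A hypothetical invocation (placeholder model names; the exact CLI entry point may differ) and the results-row label it produces:

# Hypothetical invocation (placeholder names):
#
#   ./benchmark/benchmark.py my-run --model claude-3-5-sonnet \
#       --moa gpt-4o --moa deepseek-chat
#
# run_test_real then receives moa == ["gpt-4o", "deepseek-chat"] and labels
# the results row with the whole ensemble rather than only the main model:
moa = ["gpt-4o", "deepseek-chat"]
model_name = "claude-3-5-sonnet"
if moa:
    model_name = f"{model_name}, {', '.join(moa)}"
print(model_name)  # claude-3-5-sonnet, gpt-4o, deepseek-chat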
