google · AmPaschal · May 21, 2025 · May 21, 2025 · May 21, 2025 · May 22, 2025
diff --git a/agent/base_agent.py b/agent/base_agent.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 """The abstract base class for LLM agents in stages."""
 import argparse
+import asyncio
 import os
 import random
 import re
@@ -23,11 +24,14 @@
 from typing import Any, Optional
 
 import requests
+from google.adk import agents, runners, sessions
+from google.genai import errors, types
 
 import logger
 import utils
 from data_prep import introspector
-from llm_toolkit.models import LLM
+from experiment import benchmark as benchmarklib
+from llm_toolkit.models import LLM, VertexAIModel
 from llm_toolkit.prompts import Prompt
 from results import Result
 from tool.base_tool import BaseTool
@@ -269,6 +273,18 @@ def _preprocess_fi_setup(cls) -> None:
 
     introspector.set_introspector_endpoints('http://127.0.0.1:8080/api')
 
+  def get_function_requirements(self) -> str:
+    """Returns the content of this trial's requirements file, '' if absent.
+
+    The path is `self.args.work_dirs.requirements_file_path(self.trial)`.
+    """
+    requirements_path = self.args.work_dirs.requirements_file_path(self.trial)
+    if not os.path.isfile(requirements_path):
+      return ''
+    # Pin the encoding so decoding does not depend on the host locale.
+    with open(requirements_path, 'r', encoding='utf-8') as file:
+      return file.read()
+
   @classmethod
   def cloud_main(cls) -> None:
     """Executes agent using dill files. This is for cloud experiments launched
@@ -295,6 +311,107 @@ def execute(self, result_history: list[Result]) -> Result:
     """Executes the agent based on previous result."""
 
 
+class ADKBaseAgent(BaseAgent):
+  """Base class for agents that are driven through the ADK library."""
+
+  def __init__(self,
+               trial: int,
+               llm: LLM,
+               args: argparse.Namespace,
+               benchmark: benchmarklib.Benchmark,
+               description: str = '',
+               instruction: str = '',
+               tools: Optional[list] = None,
+               name: str = ''):
+    """Builds the ADK agent, registers its session and creates the runner.
+
+    Raises:
+      ValueError: If |llm| is not a `VertexAIModel`.
+    """
+    super().__init__(trial, llm, args, tools, name)
+    self.benchmark = benchmark
+
+    # Only Vertex AI models are supported by ADK-based agents for now.
+    if not isinstance(llm, VertexAIModel):
+      raise ValueError(f'{self.name} only supports Vertex AI models.')
+
+    llm_agent = agents.LlmAgent(name=self.name,
+                                model=llm._vertex_ai_model,
+                                description=description,
+                                instruction=instruction,
+                                tools=tools if tools else [])
+
+    # Register the session under the same ids that `chat_llm` uses later.
+    session_store = sessions.InMemorySessionService()
+    session_store.create_session(app_name=self.name,
+                                 user_id=benchmark.id,
+                                 session_id=f'session_{self.trial}')
+
+    self.runner = runners.Runner(agent=llm_agent,
+                                 app_name=self.name,
+                                 session_service=session_store)
+    self.round = 0
+    logger.info('ADK Agent %s created.', self.name, trial=self.trial)
+
+  def chat_llm(self, cur_round: int, client: Any, prompt: Prompt,
+               trial: int) -> str:
+    """Sends |prompt| to the ADK agent and returns its final text response.
+
+    The async runner is driven by `asyncio.run`; the call is handed to
+    `self.llm.with_retry_on_error` together with `errors.ClientError`.
+    |client| and |trial| are unused here.
+
+    Returns:
+      Text of the first part of the last final response that has text, or ''.
+    """
+    self.round = cur_round
+    self.log_llm_prompt(prompt.get())
+
+    async def _ask_agent() -> str:
+      message = types.Content(role='user',
+                              parts=[types.Part(text=prompt.get())])
+      reply = ''
+
+      events = self.runner.run_async(user_id=self.benchmark.id,
+                                     session_id=f'session_{self.trial}',
+                                     new_message=message)
+      async for event in events:
+        if not event.is_final_response():
+          continue
+        parts = event.content.parts if event.content else None
+        if parts and parts[0].text:
+          reply = parts[0].text
+        elif event.actions and event.actions.escalate:
+          logger.error('Agent escalated: %s',
+                       event.error_message,
+                       trial=self.trial)
+
+      self.log_llm_response(reply)
+      return reply
+
+    def _run_blocking() -> str:
+      return asyncio.run(_ask_agent())
+
+    return self.llm.with_retry_on_error(_run_blocking, [errors.ClientError])
+
+  def log_llm_prompt(self, promt: str) -> None:
+    """Advances the round counter, then logs |promt| tagged with it."""
+    self.round += 1
+    logger.info('<CHAT PROMPT:ROUND %02d>%s</CHAT PROMPT:ROUND %02d>',
+                self.round,
+                promt,
+                self.round,
+                trial=self.trial)
+
+  def log_llm_response(self, response: str) -> None:
+    """Logs |response| tagged with the current round number."""
+    logger.info('<CHAT RESPONSE:ROUND %02d>%s</CHAT RESPONSE:ROUND %02d>',
+                self.round,
+                response,
+                self.round,
+                trial=self.trial)
+
+
 if __name__ == "__main__":
   # For cloud experiments.
   BaseAgent.cloud_main()
diff --git a/agent/crash_analyzer.py b/agent/crash_analyzer.py
@@ -209,7 +209,8 @@ def execute(self, result_history: list[Result]) -> AnalysisResult:
                                trial=last_result.trial,
                                work_dirs=last_result.work_dirs,
                                author=self,
-                               chat_history={self.name: ''})
+                               chat_history={self.name: ''},
+                               stacktrace=last_result.run_error)
     cur_round = 1
     try:
       client = self.llm.get_chat_client(model=self.llm.get_model())

diff --git a/agent/enhancer.py b/agent/enhancer.py
@@ -17,6 +17,7 @@
 import logger
 from agent.prototyper import Prototyper
 from llm_toolkit.prompt_builder import (CoverageEnhancerTemplateBuilder,
+                                        CrashEnhancerTemplateBuilder,
                                         EnhancerTemplateBuilder,
                                         JvmFixingBuilder)
 from llm_toolkit.prompts import Prompt, TextPrompt
@@ -48,6 +49,8 @@ def _initial_prompt(self, results: list[Result]) -> Prompt:
                    trial=self.trial)
       return Prompt()
 
+    function_requirements = self.get_function_requirements()
+
     if benchmark.language == 'jvm':
       # TODO: Do this in a separate agent for JVM coverage.
       builder = JvmFixingBuilder(self.llm, benchmark,
@@ -59,6 +62,12 @@ def _initial_prompt(self, results: list[Result]) -> Prompt:
         error_desc, errors = last_result.semantic_result.get_error_info()
         builder = EnhancerTemplateBuilder(self.llm, benchmark,
                                           last_build_result, error_desc, errors)
+      elif last_result.crash_result:
+        crash_result = last_result.crash_result
+        builder = CrashEnhancerTemplateBuilder(self.llm, benchmark,
+                                               last_build_result,
+                                               crash_result.insight,
+                                               crash_result.stacktrace)
       elif last_result.coverage_result:
         builder = CoverageEnhancerTemplateBuilder(
             self.llm,
@@ -77,7 +86,8 @@ def _initial_prompt(self, results: list[Result]) -> Prompt:
         return prompt
       prompt = builder.build(example_pair=[],
                              tool_guides=self.inspect_tool.tutorial(),
-                             project_dir=self.inspect_tool.project_dir)
+                             project_dir=self.inspect_tool.project_dir,
+                             function_requirements=function_requirements)
       # TODO: A different file name/dir.
       prompt.save(self.args.work_dirs.prompt)