Organize prototyper and container tool prompts (#675)

Restructure and decouple prompts, add clearer instructions and steps. Although we should keep this, there are still problems that cannot be fixed by simple prompting. E.g., LLM cannot figure out which `*.o` files to add to include the function-under-test, even though there are examples from other fuzz targets. I will do this via more agents later.
google · Nov 7, 2024 · 78999af · 78999af
1 parent ccbeb65
commit 78999af
Show file tree

Hide file tree

Showing 6 changed files with 469 additions and 169 deletions.
diff --git a/agent/prototyper.py b/agent/prototyper.py
@@ -6,8 +6,11 @@
 
 import logger
 from agent.base_agent import BaseAgent
+from data_prep.project_context.context_introspector import ContextRetriever
 from experiment.benchmark import Benchmark
-from llm_toolkit.prompt_builder import DefaultTemplateBuilder
+from llm_toolkit.prompt_builder import EXAMPLES as EXAMPLE_FUZZ_TARGETS
+from llm_toolkit.prompt_builder import (DefaultTemplateBuilder,
+                                        PrototyperTemplateBuilder)
 from llm_toolkit.prompts import Prompt
 from results import BuildResult, Result
 from tool.container_tool import ProjectContainerTool
@@ -21,10 +24,18 @@ class Prototyper(BaseAgent):
   def _initial_prompt(self, results: list[Result]) -> Prompt:
     """Constructs initial prompt of the agent."""
     benchmark = results[-1].benchmark
-
-    default_prompt_builder = DefaultTemplateBuilder(model=self.llm,
-                                                    benchmark=benchmark)
-    prompt = default_prompt_builder.build([])
+    retriever = ContextRetriever(benchmark)
+    context_info = retriever.get_context_info()
+    prompt_builder = PrototyperTemplateBuilder(
+        model=self.llm,
+        benchmark=benchmark,
+    )
+    prompt = prompt_builder.build(example_pair=[],
+                                  project_context_content=context_info,
+                                  tool_guides=self.inspect_tool.tutorial())
+    # prompt = prompt_builder.build(example_pair=EXAMPLE_FUZZ_TARGETS.get(
+    #     benchmark.language, []),
+    #                               tool_guides=self.inspect_tool.tutorial())
     return prompt
 
   def _update_fuzz_target_and_build_script(self, cur_round: int, response: str,
@@ -163,13 +174,31 @@ def _container_handle_conclusion(
       compile_log = self.llm.truncate_prompt(build_result.compile_log)
       logger.info('***** Failed to recompile in %02d rounds *****', cur_round)
       prompt_text = (
-          'Failed to build fuzz target. Here is the fuzz target, build'
-          ' script, compliation command, and other compilation runtime'
-          ' output.\n<fuzz target>\n'
-          f'{build_result.fuzz_target_source}\n</fuzz target>\n'
-          f'<build script>\n{build_result.build_script_source}\n'
-          f'</build script>\n<compilation log>\n{compile_log}\n'
-          '</compilation log>\n')
+          'Failed to build fuzz target. Here is the fuzz target, build script, '
+          'compliation command, and other compilation runtime output. Analyze '
+          'the error messages, the fuzz target, and the build script carefully '
+          'to identify the root cause. Avoid making random changes to the fuzz '
+          'target or build script without a clear understanding of the error. '
+          'If necessary, #include necessary headers and #define required macros'
+          'or constants in the fuzz target, or adjust compiler flags to link '
+          'required libraries in the build script. After collecting information'
+          ', analyzing and understanding the error root cause, YOU MUST take at'
+          ' least one step to validate your theory with source code evidence. '
+          'Only if your theory is verified, respond the revised fuzz target and'
+          'build script in FULL.\n'
+          'Always try to learn from the source code about how to fix errors, '
+          'for example, search for the key words (e.g., function name, type '
+          'name, constant name) in the source code to learn how they are used. '
+          'Similarly, learn from the other fuzz targets and the build script to'
+          'understand how to include the correct headers.\n'
+          'Focus on writing a minimum buildable fuzz target that calls the '
+          'target function. We can increase its complexity later, but first try'
+          'to make it compile successfully.'
+          'If an error happens repeatedly and cannot be fixed, try to '
+          'mitigate it. For example, replace or remove the line.'
+          f'<fuzz target>\n{build_result.fuzz_target_source}\n</fuzz target>\n'
+          f'<build script>\n{build_result.build_script_source}\n</build script>'
+          f'\n<compilation log>\n{compile_log}\n</compilation log>\n')
     elif not build_result.is_function_referenced:
       logger.info(
           '***** Fuzz target does not reference function-under-test in %02d '
@@ -199,17 +228,16 @@ def execute(self, result_history: list[Result]) -> BuildResult:
     """Executes the agent based on previous result."""
     logger.info('Executing Prototyper')
     last_result = result_history[-1]
-    prompt = self._initial_prompt(result_history)
     benchmark = last_result.benchmark
     self.inspect_tool = ProjectContainerTool(benchmark, name='inspect')
     self.inspect_tool.compile(extra_commands=' && rm -rf /out/* > /dev/null')
     cur_round = 1
-    prompt.append(self.inspect_tool.tutorial())
     build_result = BuildResult(benchmark=benchmark,
                                trial=last_result.trial,
                                work_dirs=last_result.work_dirs,
                                author=self,
                                chat_history={self.name: ''})
+    prompt = self._initial_prompt(result_history)
     try:
       client = self.llm.get_chat_client(model=self.llm.get_model())
       while prompt and cur_round < MAX_ROUND:

diff --git a/llm_toolkit/models.py b/llm_toolkit/models.py
@@ -654,16 +654,11 @@ def _do_generate(self, client: ChatSession, prompt: str,
                    config: dict[str, Any]) -> Any:
     """Generates chat response."""
     logger.info('%s generating response with config: %s', self.name, config)
-    try:
-      return client.send_message(
-          prompt,
-          stream=False,
-          generation_config=config,
-          safety_settings=self.safety_config).text  # type: ignore
-    except Exception as e:
-      logger.error('%s failed to generated response: %s; Config: %s', e,
-                   self.name, config)
-      return ''
+    return client.send_message(
+        prompt,
+        stream=False,
+        generation_config=config,
+        safety_settings=self.safety_config).text  # type: ignore
 
   def truncate_prompt(self,
                       raw_prompt_text: Any,

diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py
@@ -32,6 +32,7 @@
 logger = logging.getLogger(__name__)
 
 DEFAULT_TEMPLATE_DIR: str = 'prompts/template_xml/'
+AGENT_TEMPLATE_DIR: str = 'prompts/agent/'
 
 # TODO(Dongge): Refactor this to avoid hard-coding.
 # Example files.
@@ -141,21 +142,21 @@ def __init__(self,
     self.triager_problem_template_file = self._find_template(
         template_dir, 'triager_problem.txt')
 
-  def _format_priming(self, target_file_type: FileType,
-                      needs_extern: bool) -> str:
+  def _format_priming(self, benchmark: Benchmark) -> str:
     """Formats a priming based on the prompt template."""
     priming = self._get_template(self.priming_template_file)
-    priming = priming.replace('{LANGUAGE}', target_file_type.value)
+    priming = priming.replace('{LANGUAGE}', benchmark.file_type.value)
+    priming = priming.replace('{FUZZ_TARGET_PATH}', benchmark.target_path)
     # TODO(Dongge): Add project name and fuzz target file path.
-    if needs_extern:
+    if benchmark.needs_extern:
       priming += (
           'IMPORTANT: The fuzz target is written in C++, whereas the '
           'project-under-test is written in C. All headers, functions, and code'
           'from the project must be consistently wrapped in '
           '<code>extern "C"</code> to ensure error-free compilation and linkage'
           'between C and C++:\n<code>\nextern "C" {\n    //Include necessary C '
           'headers, source files, functions, and code here.\n}\n</code>\n')
-    if target_file_type == FileType.CPP:
+    if benchmark.file_type == FileType.CPP:
       type_specific_priming = self._get_template(self.cpp_priming_filler_file)
     else:
       type_specific_priming = ''
@@ -287,8 +288,7 @@ def build(self,
     """Constructs a prompt using the templates in |self| and saves it."""
     if not self.benchmark:
       return self._prompt
-    priming = self._format_priming(self.benchmark.file_type,
-                                   self.benchmark.needs_extern)
+    priming = self._format_priming(self.benchmark)
     final_problem = self.format_problem(self.benchmark.function_signature)
     final_problem += (f'You MUST call <code>\n'
                       f'{self.benchmark.function_signature}\n'
@@ -537,6 +537,51 @@ def _slice_func_code(self, project: str, func_name: str,
     return ''
 
 
+class PrototyperTemplateBuilder(DefaultTemplateBuilder):
+  """Builds the Prototyper agent's prompt: agent priming plus tool guides."""
+
+  def __init__(self,
+               model: models.LLM,
+               benchmark: Benchmark,
+               template_dir: str = DEFAULT_TEMPLATE_DIR):
+    super().__init__(model)
+    self._template_dir = template_dir
+    self.agent_templare_dir = AGENT_TEMPLATE_DIR  # NOTE: "templare" is sic.
+    self.benchmark = benchmark
+
+    # Priming comes from the agent dir; the rest from |template_dir|.
+    self.priming_template_file = self._find_template(self.agent_templare_dir,
+                                                     'prototyper-priming.txt')
+    self.cpp_priming_filler_file = self._find_template(
+        template_dir, 'cpp-specific-priming-filler.txt')
+    self.problem_template_file = self._find_template(template_dir,
+                                                     'problem.txt')
+    self.solution_template_file = self._find_template(template_dir,
+                                                      'solution.txt')
+    self.context_template_file = self._find_template(template_dir,
+                                                     'context.txt')
+
+  def build(self,
+            example_pair: list[list[str]],
+            project_example_content: Optional[list[list[str]]] = None,
+            project_context_content: Optional[dict] = None,
+            tool_guides: str = '') -> prompts.Prompt:
+    """Constructs the prompt from templates, then appends |tool_guides|."""
+    if not self.benchmark:
+      return self._prompt
+    priming = self._format_priming(self.benchmark)
+    final_problem = self.format_problem(self.benchmark.function_signature)
+    final_problem += (f'You MUST call <code>\n'
+                      f'{self.benchmark.function_signature}\n'
+                      f'</code> in your solution!\n')
+    if project_context_content:
+      final_problem += self.format_context(project_context_content)
+    self._prepare_prompt(priming, final_problem, example_pair,
+                         project_example_content)
+    self._prompt.append(tool_guides)
+    return self._prompt
+
+
 class DefaultJvmTemplateBuilder(PromptBuilder):
   """Default builder for JVM projects."""
 

diff --git a/prompts/agent/prototyper-priming.txt b/prompts/agent/prototyper-priming.txt
@@ -0,0 +1,143 @@
+<system>
+As a security testing engineer, you must write an `int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)` fuzz target in {LANGUAGE}.
+Objective: Your goal is to modify an existing fuzz target `{FUZZ_TARGET_PATH}` to write a minimum fuzz target of a given function-under-test that can build successfully.
+</system>
+
+<steps>
+Follow these steps to write a minimum fuzz target:
+
+Step 1. Determine the information you need to write an effective fuzz target.
+This includes:
+    * **Source code** of the function under test.
+    * **Custom Types and Dependencies** definitions and implementations.
+    * **Initialization and setup** requirements and steps.
+    * **Build details** and integration steps.
+    * Valid and edge-case input values.
+    * Environmental and runtime dependencies.
+
+Step 2. Collect information using the Bash tool.
+Use the bash tool (see <tool> section) and follow its rules to gather the necessary information. You can collect information from:
+    * The existing human written fuzz target at `{FUZZ_TARGET_PATH}`.
+    * The existing human written build script `/src/build.sh`.
+    * The project source code (in `.`, or `/src/<project-under-test>/`) cloned from the project repository.
+    * Documentation about the project, the function, and the variables/constants involved.
+    * Environment variables.
+    * Knowledge about OSS-Fuzz's build infrastructure: It will compile your fuzz target in the same way as the existing human written fuzz target with the build script.
+
+Step 3. Analyze the function and its parameters.
+Understand the function under test by analyzing its source code and documentation:
+    * **Purpose and functionality** of the function.
+    * **Input processing** and internal logic.
+    * **Dependencies** on other functions or global variables.
+    * **Error handling** and edge cases.
+
+Step 4. Understand initialization requirements.
+Identify what is needed to properly initialize the function:
+    * **Header files** and their relative paths used by include statements in the fuzz target.
+    * **Complex input parameters or objects** initialization.
+    * **Constructor functions** or initialization routines.
+    * **Global state** or configuration needs to be set up.
+    * **Mocking** external dependencies if necessary.
+
+Step 5. Understand Constraints and edge cases.
+For each input parameter, understand:
+    * Valid ranges and data types.
+    * Invalid or edge-case values (e.g., zero, NULL, predefined constants, maximum values).
+    * Special values that trigger different code paths.
+
+Step 6: Plan Fuzz Target Implementation.
+Decide how to implement the fuzz target:
+    * **Extract parameters** from the `data` and `size` variable of `LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)`.
+    * Handle fixed-size versus variable-size data.
+    * **Initialize function's parameters** by appropriately mapping the raw input bytes.
+    * Ensure that the fuzz target remains deterministic and avoids side effects.
+    * Avoid `goto` statements.
+
+Step 7: **Write** the fuzz target code.
+Implement the `LLVMFuzzerTestOneInput` function:
+    * Header files:
+        * Investigate how existing fuzz targets include headers.
+        * Investigate where they are located in the project
+        * Collect all headers required by your fuzz target and their locations.
+        * Include their relative path in the same way as the existing fuzz targets.
+    * Macros or Constants:
+        * Include or define necessary macros or constants.
+    * Input Handling:
+        * Use `FuzzedDataProvider` if and only if the fuzz target at `{FUZZ_TARGET_PATH}` is a C++ file.
+        * Use `extern "C"` if and only if the fuzz target at `{FUZZ_TARGET_PATH}` is a C++ file.
+        * Check that the input size is sufficient.
+        * Extract parameters from the input data.
+        * Handle any necessary conversions or validations.
+    * Function Invocation:
+        * Initialize required objects or state.
+        * Modify the existing fuzz target at `{FUZZ_TARGET_PATH}` to fuzz the function under test with the fuzzed parameters.
+        * Ensure proper error handling.
+    *
+    * Cleanup:
+        * Free any allocated resources.
+        * Reset any global state if necessary.
+
+Step 8 (Optional): **Modify** the Build Script.
+Write a new build script only if the existing one (`/src/build.sh`) is insufficient:
+    * Decide if you need to modify the build script at `/src/build.sh` to successfully build the new fuzz target.
+    * Include compilation steps for the project under test.
+    * Include compilation steps for the new fuzz target.
+    * Specify necessary compiler and linker flags.
+    * Ensure all dependencies are correctly linked.
+
+Step 9: Providing Your Conclusion:
+    * Provide your conclusion on the FULL new fuzz target and build script **ONLY AFTER** you have gathered all necessary information.
+    * **DO NOT SEND** any other content (e.g., bash tool commands) in the conclusion message. ALWAYS send other commands individually and ONLY SEND conclusion after collecting all information.
+    * Conclusion Format:
+        * Overall Description:
+            * Summarize your findings and describe your fuzz target design.
+            * Wrap this summary within <conclusion> and </conclusion> tags.
+    * Modified Fuzz Target:
+        * Provide the full code of the modified fuzz target.
+        * Wrap the code within <fuzz target> and </fuzz target> tags.
+    * Modified Build Script (if applicable):
+        * If you need to modify the build script, provide the full code.
+        * Wrap it within <build script> and </build script> tags.
+    * Format Example:
+        <conclusion>
+        I determined that the fuzz target needs to include specific header files and adjust the `LLVMFuzzerTestOneInput` function to call the new function-under-test. Additionally, the build script requires modification to link against the necessary libraries.
+        </conclusion>
+        <fuzz target>
+        [Your FULL fuzz target code here.]
+        </fuzz target>
+        <build script>
+        [Your FULL build script code here, if applicable.]
+        </build script>
+
+</steps>
+
+{TYPE_SPECIFIC_PRIMING}
+
+<instructions>
+1. Methodical Approach:
+    * Be systematic to cover all necessary aspects, such as:
+        * Understanding the function's parameters and dependencies.
+        * Identifying required header files and libraries.
+        * Recognizing any special initialization or environmental requirements.
+2. Utilizing Existing Examples:
+    * Use the existing fuzz target at `{FUZZ_TARGET_PATH}` and other fuzz targets with `LLVMFuzzerTestOneInput` in its parent directory as references.
+    * Pay special attention to:
+        * How header files are included.
+        * The structure and content of the `LLVMFuzzerTestOneInput` function.
+    * Typically, you only need to modify the content of `LLVMFuzzerTestOneInput`.
+3. Investigating Header Inclusions:
+    * Use bash tool to find required headers and libraries.
+    * Examine library files built by `/src/build.sh` to understand available functions and symbols.
+4. Modifying the Build Script (if necessary):
+    * Modify `/src/build.sh` to build the necessary components or include required libraries if function-under-test is not included.
+    * The project's directory may contain a `README.md` with build instructions (e.g., at `/src/<project-name>/README.md`).
+5. Do Not Compile:
+    * **Do not compile** the fuzz target during your investigation.
+    * Provide your conclusions based on the information gathered after you have a solution.
+6. Formatting Code Snippets:
+    * Do not wrap code snippets with triple backticks (```).
+    * Use the specified XML-style tags for wrapping code and other content.
+7. DO NOT send the <conclusion> early: Provide conclusions **only after** gathering all necessary information.
+8. Focus on Final Goals:
+    * Ensure that your fuzz target and build script aim to successfully build the fuzz target and fuzz the function-under-test.
+</instructions>