From 683caaa918c480492faab8bbd7cefa6279f1478e Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Wed, 21 May 2025 13:59:54 +0000 Subject: [PATCH 01/64] Added a basic function analyzer agent using the ADK library. This commit includes the following tasks: - implements a function analyzer agent using the ADK library. - implements a function that gets a function's source using the FuzzIntrospector API. - Provides this function as a tool to the function analyzer agent. - Adds basic prompt templates for the agent. - Creates a test file that can be used to access the agent independently. --- agent/function_analyzer.py | 188 ++++++++++++++++++ agent_tests/__init__.py | 0 agent_tests/function_analyzer_test.py | 56 ++++++ llm_toolkit/prompt_builder.py | 53 +++++ .../agent/function-analyzer-instruction.txt | 8 + prompts/agent/function-analyzer-priming.txt | 83 ++++++++ requirements.in | 1 + results.py | 29 +++ 8 files changed, 418 insertions(+) create mode 100644 agent/function_analyzer.py create mode 100644 agent_tests/__init__.py create mode 100644 agent_tests/function_analyzer_test.py create mode 100644 prompts/agent/function-analyzer-instruction.txt create mode 100644 prompts/agent/function-analyzer-priming.txt diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py new file mode 100644 index 0000000000..88d58d622b --- /dev/null +++ b/agent/function_analyzer.py @@ -0,0 +1,188 @@ + +""" +An LLM agent to analyze a function and identify its implicit requirements. +The results of this analysis will be used by the writer agents to +generate correct fuzz target for the function. +""" + +import argparse +import asyncio + +from typing import Optional + +import logging +from agent.base_agent import BaseAgent +from data_prep import introspector +from experiment import benchmark as benchmarklib +from llm_toolkit.models import LLM +from llm_toolkit.prompts import Prompt +from llm_toolkit import prompt_builder +from results import Result, PreWritingResult +from tool.base_tool import BaseTool + +from google.adk.agents import Agent +from google.adk.runners import Runner +from google.adk.sessions import InMemorySessionService +from google.genai import types # For creating message Content/Parts + +# logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +handler = logging.StreamHandler() +logger.addHandler(handler) + + +def get_function_source_tool(project_name: str, function_signature: str): + + """ + Retrieves a function's source using the project name and function signature. + + Args: + project_name (str): The name of the project. + function_signature (str): The signature of the function. + + Returns: + str: The source code of the function if found, otherwise an empty string. + """ + + function_code = introspector.query_introspector_function_source(project_name, function_signature) + + if function_code: + logger.info(f"Function with signature '{function_signature}' found and extracted.") + return function_code + else: + logger.info(f"Error: Function with signature '{function_signature}' not found in project '{project_name}'.") + return "" + +class FunctionAnalyzer (BaseAgent): + """An LLM agent to analyze a function and identify its implicit requirements. + The results of this analysis will be used by the writer agents to + generate correct fuzz target for the function. 
+ """ + + def __init__(self, + trial: int, + llm: LLM, + args: argparse.Namespace, + tools: Optional[list[BaseTool]] = None, + name: str = 'function_analyzer_agent',): + + # Call the parent constructor + super().__init__(trial, llm, args, tools, name) + + def initialize(self, benchmark: benchmarklib.Benchmark): + + self.benchmark = benchmark + + # Initialize the prompt builder + self.prompt_builder = prompt_builder.FunctionAnalyzerTemplateBuilder(self.llm, self.benchmark) + + # Get the agent's instructions + analyzer_instruction = self.prompt_builder.build_instruction() + + # Create the agent using the ADK library + function_analyzer = Agent( + name=self.name, + model='gemini-2.0-flash', #TODO: Get the model name from args. Currently, some of the default names are incompatible with the ADK library. + description=("Agent to analyze a function and identify its implicit requirements."), + instruction=analyzer_instruction.get(), + tools=[get_function_source_tool] + ) + + # Get user id and session id + # TODO: Figure out how to get this data + user_id = "user" + session_id = "session" + + # Create the session service + session_service = InMemorySessionService() + session = session_service.create_session( + app_name=self.name, + user_id=user_id, + session_id=session_id, + ) + + # Create the runner + self.runner = Runner( + agent=function_analyzer, + app_name=self.name, + session_service=session_service, + ) + + logger.info(f"Function Analyzer Agent created, with name: {self.name}, and session id: {session_id}") + + async def call_agent_async(self, query:str, runner, user_id:str, session_id:str) -> PreWritingResult: + + logger.info(f">>> User query: {query}") + + content = types.Content(role='user', parts=[types.Part(text=query)]) + + final_response_text = "Agent did not produce a final response." 
+ + result_available = False + + async for event in runner.run_async( + user_id=user_id, + session_id=session_id, + new_message=content, + ): + if event.is_final_response(): + if event.content and event.content.parts: + final_response_text = event.content.parts[0].text + result_available = True + elif event.actions and event.actions.escalate: + final_response_text = f"Agent escalated: {event.error_message or 'No specific message.'}" + break + + logger.info(f"<<< Agent response: {final_response_text}") + + if result_available: + # Get the requirements from the response + requirements = self._parse_tags(final_response_text, 'requirement') + else: + requirements = [] + + # Prepare the result + result = PreWritingResult( + benchmark=self.benchmark, + trial=self.trial, + work_dirs=self.args.work_dir, + result_available=result_available, + requirements=requirements, + ) + + return result + + def execute(self, result_history: list[Result]) -> PreWritingResult: + """Execute the agent with the given results.""" + + # Call the agent asynchronously and return the result + prompt = self._initial_prompt(result_history) + query = prompt.gettext() + user_id = "user" + session_id = "session" + result = asyncio.run(self.call_agent_async(query, self.runner, user_id, session_id)) + + if result.result_available: + # Save the result to the history + result_history.append(result) + + logger.info(f"Result available: {result.result_available}") + logger.info(f"Requirements: {result.requirements}") + return result + + def _initial_prompt(self, results: list[Result]) -> Prompt: + """Create the initial prompt for the agent.""" + + prompt = self.prompt_builder.build(project_name=self.benchmark.project, + function_signature=self.benchmark.function_signature) + + return prompt + + + + + + + + \ No newline at end of file diff --git a/agent_tests/__init__.py b/agent_tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/agent_tests/function_analyzer_test.py b/agent_tests/function_analyzer_test.py new file mode 100644 index 0000000000..ce16254880 --- /dev/null +++ b/agent_tests/function_analyzer_test.py @@ -0,0 +1,56 @@ +import argparse + +from agent.function_analyzer import FunctionAnalyzer +from experiment import benchmark as benchmarklib +from llm_toolkit import models + +RESULTS_DIR = './results' + + +def parse_args() -> argparse.Namespace: + """Parses command line arguments.""" + parser = argparse.ArgumentParser( + description='Evaluate the function analyzer agent.') + + parser.add_argument('-y', + '--benchmark-yaml', + type=str, + required=True, + help='A benchmark YAML file.') + + parser.add_argument('-w', + '--work-dir', + default=RESULTS_DIR) + + parser.add_argument('-mr', + '--max-round', + type=int, + default=100, + help='Max trial round for agents.') + + args = parser.parse_args() + + return args + +if __name__ == "__main__": + + model = models.LLM.setup( + ai_binary='', + name='vertex_ai_gemini-1-5-chat' + ) + + + args = parse_args() + + function_analyzer = FunctionAnalyzer(trial=1, llm=model, args=args) + + benchmarks = benchmarklib.Benchmark.from_yaml(args.benchmark_yaml) + + if len(benchmarks) == 0: + raise ValueError("No benchmarks found in the YAML file.") + + # Initialize the function analyzer with the first benchmark + function_analyzer.initialize(benchmarks[0]) + + # Run the function analyzer + function_analyzer.execute([]) \ No newline at end of file diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index 930392bb84..b6b7a9792a 100644 --- 
a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -808,6 +808,59 @@ def build(self, return self._prompt +class FunctionAnalyzerTemplateBuilder(PrototyperTemplateBuilder): + """ Builder for function analyzer. """ + def __init__(self, + model: models.LLM, + benchmark: Benchmark, + template_dir: str = DEFAULT_TEMPLATE_DIR, + initial: Any = None): + super().__init__(model, benchmark, template_dir, initial) + + # Load templates. + self.function_analyzer_instruction_template_file = self._find_template( + self.agent_templare_dir, 'function-analyzer-instruction.txt') + self.function_analyzer_prompt_template_file = self._find_template( + self.agent_templare_dir, 'function-analyzer-priming.txt') + + def build_instruction(self) -> prompts.Prompt: + """Constructs a prompt using the templates in |self| and saves it.""" + if not self.benchmark: + return self._prompt + + prompt = self._get_template(self.function_analyzer_instruction_template_file) + + self._prompt.append(prompt) + + return self._prompt + + def build_prompt(self, project_name, function_signature) -> prompts.Prompt: + """Constructs a prompt using the templates in |self| and saves it.""" + if not self.benchmark: + return self._prompt + + prompt = self._get_template(self.function_analyzer_prompt_template_file) + + prompt.replace('{PROJECT_NAME}', project_name) + prompt.replace('{FUNCTION_SIGNATURE}', function_signature) + + self._prompt.append(prompt) + + return self._prompt + + def build(self, + example_pair: Optional[list[list[str]]] = None, + project_example_content: Optional[list[list[str]]] = None, + project_context_content: Optional[dict] = None, + tool_guides: str = '', + project_dir: str = '', + project_name: str = '', + function_signature: str = '') -> prompts.Prompt: + + """Constructs a prompt using the templates in |self| and saves it.""" + return self.build_prompt(project_name, function_signature) + + class DefaultJvmTemplateBuilder(PromptBuilder): """Default builder for JVM projects.""" diff --git a/prompts/agent/function-analyzer-instruction.txt b/prompts/agent/function-analyzer-instruction.txt new file mode 100644 index 0000000000..1dfd4fa79a --- /dev/null +++ b/prompts/agent/function-analyzer-instruction.txt @@ -0,0 +1,8 @@ +You are a professional software engineer whose job is to review software functions, understand their intent, and identify important requirements of input and global variables that the function expects and needs to execute properly. + +You will be provided a function signature and project name. Your task is to use the function signature and project name to get the function's source code, analyze the function's implementation, and provide a response in the expected format. + +We have provided you with the following tools. +1. get_function_source_tool: A tool for getting the function's source using the project name and function signature. + + diff --git a/prompts/agent/function-analyzer-priming.txt b/prompts/agent/function-analyzer-priming.txt new file mode 100644 index 0000000000..6c6152fbb4 --- /dev/null +++ b/prompts/agent/function-analyzer-priming.txt @@ -0,0 +1,83 @@ + +As a professional security engineer, you must analyze the source code of the provided function and retrieve its input requirements. +The input requirements are necessary to enable the generation of valid fuzz drivers. +In each requirement, you should include a one sentence summary of the reason why you included it. 
+Objective: Your goal is to retrieve the input requirements for the function signature `{FUNCTION_SIGNATURE}` in the project `{PROJECT_NAME}. + + + + +Follow these steps to analyze a function and identify its input requirements: + +Step 1: Get the function's source +Use the get_function_source_tool tool provided to retrieve the source code of the provided function. +You should invoke the tool using the function signature and proejct name provided. + +Step 2: Analyze the retrieved function source +Identify any potential bugs, crashes or assertion violations in the retrieved function source. +Then identify requirements or constraints on the input variables that, if satisfied, will avoid the identified crashes. + + + + +Make sure your response follows the following format, enclosed in ``` ```. + +``` + + +project name: the name of the project provided +function signature: The function's signature + + + +The function's description + + + + +First requirement + + +Second requirement + +... + +nth requirement + + + + + + + +Here is an example response + +project name: htslib +function signature: int sam_index_build(const char *, int) + + + +The sam_index_build function is used to build a sam index. It uses the input arguments to identify and retrieve the index to build. It returns 1 if the build succeeds and 0 if the build fails. + + + + +The first input argument should be a validly allocated string. This is because the pointer is dereferenced by the function and there is no null check. + + +The first input argument should be null-terminated. This is because it is an argument to strlen, which requires null termination. + + +The second input argument should have a maximum value of MAX_INDEX. This is because it is used to index a static array of size MAX_INDEX. + + + + + + +Make sure you follow these instructions: + +1. Keep each requirement short and simple. Let the requirement contain the affected parameter, the expectation, and why the expectation is necessary. + + + diff --git a/requirements.in b/requirements.in index ac802b5287..c2a7ca9680 100644 --- a/requirements.in +++ b/requirements.in @@ -17,3 +17,4 @@ requests==2.32.3 rust-demangler==1.0 tiktoken==0.7.0 yapf==0.40.1 +google-adk==0.5.0 diff --git a/results.py b/results.py index c78b302dee..f0fee1d982 100644 --- a/results.py +++ b/results.py @@ -586,6 +586,35 @@ def to_dict(self) -> dict: } +class PreWritingResult(Result): + """ The result of the function analyzer. """ + result_available: bool + requirements: list[str] + explanation: str + + def __init__(self, + benchmark: Benchmark, + trial: int, + work_dirs: WorkDirs, + result_available: bool, + requirements: list[str] = [], + explanation: str = '', + fuzz_target_source: str = '', + build_script_source: str = '', + author: Any = None, + chat_history: Optional[dict] = None, + default_success: bool = False) -> None: + + super().__init__(benchmark, trial, work_dirs, fuzz_target_source, + build_script_source, author, chat_history, + default_success) + + self.result_available = result_available + if result_available: + self.requirements = requirements + self.explanation = explanation + + class BenchmarkResult: """All trial results for a benchmark in an experiment.""" benchmark: Benchmark From 16a687cf3a0f52c69a09fd3248ac23c8a2ee8841 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Wed, 21 May 2025 19:29:57 +0000 Subject: [PATCH 02/64] Resolved review comments. 
--- agent/function_analyzer.py | 324 +++++++++--------- agent_tests/__init__.py | 13 + agent_tests/function_analyzer_test.py | 81 +++-- llm_toolkit/models.py | 8 + llm_toolkit/prompt_builder.py | 7 +- .../agent/function-analyzer-instruction.txt | 4 +- prompts/agent/function-analyzer-priming.txt | 2 +- pyproject.toml | 2 +- requirements.in | 8 +- requirements.txt | 166 +++++++-- results.py | 33 +- 11 files changed, 406 insertions(+), 242 deletions(-) diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index 88d58d622b..323586a8be 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -1,188 +1,186 @@ - -""" -An LLM agent to analyze a function and identify its implicit requirements. -The results of this analysis will be used by the writer agents to +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +An LLM agent to analyze a function and identify its implicit requirements. +The results of this analysis will be used by the writer agents to generate correct fuzz target for the function. """ -import argparse import asyncio - -from typing import Optional - import logging -from agent.base_agent import BaseAgent -from data_prep import introspector -from experiment import benchmark as benchmarklib -from llm_toolkit.models import LLM -from llm_toolkit.prompts import Prompt -from llm_toolkit import prompt_builder -from results import Result, PreWritingResult -from tool.base_tool import BaseTool from google.adk.agents import Agent from google.adk.runners import Runner from google.adk.sessions import InMemorySessionService from google.genai import types # For creating message Content/Parts -# logging.basicConfig(level=logging.INFO) +from agent.base_agent import BaseAgent +from data_prep import introspector +from experiment import benchmark as benchmarklib +from llm_toolkit import prompt_builder +from llm_toolkit.prompts import Prompt +from results import PreWritingResult, Result + logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) -handler = logging.StreamHandler() -logger.addHandler(handler) def get_function_source_tool(project_name: str, function_signature: str): - - """ - Retrieves a function's source using the project name and function signature. - - Args: - project_name (str): The name of the project. - function_signature (str): The signature of the function. - - Returns: - str: The source code of the function if found, otherwise an empty string. - """ - - function_code = introspector.query_introspector_function_source(project_name, function_signature) - - if function_code: - logger.info(f"Function with signature '{function_signature}' found and extracted.") - return function_code + """ + Retrieves a function's source using the project name and function signature. + + Args: + project_name (str): The name of the project. + function_signature (str): The signature of the function. + + Returns: + str: The source code of the function if found, otherwise an empty string. 
+ """ + + function_code = introspector.query_introspector_function_source( + project_name, function_signature) + + if function_code: + logger.info("Function with signature '%s' found and extracted.", + function_signature) + else: + logger.info( + "Error: Function with signature '%s'" + " not found in project '%s'.", + function_signature, project_name) + + return function_code + + +class FunctionAnalyzer(BaseAgent): + """An LLM agent to analyze a function and identify its implicit requirements. + The results of this analysis will be used by the writer agents to + generate correct fuzz target for the function. + """ + + def initialize(self, benchmark: benchmarklib.Benchmark): + """Initialize the function analyzer agent with the given benchmark.""" + + self.benchmark = benchmark + + # Initialize the prompt builder + self.prompt_builder = prompt_builder.FunctionAnalyzerTemplateBuilder( + self.llm, self.benchmark) + + # Get the agent's instructions + analyzer_instruction = self.prompt_builder.build_instruction() + + # Create the agent using the ADK library + function_analyzer = Agent( + name=self.name, + # TODO: Get the model name from args. + # Currently, the default names are incompatible with the ADK library. + model='gemini-2.0-flash', + description=( + "Agent to analyze a function and identify its requirements."), + instruction=analyzer_instruction.get(), + tools=[get_function_source_tool]) + + # Get user id and session id + # TODO: Figure out how to get this data + user_id = "user" + session_id = "session" + + # Create the session service + session_service = InMemorySessionService() + session_service.create_session( + app_name=self.name, + user_id=user_id, + session_id=session_id, + ) + + # Create the runner + self.runner = Runner( + agent=function_analyzer, + app_name=self.name, + session_service=session_service, + ) + + logger.info( + "Function Analyzer Agent created, with name: %s, and session id: %s", + self.name, session_id) + + async def call_agent_async(self, query: str, runner, user_id: str, + session_id: str) -> PreWritingResult: + """Call the agent asynchronously with the given query.""" + + logger.info(">>> User query: %s", query) + + content = types.Content(role='user', parts=[types.Part(text=query)]) + + final_response_text = "Agent did not produce a final response." + + result_available = False + + async for event in runner.run_async( + user_id=user_id, + session_id=session_id, + new_message=content, + ): + if event.is_final_response(): + if event.content and event.content.parts: + final_response_text = event.content.parts[0].text + result_available = True + elif event.actions and event.actions.escalate: + error_message = event.error_message or 'No specific message.' + final_response_text = f"Agent escalated: {error_message}" + break + + logger.info("<<< Agent response: %s", final_response_text) + + if result_available: + # Get the requirements from the response + requirements = self._parse_tags(final_response_text, 'requirement') else: - logger.info(f"Error: Function with signature '{function_signature}' not found in project '{project_name}'.") - return "" - -class FunctionAnalyzer (BaseAgent): - """An LLM agent to analyze a function and identify its implicit requirements. - The results of this analysis will be used by the writer agents to - generate correct fuzz target for the function. 
- """ - - def __init__(self, - trial: int, - llm: LLM, - args: argparse.Namespace, - tools: Optional[list[BaseTool]] = None, - name: str = 'function_analyzer_agent',): - - # Call the parent constructor - super().__init__(trial, llm, args, tools, name) - - def initialize(self, benchmark: benchmarklib.Benchmark): - - self.benchmark = benchmark - - # Initialize the prompt builder - self.prompt_builder = prompt_builder.FunctionAnalyzerTemplateBuilder(self.llm, self.benchmark) - - # Get the agent's instructions - analyzer_instruction = self.prompt_builder.build_instruction() - - # Create the agent using the ADK library - function_analyzer = Agent( - name=self.name, - model='gemini-2.0-flash', #TODO: Get the model name from args. Currently, some of the default names are incompatible with the ADK library. - description=("Agent to analyze a function and identify its implicit requirements."), - instruction=analyzer_instruction.get(), - tools=[get_function_source_tool] - ) - - # Get user id and session id - # TODO: Figure out how to get this data - user_id = "user" - session_id = "session" - - # Create the session service - session_service = InMemorySessionService() - session = session_service.create_session( - app_name=self.name, - user_id=user_id, - session_id=session_id, - ) - - # Create the runner - self.runner = Runner( - agent=function_analyzer, - app_name=self.name, - session_service=session_service, - ) - - logger.info(f"Function Analyzer Agent created, with name: {self.name}, and session id: {session_id}") - - async def call_agent_async(self, query:str, runner, user_id:str, session_id:str) -> PreWritingResult: - - logger.info(f">>> User query: {query}") - - content = types.Content(role='user', parts=[types.Part(text=query)]) - - final_response_text = "Agent did not produce a final response." 
- - result_available = False - - async for event in runner.run_async( - user_id=user_id, - session_id=session_id, - new_message=content, - ): - if event.is_final_response(): - if event.content and event.content.parts: - final_response_text = event.content.parts[0].text - result_available = True - elif event.actions and event.actions.escalate: - final_response_text = f"Agent escalated: {event.error_message or 'No specific message.'}" - break - - logger.info(f"<<< Agent response: {final_response_text}") - - if result_available: - # Get the requirements from the response - requirements = self._parse_tags(final_response_text, 'requirement') - else: - requirements = [] - - # Prepare the result - result = PreWritingResult( - benchmark=self.benchmark, - trial=self.trial, - work_dirs=self.args.work_dir, - result_available=result_available, - requirements=requirements, - ) + requirements = [] - return result + # Prepare the result + result = PreWritingResult( + benchmark=self.benchmark, + trial=self.trial, + work_dirs=self.args.work_dir, + result_available=result_available, + requirements=requirements, + ) - def execute(self, result_history: list[Result]) -> PreWritingResult: - """Execute the agent with the given results.""" + return result - # Call the agent asynchronously and return the result - prompt = self._initial_prompt(result_history) - query = prompt.gettext() - user_id = "user" - session_id = "session" - result = asyncio.run(self.call_agent_async(query, self.runner, user_id, session_id)) + def execute(self, result_history: list[Result]) -> PreWritingResult: + """Execute the agent with the given results.""" - if result.result_available: - # Save the result to the history - result_history.append(result) + # Call the agent asynchronously and return the result + prompt = self._initial_prompt(result_history) + query = prompt.gettext() + user_id = "user" + session_id = "session" + result = asyncio.run( + self.call_agent_async(query, self.runner, user_id, session_id)) - logger.info(f"Result available: {result.result_available}") - logger.info(f"Requirements: {result.requirements}") - return result + if result.result_available: + # Save the result to the history + result_history.append(result) - def _initial_prompt(self, results: list[Result]) -> Prompt: - """Create the initial prompt for the agent.""" - - prompt = self.prompt_builder.build(project_name=self.benchmark.project, - function_signature=self.benchmark.function_signature) - - return prompt - - - + return result + def _initial_prompt(self, results: list[Result]) -> Prompt: + """Create the initial prompt for the agent.""" + prompt = self.prompt_builder.build( + project_name=self.benchmark.project, + function_signature=self.benchmark.function_signature) - - \ No newline at end of file + return prompt diff --git a/agent_tests/__init__.py b/agent_tests/__init__.py index e69de29bb2..0a2669d7a2 100644 --- a/agent_tests/__init__.py +++ b/agent_tests/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/agent_tests/function_analyzer_test.py b/agent_tests/function_analyzer_test.py index ce16254880..89604cf0fc 100644 --- a/agent_tests/function_analyzer_test.py +++ b/agent_tests/function_analyzer_test.py @@ -1,56 +1,81 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""A test for the function analyzer agent.""" + import argparse +import logging +from typing import List from agent.function_analyzer import FunctionAnalyzer from experiment import benchmark as benchmarklib +from experiment.benchmark import Benchmark from llm_toolkit import models +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + RESULTS_DIR = './results' -def parse_args() -> argparse.Namespace: - """Parses command line arguments.""" +def parse_args() -> argparse.Namespace: + """Parses command line arguments.""" parser = argparse.ArgumentParser( description='Evaluate the function analyzer agent.') parser.add_argument('-y', - '--benchmark-yaml', - type=str, - required=True, - help='A benchmark YAML file.') - - parser.add_argument('-w', - '--work-dir', - default=RESULTS_DIR) - + '--benchmark-yaml', + type=str, + required=True, + help='A benchmark YAML file.') + + parser.add_argument('-w', '--work-dir', default=RESULTS_DIR) + parser.add_argument('-mr', '--max-round', type=int, default=100, help='Max trial round for agents.') - args = parser.parse_args() + parsed_args = parser.parse_args() + + return parsed_args - return args if __name__ == "__main__": - - model = models.LLM.setup( - ai_binary='', - name='vertex_ai_gemini-1-5-chat' - ) + model = models.LLM.setup(ai_binary='', name='vertex_ai_gemini-1-5-chat') + + args = parse_args() + + function_analyzer = FunctionAnalyzer(trial=1, llm=model, args=args) + + benchmarks: List[Benchmark] = benchmarklib.Benchmark.from_yaml( + args.benchmark_yaml) - args = parse_args() + if len(benchmarks) == 0: + raise ValueError("No benchmarks found in the YAML file.") - function_analyzer = FunctionAnalyzer(trial=1, llm=model, args=args) + test_benchmark = benchmarks[0] + logger.info("Loaded benchmark for function: %s", test_benchmark.function_name) - benchmarks = benchmarklib.Benchmark.from_yaml(args.benchmark_yaml) + # Initialize the function analyzer with the first benchmark + function_analyzer.initialize(test_benchmark) - if len(benchmarks) == 0: - raise ValueError("No benchmarks found in the YAML file.") - - # Initialize the function analyzer with the first benchmark - function_analyzer.initialize(benchmarks[0]) + # Run the function analyzer + result = function_analyzer.execute([]) - # Run the function analyzer - function_analyzer.execute([]) \ No newline at end of file + # Print the result + logger.info("Function Analyzer Result:") + logger.info("Result available: %s", result.result_available) + logger.info("Requirements: %s", result.requirements) diff --git a/llm_toolkit/models.py b/llm_toolkit/models.py index 
5f32024888..42afa18a20 100644 --- a/llm_toolkit/models.py +++ b/llm_toolkit/models.py @@ -385,6 +385,14 @@ class GPT4o(GPT): """OpenAI's GPT-4o model.""" name = 'gpt-4o' + MAX_INPUT_TOKEN = 128000 + + +class ChatGPT4oLatest(GPT): + """OpenAI's chatgpt-4o-latest model.""" + + name = 'chatgpt-4o-latest' + MAX_INPUT_TOKEN = 128000 class GPT4oMini(GPT): diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index b6b7a9792a..c57dd86dbb 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -810,6 +810,7 @@ def build(self, class FunctionAnalyzerTemplateBuilder(PrototyperTemplateBuilder): """ Builder for function analyzer. """ + def __init__(self, model: models.LLM, benchmark: Benchmark, @@ -828,7 +829,8 @@ def build_instruction(self) -> prompts.Prompt: if not self.benchmark: return self._prompt - prompt = self._get_template(self.function_analyzer_instruction_template_file) + prompt = self._get_template( + self.function_analyzer_instruction_template_file) self._prompt.append(prompt) @@ -856,10 +858,9 @@ def build(self, project_dir: str = '', project_name: str = '', function_signature: str = '') -> prompts.Prompt: - """Constructs a prompt using the templates in |self| and saves it.""" return self.build_prompt(project_name, function_signature) - + class DefaultJvmTemplateBuilder(PromptBuilder): """Default builder for JVM projects.""" diff --git a/prompts/agent/function-analyzer-instruction.txt b/prompts/agent/function-analyzer-instruction.txt index 1dfd4fa79a..093d560c13 100644 --- a/prompts/agent/function-analyzer-instruction.txt +++ b/prompts/agent/function-analyzer-instruction.txt @@ -1,6 +1,6 @@ -You are a professional software engineer whose job is to review software functions, understand their intent, and identify important requirements of input and global variables that the function expects and needs to execute properly. +You are a professional software engineer whose job is to review software functions, understand their intent, and identify important requirements of input and global variables that the function expects and needs to execute properly. -You will be provided a function signature and project name. Your task is to use the function signature and project name to get the function's source code, analyze the function's implementation, and provide a response in the expected format. +You will be provided a function signature and project name. Your task is to use the function signature and project name to get the function's source code, analyze the function's implementation, and provide a response in the expected format. We have provided you with the following tools. 1. get_function_source_tool: A tool for getting the function's source using the project name and function signature. diff --git a/prompts/agent/function-analyzer-priming.txt b/prompts/agent/function-analyzer-priming.txt index 6c6152fbb4..474f30186a 100644 --- a/prompts/agent/function-analyzer-priming.txt +++ b/prompts/agent/function-analyzer-priming.txt @@ -1,5 +1,5 @@ -As a professional security engineer, you must analyze the source code of the provided function and retrieve its input requirements. +As a professional security engineer, you must analyze the source code of the provided function and retrieve its input requirements. The input requirements are necessary to enable the generation of valid fuzz drivers. In each requirement, you should include a one sentence summary of the reason why you included it. 
Objective: Your goal is to retrieve the input requirements for the function signature `{FUNCTION_SIGNATURE}` in the project `{PROJECT_NAME}. diff --git a/pyproject.toml b/pyproject.toml index 7de9bd52dc..c76d0f8070 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ dependencies = [ "PyYAML==6.0.1", "requests==2.32.3", "rust-demangler==1.0", -"tiktoken==0.7.0", +"tiktoken==0.9.0", "yapf==0.40.1", "fuzz-introspector==0.1.10" ] diff --git a/requirements.in b/requirements.in index c2a7ca9680..01ab66cac9 100644 --- a/requirements.in +++ b/requirements.in @@ -2,9 +2,9 @@ anthropic==0.31.2 chardet==5.2.0 cxxfilt==0.3.0 GitPython==3.1.43 -google-api-python-client==2.143.0 +google-api-python-client==2.157.0 google-cloud-aiplatform==1.91.0 -google-cloud-storage==2.9.0 +google-cloud-storage==2.18.0 google-cloud-logging==3.11.2 Jinja2==3.1.6 openai==1.72.0 @@ -12,9 +12,9 @@ pandas==2.2.2 pylint==3.2.5 pyright==1.1.345 #pytype==2023.7.28 -PyYAML==6.0.1 +PyYAML==6.0.2 requests==2.32.3 rust-demangler==1.0 -tiktoken==0.7.0 +tiktoken==0.9.0 yapf==0.40.1 google-adk==0.5.0 diff --git a/requirements.txt b/requirements.txt index b85a3c9de1..0fcbfdca54 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.11 +# This file is autogenerated by pip-compile with Python 3.12 # by the following command: # # pip-compile requirements.in @@ -11,10 +11,16 @@ anthropic==0.31.2 anyio==4.9.0 # via # anthropic + # google-genai # httpx + # mcp # openai + # sse-starlette + # starlette astroid==3.2.4 # via pylint +authlib==1.5.2 + # via google-adk cachetools==5.5.2 # via google-auth certifi==2025.4.26 @@ -22,14 +28,24 @@ certifi==2025.4.26 # httpcore # httpx # requests +cffi==1.17.1 + # via cryptography chardet==5.2.0 # via -r requirements.in charset-normalizer==3.4.2 # via requests +click==8.2.1 + # via + # google-adk + # uvicorn +cryptography==45.0.2 + # via authlib cxxfilt==0.3.0 # via -r requirements.in deprecated==1.2.18 - # via opentelemetry-api + # via + # opentelemetry-api + # opentelemetry-semantic-conventions dill==0.4.0 # via pylint distro==1.9.0 @@ -38,15 +54,19 @@ distro==1.9.0 # openai docstring-parser==0.16 # via google-cloud-aiplatform +fastapi==0.115.12 + # via google-adk filelock==3.18.0 # via huggingface-hub -fsspec==2025.3.2 +fsspec==2025.5.0 # via huggingface-hub gitdb==4.0.12 # via gitpython gitpython==3.1.43 # via -r requirements.in -google-api-core[grpc]==2.24.2 +google-adk==0.5.0 + # via -r requirements.in +google-api-core[grpc]==2.25.0rc1 # via # google-api-python-client # google-cloud-aiplatform @@ -55,10 +75,15 @@ google-api-core[grpc]==2.24.2 # google-cloud-core # google-cloud-logging # google-cloud-resource-manager + # google-cloud-secret-manager + # google-cloud-speech # google-cloud-storage -google-api-python-client==2.143.0 - # via -r requirements.in -google-auth==2.40.1 + # google-cloud-trace +google-api-python-client==2.157.0 + # via + # -r requirements.in + # google-adk +google-auth==2.40.2 # via # google-api-core # google-api-python-client @@ -69,16 +94,22 @@ google-auth==2.40.1 # google-cloud-core # google-cloud-logging # google-cloud-resource-manager + # google-cloud-secret-manager + # google-cloud-speech # google-cloud-storage + # google-cloud-trace + # google-genai google-auth-httplib2==0.2.0 # via google-api-python-client google-cloud-aiplatform==1.91.0 - # via -r requirements.in + # via + # -r requirements.in + # google-adk google-cloud-appengine-logging==1.6.1 # via google-cloud-logging 
google-cloud-audit-log==0.3.2 # via google-cloud-logging -google-cloud-bigquery==3.31.0 +google-cloud-bigquery==3.33.0 # via google-cloud-aiplatform google-cloud-core==2.4.3 # via @@ -89,12 +120,23 @@ google-cloud-logging==3.11.2 # via -r requirements.in google-cloud-resource-manager==1.14.2 # via google-cloud-aiplatform -google-cloud-storage==2.9.0 +google-cloud-secret-manager==2.23.3 + # via google-adk +google-cloud-speech==2.32.0 + # via google-adk +google-cloud-storage==2.18.0 # via # -r requirements.in + # google-adk # google-cloud-aiplatform +google-cloud-trace==1.16.1 + # via opentelemetry-exporter-gcp-trace google-crc32c==1.7.1 - # via google-resumable-media + # via + # google-cloud-storage + # google-resumable-media +google-genai==1.16.1 + # via google-adk google-resumable-media==2.7.2 # via # google-cloud-bigquery @@ -105,10 +147,15 @@ googleapis-common-protos[grpc]==1.70.0 # google-cloud-audit-log # grpc-google-iam-v1 # grpcio-status +graphviz==0.20.3 + # via google-adk +greenlet==3.2.2 + # via sqlalchemy grpc-google-iam-v1==0.14.2 # via # google-cloud-logging # google-cloud-resource-manager + # google-cloud-secret-manager grpcio==1.71.0 # via # google-api-core @@ -118,7 +165,9 @@ grpcio==1.71.0 grpcio-status==1.71.0 # via google-api-core h11==0.16.0 - # via httpcore + # via + # httpcore + # uvicorn httpcore==1.0.9 # via httpx httplib2==0.22.0 @@ -128,8 +177,12 @@ httplib2==0.22.0 httpx==0.28.1 # via # anthropic + # google-genai + # mcp # openai -huggingface-hub==0.30.2 +httpx-sse==0.4.0 + # via mcp +huggingface-hub==0.31.4 # via tokenizers idna==3.10 # via @@ -144,7 +197,7 @@ isort==5.13.2 # via pylint jinja2==3.1.6 # via -r requirements.in -jiter==0.9.0 +jiter==0.10.0 # via # anthropic # openai @@ -152,16 +205,35 @@ markupsafe==3.0.2 # via jinja2 mccabe==0.7.0 # via pylint +mcp==1.9.0 + # via google-adk nodeenv==1.9.1 # via pyright -numpy==2.2.5 +numpy==2.2.6 # via # pandas # shapely openai==1.72.0 # via -r requirements.in -opentelemetry-api==1.32.1 - # via google-cloud-logging +opentelemetry-api==1.33.1 + # via + # google-adk + # google-cloud-logging + # opentelemetry-exporter-gcp-trace + # opentelemetry-resourcedetector-gcp + # opentelemetry-sdk + # opentelemetry-semantic-conventions +opentelemetry-exporter-gcp-trace==1.9.0 + # via google-adk +opentelemetry-resourcedetector-gcp==1.9.0a0 + # via opentelemetry-exporter-gcp-trace +opentelemetry-sdk==1.33.1 + # via + # google-adk + # opentelemetry-exporter-gcp-trace + # opentelemetry-resourcedetector-gcp +opentelemetry-semantic-conventions==0.54b1 + # via opentelemetry-sdk packaging==25.0 # via # google-cloud-aiplatform @@ -169,7 +241,7 @@ packaging==25.0 # huggingface-hub pandas==2.2.2 # via -r requirements.in -platformdirs==4.3.7 +platformdirs==4.3.8 # via # pylint # yapf @@ -180,6 +252,9 @@ proto-plus==1.26.1 # google-cloud-appengine-logging # google-cloud-logging # google-cloud-resource-manager + # google-cloud-secret-manager + # google-cloud-speech + # google-cloud-trace protobuf==5.29.4 # via # google-api-core @@ -188,6 +263,9 @@ protobuf==5.29.4 # google-cloud-audit-log # google-cloud-logging # google-cloud-resource-manager + # google-cloud-secret-manager + # google-cloud-speech + # google-cloud-trace # googleapis-common-protos # grpc-google-iam-v1 # grpcio-status @@ -198,13 +276,22 @@ pyasn1==0.6.1 # rsa pyasn1-modules==0.4.2 # via google-auth +pycparser==2.22 + # via cffi pydantic==2.11.4 # via # anthropic + # fastapi + # google-adk # google-cloud-aiplatform + # google-genai + # mcp # openai + # pydantic-settings 
pydantic-core==2.33.2 # via pydantic +pydantic-settings==2.9.1 + # via mcp pylint==3.2.5 # via -r requirements.in pyparsing==3.2.3 @@ -215,11 +302,18 @@ python-dateutil==2.9.0.post0 # via # google-cloud-bigquery # pandas +python-dotenv==1.1.0 + # via + # google-adk + # pydantic-settings +python-multipart==0.0.20 + # via mcp pytz==2025.2 # via pandas -pyyaml==6.0.1 +pyyaml==6.0.2 # via # -r requirements.in + # google-adk # huggingface-hub regex==2024.11.6 # via tiktoken @@ -229,13 +323,15 @@ requests==2.32.3 # google-api-core # google-cloud-bigquery # google-cloud-storage + # google-genai # huggingface-hub + # opentelemetry-resourcedetector-gcp # tiktoken rsa==4.9.1 # via google-auth rust-demangler==1.0 # via -r requirements.in -shapely==2.1.0 +shapely==2.1.1 # via google-cloud-aiplatform six==1.17.0 # via python-dateutil @@ -246,7 +342,16 @@ sniffio==1.3.1 # anthropic # anyio # openai -tiktoken==0.7.0 +sqlalchemy==2.0.41 + # via google-adk +sse-starlette==2.3.5 + # via mcp +starlette==0.46.2 + # via + # fastapi + # mcp + # sse-starlette +tiktoken==0.9.0 # via -r requirements.in tokenizers==0.21.1 # via anthropic @@ -262,20 +367,35 @@ typing-extensions==4.13.2 # via # anthropic # anyio + # fastapi # google-cloud-aiplatform + # google-genai # huggingface-hub # openai + # opentelemetry-resourcedetector-gcp + # opentelemetry-sdk # pydantic # pydantic-core + # sqlalchemy # typing-inspection -typing-inspection==0.4.0 - # via pydantic +typing-inspection==0.4.1 + # via + # pydantic + # pydantic-settings tzdata==2025.2 # via pandas +tzlocal==5.3.1 + # via google-adk uritemplate==4.1.1 # via google-api-python-client urllib3==2.4.0 # via requests +uvicorn==0.34.2 + # via + # google-adk + # mcp +websockets==15.0.1 + # via google-genai wrapt==1.17.2 # via deprecated yapf==0.40.1 diff --git a/results.py b/results.py index f0fee1d982..589303097d 100644 --- a/results.py +++ b/results.py @@ -593,27 +593,26 @@ class PreWritingResult(Result): explanation: str def __init__(self, - benchmark: Benchmark, - trial: int, - work_dirs: WorkDirs, - result_available: bool, - requirements: list[str] = [], - explanation: str = '', - fuzz_target_source: str = '', - build_script_source: str = '', - author: Any = None, - chat_history: Optional[dict] = None, - default_success: bool = False) -> None: - + benchmark: Benchmark, + trial: int, + work_dirs: WorkDirs, + result_available: bool, + requirements: Optional[list[str]] = None, + explanation: str = '', + fuzz_target_source: str = '', + build_script_source: str = '', + author: Any = None, + chat_history: Optional[dict] = None, + default_success: bool = False) -> None: + super().__init__(benchmark, trial, work_dirs, fuzz_target_source, - build_script_source, author, chat_history, - default_success) - + build_script_source, author, chat_history, default_success) + self.result_available = result_available - if result_available: + if result_available and requirements is not None: self.requirements = requirements self.explanation = explanation - + class BenchmarkResult: """All trial results for a benchmark in an experiment.""" From 554e5d3dddbbb9bab50f2e39d3fb0bc0c7043094 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Thu, 22 May 2025 13:40:47 +0000 Subject: [PATCH 03/64] Implemented a FuzzIntrospector tool and provided to the agent. 
--- agent/function_analyzer.py | 64 +++++++------------ agent_tests/function_analyzer_test.py | 19 +++--- llm_toolkit/prompt_builder.py | 27 +++++--- .../agent/function-analyzer-instruction.txt | 2 +- prompts/agent/function-analyzer-priming.txt | 4 +- tool/fuzz_introspector_tool.py | 35 ++++++++++ 6 files changed, 87 insertions(+), 64 deletions(-) diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index 88d58d622b..f4cfd6bc56 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -1,7 +1,7 @@ -""" -An LLM agent to analyze a function and identify its implicit requirements. -The results of this analysis will be used by the writer agents to +""" +An LLM agent to analyze a function and identify its implicit requirements. +The results of this analysis will be used by the writer agents to generate correct fuzz target for the function. """ @@ -19,44 +19,22 @@ from llm_toolkit import prompt_builder from results import Result, PreWritingResult from tool.base_tool import BaseTool +from tool.fuzz_introspector_tool import FuzzIntrospectorTool from google.adk.agents import Agent from google.adk.runners import Runner from google.adk.sessions import InMemorySessionService -from google.genai import types # For creating message Content/Parts +from google.genai import types -# logging.basicConfig(level=logging.INFO) +logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) -handler = logging.StreamHandler() -logger.addHandler(handler) - - -def get_function_source_tool(project_name: str, function_signature: str): - - """ - Retrieves a function's source using the project name and function signature. - - Args: - project_name (str): The name of the project. - function_signature (str): The signature of the function. - - Returns: - str: The source code of the function if found, otherwise an empty string. - """ - - function_code = introspector.query_introspector_function_source(project_name, function_signature) - - if function_code: - logger.info(f"Function with signature '{function_signature}' found and extracted.") - return function_code - else: - logger.info(f"Error: Function with signature '{function_signature}' not found in project '{project_name}'.") - return "" +# logger.setLevel(logging.INFO) +# handler = logging.StreamHandler() +# logger.addHandler(handler) class FunctionAnalyzer (BaseAgent): """An LLM agent to analyze a function and identify its implicit requirements. - The results of this analysis will be used by the writer agents to + The results of this analysis will be used by the writer agents to generate correct fuzz target for the function. """ @@ -75,10 +53,13 @@ def initialize(self, benchmark: benchmarklib.Benchmark): self.benchmark = benchmark # Initialize the prompt builder - self.prompt_builder = prompt_builder.FunctionAnalyzerTemplateBuilder(self.llm, self.benchmark) + builder = prompt_builder.FunctionAnalyzerTemplateBuilder(self.llm, self.benchmark) # Get the agent's instructions - analyzer_instruction = self.prompt_builder.build_instruction() + analyzer_instruction = builder.build_instruction() + + # Initialize the Fuzz Introspector tool + introspector_tool = FuzzIntrospectorTool(benchmark, self.name) # Create the agent using the ADK library function_analyzer = Agent( @@ -86,7 +67,7 @@ def initialize(self, benchmark: benchmarklib.Benchmark): model='gemini-2.0-flash', #TODO: Get the model name from args. Currently, some of the default names are incompatible with the ADK library. 
description=("Agent to analyze a function and identify its implicit requirements."), instruction=analyzer_instruction.get(), - tools=[get_function_source_tool] + tools=[introspector_tool._function_source], ) # Get user id and session id @@ -173,16 +154,17 @@ def execute(self, result_history: list[Result]) -> PreWritingResult: def _initial_prompt(self, results: list[Result]) -> Prompt: """Create the initial prompt for the agent.""" - - prompt = self.prompt_builder.build(project_name=self.benchmark.project, - function_signature=self.benchmark.function_signature) + + # Initialize the prompt builder + builder = prompt_builder.FunctionAnalyzerTemplateBuilder(self.llm, self.benchmark) + + prompt = builder.build_prompt() return prompt - - - \ No newline at end of file + + diff --git a/agent_tests/function_analyzer_test.py b/agent_tests/function_analyzer_test.py index ce16254880..f870490be9 100644 --- a/agent_tests/function_analyzer_test.py +++ b/agent_tests/function_analyzer_test.py @@ -7,21 +7,21 @@ RESULTS_DIR = './results' -def parse_args() -> argparse.Namespace: - """Parses command line arguments.""" +def parse_args() -> argparse.Namespace: + """Parses command line arguments.""" parser = argparse.ArgumentParser( description='Evaluate the function analyzer agent.') parser.add_argument('-y', '--benchmark-yaml', - type=str, - required=True, + type=str, + required=True, help='A benchmark YAML file.') - - parser.add_argument('-w', - '--work-dir', + + parser.add_argument('-w', + '--work-dir', default=RESULTS_DIR) - + parser.add_argument('-mr', '--max-round', type=int, @@ -33,7 +33,6 @@ def parse_args() -> argparse.Namespace: return args if __name__ == "__main__": - model = models.LLM.setup( ai_binary='', name='vertex_ai_gemini-1-5-chat' @@ -48,7 +47,7 @@ def parse_args() -> argparse.Namespace: if len(benchmarks) == 0: raise ValueError("No benchmarks found in the YAML file.") - + # Initialize the function analyzer with the first benchmark function_analyzer.initialize(benchmarks[0]) diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index b6b7a9792a..7dbba5e19d 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -808,7 +808,7 @@ def build(self, return self._prompt -class FunctionAnalyzerTemplateBuilder(PrototyperTemplateBuilder): +class FunctionAnalyzerTemplateBuilder(DefaultTemplateBuilder): """ Builder for function analyzer. """ def __init__(self, model: models.LLM, @@ -819,9 +819,9 @@ def __init__(self, # Load templates. 
self.function_analyzer_instruction_template_file = self._find_template( - self.agent_templare_dir, 'function-analyzer-instruction.txt') + AGENT_TEMPLATE_DIR, 'function-analyzer-instruction.txt') self.function_analyzer_prompt_template_file = self._find_template( - self.agent_templare_dir, 'function-analyzer-priming.txt') + AGENT_TEMPLATE_DIR, 'function-analyzer-priming.txt') def build_instruction(self) -> prompts.Prompt: """Constructs a prompt using the templates in |self| and saves it.""" @@ -834,15 +834,20 @@ def build_instruction(self) -> prompts.Prompt: return self._prompt - def build_prompt(self, project_name, function_signature) -> prompts.Prompt: + def build_prompt(self) -> prompts.Prompt: """Constructs a prompt using the templates in |self| and saves it.""" + if not self.benchmark: + logger.error('No benchmark provided for function analyzer template builder.') return self._prompt + print('Building function analyzer prompt for %s in %s', + self.benchmark.function_name, self.benchmark.project) + prompt = self._get_template(self.function_analyzer_prompt_template_file) - prompt.replace('{PROJECT_NAME}', project_name) - prompt.replace('{FUNCTION_SIGNATURE}', function_signature) + prompt = prompt.replace('{PROJECT_NAME}', self.benchmark.project) + prompt = prompt.replace('{FUNCTION_SIGNATURE}', self.benchmark.function_signature) self._prompt.append(prompt) @@ -856,10 +861,12 @@ def build(self, project_dir: str = '', project_name: str = '', function_signature: str = '') -> prompts.Prompt: - - """Constructs a prompt using the templates in |self| and saves it.""" - return self.build_prompt(project_name, function_signature) - + + raise NotImplementedError( + 'FunctionAnalyzerTemplateBuilder.build() should not be called. ' + 'Use build_instruction() or build_prompt() instead.' + ) + class DefaultJvmTemplateBuilder(PromptBuilder): """Default builder for JVM projects.""" diff --git a/prompts/agent/function-analyzer-instruction.txt b/prompts/agent/function-analyzer-instruction.txt index 1dfd4fa79a..494a7274a3 100644 --- a/prompts/agent/function-analyzer-instruction.txt +++ b/prompts/agent/function-analyzer-instruction.txt @@ -3,6 +3,6 @@ You are a professional software engineer whose job is to review software functio You will be provided a function signature and project name. Your task is to use the function signature and project name to get the function's source code, analyze the function's implementation, and provide a response in the expected format. We have provided you with the following tools. -1. get_function_source_tool: A tool for getting the function's source using the project name and function signature. +1. _function_source: A tool for getting the function's source using the project name and function signature. diff --git a/prompts/agent/function-analyzer-priming.txt b/prompts/agent/function-analyzer-priming.txt index 6c6152fbb4..54f07e4c3a 100644 --- a/prompts/agent/function-analyzer-priming.txt +++ b/prompts/agent/function-analyzer-priming.txt @@ -2,7 +2,7 @@ As a professional security engineer, you must analyze the source code of the provided function and retrieve its input requirements. The input requirements are necessary to enable the generation of valid fuzz drivers. In each requirement, you should include a one sentence summary of the reason why you included it. -Objective: Your goal is to retrieve the input requirements for the function signature `{FUNCTION_SIGNATURE}` in the project `{PROJECT_NAME}. 
+Objective: Your goal is to retrieve the input requirements for the function signature {FUNCTION_SIGNATURE} in the project {PROJECT_NAME}. @@ -10,7 +10,7 @@ Objective: Your goal is to retrieve the input requirements for the function sign Follow these steps to analyze a function and identify its input requirements: Step 1: Get the function's source -Use the get_function_source_tool tool provided to retrieve the source code of the provided function. +Use the _function_source tool provided to retrieve the source code of the provided function. You should invoke the tool using the function signature and proejct name provided. Step 2: Analyze the retrieved function source diff --git a/tool/fuzz_introspector_tool.py b/tool/fuzz_introspector_tool.py index e36e45f7fc..cbe16c5c03 100644 --- a/tool/fuzz_introspector_tool.py +++ b/tool/fuzz_introspector_tool.py @@ -14,6 +14,10 @@ """A tool for LLM agents to interact within Fuzz Introspector to access the project's information.""" from tool.base_tool import BaseTool +from data_prep import introspector +import logging + +logger = logging.getLogger(__name__) class FuzzIntrospectorTool(BaseTool): @@ -38,3 +42,34 @@ def _function_signature(self, function_name: str) -> str: """Calls the function signature API of the Fuzz Introspector.""" # A placeholder raise NotImplementedError + + def _function_source(self, project_name: str, function_signature: str) -> str: + + """ + Retrieves a function's source from the fuzz introspector API, + using the project's name and function's signature. + + Args: + project_name (str): The name of the project. + function_signature (str): The signature of the function. + + Returns: + str: The source code of the function if found, otherwise an empty string. + """ + + function_code = introspector.query_introspector_function_source(project_name, function_signature) + + if function_code.strip(): + logger.info("Function with signature '%s' found and extracted.", function_signature) + else: + logger.error("Error: Function with signature '%s' not found in project '%s'.", + function_signature, project_name) + + return function_code + + def tutorial(self) -> str: + raise NotImplementedError + + def execute(self, command: str) -> introspector.Any: + raise NotImplementedError + From fb8607d887235c47a82f6962404fedf4eb61e2df Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Thu, 22 May 2025 14:07:06 +0000 Subject: [PATCH 04/64] Formatted code using presubmit. --- agent/function_analyzer.py | 41 +++++++++------------------ agent_tests/function_analyzer_test.py | 30 ++++++++------------ llm_toolkit/prompt_builder.py | 9 +++--- tool/fuzz_introspector_tool.py | 20 +++++++------ 4 files changed, 42 insertions(+), 58 deletions(-) diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index 8a8b3d2ad9..f177c34663 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -17,34 +17,26 @@ generate correct fuzz target for the function. 
""" -import argparse import asyncio - -from typing import Optional - import logging -from agent.base_agent import BaseAgent -from data_prep import introspector -from experiment import benchmark as benchmarklib -from llm_toolkit.models import LLM -from llm_toolkit.prompts import Prompt -from llm_toolkit import prompt_builder -from results import Result, PreWritingResult -from tool.base_tool import BaseTool -from tool.fuzz_introspector_tool import FuzzIntrospectorTool from google.adk.agents import Agent from google.adk.runners import Runner from google.adk.sessions import InMemorySessionService from google.genai import types +from agent.base_agent import BaseAgent +from experiment import benchmark as benchmarklib +from llm_toolkit import prompt_builder +from llm_toolkit.prompts import Prompt +from results import PreWritingResult, Result +from tool.fuzz_introspector_tool import FuzzIntrospectorTool + logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -# logger.setLevel(logging.INFO) -# handler = logging.StreamHandler() -# logger.addHandler(handler) -class FunctionAnalyzer (BaseAgent): + +class FunctionAnalyzer(BaseAgent): """An LLM agent to analyze a function and identify its implicit requirements. The results of this analysis will be used by the writer agents to generate correct fuzz target for the function. @@ -56,7 +48,8 @@ def initialize(self, benchmark: benchmarklib.Benchmark): self.benchmark = benchmark # Initialize the prompt builder - builder = prompt_builder.FunctionAnalyzerTemplateBuilder(self.llm, self.benchmark) + builder = prompt_builder.FunctionAnalyzerTemplateBuilder( + self.llm, self.benchmark) # Get the agent's instructions analyzer_instruction = builder.build_instruction() @@ -82,7 +75,7 @@ def initialize(self, benchmark: benchmarklib.Benchmark): # Create the session service session_service = InMemorySessionService() - session = session_service.create_session( + session_service.create_session( app_name=self.name, user_id=user_id, session_id=session_id, @@ -165,15 +158,9 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: """Create the initial prompt for the agent.""" # Initialize the prompt builder - builder = prompt_builder.FunctionAnalyzerTemplateBuilder(self.llm, self.benchmark) + builder = prompt_builder.FunctionAnalyzerTemplateBuilder( + self.llm, self.benchmark) prompt = builder.build_prompt() return prompt - - - - - - - diff --git a/agent_tests/function_analyzer_test.py b/agent_tests/function_analyzer_test.py index 900e7b883d..89604cf0fc 100644 --- a/agent_tests/function_analyzer_test.py +++ b/agent_tests/function_analyzer_test.py @@ -31,23 +31,21 @@ def parse_args() -> argparse.Namespace: """Parses command line arguments.""" parser = argparse.ArgumentParser( - description='Evaluate the function analyzer agent.') + description='Evaluate the function analyzer agent.') parser.add_argument('-y', - '--benchmark-yaml', - type=str, - required=True, - help='A benchmark YAML file.') + '--benchmark-yaml', + type=str, + required=True, + help='A benchmark YAML file.') - parser.add_argument('-w', - '--work-dir', - default=RESULTS_DIR) + parser.add_argument('-w', '--work-dir', default=RESULTS_DIR) parser.add_argument('-mr', - '--max-round', - type=int, - default=100, - help='Max trial round for agents.') + '--max-round', + type=int, + default=100, + help='Max trial round for agents.') parsed_args = parser.parse_args() @@ -56,18 +54,14 @@ def parse_args() -> argparse.Namespace: if __name__ == "__main__": - model = models.LLM.setup( - 
ai_binary='', - name='vertex_ai_gemini-1-5-chat' - ) - + model = models.LLM.setup(ai_binary='', name='vertex_ai_gemini-1-5-chat') args = parse_args() function_analyzer = FunctionAnalyzer(trial=1, llm=model, args=args) benchmarks: List[Benchmark] = benchmarklib.Benchmark.from_yaml( - args.benchmark_yaml) + args.benchmark_yaml) if len(benchmarks) == 0: raise ValueError("No benchmarks found in the YAML file.") diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index 9fce66c456..cb7c06becd 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -840,7 +840,8 @@ def build_prompt(self) -> prompts.Prompt: """Constructs a prompt using the templates in |self| and saves it.""" if not self.benchmark: - logger.error('No benchmark provided for function analyzer template builder.') + logger.error( + 'No benchmark provided for function analyzer template builder.') return self._prompt print('Building function analyzer prompt for %s in %s', @@ -849,7 +850,8 @@ def build_prompt(self) -> prompts.Prompt: prompt = self._get_template(self.function_analyzer_prompt_template_file) prompt = prompt.replace('{PROJECT_NAME}', self.benchmark.project) - prompt = prompt.replace('{FUNCTION_SIGNATURE}', self.benchmark.function_signature) + prompt = prompt.replace('{FUNCTION_SIGNATURE}', + self.benchmark.function_signature) self._prompt.append(prompt) @@ -866,8 +868,7 @@ def build(self, raise NotImplementedError( 'FunctionAnalyzerTemplateBuilder.build() should not be called. ' - 'Use build_instruction() or build_prompt() instead.' - ) + 'Use build_instruction() or build_prompt() instead.') class DefaultJvmTemplateBuilder(PromptBuilder): diff --git a/tool/fuzz_introspector_tool.py b/tool/fuzz_introspector_tool.py index cbe16c5c03..f2626082bc 100644 --- a/tool/fuzz_introspector_tool.py +++ b/tool/fuzz_introspector_tool.py @@ -13,10 +13,11 @@ # limitations under the License. """A tool for LLM agents to interact within Fuzz Introspector to access the project's information.""" -from tool.base_tool import BaseTool -from data_prep import introspector import logging +from data_prep import introspector +from tool.base_tool import BaseTool + logger = logging.getLogger(__name__) @@ -44,7 +45,6 @@ def _function_signature(self, function_name: str) -> str: raise NotImplementedError def _function_source(self, project_name: str, function_signature: str) -> str: - """ Retrieves a function's source from the fuzz introspector API, using the project's name and function's signature. @@ -54,16 +54,19 @@ def _function_source(self, project_name: str, function_signature: str) -> str: function_signature (str): The signature of the function. Returns: - str: The source code of the function if found, otherwise an empty string. + str: Source code of the function if found, otherwise an empty string. 
""" - function_code = introspector.query_introspector_function_source(project_name, function_signature) + function_code = introspector.query_introspector_function_source( + project_name, function_signature) if function_code.strip(): - logger.info("Function with signature '%s' found and extracted.", function_signature) + logger.info("Function with signature '%s' found and extracted.", + function_signature) else: - logger.error("Error: Function with signature '%s' not found in project '%s'.", - function_signature, project_name) + logger.error( + "Error: Function with signature '%s' not found in project '%s'.", + function_signature, project_name) return function_code @@ -72,4 +75,3 @@ def tutorial(self) -> str: def execute(self, command: str) -> introspector.Any: raise NotImplementedError - From 52a5c125a1b078a2fe9afd99dc0afbb057a16e06 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Fri, 23 May 2025 13:11:14 +0000 Subject: [PATCH 05/64] Made the agent call synchronous. This is because the main program does not need to perform any operation while waiting for results from the agent. --- agent/function_analyzer.py | 12 +++++------- agent_tests/function_analyzer_test.py | 3 ++- llm_toolkit/prompt_builder.py | 4 ++-- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index 323586a8be..361688583c 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -17,7 +17,6 @@ generate correct fuzz target for the function. """ -import asyncio import logging from google.adk.agents import Agent @@ -114,8 +113,8 @@ def initialize(self, benchmark: benchmarklib.Benchmark): "Function Analyzer Agent created, with name: %s, and session id: %s", self.name, session_id) - async def call_agent_async(self, query: str, runner, user_id: str, - session_id: str) -> PreWritingResult: + def call_agent(self, query: str, runner: Runner, user_id: str, + session_id: str) -> PreWritingResult: """Call the agent asynchronously with the given query.""" logger.info(">>> User query: %s", query) @@ -126,7 +125,7 @@ async def call_agent_async(self, query: str, runner, user_id: str, result_available = False - async for event in runner.run_async( + for event in runner.run( user_id=user_id, session_id=session_id, new_message=content, @@ -142,7 +141,7 @@ async def call_agent_async(self, query: str, runner, user_id: str, logger.info("<<< Agent response: %s", final_response_text) - if result_available: + if result_available and final_response_text: # Get the requirements from the response requirements = self._parse_tags(final_response_text, 'requirement') else: @@ -167,8 +166,7 @@ def execute(self, result_history: list[Result]) -> PreWritingResult: query = prompt.gettext() user_id = "user" session_id = "session" - result = asyncio.run( - self.call_agent_async(query, self.runner, user_id, session_id)) + result = self.call_agent(query, self.runner, user_id, session_id) if result.result_available: # Save the result to the history diff --git a/agent_tests/function_analyzer_test.py b/agent_tests/function_analyzer_test.py index 89604cf0fc..8b2c2af73f 100644 --- a/agent_tests/function_analyzer_test.py +++ b/agent_tests/function_analyzer_test.py @@ -78,4 +78,5 @@ def parse_args() -> argparse.Namespace: # Print the result logger.info("Function Analyzer Result:") logger.info("Result available: %s", result.result_available) - logger.info("Requirements: %s", result.requirements) + if result.result_available: + logger.info("Requirements: %s", result.requirements) diff 
--git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index c57dd86dbb..a874237fcf 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -843,8 +843,8 @@ def build_prompt(self, project_name, function_signature) -> prompts.Prompt: prompt = self._get_template(self.function_analyzer_prompt_template_file) - prompt.replace('{PROJECT_NAME}', project_name) - prompt.replace('{FUNCTION_SIGNATURE}', function_signature) + prompt = prompt.replace('{PROJECT_NAME}', project_name) + prompt = prompt.replace('{FUNCTION_SIGNATURE}', function_signature) self._prompt.append(prompt) From b69f5062717a50bb4f3e1313c2df944c57ed88a7 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Fri, 23 May 2025 14:52:51 +0000 Subject: [PATCH 06/64] This commit includes the function's source in the agent's prompt. This commit refactors the agentic workflow so that the function's source is retrieved deterministically and included in the agemt. The prompts are also more specific on what requirements should be extracted. --- agent/function_analyzer.py | 9 +- llm_toolkit/prompt_builder.py | 10 +- .../agent/function-analyzer-instruction.txt | 96 ++++++++++++++++++- prompts/agent/function-analyzer-priming.txt | 92 ++---------------- 4 files changed, 114 insertions(+), 93 deletions(-) diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index f177c34663..ddac4bc6c6 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -32,7 +32,6 @@ from results import PreWritingResult, Result from tool.fuzz_introspector_tool import FuzzIntrospectorTool -logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -66,7 +65,7 @@ def initialize(self, benchmark: benchmarklib.Benchmark): description=( "Agent to analyze a function and identify its requirements."), instruction=analyzer_instruction.get(), - tools=[introspector_tool._function_source]) + ) # Get user id and session id # TODO: Figure out how to get this data @@ -92,7 +91,7 @@ def initialize(self, benchmark: benchmarklib.Benchmark): "Function Analyzer Agent created, with name: %s, and session id: %s", self.name, session_id) - async def call_agent_async(self, query: str, runner, user_id: str, + async def call_agent_async(self, query: str, runner:Runner, user_id: str, session_id: str) -> PreWritingResult: """Call the agent asynchronously with the given query.""" @@ -109,6 +108,8 @@ async def call_agent_async(self, query: str, runner, user_id: str, session_id=session_id, new_message=content, ): + + logger.info("Event is %s", event.content) if event.is_final_response(): if event.content and event.content.parts: final_response_text = event.content.parts[0].text @@ -120,7 +121,7 @@ async def call_agent_async(self, query: str, runner, user_id: str, logger.info("<<< Agent response: %s", final_response_text) - if result_available: + if result_available and final_response_text: # Get the requirements from the response requirements = self._parse_tags(final_response_text, 'requirement') else: diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index cb7c06becd..2147937f17 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -844,14 +844,20 @@ def build_prompt(self) -> prompts.Prompt: 'No benchmark provided for function analyzer template builder.') return self._prompt - print('Building function analyzer prompt for %s in %s', - self.benchmark.function_name, self.benchmark.project) + # First, we get the function's source + function_source = 
introspector.query_introspector_function_source( + self.benchmark.project, self.benchmark.function_signature) + + if not function_source: + logger.error("The source code for function %s was not found.", self.benchmark.function_name) + return self._prompt prompt = self._get_template(self.function_analyzer_prompt_template_file) prompt = prompt.replace('{PROJECT_NAME}', self.benchmark.project) prompt = prompt.replace('{FUNCTION_SIGNATURE}', self.benchmark.function_signature) + prompt = prompt.replace('{FUNCTION_SOURCE}', function_source) self._prompt.append(prompt) diff --git a/prompts/agent/function-analyzer-instruction.txt b/prompts/agent/function-analyzer-instruction.txt index deee9b9e1a..2c21215485 100644 --- a/prompts/agent/function-analyzer-instruction.txt +++ b/prompts/agent/function-analyzer-instruction.txt @@ -1,8 +1,96 @@ -You are a professional software engineer whose job is to review software functions, understand their intent, and identify important requirements of input and global variables that the function expects and needs to execute properly. +You are a professional software engineer whose job is to review software functions, understand their intent, and identify important requirements of input variables that the function expects and needs to execute without crashing. +The requirements you provide will be used by another agent to generate valid fuzz drivers for the target function. -You will be provided a function signature and project name. Your task is to use the function signature and project name to get the function's source code, analyze the function's implementation, and provide a response in the expected format. +You will be provided a project name, function signature and the implementation for the function. +Your task is to analyze the function's implementation using the steps provided and return a response in the expected format. -We have provided you with the following tools. -1. _function_source: A tool for getting the function's source using the project name and function signature. + +Follow these steps to analyze a function and identify its input requirements: + +Step 1: Get the function's source + * Use the _function_source tool to get the function's source. + * Invoke the tool using the project name and function signature. + * YOU MUST USE THE PROVIDED TOOL. DO NOT MAKE UP YOUR OWN CODE. + +Step 2: Identify all assertion statemens in the function. + * Note that some programs can have custom assertion statements, like require() or ensure(). + +Step 3: Identify input requirements necessary to satisfy the assertion statements. + * Each requirement MUST be precise for it to be useful. + * You MUST include a one-sentence summary why a specific requirement was included. + +Step 4: Compile the requirements you derived and return in the expected format. + + + + +Make sure your response follows the following format, enclosed in ``` ```. + +``` + + +project name: the name of the project provided +function signature: The function's signature + + + +The implementation of the function you were provided. + + + +A summary of what the function does. + + + + +First requirement + + +Second requirement + +... 
+ +nth requirement + + + + + + + +Here is an example response + +project name: htslib +function signature: int sam_index_build(const char *, int) + + + +static inline unsigned int func_source( + unsigned int bitcount, + unsigned int bitoffset, + const uint8_t* ptr +) { + require(bitoffset < 64) + unsigned int mask = (1 << bitcount) - 1; + ptr += bitoffset >> 3; + bitoffset &= 7; + unsigned int value = ptr[0] | (ptr[1] << 8); + value >>= bitoffset; + value &= mask; + return value; +} + + + +The sam_index_build function is used to build a sam index. It uses the input arguments to identify and retrieve the index to build. It returns 1 if the build succeeds and 0 if the build fails. + + + + +The second argument should be less than 64. This is to prevent an assertion violation in the program. + + + + diff --git a/prompts/agent/function-analyzer-priming.txt b/prompts/agent/function-analyzer-priming.txt index 4c67d1c5fd..489bef61ab 100644 --- a/prompts/agent/function-analyzer-priming.txt +++ b/prompts/agent/function-analyzer-priming.txt @@ -1,83 +1,9 @@ - -As a professional security engineer, you must analyze the source code of the provided function and retrieve its input requirements. -The input requirements are necessary to enable the generation of valid fuzz drivers. -In each requirement, you should include a one sentence summary of the reason why you included it. -Objective: Your goal is to retrieve the input requirements for the function signature {FUNCTION_SIGNATURE} in the project {PROJECT_NAME}. - - - - -Follow these steps to analyze a function and identify its input requirements: - -Step 1: Get the function's source -Use the _function_source tool provided to retrieve the source code of the provided function. -You should invoke the tool using the function signature and proejct name provided. - -Step 2: Analyze the retrieved function source -Identify any potential bugs, crashes or assertion violations in the retrieved function source. -Then identify requirements or constraints on the input variables that, if satisfied, will avoid the identified crashes. - - - - -Make sure your response follows the following format, enclosed in ``` ```. - -``` - - -project name: the name of the project provided -function signature: The function's signature - - - -The function's description - - - - -First requirement - - -Second requirement - -... - -nth requirement - - - - - - - -Here is an example response - -project name: htslib -function signature: int sam_index_build(const char *, int) - - - -The sam_index_build function is used to build a sam index. It uses the input arguments to identify and retrieve the index to build. It returns 1 if the build succeeds and 0 if the build fails. - - - - -The first input argument should be a validly allocated string. This is because the pointer is dereferenced by the function and there is no null check. - - -The first input argument should be null-terminated. This is because it is an argument to strlen, which requires null termination. - - -The second input argument should have a maximum value of MAX_INDEX. This is because it is used to index a static array of size MAX_INDEX. - - - - - - -Make sure you follow these instructions: - -1. Keep each requirement short and simple. Let the requirement contain the affected parameter, the expectation, and why the expectation is necessary. - - - + +Analyze and return the description and requirements for the function signature {FUNCTION_SIGNATURE} in the project {PROJECT_NAME}. 
+You will be provided the source implementation for the function.
+You MUST ensure that all requirements returned were derived from the function.
+
+
+
+{FUNCTION_SOURCE}
+

From 4e632c0cb2101b9ecb43ebc864a2b6ff57bed78b Mon Sep 17 00:00:00 2001
From: Paschal Amusuo
Date: Tue, 27 May 2025 14:45:25 +0000
Subject: [PATCH 07/64] Implemented the function analyzer agent as a sequential
 agent.

First, the context retriever agent retrieves the source code of the function
and its children for analysis. Then, the requirements extractor agent analyzes
the retrieved source and extracts the function's requirements.

---
 agent/function_analyzer.py                  | 49 +++++++++----
 data_prep/introspector.py                   | 15 +++-
 llm_toolkit/prompt_builder.py               | 28 +++++---
 .../agent/context-retriever-instruction.txt | 72 +++++++++++++++++++
 .../agent/function-analyzer-instruction.txt | 45 ++++--------
 prompts/agent/function-analyzer-priming.txt | 12 +---
 tool/fuzz_introspector_tool.py              | 53 +++++++++++++-
 7 files changed, 207 insertions(+), 67 deletions(-)
 create mode 100644 prompts/agent/context-retriever-instruction.txt

diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py
index f6a92e21d0..83ea64d8ce 100644
--- a/agent/function_analyzer.py
+++ b/agent/function_analyzer.py
@@ -17,9 +17,11 @@
 generate correct fuzz target for the function.
 """

+import asyncio
 import logging
+from typing import Optional

-from google.adk.agents import Agent
+from google.adk.agents import Agent, SequentialAgent, LlmAgent
 from google.adk.runners import Runner
 from google.adk.sessions import InMemorySessionService
 from google.genai import types
@@ -49,22 +51,40 @@ def initialize(self, benchmark: benchmarklib.Benchmark):
     builder = prompt_builder.FunctionAnalyzerTemplateBuilder(
         self.llm, self.benchmark)

-    # Get the agent's instructions
-    analyzer_instruction = builder.build_instruction()
-
     # Initialize the Fuzz Introspector tool
     introspector_tool = FuzzIntrospectorTool(benchmark, self.name)

+    context_retriever = LlmAgent(
+        name="ContextRetrieverAgent",
+        model='gemini-2.0-flash',
+        description=(
+            "Retrieves the implementation of a function and its children from Fuzz Introspector."),
+        instruction=builder.build_context_retriever_instruction().get(),
+        tools=[introspector_tool._function_source_with_signature, introspector_tool._function_source_with_name],
+        generate_content_config=types.GenerateContentConfig(
+            temperature=0.0,),
+        output_key="FUNCTION_SOURCE",
+    )
+
     # Create the agent using the ADK library
-    function_analyzer = Agent(
-        name=self.name,
+    requirements_extractor = LlmAgent(
+        name="RequirementsExtractorAgent",
         # TODO: Get the model name from args.
         # Currently, the default names are incompatible with the ADK library.
model='gemini-2.0-flash', description=( - "Agent to analyze a function and identify its requirements."), - instruction=analyzer_instruction.get(), - ) + "Extracts a function's requirements from its source implementation."), + instruction=builder.build_instruction().get(), + output_key="FUNCTION_REQUIREMENTS", + ) + + # Create the function analyzer agent + function_analyzer = SequentialAgent( + name="FunctionAnalyzerAgent", + sub_agents=[context_retriever, requirements_extractor], + description=( + "Sequential agent to retrieve a function's source, analyze it and extract its requirements."), + ) # Get user id and session id # TODO: Figure out how to get this data @@ -90,7 +110,7 @@ def initialize(self, benchmark: benchmarklib.Benchmark): "Function Analyzer Agent created, with name: %s, and session id: %s", self.name, session_id) - def call_agent(self, query: str, runner: Runner, user_id: str, + async def call_agent(self, query: str, runner: Runner, user_id: str, session_id: str) -> PreWritingResult: """Call the agent asynchronously with the given query.""" @@ -102,7 +122,7 @@ def call_agent(self, query: str, runner: Runner, user_id: str, result_available = False - for event in runner.run( + async for event in runner.run_async( user_id=user_id, session_id=session_id, new_message=content, @@ -116,7 +136,6 @@ def call_agent(self, query: str, runner: Runner, user_id: str, elif event.actions and event.actions.escalate: error_message = event.error_message or 'No specific message.' final_response_text = f"Agent escalated: {error_message}" - break logger.info("<<< Agent response: %s", final_response_text) @@ -145,15 +164,15 @@ def execute(self, result_history: list[Result]) -> PreWritingResult: query = prompt.gettext() user_id = "user" session_id = "session" - result = self.call_agent(query, self.runner, user_id, session_id) + result = asyncio.run(self.call_agent(query, self.runner, user_id, session_id)) - if result.result_available: + if result and result.result_available: # Save the result to the history result_history.append(result) return result - def _initial_prompt(self, results: list[Result]) -> Prompt: + def _initial_prompt(self, results: Optional[list[Result]] = None) -> Prompt: """Create the initial prompt for the agent.""" # Initialize the prompt builder diff --git a/data_prep/introspector.py b/data_prep/introspector.py index 0f6d010e2f..ef6bcb3f6f 100755 --- a/data_prep/introspector.py +++ b/data_prep/introspector.py @@ -76,6 +76,7 @@ INTROSPECTOR_HARNESS_SOURCE_AND_EXEC = '' INTROSPECTOR_LANGUAGE_STATS = '' INTROSPECTOR_GET_TARGET_FUNCTION = '' +INTROSPECTOR_GET_ALL_FUNCTIONS = '' INTROSPECTOR_HEADERS_FOR_FUNC = '' INTROSPECTOR_SAMPLE_XREFS = '' @@ -114,7 +115,7 @@ def set_introspector_endpoints(endpoint): INTROSPECTOR_ORACLE_ALL_TESTS, INTROSPECTOR_JVM_PROPERTIES, \ INTROSPECTOR_TEST_SOURCE, INTROSPECTOR_HARNESS_SOURCE_AND_EXEC, \ INTROSPECTOR_JVM_PUBLIC_CLASSES, INTROSPECTOR_LANGUAGE_STATS, \ - INTROSPECTOR_GET_TARGET_FUNCTION + INTROSPECTOR_GET_TARGET_FUNCTION, INTROSPECTOR_GET_ALL_FUNCTIONS INTROSPECTOR_ENDPOINT = endpoint @@ -156,6 +157,8 @@ def set_introspector_endpoints(endpoint): f'{INTROSPECTOR_ENDPOINT}/database-language-stats') INTROSPECTOR_GET_TARGET_FUNCTION = ( f'{INTROSPECTOR_ENDPOINT}/get-target-function') + INTROSPECTOR_GET_ALL_FUNCTIONS = ( + f'{INTROSPECTOR_ENDPOINT}/all-functions') def _construct_url(api: str, params: dict) -> str: @@ -166,6 +169,8 @@ def _construct_url(api: str, params: dict) -> str: def _query_introspector(api: str, params: dict) -> 
Optional[requests.Response]: """Queries FuzzIntrospector API and returns the json payload, returns an empty dict if unable to get data.""" + + logger.info('Querying FuzzIntrospector API: %s\n', api) for attempt_num in range(1, MAX_RETRY + 1): try: resp = requests.get(api, params, timeout=TIMEOUT) @@ -322,6 +327,14 @@ def query_introspector_function_source(project: str, func_sig: str) -> str: return _get_data(resp, 'source', '') +def query_introspector_all_functions(project: str) -> list[dict]: + """Queries FuzzIntrospector API for all functions of |project|.""" + resp = _query_introspector(INTROSPECTOR_GET_ALL_FUNCTIONS, { + 'project': project, + }) + return _get_data(resp, 'functions', []) + + def query_introspector_function_line(project: str, func_sig: str) -> list: """Queries FuzzIntrospector API for source line of |func_sig|.""" resp = _query_introspector(INTROSPECTOR_FUNCTION_SOURCE, { diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index 2147937f17..cb088a4ce8 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -821,11 +821,15 @@ def __init__(self, # Load templates. self.function_analyzer_instruction_template_file = self._find_template( AGENT_TEMPLATE_DIR, 'function-analyzer-instruction.txt') + self.context_retrieve_template_file = self._find_template( + AGENT_TEMPLATE_DIR, 'context-retriever-instruction.txt') self.function_analyzer_prompt_template_file = self._find_template( AGENT_TEMPLATE_DIR, 'function-analyzer-priming.txt') def build_instruction(self) -> prompts.Prompt: """Constructs a prompt using the templates in |self| and saves it.""" + + self._prompt = self._model.prompt_type()(None) if not self.benchmark: return self._prompt @@ -836,20 +840,27 @@ def build_instruction(self) -> prompts.Prompt: return self._prompt - def build_prompt(self) -> prompts.Prompt: + def build_context_retriever_instruction(self) -> prompts.Prompt: """Constructs a prompt using the templates in |self| and saves it.""" + self._prompt = self._model.prompt_type()(None) + if not self.benchmark: - logger.error( - 'No benchmark provided for function analyzer template builder.') return self._prompt - # First, we get the function's source - function_source = introspector.query_introspector_function_source( - self.benchmark.project, self.benchmark.function_signature) + prompt = self._get_template( + self.context_retrieve_template_file) + + self._prompt.append(prompt) - if not function_source: - logger.error("The source code for function %s was not found.", self.benchmark.function_name) + return self._prompt + + def build_prompt(self) -> prompts.Prompt: + """Constructs a prompt using the templates in |self| and saves it.""" + + if not self.benchmark: + logger.error( + 'No benchmark provided for function analyzer template builder.') return self._prompt prompt = self._get_template(self.function_analyzer_prompt_template_file) @@ -857,7 +868,6 @@ def build_prompt(self) -> prompts.Prompt: prompt = prompt.replace('{PROJECT_NAME}', self.benchmark.project) prompt = prompt.replace('{FUNCTION_SIGNATURE}', self.benchmark.function_signature) - prompt = prompt.replace('{FUNCTION_SOURCE}', function_source) self._prompt.append(prompt) diff --git a/prompts/agent/context-retriever-instruction.txt b/prompts/agent/context-retriever-instruction.txt new file mode 100644 index 0000000000..36fbbf76d7 --- /dev/null +++ b/prompts/agent/context-retriever-instruction.txt @@ -0,0 +1,72 @@ +You are a helpful agent. 
Your task is to use the provided tools to retrieve the source code implementations of a target function and those of the children functions it calls. + +You will be provided with two tools, _function_source_with_signature and _function_source_with_name. + +You MUST use these tools to get the requested function implementations. +DO NOT MAKE UP A FUNCTION BY YOURSELF! + +Here are the steps you should take to perform your task. + +Step 1: Get the implementation of the target function. + * Use the _function_source_with_signature tool to get the implementation of the target function. + * This tool takes as argument, a project name and a function signature. + * If successful, it returns the function source. If not, it returns an empty string. + +Step 2: Get the names of children functions from the target function. + * Analyze the target function and retrieve the names of other functions it calls. + +Step 3: Get the implementations of the first-level children functions. + * For each called function retrieved in step 2, use the _function_source_with_name function to retrieve the implementation of the function. + * This tool takes as argument, a project name and a function name. + * If successful, it returns the function source. Else, it returns an empty string. + +Before you return a response, MAKE SURE you check that you used the provided tools to get the functions you are returning. + +You are to return your result using the following format. + + + + +Function's signature + + +The retrieved source code. + + + + + + +Function's name + + +The retrieved source code. + + + + + +Function's name + + +The retrieved source code. + + + +... + + + +Function's name + + +The retrieved source code. + + + + + + +You will be provided with the following tools. +1. _function_source_with_signature: Use this tool to retrieve the function's implementation in step 1, where you'll have the project's name and function's signature. +2. _function_source_with_name: Use this tool to retrieve the function's implementation in step 3, where you will only have the project's name and function's name. \ No newline at end of file diff --git a/prompts/agent/function-analyzer-instruction.txt b/prompts/agent/function-analyzer-instruction.txt index 2c21215485..6f9fef3acf 100644 --- a/prompts/agent/function-analyzer-instruction.txt +++ b/prompts/agent/function-analyzer-instruction.txt @@ -1,25 +1,27 @@ -You are a professional software engineer whose job is to review software functions, understand their intent, and identify important requirements of input variables that the function expects and needs to execute without crashing. +You are a professional security engineer. + +Your objective is to analyze the function's implementation using the steps provided and return a response in the expected format. The requirements you provide will be used by another agent to generate valid fuzz drivers for the target function. -You will be provided a project name, function signature and the implementation for the function. -Your task is to analyze the function's implementation using the steps provided and return a response in the expected format. +The function you will analyze is provided below. We have provided the target function, and the implementations of its children functions. + + +{{FUNCTION_SOURCE}} Follow these steps to analyze a function and identify its input requirements: -Step 1: Get the function's source - * Use the _function_source tool to get the function's source. - * Invoke the tool using the project name and function signature. 
- * YOU MUST USE THE PROVIDED TOOL. DO NOT MAKE UP YOUR OWN CODE. - -Step 2: Identify all assertion statemens in the function. +Step 1: Identify all Fuzzing Crash Indicators (FCI) in the function. + * Fuzz Crash Indicators are statements that can cause the program to crash if expected conditions are violated. + * They include assertion statements, array indexing statements, pointer dereferencing statements, memory access statements, string handling statements, etc. * Note that some programs can have custom assertion statements, like require() or ensure(). -Step 3: Identify input requirements necessary to satisfy the assertion statements. +Step 2: Identify the input requirements necessary to ensure the safety of each identified Fuzzing Crash Indicators. * Each requirement MUST be precise for it to be useful. * You MUST include a one-sentence summary why a specific requirement was included. + * You should not repeat any requirement, even if it is necessary to satisfy multiple FCIs. -Step 4: Compile the requirements you derived and return in the expected format. +Step 3: Compile the requirements you derived and return in the expected format. @@ -33,10 +35,6 @@ project name: the name of the project provided function signature: The function's signature - -The implementation of the function you were provided. - - A summary of what the function does. @@ -64,23 +62,6 @@ project name: htslib function signature: int sam_index_build(const char *, int) - -static inline unsigned int func_source( - unsigned int bitcount, - unsigned int bitoffset, - const uint8_t* ptr -) { - require(bitoffset < 64) - unsigned int mask = (1 << bitcount) - 1; - ptr += bitoffset >> 3; - bitoffset &= 7; - unsigned int value = ptr[0] | (ptr[1] << 8); - value >>= bitoffset; - value &= mask; - return value; -} - - The sam_index_build function is used to build a sam index. It uses the input arguments to identify and retrieve the index to build. It returns 1 if the build succeeds and 0 if the build fails. diff --git a/prompts/agent/function-analyzer-priming.txt b/prompts/agent/function-analyzer-priming.txt index 489bef61ab..e5b0f587d1 100644 --- a/prompts/agent/function-analyzer-priming.txt +++ b/prompts/agent/function-analyzer-priming.txt @@ -1,9 +1,3 @@ - -Analyze and return the description and requirements for the function signature {FUNCTION_SIGNATURE} in the project {PROJECT_NAME}. -You will be provided the source implementation for the function. -You MUST ensure that all requirements returned where derived from the function. - - - -{FUNCTION_SOURCE} - +First, get the implementations of the target function {FUNCTION_SIGNATURE} + and all its children functions (functions it calls) in the project {PROJECT_NAME}. + Then, analyze the implementation and extract the requirements of the target function. 
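For orientation, the priming text above is not sent to the model verbatim: elsewhere in this series, FunctionAnalyzerTemplateBuilder.build_prompt() fills in {FUNCTION_SIGNATURE} and {PROJECT_NAME} from the benchmark under analysis, and FunctionAnalyzer.execute() turns the result into the query for the sequential agent. A minimal sketch of that flow, using names taken from the surrounding diffs; llm, benchmark and an initialized function_analyzer are assumed to already exist:

# Sketch only -- mirrors FunctionAnalyzer.execute()/_initial_prompt() in this patch.
builder = prompt_builder.FunctionAnalyzerTemplateBuilder(llm, benchmark)
prompt = builder.build_prompt()   # substitutes {FUNCTION_SIGNATURE} and {PROJECT_NAME}
query = prompt.gettext()
result = asyncio.run(
    function_analyzer.call_agent(query, function_analyzer.runner,
                                 user_id="user", session_id="session"))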
\ No newline at end of file diff --git a/tool/fuzz_introspector_tool.py b/tool/fuzz_introspector_tool.py index f2626082bc..94988f0bd1 100644 --- a/tool/fuzz_introspector_tool.py +++ b/tool/fuzz_introspector_tool.py @@ -16,6 +16,7 @@ import logging from data_prep import introspector +from experiment.benchmark import Benchmark from tool.base_tool import BaseTool logger = logging.getLogger(__name__) @@ -24,6 +25,10 @@ class FuzzIntrospectorTool(BaseTool): """Calls FI API with params.""" + def __init__(self, benchmark: Benchmark, name: str = ''): + super().__init__(benchmark, name) + self.project_functions = None + def _source_code(self, filename: str, start_line: int, end_line: int) -> str: """Calls the source code API of the Fuzz Introspector.""" # A placeholder @@ -44,7 +49,7 @@ def _function_signature(self, function_name: str) -> str: # A placeholder raise NotImplementedError - def _function_source(self, project_name: str, function_signature: str) -> str: + def _function_source_with_signature(self, project_name: str, function_signature: str) -> str: """ Retrieves a function's source from the fuzz introspector API, using the project's name and function's signature. @@ -57,6 +62,9 @@ def _function_source(self, project_name: str, function_signature: str) -> str: str: Source code of the function if found, otherwise an empty string. """ + logger.info("Retrieving function source for '%s' in project '%s'.", + function_signature, project_name) + function_code = introspector.query_introspector_function_source( project_name, function_signature) @@ -70,6 +78,49 @@ def _function_source(self, project_name: str, function_signature: str) -> str: return function_code + def _function_source_with_name(self, project_name: str, function_name: str) -> str: + """ + Retrieves a function's source from the fuzz introspector API, + using the project's name and function's name. + This function first retrieves the list of all functions in the project, so it can get the function's signature. + Then it uses the function's signature to retrieve the source code. + + Args: + project_name (str): The name of the project. + function_name (str): The name of the function. + + Returns: + str: Source code of the function if found, otherwise an empty string. + """ + + logger.info("Retrieving function source for '%s' in project '%s'.", + function_name, project_name) + + if self.project_functions is None: + logger.info( + "Project functions not initialized. Initializing for project '%s'.", + project_name) + functions_list = introspector.query_introspector_all_functions(project_name) + logger.info("Functions list:\n%s", functions_list) + if functions_list: + self.project_functions = { + func["debug_summary"]["name"]: func + for func in functions_list + if "debug_summary" in func and "name" in func["debug_summary"] + } + else: + self.project_functions = None + + if self.project_functions is None or function_name not in self.project_functions: + logger.error("Error: Required function not found for project '%s'.", + project_name) + return "" + + function_signature = self.project_functions[function_name]["function_signature"] + + return self._function_source_with_signature(project_name, + function_signature) + def tutorial(self) -> str: raise NotImplementedError From e82309e5051474915d252a48e9b5fbf4e26e6f4d Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Tue, 27 May 2025 18:41:01 +0000 Subject: [PATCH 08/64] This commits modifies the function analyzer agent to return both the raw result and the list of requirements. 
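In practice this means a caller of the analyzer can now read both pieces of information from the returned PreWritingResult. A small usage sketch (illustrative only, not part of the diffs below; function_analyzer is an initialized FunctionAnalyzer):

# Sketch: consuming the raw response and the parsed requirements.
result = function_analyzer.execute([])        # returns a PreWritingResult
if result.result_available:
    print(result.result_raw)                  # full tagged response from the agent
    for requirement in result.requirements:   # entries parsed from <requirement> tags
        print('-', requirement)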
The commit also updates function_analyzer_test to process multiple benchmarks in a provided file and write the results to a results file. --- agent/function_analyzer.py | 11 ++-- agent_tests/function_analyzer_test.py | 57 +++++++++++++------ .../agent/context-retriever-instruction.txt | 5 +- .../agent/function-analyzer-instruction.txt | 5 +- results.py | 10 ++-- 5 files changed, 61 insertions(+), 27 deletions(-) diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index 83ea64d8ce..d91df2f537 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -130,20 +130,22 @@ async def call_agent(self, query: str, runner: Runner, user_id: str, logger.info("Event is %s", event.content) if event.is_final_response(): - if event.content and event.content.parts: + if event.content and event.content.parts and event.content.parts[0].text: final_response_text = event.content.parts[0].text result_available = True elif event.actions and event.actions.escalate: - error_message = event.error_message or 'No specific message.' - final_response_text = f"Agent escalated: {error_message}" + error_message = event.error_message + logger.error(f"Agent escalated: %s", error_message) logger.info("<<< Agent response: %s", final_response_text) - if result_available and final_response_text: + if result_available and self._parse_tag(final_response_text, 'response'): # Get the requirements from the response requirements = self._parse_tags(final_response_text, 'requirement') + result_raw = self._parse_tag(final_response_text, 'response') else: requirements = [] + result_raw = '' # Prepare the result result = PreWritingResult( @@ -151,6 +153,7 @@ async def call_agent(self, query: str, runner: Runner, user_id: str, trial=self.trial, work_dirs=self.args.work_dir, result_available=result_available, + result_raw=result_raw, requirements=requirements, ) diff --git a/agent_tests/function_analyzer_test.py b/agent_tests/function_analyzer_test.py index 8b2c2af73f..15e634b7da 100644 --- a/agent_tests/function_analyzer_test.py +++ b/agent_tests/function_analyzer_test.py @@ -15,6 +15,7 @@ import argparse import logging +import os from typing import List from agent.function_analyzer import FunctionAnalyzer @@ -22,6 +23,8 @@ from experiment.benchmark import Benchmark from llm_toolkit import models +from run_all_experiments import prepare_experiment_targets + logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -39,6 +42,11 @@ def parse_args() -> argparse.Namespace: required=True, help='A benchmark YAML file.') + parser.add_argument('-d', + '--benchmarks-directory', + type=str, + help='A directory containing benchmark YAML files.') + parser.add_argument('-w', '--work-dir', default=RESULTS_DIR) parser.add_argument('-mr', @@ -58,25 +66,42 @@ def parse_args() -> argparse.Namespace: args = parse_args() + # Initialize the working directory + args.work_dirs = os.makedirs(args.work_dir, exist_ok=True) + + # Initialize the function analyzer function_analyzer = FunctionAnalyzer(trial=1, llm=model, args=args) - benchmarks: List[Benchmark] = benchmarklib.Benchmark.from_yaml( - args.benchmark_yaml) + # Initialize benchmarks + benchmarks: List[Benchmark] = prepare_experiment_targets(args) if len(benchmarks) == 0: raise ValueError("No benchmarks found in the YAML file.") - test_benchmark = benchmarks[0] - logger.info("Loaded benchmark for function: %s", test_benchmark.function_name) - - # Initialize the function analyzer with the first benchmark - function_analyzer.initialize(test_benchmark) - 
- # Run the function analyzer - result = function_analyzer.execute([]) - - # Print the result - logger.info("Function Analyzer Result:") - logger.info("Result available: %s", result.result_available) - if result.result_available: - logger.info("Requirements: %s", result.requirements) + logger.info("Loaded %d benchmarks from the YAML file %s.", len(benchmarks), args.benchmark_yaml) + + # Analyze each benchmark + for test_benchmark in benchmarks: + logger.info("Loaded benchmark (%d/%d) for function: %s", + benchmarks.index(test_benchmark) + 1, + len(benchmarks), + test_benchmark.function_name) + + # Initialize the function analyzer with the first benchmark + function_analyzer.initialize(test_benchmark) + + # Run the function analyzer + result = function_analyzer.execute([]) + + # If result is available, write it to the work_dirs directory + if result.result_available and result.result_raw: + result_file = os.path.join(args.work_dir, + f"{test_benchmark.project}_{test_benchmark.function_name}.txt") + with open(result_file, 'w') as f: + # f.write(f"Requirements for {test_benchmark.function_name}:\n") + # for req in result.requirements: + # f.write(f"- {req}\n") + f.write(result.result_raw) + logger.info("Analysis results written to %s", result_file) + else: + logger.info("No requirements found for benchmark %s", test_benchmark.function_name) diff --git a/prompts/agent/context-retriever-instruction.txt b/prompts/agent/context-retriever-instruction.txt index 36fbbf76d7..33387eda0f 100644 --- a/prompts/agent/context-retriever-instruction.txt +++ b/prompts/agent/context-retriever-instruction.txt @@ -5,6 +5,8 @@ You will be provided with two tools, _function_source_with_signature and _functi You MUST use these tools to get the requested function implementations. DO NOT MAKE UP A FUNCTION BY YOURSELF! +YOU MUST USE AT LEAST, ONE TOOL, WHEN YOU ARE CALLED. + Here are the steps you should take to perform your task. Step 1: Get the implementation of the target function. @@ -13,7 +15,8 @@ Step 1: Get the implementation of the target function. * If successful, it returns the function source. If not, it returns an empty string. Step 2: Get the names of children functions from the target function. - * Analyze the target function and retrieve the names of other functions it calls. + * Analyze the target function and retrieve the names of other functions it calls (eg function()). + * Also retrieve the names of any class or struct functions it calls (eg struct.function()) Step 3: Get the implementations of the first-level children functions. * For each called function retrieved in step 2, use the _function_source_with_name function to retrieve the implementation of the function. diff --git a/prompts/agent/function-analyzer-instruction.txt b/prompts/agent/function-analyzer-instruction.txt index 6f9fef3acf..e1dc112b9d 100644 --- a/prompts/agent/function-analyzer-instruction.txt +++ b/prompts/agent/function-analyzer-instruction.txt @@ -29,7 +29,7 @@ Step 3: Compile the requirements you derived and return in the expected format. Make sure your response follows the following format, enclosed in ``` ```. ``` - + project name: the name of the project provided function signature: The function's signature @@ -51,12 +51,14 @@ Second requirement nth requirement + Here is an example response + project name: htslib function signature: int sam_index_build(const char *, int) @@ -71,6 +73,7 @@ The sam_index_build function is used to build a sam index. It uses the input arg The second argument should be less than 64. 
This is to prevent an assertion violation in the program. + diff --git a/results.py b/results.py index 589303097d..135a86827d 100644 --- a/results.py +++ b/results.py @@ -589,16 +589,16 @@ def to_dict(self) -> dict: class PreWritingResult(Result): """ The result of the function analyzer. """ result_available: bool - requirements: list[str] - explanation: str + result_raw: str = '' + requirements: Optional[list[str]] def __init__(self, benchmark: Benchmark, trial: int, work_dirs: WorkDirs, result_available: bool, + result_raw: str = '', requirements: Optional[list[str]] = None, - explanation: str = '', fuzz_target_source: str = '', build_script_source: str = '', author: Any = None, @@ -609,9 +609,9 @@ def __init__(self, build_script_source, author, chat_history, default_success) self.result_available = result_available - if result_available and requirements is not None: + if result_available: self.requirements = requirements - self.explanation = explanation + self.result_raw = result_raw class BenchmarkResult: From a242059bed638a0cc0d88c92f5e86d81bd804adb Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Tue, 27 May 2025 19:17:45 +0000 Subject: [PATCH 09/64] Ran presubmit and fixed reported issues. --- agent/function_analyzer.py | 47 +++++++++---------- agent_tests/function_analyzer_test.py | 19 ++++---- data_prep/introspector.py | 3 +- ...er__Z10encode_ise12quant_methodjPKhPhj.txt | 26 ++++++++++ ...criptorRK25symbolic_compressed_blockPh.txt | 20 ++++++++ llm_toolkit/prompt_builder.py | 3 +- tool/fuzz_introspector_tool.py | 21 +++++---- 7 files changed, 93 insertions(+), 46 deletions(-) create mode 100644 function-analyzer-result-sample/astc-encoder__Z10encode_ise12quant_methodjPKhPhj.txt create mode 100644 function-analyzer-result-sample/astc-encoder__Z20symbolic_to_physicalRK21block_size_descriptorRK25symbolic_compressed_blockPh.txt diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index d91df2f537..9cf343b75c 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -21,7 +21,7 @@ import logging from typing import Optional -from google.adk.agents import Agent, SequentialAgent, LlmAgent +from google.adk.agents import LlmAgent, SequentialAgent from google.adk.runners import Runner from google.adk.sessions import InMemorySessionService from google.genai import types @@ -57,12 +57,14 @@ def initialize(self, benchmark: benchmarklib.Benchmark): context_retriever = LlmAgent( name="ContextRetrieverAgent", model='gemini-2.0-flash', - description=( - "Retrieves the implementation of a function and its children from Fuzz Introspector."), + description="""Retrieves the implementation of a function + and its children from Fuzz Introspector.""", instruction=builder.build_context_retriever_instruction().get(), - tools=[introspector_tool._function_source_with_signature, introspector_tool._function_source_with_name], - generate_content_config=types.GenerateContentConfig( - temperature=0.0,), + tools=[ + introspector_tool.function_source_with_signature, + introspector_tool.function_source_with_name + ], + generate_content_config=types.GenerateContentConfig(temperature=0.0,), output_key="FUNCTION_SOURCE", ) @@ -72,8 +74,8 @@ def initialize(self, benchmark: benchmarklib.Benchmark): # TODO: Get the model name from args. # Currently, the default names are incompatible with the ADK library. 
model='gemini-2.0-flash', - description=( - "Extracts a function's requirements from its source implementation."), + description="""Extracts a function's requirements + from its source implementation.""", instruction=builder.build_instruction().get(), output_key="FUNCTION_REQUIREMENTS", ) @@ -82,21 +84,16 @@ def initialize(self, benchmark: benchmarklib.Benchmark): function_analyzer = SequentialAgent( name="FunctionAnalyzerAgent", sub_agents=[context_retriever, requirements_extractor], - description=( - "Sequential agent to retrieve a function's source, analyze it and extract its requirements."), + description="""Sequential agent to retrieve a function's source, + analyze it and extract its requirements.""", ) - # Get user id and session id - # TODO: Figure out how to get this data - user_id = "user" - session_id = "session" - # Create the session service session_service = InMemorySessionService() session_service.create_session( app_name=self.name, - user_id=user_id, - session_id=session_id, + user_id="user", + session_id=f"session_{self.trial}", ) # Create the runner @@ -106,12 +103,10 @@ def initialize(self, benchmark: benchmarklib.Benchmark): session_service=session_service, ) - logger.info( - "Function Analyzer Agent created, with name: %s, and session id: %s", - self.name, session_id) + logger.info("Function Analyzer Agent created, with name: %s", self.name) async def call_agent(self, query: str, runner: Runner, user_id: str, - session_id: str) -> PreWritingResult: + session_id: str) -> PreWritingResult: """Call the agent asynchronously with the given query.""" logger.info(">>> User query: %s", query) @@ -130,12 +125,13 @@ async def call_agent(self, query: str, runner: Runner, user_id: str, logger.info("Event is %s", event.content) if event.is_final_response(): - if event.content and event.content.parts and event.content.parts[0].text: + if (event.content and event.content.parts and + event.content.parts[0].text): final_response_text = event.content.parts[0].text result_available = True elif event.actions and event.actions.escalate: error_message = event.error_message - logger.error(f"Agent escalated: %s", error_message) + logger.error("Agent escalated: %s", error_message) logger.info("<<< Agent response: %s", final_response_text) @@ -166,8 +162,9 @@ def execute(self, result_history: list[Result]) -> PreWritingResult: prompt = self._initial_prompt(result_history) query = prompt.gettext() user_id = "user" - session_id = "session" - result = asyncio.run(self.call_agent(query, self.runner, user_id, session_id)) + session_id = f"session_{self.trial}" + result = asyncio.run( + self.call_agent(query, self.runner, user_id, session_id)) if result and result.result_available: # Save the result to the history diff --git a/agent_tests/function_analyzer_test.py b/agent_tests/function_analyzer_test.py index 15e634b7da..35105522d8 100644 --- a/agent_tests/function_analyzer_test.py +++ b/agent_tests/function_analyzer_test.py @@ -19,10 +19,9 @@ from typing import List from agent.function_analyzer import FunctionAnalyzer -from experiment import benchmark as benchmarklib from experiment.benchmark import Benchmark +from experiment.workdir import WorkDirs from llm_toolkit import models - from run_all_experiments import prepare_experiment_targets logging.basicConfig(level=logging.INFO) @@ -67,7 +66,7 @@ def parse_args() -> argparse.Namespace: args = parse_args() # Initialize the working directory - args.work_dirs = os.makedirs(args.work_dir, exist_ok=True) + args.work_dirs = WorkDirs(args.work_dir) # 
Initialize the function analyzer function_analyzer = FunctionAnalyzer(trial=1, llm=model, args=args) @@ -78,13 +77,13 @@ def parse_args() -> argparse.Namespace: if len(benchmarks) == 0: raise ValueError("No benchmarks found in the YAML file.") - logger.info("Loaded %d benchmarks from the YAML file %s.", len(benchmarks), args.benchmark_yaml) + logger.info("Loaded %d benchmarks from the YAML file %s.", len(benchmarks), + args.benchmark_yaml) # Analyze each benchmark for test_benchmark in benchmarks: logger.info("Loaded benchmark (%d/%d) for function: %s", - benchmarks.index(test_benchmark) + 1, - len(benchmarks), + benchmarks.index(test_benchmark) + 1, len(benchmarks), test_benchmark.function_name) # Initialize the function analyzer with the first benchmark @@ -95,8 +94,9 @@ def parse_args() -> argparse.Namespace: # If result is available, write it to the work_dirs directory if result.result_available and result.result_raw: - result_file = os.path.join(args.work_dir, - f"{test_benchmark.project}_{test_benchmark.function_name}.txt") + result_file = os.path.join( + args.work_dirs.base, + f"{test_benchmark.project}_{test_benchmark.function_name}.txt") with open(result_file, 'w') as f: # f.write(f"Requirements for {test_benchmark.function_name}:\n") # for req in result.requirements: @@ -104,4 +104,5 @@ def parse_args() -> argparse.Namespace: f.write(result.result_raw) logger.info("Analysis results written to %s", result_file) else: - logger.info("No requirements found for benchmark %s", test_benchmark.function_name) + logger.info("No requirements found for benchmark %s", + test_benchmark.function_name) diff --git a/data_prep/introspector.py b/data_prep/introspector.py index ef6bcb3f6f..7d71a702c5 100755 --- a/data_prep/introspector.py +++ b/data_prep/introspector.py @@ -157,8 +157,7 @@ def set_introspector_endpoints(endpoint): f'{INTROSPECTOR_ENDPOINT}/database-language-stats') INTROSPECTOR_GET_TARGET_FUNCTION = ( f'{INTROSPECTOR_ENDPOINT}/get-target-function') - INTROSPECTOR_GET_ALL_FUNCTIONS = ( - f'{INTROSPECTOR_ENDPOINT}/all-functions') + INTROSPECTOR_GET_ALL_FUNCTIONS = f'{INTROSPECTOR_ENDPOINT}/all-functions' def _construct_url(api: str, params: dict) -> str: diff --git a/function-analyzer-result-sample/astc-encoder__Z10encode_ise12quant_methodjPKhPhj.txt b/function-analyzer-result-sample/astc-encoder__Z10encode_ise12quant_methodjPKhPhj.txt new file mode 100644 index 0000000000..8e4b4d0a32 --- /dev/null +++ b/function-analyzer-result-sample/astc-encoder__Z10encode_ise12quant_methodjPKhPhj.txt @@ -0,0 +1,26 @@ + +project name: astc-encoder +function signature: void encode_ise(DW_TAG_enumeration_typequant_method, unsigned int, const uint8_t *, uint8_t *, unsigned int) + + + +The function `encode_ise` encodes input data based on the specified quantization level and writes the encoded bits to the output data buffer. It handles different encoding schemes based on the number of trits or quints associated with the quantization level, or simply writes out the raw bits if neither trits nor quints are used. + + + + +`character_count` must be greater than 0. This is enforced by the `promise` statement. + + +`input_data` must be a valid pointer. This pointer is dereferenced to read input values. + + +`output_data` must be a valid pointer. This pointer is dereferenced in `write_bits` to write output values. + + +The size of `input_data` should be at least `character_count` bytes. Otherwise, there will be an out-of-bounds read in the loops. 
+ + +The size of `output_data` should be large enough to hold the encoded data, based on the `character_count`, `quant_level` and `bit_offset`. Otherwise, there will be an out-of-bounds write in `write_bits`. + + \ No newline at end of file diff --git a/function-analyzer-result-sample/astc-encoder__Z20symbolic_to_physicalRK21block_size_descriptorRK25symbolic_compressed_blockPh.txt b/function-analyzer-result-sample/astc-encoder__Z20symbolic_to_physicalRK21block_size_descriptorRK25symbolic_compressed_blockPh.txt new file mode 100644 index 0000000000..0c60c1e81f --- /dev/null +++ b/function-analyzer-result-sample/astc-encoder__Z20symbolic_to_physicalRK21block_size_descriptorRK25symbolic_compressed_blockPh.txt @@ -0,0 +1,20 @@ + +project name: astc-encoder +function signature: void symbolic_to_physical(const struct block_size_descriptor &, const struct symbolic_compressed_block &, uint8_t *) + + + +The function `symbolic_to_physical` converts a symbolic compressed block representation to a physical compressed block representation. It handles different block types and encodes various parameters into the physical block. + + + + +`scb.block_type` must not be equal to `SYM_BTYPE_ERROR` to satisfy the assertion at the beginning of the function. + + +`scb.quant_mode` must be greater than or equal to `QUANT_6` because it is used as an index into the `color_uquant_to_scrambled_pquant_tables` array. + + +For each `i` in the range `[0, scb.partition_count)`, `scb.color_values[i][j]` must be a valid index into `pack_table` (i.e., less than the size of `pack_table`) within the nested loop where `j` ranges from `0` to `2 * (scb.color_formats[i] >> 2) + 2`, and `scb.partition_count` should result in `vals` being less than or equal to 8 due to the assert statement. This prevents out-of-bounds access to `pack_table`. + + \ No newline at end of file diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index cb088a4ce8..27530101e2 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -848,8 +848,7 @@ def build_context_retriever_instruction(self) -> prompts.Prompt: if not self.benchmark: return self._prompt - prompt = self._get_template( - self.context_retrieve_template_file) + prompt = self._get_template(self.context_retrieve_template_file) self._prompt.append(prompt) diff --git a/tool/fuzz_introspector_tool.py b/tool/fuzz_introspector_tool.py index 94988f0bd1..8812e0557d 100644 --- a/tool/fuzz_introspector_tool.py +++ b/tool/fuzz_introspector_tool.py @@ -49,7 +49,8 @@ def _function_signature(self, function_name: str) -> str: # A placeholder raise NotImplementedError - def _function_source_with_signature(self, project_name: str, function_signature: str) -> str: + def function_source_with_signature(self, project_name: str, + function_signature: str) -> str: """ Retrieves a function's source from the fuzz introspector API, using the project's name and function's signature. @@ -78,11 +79,13 @@ def _function_source_with_signature(self, project_name: str, function_signature: return function_code - def _function_source_with_name(self, project_name: str, function_name: str) -> str: + def function_source_with_name(self, project_name: str, + function_name: str) -> str: """ Retrieves a function's source from the fuzz introspector API, using the project's name and function's name. - This function first retrieves the list of all functions in the project, so it can get the function's signature. 
+ This function first retrieves the list of all + functions in the project, so it can get the function's signature. Then it uses the function's signature to retrieve the source code. Args: @@ -100,7 +103,8 @@ def _function_source_with_name(self, project_name: str, function_name: str) -> s logger.info( "Project functions not initialized. Initializing for project '%s'.", project_name) - functions_list = introspector.query_introspector_all_functions(project_name) + functions_list = introspector.query_introspector_all_functions( + project_name) logger.info("Functions list:\n%s", functions_list) if functions_list: self.project_functions = { @@ -111,15 +115,16 @@ def _function_source_with_name(self, project_name: str, function_name: str) -> s else: self.project_functions = None - if self.project_functions is None or function_name not in self.project_functions: + if (self.project_functions is None or + function_name not in self.project_functions): logger.error("Error: Required function not found for project '%s'.", project_name) return "" - function_signature = self.project_functions[function_name]["function_signature"] + function_signature = self.project_functions[function_name][ + "function_signature"] - return self._function_source_with_signature(project_name, - function_signature) + return self.function_source_with_signature(project_name, function_signature) def tutorial(self) -> str: raise NotImplementedError From 47010af1e2557c11c1804c63c868674fc9439152 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Wed, 28 May 2025 13:42:12 +0000 Subject: [PATCH 10/64] Refactored imports --- agent/function_analyzer.py | 42 ++++++++++++++------------- agent_tests/function_analyzer_test.py | 18 +++++++----- tool/fuzz_introspector_tool.py | 8 ++--- 3 files changed, 37 insertions(+), 31 deletions(-) diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index 9cf343b75c..c64e556924 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -21,22 +21,19 @@ import logging from typing import Optional -from google.adk.agents import LlmAgent, SequentialAgent -from google.adk.runners import Runner -from google.adk.sessions import InMemorySessionService +from google.adk import agents, runners, sessions from google.genai import types -from agent.base_agent import BaseAgent +import results as resultslib +from agent import base_agent from experiment import benchmark as benchmarklib -from llm_toolkit import prompt_builder -from llm_toolkit.prompts import Prompt -from results import PreWritingResult, Result -from tool.fuzz_introspector_tool import FuzzIntrospectorTool +from llm_toolkit import prompt_builder, prompts +from tool import fuzz_introspector_tool logger = logging.getLogger(__name__) -class FunctionAnalyzer(BaseAgent): +class FunctionAnalyzer(base_agent.BaseAgent): """An LLM agent to analyze a function and identify its implicit requirements. The results of this analysis will be used by the writer agents to generate correct fuzz target for the function. 
@@ -52,9 +49,10 @@ def initialize(self, benchmark: benchmarklib.Benchmark): self.llm, self.benchmark) # Initialize the Fuzz Introspector tool - introspector_tool = FuzzIntrospectorTool(benchmark, self.name) + introspector_tool = fuzz_introspector_tool.FuzzIntrospectorTool( + benchmark, self.name) - context_retriever = LlmAgent( + context_retriever = agents.LlmAgent( name="ContextRetrieverAgent", model='gemini-2.0-flash', description="""Retrieves the implementation of a function @@ -69,7 +67,7 @@ def initialize(self, benchmark: benchmarklib.Benchmark): ) # Create the agent using the ADK library - requirements_extractor = LlmAgent( + requirements_extractor = agents.LlmAgent( name="RequirementsExtractorAgent", # TODO: Get the model name from args. # Currently, the default names are incompatible with the ADK library. @@ -81,7 +79,7 @@ def initialize(self, benchmark: benchmarklib.Benchmark): ) # Create the function analyzer agent - function_analyzer = SequentialAgent( + function_analyzer = agents.SequentialAgent( name="FunctionAnalyzerAgent", sub_agents=[context_retriever, requirements_extractor], description="""Sequential agent to retrieve a function's source, @@ -89,7 +87,7 @@ def initialize(self, benchmark: benchmarklib.Benchmark): ) # Create the session service - session_service = InMemorySessionService() + session_service = sessions.InMemorySessionService() session_service.create_session( app_name=self.name, user_id="user", @@ -97,7 +95,7 @@ def initialize(self, benchmark: benchmarklib.Benchmark): ) # Create the runner - self.runner = Runner( + self.runner = runners.Runner( agent=function_analyzer, app_name=self.name, session_service=session_service, @@ -105,8 +103,8 @@ def initialize(self, benchmark: benchmarklib.Benchmark): logger.info("Function Analyzer Agent created, with name: %s", self.name) - async def call_agent(self, query: str, runner: Runner, user_id: str, - session_id: str) -> PreWritingResult: + async def call_agent(self, query: str, runner: runners.Runner, user_id: str, + session_id: str) -> resultslib.PreWritingResult: """Call the agent asynchronously with the given query.""" logger.info(">>> User query: %s", query) @@ -144,7 +142,7 @@ async def call_agent(self, query: str, runner: Runner, user_id: str, result_raw = '' # Prepare the result - result = PreWritingResult( + result = resultslib.PreWritingResult( benchmark=self.benchmark, trial=self.trial, work_dirs=self.args.work_dir, @@ -155,7 +153,9 @@ async def call_agent(self, query: str, runner: Runner, user_id: str, return result - def execute(self, result_history: list[Result]) -> PreWritingResult: + def execute( + self, + result_history: list[resultslib.Result]) -> resultslib.PreWritingResult: """Execute the agent with the given results.""" # Call the agent asynchronously and return the result @@ -172,7 +172,9 @@ def execute(self, result_history: list[Result]) -> PreWritingResult: return result - def _initial_prompt(self, results: Optional[list[Result]] = None) -> Prompt: + def _initial_prompt( + self, + results: Optional[list[resultslib.Result]] = None) -> prompts.Prompt: """Create the initial prompt for the agent.""" # Initialize the prompt builder diff --git a/agent_tests/function_analyzer_test.py b/agent_tests/function_analyzer_test.py index 35105522d8..a82640f4ad 100644 --- a/agent_tests/function_analyzer_test.py +++ b/agent_tests/function_analyzer_test.py @@ -18,11 +18,11 @@ import os from typing import List -from agent.function_analyzer import FunctionAnalyzer -from experiment.benchmark import Benchmark -from 
experiment.workdir import WorkDirs +import run_all_experiments +from agent import function_analyzer +from experiment import benchmark as benchmarklib +from experiment import workdir from llm_toolkit import models -from run_all_experiments import prepare_experiment_targets logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -66,13 +66,17 @@ def parse_args() -> argparse.Namespace: args = parse_args() # Initialize the working directory - args.work_dirs = WorkDirs(args.work_dir) + args.work_dirs = workdir.WorkDirs(args.work_dir) # Initialize the function analyzer - function_analyzer = FunctionAnalyzer(trial=1, llm=model, args=args) + function_analyzer = function_analyzer.FunctionAnalyzer(trial=1, + llm=model, + args=args) # Initialize benchmarks - benchmarks: List[Benchmark] = prepare_experiment_targets(args) + benchmarks: List[ + benchmarklib.Benchmark] = run_all_experiments.prepare_experiment_targets( + args) if len(benchmarks) == 0: raise ValueError("No benchmarks found in the YAML file.") diff --git a/tool/fuzz_introspector_tool.py b/tool/fuzz_introspector_tool.py index 8812e0557d..5a1a56ffe1 100644 --- a/tool/fuzz_introspector_tool.py +++ b/tool/fuzz_introspector_tool.py @@ -16,16 +16,16 @@ import logging from data_prep import introspector -from experiment.benchmark import Benchmark -from tool.base_tool import BaseTool +from experiment import benchmark as benchmarklib +from tool import base_tool logger = logging.getLogger(__name__) -class FuzzIntrospectorTool(BaseTool): +class FuzzIntrospectorTool(base_tool.BaseTool): """Calls FI API with params.""" - def __init__(self, benchmark: Benchmark, name: str = ''): + def __init__(self, benchmark: benchmarklib.Benchmark, name: str = ''): super().__init__(benchmark, name) self.project_functions = None From 4314b3084b25449e6d1730171cc5295d3606f7fb Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Thu, 29 May 2025 14:46:22 +0000 Subject: [PATCH 11/64] Parallelize analysis of a benchmark and created a script to upload analysis result to google cloud storage. --- agent_tests/function_analyzer_test.py | 138 ++++++++++++++++++++------ agent_tests/upload_analysis_result.py | 58 +++++++++++ experiment/workdir.py | 20 ++-- 3 files changed, 174 insertions(+), 42 deletions(-) create mode 100644 agent_tests/upload_analysis_result.py diff --git a/agent_tests/function_analyzer_test.py b/agent_tests/function_analyzer_test.py index a82640f4ad..537e8c491c 100644 --- a/agent_tests/function_analyzer_test.py +++ b/agent_tests/function_analyzer_test.py @@ -15,6 +15,7 @@ import argparse import logging +import multiprocessing import os from typing import List @@ -30,6 +31,8 @@ RESULTS_DIR = './results' +NUM_ANA = int(os.getenv('LLM_NUM_ANA', '2')) + def parse_args() -> argparse.Namespace: """Parses command line arguments.""" parser = argparse.ArgumentParser( @@ -38,14 +41,27 @@ def parse_args() -> argparse.Namespace: parser.add_argument('-y', '--benchmark-yaml', type=str, - required=True, help='A benchmark YAML file.') - parser.add_argument('-d', + parser.add_argument('-b', '--benchmarks-directory', type=str, help='A directory containing benchmark YAML files.') + parser.add_argument( + '-g', + '--generate-benchmarks', + help=('Generate benchmarks and use those for analysis. 
This is a string ' + 'of comma-separated heuristics to use when identifying benchmark ' + 'targets.'), + type=str) + + parser.add_argument('-np', + '--num-pools', + type=int, + default=NUM_ANA, + help='Number of parallel processes to use for analysis.') + parser.add_argument('-w', '--work-dir', default=RESULTS_DIR) parser.add_argument('-mr', @@ -56,9 +72,62 @@ def parse_args() -> argparse.Namespace: parsed_args = parser.parse_args() + benchmark_yaml = parsed_args.benchmark_yaml + if benchmark_yaml: + assert (benchmark_yaml.endswith('.yaml') or + benchmark_yaml.endswith('yml')), ( + "--benchmark-yaml needs to take an YAML file.") + + bench_yml = bool(benchmark_yaml) + bench_dir = bool(parsed_args.benchmarks_directory) + bench_gen = bool(parsed_args.generate_benchmarks) + + num_options = int(bench_yml) + int(bench_dir) + int(bench_gen) + assert num_options == 1, ( + 'One and only one of --benchmark-yaml, --benchmarks-directory and ' + '--generate-benchmarks. --benchmark-yaml takes one benchmark YAML file, ' + '--benchmarks-directory takes: a directory of them and ' + '--generate-benchmarks generates them during analysis.') + return parsed_args +def analyze_benchmark(benchmark: benchmarklib.Benchmark, + model: models.LLM, + args: argparse.Namespace) -> bool: + """Analyzes the benchmark using the function analyzer.""" + + logger.info("Loaded benchmark (%d/%d) for function: %s", + benchmarks.index(benchmark) + 1, len(benchmarks), + benchmark.function_name) + + # Initialize the function analyzer + analyzer = function_analyzer.FunctionAnalyzer(trial=1, + llm=model, + args=args) + + # Initialize the function analyzer with the first benchmark + analyzer.initialize(benchmark) + + # Run the function analyzer + result = analyzer.execute([]) + + # If result is available, write it to the work_dirs directory + if result.result_available and result.result_raw: + result_file = os.path.join( + args.work_dirs.base, + f"{benchmark.id}.txt") + + with open(result_file, 'w') as f: + f.write(result.result_raw) + + logger.info("Analysis result for benchmark %s written to %s", benchmark.function_name, result_file) + else: + logger.info("No requirements found for benchmark %s", + benchmark.function_name) + + return result.result_available + if __name__ == "__main__": model = models.LLM.setup(ai_binary='', name='vertex_ai_gemini-1-5-chat') @@ -66,12 +135,7 @@ def parse_args() -> argparse.Namespace: args = parse_args() # Initialize the working directory - args.work_dirs = workdir.WorkDirs(args.work_dir) - - # Initialize the function analyzer - function_analyzer = function_analyzer.FunctionAnalyzer(trial=1, - llm=model, - args=args) + args.work_dirs = workdir.WorkDirs(args.work_dir, create_children_dirs=False) # Initialize benchmarks benchmarks: List[ @@ -85,28 +149,36 @@ def parse_args() -> argparse.Namespace: args.benchmark_yaml) # Analyze each benchmark - for test_benchmark in benchmarks: - logger.info("Loaded benchmark (%d/%d) for function: %s", - benchmarks.index(test_benchmark) + 1, len(benchmarks), - test_benchmark.function_name) - - # Initialize the function analyzer with the first benchmark - function_analyzer.initialize(test_benchmark) - - # Run the function analyzer - result = function_analyzer.execute([]) - - # If result is available, write it to the work_dirs directory - if result.result_available and result.result_raw: - result_file = os.path.join( - args.work_dirs.base, - f"{test_benchmark.project}_{test_benchmark.function_name}.txt") - with open(result_file, 'w') as f: - # f.write(f"Requirements for 
{test_benchmark.function_name}:\n") - # for req in result.requirements: - # f.write(f"- {req}\n") - f.write(result.result_raw) - logger.info("Analysis results written to %s", result_file) - else: - logger.info("No requirements found for benchmark %s", - test_benchmark.function_name) + success_count = 0 + + if NUM_ANA == 2: + for test_benchmark in benchmarks: + if analyze_benchmark(test_benchmark, model, args): + success_count += 1 + else: + + logger.info("Running analysis in parallel with %d processes.", args.num_pools) + with multiprocessing.Pool(args.num_pools, maxtasksperchild=1) as pool: + + results = {} + for test_benchmark in benchmarks: + # Pass a new analyzer instance to each process to avoid sharing state + result = pool.apply_async( + analyze_benchmark, + args=(test_benchmark, model, args) + ) + results[test_benchmark.id] = result + + pool.close() + + # Wait for all results to complete and count successes + for benchmark_id, result in results.items(): + try: + if result.get(): + success_count += 1 + except Exception as e: + logger.error(f"Error during analysis for benchmark %s: %s", benchmark_id, e) + + pool.join() + + print(f"{success_count} out of {len(benchmarks)} analyses completed successfully.") diff --git a/agent_tests/upload_analysis_result.py b/agent_tests/upload_analysis_result.py new file mode 100644 index 0000000000..45ec86a06b --- /dev/null +++ b/agent_tests/upload_analysis_result.py @@ -0,0 +1,58 @@ +import argparse +import os +from google.cloud import storage + +GCS_BUCKET_NAME = 'pamusuo-tests' + +CGS_RESULTS_DIR = "Function-analysis-results" + +def upload_directory_to_gcs(local_folder_path, bucket_name, destination_blob_prefix=""): + """ + Uploads all .txt files from a local folder to a Google Cloud Storage bucket. + + Args: + local_folder_path (str): The path to the local folder containing the .txt files. + bucket_name (str): The name of your Google Cloud Storage bucket. + destination_blob_prefix (str): An optional prefix for the blob names in GCS. + Useful for organizing files within the bucket. + e.g., "my_text_files/" + """ + storage_client = storage.Client() + bucket = storage_client.bucket(bucket_name) + + print(f"Starting upload from local folder: {local_folder_path}") + + for root, _, files in os.walk(local_folder_path): + for file_name in files: + if file_name.endswith(".txt"): + local_file_path = os.path.join(root, file_name) + + # Construct the blob path in GCS + # This ensures the folder structure is maintained if needed + # For simplicity, we'll just put all files directly under the prefix + # If you want to maintain subdirectories, you'd adjust this. 
+ relative_path = os.path.relpath(local_file_path, local_folder_path) + destination_blob_name = os.path.join(destination_blob_prefix, relative_path).replace("\\", "/") # Replace backslashes for Linux/GCS compatibility + + + blob = bucket.blob(destination_blob_name) + + try: + blob.upload_from_filename(local_file_path) + print(f"Uploaded {local_file_path} to gs://{bucket_name}/{destination_blob_name}") + except Exception as e: + print(f"Error uploading {local_file_path}: {e}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Upload a directory to a Google Cloud Storage bucket.") + parser.add_argument("-d", "--directory", help="Path to the directory to upload", required=True) + parser.add_argument("-b", "--bucket", help="Name of the GCS bucket", default=GCS_BUCKET_NAME) + args = parser.parse_args() + + # Ensure the directory exists + if not os.path.isdir(args.directory): + raise ValueError(f"The specified directory does not exist: {args.directory}") + + # Upload the directory to GCS + upload_directory_to_gcs(args.directory, args.bucket, CGS_RESULTS_DIR) \ No newline at end of file diff --git a/experiment/workdir.py b/experiment/workdir.py index d6d83951fe..6ba4419206 100644 --- a/experiment/workdir.py +++ b/experiment/workdir.py @@ -26,21 +26,23 @@ class WorkDirs: RUN_LOG_NAME_PATTERN = re.compile(r'.*-F(\d+).log') - def __init__(self, base_dir, keep: bool = False): + def __init__(self, base_dir, keep: bool = False, create_children_dirs: bool = True): self._base_dir = os.path.realpath(base_dir) if os.path.exists(self._base_dir) and not keep: # Clear existing directory. rmtree(self._base_dir, ignore_errors=True) os.makedirs(self._base_dir, exist_ok=True) - os.makedirs(self.status, exist_ok=True) - os.makedirs(self.raw_targets, exist_ok=True) - os.makedirs(self.fixed_targets, exist_ok=True) - os.makedirs(self.build_logs, exist_ok=True) - os.makedirs(self.run_logs, exist_ok=True) - os.makedirs(self._corpus_base, exist_ok=True) - os.makedirs(self.dills, exist_ok=True) - os.makedirs(self.fuzz_targets, exist_ok=True) + + if create_children_dirs: + os.makedirs(self.status, exist_ok=True) + os.makedirs(self.raw_targets, exist_ok=True) + os.makedirs(self.fixed_targets, exist_ok=True) + os.makedirs(self.build_logs, exist_ok=True) + os.makedirs(self.run_logs, exist_ok=True) + os.makedirs(self._corpus_base, exist_ok=True) + os.makedirs(self.dills, exist_ok=True) + os.makedirs(self.fuzz_targets, exist_ok=True) def __repr__(self) -> str: return self._base_dir From 1266603695b1d8f01d2e236ae26ef0bee070f6cd Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Thu, 29 May 2025 18:52:20 +0000 Subject: [PATCH 12/64] Upgraded function analyzer test to use 2.5 pro and updated function analyzer to get the model name dynamically. --- agent/function_analyzer.py | 27 +++++++++++++++++++++------ agent_tests/function_analyzer_test.py | 5 +---- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index c64e556924..65592b1679 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -17,6 +17,7 @@ generate correct fuzz target for the function. 
""" +import argparse import asyncio import logging from typing import Optional @@ -27,8 +28,8 @@ import results as resultslib from agent import base_agent from experiment import benchmark as benchmarklib -from llm_toolkit import prompt_builder, prompts -from tool import fuzz_introspector_tool +from llm_toolkit import models, prompt_builder, prompts +from tool import base_tool, fuzz_introspector_tool logger = logging.getLogger(__name__) @@ -39,6 +40,22 @@ class FunctionAnalyzer(base_agent.BaseAgent): generate correct fuzz target for the function. """ + def __init__(self, + trial: int, + llm: models.LLM, + args: argparse.Namespace, + tools: Optional[list[base_tool.BaseTool]] = None, + name: str = ''): + + # Ensure the llm is an instance of VertexAIModel + if not isinstance(llm, models.VertexAIModel): + raise ValueError( + "FunctionAnalyzer agent requires a VertexAIModel instance for llm.") + + self.vertex_ai_model = llm._vertex_ai_model + + super().__init__(trial, llm, args, tools, name) + def initialize(self, benchmark: benchmarklib.Benchmark): """Initialize the function analyzer agent with the given benchmark.""" @@ -54,7 +71,7 @@ def initialize(self, benchmark: benchmarklib.Benchmark): context_retriever = agents.LlmAgent( name="ContextRetrieverAgent", - model='gemini-2.0-flash', + model=self.vertex_ai_model, description="""Retrieves the implementation of a function and its children from Fuzz Introspector.""", instruction=builder.build_context_retriever_instruction().get(), @@ -69,9 +86,7 @@ def initialize(self, benchmark: benchmarklib.Benchmark): # Create the agent using the ADK library requirements_extractor = agents.LlmAgent( name="RequirementsExtractorAgent", - # TODO: Get the model name from args. - # Currently, the default names are incompatible with the ADK library. - model='gemini-2.0-flash', + model=self.vertex_ai_model, description="""Extracts a function's requirements from its source implementation.""", instruction=builder.build_instruction().get(), diff --git a/agent_tests/function_analyzer_test.py b/agent_tests/function_analyzer_test.py index a82640f4ad..3f1e10ee63 100644 --- a/agent_tests/function_analyzer_test.py +++ b/agent_tests/function_analyzer_test.py @@ -61,7 +61,7 @@ def parse_args() -> argparse.Namespace: if __name__ == "__main__": - model = models.LLM.setup(ai_binary='', name='vertex_ai_gemini-1-5-chat') + model = models.LLM.setup(ai_binary='', name='vertex_ai_gemini-2-5-pro-chat') args = parse_args() @@ -102,9 +102,6 @@ def parse_args() -> argparse.Namespace: args.work_dirs.base, f"{test_benchmark.project}_{test_benchmark.function_name}.txt") with open(result_file, 'w') as f: - # f.write(f"Requirements for {test_benchmark.function_name}:\n") - # for req in result.requirements: - # f.write(f"- {req}\n") f.write(result.result_raw) logger.info("Analysis results written to %s", result_file) else: From 44e73254b4d6c06f3855367596a162ed74b19402 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Mon, 2 Jun 2025 18:11:20 +0000 Subject: [PATCH 13/64] Implemented an alternative agentic architecture where the target function and its context are first retrieved, and passed to the agent in the prompt. 
--- agent/function_analyzer.py | 42 +++------ agent_tests/function_analyzer_test.py | 33 ++++--- llm_toolkit/prompt_builder.py | 25 ++++++ prompts/agent/function-analyzer-priming.txt | 96 ++++++++++++++++++++- tool/fuzz_introspector_tool.py | 2 +- 5 files changed, 153 insertions(+), 45 deletions(-) diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index 65592b1679..8027042566 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -69,36 +69,17 @@ def initialize(self, benchmark: benchmarklib.Benchmark): introspector_tool = fuzz_introspector_tool.FuzzIntrospectorTool( benchmark, self.name) - context_retriever = agents.LlmAgent( - name="ContextRetrieverAgent", - model=self.vertex_ai_model, - description="""Retrieves the implementation of a function - and its children from Fuzz Introspector.""", - instruction=builder.build_context_retriever_instruction().get(), - tools=[ - introspector_tool.function_source_with_signature, - introspector_tool.function_source_with_name - ], - generate_content_config=types.GenerateContentConfig(temperature=0.0,), - output_key="FUNCTION_SOURCE", - ) - # Create the agent using the ADK library - requirements_extractor = agents.LlmAgent( - name="RequirementsExtractorAgent", + function_analyzer = agents.LlmAgent( + name="FunctionAnalyzer", model=self.vertex_ai_model, description="""Extracts a function's requirements from its source implementation.""", - instruction=builder.build_instruction().get(), - output_key="FUNCTION_REQUIREMENTS", - ) - - # Create the function analyzer agent - function_analyzer = agents.SequentialAgent( - name="FunctionAnalyzerAgent", - sub_agents=[context_retriever, requirements_extractor], - description="""Sequential agent to retrieve a function's source, - analyze it and extract its requirements.""", + instruction="""You are a security engineer tasked with analyzing a function + and extracting its input requirements, necessary for it to execute correctly.""", + tools=[ + introspector_tool.function_source_with_name + ], ) # Create the session service @@ -122,7 +103,7 @@ async def call_agent(self, query: str, runner: runners.Runner, user_id: str, session_id: str) -> resultslib.PreWritingResult: """Call the agent asynchronously with the given query.""" - logger.info(">>> User query: %s", query) + # logger.info(">>> User query: %s", query) content = types.Content(role='user', parts=[types.Part(text=query)]) @@ -136,7 +117,7 @@ async def call_agent(self, query: str, runner: runners.Runner, user_id: str, new_message=content, ): - logger.info("Event is %s", event.content) + # logger.info("Event is %s", event.content) if event.is_final_response(): if (event.content and event.content.parts and event.content.parts[0].text): @@ -176,6 +157,11 @@ def execute( # Call the agent asynchronously and return the result prompt = self._initial_prompt(result_history) query = prompt.gettext() + + # Validate query is not empty + if not query.strip(): + raise ValueError("Query is empty. 
Cannot call the agent.") + user_id = "user" session_id = f"session_{self.trial}" result = asyncio.run( diff --git a/agent_tests/function_analyzer_test.py b/agent_tests/function_analyzer_test.py index 16631eba65..1dc27913ba 100644 --- a/agent_tests/function_analyzer_test.py +++ b/agent_tests/function_analyzer_test.py @@ -97,10 +97,6 @@ def analyze_benchmark(benchmark: benchmarklib.Benchmark, args: argparse.Namespace) -> bool: """Analyzes the benchmark using the function analyzer.""" - logger.info("Loaded benchmark (%d/%d) for function: %s", - benchmarks.index(benchmark) + 1, len(benchmarks), - benchmark.function_name) - # Initialize the function analyzer analyzer = function_analyzer.FunctionAnalyzer(trial=1, llm=model, @@ -110,7 +106,11 @@ def analyze_benchmark(benchmark: benchmarklib.Benchmark, analyzer.initialize(benchmark) # Run the function analyzer - result = analyzer.execute([]) + try: + result = analyzer.execute([]) + except Exception as e: + logger.error("Error during analysis for benchmark %s: %s", benchmark.function_name, e) + return False # If result is available, write it to the work_dirs directory if result.result_available and result.result_raw: @@ -151,9 +151,12 @@ def analyze_benchmark(benchmark: benchmarklib.Benchmark, # Analyze each benchmark success_count = 0 - if NUM_ANA == 2: - for test_benchmark in benchmarks: - if analyze_benchmark(test_benchmark, model, args): + if NUM_ANA == 1: + for benchmark in benchmarks: + logger.info("Loaded benchmark (%d/%d) for function: %s", + benchmarks.index(benchmark) + 1, len(benchmarks), + benchmark.function_name) + if analyze_benchmark(benchmark, model, args): success_count += 1 else: @@ -161,16 +164,22 @@ def analyze_benchmark(benchmark: benchmarklib.Benchmark, with multiprocessing.Pool(args.num_pools, maxtasksperchild=1) as pool: results = {} - for test_benchmark in benchmarks: + for benchmark in benchmarks: # Pass a new analyzer instance to each process to avoid sharing state + logger.info("Submitted benchmark (%d/%d) for function: %s to the pool.", + benchmarks.index(benchmark) + 1, len(benchmarks), + benchmark.function_name) result = pool.apply_async( analyze_benchmark, - args=(test_benchmark, model, args) + args=(benchmark, model, args) ) - results[test_benchmark.id] = result + results[benchmark.id] = result pool.close() + # Wait for all processes to complete + pool.join() + # Wait for all results to complete and count successes for benchmark_id, result in results.items(): try: @@ -179,6 +188,4 @@ def analyze_benchmark(benchmark: benchmarklib.Benchmark, except Exception as e: logger.error(f"Error during analysis for benchmark %s: %s", benchmark_id, e) - pool.join() - print(f"{success_count} out of {len(benchmarks)} analyses completed successfully.") diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index 27530101e2..76270ee995 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -868,6 +868,31 @@ def build_prompt(self) -> prompts.Prompt: prompt = prompt.replace('{FUNCTION_SIGNATURE}', self.benchmark.function_signature) + # Get the function source + func_source = introspector.query_introspector_function_source( + self.benchmark.project, self.benchmark.function_signature) + + if not func_source: + logger.error( + 'No function source found for project: %s, function: %s', + self.benchmark.project, self.benchmark.function_signature) + return prompts.TextPrompt() + + prompt = prompt.replace('{FUNCTION_SOURCE}', func_source) + + # Get the function's references + xrefs = 
introspector.query_introspector_cross_references(self.benchmark.project, + self.benchmark.function_signature) + if not xrefs: + logger.error( + 'No cross references found for project: %s, function: %s', + self.benchmark.project, self.benchmark.function_signature) + prompt = prompt.replace('\n{FUNCTION_REFERENCES}\n}', '') + else: + references = [f"\n{xref}\n" for xref in xrefs] + references_str = '\n'.join(references) + prompt = prompt.replace('{FUNCTION_REFERENCES}', references_str) + self._prompt.append(prompt) return self._prompt diff --git a/prompts/agent/function-analyzer-priming.txt b/prompts/agent/function-analyzer-priming.txt index f4f95db171..6a289e8b15 100644 --- a/prompts/agent/function-analyzer-priming.txt +++ b/prompts/agent/function-analyzer-priming.txt @@ -1,3 +1,93 @@ -First, get the implementations of the target function {FUNCTION_SIGNATURE} - and all its children functions (functions it calls) in the project {PROJECT_NAME}. - Then, analyze the implementation and extract the requirements of the target function. +You are a professional security engineer working on creating a valid fuzzing driver for the target function `{FUNCTION_SIGNATURE}` in the project {PROJECT_NAME}. + +We will provide you with the implementation of the target function, implementations of other functions that reference the target function, and a set of tools that you can use to get additional function implementations and context information. + +Your goal is to analyze the provided functions and their usages, provide a clear, detailed description of the function, and identify the important input requirements for the target function to execute correctly. + +The requirements we are interested in include the following: +1. What constraints on input arguments are necessary to prevent assertion failures, out-of-bounds array indexing, null pointer dereferencing, invalid memory access, invalid string access, and other crashes. +2. What setup functions must be called before the target function? +3. What existing function in the project should we use to create valid inputs for the target function? +4. What inputs, or members of an input, should we initialize with random fuzz data? +5. What inputs must we initialize by calling another existing function? + +Keep your responses concise. Each requirement should contain two sentences. The first is the requirement. The second is a brief reason why it is important. + +Here is the provided data. + + +{FUNCTION_SOURCE} + + + +{FUNCTION_REFERENCES} + + + +You MUST return your response in the format below. + + +Make sure your response follows the following format, enclosed in ``` ```. + +``` + + +project name: the name of the project provided +function signature: The function's signature + + + +A summary of what the function does. + + + + +First requirement + + +Second requirement + +... + +nth requirement + + + + + + + + + +Here is an example response + + +project name: htslib +function signature: int sam_index_build(int, const char *, int) + + + +The sam_index_build function is used to build a sam index. It uses the input arguments to identify and retrieve the index to build. It returns 1 if the build succeeds and 0 if the build fails. + + + + +The index_open function should be called before sam_index_build. This is because it creates the index that is built in the latter function. + + +The second argument should be a valid, null-terminated string. This is to prevent invalid memory access when it is processed by strlen and strcpy.
+ + +The third argument should be greater than zero. This is to prevent an assertion violation in the program. + + +The third argument should be less than 16. This is to prevent an out-of-bound array access when the argument is used to index the fixed-size array `stores`. + + + + + + + +You will be provided with the following tools. +1. _function_source_with_name: Use this tool to retrieve the implementation of a function. You will invoke the tool using the project's name and function's name as arguments. \ No newline at end of file diff --git a/tool/fuzz_introspector_tool.py b/tool/fuzz_introspector_tool.py index 5a1a56ffe1..fe950abc4b 100644 --- a/tool/fuzz_introspector_tool.py +++ b/tool/fuzz_introspector_tool.py @@ -105,7 +105,7 @@ def function_source_with_name(self, project_name: str, project_name) functions_list = introspector.query_introspector_all_functions( project_name) - logger.info("Functions list:\n%s", functions_list) + if functions_list: self.project_functions = { func["debug_summary"]["name"]: func From 69c03a3e51370c138dc520e5ce92faad269bc43b Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Wed, 4 Jun 2025 22:05:37 +0000 Subject: [PATCH 14/64] function-analyzer: Implemented flow of requirements from function analyzer to other agents. Modified the prototyper to consume requirements from function analyzer. --- agent/function_analyzer.py | 62 ++++++++++++++++++--------- agent/prototyper.py | 13 +++++- agent_tests/function_analyzer_test.py | 16 +------ common/cloud_builder.py | 48 +++++++++++++++++++-- experiment/workdir.py | 5 +++ llm_toolkit/prompt_builder.py | 18 +++++--- results.py | 14 +++++- 7 files changed, 129 insertions(+), 47 deletions(-) diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index 8027042566..98f7a239ee 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -20,11 +20,13 @@ import argparse import asyncio import logging +import os from typing import Optional from google.adk import agents, runners, sessions from google.genai import types +from experiment.workdir import WorkDirs import results as resultslib from agent import base_agent from experiment import benchmark as benchmarklib @@ -100,14 +102,14 @@ def initialize(self, benchmark: benchmarklib.Benchmark): logger.info("Function Analyzer Agent created, with name: %s", self.name) async def call_agent(self, query: str, runner: runners.Runner, user_id: str, - session_id: str) -> resultslib.PreWritingResult: + session_id: str) -> str: """Call the agent asynchronously with the given query.""" # logger.info(">>> User query: %s", query) content = types.Content(role='user', parts=[types.Part(text=query)]) - final_response_text = "Agent did not produce a final response." 
+ final_response_text = '' result_available = False @@ -131,29 +133,36 @@ async def call_agent(self, query: str, runner: runners.Runner, user_id: str, if result_available and self._parse_tag(final_response_text, 'response'): # Get the requirements from the response - requirements = self._parse_tags(final_response_text, 'requirement') - result_raw = self._parse_tag(final_response_text, 'response') + result_str = self._parse_tag(final_response_text, 'response') else: - requirements = [] - result_raw = '' + result_str = '' - # Prepare the result - result = resultslib.PreWritingResult( - benchmark=self.benchmark, - trial=self.trial, - work_dirs=self.args.work_dir, - result_available=result_available, - result_raw=result_raw, - requirements=requirements, - ) + return result_str - return result + def write_requirements_to_file(self, args, requirements: str) -> str: + """Write the requirements to a file.""" + if not requirements: + logger.warning("No requirements to write to file.") + return '' + + requirement_path = os.path.join( + args.work_dirs.requirements, + f"{self.benchmark.id}.txt") + + with open(requirement_path, 'w') as f: + f.write(requirements) + + logger.info("Requirements written to %s", requirement_path) + + return requirement_path def execute( self, - result_history: list[resultslib.Result]) -> resultslib.PreWritingResult: + result_history: list[resultslib.Result]) -> resultslib.Result: """Execute the agent with the given results.""" + WorkDirs(self.args.work_dirs.base, keep=True) + # Call the agent asynchronously and return the result prompt = self._initial_prompt(result_history) query = prompt.gettext() @@ -164,12 +173,23 @@ def execute( user_id = "user" session_id = f"session_{self.trial}" - result = asyncio.run( + result_str = asyncio.run( self.call_agent(query, self.runner, user_id, session_id)) - if result and result.result_available: - # Save the result to the history - result_history.append(result) + if result_str: + # Write the requirements to a file + requirement_path = self.write_requirements_to_file( + self.args, result_str) + function_analysis = resultslib.FunctionAnalysisResult(requirement_path) + else: + function_analysis = None + + result = resultslib.Result( + benchmark=self.benchmark, + trial=self.trial, + work_dirs=self.args.work_dirs, + function_analysis=function_analysis, + ) return result diff --git a/agent/prototyper.py b/agent/prototyper.py index 8bfbac9245..b7d89fa171 100644 --- a/agent/prototyper.py +++ b/agent/prototyper.py @@ -38,7 +38,8 @@ class Prototyper(BaseAgent): def _initial_prompt(self, results: list[Result]) -> Prompt: """Constructs initial prompt of the agent.""" - benchmark = results[-1].benchmark + last_result = results[-1] + benchmark = last_result.benchmark if benchmark.use_project_examples: project_examples = project_targets.generate_data( @@ -54,6 +55,13 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: else: context_info = {} + function_analysis = last_result.function_analysis + if function_analysis and os.path.isfile(function_analysis.function_analysis_path): + with open(function_analysis.function_analysis_path, 'r') as file: + function_requirements = file.read() + else: + function_requirements = '' + builder = prompt_builder.PrototyperTemplateBuilder( model=self.llm, benchmark=benchmark, @@ -63,7 +71,8 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: project_example_content=project_examples, project_context_content=context_info, tool_guides=self.inspect_tool.tutorial(), - 
project_dir=self.inspect_tool.project_dir) + project_dir=self.inspect_tool.project_dir, + function_requirements=function_requirements,) return prompt def _update_fuzz_target_and_build_script(self, response: str, diff --git a/agent_tests/function_analyzer_test.py b/agent_tests/function_analyzer_test.py index 1dc27913ba..600d3e5f21 100644 --- a/agent_tests/function_analyzer_test.py +++ b/agent_tests/function_analyzer_test.py @@ -112,21 +112,7 @@ def analyze_benchmark(benchmark: benchmarklib.Benchmark, logger.error("Error during analysis for benchmark %s: %s", benchmark.function_name, e) return False - # If result is available, write it to the work_dirs directory - if result.result_available and result.result_raw: - result_file = os.path.join( - args.work_dirs.base, - f"{benchmark.id}.txt") - - with open(result_file, 'w') as f: - f.write(result.result_raw) - - logger.info("Analysis result for benchmark %s written to %s", benchmark.function_name, result_file) - else: - logger.info("No requirements found for benchmark %s", - benchmark.function_name) - - return result.result_available + return result.function_analysis is not None if __name__ == "__main__": diff --git a/common/cloud_builder.py b/common/cloud_builder.py index 79847da5a4..e9a1a157ad 100644 --- a/common/cloud_builder.py +++ b/common/cloud_builder.py @@ -120,6 +120,22 @@ def _upload_files(self, archive_name: str, target_dir: str, logging.error("Failed to create archive: %s", archive_path) return self._upload_to_gcs(archive_path) + def _upload_directory(self, directory_path: str) -> str: + """Archives and uploads local OFG repo to cloud build.""" + + if not os.path.isdir(directory_path): + logging.error("Directory does not exist: %s", directory_path) + return '' + + files_to_upload = list( + os.path.relpath(os.path.join(root, file)) + for root, _, files in os.walk(directory_path) + for file in files) + + # TODO(pamusuo): Check if directory_path is the right base directory to use, or OFG_ROOT_DIR? + return self._upload_files(f'ofg-exp-{uuid.uuid4().hex}.tar.gz', + directory_path, files_to_upload) + def _upload_to_gcs(self, local_file_path: str) -> str: """Uploads a file to Google Cloud Storage.""" dest_file_name = os.path.basename(local_file_path) @@ -182,7 +198,8 @@ def _upload_fi_oss_fuzz_data(self) -> str: def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str, results_dill_url: str, artifact_url: str, artifact_path: str, oss_fuzz_data_url: str, - data_dir_url: str, new_result_filename: str) -> str: + data_dir_url: str, new_result_filename: str, + experiment_url: str, experiment_path: str) -> str: """Requests Cloud Build to execute the operation.""" # Used for injecting additional OSS-Fuzz project integrations not in @@ -199,7 +216,7 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str, cloud_build_config = { 'steps': [ - # Step 1: Download the dill and artifact files from GCS bucket. + # Step 1: Download the dill, artifact and experiment files from GCS bucket. { 'name': 'bash', 'dir': '/workspace', @@ -232,6 +249,18 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str, ], 'allowFailure': True, }, + { + 'name': + 'gcr.io/cloud-builders/gsutil', + 'entrypoint': + 'bash', + 'args': [ + '-c', f'gsutil cp {experiment_url} /tmp/ofg-exp.tar.gz && ' + f'mkdir /workspace/host/{experiment_path} && ' + f'tar -xzf /tmp/ofg-exp.tar.gz -C /workspace/host/{experiment_path}' + ], + 'allowFailure': True, + }, # Step 2: Prepare OFG and OF repos. 
{ 'name': @@ -449,6 +478,18 @@ def run(self, agent: BaseAgent, result_history: list[Result], logging.info('Uploaded artifact to %s', artifact_url) else: logging.error('No artifact_path found in RunResult.') + + # TODO(pamusuo): Where should we get the experiment path from? self.exp_args.work_dirs.base + experiment_path = result_history[-1].work_dirs.base + experiment_url = '' + if os.path.exists(experiment_path): + experiment_url = self._upload_directory(experiment_path) + if experiment_url: + logging.info('Uploaded experiment to %s', experiment_url) + else: + logging.error('Experiment path %s empty or invalid.', + experiment_path) + oss_fuzz_data_url = self._upload_oss_fuzz_data() data_dir_url = self._upload_fi_oss_fuzz_data() @@ -457,7 +498,8 @@ def run(self, agent: BaseAgent, result_history: list[Result], build_id = self._request_cloud_build(ofg_url, agent_url, results_url, artifact_url, artifact_path, oss_fuzz_data_url, data_dir_url, - new_result_filename) + new_result_filename, experiment_url, + experiment_path) # Step 4: Download new result dill. cloud_build_log = '' diff --git a/experiment/workdir.py b/experiment/workdir.py index b3edb75523..8e4e15780d 100644 --- a/experiment/workdir.py +++ b/experiment/workdir.py @@ -44,6 +44,7 @@ def __init__(self, base_dir, keep: bool = False, create_children_dirs: bool = Tr os.makedirs(self.dills, exist_ok=True) os.makedirs(self.fuzz_targets, exist_ok=True) os.makedirs(self._artifact_base, exist_ok=True) + os.makedirs(self.requirements, exist_ok=True) def __repr__(self) -> str: return self._base_dir @@ -114,6 +115,10 @@ def dills(self) -> str: def run_logs(self) -> str: return os.path.join(self._base_dir, 'logs', 'run') + @property + def requirements(self) -> str: + return os.path.join(self._base_dir, 'requirements') + def build_logs_target(self, generated_target_name: str, iteration: int, trial: int) -> str: return os.path.join( diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index 0e2c741d48..96e01e95bd 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -587,7 +587,8 @@ def build(self, project_example_content: Optional[list[list[str]]] = None, project_context_content: Optional[dict] = None, tool_guides: str = '', - project_dir: str = '') -> prompts.Prompt: + project_dir: str = '', + function_requirements: str = '') -> prompts.Prompt: """Constructs a prompt using the templates in |self| and saves it.""" if not self.benchmark: return self._prompt @@ -599,6 +600,9 @@ def build(self, f' in your solution!\n') if project_context_content: final_problem += self.format_context(project_context_content) + if function_requirements: + final_problem += (f'\nHere are the requirements for the function:\n' + f'{function_requirements}\n') self._prepare_prompt(priming, final_problem, example_pair, project_example_content) self._prompt.append(tool_guides, True) @@ -627,7 +631,8 @@ def build(self, project_example_content: Optional[list[list[str]]] = None, project_context_content: Optional[dict] = None, tool_guides: str = '', - project_dir: str = '') -> prompts.Prompt: + project_dir: str = '', + function_requirements: str = '') -> prompts.Prompt: """Constructs a prompt using the templates in |self| and saves it.""" del (example_pair, project_example_content, project_context_content, tool_guides) @@ -673,7 +678,8 @@ def build(self, project_example_content: Optional[list[list[str]]] = None, project_context_content: Optional[dict] = None, tool_guides: str = '', - project_dir: str = '') -> prompts.Prompt: + 
project_dir: str = '', + function_requirements: str = '') -> prompts.Prompt: """Constructs a prompt using the templates in |self| and saves it.""" del (example_pair, project_example_content, project_context_content) if not self.benchmark: @@ -720,7 +726,8 @@ def build(self, project_example_content: Optional[list[list[str]]] = None, project_context_content: Optional[dict] = None, tool_guides: str = '', - project_dir: str = '') -> prompts.Prompt: + project_dir: str = '', + function_requirements: str = '') -> prompts.Prompt: """Constructs a prompt using the templates in |self| and saves it.""" del (example_pair, project_example_content, project_context_content) if not self.benchmark: @@ -779,7 +786,8 @@ def build(self, project_example_content: Optional[list[list[str]]] = None, project_context_content: Optional[dict] = None, tool_guides: str = '', - project_dir: str = '') -> prompts.Prompt: + project_dir: str = '', + function_requirements: str = '') -> prompts.Prompt: """Constructs a prompt using the templates in |self| and saves it.""" del (example_pair, project_example_content, project_context_content) if not self.benchmark: diff --git a/results.py b/results.py index 4dc6e9f4cf..cfac3d8a46 100644 --- a/results.py +++ b/results.py @@ -32,6 +32,7 @@ class Result: author: Any chat_history: dict _repr_exclude = {'_repr_exclude', 'chat_history'} + function_analysis: Optional['FunctionAnalysisResult'] def __init__(self, benchmark: Benchmark, @@ -41,7 +42,8 @@ def __init__(self, build_script_source: str = '', author: Any = None, chat_history: Optional[dict] = None, - default_success: bool = False) -> None: + default_success: bool = False, + function_analysis: Optional['FunctionAnalysisResult'] = None) -> None: self.benchmark = benchmark self.trial = trial self.work_dirs = work_dirs @@ -50,6 +52,7 @@ def __init__(self, self.author = author self.chat_history = chat_history or {} self.default_success = default_success + self.function_analysis = function_analysis def __repr__(self) -> str: attributes = [ @@ -736,3 +739,12 @@ def textcov_diff(self) -> textcov.Textcov: for result in self.trial_results: all_textcov.merge(result.textcov_diff) return all_textcov + + + +class FunctionAnalysisResult: + """The result of the function analyzer.""" + function_analysis_path: str + + def __init__(self, function_analysis_path: str): + self.function_analysis_path = function_analysis_path \ No newline at end of file From 40cbdb46b1787a4819ef3c446bba1638164c33bd Mon Sep 17 00:00:00 2001 From: DavidKorczynski Date: Sun, 1 Jun 2025 10:27:45 -0700 Subject: [PATCH 15/64] base-agent: adjust logging (#1075) It's a bit confusing we use CHAT for both chat and ask utility functions. This changes so it's clear from the logging when each of them is used. 
Signed-off-by: David Korczynski --- agent/base_agent.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/agent/base_agent.py b/agent/base_agent.py index e8d394117f..fbbc0aef48 100644 --- a/agent/base_agent.py +++ b/agent/base_agent.py @@ -77,14 +77,14 @@ def chat_llm(self, cur_round: int, client: Any, prompt: Prompt, return response def ask_llm(self, cur_round: int, prompt: Prompt, trial: int) -> str: - """Chat with LLM.""" - logger.info('%s', + """Ask LLM.""" + logger.info('%s', cur_round, prompt.gettext(), cur_round, trial=trial) response = self.llm.ask_llm(prompt=prompt) - logger.info('%s', + logger.info('%s', cur_round, response, cur_round, From be2edbacd98e77bd26f145069c9292b4ee2ccbb6 Mon Sep 17 00:00:00 2001 From: Maoyi Xie Date: Mon, 2 Jun 2025 13:05:42 +0800 Subject: [PATCH 16/64] Crash Analyzer Agent (#814) This PR mainly implements a crash analyzer that can interact with LLDB in the multi-agent framework, and supports GPT. In addition, this PR attempts to fix the problem of not replacing the fuzz target and build script. This PR is under testing. The main logic is no longer changing, and minor bugs are being fixed. TODO: Optimize the process of agent interaction with LLDB. Solve the problem of missing debugging information for some projects. Try to add LLM-based static methods to enhance the crash analyzer. --------- Co-authored-by: Dongge Liu --- agent/crash_analyzer.py | 201 ++++++++++++++++++++++- common/cloud_builder.py | 78 ++++++++- experiment/builder_runner.py | 81 ++++++--- experiment/evaluator.py | 60 +++++-- experiment/fuzz_target_error.py | 12 ++ experiment/workdir.py | 52 ++++-- llm_toolkit/crash_triager.py | 2 +- llm_toolkit/models.py | 74 ++++++++- llm_toolkit/prompt_builder.py | 81 +++++++++ llm_toolkit/prompts.py | 6 +- pipeline.py | 20 ++- prompts/agent/crash_analyzer-priming.txt | 19 +++ prompts/agent/prototyper-priming.txt | 18 +- prompts/template_xml/triager_priming.txt | 2 +- prompts/tool/container_tool.txt | 2 +- prompts/tool/lldb_tool.txt | 42 +++++ results.py | 59 ++++++- run_one_experiment.py | 2 + stage/analysis_stage.py | 5 +- stage/execution_stage.py | 21 ++- stage/writing_stage.py | 3 +- tool/container_tool.py | 12 +- tool/lldb_tool.py | 50 +++++- 23 files changed, 803 insertions(+), 99 deletions(-) create mode 100644 prompts/agent/crash_analyzer-priming.txt create mode 100644 prompts/tool/lldb_tool.txt diff --git a/agent/crash_analyzer.py b/agent/crash_analyzer.py index a298ec5207..8437bbac7a 100644 --- a/agent/crash_analyzer.py +++ b/agent/crash_analyzer.py @@ -14,8 +14,207 @@ """An LLM agent to analyze and provide insight of a fuzz target's runtime crash. Use it as a usual module locally, or as script in cloud builds. 
""" +import argparse +import os +import subprocess as sp +from typing import Optional + +import logger from agent.base_agent import BaseAgent +from experiment import evaluator as evaluator_lib +from experiment import oss_fuzz_checkout +from experiment.workdir import WorkDirs +from llm_toolkit import prompt_builder +from llm_toolkit.models import LLM +from llm_toolkit.prompts import Prompt +from results import AnalysisResult, CrashResult, Result, RunResult +from tool.base_tool import BaseTool +from tool.container_tool import ProjectContainerTool +from tool.lldb_tool import LLDBTool + +MAX_ROUND = 100 class CrashAnalyzer(BaseAgent): - pass + """The Agent to analyze a runtime crash and provide insight to fuzz target.""" + + def __init__(self, + trial: int, + llm: LLM, + args: argparse.Namespace, + tools: Optional[list[BaseTool]] = None, + name: str = '', + artifact_path: str = '') -> None: + super().__init__(trial, llm, args, tools, name) + self.artifact_path = artifact_path + + def _initial_prompt(self, results: list[Result]) -> Prompt: + """Constructs initial prompt of the agent.""" + last_result = results[-1] + + if isinstance(last_result, RunResult): + crash_analyzer_prompt_builder = \ + prompt_builder.CrashAnalyzerTemplateBuilder( + model=self.llm, + benchmark=last_result.benchmark) + prompt = crash_analyzer_prompt_builder.build_crash_analyzer_prompt( + last_result.benchmark, last_result.fuzz_target_source, + last_result.run_error, last_result.crash_func) + return prompt + + logger.error("Expected a RunResult object in results list", + trial=self.trial) + return prompt_builder.CrashAnalyzerTemplateBuilder(self.llm).build([]) + + def _format_lldb_execution_result( + self, + lldb_command: str, + process: sp.CompletedProcess, + previous_prompt: Optional[Prompt] = None) -> str: + """Formats a prompt based on lldb execution result.""" + if previous_prompt: + previous_prompt_text = previous_prompt.get() + else: + previous_prompt_text = '' + stdout = self.llm.truncate_prompt(process.stdout, + previous_prompt_text).strip() + stderr = self.llm.truncate_prompt(process.stderr, + stdout + previous_prompt_text).strip() + return (f'\n{lldb_command.strip()}\n\n' + f'\n{stdout}\n\n' + f'\n{stderr}\n\n') + + def _container_handle_lldb_command(self, response: str, tool: LLDBTool, + prompt: Prompt) -> Prompt: + """Handles the command from LLM with lldb tool.""" + prompt_text = '' + for command in self._parse_tags(response, 'lldb'): + process = tool.execute_in_screen(command) + prompt_text += self._format_lldb_execution_result( + command, process, previous_prompt=prompt) + '\n' + prompt.append(prompt_text) + return prompt + + def _container_handle_conclusion(self, cur_round: int, response: str, + crash_result: CrashResult) -> None: + """Parses LLM conclusion, analysis and suggestion.""" + logger.info('----- ROUND %02d Received conclusion -----', + cur_round, + trial=self.trial) + + conclusion = self._parse_tag(response, 'conclusion') + if conclusion == 'Crash is caused by bug in fuzz driver.': + crash_result.true_bug = False + elif conclusion == 'Crash is caused by bug in project.': + crash_result.true_bug = True + else: + logger.error('***** Failed to match conclusion in %02d rounds *****', + cur_round, + trial=self.trial) + + crash_result.insight = self._parse_tag(response, 'analysis and suggestion') + if not crash_result.insight: + logger.error('Round %02d No analysis and suggestion in conclusion: %s', + cur_round, + response, + trial=self.trial) + + def _container_tool_reaction(self, cur_round: int, 
response: str, + crash_result: CrashResult) -> Optional[Prompt]: + """Validates LLM conclusion or executes its command.""" + if self._parse_tag(response, 'conclusion'): + return self._container_handle_conclusion(cur_round, response, + crash_result) + prompt = prompt_builder.CrashAnalyzerTemplateBuilder(self.llm, + None).build([]) + if self._parse_tag(response, 'lldb'): + return self._container_handle_lldb_command(response, self.analyze_tool, + prompt) + if self._parse_tag(response, 'bash'): + return self._container_handle_bash_command(response, self.check_tool, + prompt) + return None + + def execute(self, result_history: list[Result]) -> AnalysisResult: + """Executes the agent based on previous run result.""" + WorkDirs(self.args.work_dirs.base, keep=True) + last_result = result_history[-1] + benchmark = last_result.benchmark + logger.info('Executing Crash Analyzer', trial=self.trial) + assert isinstance(last_result, RunResult) + + if not os.path.exists(last_result.artifact_path): + logger.error('Artifact path %s does not exist', + last_result.artifact_path, + trial=self.trial) + + # TODO(dongge): Move these to oss_fuzz_checkout. + generated_target_name = os.path.basename(benchmark.target_path) + sample_id = os.path.splitext(generated_target_name)[0] + generated_oss_fuzz_project = ( + f'{benchmark.id}-{sample_id}-lldb-{self.trial:02d}') + generated_oss_fuzz_project = oss_fuzz_checkout.rectify_docker_tag( + generated_oss_fuzz_project) + + # TODO(dongge): Write to OSS-Fuzz project dir files directly. + fuzz_target_path = os.path.join(last_result.work_dirs.fuzz_targets, + f'{self.trial:02d}.fuzz_target') + with open(fuzz_target_path, 'w') as ft_file: + ft_file.write(last_result.fuzz_target_source) + if last_result.build_script_source: + build_script_path = os.path.join(last_result.work_dirs.fuzz_targets, + f'{self.trial:02d}.build_script') + with open(build_script_path, 'w') as ft_file: + ft_file.write(last_result.build_script_source) + else: + build_script_path = '' + + evaluator_lib.Evaluator.create_ossfuzz_project_with_lldb( + benchmark, generated_oss_fuzz_project, fuzz_target_path, last_result, + build_script_path, last_result.artifact_path) + + self.analyze_tool = LLDBTool(benchmark, + result=last_result, + name='lldb', + project_name=generated_oss_fuzz_project) + self.analyze_tool.execute('compile > /dev/null') + # Launch LLDB and load fuzz target binary + self.analyze_tool.execute(f'screen -dmS lldb_session -L ' + f'-Logfile /tmp/lldb_log.txt ' + f'lldb /out/{last_result.benchmark.target_name}') + self.check_tool = ProjectContainerTool( + benchmark, name='check', project_name=generated_oss_fuzz_project) + self.check_tool.compile(extra_commands=' && rm -rf /out/* > /dev/null') + prompt = self._initial_prompt(result_history) + prompt.add_problem(self.analyze_tool.tutorial()) + prompt.add_problem(self.check_tool.tutorial()) + crash_result = CrashResult(benchmark=benchmark, + trial=last_result.trial, + work_dirs=last_result.work_dirs, + author=self, + chat_history={self.name: ''}) + cur_round = 1 + try: + client = self.llm.get_chat_client(model=self.llm.get_model()) + while prompt and cur_round < MAX_ROUND: + response = self.chat_llm(cur_round=cur_round, + client=client, + prompt=prompt, + trial=self.trial) + prompt = self._container_tool_reaction(cur_round, response, + crash_result) + cur_round += 1 + self._sleep_random_duration(trial=self.trial) + finally: + # Cleanup: stop the container + logger.debug('Stopping the crash analyze container %s', + self.analyze_tool.container_id, + 
trial=self.trial) + self.analyze_tool.terminate() + + analysis_result = AnalysisResult( + author=self, + run_result=last_result, + crash_result=crash_result, + chat_history={self.name: crash_result.to_dict()}) + return analysis_result diff --git a/common/cloud_builder.py b/common/cloud_builder.py index cb08d44077..79847da5a4 100644 --- a/common/cloud_builder.py +++ b/common/cloud_builder.py @@ -32,7 +32,7 @@ import utils from agent.base_agent import BaseAgent -from results import Result +from results import Result, RunResult OF_REPO = 'https://github.com/google/oss-fuzz.git' OFG_ROOT_DIR = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) @@ -82,11 +82,42 @@ def __init__(self, args: argparse.Namespace) -> None: def _upload_files(self, archive_name: str, target_dir: str, files_to_upload: list[str]) -> str: """Archive and upload files to GCS.""" + valid_files = [] + for f in files_to_upload: + file_path = os.path.join(target_dir, f) + if os.path.exists(file_path): + valid_files.append(f) + else: + logging.error("File does not exist: %s", file_path) + + valid_files.sort() + with tempfile.TemporaryDirectory() as tmpdirname: archive_path = os.path.join(tmpdirname, archive_name) - tar_command = ['tar', '-czf', archive_path] + files_to_upload - subprocess.run(tar_command, cwd=target_dir, check=True) - logging.info('Created archive: %s', archive_path) + tar_command = ['tar', '-czf', archive_path] + valid_files + logging.error("Archive path: %s (exists: %s)", archive_path, + os.path.exists(archive_path)) + logging.error("Tar command: %s", ' '.join(tar_command)) + + try: + result = subprocess.run(tar_command, + cwd=target_dir, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True) + logging.error("subprocess stdout:\n%s", result.stdout) + logging.error("subprocess stderr:\n%s", result.stderr) + except subprocess.CalledProcessError as e: + logging.error("Tar command failed with return code %d", e.returncode) + logging.error("stdout:\n%s", e.stdout) + logging.error("stderr:\n%s", e.stderr) + raise + + if os.path.exists(archive_path): + logging.info("Successfully created archive: %s", archive_path) + else: + logging.error("Failed to create archive: %s", archive_path) return self._upload_to_gcs(archive_path) def _upload_to_gcs(self, local_file_path: str) -> str: @@ -149,7 +180,8 @@ def _upload_fi_oss_fuzz_data(self) -> str: files_to_upload) def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str, - results_dill_url: str, oss_fuzz_data_url: str, + results_dill_url: str, artifact_url: str, + artifact_path: str, oss_fuzz_data_url: str, data_dir_url: str, new_result_filename: str) -> str: """Requests Cloud Build to execute the operation.""" @@ -167,7 +199,7 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str, cloud_build_config = { 'steps': [ - # Step 1: Download the dill files from GCS bucket. + # Step 1: Download the dill and artifact files from GCS bucket. 
{ 'name': 'bash', 'dir': '/workspace', @@ -183,6 +215,23 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str, 'dir': '/workspace', 'args': ['cp', results_dill_url, 'dills/result_history.pkl'] }, + { + 'name': 'gcr.io/cloud-builders/gsutil', + 'entrypoint': 'bash', + 'args': [ + '-c', + f'mkdir -p /workspace/host/{os.path.dirname(artifact_path)}' + ], + 'allowFailure': True, + }, + { + 'name': 'gcr.io/cloud-builders/gsutil', + 'dir': '/workspace', + 'args': [ + 'cp', artifact_url, f'/workspace/host/{artifact_path}' + ], + 'allowFailure': True, + }, # Step 2: Prepare OFG and OF repos. { 'name': @@ -256,6 +305,8 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str, '-v', '/workspace:/workspace', '-v', + '/workspace/host/experiment:/experiment', + '-v', '/var/run/docker.sock:/var/run/docker.sock', '-e', 'VERTEX_AI_LOCATIONS=' + @@ -275,7 +326,7 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str, '/workspace/dills/new_result.pkl' ], }, - # Step 4: Upload the result to GCS bucket + # Step 6: Upload the result to GCS bucket { 'name': 'bash', 'dir': '/workspace', @@ -388,12 +439,23 @@ def run(self, agent: BaseAgent, result_history: list[Result], ofg_url = self._prepare_and_upload_archive(result_history) agent_url = self._upload_to_gcs(agent_dill) results_url = self._upload_to_gcs(results_dill) + artifact_url = '' + artifact_path = '' + if isinstance(result_history[-1], RunResult): + artifact_path = result_history[-1].artifact_path + if artifact_path: + logging.info('Found artifact_path: %s in RunResult.', artifact_path) + artifact_url = self._upload_to_gcs(artifact_path) + logging.info('Uploaded artifact to %s', artifact_url) + else: + logging.error('No artifact_path found in RunResult.') oss_fuzz_data_url = self._upload_oss_fuzz_data() data_dir_url = self._upload_fi_oss_fuzz_data() # Step 3: Request Cloud Build. new_result_filename = f'{uuid.uuid4().hex}.pkl' build_id = self._request_cloud_build(ofg_url, agent_url, results_url, + artifact_url, artifact_path, oss_fuzz_data_url, data_dir_url, new_result_filename) @@ -416,7 +478,7 @@ def run(self, agent: BaseAgent, result_history: list[Result], cloud_build_log += self._get_build_log(build_id) - # Step 4: Deserialize dilld file. + # Step 5: Deserialize dilld file. result = utils.deserialize_from_dill(new_result_dill) if not result: cloud_build_log += f'Failed to deserialize from dill {new_result_dill}.\n' diff --git a/experiment/builder_runner.py b/experiment/builder_runner.py index 5e52add3b2..671dca0546 100644 --- a/experiment/builder_runner.py +++ b/experiment/builder_runner.py @@ -60,9 +60,10 @@ EARLY_FUZZING_ROUND_THRESHOLD = 3 -ParseResult = namedtuple( - 'ParseResult', - ['cov_pcs', 'total_pcs', 'crashes', 'crash_info', 'semantic_check_result']) +ParseResult = namedtuple('ParseResult', [ + 'cov_pcs', 'total_pcs', 'crashes', 'crash_info', 'artifact_name', + 'semantic_check_result' +]) @dataclasses.dataclass @@ -88,6 +89,9 @@ class RunResult: corpus_path: str = '' coverage_report_path: str = '' reproducer_path: str = '' + artifact_path: str = '' + artifact_name: str = '' + sanitizer: str = '' cov_pcs: int = 0 total_pcs: int = 0 crashes: bool = False @@ -350,7 +354,7 @@ def _parse_libfuzzer_logs(self, except MemoryError as e: # Some logs from abnormal fuzz targets are too large to be parsed. 
logger.error('%s is too large to parse: %s', log_handle.name, e) - return ParseResult(0, 0, False, '', + return ParseResult(0, 0, False, '', '', SemanticCheckResult(SemanticCheckResult.LOG_MESS_UP)) cov_pcs, total_pcs, crashes = 0, 0, False @@ -382,13 +386,14 @@ def _parse_libfuzzer_logs(self, crash_stacks = self._parse_stacks_from_libfuzzer_logs(lines) crash_func = self._parse_func_from_stacks(project_name, crash_stacks) crash_info = SemanticCheckResult.extract_crash_info(fuzzlog) + artifact_name = SemanticCheckResult.extract_artifact_name(fuzzlog) # FP case 1: Common fuzz target errors. # Null-deref, normally indicating inadequate parameter initialization or # wrong function usage. if symptom == 'null-deref': return ParseResult( - cov_pcs, total_pcs, True, crash_info, + cov_pcs, total_pcs, True, crash_info, artifact_name, SemanticCheckResult(SemanticCheckResult.NULL_DEREF, symptom, crash_stacks, crash_func)) @@ -396,7 +401,7 @@ def _parse_libfuzzer_logs(self, # parameter initialization or wrong function usage. if symptom == 'signal': return ParseResult( - cov_pcs, total_pcs, True, crash_info, + cov_pcs, total_pcs, True, crash_info, artifact_name, SemanticCheckResult(SemanticCheckResult.SIGNAL, symptom, crash_stacks, crash_func)) @@ -404,14 +409,14 @@ def _parse_libfuzzer_logs(self, # blocking its bug discovery. if symptom.endswith('fuzz target exited'): return ParseResult( - cov_pcs, total_pcs, True, crash_info, + cov_pcs, total_pcs, True, crash_info, artifact_name, SemanticCheckResult(SemanticCheckResult.EXIT, symptom, crash_stacks, crash_func)) # Fuzz target modified constants. if symptom.endswith('fuzz target overwrites its const input'): return ParseResult( - cov_pcs, total_pcs, True, crash_info, + cov_pcs, total_pcs, True, crash_info, artifact_name, SemanticCheckResult(SemanticCheckResult.OVERWRITE_CONST, symptom, crash_stacks, crash_func)) @@ -421,7 +426,7 @@ def _parse_libfuzzer_logs(self, # from reproducer name; 2) Capture the actual number in (malloc(\d+)). if 'out-of-memory' in symptom or 'out of memory' in symptom: return ParseResult( - cov_pcs, total_pcs, True, crash_info, + cov_pcs, total_pcs, True, crash_info, artifact_name, SemanticCheckResult(SemanticCheckResult.FP_OOM, symptom, crash_stacks, crash_func)) @@ -430,7 +435,7 @@ def _parse_libfuzzer_logs(self, # No cov line has been identified or only INITED round has been passed. # This is very likely the false positive cases. return ParseResult( - cov_pcs, total_pcs, True, crash_info, + cov_pcs, total_pcs, True, crash_info, artifact_name, SemanticCheckResult(SemanticCheckResult.FP_NEAR_INIT_CRASH, symptom, crash_stacks, crash_func)) @@ -441,13 +446,13 @@ def _parse_libfuzzer_logs(self, if self._stack_func_is_of_testing_project(stack_frame): if 'LLVMFuzzerTestOneInput' in stack_frame: return ParseResult( - cov_pcs, total_pcs, True, crash_info, + cov_pcs, total_pcs, True, crash_info, artifact_name, SemanticCheckResult(SemanticCheckResult.FP_TARGET_CRASH, symptom, crash_stacks, crash_func)) break return ParseResult( - cov_pcs, total_pcs, True, crash_info, + cov_pcs, total_pcs, True, crash_info, artifact_name, SemanticCheckResult(SemanticCheckResult.NO_SEMANTIC_ERR, symptom, crash_stacks, crash_func)) @@ -457,12 +462,28 @@ def _parse_libfuzzer_logs(self, # interesting inputs were found. This may happen if the target rejected # all inputs we tried. 
return ParseResult( - cov_pcs, total_pcs, False, '', + cov_pcs, total_pcs, False, '', '', SemanticCheckResult(SemanticCheckResult.NO_COV_INCREASE)) - return ParseResult(cov_pcs, total_pcs, crashes, '', + return ParseResult(cov_pcs, total_pcs, crashes, '', '', SemanticCheckResult(SemanticCheckResult.NO_SEMANTIC_ERR)) + def _copy_crash_file(self, outdir: str, artifact_dir: str, + run_result: RunResult) -> None: + """Copies the first crash file to the artifact directory.""" + # Only consider testcases starting with 'crash-' + crash_files = [ + f for f in os.listdir(outdir) + if f.startswith('crash-') and os.path.isfile(os.path.join(outdir, f)) + ] + if len(crash_files) != 0: + crash_file = crash_files[0] + src = os.path.join(outdir, crash_file) + dst = os.path.join(artifact_dir, crash_file) + run_result.artifact_path = dst + shutil.copy2(src, dst) + logger.info('Copied crash file %s to %s', crash_file, artifact_dir) + def build_and_run( self, generated_project: str, @@ -503,7 +524,7 @@ def build_and_run_local( benchmark_target_name = os.path.basename(target_path) project_target_name = os.path.basename(self.benchmark.target_path) benchmark_log_path = self.work_dirs.build_logs_target( - benchmark_target_name, iteration) + benchmark_target_name, iteration, trial) build_result.succeeded = self.build_target_local(generated_project, benchmark_log_path) if not build_result.succeeded: @@ -519,6 +540,11 @@ def build_and_run_local( run_log_path = os.path.join(self.work_dirs.run_logs, f'{trial:02d}.log') self.run_target_local(generated_project, benchmark_target_name, run_log_path) + artifact_dir = self.work_dirs.artifact(benchmark_target_name, iteration, + trial) + outdir = get_build_artifact_dir(generated_project, 'out') + self._copy_crash_file(outdir, artifact_dir, run_result) + run_result.coverage, run_result.coverage_summary = (self.get_coverage_local( generated_project, benchmark_target_name)) @@ -533,7 +559,7 @@ def build_and_run_local( flag = not self.benchmark.language in ['jvm', 'python', 'rust'] run_result.cov_pcs, run_result.total_pcs, \ run_result.crashes, run_result.crash_info, \ - run_result.semantic_check = \ + run_result.artifact_name, run_result.semantic_check = \ self._parse_libfuzzer_logs(f, project_name, flag) return build_result, run_result @@ -962,8 +988,8 @@ def build_and_run_cloud( generated_target_name = os.path.basename(target_path) with open( - self.work_dirs.build_logs_target(generated_target_name, iteration), - 'wb') as f: + self.work_dirs.build_logs_target(generated_target_name, iteration, + trial), 'wb') as f: blob = bucket.blob(build_log_name) if blob.exists(): logger.info('Downloading cloud build log of %s: %s to %s', @@ -990,7 +1016,8 @@ def build_and_run_cloud( if not build_result.succeeded: errors = code_fixer.extract_error_message( - self.work_dirs.build_logs_target(generated_target_name, iteration), + self.work_dirs.build_logs_target(generated_target_name, iteration, + trial), os.path.basename(self.benchmark.target_path), language) build_result.errors = errors logger.info('Cloud evaluation of %s indicates a failure: %s', @@ -1068,9 +1095,23 @@ def build_and_run_cloud( with open(run_log_path, 'rb') as f: run_result.cov_pcs, run_result.total_pcs, \ run_result.crashes, run_result.crash_info, \ - run_result.semantic_check = \ + run_result.artifact_name, run_result.semantic_check = \ self._parse_libfuzzer_logs(f, project_name) + artifact_dir = self.work_dirs.artifact(generated_target_name, iteration, + trial) + blobs = 
list(bucket.list_blobs(prefix=f'{reproducer_name}/artifacts/')) + if blobs: + blob = blobs[0] + artifact_path = os.path.join(artifact_dir, os.path.basename(blob.name)) + # TOOD: Some try-catch here. + blob.download_to_filename(artifact_path) + run_result.artifact_path = artifact_path + else: + logger.warning('Cloud evaluation of %s failed to downlod artifact:%s', + os.path.realpath(target_path), + f'{reproducer_name}/artifacts/') + return build_result, run_result def _copy_textcov_to_workdir(self, bucket, textcov_blob_path: str, diff --git a/experiment/evaluator.py b/experiment/evaluator.py index a21251a11c..6990279dfb 100644 --- a/experiment/evaluator.py +++ b/experiment/evaluator.py @@ -23,6 +23,7 @@ from google.cloud import storage +import results from experiment import builder_runner, oss_fuzz_checkout, textcov from experiment.benchmark import Benchmark from experiment.builder_runner import BuildResult, RunResult @@ -251,14 +252,18 @@ def __init__(self, runner: builder_runner.BuilderRunner, benchmark: Benchmark, self.benchmark = benchmark self.work_dirs = work_dirs - def build_log_path(self, generated_target_name: str, iteration: int): - return os.path.join(self.work_dirs.run_logs, - f'{generated_target_name}-F{iteration}.log') + def build_log_path(self, generated_target_name: str, iteration: int, + trial: int): + return os.path.join( + self.work_dirs.run_logs, + f'{generated_target_name}-F{iteration}-{trial:02d}.log') - def run_log_path(self, generated_target_name: str): - return os.path.join(self.work_dirs.run_logs, f'{generated_target_name}.log') + def run_log_path(self, generated_target_name: str, trial: int): + return os.path.join(self.work_dirs.run_logs, + f'{generated_target_name}-{trial:02d}.log') - def create_ossfuzz_project(self, + @staticmethod + def create_ossfuzz_project(benchmark: Benchmark, name: str, target_file: str, build_script_path: str = '') -> str: @@ -267,7 +272,7 @@ def create_ossfuzz_project(self, and build script with the new |target_file| and |build_script_path|.""" logger.info('target file: %s', target_file) generated_project_path = oss_fuzz_checkout.create_ossfuzz_project( - self.benchmark, name) + benchmark, name) # Copy generated fuzzers to generated_project_path shutil.copyfile( @@ -277,7 +282,7 @@ def create_ossfuzz_project(self, # Add additional statement in dockerfile to overwrite with generated fuzzer with open(os.path.join(generated_project_path, 'Dockerfile'), 'a') as f: f.write(f'\nCOPY {os.path.basename(target_file)} ' - f'{self.benchmark.target_path}\n') + f'{benchmark.target_path}\n') if not build_script_path or os.path.getsize(build_script_path) == 0: return name @@ -288,7 +293,8 @@ def create_ossfuzz_project(self, os.path.join(generated_project_path, os.path.basename('agent-build.sh'))) - # Add additional statement in dockerfile to overwrite with generated fuzzer + # Add additional statement in dockerfile to overwrite with generated + # build script with open(os.path.join(generated_project_path, 'Dockerfile'), 'a') as f: f.write('\nRUN cp /src/build.sh /src/build.bk.sh\n') with open(os.path.join(generated_project_path, 'Dockerfile'), 'a') as f: @@ -296,6 +302,39 @@ def create_ossfuzz_project(self, return name + @staticmethod + def create_ossfuzz_project_with_lldb(benchmark: Benchmark, + name: str, + target_file: str, + run_result: results.RunResult, + build_script_path: str = '', + artifact_path: str = '') -> str: + """Creates an OSS-Fuzz project with the generated target and new dockerfile. 
+ The new project will replicate an existing project |name| but replace its + fuzz target and build script with the new |target_file| and + |build_script_path| and modify its dockerfile.""" + Evaluator.create_ossfuzz_project(benchmark, name, target_file, + build_script_path) + generated_project_path = os.path.join(oss_fuzz_checkout.OSS_FUZZ_DIR, + 'projects', name) + + shutil.copyfile( + artifact_path, + os.path.join(generated_project_path, os.path.basename(artifact_path))) + # Add additional statement in dockerfile to copy testcase, + # enable -g, install lldb and screen + with open(os.path.join(generated_project_path, 'Dockerfile'), 'a') as f: + f.write( + '\nRUN mkdir -p /artifact\n' + f'\nCOPY {os.path.basename(run_result.artifact_path)} /artifact/\n' + '\nENV CFLAGS="${CFLAGS} -g -O0"\n' + '\nENV CXXFLAGS="${CXXFLAGS} -g -O0"\n' + '\nRUN apt-get update\n' + '\nRUN apt-get install -y lldb\n' + '\nRUN apt-get install -y screen\n') + + return name + def _fix_generated_fuzz_target(self, ai_binary: str, generated_oss_fuzz_project: str, target_path: str, iteration: int, @@ -380,7 +419,8 @@ def check_target(self, ai_binary, target_path: str) -> Result: generated_oss_fuzz_project = f'{self.benchmark.id}-{sample_id}' generated_oss_fuzz_project = oss_fuzz_checkout.rectify_docker_tag( generated_oss_fuzz_project) - self.create_ossfuzz_project(generated_oss_fuzz_project, target_path) + Evaluator.create_ossfuzz_project(self.benchmark, generated_oss_fuzz_project, + target_path) status_path = os.path.join(self.work_dirs.status, sample_id) os.makedirs(status_path, exist_ok=True) diff --git a/experiment/fuzz_target_error.py b/experiment/fuzz_target_error.py index 109e4e2ca5..cf305a8a97 100644 --- a/experiment/fuzz_target_error.py +++ b/experiment/fuzz_target_error.py @@ -52,6 +52,8 @@ class SemanticCheckResult: # Regex for extract crash information. INFO_CRASH = re.compile(r'ERROR: (.*?)(?=SUMMARY)', re.DOTALL) + # Regex for extract artifact file name. 
+ ARTIFACT_NAME = re.compile(r'(?<=written to ./)crash-[\w]+') NO_COV_INCREASE_MSG_PREFIX = 'No code coverage increasement' @@ -88,6 +90,16 @@ def extract_crash_info(cls, fuzzlog: str) -> str: logging.warning('Failed to match crash information.') return '' + @classmethod + def extract_artifact_name(cls, fuzzlog: str) -> str: + """Extracts artifact file name from fuzzing logs.""" + match = cls.ARTIFACT_NAME.search(fuzzlog) + if match: + return match.group(0).strip() + + logging.warning('Failed to match artifact file name.') + return 'testcase' + def __init__(self, err_type: str, crash_symptom: str = '', diff --git a/experiment/workdir.py b/experiment/workdir.py index d6d83951fe..b218956409 100644 --- a/experiment/workdir.py +++ b/experiment/workdir.py @@ -41,24 +41,37 @@ def __init__(self, base_dir, keep: bool = False): os.makedirs(self._corpus_base, exist_ok=True) os.makedirs(self.dills, exist_ok=True) os.makedirs(self.fuzz_targets, exist_ok=True) + os.makedirs(self._artifact_base, exist_ok=True) def __repr__(self) -> str: return self._base_dir @property - def base(self): + def base(self) -> str: return self._base_dir @property - def _corpus_base(self): + def _corpus_base(self) -> str: return os.path.join(self._base_dir, 'corpora') - def corpus(self, sample_id): + @property + def _artifact_base(self) -> str: + return os.path.join(self._base_dir, 'artifacts') + + def corpus(self, sample_id) -> str: corpus_dir = os.path.join(self._corpus_base, str(sample_id)) os.makedirs(corpus_dir, exist_ok=True) return corpus_dir - def code_coverage_report(self, benchmark): + def artifact(self, generated_target_name: str, iteration: int, + trial: int) -> str: + artifact_dir = os.path.join( + self._artifact_base, + f'{generated_target_name}-F{iteration}-{trial:02d}') + os.makedirs(artifact_dir, exist_ok=True) + return artifact_dir + + def code_coverage_report(self, benchmark) -> str: coverage_dir = os.path.join(self._base_dir, 'code-coverage-reports') os.makedirs(coverage_dir, exist_ok=True) @@ -66,46 +79,49 @@ def code_coverage_report(self, benchmark): return benchmark_coverage @property - def status(self): + def status(self) -> str: return os.path.join(self._base_dir, 'status') @property - def prompt(self): + def prompt(self) -> str: return os.path.join(self._base_dir, 'prompt.txt') @property - def fuzz_targets(self): + def fuzz_targets(self) -> str: return os.path.join(self._base_dir, 'fuzz_targets') # TODO(dongge): Deprecate this. @property - def raw_targets(self): + def raw_targets(self) -> str: return os.path.join(self._base_dir, 'raw_targets') # TODO(dongge): Deprecate this. 
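For reference, a standalone sketch (illustrative only, not part of the patch) of how the ARTIFACT_NAME pattern and extract_artifact_name() introduced in experiment/fuzz_target_error.py above behave on a typical libFuzzer log; the sample log text below is made up:

```python
# Illustrative only: exercises the ARTIFACT_NAME pattern added above against
# a hypothetical libFuzzer log excerpt.
import re

ARTIFACT_NAME = re.compile(r'(?<=written to ./)crash-[\w]+')

sample_log = ('==12345== ERROR: AddressSanitizer: heap-buffer-overflow ...\n'
              "artifact_prefix='./'; Test unit written to ./crash-0eb8e4ed029b\n"
              'SUMMARY: AddressSanitizer: heap-buffer-overflow')

match = ARTIFACT_NAME.search(sample_log)
# Falls back to 'testcase' when no artifact name is found, mirroring the patch.
print(match.group(0) if match else 'testcase')  # -> crash-0eb8e4ed029b
```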
@property - def fixed_targets(self): + def fixed_targets(self) -> str: return os.path.join(self._base_dir, 'fixed_targets') @property - def build_logs(self): + def build_logs(self) -> str: return os.path.join(self._base_dir, 'logs', 'build') @property - def dills(self): + def dills(self) -> str: return os.path.join(self._base_dir, 'dills') @property - def run_logs(self): + def run_logs(self) -> str: return os.path.join(self._base_dir, 'logs', 'run') - def build_logs_target(self, generated_target_name: str, iteration: int): - return os.path.join(self.build_logs, - f'{generated_target_name}-F{iteration}.log') + def build_logs_target(self, generated_target_name: str, iteration: int, + trial: int) -> str: + return os.path.join( + self.build_logs, + f'{generated_target_name}-F{iteration}-{trial:02d}.log') - def run_logs_target(self, generated_target_name: str, iteration: int): - return os.path.join(self.run_logs, - f'{generated_target_name}-F{iteration}.log') + def run_logs_target(self, generated_target_name: str, iteration: int, + trial: int) -> str: + return os.path.join( + self.run_logs, f'{generated_target_name}-F{iteration}-{trial:02d}.log') @classmethod def get_run_log_iteration(cls, filename: str) -> Optional[int]: diff --git a/llm_toolkit/crash_triager.py b/llm_toolkit/crash_triager.py index c352553479..6b4c9511ef 100644 --- a/llm_toolkit/crash_triager.py +++ b/llm_toolkit/crash_triager.py @@ -70,7 +70,7 @@ def llm_triage( logging.warning('LLM did not generate rawoutput for %s', prompt_path) return TriageResult.NOT_APPLICABLE - # TODO(fdt622): Use the common vote + # TODO(maoyixie): Use the common vote # Currently, we prefer the longest triage. preferred_triage_path, preferred_triage = max(triage_candidates, key=lambda x: len(x[1])) diff --git a/llm_toolkit/models.py b/llm_toolkit/models.py index 42afa18a20..edb5031d43 100644 --- a/llm_toolkit/models.py +++ b/llm_toolkit/models.py @@ -229,7 +229,6 @@ class GPT(LLM): """OpenAI's GPT model encapsulator.""" name = 'gpt-3.5-turbo' - MAX_INPUT_TOKEN = 100000 def get_model(self) -> Any: """Returns the underlying model instance.""" @@ -314,7 +313,7 @@ def prompt_type(self) -> type[prompts.Prompt]: return prompts.OpenAIPrompt def chat_llm(self, client: Any, prompt: prompts.Prompt) -> str: - """Queries LLM a single prompt and returns its response.""" + """Queries LLM in a chat session and returns its response.""" if self.ai_binary: raise ValueError(f'OpenAI does not use local AI binary: {self.ai_binary}') if self.temperature_list: @@ -386,6 +385,7 @@ class GPT4o(GPT): name = 'gpt-4o' MAX_INPUT_TOKEN = 128000 + _gpt_ai_model = 'gpt-4o' class ChatGPT4oLatest(GPT): @@ -393,6 +393,7 @@ class ChatGPT4oLatest(GPT): name = 'chatgpt-4o-latest' MAX_INPUT_TOKEN = 128000 + _gpt_ai_model = 'gpt-4o' class GPT4oMini(GPT): @@ -407,6 +408,75 @@ class GPT4Turbo(GPT): name = 'gpt-4-turbo' +class ChatGPT(GPT): + """OpenAI's GPT model with chat session.""" + + name = 'chatgpt-3.5-turbo' + + def __init__( + self, + ai_binary: str, + max_tokens: int = MAX_TOKENS, + num_samples: int = NUM_SAMPLES, + temperature: float = TEMPERATURE, + temperature_list: Optional[list[float]] = None, + ): + super().__init__(ai_binary, max_tokens, num_samples, temperature, + temperature_list) + self.conversation_history = [] + + def chat_llm(self, client: Any, prompt: prompts.Prompt) -> str: + """Queries the LLM in the given chat session and returns the response.""" + if self.ai_binary: + raise ValueError(f'OpenAI does not use local AI binary: {self.ai_binary}') + if 
self.temperature_list: + logger.info('OpenAI does not allow temperature list: %s', + self.temperature_list) + + self.conversation_history.extend(prompt.get()) + + completion = self.with_retry_on_error( + lambda: client.chat.completions.create( + messages=self.conversation_history, + model=self.name, + n=self.num_samples, + temperature=self.temperature), [openai.OpenAIError]) + + # Choose the longest response + longest_response = max( + (choice.message.content for choice in completion.choices), key=len) + self.conversation_history.append({ + 'role': 'assistant', + 'content': longest_response + }) + + return longest_response + + +class ChatGPT4(ChatGPT): + """OpenAI's GPT4 model with chat session.""" + + name = 'chatgpt-4' + + +class ChatGPT4o(ChatGPT): + """OpenAI's GPT-4o model with chat session.""" + + name = 'chatgpt-4o' + + +class ChatGPT4oMini(ChatGPT): + """OpenAI's GPT-4o-mini model with chat session.""" + + name = 'chatgpt-4o-mini' + + +class ChatGPT4Turbo(ChatGPT): + """OpenAI's GPT-4 Turbo model with chat session.""" + + name = 'chatgpt-4-turbo' + + class AzureGPT(GPT): """Azure's GPT model.""" diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index 27530101e2..a87d3a1afb 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -886,6 +886,87 @@ def build(self, 'Use build_instruction() or build_prompt() instead.') +class CrashAnalyzerTemplateBuilder(DefaultTemplateBuilder): + """Builder for C/C++.""" + + def __init__(self, + model: models.LLM, + benchmark: Optional[Benchmark] = None, + template_dir: str = DEFAULT_TEMPLATE_DIR, + initial: Any = None): + super().__init__(model, benchmark, template_dir, initial) + self.agent_templare_dir = AGENT_TEMPLATE_DIR + + self.crash_analyzer_priming_template_file = self._find_template( + self.agent_templare_dir, 'crash_analyzer-priming.txt') + + def _prepare_prompt( + self, + priming: str, + final_problem: str, + example_pair: Optional[list[list[str]]] = None, + project_example_content: Optional[list[list[str]]] = None): + """Constructs a prompt using the parameters and saves it.""" + self._prompt.add_priming(priming) + + def build_crash_analyzer_prompt(self, benchmark: Benchmark, driver_code: str, + crash_info: str, + crash_func: dict) -> prompts.Prompt: + """Prepares the crash analyzer prompt.""" + all_func_code = [] + for func_name, line_number in crash_func.items(): + if func_name == 'LLVMFuzzerTestOneInput': + driver_code = self._slice_driver_code(benchmark.project, driver_code, + line_number) + else: + func_code = self._slice_func_code(benchmark.project, func_name, + line_number) + all_func_code.append(func_code) + + with open(self.crash_analyzer_priming_template_file) as f: + priming = f.read().strip() + priming = priming.replace('{CRASH_REPORT}', crash_info.strip())\ + .replace('{DRIVER_CODE}', driver_code.strip()) + + priming_prompt = self._prompt.create_prompt_piece(priming, 'user') + template_piece = self._prompt.create_prompt_piece('{PROJECT_FUNCTION_CODE}', + 'user') + + priming_weight = self._model.estimate_token_num(priming_prompt) + template_weight = self._model.estimate_token_num(template_piece) + + prompt_size = priming_weight - template_weight + # Add extra 20-tokens redundancy + prompt_size += 20 + + # Add function code one by one until we reach the maximum prompt size + selected_func_code = [] + for func_code in all_func_code: + func_code_prompt = self._prompt.create_prompt_piece(func_code, 'user') + func_code_token_num = self._model.estimate_token_num(func_code_prompt) + 
if prompt_size + func_code_token_num >= self._model.context_window: + # The estimation is inaccurate, if an example's size equals to + # the limit, it's safer to not include the example. + logger.warning('Breaking because adding this function code \ + would exceed context window') + break + prompt_size += func_code_token_num + selected_func_code.append(func_code) + + project_function_code = '\n'.join(selected_func_code) + if project_function_code.strip(): + priming.replace('{PROJECT_FUNCTION_CODE}', project_function_code.strip()) + else: + logger.warning( + 'Empty project function code in triage prompt for project: %s, \ + function name: %s', benchmark.project, benchmark.function_name) + priming.replace('{PROJECT_FUNCTION_CODE}', \ + 'No relevant project function code') + + self._prepare_prompt(priming, '') + return self._prompt + + class DefaultJvmTemplateBuilder(PromptBuilder): """Default builder for JVM projects.""" diff --git a/llm_toolkit/prompts.py b/llm_toolkit/prompts.py index 469ac6d205..6459045b49 100644 --- a/llm_toolkit/prompts.py +++ b/llm_toolkit/prompts.py @@ -71,7 +71,7 @@ def __init__(self, initial=None): self._text = initial def append(self, text: str, to_existing: bool = False) -> None: - """Gets the final formatted prompt.""" + """Appends the final formatted prompt.""" # TextPrompt only got one text element, ignoring to_existing flag self._text += text @@ -92,7 +92,7 @@ def add_problem(self, problem_content: str) -> None: self._text += f'{problem_content}\n' def add_solution(self, solution_content: str) -> None: - """Constructs the prompt problem in the required format.""" + """Constructs the prompt solution in the required format.""" self._text += f'{solution_content}\n' def create_prompt_piece(self, content: str, role: str) -> Any: @@ -153,7 +153,7 @@ def add_problem(self, problem_content: str) -> None: }) def add_solution(self, solution_content: str) -> None: - """Constructs the prompt problem in the required format.""" + """Constructs the prompt solution in the required format.""" if not solution_content: logger.warning('Content is empty, skipping the prompt append process') return diff --git a/pipeline.py b/pipeline.py index a738d8e06a..13834ebbe4 100644 --- a/pipeline.py +++ b/pipeline.py @@ -28,9 +28,9 @@ class Pipeline(): 1. Writing stage generates or refines the fuzz target and its associated build script to improve code coverage and enhance bug-finding capabilities for the function under test. - 2. Evaluation stage assesses the fuzz target's performance by measuring + 2. Execution stage assesses the fuzz target's performance by measuring code coverage and detecting runtime crashes. - 3. Analysis stage examines the results from the evaluation stage, extracting + 3. Analysis stage examines the results from the execution stage, extracting insights from the coverage and crash data to suggest improvements for the writing stage in the next iteration. """ @@ -68,7 +68,12 @@ def _terminate(self, result_history: list[Result], cycle_count: int) -> bool: cycle_count, last_result) return True - if isinstance(last_result, AnalysisResult) and last_result.success: + if not isinstance(last_result, AnalysisResult): + self.logger.warning('[Cycle %d] Last result is not AnalysisResult: %s', + cycle_count, result_history) + return True + + if last_result.success: self.logger.info('[Cycle %d] Generation succeeds: %s', cycle_count, result_history) return True @@ -122,6 +127,11 @@ def _execute_one_cycle(self, result_history: list[Result], # Analysis stage. 
result_history.append( self.analysis_stage.execute(result_history=result_history)) + # TODO(maoyi): add the indicator for the success of analysis stage + if not isinstance(result_history[-1], AnalysisResult): + self.logger.warning( + '[Cycle %d] Analysis failure, skipping the rest steps', cycle_count) + return self._update_status(result_history=result_history) self.logger.info('[Cycle %d] Analysis result %s: %s', cycle_count, result_history[-1].success, result_history[-1]) @@ -131,8 +141,8 @@ def execute(self, result_history: list[Result]) -> list[Result]: Runs the fuzzing pipeline iteratively to assess and refine the fuzz target. 1. Writing Stage refines the fuzz target and its build script using insights from the previous cycle. - 2. Evaluation Stage measures the performance of the revised fuzz target. - 3. Analysis Stage examines the evaluation results to guide the next cycle's + 2. Execution Stage measures the performance of the revised fuzz target. + 3. Analysis Stage examines the execution results to guide the next cycle's improvements. The process repeats until the termination conditions are met. """ diff --git a/prompts/agent/crash_analyzer-priming.txt b/prompts/agent/crash_analyzer-priming.txt new file mode 100644 index 0000000000..0101bd16c2 --- /dev/null +++ b/prompts/agent/crash_analyzer-priming.txt @@ -0,0 +1,19 @@ +Given the following crash report, fuzz driver code and relevant project function code, analyze the cause of the crash using LLDB tool step by step. +First, make a conclusion, only answer “Crash is caused by bug in fuzz driver” or “Crash is caused by bug in project”. Second, offer succinct and to-the-point analyses and suggestions. + +Below is crash report: + +{CRASH_REPORT} + + +Below is driver code: + +{DRIVER_CODE} + + +Below is relevant project function code: + +{PROJECT_FUNCTION_CODE} + + +To help analyze the root cause behind the runtime crash, you can leverage LLDB tool and BASH tool to obtain information. diff --git a/prompts/agent/prototyper-priming.txt b/prompts/agent/prototyper-priming.txt index c2f6ec6a77..2c6479dc29 100644 --- a/prompts/agent/prototyper-priming.txt +++ b/prompts/agent/prototyper-priming.txt @@ -114,30 +114,30 @@ Step 9: Providing Your Conclusion: {TYPE_SPECIFIC_PRIMING} -3. Methodical Approach: +1. Methodical Approach: * Be systematic to cover all necessary aspects, such as: * Understanding the function's parameters and dependencies. * Identifying required header files and libraries. * Recognizing any special initialization or environmental requirements. -1. Utilizing Existing Examples: +2. Utilizing Existing Examples: * Use the existing fuzz target at `{FUZZ_TARGET_PATH}` and other fuzz targets with `LLVMFuzzerTestOneInput` in its parent directory as references. * Pay special attention to: * How header files are included. * The structure and content of the `LLVMFuzzerTestOneInput` function. * Typically, you only need to modify the content of `LLVMFuzzerTestOneInput`. -2. Investigating Header Inclusions: +3. Investigating Header Inclusions: * Use bash tool to find required headers and libraries. * Examine library files built by `/src/build.bk.sh` to understand available functions and symbols. -3. Modifying the Build Script (if necessary): +4. Modifying the Build Script (if necessary): * Modifying `/src/build.bk.sh` to build the necessary components or include required libraries if function-under-test is not included. - * The project's directory may contain a `README.md` with build instructions (e.g., at `/src//README.md` -4. 
Do Not Compile: + * The project's directory may contain a `README.md` with build instructions (e.g., at `/src//README.md`) +5. Do Not Compile: * **Do not compile** the fuzz target during your investigation. * Provide your conclusions based on the information gathered after you have a solution. -5. Formatting Code Snippets: +6. Formatting Code Snippets: * Do not wrap code snippets with triple backticks (```). * Use the specified XML-style tags for wrapping code and other content. -6. DO NOT send the early: Provide conclusions **only after** gathering all necessary information. -7. Focus on Final Goals: +7. DO NOT send the early: Provide conclusions **only after** gathering all necessary information. +8. Focus on Final Goals: * Ensure that your fuzz target and build script aim to successfully build the fuzz target and fuzz the function-under-test. diff --git a/prompts/template_xml/triager_priming.txt b/prompts/template_xml/triager_priming.txt index 079a7f847b..adadfa985c 100644 --- a/prompts/template_xml/triager_priming.txt +++ b/prompts/template_xml/triager_priming.txt @@ -1,3 +1,3 @@ Given the following crash report, fuzz driver code and relevant project function code, analyze the cause of the crash. -First, only answer “Crash is caused by bug in fuzz driver” or “Crash is caused by bug in project”. Second, offer a succinct and to-the-point analysis. +First, make a conclusion, only answer “Crash is caused by bug in fuzz driver” or “Crash is caused by bug in project”. Second, offer succinct and to-the-point analyses and suggestions. diff --git a/prompts/tool/container_tool.txt b/prompts/tool/container_tool.txt index 9270442ebd..50bc0653b2 100644 --- a/prompts/tool/container_tool.txt +++ b/prompts/tool/container_tool.txt @@ -1,5 +1,5 @@ -**Bash too Guilde** +**Bash tool Guide** Use the bash tool to investigate files in the fuzz target's build environment. This will help you understand the project source code, the function under test, its dependencies, and any compilation requirements. diff --git a/prompts/tool/lldb_tool.txt b/prompts/tool/lldb_tool.txt new file mode 100644 index 0000000000..8936eef789 --- /dev/null +++ b/prompts/tool/lldb_tool.txt @@ -0,0 +1,42 @@ + +**LLDB tool Guide** +You can leverage LLDB by iteractively sending me a LLDB command, and I will provide you with the output of the command. The path of fuzz driver binary is '/out/{TARGET_NAME}'. The testcase that triggers runtime crash is stored at '/artifact/{AFTIFACT_NAME}'. + + +1. I have executed 'lldb /out/{TARGET_NAME}'. You are now in LLDB session, NOT in shell session. DO NOT run 'lldb /out/{TARGET_NAME}' again! DO NOT run shell commands! +2. Strictly ONE LLDB command at a time! +3. Each message you send should first explain the reason why you want to run the command wrapped by , then provide the command to run wrapped in in this format: + +Reasons here. + + +One lldb command here. + +4. Each reponse I send will repeat the command you sent wrapped in for you to double-check, followed by the command standard output wrapped in and stderr wrapped in in this format: + +The command I executed, copied from the command you sent. + + +The standard output of the command. + + +The standard error of the command. + +5. The final goal is to answer questions about runtime crash, executed fuzz driver and project under test: a) ‘Crash is caused by bug in fuzz driver’ or ‘Crash is caused by bug in project’? b) If the crash is caused by bug in fuzz driver, provide analyses, and are there any suggestions for modifying the fuzz driver? 
c) If the crash is caused by bug in project, provide analyses, and are there any suggestions for patching the project? +6. If you have a conclusion on above questions, output the conclusion wrapped by followed by the analysis and suggestion wrapped in : + +‘Crash is caused by bug in fuzz driver’ or ‘Crash is caused by bug in project’. + + +Analysis and suggestion. + + + + +1. DO NOT wrap code snippets with ```, using the XML-style tags above will suffice. +2. DO NOT Compile or Run Code! +3. Strictly ONE LLDB command at a time! +4. DO NOT run 'lldb /out/{TARGET_NAME}' again! +5. DO NOT run shell commands! + + \ No newline at end of file diff --git a/results.py b/results.py index 135a86827d..4dc6e9f4cf 100644 --- a/results.py +++ b/results.py @@ -13,6 +13,7 @@ # limitations under the License. """The data structure of all result kinds.""" import dataclasses +import os from typing import Any, Optional from experiment import textcov @@ -127,11 +128,14 @@ class RunResult(BuildResult): """The fuzzing run-time result info.""" crashes: bool run_error: str + crash_func: dict run_log: str coverage_summary: dict coverage: float line_coverage_diff: float reproducer_path: str + artifact_path: str + sanitizer: str textcov_diff: Optional[textcov.Textcov] log_path: str corpus_path: str @@ -139,6 +143,9 @@ class RunResult(BuildResult): cov_pcs: int total_pcs: int _repr_exclude = BuildResult._repr_exclude | {'textcov_diff'} + err_type: str + crash_sypmtom: str + crash_stacks: Optional[list[list[str]]] def __init__( self, @@ -152,17 +159,23 @@ def __init__( is_function_referenced: bool = False, crashes: bool = False, # Runtime crash. run_error: str = '', # Runtime crash error message. + crash_func: Optional[dict] = None, run_log: str = '', # Full fuzzing output. coverage_summary: Optional[dict] = None, coverage: float = 0.0, line_coverage_diff: float = 0.0, textcov_diff: Optional[textcov.Textcov] = None, reproducer_path: str = '', + artifact_path: str = '', + sanitizer: str = '', log_path: str = '', corpus_path: str = '', coverage_report_path: str = '', cov_pcs: int = 0, total_pcs: int = 0, + err_type: str = SemanticCheckResult.NOT_APPLICABLE, + crash_sypmtom: str = '', + crash_stacks: Optional[list[list[str]]] = None, fuzz_target_source: str = '', build_script_source: str = '', author: Any = None, @@ -173,17 +186,27 @@ def __init__( chat_history) self.crashes = crashes self.run_error = run_error + self.crash_func = crash_func or {} self.run_log = run_log self.coverage_summary = coverage_summary or {} self.coverage = coverage self.line_coverage_diff = line_coverage_diff self.reproducer_path = reproducer_path + self.artifact_path = artifact_path + self.sanitizer = sanitizer self.textcov_diff = textcov_diff self.log_path = log_path self.corpus_path = corpus_path self.coverage_report_path = coverage_report_path self.cov_pcs = cov_pcs self.total_pcs = total_pcs + self.err_type = err_type + self.crash_sypmtom = crash_sypmtom + self.crash_stacks = crash_stacks or [] + + @property + def artifact_name(self) -> str: + return os.path.basename(self.artifact_path) def to_dict(self) -> dict: return super().to_dict() | { @@ -191,6 +214,8 @@ def to_dict(self) -> dict: self.crashes, 'run_error': self.run_error, + 'crash_func': + self.crash_func or {}, 'run_log': self.run_log, 'coverage_summary': @@ -201,6 +226,12 @@ def to_dict(self) -> dict: self.line_coverage_diff, 'reproducer_path': self.reproducer_path, + 'artifact_path': + self.artifact_path, + 'artifact_name': + self.artifact_name, + 'sanitizer': + self.sanitizer, 
'textcov_diff': dataclasses.asdict(self.textcov_diff) if self.textcov_diff else '', 'log_path': @@ -213,17 +244,41 @@ def to_dict(self) -> dict: self.cov_pcs, 'total_pcs': self.total_pcs, + 'err_type': + self.err_type, + 'crash_sypmtom': + self.crash_sypmtom, + 'crash_stacks': + self.crash_stacks, } # TODO(dongge): Define success property to show if the fuzz target was run. -class CrashResult(RunResult): +class CrashResult(Result): """The fuzzing run-time result with crash info.""" stacktrace: str true_bug: bool # True/False positive crash insight: str # Reason and fixes for crashes + def __init__(self, + *args, + stacktrace: str = '', + true_bug: bool = False, + insight: str = '', + **kwargs): + super().__init__(*args, **kwargs) + self.stacktrace = stacktrace + self.true_bug = true_bug + self.insight = insight + + def to_dict(self) -> dict: + return { + 'stacktrace': self.stacktrace, + 'true_bug': self.true_bug, + 'insight': self.insight, + } + class CoverageResult(): """The fuzzing run-time result with code coverage info.""" @@ -281,6 +336,8 @@ def to_dict(self) -> dict: self.coverage_result.to_dict() if self.coverage_result else {}, } + # TODO(maoyi): maybe we should redefine success property or + # rename the property @property def success(self) -> bool: if self.semantic_result: diff --git a/run_one_experiment.py b/run_one_experiment.py index 0a0d7f470c..cb1e9a0464 100644 --- a/run_one_experiment.py +++ b/run_one_experiment.py @@ -25,6 +25,7 @@ import logger import pipeline from agent.coverage_analyzer import CoverageAnalyzer +from agent.crash_analyzer import CrashAnalyzer from agent.enhancer import Enhancer from agent.one_prompt_enhancer import OnePromptEnhancer from agent.one_prompt_prototyper import OnePromptPrototyper @@ -256,6 +257,7 @@ def _fuzzing_pipeline(benchmark: Benchmark, model: models.LLM, CoverageAnalyzer(trial=trial, llm=model, args=args), + CrashAnalyzer(trial=trial, llm=model, args=args), ]) else: p = pipeline.Pipeline(args=args, diff --git a/stage/analysis_stage.py b/stage/analysis_stage.py index 04fe61c92a..a57eafec17 100644 --- a/stage/analysis_stage.py +++ b/stage/analysis_stage.py @@ -14,6 +14,7 @@ """The Analysis Stage class for examining the performance of fuzz targets. 
This stage is responsible for categorizing run-time crashes and detecting untested code blocks.""" + from results import Result, RunResult from stage.base_stage import BaseStage @@ -32,8 +33,9 @@ def execute(self, result_history: list[Result]) -> Result: self.logger.info('Analysis Stage') last_result = result_history[-1] assert isinstance(last_result, RunResult) + if last_result.crashes: - agent = self.get_agent(agent_name='SemanticAnalyzer') + agent = self.get_agent(agent_name='CrashAnalyzer') else: try: agent = self.get_agent(agent_name='CoverageAnalyzer') @@ -45,4 +47,5 @@ def execute(self, result_history: list[Result]) -> Result: self.logger.write_chat_history(analysis_result) self.logger.debug('Analysis stage completed with with result:\n%s', analysis_result) + return analysis_result diff --git a/stage/execution_stage.py b/stage/execution_stage.py index bdc1abb4ad..a7666728ae 100644 --- a/stage/execution_stage.py +++ b/stage/execution_stage.py @@ -57,14 +57,14 @@ def execute(self, result_history: list[Result]) -> Result: generated_oss_fuzz_project) fuzz_target_path = os.path.join(last_result.work_dirs.fuzz_targets, - f'{last_result.trial:02d}.fuzz_target') + f'{self.trial:02d}.fuzz_target') build_script_path = os.path.join(last_result.work_dirs.fuzz_targets, - f'{last_result.trial:02d}.build_script') - evaluator.create_ossfuzz_project(generated_oss_fuzz_project, + f'{self.trial:02d}.build_script') + evaluator.create_ossfuzz_project(benchmark, generated_oss_fuzz_project, fuzz_target_path, build_script_path) status_path = os.path.join(last_result.work_dirs.status, - f'{last_result.trial:02}') + f'{self.trial:02d}') os.makedirs(status_path, exist_ok=True) # Try building and running the new target. @@ -78,19 +78,19 @@ def execute(self, result_history: list[Result]) -> Result: raise TypeError try: - _, run_result = evaluator.builder_runner.build_and_run( + build_result, run_result = evaluator.builder_runner.build_and_run( generated_oss_fuzz_project, fuzz_target_path, 0, benchmark.language, cloud_build_tags=[ - str(last_result.trial), + str(self.trial), 'Execution', 'ofg', # TODO(dongge): Tag function name, compatible with tag format. last_result.benchmark.project, ], - trial=last_result.trial) + trial=self.trial) if not run_result: raise Exception('No RunResult received from build_and_run') if run_result.coverage_summary is None or run_result.coverage is None: @@ -138,7 +138,7 @@ def execute(self, result_history: list[Result]) -> Result: runresult = RunResult( benchmark=benchmark, - trial=last_result.trial, + trial=self.trial, work_dirs=last_result.work_dirs, fuzz_target_source=last_result.fuzz_target_source, build_script_source=last_result.build_script_source, @@ -150,12 +150,15 @@ def execute(self, result_history: list[Result]) -> Result: is_function_referenced=last_result.is_function_referenced, crashes=run_result.crashes, run_error=run_result.crash_info, + crash_func=run_result.semantic_check.crash_func, # TODO: This should be the content of log_path. 
run_log=run_log_content, coverage_summary=run_result.coverage_summary, coverage=coverage_percent, line_coverage_diff=coverage_diff, reproducer_path=run_result.reproducer_path, + artifact_path=run_result.artifact_path, + sanitizer=run_result.sanitizer, textcov_diff=run_result.coverage, log_path=run_result.log_path, corpus_path=run_result.corpus_path, @@ -167,7 +170,7 @@ def execute(self, result_history: list[Result]) -> Result: self.logger.error('Exception %s occurred on %s', e, last_result) runresult = RunResult( benchmark=benchmark, - trial=last_result.trial, + trial=self.trial, work_dirs=last_result.work_dirs, fuzz_target_source=last_result.fuzz_target_source, build_script_source=last_result.build_script_source, diff --git a/stage/writing_stage.py b/stage/writing_stage.py index e1f882fe3a..69799af8e5 100644 --- a/stage/writing_stage.py +++ b/stage/writing_stage.py @@ -57,6 +57,5 @@ def execute(self, result_history: list[Result]) -> Result: self.logger.write_fuzz_target(build_result) self.logger.write_build_script(build_result) self.logger.write_chat_history(build_result) - self.logger.debug('Writing stage completed with with result:\n%s', - build_result) + self.logger.debug('Writing stage completed with result:\n%s', build_result) return build_result diff --git a/tool/container_tool.py b/tool/container_tool.py index b4eb5e2f0c..5c75897e4a 100644 --- a/tool/container_tool.py +++ b/tool/container_tool.py @@ -25,9 +25,13 @@ class ProjectContainerTool(BaseTool): """A tool for LLM agents to interact within a project's docker container.""" - def __init__(self, benchmark: Benchmark, name: str = '') -> None: + def __init__(self, + benchmark: Benchmark, + name: str = '', + project_name: str = '') -> None: super().__init__(benchmark, name) - self.image_name = self._prepare_project_image() + self.project_name = project_name or benchmark.project + self.image_name = self._prepare_project_image(self.project_name) self.container_id = self._start_docker_container() self.build_script_path = '/src/build.sh' self._backup_default_build_script() @@ -38,13 +42,13 @@ def tutorial(self) -> str: return self._get_tutorial_file_content('container_tool.txt').replace( '{FUZZ_TARGET_PATH}', self.benchmark.target_path) - def _prepare_project_image(self) -> str: + def _prepare_project_image(self, project_name: str) -> str: """Prepares the project's OSS-Fuzz docker image and returns the image name. """ image_name = oss_fuzz_checkout.prepare_project_image(self.benchmark) if image_name: return image_name - raise Exception(f'Failed to build image for {self.benchmark.project}') + raise Exception(f'Failed to build image for {project_name}') def _execute_command_in_container(self, command: list[str]) -> sp.CompletedProcess: diff --git a/tool/lldb_tool.py b/tool/lldb_tool.py index 8cbeef932e..710d9751e1 100644 --- a/tool/lldb_tool.py +++ b/tool/lldb_tool.py @@ -12,8 +12,52 @@ # See the License for the specific language governing permissions and # limitations under the License. 
"""A tool for LLM agents to interact within a LLDB.""" -from tool.base_tool import BaseTool +import logging +import subprocess as sp +import time +from experiment.benchmark import Benchmark +from results import RunResult +from tool.container_tool import ProjectContainerTool -class LLDBTool(BaseTool): - pass +logger = logging.getLogger(__name__) + + +class LLDBTool(ProjectContainerTool): + """A tool for LLM agents to interact within a LLDB.""" + + def __init__(self, + benchmark: Benchmark, + result: RunResult, + name: str = '', + project_name: str = '') -> None: + super().__init__(benchmark, name, project_name) + self.result = result + + def tutorial(self) -> str: + """Constructs a tool guide tutorial for LLM agents.""" + return self._get_tutorial_file_content('lldb_tool.txt')\ + .replace('{AFTIFACT_NAME}', self.result.artifact_name)\ + .replace('{TARGET_NAME}', self.benchmark.target_name) + + def execute(self, command: str) -> sp.CompletedProcess: + """Executes the |command| in the container and returns the output.""" + logger.debug('Executing command (%s) in %s: ', command, self.container_id) + execute_command_in_container = [ + 'docker', 'exec', self.container_id, '/bin/bash', '-c', command + ] + process = self._execute_command_in_container(execute_command_in_container) + process.args = command + return process + + def execute_in_screen(self, lldb_command: str) -> sp.CompletedProcess: + """Sends a command to the lldb_session screen and returns LLDB output.""" + self.execute('screen -S lldb_session -X logfile flush 0') + self.execute('truncate -s 0 /tmp/lldb_log.txt') + + safe_cmd = lldb_command.replace('"', '\\"') + '\r' + self.execute(f'screen -S lldb_session -X stuff "{safe_cmd}"') + + time.sleep(1.0) + self.execute('screen -S lldb_session -X logfile flush 0') + return self.execute('cat /tmp/lldb_log.txt') From e95b0200b48ad9f2e2369817a268dc3b1ad07f3d Mon Sep 17 00:00:00 2001 From: DavidKorczynski Date: Mon, 2 Jun 2025 12:25:05 -0700 Subject: [PATCH 17/64] add initial OSS-Fuzz project build fixer (#1076) Fixing broken OSS-Fuzz projects is a common task and something we could leverage OFG to do. This adds an initial agent-based approach for doing this and it has already been used to fix https://github.com/google/oss-fuzz/pull/13389 and other projects locally. 
``` oss-fuzz-generator fix-build --project PROJECT_NAME --model ${MODEL} ``` --------- Signed-off-by: David Korczynski --- experiment/oss_fuzz_checkout.py | 40 +++ experimental/build_fixer/__init__.py | 14 ++ experimental/build_fixer/build_fix.py | 343 ++++++++++++++++++++++++++ experimental/build_fixer/templates.py | 111 +++++++++ experimental/end_to_end/cli.py | 29 ++- pyproject.toml | 2 + 6 files changed, 538 insertions(+), 1 deletion(-) create mode 100644 experimental/build_fixer/__init__.py create mode 100644 experimental/build_fixer/build_fix.py create mode 100644 experimental/build_fixer/templates.py diff --git a/experiment/oss_fuzz_checkout.py b/experiment/oss_fuzz_checkout.py index 542d9eaa9d..6c6bb19343 100644 --- a/experiment/oss_fuzz_checkout.py +++ b/experiment/oss_fuzz_checkout.py @@ -459,3 +459,43 @@ def prepare_project_image(benchmark: benchmarklib.Benchmark) -> str: else: logger.warning('Unable to find cached project image for %s', project) return _build_image(generated_oss_fuzz_project) + + +def create_ossfuzz_project_by_name(original_name: str, + generated_oss_fuzz_project: str) -> str: + """Creates an OSS-Fuzz project by replicating an existing project.""" + generated_project_path = os.path.join(OSS_FUZZ_DIR, 'projects', + generated_oss_fuzz_project) + if os.path.exists(generated_project_path): + logger.info('Project %s already exists.', generated_project_path) + return generated_project_path + + oss_fuzz_project_path = os.path.join(OSS_FUZZ_DIR, 'projects', original_name) + shutil.copytree(oss_fuzz_project_path, generated_project_path) + return generated_project_path + + +def prepare_project_image_by_name(project_name: str) -> str: + """Prepares original image of the |project_name|'s fuzz target build + container.""" + project = project_name + image_name = f'gcr.io/oss-fuzz/{project}' + generated_oss_fuzz_project = f'{project_name}-{uuid.uuid4().hex}' + generated_oss_fuzz_project = rectify_docker_tag(generated_oss_fuzz_project) + create_ossfuzz_project_by_name(project, generated_oss_fuzz_project) + + if not ENABLE_CACHING: + logger.warning('Disabled caching when building image for %s', project) + elif is_image_cached(project, 'address'): + logger.info('Will use cached instance.') + # Rewrite for caching. + rewrite_project_to_cached_project(project, generated_oss_fuzz_project, + 'address') + # Prepare build + prepare_build(project, 'address', generated_oss_fuzz_project) + # Build the image + logger.info('Using cached project image for %s: %s', + generated_oss_fuzz_project, image_name) + else: + logger.warning('Unable to find cached project image for %s', project) + return _build_image(generated_oss_fuzz_project) diff --git a/experimental/build_fixer/__init__.py b/experimental/build_fixer/__init__.py new file mode 100644 index 0000000000..cc75d884cb --- /dev/null +++ b/experimental/build_fixer/__init__.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python3 +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
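Before the agent itself (next file), a minimal sketch of how the new oss_fuzz_checkout helpers above are meant to be driven; this is illustrative only, assumes an OSS-Fuzz checkout has already been prepared by oss_fuzz_checkout, and uses 'libpng' as a placeholder project name:

```python
# Illustrative driver for the helpers added above; not part of the patch.
# Assumes OSS-Fuzz is already cloned and 'libpng' stands in for a real project.
from experiment import oss_fuzz_checkout

# Replicate the project under a fresh, uniquely tagged name and build its
# image, so the build fixer can rewrite build.sh without touching the original.
image_name = oss_fuzz_checkout.prepare_project_image_by_name('libpng')
print('Image prepared for build fixing:', image_name)
```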
diff --git a/experimental/build_fixer/build_fix.py b/experimental/build_fixer/build_fix.py new file mode 100644 index 0000000000..0fe7033283 --- /dev/null +++ b/experimental/build_fixer/build_fix.py @@ -0,0 +1,343 @@ +#!/usr/bin/env python3 +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Build fixer tooling.""" + +import os +import re +import shutil +import subprocess +import sys +import uuid +from typing import Optional + +import logger +from agent.base_agent import BaseAgent +from experiment import oss_fuzz_checkout +from experiment.benchmark import Benchmark +from experiment.workdir import WorkDirs +from experimental.build_fixer import templates +from llm_toolkit import models +from llm_toolkit.models import LLM +from llm_toolkit.prompts import Prompt +from results import BuildResult, Result +from tool.base_tool import BaseTool +from tool.container_tool import ProjectContainerTool + + +class BuildFixAgent(BaseAgent): + """Agent for fixing OSS-Fuzz project builds.""" + + def __init__(self, llm: LLM, project_name, work_dirs, args): + super().__init__(trial=1, llm=llm, args=args) + self.project_name = project_name + self.original_project_name = project_name + self.work_dirs = work_dirs + self.last_status = False + self.last_result = '' + self.compiles = False + self.check_all_passed = False + self.initial_error_result = '' + self.trial = 0 + + self.success_build_script = '' + + self.projet_language = oss_fuzz_checkout.get_project_language( + self.project_name) + + def _initial_prompt(self, results: list[Result]): + """Creates the initial prompt for the build fixer agent.""" + with open( + os.path.join(oss_fuzz_checkout.OSS_FUZZ_DIR, 'projects', + self.project_name, 'build.sh'), 'r') as f: + build_script = f.read() + + with open( + os.path.join(oss_fuzz_checkout.OSS_FUZZ_DIR, 'projects', + self.project_name, 'Dockerfile'), 'r') as f: + dockerfile = f.read() + + prompt = self.llm.prompt_type()(None) + + template_prompt = templates.BUILD_FIX_PROBLEM + template_prompt.replace('{DOCKERFILE}', dockerfile) + template_prompt.replace('{BUILD_SCRIPT}', build_script) + template_prompt.replace('{LOGS}', self.initial_error_result[-300:]) + template_prompt.replace('{MAX_DISCOVERY_ROUND}', str(self.args.max_round)) + + if self.projet_language.lower() == 'python': + template_prompt.replace('{LANGUAGE_SPECIFICS}', + templates.PYTHON_SPECIFICS) + else: + template_prompt.replace('{LANGUAGE_SPECIFICS}', '') + #prompt.add_priming(template_prompt) + + prompt.add_priming(templates.BUILD_FIXER_LLM_PRIMING) + prompt.add_problem(template_prompt) + return prompt + + def execute(self, result_history: list[Result]) -> BuildResult: + """Executes the build fixer agent.""" + result_name = oss_fuzz_checkout.prepare_project_image_by_name( + self.project_name) + + if not result_name: + logger.info(f'Failed to prepare project image for {self.project_name}.', + trial=self.trial) + sys.exit(1) + + self.project_name = result_name.split('/')[-1] + benchmark = Benchmark(self.project_name, 
self.project_name, '', '', '', '', + [], '') + + self.inspect_tool = ProjectContainerTool(benchmark, name='inspect') + result = self.inspect_tool.compile( + extra_commands=' && rm -rf /out/* > /dev/null') + + # If the build succeeded, we can exit + if result.returncode == 0: + logger.info(f'Build succeeded for {self.project_name}.', trial=self.trial) + logger.info('Nothing to fix.', trial=self.trial) + sys.exit(0) + + self.initial_error_result = result.stderr + + prompt = self._initial_prompt(result_history) + + build_result = BuildResult(benchmark=benchmark, + trial=0, + work_dirs=self.work_dirs, + author=self, + chat_history={self.name: ''}) + + # LLM iteration + cur_round = 0 + try: + client = self.llm.get_chat_client(model=self.llm.get_model()) + while prompt: + response = self.chat_llm(cur_round, + client=client, + prompt=prompt, + trial=cur_round) + prompt = self._container_tool_reaction(cur_round, response, + build_result) + if not prompt: + break + cur_round += 1 + finally: + self.inspect_tool.terminate() + return build_result + + def _parse_tag(self, response: str, tag: str) -> str: + """Parses the tag from LLM response.""" + patterns = [rf'<{tag}>(.*?)', rf'```{tag}(.*?)```'] + + # Matches both xml and code style tags + for pattern in patterns: + match = re.search(pattern, response, re.DOTALL) + if match: + return match.group(1).strip() + + return '' + + def _parse_tags(self, response: str, tag: str) -> list[str]: + """Parses the tags from LLM response.""" + patterns = [rf'<{tag}>(.*?)', rf'```{tag}(.*?)```'] + found_matches = [] + + # Matches both xml and code style tags + for pattern in patterns: + matches = re.findall(pattern, response, re.DOTALL) + found_matches.extend([content.strip() for content in matches]) + + return found_matches + + def _test_check_build(self, tool: BaseTool, build_script) -> bool: + """Helper to test the generated build script for introspector build.""" + + # Create a copy of the original project name + target_dst = self.original_project_name + '-copy-' + str( + uuid.uuid4().hex)[:8] + shutil.copytree( + os.path.join(oss_fuzz_checkout.OSS_FUZZ_DIR, 'projects', + self.original_project_name), + os.path.join(oss_fuzz_checkout.OSS_FUZZ_DIR, 'projects', target_dst)) + + self.success_build_script = build_script + # Overwrite the build script with the new one + with open( + os.path.join(oss_fuzz_checkout.OSS_FUZZ_DIR, 'projects', target_dst, + 'build.sh'), 'w') as f: + f.write(build_script) + # Build project + try: + subprocess.check_call( + f'python3 infra/helper.py build_fuzzers {target_dst}', + cwd=oss_fuzz_checkout.OSS_FUZZ_DIR, + shell=True) + except: + return False + + try: + subprocess.check_call(f'python3 infra/helper.py check_build {target_dst}', + cwd=oss_fuzz_checkout.OSS_FUZZ_DIR, + shell=True) + except: + return False + return True + + def _container_handle_bash_commands(self, response: str, tool: BaseTool, + prompt: Prompt) -> Prompt: + """Handles the command from LLM with container |tool|.""" + # Initialise variables + prompt_text = '' + success = False + self.invalid = False + self.missing_binary = False + + # Retrieve data from response + build_script = self._parse_tag(response, 'bash') + commands = '; '.join(self._parse_tags(response, 'command')) + + if commands: + self.discovery_stage = True + + # Execute the command directly, then return the formatted result + result = tool.execute(commands) + prompt_text = self._format_bash_execution_result(result, + previous_prompt=prompt) + if result.returncode == 0: + success = True + elif 
build_script: + self.discovery_stage = False + + # Restart the container to ensure a fresh session for test + if isinstance(tool, ProjectContainerTool): + tool.terminate() + tool = ProjectContainerTool(benchmark=tool.benchmark, name='test') + self.inspect_tool = tool + + # Fix shebang to ensure docker image failing is reflected. + lines = build_script.split('\n') + if lines[0].startswith("#!"): + lines[0] = "#!/bin/bash -eu" + else: + lines = ["#!/bin/bash -eu"] + lines + build_script = '\n'.join(lines) + + # Update build script + if isinstance(tool, ProjectContainerTool): + tool.write_to_file(build_script, tool.build_script_path) + + # Test and parse result + result = tool.execute('compile') + format_result = self._format_bash_execution_result( + result, previous_prompt=prompt) + prompt_text = self._parse_tag(format_result, 'stderr') + '\n' + if result.returncode == 0: + if result.returncode == 0: + success = True + self.compiles = True + + # Test check_all passes + if self._test_check_build(self.inspect_tool, build_script): + self.check_all_passed = True + else: + self.check_all_passed = False + else: + # Fuzzer binary not compiled correctly + success = False + self.missing_binary = True + else: + self.invalid = True + + self.last_status = success + self.last_result = prompt_text + + return prompt + + def _container_handle_conclusion(self, cur_round: int, response: str, + build_result: BuildResult, + prompt: Prompt) -> Optional[Prompt]: + """Runs a compilation tool to validate the new build script from LLM.""" + + # Don't need to check for invalid result + if self.invalid: + return prompt + + # Execution fail + if not self.compiles: + retry = templates.LLM_RETRY.replace('{BASH_RESULT}', self.last_result) + prompt.add_problem(retry) + + # Store build result + build_result.compiles = False + build_result.compile_error = self.last_result + + return prompt + if not self.check_all_passed: + retry = templates.LLM_RETRY_CHECK_ALL.replace('{BASH_RESULT}', + self.last_result) + prompt.add_problem(retry) + + # Store build result + build_result.compiles = False + build_result.compile_error = self.last_result + + return prompt + # Build script succeeded + return None + + def _container_tool_reaction(self, cur_round: int, response: str, + build_result: BuildResult) -> Optional[Prompt]: + """Validates LLM conclusion or executes its command.""" + prompt = self.llm.prompt_type()(None) + + if response: + prompt = self._container_handle_bash_commands(response, self.inspect_tool, + prompt) + + prompt = self._container_handle_conclusion(cur_round, response, + build_result, prompt) + if prompt is None: + logger.info('Succeeded fixing build script', trial=self.trial) + logger.info(self.success_build_script, trial=self.trial) + return None + + return prompt + + +def fix_build(args, oss_fuzz_base): + """Fixes the build of a given project.""" + + project_name = args.project + oss_fuzz_checkout.OSS_FUZZ_DIR = oss_fuzz_base + work_dirs = WorkDirs(args.work_dirs, keep=True) + + # Prepare LLM model + llm = models.LLM.setup( + ai_binary=os.getenv('AI_BINARY', ''), + name=args.model, + max_tokens=4096, + num_samples=1, + temperature=0.4, + temperature_list=[], + ) + llm.MAX_INPUT_TOKEN = 25000 + + # Set up Build fixer agent + agent = BuildFixAgent(llm, project_name, work_dirs, args) + + # Execute the agent + agent.execute([]) diff --git a/experimental/build_fixer/templates.py b/experimental/build_fixer/templates.py new file mode 100644 index 0000000000..d9fbfe6378 --- /dev/null +++ 
b/experimental/build_fixer/templates.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python3 +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Templates for the build fixer tool.""" + +BUILD_FIXER_LLM_PRIMING = ''' +You are a developer wanting to fix the build script of an OSS-Fuzz project. +''' + +BUILD_FIX_PROBLEM = """You are a security engineer that needs to fix an OSS-Fuzz +build project. + +OSS-Fuzz projects are composed of a Dockerfile, build.sh, and one or more fuzz +targets. The Dockerfile creates a Docker image that contains the build +environment, and the build.sh script is used to compile the project. +It is likely that the build.sh script is broken. You should focus only on +changing the build.sh and not the Dockerfile. + +Your task is to fix the build.sh script so that the project can be built successfully. + +### OSS-Fuzz Project Structure +- OSS-Fuzz is an open source project that enables continuous fuzzing of open + source software. +- OSS-Fuzz builds projects within a Docker container, this is the environment + the build script will run in. +- The build script is located at `/src/build.sh` inside the Docker container. +- It is very likely that only minor adjustments to the build script are needed + to fix the build. +- The build script is expected to produce one or more fuzzing harnesses, which + are the targets of the fuzzing process. +- The build script should not be expected to produce a final binary, but rather + the fuzzing harnesses that OSS-Fuzz will use. + +{LANGUAGE_SPECIFICS} + +### Provided Resources + +- Dockerfile: + + {DOCKERFILE} + + +- Build script + + {BUILD_SCRIPT} + + +- Initial failed build output: + + {LOGS} + + +### Interaction Protocol + +This is an **interactive process**. You must request commands to be run inside the Docker container to discover this information. + +You are limited to **{MAX_DISCOVERY_ROUND} discovery rounds**, so plan efficiently. + +Your result must only contain these XML tags. **NOTHING MORE**. +- `` – Use to request shell commands that will be executed in the container. You may include multiple semicolon-separated commands per tag, or use multiple tags. +- `` – Use when ready to output the **current version of the build script**. + +If the build script fails or produces errors, you are encouraged to **return to interaction mode** by providing new `` tags. Use them to inspect logs, echo error messages, or run diagnostic commands (e.g., view files in `/tmp`, rerun failing commands with `-v`, etc.). This allows you to iteratively understand and fix the issues. + + """ + +PYTHON_SPECIFICS = '''### OSS-Fuzz python projects + +The project you are working on is a Python project. +The build script should be as Pythonic as possible. +If the project has a "pyproject.toml" file, then we can likely install it using `python3 -m pip install .` +You must prioritise using Python modules by way of `python3`, meaning we want to use `python3 -m pip install ...` instead of `pip install ...`. 
+The build script you are working on is a Python project. +The target codebase must be build from scratch, meaning you should not install the target project using a pypi package. +If the build script does not unconditionally install the target codebase then the build script is not correct. +Make sure to install the target codebase and avoid using packages already in installed in the Docker image. +Avoid using `pip install .` and always use `python3 -m pip install .` instead. +''' + +LLM_RETRY = ''' +I failed to build the project with the above provided build script. +Please analyse the result and generate a new build script with the same assumption above. +You must only returns the content of the build script and nothing else more as always. +Your output must contain only one XML tag: + – wraps the complete build script for both the target project and the fuzzing harness. + +Here is a dump of the bash execution result. +{BASH_RESULT} +''' + +LLM_RETRY_CHECK_ALL = '''The build script worked, but failed to produce actual fuzzing harnesses. +It is likely the changes you made caused no fuzzing harnesses to be built. + +Please analyse the result and generate a new build script with the same assumption above. + +Your output must contain only one XML tag: + – wraps the complete build script for both the target project and the fuzzing harness. + +Here is a dump of the bash execution result. +{BASH_RESULT}''' diff --git a/experimental/end_to_end/cli.py b/experimental/end_to_end/cli.py index 0047a2c855..76796e3aa8 100644 --- a/experimental/end_to_end/cli.py +++ b/experimental/end_to_end/cli.py @@ -29,6 +29,7 @@ import yaml from data_prep import introspector +from experimental.build_fixer import build_fix from experimental.build_generator import runner from llm_toolkit import models @@ -472,7 +473,7 @@ def run_harness_generation(workdir, def setup_logging(): """Initiate logging.""" - logging.basicConfig(level=logging.INFO, format=LOG_FMT) + logging.basicConfig(level=logging.DEBUG, format=LOG_FMT) def _get_next_folder_in_idx(base_name): @@ -545,6 +546,15 @@ def run_build_generation(args): _run_build_generation(abs_workdir, out_folder, args) +def run_cmd_fix_build(args): + """Command entrypoint for fixing OSS-Fuzz build scripts.""" + workdir = setup_workdirs(None) + abs_workdir = os.path.abspath(workdir) + oss_fuzz_dir = os.path.join(abs_workdir, 'oss-fuzz') + args.work_dirs = 'work_dirs' + build_fix.fix_build(args, oss_fuzz_dir) + + def run_cmd_harness_generation(args): """Entrypoint for command for harness generation.""" @@ -642,6 +652,21 @@ def parse_commandline(): parser = argparse.ArgumentParser() subparsers = parser.add_subparsers(dest='command') + # Parser for fixing OSS-Fuzz build + fix_build_parser = subparsers.add_parser('fix-build', + help='Fixes OSS-Fuzz build scripts') + + fix_build_parser.add_argument('--project', + type=str, + help='The project to fix') + fix_build_parser.add_argument('--model', + help='The model to use for build fixing.') + fix_build_parser.add_argument('-mr', + '--max-round', + type=int, + default=5, + help='Max trial round for agents.') + # Run build generation. 
run_build_gen = subparsers.add_parser( 'generate-builds', @@ -719,6 +744,8 @@ def main(): run_build_generation(args) if args.command == 'generate-harnesses': run_cmd_harness_generation(args) + if args.command == 'fix-build': + run_cmd_fix_build(args) if __name__ == '__main__': diff --git a/pyproject.toml b/pyproject.toml index c76d0f8070..3b0873e178 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -92,6 +92,8 @@ py-modules = [ "experimental.build_generator.constants", "experimental.build_generator.post-process", "experimental.build_generator.templates", + "experimental.build_fixer.build_fix", + "experimental.build_fixer.templates", "experimental.end_to_end.cli", "experimental.from_scratch.generate", "experimental.jvm.utils", From d40bfa2ed8587303fcc1bf8c16a6ac8e2b2c7e65 Mon Sep 17 00:00:00 2001 From: DavidKorczynski Date: Mon, 2 Jun 2025 12:26:35 -0700 Subject: [PATCH 18/64] build_generator: llm_agent: fix typo (#1078) --- experimental/build_generator/llm_agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimental/build_generator/llm_agent.py b/experimental/build_generator/llm_agent.py index ef6bce85d6..351d394a90 100644 --- a/experimental/build_generator/llm_agent.py +++ b/experimental/build_generator/llm_agent.py @@ -34,7 +34,7 @@ class BuildScriptAgent(BaseAgent): - """Base class for buidl script agent.""" + """Base class for build script agent.""" def __init__(self, trial: int, From 149d47e28371367778cc880f5aeb270d6031b49b Mon Sep 17 00:00:00 2001 From: DavidKorczynski Date: Tue, 3 Jun 2025 07:21:29 -0700 Subject: [PATCH 19/64] build-fixer: improve capabilities (#1080) - Works better with C/CXX - Improve prompt writings - Improve OSS-Fuzz build_fuzzers/check_build validation - Improves code quality --------- Signed-off-by: David Korczynski --- experimental/build_fixer/build_fix.py | 190 ++++++++++++++++---------- experimental/build_fixer/templates.py | 32 +++-- experimental/end_to_end/cli.py | 2 +- 3 files changed, 141 insertions(+), 83 deletions(-) diff --git a/experimental/build_fixer/build_fix.py b/experimental/build_fixer/build_fix.py index 0fe7033283..aa0051e018 100644 --- a/experimental/build_fixer/build_fix.py +++ b/experimental/build_fixer/build_fix.py @@ -56,7 +56,7 @@ def __init__(self, llm: LLM, project_name, work_dirs, args): self.projet_language = oss_fuzz_checkout.get_project_language( self.project_name) - def _initial_prompt(self, results: list[Result]): + def _initial_prompt(self, results: list[Result]): # pylint: disable=unused-argument """Creates the initial prompt for the build fixer agent.""" with open( os.path.join(oss_fuzz_checkout.OSS_FUZZ_DIR, 'projects', @@ -88,7 +88,13 @@ def _initial_prompt(self, results: list[Result]): return prompt def execute(self, result_history: list[Result]) -> BuildResult: - """Executes the build fixer agent.""" + """Executes the build fixer agent. + Creates a container tool and performs an initial build attempt. + The output of the build is then used to generate a prompt, + and the agent then goes into the iterative process. + """ + + # Prepare an initial image build. result_name = oss_fuzz_checkout.prepare_project_image_by_name( self.project_name) @@ -101,6 +107,7 @@ def execute(self, result_history: list[Result]) -> BuildResult: benchmark = Benchmark(self.project_name, self.project_name, '', '', '', '', [], '') + # Initial run of compile. 
self.inspect_tool = ProjectContainerTool(benchmark, name='inspect') result = self.inspect_tool.compile( extra_commands=' && rm -rf /out/* > /dev/null') @@ -113,28 +120,37 @@ def execute(self, result_history: list[Result]) -> BuildResult: self.initial_error_result = result.stderr + # Prepare initial prompt. prompt = self._initial_prompt(result_history) - build_result = BuildResult(benchmark=benchmark, trial=0, work_dirs=self.work_dirs, author=self, chat_history={self.name: ''}) - # LLM iteration - cur_round = 0 + # Agent loop + self.trial = 0 try: client = self.llm.get_chat_client(model=self.llm.get_model()) while prompt: - response = self.chat_llm(cur_round, + logger.info(f'Agent Round {self.trial}', trial=self.trial) + # Pass prompt history to LLM and get response. + logger.info('Sending prompt to LLM', trial=self.trial) + response = self.chat_llm(self.trial, client=client, prompt=prompt, - trial=cur_round) - prompt = self._container_tool_reaction(cur_round, response, - build_result) + trial=self.trial) + + # Handle LLM response. + logger.info('Handling LLM response', trial=self.trial) + prompt = self._handle_llm_reponse(response, build_result) if not prompt: break - cur_round += 1 + if self.trial >= self.args.max_round: + logger.info(f'Max discovery rounds reached ({self.args.max_round}).', + trial=self.trial) + break + self.trial += 1 finally: self.inspect_tool.terminate() return build_result @@ -163,10 +179,9 @@ def _parse_tags(self, response: str, tag: str) -> list[str]: return found_matches - def _test_check_build(self, tool: BaseTool, build_script) -> bool: - """Helper to test the generated build script for introspector build.""" - - # Create a copy of the original project name + def _test_build_fuzzers( + self, build_script: str) -> tuple[subprocess.CompletedProcess, str]: + """Runs OSS-Fuzz's build_fuzzers command with the provided build script.""" target_dst = self.original_project_name + '-copy-' + str( uuid.uuid4().hex)[:8] shutil.copytree( @@ -181,36 +196,53 @@ def _test_check_build(self, tool: BaseTool, build_script) -> bool: 'build.sh'), 'w') as f: f.write(build_script) # Build project - try: - subprocess.check_call( - f'python3 infra/helper.py build_fuzzers {target_dst}', - cwd=oss_fuzz_checkout.OSS_FUZZ_DIR, - shell=True) - except: - return False - try: - subprocess.check_call(f'python3 infra/helper.py check_build {target_dst}', - cwd=oss_fuzz_checkout.OSS_FUZZ_DIR, - shell=True) - except: - return False - return True - - def _container_handle_bash_commands(self, response: str, tool: BaseTool, - prompt: Prompt) -> Prompt: - """Handles the command from LLM with container |tool|.""" + cmd = ['python3', 'infra/helper.py', 'build_fuzzers', target_dst] + result = subprocess.run(cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=False, + text=True, + encoding='utf-8', + errors='ignore', + cwd=oss_fuzz_checkout.OSS_FUZZ_DIR) + return result, target_dst + + def _test_check_fuzzers(self, target_dst) -> subprocess.CompletedProcess: + """Runs OSS-Fuzz's check_build command to evaluate build fuzzers.""" + + cmd = ['python3', 'infra/helper.py', 'check_build', target_dst] + result = subprocess.run(cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + check=False, + text=True, + encoding='utf-8', + errors='ignore', + cwd=oss_fuzz_checkout.OSS_FUZZ_DIR) + return result + + def _parse_llm_reponse_and_operate(self, response: str, tool: BaseTool, + prompt: Prompt) -> Prompt: + """Parses and LLM response and takes appropriate action. 
This includes + parsing bash commands to be executed in the container tool or extracting + the build script and testing it for compilation.""" # Initialise variables prompt_text = '' success = False self.invalid = False self.missing_binary = False + logger.info('=' * 80, trial=self.trial) + logger.info(response, trial=self.trial) + logger.info('=' * 80, trial=self.trial) + # Retrieve data from response build_script = self._parse_tag(response, 'bash') commands = '; '.join(self._parse_tags(response, 'command')) if commands: + logger.info('LLM Requested commands: %s', commands, trial=self.trial) self.discovery_stage = True # Execute the command directly, then return the formatted result @@ -220,14 +252,9 @@ def _container_handle_bash_commands(self, response: str, tool: BaseTool, if result.returncode == 0: success = True elif build_script: + logger.info('LLM Provided build script.', trial=self.trial) self.discovery_stage = False - # Restart the container to ensure a fresh session for test - if isinstance(tool, ProjectContainerTool): - tool.terminate() - tool = ProjectContainerTool(benchmark=tool.benchmark, name='test') - self.inspect_tool = tool - # Fix shebang to ensure docker image failing is reflected. lines = build_script.split('\n') if lines[0].startswith("#!"): @@ -236,29 +263,34 @@ def _container_handle_bash_commands(self, response: str, tool: BaseTool, lines = ["#!/bin/bash -eu"] + lines build_script = '\n'.join(lines) - # Update build script - if isinstance(tool, ProjectContainerTool): - tool.write_to_file(build_script, tool.build_script_path) - - # Test and parse result - result = tool.execute('compile') - format_result = self._format_bash_execution_result( - result, previous_prompt=prompt) - prompt_text = self._parse_tag(format_result, 'stderr') + '\n' - if result.returncode == 0: - if result.returncode == 0: - success = True - self.compiles = True - - # Test check_all passes - if self._test_check_build(self.inspect_tool, build_script): - self.check_all_passed = True - else: - self.check_all_passed = False - else: - # Fuzzer binary not compiled correctly - success = False - self.missing_binary = True + build_result, target_dst = self._test_build_fuzzers(build_script) + if build_result.returncode != 0: + logger.info('Build failed.', trial=self.trial) + parsed_stdout = build_result.stdout + tag = '---------------------------------------------------------------' + + parsed_stdout = tag.join(parsed_stdout.split(tag)[3:]) + prompt_text = 'Build failed, this is the output:\n' + prompt_text += f'{parsed_stdout}' + self.compiles = False + self.check_all_passed = False + success = False + else: + # Success build + logger.info('Build succeeded.', trial=self.trial) + logger.info('Testing fuzzers run.', trial=self.trial) + test_run_result = self._test_check_fuzzers(target_dst) + if test_run_result.returncode == 0: + logger.info('Fuzzers run successfully.', trial=self.trial) + self.check_all_passed = True + success = True + self.compiles = True + else: + logger.info('Fuzzers run failed.', trial=self.trial) + prompt_text = test_run_result.stdout + self.compiles = True + self.check_all_passed = False + success = False else: self.invalid = True @@ -267,17 +299,28 @@ def _container_handle_bash_commands(self, response: str, tool: BaseTool, return prompt - def _container_handle_conclusion(self, cur_round: int, response: str, - build_result: BuildResult, - prompt: Prompt) -> Optional[Prompt]: - """Runs a compilation tool to validate the new build script from LLM.""" + def 
_validate_operation_and_prepare_next_prompt( + self, build_result: BuildResult, prompt: Prompt) -> Optional[Prompt]: + """Interprets the results from operating on the LLM response and prepares + a new prompt for the next round of interaction.""" # Don't need to check for invalid result if self.invalid: return prompt # Execution fail + if self.discovery_stage: + logger.info('Validating BASH command response', trial=self.trial) + # Still in bash mode. + prompt.add_problem(self.last_result) + + # Store build result + build_result.compiles = False + build_result.compile_error = self.last_result + + return prompt if not self.compiles: + logger.info('Validation build failure response', trial=self.trial) retry = templates.LLM_RETRY.replace('{BASH_RESULT}', self.last_result) prompt.add_problem(retry) @@ -287,6 +330,7 @@ def _container_handle_conclusion(self, cur_round: int, response: str, return prompt if not self.check_all_passed: + logger.info('Validating check_build failure', trial=self.trial) retry = templates.LLM_RETRY_CHECK_ALL.replace('{BASH_RESULT}', self.last_result) prompt.add_problem(retry) @@ -299,20 +343,22 @@ def _container_handle_conclusion(self, cur_round: int, response: str, # Build script succeeded return None - def _container_tool_reaction(self, cur_round: int, response: str, - build_result: BuildResult) -> Optional[Prompt]: + def _handle_llm_reponse(self, response: str, + build_result: BuildResult) -> Optional[Prompt]: """Validates LLM conclusion or executes its command.""" prompt = self.llm.prompt_type()(None) if response: - prompt = self._container_handle_bash_commands(response, self.inspect_tool, - prompt) - - prompt = self._container_handle_conclusion(cur_round, response, - build_result, prompt) + prompt = self._parse_llm_reponse_and_operate(response, self.inspect_tool, + prompt) + logger.info('Handling conclusions', trial=self.trial) + prompt = self._validate_operation_and_prepare_next_prompt( + build_result, prompt) if prompt is None: logger.info('Succeeded fixing build script', trial=self.trial) + logger.info('-' * 25 + ' Build script: ' + '-' * 25, trial=self.trial) logger.info(self.success_build_script, trial=self.trial) + logger.info('-' * 60, trial=self.trial) return None return prompt diff --git a/experimental/build_fixer/templates.py b/experimental/build_fixer/templates.py index d9fbfe6378..9c64071b48 100644 --- a/experimental/build_fixer/templates.py +++ b/experimental/build_fixer/templates.py @@ -15,20 +15,20 @@ """Templates for the build fixer tool.""" BUILD_FIXER_LLM_PRIMING = ''' -You are a developer wanting to fix the build script of an OSS-Fuzz project. -''' - -BUILD_FIX_PROBLEM = """You are a security engineer that needs to fix an OSS-Fuzz -build project. - +You are an expert software developer that specializes in creating shell scripts that compile and build codebases. +You must support other developers when their codebases no longer build. +You have a technical tone that focus on clear and concise messaging. +You operate primarily by passing technical information wrapped in XML tags to helper developers. +You focus on generating bash commands and shell scripts that will build software. +Most of the codebases you repair are written in C, C++, or Python. +You are an experty in Python build systems and C/C++ build systems. +You are an expert in the OSS-Fuzz build system and you are able to fix broken build scripts. OSS-Fuzz projects are composed of a Dockerfile, build.sh, and one or more fuzz targets. 
The Dockerfile creates a Docker image that contains the build environment, and the build.sh script is used to compile the project. It is likely that the build.sh script is broken. You should focus only on changing the build.sh and not the Dockerfile. -Your task is to fix the build.sh script so that the project can be built successfully. - ### OSS-Fuzz Project Structure - OSS-Fuzz is an open source project that enables continuous fuzzing of open source software. @@ -42,6 +42,11 @@ - The build script should not be expected to produce a final binary, but rather the fuzzing harnesses that OSS-Fuzz will use. +''' + +BUILD_FIX_PROBLEM = """ +Your task is to fix the build.sh script so that the project can be built successfully. + {LANGUAGE_SPECIFICS} ### Provided Resources @@ -99,8 +104,15 @@ {BASH_RESULT} ''' -LLM_RETRY_CHECK_ALL = '''The build script worked, but failed to produce actual fuzzing harnesses. -It is likely the changes you made caused no fuzzing harnesses to be built. +LLM_RETRY_BASH = '''The output of the bash commands: + +{BASH_RESULT} + +''' + +LLM_RETRY_CHECK_ALL = '''The build script worked, but when checking if the +fuzzers run then the check failed. +It is likely the changes you made caused no fuzzing harnesses to be built or the fuzzing harnesses are not runnable outside the container. Please analyse the result and generate a new build script with the same assumption above. diff --git a/experimental/end_to_end/cli.py b/experimental/end_to_end/cli.py index 76796e3aa8..d20b9d241c 100644 --- a/experimental/end_to_end/cli.py +++ b/experimental/end_to_end/cli.py @@ -664,7 +664,7 @@ def parse_commandline(): fix_build_parser.add_argument('-mr', '--max-round', type=int, - default=5, + default=20, help='Max trial round for agents.') # Run build generation. From 98a7a02f7d5389cb1c0e447355518f0e75750c36 Mon Sep 17 00:00:00 2001 From: DavidKorczynski Date: Tue, 3 Jun 2025 13:13:36 -0700 Subject: [PATCH 20/64] build-fixer: add simple build output truncation (#1082) Signed-off-by: David Korczynski --- experimental/build_fixer/build_fix.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/experimental/build_fixer/build_fix.py b/experimental/build_fixer/build_fix.py index aa0051e018..cf6c4b5054 100644 --- a/experimental/build_fixer/build_fix.py +++ b/experimental/build_fixer/build_fix.py @@ -222,6 +222,12 @@ def _test_check_fuzzers(self, target_dst) -> subprocess.CompletedProcess: cwd=oss_fuzz_checkout.OSS_FUZZ_DIR) return result + def _simple_truncate_build_output(self, output: str) -> str: + """Truncates the build output to a manageable size.""" + if len(output) > 8000: + return output[:1500] + '\n... (truncated)' + output[-6500:] + return output + def _parse_llm_reponse_and_operate(self, response: str, tool: BaseTool, prompt: Prompt) -> Prompt: """Parses and LLM response and takes appropriate action. 
This includes @@ -271,6 +277,7 @@ def _parse_llm_reponse_and_operate(self, response: str, tool: BaseTool, parsed_stdout = tag.join(parsed_stdout.split(tag)[3:]) prompt_text = 'Build failed, this is the output:\n' + parsed_stdout = self._simple_truncate_build_output(parsed_stdout) prompt_text += f'{parsed_stdout}' self.compiles = False self.check_all_passed = False From 96959113dde59849591911189ed86aa302258a7c Mon Sep 17 00:00:00 2001 From: DavidKorczynski Date: Wed, 4 Jun 2025 03:01:32 -0700 Subject: [PATCH 21/64] build-fixer: fix template creation (#1083) Signed-off-by: David Korczynski --- experimental/build_fixer/build_fix.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/experimental/build_fixer/build_fix.py b/experimental/build_fixer/build_fix.py index cf6c4b5054..0e86c74623 100644 --- a/experimental/build_fixer/build_fix.py +++ b/experimental/build_fixer/build_fix.py @@ -71,16 +71,18 @@ def _initial_prompt(self, results: list[Result]): # pylint: disable=unused-argu prompt = self.llm.prompt_type()(None) template_prompt = templates.BUILD_FIX_PROBLEM - template_prompt.replace('{DOCKERFILE}', dockerfile) - template_prompt.replace('{BUILD_SCRIPT}', build_script) - template_prompt.replace('{LOGS}', self.initial_error_result[-300:]) - template_prompt.replace('{MAX_DISCOVERY_ROUND}', str(self.args.max_round)) + template_prompt = template_prompt.replace('{DOCKERFILE}', dockerfile) + template_prompt = template_prompt.replace('{BUILD_SCRIPT}', build_script) + template_prompt = template_prompt.replace('{LOGS}', + self.initial_error_result[-300:]) + template_prompt = template_prompt.replace('{MAX_DISCOVERY_ROUND}', + str(self.args.max_round)) if self.projet_language.lower() == 'python': - template_prompt.replace('{LANGUAGE_SPECIFICS}', - templates.PYTHON_SPECIFICS) + template_prompt = template_prompt.replace('{LANGUAGE_SPECIFICS}', + templates.PYTHON_SPECIFICS) else: - template_prompt.replace('{LANGUAGE_SPECIFICS}', '') + template_prompt = template_prompt.replace('{LANGUAGE_SPECIFICS}', '') #prompt.add_priming(template_prompt) prompt.add_priming(templates.BUILD_FIXER_LLM_PRIMING) From 893edc5995a6079b4183eec11ead500f2125a68d Mon Sep 17 00:00:00 2001 From: DavidKorczynski Date: Wed, 4 Jun 2025 07:11:24 -0700 Subject: [PATCH 22/64] build-fixer: add function tool based flow (#1084) Signed-off-by: David Korczynski --- agent/base_agent.py | 20 +++ experimental/build_fixer/build_fix.py | 197 +++++++++++++++++++++++++- experimental/build_fixer/templates.py | 27 ++++ llm_toolkit/models.py | 45 ++++++ pyproject.toml | 2 +- 5 files changed, 283 insertions(+), 8 deletions(-) diff --git a/agent/base_agent.py b/agent/base_agent.py index fbbc0aef48..064e38960f 100644 --- a/agent/base_agent.py +++ b/agent/base_agent.py @@ -60,6 +60,26 @@ def get_tool(self, tool_name: str) -> Optional[BaseTool]: return tool return None + def chat_llm_with_tools(self, client: Any, prompt: Prompt, tools, + trial) -> Any: + """Chat with LLM with tools.""" + logger.info( + '%s', + trial, + prompt.gettext() if prompt else '', + trial, + trial=trial) + response = self.llm.chat_llm_with_tools(client=client, + prompt=prompt, + tools=tools) + logger.info( + '%s', + trial, + response, + trial, + trial=trial) + return response + def chat_llm(self, cur_round: int, client: Any, prompt: Prompt, trial: int) -> str: """Chat with LLM.""" diff --git a/experimental/build_fixer/build_fix.py b/experimental/build_fixer/build_fix.py index 0e86c74623..f923a3ef23 100644 --- a/experimental/build_fixer/build_fix.py +++ 
b/experimental/build_fixer/build_fix.py @@ -14,6 +14,7 @@ # limitations under the License. """Build fixer tooling.""" +import json import os import re import shutil @@ -35,11 +36,50 @@ from tool.base_tool import BaseTool from tool.container_tool import ProjectContainerTool +FIXER_TOOLS = [{ + 'type': 'function', + 'name': 'test_build_script', + 'description': 'Tests a build script against target project.', + 'parameters': { + 'type': 'object', + 'properties': { + 'build_script': { + 'type': 'string', + 'description': 'Bash script that builds the project.' + } + }, + 'required': ['build_script'], + 'additionalProperties': False + } +}, { + 'type': 'function', + 'name': 'run_commands_in_container', + 'description': 'Runs a command string in the project container.', + 'parameters': { + 'type': 'object', + 'properties': { + 'command': { + 'type': + 'string', + 'description': + 'Bash commands separated by \';\' to run in the container.' + } + }, + 'required': ['command'], + 'additionalProperties': False + } +}] + class BuildFixAgent(BaseAgent): """Agent for fixing OSS-Fuzz project builds.""" - def __init__(self, llm: LLM, project_name, work_dirs, args): + def __init__(self, + llm: LLM, + project_name, + work_dirs, + args, + use_tools: bool = True): super().__init__(trial=1, llm=llm, args=args) self.project_name = project_name self.original_project_name = project_name @@ -51,12 +91,14 @@ def __init__(self, llm: LLM, project_name, work_dirs, args): self.initial_error_result = '' self.trial = 0 + self.use_tools = use_tools + self.success_build_script = '' self.projet_language = oss_fuzz_checkout.get_project_language( self.project_name) - def _initial_prompt(self, results: list[Result]): # pylint: disable=unused-argument + def _initial_prompt(self, results: list[Result], is_tools: bool = True): # pylint: disable=unused-argument """Creates the initial prompt for the build fixer agent.""" with open( os.path.join(oss_fuzz_checkout.OSS_FUZZ_DIR, 'projects', @@ -70,7 +112,10 @@ def _initial_prompt(self, results: list[Result]): # pylint: disable=unused-argu prompt = self.llm.prompt_type()(None) - template_prompt = templates.BUILD_FIX_PROBLEM + if is_tools: + template_prompt = templates.BUILD_FIX_PROBLEM_TOOLS + else: + template_prompt = templates.BUILD_FIX_PROBLEM template_prompt = template_prompt.replace('{DOCKERFILE}', dockerfile) template_prompt = template_prompt.replace('{BUILD_SCRIPT}', build_script) template_prompt = template_prompt.replace('{LOGS}', @@ -118,18 +163,157 @@ def execute(self, result_history: list[Result]) -> BuildResult: if result.returncode == 0: logger.info(f'Build succeeded for {self.project_name}.', trial=self.trial) logger.info('Nothing to fix.', trial=self.trial) + self.inspect_tool.terminate() sys.exit(0) self.initial_error_result = result.stderr # Prepare initial prompt. 
- prompt = self._initial_prompt(result_history) + prompt = self._initial_prompt(result_history, self.use_tools) build_result = BuildResult(benchmark=benchmark, trial=0, work_dirs=self.work_dirs, author=self, chat_history={self.name: ''}) + if self.use_tools: + self._agent_run_function_based_loop(prompt, build_result) + else: + self._agent_raw_loop(prompt, build_result) + return build_result + + def _agent_run_function_based_loop( + self, prompt: Optional[Prompt], build_result: BuildResult) -> None: # pylint: disable=unused-argument + """Runs the agent loop using a function-based approach.""" + + # Agent loop + try: + client = self.llm.get_chat_client(model=self.llm.get_model()) + + extended_messages = False + cur_round = 0 + success = False + while prompt or extended_messages: + logger.info(f'Agent Round {cur_round}', trial=self.trial) + cur_round += 1 + if cur_round > self.args.max_round: + sys.exit(0) + + # Pass prompt history to LLM and get response. + logger.info('Sending prompt to LLM', trial=self.trial) + if prompt is None: + return + + response = self.chat_llm_with_tools(client, prompt, FIXER_TOOLS, + self.trial) + + tools_analysed = 0 + extended_messages = False + # handle each of the tool calls in the response. + logger.info('Iterating response output', trial=self.trial) + for tool_call in response.output: + logger.info('- Response out:' + str(tool_call), trial=self.trial) + if tool_call.type != 'function_call': + continue + tools_analysed += 1 + logger.info('Handling tool call %s', tool_call.name, trial=self.trial) + logger.info('Tool call arguments: %s', + tool_call.arguments, + trial=self.trial) + if tool_call.name == 'test_build_script': + arguments = json.loads(tool_call.arguments) + build_fuzzers_result, target_dst = self._test_build_fuzzers( + arguments['build_script']) + if build_fuzzers_result.returncode != 0: + logger.info('Build failed.', trial=self.trial) + parsed_stdout = build_fuzzers_result.stdout + parsed_stdout = self._simple_truncate_build_output(parsed_stdout) + + logger.info('Parsed stdout: %s', parsed_stdout, trial=self.trial) + + self.llm.messages.append(tool_call) + self.llm.messages.append({ + 'type': 'function_call_output', + 'call_id': tool_call.call_id, + 'output': str(parsed_stdout) + }) + extended_messages = True + prompt = None + + else: + logger.info('Build succeeded.', trial=self.trial) + # Testing fuzzers run. 
+ test_run_result = self._test_check_fuzzers(target_dst) + if test_run_result.returncode == 0: + logger.info('Fuzzers run successfully.', trial=self.trial) + success = True + self.success_build_script = arguments['build_script'] + prompt = None + extended_messages = False + else: + logger.info('Fuzzers run failed.', trial=self.trial) + prompt_text = test_run_result.stdout + success = False + self.llm.messages.append(tool_call) + self.llm.messages.append({ + 'type': 'function_call_output', + 'call_id': tool_call.call_id, + 'output': str(prompt_text) + }) + extended_messages = True + prompt = None + + elif tool_call.name == 'run_commands_in_container': + arguments = json.loads(tool_call.arguments) + logger.info(json.dumps(arguments, indent=2), trial=self.trial) + + # Execute the command directly, then return the formatted result + commands = arguments['command'] + logger.info('LLM Requested commands: %s', + commands, + trial=self.trial) + result = self.inspect_tool.execute(commands) + prompt_text = self._format_bash_execution_result( + result, previous_prompt=prompt) + + prompt_text = self._simple_truncate_build_output(prompt_text) + + self.llm.messages.append(tool_call) + self.llm.messages.append({ + 'type': 'function_call_output', + 'call_id': tool_call.call_id, + 'output': str(prompt_text) + }) + extended_messages = True + prompt = None + else: + logger.info('Unsupported tool call: %s', + tool_call.name, + trial=self.trial) + + if tools_analysed == 0 and not success: + logger.info( + 'Did not execute any tool calls. At the moment we do not ' + 'support this, but we should add support for it.', + trial=self.trial) + prompt = prompt = self.llm.prompt_type()(None) + prompt.add_problem( + 'I was unable to interpret your last message. Use tool ' + 'calls to direct this process instead of messages.') + cur_round -= 1 + + if success: + logger.info('Succeeded fixing build script', trial=self.trial) + logger.info('-' * 25 + ' Build script: ' + '-' * 25, trial=self.trial) + logger.info(self.success_build_script, trial=self.trial) + logger.info('-' * 60, trial=self.trial) + break + finally: + self.inspect_tool.terminate() + def _agent_raw_loop(self, prompt: Optional[Prompt], + build_result: BuildResult) -> None: + """Runs the agent loop, sending prompts to the LLM and handling + responses.""" # Agent loop self.trial = 0 try: @@ -155,7 +339,6 @@ def execute(self, result_history: list[Result]) -> BuildResult: self.trial += 1 finally: self.inspect_tool.terminate() - return build_result def _parse_tag(self, response: str, tag: str) -> str: """Parses the tag from LLM response.""" @@ -373,7 +556,7 @@ def _handle_llm_reponse(self, response: str, return prompt -def fix_build(args, oss_fuzz_base): +def fix_build(args, oss_fuzz_base, use_tools: bool = True): """Fixes the build of a given project.""" project_name = args.project @@ -392,7 +575,7 @@ def fix_build(args, oss_fuzz_base): llm.MAX_INPUT_TOKEN = 25000 # Set up Build fixer agent - agent = BuildFixAgent(llm, project_name, work_dirs, args) + agent = BuildFixAgent(llm, project_name, work_dirs, args, use_tools=use_tools) # Execute the agent agent.execute([]) diff --git a/experimental/build_fixer/templates.py b/experimental/build_fixer/templates.py index 9c64071b48..0633ad7bd0 100644 --- a/experimental/build_fixer/templates.py +++ b/experimental/build_fixer/templates.py @@ -28,6 +28,10 @@ environment, and the build.sh script is used to compile the project. It is likely that the build.sh script is broken. 
You should focus only on changing the build.sh and not the Dockerfile. +You are interacting with a fully automated system so use the tools provided to you +to fix the build.sh script. +Do not provide textual descriptions as response. +Prioritize technical answers in the form of code of commands. ### OSS-Fuzz Project Structure - OSS-Fuzz is an open source project that enables continuous fuzzing of open @@ -44,6 +48,29 @@ ''' +BUILD_FIX_PROBLEM_TOOLS = """ +Your task is to fix the build.sh script so that the project can be built successfully. + +{LANGUAGE_SPECIFICS} + +### Provided Resources + +- Dockerfile: + + {DOCKERFILE} + + +- Build script + + {BUILD_SCRIPT} + + +- Initial failed build output: + + {LOGS} + +""" + BUILD_FIX_PROBLEM = """ Your task is to fix the build.sh script so that the project can be built successfully. diff --git a/llm_toolkit/models.py b/llm_toolkit/models.py index edb5031d43..7d3e974623 100644 --- a/llm_toolkit/models.py +++ b/llm_toolkit/models.py @@ -144,6 +144,11 @@ def ask_llm(self, prompt: prompts.Prompt) -> str: del prompt return '' + @abstractmethod + def chat_llm_with_tools(self, client: Any, prompt: prompts.Prompt, + tools) -> Any: + """Queries the LLM in the given chat session with tools.""" + @abstractmethod def chat_llm(self, client: Any, prompt: prompts.Prompt) -> str: """Queries the LLM in the given chat session and returns the response.""" @@ -334,6 +339,22 @@ def chat_llm(self, client: Any, prompt: prompts.Prompt) -> str: return llm_response + def chat_llm_with_tools(self, client: Any, prompt: prompts.Prompt, + tools) -> Any: + """Queries LLM in a chat session with tools.""" + if self.ai_binary: + raise ValueError(f'OpenAI does not use local AI binary: {self.ai_binary}') + if self.temperature_list: + logger.info('OpenAI does not allow temperature list: %s', + self.temperature_list) + + if prompt: + self.messages.extend(prompt.get()) + + return client.responses.create(model=self.name, + input=self.messages, + tools=tools) + def ask_llm(self, prompt: prompts.Prompt) -> str: """Queries LLM a single prompt and returns its response.""" if self.ai_binary: @@ -562,6 +583,12 @@ def chat_llm(self, client: Any, prompt: prompts.Prompt) -> Any: del client, prompt # Placeholder: To Be Implemented. + def chat_llm_with_tools(self, client: Any, prompt: prompts.Prompt, + tools) -> Any: + """Queries the LLM in the given chat session with tools.""" + # Placeholder: To Be Implemented. + return + class ClaudeHaikuV3(Claude): """Claude Haiku 3.""" @@ -671,6 +698,12 @@ def chat_llm(self, client: Any, prompt: prompts.Prompt) -> Any: del client, prompt raise NotImplementedError + def chat_llm_with_tools(self, client: Any, prompt: prompts.Prompt, + tools) -> Any: + """Queries the LLM in the given chat session with tools.""" + # Placeholder: To Be Implemented. + return + class VertexAIModel(GoogleModel): """Vertex AI model.""" @@ -950,6 +983,12 @@ def chat_llm(self, client: ChatSession, prompt: prompts.Prompt) -> str: response = self._do_generate(client, prompt.get(), parameters_list) or '' return response + def chat_llm_with_tools(self, client: Any, prompt: prompts.Prompt, + tools) -> Any: + """Queries the LLM in the given chat session with tools.""" + # Placeholder: To Be Implemented. + return + class GeminiV2FlashChat(GeminiV1D5Chat): """Gemini 2 Flash for chat session.""" @@ -1008,5 +1047,11 @@ def chat_llm(self, client: Any, prompt: prompts.Prompt) -> Any: del client, prompt # Placeholder: To Be Implemented. 
+ def chat_llm_with_tools(self, client: Any, prompt: prompts.Prompt, + tools) -> Any: + """Queries the LLM in the given chat session with tools.""" + # Placeholder: To Be Implemented. + return + DefaultModel = GeminiV1D5 diff --git a/pyproject.toml b/pyproject.toml index 3b0873e178..5d3a26a417 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ dependencies = [ "google-cloud-storage==2.9.0", "google-cloud-logging==3.11.2", "Jinja2==3.1.5", -"openai==1.60.0", +"openai==1.84.0", "pandas==2.2.2", "pylint==3.2.5", "pyright==1.1.345", From f8525348d0b98aadde181d86ed104a20d7668b6c Mon Sep 17 00:00:00 2001 From: DavidKorczynski Date: Wed, 4 Jun 2025 07:49:51 -0700 Subject: [PATCH 23/64] models: add gpt-4.1 and minor build-fixer fix (#1085) Signed-off-by: David Korczynski --- agent/base_agent.py | 2 +- experimental/build_fixer/build_fix.py | 3 --- llm_toolkit/models.py | 18 ++++++++++++------ 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/agent/base_agent.py b/agent/base_agent.py index 064e38960f..a5b7475e9d 100644 --- a/agent/base_agent.py +++ b/agent/base_agent.py @@ -60,7 +60,7 @@ def get_tool(self, tool_name: str) -> Optional[BaseTool]: return tool return None - def chat_llm_with_tools(self, client: Any, prompt: Prompt, tools, + def chat_llm_with_tools(self, client: Any, prompt: Optional[Prompt], tools, trial) -> Any: """Chat with LLM with tools.""" logger.info( diff --git a/experimental/build_fixer/build_fix.py b/experimental/build_fixer/build_fix.py index f923a3ef23..2f2c9675db 100644 --- a/experimental/build_fixer/build_fix.py +++ b/experimental/build_fixer/build_fix.py @@ -200,9 +200,6 @@ def _agent_run_function_based_loop( # Pass prompt history to LLM and get response. logger.info('Sending prompt to LLM', trial=self.trial) - if prompt is None: - return - response = self.chat_llm_with_tools(client, prompt, FIXER_TOOLS, self.trial) diff --git a/llm_toolkit/models.py b/llm_toolkit/models.py index 7d3e974623..14d86209b8 100644 --- a/llm_toolkit/models.py +++ b/llm_toolkit/models.py @@ -145,7 +145,7 @@ def ask_llm(self, prompt: prompts.Prompt) -> str: return '' @abstractmethod - def chat_llm_with_tools(self, client: Any, prompt: prompts.Prompt, + def chat_llm_with_tools(self, client: Any, prompt: Optional[prompts.Prompt], tools) -> Any: """Queries the LLM in the given chat session with tools.""" @@ -339,7 +339,7 @@ def chat_llm(self, client: Any, prompt: prompts.Prompt) -> str: return llm_response - def chat_llm_with_tools(self, client: Any, prompt: prompts.Prompt, + def chat_llm_with_tools(self, client: Any, prompt: Optional[prompts.Prompt], tools) -> Any: """Queries LLM in a chat session with tools.""" if self.ai_binary: @@ -401,6 +401,12 @@ class GPT4(GPT): name = 'gpt-4' +class GPT41(GPT): + """OpenAI's GPT-4.1 model.""" + + name = 'gpt-4.1' + + class GPT4o(GPT): """OpenAI's GPT-4o model.""" @@ -583,7 +589,7 @@ def chat_llm(self, client: Any, prompt: prompts.Prompt) -> Any: del client, prompt # Placeholder: To Be Implemented. - def chat_llm_with_tools(self, client: Any, prompt: prompts.Prompt, + def chat_llm_with_tools(self, client: Any, prompt: Optional[prompts.Prompt], tools) -> Any: """Queries the LLM in the given chat session with tools.""" # Placeholder: To Be Implemented. 
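To make the new `chat_llm_with_tools` path concrete, here is a compact sketch of the round trip the build fixer performs with the OpenAI Responses API: the tool schema is sent with the conversation, each `function_call` item in `response.output` is executed locally, and its result is appended as a `function_call_output` entry before the next request. The model name and the local command runner are placeholders, not part of the patch.

```python
# Hedged sketch of the tool-calling loop, assuming the Responses API in openai>=1.84.
import json

from openai import OpenAI


def run_in_container(command: str) -> str:
  """Placeholder standing in for ProjectContainerTool.execute()."""
  return f'(pretend output of: {command})'


client = OpenAI()
tools = [{
    'type': 'function',
    'name': 'run_commands_in_container',
    'description': 'Runs a command string in the project container.',
    'parameters': {
        'type': 'object',
        'properties': {
            'command': {
                'type': 'string'
            }
        },
        'required': ['command'],
        'additionalProperties': False
    }
}]
messages = [{'role': 'user', 'content': 'List the files in /src and show build.sh.'}]

response = client.responses.create(model='gpt-4.1', input=messages, tools=tools)

for item in response.output:
  if item.type != 'function_call':
    continue
  args = json.loads(item.arguments)
  result = run_in_container(args['command'])
  # Echo the call and its output back so the next round of the loop sees them.
  messages.append(item)
  messages.append({
      'type': 'function_call_output',
      'call_id': item.call_id,
      'output': str(result),
  })

# The extended message list is then passed to client.responses.create() again,
# and the loop repeats until the model stops requesting tool calls.
```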
@@ -698,7 +704,7 @@ def chat_llm(self, client: Any, prompt: prompts.Prompt) -> Any: del client, prompt raise NotImplementedError - def chat_llm_with_tools(self, client: Any, prompt: prompts.Prompt, + def chat_llm_with_tools(self, client: Any, prompt: Optional[prompts.Prompt], tools) -> Any: """Queries the LLM in the given chat session with tools.""" # Placeholder: To Be Implemented. @@ -983,7 +989,7 @@ def chat_llm(self, client: ChatSession, prompt: prompts.Prompt) -> str: response = self._do_generate(client, prompt.get(), parameters_list) or '' return response - def chat_llm_with_tools(self, client: Any, prompt: prompts.Prompt, + def chat_llm_with_tools(self, client: Any, prompt: Optional[prompts.Prompt], tools) -> Any: """Queries the LLM in the given chat session with tools.""" # Placeholder: To Be Implemented. @@ -1047,7 +1053,7 @@ def chat_llm(self, client: Any, prompt: prompts.Prompt) -> Any: del client, prompt # Placeholder: To Be Implemented. - def chat_llm_with_tools(self, client: Any, prompt: prompts.Prompt, + def chat_llm_with_tools(self, client: Any, prompt: Optional[prompts.Prompt], tools) -> Any: """Queries the LLM in the given chat session with tools.""" # Placeholder: To Be Implemented. From b4934ab322fe9825c3b19e622d22a51528a9c3fa Mon Sep 17 00:00:00 2001 From: DavidKorczynski Date: Wed, 4 Jun 2025 11:07:02 -0700 Subject: [PATCH 24/64] build-fixer: add tool for building both Dockerfile and build script (#1086) Signed-off-by: David Korczynski --- experimental/build_fixer/build_fix.py | 80 ++++++++++++++++++++++++++- experimental/build_fixer/templates.py | 1 + 2 files changed, 78 insertions(+), 3 deletions(-) diff --git a/experimental/build_fixer/build_fix.py b/experimental/build_fixer/build_fix.py index 2f2c9675db..cf813c0120 100644 --- a/experimental/build_fixer/build_fix.py +++ b/experimental/build_fixer/build_fix.py @@ -51,6 +51,28 @@ 'required': ['build_script'], 'additionalProperties': False } +}, { + 'type': + 'function', + 'name': + 'test_build_script_and_dockerfile', + 'description': + 'Tests a build script and Dockerfile against target project.', + 'parameters': { + 'type': 'object', + 'properties': { + 'build_script': { + 'type': 'string', + 'description': 'Bash script that builds the project.' + }, + 'dockerfile': { + 'type': 'string', + 'description': 'Dockerfile that builds the project.' + } + }, + 'required': ['build_script', 'dockerfile'], + 'additionalProperties': False + } }, { 'type': 'function', 'name': 'run_commands_in_container', @@ -216,7 +238,50 @@ def _agent_run_function_based_loop( logger.info('Tool call arguments: %s', tool_call.arguments, trial=self.trial) - if tool_call.name == 'test_build_script': + if tool_call.name == 'test_build_script_and_dockerfile': + arguments = json.loads(tool_call.arguments) + build_fuzzers_result, target_dst = self._test_build_fuzzers( + arguments['build_script'], arguments['dockerfile']) + if build_fuzzers_result.returncode != 0: + logger.info('Build failed.', trial=self.trial) + parsed_stdout = build_fuzzers_result.stdout + parsed_stdout = self._simple_truncate_build_output(parsed_stdout) + + logger.info('Parsed stdout: %s', parsed_stdout, trial=self.trial) + + self.llm.messages.append(tool_call) + self.llm.messages.append({ + 'type': 'function_call_output', + 'call_id': tool_call.call_id, + 'output': str(parsed_stdout) + }) + extended_messages = True + prompt = None + + else: + logger.info('Build succeeded.', trial=self.trial) + # Testing fuzzers run. 
+ test_run_result = self._test_check_fuzzers(target_dst) + if test_run_result.returncode == 0: + logger.info('Fuzzers run successfully.', trial=self.trial) + success = True + self.success_build_script = arguments['build_script'] + self.success_dockerfile = arguments['dockerfile'] + prompt = None + extended_messages = False + else: + logger.info('Fuzzers run failed.', trial=self.trial) + prompt_text = test_run_result.stdout + success = False + self.llm.messages.append(tool_call) + self.llm.messages.append({ + 'type': 'function_call_output', + 'call_id': tool_call.call_id, + 'output': str(prompt_text) + }) + extended_messages = True + prompt = None + elif tool_call.name == 'test_build_script': arguments = json.loads(tool_call.arguments) build_fuzzers_result, target_dst = self._test_build_fuzzers( arguments['build_script']) @@ -362,7 +427,9 @@ def _parse_tags(self, response: str, tag: str) -> list[str]: return found_matches def _test_build_fuzzers( - self, build_script: str) -> tuple[subprocess.CompletedProcess, str]: + self, + build_script: str, + dockerfile: str = '') -> tuple[subprocess.CompletedProcess, str]: """Runs OSS-Fuzz's build_fuzzers command with the provided build script.""" target_dst = self.original_project_name + '-copy-' + str( uuid.uuid4().hex)[:8] @@ -377,8 +444,15 @@ def _test_build_fuzzers( os.path.join(oss_fuzz_checkout.OSS_FUZZ_DIR, 'projects', target_dst, 'build.sh'), 'w') as f: f.write(build_script) - # Build project + if dockerfile: + # Overwrite the Dockerfile with the new one + with open( + os.path.join(oss_fuzz_checkout.OSS_FUZZ_DIR, 'projects', target_dst, + 'Dockerfile'), 'w') as f: + f.write(dockerfile) + + # Build project cmd = ['python3', 'infra/helper.py', 'build_fuzzers', target_dst] result = subprocess.run(cmd, stdout=subprocess.PIPE, diff --git a/experimental/build_fixer/templates.py b/experimental/build_fixer/templates.py index 0633ad7bd0..1c2326c485 100644 --- a/experimental/build_fixer/templates.py +++ b/experimental/build_fixer/templates.py @@ -32,6 +32,7 @@ to fix the build.sh script. Do not provide textual descriptions as response. Prioritize technical answers in the form of code of commands. +You must always target the most recent version of the target code base and do not revert to older branches. ### OSS-Fuzz Project Structure - OSS-Fuzz is an open source project that enables continuous fuzzing of open From 91ab4961b383ac207680f5c79fdc1e21a8f70d0d Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Wed, 4 Jun 2025 22:15:04 +0000 Subject: [PATCH 25/64] Merge branch 'main' of https://github.com/google/oss-fuzz-gen into introspector-tool From 1fbe6d3d9b1fff9caf0140af95798279317653ca Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Wed, 4 Jun 2025 22:21:15 +0000 Subject: [PATCH 26/64] Merge branch 'introspector-tool' of https://github.com/google/oss-fuzz-gen into new-fa-arch Not sure what is happening From d757ef7777735432ee3beeb70e0c9f3e26eb071b Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Wed, 4 Jun 2025 22:23:52 +0000 Subject: [PATCH 27/64] Removed commented out statements. 
--- agent/function_analyzer.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index 98f7a239ee..d3c002f040 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -105,8 +105,6 @@ async def call_agent(self, query: str, runner: runners.Runner, user_id: str, session_id: str) -> str: """Call the agent asynchronously with the given query.""" - # logger.info(">>> User query: %s", query) - content = types.Content(role='user', parts=[types.Part(text=query)]) final_response_text = '' @@ -119,7 +117,6 @@ async def call_agent(self, query: str, runner: runners.Runner, user_id: str, new_message=content, ): - # logger.info("Event is %s", event.content) if event.is_final_response(): if (event.content and event.content.parts and event.content.parts[0].text): From 6df02b39dd70bc8a71e8902e8c50582dcc93b59f Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Thu, 5 Jun 2025 14:23:11 +0000 Subject: [PATCH 28/64] Added TODOs --- agent/function_analyzer.py | 1 + stage/writing_stage.py | 1 + 2 files changed, 2 insertions(+) diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index d3c002f040..3cd7609e3d 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -72,6 +72,7 @@ def initialize(self, benchmark: benchmarklib.Benchmark): benchmark, self.name) # Create the agent using the ADK library + # TODO(pamusuo): Create another AdkBaseAgent that extends BaseAgent and initializes an ADK agent as well. function_analyzer = agents.LlmAgent( name="FunctionAnalyzer", model=self.vertex_ai_model, diff --git a/stage/writing_stage.py b/stage/writing_stage.py index 69799af8e5..1c24192e3a 100644 --- a/stage/writing_stage.py +++ b/stage/writing_stage.py @@ -49,6 +49,7 @@ def execute(self, result_history: list[Result]) -> Result: if result_history and result_history[-1].fuzz_target_source: agent = self.get_agent(index=1) else: + # TODO(pamusuo): Call the function analyzer agent at this point (temporary implementation). 
agent = self.get_agent() agent_result = self._execute_agent(agent, result_history) build_result = cast(BuildResult, agent_result) From be31842ee855fd01e54028b0592da1b529020a19 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Thu, 5 Jun 2025 14:43:44 +0000 Subject: [PATCH 29/64] Refactored code --- llm_toolkit/prompt_builder.py | 81 ----------------------------------- 1 file changed, 81 deletions(-) diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index 7495815467..a87d3a1afb 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -967,87 +967,6 @@ def build_crash_analyzer_prompt(self, benchmark: Benchmark, driver_code: str, return self._prompt -class CrashAnalyzerTemplateBuilder(DefaultTemplateBuilder): - """Builder for C/C++.""" - - def __init__(self, - model: models.LLM, - benchmark: Optional[Benchmark] = None, - template_dir: str = DEFAULT_TEMPLATE_DIR, - initial: Any = None): - super().__init__(model, benchmark, template_dir, initial) - self.agent_templare_dir = AGENT_TEMPLATE_DIR - - self.crash_analyzer_priming_template_file = self._find_template( - self.agent_templare_dir, 'crash_analyzer-priming.txt') - - def _prepare_prompt( - self, - priming: str, - final_problem: str, - example_pair: Optional[list[list[str]]] = None, - project_example_content: Optional[list[list[str]]] = None): - """Constructs a prompt using the parameters and saves it.""" - self._prompt.add_priming(priming) - - def build_crash_analyzer_prompt(self, benchmark: Benchmark, driver_code: str, - crash_info: str, - crash_func: dict) -> prompts.Prompt: - """Prepares the crash analyzer prompt.""" - all_func_code = [] - for func_name, line_number in crash_func.items(): - if func_name == 'LLVMFuzzerTestOneInput': - driver_code = self._slice_driver_code(benchmark.project, driver_code, - line_number) - else: - func_code = self._slice_func_code(benchmark.project, func_name, - line_number) - all_func_code.append(func_code) - - with open(self.crash_analyzer_priming_template_file) as f: - priming = f.read().strip() - priming = priming.replace('{CRASH_REPORT}', crash_info.strip())\ - .replace('{DRIVER_CODE}', driver_code.strip()) - - priming_prompt = self._prompt.create_prompt_piece(priming, 'user') - template_piece = self._prompt.create_prompt_piece('{PROJECT_FUNCTION_CODE}', - 'user') - - priming_weight = self._model.estimate_token_num(priming_prompt) - template_weight = self._model.estimate_token_num(template_piece) - - prompt_size = priming_weight - template_weight - # Add extra 20-tokens redundancy - prompt_size += 20 - - # Add function code one by one until we reach the maximum prompt size - selected_func_code = [] - for func_code in all_func_code: - func_code_prompt = self._prompt.create_prompt_piece(func_code, 'user') - func_code_token_num = self._model.estimate_token_num(func_code_prompt) - if prompt_size + func_code_token_num >= self._model.context_window: - # The estimation is inaccurate, if an example's size equals to - # the limit, it's safer to not include the example. 
- logger.warning('Breaking because adding this function code \ - would exceed context window') - break - prompt_size += func_code_token_num - selected_func_code.append(func_code) - - project_function_code = '\n'.join(selected_func_code) - if project_function_code.strip(): - priming.replace('{PROJECT_FUNCTION_CODE}', project_function_code.strip()) - else: - logger.warning( - 'Empty project function code in triage prompt for project: %s, \ - function name: %s', benchmark.project, benchmark.function_name) - priming.replace('{PROJECT_FUNCTION_CODE}', \ - 'No relevant project function code') - - self._prepare_prompt(priming, '') - return self._prompt - - class DefaultJvmTemplateBuilder(PromptBuilder): """Default builder for JVM projects.""" From 2242f66aee6bc8c59dd87f7e1c490f8a4da0e225 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Thu, 5 Jun 2025 14:59:48 +0000 Subject: [PATCH 30/64] Ran presubmit to fix lint errors. --- agent/function_analyzer.py | 22 ++++---- agent/prototyper.py | 19 ++++--- agent_tests/function_analyzer_test.py | 47 ++++++++-------- agent_tests/upload_analysis_result.py | 77 ++++++++++++++++----------- common/cloud_builder.py | 9 ++-- experiment/workdir.py | 5 +- llm_toolkit/prompt_builder.py | 20 +++---- results.py | 24 ++++----- tool/fuzz_introspector_tool.py | 2 +- 9 files changed, 120 insertions(+), 105 deletions(-) diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index 3cd7609e3d..aa47556f14 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -26,10 +26,10 @@ from google.adk import agents, runners, sessions from google.genai import types -from experiment.workdir import WorkDirs import results as resultslib from agent import base_agent from experiment import benchmark as benchmarklib +from experiment.workdir import WorkDirs from llm_toolkit import models, prompt_builder, prompts from tool import base_tool, fuzz_introspector_tool @@ -78,11 +78,10 @@ def initialize(self, benchmark: benchmarklib.Benchmark): model=self.vertex_ai_model, description="""Extracts a function's requirements from its source implementation.""", - instruction="""You are a security engineer tasked with analyzing a function + instruction= + """You are a security engineer tasked with analyzing a function and extracting its input requirements, necessary for it to execute correctly.""", - tools=[ - introspector_tool.function_source_with_name - ], + tools=[introspector_tool.function_source_with_name], ) # Create the session service @@ -143,9 +142,8 @@ def write_requirements_to_file(self, args, requirements: str) -> str: logger.warning("No requirements to write to file.") return '' - requirement_path = os.path.join( - args.work_dirs.requirements, - f"{self.benchmark.id}.txt") + requirement_path = os.path.join(args.work_dirs.requirements, + f"{self.benchmark.id}.txt") with open(requirement_path, 'w') as f: f.write(requirements) @@ -154,9 +152,8 @@ def write_requirements_to_file(self, args, requirements: str) -> str: return requirement_path - def execute( - self, - result_history: list[resultslib.Result]) -> resultslib.Result: + def execute(self, + result_history: list[resultslib.Result]) -> resultslib.Result: """Execute the agent with the given results.""" WorkDirs(self.args.work_dirs.base, keep=True) @@ -176,8 +173,7 @@ def execute( if result_str: # Write the requirements to a file - requirement_path = self.write_requirements_to_file( - self.args, result_str) + requirement_path = self.write_requirements_to_file(self.args, result_str) function_analysis = 
resultslib.FunctionAnalysisResult(requirement_path) else: function_analysis = None diff --git a/agent/prototyper.py b/agent/prototyper.py index b7d89fa171..72f42932c0 100644 --- a/agent/prototyper.py +++ b/agent/prototyper.py @@ -56,7 +56,8 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: context_info = {} function_analysis = last_result.function_analysis - if function_analysis and os.path.isfile(function_analysis.function_analysis_path): + if function_analysis and os.path.isfile( + function_analysis.function_analysis_path): with open(function_analysis.function_analysis_path, 'r') as file: function_requirements = file.read() else: @@ -66,13 +67,15 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: model=self.llm, benchmark=benchmark, ) - prompt = builder.build(example_pair=prompt_builder.EXAMPLES.get( - benchmark.file_type.value.lower(), []), - project_example_content=project_examples, - project_context_content=context_info, - tool_guides=self.inspect_tool.tutorial(), - project_dir=self.inspect_tool.project_dir, - function_requirements=function_requirements,) + prompt = builder.build( + example_pair=prompt_builder.EXAMPLES.get( + benchmark.file_type.value.lower(), []), + project_example_content=project_examples, + project_context_content=context_info, + tool_guides=self.inspect_tool.tutorial(), + project_dir=self.inspect_tool.project_dir, + function_requirements=function_requirements, + ) return prompt def _update_fuzz_target_and_build_script(self, response: str, diff --git a/agent_tests/function_analyzer_test.py b/agent_tests/function_analyzer_test.py index 600d3e5f21..1a580fc12e 100644 --- a/agent_tests/function_analyzer_test.py +++ b/agent_tests/function_analyzer_test.py @@ -30,9 +30,9 @@ RESULTS_DIR = './results' - NUM_ANA = int(os.getenv('LLM_NUM_ANA', '2')) + def parse_args() -> argparse.Namespace: """Parses command line arguments.""" parser = argparse.ArgumentParser( @@ -57,10 +57,10 @@ def parse_args() -> argparse.Namespace: type=str) parser.add_argument('-np', - '--num-pools', - type=int, - default=NUM_ANA, - help='Number of parallel processes to use for analysis.') + '--num-pools', + type=int, + default=NUM_ANA, + help='Number of parallel processes to use for analysis.') parser.add_argument('-w', '--work-dir', default=RESULTS_DIR) @@ -92,15 +92,12 @@ def parse_args() -> argparse.Namespace: return parsed_args -def analyze_benchmark(benchmark: benchmarklib.Benchmark, - model: models.LLM, - args: argparse.Namespace) -> bool: +def analyze_benchmark(benchmark: benchmarklib.Benchmark, model: models.LLM, + args: argparse.Namespace) -> bool: """Analyzes the benchmark using the function analyzer.""" # Initialize the function analyzer - analyzer = function_analyzer.FunctionAnalyzer(trial=1, - llm=model, - args=args) + analyzer = function_analyzer.FunctionAnalyzer(trial=1, llm=model, args=args) # Initialize the function analyzer with the first benchmark analyzer.initialize(benchmark) @@ -109,11 +106,13 @@ def analyze_benchmark(benchmark: benchmarklib.Benchmark, try: result = analyzer.execute([]) except Exception as e: - logger.error("Error during analysis for benchmark %s: %s", benchmark.function_name, e) + logger.error("Error during analysis for benchmark %s: %s", + benchmark.function_name, e) return False return result.function_analysis is not None + if __name__ == "__main__": model = models.LLM.setup(ai_binary='', name='vertex_ai_gemini-2-5-pro-chat') @@ -140,25 +139,24 @@ def analyze_benchmark(benchmark: benchmarklib.Benchmark, if NUM_ANA == 1: for benchmark in 
benchmarks: logger.info("Loaded benchmark (%d/%d) for function: %s", - benchmarks.index(benchmark) + 1, len(benchmarks), - benchmark.function_name) + benchmarks.index(benchmark) + 1, len(benchmarks), + benchmark.function_name) if analyze_benchmark(benchmark, model, args): success_count += 1 else: - logger.info("Running analysis in parallel with %d processes.", args.num_pools) + logger.info("Running analysis in parallel with %d processes.", + args.num_pools) with multiprocessing.Pool(args.num_pools, maxtasksperchild=1) as pool: results = {} for benchmark in benchmarks: # Pass a new analyzer instance to each process to avoid sharing state logger.info("Submitted benchmark (%d/%d) for function: %s to the pool.", - benchmarks.index(benchmark) + 1, len(benchmarks), - benchmark.function_name) - result = pool.apply_async( - analyze_benchmark, - args=(benchmark, model, args) - ) + benchmarks.index(benchmark) + 1, len(benchmarks), + benchmark.function_name) + result = pool.apply_async(analyze_benchmark, + args=(benchmark, model, args)) results[benchmark.id] = result pool.close() @@ -172,6 +170,9 @@ def analyze_benchmark(benchmark: benchmarklib.Benchmark, if result.get(): success_count += 1 except Exception as e: - logger.error(f"Error during analysis for benchmark %s: %s", benchmark_id, e) + logger.error(f"Error during analysis for benchmark %s: %s", + benchmark_id, e) - print(f"{success_count} out of {len(benchmarks)} analyses completed successfully.") + print( + f"{success_count} out of {len(benchmarks)} analyses completed successfully." + ) diff --git a/agent_tests/upload_analysis_result.py b/agent_tests/upload_analysis_result.py index 45ec86a06b..865006251b 100644 --- a/agent_tests/upload_analysis_result.py +++ b/agent_tests/upload_analysis_result.py @@ -1,13 +1,17 @@ import argparse import os + from google.cloud import storage GCS_BUCKET_NAME = 'pamusuo-tests' CGS_RESULTS_DIR = "Function-analysis-results" -def upload_directory_to_gcs(local_folder_path, bucket_name, destination_blob_prefix=""): - """ + +def upload_directory_to_gcs(local_folder_path, + bucket_name, + destination_blob_prefix=""): + """ Uploads all .txt files from a local folder to a Google Cloud Storage bucket. Args: @@ -17,42 +21,53 @@ def upload_directory_to_gcs(local_folder_path, bucket_name, destination_blob_pre Useful for organizing files within the bucket. e.g., "my_text_files/" """ - storage_client = storage.Client() - bucket = storage_client.bucket(bucket_name) - - print(f"Starting upload from local folder: {local_folder_path}") + storage_client = storage.Client() + bucket = storage_client.bucket(bucket_name) - for root, _, files in os.walk(local_folder_path): - for file_name in files: - if file_name.endswith(".txt"): - local_file_path = os.path.join(root, file_name) + print(f"Starting upload from local folder: {local_folder_path}") - # Construct the blob path in GCS - # This ensures the folder structure is maintained if needed - # For simplicity, we'll just put all files directly under the prefix - # If you want to maintain subdirectories, you'd adjust this. 
- relative_path = os.path.relpath(local_file_path, local_folder_path) - destination_blob_name = os.path.join(destination_blob_prefix, relative_path).replace("\\", "/") # Replace backslashes for Linux/GCS compatibility + for root, _, files in os.walk(local_folder_path): + for file_name in files: + if file_name.endswith(".txt"): + local_file_path = os.path.join(root, file_name) + # Construct the blob path in GCS + # This ensures the folder structure is maintained if needed + # For simplicity, we'll just put all files directly under the prefix + # If you want to maintain subdirectories, you'd adjust this. + relative_path = os.path.relpath(local_file_path, local_folder_path) + destination_blob_name = os.path.join( + destination_blob_prefix, relative_path).replace( + "\\", "/") # Replace backslashes for Linux/GCS compatibility - blob = bucket.blob(destination_blob_name) + blob = bucket.blob(destination_blob_name) - try: - blob.upload_from_filename(local_file_path) - print(f"Uploaded {local_file_path} to gs://{bucket_name}/{destination_blob_name}") - except Exception as e: - print(f"Error uploading {local_file_path}: {e}") + try: + blob.upload_from_filename(local_file_path) + print( + f"Uploaded {local_file_path} to gs://{bucket_name}/{destination_blob_name}" + ) + except Exception as e: + print(f"Error uploading {local_file_path}: {e}") if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Upload a directory to a Google Cloud Storage bucket.") - parser.add_argument("-d", "--directory", help="Path to the directory to upload", required=True) - parser.add_argument("-b", "--bucket", help="Name of the GCS bucket", default=GCS_BUCKET_NAME) - args = parser.parse_args() + parser = argparse.ArgumentParser( + description="Upload a directory to a Google Cloud Storage bucket.") + parser.add_argument("-d", + "--directory", + help="Path to the directory to upload", + required=True) + parser.add_argument("-b", + "--bucket", + help="Name of the GCS bucket", + default=GCS_BUCKET_NAME) + args = parser.parse_args() - # Ensure the directory exists - if not os.path.isdir(args.directory): - raise ValueError(f"The specified directory does not exist: {args.directory}") + # Ensure the directory exists + if not os.path.isdir(args.directory): + raise ValueError( + f"The specified directory does not exist: {args.directory}") - # Upload the directory to GCS - upload_directory_to_gcs(args.directory, args.bucket, CGS_RESULTS_DIR) \ No newline at end of file + # Upload the directory to GCS + upload_directory_to_gcs(args.directory, args.bucket, CGS_RESULTS_DIR) diff --git a/common/cloud_builder.py b/common/cloud_builder.py index e9a1a157ad..a7a8e68a0c 100644 --- a/common/cloud_builder.py +++ b/common/cloud_builder.py @@ -250,10 +250,8 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str, 'allowFailure': True, }, { - 'name': - 'gcr.io/cloud-builders/gsutil', - 'entrypoint': - 'bash', + 'name': 'gcr.io/cloud-builders/gsutil', + 'entrypoint': 'bash', 'args': [ '-c', f'gsutil cp {experiment_url} /tmp/ofg-exp.tar.gz && ' f'mkdir /workspace/host/{experiment_path} && ' @@ -487,8 +485,7 @@ def run(self, agent: BaseAgent, result_history: list[Result], if experiment_url: logging.info('Uploaded experiment to %s', experiment_url) else: - logging.error('Experiment path %s empty or invalid.', - experiment_path) + logging.error('Experiment path %s empty or invalid.', experiment_path) oss_fuzz_data_url = self._upload_oss_fuzz_data() data_dir_url = self._upload_fi_oss_fuzz_data() diff --git 
a/experiment/workdir.py b/experiment/workdir.py index 8e4e15780d..e248c51da6 100644 --- a/experiment/workdir.py +++ b/experiment/workdir.py @@ -26,7 +26,10 @@ class WorkDirs: RUN_LOG_NAME_PATTERN = re.compile(r'.*-F(\d+).log') - def __init__(self, base_dir, keep: bool = False, create_children_dirs: bool = True): + def __init__(self, + base_dir, + keep: bool = False, + create_children_dirs: bool = True): self._base_dir = os.path.realpath(base_dir) if os.path.exists(self._base_dir) and not keep: # Clear existing directory. diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index 96e01e95bd..ed75e7744b 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -602,7 +602,7 @@ def build(self, final_problem += self.format_context(project_context_content) if function_requirements: final_problem += (f'\nHere are the requirements for the function:\n' - f'{function_requirements}\n') + f'{function_requirements}\n') self._prepare_prompt(priming, final_problem, example_pair, project_example_content) self._prompt.append(tool_guides, True) @@ -881,21 +881,21 @@ def build_prompt(self) -> prompts.Prompt: self.benchmark.project, self.benchmark.function_signature) if not func_source: - logger.error( - 'No function source found for project: %s, function: %s', - self.benchmark.project, self.benchmark.function_signature) + logger.error('No function source found for project: %s, function: %s', + self.benchmark.project, self.benchmark.function_signature) return prompts.TextPrompt() prompt = prompt.replace('{FUNCTION_SOURCE}', func_source) # Get the function's references - xrefs = introspector.query_introspector_cross_references(self.benchmark.project, - self.benchmark.function_signature) + xrefs = introspector.query_introspector_cross_references( + self.benchmark.project, self.benchmark.function_signature) if not xrefs: - logger.error( - 'No cross references found for project: %s, function: %s', - self.benchmark.project, self.benchmark.function_signature) - prompt = prompt.replace('\n{FUNCTION_REFERENCES}\n}', '') + logger.error('No cross references found for project: %s, function: %s', + self.benchmark.project, self.benchmark.function_signature) + prompt = prompt.replace( + '\n{FUNCTION_REFERENCES}\n}', + '') else: references = [f"\n{xref}\n" for xref in xrefs] references_str = '\n'.join(references) diff --git a/results.py b/results.py index cfac3d8a46..659425b58a 100644 --- a/results.py +++ b/results.py @@ -34,16 +34,17 @@ class Result: _repr_exclude = {'_repr_exclude', 'chat_history'} function_analysis: Optional['FunctionAnalysisResult'] - def __init__(self, - benchmark: Benchmark, - trial: int, - work_dirs: WorkDirs, - fuzz_target_source: str = '', - build_script_source: str = '', - author: Any = None, - chat_history: Optional[dict] = None, - default_success: bool = False, - function_analysis: Optional['FunctionAnalysisResult'] = None) -> None: + def __init__( + self, + benchmark: Benchmark, + trial: int, + work_dirs: WorkDirs, + fuzz_target_source: str = '', + build_script_source: str = '', + author: Any = None, + chat_history: Optional[dict] = None, + default_success: bool = False, + function_analysis: Optional['FunctionAnalysisResult'] = None) -> None: self.benchmark = benchmark self.trial = trial self.work_dirs = work_dirs @@ -741,10 +742,9 @@ def textcov_diff(self) -> textcov.Textcov: return all_textcov - class FunctionAnalysisResult: """The result of the function analyzer.""" function_analysis_path: str def __init__(self, function_analysis_path: str): - 
self.function_analysis_path = function_analysis_path \ No newline at end of file + self.function_analysis_path = function_analysis_path diff --git a/tool/fuzz_introspector_tool.py b/tool/fuzz_introspector_tool.py index fe950abc4b..0a8c26ec24 100644 --- a/tool/fuzz_introspector_tool.py +++ b/tool/fuzz_introspector_tool.py @@ -105,7 +105,7 @@ def function_source_with_name(self, project_name: str, project_name) functions_list = introspector.query_introspector_all_functions( project_name) - + if functions_list: self.project_functions = { func["debug_summary"]["name"]: func From 800b6017f9799bba421bd4b2c1046ace6729c52c Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Thu, 5 Jun 2025 15:01:35 +0000 Subject: [PATCH 31/64] Remove unneeded file. --- agent_tests/upload_analysis_result.py | 73 --------------------------- 1 file changed, 73 deletions(-) delete mode 100644 agent_tests/upload_analysis_result.py diff --git a/agent_tests/upload_analysis_result.py b/agent_tests/upload_analysis_result.py deleted file mode 100644 index 865006251b..0000000000 --- a/agent_tests/upload_analysis_result.py +++ /dev/null @@ -1,73 +0,0 @@ -import argparse -import os - -from google.cloud import storage - -GCS_BUCKET_NAME = 'pamusuo-tests' - -CGS_RESULTS_DIR = "Function-analysis-results" - - -def upload_directory_to_gcs(local_folder_path, - bucket_name, - destination_blob_prefix=""): - """ - Uploads all .txt files from a local folder to a Google Cloud Storage bucket. - - Args: - local_folder_path (str): The path to the local folder containing the .txt files. - bucket_name (str): The name of your Google Cloud Storage bucket. - destination_blob_prefix (str): An optional prefix for the blob names in GCS. - Useful for organizing files within the bucket. - e.g., "my_text_files/" - """ - storage_client = storage.Client() - bucket = storage_client.bucket(bucket_name) - - print(f"Starting upload from local folder: {local_folder_path}") - - for root, _, files in os.walk(local_folder_path): - for file_name in files: - if file_name.endswith(".txt"): - local_file_path = os.path.join(root, file_name) - - # Construct the blob path in GCS - # This ensures the folder structure is maintained if needed - # For simplicity, we'll just put all files directly under the prefix - # If you want to maintain subdirectories, you'd adjust this. 
- relative_path = os.path.relpath(local_file_path, local_folder_path) - destination_blob_name = os.path.join( - destination_blob_prefix, relative_path).replace( - "\\", "/") # Replace backslashes for Linux/GCS compatibility - - blob = bucket.blob(destination_blob_name) - - try: - blob.upload_from_filename(local_file_path) - print( - f"Uploaded {local_file_path} to gs://{bucket_name}/{destination_blob_name}" - ) - except Exception as e: - print(f"Error uploading {local_file_path}: {e}") - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Upload a directory to a Google Cloud Storage bucket.") - parser.add_argument("-d", - "--directory", - help="Path to the directory to upload", - required=True) - parser.add_argument("-b", - "--bucket", - help="Name of the GCS bucket", - default=GCS_BUCKET_NAME) - args = parser.parse_args() - - # Ensure the directory exists - if not os.path.isdir(args.directory): - raise ValueError( - f"The specified directory does not exist: {args.directory}") - - # Upload the directory to GCS - upload_directory_to_gcs(args.directory, args.bucket, CGS_RESULTS_DIR) From e10870112c74eceb61af6116559b06d05a494f26 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Thu, 5 Jun 2025 15:03:37 +0000 Subject: [PATCH 32/64] Removed redundant files --- ...er__Z10encode_ise12quant_methodjPKhPhj.txt | 26 ------------------- ...criptorRK25symbolic_compressed_blockPh.txt | 20 -------------- 2 files changed, 46 deletions(-) delete mode 100644 function-analyzer-result-sample/astc-encoder__Z10encode_ise12quant_methodjPKhPhj.txt delete mode 100644 function-analyzer-result-sample/astc-encoder__Z20symbolic_to_physicalRK21block_size_descriptorRK25symbolic_compressed_blockPh.txt diff --git a/function-analyzer-result-sample/astc-encoder__Z10encode_ise12quant_methodjPKhPhj.txt b/function-analyzer-result-sample/astc-encoder__Z10encode_ise12quant_methodjPKhPhj.txt deleted file mode 100644 index 8e4b4d0a32..0000000000 --- a/function-analyzer-result-sample/astc-encoder__Z10encode_ise12quant_methodjPKhPhj.txt +++ /dev/null @@ -1,26 +0,0 @@ - -project name: astc-encoder -function signature: void encode_ise(DW_TAG_enumeration_typequant_method, unsigned int, const uint8_t *, uint8_t *, unsigned int) - - - -The function `encode_ise` encodes input data based on the specified quantization level and writes the encoded bits to the output data buffer. It handles different encoding schemes based on the number of trits or quints associated with the quantization level, or simply writes out the raw bits if neither trits nor quints are used. - - - - -`character_count` must be greater than 0. This is enforced by the `promise` statement. - - -`input_data` must be a valid pointer. This pointer is dereferenced to read input values. - - -`output_data` must be a valid pointer. This pointer is dereferenced in `write_bits` to write output values. - - -The size of `input_data` should be at least `character_count` bytes. Otherwise, there will be an out-of-bounds read in the loops. - - -The size of `output_data` should be large enough to hold the encoded data, based on the `character_count`, `quant_level` and `bit_offset`. Otherwise, there will be an out-of-bounds write in `write_bits`. 
- - \ No newline at end of file diff --git a/function-analyzer-result-sample/astc-encoder__Z20symbolic_to_physicalRK21block_size_descriptorRK25symbolic_compressed_blockPh.txt b/function-analyzer-result-sample/astc-encoder__Z20symbolic_to_physicalRK21block_size_descriptorRK25symbolic_compressed_blockPh.txt deleted file mode 100644 index 0c60c1e81f..0000000000 --- a/function-analyzer-result-sample/astc-encoder__Z20symbolic_to_physicalRK21block_size_descriptorRK25symbolic_compressed_blockPh.txt +++ /dev/null @@ -1,20 +0,0 @@ - -project name: astc-encoder -function signature: void symbolic_to_physical(const struct block_size_descriptor &, const struct symbolic_compressed_block &, uint8_t *) - - - -The function `symbolic_to_physical` converts a symbolic compressed block representation to a physical compressed block representation. It handles different block types and encodes various parameters into the physical block. - - - - -`scb.block_type` must not be equal to `SYM_BTYPE_ERROR` to satisfy the assertion at the beginning of the function. - - -`scb.quant_mode` must be greater than or equal to `QUANT_6` because it is used as an index into the `color_uquant_to_scrambled_pquant_tables` array. - - -For each `i` in the range `[0, scb.partition_count)`, `scb.color_values[i][j]` must be a valid index into `pack_table` (i.e., less than the size of `pack_table`) within the nested loop where `j` ranges from `0` to `2 * (scb.color_formats[i] >> 2) + 2`, and `scb.partition_count` should result in `vals` being less than or equal to 8 due to the assert statement. This prevents out-of-bounds access to `pack_table`. - - \ No newline at end of file From 53342aa083681b6320d1c7adee8c61b87d0d71cd Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Thu, 5 Jun 2025 18:33:26 +0000 Subject: [PATCH 33/64] Integrated the function analyzer agent to the writing stage. 
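
The integration hinges on a simple file hand-off: the FunctionAnalyzer writes the extracted requirements to a per-benchmark text file, and the Prototyper folds that file back into its initial prompt. The snippet below is a minimal, self-contained sketch of that hand-off with the directory handling simplified; the real code uses WorkDirs.requirements and FunctionAnalysisResult, as in the diffs in this series.

import os


def write_requirements(requirements_dir: str, benchmark_id: str,
                       requirements: str) -> str:
  """FunctionAnalyzer side: persist the extracted requirements to a file."""
  path = os.path.join(requirements_dir, f'{benchmark_id}.txt')
  with open(path, 'w') as f:
    f.write(requirements)
  return path


def read_requirements(path: str) -> str:
  """Prototyper side: load the requirements back, tolerating a missing file."""
  if not os.path.isfile(path):
    return ''
  with open(path) as f:
    return f.read()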
--- agent/function_analyzer.py | 17 ++++++++--------- agent_tests/function_analyzer_test.py | 5 +---- common/cloud_builder.py | 9 +++++++++ run_one_experiment.py | 3 +++ stage/writing_stage.py | 12 +++++++++--- 5 files changed, 30 insertions(+), 16 deletions(-) diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index aa47556f14..677fc9930e 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -46,30 +46,29 @@ def __init__(self, trial: int, llm: models.LLM, args: argparse.Namespace, + benchmark: benchmarklib.Benchmark, tools: Optional[list[base_tool.BaseTool]] = None, name: str = ''): # Ensure the llm is an instance of VertexAIModel + # TODO (pamusuo): Provide support for other LLM models if not isinstance(llm, models.VertexAIModel): raise ValueError( "FunctionAnalyzer agent requires a VertexAIModel instance for llm.") - self.vertex_ai_model = llm._vertex_ai_model - super().__init__(trial, llm, args, tools, name) - def initialize(self, benchmark: benchmarklib.Benchmark): - """Initialize the function analyzer agent with the given benchmark.""" - + self.vertex_ai_model = llm._vertex_ai_model self.benchmark = benchmark - # Initialize the prompt builder - builder = prompt_builder.FunctionAnalyzerTemplateBuilder( - self.llm, self.benchmark) + self.initialize() + + def initialize(self): + """Initialize the function analyzer agent with the given benchmark.""" # Initialize the Fuzz Introspector tool introspector_tool = fuzz_introspector_tool.FuzzIntrospectorTool( - benchmark, self.name) + self.benchmark, self.name) # Create the agent using the ADK library # TODO(pamusuo): Create another AdkBaseAgent that extends BaseAgent and initializes an ADK agent as well. diff --git a/agent_tests/function_analyzer_test.py b/agent_tests/function_analyzer_test.py index 1a580fc12e..6b4500b56e 100644 --- a/agent_tests/function_analyzer_test.py +++ b/agent_tests/function_analyzer_test.py @@ -97,10 +97,7 @@ def analyze_benchmark(benchmark: benchmarklib.Benchmark, model: models.LLM, """Analyzes the benchmark using the function analyzer.""" # Initialize the function analyzer - analyzer = function_analyzer.FunctionAnalyzer(trial=1, llm=model, args=args) - - # Initialize the function analyzer with the first benchmark - analyzer.initialize(benchmark) + analyzer = function_analyzer.FunctionAnalyzer(trial=1, llm=model, args=args, benchmark=benchmark) # Run the function analyzer try: diff --git a/common/cloud_builder.py b/common/cloud_builder.py index a7a8e68a0c..77c3e7d4ff 100644 --- a/common/cloud_builder.py +++ b/common/cloud_builder.py @@ -338,6 +338,15 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str, '-e', 'VERTEX_AI_LOCATIONS=' + os.getenv("VERTEX_AI_LOCATIONS", ""), + '-e', + 'GOOGLE_GENAI_USE_VERTEXAI=' + + os.getenv("GOOGLE_GENAI_USE_VERTEXAI", "TRUE"), + '-e', + 'GOOGLE_CLOUD_PROJECT=' + + os.getenv("GOOGLE_CLOUD_PROJECT", "oss-fuzz"), + '-e', + 'GOOGLE_CLOUD_LOCATION=' + + os.getenv("GOOGLE_CLOUD_LOCATION", "global"), '--network=cloudbuild', # Built from this repo's `Dockerfile.cloudbuild-agent`. 
('us-central1-docker.pkg.dev/oss-fuzz/oss-fuzz-gen/' diff --git a/run_one_experiment.py b/run_one_experiment.py index cb1e9a0464..9f5d5308b4 100644 --- a/run_one_experiment.py +++ b/run_one_experiment.py @@ -27,6 +27,7 @@ from agent.coverage_analyzer import CoverageAnalyzer from agent.crash_analyzer import CrashAnalyzer from agent.enhancer import Enhancer +from agent.function_analyzer import FunctionAnalyzer from agent.one_prompt_enhancer import OnePromptEnhancer from agent.one_prompt_prototyper import OnePromptPrototyper from agent.prototyper import Prototyper @@ -247,6 +248,7 @@ def _fuzzing_pipeline(benchmark: Benchmark, model: models.LLM, p = pipeline.Pipeline(args=args, trial=trial, writing_stage_agents=[ + FunctionAnalyzer(trial=trial, llm=model, args=args, benchmark=benchmark), Prototyper(trial=trial, llm=model, args=args), Enhancer(trial=trial, llm=model, args=args), ], @@ -263,6 +265,7 @@ def _fuzzing_pipeline(benchmark: Benchmark, model: models.LLM, p = pipeline.Pipeline(args=args, trial=trial, writing_stage_agents=[ + FunctionAnalyzer(trial=trial, llm=model, args=args, benchmark=benchmark), OnePromptPrototyper(trial=trial, llm=model, args=args), diff --git a/stage/writing_stage.py b/stage/writing_stage.py index 1c24192e3a..c2d7dd8410 100644 --- a/stage/writing_stage.py +++ b/stage/writing_stage.py @@ -47,10 +47,16 @@ def _refine_given_fuzz_targets(self, result_history: list[Result]) -> Result: def execute(self, result_history: list[Result]) -> Result: """Executes the writing stage.""" if result_history and result_history[-1].fuzz_target_source: - agent = self.get_agent(index=1) + # Execute the Enhancer agent + agent = self.get_agent(index=2) else: - # TODO(pamusuo): Call the function analyzer agent at this point (temporary implementation). 
- agent = self.get_agent() + # First, execute the FunctionAnalyzer agent + agent = self.get_agent(index=0) + agent_result = self._execute_agent(agent, result_history) + result_history.append(agent_result) + + # Then, execute the Prototyper agent + agent = self.get_agent(index=1) agent_result = self._execute_agent(agent, result_history) build_result = cast(BuildResult, agent_result) From be8ff8fddeab7507cd6c9145d49db3aea6adb2dc Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Thu, 5 Jun 2025 18:36:19 +0000 Subject: [PATCH 34/64] Presubmit fixes --- agent_tests/function_analyzer_test.py | 5 ++++- run_one_experiment.py | 10 ++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/agent_tests/function_analyzer_test.py b/agent_tests/function_analyzer_test.py index 6b4500b56e..2fbfb9223c 100644 --- a/agent_tests/function_analyzer_test.py +++ b/agent_tests/function_analyzer_test.py @@ -97,7 +97,10 @@ def analyze_benchmark(benchmark: benchmarklib.Benchmark, model: models.LLM, """Analyzes the benchmark using the function analyzer.""" # Initialize the function analyzer - analyzer = function_analyzer.FunctionAnalyzer(trial=1, llm=model, args=args, benchmark=benchmark) + analyzer = function_analyzer.FunctionAnalyzer(trial=1, + llm=model, + args=args, + benchmark=benchmark) # Run the function analyzer try: diff --git a/run_one_experiment.py b/run_one_experiment.py index 9f5d5308b4..11aefaef88 100644 --- a/run_one_experiment.py +++ b/run_one_experiment.py @@ -248,7 +248,10 @@ def _fuzzing_pipeline(benchmark: Benchmark, model: models.LLM, p = pipeline.Pipeline(args=args, trial=trial, writing_stage_agents=[ - FunctionAnalyzer(trial=trial, llm=model, args=args, benchmark=benchmark), + FunctionAnalyzer(trial=trial, + llm=model, + args=args, + benchmark=benchmark), Prototyper(trial=trial, llm=model, args=args), Enhancer(trial=trial, llm=model, args=args), ], @@ -265,7 +268,10 @@ def _fuzzing_pipeline(benchmark: Benchmark, model: models.LLM, p = pipeline.Pipeline(args=args, trial=trial, writing_stage_agents=[ - FunctionAnalyzer(trial=trial, llm=model, args=args, benchmark=benchmark), + FunctionAnalyzer(trial=trial, + llm=model, + args=args, + benchmark=benchmark), OnePromptPrototyper(trial=trial, llm=model, args=args), From 385a33286ef1b2b9388d054bd68427e56d7b066a Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Thu, 5 Jun 2025 19:59:57 +0000 Subject: [PATCH 35/64] Fixed bug in cloud_builder --- common/cloud_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/cloud_builder.py b/common/cloud_builder.py index 77c3e7d4ff..5a3b73bc21 100644 --- a/common/cloud_builder.py +++ b/common/cloud_builder.py @@ -128,7 +128,7 @@ def _upload_directory(self, directory_path: str) -> str: return '' files_to_upload = list( - os.path.relpath(os.path.join(root, file)) + os.path.relpath(os.path.join(root, file), directory_path) for root, _, files in os.walk(directory_path) for file in files) From e9a14ef4c300ab5a3301b779f92515ce61bcee0f Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Fri, 6 Jun 2025 14:56:46 +0000 Subject: [PATCH 36/64] Fixed an error in cloud_builder that caused incorrect experiment path. 
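
The one-line relpath change in PATCH 35 just above is what makes the uploaded archive members correct: without the second argument, os.path.relpath resolves against the current working directory rather than the directory being archived. A short illustration follows; the paths are made up.

import os

directory_path = '/tmp/exp/output-dir'  # hypothetical experiment directory
full_path = os.path.join(directory_path, 'fuzz_targets', '01.fuzz_target')

# Relative to os.getcwd(): wrong member names end up inside the archive.
print(os.path.relpath(full_path))
# Relative to the directory being archived: 'fuzz_targets/01.fuzz_target'.
print(os.path.relpath(full_path, directory_path))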
--- agent/function_analyzer.py | 19 +++++++------- benchmark-sets/quick-test/astc-encoder.yaml | 29 +++++++++++++++++++++ common/cloud_builder.py | 6 ++--- 3 files changed, 41 insertions(+), 13 deletions(-) create mode 100644 benchmark-sets/quick-test/astc-encoder.yaml diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index 677fc9930e..331a0e99a1 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -157,13 +157,20 @@ def execute(self, WorkDirs(self.args.work_dirs.base, keep=True) + result = resultslib.Result( + benchmark=self.benchmark, + trial=self.trial, + work_dirs=self.args.work_dirs, + ) + # Call the agent asynchronously and return the result prompt = self._initial_prompt(result_history) query = prompt.gettext() # Validate query is not empty if not query.strip(): - raise ValueError("Query is empty. Cannot call the agent.") + logger.error("Error occurred while building initial prompt. Cannot call the agent.") + return result user_id = "user" session_id = f"session_{self.trial}" @@ -174,15 +181,7 @@ def execute(self, # Write the requirements to a file requirement_path = self.write_requirements_to_file(self.args, result_str) function_analysis = resultslib.FunctionAnalysisResult(requirement_path) - else: - function_analysis = None - - result = resultslib.Result( - benchmark=self.benchmark, - trial=self.trial, - work_dirs=self.args.work_dirs, - function_analysis=function_analysis, - ) + result.function_analysis = function_analysis return result diff --git a/benchmark-sets/quick-test/astc-encoder.yaml b/benchmark-sets/quick-test/astc-encoder.yaml new file mode 100644 index 0000000000..c7ffe64ab9 --- /dev/null +++ b/benchmark-sets/quick-test/astc-encoder.yaml @@ -0,0 +1,29 @@ +"functions": +- "name": "_Z10encode_ise12quant_methodjPKhPhj" + "params": + - "name": "quant_level" + "type": "int" + - "name": "character_count" + "type": "int" + - "name": "input_data" + "type": "bool " + - "name": "output_data" + "type": "bool " + - "name": "bit_offset" + "type": "int" + "return_type": "void" + "signature": "void encode_ise(DW_TAG_enumeration_typequant_method, unsigned int, const uint8_t *, uint8_t *, unsigned int)" +- "name": "_Z20symbolic_to_physicalRK21block_size_descriptorRK25symbolic_compressed_blockPh" + "params": + - "name": "bsd" + "type": "bool " + - "name": "scb" + "type": "bool " + - "name": "pcb" + "type": "bool " + "return_type": "void" + "signature": "void symbolic_to_physical(const struct block_size_descriptor &, const struct symbolic_compressed_block &, uint8_t *)" +"language": "c++" +"project": "astc-encoder" +"target_name": "fuzz_astc_physical_to_symbolic" +"target_path": "/src/astc-encoder/Source/Fuzzers/fuzz_astc_physical_to_symbolic.cpp" diff --git a/common/cloud_builder.py b/common/cloud_builder.py index 5a3b73bc21..817f2f7efa 100644 --- a/common/cloud_builder.py +++ b/common/cloud_builder.py @@ -254,8 +254,8 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str, 'entrypoint': 'bash', 'args': [ '-c', f'gsutil cp {experiment_url} /tmp/ofg-exp.tar.gz && ' - f'mkdir /workspace/host/{experiment_path} && ' - f'tar -xzf /tmp/ofg-exp.tar.gz -C /workspace/host/{experiment_path}' + f'mkdir {os.path.join('/workspace/host/', experiment_path)} && ' + f'tar -xzf /tmp/ofg-exp.tar.gz -C {os.path.join('/workspace/host/', experiment_path)}' ], 'allowFailure': True, }, @@ -396,7 +396,7 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str, body=cloud_build_config).execute() build_id = build_info.get('metadata', 
{}).get('build', {}).get('id', '') - logging.info('Created Cloud Build ID %s at %s', build_id, REGION) + logging.info('Created Cloud Build ID %s for agent %s at %s', build_id, REGION) return build_id def _wait_for_build(self, build_id: str) -> str: From 0ec4675424ee656180c68deb9084336e91197e72 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Fri, 6 Jun 2025 15:06:27 +0000 Subject: [PATCH 37/64] Presubmit fixes --- agent/function_analyzer.py | 10 +++++++--- agent_tests/function_analyzer_test.py | 2 +- common/cloud_builder.py | 11 ++++++----- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index 331a0e99a1..ef40cae0b7 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -71,7 +71,8 @@ def initialize(self): self.benchmark, self.name) # Create the agent using the ADK library - # TODO(pamusuo): Create another AdkBaseAgent that extends BaseAgent and initializes an ADK agent as well. + # TODO(pamusuo): Create another AdkBaseAgent that extends + # BaseAgent and initializes an ADK agent as well. function_analyzer = agents.LlmAgent( name="FunctionAnalyzer", model=self.vertex_ai_model, @@ -79,7 +80,8 @@ def initialize(self): from its source implementation.""", instruction= """You are a security engineer tasked with analyzing a function - and extracting its input requirements, necessary for it to execute correctly.""", + and extracting its input requirements, + necessary for it to execute correctly.""", tools=[introspector_tool.function_source_with_name], ) @@ -169,7 +171,9 @@ def execute(self, # Validate query is not empty if not query.strip(): - logger.error("Error occurred while building initial prompt. Cannot call the agent.") + logger.error( + "Error occurred while building initial prompt. Cannot call the agent." + ) return result user_id = "user" diff --git a/agent_tests/function_analyzer_test.py b/agent_tests/function_analyzer_test.py index 2fbfb9223c..b756a81bee 100644 --- a/agent_tests/function_analyzer_test.py +++ b/agent_tests/function_analyzer_test.py @@ -170,7 +170,7 @@ def analyze_benchmark(benchmark: benchmarklib.Benchmark, model: models.LLM, if result.get(): success_count += 1 except Exception as e: - logger.error(f"Error during analysis for benchmark %s: %s", + logger.error("Error during analysis for benchmark %s: %s", benchmark_id, e) print( diff --git a/common/cloud_builder.py b/common/cloud_builder.py index 817f2f7efa..90baad4ee2 100644 --- a/common/cloud_builder.py +++ b/common/cloud_builder.py @@ -132,7 +132,6 @@ def _upload_directory(self, directory_path: str) -> str: for root, _, files in os.walk(directory_path) for file in files) - # TODO(pamusuo): Check if directory_path is the right base directory to use, or OFG_ROOT_DIR? return self._upload_files(f'ofg-exp-{uuid.uuid4().hex}.tar.gz', directory_path, files_to_upload) @@ -214,6 +213,8 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str, if data_dir_url: target_data_dir = '/workspace/data-dir' + workspace_exp_path = os.path.join('/workspace', 'host', experiment_path) + cloud_build_config = { 'steps': [ # Step 1: Download the dill, artifact and experiment files from GCS bucket. 
@@ -254,8 +255,8 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str, 'entrypoint': 'bash', 'args': [ '-c', f'gsutil cp {experiment_url} /tmp/ofg-exp.tar.gz && ' - f'mkdir {os.path.join('/workspace/host/', experiment_path)} && ' - f'tar -xzf /tmp/ofg-exp.tar.gz -C {os.path.join('/workspace/host/', experiment_path)}' + f'mkdir {workspace_exp_path} && ' + f'tar -xzf /tmp/ofg-exp.tar.gz -C {workspace_exp_path}' ], 'allowFailure': True, }, @@ -396,7 +397,8 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str, body=cloud_build_config).execute() build_id = build_info.get('metadata', {}).get('build', {}).get('id', '') - logging.info('Created Cloud Build ID %s for agent %s at %s', build_id, REGION) + logging.info('Created Cloud Build ID %s at %s', build_id, + REGION) return build_id def _wait_for_build(self, build_id: str) -> str: @@ -486,7 +488,6 @@ def run(self, agent: BaseAgent, result_history: list[Result], else: logging.error('No artifact_path found in RunResult.') - # TODO(pamusuo): Where should we get the experiment path from? self.exp_args.work_dirs.base experiment_path = result_history[-1].work_dirs.base experiment_url = '' if os.path.exists(experiment_path): From 3a60edce29150453ee8ff406c6cd4ec415985b12 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Fri, 6 Jun 2025 15:09:19 +0000 Subject: [PATCH 38/64] Presubmit fixes for cloud_builder.py --- common/cloud_builder.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/common/cloud_builder.py b/common/cloud_builder.py index 90baad4ee2..692c50f4ec 100644 --- a/common/cloud_builder.py +++ b/common/cloud_builder.py @@ -397,8 +397,7 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str, body=cloud_build_config).execute() build_id = build_info.get('metadata', {}).get('build', {}).get('id', '') - logging.info('Created Cloud Build ID %s at %s', build_id, - REGION) + logging.info('Created Cloud Build ID %s at %s', build_id, REGION) return build_id def _wait_for_build(self, build_id: str) -> str: From dde533e4fe4a6e081fb17ca42ab9b65a57fbc8b7 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Fri, 6 Jun 2025 16:06:38 +0000 Subject: [PATCH 39/64] Refactored logging in function analyzer --- agent/function_analyzer.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index ef40cae0b7..fbed7310b4 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -19,13 +19,13 @@ import argparse import asyncio -import logging import os from typing import Optional from google.adk import agents, runners, sessions from google.genai import types +import logger import results as resultslib from agent import base_agent from experiment import benchmark as benchmarklib @@ -33,8 +33,6 @@ from llm_toolkit import models, prompt_builder, prompts from tool import base_tool, fuzz_introspector_tool -logger = logging.getLogger(__name__) - class FunctionAnalyzer(base_agent.BaseAgent): """An LLM agent to analyze a function and identify its implicit requirements. 
@@ -89,7 +87,7 @@ def initialize(self): session_service = sessions.InMemorySessionService() session_service.create_session( app_name=self.name, - user_id="user", + user_id=self.benchmark.id, session_id=f"session_{self.trial}", ) @@ -100,7 +98,9 @@ def initialize(self): session_service=session_service, ) - logger.info("Function Analyzer Agent created, with name: %s", self.name) + logger.info("Function Analyzer Agent created, with name: %s", + self.name, + trial=self.trial) async def call_agent(self, query: str, runner: runners.Runner, user_id: str, session_id: str) -> str: @@ -125,9 +125,9 @@ async def call_agent(self, query: str, runner: runners.Runner, user_id: str, result_available = True elif event.actions and event.actions.escalate: error_message = event.error_message - logger.error("Agent escalated: %s", error_message) + logger.error("Agent escalated: %s", error_message, trial=self.trial) - logger.info("<<< Agent response: %s", final_response_text) + logger.info("<<< Agent response: %s", final_response_text, trial=self.trial) if result_available and self._parse_tag(final_response_text, 'response'): # Get the requirements from the response @@ -140,7 +140,7 @@ async def call_agent(self, query: str, runner: runners.Runner, user_id: str, def write_requirements_to_file(self, args, requirements: str) -> str: """Write the requirements to a file.""" if not requirements: - logger.warning("No requirements to write to file.") + logger.warning("No requirements to write to file.", trial=self.trial) return '' requirement_path = os.path.join(args.work_dirs.requirements, @@ -149,7 +149,9 @@ def write_requirements_to_file(self, args, requirements: str) -> str: with open(requirement_path, 'w') as f: f.write(requirements) - logger.info("Requirements written to %s", requirement_path) + logger.info("Requirements written to %s", + requirement_path, + trial=self.trial) return requirement_path @@ -158,6 +160,7 @@ def execute(self, """Execute the agent with the given results.""" WorkDirs(self.args.work_dirs.base, keep=True) + logger.info('Executing %s', self.name, trial=self.trial) result = resultslib.Result( benchmark=self.benchmark, @@ -172,11 +175,13 @@ def execute(self, # Validate query is not empty if not query.strip(): logger.error( - "Error occurred while building initial prompt. Cannot call the agent." - ) + "Error occurred while building initial prompt. Cannot call the agent.", + trial=self.trial) return result - user_id = "user" + logger.info("Initial prompt created. Calling LLM...", trial=self.trial) + + user_id = self.benchmark.id session_id = f"session_{self.trial}" result_str = asyncio.run( self.call_agent(query, self.runner, user_id, session_id)) From d2d313778d3fd1eca9415bc3aa2204ec5a96b54d Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Fri, 6 Jun 2025 19:31:23 +0000 Subject: [PATCH 40/64] Ensure new experiment files from cloud experiment are saved after the agent finishes executing. 
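
The mechanism is a GCS round trip: the Cloud Build job tars the experiment directory it produced and uploads the archive, and the host later downloads that archive and unpacks it over its own copy with --skip-old-files so results written locally in the meantime are kept. The sketch below uses placeholder bucket and path names; the real steps live in common/cloud_builder.py in the diff that follows.

import subprocess


def pack_and_upload(exp_dir: str, bucket: str, blob: str) -> None:
  """Cloud Build side: archive the experiment directory and push it to GCS."""
  subprocess.run(
      ['tar', '-czf', '/tmp/ofg-exp-new.tar.gz', '-C', exp_dir, '.'],
      check=True)
  subprocess.run(
      ['gsutil', 'cp', '/tmp/ofg-exp-new.tar.gz', f'gs://{bucket}/{blob}'],
      check=True)


def download_and_merge(exp_dir: str, bucket: str, blob: str) -> None:
  """Host side: fetch the archive and merge it into the local directory."""
  subprocess.run(
      ['gsutil', 'cp', f'gs://{bucket}/{blob}', '/tmp/ofg-exp-new.tar.gz'],
      check=True)
  # --skip-old-files keeps any file the host already has, so local results
  # produced after the upload are not overwritten.
  subprocess.run(
      ['tar', '--skip-old-files', '-xzf', '/tmp/ofg-exp-new.tar.gz', '-C',
       exp_dir],
      check=True)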
--- common/cloud_builder.py | 62 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 4 deletions(-) diff --git a/common/cloud_builder.py b/common/cloud_builder.py index 692c50f4ec..3ddc2fe19f 100644 --- a/common/cloud_builder.py +++ b/common/cloud_builder.py @@ -198,7 +198,8 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str, results_dill_url: str, artifact_url: str, artifact_path: str, oss_fuzz_data_url: str, data_dir_url: str, new_result_filename: str, - experiment_url: str, experiment_path: str) -> str: + experiment_url: str, experiment_path: str, + new_experiment_filename: str) -> str: """Requests Cloud Build to execute the operation.""" # Used for injecting additional OSS-Fuzz project integrations not in @@ -255,7 +256,7 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str, 'entrypoint': 'bash', 'args': [ '-c', f'gsutil cp {experiment_url} /tmp/ofg-exp.tar.gz && ' - f'mkdir {workspace_exp_path} && ' + f'mkdir -p {workspace_exp_path} && ' f'tar -xzf /tmp/ofg-exp.tar.gz -C {workspace_exp_path}' ], 'allowFailure': True, @@ -378,7 +379,23 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str, 'cp', '/workspace/dills/new_result.pkl', f'gs://{self.bucket_name}/{new_result_filename}' ] - } + }, + # Step 7: Upload the experiment directory to GCS bucket + { + 'name': 'bash', + 'dir': '/workspace', + 'args': ['ls', '-R', workspace_exp_path] + }, + { + 'name': 'gcr.io/cloud-builders/gsutil', + 'entrypoint': 'bash', + 'args': [ + '-c', f'test -d {workspace_exp_path} && ' + f'tar -czf /tmp/{new_experiment_filename} -C {workspace_exp_path} . && ' + f'gsutil cp /tmp/{new_experiment_filename} gs://{self.bucket_name}/{new_experiment_filename}' + ], + 'allowFailure': True, + }, ], 'tags': self.tags, 'timeout': '10800s', # 3 hours @@ -454,6 +471,37 @@ def _download_from_gcs(self, destination_file_name: str) -> None: blob.download_to_filename(destination_file_name) logging.info('Downloaded %s to %s', source_blob_name, destination_file_name) + def _update_experiment_directory(self, experiment_path: str, + new_experiment_url: str) -> None: + """Updates the experiment directory with new files from GCS.""" + if not os.path.exists(experiment_path): + logging.error('Experiment path %s does not exist.', experiment_path) + return + + # Download the new experiment archive. + temp_dest_path = f'/tmp/{os.path.basename(new_experiment_url)}' + self._download_from_gcs(temp_dest_path) + + tar_command = ['tar', '--skip-old-files', '-xzf', temp_dest_path, '-C', experiment_path] + logging.info('Tar command: %s', ' '.join(tar_command)) + + # Extract the archive into the experiment directory. + try: + result = subprocess.run(tar_command, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True) + logging.error("subprocess stdout:\n%s", result.stdout) + logging.error("subprocess stderr:\n%s", result.stderr) + except subprocess.CalledProcessError as e: + logging.error("Tar command failed with return code %d", e.returncode) + logging.error("stdout:\n%s", e.stdout) + logging.error("stderr:\n%s", e.stderr) + raise + logging.info('Updated experiment directory with new files from %s', + new_experiment_url) + def run(self, agent: BaseAgent, result_history: list[Result], dill_dir: str) -> Any: """Runs agent on cloud build.""" @@ -501,11 +549,12 @@ def run(self, agent: BaseAgent, result_history: list[Result], # Step 3: Request Cloud Build. 
new_result_filename = f'{uuid.uuid4().hex}.pkl' + new_experiment_filename = f'{uuid.uuid4().hex}.tar.gz' build_id = self._request_cloud_build(ofg_url, agent_url, results_url, artifact_url, artifact_path, oss_fuzz_data_url, data_dir_url, new_result_filename, experiment_url, - experiment_path) + experiment_path, new_experiment_filename) # Step 4: Download new result dill. cloud_build_log = '' @@ -537,4 +586,9 @@ def run(self, agent: BaseAgent, result_history: list[Result], author=agent) result.chat_history = {agent.name: cloud_build_log} + # Step 6: Update work directory with any new files created by the agent. + new_experiment_url = f'gs://{self.bucket_name}/{new_experiment_filename}' + self._update_experiment_directory(experiment_path, new_experiment_url) + return result + From 716ff40407cb856f249097cdba601a4ba552e4df Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Fri, 6 Jun 2025 19:37:58 +0000 Subject: [PATCH 41/64] Presubmit fixes --- common/cloud_builder.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/common/cloud_builder.py b/common/cloud_builder.py index 3ddc2fe19f..ceccb470e7 100644 --- a/common/cloud_builder.py +++ b/common/cloud_builder.py @@ -472,7 +472,7 @@ def _download_from_gcs(self, destination_file_name: str) -> None: logging.info('Downloaded %s to %s', source_blob_name, destination_file_name) def _update_experiment_directory(self, experiment_path: str, - new_experiment_url: str) -> None: + new_experiment_url: str) -> None: """Updates the experiment directory with new files from GCS.""" if not os.path.exists(experiment_path): logging.error('Experiment path %s does not exist.', experiment_path) @@ -482,16 +482,18 @@ def _update_experiment_directory(self, experiment_path: str, temp_dest_path = f'/tmp/{os.path.basename(new_experiment_url)}' self._download_from_gcs(temp_dest_path) - tar_command = ['tar', '--skip-old-files', '-xzf', temp_dest_path, '-C', experiment_path] + tar_command = [ + 'tar', '--skip-old-files', '-xzf', temp_dest_path, '-C', experiment_path + ] logging.info('Tar command: %s', ' '.join(tar_command)) # Extract the archive into the experiment directory. try: result = subprocess.run(tar_command, - check=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True) + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True) logging.error("subprocess stdout:\n%s", result.stdout) logging.error("subprocess stderr:\n%s", result.stderr) except subprocess.CalledProcessError as e: @@ -500,7 +502,7 @@ def _update_experiment_directory(self, experiment_path: str, logging.error("stderr:\n%s", e.stderr) raise logging.info('Updated experiment directory with new files from %s', - new_experiment_url) + new_experiment_url) def run(self, agent: BaseAgent, result_history: list[Result], dill_dir: str) -> Any: @@ -554,7 +556,8 @@ def run(self, agent: BaseAgent, result_history: list[Result], artifact_url, artifact_path, oss_fuzz_data_url, data_dir_url, new_result_filename, experiment_url, - experiment_path, new_experiment_filename) + experiment_path, + new_experiment_filename) # Step 4: Download new result dill. 
cloud_build_log = '' @@ -591,4 +594,3 @@ def run(self, agent: BaseAgent, result_history: list[Result], self._update_experiment_directory(experiment_path, new_experiment_url) return result - From 735c46e37a73cad0e1636a37dab0ff63643b2b55 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Fri, 6 Jun 2025 20:32:03 +0000 Subject: [PATCH 42/64] Fixed bug --- common/cloud_builder.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/common/cloud_builder.py b/common/cloud_builder.py index ceccb470e7..cdbcf7711b 100644 --- a/common/cloud_builder.py +++ b/common/cloud_builder.py @@ -214,8 +214,6 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str, if data_dir_url: target_data_dir = '/workspace/data-dir' - workspace_exp_path = os.path.join('/workspace', 'host', experiment_path) - cloud_build_config = { 'steps': [ # Step 1: Download the dill, artifact and experiment files from GCS bucket. @@ -256,8 +254,8 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str, 'entrypoint': 'bash', 'args': [ '-c', f'gsutil cp {experiment_url} /tmp/ofg-exp.tar.gz && ' - f'mkdir -p {workspace_exp_path} && ' - f'tar -xzf /tmp/ofg-exp.tar.gz -C {workspace_exp_path}' + f'mkdir -p /workspace/host/{experiment_path} && ' + f'tar -xzf /tmp/ofg-exp.tar.gz -C /workspace/host/{experiment_path}' ], 'allowFailure': True, }, @@ -384,15 +382,17 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str, { 'name': 'bash', 'dir': '/workspace', - 'args': ['ls', '-R', workspace_exp_path] + 'args': ['ls', '-R', f'/workspace/host/{experiment_path}'] }, { 'name': 'gcr.io/cloud-builders/gsutil', 'entrypoint': 'bash', 'args': [ - '-c', f'test -d {workspace_exp_path} && ' - f'tar -czf /tmp/{new_experiment_filename} -C {workspace_exp_path} . && ' - f'gsutil cp /tmp/{new_experiment_filename} gs://{self.bucket_name}/{new_experiment_filename}' + '-c', f'test -d /workspace/host/{experiment_path} && ' + f'tar -czf /tmp/{new_experiment_filename} ' + f'-C /workspace/host/{experiment_path} . && ' + f'gsutil cp /tmp/{new_experiment_filename} ' + f'gs://{self.bucket_name}/{new_experiment_filename}' ], 'allowFailure': True, }, From 2108ded890d3df9ce1f6ecf06a7e07543b5d4607 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Fri, 6 Jun 2025 21:27:46 +0000 Subject: [PATCH 43/64] Integrated ProjectContainerTool and modified the prompt. 
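This change wires a bash-backed file-search tool into the function analyzer alongside the Fuzz Introspector lookup. As a rough, illustrative sketch (not the code in this diff — `run_in_container` and the response tag names are placeholders standing in for the ProjectContainerTool behaviour), the tool amounts to running a command inside the project's build container and echoing the result back to the model:

```python
# Illustrative sketch only: how a bash-backed search tool can be exposed to the
# agent. `run_in_container` and the tag names are placeholders for the
# ProjectContainerTool behaviour, not code from this patch.
import subprocess

def run_in_container(container_id: str, command: str) -> subprocess.CompletedProcess:
  """Runs a bash command inside the project's build container."""
  return subprocess.run(
      ['docker', 'exec', container_id, '/bin/bash', '-c', command],
      capture_output=True, text=True, check=False)

def search_project_files(container_id: str, command: str) -> str:
  """Executes a search command (e.g. grep/find) and returns a tagged response."""
  result = run_in_container(container_id, command)
  return (f'<bash>{command}</bash>\n'
          f'<return code>{result.returncode}</return code>\n'
          f'<stdout>{result.stdout}</stdout>\n'
          f'<stderr>{result.stderr}</stderr>')
```

The actual implementation in the diff below reuses the container handling already available on BaseAgent rather than shelling out directly.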
--- agent/function_analyzer.py | 42 ++++++++++- .../agent/context-retriever-instruction.txt | 75 ------------------- prompts/agent/function-analyzer-priming.txt | 44 +++++++---- tool/fuzz_introspector_tool.py | 9 +-- 4 files changed, 73 insertions(+), 97 deletions(-) delete mode 100644 prompts/agent/context-retriever-instruction.txt diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index fbed7310b4..9189767236 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -31,7 +31,7 @@ from experiment import benchmark as benchmarklib from experiment.workdir import WorkDirs from llm_toolkit import models, prompt_builder, prompts -from tool import base_tool, fuzz_introspector_tool +from tool import base_tool, container_tool, fuzz_introspector_tool class FunctionAnalyzer(base_agent.BaseAgent): @@ -80,7 +80,10 @@ def initialize(self): """You are a security engineer tasked with analyzing a function and extracting its input requirements, necessary for it to execute correctly.""", - tools=[introspector_tool.function_source_with_name], + tools=[ + introspector_tool.get_function_implementation, + self.search_project_files + ], ) # Create the session service @@ -179,6 +182,10 @@ def execute(self, trial=self.trial) return result + # Initialize the ProjectContainerTool for local file search + self.inspect_tool = container_tool.ProjectContainerTool(self.benchmark) + self.inspect_tool.compile(extra_commands=' && rm -rf /out/* > /dev/null') + logger.info("Initial prompt created. Calling LLM...", trial=self.trial) user_id = self.benchmark.id @@ -192,6 +199,8 @@ def execute(self, function_analysis = resultslib.FunctionAnalysisResult(requirement_path) result.function_analysis = function_analysis + self.inspect_tool.terminate() + return result def _initial_prompt( @@ -205,4 +214,33 @@ def _initial_prompt( prompt = builder.build_prompt() + prompt.append(self.inspect_tool.tutorial()) + return prompt + + def search_project_files(self, request: str) -> str: + """ + This function tool uses bash commands to search the project's source files, + and retrieve requested code snippets or file contents. + Args: + request (str): The bash command to execute and its justification, formatted using the and tags. + Returns: + str: The response from executing the bash commands, formatted using the , and tags. + """ + + logger.info('Received ProjectContainerTool request: %s', + request, + trial=self.trial) + + prompt = prompt_builder.DefaultTemplateBuilder(self.llm, None).build([]) + + if request: + prompt = self._container_handle_bash_commands(request, self.inspect_tool, + prompt) + + # Finally check invalid request. + if not request or not prompt.get(): + prompt = self._container_handle_invalid_tool_usage( + self.inspect_tool, 0, request, prompt) + + return prompt.get() diff --git a/prompts/agent/context-retriever-instruction.txt b/prompts/agent/context-retriever-instruction.txt deleted file mode 100644 index 33387eda0f..0000000000 --- a/prompts/agent/context-retriever-instruction.txt +++ /dev/null @@ -1,75 +0,0 @@ -You are a helpful agent. Your task is to use the provided tools to retrieve the source code implementations of a target function and those of the children functions it calls. - -You will be provided with two tools, _function_source_with_signature and _function_source_with_name. - -You MUST use these tools to get the requested function implementations. -DO NOT MAKE UP A FUNCTION BY YOURSELF! - -YOU MUST USE AT LEAST, ONE TOOL, WHEN YOU ARE CALLED. 
- -Here are the steps you should take to perform your task. - -Step 1: Get the implementation of the target function. - * Use the _function_source_with_signature tool to get the implementation of the target function. - * This tool takes as argument, a project name and a function signature. - * If successful, it returns the function source. If not, it returns an empty string. - -Step 2: Get the names of children functions from the target function. - * Analyze the target function and retrieve the names of other functions it calls (eg function()). - * Also retrieve the names of any class or struct functions it calls (eg struct.function()) - -Step 3: Get the implementations of the first-level children functions. - * For each called function retrieved in step 2, use the _function_source_with_name function to retrieve the implementation of the function. - * This tool takes as argument, a project name and a function name. - * If successful, it returns the function source. Else, it returns an empty string. - -Before you return a response, MAKE SURE you check that you used the provided tools to get the functions you are returning. - -You are to return your result using the following format. - - - - -Function's signature - - -The retrieved source code. - - - - - - -Function's name - - -The retrieved source code. - - - - - -Function's name - - -The retrieved source code. - - - -... - - - -Function's name - - -The retrieved source code. - - - - - - -You will be provided with the following tools. -1. _function_source_with_signature: Use this tool to retrieve the function's implementation in step 1, where you'll have the project's name and function's signature. -2. _function_source_with_name: Use this tool to retrieve the function's implementation in step 3, where you will only have the project's name and function's name. \ No newline at end of file diff --git a/prompts/agent/function-analyzer-priming.txt b/prompts/agent/function-analyzer-priming.txt index 6a289e8b15..2857ad7fe4 100644 --- a/prompts/agent/function-analyzer-priming.txt +++ b/prompts/agent/function-analyzer-priming.txt @@ -1,17 +1,30 @@ -You are a professional security engineer working on creating a valid fuzzing driver for the target function `{FUNCTION_SIGNATURE}` in the project {PROJECT_NAME}. -We will provide you with the implementation of the target function, implementations of other functions that reference the target function, and a set of tools that you can use to get additional function implementations and context information. - -Your goal is to analyze the provided functions and its usages, provide a clear detailed description of the function, and identify the important input requirements for the target function to execute correctly. - -The requirements we are interested in include the following: -5. WHat constraints on input arguments is necessary to prevent assertion failures, out-of-bound array indexing, null pointer dereferencing, invalid memory access, invalid string access, and other crashes. -1. What setup functions must be called before the target function? -2. What existing function in the project should we use to create valid inputs for the target function? -3. What inputs, or members of an input, should we initialize with random fuzz data? -4. What inputs must we initialize by calling another existing function? - -Keep your responses concise. Each requirement should contain two sentences. The first is the requirement. The second is a brief reason why it is important. 
+ +You are a professional security engineer identifying the input requirements for the target function `{FUNCTION_SIGNATURE}` in the project {PROJECT_NAME}. +We will provide you with the implementation of the target function, implementations of functions that reference the target function, and a set of tools that you can use to get additional context information about the target function. +Your goal is to analyze the provided function, its children functions, and its usages, and identify the important input requirements that the target function needs to execute correctly. + + + + We are interested in only the following kinds of requirements. + - Input requirements that are necessary to prevent program crashes. + * Program crashes can be caused by assertion failures, invalid array indexing, out-of-bound memory accesses, pointer dereferencing failures. + - Requirements for creating valid input arguments. + * Here, you should mention what existing function or functions should be used to create a valid input argument. + * For example, if a function takes in an integer argument but uses that argument as a file descriptor for reading a file (eg the read function), then it implies the integer must have been returned by another function that creates a file descriptor (eg the open function). + * Similarly, if a function takes in a character pointer and uses it like a file path or name, then this implies a valid file should be created and the path or name passed to this function. + * Also, if a function takes in a pointer argument and uses that argument as an argument to strlen or strcpy or other string handling function, this implies the function expects a null-terminated string. + - Relationship between inputs + * For example, this can be the relationship between a pointer and an integer argument representing its size. + - Input variables that should be fuzzed + * What input variables can be user-controlled or contain invalid values? + * For example, if a function parses or processes one of its input arguments, then that argument is fuzzable. + - Setup functions to call before the target function can be called. + * This is the function or set of functions we must call before calling the target function. + * For example, if a function depends on a global variable which is set by another function, this may imply we need to call that function before the target function. + +Keep each requirement concise. Each requirement should contain two sentences. The first is the requirement. The second is a brief reason why it is important. + Here is the provided data. @@ -54,6 +67,7 @@ nth requirement +``` @@ -90,4 +104,6 @@ The third argument should be less than 16. This is to prevent an out-of-bound ar You will be provided with the following tools. -1. _function_source_with_name: Use this tool to retrieve the implementation of a function. You will invoke the tool using the project's name and function's name as arguments. \ No newline at end of file +1. get_function_implementation: This is a tool you can use to retrieve the implementation of a function using the project's name and function's name as arguments. +2. search_project_files: This is an interactive tool you can use to search the project's source files using bash commands and find definitions or usages of functions, classes, structs, and variables. + The usage guide for the Bash Tool is provided below.
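The priming template above is filled in by the prompt builder before the agent run. Below is a minimal sketch of that substitution, assuming the placeholder names used in the template and a `<reference>` wrapper for cross references; the exact tags and fallback behaviour live in llm_toolkit/prompt_builder.py.

```python
# Minimal sketch of how the template placeholders above could be filled; the
# real logic (and its error handling) is in llm_toolkit/prompt_builder.py.
def build_priming_prompt(template: str, project: str, signature: str,
                         func_source: str, xrefs: list[str]) -> str:
  prompt = template.replace('{PROJECT_NAME}', project)
  prompt = prompt.replace('{FUNCTION_SIGNATURE}', signature)
  prompt = prompt.replace('{FUNCTION_SOURCE}', func_source)
  if xrefs:
    references = '\n'.join(f'<reference>\n{xref}\n</reference>' for xref in xrefs)
    prompt = prompt.replace('{FUNCTION_REFERENCES}', references)
  else:
    # With no cross references, drop the placeholder instead of failing.
    prompt = prompt.replace('{FUNCTION_REFERENCES}', '')
  return prompt
```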
diff --git a/tool/fuzz_introspector_tool.py b/tool/fuzz_introspector_tool.py index 0a8c26ec24..85c2f24109 100644 --- a/tool/fuzz_introspector_tool.py +++ b/tool/fuzz_introspector_tool.py @@ -79,14 +79,11 @@ def function_source_with_signature(self, project_name: str, return function_code - def function_source_with_name(self, project_name: str, - function_name: str) -> str: + def get_function_implementation(self, project_name: str, + function_name: str) -> str: """ Retrieves a function's source from the fuzz introspector API, - using the project's name and function's name. - This function first retrieves the list of all - functions in the project, so it can get the function's signature. - Then it uses the function's signature to retrieve the source code. + using the project's name and function's name Args: project_name (str): The name of the project. From a7a700db8877da76e5697a214d7063fcefda9965 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Mon, 9 Jun 2025 15:06:41 +0000 Subject: [PATCH 44/64] Added additional benchmarks to quick-test --- benchmark-sets/quick-test/hoextdown.yaml | 17 +++++++++++++++++ benchmark-sets/quick-test/igraph.yaml | 21 +++++++++++++++++++++ benchmark-sets/quick-test/libsndfile.yaml | 15 +++++++++++++++ benchmark-sets/quick-test/xs.yaml | 19 +++++++++++++++++++ 4 files changed, 72 insertions(+) create mode 100644 benchmark-sets/quick-test/hoextdown.yaml create mode 100644 benchmark-sets/quick-test/igraph.yaml create mode 100644 benchmark-sets/quick-test/libsndfile.yaml create mode 100644 benchmark-sets/quick-test/xs.yaml diff --git a/benchmark-sets/quick-test/hoextdown.yaml b/benchmark-sets/quick-test/hoextdown.yaml new file mode 100644 index 0000000000..45609ac8e6 --- /dev/null +++ b/benchmark-sets/quick-test/hoextdown.yaml @@ -0,0 +1,17 @@ +"functions": +- "name": "hoedown_document_render_inline" + "params": + - "name": "doc" + "type": "bool " + - "name": "ob" + "type": "bool " + - "name": "data" + "type": "bool " + - "name": "size" + "type": "size_t" + "return_type": "void" + "signature": "void hoedown_document_render_inline(hoedown_document *, hoedown_buffer *, const uint8_t *, size_t)" +"language": "c++" +"project": "hoextdown" +"target_name": "hoedown_fuzzer" +"target_path": "/src/hoextdown/test/hoedown_fuzzer.c" diff --git a/benchmark-sets/quick-test/igraph.yaml b/benchmark-sets/quick-test/igraph.yaml new file mode 100644 index 0000000000..916f611ad0 --- /dev/null +++ b/benchmark-sets/quick-test/igraph.yaml @@ -0,0 +1,21 @@ +"functions": +- "name": "igraph_sparsemat_arpack_rssolve" + "params": + - "name": "A" + "type": "bool " + - "name": "options" + "type": "bool " + - "name": "storage" + "type": "bool " + - "name": "values" + "type": "bool " + - "name": "vectors" + "type": "bool " + - "name": "solvemethod" + "type": "int" + "return_type": "int" + "signature": "igraph_error_t igraph_sparsemat_arpack_rssolve(const igraph_sparsemat_t *, igraph_arpack_options_t *, igraph_arpack_storage_t *, igraph_vector_t *, igraph_matrix_t *, igraph_sparsemat_solve_t)" +"language": "c" +"project": "igraph" +"target_name": "read_graphdb" +"target_path": "/src/igraph/fuzzing/read_graphdb.cpp" diff --git a/benchmark-sets/quick-test/libsndfile.yaml b/benchmark-sets/quick-test/libsndfile.yaml new file mode 100644 index 0000000000..6c6127bb59 --- /dev/null +++ b/benchmark-sets/quick-test/libsndfile.yaml @@ -0,0 +1,15 @@ +"functions": +- "name": "sf_open" + "params": + - "name": "path" + "type": "bool " + - "name": "mode" + "type": "int" + - "name": "sfinfo" + "type": "bool " + 
"return_type": "void" + "signature": "SNDFILE * sf_open(const char *, int, SF_INFO *)" +"language": "c" +"project": "libsndfile" +"target_name": "sndfile_fuzzer" +"target_path": "/src/libsndfile/ossfuzz/sndfile_fuzzer.cc" diff --git a/benchmark-sets/quick-test/xs.yaml b/benchmark-sets/quick-test/xs.yaml new file mode 100644 index 0000000000..b4da287606 --- /dev/null +++ b/benchmark-sets/quick-test/xs.yaml @@ -0,0 +1,19 @@ +"functions": +- "name": "fxLoadModulesRejected" + "params": + - "name": "the" + "type": "bool " + "return_type": "void" + "signature": "void fxLoadModulesRejected(txMachine *)" +- "name": "fxAwaitImport" + "params": + - "name": "the" + "type": "bool " + - "name": "defaultFlag" + "type": "int" + "return_type": "void" + "signature": "void fxAwaitImport(txMachine *, txBoolean)" +"language": "c" +"project": "xs" +"target_name": "xst_jsonparse" +"target_path": "/src/moddable/xs/tools/xstFuzz.c" From 466ca80f13e8a6298e40b3b0aacf4945d8ddb44d Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Wed, 11 Jun 2025 18:15:04 +0000 Subject: [PATCH 45/64] Fix errors after pulling main --- agent/function_based_prototyper.py | 2 +- data_prep/introspector.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/agent/function_based_prototyper.py b/agent/function_based_prototyper.py index 78d72b34ae..b78bd06473 100644 --- a/agent/function_based_prototyper.py +++ b/agent/function_based_prototyper.py @@ -213,7 +213,7 @@ def _func_handle_get_all_functions_in_project(self, tool_call, args): """Handles getting all functions in the project.""" logger.info('Handling list_functions_in_project: %s', args, trial=-1) - functions = introspector.query_introspector_all_functions( + functions = introspector.query_introspector_all_signatures( self.benchmark.project) logger.info('Functions in project: %s', functions, trial=-1) # Extend messages to prepare for next iteration. 
diff --git a/data_prep/introspector.py b/data_prep/introspector.py index e7461bdcee..dfa2efcfbd 100755 --- a/data_prep/introspector.py +++ b/data_prep/introspector.py @@ -260,12 +260,16 @@ def query_introspector_for_harness_intrinsics( return _get_data(resp, 'pairs', []) -def query_introspector_all_functions(project: str) -> list[str]: +def query_introspector_all_functions(project: str) -> list[dict]: """Queries FuzzIntrospector API for all functions in a project.""" resp = _query_introspector(INTROSPECTOR_ALL_FUNCTIONS, { 'project': project, }) - functions = _get_data(resp, 'functions', []) + return _get_data(resp, 'functions', []) + +def query_introspector_all_signatures(project: str) -> list[str]: + """Queries FuzzIntrospector API for all functions in a project.""" + functions: list[dict] = query_introspector_all_functions(project) new_funcs = [] for func in functions: new_funcs.append(func['function_signature']) From 08263a01dc74438aceede5007103d2a155636132 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Wed, 11 Jun 2025 18:17:25 +0000 Subject: [PATCH 46/64] Presubmit changes --- data_prep/introspector.py | 1 + 1 file changed, 1 insertion(+) diff --git a/data_prep/introspector.py b/data_prep/introspector.py index dfa2efcfbd..20655a35be 100755 --- a/data_prep/introspector.py +++ b/data_prep/introspector.py @@ -267,6 +267,7 @@ def query_introspector_all_functions(project: str) -> list[dict]: }) return _get_data(resp, 'functions', []) + def query_introspector_all_signatures(project: str) -> list[str]: """Queries FuzzIntrospector API for all functions in a project.""" functions: list[dict] = query_introspector_all_functions(project) From f21a7ab592a89c1b95d1f4ee9727fc631846fbfa Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Wed, 11 Jun 2025 20:02:13 +0000 Subject: [PATCH 47/64] Created ADKBaseAgent so it contains reusable ADK logic. --- agent/base_agent.py | 88 +++++++++++++++++++++++++- agent/function_analyzer.py | 125 +++++++++---------------------------- 2 files changed, 115 insertions(+), 98 deletions(-) diff --git a/agent/base_agent.py b/agent/base_agent.py index 97325be352..ca401e338a 100644 --- a/agent/base_agent.py +++ b/agent/base_agent.py @@ -24,13 +24,19 @@ import requests +from google.adk import agents, runners, sessions +from google.genai import types, errors + + import logger import utils from data_prep import introspector -from llm_toolkit.models import LLM +from experiment import benchmark as benchmarklib +from llm_toolkit.models import LLM, VertexAIModel from llm_toolkit.prompts import Prompt from results import Result from tool.base_tool import BaseTool +import asyncio class BaseAgent(ABC): @@ -295,6 +301,86 @@ def execute(self, result_history: list[Result]) -> Result: """Executes the agent based on previous result.""" +class ADKBaseAgent(BaseAgent): + + def __init__(self, + trial: int, + llm: LLM, + args: argparse.Namespace, + benchmark: benchmarklib.Benchmark, + description: str = '', + instruction: str = '', + tools: list = [], + name: str = ''): + + super().__init__(trial, llm, args, tools, name) + + self.benchmark = benchmark + + # For now, ADKBaseAgents only support the Vertex AI Models. 
+ if not isinstance(llm, VertexAIModel): + raise ValueError( + f'{self.name} only supports Vertex AI models.') + + # Create the agent using the ADK library + adk_agent = agents.LlmAgent( + name=name, + model=llm._vertex_ai_model, + description=description, + instruction=instruction, + tools=tools, + ) + + # Create the session service + session_service = sessions.InMemorySessionService() + session_service.create_session( + app_name=self.name, + user_id=benchmark.id, + session_id=f"session_{self.trial}", + ) + + # Create the runner + self.runner = runners.Runner( + agent=adk_agent, + app_name=self.name, + session_service=session_service, + ) + + logger.info("ADK Agent %s created.", + self.name, + trial=self.trial) + + + def query_llm(self, query: str) -> str: + """Call the agent with the given query, running async code in sync.""" + + async def _call(): + user_id = self.benchmark.id + session_id = f"session_{self.trial}" + content = types.Content(role='user', parts=[types.Part(text=query)]) + + final_response_text = '' + + async for event in self.runner.run_async( + user_id=user_id, + session_id=session_id, + new_message=content, + ): + if event.is_final_response(): + if (event.content and event.content.parts and + event.content.parts[0].text): + final_response_text = event.content.parts[0].text + elif event.actions and event.actions.escalate: + error_message = event.error_message + logger.error("Agent escalated: %s", error_message, trial=self.trial) + + logger.info("<<< Agent response: %s", final_response_text, trial=self.trial) + return final_response_text + + return self.llm.with_retry_on_error( + lambda: asyncio.run(_call()), [errors.ClientError]) + + if __name__ == "__main__": # For cloud experiments. BaseAgent.cloud_main() diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index fbed7310b4..e59a99483e 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -34,7 +34,7 @@ from tool import base_tool, fuzz_introspector_tool -class FunctionAnalyzer(base_agent.BaseAgent): +class FunctionAnalyzer(base_agent.ADKBaseAgent): """An LLM agent to analyze a function and identify its implicit requirements. The results of this analysis will be used by the writer agents to generate correct fuzz target for the function. @@ -44,98 +44,25 @@ def __init__(self, trial: int, llm: models.LLM, args: argparse.Namespace, - benchmark: benchmarklib.Benchmark, - tools: Optional[list[base_tool.BaseTool]] = None, + benchmark: benchmarklib.Benchmark,\ name: str = ''): - # Ensure the llm is an instance of VertexAIModel - # TODO (pamusuo): Provide support for other LLM models - if not isinstance(llm, models.VertexAIModel): - raise ValueError( - "FunctionAnalyzer agent requires a VertexAIModel instance for llm.") + description=""" + Extracts a function's requirements + from its source implementation. + """ + instruction= """ + You are a security engineer tasked with analyzing a function + and extracting its input requirements, + necessary for it to execute correctly. 
+ """ - super().__init__(trial, llm, args, tools, name) - - self.vertex_ai_model = llm._vertex_ai_model - self.benchmark = benchmark - - self.initialize() - - def initialize(self): - """Initialize the function analyzer agent with the given benchmark.""" - - # Initialize the Fuzz Introspector tool introspector_tool = fuzz_introspector_tool.FuzzIntrospectorTool( self.benchmark, self.name) + tools = [introspector_tool.function_source_with_name] - # Create the agent using the ADK library - # TODO(pamusuo): Create another AdkBaseAgent that extends - # BaseAgent and initializes an ADK agent as well. - function_analyzer = agents.LlmAgent( - name="FunctionAnalyzer", - model=self.vertex_ai_model, - description="""Extracts a function's requirements - from its source implementation.""", - instruction= - """You are a security engineer tasked with analyzing a function - and extracting its input requirements, - necessary for it to execute correctly.""", - tools=[introspector_tool.function_source_with_name], - ) - - # Create the session service - session_service = sessions.InMemorySessionService() - session_service.create_session( - app_name=self.name, - user_id=self.benchmark.id, - session_id=f"session_{self.trial}", - ) - - # Create the runner - self.runner = runners.Runner( - agent=function_analyzer, - app_name=self.name, - session_service=session_service, - ) - - logger.info("Function Analyzer Agent created, with name: %s", - self.name, - trial=self.trial) - - async def call_agent(self, query: str, runner: runners.Runner, user_id: str, - session_id: str) -> str: - """Call the agent asynchronously with the given query.""" - - content = types.Content(role='user', parts=[types.Part(text=query)]) - - final_response_text = '' - - result_available = False + super().__init__(trial, llm, args, benchmark, description, instruction, tools) - async for event in runner.run_async( - user_id=user_id, - session_id=session_id, - new_message=content, - ): - - if event.is_final_response(): - if (event.content and event.content.parts and - event.content.parts[0].text): - final_response_text = event.content.parts[0].text - result_available = True - elif event.actions and event.actions.escalate: - error_message = event.error_message - logger.error("Agent escalated: %s", error_message, trial=self.trial) - - logger.info("<<< Agent response: %s", final_response_text, trial=self.trial) - - if result_available and self._parse_tag(final_response_text, 'response'): - # Get the requirements from the response - result_str = self._parse_tag(final_response_text, 'response') - else: - result_str = '' - - return result_str def write_requirements_to_file(self, args, requirements: str) -> str: """Write the requirements to a file.""" @@ -155,6 +82,19 @@ def write_requirements_to_file(self, args, requirements: str) -> str: return requirement_path + def handle_llm_response( + self, final_response_text: str, + result: resultslib.Result) -> None: + """Handle the LLM response and update the result.""" + + result_str = self._parse_tag(final_response_text, 'response') + requirements = self._parse_tag(result_str, 'requirements') + if requirements: + # Write the requirements to a file + requirement_path = self.write_requirements_to_file(self.args, result_str) + function_analysis = resultslib.FunctionAnalysisResult(requirement_path) + result.function_analysis = function_analysis + def execute(self, result_history: list[resultslib.Result]) -> resultslib.Result: """Execute the agent with the given results.""" @@ -179,18 +119,9 @@ def execute(self, 
trial=self.trial) return result - logger.info("Initial prompt created. Calling LLM...", trial=self.trial) + final_response_text = self.query_llm(query) - user_id = self.benchmark.id - session_id = f"session_{self.trial}" - result_str = asyncio.run( - self.call_agent(query, self.runner, user_id, session_id)) - - if result_str: - # Write the requirements to a file - requirement_path = self.write_requirements_to_file(self.args, result_str) - function_analysis = resultslib.FunctionAnalysisResult(requirement_path) - result.function_analysis = function_analysis + self.handle_llm_response(final_response_text, result) return result From 3b38d8585d75f08c438ae34a0857ed369b9a4192 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Wed, 11 Jun 2025 20:50:39 +0000 Subject: [PATCH 48/64] Integrate results from function analyzer into web report --- agent/base_agent.py | 19 +++++++++++++++---- agent/function_analyzer.py | 15 ++++++--------- llm_toolkit/prompt_builder.py | 1 - stage/writing_stage.py | 1 + 4 files changed, 22 insertions(+), 14 deletions(-) diff --git a/agent/base_agent.py b/agent/base_agent.py index ca401e338a..376fb1c40a 100644 --- a/agent/base_agent.py +++ b/agent/base_agent.py @@ -350,14 +350,20 @@ def __init__(self, self.name, trial=self.trial) + def chat_llm(self, cur_round: int, client: Any, prompt: Prompt, + trial: int) -> str: + """Call the agent with the given prompt, running async code in sync.""" - def query_llm(self, query: str) -> str: - """Call the agent with the given query, running async code in sync.""" + logger.info('%s', + cur_round, + prompt.gettext(), + cur_round, + trial=trial) async def _call(): user_id = self.benchmark.id session_id = f"session_{self.trial}" - content = types.Content(role='user', parts=[types.Part(text=query)]) + content = types.Content(role='user', parts=[types.Part(text=prompt.get())]) final_response_text = '' @@ -374,7 +380,12 @@ async def _call(): error_message = event.error_message logger.error("Agent escalated: %s", error_message, trial=self.trial) - logger.info("<<< Agent response: %s", final_response_text, trial=self.trial) + logger.info('%s', + cur_round, + final_response_text, + cur_round, + trial=trial) + return final_response_text return self.llm.with_retry_on_error( diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index e59a99483e..26db3ab4c4 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -108,18 +108,15 @@ def execute(self, work_dirs=self.args.work_dirs, ) + cur_round = 1 + # Call the agent asynchronously and return the result prompt = self._initial_prompt(result_history) - query = prompt.gettext() - - # Validate query is not empty - if not query.strip(): - logger.error( - "Error occurred while building initial prompt. 
Cannot call the agent.", - trial=self.trial) - return result - final_response_text = self.query_llm(query) + final_response_text = self.chat_llm(cur_round, + client=None, + prompt=prompt, + trial=result_history[-1].trial) self.handle_llm_response(final_response_text, result) diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index ed75e7744b..c10012ddb1 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -883,7 +883,6 @@ def build_prompt(self) -> prompts.Prompt: if not func_source: logger.error('No function source found for project: %s, function: %s', self.benchmark.project, self.benchmark.function_signature) - return prompts.TextPrompt() prompt = prompt.replace('{FUNCTION_SOURCE}', func_source) diff --git a/stage/writing_stage.py b/stage/writing_stage.py index c2d7dd8410..dacc0578cb 100644 --- a/stage/writing_stage.py +++ b/stage/writing_stage.py @@ -53,6 +53,7 @@ def execute(self, result_history: list[Result]) -> Result: # First, execute the FunctionAnalyzer agent agent = self.get_agent(index=0) agent_result = self._execute_agent(agent, result_history) + self.logger.write_chat_history(agent_result) result_history.append(agent_result) # Then, execute the Prototyper agent From da435e9978dbd61ae2aea10de6c5b04838b2c526 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Wed, 11 Jun 2025 20:56:33 +0000 Subject: [PATCH 49/64] Fixed presubmit errors --- agent/base_agent.py | 44 +++++++++++++++++--------------------- agent/function_analyzer.py | 19 ++++++++-------- 2 files changed, 29 insertions(+), 34 deletions(-) diff --git a/agent/base_agent.py b/agent/base_agent.py index 376fb1c40a..2c24018958 100644 --- a/agent/base_agent.py +++ b/agent/base_agent.py @@ -13,6 +13,7 @@ # limitations under the License. """The abstract base class for LLM agents in stages.""" import argparse +import asyncio import os import random import re @@ -23,10 +24,8 @@ from typing import Any, Optional import requests - from google.adk import agents, runners, sessions -from google.genai import types, errors - +from google.genai import errors, types import logger import utils @@ -36,7 +35,6 @@ from llm_toolkit.prompts import Prompt from results import Result from tool.base_tool import BaseTool -import asyncio class BaseAgent(ABC): @@ -304,14 +302,14 @@ def execute(self, result_history: list[Result]) -> Result: class ADKBaseAgent(BaseAgent): def __init__(self, - trial: int, - llm: LLM, - args: argparse.Namespace, - benchmark: benchmarklib.Benchmark, - description: str = '', - instruction: str = '', - tools: list = [], - name: str = ''): + trial: int, + llm: LLM, + args: argparse.Namespace, + benchmark: benchmarklib.Benchmark, + description: str = '', + instruction: str = '', + tools: list = [], + name: str = ''): super().__init__(trial, llm, args, tools, name) @@ -319,8 +317,7 @@ def __init__(self, # For now, ADKBaseAgents only support the Vertex AI Models. 
if not isinstance(llm, VertexAIModel): - raise ValueError( - f'{self.name} only supports Vertex AI models.') + raise ValueError(f'{self.name} only supports Vertex AI models.') # Create the agent using the ADK library adk_agent = agents.LlmAgent( @@ -346,9 +343,7 @@ def __init__(self, session_service=session_service, ) - logger.info("ADK Agent %s created.", - self.name, - trial=self.trial) + logger.info("ADK Agent %s created.", self.name, trial=self.trial) def chat_llm(self, cur_round: int, client: Any, prompt: Prompt, trial: int) -> str: @@ -363,7 +358,8 @@ def chat_llm(self, cur_round: int, client: Any, prompt: Prompt, async def _call(): user_id = self.benchmark.id session_id = f"session_{self.trial}" - content = types.Content(role='user', parts=[types.Part(text=prompt.get())]) + content = types.Content(role='user', + parts=[types.Part(text=prompt.get())]) final_response_text = '' @@ -381,15 +377,15 @@ async def _call(): logger.error("Agent escalated: %s", error_message, trial=self.trial) logger.info('%s', - cur_round, - final_response_text, - cur_round, - trial=trial) + cur_round, + final_response_text, + cur_round, + trial=trial) return final_response_text - return self.llm.with_retry_on_error( - lambda: asyncio.run(_call()), [errors.ClientError]) + return self.llm.with_retry_on_error(lambda: asyncio.run(_call()), + [errors.ClientError]) if __name__ == "__main__": diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index 26db3ab4c4..1bc27d994e 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -47,11 +47,11 @@ def __init__(self, benchmark: benchmarklib.Benchmark,\ name: str = ''): - description=""" + description = """ Extracts a function's requirements from its source implementation. """ - instruction= """ + instruction = """ You are a security engineer tasked with analyzing a function and extracting its input requirements, necessary for it to execute correctly. 
@@ -61,8 +61,8 @@ def __init__(self, self.benchmark, self.name) tools = [introspector_tool.function_source_with_name] - super().__init__(trial, llm, args, benchmark, description, instruction, tools) - + super().__init__(trial, llm, args, benchmark, description, instruction, + tools) def write_requirements_to_file(self, args, requirements: str) -> str: """Write the requirements to a file.""" @@ -82,9 +82,8 @@ def write_requirements_to_file(self, args, requirements: str) -> str: return requirement_path - def handle_llm_response( - self, final_response_text: str, - result: resultslib.Result) -> None: + def handle_llm_response(self, final_response_text: str, + result: resultslib.Result) -> None: """Handle the LLM response and update the result.""" result_str = self._parse_tag(final_response_text, 'response') @@ -114,9 +113,9 @@ def execute(self, prompt = self._initial_prompt(result_history) final_response_text = self.chat_llm(cur_round, - client=None, - prompt=prompt, - trial=result_history[-1].trial) + client=None, + prompt=prompt, + trial=result_history[-1].trial) self.handle_llm_response(final_response_text, result) From f767db923a4727ed70a089bd72762f67b9059f79 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Wed, 11 Jun 2025 21:05:20 +0000 Subject: [PATCH 50/64] Fix presubmit errors --- agent/function_analyzer.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index 3f71a49c88..6705dacbf5 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -59,10 +59,9 @@ def __init__(self, introspector_tool = fuzz_introspector_tool.FuzzIntrospectorTool( self.benchmark, self.name) - tools=[ - introspector_tool.get_function_implementation, - self.search_project_files - ] + tools = [ + introspector_tool.get_function_implementation, self.search_project_files + ] super().__init__(trial, llm, args, benchmark, description, instruction, tools) From 831e5f2ab93f17a95b1affc3becee24a27312bc0 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Wed, 11 Jun 2025 21:28:23 +0000 Subject: [PATCH 51/64] Log tool interaction --- agent/base_agent.py | 8 +++- agent/function_analyzer.py | 81 +++++++++++++++++++++++++++++++++++--- 2 files changed, 82 insertions(+), 7 deletions(-) diff --git a/agent/base_agent.py b/agent/base_agent.py index 2c24018958..da3dac1703 100644 --- a/agent/base_agent.py +++ b/agent/base_agent.py @@ -343,12 +343,16 @@ def __init__(self, session_service=session_service, ) + self.round = 0 + logger.info("ADK Agent %s created.", self.name, trial=self.trial) def chat_llm(self, cur_round: int, client: Any, prompt: Prompt, trial: int) -> str: """Call the agent with the given prompt, running async code in sync.""" + self.round = cur_round + logger.info('%s', cur_round, prompt.gettext(), @@ -377,9 +381,9 @@ async def _call(): logger.error("Agent escalated: %s", error_message, trial=self.trial) logger.info('%s', - cur_round, + self.round, final_response_text, - cur_round, + self.round, trial=trial) return final_response_text diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index 6705dacbf5..2d8c73666f 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -28,6 +28,7 @@ import logger import results as resultslib from agent import base_agent +from data_prep import introspector from experiment import benchmark as benchmarklib from experiment.workdir import WorkDirs from llm_toolkit import models, prompt_builder, prompts @@ -57,10 +58,8 @@ def __init__(self, necessary 
for it to execute correctly. """ - introspector_tool = fuzz_introspector_tool.FuzzIntrospectorTool( - self.benchmark, self.name) tools = [ - introspector_tool.get_function_implementation, self.search_project_files + self.get_function_implementation, self.search_project_files ] super().__init__(trial, llm, args, benchmark, description, instruction, @@ -153,8 +152,10 @@ def search_project_files(self, request: str) -> str: str: The response from executing the bash commands, formatted using the , and tags. """ - logger.info('Received ProjectContainerTool request: %s', + logger.info('%s', + self.round, request, + self.round, trial=self.trial) prompt = prompt_builder.DefaultTemplateBuilder(self.llm, None).build([]) @@ -168,4 +169,74 @@ def search_project_files(self, request: str) -> str: prompt = self._container_handle_invalid_tool_usage( self.inspect_tool, 0, request, prompt) - return prompt.get() + response = prompt.get() + + # TODO(pamusuo): Move this logging to ADKBaseAgent. + self.round += 1 + logger.info('%s', + self.round, + response, + self.round, + trial=self.trial) + + return response + + + def get_function_implementation(self, project_name: str, + function_name: str) -> str: + """ + Retrieves a function's source from the fuzz introspector API, + using the project's name and function's name + + Args: + project_name (str): The name of the project. + function_name (str): The name of the function. + + Returns: + str: Source code of the function if found, otherwise an empty string. + """ + request = f""" + Retrieve the source code of the function '{function_name}' from the project '{project_name}'. + """ + + logger.info(' %s ', + self.round, + request, + self.round, + trial=self.trial) + + if self.project_functions is None: + logger.info( + "Project functions not initialized. Initializing for project '%s'.", + project_name, trial=self.trial) + functions_list = introspector.query_introspector_all_functions( + project_name) + + if functions_list: + self.project_functions = { + func["debug_summary"]["name"]: func + for func in functions_list + if "debug_summary" in func and "name" in func["debug_summary"] + } + else: + self.project_functions = None + + if (self.project_functions is None or + function_name not in self.project_functions): + logger.error("Error: Required function not found for project '%s'.", + project_name, trial=self.trial) + return "" + + function_signature = self.project_functions[function_name][ + "function_signature"] + + function_source = introspector.query_introspector_function_source(project_name, function_signature) + + self.round += 1 + logger.info('%s', + self.round, + function_source, + self.round, + trial=self.trial) + + return function_source \ No newline at end of file From 56a3698137633ac361e2b82a651c39fce30a3761 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Wed, 11 Jun 2025 21:32:36 +0000 Subject: [PATCH 52/64] Fixed presubmit errors --- agent/function_analyzer.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index 2d8c73666f..a19a18763b 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -58,9 +58,7 @@ def __init__(self, necessary for it to execute correctly. 
""" - tools = [ - self.get_function_implementation, self.search_project_files - ] + tools = [self.get_function_implementation, self.search_project_files] super().__init__(trial, llm, args, benchmark, description, instruction, tools) @@ -174,14 +172,13 @@ def search_project_files(self, request: str) -> str: # TODO(pamusuo): Move this logging to ADKBaseAgent. self.round += 1 logger.info('%s', - self.round, - response, - self.round, - trial=self.trial) + self.round, + response, + self.round, + trial=self.trial) return response - def get_function_implementation(self, project_name: str, function_name: str) -> str: """ @@ -208,7 +205,8 @@ def get_function_implementation(self, project_name: str, if self.project_functions is None: logger.info( "Project functions not initialized. Initializing for project '%s'.", - project_name, trial=self.trial) + project_name, + trial=self.trial) functions_list = introspector.query_introspector_all_functions( project_name) @@ -224,19 +222,21 @@ def get_function_implementation(self, project_name: str, if (self.project_functions is None or function_name not in self.project_functions): logger.error("Error: Required function not found for project '%s'.", - project_name, trial=self.trial) + project_name, + trial=self.trial) return "" function_signature = self.project_functions[function_name][ "function_signature"] - function_source = introspector.query_introspector_function_source(project_name, function_signature) + function_source = introspector.query_introspector_function_source( + project_name, function_signature) self.round += 1 logger.info('%s', - self.round, - function_source, - self.round, - trial=self.trial) + self.round, + function_source, + self.round, + trial=self.trial) - return function_source \ No newline at end of file + return function_source From a8008cb013de2b7a647f83fafe3998169f7aac8f Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Thu, 12 Jun 2025 14:22:42 +0000 Subject: [PATCH 53/64] Fixed error in agent name --- agent/base_agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agent/base_agent.py b/agent/base_agent.py index da3dac1703..4c9fbdd74c 100644 --- a/agent/base_agent.py +++ b/agent/base_agent.py @@ -321,7 +321,7 @@ def __init__(self, # Create the agent using the ADK library adk_agent = agents.LlmAgent( - name=name, + name=self.name, model=llm._vertex_ai_model, description=description, instruction=instruction, From 7aa44ce77f7d14fe423000dd49a75d41f0188226 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Thu, 12 Jun 2025 15:19:55 +0000 Subject: [PATCH 54/64] Fixed error in FunctionAnalyzer --- agent/function_analyzer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index a19a18763b..1a3603c6e1 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -108,12 +108,13 @@ def execute(self, cur_round = 1 - # Call the agent asynchronously and return the result - prompt = self._initial_prompt(result_history) # Initialize the ProjectContainerTool for local file search self.inspect_tool = container_tool.ProjectContainerTool(self.benchmark) self.inspect_tool.compile(extra_commands=' && rm -rf /out/* > /dev/null') + # Call the agent asynchronously and return the result + prompt = self._initial_prompt(result_history) + final_response_text = self.chat_llm(cur_round, client=None, prompt=prompt, From 671b8c61b96855ae57dd4b507221aecccc3cf8db Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Thu, 12 Jun 2025 15:53:12 +0000 
Subject: [PATCH 55/64] Fix error in FunctionAnalyzer --- agent/function_analyzer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index 1a3603c6e1..b7fec08bf5 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -63,6 +63,8 @@ def __init__(self, super().__init__(trial, llm, args, benchmark, description, instruction, tools) + self.project_functions = None + def write_requirements_to_file(self, args, requirements: str) -> str: """Write the requirements to a file.""" if not requirements: From dd3517325b212463912fe44ccc1c91881bb10ef9 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Fri, 13 Jun 2025 15:41:58 +0000 Subject: [PATCH 56/64] Updated the way tool interactions, using the ADKBaseAgent, are logged. --- agent/base_agent.py | 28 ++++++++++++++++---------- agent/function_analyzer.py | 41 ++++++++++---------------------------- 2 files changed, 29 insertions(+), 40 deletions(-) diff --git a/agent/base_agent.py b/agent/base_agent.py index 4c9fbdd74c..a774843913 100644 --- a/agent/base_agent.py +++ b/agent/base_agent.py @@ -353,11 +353,7 @@ def chat_llm(self, cur_round: int, client: Any, prompt: Prompt, self.round = cur_round - logger.info('%s', - cur_round, - prompt.gettext(), - cur_round, - trial=trial) + self.log_llm_prompt(prompt.get()) async def _call(): user_id = self.benchmark.id @@ -380,17 +376,29 @@ async def _call(): error_message = event.error_message logger.error("Agent escalated: %s", error_message, trial=self.trial) - logger.info('%s', - self.round, - final_response_text, - self.round, - trial=trial) + self.log_llm_response(final_response_text) return final_response_text return self.llm.with_retry_on_error(lambda: asyncio.run(_call()), [errors.ClientError]) + def log_llm_prompt(self, promt: str) -> None: + self.round += 1 + logger.info('%s', + self.round, + promt, + self.round, + trial=self.trial) + + def log_llm_response(self, response: str) -> None: + logger.info('%s', + self.round, + response, + self.round, + trial=self.trial) + + if __name__ == "__main__": # For cloud experiments. diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index b7fec08bf5..9a6034e612 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -108,8 +108,6 @@ def execute(self, work_dirs=self.args.work_dirs, ) - cur_round = 1 - # Initialize the ProjectContainerTool for local file search self.inspect_tool = container_tool.ProjectContainerTool(self.benchmark) self.inspect_tool.compile(extra_commands=' && rm -rf /out/* > /dev/null') @@ -117,7 +115,7 @@ def execute(self, # Call the agent asynchronously and return the result prompt = self._initial_prompt(result_history) - final_response_text = self.chat_llm(cur_round, + final_response_text = self.chat_llm(self.round, client=None, prompt=prompt, trial=result_history[-1].trial) @@ -153,11 +151,7 @@ def search_project_files(self, request: str) -> str: str: The response from executing the bash commands, formatted using the , and tags. """ - logger.info('%s', - self.round, - request, - self.round, - trial=self.trial) + self.log_llm_response(request) prompt = prompt_builder.DefaultTemplateBuilder(self.llm, None).build([]) @@ -170,17 +164,11 @@ def search_project_files(self, request: str) -> str: prompt = self._container_handle_invalid_tool_usage( self.inspect_tool, 0, request, prompt) - response = prompt.get() + tool_response = prompt.get() - # TODO(pamusuo): Move this logging to ADKBaseAgent. 
- self.round += 1 - logger.info('%s', - self.round, - response, - self.round, - trial=self.trial) + self.log_llm_prompt(tool_response) - return response + return tool_response def get_function_implementation(self, project_name: str, function_name: str) -> str: @@ -196,14 +184,12 @@ def get_function_implementation(self, project_name: str, str: Source code of the function if found, otherwise an empty string. """ request = f""" - Retrieve the source code of the function '{function_name}' from the project '{project_name}'. - """ + Requesting implementation for the function: + Function name: {function_name} + Project name: {project_name} + """ - logger.info(' %s ', - self.round, - request, - self.round, - trial=self.trial) + self.log_llm_response(request) if self.project_functions is None: logger.info( @@ -235,11 +221,6 @@ def get_function_implementation(self, project_name: str, function_source = introspector.query_introspector_function_source( project_name, function_signature) - self.round += 1 - logger.info('%s', - self.round, - function_source, - self.round, - trial=self.trial) + self.log_llm_prompt(function_source) return function_source From ba1cb9386369dc56182a121a3f143a778b2ae230 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Fri, 13 Jun 2025 16:00:00 +0000 Subject: [PATCH 57/64] Presubmit fixes --- agent/base_agent.py | 6 +++--- agent/function_analyzer.py | 14 ++++++-------- llm_toolkit/prompt_builder.py | 6 +++--- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/agent/base_agent.py b/agent/base_agent.py index a774843913..88fb2c7294 100644 --- a/agent/base_agent.py +++ b/agent/base_agent.py @@ -300,6 +300,7 @@ def execute(self, result_history: list[Result]) -> Result: class ADKBaseAgent(BaseAgent): + """The abstract base class for agents created using the ADK library.""" def __init__(self, trial: int, @@ -308,7 +309,7 @@ def __init__(self, benchmark: benchmarklib.Benchmark, description: str = '', instruction: str = '', - tools: list = [], + tools: Optional[list] = None, name: str = ''): super().__init__(trial, llm, args, tools, name) @@ -325,7 +326,7 @@ def __init__(self, model=llm._vertex_ai_model, description=description, instruction=instruction, - tools=tools, + tools=tools or [], ) # Create the session service @@ -399,7 +400,6 @@ def log_llm_response(self, response: str) -> None: trial=self.trial) - if __name__ == "__main__": # For cloud experiments. 
BaseAgent.cloud_main() diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index 9a6034e612..14f4a91fd2 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -18,13 +18,9 @@ """ import argparse -import asyncio import os from typing import Optional -from google.adk import agents, runners, sessions -from google.genai import types - import logger import results as resultslib from agent import base_agent @@ -32,7 +28,7 @@ from experiment import benchmark as benchmarklib from experiment.workdir import WorkDirs from llm_toolkit import models, prompt_builder, prompts -from tool import base_tool, container_tool, fuzz_introspector_tool +from tool import container_tool class FunctionAnalyzer(base_agent.ADKBaseAgent): @@ -61,7 +57,7 @@ def __init__(self, tools = [self.get_function_implementation, self.search_project_files] super().__init__(trial, llm, args, benchmark, description, instruction, - tools) + tools, name) self.project_functions = None @@ -146,9 +142,11 @@ def search_project_files(self, request: str) -> str: This function tool uses bash commands to search the project's source files, and retrieve requested code snippets or file contents. Args: - request (str): The bash command to execute and its justification, formatted using the and tags. + request (str): The bash command to execute and its justification, + formatted using the and tags. Returns: - str: The response from executing the bash commands, formatted using the , and tags. + str: The response from executing the bash commands, + formatted using the , and tags. """ self.log_llm_response(request) diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index c10012ddb1..9a55a96ba7 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -892,9 +892,9 @@ def build_prompt(self) -> prompts.Prompt: if not xrefs: logger.error('No cross references found for project: %s, function: %s', self.benchmark.project, self.benchmark.function_signature) - prompt = prompt.replace( - '\n{FUNCTION_REFERENCES}\n}', - '') + prompt = prompt.replace('', '')\ + .replace('{FUNCTION_REFERENCES}', '')\ + .replace('', '') else: references = [f"\n{xref}\n" for xref in xrefs] references_str = '\n'.join(references) From c65ccad3c45bb20b06532f81e5e94b3abc5f1ad0 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Fri, 13 Jun 2025 17:16:32 +0000 Subject: [PATCH 58/64] Fix error from pulling main --- agent/function_analyzer.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index f194d732e6..14f4a91fd2 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -122,9 +122,6 @@ def execute(self, return result - def _initial_prompt( - self, - results: Optional[list[resultslib.Result]] = None) -> prompts.Prompt: def _initial_prompt( self, results: Optional[list[resultslib.Result]] = None) -> prompts.Prompt: From df628f5d948c23ec910abf31e355c5fc8fd0ebe1 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Tue, 17 Jun 2025 21:27:18 +0000 Subject: [PATCH 59/64] Refactored get_function_implementation tool so tool response is logged even when function wasnt found. 
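The idea of the refactor, shown standalone and hedged (the helper names are stand-ins for the data_prep.introspector functions): build a name-to-function index from each entry's debug_summary once, then always return a formatted response so the tool interaction is logged even when the lookup misses.

```python
# Hedged sketch of the refactor in this commit. `query_source` stands in for
# introspector.query_introspector_function_source.
def build_function_index(functions: list[dict]) -> dict[str, dict]:
  # Index functions by their debug_summary name, skipping malformed entries.
  return {
      func['debug_summary']['name']: func
      for func in functions
      if isinstance(func.get('debug_summary'), dict) and
      isinstance(func['debug_summary'].get('name'), str) and
      func['debug_summary']['name'].strip()
  }

def get_function_implementation(index: dict[str, dict], project: str,
                                function_name: str, query_source) -> str:
  # Always return a response, even on a miss, so the tool call can be logged.
  response = f'Project name: {project}\nFunction name: {function_name}\n'
  signature = index.get(function_name, {}).get('function_signature', '')
  source = query_source(project, signature) if signature else ''
  if source.strip():
    response += f'Function source code:\n{source}\n'
  return response
```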
--- agent/base_agent.py | 6 ++--- agent/function_analyzer.py | 48 ++++++++++++++++++++++++-------------- 2 files changed, 33 insertions(+), 21 deletions(-) diff --git a/agent/base_agent.py b/agent/base_agent.py index 88fb2c7294..12d593b8a0 100644 --- a/agent/base_agent.py +++ b/agent/base_agent.py @@ -334,7 +334,7 @@ def __init__(self, session_service.create_session( app_name=self.name, user_id=benchmark.id, - session_id=f"session_{self.trial}", + session_id=f'session_{self.trial}', ) # Create the runner @@ -346,7 +346,7 @@ def __init__(self, self.round = 0 - logger.info("ADK Agent %s created.", self.name, trial=self.trial) + logger.info('ADK Agent %s created.', self.name, trial=self.trial) def chat_llm(self, cur_round: int, client: Any, prompt: Prompt, trial: int) -> str: @@ -375,7 +375,7 @@ async def _call(): final_response_text = event.content.parts[0].text elif event.actions and event.actions.escalate: error_message = event.error_message - logger.error("Agent escalated: %s", error_message, trial=self.trial) + logger.error('Agent escalated: %s', error_message, trial=self.trial) self.log_llm_response(final_response_text) diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index 14f4a91fd2..37bdab9cd6 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -64,16 +64,16 @@ def __init__(self, def write_requirements_to_file(self, args, requirements: str) -> str: """Write the requirements to a file.""" if not requirements: - logger.warning("No requirements to write to file.", trial=self.trial) + logger.warning('No requirements to write to file.', trial=self.trial) return '' requirement_path = os.path.join(args.work_dirs.requirements, - f"{self.benchmark.id}.txt") + f'{self.benchmark.id}.txt') with open(requirement_path, 'w') as f: f.write(requirements) - logger.info("Requirements written to %s", + logger.info('Requirements written to %s', requirement_path, trial=self.trial) @@ -191,7 +191,7 @@ def get_function_implementation(self, project_name: str, if self.project_functions is None: logger.info( - "Project functions not initialized. Initializing for project '%s'.", + 'Project functions not initialized. 
Initializing for project "%s".', project_name, trial=self.trial) functions_list = introspector.query_introspector_all_functions( @@ -199,26 +199,38 @@ def get_function_implementation(self, project_name: str, if functions_list: self.project_functions = { - func["debug_summary"]["name"]: func + func['debug_summary']['name']: func for func in functions_list - if "debug_summary" in func and "name" in func["debug_summary"] + if isinstance(func.get('debug_summary'), dict) and + isinstance(func['debug_summary'].get('name'), str) and + func['debug_summary']['name'].strip() } else: self.project_functions = None - if (self.project_functions is None or - function_name not in self.project_functions): - logger.error("Error: Required function not found for project '%s'.", - project_name, - trial=self.trial) - return "" + response = f""" + Project name: {project_name} + Function name: {function_name} + """ + function_source = '' + + if self.project_functions: + function_dict = self.project_functions.get(function_name, {}) + function_signature = function_dict.get('function_signature', '') - function_signature = self.project_functions[function_name][ - "function_signature"] + function_source = introspector.query_introspector_function_source( + project_name, function_signature) - function_source = introspector.query_introspector_function_source( - project_name, function_signature) + if function_source.strip(): + response += f""" + Function source code: + {function_source} + """ + else: + logger.error( + 'Error: Function with name "%s" not found in project "%s".', + function_name, project_name, trial=self.trial) - self.log_llm_prompt(function_source) + self.log_llm_prompt(response) - return function_source + return response From b7f49148fe1da33d625086dd585b9c8abbd98388 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Wed, 18 Jun 2025 14:54:59 +0000 Subject: [PATCH 60/64] Presubmit fixes --- agent/function_analyzer.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index 37bdab9cd6..4f169fb37a 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -215,11 +215,11 @@ def get_function_implementation(self, project_name: str, function_source = '' if self.project_functions: - function_dict = self.project_functions.get(function_name, {}) - function_signature = function_dict.get('function_signature', '') + function_dict = self.project_functions.get(function_name, {}) + function_signature = function_dict.get('function_signature', '') - function_source = introspector.query_introspector_function_source( - project_name, function_signature) + function_source = introspector.query_introspector_function_source( + project_name, function_signature) if function_source.strip(): response += f""" @@ -227,9 +227,10 @@ def get_function_implementation(self, project_name: str, {function_source} """ else: - logger.error( - 'Error: Function with name "%s" not found in project "%s".', - function_name, project_name, trial=self.trial) + logger.error('Error: Function with name "%s" not found in project "%s".', + function_name, + project_name, + trial=self.trial) self.log_llm_prompt(response) From ee2438cc59a8041310c4e46d8f6f504e1e34b3d1 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Wed, 18 Jun 2025 20:40:34 +0000 Subject: [PATCH 61/64] Use ephemeral temporary storage when downloading temporary files. 
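The gist: download the archive into a tempfile.TemporaryDirectory rather than a fixed /tmp path, so the file is removed automatically once extraction finishes. A minimal sketch of the pattern, with the GCS download passed in as a callable since that step belongs to the surrounding class; the helper name is illustrative, and the real logic lives in _update_experiment_directory below:

import os
import subprocess
import tempfile
from typing import Callable


def extract_experiment_archive(download: Callable[[str], None],
                               new_experiment_url: str,
                               experiment_path: str) -> None:
  # The temporary directory, and the downloaded archive inside it, are
  # deleted automatically when the with-block exits.
  with tempfile.TemporaryDirectory() as tmpdirname:
    temp_dest_path = os.path.join(tmpdirname,
                                  os.path.basename(new_experiment_url))
    download(temp_dest_path)  # e.g. self._download_from_gcs in the real code

    # Extract without overwriting files that already exist in the target.
    tar_command = [
        'tar', '--skip-old-files', '-xzf', temp_dest_path, '-C',
        experiment_path
    ]
    subprocess.run(tar_command, check=True, text=True)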
--- common/cloud_builder.py | 46 +++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/common/cloud_builder.py b/common/cloud_builder.py index cdbcf7711b..21af778155 100644 --- a/common/cloud_builder.py +++ b/common/cloud_builder.py @@ -479,30 +479,32 @@ def _update_experiment_directory(self, experiment_path: str, return # Download the new experiment archive. - temp_dest_path = f'/tmp/{os.path.basename(new_experiment_url)}' - self._download_from_gcs(temp_dest_path) + with tempfile.TemporaryDirectory() as tmpdirname: + temp_dest_path = os.path.join(tmpdirname, os.path.basename(new_experiment_url)) + self._download_from_gcs(temp_dest_path) - tar_command = [ - 'tar', '--skip-old-files', '-xzf', temp_dest_path, '-C', experiment_path - ] - logging.info('Tar command: %s', ' '.join(tar_command)) + # Extract the downloaded archive, without replacing any existing files. + tar_command = [ + 'tar', '--skip-old-files', '-xzf', temp_dest_path, '-C', experiment_path + ] + logging.info('Tar command: %s', ' '.join(tar_command)) - # Extract the archive into the experiment directory. - try: - result = subprocess.run(tar_command, - check=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True) - logging.error("subprocess stdout:\n%s", result.stdout) - logging.error("subprocess stderr:\n%s", result.stderr) - except subprocess.CalledProcessError as e: - logging.error("Tar command failed with return code %d", e.returncode) - logging.error("stdout:\n%s", e.stdout) - logging.error("stderr:\n%s", e.stderr) - raise - logging.info('Updated experiment directory with new files from %s', - new_experiment_url) + # Extract the archive into the experiment directory. + try: + result = subprocess.run(tar_command, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True) + logging.error("subprocess stdout:\n%s", result.stdout) + logging.error("subprocess stderr:\n%s", result.stderr) + except subprocess.CalledProcessError as e: + logging.error("Tar command failed with return code %d", e.returncode) + logging.error("stdout:\n%s", e.stdout) + logging.error("stderr:\n%s", e.stderr) + raise + logging.info('Updated experiment directory with new files from %s', + new_experiment_url) def run(self, agent: BaseAgent, result_history: list[Result], dill_dir: str) -> Any: From c027061a1fe5ca03eb13f3480e1483a2c2ba72eb Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Wed, 18 Jun 2025 20:41:23 +0000 Subject: [PATCH 62/64] Presubmit fixes --- common/cloud_builder.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/common/cloud_builder.py b/common/cloud_builder.py index 21af778155..58d1e05f39 100644 --- a/common/cloud_builder.py +++ b/common/cloud_builder.py @@ -480,12 +480,14 @@ def _update_experiment_directory(self, experiment_path: str, # Download the new experiment archive. with tempfile.TemporaryDirectory() as tmpdirname: - temp_dest_path = os.path.join(tmpdirname, os.path.basename(new_experiment_url)) + temp_dest_path = os.path.join(tmpdirname, + os.path.basename(new_experiment_url)) self._download_from_gcs(temp_dest_path) # Extract the downloaded archive, without replacing any existing files. 
tar_command = [ - 'tar', '--skip-old-files', '-xzf', temp_dest_path, '-C', experiment_path + 'tar', '--skip-old-files', '-xzf', temp_dest_path, '-C', + experiment_path ] logging.info('Tar command: %s', ' '.join(tar_command)) @@ -504,7 +506,7 @@ def _update_experiment_directory(self, experiment_path: str, logging.error("stderr:\n%s", e.stderr) raise logging.info('Updated experiment directory with new files from %s', - new_experiment_url) + new_experiment_url) def run(self, agent: BaseAgent, result_history: list[Result], dill_dir: str) -> Any: From 7b10f6b21397635ecebfe20568568ff8242c064a Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Sun, 22 Jun 2025 19:16:49 +0000 Subject: [PATCH 63/64] Extracted description and instruction to separate files. --- agent/function_analyzer.py | 15 ++-- llm_toolkit/prompt_builder.py | 16 ++-- .../agent/function-analyzer-description.txt | 1 + .../agent/function-analyzer-instruction.txt | 83 +------------------ 4 files changed, 18 insertions(+), 97 deletions(-) create mode 100644 prompts/agent/function-analyzer-description.txt diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py index 4f169fb37a..131e5a08f1 100644 --- a/agent/function_analyzer.py +++ b/agent/function_analyzer.py @@ -44,15 +44,12 @@ def __init__(self, benchmark: benchmarklib.Benchmark,\ name: str = ''): - description = """ - Extracts a function's requirements - from its source implementation. - """ - instruction = """ - You are a security engineer tasked with analyzing a function - and extracting its input requirements, - necessary for it to execute correctly. - """ + builder = prompt_builder.FunctionAnalyzerTemplateBuilder( + self.llm, self.benchmark) + + description = builder.get_description().get() + + instruction = builder.get_instruction().get() tools = [self.get_function_implementation, self.search_project_files] diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index 2b6bb702b7..6653c8c3c9 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -901,14 +901,14 @@ def __init__(self, super().__init__(model, benchmark, template_dir, initial) # Load templates. 
- self.function_analyzer_instruction_template_file = self._find_template( + self.function_analyzer_instruction_file = self._find_template( AGENT_TEMPLATE_DIR, 'function-analyzer-instruction.txt') - self.context_retrieve_template_file = self._find_template( - AGENT_TEMPLATE_DIR, 'context-retriever-instruction.txt') + self.function_analyzer_description_file = self._find_template( + AGENT_TEMPLATE_DIR, 'function-analyzer-description.txt') self.function_analyzer_prompt_template_file = self._find_template( AGENT_TEMPLATE_DIR, 'function-analyzer-priming.txt') - def build_instruction(self) -> prompts.Prompt: + def get_instruction(self) -> prompts.Prompt: """Constructs a prompt using the templates in |self| and saves it.""" self._prompt = self._model.prompt_type()(None) @@ -916,21 +916,21 @@ def build_instruction(self) -> prompts.Prompt: return self._prompt prompt = self._get_template( - self.function_analyzer_instruction_template_file) + self.function_analyzer_instruction_file) self._prompt.append(prompt) return self._prompt - def build_context_retriever_instruction(self) -> prompts.Prompt: + def get_description(self) -> prompts.Prompt: """Constructs a prompt using the templates in |self| and saves it.""" self._prompt = self._model.prompt_type()(None) - if not self.benchmark: return self._prompt - prompt = self._get_template(self.context_retrieve_template_file) + prompt = self._get_template( + self.function_analyzer_description_file) self._prompt.append(prompt) diff --git a/prompts/agent/function-analyzer-description.txt b/prompts/agent/function-analyzer-description.txt new file mode 100644 index 0000000000..484afc037d --- /dev/null +++ b/prompts/agent/function-analyzer-description.txt @@ -0,0 +1 @@ +Extracts a function's requirements from its source implementation. \ No newline at end of file diff --git a/prompts/agent/function-analyzer-instruction.txt b/prompts/agent/function-analyzer-instruction.txt index e1dc112b9d..f6831d0509 100644 --- a/prompts/agent/function-analyzer-instruction.txt +++ b/prompts/agent/function-analyzer-instruction.txt @@ -1,80 +1,3 @@ -You are a professional security engineer. - -Your objective is to analyze the function's implementation using the steps provided and return a response in the expected format. -The requirements you provide will be used by another agent to generate valid fuzz drivers for the target function. - -The function you will analyze is provided below. We have provided the target function, and the implementations of its children functions. - - -{{FUNCTION_SOURCE}} - - -Follow these steps to analyze a function and identify its input requirements: - -Step 1: Identify all Fuzzing Crash Indicators (FCI) in the function. - * Fuzz Crash Indicators are statements that can cause the program to crash if expected conditions are violated. - * They include assertion statements, array indexing statements, pointer dereferencing statements, memory access statements, string handling statements, etc. - * Note that some programs can have custom assertion statements, like require() or ensure(). - -Step 2: Identify the input requirements necessary to ensure the safety of each identified Fuzzing Crash Indicators. - * Each requirement MUST be precise for it to be useful. - * You MUST include a one-sentence summary why a specific requirement was included. - * You should not repeat any requirement, even if it is necessary to satisfy multiple FCIs. - -Step 3: Compile the requirements you derived and return in the expected format. 
- - - - -Make sure your response follows the following format, enclosed in ``` ```. - -``` - - -project name: the name of the project provided -function signature: The function's signature - - - -A summary of what the function does. - - - - -First requirement - - -Second requirement - -... - -nth requirement - - - - - - - - -Here is an example response - - -project name: htslib -function signature: int sam_index_build(const char *, int) - - - -The sam_index_build function is used to build a sam index. It uses the input arguments to identify and retrieve the index to build. It returns 1 if the build succeeds and 0 if the build fails. - - - - -The second argument should be less than 64. This is to prevent an assertion violation in the program. - - - - - - - +You are a security engineer tasked with analyzing a function + and extracting its input requirements, + necessary for it to execute correctly. \ No newline at end of file From f831530d4f59002644100cd10faa4ade210a86df Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Sun, 22 Jun 2025 19:19:21 +0000 Subject: [PATCH 64/64] Presubmit fixes --- llm_toolkit/prompt_builder.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index 6653c8c3c9..b9fcf5d400 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -915,8 +915,7 @@ def get_instruction(self) -> prompts.Prompt: if not self.benchmark: return self._prompt - prompt = self._get_template( - self.function_analyzer_instruction_file) + prompt = self._get_template(self.function_analyzer_instruction_file) self._prompt.append(prompt) @@ -929,8 +928,7 @@ def get_description(self) -> prompts.Prompt: if not self.benchmark: return self._prompt - prompt = self._get_template( - self.function_analyzer_description_file) + prompt = self._get_template(self.function_analyzer_description_file) self._prompt.append(prompt)
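For context on how the renamed builder methods from PATCH 63 are consumed, a short usage sketch; llm and benchmark are assumed to already exist, mirroring the FunctionAnalyzer.__init__ change above:

from llm_toolkit import prompt_builder

builder = prompt_builder.FunctionAnalyzerTemplateBuilder(llm, benchmark)

# Both methods return Prompt objects; .get() yields the raw template text
# that the ADK agent receives as its description and instruction.
description = builder.get_description().get()
instruction = builder.get_instruction().get()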