Skip to content

Commit

Permalink
Prototyper improvements (#661)
Browse files Browse the repository at this point in the history
## Functionality
1. Validate if the function-under-test is referenced by the fuzz target
(fixes #658)
2. Use cached image from `chronos`.
3. Use exact exception matching in decorator `@retryable` to ensure the
correct config (e.g., #retries) is applied.
4. Compile the new fuzz target with the old build script once even if
LLM proposes a new build script, in case the new one is wrong.
5. Show chat history of each trial for simpler debugging.
6. Show exception (e.g., cloud build expiry) in report.
  • Loading branch information
DonggeLiu authored Oct 21, 2024
1 parent 6bc5565 commit 1ce0df9
Show file tree
Hide file tree
Showing 11 changed files with 313 additions and 91 deletions.
8 changes: 6 additions & 2 deletions agent/base_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,13 @@ def _filter_code(self, raw_code_block: str) -> str:
return filtered_code_block

def _format_bash_execution_result(self, process: sp.CompletedProcess) -> str:
stdout = self.llm.truncate_prompt(process.stdout)
# TODO(dongge) Share input limit evenly if both stdout and stderr overlong.
stderr = self.llm.truncate_prompt(process.stderr, stdout)
return (f'<bash>\n{process.args}\n</bash>\n'
f'<return code>\n{process.returncode}\n</return code>\n'
f'<stdout>\n{process.stdout}\n</stdout>\n'
f'<stderr>\n{process.stderr}\n</stderr>\n')
f'<stdout>\n{stdout}\n</stdout>\n'
f'<stderr>\n{stderr}\n</stderr>\n')

def _container_handle_bash_command(self, cur_round: int, response: str,
tool: BaseTool) -> Prompt:
Expand Down Expand Up @@ -113,6 +116,7 @@ def cloud_main(cls) -> None:
args = cls._parse_args()

agent = utils.deserialize_from_dill(args.agent)
agent.llm.cloud_setup()
result_history = utils.deserialize_from_dill(args.result_history)
result = agent.execute(result_history)
utils.serialize_to_dill(result, args.result_new)
Expand Down
107 changes: 84 additions & 23 deletions agent/prototyper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import logger
from agent.base_agent import BaseAgent
from experiment.benchmark import Benchmark
from llm_toolkit.prompt_builder import DefaultTemplateBuilder
from llm_toolkit.prompts import Prompt
from results import BuildResult, Result
Expand All @@ -24,7 +25,6 @@ def _initial_prompt(self, results: list[Result]) -> Prompt:
default_prompt_builder = DefaultTemplateBuilder(model=self.llm,
benchmark=benchmark)
prompt = default_prompt_builder.build([])
# TODO(dongge): Find a way to save prompt and log for agents
return prompt

def _update_fuzz_target_and_build_script(self, cur_round: int, response: str,
Expand All @@ -43,26 +43,68 @@ def _update_fuzz_target_and_build_script(self, cur_round: int, response: str,

build_script_source = self._filter_code(
self._parse_tag(response, 'build script'))
build_result.build_script_source = build_script_source
# Sometimes LLM adds chronos, which makes no sense for new build scripts.
build_result.build_script_source = build_script_source.replace(
'source /src/chronos.sh', '')
if build_script_source:
logger.debug('ROUND %02d Parsed build script from LLM: %s', cur_round,
build_script_source)
else:
logger.debug('ROUND %02d No build script in conclusion: %s', cur_round,
response)

def _update_build_result(self, buid_result: BuildResult,
compile_process: sp.CompletedProcess,
status: bool) -> None:
def _update_build_result(self, build_result: BuildResult,
compile_process: sp.CompletedProcess, status: bool,
referenced: bool) -> None:
"""Updates the build result with the latest info."""
buid_result.compiles = status
buid_result.compile_error = compile_process.stderr
buid_result.compile_log = self._format_bash_execution_result(
build_result.compiles = status
build_result.compile_error = compile_process.stderr
build_result.compile_log = self._format_bash_execution_result(
compile_process)
build_result.is_function_referenced = referenced

def _validate_fuzz_target_and_build_script(self, cur_round: int,
                                           build_result: BuildResult) -> None:
  """Validates the fuzz target, trying the unmodified build script first.

  The build script stored in |build_result| is set aside and the fuzz target
  is compiled once without it. Only when that attempt does not leave
  |build_result| successful, and a build script had been stored, is the
  script restored and a second compilation run with `use_recompile=False`.
  """
  proposed_build_script = build_result.build_script_source

  # Attempt 1: leave the build script untouched, in case the LLM's is wrong.
  logger.info('First compile fuzz target without modifying build script.')
  build_result.build_script_source = ''
  self._validate_fuzz_target_and_build_script_via_recompile(
      cur_round, build_result)

  # Nothing more to try if that worked or there is no proposed build script.
  if build_result.success or not proposed_build_script:
    return

  # Attempt 2: retry with the proposed build script put back in place.
  logger.info('Then compile fuzz target with modified build script.')
  build_result.build_script_source = proposed_build_script
  self._validate_fuzz_target_and_build_script_via_recompile(
      cur_round, build_result, use_recompile=False)

def _validate_fuzz_target_references_function(
    self, compilation_tool: ProjectContainerTool, benchmark: Benchmark,
    cur_round: int) -> bool:
  """Checks if the built fuzz target's entry point mentions the target function.

  Runs objdump through |compilation_tool| to disassemble
  `LLVMFuzzerTestOneInput` from `/out/<benchmark.target_name>` and searches
  the output for `benchmark.function_name`.

  Returns:
    True only if objdump exited with 0 and the function name appears in its
    stdout; False otherwise.
  """
  objdump_command = ('objdump --disassemble=LLVMFuzzerTestOneInput -d '
                     f'/out/{benchmark.target_name}')
  objdump_process = compilation_tool.execute(objdump_command)

  if objdump_process.returncode != 0:
    # A failed disassembly counts as "not referenced".
    is_referenced = False
  else:
    is_referenced = benchmark.function_name in objdump_process.stdout

  logger.debug('ROUND %02d Final fuzz target function referenced: %s',
               cur_round, is_referenced)
  if is_referenced:
    return True

  logger.debug('ROUND %02d Final fuzz target function not referenced',
               cur_round)
  return False

def _validate_fuzz_target_and_build_script_via_recompile(
self,
cur_round: int,
build_result: BuildResult,
use_recompile: bool = True) -> None:
"""Validates the new fuzz target and build script by recompiling them."""
benchmark = build_result.benchmark
compilation_tool = ProjectContainerTool(benchmark=benchmark)

Expand All @@ -82,8 +124,7 @@ def _validate_fuzz_target_and_build_script(self, cur_round: int,

# Recompile.
logger.info('===== ROUND %02d Recompile =====', cur_round)
compile_command = 'compile > /dev/null'
compile_process = compilation_tool.execute(compile_command)
compile_process = compilation_tool.compile(use_recompile=use_recompile)
compile_succeed = compile_process.returncode == 0
logger.debug('ROUND %02d Fuzz target compile Succeessfully: %s', cur_round,
compile_succeed)
Expand All @@ -93,11 +134,16 @@ def _validate_fuzz_target_and_build_script(self, cur_round: int,
binary_exists = ls_result.returncode == 0
logger.debug('ROUND %02d Final fuzz target binary exists: %s', cur_round,
binary_exists)
compilation_tool.terminate()

# Validate if function-under-test is referenced by the fuzz target.
function_referenced = self._validate_fuzz_target_references_function(
compilation_tool, benchmark, cur_round)

compilation_tool.terminate()
self._update_build_result(build_result,
compile_process=compile_process,
status=compile_succeed and binary_exists)
status=compile_succeed and binary_exists,
referenced=function_referenced)

def _container_handle_conclusion(
self, cur_round: int, response: str,
Expand All @@ -109,18 +155,34 @@ def _container_handle_conclusion(
self._update_fuzz_target_and_build_script(cur_round, response, build_result)

self._validate_fuzz_target_and_build_script(cur_round, build_result)
if build_result.compiles:
if build_result.success:
logger.info('***** Prototyper succeded in %02d rounds *****', cur_round)
return None

logger.info('***** Failed to recompile in %02d rounds *****', cur_round)
prompt_text = ('Failed to build fuzz target. Here is the fuzz target, build'
' script, compliation command, and other compilation runtime'
' output.\n<fuzz target>\n'
f'{build_result.fuzz_target_source}\n</fuzz target>\n'
f'<build script>\n{build_result.build_script_source}\n'
'</build script>\n'
f'{build_result.compile_log}')
if not build_result.compiles:
compile_log = self.llm.truncate_prompt(build_result.compile_log)
logger.info('***** Failed to recompile in %02d rounds *****', cur_round)
prompt_text = (
'Failed to build fuzz target. Here is the fuzz target, build'
' script, compliation command, and other compilation runtime'
' output.\n<fuzz target>\n'
f'{build_result.fuzz_target_source}\n</fuzz target>\n'
f'<build script>\n{build_result.build_script_source}\n'
f'</build script>\n<compilation log>\n{compile_log}\n'
'</compilation log>\n')
elif not build_result.is_function_referenced:
logger.info(
'***** Fuzz target does not reference function-under-test in %02d '
'rounds *****', cur_round)
prompt_text = (
'The fuzz target builds successfully, but the target function '
f'`{build_result.benchmark.function_signature}` was not used by '
'`LLVMFuzzerTestOneInput` in fuzz target. YOU MUST CALL FUNCTION '
f'`{build_result.benchmark.function_signature}` INSIDE FUNCTION '
'`LLVMFuzzerTestOneInput`.')
else:
prompt_text = ''

prompt = DefaultTemplateBuilder(self.llm, initial=prompt_text).build([])
return prompt

Expand All @@ -140,7 +202,7 @@ def execute(self, result_history: list[Result]) -> BuildResult:
prompt = self._initial_prompt(result_history)
benchmark = last_result.benchmark
self.inspect_tool = ProjectContainerTool(benchmark, name='inspect')
self.inspect_tool.execute('{compile && rm -rf /out/*} > /dev/null')
self.inspect_tool.compile(extra_commands=' && rm -rf /out/* > /dev/null')
cur_round = 1
prompt.append(self.inspect_tool.tutorial())
build_result = BuildResult(benchmark=benchmark,
Expand All @@ -157,7 +219,6 @@ def execute(self, result_history: list[Result]) -> BuildResult:
prompt = self._container_tool_reaction(cur_round, response,
build_result)
cur_round += 1
self._sleep_random_duration()
finally:
# Cleanup: stop and remove the container
logger.debug('Stopping and removing the inspect container %s',
Expand Down
4 changes: 4 additions & 0 deletions ci/k8s/pr-exp.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ spec:
value: '10'
- name: VERTEX_AI_LOCATIONS
value: 'asia-east1,asia-east2,asia-northeast1,asia-northeast3,asia-south1,asia-southeast1,australia-southeast1,europe-central2,europe-north1,europe-southwest1,europe-west1,europe-west2,europe-west3,europe-west4,europe-west6,europe-west8,europe-west9,northamerica-northeast1,southamerica-east1,us-central1,us-east1,us-east4,us-east5,us-south1,us-west1,us-west4'
- name: CLOUD_BUILD_LOCATION
value: 'us-west2'
- name: GCB_BUILDPOOL_NAME
value: projects/oss-fuzz/locations/us-west2/workerPools/buildpool-llm-agents
- name: REDIRECT_OUTS
value: '${GKE_REDIRECT_OUTS}'
# imagePullSecrets:
Expand Down
43 changes: 29 additions & 14 deletions common/cloud_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@

OF_REPO = 'https://github.com/google/oss-fuzz.git'
OFG_ROOT_DIR = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
US_CENTRAL_CLIENT_OPTIONS = google.api_core.client_options.ClientOptions(
api_endpoint='https://us-central1-cloudbuild.googleapis.com/')
REGION = os.getenv('CLOUD_BUILD_LOCATION', 'us-west2')
REGIONAL_CLIENT_OPTIONS = google.api_core.client_options.ClientOptions(
api_endpoint=f'https://{REGION}-cloudbuild.googleapis.com/')
_CHAT_HISTORY_PREFIX_PATTERN = r'^Step\s+#(\d+)\s+-\s+"agent-step":\s+'


Expand Down Expand Up @@ -60,7 +61,7 @@ def __init__(self, args: argparse.Namespace) -> None:
'v1',
credentials=self.credentials,
cache_discovery=False,
client_options=US_CENTRAL_CLIENT_OPTIONS).projects().builds()
client_options=REGIONAL_CLIENT_OPTIONS).projects().builds()
self.storage_client = storage.Client(credentials=self.credentials)

def _upload_to_gcs(self, local_file_path: str) -> str:
Expand Down Expand Up @@ -143,6 +144,9 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str,
'/workspace:/workspace',
'-v',
'/var/run/docker.sock:/var/run/docker.sock',
'-e',
'VERTEX_AI_LOCATIONS=' +
os.getenv("VERTEX_AI_LOCATIONS", ""),
'--network=cloudbuild',
# Built from this repo's `Dockerfile.cloudbuild-agent`.
('us-central1-docker.pkg.dev/oss-fuzz/oss-fuzz-gen/'
Expand Down Expand Up @@ -192,10 +196,10 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str,
body=cloud_build_config).execute()
build_id = build_info.get('metadata', {}).get('build', {}).get('id', '')

logging.info('Cloud Build ID: %s', build_id)
logging.info('Created Cloud Build ID %s at %s', build_id, REGION)
return build_id

def _wait_for_build(self, build_id: str) -> bool:
def _wait_for_build(self, build_id: str) -> str:
"""Wait for a GCB build."""
prev_status = status = None
while status in [None, 'WORKING', 'QUEUED']:
Expand All @@ -205,11 +209,11 @@ def _wait_for_build(self, build_id: str) -> bool:
if status != prev_status:
logging.info('Cloud Build %s Status: %s', build_id, status)
prev_status = status
time.sleep(60) # Avoid rate limiting.
except (googleapiclient.errors.HttpError, BrokenPipeError) as e:
logging.error('Cloud build %s failed: %s', build_id, e)
return False
return status == 'SUCCESS'
logging.warning('Failed to check cloud build status %s: %s', build_id,
e)
time.sleep(60) # Avoid rate limiting.
return status or ''

def _cancel_build(self, build_id: str) -> None:
"""Cancel a GCB build"""
Expand Down Expand Up @@ -240,7 +244,7 @@ def _get_build_log(self, build_id: str) -> str:
return log_content
except NotFound as e:
logging.error('Cloud build log %s not found: %s', log_file_uri, e)
return ''
return f'Cloud build log {log_file_uri} not found: {e}.'

def _download_from_gcs(self, destination_file_name: str) -> None:
"""Downloads the result file from GCS."""
Expand Down Expand Up @@ -278,22 +282,33 @@ def run(self, agent: BaseAgent, result_history: list[Result],
new_result_filename)

# Step 4: Download new result dill.
cloud_build_log = ''
new_result_dill = os.path.join(dill_dir, new_result_filename)
try:
if self._wait_for_build(build_id):
cloud_build_final_status = self._wait_for_build(build_id)
if cloud_build_final_status == 'SUCCESS':
self._download_from_gcs(new_result_dill)
except (KeyboardInterrupt, SystemExit):
else:
logging.error('Cloud build %s failed with status: %s', build_id,
cloud_build_final_status)
cloud_build_log += (f'Cloud build {build_id} failed with status: '
f'{cloud_build_final_status}.\n')
except (KeyboardInterrupt, SystemExit) as e:
self._cancel_build(build_id)
build_log = self._get_build_log(build_id)
logging.error('Cloud build %s cancled: %s', build_id, e)
cloud_build_log += f'Cloud build {build_id} cancled: {e}.\n'

cloud_build_log += self._get_build_log(build_id)

# Step 4: Deserialize dilld file.
result = utils.deserialize_from_dill(new_result_dill)
if not result:
cloud_build_log += f'Failed to deserialize from dill {new_result_dill}.\n'
last_result = result_history[-1]
result = Result(benchmark=last_result.benchmark,
trial=last_result.trial,
work_dirs=last_result.work_dirs,
author=agent)
result.chat_history = {agent.name: build_log}
result.chat_history = {agent.name: cloud_build_log}

return result
44 changes: 39 additions & 5 deletions experiment/oss_fuzz_checkout.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,23 +375,57 @@ def prepare_build(project_name, sanitizer, generated_project):
shutil.copy(original_dockerfile, dockerfile_to_use)


def _image_exists(image_name: str) -> bool:
"""Checks if the given |image_name| exits."""
def _image_exists_locally(image_name: str, project_name: str) -> bool:
"""Checks if the given |image_name| exits locally."""
try:
all_images = sp.run(['docker', 'images', '--format', '{{.Repository}}'],
stdout=sp.PIPE,
text=True,
check=True).stdout.splitlines()
if image_name in all_images:
logger.info('Will use local cached images of %s: %s', project_name,
image_name)
return True
except sp.CalledProcessError:
logger.info('Unable to list all docker images')
logger.warning('Unable to use local cached image of %s: %s', project_name,
image_name)
return False


def _image_exists_online(image_name: str, project_name: str) -> bool:
  """Tries to obtain a usable cached image of |project_name| via docker pull.

  The image named by `_get_project_cache_image_name(project_name, 'address')`
  is pulled, run once with `/usr/local/bin/recompile` as its entrypoint, and
  then tagged as |image_name|.

  Returns:
    True if all three docker commands succeed; False as soon as one of them
    exits with a non-zero status.
  """

  def _run_docker(*docker_args: str) -> None:
    # Same invocation settings for every docker step; raises on failure.
    sp.run(['docker', *docker_args], stdout=sp.PIPE, text=True, check=True)

  cached_image = _get_project_cache_image_name(project_name, 'address')
  try:
    _run_docker('pull', cached_image)
    logger.info('Pulled online cached images of %s: %s', project_name,
                cached_image)
    _run_docker('run', '--entrypoint', '/usr/local/bin/recompile',
                cached_image)
    _run_docker('tag', cached_image, image_name)
    logger.info('Will use online cached images: %s', project_name)
    return True
  except sp.CalledProcessError:
    logger.warning('Unable to use online cached images: %s', project_name)
    return False
return image_name in all_images


def prepare_project_image(project: str) -> str:
"""Prepares original image of the |project|'s fuzz target build container."""
image_name = f'gcr.io/oss-fuzz/{project}'
if _image_exists(image_name):
if (_image_exists_locally(image_name, project_name=project) or
_image_exists_online(image_name, project_name=project)):
logger.info('Using existing project image for %s', project)
return image_name
logger.info('Unable to find existing project image for %s', project)
Expand Down
Loading

0 comments on commit 1ce0df9

Please sign in to comment.