From 1ce0df9bfc23192f23b21a0c2603109814cd03f5 Mon Sep 17 00:00:00 2001
From: Dongge Liu
Date: Mon, 21 Oct 2024 14:38:35 +1100
Subject: [PATCH] Prototyper improvements (#661)

## Functionality
1. Validate that the function-under-test is referenced by the fuzz target (fixes #658).
2. Use cached image from `chronos`.
3. Use exact exception matching in decorator `@retryable` to ensure the correct config (e.g., #retries) is applied.
4. Compile the new fuzz target with the old build script once, even if the LLM proposes a new build script, in case the new one is wrong.
5. Show the chat history of each trial for simpler debugging.
6. Show exceptions (e.g., cloud build expiry) in the report.
---
 agent/base_agent.py             |   8 ++-
 agent/prototyper.py             | 107 +++++++++++++++++++++++++-------
 ci/k8s/pr-exp.yaml              |   4 ++
 common/cloud_builder.py         |  43 ++++++++-----
 experiment/oss_fuzz_checkout.py |  44 +++++++++++--
 llm_toolkit/models.py           |  75 ++++++++++++++++++----
 logger.py                       |   7 ++-
 results.py                      |  13 ++--
 stage/execution_stage.py        |  25 ++++----
 tool/container_tool.py          |  68 +++++++++++++++-----
 utils.py                        |  10 +--
 11 files changed, 313 insertions(+), 91 deletions(-)

diff --git a/agent/base_agent.py b/agent/base_agent.py
index 732b5699d3..f4e99300ad 100644
--- a/agent/base_agent.py
+++ b/agent/base_agent.py
@@ -59,10 +59,13 @@ def _filter_code(self, raw_code_block: str) -> str:
     return filtered_code_block

   def _format_bash_execution_result(self, process: sp.CompletedProcess) -> str:
+    stdout = self.llm.truncate_prompt(process.stdout)
+    # TODO(dongge) Share input limit evenly if both stdout and stderr are overlong.
+    stderr = self.llm.truncate_prompt(process.stderr, stdout)
     return (f'\n{process.args}\n\n'
             f'\n{process.returncode}\n\n'
-            f'\n{process.stdout}\n\n'
-            f'\n{process.stderr}\n\n')
+            f'\n{stdout}\n\n'
+            f'\n{stderr}\n\n')

   def _container_handle_bash_command(self, cur_round: int, response: str,
                                      tool: BaseTool) -> Prompt:
@@ -113,6 +116,7 @@ def cloud_main(cls) -> None:
     args = cls._parse_args()
     agent = utils.deserialize_from_dill(args.agent)
+    agent.llm.cloud_setup()
     result_history = utils.deserialize_from_dill(args.result_history)
     result = agent.execute(result_history)
     utils.serialize_to_dill(result, args.result_new)
diff --git a/agent/prototyper.py b/agent/prototyper.py
index 999915d86c..610a6fba3a 100644
--- a/agent/prototyper.py
+++ b/agent/prototyper.py
@@ -6,6 +6,7 @@
 import logger
 from agent.base_agent import BaseAgent
+from experiment.benchmark import Benchmark
 from llm_toolkit.prompt_builder import DefaultTemplateBuilder
 from llm_toolkit.prompts import Prompt
 from results import BuildResult, Result
@@ -24,7 +25,6 @@ def _initial_prompt(self, results: list[Result]) -> Prompt:
     default_prompt_builder = DefaultTemplateBuilder(model=self.llm,
                                                     benchmark=benchmark)
     prompt = default_prompt_builder.build([])
-    # TODO(dongge): Find a way to save prompt and log for agents
     return prompt

   def _update_fuzz_target_and_build_script(self, cur_round: int, response: str,
@@ -43,7 +43,9 @@ def _update_fuzz_target_and_build_script(self, cur_round: int, response: str,

     build_script_source = self._filter_code(
         self._parse_tag(response, 'build script'))
-    build_result.build_script_source = build_script_source
+    # Sometimes LLM adds chronos, which makes no sense for new build scripts.
+ build_result.build_script_source = build_script_source.replace( + 'source /src/chronos.sh', '') if build_script_source: logger.debug('ROUND %02d Parsed build script from LLM: %s', cur_round, build_script_source) @@ -51,18 +53,58 @@ def _update_fuzz_target_and_build_script(self, cur_round: int, response: str, logger.debug('ROUND %02d No build script in conclusion: %s', cur_round, response) - def _update_build_result(self, buid_result: BuildResult, - compile_process: sp.CompletedProcess, - status: bool) -> None: + def _update_build_result(self, build_result: BuildResult, + compile_process: sp.CompletedProcess, status: bool, + referenced: bool) -> None: """Updates the build result with the latest info.""" - buid_result.compiles = status - buid_result.compile_error = compile_process.stderr - buid_result.compile_log = self._format_bash_execution_result( + build_result.compiles = status + build_result.compile_error = compile_process.stderr + build_result.compile_log = self._format_bash_execution_result( compile_process) + build_result.is_function_referenced = referenced def _validate_fuzz_target_and_build_script(self, cur_round: int, build_result: BuildResult) -> None: """Validates the new fuzz target and build script.""" + # Steps: + # 1. Recompile without modifying the build script, in case LLM is wrong. + # 2. Recompile with the modified build script, if any. + build_script_source = build_result.build_script_source + + logger.info('First compile fuzz target without modifying build script.') + build_result.build_script_source = '' + self._validate_fuzz_target_and_build_script_via_recompile( + cur_round, build_result) + + if not build_result.success and build_script_source: + logger.info('Then compile fuzz target with modified build script.') + build_result.build_script_source = build_script_source + self._validate_fuzz_target_and_build_script_via_recompile( + cur_round, build_result, use_recompile=False) + + def _validate_fuzz_target_references_function( + self, compilation_tool: ProjectContainerTool, benchmark: Benchmark, + cur_round: int) -> bool: + """Validates if the LLM generated fuzz target assembly code references + function-under-test.""" + disassemble_result = compilation_tool.execute( + 'objdump --disassemble=LLVMFuzzerTestOneInput -d ' + f'/out/{benchmark.target_name}') + function_referenced = (disassemble_result.returncode == 0 and + benchmark.function_name in disassemble_result.stdout) + logger.debug('ROUND %02d Final fuzz target function referenced: %s', + cur_round, function_referenced) + if not function_referenced: + logger.debug('ROUND %02d Final fuzz target function not referenced', + cur_round) + return function_referenced + + def _validate_fuzz_target_and_build_script_via_recompile( + self, + cur_round: int, + build_result: BuildResult, + use_recompile: bool = True) -> None: + """Validates the new fuzz target and build script by recompiling them.""" benchmark = build_result.benchmark compilation_tool = ProjectContainerTool(benchmark=benchmark) @@ -82,8 +124,7 @@ def _validate_fuzz_target_and_build_script(self, cur_round: int, # Recompile. 
logger.info('===== ROUND %02d Recompile =====', cur_round) - compile_command = 'compile > /dev/null' - compile_process = compilation_tool.execute(compile_command) + compile_process = compilation_tool.compile(use_recompile=use_recompile) compile_succeed = compile_process.returncode == 0 logger.debug('ROUND %02d Fuzz target compile Succeessfully: %s', cur_round, compile_succeed) @@ -93,11 +134,16 @@ def _validate_fuzz_target_and_build_script(self, cur_round: int, binary_exists = ls_result.returncode == 0 logger.debug('ROUND %02d Final fuzz target binary exists: %s', cur_round, binary_exists) - compilation_tool.terminate() + # Validate if function-under-test is referenced by the fuzz target. + function_referenced = self._validate_fuzz_target_references_function( + compilation_tool, benchmark, cur_round) + + compilation_tool.terminate() self._update_build_result(build_result, compile_process=compile_process, - status=compile_succeed and binary_exists) + status=compile_succeed and binary_exists, + referenced=function_referenced) def _container_handle_conclusion( self, cur_round: int, response: str, @@ -109,18 +155,34 @@ def _container_handle_conclusion( self._update_fuzz_target_and_build_script(cur_round, response, build_result) self._validate_fuzz_target_and_build_script(cur_round, build_result) - if build_result.compiles: + if build_result.success: logger.info('***** Prototyper succeded in %02d rounds *****', cur_round) return None - logger.info('***** Failed to recompile in %02d rounds *****', cur_round) - prompt_text = ('Failed to build fuzz target. Here is the fuzz target, build' - ' script, compliation command, and other compilation runtime' - ' output.\n\n' - f'{build_result.fuzz_target_source}\n\n' - f'\n{build_result.build_script_source}\n' - '\n' - f'{build_result.compile_log}') + if not build_result.compiles: + compile_log = self.llm.truncate_prompt(build_result.compile_log) + logger.info('***** Failed to recompile in %02d rounds *****', cur_round) + prompt_text = ( + 'Failed to build fuzz target. Here is the fuzz target, build' + ' script, compliation command, and other compilation runtime' + ' output.\n\n' + f'{build_result.fuzz_target_source}\n\n' + f'\n{build_result.build_script_source}\n' + f'\n\n{compile_log}\n' + '\n') + elif not build_result.is_function_referenced: + logger.info( + '***** Fuzz target does not reference function-under-test in %02d ' + 'rounds *****', cur_round) + prompt_text = ( + 'The fuzz target builds successfully, but the target function ' + f'`{build_result.benchmark.function_signature}` was not used by ' + '`LLVMFuzzerTestOneInput` in fuzz target. 
YOU MUST CALL FUNCTION ' + f'`{build_result.benchmark.function_signature}` INSIDE FUNCTION ' + '`LLVMFuzzerTestOneInput`.') + else: + prompt_text = '' + prompt = DefaultTemplateBuilder(self.llm, initial=prompt_text).build([]) return prompt @@ -140,7 +202,7 @@ def execute(self, result_history: list[Result]) -> BuildResult: prompt = self._initial_prompt(result_history) benchmark = last_result.benchmark self.inspect_tool = ProjectContainerTool(benchmark, name='inspect') - self.inspect_tool.execute('{compile && rm -rf /out/*} > /dev/null') + self.inspect_tool.compile(extra_commands=' && rm -rf /out/* > /dev/null') cur_round = 1 prompt.append(self.inspect_tool.tutorial()) build_result = BuildResult(benchmark=benchmark, @@ -157,7 +219,6 @@ def execute(self, result_history: list[Result]) -> BuildResult: prompt = self._container_tool_reaction(cur_round, response, build_result) cur_round += 1 - self._sleep_random_duration() finally: # Cleanup: stop and remove the container logger.debug('Stopping and removing the inspect container %s', diff --git a/ci/k8s/pr-exp.yaml b/ci/k8s/pr-exp.yaml index f7411de935..46d6c15eaf 100644 --- a/ci/k8s/pr-exp.yaml +++ b/ci/k8s/pr-exp.yaml @@ -47,6 +47,10 @@ spec: value: '10' - name: VERTEX_AI_LOCATIONS value: 'asia-east1,asia-east2,asia-northeast1,asia-northeast3,asia-south1,asia-southeast1,australia-southeast1,europe-central2,europe-north1,europe-southwest1,europe-west1,europe-west2,europe-west3,europe-west4,europe-west6,europe-west8,europe-west9,northamerica-northeast1,southamerica-east1,us-central1,us-east1,us-east4,us-east5,us-south1,us-west1,us-west4' + - name: CLOUD_BUILD_LOCATION + value: 'us-west2' + - name: GCB_BUILDPOOL_NAME + value: projects/oss-fuzz/locations/us-west2/workerPools/buildpool-llm-agents - name: REDIRECT_OUTS value: '${GKE_REDIRECT_OUTS}' # imagePullSecrets: diff --git a/common/cloud_builder.py b/common/cloud_builder.py index 829152811f..32ad0b22d9 100644 --- a/common/cloud_builder.py +++ b/common/cloud_builder.py @@ -23,8 +23,9 @@ OF_REPO = 'https://github.com/google/oss-fuzz.git' OFG_ROOT_DIR = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) -US_CENTRAL_CLIENT_OPTIONS = google.api_core.client_options.ClientOptions( - api_endpoint='https://us-central1-cloudbuild.googleapis.com/') +REGION = os.getenv('CLOUD_BUILD_LOCATION', 'us-west2') +REGIONAL_CLIENT_OPTIONS = google.api_core.client_options.ClientOptions( + api_endpoint=f'https://{REGION}-cloudbuild.googleapis.com/') _CHAT_HISTORY_PREFIX_PATTERN = r'^Step\s+#(\d+)\s+-\s+"agent-step":\s+' @@ -60,7 +61,7 @@ def __init__(self, args: argparse.Namespace) -> None: 'v1', credentials=self.credentials, cache_discovery=False, - client_options=US_CENTRAL_CLIENT_OPTIONS).projects().builds() + client_options=REGIONAL_CLIENT_OPTIONS).projects().builds() self.storage_client = storage.Client(credentials=self.credentials) def _upload_to_gcs(self, local_file_path: str) -> str: @@ -143,6 +144,9 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str, '/workspace:/workspace', '-v', '/var/run/docker.sock:/var/run/docker.sock', + '-e', + 'VERTEX_AI_LOCATIONS=' + + os.getenv("VERTEX_AI_LOCATIONS", ""), '--network=cloudbuild', # Built from this repo's `Dockerfile.cloudbuild-agent`. 
('us-central1-docker.pkg.dev/oss-fuzz/oss-fuzz-gen/' @@ -192,10 +196,10 @@ def _request_cloud_build(self, ofg_repo_url: str, agent_dill_url: str, body=cloud_build_config).execute() build_id = build_info.get('metadata', {}).get('build', {}).get('id', '') - logging.info('Cloud Build ID: %s', build_id) + logging.info('Created Cloud Build ID %s at %s', build_id, REGION) return build_id - def _wait_for_build(self, build_id: str) -> bool: + def _wait_for_build(self, build_id: str) -> str: """Wait for a GCB build.""" prev_status = status = None while status in [None, 'WORKING', 'QUEUED']: @@ -205,11 +209,11 @@ def _wait_for_build(self, build_id: str) -> bool: if status != prev_status: logging.info('Cloud Build %s Status: %s', build_id, status) prev_status = status - time.sleep(60) # Avoid rate limiting. except (googleapiclient.errors.HttpError, BrokenPipeError) as e: - logging.error('Cloud build %s failed: %s', build_id, e) - return False - return status == 'SUCCESS' + logging.warning('Failed to check cloud build status %s: %s', build_id, + e) + time.sleep(60) # Avoid rate limiting. + return status or '' def _cancel_build(self, build_id: str) -> None: """Cancel a GCB build""" @@ -240,7 +244,7 @@ def _get_build_log(self, build_id: str) -> str: return log_content except NotFound as e: logging.error('Cloud build log %s not found: %s', log_file_uri, e) - return '' + return f'Cloud build log {log_file_uri} not found: {e}.' def _download_from_gcs(self, destination_file_name: str) -> None: """Downloads the result file from GCS.""" @@ -278,22 +282,33 @@ def run(self, agent: BaseAgent, result_history: list[Result], new_result_filename) # Step 4: Download new result dill. + cloud_build_log = '' new_result_dill = os.path.join(dill_dir, new_result_filename) try: - if self._wait_for_build(build_id): + cloud_build_final_status = self._wait_for_build(build_id) + if cloud_build_final_status == 'SUCCESS': self._download_from_gcs(new_result_dill) - except (KeyboardInterrupt, SystemExit): + else: + logging.error('Cloud build %s failed with status: %s', build_id, + cloud_build_final_status) + cloud_build_log += (f'Cloud build {build_id} failed with status: ' + f'{cloud_build_final_status}.\n') + except (KeyboardInterrupt, SystemExit) as e: self._cancel_build(build_id) - build_log = self._get_build_log(build_id) + logging.error('Cloud build %s cancled: %s', build_id, e) + cloud_build_log += f'Cloud build {build_id} cancled: {e}.\n' + + cloud_build_log += self._get_build_log(build_id) # Step 4: Deserialize dilld file. 
result = utils.deserialize_from_dill(new_result_dill) if not result: + cloud_build_log += f'Failed to deserialize from dill {new_result_dill}.\n' last_result = result_history[-1] result = Result(benchmark=last_result.benchmark, trial=last_result.trial, work_dirs=last_result.work_dirs, author=agent) - result.chat_history = {agent.name: build_log} + result.chat_history = {agent.name: cloud_build_log} return result diff --git a/experiment/oss_fuzz_checkout.py b/experiment/oss_fuzz_checkout.py index 17f37041bc..0263672c91 100644 --- a/experiment/oss_fuzz_checkout.py +++ b/experiment/oss_fuzz_checkout.py @@ -375,23 +375,57 @@ def prepare_build(project_name, sanitizer, generated_project): shutil.copy(original_dockerfile, dockerfile_to_use) -def _image_exists(image_name: str) -> bool: - """Checks if the given |image_name| exits.""" +def _image_exists_locally(image_name: str, project_name: str) -> bool: + """Checks if the given |image_name| exits locally.""" try: all_images = sp.run(['docker', 'images', '--format', '{{.Repository}}'], stdout=sp.PIPE, text=True, check=True).stdout.splitlines() + if image_name in all_images: + logger.info('Will use local cached images of %s: %s', project_name, + image_name) + return True except sp.CalledProcessError: - logger.info('Unable to list all docker images') + logger.warning('Unable to use local cached image of %s: %s', project_name, + image_name) + return False + + +def _image_exists_online(image_name: str, project_name: str) -> bool: + """Checks if the given |image_name| exits in the cloud registry.""" + online_image_name = _get_project_cache_image_name(project_name, 'address') + try: + sp.run(['docker', 'pull', online_image_name], + stdout=sp.PIPE, + text=True, + check=True) + logger.info('Pulled online cached images of %s: %s', project_name, + online_image_name) + sp.run([ + 'docker', 'run', '--entrypoint', '/usr/local/bin/recompile', + online_image_name + ], + stdout=sp.PIPE, + text=True, + check=True) + + sp.run(['docker', 'tag', online_image_name, image_name], + stdout=sp.PIPE, + text=True, + check=True) + logger.info('Will use online cached images: %s', project_name) + return True + except sp.CalledProcessError: + logger.warning('Unable to use online cached images: %s', project_name) return False - return image_name in all_images def prepare_project_image(project: str) -> str: """Prepares original image of the |project|'s fuzz target build container.""" image_name = f'gcr.io/oss-fuzz/{project}' - if _image_exists(image_name): + if (_image_exists_locally(image_name, project_name=project) or + _image_exists_online(image_name, project_name=project)): logger.info('Using existing project image for %s', project) return image_name logger.info('Unable to find existing project image for %s', project) diff --git a/llm_toolkit/models.py b/llm_toolkit/models.py index 14937f4bd8..0122d543b9 100644 --- a/llm_toolkit/models.py +++ b/llm_toolkit/models.py @@ -56,6 +56,8 @@ class LLM: # TODO(mihaimaruseac): Should this be MAX_TOKENS or a different global? context_window: int = 2000 # Default token size. 
+  MAX_INPUT_TOKEN: int = sys.maxsize
+
   _max_attempts = 5  # Maximum number of attempts to get prediction response

   def __init__(
@@ -201,6 +203,13 @@ def _save_output(self, index: int, content: str, response_dir: str) -> None:
     with open(raw_output_path, 'w+') as output_file:
       output_file.write(content)

+  def truncate_prompt(self,
+                      raw_prompt_text: Any,
+                      extra_text: Any = None) -> Any:
+    """Truncates the prompt text to fit in MAX_INPUT_TOKEN."""
+    del extra_text
+    return raw_prompt_text
+
   @abstractmethod
   def get_chat_client(self, model: Any) -> Any:
     """Returns a new chat session."""
@@ -408,7 +417,13 @@ def prompt_type(self) -> type[prompts.Prompt]:

   def estimate_token_num(self, text) -> int:
     """Estimates the number of tokens in |text|."""
-    # Roughly 1.5 tokens per word:
+    # A rough estimation for very large prompts: Gemini suggests 4 chars per
+    # token, using 3 here to be safer.
+    text = text or ''
+    if len(text) // 3 > self.MAX_INPUT_TOKEN:
+      return len(text) // 3
+
+    # Otherwise, roughly 1.5 tokens per word:
     return int(len(re.split('[^a-zA-Z0-9]+', text)) * 1.5 + 0.5)

 # ============================== Generation ============================== #
@@ -607,24 +622,62 @@ class GeminiV1D5(GeminiModel):

 class GeminiV1D5Chat(GeminiV1D5):
   """Gemini 1.5 for chat session."""
   name = 'vertex_ai_gemini-1-5-chat'
+  _vertex_ai_model = 'gemini-1.5-pro-002'
+
+  # Avoids sending large prompts.
+  MAX_INPUT_TOKEN: int = 128000  # max 2000000

   def get_chat_client(self, model: GenerativeModel) -> Any:
     return model.start_chat(response_validation=False)

-  @retryable(exceptions=[
-      GoogleAPICallError,
-      InvalidArgument,
-  ],
-             other_exceptions={ResourceExhausted: 100})
+  @retryable(
+      exceptions=[
+          GoogleAPICallError,
+          InvalidArgument,
+          ValueError,  # TODO(dongge): Handle RECITATION specifically.
+          IndexError,  # A known error from vertexai.
+      ],
+      other_exceptions={ResourceExhausted: 100})
   def _do_generate(self, client: ChatSession, prompt: str,
                    config: dict[str, Any]) -> Any:
     """Generates chat response."""
     logger.info('%s generating response with config: %s', self.name, config)
-    return client.send_message(
-        prompt,
-        stream=False,
-        generation_config=config,
-        safety_settings=self.safety_config).text  # type: ignore
+    try:
+      return client.send_message(
+          prompt,
+          stream=False,
+          generation_config=config,
+          safety_settings=self.safety_config).text  # type: ignore
+    except Exception as e:
+      logger.error('%s failed to generate response: %s; Config: %s', self.name,
+                   e, config)
+      return ''
+
+  def truncate_prompt(self,
+                      raw_prompt_text: Any,
+                      extra_text: Any = None) -> Any:
+    """Truncates the prompt text to fit in MAX_INPUT_TOKEN."""
+    original_token_count = self.estimate_token_num(raw_prompt_text)
+
+    token_count = original_token_count
+    if token_count > self.MAX_INPUT_TOKEN:
+      raw_prompt_text = raw_prompt_text[-3 * self.MAX_INPUT_TOKEN:]
+
+    extra_text_token_count = self.estimate_token_num(extra_text)
+    # Reserve 10000 tokens for raw prompt wrappers.
+ max_raw_prompt_token_size = (self.MAX_INPUT_TOKEN - extra_text_token_count - + 10000) + + while token_count > max_raw_prompt_token_size: + estimate_truncate_size = int( + (1 - max_raw_prompt_token_size / token_count) * len(raw_prompt_text)) + raw_prompt_text = raw_prompt_text[estimate_truncate_size + 1:] + + token_count = self.estimate_token_num(raw_prompt_text) + logger.warning('Truncated raw prompt from %d to %d tokens:', + original_token_count, token_count) + + return raw_prompt_text def chat_llm(self, client: ChatSession, prompt: prompts.Prompt) -> str: if self.ai_binary: diff --git a/logger.py b/logger.py index 0ae3a18be8..864ce15f5f 100644 --- a/logger.py +++ b/logger.py @@ -49,11 +49,14 @@ def write_result(self, result_status_dir: str, result: Result) -> None: def write_chat_history(self, result: Result) -> None: """Writes fuzz target.""" # TODO(dongge): Find a proper way to write this. - fuzz_target_path = os.path.join(result.work_dirs.base, 'prompt.txt') + trial_result_dir = os.path.join(result.work_dirs.status, + f'{result.trial:02d}') + os.makedirs(trial_result_dir, exist_ok=True) + chat_history_path = os.path.join(trial_result_dir, 'log.txt') chat_history = '\n'.join( f'{agent_name}\n{chat_history}\n' for agent_name, chat_history in result.chat_history.items()) - self.write_to_file(fuzz_target_path, chat_history) + self.write_to_file(chat_history_path, chat_history) def debug(msg: object, diff --git a/results.py b/results.py index c0ec96e56e..1abe4339d5 100644 --- a/results.py +++ b/results.py @@ -56,6 +56,7 @@ class BuildResult(Result): compiles: bool # Build success/failure. compile_error: str # Build error message. compile_log: str # Build full output. + is_function_referenced: bool # Fuzz target references function-under-test. def __init__(self, benchmark: Benchmark, @@ -64,6 +65,7 @@ def __init__(self, compiles: bool = False, compile_error: str = '', compile_log: str = '', + is_function_referenced: bool = False, fuzz_target_source: str = '', build_script_source: str = '', author: Any = None, @@ -73,17 +75,19 @@ def __init__(self, self.compiles = compiles self.compile_error = compile_error self.compile_log = compile_log + self.is_function_referenced = is_function_referenced def to_dict(self) -> dict: return super().to_dict() | { - 'compiles': self.compiles, + 'compiles': self.success, 'compile_error': self.compile_error, 'compile_log': self.compile_log, + 'is_function_referenced': self.is_function_referenced, } @property def success(self): - return self.compiles + return self.compiles and self.is_function_referenced class RunResult(BuildResult): @@ -110,6 +114,7 @@ def __init__( compiles: bool = False, compile_error: str = '', compile_log: str = '', + is_function_referenced: bool = False, crashes: bool = False, # Runtime crash. run_error: str = '', # Runtime crash error message. run_log: str = '', # Full fuzzing output. 
@@ -128,8 +133,8 @@ def __init__( author: Any = None, chat_history: Optional[dict] = None) -> None: super().__init__(benchmark, trial, work_dirs, compiles, compile_error, - compile_log, fuzz_target_source, build_script_source, - author, chat_history) + compile_log, is_function_referenced, fuzz_target_source, + build_script_source, author, chat_history) self.crashes = crashes self.run_error = run_error self.run_log = run_log diff --git a/stage/execution_stage.py b/stage/execution_stage.py index 0030222c85..cc394eafce 100644 --- a/stage/execution_stage.py +++ b/stage/execution_stage.py @@ -115,10 +115,11 @@ def execute(self, result_history: list[Result]) -> Result: fuzz_target_source=last_result.fuzz_target_source, build_script_source=last_result.build_script_source, chat_history=last_result.chat_history, - author=self, + author=repr(self), compiles=last_result.compiles, compile_error=last_result.compile_error, compile_log=last_result.compile_log, + is_function_referenced=last_result.is_function_referenced, crashes=run_result.crashes, run_error=run_result.crash_info, run_log=run_result.log_path, @@ -134,15 +135,17 @@ def execute(self, result_history: list[Result]) -> Result: total_pcs=run_result.total_pcs) except Exception as e: self.logger.error('Exception %s occurred on %s', e, last_result) - runresult = RunResult(benchmark=benchmark, - trial=last_result.trial, - work_dirs=last_result.work_dirs, - fuzz_target_source=last_result.fuzz_target_source, - build_script_source=last_result.build_script_source, - chat_history=last_result.chat_history, - author=self, - compiles=last_result.compiles, - compile_error=last_result.compile_error, - compile_log=last_result.compile_log) + runresult = RunResult( + benchmark=benchmark, + trial=last_result.trial, + work_dirs=last_result.work_dirs, + fuzz_target_source=last_result.fuzz_target_source, + build_script_source=last_result.build_script_source, + chat_history=last_result.chat_history, + author=repr(self), + compiles=last_result.compiles, + compile_error=last_result.compile_error, + compile_log=last_result.compile_log, + is_function_referenced=last_result.is_function_referenced) return runresult diff --git a/tool/container_tool.py b/tool/container_tool.py index 5781aad2ea..e4bfefd0ff 100644 --- a/tool/container_tool.py +++ b/tool/container_tool.py @@ -30,27 +30,48 @@ def _prepare_project_image(self) -> str: return image_name raise Exception(f'Failed to build image for {self.benchmark.project}') - def _execute_command(self, - command: list[str], - in_container: bool = False) -> sp.CompletedProcess: + def _execute_command_in_container(self, + command: list[str]) -> sp.CompletedProcess: """Executes the |command| in subprocess and log output.""" - result = sp.run(command, - stdout=sp.PIPE, - stderr=sp.PIPE, - check=False, - text=True) + try: + result = sp.run(command, + stdout=sp.PIPE, + stderr=sp.PIPE, + check=False, + text=True, + encoding='utf-8', + errors='ignore') - if in_container: logger.debug( 'Executing command (%s) in container %s: Return code %d. 
STDOUT: %s, ' 'STDERR: %s', command, self.container_id, result.returncode, result.stdout, result.stderr) - else: + return result + except Exception as e: + logger.error( + 'Executing command (%s) in container failed with Exception: %s', + command, e) + return sp.CompletedProcess(command, returncode=1, stdout='', stderr='') + + def _execute_command(self, command: list[str]) -> sp.CompletedProcess: + """Executes the |command| in subprocess and log output.""" + try: + result = sp.run(command, + stdout=sp.PIPE, + stderr=sp.PIPE, + check=False, + text=True, + encoding='utf-8', + errors='ignore') + logger.debug( - 'Executing command (%s): Return code %d. STDOUT: %s, ' - 'STDERR: %s', command, result.returncode, result.stdout, - result.stderr) - return result + 'Executing command (%s): Return code %d. STDOUT: %s, STDERR: %s', + command, result.returncode, result.stdout, result.stderr) + return result + except Exception as e: + logger.error('Executing command (%s) failed with Exception: %s', command, + e) + return sp.CompletedProcess(command, returncode=1, stdout='', stderr='') def _start_docker_container(self) -> str: """Runs the project's OSS-Fuzz image as a background container and returns @@ -60,6 +81,8 @@ def _start_docker_container(self) -> str: f'FUZZING_LANGUAGE={self.benchmark.language}', self.image_name ] result = self._execute_command(run_container_command) + if result.returncode: + logger.error('Failed to start container of image: %s', self.image_name) container_id = result.stdout.strip() return container_id @@ -69,10 +92,25 @@ def execute(self, command: str) -> sp.CompletedProcess: execute_command_in_container = [ 'docker', 'exec', self.container_id, '/bin/bash', '-c', command ] - process = self._execute_command(execute_command_in_container, True) + process = self._execute_command_in_container(execute_command_in_container) process.args = command return process + def compile(self, + use_recompile: bool = True, + extra_commands: str = '') -> sp.CompletedProcess: + """Compiles or recompiles the fuzz target.""" + if use_recompile: + logger.info('Will attempt to use recompile') + self.execute( + '[ -f /usr/local/bin/recompile ] && echo "Will use recompile" ' + '&& mv /usr/local/bin/recompile /usr/local/bin/compile') + else: + logger.info('Will use the original compile') + + command = 'compile > /dev/null' + extra_commands + return self.execute(command) + def terminate(self) -> bool: """Terminates the container.""" terminate_container_command = ['docker', 'stop', self.container_id] diff --git a/utils.py b/utils.py index 4717377da1..af7df201cd 100644 --- a/utils.py +++ b/utils.py @@ -68,16 +68,18 @@ def wrapper(*args, **kwargs): # Expected exceptions and their subclass. num_attempts = next( (attempts for exc_type, attempts in exception_config.items() - if isinstance(e, exc_type)), 1) + if type(e) is exc_type), 1) # pylint: disable=unidiomatic-typecheck logging.error( - 'Exception %s on function %s(args=%s, kwargs=%s), attempt %d/%d', - e, func.__name__, args, kwargs, attempt, num_attempts) + 'Exception %s (%s) on function %s(args=%s, kwargs=%s), attempt ' + '%d/%d', type(e), e, func.__name__, args, kwargs, attempt, + num_attempts) if attempt >= num_attempts: logging.error( 'Max attempts %d/%d reached for %s(args=%s, kwargs=%s) due to ' - '%s', attempt, num_attempts, func.__name__, args, kwargs, e) + '%s (%s)', attempt, num_attempts, func.__name__, args, kwargs, + type(e), e) raise attempt += 1
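
For reviewers, here is a minimal, self-contained sketch of the exact-matching behavior that functionality item 3 and the `utils.py` hunk above rely on. It is not the repository's `utils.retryable`: the decorator below, its `default_attempts` value, and the toy `QuotaError` classes are illustrative assumptions only.

```python
import logging
import time


def retryable(exceptions=None, other_exceptions=None):
  """Illustrative retry decorator using exact exception matching.

  |exceptions| share a default retry budget; |other_exceptions| maps an
  exception class to its own budget, e.g. {ResourceExhausted: 100}.
  """
  expected = tuple(exceptions or [])
  exception_config = other_exceptions or {}
  default_attempts = 5  # Assumed default, not the project's actual value.

  def decorator(func):

    def wrapper(*args, **kwargs):
      attempt = 1
      while True:
        try:
          return func(*args, **kwargs)
        except expected + tuple(exception_config) as e:
          # Exact type match: a subclass of a configured exception falls back
          # to the default budget instead of its parent's larger budget.
          num_attempts = next(
              (attempts for exc_type, attempts in exception_config.items()
               if type(e) is exc_type), default_attempts)
          logging.error('Attempt %d/%d failed with %s: %s', attempt,
                        num_attempts, type(e).__name__, e)
          if attempt >= num_attempts:
            raise
          attempt += 1
          time.sleep(1)  # Back off briefly before retrying.

    return wrapper

  return decorator


class QuotaError(Exception):
  """Stand-in for an exception with a large retry budget."""


class StrictQuotaError(QuotaError):
  """Subclass that should NOT inherit QuotaError's 100-attempt budget."""


@retryable(exceptions=[TimeoutError], other_exceptions={QuotaError: 100})
def flaky_call():
  raise StrictQuotaError('only gets the default attempts, then re-raises')
```

With the previous `isinstance` check, `StrictQuotaError` would have matched the `QuotaError` entry and inherited its 100-attempt budget; with `type(e) is exc_type` it falls back to the default, so each per-exception retry config is applied exactly as written.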