[JVM] Add retry logic for no coverage gain (#742)
This PR relocates the calculation of the coverage total and coverage diff
into the checking-and-retry loop. The fix can then use the freshly
calculated coverage diff to determine whether a harness that builds and
runs successfully actually increases the project coverage. If it does not,
the new retry-logic prompt asks the LLM to help fix the harness. Fixes for
errors in generated harnesses and fixes for harnesses with no coverage
increase are counted together. After this change, a higher rate of
generated harnesses build and run successfully, and more of them help
improve project coverage.

*Remark: this coverage-feedback approach currently works only for JVM
projects.*
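
A minimal sketch of the decision the retry loop now makes, with made-up names and plain booleans/floats standing in for the real BuildResult/RunResult plumbing (nothing below is the actual evaluator API):

```python
# Illustrative only: plain values stand in for the evaluator's result objects.
def choose_fix(build_ok: bool, has_run_result: bool, run_ok: bool,
               coverage_diff: float, language: str) -> tuple[bool, bool]:
  """Returns (needs_fix, jvm_coverage_fix)."""
  if not build_ok or not has_run_result:
    return True, False                 # build (or missing-run) errors
  if language == 'jvm':
    if run_ok and coverage_diff <= 0.0:
      return True, True                # ran cleanly but added no coverage
    return False, False                # JVM crashes are tolerated
  return (not run_ok), False           # other languages: fix runtime errors only


assert choose_fix(True, True, True, 0.0, 'jvm') == (True, True)     # no gain -> retry
assert choose_fix(True, True, False, 0.0, 'jvm') == (False, False)  # crash is fine
assert choose_fix(False, False, False, 0.0, 'jvm') == (True, False) # build error
```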

---------

Signed-off-by: Arthur Chan <[email protected]>
arthurscchan authored Dec 7, 2024
1 parent 1eb15f3 commit fe43b0c
Showing 6 changed files with 199 additions and 54 deletions.
6 changes: 4 additions & 2 deletions data_prep/introspector.py
@@ -343,8 +343,10 @@ def query_introspector_public_classes(project: str) -> list[str]:
return _get_data(resp, 'classes', [])


def query_introspector_source_code(project: str, filepath: str, begin_line: int,
end_line: int) -> str:
def query_introspector_source_code(project: str,
filepath: str,
begin_line: int = 0,
end_line: int = 10000) -> str:
"""Queries FuzzIntrospector API for source code of a
file |filepath| between |begin_line| and |end_line|."""

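With the new defaults, callers can omit the line bounds to fetch (up to) the first 10000 lines of a file. A hedged usage sketch, assuming the module is importable as `data_prep.introspector`; the project name and file path are illustrative:

```python
from data_prep import introspector

# Fetch the whole file (lines 0-10000 by default); values are illustrative.
full_source = introspector.query_introspector_source_code(
    'json-sanitizer', 'src/main/java/com/google/json/JsonSanitizer.java')

# An explicit range still works as before.
snippet = introspector.query_introspector_source_code(
    'json-sanitizer', 'src/main/java/com/google/json/JsonSanitizer.java',
    begin_line=100, end_line=160)
```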
106 changes: 61 additions & 45 deletions experiment/evaluator.py
@@ -283,18 +283,23 @@ def _fix_generated_fuzz_target(self, ai_binary: str,
run_result: Optional[RunResult],
dual_logger: _Logger, language: str):
"""Fixes the generated fuzz target."""
if build_result.succeeded and not language == 'jvm':
if run_result:
error_desc, errors = run_result.semantic_check.get_error_info()
jvm_coverage_fix = False
error_desc, errors = '', []
if build_result.succeeded:
if language == 'jvm':
jvm_coverage_fix = True
else:
dual_logger.log(f'Warning: Build succeed but no run_result in '
f'{generated_oss_fuzz_project}.')
error_desc, errors = '', []
if run_result:
error_desc, errors = run_result.semantic_check.get_error_info()
else:
dual_logger.log(f'Warning: Build succeed but no run_result in '
f'{generated_oss_fuzz_project}.')
else:
error_desc, errors = None, build_result.errors

code_fixer.llm_fix(ai_binary, target_path, self.benchmark, iteration,
error_desc, errors, self.builder_runner.fixer_model_name,
language)
language, jvm_coverage_fix)
shutil.copyfile(
target_path,
os.path.join(oss_fuzz_checkout.OSS_FUZZ_DIR, 'projects',
@@ -388,9 +393,57 @@ def check_target(self, ai_binary, target_path: str) -> Result:
build_result = BuildResult()
run_result = None

# 2. Calculate coverage percentage and coverage diff
coverage_summary = None
total_lines = 0
coverage_percent = 0.0
coverage_diff = 0.0
if run_result:
# Gets line coverage (diff) details.
coverage_summary = self._load_existing_coverage_summary()

if self.benchmark.language in ['python', 'jvm'] and run_result.coverage:
# The Jacoco.xml coverage report used to generate summary.json on
# OSS-Fuzz for JVM projects does not trace the source file location.
# Thus the conversion may miss some classes because they are not
# present during coverage report generation. This fix gets the total
# line calculation from the jacoco.xml report of the current run
# directly and compares it with the total_lines retrieved from
# summary.json. Then the larger total_lines is used, which is assumed
# to be more accurate. The same applies to Python projects, where the
# total line count is determined from the all_cov.json file.
total_lines = run_result.coverage.total_lines
elif coverage_summary:
total_lines = compute_total_lines_without_fuzz_targets(
coverage_summary, generated_target_name)
else:
total_lines = 0

if run_result.total_pcs:
coverage_percent = run_result.cov_pcs / run_result.total_pcs
else:
dual_logger.log(
f'Warning: total_pcs == 0 in {generated_oss_fuzz_project}.')
coverage_percent = 0.0

existing_textcov = self.load_existing_textcov()
if run_result.coverage:
run_result.coverage.subtract_covered_lines(existing_textcov)

if total_lines and run_result.coverage:
coverage_diff = run_result.coverage.covered_lines / total_lines
else:
dual_logger.log(
f'Warning: total_lines == 0 in {generated_oss_fuzz_project}.')
coverage_diff = 0.0

if self.benchmark.language == 'jvm':
# Unexpected exceptions that crash JVM fuzzers do not need to be fixed.
# For JVM, the generation is considered a success if either is true:
# 1) Build succeeded and the run crashed (expected for exceptions)
# 2) Build succeeded, the run succeeded and coverage diff > 0
gen_succ = build_result.succeeded and run_result
if gen_succ and run_result and run_result.succeeded:
gen_succ = gen_succ and (coverage_diff > 0)
else:
gen_succ = build_result.succeeded and run_result and run_result.succeeded
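
A worked example of the arithmetic above with made-up numbers, assuming `covered_lines` has already had the project's pre-existing coverage subtracted:

```python
# Made-up numbers purely to illustrate the two ratios computed above.
cov_pcs, total_pcs = 1200, 4800          # edges hit by this run / total edges
covered_lines, total_lines = 150, 30000  # newly covered lines / project total

coverage_percent = cov_pcs / total_pcs if total_pcs else 0.0         # 0.25
coverage_diff = covered_lines / total_lines if total_lines else 0.0  # 0.005

# JVM success rule: a clean run must add coverage; a crashing run still counts.
build_ok, run_ok = True, True
gen_succ = build_ok and (coverage_diff > 0 if run_ok else True)
print(coverage_percent, coverage_diff, gen_succ)  # 0.25 0.005 True
```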

@@ -465,43 +518,6 @@ def check_target(self, ai_binary, target_path: str) -> Result:
run_result.coverage_report_path, run_result.reproducer_path,
True, run_result.semantic_check.type, run_result.triage))

# Gets line coverage (diff) details.
coverage_summary = self._load_existing_coverage_summary()

if self.benchmark.language in ['python', 'jvm']:
# The Jacoco.xml coverage report used to generate summary.json on OSS-Fuzz
# for JVM projects does not trace the source file location. Thus the
# conversion may miss some classes because they are not present during
# coverage report generation. This fix gets the total line calculation
# from the jacoco.xml report of the current run directly and compares it
# with the total_lines retrieved from summary.json. Then the larger
# total_lines is used which is assumed to be more accurate.
# This is the same case for python project which the total line
# is determined from the all_cov.json file.
total_lines = run_result.coverage.total_lines
elif coverage_summary:
total_lines = compute_total_lines_without_fuzz_targets(
coverage_summary, generated_target_name)
else:
total_lines = 0

if run_result.total_pcs:
coverage_percent = run_result.cov_pcs / run_result.total_pcs
else:
dual_logger.log(
f'Warning: total_pcs == 0 in {generated_oss_fuzz_project}.')
coverage_percent = 0.0

existing_textcov = self.load_existing_textcov()
run_result.coverage.subtract_covered_lines(existing_textcov)

if total_lines:
coverage_diff = run_result.coverage.covered_lines / total_lines
else:
dual_logger.log(
f'Warning: total_lines == 0 in {generated_oss_fuzz_project}.')
coverage_diff = 0.0

dual_logger.log(
f'Result for {generated_oss_fuzz_project}: '
f'crashes={run_result.crashes}, coverage={coverage_percent} '
6 changes: 4 additions & 2 deletions llm_toolkit/code_fixer.py
@@ -368,7 +368,7 @@ def group_error_messages(error_lines: list[str]) -> list[str]:

def llm_fix(ai_binary: str, target_path: str, benchmark: benchmarklib.Benchmark,
llm_fix_id: int, error_desc: Optional[str], errors: list[str],
fixer_model_name: str, language: str) -> None:
fixer_model_name: str, language: str, jvm_cov_fix: bool) -> None:
"""Reads and fixes |target_path| in place with LLM based on |error_log|."""
fuzz_target_source_code = parser.parse_code(target_path)

@@ -385,6 +385,7 @@ def llm_fix(ai_binary: str, target_path: str, benchmark: benchmarklib.Benchmark,
prompt_path,
response_dir,
language,
jvm_cov_fix,
fixer_model_name,
temperature=0.5 - llm_fix_id * 0.04)

@@ -427,6 +428,7 @@ def apply_llm_fix(ai_binary: str,
prompt_path: str,
response_dir: str,
language: str,
jvm_cov_fix: bool,
fixer_model_name: str = models.DefaultModel.name,
temperature: float = 0.4):
"""Queries LLM to fix the code."""
@@ -440,7 +442,7 @@ def apply_llm_fix(ai_binary: str,
if language == 'jvm':
builder = prompt_builder.JvmErrorFixingBuilder(fixer_model, benchmark,
fuzz_target_source_code,
errors)
errors, jvm_cov_fix)
prompt = builder.build([], None, None)
prompt.save(prompt_path)
else:
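The net effect of threading `jvm_cov_fix` through `llm_fix` and `apply_llm_fix` is to select which JVM prompt template the fixer builds. A standalone sketch of that selection (the helper below is hypothetical; the template file names are the ones this PR adds and reuses):

```python
def pick_jvm_fixer_template(jvm_cov_fix: bool) -> str:
  """Hypothetical helper mirroring the choice made inside JvmErrorFixingBuilder."""
  if jvm_cov_fix:
    # Harness built and ran but added no coverage: ask for a coverage rewrite.
    return 'jvm_requirement_coverage_fixing.txt'
  # Harness failed to build or run: ask for an error fix.
  return 'jvm_requirement_error_fixing.txt'


assert pick_jvm_fixer_template(True) == 'jvm_requirement_coverage_fixing.txt'
assert pick_jvm_fixer_template(False) == 'jvm_requirement_error_fixing.txt'
```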
39 changes: 35 additions & 4 deletions llm_toolkit/prompt_builder.py
@@ -1062,16 +1062,22 @@ def __init__(self,
benchmark: Benchmark,
generated_harness: str,
errors: list[str],
jvm_cov_fix: bool,
template_dir: str = DEFAULT_TEMPLATE_DIR):
super().__init__(model)
self._template_dir = template_dir
self.benchmark = benchmark
self.generated_harness = generated_harness
self.error_str = '\n'.join(errors)
self.jvm_cov_fix = jvm_cov_fix

# Load templates.
self.template_file = self._find_template(
template_dir, 'jvm_requirement_error_fixing.txt')
if self.jvm_cov_fix:
self.template_file = self._find_template(
template_dir, 'jvm_requirement_coverage_fixing.txt')
else:
self.template_file = self._find_template(
template_dir, 'jvm_requirement_error_fixing.txt')

def _find_template(self, template_dir: str, template_name: str) -> str:
"""Finds template file based on |template_dir|."""
@@ -1099,15 +1105,40 @@ def build(self,
with open(self.template_file, 'r') as f:
prompt_text = f.read()

proj = self.benchmark.project

# Format the repository
target_repository = oss_fuzz_checkout.get_project_repository(
self.benchmark.project)
prompt_text = prompt_text.replace('{TARGET_REPO}', target_repository)
prompt_text = prompt_text.replace('{HARNESS_NAME}',
self.benchmark.target_name)

# Add the generated harness and error string to prompt
# Add the generated harness to prompt
prompt_text = prompt_text.replace('{GENERATED_HARNESS}',
self.generated_harness)
prompt_text = prompt_text.replace('{ERRORS}', self.error_str)

if self.jvm_cov_fix:
# Add source code of all existing harnesses to prompt
source_list = []
harnesses = introspector.query_introspector_for_harness_intrinsics(proj)
for pair in harnesses:
path = pair.get('source', '')
if path:
source = introspector.query_introspector_source_code(proj, path)
if source:
source_list.append(source)

prompt_text = prompt_text.replace('{EXISTING_HARNESS}',
'\n---\n'.join(source_list))

# Add all public candidates to prompt
methods = introspector.query_introspector_jvm_all_public_candidates(proj)
name = [method['function_name'] for method in methods]
prompt_text = prompt_text.replace('{PUBLIC_METHODS}', ','.join(name))
else:
# Add the error string to prompt
prompt_text = prompt_text.replace('{ERRORS}', self.error_str)

self._prompt.add_priming(prompt_text)
return self._prompt
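A toy, self-contained illustration of the placeholder substitution performed above for the coverage-fixing path; the template string and every value below are made up, whereas in the real flow the template is `jvm_requirement_coverage_fixing.txt` and the harness sources and public methods come from FuzzIntrospector:

```python
# All values are placeholders for illustration only.
template = ('Improve harness {HARNESS_NAME} for {TARGET_REPO}.\n'
            '<code>{GENERATED_HARNESS}</code>\n'
            'Existing harnesses:\n{EXISTING_HARNESS}\n'
            'Public methods: {PUBLIC_METHODS}\n')

existing_harnesses = ['class FooFuzzer { /* ... */ }',
                      'class BarFuzzer { /* ... */ }']
public_methods = ['com.example.Foo.parse(byte[])', 'com.example.Bar.load(String)']

prompt_text = (template
               .replace('{TARGET_REPO}', 'https://example.org/project.git')
               .replace('{HARNESS_NAME}', 'ProjectFuzzer')
               .replace('{GENERATED_HARNESS}', 'class ProjectFuzzer { /* ... */ }')
               .replace('{EXISTING_HARNESS}', '\n---\n'.join(existing_harnesses))
               .replace('{PUBLIC_METHODS}', ','.join(public_methods)))
print(prompt_text)
```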
94 changes: 94 additions & 0 deletions prompts/template_xml/jvm_requirement_coverage_fixing.txt
@@ -0,0 +1,94 @@
I'm a security engineer looking to write good fuzzing harnesses. I want you to help me improve my fuzzing harness so it covers more parts of the code.

The target library is {TARGET_REPO}.

The target project is implemented in the Java programming language; therefore, the harness should also be written in Java.
The fuzzing harness must be executable within the Jazzer fuzzing framework.

Below is the source code of the target fuzzing harness that I would like to improve:
<code>
{GENERATED_HARNESS}
</code>

For reference, the source code for all existing harnesses of the project is provided below, separated by `---`:
<code>
{EXISTING_HARNESS}
</code>

Additionally, a list of all public methods and constructors of the project is included for your reference; you should try to expand the fuzzing harness to call these targets and improve the overall fuzzing coverage:
{PUBLIC_METHODS}

Your task is to improve the target fuzzing harness provided above to increase code coverage for additional parts of the project that are not covered by the existing fuzzing harnesses. Please ensure that the changes made are minimal.
In your response, include ONLY the code for the harness, nothing more. You should wrap the code in <code></code> tags.

Here is an additional list of requirements that you MUST follow.
<requirements>
<item>NEVER use any methods from the <code>java.lang.Random</code> class in the generated code.</item>
<item>NEVER use any classes or methods in the <code>java.lang.reflect</code> package in the generated code.</item>
<item>NEVER use the @FuzzTest annotation for specifying the fuzzing method.</item>
<item>NEVER use any assert, printing or logging statements in the generated harness.</item>
<item>NEVER use any multithreading or multi-processing approach.</item>
<item>You MUST create the object before calling the target method.</item>
<item>Please use {HARNESS_NAME} as the Java class name.</item>
<item>You MUST invoke the close method of any resource class objects that implement the java.lang.AutoCloseable interface in the finally block after the target method is invoked.</item>
<item>Always create the fuzzing harness from the following templates:
<code>
import com.code_intelligence.jazzer.api.FuzzedDataProvider;
// Other imports

public class {HARNESS_NAME} {
public static void fuzzerInitialize() {
// Initializing objects for fuzzing
}

public static void fuzzerTearDown() {
// Tear down objects after fuzzing
}

public static void fuzzerTestOneInput(FuzzedDataProvider data) {
// Use the FuzzedDataProvider object to generate random data for fuzzing

// Fuzz by invoking the target method with random parameters / objects generated above.
}
}
</code></item>
<item>
You MUST use ONLY the following methods from the FuzzedDataProvider of the Jazzer framework for generating random data for fuzzing.
If the needed return value is not found in the table, try using constructors or methods to create the needed random object, but you MUST try your best to randomise that object with the methods in the table.

| Method | Return Value |
|---------------------------------------------|---------------------------------------|
| `consumeBytes(int length)` | `byte[]` |
| `consumeRemainingAsBytes()` | `byte[]` |
| `consumeString(int length)` | `String` |
| `consumeRemainingAsString()` | `String` |
| `consumeBoolean()` | `boolean` |
| `consumeInt(int min, int max)` | `int` |
| `consumeInt()` | `int` |
| `consumeLong(long min, long max)` | `long` |
| `consumeLong()` | `long` |
| `consumeFloat(float min, float max)` | `float` |
| `consumeFloat()` | `float` |
| `consumeDouble(double min, double max)` | `double` |
| `consumeDouble()` | `double` |
| `consumeChar()` | `char` |
| `consumeChar(char min, char max)` | `char` |
| `consumeShort(short min, short max)` | `short` |
| `consumeShort()` | `short` |
| `consumeRemainingAsCharSequence()` | `CharSequence` |
| `consumeBytestring()` | `byte[]` |
| `consumeBigInteger(int minNumBits)` | `BigInteger` |
| `consumeEnum(Class<E> enumType)` | `E` (Enum type) |
| `consumeProbabilityDouble()` | `double` |
| `consumeFraction()` | `double` |
| `pickValue(T... values)` | `T` (Type of value) |
| `pickValue(List<T> values)` | `T` (Type of value) |
| `consumeByte()` | `byte` |
| `consumeIntList(int length)` | `List<Integer>` |
| `consumeLongList(int length)` | `List<Long>` |
| `consumeFloatList(int length)` | `List<Float>` |
| `consumeDoubleList(int length)` | `List<Double>` |
| `consumeCharList(int length)` | `List<Character>` |

</item>
</requirements>
2 changes: 1 addition & 1 deletion prompts/template_xml/jvm_requirement_error_fixing.txt
@@ -1,4 +1,4 @@
I'm a security engineer looking to convert unit tests into fuzzing harnesses. I got some compilation errors and want you to help fix them.
I'm a security engineer looking to write good fuzzing harnesses. I got some compilation errors and want you to help fix them.

The target library is {TARGET_REPO}.

