Skip to content

Commit

Permalink
Smith: Write a custom mutator
Browse files Browse the repository at this point in the history
Hopefully this will be even more effective at discovering interesting
smith programs, and thereby interesting RCL programs.
  • Loading branch information
ruuda committed Apr 21, 2024
1 parent 01d1af3 commit f8872cf
Show file tree
Hide file tree
Showing 4 changed files with 166 additions and 3 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ cargo-fuzz = true
[dependencies]
arbitrary = "1.3.0"
libfuzzer-sys = "0.4"
nanorand = "0.7.0"
rcl = { path = ".." }
serde_json = "1.0.114"
tree-sitter = "0.20.10"
Expand Down
157 changes: 156 additions & 1 deletion fuzz/fuzz_targets/fuzz_smith.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@

#![no_main]

use libfuzzer_sys::fuzz_target;
use libfuzzer_sys::{fuzz_mutator, fuzz_target};
use nanorand::{Rng, WyRand};
use rcl::eval::Evaluator;
use rcl::loader::{Loader, VoidFilesystem};
use rcl::tracer::VoidTracer;
Expand All @@ -41,3 +42,157 @@ fuzz_target!(|input: SynthesizedProgram| {
let mut value_env = rcl::runtime::prelude();
let _ = evaluator.eval_doc(&mut type_env, &mut value_env, doc);
});

/// Working state for a single run of the custom mutator.
///
/// It bundles the buffer being mutated with its bookkeeping and a random
/// number generator, so the individual mutations can be plain methods.
struct Mutator<'a> {
    /// The buffer that is mutated in place.
    data: &'a mut [u8],

    /// The number of input bytes in `data`. Mutations that grow or shrink
    /// the input update this, and it is reported back after mutating.
    size: usize,

    /// The size limit handed to the mutator; used to cap generated indices.
    max_size: usize,

    /// Source of randomness for picking and parameterizing mutations.
    rng: WyRand,
}

impl<'a> Mutator<'a> {
    /// Return the number of leading bytes that mutations may address.
    ///
    /// This is the input size, capped by both `max_size` and the length of
    /// the buffer, so any index below it is always in bounds.
    fn valid_len(&self) -> usize {
        std::cmp::min(std::cmp::min(self.size, self.max_size), self.data.len())
    }

    /// Return the byte offset of an arbitrary complete instruction, if any.
    ///
    /// Returns `None` when the input holds fewer than two bytes, because
    /// then there is no complete 2-byte instruction to point at.
    fn pick_instruction(&mut self) -> Option<usize> {
        // Integer division rounds down, so a trailing 1-byte leftover is
        // never counted as an instruction.
        let count = self.valid_len() / 2;
        if count == 0 {
            return None;
        }
        Some(self.rng.generate_range(0..count) * 2)
    }

    /// Return the byte offset of an arbitrary instruction in the buffer.
    ///
    /// Falls back to offset 0 when the input holds no complete instruction.
    fn gen_instruction_index(&mut self) -> usize {
        self.pick_instruction().unwrap_or(0)
    }

    /// Return an arbitrary index into the data buffer.
    ///
    /// Returns 0 when the input is empty; callers that index with the result
    /// must check for an empty input themselves.
    fn gen_data_index(&mut self) -> usize {
        // Bias indices towards the end of the data; the instructions are at the
        // start and auxiliary data is at the end. Instructions are 2 bytes, so
        // if we delete one byte in the middle then the part after it becomes
        // meaningless (they might still be valid instructions, but it's not a
        // small mutation). We should have more luck deleting in e.g. a string
        // at the end.
        let n = self.valid_len();
        if n == 0 {
            return 0;
        }
        match self.rng.generate_range(0..3) {
            0 => n - 1,
            1 => self.rng.generate_range((n / 2)..n),
            2 => self.rng.generate_range(0..n),
            _ => unreachable!(),
        }
    }

    /// Generate a random valid opcode.
    fn gen_opcode(&mut self) -> u8 {
        // Rejection sampling: draw bytes until one parses as an opcode.
        loop {
            let opcode: u8 = self.rng.generate();
            if rcl_fuzz::smith::parse_opcode(opcode).is_some() {
                return opcode;
            }
        }
    }

    /// Generate an instruction argument.
    fn gen_argument(&mut self) -> u8 {
        // We bias the argument towards smaller numbers, because often they are
        // lengths or indexes into the stack, and those are all small.
        match self.rng.generate_range(0..4) {
            0 => 0,
            1 => 1,
            2 => self.rng.generate_range(0..10),
            3 => self.rng.generate(),
            _ => unreachable!(),
        }
    }

    /// Apply one randomly selected mutation to the buffer.
    ///
    /// A mutation that does not apply to the current input (for example,
    /// removing an instruction from an empty input) leaves it unchanged.
    fn mutate(&mut self) {
        match self.rng.generate_range(0..9) {
            0 => self.insert_instruction(),
            1 => self.remove_instruction(),
            2 => self.replace_instruction(),
            3 => self.swap_instructions(),
            4 => self.increment_argument(),
            5 => self.decrement_argument(),
            6 => self.replace_argument(),
            7 => self.append_byte(),
            8 => self.remove_byte(),
            _ => unreachable!(),
        }
    }

    /// Insert a freshly generated instruction at an instruction boundary.
    ///
    /// If the buffer is already full, the last two bytes fall off the end.
    fn insert_instruction(&mut self) {
        let capacity = self.data.len();
        if capacity < 2 {
            // There is no room for even a single instruction.
            return;
        }

        // With no existing instruction this is offset 0, so an empty input
        // can still grow. Otherwise `i + 2 <= valid_len() <= capacity`.
        let i = self.gen_instruction_index();

        // Move everything behind the insertion place one instruction ahead.
        self.data.copy_within(i..capacity - 2, i + 2);

        // Then insert the new instruction.
        let opcode = self.gen_opcode();
        let argument = self.gen_argument();
        self.data[i] = opcode;
        self.data[i + 1] = argument;

        // Never report more bytes than the buffer can hold.
        self.size = std::cmp::min(self.size + 2, capacity);
    }

    /// Remove one instruction, shifting everything after it two bytes back.
    fn remove_instruction(&mut self) {
        if let Some(i) = self.pick_instruction() {
            // Move everything back one place.
            self.data.copy_within(i + 2.., i);
            // A complete instruction exists, so `size` is at least 2 here.
            self.size -= 2;
        }
    }

    /// Overwrite one instruction with a new opcode and argument.
    fn replace_instruction(&mut self) {
        if let Some(i) = self.pick_instruction() {
            let opcode = self.gen_opcode();
            let argument = self.gen_argument();
            self.data[i] = opcode;
            self.data[i + 1] = argument;
        }
    }

    /// Exchange two instructions (possibly the same one, which is a no-op).
    fn swap_instructions(&mut self) {
        if let (Some(i), Some(j)) = (self.pick_instruction(), self.pick_instruction()) {
            self.data.swap(i, j);
            self.data.swap(i + 1, j + 1);
        }
    }

    /// Add one to the argument of an instruction, saturating at 255.
    fn increment_argument(&mut self) {
        if let Some(i) = self.pick_instruction() {
            self.data[i + 1] = self.data[i + 1].saturating_add(1);
        }
    }

    /// Subtract one from the argument of an instruction, saturating at 0.
    fn decrement_argument(&mut self) {
        if let Some(i) = self.pick_instruction() {
            self.data[i + 1] = self.data[i + 1].saturating_sub(1);
        }
    }

    /// Overwrite the argument of an instruction, keeping its opcode.
    fn replace_argument(&mut self) {
        if let Some(i) = self.pick_instruction() {
            self.data[i + 1] = self.gen_argument();
        }
    }

    /// Append one byte of auxiliary data, if there is room for it.
    fn append_byte(&mut self) {
        // Growing is only possible while we are below both the buffer length
        // and the size limit.
        if self.size >= self.data.len() || self.size >= self.max_size {
            return;
        }
        // Bias values towards 0 or printable ASCII, the auxiliary data at the
        // end is often used for indices or strings.
        let b: u8 = match self.rng.generate_range(0..3) {
            0 => 0,
            1 => self.rng.generate_range(0x20..0x7f),
            2 => self.rng.generate(),
            _ => unreachable!(),
        };
        self.data[self.size] = b;
        self.size += 1;
    }

    /// Remove one byte, shifting everything after it one byte back.
    fn remove_byte(&mut self) {
        if self.valid_len() == 0 {
            // Nothing to remove.
            return;
        }
        let i = self.gen_data_index();
        self.data.copy_within(i + 1.., i);
        // The input is non-empty, so `size` is at least 1 here.
        self.size -= 1;
    }
}

// Register the custom mutator: apply a single random mutation to the buffer
// and report the resulting input size.
fuzz_mutator!(|data: &mut [u8], size: usize, max_size: usize, seed: u32| {
    let mut state = Mutator {
        data,
        size,
        max_size,
        // Seed the generator from the provided seed; widening is lossless.
        rng: WyRand::new_seed(u64::from(seed)),
    };
    state.mutate();
    state.size
});
4 changes: 2 additions & 2 deletions fuzz/src/smith.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ macro_rules! define_ops {
$( #[doc = $doc] $name = $opcode ),+
}

fn parse_op(opcode: u8) -> Option<Op> {
pub fn parse_opcode(opcode: u8) -> Option<Op> {
match opcode {
$( $opcode => Some(Op::$name), )+
_ => None,
Expand Down Expand Up @@ -264,7 +264,7 @@ impl<'a> ProgramBuilder<'a> {
let n = self.input[self.head + 1];
self.head += 2;

let op = match parse_op(op_byte) {
let op = match parse_opcode(op_byte) {
None => return true,
Some(op) => op,
};
Expand Down

0 comments on commit f8872cf

Please sign in to comment.