Skip to content

Commit

Permalink
add fuzzer, fix bug
Browse files Browse the repository at this point in the history
  • Loading branch information
a10y committed Aug 15, 2024
1 parent b8f5b87 commit 417451e
Show file tree
Hide file tree
Showing 7 changed files with 204 additions and 13 deletions.
28 changes: 28 additions & 0 deletions .github/workflows/fuzz.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Scheduled fuzzing workflow: runs the `fuzz_compress` cargo-fuzz target for a
# bounded amount of time, then uploads the crash artifacts and corpus directories.
name: Fuzz

on:
schedule:
- cron: "0 0 * * *" # daily
# Also allow the workflow to be started manually.
workflow_dispatch:

jobs:
fuzz:
name: "fuzz"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
# NOTE(review): no Rust toolchain is selected in this workflow; cargo-fuzz
# normally requires nightly - confirm the repository pins one elsewhere.
- name: Install cargo fuzz
run: cargo install cargo-fuzz
# Only `fuzz_compress` is run here; the `fuzz_train` target is not exercised.
# Arguments after `--` are passed to libFuzzer; the run is capped at 600 seconds.
- name: Run fuzzing target
run: cargo fuzz run fuzz_compress -- -max_total_time=600
# Let the upload steps below run even when the fuzzer step fails.
# NOTE(review): this also keeps the job from failing when a crash is found -
# confirm that crashes are meant to be noticed only through the uploaded artifacts.
continue-on-error: true
- name: Archive crash artifacts
uses: actions/upload-artifact@v4
with:
name: fuzzing-crash-artifacts
path: fuzz/artifacts
- name: Archive fuzzing corpus
uses: actions/upload-artifact@v4
with:
name: fuzzing-corpus
path: fuzz/corpus
4 changes: 4 additions & 0 deletions fuzz/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
target
corpus
artifacts
coverage
70 changes: 70 additions & 0 deletions fuzz/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions fuzz/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Manifest for the fuzzing crate that hosts the libFuzzer targets for the parent crate.
[package]
name = "fsst-rs-fuzz"
version = "0.0.0"
# The fuzzing harness is never meant to be published to a registry.
publish = false
edition = "2021"

[package.metadata]
cargo-fuzz = true

[dependencies]
libfuzzer-sys = "0.4"

# The crate under test, taken from the parent directory.
[dependencies.fsst-rs]
path = ".."

# One binary per fuzz target; test, doc and bench builds are disabled for each.
[[bin]]
name = "fuzz_train"
path = "fuzz_targets/fuzz_train.rs"
test = false
doc = false
bench = false

[[bin]]
name = "fuzz_compress"
path = "fuzz_targets/fuzz_compress.rs"
test = false
doc = false
bench = false
8 changes: 8 additions & 0 deletions fuzz/fuzz_targets/fuzz_compress.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#![no_main]

use libfuzzer_sys::fuzz_target;

// Fuzz target: compresses arbitrary fuzzer-supplied bytes with a table trained
// on a fixed sample sentence.
fuzz_target!(|data: &[u8]| {
    // Fixed training input, so the fuzzer only varies the bytes being compressed.
    const TRAINING_SAMPLE: &str = "the quick brown fox jumped over the lazy dog";

    let symbol_table = fsst_rs::train(TRAINING_SAMPLE.as_bytes());

    // The compressed output is irrelevant here; the target exists to surface
    // panics and crashes inside `compress`.
    let _ = symbol_table.compress(data);
});
7 changes: 7 additions & 0 deletions fuzz/fuzz_targets/fuzz_train.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#![no_main]

use libfuzzer_sys::fuzz_target;

// Fuzz target: trains a table directly from arbitrary fuzzer-supplied bytes.
// The trained result is discarded; the target exists to surface panics and
// crashes inside `train`.
fuzz_target!(|data: &[u8]| {
let _ = fsst_rs::train(data);
});
72 changes: 59 additions & 13 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -386,10 +386,34 @@ impl SymbolTable {
remaining_bytes.is_positive(),
"in_ptr exceeded in_end, should not be possible"
);
let remaining_bytes = remaining_bytes as usize;

// Shift off the remaining bytes
let mut last_word = unsafe { (in_ptr as *const u64).read_unaligned() };
last_word = mask_prefix(last_word, remaining_bytes as usize);
// Read the remaining bytes
// Unroll and test multiple values being written here.
// let mut last_word = [0u8; 8];
// for i in 0..remaining_bytes {
// last_word[i as usize] = unsafe { in_ptr.byte_add(i as usize).read() };
// }

// Shift on the words from the remaining bytes.
// let mut last_word = unsafe { (in_ptr as *const u64).read_unaligned() };
// last_word = mask_prefix(last_word, remaining_bytes as usize);
// let mut last_word = u64::from_le_bytes(last_word);
let mut last_word = unsafe {
match remaining_bytes {
0 => 0,
1 => extract_u64::<1>(in_ptr),
2 => extract_u64::<2>(in_ptr),
3 => extract_u64::<3>(in_ptr),
4 => extract_u64::<4>(in_ptr),
5 => extract_u64::<5>(in_ptr),
6 => extract_u64::<6>(in_ptr),
7 => extract_u64::<7>(in_ptr),
8 => extract_u64::<8>(in_ptr),
_ => unreachable!("remaining bytes must be <= 8"),
}
};

while in_ptr < in_end && out_ptr < out_end {
unsafe {
Expand Down Expand Up @@ -466,17 +490,6 @@ impl SymbolTable {
}
}

/// Keep the lowest `prefix_bytes` bytes of `word` and zero every byte above them.
///
/// A `prefix_bytes` of 0 yields 0 and a `prefix_bytes` of 8 returns `word`
/// unchanged. Values above 8 are not supported.
fn mask_prefix(word: u64, prefix_bytes: usize) -> u64 {
    match prefix_bytes {
        // An empty prefix would need a 64-bit shift, which overflows, so it is
        // handled on its own.
        0 => 0,
        kept => {
            let discarded_bits = 8 * (8 - kept);
            word & (u64::MAX >> discarded_bits)
        }
    }
}

fn advance_8byte_word(word: u64, bytes: usize) -> u64 {
// shift the word off the right-end, because little endian means the first
// char is stored in the LSB.
Expand All @@ -499,3 +512,36 @@ fn compare_masked(left: u64, right: u64, ignored_bits: u16) -> bool {

(left & mask) == right
}

/// Read exactly `N` bytes starting at `ptr` and pack them into a `u64`.
///
/// The byte at `ptr` lands in the least-significant byte of the result, the next
/// byte in the one above it, and so on; the upper `8 - N` bytes are zero. The
/// bytes are assembled explicitly as little-endian, so the layout does not depend
/// on the host's byte order. `N == 0` reads nothing and returns 0.
///
/// # Panics
///
/// Panics if `N > 8`.
///
/// # Safety
///
/// `ptr` must be valid for reads of `N` bytes. No alignment is required, and no
/// byte at or beyond `ptr + N` is accessed.
unsafe fn extract_u64<const N: usize>(ptr: *const u8) -> u64 {
    // `N` is a compile-time constant, so this check folds away for valid
    // instantiations and guards the copy below against overrunning the buffer.
    assert!(N <= 8, "N must be <= 8");

    let mut bytes = [0u8; 8];
    // SAFETY: the caller guarantees `ptr` is readable for `N` bytes, `bytes` has
    // room for `N <= 8` bytes, and a fresh local cannot overlap the source.
    unsafe {
        ptr.copy_to_nonoverlapping(bytes.as_mut_ptr(), N);
    }

    u64::from_le_bytes(bytes)
}

0 comments on commit 417451e

Please sign in to comment.