From 6cc0a1401fbc16a2c58c755df1e51fb40b82c150 Mon Sep 17 00:00:00 2001 From: Valentin Obst Date: Sun, 5 May 2024 23:12:09 +0200 Subject: [PATCH] WIP: bench: introduce initial benchmarking infrastructure To better understand the current performance characteristics and to guide future development, introduce microbenchmarks for various steps in the `cwe_checker`'s analysis. This commit adds benchmarks for the following steps: - CFG construction, - individual normalization passes, - function signatures analysis, - pointer inference, - string abstractions, - individual checkers. All benchmarks are executed on the following input programs: - ls, - netfs.ko, for the following architectures: - amd64, - arm64, - armel, - armhf, - mips64el, - mipsel, - ppc64el, - x86. Inputs are stored in the `benches/_data/` directory. It contains json-serialized pcode projects and binaries. The pcode projects are included for the following reasons: - not requiring a Ghidra installation on the benchmarking system, - avoiding that changes in the Ghidra version influence the benchmark results, - reducing the time it takes to run the benchmarks. The inputs are not included in the repository and have to be obtained from an external source. Since the binaries are taken from Debian packages it shouldn't be a problem to re-distribute them if we include the copyright information. This implementation is based on the Criterion.rs crate. It is added to the dev-dependencies of the `cwe_checker_lib` and the `Cargo.lock` is updated accordingly. 
Signed-off-by: Valentin Obst --- .dockerignore | 1 + .gitignore | 2 + Cargo.lock | 270 +++++++++ src/cwe_checker_lib/Cargo.toml | 8 + src/cwe_checker_lib/benches/_data/.gitkeep | 0 src/cwe_checker_lib/benches/benchmarks.rs | 667 +++++++++++++++++++++ 6 files changed, 948 insertions(+) create mode 100644 src/cwe_checker_lib/benches/_data/.gitkeep create mode 100644 src/cwe_checker_lib/benches/benchmarks.rs diff --git a/.dockerignore b/.dockerignore index 818d05c18..d5218654c 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,2 +1,3 @@ test/**/build target/ +src/cwe_checker_lib/benches/_data/ diff --git a/.gitignore b/.gitignore index 766ba9770..22097ca66 100644 --- a/.gitignore +++ b/.gitignore @@ -223,6 +223,8 @@ test/run_real_world_samples.sh doc/html test/artificial_samples/dockcross* +src/cwe_checker_lib/benches/_data/* +!src/cwe_checker_lib/benches/_data/.gitkeep .#* diff --git a/Cargo.lock b/Cargo.lock index bd251e93d..0c1694487 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -18,6 +18,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "anyhow" version = "1.0.80" @@ -54,12 +60,51 @@ version = "2.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "clap" version = "4.0.32" @@ -113,6 +158,42 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + [[package]] name = "crossbeam-channel" version = "0.5.11" @@ -122,12 +203,37 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + [[package]] name = "cwe_checker" version = "0.9.0-dev" @@ -157,6 +263,7 @@ version = "0.9.0-dev" dependencies = [ "anyhow", "apint", + "criterion", "crossbeam-channel", "derive_more", "directories", @@ -165,6 +272,7 @@ dependencies = [ "goblin", "itertools", "nix", + "paste", "petgraph", "regex", "serde", @@ -264,6 +372,16 @@ dependencies = [ "scroll", ] +[[package]] +name = "half" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if", + "crunchy", +] + [[package]] name = "hashbrown" version = "0.14.3" @@ -318,6 +436,15 @@ version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" +[[package]] +name = "js-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +dependencies = [ + "wasm-bindgen", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -381,12 +508,27 @@ dependencies = [ "pin-utils", ] +[[package]] +name = "num-traits" +version = 
"0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "once_cell" version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + [[package]] name = "option-ext" version = "0.2.0" @@ -399,6 +541,12 @@ version = "6.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" +[[package]] +name = "paste" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" + [[package]] name = "petgraph" version = "0.6.4" @@ -423,6 +571,34 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" +[[package]] +name = "plotters" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609" + +[[package]] +name = "plotters-svg" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab" +dependencies = [ + "plotters-backend", +] + 
[[package]] name = "proc-macro-error" version = "1.0.4" @@ -493,6 +669,26 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "rdrand" version = "0.4.0" @@ -711,6 +907,16 @@ dependencies = [ "syn 2.0.50", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "unicode-ident" version = "1.0.12" @@ -745,6 +951,70 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasm-bindgen" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.50", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" 
+version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.50", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" + +[[package]] +name = "web-sys" +version = "0.3.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "winapi" version = "0.3.9" diff --git a/src/cwe_checker_lib/Cargo.toml b/src/cwe_checker_lib/Cargo.toml index 28f84b6d2..d5fcf2535 100644 --- a/src/cwe_checker_lib/Cargo.toml +++ b/src/cwe_checker_lib/Cargo.toml @@ -22,5 +22,13 @@ itertools = "0.10.3" gcd = "2.1.0" nix = "0.26.1" +[dev-dependencies] +criterion = { version = "0.5.1", features = ["html_reports"] } +paste = "1.0.14" + +[[bench]] +name = "benchmarks" +harness = false + [lib] name = "cwe_checker_lib" diff --git a/src/cwe_checker_lib/benches/_data/.gitkeep b/src/cwe_checker_lib/benches/_data/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/src/cwe_checker_lib/benches/benchmarks.rs b/src/cwe_checker_lib/benches/benchmarks.rs new file mode 100644 index 000000000..7d53cb90a --- /dev/null +++ b/src/cwe_checker_lib/benches/benchmarks.rs @@ -0,0 +1,667 @@ +//! Microbenchmarks for the `cwe_checker`. +//! +//! 
This module contains microbenchmarks for various steps in the `cwe_checker`. +//! Currently there are benchmarks for the following steps: +//! +//! - CFG construction, +//! - individual normalization passes, +//! - function signatures analysis, +//! - pointer inference, +//! - string abstractions, +//! - individual checkers. +//! +//! All benchmarks are executed on the following input programs: +//! +//! - ls, +//! - netfs.ko, +//! +//! for the following architectures: +//! +//! - amd64, +//! - arm64, +//! - armel, +//! - armhf, +//! - mips64el, mipsel, +//! - ppc64el, +//! - x86. +//! +//! Inputs are stored in the `benches/_data/` directory. We include +//! json-serialized pcode projects and binaries. The pcode projects are +//! included for the following reasons: +//! +//! - not requiring a Ghidra installation on the benchmarking system, +//! - avoiding that changes in the Ghidra version influence the benchmark results, +//! - reducing the time it takes to run the benchmarks. +//! +//! # Getting the Inputs +//! +//! The input programs are not included in this repository. Before you can run +//! the benchmarks you need to download them. +//! +//! ```sh +//! $ cd benches/_data/ +//! $ wget https://valentinobst.de/34defc254cb6f45ef074431465b7ecc614a6a87e97b13a5c7d0a113e4ed67c6b/cwe_checker_benches.tar.gz +//! $ sha256sum cwe_checker_benches.tar.gz +//! 34defc254cb6f45ef074431465b7ecc614a6a87e97b13a5c7d0a113e4ed67c6b cwe_checker_benches.tar.gz +//! $ tar xf cwe_checker_benches.tar.gz +//! $ rm cwe_checker_benches.tar.gz +//! ``` +//! +//! # Running the Benchmarks +//! +//! If you submit a PR that makes changes which might impact performance you are +//! encouraged to run these benchmarks. In this case, please report the relevant +//! changes between the current master and your code in the PR description. +//! If your PR adds code that is not currently benchmarked you are encouraged to +//! add a benchmark for it to this module. +//! +//! 
In general, absolute benchmark results are always tied to the system that +//! they were executed on. Thus, it only makes sense to report relative results +//! in your PR. To ensure that results are comparable between runs make +//! sure to follow the advice given below: +//! +//! - ensure that the system is calm, i.e., use a dedicated system and try +//! to shut off as many background processes as possible, +//! - disable adaptive CPU frequency scaling in the OS and UEFI firmware, +//! - connect laptops to the power supply. + +use std::fs; +use std::io::Read; +use std::iter; +use std::time; + +use criterion::{ + black_box, criterion_group, criterion_main, + measurement::{Measurement, WallTime}, + BatchSize, BenchmarkGroup, BenchmarkId, Criterion, SamplingMode, Throughput, +}; + +use cwe_checker_lib::analysis::{self, graph}; +use cwe_checker_lib::intermediate_representation::{ + propagate_control_flow, Project, RuntimeMemoryImage, +}; +use cwe_checker_lib::pipeline::AnalysisResults; +use cwe_checker_lib::utils; + +mod inputs { + //! Constants used to access inputs stored in `benches/_data`. 
+ + pub const LS_PCODE_PROJECTS: [&str; 7] = [ + "amd64-ls_CB30D69B24245BF2", + "armel-ls_400B36192085C142", + "mipsel-ls_7CB1427659E706FB", + "x86-ls_AFE4E5F03F4CF0F7", + "arm64-ls_8D0A90D5AA1F9151", + "armhf-ls_1CE9F5077E5469C9", + "ppc64el-ls_0507CC2232E82FA9", + ]; + + pub const NETFS_PCODE_PROJECTS: [&str; 7] = [ + "amd64-netfs.ko_2968775E85859742", + "armhf-netfs.ko_B7FA86FF57F64C18", + "mips64r2el-netfs.ko_5331834BF22142BD", + "x86-netfs.ko_70E21F23852A0A0B", + "arm64-netfs.ko_91816E1342973AFA", + "mips32r2el-netfs.ko_6DF4CC2FD1E91EDC", + "powerpc64le-netfs.ko_332ECD2BFBEE0616", + ]; + + pub const LS_BINARIES: [&str; 7] = [ + "amd64-ls", + "armel-ls", + "mipsel-ls", + "x86-ls", + "arm64-ls", + "armhf-ls", + "ppc64el-ls", + ]; + + pub const NETFS_BINARIES: [&str; 7] = [ + "amd64-netfs.ko", + "arm64-netfs.ko", + "armhf-netfs.ko", + "mips32r2el-netfs.ko", + "mips64r2el-netfs.ko", + "powerpc64le-netfs.ko", + "x86-netfs.ko", + ]; +} + +mod helpers { + //! Helpers to get inputs and configurations. 
+ + use super::*; + + const PREFIX: &str = "benches/_data/"; + const CONFIG: &str = "../config.json"; + + pub fn get_project_and_binary(pcode_project_json: &str, binary: &str) -> (Project, Vec) { + let mut pcode_project = String::new(); + fs::File::open(format!("{}{}", PREFIX, pcode_project_json)) + .expect("Could not open file.") + .read_to_string(&mut pcode_project) + .expect(""); + let pcode_project = serde_json::from_str(&pcode_project).expect(""); + let binary: Vec = fs::File::open(format!("{}{}", PREFIX, binary)) + .expect("") + .bytes() + .map(|x| x.unwrap()) + .collect(); + let mut project = + utils::ghidra::parse_pcode_project_to_ir_project(pcode_project, &binary, &None) + .expect("") + .0; + let mut runtime_memory_image = RuntimeMemoryImage::new(&binary).expect(""); + if project.program.term.address_base_offset != 0 { + runtime_memory_image.add_global_memory_offset(project.program.term.address_base_offset); + } + project.runtime_memory_image = runtime_memory_image; + + (project, binary) + } + + pub fn get_config() -> serde_json::Value { + let config_file = std::fs::read_to_string(CONFIG).unwrap(); + + serde_json::from_str(&config_file).unwrap() + } + + pub fn get_project(pcode_project_json: &str, binary: &str) -> Project { + get_project_and_binary(pcode_project_json, binary).0 + } +} + +mod checkers { + //! Benchmarks for individual checkers. 
+ + use super::helpers::*; + use super::inputs::*; + use super::*; + use cwe_checker_lib::checkers::*; + + fn helper_bench_checker( + checker: &cwe_checker_lib::CweModule, + group: BenchmarkGroup, + is_lkm: bool, + ) where + T: Measurement, + { + let config = get_config(); + let bench_with_input_loop = + |pcode_projects: &[&str], binaries: &[&str], mut group: BenchmarkGroup| { + for (pcode_project_json, binary) in iter::zip(pcode_projects, binaries) { + let (mut project, binary) = get_project_and_binary(pcode_project_json, binary); + let _ = project.normalize(); + let cfg = graph::get_program_cfg(&project.program); + + let analysis_results = AnalysisResults::new(&binary, &cfg, &project); + + let (function_signatures, _) = analysis_results.compute_function_signatures(); + let analysis_results = + analysis_results.with_function_signatures(Some(&function_signatures)); + + let pi_result = + analysis_results.compute_pointer_inference(&config["Memory"], false); + let analysis_results = + analysis_results.with_pointer_inference(Some(&pi_result)); + + // Only CWE78 needs string abstractions and we do not + // benchmark this checker. + + group.throughput(Throughput::Elements(cfg.edge_count() as u64)); + group.bench_with_input( + BenchmarkId::from_parameter(pcode_project_json), + &analysis_results, + |b, analysis_results| { + b.iter_with_large_drop(|| { + (checker.run)(&analysis_results, &config[&checker.name]) + }) + }, + ); + } + + group.finish(); + }; + + if is_lkm { + bench_with_input_loop(&NETFS_PCODE_PROJECTS, &NETFS_BINARIES, group); + } else { + bench_with_input_loop(&LS_PCODE_PROJECTS, &LS_BINARIES, group); + } + } + + macro_rules! bench_checker { + ($c:ident) => { + bench_checker!(name = $c; samples = 100; time = 10); + }; + (name = $c:ident; samples = $s:expr; time = $t:expr) => { + ::paste::paste! 
{ + pub fn [](c: &mut Criterion) { + let mut group_ls = c.benchmark_group(stringify!([])); + group_ls + .sample_size($s) + .warm_up_time(time::Duration::new(($t as u64).checked_div(2).unwrap(), 0)) + .measurement_time(time::Duration::new($t, 0)); + helper_bench_checker(&$c::CWE_MODULE, group_ls, false); + + if MODULES_LKM.contains( + &["CWE", stringify!($c).split("_").last().unwrap()] + .concat() + .as_str() + ) + { + let mut group_netfs = c.benchmark_group(stringify!([])); + group_netfs + .sample_size($s) + .warm_up_time(time::Duration::new(($t as u64).checked_div(2).unwrap(), 0)) + .measurement_time(time::Duration::new($t, 0)); + helper_bench_checker(&$c::CWE_MODULE, group_netfs, true); + } + } + } + }; + } + + bench_checker!(cwe_119); + bench_checker!(cwe_134); + bench_checker!(cwe_190); + bench_checker!(cwe_215); + bench_checker!(cwe_243); + bench_checker!( + name = cwe_252; + samples = 10; + time = 20 + ); + bench_checker!( + name = cwe_332; + samples = 1000; + time = 10 + ); + bench_checker!( + name = cwe_337; + samples = 1000; + time = 10 + ); + bench_checker!(cwe_367); + bench_checker!( + name = cwe_416; + samples = 10; + time = 20 + ); + bench_checker!(cwe_426); + bench_checker!(cwe_467); + bench_checker!( + name = cwe_476; + samples = 10; + time = 20 + ); + bench_checker!( + name = cwe_560; + samples = 1000; + time = 10 + ); + bench_checker!(cwe_676); + bench_checker!(cwe_782); + bench_checker!(cwe_789); +} + +mod core_analyses { + //! Benchmarks for function signatures, pointer inference and string + //! abstractions. 
+ + use super::helpers::*; + use super::inputs::*; + use super::*; + + pub fn bench_function_signatures(c: &mut Criterion) { + let bench_with_input_loop = + |pcode_projects: &[&str], binaries: &[&str], mut group: BenchmarkGroup| { + for (pcode_project_json, binary) in iter::zip(pcode_projects, binaries) { + let (mut project, binary) = get_project_and_binary(pcode_project_json, binary); + let _ = project.normalize(); + let cfg = graph::get_program_cfg(&project.program); + + let analysis_results = AnalysisResults::new(&binary, &cfg, &project); + + group.throughput(Throughput::Elements(cfg.edge_count() as u64)); + group.bench_with_input( + BenchmarkId::from_parameter(pcode_project_json), + &analysis_results, + |b, analysis_results| { + b.iter_with_large_drop(|| { + analysis_results.compute_function_signatures() + }) + }, + ); + } + + group.finish(); + }; + + let mut group_ls = c.benchmark_group("ls_function_signatures"); + group_ls + .sampling_mode(SamplingMode::Flat) + .warm_up_time(time::Duration::new(60, 0)) + .measurement_time(time::Duration::new(120, 0)); + bench_with_input_loop(&LS_PCODE_PROJECTS, &LS_BINARIES, group_ls); + + let mut group_netfs = c.benchmark_group("netfs_function_signatures"); + group_netfs + .sampling_mode(SamplingMode::Flat) + .warm_up_time(time::Duration::new(30, 0)) + .measurement_time(time::Duration::new(60, 0)); + bench_with_input_loop(&NETFS_PCODE_PROJECTS, &NETFS_BINARIES, group_netfs); + } + + pub fn bench_pi(c: &mut Criterion) { + let bench_with_input_loop = + |pcode_projects: &[&str], binaries: &[&str], mut group: BenchmarkGroup| { + let config = get_config(); + for (pcode_project_json, binary) in iter::zip(pcode_projects, binaries) { + let (mut project, binary) = get_project_and_binary(pcode_project_json, binary); + let _ = project.normalize(); + let cfg = graph::get_program_cfg(&project.program); + + let analysis_results = AnalysisResults::new(&binary, &cfg, &project); + + let (function_signatures, _) = 
analysis_results.compute_function_signatures(); + let analysis_results = + analysis_results.with_function_signatures(Some(&function_signatures)); + + group.throughput(Throughput::Elements(cfg.edge_count() as u64)); + // should be `bench_with_input`, workaround due to lifetime hell. + // Can not reproduce: + // https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=63c6a380d2e98e8e3f7ec28d393ff8c2 + group.bench_function(BenchmarkId::new("pi", pcode_project_json), |b| { + b.iter_with_large_drop(|| { + analysis_results + .compute_pointer_inference(&config["Memory"], black_box(false)) + }) + }); + } + + group.finish(); + }; + + let mut group_ls = c.benchmark_group("ls_pi"); + group_ls + .sampling_mode(SamplingMode::Flat) + .warm_up_time(time::Duration::new(30, 0)) + .measurement_time(time::Duration::new(60, 0)); + bench_with_input_loop(&LS_PCODE_PROJECTS, &LS_BINARIES, group_ls); + + let mut group_netfs = c.benchmark_group("netfs_pi"); + group_netfs + .sampling_mode(SamplingMode::Flat) + .warm_up_time(time::Duration::new(20, 0)) + .measurement_time(time::Duration::new(30, 0)); + bench_with_input_loop(&NETFS_PCODE_PROJECTS, &NETFS_BINARIES, group_netfs); + } + + pub fn bench_string_abstractions(c: &mut Criterion) { + let bench_with_input_loop = + |pcode_projects: &[&str], binaries: &[&str], mut group: BenchmarkGroup| { + let config = get_config(); + for (pcode_project_json, binary) in iter::zip(pcode_projects, binaries) { + let (mut project, binary) = get_project_and_binary(pcode_project_json, binary); + let _ = project.normalize(); + let cfg = graph::get_program_cfg(&project.program); + + let analysis_results = AnalysisResults::new(&binary, &cfg, &project); + + let (function_signatures, _) = analysis_results.compute_function_signatures(); + let analysis_results = + analysis_results.with_function_signatures(Some(&function_signatures)); + + let pi_result = + analysis_results.compute_pointer_inference(&config["Memory"], false); + let analysis_results = + 
analysis_results.with_pointer_inference(Some(&pi_result)); + + group.throughput(Throughput::Elements(cfg.edge_count() as u64)); + // should be `bench_with_input`, workaround due to lifetime hell. + // Can not reproduce: + // https://play.rust-lang.org/?version=stable&mode=debug&edition=2021&gist=63c6a380d2e98e8e3f7ec28d393ff8c2 + group.bench_function( + BenchmarkId::new("string_abstractions", pcode_project_json), + |b| { + b.iter_with_large_drop(|| { + analysis_results.compute_string_abstraction( + &config["StringAbstraction"], + black_box(Some(&pi_result)), + ) + }) + }, + ); + } + + group.finish(); + }; + + let mut group_ls = c.benchmark_group("ls_string_abstractions"); + group_ls + .warm_up_time(time::Duration::new(30, 0)) + .measurement_time(time::Duration::new(60, 0)); + bench_with_input_loop(&LS_PCODE_PROJECTS, &LS_BINARIES, group_ls); + + let mut group_netfs = c.benchmark_group("netfs_string_abstractions"); + group_netfs + .warm_up_time(time::Duration::new(10, 0)) + .measurement_time(time::Duration::new(30, 0)); + bench_with_input_loop(&NETFS_PCODE_PROJECTS, &NETFS_BINARIES, group_netfs); + } +} + +mod normalization { + //! Benchmarks for individual normalization passes. 
+ + use super::helpers::*; + use super::inputs::*; + use super::*; + + fn helper_bench_normalization( + pre_passes: F, + pass: G, + mut group: BenchmarkGroup, + is_lkm: bool, + ) where + F: Fn(&mut Project), + G: Fn(&mut Project) -> U, + { + let bench_with_input_loop = |pcode_projects: &[&str], binaries: &[&str]| { + for (pcode_project_json, binary) in iter::zip(pcode_projects, binaries) { + let mut project = get_project(pcode_project_json, binary); + + pre_passes(&mut project); + + group.bench_with_input( + BenchmarkId::from_parameter(pcode_project_json), + &project, + |b, project| { + b.iter_batched_ref( + || project.clone(), + |project| pass(project), + BatchSize::LargeInput, + ) + }, + ); + } + + group.finish(); + }; + + if is_lkm { + bench_with_input_loop(&NETFS_PCODE_PROJECTS, &NETFS_BINARIES); + } else { + bench_with_input_loop(&LS_PCODE_PROJECTS, &LS_BINARIES); + } + } + + macro_rules! bench_pass { + (name = $p:ident; pre_passes = $pre:expr; pass = $pass:expr) => { + bench_pass!( + name = $p; + pre_passes = $pre; + pass = $pass; + samples = 10; + time = 30 + ); + }; + (name = $p:ident; pre_passes = $pre:expr; pass = $pass:expr; samples = $s:expr; time = $t:expr) => { + ::paste::paste! 
{ + pub fn [](c: &mut Criterion) { + let mut group_ls = c.benchmark_group(stringify!([])); + group_ls + .sample_size($s) + .warm_up_time(time::Duration::new(($t as u64).checked_div(2).unwrap(), 0)) + .measurement_time(time::Duration::new($t, 0)); + helper_bench_normalization($pre, $pass, group_ls, false); + + let mut group_netfs = c.benchmark_group(stringify!([])); + group_netfs + .sample_size($s) + .warm_up_time(time::Duration::new(($t as u64).checked_div(2).unwrap(), 0)) + .measurement_time(time::Duration::new($t, 0)); + helper_bench_normalization($pre, $pass, group_netfs, true); + } + } + }; + } + + bench_pass!( + name = basic; + pre_passes = |_: &mut Project| (); + pass = Project::normalize_basic + ); + bench_pass!( + name = expression_propagation; + pre_passes = |project: &mut Project| { + let _ = project.normalize_basic(); + }; + pass = analysis::expression_propagation::propagate_input_expression; + samples = 10; + time = 60 + ); + bench_pass!( + name = substitute_trivial_expressions; + pre_passes = |project: &mut Project| { + let _ = project.normalize_basic(); + analysis::expression_propagation::propagate_input_expression(project); + }; + pass = Project::substitute_trivial_expressions + ); + bench_pass!( + name = dead_variable_elimination; + pre_passes = |project: &mut Project| { + let _ = project.normalize_basic(); + analysis::expression_propagation::propagate_input_expression(project); + project.substitute_trivial_expressions(); + }; + pass = analysis::dead_variable_elimination::remove_dead_var_assignments; + samples = 10; + time = 60 + ); + bench_pass!( + name = propagate_control_flow; + pre_passes = |project: &mut Project| { + let _ = project.normalize_basic(); + analysis::expression_propagation::propagate_input_expression(project); + project.substitute_trivial_expressions(); + analysis::dead_variable_elimination::remove_dead_var_assignments(project); + }; + pass = propagate_control_flow::propagate_control_flow + ); + bench_pass!( + name = 
substitute_and_on_stackpointer; + pre_passes = |project: &mut Project| { + let _ = project.normalize_basic(); + analysis::expression_propagation::propagate_input_expression(project); + project.substitute_trivial_expressions(); + analysis::dead_variable_elimination::remove_dead_var_assignments(project); + propagate_control_flow::propagate_control_flow(project); + }; + pass = analysis::stack_alignment_substitution::substitute_and_on_stackpointer + ); +} + +mod cfg { + //! Benchmarks for CFG construction. + + use super::helpers::*; + use super::inputs::*; + use super::*; + + pub fn bench_cfg_construction(c: &mut Criterion) { + let bench_with_input_loop = + |pcode_projects: &[&str], binaries: &[&str], mut group: BenchmarkGroup| { + for (pcode_project_json, binary) in iter::zip(pcode_projects, binaries) { + let mut project = get_project(pcode_project_json, binary); + + let _ = project.normalize_basic(); + let program_unoptimized = project.program.clone(); + + let _ = project.normalize_optimize(); + let program_optimized = project.program.clone(); + + group.bench_with_input( + BenchmarkId::new("unoptimized", pcode_project_json), + &program_unoptimized, + |b, program_unoptimized| { + b.iter_with_large_drop(|| graph::get_program_cfg(&program_unoptimized)); + }, + ); + group.bench_with_input( + BenchmarkId::new("optimized", pcode_project_json), + &program_optimized, + |b, program_optimized| { + b.iter_with_large_drop(|| graph::get_program_cfg(&program_optimized)); + }, + ); + } + + group.finish(); + }; + + let group_ls = c.benchmark_group("ls_cfg_construction"); + bench_with_input_loop(&LS_PCODE_PROJECTS, &LS_BINARIES, group_ls); + + let group_netfs = c.benchmark_group("netfs_cfg_construction"); + bench_with_input_loop(&NETFS_PCODE_PROJECTS, &NETFS_BINARIES, group_netfs); + } +} + +criterion_group!( + name = benches; + config = Criterion::default() + .sample_size(10) + .warm_up_time(time::Duration::new(5, 0)) + .measurement_time(time::Duration::new(10, 0)); + targets = 
cfg::bench_cfg_construction, + normalization::bench_normalize_basic, + normalization::bench_normalize_expression_propagation, + normalization::bench_normalize_substitute_trivial_expressions, + normalization::bench_normalize_dead_variable_elimination, + normalization::bench_normalize_propagate_control_flow, + normalization::bench_normalize_substitute_and_on_stackpointer, + core_analyses::bench_function_signatures, + core_analyses::bench_pi, + core_analyses::bench_string_abstractions, + checkers::bench_cwe_119, + checkers::bench_cwe_134, + checkers::bench_cwe_190, + checkers::bench_cwe_215, + checkers::bench_cwe_243, + checkers::bench_cwe_252, + checkers::bench_cwe_332, + checkers::bench_cwe_337, + checkers::bench_cwe_367, + checkers::bench_cwe_416, + checkers::bench_cwe_426, + checkers::bench_cwe_467, + checkers::bench_cwe_476, + checkers::bench_cwe_560, + checkers::bench_cwe_676, + checkers::bench_cwe_782, + checkers::bench_cwe_789, +); +criterion_main!(benches);