analyze: borrowck: cache results of polonius runs on disk (#1056)
The Polonius stage of borrow checking takes a long time to run on
certain functions, such as lighttpd's `li_MD5Transform`. Worse, we often
run Polonius multiple times on the same function as the interprocedural
analysis iterates to reach a fixpoint. This branch speeds up the
analysis by caching Polonius results on disk.

The caching logic is fairly simple: the core Polonius analysis is
effectively a pure function from input facts to output facts, so we hash
the input facts before each call and check whether a file named after
that hash is present in the cache directory. There's no need to factor
in any details of the crate, MIR, permissions, etc. If the current
Polonius query has the same input facts as a previous query, it will
necessarily produce the same output facts, regardless of how those input
facts were computed.
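
In outline, the lookup is content-addressed: hash the serialized input facts, then use the hash as a file name. A minimal sketch of that flow (not the code from the diff below; `cached_path`, `load_or_compute`, and the `Vec<u8>` stand-in for the real Polonius output type are illustrative):

    use std::fs;
    use std::path::PathBuf;

    const CACHE_DIR: &str = "polonius_cache";

    // Content-addressed lookup: identical input facts always map to the same file.
    fn cached_path(facts_hash: &str) -> PathBuf {
        PathBuf::from(CACHE_DIR).join(format!("{facts_hash}.output"))
    }

    fn load_or_compute(facts_hash: &str, compute: impl FnOnce() -> Vec<u8>) -> Vec<u8> {
        let path = cached_path(facts_hash);
        if let Ok(bytes) = fs::read(&path) {
            return bytes; // cache hit: skip the expensive Polonius run
        }
        let bytes = compute(); // cache miss: run the analysis...
        fs::create_dir_all(CACHE_DIR).ok();
        fs::write(&path, &bytes).ok(); // ...and store the result for next time
        bytes
    }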

Computing the input facts still has a nontrivial cost for some
functions, but this branch provides significant speedups on `algo_md5`
and `lighttpd_rust_amalgamated` once `c2rust-analyze` has run once to
populate the cache.
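
For the hashing itself, the diff below routes Rust's std::hash::Hash machinery into a SHA-256 digest (and, along the way, switches the solver from Algorithm::Naive to Algorithm::DatafrogOpt). A self-contained sketch of that adapter pattern, assuming the sha2 crate's Digest API; `sha256_of` and the `main` demo are illustrative, not part of the commit:

    use sha2::{Digest, Sha256};
    use std::hash::{Hash, Hasher};

    // Adapter that feeds the byte stream produced by `Hash::hash` into a SHA-256 digest.
    struct Sha256Hasher(Sha256);

    impl Hasher for Sha256Hasher {
        fn write(&mut self, bytes: &[u8]) {
            self.0.update(bytes);
        }
        fn finish(&self) -> u64 {
            // Only `write` is ever used; a 64-bit result would truncate the digest.
            panic!("Sha256Hasher doesn't support finish()");
        }
    }

    fn sha256_of<T: Hash>(x: &T) -> [u8; 32] {
        let mut hasher = Sha256Hasher(Sha256::new());
        x.hash(&mut hasher);
        hasher.0.finalize().as_slice().try_into().unwrap()
    }

    fn main() {
        // Equal inputs yield equal cache keys, which is all the caching relies on.
        assert_eq!(sha256_of(&("facts", 1u32)), sha256_of(&("facts", 1u32)));
    }
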
spernsteiner authored Jan 19, 2024 · 2 parents 21907d8 + 09cb658 · commit d818e4d
Showing 4 changed files with 218 additions and 8 deletions.
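
One detail worth knowing before reading the diff: Polonius's Output struct has 17 fields and doesn't implement Serialize, so the commit unpacks it into nested tuples before handing it to bincode (the in-code comments note that the tuple impls only go up to a fixed arity, well below 17). A standalone sketch of the same workaround; the toy `roundtrip` function and its u8 fields are illustrative, not part of the commit:

    // Nesting sub-tuples keeps each individual tuple within the arity serde supports.
    fn roundtrip() -> Result<(), bincode::Error> {
        let raw: ((u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8), (u8,)) =
            ((1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), (13,));
        let bytes = bincode::serialize(&raw)?;
        let back: ((u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8), (u8,)) =
            bincode::deserialize(&bytes)?;
        assert_eq!(raw, back);
        Ok(())
    }
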
9 changes: 5 additions & 4 deletions Cargo.lock

(Diff for the generated Cargo.lock file is not rendered.)

1 change: 1 addition & 0 deletions c2rust-analyze/Cargo.toml
@@ -29,6 +29,7 @@ clap = { version = "4.2.7", features = ["derive"] }
 fs-err = "2.9.0"
 anyhow = "1.0.75"
 toml_edit = "0.19.8"
+sha2 = "0.10.8"
 
 [build-dependencies]
 c2rust-build-paths = { path = "../c2rust-build-paths", version = "0.18.0" }
5 changes: 4 additions & 1 deletion c2rust-analyze/src/borrowck/atoms.rs
@@ -1,12 +1,15 @@
 use polonius_engine::{self, Atom, FactTypes};
 use rustc_middle::mir::{BasicBlock, Local, Location, Place, PlaceElem};
 use rustc_middle::ty::TyCtxt;
+use serde::{Deserialize, Serialize};
 use std::collections::hash_map::{Entry, HashMap};
 use std::hash::Hash;
 
 macro_rules! define_atom_type {
     ($Atom:ident) => {
-        #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
+        #[derive(
+            Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Hash, Serialize, Deserialize,
+        )]
         pub struct $Atom(usize);
 
         impl From<usize> for $Atom {
211 changes: 208 additions & 3 deletions c2rust-analyze/src/borrowck/mod.rs
@@ -14,9 +14,11 @@ use rustc_middle::ty::{
     TyKind,
 };
 use rustc_type_ir::RegionKind::ReEarlyBound;
+use sha2::{Digest, Sha256};
 use std::collections::HashMap;
-use std::fmt::{Debug, Formatter};
-use std::hash::Hash;
+use std::fmt::{Debug, Formatter, Write as _};
+use std::fs::{self, File};
+use std::hash::{Hash, Hasher};
 
 mod atoms;
 mod def_use;
@@ -358,12 +360,215 @@ fn run_polonius<'tcx>(
 
     dump::dump_facts_to_dir(&facts, &maps, format!("inspect/{}", name)).unwrap();
 
-    let output = polonius_engine::Output::compute(&facts, polonius_engine::Algorithm::Naive, true);
+    eprintln!("running polonius analysis on {name}");
+    let facts_hash = bytes_to_hex_string(&hash_facts(&facts));
+    let output = match try_load_cached_output(&facts_hash) {
+        Some(output) => output,
+        None => {
+            let output = polonius_engine::Output::compute(
+                &facts,
+                polonius_engine::Algorithm::DatafrogOpt,
+                true,
+            );
+            save_cached_output(&facts_hash, &output).unwrap();
+            output
+        }
+    };
     dump::dump_output_to_dir(&output, &maps, format!("inspect/{}", name)).unwrap();
 
     (facts, maps, output)
 }
+
+fn try_load_cached_output(facts_hash: &str) -> Option<Output> {
+    let path = format!("polonius_cache/{}.output", facts_hash);
+
+    let f = File::open(&path).ok()?;
+    let raw = match bincode::deserialize_from(f) {
+        Ok(x) => x,
+        Err(e) => {
+            log::warn!("failed to parse polonius cache file {path:?}: {e}");
+            return None;
+        }
+    };
+    // The Polonius `Output` type doesn't implement `Serialize`. Rather than define a local
+    // wrapper or proxy type and implement `Serialize` on that, we just unpack the struct into a
+    // tuple and serialize that instead. However, tuples only implement `Deserialize` up to
+    // length 12, so we have to split up this 17-element tuple into several pieces.
+    let (
+        (
+            errors,
+            subset_errors,
+            move_errors,
+            dump_enabled,
+            loan_live_at,
+            origin_contains_loan_at,
+            origin_contains_loan_anywhere,
+            origin_live_on_entry,
+            loan_invalidated_at,
+            subset,
+            subset_anywhere,
+            var_live_on_entry,
+        ),
+        (
+            var_drop_live_on_entry,
+            path_maybe_initialized_on_exit,
+            path_maybe_uninitialized_on_exit,
+            known_contains,
+            var_maybe_partly_initialized_on_exit,
+        ),
+    ) = raw;
+
+    eprintln!("loaded cached facts from {}", path);
+
+    Some(Output {
+        errors,
+        subset_errors,
+        move_errors,
+        dump_enabled,
+        loan_live_at,
+        origin_contains_loan_at,
+        origin_contains_loan_anywhere,
+        origin_live_on_entry,
+        loan_invalidated_at,
+        subset,
+        subset_anywhere,
+        var_live_on_entry,
+        var_drop_live_on_entry,
+        path_maybe_initialized_on_exit,
+        path_maybe_uninitialized_on_exit,
+        known_contains,
+        var_maybe_partly_initialized_on_exit,
+    })
+}
+
+fn save_cached_output(facts_hash: &str, output: &Output) -> Result<(), bincode::Error> {
+    fs::create_dir_all("polonius_cache")?;
+    let path = format!("polonius_cache/{}.output", facts_hash);
+
+    let Output {
+        ref errors,
+        ref subset_errors,
+        ref move_errors,
+        ref dump_enabled,
+        ref loan_live_at,
+        ref origin_contains_loan_at,
+        ref origin_contains_loan_anywhere,
+        ref origin_live_on_entry,
+        ref loan_invalidated_at,
+        ref subset,
+        ref subset_anywhere,
+        ref var_live_on_entry,
+        ref var_drop_live_on_entry,
+        ref path_maybe_initialized_on_exit,
+        ref path_maybe_uninitialized_on_exit,
+        ref known_contains,
+        ref var_maybe_partly_initialized_on_exit,
+    } = *output;
+
+    // Split the tuple into several pieces, as described in `try_load_cached_output`. The tuple
+    // format used here must match the one in `try_load_cached_output`.
+    let raw = (
+        (
+            errors,
+            subset_errors,
+            move_errors,
+            dump_enabled,
+            loan_live_at,
+            origin_contains_loan_at,
+            origin_contains_loan_anywhere,
+            origin_live_on_entry,
+            loan_invalidated_at,
+            subset,
+            subset_anywhere,
+            var_live_on_entry,
+        ),
+        (
+            var_drop_live_on_entry,
+            path_maybe_initialized_on_exit,
+            path_maybe_uninitialized_on_exit,
+            known_contains,
+            var_maybe_partly_initialized_on_exit,
+        ),
+    );
+
+    let f = File::create(path)?;
+    bincode::serialize_into(f, &raw)
+}
+
+fn bytes_to_hex_string(b: &[u8]) -> String {
+    let mut s = String::with_capacity(b.len() * 2);
+    for &x in b {
+        write!(s, "{:02x}", x).unwrap();
+    }
+    s
+}
+
+fn hash_facts(facts: &AllFacts) -> [u8; 32] {
+    let AllFacts {
+        ref loan_issued_at,
+        ref universal_region,
+        ref cfg_edge,
+        ref loan_killed_at,
+        ref subset_base,
+        ref loan_invalidated_at,
+        ref var_used_at,
+        ref var_defined_at,
+        ref var_dropped_at,
+        ref use_of_var_derefs_origin,
+        ref drop_of_var_derefs_origin,
+        ref child_path,
+        ref path_is_var,
+        ref path_assigned_at_base,
+        ref path_moved_at_base,
+        ref path_accessed_at_base,
+        ref known_placeholder_subset,
+        ref placeholder,
+    } = *facts;
+
+    // Only tuples up to size 12 implement `Hash`, so we break up this list into nested tuples.
+    sha256_hash(&(
+        (
+            loan_issued_at,
+            universal_region,
+            cfg_edge,
+            loan_killed_at,
+            subset_base,
+            loan_invalidated_at,
+            var_used_at,
+            var_defined_at,
+            var_dropped_at,
+            use_of_var_derefs_origin,
+            drop_of_var_derefs_origin,
+            child_path,
+        ),
+        (
+            path_is_var,
+            path_assigned_at_base,
+            path_moved_at_base,
+            path_accessed_at_base,
+            known_placeholder_subset,
+            placeholder,
+        ),
+    ))
+}
+
+fn sha256_hash<T: Hash>(x: &T) -> [u8; 32] {
+    struct Sha256Hasher(Sha256);
+    impl Hasher for Sha256Hasher {
+        fn write(&mut self, bytes: &[u8]) {
+            self.0.update(bytes);
+        }
+        fn finish(&self) -> u64 {
+            panic!("Sha256Hasher doesn't support finish()");
+        }
+    }
+
+    let mut hasher = Sha256Hasher(Sha256::new());
+    x.hash(&mut hasher);
+    let digest = hasher.0.finalize();
+    digest.as_slice().try_into().unwrap()
+}
 
 fn construct_adt_origins<'tcx>(
     ltcx: &LTyCtxt<'tcx>,
     adt_metadata: &AdtMetadataTable,
