Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

analyze: borrowck: cache results of polonius runs on disk #1056

Merged
merged 2 commits into from
Jan 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions c2rust-analyze/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ clap = { version = "4.2.7", features = ["derive"] }
fs-err = "2.9.0"
anyhow = "1.0.75"
toml_edit = "0.19.8"
sha2 = "0.10.8"

[build-dependencies]
c2rust-build-paths = { path = "../c2rust-build-paths", version = "0.18.0" }
Expand Down
5 changes: 4 additions & 1 deletion c2rust-analyze/src/borrowck/atoms.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
use polonius_engine::{self, Atom, FactTypes};
use rustc_middle::mir::{BasicBlock, Local, Location, Place, PlaceElem};
use rustc_middle::ty::TyCtxt;
use serde::{Deserialize, Serialize};
use std::collections::hash_map::{Entry, HashMap};
use std::hash::Hash;

macro_rules! define_atom_type {
($Atom:ident) => {
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)]
#[derive(
Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Hash, Serialize, Deserialize,
)]
pub struct $Atom(usize);

impl From<usize> for $Atom {
Expand Down
211 changes: 208 additions & 3 deletions c2rust-analyze/src/borrowck/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@ use rustc_middle::ty::{
TyKind,
};
use rustc_type_ir::RegionKind::ReEarlyBound;
use sha2::{Digest, Sha256};
use std::collections::HashMap;
use std::fmt::{Debug, Formatter};
use std::hash::Hash;
use std::fmt::{Debug, Formatter, Write as _};
use std::fs::{self, File};
use std::hash::{Hash, Hasher};

mod atoms;
mod def_use;
Expand Down Expand Up @@ -358,12 +360,215 @@ fn run_polonius<'tcx>(

dump::dump_facts_to_dir(&facts, &maps, format!("inspect/{}", name)).unwrap();

let output = polonius_engine::Output::compute(&facts, polonius_engine::Algorithm::Naive, true);
eprintln!("running polonius analysis on {name}");
let facts_hash = bytes_to_hex_string(&hash_facts(&facts));
let output = match try_load_cached_output(&facts_hash) {
Some(output) => output,
None => {
let output = polonius_engine::Output::compute(
&facts,
polonius_engine::Algorithm::DatafrogOpt,
true,
);
save_cached_output(&facts_hash, &output).unwrap();
output
}
};
dump::dump_output_to_dir(&output, &maps, format!("inspect/{}", name)).unwrap();

(facts, maps, output)
}

/// Tries to load a cached Polonius `Output` for the facts identified by `facts_hash`.
///
/// The cache file is looked up at the relative path `polonius_cache/<facts_hash>.output`.
///
/// Returns `None` when the file can't be opened (e.g. there is no cache entry yet) or when its
/// contents fail to deserialize; in the latter case a warning is logged and the caller is
/// expected to recompute the output.
fn try_load_cached_output(facts_hash: &str) -> Option<Output> {
    let path = format!("polonius_cache/{}.output", facts_hash);

    // Read through a buffer: the deserializer pulls the data in many small pieces, and each of
    // those would otherwise be a separate read on the underlying file.
    let f = std::io::BufReader::new(File::open(&path).ok()?);
    let raw = match bincode::deserialize_from(f) {
        Ok(x) => x,
        Err(e) => {
            log::warn!("failed to parse polonius cache file {path:?}: {e}");
            return None;
        }
    };
    // The Polonius `Output` type doesn't implement `Serialize`.  Rather than define a local
    // wrapper or proxy type and implement `Serialize` on that, we just unpack the struct into a
    // tuple and serialize that instead.  However, tuples only implement `Deserialize` up to length
    // 12, so we have to split up this 17-element tuple into several pieces.
    //
    // The concrete type of `raw` is inferred from how its parts are used to build `Output`
    // below, so this layout must match the one written by `save_cached_output`.
    let (
        (
            errors,
            subset_errors,
            move_errors,
            dump_enabled,
            loan_live_at,
            origin_contains_loan_at,
            origin_contains_loan_anywhere,
            origin_live_on_entry,
            loan_invalidated_at,
            subset,
            subset_anywhere,
            var_live_on_entry,
        ),
        (
            var_drop_live_on_entry,
            path_maybe_initialized_on_exit,
            path_maybe_uninitialized_on_exit,
            known_contains,
            var_maybe_partly_initialized_on_exit,
        ),
    ) = raw;

    // NOTE(review): the message says "facts", but what was loaded is the analysis output.
    eprintln!("loaded cached facts from {}", path);

    Some(Output {
        errors,
        subset_errors,
        move_errors,
        dump_enabled,
        loan_live_at,
        origin_contains_loan_at,
        origin_contains_loan_anywhere,
        origin_live_on_entry,
        loan_invalidated_at,
        subset,
        subset_anywhere,
        var_live_on_entry,
        var_drop_live_on_entry,
        path_maybe_initialized_on_exit,
        path_maybe_uninitialized_on_exit,
        known_contains,
        var_maybe_partly_initialized_on_exit,
    })
}

/// Writes `output` to the cache file `polonius_cache/<facts_hash>.output` in bincode format.
///
/// The `polonius_cache` directory (a relative path) is created first if it doesn't exist. Errors
/// from creating the directory, creating the file, or serializing are propagated to the caller
/// as `bincode::Error`.
fn save_cached_output(facts_hash: &str, output: &Output) -> Result<(), bincode::Error> {
fs::create_dir_all("polonius_cache")?;
let path = format!("polonius_cache/{}.output", facts_hash);

let Output {
ref errors,
ref subset_errors,
ref move_errors,
ref dump_enabled,
ref loan_live_at,
ref origin_contains_loan_at,
ref origin_contains_loan_anywhere,
ref origin_live_on_entry,
ref loan_invalidated_at,
ref subset,
ref subset_anywhere,
ref var_live_on_entry,
ref var_drop_live_on_entry,
ref path_maybe_initialized_on_exit,
ref path_maybe_uninitialized_on_exit,
ref known_contains,
ref var_maybe_partly_initialized_on_exit,
} = *output;

// Split the tuple into several pieces, as described in `try_load_cached_output`. The tuple
// format used here must match the one in `try_load_cached_output`.
let raw = (
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is a tuple needed for the serialization as opposed to serializing the struct directly?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The struct is defined by the polonius crate and doesn't implement Serialize. Serde has some support for deriving impls for types in "remote crates", but it requires duplicating the struct definition and generally seems like a bit of a pain. Since we don't use the struct in fields of other Serialize types, we don't need a proper Serialize impl for it, and this tuple trick is sufficient.

(
errors,
subset_errors,
move_errors,
dump_enabled,
loan_live_at,
origin_contains_loan_at,
origin_contains_loan_anywhere,
origin_live_on_entry,
loan_invalidated_at,
subset,
subset_anywhere,
var_live_on_entry,
),
(
var_drop_live_on_entry,
path_maybe_initialized_on_exit,
path_maybe_uninitialized_on_exit,
known_contains,
var_maybe_partly_initialized_on_exit,
),
);

// NOTE(review): `f` is an unbuffered `File`; wrapping it in a `BufWriter` (with an explicit
// flush) would likely reduce the number of write calls -- confirm before changing.
let f = File::create(path)?;
bincode::serialize_into(f, &raw)
}

/// Renders `b` as a lowercase hexadecimal string, two digits per byte, with no separators.
fn bytes_to_hex_string(b: &[u8]) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut hex = String::with_capacity(b.len() * 2);
    for &byte in b {
        // High nibble first, then low nibble.
        hex.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
        hex.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
    }
    hex
}

/// Computes a SHA-256 digest over every relation in `facts`.
///
/// The struct pattern below names every field and has no `..` rest pattern, so adding a field to
/// `AllFacts` is a compile error here until the new field is included in the hash.
fn hash_facts(facts: &AllFacts) -> [u8; 32] {
    // Matching through the reference binds each field by reference.
    let AllFacts {
        loan_issued_at,
        universal_region,
        cfg_edge,
        loan_killed_at,
        subset_base,
        loan_invalidated_at,
        var_used_at,
        var_defined_at,
        var_dropped_at,
        use_of_var_derefs_origin,
        drop_of_var_derefs_origin,
        child_path,
        path_is_var,
        path_assigned_at_base,
        path_moved_at_base,
        path_accessed_at_base,
        known_placeholder_subset,
        placeholder,
    } = facts;

    // Only tuples up to size 12 implement `Hash`, so we break up this list into nested tuples.
    let first_group = (
        loan_issued_at,
        universal_region,
        cfg_edge,
        loan_killed_at,
        subset_base,
        loan_invalidated_at,
        var_used_at,
        var_defined_at,
        var_dropped_at,
        use_of_var_derefs_origin,
        drop_of_var_derefs_origin,
        child_path,
    );
    let second_group = (
        path_is_var,
        path_assigned_at_base,
        path_moved_at_base,
        path_accessed_at_base,
        known_placeholder_subset,
        placeholder,
    );

    sha256_hash(&(first_group, second_group))
}

/// Computes the SHA-256 digest of the bytes that `x`'s `Hash` implementation writes.
///
/// The result therefore depends on how `T` implements `Hash`, not on any serialized form of `x`.
fn sha256_hash<T: Hash>(x: &T) -> [u8; 32] {
    /// Adapter that forwards everything a `Hash` impl writes into a SHA-256 state.
    struct Sha256Hasher(Sha256);

    impl Hasher for Sha256Hasher {
        // A 64-bit result can't represent the digest, so `finish` is deliberately unsupported;
        // the full digest is taken from the inner state instead.
        fn finish(&self) -> u64 {
            panic!("Sha256Hasher doesn't support finish()");
        }

        fn write(&mut self, bytes: &[u8]) {
            self.0.update(bytes);
        }
    }

    let mut adapter = Sha256Hasher(Sha256::new());
    x.hash(&mut adapter);

    let Sha256Hasher(state) = adapter;
    let mut out = [0u8; 32];
    // Panics if the digest isn't exactly 32 bytes long.
    out.copy_from_slice(state.finalize().as_slice());
    out
}

fn construct_adt_origins<'tcx>(
ltcx: &LTyCtxt<'tcx>,
adt_metadata: &AdtMetadataTable,
Expand Down
Loading