Skip to content

Commit

Permalink
analyze: add --rewrite-mode pointwise (#1073)
Browse files Browse the repository at this point in the history
This adds a new --rewrite-mode called "pointwise", which rewrites each
function in isolation, with all other definitions marked `FIXED`. The
static analysis runs only once, then we run multiple rewriting passes
using the same analysis results, making this much more efficient than
running the whole `c2rust-analyze` tool multiple times.

The rewritten code is output to a separate file for each function. For
example, given `foo.rs` containing a function `bar`, this mode will
write to `foo.bar.rs` the new code produced by rewriting only `bar`.

Pointwise mode is the basis for our new "pointwise success rate" metric.
  • Loading branch information
spernsteiner authored Apr 15, 2024
2 parents ff1fa02 + a015ff3 commit 6264fae
Show file tree
Hide file tree
Showing 7 changed files with 158 additions and 44 deletions.
168 changes: 131 additions & 37 deletions c2rust-analyze/src/analyze.rs
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,31 @@ fn mark_foreign_fixed<'tcx>(
}
}

/// Flags every static known to the analysis as `FIXED`.
///
/// Walks `gacx.static_tys`; for each static, its labeled type is handed to `make_ty_fixed`, and
/// the pointer stored for that static in `gacx.addr_of_static` additionally receives
/// `FlagSet::FIXED`.
fn mark_all_statics_fixed<'tcx>(gacx: &mut GlobalAnalysisCtxt<'tcx>, gasn: &mut GlobalAssignment) {
    let statics = gacx.static_tys.iter();
    for (static_did, static_lty) in statics {
        make_ty_fixed(gasn, static_lty);

        // The pointer produced by taking the static's address is recorded separately in
        // `addr_of_static`, so it has to be flagged on its own.
        let addr_ptr = gacx.addr_of_static[&static_did];
        gasn.flags[addr_ptr].insert(FlagSet::FIXED);
    }
}

/// Flags the labeled type of every struct field as fixed.
///
/// For each `DefId` listed in `gacx.adt_metadata.struct_dids`, the ADT definition is looked up
/// through `tcx`, and the labeled type recorded in `gacx.field_ltys` for each of its fields is
/// passed to `make_ty_fixed`.
fn mark_all_structs_fixed<'tcx>(
    gacx: &mut GlobalAnalysisCtxt<'tcx>,
    gasn: &mut GlobalAssignment,
    tcx: TyCtxt<'tcx>,
) {
    for struct_did in gacx.adt_metadata.struct_dids.iter() {
        for field_def in tcx.adt_def(struct_did).all_fields() {
            // Indexing panics if a field has no recorded labeled type, exactly as before.
            make_ty_fixed(gasn, gacx.field_ltys[&field_def.did]);
        }
    }
}

fn parse_def_id(s: &str) -> Result<DefId, String> {
// DefId debug output looks like `DefId(0:1 ~ alias1[0dc4]::{use#0})`. The ` ~ name` part may
// be omitted if the name/DefPath info is not available at the point in the compiler where the
Expand Down Expand Up @@ -509,6 +534,34 @@ fn get_fixed_defs(tcx: TyCtxt) -> io::Result<HashSet<DefId>> {
Ok(fixed_defs)
}

/// Local information, specific to a single function. Many of the data structures we use for
/// the pointer analysis have a "global" part that's shared between all functions and a "local"
/// part that's specific to the function being analyzed; this struct contains only the local
/// parts. The different fields are set, used, and cleared at various points below.
///
/// `Clone` is derived so that pointwise rewrite mode can pass a fresh copy of the per-function
/// data to each rewriting pass (see the `func_info.clone()` call in `run`).
#[derive(Clone, Default)]
struct FuncInfo<'tcx> {
/// Local analysis context data, such as [`LTy`]s for all MIR locals. Combine with the
/// [`GlobalAnalysisCtxt`] to get a complete [`AnalysisCtxt`] for use within this function.
acx_data: MaybeUnset<AnalysisCtxtData<'tcx>>,
/// Dataflow constraints gathered from the body of this function. These are used for
/// propagating `READ`/`WRITE`/`OFFSET_ADD` and similar permissions.
dataflow: MaybeUnset<DataflowConstraints>,
/// Local equivalence-class information. Combine with the [`GlobalEquivSet`] to get a
/// complete [`EquivSet`], which assigns an equivalence class to each [`PointerId`] that
/// appears in the function. Used for renumbering [`PointerId`]s.
local_equiv: MaybeUnset<LocalEquivSet>,
/// Local part of the permission/flag assignment. Combine with the [`GlobalAssignment`] to
/// get a complete [`Assignment`] for this function, which maps every [`PointerId`] in this
/// function to a [`PermissionSet`] and [`FlagSet`].
lasn: MaybeUnset<LocalAssignment>,
/// Constraints on pointee types gathered from the body of this function.
pointee_constraints: MaybeUnset<pointee_type::ConstraintSet<'tcx>>,
/// Local part of pointee type sets.
local_pointee_types: MaybeUnset<LocalPointerTable<PointeeTypes<'tcx>>>,
/// Table for looking up the most recent write to a given local.
recent_writes: MaybeUnset<RecentWrites>,
}

fn run(tcx: TyCtxt) {
eprintln!("all defs:");
for ldid in tcx.hir_crate_items(()).definitions() {
Expand All @@ -518,37 +571,13 @@ fn run(tcx: TyCtxt) {
// Load the list of fixed defs early, so any errors are reported immediately.
let fixed_defs = get_fixed_defs(tcx).unwrap();

let rewrite_pointwise = env::var("C2RUST_ANALYZE_REWRITE_MODE")
.ok()
.map_or(false, |val| val == "pointwise");

let mut gacx = GlobalAnalysisCtxt::new(tcx);
let mut func_info = HashMap::new();

/// Local information, specific to a single function. Many of the data structures we use for
/// the pointer analysis have a "global" part that's shared between all functions and a "local"
/// part that's specific to the function being analyzed; this struct contains only the local
/// parts. The different fields are set, used, and cleared at various points below.
#[derive(Default)]
struct FuncInfo<'tcx> {
/// Local analysis context data, such as [`LTy`]s for all MIR locals. Combine with the
/// [`GlobalAnalysisCtxt`] to get a complete [`AnalysisCtxt`] for use within this function.
acx_data: MaybeUnset<AnalysisCtxtData<'tcx>>,
/// Dataflow constraints gathered from the body of this function. These are used for
/// propagating `READ`/`WRITE`/`OFFSET_ADD` and similar permissions.
dataflow: MaybeUnset<DataflowConstraints>,
/// Local equivalence-class information. Combine with the [`GlobalEquivSet`] to get a
/// complete [`EquivSet`], which assigns an equivalence class to each [`PointerId`] that
/// appears in the function. Used for renumbering [`PointerId`]s.
local_equiv: MaybeUnset<LocalEquivSet>,
/// Local part of the permission/flag assignment. Combine with the [`GlobalAssignment`] to
/// get a complete [`Assignment`] for this function, which maps every [`PointerId`] in this
/// function to a [`PermissionSet`] and [`FlagSet`].
lasn: MaybeUnset<LocalAssignment>,
/// Constraints on pointee types gathered from the body of this function.
pointee_constraints: MaybeUnset<pointee_type::ConstraintSet<'tcx>>,
/// Local part of pointee type sets.
local_pointee_types: MaybeUnset<LocalPointerTable<PointeeTypes<'tcx>>>,
/// Table for looking up the most recent write to a given local.
recent_writes: MaybeUnset<RecentWrites>,
}

// Follow a postorder traversal, so that callers are visited after their callees. This means
// callee signatures will usually be up to date when we visit the call site.
let all_fn_ldids = fn_body_owners_postorder(tcx);
Expand Down Expand Up @@ -848,6 +877,13 @@ fn run(tcx: TyCtxt) {

mark_foreign_fixed(&mut gacx, &mut gasn, tcx);

if rewrite_pointwise {
// In pointwise mode, we restrict rewriting to a single fn at a time. All statics and
// struct fields are marked `FIXED` so they won't be rewritten.
mark_all_statics_fixed(&mut gacx, &mut gasn);
mark_all_structs_fixed(&mut gacx, &mut gasn, tcx);
}

for (ptr, perms) in gacx.known_fn_ptr_perms() {
let existing_perms = &mut gasn.perms[ptr];
existing_perms.remove(PermissionSet::UNIQUE);
Expand Down Expand Up @@ -1199,6 +1235,46 @@ fn run(tcx: TyCtxt) {
}
}

if !rewrite_pointwise {
run2(
None,
tcx,
gacx,
gasn,
&global_pointee_types,
func_info,
&all_fn_ldids,
&fixed_defs,
&known_perm_error_fns,
);
} else {
for &ldid in &all_fn_ldids {
run2(
Some(ldid),
tcx,
gacx.clone(),
gasn.clone(),
&global_pointee_types,
func_info.clone(),
&all_fn_ldids,
&fixed_defs,
&known_perm_error_fns,
);
}
}
}

fn run2<'tcx>(
pointwise_fn_ldid: Option<LocalDefId>,
tcx: TyCtxt<'tcx>,
mut gacx: GlobalAnalysisCtxt<'tcx>,
mut gasn: GlobalAssignment,
global_pointee_types: &GlobalPointerTable<PointeeTypes<'tcx>>,
mut func_info: HashMap<LocalDefId, FuncInfo<'tcx>>,
all_fn_ldids: &Vec<LocalDefId>,
fixed_defs: &HashSet<DefId>,
known_perm_error_fns: &HashSet<DefId>,
) {
// ----------------------------------
// Generate rewrites
// ----------------------------------
Expand All @@ -1224,15 +1300,25 @@ fn run(tcx: TyCtxt) {

// For testing, putting #[c2rust_analyze_test::fail_before_rewriting] on a function marks it as
// failed at this point.
for &ldid in &all_fn_ldids {
if !util::has_test_attr(tcx, ldid, TestAttr::FailBeforeRewriting) {
continue;
for &ldid in all_fn_ldids {
let mut should_mark_failed = false;
if util::has_test_attr(tcx, ldid, TestAttr::FailBeforeRewriting) {
should_mark_failed = true;
}
if let Some(pointwise_fn_ldid) = pointwise_fn_ldid {
// In pointwise mode, mark all functions except `pointwise_fn_ldid` as failed to
// prevent rewriting.
if ldid != pointwise_fn_ldid {
should_mark_failed = true;
}
}
if should_mark_failed {
gacx.mark_fn_failed(
ldid.to_def_id(),
DontRewriteFnReason::FAKE_INVALID_FOR_TESTING,
PanicDetail::new("explicit fail_before_rewriting for testing".to_owned()),
);
}
gacx.mark_fn_failed(
ldid.to_def_id(),
DontRewriteFnReason::FAKE_INVALID_FOR_TESTING,
PanicDetail::new("explicit fail_before_rewriting for testing".to_owned()),
);
}

// Buffer debug output for each function. Grouping together all the different types of info
Expand Down Expand Up @@ -1266,7 +1352,7 @@ fn run(tcx: TyCtxt) {
// rewrite, such as pointers in the signatures of non-rewritten functions.
process_new_dont_rewrite_items(&mut gacx, &mut gasn);

for &ldid in &all_fn_ldids {
for &ldid in all_fn_ldids {
if gacx.dont_rewrite_fn(ldid.to_def_id()) {
continue;
}
Expand Down Expand Up @@ -1602,6 +1688,14 @@ fn run(tcx: TyCtxt) {
"alongside" => {
update_files = rewrite::UpdateFiles::Alongside;
}
"pointwise" => {
let pointwise_fn_ldid = pointwise_fn_ldid.expect(
"C2RUST_ANALYZE_REWRITE_MODE=pointwise, \
but pointwise_fn_ldid is unset?",
);
let pointwise_fn_name = tcx.item_name(pointwise_fn_ldid.to_def_id());
update_files = rewrite::UpdateFiles::AlongsidePointwise(pointwise_fn_name);
}
_ => panic!("bad value {:?} for C2RUST_ANALYZE_REWRITE_MODE", val),
}
}
Expand Down
9 changes: 7 additions & 2 deletions c2rust-analyze/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ bitflags! {
}
}

#[derive(Default)]
#[derive(Clone, Default)]
pub struct AdtMetadataTable<'tcx> {
pub table: HashMap<DefId, AdtMetadata<'tcx>>,
pub struct_dids: Vec<DefId>,
Expand Down Expand Up @@ -381,6 +381,7 @@ impl<'tcx> Debug for AdtMetadataTable<'tcx> {
}
}

#[derive(Clone)]
pub struct GlobalAnalysisCtxt<'tcx> {
pub tcx: TyCtxt<'tcx>,
pub lcx: LTyCtxt<'tcx>,
Expand Down Expand Up @@ -464,6 +465,7 @@ impl<'a, 'tcx> AnalysisCtxt<'_, 'tcx> {
}
}

#[derive(Clone)]
pub struct AnalysisCtxtData<'tcx> {
ptr_info: LocalPointerTable<PointerInfo>,
local_tys: IndexVec<Local, LTy<'tcx>>,
Expand All @@ -472,13 +474,14 @@ pub struct AnalysisCtxtData<'tcx> {
string_literal_locs: Vec<Location>,
}

/// Per-function record stored in [`FnOriginMap::fn_info`], keyed there by `DefId`.
///
/// NOTE(review): judging by the names, this holds the origin information for one function
/// signature — confirm against the code that builds it.
///
/// `Clone` is derived because [`FnOriginMap`], which owns these values, is itself `Clone`.
#[derive(Clone)]
pub struct FnSigOrigins<'tcx> {
/// NOTE(review): presumably the origin parameters declared by the signature — confirm.
pub origin_params: Vec<OriginParam>,
/// Labeled types whose labels are slices of [`OriginArg`]s.
/// NOTE(review): presumably one entry per signature input, in order — confirm.
pub inputs: Vec<LabeledTy<'tcx, &'tcx [OriginArg<'tcx>]>>,
/// Labeled type whose labels are slices of [`OriginArg`]s.
/// NOTE(review): presumably describes the signature's return type — confirm.
pub output: LabeledTy<'tcx, &'tcx [OriginArg<'tcx>]>,
}

#[derive(Default)]
#[derive(Clone, Default)]
pub struct FnOriginMap<'tcx> {
pub fn_info: HashMap<DefId, FnSigOrigins<'tcx>>,
}
Expand Down Expand Up @@ -1420,6 +1423,7 @@ impl GlobalAssignment {
}
}

#[derive(Clone, Debug)]
pub struct LocalAssignment {
pub perms: LocalPointerTable<PermissionSet>,
pub flags: LocalPointerTable<FlagSet>,
Expand Down Expand Up @@ -1594,6 +1598,7 @@ pub fn print_ty_with_pointer_labels_into<L: Copy>(
}

/// Map for associating flags (such as `DontRewriteFnReason`) with keys (such as `DefId`).
#[derive(Clone, Debug)]
pub struct FlagMap<K, V> {
/// Stores the current flags for each key. If no flags are set, the entry is omitted; that is,
/// for every entry `(k, v)`, it's always the case that `v != V::default()`.
Expand Down
4 changes: 4 additions & 0 deletions c2rust-analyze/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,9 @@ enum RewriteMode {
/// Save rewritten code to a separate file alongside each source file.
#[value(name = "alongside")]
Alongside,
/// Rewrite each function separately, and write the results for each to a separate file.
#[value(name = "pointwise")]
Pointwise,
}

fn exit_with_status(status: ExitStatus) {
Expand Down Expand Up @@ -445,6 +448,7 @@ fn cargo_wrapper(rustc_wrapper: &Path) -> anyhow::Result<()> {
RewriteMode::None => "none",
RewriteMode::InPlace => "inplace",
RewriteMode::Alongside => "alongside",
RewriteMode::Pointwise => "pointwise",
};
cmd.env("C2RUST_ANALYZE_REWRITE_MODE", val);
}
Expand Down
4 changes: 2 additions & 2 deletions c2rust-analyze/src/pointee_type/constraint_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ pub enum Constraint<'tcx> {
Subset(PointerId, PointerId),
}

#[derive(Debug, Default)]
#[derive(Clone, Debug, Default)]
pub struct ConstraintSet<'tcx> {
pub constraints: Vec<Constraint<'tcx>>,
constraint_dedup: HashSet<Constraint<'tcx>>,
Expand Down Expand Up @@ -79,7 +79,7 @@ impl<'tcx> From<LTy<'tcx>> for CTy<'tcx> {
}
}

#[derive(Debug, Default)]
#[derive(Clone, Debug, Default)]
pub struct VarTable<'tcx> {
/// Equivalence class representative for each variable. This can be either a known type
/// (`CTy::Ty`) or an inference variable (`CTy::Var`).
Expand Down
1 change: 1 addition & 0 deletions c2rust-analyze/src/recent_writes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use rustc_middle::mir::{
use std::collections::HashMap;

/// Table for looking up the most recent write to a `Local` prior to a particular MIR statement.
#[derive(Clone, Debug)]
pub struct RecentWrites {
blocks: IndexVec<BasicBlock, BlockWrites>,
/// For each local, whether its address is taken anywhere within the current function.
Expand Down
14 changes: 12 additions & 2 deletions c2rust-analyze/src/rewrite/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,10 @@ pub enum UpdateFiles {
No,
InPlace,
Alongside,
/// Update mode used for pointwise testing. If the file being rewritten is `foo.rs`, and the
/// rewriting mode is `AlongsidePointwise("bar")`, then the rewritten code is written to
/// `foo.bar.rs`.
AlongsidePointwise(rustc_span::symbol::Symbol),
}

pub fn apply_rewrites(
Expand All @@ -332,14 +336,20 @@ pub fn apply_rewrites(
}
println!(" ===== END {:?} =====", filename);

if matches!(update_files, UpdateFiles::InPlace | UpdateFiles::Alongside) {
if !matches!(update_files, UpdateFiles::No) {
let mut path_ok = false;
if let FileName::Real(ref rfn) = filename {
if let Some(path) = rfn.local_path() {
let path = match update_files {
UpdateFiles::InPlace => path.to_owned(),
UpdateFiles::Alongside => path.with_extension("new.rs"),
_ => unreachable!(),
UpdateFiles::AlongsidePointwise(ref s) => {
let ext = format!("{}.rs", s);
let p = path.with_extension(&ext);
eprintln!("writing to {:?}", p);
p
}
UpdateFiles::No => unreachable!(),
};
fs::write(path, src).unwrap();
path_ok = true;
Expand Down
2 changes: 1 addition & 1 deletion c2rust-analyze/src/rewrite/ty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ fn create_rewrite_label<'tcx>(
let mut pointee_ty = None;
// For now, we only rewrite in cases where the inferred pointee has no arguments.
// TODO: expand this to handle pointer-to-pointer cases and other complex inferred pointees
if !pointer_lty.label.is_none() {
if !pointer_lty.label.is_none() && !flags[pointer_lty.label].contains(FlagSet::FIXED) {
if let Some(lty) = pointee_types[pointer_lty.label].get_sole_lty() {
let ty = lty.ty;
if lty.args.len() == 0 && !ty_has_adt_lifetime(ty, adt_metadata) {
Expand Down

0 comments on commit 6264fae

Please sign in to comment.