diff --git a/c2rust-analyze/src/analyze.rs b/c2rust-analyze/src/analyze.rs index 88d0bea5bb..8b586cdce1 100644 --- a/c2rust-analyze/src/analyze.rs +++ b/c2rust-analyze/src/analyze.rs @@ -387,6 +387,31 @@ fn mark_foreign_fixed<'tcx>( } } +fn mark_all_statics_fixed<'tcx>(gacx: &mut GlobalAnalysisCtxt<'tcx>, gasn: &mut GlobalAssignment) { + for (did, lty) in gacx.static_tys.iter() { + make_ty_fixed(gasn, lty); + + // Also fix the `addr_of_static` permissions. + let ptr = gacx.addr_of_static[&did]; + gasn.flags[ptr].insert(FlagSet::FIXED); + } +} + +fn mark_all_structs_fixed<'tcx>( + gacx: &mut GlobalAnalysisCtxt<'tcx>, + gasn: &mut GlobalAssignment, + tcx: TyCtxt<'tcx>, +) { + for adt_did in &gacx.adt_metadata.struct_dids { + let adt_def = tcx.adt_def(adt_did); + let fields = adt_def.all_fields(); + for field in fields { + let field_lty = gacx.field_ltys[&field.did]; + make_ty_fixed(gasn, field_lty); + } + } +} + fn parse_def_id(s: &str) -> Result { // DefId debug output looks like `DefId(0:1 ~ alias1[0dc4]::{use#0})`. The ` ~ name` part may // be omitted if the name/DefPath info is not available at the point in the compiler where the @@ -509,6 +534,34 @@ fn get_fixed_defs(tcx: TyCtxt) -> io::Result> { Ok(fixed_defs) } +/// Local information, specific to a single function. Many of the data structures we use for +/// the pointer analysis have a "global" part that's shared between all functions and a "local" +/// part that's specific to the function being analyzed; this struct contains only the local +/// parts. The different fields are set, used, and cleared at various points below. +#[derive(Clone, Default)] +struct FuncInfo<'tcx> { + /// Local analysis context data, such as [`LTy`]s for all MIR locals. Combine with the + /// [`GlobalAnalysisCtxt`] to get a complete [`AnalysisCtxt`] for use within this function. + acx_data: MaybeUnset>, + /// Dataflow constraints gathered from the body of this function. These are used for + /// propagating `READ`/`WRITE`/`OFFSET_ADD` and similar permissions. + dataflow: MaybeUnset, + /// Local equivalence-class information. Combine with the [`GlobalEquivSet`] to get a + /// complete [`EquivSet`], which assigns an equivalence class to each [`PointerId`] that + /// appears in the function. Used for renumbering [`PointerId`]s. + local_equiv: MaybeUnset, + /// Local part of the permission/flag assignment. Combine with the [`GlobalAssignment`] to + /// get a complete [`Assignment`] for this function, which maps every [`PointerId`] in this + /// function to a [`PermissionSet`] and [`FlagSet`]. + lasn: MaybeUnset, + /// Constraints on pointee types gathered from the body of this function. + pointee_constraints: MaybeUnset>, + /// Local part of pointee type sets. + local_pointee_types: MaybeUnset>>, + /// Table for looking up the most recent write to a given local. + recent_writes: MaybeUnset, +} + fn run(tcx: TyCtxt) { eprintln!("all defs:"); for ldid in tcx.hir_crate_items(()).definitions() { @@ -518,37 +571,13 @@ fn run(tcx: TyCtxt) { // Load the list of fixed defs early, so any errors are reported immediately. let fixed_defs = get_fixed_defs(tcx).unwrap(); + let rewrite_pointwise = env::var("C2RUST_ANALYZE_REWRITE_MODE") + .ok() + .map_or(false, |val| val == "pointwise"); + let mut gacx = GlobalAnalysisCtxt::new(tcx); let mut func_info = HashMap::new(); - /// Local information, specific to a single function. Many of the data structures we use for - /// the pointer analysis have a "global" part that's shared between all functions and a "local" - /// part that's specific to the function being analyzed; this struct contains only the local - /// parts. The different fields are set, used, and cleared at various points below. - #[derive(Default)] - struct FuncInfo<'tcx> { - /// Local analysis context data, such as [`LTy`]s for all MIR locals. Combine with the - /// [`GlobalAnalysisCtxt`] to get a complete [`AnalysisCtxt`] for use within this function. - acx_data: MaybeUnset>, - /// Dataflow constraints gathered from the body of this function. These are used for - /// propagating `READ`/`WRITE`/`OFFSET_ADD` and similar permissions. - dataflow: MaybeUnset, - /// Local equivalence-class information. Combine with the [`GlobalEquivSet`] to get a - /// complete [`EquivSet`], which assigns an equivalence class to each [`PointerId`] that - /// appears in the function. Used for renumbering [`PointerId`]s. - local_equiv: MaybeUnset, - /// Local part of the permission/flag assignment. Combine with the [`GlobalAssignment`] to - /// get a complete [`Assignment`] for this function, which maps every [`PointerId`] in this - /// function to a [`PermissionSet`] and [`FlagSet`]. - lasn: MaybeUnset, - /// Constraints on pointee types gathered from the body of this function. - pointee_constraints: MaybeUnset>, - /// Local part of pointee type sets. - local_pointee_types: MaybeUnset>>, - /// Table for looking up the most recent write to a given local. - recent_writes: MaybeUnset, - } - // Follow a postorder traversal, so that callers are visited after their callees. This means // callee signatures will usually be up to date when we visit the call site. let all_fn_ldids = fn_body_owners_postorder(tcx); @@ -848,6 +877,13 @@ fn run(tcx: TyCtxt) { mark_foreign_fixed(&mut gacx, &mut gasn, tcx); + if rewrite_pointwise { + // In pointwise mode, we restrict rewriting to a single fn at a time. All statics and + // struct fields are marked `FIXED` so they won't be rewritten. + mark_all_statics_fixed(&mut gacx, &mut gasn); + mark_all_structs_fixed(&mut gacx, &mut gasn, tcx); + } + for (ptr, perms) in gacx.known_fn_ptr_perms() { let existing_perms = &mut gasn.perms[ptr]; existing_perms.remove(PermissionSet::UNIQUE); @@ -1199,6 +1235,46 @@ fn run(tcx: TyCtxt) { } } + if !rewrite_pointwise { + run2( + None, + tcx, + gacx, + gasn, + &global_pointee_types, + func_info, + &all_fn_ldids, + &fixed_defs, + &known_perm_error_fns, + ); + } else { + for &ldid in &all_fn_ldids { + run2( + Some(ldid), + tcx, + gacx.clone(), + gasn.clone(), + &global_pointee_types, + func_info.clone(), + &all_fn_ldids, + &fixed_defs, + &known_perm_error_fns, + ); + } + } +} + +fn run2<'tcx>( + pointwise_fn_ldid: Option, + tcx: TyCtxt<'tcx>, + mut gacx: GlobalAnalysisCtxt<'tcx>, + mut gasn: GlobalAssignment, + global_pointee_types: &GlobalPointerTable>, + mut func_info: HashMap>, + all_fn_ldids: &Vec, + fixed_defs: &HashSet, + known_perm_error_fns: &HashSet, +) { // ---------------------------------- // Generate rewrites // ---------------------------------- @@ -1224,15 +1300,25 @@ fn run(tcx: TyCtxt) { // For testing, putting #[c2rust_analyze_test::fail_before_rewriting] on a function marks it as // failed at this point. - for &ldid in &all_fn_ldids { - if !util::has_test_attr(tcx, ldid, TestAttr::FailBeforeRewriting) { - continue; + for &ldid in all_fn_ldids { + let mut should_mark_failed = false; + if util::has_test_attr(tcx, ldid, TestAttr::FailBeforeRewriting) { + should_mark_failed = true; + } + if let Some(pointwise_fn_ldid) = pointwise_fn_ldid { + // In pointwise mode, mark all functions except `pointwise_fn_ldid` as failed to + // prevent rewriting. + if ldid != pointwise_fn_ldid { + should_mark_failed = true; + } + } + if should_mark_failed { + gacx.mark_fn_failed( + ldid.to_def_id(), + DontRewriteFnReason::FAKE_INVALID_FOR_TESTING, + PanicDetail::new("explicit fail_before_rewriting for testing".to_owned()), + ); } - gacx.mark_fn_failed( - ldid.to_def_id(), - DontRewriteFnReason::FAKE_INVALID_FOR_TESTING, - PanicDetail::new("explicit fail_before_rewriting for testing".to_owned()), - ); } // Buffer debug output for each function. Grouping together all the different types of info @@ -1266,7 +1352,7 @@ fn run(tcx: TyCtxt) { // rewrite, such as pointers in the signatures of non-rewritten functions. process_new_dont_rewrite_items(&mut gacx, &mut gasn); - for &ldid in &all_fn_ldids { + for &ldid in all_fn_ldids { if gacx.dont_rewrite_fn(ldid.to_def_id()) { continue; } @@ -1602,6 +1688,14 @@ fn run(tcx: TyCtxt) { "alongside" => { update_files = rewrite::UpdateFiles::Alongside; } + "pointwise" => { + let pointwise_fn_ldid = pointwise_fn_ldid.expect( + "C2RUST_ANALYZE_REWRITE_MODE=pointwise, \ + but pointwise_fn_ldid is unset?", + ); + let pointwise_fn_name = tcx.item_name(pointwise_fn_ldid.to_def_id()); + update_files = rewrite::UpdateFiles::AlongsidePointwise(pointwise_fn_name); + } _ => panic!("bad value {:?} for C2RUST_ANALYZE_REWRITE_MODE", val), } } diff --git a/c2rust-analyze/src/context.rs b/c2rust-analyze/src/context.rs index 24f7ac7ccb..a89777c3f8 100644 --- a/c2rust-analyze/src/context.rs +++ b/c2rust-analyze/src/context.rs @@ -298,7 +298,7 @@ bitflags! { } } -#[derive(Default)] +#[derive(Clone, Default)] pub struct AdtMetadataTable<'tcx> { pub table: HashMap>, pub struct_dids: Vec, @@ -381,6 +381,7 @@ impl<'tcx> Debug for AdtMetadataTable<'tcx> { } } +#[derive(Clone)] pub struct GlobalAnalysisCtxt<'tcx> { pub tcx: TyCtxt<'tcx>, pub lcx: LTyCtxt<'tcx>, @@ -464,6 +465,7 @@ impl<'a, 'tcx> AnalysisCtxt<'_, 'tcx> { } } +#[derive(Clone)] pub struct AnalysisCtxtData<'tcx> { ptr_info: LocalPointerTable, local_tys: IndexVec>, @@ -472,13 +474,14 @@ pub struct AnalysisCtxtData<'tcx> { string_literal_locs: Vec, } +#[derive(Clone)] pub struct FnSigOrigins<'tcx> { pub origin_params: Vec, pub inputs: Vec]>>, pub output: LabeledTy<'tcx, &'tcx [OriginArg<'tcx>]>, } -#[derive(Default)] +#[derive(Clone, Default)] pub struct FnOriginMap<'tcx> { pub fn_info: HashMap>, } @@ -1420,6 +1423,7 @@ impl GlobalAssignment { } } +#[derive(Clone, Debug)] pub struct LocalAssignment { pub perms: LocalPointerTable, pub flags: LocalPointerTable, @@ -1594,6 +1598,7 @@ pub fn print_ty_with_pointer_labels_into( } /// Map for associating flags (such as `DontRewriteFnReason`) with keys (such as `DefId`). +#[derive(Clone, Debug)] pub struct FlagMap { /// Stores the current flags for each key. If no flags are set, the entry is omitted; that is, /// for every entry `(k, v)`, it's always the case that `v != V::default()`. diff --git a/c2rust-analyze/src/main.rs b/c2rust-analyze/src/main.rs index fd3befdd8e..3f751031c0 100644 --- a/c2rust-analyze/src/main.rs +++ b/c2rust-analyze/src/main.rs @@ -131,6 +131,9 @@ enum RewriteMode { /// Save rewritten code to a separate file alongside each source file. #[value(name = "alongside")] Alongside, + /// Rewrite each function separately, and write the results for each to a separate file. + #[value(name = "pointwise")] + Pointwise, } fn exit_with_status(status: ExitStatus) { @@ -445,6 +448,7 @@ fn cargo_wrapper(rustc_wrapper: &Path) -> anyhow::Result<()> { RewriteMode::None => "none", RewriteMode::InPlace => "inplace", RewriteMode::Alongside => "alongside", + RewriteMode::Pointwise => "pointwise", }; cmd.env("C2RUST_ANALYZE_REWRITE_MODE", val); } diff --git a/c2rust-analyze/src/pointee_type/constraint_set.rs b/c2rust-analyze/src/pointee_type/constraint_set.rs index 470e5bdf57..c12016e7fb 100644 --- a/c2rust-analyze/src/pointee_type/constraint_set.rs +++ b/c2rust-analyze/src/pointee_type/constraint_set.rs @@ -24,7 +24,7 @@ pub enum Constraint<'tcx> { Subset(PointerId, PointerId), } -#[derive(Debug, Default)] +#[derive(Clone, Debug, Default)] pub struct ConstraintSet<'tcx> { pub constraints: Vec>, constraint_dedup: HashSet>, @@ -79,7 +79,7 @@ impl<'tcx> From> for CTy<'tcx> { } } -#[derive(Debug, Default)] +#[derive(Clone, Debug, Default)] pub struct VarTable<'tcx> { /// Equivalence class representative for each variable. This can be either a known type /// (`CTy::Ty`) or an inference variable (`CTy::Var`). diff --git a/c2rust-analyze/src/recent_writes.rs b/c2rust-analyze/src/recent_writes.rs index c6a69be170..9b228809f3 100644 --- a/c2rust-analyze/src/recent_writes.rs +++ b/c2rust-analyze/src/recent_writes.rs @@ -6,6 +6,7 @@ use rustc_middle::mir::{ use std::collections::HashMap; /// Table for looking up the most recent write to a `Local` prior to a particular MIR statement. +#[derive(Clone, Debug)] pub struct RecentWrites { blocks: IndexVec, /// For each local, whether its address is taken anywhere within the current function. diff --git a/c2rust-analyze/src/rewrite/mod.rs b/c2rust-analyze/src/rewrite/mod.rs index e6eb35ebcf..affebe42e3 100644 --- a/c2rust-analyze/src/rewrite/mod.rs +++ b/c2rust-analyze/src/rewrite/mod.rs @@ -310,6 +310,10 @@ pub enum UpdateFiles { No, InPlace, Alongside, + /// Update mode used for pointwise testing. If the file being rewritten is `foo.rs`, and the + /// rewriting mode is `AlongsidePointwise("bar")`, then the rewritten code is written to + /// `foo.bar.rs`. + AlongsidePointwise(rustc_span::symbol::Symbol), } pub fn apply_rewrites( @@ -332,14 +336,20 @@ pub fn apply_rewrites( } println!(" ===== END {:?} =====", filename); - if matches!(update_files, UpdateFiles::InPlace | UpdateFiles::Alongside) { + if !matches!(update_files, UpdateFiles::No) { let mut path_ok = false; if let FileName::Real(ref rfn) = filename { if let Some(path) = rfn.local_path() { let path = match update_files { UpdateFiles::InPlace => path.to_owned(), UpdateFiles::Alongside => path.with_extension("new.rs"), - _ => unreachable!(), + UpdateFiles::AlongsidePointwise(ref s) => { + let ext = format!("{}.rs", s); + let p = path.with_extension(&ext); + eprintln!("writing to {:?}", p); + p + } + UpdateFiles::No => unreachable!(), }; fs::write(path, src).unwrap(); path_ok = true; diff --git a/c2rust-analyze/src/rewrite/ty.rs b/c2rust-analyze/src/rewrite/ty.rs index 538537cd5f..d529ac1341 100644 --- a/c2rust-analyze/src/rewrite/ty.rs +++ b/c2rust-analyze/src/rewrite/ty.rs @@ -114,7 +114,7 @@ fn create_rewrite_label<'tcx>( let mut pointee_ty = None; // For now, we only rewrite in cases where the inferred pointee has no arguments. // TODO: expand this to handle pointer-to-pointer cases and other complex inferred pointees - if !pointer_lty.label.is_none() { + if !pointer_lty.label.is_none() && !flags[pointer_lty.label].contains(FlagSet::FIXED) { if let Some(lty) = pointee_types[pointer_lty.label].get_sole_lty() { let ty = lty.ty; if lty.args.len() == 0 && !ty_has_adt_lifetime(ty, adt_metadata) {