From 74744531bc8967a862335c43e3a53918e9dbe401 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Tue, 30 Jan 2024 13:19:40 -0800 Subject: [PATCH 01/10] analyze: track reasons why functions are not rewritten --- c2rust-analyze/src/context.rs | 111 ++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/c2rust-analyze/src/context.rs b/c2rust-analyze/src/context.rs index 85701a3fbe..f2294ca387 100644 --- a/c2rust-analyze/src/context.rs +++ b/c2rust-analyze/src/context.rs @@ -186,6 +186,59 @@ bitflags! { } } +bitflags! { + /// Flags indicating reasons why a function isn't being rewritten. + #[derive(Default)] + pub struct DontRewriteFnReason: u16 { + /// The user requested that this function be left unchanged. + const USER_REQUEST = 0x0001; + /// The function contains an unsupported int-to-pointer cast. + const INT_TO_PTR_CAST = 0x0002; + /// The function calls an extern function that has no safe rewrite. + const EXTERN_CALL = 0x0004; + /// The function calls another local function that isn't being rewritten. + const NON_REWRITTEN_CALLEE = 0x0008; + /// The function uses `Cell` pointers whose types are too complex for the current rewrite + /// rules. + const COMPLEX_CELL = 0x0010; + /// The function contains a pointer-to-pointer cast that isn't covered by rewrite rules. + const PTR_TO_PTR_CAST = 0x0020; + /// The function dereferences a pointer that would remain raw after rewriting. + const RAW_PTR_DEREF = 0x0040; + + /// Dataflow analysis failed on this function. + const DATAFLOW_FAILED = 0x0080; + /// Borrowcheck/Polonius analysis failed on this function. + const BORROWCK_FAILED = 0x0100; + /// MIR rewrite generation failed on this function. + const MIR_REWRITE_FAILED = 0x0200; + /// HIR/AST rewrite generation failed on this function. + const HIR_REWRITE_FAILED = 0x0200; + } +} + +bitflags! { + /// Flags indicating reasons why a static isn't being rewritten. + #[derive(Default)] + pub struct DontRewriteStaticReason: u16 { + /// The user requested that this static be left unchanged. + const USER_REQUEST = 0x0001; + /// The field is used in a function that isn't being rewritten. + const NON_REWRITTEN_USER = 0x0002; + } +} + +bitflags! { + /// Flags indicating reasons why an ADT field isn't being rewritten. + #[derive(Default)] + pub struct DontRewriteFieldReason: u16 { + /// The user requested that this field be left unchanged. + const USER_REQUEST = 0x0001; + /// The field is used in a function that isn't being rewritten. + const NON_REWRITTEN_USER = 0x0002; + } +} + pub use crate::pointer_id::PointerId; pub type LTy<'tcx> = LabeledTy<'tcx, PointerId>; @@ -326,6 +379,10 @@ pub struct GlobalAnalysisCtxt<'tcx> { /// [`name`]: KnownFn::name known_fns: HashMap<&'static str, &'static KnownFn>, + dont_rewrite_fns: HashMap, + dont_rewrite_statics: HashMap, + dont_rewrite_fields: HashMap, + /// `DefId`s of functions where analysis failed, and a [`PanicDetail`] explaining the reason /// for each failure. pub fns_failed: HashMap, @@ -700,6 +757,9 @@ impl<'tcx> GlobalAnalysisCtxt<'tcx> { .iter() .map(|known_fn| (known_fn.name, known_fn)) .collect(), + dont_rewrite_fns: HashMap::new(), + dont_rewrite_statics: HashMap::new(), + dont_rewrite_fields: HashMap::new(), fns_failed: HashMap::new(), fns_fixed: HashSet::new(), field_ltys: HashMap::new(), @@ -762,6 +822,9 @@ impl<'tcx> GlobalAnalysisCtxt<'tcx> { ref mut ptr_info, ref mut fn_sigs, known_fns: _, + dont_rewrite_fns: _, + dont_rewrite_statics: _, + dont_rewrite_fields: _, fns_failed: _, fns_fixed: _, ref mut field_ltys, @@ -880,6 +943,54 @@ impl<'tcx> GlobalAnalysisCtxt<'tcx> { .filter_map(|(&def_id, fn_sig)| Some((fn_sig, self.known_fn(def_id)?))) .flat_map(|(fn_sig, known_fn)| known_fn.ptr_perms(fn_sig)) } + + pub fn dont_rewrite_fn(&self, def_id: DefId) -> bool { + self.dont_rewrite_fns.contains_key(&def_id) + } + + pub fn dont_rewrite_fn_reason(&self, def_id: DefId) -> DontRewriteFnReason { + self.dont_rewrite_fns + .get(&def_id) + .copied() + .unwrap_or_default() + } + + pub fn set_dont_rewrite_fn(&mut self, def_id: DefId, reason: DontRewriteFnReason) { + assert_ne!(reason, DontRewriteFnReason::empty()); + *self.dont_rewrite_fns.entry(def_id).or_default() |= reason; + } + + pub fn dont_rewrite_static(&self, def_id: DefId) -> bool { + self.dont_rewrite_statics.contains_key(&def_id) + } + + pub fn dont_rewrite_static_reason(&self, def_id: DefId) -> DontRewriteStaticReason { + self.dont_rewrite_statics + .get(&def_id) + .copied() + .unwrap_or_default() + } + + pub fn set_dont_rewrite_static(&mut self, def_id: DefId, reason: DontRewriteStaticReason) { + assert_ne!(reason, DontRewriteStaticReason::empty()); + *self.dont_rewrite_statics.entry(def_id).or_default() |= reason; + } + + pub fn dont_rewrite_field(&self, def_id: DefId) -> bool { + self.dont_rewrite_fields.contains_key(&def_id) + } + + pub fn dont_rewrite_field_reason(&self, def_id: DefId) -> DontRewriteFieldReason { + self.dont_rewrite_fields + .get(&def_id) + .copied() + .unwrap_or_default() + } + + pub fn set_dont_rewrite_field(&mut self, def_id: DefId, reason: DontRewriteFieldReason) { + assert_ne!(reason, DontRewriteFieldReason::empty()); + *self.dont_rewrite_fields.entry(def_id).or_default() |= reason; + } } impl<'a, 'tcx> AnalysisCtxt<'a, 'tcx> { From b517233e879b54985a1cc0b29e95b1bf57d7ed97 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Wed, 31 Jan 2024 11:51:19 -0800 Subject: [PATCH 02/10] analyze: replace fns_fixed and fns_failed with DontRewriteFnReason --- c2rust-analyze/src/analyze.rs | 67 ++++++++++++++++---------- c2rust-analyze/src/context.rs | 91 ++++++++++++++++------------------- 2 files changed, 83 insertions(+), 75 deletions(-) diff --git a/c2rust-analyze/src/analyze.rs b/c2rust-analyze/src/analyze.rs index 691ac5f075..a9ebf66f9b 100644 --- a/c2rust-analyze/src/analyze.rs +++ b/c2rust-analyze/src/analyze.rs @@ -1,8 +1,9 @@ use crate::annotate::AnnotationBuffer; use crate::borrowck; use crate::context::{ - self, AnalysisCtxt, AnalysisCtxtData, FlagSet, GlobalAnalysisCtxt, GlobalAssignment, LFnSig, - LTy, LTyCtxt, LocalAssignment, PermissionSet, PointerId, PointerInfo, + self, AnalysisCtxt, AnalysisCtxtData, DontRewriteFieldReason, DontRewriteFnReason, + DontRewriteStaticReason, FlagSet, GlobalAnalysisCtxt, GlobalAssignment, LFnSig, LTy, LTyCtxt, + LocalAssignment, PermissionSet, PointerId, PointerInfo, }; use crate::dataflow; use crate::dataflow::DataflowConstraints; @@ -621,7 +622,7 @@ fn run(tcx: TyCtxt) { // ---------------------------------- for &ldid in &all_fn_ldids { - if gacx.fn_failed(ldid.to_def_id()) { + if gacx.fn_analysis_invalid(ldid.to_def_id()) { continue; } @@ -669,7 +670,7 @@ fn run(tcx: TyCtxt) { info.pointee_constraints.set(pointee_constraints); } Err(pd) => { - gacx.mark_fn_failed(ldid.to_def_id(), pd); + gacx.mark_fn_failed(ldid.to_def_id(), DontRewriteFnReason::POINTEE_INVALID, pd); } } @@ -698,7 +699,7 @@ fn run(tcx: TyCtxt) { } for &ldid in &all_fn_ldids { - if gacx.fn_failed(ldid.to_def_id()) { + if gacx.fn_analysis_invalid(ldid.to_def_id()) { continue; } @@ -716,7 +717,7 @@ fn run(tcx: TyCtxt) { // Print results for debugging for &ldid in &all_fn_ldids { - if gacx.fn_failed(ldid.to_def_id()) { + if gacx.fn_analysis_invalid(ldid.to_def_id()) { continue; } @@ -742,7 +743,7 @@ fn run(tcx: TyCtxt) { // computed during this the process is kept around for use in later passes. let mut global_equiv = GlobalEquivSet::new(gacx.num_pointers()); for &ldid in &all_fn_ldids { - if gacx.fn_failed(ldid.to_def_id()) { + if gacx.fn_analysis_invalid(ldid.to_def_id()) { continue; } @@ -763,7 +764,7 @@ fn run(tcx: TyCtxt) { Ok(x) => x, Err(pd) => { info.acx_data.set(acx.into_data()); - gacx.mark_fn_failed(ldid.to_def_id(), pd); + gacx.mark_fn_failed(ldid.to_def_id(), DontRewriteFnReason::DATAFLOW_INVALID, pd); continue; } }; @@ -796,7 +797,7 @@ fn run(tcx: TyCtxt) { gacx.remap_pointers(&global_equiv_map, global_counter); for &ldid in &all_fn_ldids { - if gacx.fn_failed(ldid.to_def_id()) { + if gacx.fn_analysis_invalid(ldid.to_def_id()) { continue; } @@ -979,8 +980,6 @@ fn run(tcx: TyCtxt) { // Items in the "fixed defs" list have all pointers in their types set to `FIXED`. For // testing, putting #[c2rust_analyze_test::fixed_signature] on an item has the same effect. - // - // Functions in the list are also added to `gacx.fns_fixed`. for ldid in tcx.hir_crate_items(()).definitions() { // TODO (HACK): `Clone::clone` impls are omitted from `fn_sigs` and cause a panic below. if is_impl_clone(tcx, ldid.to_def_id()) { @@ -996,7 +995,7 @@ fn run(tcx: TyCtxt) { None => panic!("missing fn_sig for {:?}", ldid), }; make_sig_fixed(&mut gasn, lsig); - gacx.fns_fixed.insert(ldid.to_def_id()); + gacx.set_dont_rewrite_fn(ldid.to_def_id(), DontRewriteFnReason::USER_REQUEST); } DefKind::Struct | DefKind::Enum | DefKind::Union => { @@ -1016,6 +1015,10 @@ fn run(tcx: TyCtxt) { None => panic!("missing field_lty for {:?}", ldid), }; make_ty_fixed(&mut gasn, lty); + gacx.set_dont_rewrite_field( + field.did, + DontRewriteFieldReason::USER_REQUEST, + ); } } } @@ -1034,6 +1037,10 @@ fn run(tcx: TyCtxt) { if !ptr.is_none() { gasn.flags[ptr].insert(FlagSet::FIXED); } + gacx.set_dont_rewrite_static( + ldid.to_def_id(), + DontRewriteStaticReason::USER_REQUEST, + ); } _ => {} @@ -1052,6 +1059,7 @@ fn run(tcx: TyCtxt) { } gacx.mark_fn_failed( ldid.to_def_id(), + DontRewriteFnReason::FAKE_INVALID_FOR_TESTING, PanicDetail::new("explicit fail_before_analysis for testing".to_owned()), ); } @@ -1069,7 +1077,7 @@ fn run(tcx: TyCtxt) { let old_gasn = gasn.clone(); for &ldid in &all_fn_ldids { - if gacx.fn_failed(ldid.to_def_id()) { + if gacx.fn_analysis_invalid(ldid.to_def_id()) { continue; } @@ -1103,7 +1111,11 @@ fn run(tcx: TyCtxt) { match r { Ok(()) => {} Err(pd) => { - gacx.mark_fn_failed(ldid.to_def_id(), pd); + gacx.mark_fn_failed( + ldid.to_def_id(), + DontRewriteFnReason::BORROWCK_INVALID, + pd, + ); continue; } } @@ -1136,7 +1148,7 @@ fn run(tcx: TyCtxt) { // Do final processing on each function. for &ldid in &all_fn_ldids { - if gacx.fn_failed(ldid.to_def_id()) { + if gacx.fn_analysis_invalid(ldid.to_def_id()) { continue; } @@ -1159,7 +1171,11 @@ fn run(tcx: TyCtxt) { match r { Ok(()) => {} Err(pd) => { - gacx.mark_fn_failed(ldid.to_def_id(), pd); + gacx.mark_fn_failed( + ldid.to_def_id(), + DontRewriteFnReason::MISC_ANALYSIS_INVALID, + pd, + ); continue; } } @@ -1223,6 +1239,7 @@ fn run(tcx: TyCtxt) { } gacx.mark_fn_failed( ldid.to_def_id(), + DontRewriteFnReason::FAKE_INVALID_FOR_TESTING, PanicDetail::new("explicit fail_before_rewriting for testing".to_owned()), ); } @@ -1251,12 +1268,12 @@ fn run(tcx: TyCtxt) { all_rewrites.clear(); eprintln!("\n--- start rewriting ---"); - // Before generating rewrites, add the FIXED flag to the signatures of all functions that - // failed analysis. + // Before generating rewrites, add the FIXED flag to the signatures of all functions where + // rewriting will be skipped. // - // The set of failed functions is monotonically nondecreasing throughout this loop, so - // there's no need to worry about potentially removing `FIXED` from some functions. - for did in gacx.iter_fns_failed() { + // The set of nonrewritten functions is monotonically nondecreasing throughout this loop, + // so there's no need to worry about potentially removing `FIXED` from some functions. + for did in gacx.iter_fns_skip_rewrite() { let lsig = gacx.fn_sigs[&did]; for sig_lty in lsig.inputs_and_output() { for lty in sig_lty.iter() { @@ -1269,7 +1286,7 @@ fn run(tcx: TyCtxt) { } for &ldid in &all_fn_ldids { - if gacx.fn_skip_rewrite(ldid.to_def_id()) { + if gacx.dont_rewrite_fn(ldid.to_def_id()) { continue; } @@ -1317,7 +1334,7 @@ fn run(tcx: TyCtxt) { match r { Ok(()) => {} Err(pd) => { - gacx.mark_fn_failed(ldid.to_def_id(), pd); + gacx.mark_fn_failed(ldid.to_def_id(), DontRewriteFnReason::REWRITE_INVALID, pd); continue; } } @@ -1342,7 +1359,7 @@ fn run(tcx: TyCtxt) { match r { Ok(()) => {} Err(pd) => { - gacx.mark_fn_failed(def_id, pd); + gacx.mark_fn_failed(def_id, DontRewriteFnReason::SHIM_GENERATION_FAILED, pd); any_failed = true; continue; } @@ -1422,7 +1439,7 @@ fn run(tcx: TyCtxt) { None => continue, }; - if gacx.fn_failed(ldid.to_def_id()) { + if gacx.fn_analysis_invalid(ldid.to_def_id()) { continue; } diff --git a/c2rust-analyze/src/context.rs b/c2rust-analyze/src/context.rs index f2294ca387..b409cff32e 100644 --- a/c2rust-analyze/src/context.rs +++ b/c2rust-analyze/src/context.rs @@ -191,29 +191,44 @@ bitflags! { #[derive(Default)] pub struct DontRewriteFnReason: u16 { /// The user requested that this function be left unchanged. - const USER_REQUEST = 0x0001; + const USER_REQUEST = 1 << 0; /// The function contains an unsupported int-to-pointer cast. - const INT_TO_PTR_CAST = 0x0002; + const INT_TO_PTR_CAST = 1 << 1; /// The function calls an extern function that has no safe rewrite. - const EXTERN_CALL = 0x0004; + const EXTERN_CALL = 1 << 2; /// The function calls another local function that isn't being rewritten. - const NON_REWRITTEN_CALLEE = 0x0008; + const NON_REWRITTEN_CALLEE = 1 << 3; /// The function uses `Cell` pointers whose types are too complex for the current rewrite /// rules. - const COMPLEX_CELL = 0x0010; + const COMPLEX_CELL = 1 << 4; /// The function contains a pointer-to-pointer cast that isn't covered by rewrite rules. - const PTR_TO_PTR_CAST = 0x0020; + const PTR_TO_PTR_CAST = 1 << 5; /// The function dereferences a pointer that would remain raw after rewriting. - const RAW_PTR_DEREF = 0x0040; - - /// Dataflow analysis failed on this function. - const DATAFLOW_FAILED = 0x0080; - /// Borrowcheck/Polonius analysis failed on this function. - const BORROWCK_FAILED = 0x0100; - /// MIR rewrite generation failed on this function. - const MIR_REWRITE_FAILED = 0x0200; - /// HIR/AST rewrite generation failed on this function. - const HIR_REWRITE_FAILED = 0x0200; + const RAW_PTR_DEREF = 1 << 6; + /// Calling this function from non-rewritten code requires a shim, but shim generation + /// failed. + const SHIM_GENERATION_FAILED = 1 << 7; + + /// Pointee analysis results for this function are invalid. + const POINTEE_INVALID = 1 << 10; + /// Dataflow analysis results for this function are invalid. + const DATAFLOW_INVALID = 1 << 11; + /// Borrowcheck/Polonius analysis results for this function are invalid. + const BORROWCK_INVALID = 1 << 12; + /// Results of some other analysis for this function are invalid. + const MISC_ANALYSIS_INVALID = 1 << 13; + /// The set of rewrites generated for this function is invalid or incomplete. + const REWRITE_INVALID = 1 << 14; + /// Analysis results for this function are valid, but were marked as invalid anyway in + /// order to test error recovery. + const FAKE_INVALID_FOR_TESTING = 1 << 15; + + const ANALYSIS_INVALID_MASK = Self::POINTEE_INVALID.bits + | Self::DATAFLOW_INVALID.bits + | Self::BORROWCK_INVALID.bits + | Self::MISC_ANALYSIS_INVALID.bits + | Self::REWRITE_INVALID.bits + | Self::FAKE_INVALID_FOR_TESTING.bits; } } @@ -386,9 +401,6 @@ pub struct GlobalAnalysisCtxt<'tcx> { /// `DefId`s of functions where analysis failed, and a [`PanicDetail`] explaining the reason /// for each failure. pub fns_failed: HashMap, - /// `DefId`s of functions that were marked "fixed" (non-rewritable) through command-line - /// arguments. - pub fns_fixed: HashSet, pub field_ltys: HashMap>, @@ -761,7 +773,6 @@ impl<'tcx> GlobalAnalysisCtxt<'tcx> { dont_rewrite_statics: HashMap::new(), dont_rewrite_fields: HashMap::new(), fns_failed: HashMap::new(), - fns_fixed: HashSet::new(), field_ltys: HashMap::new(), static_tys: HashMap::new(), addr_of_static: HashMap::new(), @@ -826,7 +837,6 @@ impl<'tcx> GlobalAnalysisCtxt<'tcx> { dont_rewrite_statics: _, dont_rewrite_fields: _, fns_failed: _, - fns_fixed: _, ref mut field_ltys, ref mut static_tys, ref mut addr_of_static, @@ -883,39 +893,15 @@ impl<'tcx> GlobalAnalysisCtxt<'tcx> { } } - pub fn fn_failed(&self, did: DefId) -> bool { - self.fns_failed.contains_key(&did) - } - - pub fn mark_fn_failed(&mut self, did: DefId, detail: PanicDetail) { - if self.fns_failed.contains_key(&did) { - return; - } - - self.fns_failed.insert(did, detail); - } - - pub fn iter_fns_failed(&self) -> impl Iterator + '_ { - self.fns_failed.keys().copied() - } - - pub fn fn_skip_rewrite(&self, did: DefId) -> bool { - self.fn_failed(did) || self.fns_fixed.contains(&did) + pub fn mark_fn_failed(&mut self, did: DefId, reason: DontRewriteFnReason, detail: PanicDetail) { + self.set_dont_rewrite_fn(did, reason); + // Insert `detail` if there isn't yet an entry for this `DefId`. + self.fns_failed.entry(did).or_insert(detail); } /// Iterate over the `DefId`s of all functions that should skip rewriting. pub fn iter_fns_skip_rewrite<'a>(&'a self) -> impl Iterator + 'a { - // This let binding avoids a lifetime error with the closure and return-position `impl - // Trait`. - let fns_fixed = &self.fns_fixed; - // If the same `DefId` is in both `fns_failed` and `fns_fixed`, be sure to return it only - // once. - fns_fixed.iter().copied().chain( - self.fns_failed - .keys() - .copied() - .filter(move |did| !fns_fixed.contains(&did)), - ) + self.dont_rewrite_fns.keys().copied() } pub fn known_fn(&self, def_id: DefId) -> Option<&'static KnownFn> { @@ -960,6 +946,11 @@ impl<'tcx> GlobalAnalysisCtxt<'tcx> { *self.dont_rewrite_fns.entry(def_id).or_default() |= reason; } + pub fn fn_analysis_invalid(&self, def_id: DefId) -> bool { + self.dont_rewrite_fn_reason(def_id) + .intersects(DontRewriteFnReason::ANALYSIS_INVALID_MASK) + } + pub fn dont_rewrite_static(&self, def_id: DefId) -> bool { self.dont_rewrite_statics.contains_key(&def_id) } From bcb5ec7880131b927661992dbd048fb961551972 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Wed, 31 Jan 2024 13:52:05 -0800 Subject: [PATCH 03/10] analyze: track newly added entries in dont_rewrite_fns etc --- c2rust-analyze/src/analyze.rs | 15 ++--- c2rust-analyze/src/context.rs | 123 ++++++++++++++++++++-------------- 2 files changed, 78 insertions(+), 60 deletions(-) diff --git a/c2rust-analyze/src/analyze.rs b/c2rust-analyze/src/analyze.rs index a9ebf66f9b..0388402d51 100644 --- a/c2rust-analyze/src/analyze.rs +++ b/c2rust-analyze/src/analyze.rs @@ -995,7 +995,8 @@ fn run(tcx: TyCtxt) { None => panic!("missing fn_sig for {:?}", ldid), }; make_sig_fixed(&mut gasn, lsig); - gacx.set_dont_rewrite_fn(ldid.to_def_id(), DontRewriteFnReason::USER_REQUEST); + gacx.dont_rewrite_fns + .add(ldid.to_def_id(), DontRewriteFnReason::USER_REQUEST); } DefKind::Struct | DefKind::Enum | DefKind::Union => { @@ -1015,10 +1016,8 @@ fn run(tcx: TyCtxt) { None => panic!("missing field_lty for {:?}", ldid), }; make_ty_fixed(&mut gasn, lty); - gacx.set_dont_rewrite_field( - field.did, - DontRewriteFieldReason::USER_REQUEST, - ); + gacx.dont_rewrite_fields + .add(field.did, DontRewriteFieldReason::USER_REQUEST); } } } @@ -1037,10 +1036,8 @@ fn run(tcx: TyCtxt) { if !ptr.is_none() { gasn.flags[ptr].insert(FlagSet::FIXED); } - gacx.set_dont_rewrite_static( - ldid.to_def_id(), - DontRewriteStaticReason::USER_REQUEST, - ); + gacx.dont_rewrite_statics + .add(ldid.to_def_id(), DontRewriteStaticReason::USER_REQUEST); } _ => {} diff --git a/c2rust-analyze/src/context.rs b/c2rust-analyze/src/context.rs index b409cff32e..98ab2b0d8c 100644 --- a/c2rust-analyze/src/context.rs +++ b/c2rust-analyze/src/context.rs @@ -36,7 +36,9 @@ use rustc_middle::ty::TyKind; use rustc_type_ir::RegionKind::{ReEarlyBound, ReStatic}; use std::collections::{HashMap, HashSet}; use std::fmt::{Debug, Write as _}; -use std::ops::Index; +use std::hash::Hash; +use std::mem; +use std::ops::{BitOr, Index}; bitflags! { /// Permissions are created such that we allow dropping permissions in any assignment. @@ -394,9 +396,9 @@ pub struct GlobalAnalysisCtxt<'tcx> { /// [`name`]: KnownFn::name known_fns: HashMap<&'static str, &'static KnownFn>, - dont_rewrite_fns: HashMap, - dont_rewrite_statics: HashMap, - dont_rewrite_fields: HashMap, + pub dont_rewrite_fns: FlagMap, + pub dont_rewrite_statics: FlagMap, + pub dont_rewrite_fields: FlagMap, /// `DefId`s of functions where analysis failed, and a [`PanicDetail`] explaining the reason /// for each failure. @@ -769,9 +771,9 @@ impl<'tcx> GlobalAnalysisCtxt<'tcx> { .iter() .map(|known_fn| (known_fn.name, known_fn)) .collect(), - dont_rewrite_fns: HashMap::new(), - dont_rewrite_statics: HashMap::new(), - dont_rewrite_fields: HashMap::new(), + dont_rewrite_fns: FlagMap::new(), + dont_rewrite_statics: FlagMap::new(), + dont_rewrite_fields: FlagMap::new(), fns_failed: HashMap::new(), field_ltys: HashMap::new(), static_tys: HashMap::new(), @@ -894,14 +896,14 @@ impl<'tcx> GlobalAnalysisCtxt<'tcx> { } pub fn mark_fn_failed(&mut self, did: DefId, reason: DontRewriteFnReason, detail: PanicDetail) { - self.set_dont_rewrite_fn(did, reason); + self.dont_rewrite_fns.add(did, reason); // Insert `detail` if there isn't yet an entry for this `DefId`. self.fns_failed.entry(did).or_insert(detail); } /// Iterate over the `DefId`s of all functions that should skip rewriting. pub fn iter_fns_skip_rewrite<'a>(&'a self) -> impl Iterator + 'a { - self.dont_rewrite_fns.keys().copied() + self.dont_rewrite_fns.keys() } pub fn known_fn(&self, def_id: DefId) -> Option<&'static KnownFn> { @@ -931,56 +933,19 @@ impl<'tcx> GlobalAnalysisCtxt<'tcx> { } pub fn dont_rewrite_fn(&self, def_id: DefId) -> bool { - self.dont_rewrite_fns.contains_key(&def_id) + self.dont_rewrite_fns.contains(def_id) } - - pub fn dont_rewrite_fn_reason(&self, def_id: DefId) -> DontRewriteFnReason { - self.dont_rewrite_fns - .get(&def_id) - .copied() - .unwrap_or_default() - } - - pub fn set_dont_rewrite_fn(&mut self, def_id: DefId, reason: DontRewriteFnReason) { - assert_ne!(reason, DontRewriteFnReason::empty()); - *self.dont_rewrite_fns.entry(def_id).or_default() |= reason; - } - pub fn fn_analysis_invalid(&self, def_id: DefId) -> bool { - self.dont_rewrite_fn_reason(def_id) + self.dont_rewrite_fns + .get(def_id) .intersects(DontRewriteFnReason::ANALYSIS_INVALID_MASK) } pub fn dont_rewrite_static(&self, def_id: DefId) -> bool { - self.dont_rewrite_statics.contains_key(&def_id) + self.dont_rewrite_statics.contains(def_id) } - - pub fn dont_rewrite_static_reason(&self, def_id: DefId) -> DontRewriteStaticReason { - self.dont_rewrite_statics - .get(&def_id) - .copied() - .unwrap_or_default() - } - - pub fn set_dont_rewrite_static(&mut self, def_id: DefId, reason: DontRewriteStaticReason) { - assert_ne!(reason, DontRewriteStaticReason::empty()); - *self.dont_rewrite_statics.entry(def_id).or_default() |= reason; - } - pub fn dont_rewrite_field(&self, def_id: DefId) -> bool { - self.dont_rewrite_fields.contains_key(&def_id) - } - - pub fn dont_rewrite_field_reason(&self, def_id: DefId) -> DontRewriteFieldReason { - self.dont_rewrite_fields - .get(&def_id) - .copied() - .unwrap_or_default() - } - - pub fn set_dont_rewrite_field(&mut self, def_id: DefId, reason: DontRewriteFieldReason) { - assert_ne!(reason, DontRewriteFieldReason::empty()); - *self.dont_rewrite_fields.entry(def_id).or_default() |= reason; + self.dont_rewrite_fields.contains(def_id) } } @@ -1625,3 +1590,59 @@ pub fn print_ty_with_pointer_labels_into( } } } + +/// Map for associating flags (such as `DontRewriteFnReason`) with keys (such as `DefId`). +pub struct FlagMap { + /// Stores the current flags for each key. If no flags are set, the entry is omitted; that is, + /// for every entry `(k, v)`, it's always the case that `v != V::default()`. + m: HashMap, + /// Keys that were added to `m` since the last call to `take_new()`. Specifically, this + /// includes every `k` for which `get(k)` was `V::default()` but `get(k)` now has a different, + /// non-default value. + new: Vec, +} + +impl FlagMap +where + K: Copy + Hash + Eq, + V: Copy + Default + Eq + BitOr, +{ + pub fn new() -> FlagMap { + FlagMap { + m: HashMap::new(), + new: Vec::new(), + } + } + + pub fn add(&mut self, k: K, v: V) { + if v == V::default() { + return; + } + let cur = self.m.entry(k).or_default(); + if *cur == V::default() { + self.new.push(k); + } + *cur = *cur | v; + } + + /// Get the current flags for `k`. Returns `V::default()` if no flags have been set. + pub fn get(&self, k: K) -> V { + self.m.get(&k).copied().unwrap_or_default() + } + + pub fn contains(&self, k: K) -> bool { + self.m.contains_key(&k) + } + + pub fn new_keys(&self) -> &[K] { + &self.new + } + + pub fn take_new_keys(&mut self) -> Vec { + mem::take(&mut self.new) + } + + pub fn keys<'a>(&'a self) -> impl Iterator + 'a { + self.m.keys().copied() + } +} From 5b7531326c9b9ea9b8dd82e2c60583f792db82d4 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Wed, 31 Jan 2024 14:42:33 -0800 Subject: [PATCH 04/10] analyze: use dont_rewrite_fns.new_keys() in rewriting loop --- c2rust-analyze/src/analyze.rs | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/c2rust-analyze/src/analyze.rs b/c2rust-analyze/src/analyze.rs index 0388402d51..2f975dd1ac 100644 --- a/c2rust-analyze/src/analyze.rs +++ b/c2rust-analyze/src/analyze.rs @@ -1265,21 +1265,18 @@ fn run(tcx: TyCtxt) { all_rewrites.clear(); eprintln!("\n--- start rewriting ---"); + // Clear the list of newly non-rewritable fns. After generating rewrites, if any functions + // became non-rewritable, we run another iteration. + let new_non_rewritable_fns = gacx.dont_rewrite_fns.take_new_keys(); + // Before generating rewrites, add the FIXED flag to the signatures of all functions where // rewriting will be skipped. // // The set of nonrewritten functions is monotonically nondecreasing throughout this loop, // so there's no need to worry about potentially removing `FIXED` from some functions. - for did in gacx.iter_fns_skip_rewrite() { - let lsig = gacx.fn_sigs[&did]; - for sig_lty in lsig.inputs_and_output() { - for lty in sig_lty.iter() { - let ptr = lty.label; - if !ptr.is_none() { - gasn.flags[ptr].insert(FlagSet::FIXED); - } - } - } + for did in new_non_rewritable_fns { + let lsig = &gacx.fn_sigs[&did]; + make_sig_fixed(&mut gasn, lsig); } for &ldid in &all_fn_ldids { @@ -1343,7 +1340,6 @@ fn run(tcx: TyCtxt) { all_rewrites.extend(shim_call_rewrites); // Generate shims for functions that need them. - let mut any_failed = false; for def_id in shim_fn_def_ids { let r = panic_detail::catch_unwind(AssertUnwindSafe(|| { all_rewrites.push(rewrite::gen_shim_definition_rewrite( @@ -1357,13 +1353,13 @@ fn run(tcx: TyCtxt) { Ok(()) => {} Err(pd) => { gacx.mark_fn_failed(def_id, DontRewriteFnReason::SHIM_GENERATION_FAILED, pd); - any_failed = true; continue; } } } - if !any_failed { + // Exit the loop upon reaching a fixpoint. + if gacx.dont_rewrite_fns.new_keys().is_empty() { break; } } From f674238785db17889d2dbdf9ec3568561404ee2d Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Wed, 31 Jan 2024 15:05:53 -0800 Subject: [PATCH 05/10] analyze: set DontRewriteFnReason::COMPLEX_CELL on bad cell rewrites --- c2rust-analyze/src/analyze.rs | 27 +++++++++++++------- c2rust-analyze/src/rewrite/expr/mir_op.rs | 25 +++++++++++++++--- c2rust-analyze/src/rewrite/expr/mod.rs | 9 +++++-- c2rust-analyze/tests/filecheck/test_attrs.rs | 4 +-- 4 files changed, 49 insertions(+), 16 deletions(-) diff --git a/c2rust-analyze/src/analyze.rs b/c2rust-analyze/src/analyze.rs index 2f975dd1ac..5322dacd0c 100644 --- a/c2rust-analyze/src/analyze.rs +++ b/c2rust-analyze/src/analyze.rs @@ -1289,7 +1289,7 @@ fn run(tcx: TyCtxt) { let name = tcx.item_name(ldid.to_def_id()); let mir = tcx.mir_built(ldid_const); let mir = mir.borrow(); - let acx = gacx.function_context_with_data(&mir, info.acx_data.take()); + let mut acx = gacx.function_context_with_data(&mir, info.acx_data.take()); let asn = gasn.and(&mut info.lasn); let pointee_types = global_pointee_types.and(info.local_pointee_types.get()); @@ -1302,8 +1302,14 @@ fn run(tcx: TyCtxt) { } let hir_body_id = tcx.hir().body_owned_by(ldid); - let expr_rewrites = - rewrite::gen_expr_rewrites(&acx, &asn, pointee_types, &mir, hir_body_id); + let expr_rewrites = rewrite::gen_expr_rewrites( + &mut acx, + &asn, + pointee_types, + ldid.to_def_id(), + &mir, + hir_body_id, + ); let ty_rewrites = rewrite::gen_ty_rewrites(&acx, &asn, pointee_types, &mir, ldid); // Print rewrites let report = func_reports.entry(ldid).or_default(); @@ -1628,13 +1634,16 @@ fn run(tcx: TyCtxt) { eprintln!("\nerror summary:"); for ldid in tcx.hir().body_owners() { - if let Some(detail) = gacx.fns_failed.get(&ldid.to_def_id()) { - eprintln!( - "analysis of {:?} failed: {}", - ldid, - detail.to_string_short() - ); + let opt_detail = gacx.fns_failed.get(&ldid.to_def_id()); + let flags = gacx.dont_rewrite_fns.get(ldid.to_def_id()); + if opt_detail.is_none() && flags.is_empty() { + continue; } + let detail_str = match opt_detail { + Some(detail) => detail.to_string_short(), + None => "(no panic)".into(), + }; + eprintln!("analysis of {:?} failed: {:?}, {}", ldid, flags, detail_str,); } eprintln!( diff --git a/c2rust-analyze/src/rewrite/expr/mir_op.rs b/c2rust-analyze/src/rewrite/expr/mir_op.rs index cb2027e1e4..e5b3917526 100644 --- a/c2rust-analyze/src/rewrite/expr/mir_op.rs +++ b/c2rust-analyze/src/rewrite/expr/mir_op.rs @@ -7,7 +7,7 @@ //! all adjustments, as this would make even non-rewritten code extremely verbose, so we try to //! materialize adjustments only on code that's subject to some rewrite. -use crate::context::{AnalysisCtxt, Assignment, FlagSet, LTy, PermissionSet}; +use crate::context::{AnalysisCtxt, Assignment, DontRewriteFnReason, FlagSet, LTy, PermissionSet}; use crate::panic_detail; use crate::pointee_type::PointeeTypes; use crate::pointer_id::{PointerId, PointerTable}; @@ -128,6 +128,7 @@ struct ExprRewriteVisitor<'a, 'tcx> { mir: &'a Body<'tcx>, loc: Location, sub_loc: Vec, + errors: DontRewriteFnReason, } impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { @@ -152,9 +153,14 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { statement_index: 0, }, sub_loc: Vec::new(), + errors: DontRewriteFnReason::empty(), } } + fn err(&mut self, reason: DontRewriteFnReason) { + self.errors.insert(reason); + } + fn enter R, R>(&mut self, sub: SubLoc, f: F) -> R { self.sub_loc.push(sub); let r = f(self); @@ -240,6 +246,10 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { if !flags.contains(FlagSet::FIXED) { let desc = type_desc::perms_to_desc(local_lty.ty, perms, flags); if desc.own == Ownership::Cell { + if pl.projection.len() > 1 || desc.qty != Quantity::Single { + // NYI: `Cell` inside structs, arrays, or ptr-to-ptr + self.err(DontRewriteFnReason::COMPLEX_CELL); + } // this is an assignment like `*x = 2` but `x` has CELL permissions self.emit(RewriteKind::CellSet); } @@ -256,6 +266,10 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { let desc = type_desc::local_perms_to_desc(local_ty, perms, flags); if desc.own == Ownership::Cell { // this is an assignment like `let x = 2` but `x` has CELL permissions + if !pl.projection.is_empty() || desc.qty != Quantity::Single { + // NYI: `Cell` inside structs, arrays, or ptr-to-ptr + self.err(DontRewriteFnReason::COMPLEX_CELL); + } self.enter_rvalue(|v| v.emit(RewriteKind::CellNew)) } @@ -269,6 +283,10 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { if !flags.contains(FlagSet::FIXED) && flags.contains(FlagSet::CELL) { // this is an assignment like `let x = *y` but `y` has CELL permissions + if pl.projection.len() > 1 || desc.qty != Quantity::Single { + // NYI: `Cell` inside structs, arrays, or ptr-to-ptr + self.err(DontRewriteFnReason::COMPLEX_CELL); + } self.enter_rvalue(|v| v.emit(RewriteKind::CellGet)) } } @@ -1025,7 +1043,7 @@ pub fn gen_mir_rewrites<'tcx>( asn: &Assignment, pointee_types: PointerTable>, mir: &Body<'tcx>, -) -> HashMap> { +) -> (HashMap>, DontRewriteFnReason) { let mut out = HashMap::new(); let mut v = ExprRewriteVisitor::new(acx, asn, pointee_types, &mut out, mir); @@ -1048,5 +1066,6 @@ pub fn gen_mir_rewrites<'tcx>( } } - out + let errors = v.errors; + (out, errors) } diff --git a/c2rust-analyze/src/rewrite/expr/mod.rs b/c2rust-analyze/src/rewrite/expr/mod.rs index dda17fe74f..4bb7612ac6 100644 --- a/c2rust-analyze/src/rewrite/expr/mod.rs +++ b/c2rust-analyze/src/rewrite/expr/mod.rs @@ -2,6 +2,7 @@ use crate::context::{AnalysisCtxt, Assignment}; use crate::pointee_type::PointeeTypes; use crate::pointer_id::PointerTable; use crate::rewrite::Rewrite; +use rustc_hir::def_id::DefId; use rustc_hir::BodyId; use rustc_middle::mir::Body; use rustc_span::Span; @@ -17,13 +18,17 @@ pub use self::convert::convert_cast_rewrite; pub use self::mir_op::CastBuilder; pub fn gen_expr_rewrites<'tcx>( - acx: &AnalysisCtxt<'_, 'tcx>, + acx: &mut AnalysisCtxt<'_, 'tcx>, asn: &Assignment, pointee_types: PointerTable>, + def_id: DefId, mir: &Body<'tcx>, hir_body_id: BodyId, ) -> Vec<(Span, Rewrite)> { - let mir_rewrites = mir_op::gen_mir_rewrites(acx, asn, pointee_types, mir); + let (mir_rewrites, errors) = mir_op::gen_mir_rewrites(acx, asn, pointee_types, mir); + if !errors.is_empty() { + acx.gacx.dont_rewrite_fns.add(def_id, errors); + } let unlower_map = unlower::unlower(acx.tcx(), mir, hir_body_id); let rewrites_by_expr = distribute::distribute(acx.tcx(), unlower_map, mir_rewrites); let address_of_rewrites = hir_only_casts::remove_hir_only_casts(acx.tcx(), hir_body_id, |ex| { diff --git a/c2rust-analyze/tests/filecheck/test_attrs.rs b/c2rust-analyze/tests/filecheck/test_attrs.rs index 7d5a73ac6d..d76e77059d 100644 --- a/c2rust-analyze/tests/filecheck/test_attrs.rs +++ b/c2rust-analyze/tests/filecheck/test_attrs.rs @@ -17,13 +17,13 @@ fn g(x: *mut i32) -> *mut i32 { x } -// CHECK: analysis of DefId({{.*}} ~ test_attrs[{{.*}}]::h) failed: [unknown]: explicit fail_before_analysis for testing +// CHECK: analysis of DefId({{.*}} ~ test_attrs[{{.*}}]::h) failed: FAKE_INVALID_FOR_TESTING, [unknown]: explicit fail_before_analysis for testing #[c2rust_analyze_test::fail_before_analysis] fn h(x: *mut i32) -> *mut i32 { x } -// CHECK: analysis of DefId({{.*}} ~ test_attrs[{{.*}}]::i) failed: [unknown]: explicit fail_before_rewriting for testing +// CHECK: analysis of DefId({{.*}} ~ test_attrs[{{.*}}]::i) failed: FAKE_INVALID_FOR_TESTING, [unknown]: explicit fail_before_rewriting for testing #[c2rust_analyze_test::fail_before_rewriting] fn i(x: *mut i32) -> *mut i32 { x From a68b8ed83b2f5835b38c3407386b0b1fa420bf42 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Wed, 31 Jan 2024 16:56:41 -0800 Subject: [PATCH 06/10] analyze: record def-use info for fns mentioning adt fields --- c2rust-analyze/src/analyze.rs | 99 ++++++++++++++++++++++++++++++----- c2rust-analyze/src/context.rs | 49 ++++++++++++++++- 2 files changed, 133 insertions(+), 15 deletions(-) diff --git a/c2rust-analyze/src/analyze.rs b/c2rust-analyze/src/analyze.rs index 5322dacd0c..6d2a550af1 100644 --- a/c2rust-analyze/src/analyze.rs +++ b/c2rust-analyze/src/analyze.rs @@ -33,19 +33,11 @@ use rustc_hir::def_id::DefIndex; use rustc_hir::def_id::LocalDefId; use rustc_hir::definitions::DefPathData; use rustc_index::vec::IndexVec; -use rustc_middle::mir::visit::Visitor; -use rustc_middle::mir::AggregateKind; -use rustc_middle::mir::BindingForm; -use rustc_middle::mir::Body; -use rustc_middle::mir::Constant; -use rustc_middle::mir::Local; -use rustc_middle::mir::LocalDecl; -use rustc_middle::mir::LocalInfo; -use rustc_middle::mir::LocalKind; -use rustc_middle::mir::Location; -use rustc_middle::mir::Operand; -use rustc_middle::mir::Rvalue; -use rustc_middle::mir::StatementKind; +use rustc_middle::mir::visit::{PlaceContext, Visitor}; +use rustc_middle::mir::{ + AggregateKind, BindingForm, Body, Constant, Local, LocalDecl, LocalInfo, LocalKind, Location, + Operand, Place, PlaceElem, PlaceRef, Rvalue, StatementKind, +}; use rustc_middle::ty::GenericArgKind; use rustc_middle::ty::Ty; use rustc_middle::ty::TyCtxt; @@ -565,6 +557,8 @@ fn run(tcx: TyCtxt) { eprintln!(" {:?}", ldid); } + populate_field_users(&mut gacx, &all_fn_ldids); + // ---------------------------------- // Label all global types // ---------------------------------- @@ -1954,6 +1948,85 @@ fn for_each_callee(tcx: TyCtxt, ldid: LocalDefId, f: impl FnMut(LocalDefId)) { CalleeVisitor { tcx, mir, f }.visit_body(mir); } +/// Call `f` for each field mentioned in a place projection within the body of `ldid`. +fn for_each_field_use(tcx: TyCtxt, ldid: LocalDefId, f: impl FnMut(DefId)) { + let ldid_const = WithOptConstParam::unknown(ldid); + let mir = tcx.mir_built(ldid_const); + let mir = mir.borrow(); + let mir: &Body = &mir; + + struct FieldUseVisitor<'a, 'tcx, F> { + tcx: TyCtxt<'tcx>, + mir: &'a Body<'tcx>, + f: F, + } + + impl<'tcx, F: FnMut(DefId)> Visitor<'tcx> for FieldUseVisitor<'_, 'tcx, F> { + fn visit_place( + &mut self, + place: &Place<'tcx>, + _context: PlaceContext, + _location: Location, + ) { + for (i, elem) in place.projection.iter().enumerate() { + let field_idx = match elem { + PlaceElem::Field(x, _) => x, + _ => continue, + }; + // Build a `PlaceRef` with all the projections up to, but not including, `elem`. + let place_ref = PlaceRef { + local: place.local, + projection: &place.projection[..i], + }; + let adt_pty = place_ref.ty(self.mir, self.tcx); + let adt_def = match adt_pty.ty.ty_adt_def() { + Some(x) => x, + // `PlaceElem::Field` also works on tuple types, which don't have an `AdtDef`. + None => continue, + }; + let variant_def = match adt_pty.variant_index { + None => adt_def.non_enum_variant(), + Some(i) => adt_def.variant(i), + }; + let field_def = &variant_def.fields[field_idx.index()]; + (self.f)(field_def.did); + } + } + } + + FieldUseVisitor { tcx, mir, f }.visit_body(mir); +} + +/// Populate `gacx.field_users` and `gacx.fn_fields_used`. +fn populate_field_users(gacx: &mut GlobalAnalysisCtxt, fn_ldids: &[LocalDefId]) { + let mut field_users = HashMap::new(); + let mut seen = HashSet::new(); + + for &ldid in fn_ldids { + let mut fn_fields = Vec::new(); + let mut fn_seen = HashSet::new(); + for_each_field_use(gacx.tcx, ldid, |field_def_id| { + if let Some(field_ldid) = field_def_id.as_local() { + if seen.insert((field_ldid, ldid)) { + field_users + .entry(field_ldid) + .or_insert_with(Vec::new) + .push(ldid); + } + + if fn_seen.insert(field_ldid) { + fn_fields.push(field_ldid); + } + } + }); + gacx.fn_fields_used.insert(ldid, fn_fields); + } + + for (k, v) in field_users { + gacx.field_users.insert(k, v); + } +} + pub struct AnalysisCallbacks; impl rustc_driver::Callbacks for AnalysisCallbacks { diff --git a/c2rust-analyze/src/context.rs b/c2rust-analyze/src/context.rs index 98ab2b0d8c..632e5df754 100644 --- a/c2rust-analyze/src/context.rs +++ b/c2rust-analyze/src/context.rs @@ -34,11 +34,12 @@ use rustc_middle::ty::Ty; use rustc_middle::ty::TyCtxt; use rustc_middle::ty::TyKind; use rustc_type_ir::RegionKind::{ReEarlyBound, ReStatic}; -use std::collections::{HashMap, HashSet}; +use std::collections::hash_map::{Entry, HashMap}; +use std::collections::HashSet; use std::fmt::{Debug, Write as _}; use std::hash::Hash; use std::mem; -use std::ops::{BitOr, Index}; +use std::ops::{BitOr, Index, Range}; bitflags! { /// Permissions are created such that we allow dropping permissions in any assignment. @@ -387,6 +388,7 @@ pub struct GlobalAnalysisCtxt<'tcx> { ptr_info: GlobalPointerTable, pub fn_sigs: HashMap>, + pub fn_fields_used: MultiMap, /// A map of all [`KnownFn`]s as determined by [`all_known_fns`]. /// @@ -405,6 +407,7 @@ pub struct GlobalAnalysisCtxt<'tcx> { pub fns_failed: HashMap, pub field_ltys: HashMap>, + pub field_users: MultiMap, pub static_tys: HashMap>, pub addr_of_static: HashMap, @@ -767,6 +770,7 @@ impl<'tcx> GlobalAnalysisCtxt<'tcx> { lcx: LabeledTyCtxt::new(tcx), ptr_info: GlobalPointerTable::empty(), fn_sigs: HashMap::new(), + fn_fields_used: MultiMap::new(), known_fns: all_known_fns() .iter() .map(|known_fn| (known_fn.name, known_fn)) @@ -776,6 +780,7 @@ impl<'tcx> GlobalAnalysisCtxt<'tcx> { dont_rewrite_fields: FlagMap::new(), fns_failed: HashMap::new(), field_ltys: HashMap::new(), + field_users: MultiMap::new(), static_tys: HashMap::new(), addr_of_static: HashMap::new(), adt_metadata: AdtMetadataTable::default(), @@ -834,12 +839,14 @@ impl<'tcx> GlobalAnalysisCtxt<'tcx> { lcx, ref mut ptr_info, ref mut fn_sigs, + fn_fields_used: _, known_fns: _, dont_rewrite_fns: _, dont_rewrite_statics: _, dont_rewrite_fields: _, fns_failed: _, ref mut field_ltys, + field_users: _, ref mut static_tys, ref mut addr_of_static, adt_metadata: _, @@ -1646,3 +1653,41 @@ where self.m.keys().copied() } } + +/// A map from keys to lists of values, with a compact representation. +#[derive(Clone, Debug)] +pub struct MultiMap { + defs: HashMap>, + users: Vec, +} + +impl MultiMap +where + K: Copy + Hash + Eq + Debug, +{ + pub fn new() -> MultiMap { + MultiMap { + defs: HashMap::new(), + users: Vec::new(), + } + } + + pub fn insert(&mut self, def: K, users: impl IntoIterator) { + let e = match self.defs.entry(def) { + Entry::Vacant(e) => e, + Entry::Occupied(_) => panic!("duplicate entry for {def:?}"), + }; + let start = self.users.len(); + self.users.extend(users); + let end = self.users.len(); + e.insert(start..end); + } + + pub fn get(&self, def: K) -> &[V] { + let range = match self.defs.get(&def) { + Some(x) => x, + None => return &[], + }; + &self.users[range.clone()] + } +} From 44064f3b2579c894c2449fb6a4299398baef4266 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Fri, 2 Feb 2024 13:43:44 -0800 Subject: [PATCH 07/10] analyze: implement some propagation cases for DontRewrite flags --- c2rust-analyze/src/analyze.rs | 74 ++++++++++++++++++++++++++++------- 1 file changed, 59 insertions(+), 15 deletions(-) diff --git a/c2rust-analyze/src/analyze.rs b/c2rust-analyze/src/analyze.rs index 6d2a550af1..5059c94bf9 100644 --- a/c2rust-analyze/src/analyze.rs +++ b/c2rust-analyze/src/analyze.rs @@ -1254,24 +1254,17 @@ fn run(tcx: TyCtxt) { let manual_shim_casts = manual_shim_casts; // It may take multiple tries to reach a state where all rewrites succeed. - loop { + for i in 0.. { + assert!(i < 100); func_reports.clear(); all_rewrites.clear(); eprintln!("\n--- start rewriting ---"); - // Clear the list of newly non-rewritable fns. After generating rewrites, if any functions - // became non-rewritable, we run another iteration. - let new_non_rewritable_fns = gacx.dont_rewrite_fns.take_new_keys(); - - // Before generating rewrites, add the FIXED flag to the signatures of all functions where - // rewriting will be skipped. - // - // The set of nonrewritten functions is monotonically nondecreasing throughout this loop, - // so there's no need to worry about potentially removing `FIXED` from some functions. - for did in new_non_rewritable_fns { - let lsig = &gacx.fn_sigs[&did]; - make_sig_fixed(&mut gasn, lsig); - } + // Update non-rewritten items first. This has two purposes. First, it clears the + // `new_keys()` lists, which we check at the end of the loop to see whether we've reached a + // fixpoint. Second, doing this adds the `FIXED` flag to pointers that we shouldn't + // rewrite, such as pointers in the signatures of non-rewritten functions. + process_new_dont_rewrite_items(&mut gacx, &mut gasn); for &ldid in &all_fn_ldids { if gacx.dont_rewrite_fn(ldid.to_def_id()) { @@ -1359,7 +1352,10 @@ fn run(tcx: TyCtxt) { } // Exit the loop upon reaching a fixpoint. - if gacx.dont_rewrite_fns.new_keys().is_empty() { + let any_new_dont_rewrite_keys = !gacx.dont_rewrite_fns.new_keys().is_empty() + || !gacx.dont_rewrite_statics.new_keys().is_empty() + || !gacx.dont_rewrite_fields.new_keys().is_empty(); + if !any_new_dont_rewrite_keys { break; } } @@ -2027,6 +2023,54 @@ fn populate_field_users(gacx: &mut GlobalAnalysisCtxt, fn_ldids: &[LocalDefId]) } } +/// Call `take_new_keys()` on `gacx.dont_rewrite_{fns,statics,fields}` and process the results. +/// This involves adding `FIXED` to some pointers and maybe propagating `DontRewrite` flags to +/// other items. +fn process_new_dont_rewrite_items(gacx: &mut GlobalAnalysisCtxt, gasn: &mut GlobalAssignment) { + for i in 0.. { + assert!(i < 20); + let mut found_any = false; + + for did in gacx.dont_rewrite_fns.take_new_keys() { + found_any = true; + let lsig = &gacx.fn_sigs[&did]; + make_sig_fixed(gasn, lsig); + + let ldid = match did.as_local() { + Some(x) => x, + None => continue, + }; + + for &field_ldid in gacx.fn_fields_used.get(ldid) { + gacx.dont_rewrite_fields.add( + field_ldid.to_def_id(), + DontRewriteFieldReason::NON_REWRITTEN_USER, + ); + } + + // TODO: callers/callees + } + + for did in gacx.dont_rewrite_statics.take_new_keys() { + found_any = true; + let lty = gacx.static_tys[&did]; + make_ty_fixed(gasn, lty); + } + + for did in gacx.dont_rewrite_fields.take_new_keys() { + found_any = true; + let lty = gacx.field_ltys[&did]; + make_ty_fixed(gasn, lty); + } + + // The previous steps can cause more items to become non-rewritten. Keep going until + // there's no more work to do. + if !found_any { + break; + } + } +} + pub struct AnalysisCallbacks; impl rustc_driver::Callbacks for AnalysisCallbacks { From e4f71453779b1ea127ebad6467669461278cd009 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Fri, 2 Feb 2024 13:42:05 -0800 Subject: [PATCH 08/10] analyze: report DontRewrite flags for all def kinds in debug output --- c2rust-analyze/src/analyze.rs | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/c2rust-analyze/src/analyze.rs b/c2rust-analyze/src/analyze.rs index 5059c94bf9..3b1ed35230 100644 --- a/c2rust-analyze/src/analyze.rs +++ b/c2rust-analyze/src/analyze.rs @@ -1623,17 +1623,30 @@ fn run(tcx: TyCtxt) { } eprintln!("\nerror summary:"); - for ldid in tcx.hir().body_owners() { - let opt_detail = gacx.fns_failed.get(&ldid.to_def_id()); - let flags = gacx.dont_rewrite_fns.get(ldid.to_def_id()); - if opt_detail.is_none() && flags.is_empty() { - continue; - } + fn sorted_def_ids(it: impl IntoIterator) -> Vec { + let mut v = it.into_iter().collect::>(); + v.sort(); + v + } + for def_id in sorted_def_ids(gacx.dont_rewrite_fns.keys()) { + let opt_detail = gacx.fns_failed.get(&def_id); + let flags = gacx.dont_rewrite_fns.get(def_id); + assert!(opt_detail.is_some() || !flags.is_empty()); let detail_str = match opt_detail { Some(detail) => detail.to_string_short(), None => "(no panic)".into(), }; - eprintln!("analysis of {:?} failed: {:?}, {}", ldid, flags, detail_str,); + eprintln!("analysis of {def_id:?} failed: {flags:?}, {detail_str}"); + } + + for def_id in sorted_def_ids(gacx.dont_rewrite_statics.keys()) { + let flags = gacx.dont_rewrite_statics.get(def_id); + eprintln!("analysis of {def_id:?} failed: {flags:?}"); + } + + for def_id in sorted_def_ids(gacx.dont_rewrite_fields.keys()) { + let flags = gacx.dont_rewrite_fields.get(def_id); + eprintln!("analysis of {def_id:?} failed: {flags:?}"); } eprintln!( From e0cdb2e94b8033e7fd4b3e014879752bbb1cc8e6 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 25 Mar 2024 13:15:09 -0700 Subject: [PATCH 09/10] analyze: fix warnings --- c2rust-analyze/src/context.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/c2rust-analyze/src/context.rs b/c2rust-analyze/src/context.rs index 632e5df754..020e8435cc 100644 --- a/c2rust-analyze/src/context.rs +++ b/c2rust-analyze/src/context.rs @@ -939,21 +939,16 @@ impl<'tcx> GlobalAnalysisCtxt<'tcx> { .flat_map(|(fn_sig, known_fn)| known_fn.ptr_perms(fn_sig)) } + /// Check whether the function with the given `def_id` has been marked as non-rewritable. pub fn dont_rewrite_fn(&self, def_id: DefId) -> bool { self.dont_rewrite_fns.contains(def_id) } + /// Check whether analysis failed for the function with the given `def_id`. pub fn fn_analysis_invalid(&self, def_id: DefId) -> bool { self.dont_rewrite_fns .get(def_id) .intersects(DontRewriteFnReason::ANALYSIS_INVALID_MASK) } - - pub fn dont_rewrite_static(&self, def_id: DefId) -> bool { - self.dont_rewrite_statics.contains(def_id) - } - pub fn dont_rewrite_field(&self, def_id: DefId) -> bool { - self.dont_rewrite_fields.contains(def_id) - } } impl<'a, 'tcx> AnalysisCtxt<'a, 'tcx> { From 33a606f1decf289566565ec5bbb35a76ebffea8a Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Fri, 12 Apr 2024 15:02:37 -0700 Subject: [PATCH 10/10] analyze: rename NON_REWRITTEN_USER -> _USE --- c2rust-analyze/src/analyze.rs | 2 +- c2rust-analyze/src/context.rs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/c2rust-analyze/src/analyze.rs b/c2rust-analyze/src/analyze.rs index 3b1ed35230..88d0bea5bb 100644 --- a/c2rust-analyze/src/analyze.rs +++ b/c2rust-analyze/src/analyze.rs @@ -2057,7 +2057,7 @@ fn process_new_dont_rewrite_items(gacx: &mut GlobalAnalysisCtxt, gasn: &mut Glob for &field_ldid in gacx.fn_fields_used.get(ldid) { gacx.dont_rewrite_fields.add( field_ldid.to_def_id(), - DontRewriteFieldReason::NON_REWRITTEN_USER, + DontRewriteFieldReason::NON_REWRITTEN_USE, ); } diff --git a/c2rust-analyze/src/context.rs b/c2rust-analyze/src/context.rs index 020e8435cc..24f7ac7ccb 100644 --- a/c2rust-analyze/src/context.rs +++ b/c2rust-analyze/src/context.rs @@ -241,8 +241,8 @@ bitflags! { pub struct DontRewriteStaticReason: u16 { /// The user requested that this static be left unchanged. const USER_REQUEST = 0x0001; - /// The field is used in a function that isn't being rewritten. - const NON_REWRITTEN_USER = 0x0002; + /// The static is used in a function that isn't being rewritten. + const NON_REWRITTEN_USE = 0x0002; } } @@ -253,7 +253,7 @@ bitflags! { /// The user requested that this field be left unchanged. const USER_REQUEST = 0x0001; /// The field is used in a function that isn't being rewritten. - const NON_REWRITTEN_USER = 0x0002; + const NON_REWRITTEN_USE = 0x0002; } }