From 7b003014718d5d245bae2facff41190ce01db889 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Thu, 1 Aug 2024 13:34:43 -0700 Subject: [PATCH 01/11] analyze: tests: add malloc + fresh cases to pointee test.rs --- c2rust-analyze/tests/filecheck/pointee.rs | 28 +++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/c2rust-analyze/tests/filecheck/pointee.rs b/c2rust-analyze/tests/filecheck/pointee.rs index c1afa47da0..290d030bb7 100644 --- a/c2rust-analyze/tests/filecheck/pointee.rs +++ b/c2rust-analyze/tests/filecheck/pointee.rs @@ -1,5 +1,11 @@ +#![feature(rustc_private)] use std::ptr; +extern crate libc; +extern "C" { + fn malloc(_: libc::c_ulong) -> *mut libc::c_void; +} + // CHECK-LABEL: fn memcpy1 unsafe fn memcpy1(dest: *mut (), src: *const ()) { // CHECK: let dest = (dest); @@ -22,3 +28,25 @@ unsafe fn remove_cast() { let dest_ptr = &mut dest as *mut u8 as *mut (); memcpy1(dest_ptr, src_ptr); } + + +// CHECK-LABEL: fn malloc_fresh +unsafe fn malloc_fresh() { + let mut p = 0 as *mut libc::c_void; + let fresh = &mut p; + // CHECK: malloc(4) + let q = malloc(4); + *fresh = q; + *(p as *mut i32) = 1; +} + +// CHECK-LABEL: fn malloc_no_fresh +unsafe fn malloc_no_fresh() { + let mut p = 0 as *mut libc::c_void; + // CHECK-NOT: malloc(4) + // CHECK: Box::new + // CHECK-NOT: malloc(4) + let q = malloc(4); + p = q; + *(p as *mut i32) = 1; +} From ebc468b213f0b94bc5c04e672391308ae5e4dde2 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Thu, 1 Aug 2024 13:39:23 -0700 Subject: [PATCH 02/11] analyze: dataflow: add helper method for pointee_type lookups --- c2rust-analyze/src/dataflow/type_check.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/c2rust-analyze/src/dataflow/type_check.rs b/c2rust-analyze/src/dataflow/type_check.rs index 560614ce36..9c541ba512 100644 --- a/c2rust-analyze/src/dataflow/type_check.rs +++ b/c2rust-analyze/src/dataflow/type_check.rs @@ -68,6 +68,10 @@ impl<'tcx> TypeChecker<'tcx, '_> { self.equiv_constraints.push((a, b)); } + fn pointee_type(&self, ptr: PointerId) -> Option> { + self.pointee_types[ptr].get_sole_lty() + } + fn record_access(&mut self, ptr: PointerId, mutbl: Mutability) { debug!("record_access({:?}, {:?})", ptr, mutbl); if ptr == PointerId::NONE { @@ -545,7 +549,7 @@ impl<'tcx> TypeChecker<'tcx, '_> { // Figure out whether we're copying one element or (possibly) several. let mut maybe_offset_perm = PermissionSet::OFFSET_ADD; let rv_ptr = rv_lty.label; - if let Some(pointee_lty) = self.pointee_types[rv_ptr].get_sole_lty() { + if let Some(pointee_lty) = self.pointee_type(rv_ptr) { if self.operand_is_size_of_t(loc, &args[2], pointee_lty.ty) { // The size is exactly the (original) size of the pointee type, so this // `memset` is operating on a single element only. @@ -589,7 +593,7 @@ impl<'tcx> TypeChecker<'tcx, '_> { // Figure out whether we're writing to one element or (possibly) several. let mut maybe_offset_perm = PermissionSet::OFFSET_ADD; let rv_ptr = rv_lty.label; - if let Some(pointee_lty) = self.pointee_types[rv_ptr].get_sole_lty() { + if let Some(pointee_lty) = self.pointee_type(rv_ptr) { if self.operand_is_size_of_t(loc, &args[2], pointee_lty.ty) { // The size is exactly the (original) size of the pointee type, so this // `memset` is operating on a single element only. From 7256e917ebb977aa5671e0cf50b335b3941ab84f Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Thu, 1 Aug 2024 15:26:14 -0700 Subject: [PATCH 03/11] analyze: use #[track_caller] on MaybeUnset methods This gives more useful panic locations when a `FuncInfo` field is accessed while unset. --- c2rust-analyze/src/analyze.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/c2rust-analyze/src/analyze.rs b/c2rust-analyze/src/analyze.rs index b2c7a464cc..731ddb0509 100644 --- a/c2rust-analyze/src/analyze.rs +++ b/c2rust-analyze/src/analyze.rs @@ -77,6 +77,7 @@ impl Default for MaybeUnset { } impl MaybeUnset { + #[track_caller] pub fn set(&mut self, x: T) { if self.0.is_some() { panic!("value is already set"); @@ -84,6 +85,7 @@ impl MaybeUnset { self.0 = Some(x); } + #[track_caller] pub fn clear(&mut self) { if self.0.is_none() { panic!("value is already cleared"); @@ -91,14 +93,17 @@ impl MaybeUnset { self.0 = None; } + #[track_caller] pub fn get(&self) -> &T { self.0.as_ref().expect("value is not set") } + #[track_caller] pub fn get_mut(&mut self) -> &mut T { self.0.as_mut().expect("value is not set") } + #[track_caller] pub fn take(&mut self) -> T { self.0.take().expect("value is not set") } From 585d0974d25e7276318a0bae1e61381855af7691 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Thu, 1 Aug 2024 16:36:26 -0700 Subject: [PATCH 04/11] analyze: refactor: add helper fn to do recent_writes analysis --- c2rust-analyze/src/analyze.rs | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/c2rust-analyze/src/analyze.rs b/c2rust-analyze/src/analyze.rs index 731ddb0509..46aacd392f 100644 --- a/c2rust-analyze/src/analyze.rs +++ b/c2rust-analyze/src/analyze.rs @@ -580,6 +580,12 @@ fn run(tcx: TyCtxt) { // all field types are labeled. gacx.construct_region_metadata(); + // ---------------------------------- + // Run early analyses + // ---------------------------------- + + do_recent_writes(&gacx, &mut func_info, &all_fn_ldids); + // ---------------------------------- // Infer pointee types // ---------------------------------- @@ -615,7 +621,6 @@ fn run(tcx: TyCtxt) { } info.local_pointee_types.set(local_pointee_types); - info.recent_writes.set(RecentWrites::new(&mir)); } // Iterate pointee constraints to a fixpoint. @@ -1918,6 +1923,31 @@ impl<'tcx> AssignPointerIds<'tcx> for AnalysisCtxt<'_, 'tcx> { } } +/// Run the `recent_writes` analysis, which computes the most recent write to each MIR local at +/// each program point. This can then be used to reconstruct the expression that's currently +/// stored in the local. For example, we use this to detect whether the size argument of `memcpy` +/// is `mem::size_of::()` for some `T`. +fn do_recent_writes<'tcx>( + gacx: &GlobalAnalysisCtxt<'tcx>, + func_info: &mut HashMap>, + all_fn_ldids: &[LocalDefId], +) { + let tcx = gacx.tcx; + for &ldid in all_fn_ldids { + if gacx.fn_analysis_invalid(ldid.to_def_id()) { + continue; + } + + let ldid_const = WithOptConstParam::unknown(ldid); + let info = func_info.get_mut(&ldid).unwrap(); + let mir = tcx.mir_built(ldid_const); + let mir = mir.borrow(); + + // This is very straightforward because it doesn't need an `AnalysisCtxt` and never fails. + info.recent_writes.set(RecentWrites::new(&mir)); + } +} + fn make_ty_fixed(asn: &mut Assignment, lty: LTy) { for lty in lty.iter() { let ptr = lty.label; From dc0aabaeb78e98c3856f70bf172a8d5d61891f52 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Thu, 1 Aug 2024 16:50:30 -0700 Subject: [PATCH 05/11] analyze: refactor: add helper fn to do pointee_type analysis --- c2rust-analyze/src/analyze.rs | 192 +++++++++++++++++++--------------- 1 file changed, 110 insertions(+), 82 deletions(-) diff --git a/c2rust-analyze/src/analyze.rs b/c2rust-analyze/src/analyze.rs index 46aacd392f..a61d64ca3f 100644 --- a/c2rust-analyze/src/analyze.rs +++ b/c2rust-analyze/src/analyze.rs @@ -590,88 +590,13 @@ fn run(tcx: TyCtxt) { // Infer pointee types // ---------------------------------- - let mut pointee_vars = pointee_type::VarTable::default(); - - for &ldid in &all_fn_ldids { - if gacx.fn_analysis_invalid(ldid.to_def_id()) { - continue; - } - - let ldid_const = WithOptConstParam::unknown(ldid); - let info = func_info.get_mut(&ldid).unwrap(); - let mir = tcx.mir_built(ldid_const); - let mir = mir.borrow(); - let acx = gacx.function_context_with_data(&mir, info.acx_data.take()); - - let r = panic_detail::catch_unwind(AssertUnwindSafe(|| { - pointee_type::generate_constraints(&acx, &mir, &mut pointee_vars) - })); - - let local_pointee_types = LocalPointerTable::new(acx.local_ptr_base(), acx.num_pointers()); - info.acx_data.set(acx.into_data()); - - match r { - Ok(pointee_constraints) => { - info.pointee_constraints.set(pointee_constraints); - } - Err(pd) => { - gacx.mark_fn_failed(ldid.to_def_id(), DontRewriteFnReason::POINTEE_INVALID, pd); - assert!(gacx.fn_analysis_invalid(ldid.to_def_id())); - } - } - - info.local_pointee_types.set(local_pointee_types); - } - - // Iterate pointee constraints to a fixpoint. - let mut global_pointee_types = - GlobalPointerTable::::new(gacx.num_global_pointers()); - let mut loop_count = 0; - loop { - // Loop until the global assignment reaches a fixpoint. The inner loop also runs until a - // fixpoint, but it only considers a single function at a time. The inner loop for one - // function can affect other functions by updating `global_pointee_types`, so we also need - // the outer loop, which runs until the global type sets converge as well. - loop_count += 1; - // We shouldn't need more iterations than the longest acyclic path through the callgraph. - assert!(loop_count <= 1000); - let old_global_pointee_types = global_pointee_types.clone(); - - for &ldid in &all_fn_ldids { - if gacx.fn_analysis_invalid(ldid.to_def_id()) { - continue; - } - - let info = func_info.get_mut(&ldid).unwrap(); - - let pointee_constraints = info.pointee_constraints.get(); - let pointee_types = global_pointee_types.and_mut(info.local_pointee_types.get_mut()); - pointee_type::solve_constraints(pointee_constraints, &pointee_vars, pointee_types); - } - - if global_pointee_types == old_global_pointee_types { - break; - } - } - - // Print results for debugging - for &ldid in &all_fn_ldids { - if gacx.fn_analysis_invalid(ldid.to_def_id()) { - continue; - } - - let ldid_const = WithOptConstParam::unknown(ldid); - let info = func_info.get_mut(&ldid).unwrap(); - let mir = tcx.mir_built(ldid_const); - let mir = mir.borrow(); - - let acx = gacx.function_context_with_data(&mir, info.acx_data.take()); - let name = tcx.item_name(ldid.to_def_id()); - let pointee_types = global_pointee_types.and(info.local_pointee_types.get()); - print_function_pointee_types(&acx, name, &mir, pointee_types); - - info.acx_data.set(acx.into_data()); - } + let mut global_pointee_types = do_pointee_type(&mut gacx, &mut func_info, &all_fn_ldids); + debug_print_pointee_types( + &mut gacx, + &mut func_info, + &all_fn_ldids, + &global_pointee_types, + ); // ---------------------------------- // Compute dataflow constraints @@ -1948,6 +1873,109 @@ fn do_recent_writes<'tcx>( } } +/// Run the `pointee_type` analysis, which tries to determine the actual type of data that each +/// pointer can point to. This is particularly important for `void*` pointers, which are typically +/// cast to a different type before use. +fn do_pointee_type<'tcx>( + gacx: &mut GlobalAnalysisCtxt<'tcx>, + func_info: &mut HashMap>, + all_fn_ldids: &[LocalDefId], +) -> GlobalPointerTable> { + let tcx = gacx.tcx; + let mut global_pointee_types = + GlobalPointerTable::::new(gacx.num_global_pointers()); + let mut pointee_vars = pointee_type::VarTable::default(); + + for &ldid in all_fn_ldids { + if gacx.fn_analysis_invalid(ldid.to_def_id()) { + continue; + } + + let ldid_const = WithOptConstParam::unknown(ldid); + let info = func_info.get_mut(&ldid).unwrap(); + let mir = tcx.mir_built(ldid_const); + let mir = mir.borrow(); + let acx = gacx.function_context_with_data(&mir, info.acx_data.take()); + + let r = panic_detail::catch_unwind(AssertUnwindSafe(|| { + pointee_type::generate_constraints(&acx, &mir, &mut pointee_vars) + })); + + let local_pointee_types = LocalPointerTable::new(acx.local_ptr_base(), acx.num_pointers()); + info.acx_data.set(acx.into_data()); + + match r { + Ok(pointee_constraints) => { + info.pointee_constraints.set(pointee_constraints); + } + Err(pd) => { + gacx.mark_fn_failed(ldid.to_def_id(), DontRewriteFnReason::POINTEE_INVALID, pd); + assert!(gacx.fn_analysis_invalid(ldid.to_def_id())); + } + } + + info.local_pointee_types.set(local_pointee_types); + } + + // Iterate pointee constraints to a fixpoint. + let mut loop_count = 0; + loop { + // Loop until the global assignment reaches a fixpoint. The inner loop also runs until a + // fixpoint, but it only considers a single function at a time. The inner loop for one + // function can affect other functions by updating `global_pointee_types`, so we also need + // the outer loop, which runs until the global type sets converge as well. + loop_count += 1; + // We shouldn't need more iterations than the longest acyclic path through the callgraph. + assert!(loop_count <= 1000); + let old_global_pointee_types = global_pointee_types.clone(); + + for &ldid in all_fn_ldids { + if gacx.fn_analysis_invalid(ldid.to_def_id()) { + continue; + } + + let info = func_info.get_mut(&ldid).unwrap(); + + let pointee_constraints = info.pointee_constraints.get(); + let pointee_types = global_pointee_types.and_mut(info.local_pointee_types.get_mut()); + pointee_type::solve_constraints(pointee_constraints, &pointee_vars, pointee_types); + } + + if global_pointee_types == old_global_pointee_types { + break; + } + } + + global_pointee_types +} + +fn debug_print_pointee_types<'tcx>( + gacx: &mut GlobalAnalysisCtxt<'tcx>, + func_info: &mut HashMap>, + all_fn_ldids: &[LocalDefId], + global_pointee_types: &GlobalPointerTable>, +) { + let tcx = gacx.tcx; + // Print results for debugging + for &ldid in all_fn_ldids { + if gacx.fn_analysis_invalid(ldid.to_def_id()) { + continue; + } + + let ldid_const = WithOptConstParam::unknown(ldid); + let info = func_info.get_mut(&ldid).unwrap(); + let mir = tcx.mir_built(ldid_const); + let mir = mir.borrow(); + + let acx = gacx.function_context_with_data(&mir, info.acx_data.take()); + let name = tcx.item_name(ldid.to_def_id()); + let pointee_types = global_pointee_types.and(info.local_pointee_types.get()); + print_function_pointee_types(&acx, name, &mir, pointee_types); + + info.acx_data.set(acx.into_data()); + } +} + fn make_ty_fixed(asn: &mut Assignment, lty: LTy) { for lty in lty.iter() { let ptr = lty.label; From 3aa158894da94c953dbc07ab3500931bc2a349a9 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Thu, 1 Aug 2024 16:53:50 -0700 Subject: [PATCH 06/11] analyze: refactor: add helper fn to build dataflow constraints --- c2rust-analyze/src/analyze.rs | 112 ++++++++++++++++++++-------------- 1 file changed, 67 insertions(+), 45 deletions(-) diff --git a/c2rust-analyze/src/analyze.rs b/c2rust-analyze/src/analyze.rs index a61d64ca3f..f464af3650 100644 --- a/c2rust-analyze/src/analyze.rs +++ b/c2rust-analyze/src/analyze.rs @@ -605,51 +605,12 @@ fn run(tcx: TyCtxt) { // Initial pass to assign local `PointerId`s and gather equivalence constraints, which state // that two pointer types must be converted to the same reference type. Some additional data // computed during this the process is kept around for use in later passes. - let mut global_equiv = GlobalEquivSet::new(gacx.num_global_pointers()); - for &ldid in &all_fn_ldids { - let info = func_info.get_mut(&ldid).unwrap(); - let mut local_equiv = - LocalEquivSet::new(info.acx_data.local_ptr_base(), info.acx_data.num_pointers()); - - if gacx.fn_analysis_invalid(ldid.to_def_id()) { - // Even on failure, we set a blank `local_equiv`. This is necessary because we apply - // renumbering to all functions, even those where analysis has failed. - info.local_equiv.set(local_equiv); - continue; - } - - let ldid_const = WithOptConstParam::unknown(ldid); - let mir = tcx.mir_built(ldid_const); - let mir = mir.borrow(); - - let acx = gacx.function_context_with_data(&mir, info.acx_data.take()); - let recent_writes = info.recent_writes.get(); - let pointee_types = global_pointee_types.and(info.local_pointee_types.get()); - - // Compute local equivalence classes and dataflow constraints. - let r = panic_detail::catch_unwind(AssertUnwindSafe(|| { - dataflow::generate_constraints(&acx, &mir, recent_writes, pointee_types) - })); - match r { - Ok((dataflow, equiv_constraints)) => { - let mut equiv = global_equiv.and_mut(&mut local_equiv); - for (a, b) in equiv_constraints { - equiv.unify(a, b); - } - info.dataflow.set(dataflow); - } - Err(pd) => { - acx.gacx.mark_fn_failed( - ldid.to_def_id(), - DontRewriteFnReason::DATAFLOW_INVALID, - pd, - ); - } - }; - - info.acx_data.set(acx.into_data()); - info.local_equiv.set(local_equiv); - } + let global_equiv = build_dataflow_constraints( + &mut gacx, + &mut func_info, + &all_fn_ldids, + &global_pointee_types, + ); // ---------------------------------- // Remap `PointerId`s by equivalence class @@ -1976,6 +1937,67 @@ fn debug_print_pointee_types<'tcx>( } } +/// Compute dataflow and equivalence constraints. This doesn't try to solve the dataflow +/// constraints yet. This function returns only equivalence constraints because there are no +/// global dataflow constraints; all dataflow constraints are function-local and are stored in that +/// function's `FuncInfo`. +fn build_dataflow_constraints<'tcx>( + gacx: &mut GlobalAnalysisCtxt<'tcx>, + func_info: &mut HashMap>, + all_fn_ldids: &[LocalDefId], + global_pointee_types: &GlobalPointerTable>, +) -> GlobalEquivSet { + let tcx = gacx.tcx; + + let mut global_equiv = GlobalEquivSet::new(gacx.num_global_pointers()); + for &ldid in all_fn_ldids { + let info = func_info.get_mut(&ldid).unwrap(); + let mut local_equiv = + LocalEquivSet::new(info.acx_data.local_ptr_base(), info.acx_data.num_pointers()); + + if gacx.fn_analysis_invalid(ldid.to_def_id()) { + // Even on failure, we set a blank `local_equiv`. This is necessary because we apply + // renumbering to all functions, even those where analysis has failed. + info.local_equiv.set(local_equiv); + continue; + } + + let ldid_const = WithOptConstParam::unknown(ldid); + let mir = tcx.mir_built(ldid_const); + let mir = mir.borrow(); + + let acx = gacx.function_context_with_data(&mir, info.acx_data.take()); + let recent_writes = info.recent_writes.get(); + let pointee_types = global_pointee_types.and(info.local_pointee_types.get()); + + // Compute local equivalence classes and dataflow constraints. + let r = panic_detail::catch_unwind(AssertUnwindSafe(|| { + dataflow::generate_constraints(&acx, &mir, recent_writes, pointee_types) + })); + match r { + Ok((dataflow, equiv_constraints)) => { + let mut equiv = global_equiv.and_mut(&mut local_equiv); + for (a, b) in equiv_constraints { + equiv.unify(a, b); + } + info.dataflow.set(dataflow); + } + Err(pd) => { + acx.gacx.mark_fn_failed( + ldid.to_def_id(), + DontRewriteFnReason::DATAFLOW_INVALID, + pd, + ); + } + }; + + info.acx_data.set(acx.into_data()); + info.local_equiv.set(local_equiv); + } + + global_equiv +} + fn make_ty_fixed(asn: &mut Assignment, lty: LTy) { for lty in lty.iter() { let ptr = lty.label; From 536c1a74acf3c915d90640b573495787400986a9 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Fri, 2 Aug 2024 09:31:21 -0700 Subject: [PATCH 07/11] analyze: refactor dataflow::type_check with helpers for all constraints --- c2rust-analyze/src/dataflow/type_check.rs | 42 +++++++++++++---------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/c2rust-analyze/src/dataflow/type_check.rs b/c2rust-analyze/src/dataflow/type_check.rs index 9c541ba512..ad6311e446 100644 --- a/c2rust-analyze/src/dataflow/type_check.rs +++ b/c2rust-analyze/src/dataflow/type_check.rs @@ -64,6 +64,18 @@ impl<'tcx> TypeChecker<'tcx, '_> { self.constraints.add_subset_except(dest, src, except); } + /// Add `Constraint::AllPerms`, which requires `ptr` to have all of the permissions listed in + /// `perms`. + fn add_all_perms(&mut self, ptr: PointerId, perms: PermissionSet) { + self.constraints.add_all_perms(ptr, perms); + } + + /// Add `Constraint::NoPerms`, which requires `ptr` to have none of the permissions listed in + /// `perms`. + fn add_no_perms(&mut self, ptr: PointerId, perms: PermissionSet) { + self.constraints.add_no_perms(ptr, perms); + } + fn add_equiv(&mut self, a: PointerId, b: PointerId) { self.equiv_constraints.push((a, b)); } @@ -79,11 +91,10 @@ impl<'tcx> TypeChecker<'tcx, '_> { } match mutbl { Mutability::Mut => { - self.constraints - .add_all_perms(ptr, PermissionSet::READ | PermissionSet::WRITE); + self.add_all_perms(ptr, PermissionSet::READ | PermissionSet::WRITE); } Mutability::Not => { - self.constraints.add_all_perms(ptr, PermissionSet::READ); + self.add_all_perms(ptr, PermissionSet::READ); } } } @@ -129,8 +140,7 @@ impl<'tcx> TypeChecker<'tcx, '_> { panic!("Creating non-null pointers from exposed addresses not supported"); } // The target type of the cast must not have `NON_NULL` permission. - self.constraints - .add_no_perms(to_lty.label, PermissionSet::NON_NULL); + self.add_no_perms(to_lty.label, PermissionSet::NON_NULL); } CastKind::PointerExposeAddress => { // Allow, as [`CastKind::PointerFromExposedAddress`] is the dangerous one, @@ -467,7 +477,7 @@ impl<'tcx> TypeChecker<'tcx, '_> { let rv_lty = self.acx.type_of(&args[0]); self.do_assign(pl_lty, rv_lty); let perms = PermissionSet::OFFSET_ADD | PermissionSet::OFFSET_SUB; - self.constraints.add_all_perms(rv_lty.label, perms); + self.add_all_perms(rv_lty.label, perms); } Callee::SliceAsPtr { elem_ty, .. } => { @@ -499,8 +509,7 @@ impl<'tcx> TypeChecker<'tcx, '_> { // The output of `malloc` is known not to be a stack pointer. let pl_lty = self.acx.type_of(destination); - self.constraints - .add_no_perms(pl_lty.label, PermissionSet::STACK); + self.add_no_perms(pl_lty.label, PermissionSet::STACK); } Callee::Realloc => { let out_ptr = destination; @@ -515,11 +524,10 @@ impl<'tcx> TypeChecker<'tcx, '_> { // input needs FREE permission let perms = PermissionSet::FREE; - self.constraints.add_all_perms(rv_lty.label, perms); + self.add_all_perms(rv_lty.label, perms); // Output loses the STACK permission. - self.constraints - .add_no_perms(pl_lty.label, PermissionSet::STACK); + self.add_no_perms(pl_lty.label, PermissionSet::STACK); // unify inner-most pointer types self.do_equivalence_nested(pl_lty, rv_lty); @@ -533,7 +541,7 @@ impl<'tcx> TypeChecker<'tcx, '_> { let rv_lty = self.acx.type_of(in_ptr); let perms = PermissionSet::FREE; - self.constraints.add_all_perms(rv_lty.label, perms); + self.add_all_perms(rv_lty.label, perms); } Callee::Memcpy => { let out_ptr = destination; @@ -560,7 +568,7 @@ impl<'tcx> TypeChecker<'tcx, '_> { // input needs WRITE permission let perms = PermissionSet::WRITE | maybe_offset_perm; - self.constraints.add_all_perms(rv_lty.label, perms); + self.add_all_perms(rv_lty.label, perms); let src_ptr = args[1] .place() @@ -574,8 +582,7 @@ impl<'tcx> TypeChecker<'tcx, '_> { // input needs READ permission let perms = PermissionSet::READ | maybe_offset_perm; - self.constraints - .add_all_perms(src_ptr_casted_lty.label, perms); + self.add_all_perms(src_ptr_casted_lty.label, perms); // Perform a pseudo-assignment for *dest = *src. let src_ptr_lty = self.acx.type_of(src_ptr); @@ -603,7 +610,7 @@ impl<'tcx> TypeChecker<'tcx, '_> { debug!("memset at {:?} needs offset? {:?}", loc, maybe_offset_perm); let perms = PermissionSet::WRITE | maybe_offset_perm; - self.constraints.add_all_perms(rv_lty.label, perms); + self.add_all_perms(rv_lty.label, perms); // TODO: the return values of `memcpy` are rarely used // and may not always be casted to a non-void-pointer, @@ -622,8 +629,7 @@ impl<'tcx> TypeChecker<'tcx, '_> { let pl_lty = self.acx.type_of(destination); // We are assigning a null pointer to `destination`, so it must not have the // `NON_NULL` flag. - self.constraints - .add_no_perms(pl_lty.label, PermissionSet::NON_NULL); + self.add_no_perms(pl_lty.label, PermissionSet::NON_NULL); } } } From deeb98080bc6e1959e2e7deba068adbe8e38cd8d Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Fri, 2 Aug 2024 10:30:22 -0700 Subject: [PATCH 08/11] analyze: refactor: split dataflow and equiv visit functions --- c2rust-analyze/src/analyze.rs | 5 +- c2rust-analyze/src/dataflow/mod.rs | 10 +- c2rust-analyze/src/dataflow/type_check.rs | 106 ++++++++++++++++------ 3 files changed, 89 insertions(+), 32 deletions(-) diff --git a/c2rust-analyze/src/analyze.rs b/c2rust-analyze/src/analyze.rs index f464af3650..68b29251f0 100644 --- a/c2rust-analyze/src/analyze.rs +++ b/c2rust-analyze/src/analyze.rs @@ -1972,7 +1972,10 @@ fn build_dataflow_constraints<'tcx>( // Compute local equivalence classes and dataflow constraints. let r = panic_detail::catch_unwind(AssertUnwindSafe(|| { - dataflow::generate_constraints(&acx, &mir, recent_writes, pointee_types) + ( + dataflow::generate_constraints(&acx, &mir, recent_writes, pointee_types), + dataflow::generate_equiv_constraints(&acx, &mir, recent_writes), + ) })); match r { Ok((dataflow, equiv_constraints)) => { diff --git a/c2rust-analyze/src/dataflow/mod.rs b/c2rust-analyze/src/dataflow/mod.rs index dd62a6ae0a..bf434f820d 100644 --- a/c2rust-analyze/src/dataflow/mod.rs +++ b/c2rust-analyze/src/dataflow/mod.rs @@ -462,6 +462,14 @@ pub fn generate_constraints<'tcx>( mir: &Body<'tcx>, recent_writes: &RecentWrites, pointee_types: PointerTable>, -) -> (DataflowConstraints, Vec<(PointerId, PointerId)>) { +) -> DataflowConstraints { self::type_check::visit(acx, mir, recent_writes, pointee_types) } + +pub fn generate_equiv_constraints<'tcx>( + acx: &AnalysisCtxt<'_, 'tcx>, + mir: &Body<'tcx>, + recent_writes: &RecentWrites, +) -> Vec<(PointerId, PointerId)> { + self::type_check::visit_equiv(acx, mir, recent_writes) +} diff --git a/c2rust-analyze/src/dataflow/type_check.rs b/c2rust-analyze/src/dataflow/type_check.rs index ad6311e446..fa7647e366 100644 --- a/c2rust-analyze/src/dataflow/type_check.rs +++ b/c2rust-analyze/src/dataflow/type_check.rs @@ -32,16 +32,27 @@ use rustc_middle::ty::{SubstsRef, Ty, TyKind}; /// and destination. This is necessary because we generally can't change the inner pointer type /// when performing a cast (for example, it's possible to convert `&[&[T]]` to `&&[T]` - take the /// address of the first element - but not to `&[&T]]`). +/// +/// +/// # Optional fields +/// +/// Several fields of this visitor are wrapped in `Option`. These are accessed using helper +/// methods that do nothing when the field is `None`. We use this to run the visitor in two +/// different modes, one for computing equivalence constraints and the other for computing all +/// other dataflow constraints. The two kinds of constraints are closely related, and it's easiest +/// to implement them both in a single visitor. But we compute them in two separate passes because +/// equivalence constraints can be used to improve the quality of the `pointee_type` analysis, and +/// pointee results are needed to compute the dataflow constraints. struct TypeChecker<'tcx, 'a> { acx: &'a AnalysisCtxt<'a, 'tcx>, mir: &'a Body<'tcx>, recent_writes: &'a RecentWrites, - pointee_types: PointerTable<'a, PointeeTypes<'tcx>>, + pointee_types: Option>>, /// Subset constraints on pointer permissions. For example, this contains constraints like /// "the `PermissionSet` assigned to `PointerId` `l1` must be a subset of the `PermissionSet` /// assigned to `l2`". See `dataflow::Constraint` for a full description of supported /// constraints. - constraints: DataflowConstraints, + constraints: Option, /// Equivalence constraints on pointer permissions and flags. An entry `(l1, l2)` in this list /// means that `PointerId`s `l1` and `l2` should be assigned exactly the same permissions and /// flags. This ensures that the two pointers will be rewritten to the same safe type. @@ -49,39 +60,51 @@ struct TypeChecker<'tcx, 'a> { /// Higher-level code eventually feeds the constraints recorded here into the union-find data /// structure defined in `crate::equiv`, so adding a constraint here has the effect of unifying /// the equivalence classes of the two `PointerId`s. - equiv_constraints: Vec<(PointerId, PointerId)>, + equiv_constraints: Option>, } impl<'tcx> TypeChecker<'tcx, '_> { fn add_edge(&mut self, src: PointerId, dest: PointerId) { // Copying `src` to `dest` can discard permissions, but can't add new ones. - self.constraints.add_subset(dest, src); + if let Some(ref mut constraints) = self.constraints { + constraints.add_subset(dest, src); + } } fn add_edge_except(&mut self, src: PointerId, dest: PointerId, except: PermissionSet) { // Copying `src` to `dest` can discard permissions, but can't add new ones, // except for the specified exceptions. - self.constraints.add_subset_except(dest, src, except); + if let Some(ref mut constraints) = self.constraints { + constraints.add_subset_except(dest, src, except); + } } /// Add `Constraint::AllPerms`, which requires `ptr` to have all of the permissions listed in /// `perms`. fn add_all_perms(&mut self, ptr: PointerId, perms: PermissionSet) { - self.constraints.add_all_perms(ptr, perms); + if let Some(ref mut constraints) = self.constraints { + constraints.add_all_perms(ptr, perms); + } } /// Add `Constraint::NoPerms`, which requires `ptr` to have none of the permissions listed in /// `perms`. fn add_no_perms(&mut self, ptr: PointerId, perms: PermissionSet) { - self.constraints.add_no_perms(ptr, perms); + if let Some(ref mut constraints) = self.constraints { + constraints.add_no_perms(ptr, perms); + } } fn add_equiv(&mut self, a: PointerId, b: PointerId) { - self.equiv_constraints.push((a, b)); + if let Some(ref mut equiv_constraints) = self.equiv_constraints { + equiv_constraints.push((a, b)); + } } fn pointee_type(&self, ptr: PointerId) -> Option> { - self.pointee_types[ptr].get_sole_lty() + self.pointee_types + .as_ref() + .and_then(|pointee_types| pointee_types[ptr].get_sole_lty()) } fn record_access(&mut self, ptr: PointerId, mutbl: Mutability) { @@ -728,26 +751,7 @@ impl<'tcx> TypeChecker<'tcx, '_> { } } -pub fn visit<'tcx>( - acx: &AnalysisCtxt<'_, 'tcx>, - mir: &Body<'tcx>, - recent_writes: &RecentWrites, - pointee_types: PointerTable>, -) -> (DataflowConstraints, Vec<(PointerId, PointerId)>) { - let mut tc = TypeChecker { - acx, - mir, - recent_writes, - pointee_types, - constraints: DataflowConstraints::default(), - equiv_constraints: Vec::new(), - }; - - for (ptr, perms, neg_perms) in acx.string_literal_perms() { - tc.constraints.add_all_perms(ptr, perms); - tc.constraints.add_no_perms(ptr, neg_perms); - } - +fn visit_common<'tcx>(tc: &mut TypeChecker<'tcx, '_>, mir: &Body<'tcx>) { for (bb, bb_data) in mir.basic_blocks().iter_enumerated() { for (i, stmt) in bb_data.statements.iter().enumerate() { tc.visit_statement( @@ -766,6 +770,48 @@ pub fn visit<'tcx>( }, ); } +} + +/// Process a MIR body to compute dataflow constraints. +pub fn visit<'tcx>( + acx: &AnalysisCtxt<'_, 'tcx>, + mir: &Body<'tcx>, + recent_writes: &RecentWrites, + pointee_types: PointerTable>, +) -> DataflowConstraints { + let mut tc = TypeChecker { + acx, + mir, + recent_writes, + pointee_types: Some(pointee_types), + constraints: Some(DataflowConstraints::default()), + equiv_constraints: None, + }; + + for (ptr, perms, neg_perms) in acx.string_literal_perms() { + tc.add_all_perms(ptr, perms); + tc.add_no_perms(ptr, neg_perms); + } + + visit_common(&mut tc, mir); + tc.constraints.unwrap() +} + +/// Process a MIR body to compute equivalence constraints. +pub fn visit_equiv<'tcx>( + acx: &AnalysisCtxt<'_, 'tcx>, + mir: &Body<'tcx>, + recent_writes: &RecentWrites, +) -> Vec<(PointerId, PointerId)> { + let mut tc = TypeChecker { + acx, + mir, + recent_writes, + pointee_types: None, + constraints: None, + equiv_constraints: Some(Vec::new()), + }; - (tc.constraints, tc.equiv_constraints) + visit_common(&mut tc, mir); + tc.equiv_constraints.unwrap() } From 724d9572f2c5b018e4d6fd88efea69a5baf8f19f Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Fri, 2 Aug 2024 10:55:24 -0700 Subject: [PATCH 09/11] analyze: refactor: split top-level dataflow and equiv passes --- c2rust-analyze/src/analyze.rs | 67 +++++++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 14 deletions(-) diff --git a/c2rust-analyze/src/analyze.rs b/c2rust-analyze/src/analyze.rs index 68b29251f0..484c034067 100644 --- a/c2rust-analyze/src/analyze.rs +++ b/c2rust-analyze/src/analyze.rs @@ -605,7 +605,8 @@ fn run(tcx: TyCtxt) { // Initial pass to assign local `PointerId`s and gather equivalence constraints, which state // that two pointer types must be converted to the same reference type. Some additional data // computed during this the process is kept around for use in later passes. - let global_equiv = build_dataflow_constraints( + let global_equiv = build_equiv_constraints(&mut gacx, &mut func_info, &all_fn_ldids); + build_dataflow_constraints( &mut gacx, &mut func_info, &all_fn_ldids, @@ -1937,15 +1938,12 @@ fn debug_print_pointee_types<'tcx>( } } -/// Compute dataflow and equivalence constraints. This doesn't try to solve the dataflow -/// constraints yet. This function returns only equivalence constraints because there are no -/// global dataflow constraints; all dataflow constraints are function-local and are stored in that -/// function's `FuncInfo`. -fn build_dataflow_constraints<'tcx>( +/// Compute equivalence constraints. This builds local and global equivalence sets, which map each +/// pointer to an equivalence-class representative. +fn build_equiv_constraints<'tcx>( gacx: &mut GlobalAnalysisCtxt<'tcx>, func_info: &mut HashMap>, all_fn_ldids: &[LocalDefId], - global_pointee_types: &GlobalPointerTable>, ) -> GlobalEquivSet { let tcx = gacx.tcx; @@ -1968,22 +1966,17 @@ fn build_dataflow_constraints<'tcx>( let acx = gacx.function_context_with_data(&mir, info.acx_data.take()); let recent_writes = info.recent_writes.get(); - let pointee_types = global_pointee_types.and(info.local_pointee_types.get()); // Compute local equivalence classes and dataflow constraints. let r = panic_detail::catch_unwind(AssertUnwindSafe(|| { - ( - dataflow::generate_constraints(&acx, &mir, recent_writes, pointee_types), - dataflow::generate_equiv_constraints(&acx, &mir, recent_writes), - ) + dataflow::generate_equiv_constraints(&acx, &mir, recent_writes) })); match r { - Ok((dataflow, equiv_constraints)) => { + Ok(equiv_constraints) => { let mut equiv = global_equiv.and_mut(&mut local_equiv); for (a, b) in equiv_constraints { equiv.unify(a, b); } - info.dataflow.set(dataflow); } Err(pd) => { acx.gacx.mark_fn_failed( @@ -2001,6 +1994,52 @@ fn build_dataflow_constraints<'tcx>( global_equiv } +/// Compute dataflow constraints. This doesn't try to solve the dataflow constraints yet. This +/// function doesn't return anything because there are no global dataflow constraints; all dataflow +/// constraints are function-local and are stored in that function's `FuncInfo`. +fn build_dataflow_constraints<'tcx>( + gacx: &mut GlobalAnalysisCtxt<'tcx>, + func_info: &mut HashMap>, + all_fn_ldids: &[LocalDefId], + global_pointee_types: &GlobalPointerTable>, +) { + let tcx = gacx.tcx; + + for &ldid in all_fn_ldids { + if gacx.fn_analysis_invalid(ldid.to_def_id()) { + continue; + } + + let ldid_const = WithOptConstParam::unknown(ldid); + let info = func_info.get_mut(&ldid).unwrap(); + let mir = tcx.mir_built(ldid_const); + let mir = mir.borrow(); + + let acx = gacx.function_context_with_data(&mir, info.acx_data.take()); + let recent_writes = info.recent_writes.get(); + let pointee_types = global_pointee_types.and(info.local_pointee_types.get()); + + // Compute local equivalence classes and dataflow constraints. + let r = panic_detail::catch_unwind(AssertUnwindSafe(|| { + dataflow::generate_constraints(&acx, &mir, recent_writes, pointee_types) + })); + match r { + Ok(dataflow) => { + info.dataflow.set(dataflow); + } + Err(pd) => { + acx.gacx.mark_fn_failed( + ldid.to_def_id(), + DontRewriteFnReason::DATAFLOW_INVALID, + pd, + ); + } + }; + + info.acx_data.set(acx.into_data()); + } +} + fn make_ty_fixed(asn: &mut Assignment, lty: LTy) { for lty in lty.iter() { let ptr = lty.label; From e3057de8297c857d064325cc2810804e2d441f2d Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Fri, 2 Aug 2024 10:58:44 -0700 Subject: [PATCH 10/11] analyze: run equiv remapping before pointee_type pass --- c2rust-analyze/src/analyze.rs | 70 ++++++++++------------- c2rust-analyze/src/pointee_type/mod.rs | 33 ----------- c2rust-analyze/src/pointee_type/solve.rs | 4 -- c2rust-analyze/tests/filecheck/pointee.rs | 4 +- 4 files changed, 33 insertions(+), 78 deletions(-) diff --git a/c2rust-analyze/src/analyze.rs b/c2rust-analyze/src/analyze.rs index 484c034067..008baaf631 100644 --- a/c2rust-analyze/src/analyze.rs +++ b/c2rust-analyze/src/analyze.rs @@ -587,45 +587,18 @@ fn run(tcx: TyCtxt) { do_recent_writes(&gacx, &mut func_info, &all_fn_ldids); // ---------------------------------- - // Infer pointee types - // ---------------------------------- - - let mut global_pointee_types = do_pointee_type(&mut gacx, &mut func_info, &all_fn_ldids); - debug_print_pointee_types( - &mut gacx, - &mut func_info, - &all_fn_ldids, - &global_pointee_types, - ); - - // ---------------------------------- - // Compute dataflow constraints + // Remap `PointerId`s by equivalence class // ---------------------------------- - // Initial pass to assign local `PointerId`s and gather equivalence constraints, which state - // that two pointer types must be converted to the same reference type. Some additional data - // computed during this the process is kept around for use in later passes. + // Initial pass to gather equivalence constraints, which state that two pointer types must be + // converted to the same reference type. Some additional data computed during this the process + // is kept around for use in later passes. let global_equiv = build_equiv_constraints(&mut gacx, &mut func_info, &all_fn_ldids); - build_dataflow_constraints( - &mut gacx, - &mut func_info, - &all_fn_ldids, - &global_pointee_types, - ); - - // ---------------------------------- - // Remap `PointerId`s by equivalence class - // ---------------------------------- // Remap pointers based on equivalence classes, so all members of an equivalence class now use // the same `PointerId`. let (global_counter, global_equiv_map) = global_equiv.renumber(); debug!("global_equiv_map = {global_equiv_map:?}"); - pointee_type::remap_pointers_global( - &mut global_pointee_types, - &global_equiv_map, - global_counter.num_pointers(), - ); gacx.remap_pointers(&global_equiv_map, global_counter.num_pointers()); let mut local_counter = global_counter.into_local(); @@ -637,15 +610,6 @@ fn run(tcx: TyCtxt) { .local_equiv .renumber(&global_equiv_map, &mut local_counter); debug!("local_equiv_map = {local_equiv_map:?}"); - if info.local_pointee_types.is_set() { - pointee_type::remap_pointers_local( - &mut global_pointee_types, - &mut info.local_pointee_types, - global_equiv_map.and(&local_equiv_map), - local_base, - local_count, - ); - } info.acx_data.remap_pointers( &mut gacx, global_equiv_map.and(&local_equiv_map), @@ -659,6 +623,32 @@ fn run(tcx: TyCtxt) { info.local_equiv.clear(); } + // ---------------------------------- + // Infer pointee types + // ---------------------------------- + + // This runs after equivalence class remapping because it lets us get better pointee results in + // pointer-to-pointer cases without implementing full type unification. + + let global_pointee_types = do_pointee_type(&mut gacx, &mut func_info, &all_fn_ldids); + debug_print_pointee_types( + &mut gacx, + &mut func_info, + &all_fn_ldids, + &global_pointee_types, + ); + + // ---------------------------------- + // Compute dataflow constraints + // ---------------------------------- + + build_dataflow_constraints( + &mut gacx, + &mut func_info, + &all_fn_ldids, + &global_pointee_types, + ); + // ---------------------------------- // Build initial assignment // ---------------------------------- diff --git a/c2rust-analyze/src/pointee_type/mod.rs b/c2rust-analyze/src/pointee_type/mod.rs index 0075bcc30d..6ee27621ae 100644 --- a/c2rust-analyze/src/pointee_type/mod.rs +++ b/c2rust-analyze/src/pointee_type/mod.rs @@ -1,7 +1,5 @@ use crate::context::AnalysisCtxt; -use crate::pointer_id::{GlobalPointerTable, LocalPointerTable, PointerId, PointerTable}; use rustc_middle::mir::Body; -use std::mem; mod constraint_set; mod solve; @@ -17,34 +15,3 @@ pub fn generate_constraints<'tcx>( ) -> ConstraintSet<'tcx> { type_check::visit(acx, mir, vars) } - -pub fn remap_pointers_global<'tcx>( - pointee_types: &mut GlobalPointerTable>, - map: &GlobalPointerTable, - count: usize, -) { - let mut old = mem::replace(pointee_types, GlobalPointerTable::new(count)); - let new = pointee_types; - for (old_ptr, old_val) in old.iter_mut() { - // If there are multiple old pointers that map to the same new pointer, merge their sets. - new[map[old_ptr]].merge(mem::take(old_val)); - } -} - -pub fn remap_pointers_local<'tcx>( - global_pointee_types: &mut GlobalPointerTable>, - local_pointee_types: &mut LocalPointerTable>, - map: PointerTable, - local_base: u32, - local_count: usize, -) { - let mut old = mem::replace( - local_pointee_types, - LocalPointerTable::new(local_base, local_count), - ); - let mut new = global_pointee_types.and_mut(local_pointee_types); - for (old_ptr, old_val) in old.iter_mut() { - // If there are multiple old pointers that map to the same new pointer, merge their sets. - new[map[old_ptr]].merge(mem::take(old_val)); - } -} diff --git a/c2rust-analyze/src/pointee_type/solve.rs b/c2rust-analyze/src/pointee_type/solve.rs index cf24055ee9..66e35642b7 100644 --- a/c2rust-analyze/src/pointee_type/solve.rs +++ b/c2rust-analyze/src/pointee_type/solve.rs @@ -145,10 +145,6 @@ impl<'tcx> PointeeTypes<'tcx> { } } - pub fn merge(&mut self, other: PointeeTypes<'tcx>) { - self.tys.extend(other.tys); - } - pub fn simplify(&mut self, vars: &VarTable<'tcx>) { let mut add = Vec::new(); let mut remove = Vec::new(); diff --git a/c2rust-analyze/tests/filecheck/pointee.rs b/c2rust-analyze/tests/filecheck/pointee.rs index 290d030bb7..46120a15f5 100644 --- a/c2rust-analyze/tests/filecheck/pointee.rs +++ b/c2rust-analyze/tests/filecheck/pointee.rs @@ -34,7 +34,9 @@ unsafe fn remove_cast() { unsafe fn malloc_fresh() { let mut p = 0 as *mut libc::c_void; let fresh = &mut p; - // CHECK: malloc(4) + // CHECK-NOT: malloc(4) + // CHECK: Box::new + // CHECK-NOT: malloc(4) let q = malloc(4); *fresh = q; *(p as *mut i32) = 1; From 7013b6d4442d1f0cb82c10578021ed3b63fd3464 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Tue, 3 Dec 2024 09:40:43 -0800 Subject: [PATCH 11/11] analyze: address comments from code review --- c2rust-analyze/src/dataflow/type_check.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/c2rust-analyze/src/dataflow/type_check.rs b/c2rust-analyze/src/dataflow/type_check.rs index fa7647e366..0a6a2e5aa2 100644 --- a/c2rust-analyze/src/dataflow/type_check.rs +++ b/c2rust-analyze/src/dataflow/type_check.rs @@ -102,6 +102,7 @@ impl<'tcx> TypeChecker<'tcx, '_> { } fn pointee_type(&self, ptr: PointerId) -> Option> { + assert!(!ptr.is_none()); self.pointee_types .as_ref() .and_then(|pointee_types| pointee_types[ptr].get_sole_lty())