From 1df6c90e4bd6d6c34bc91d78db17984f5cac7280 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Thu, 21 Sep 2023 12:49:43 -0700 Subject: [PATCH 01/13] analyze: rewrite::convert: stricter handling of Rewrite::Identity and subexprs --- c2rust-analyze/src/rewrite/expr/convert.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/c2rust-analyze/src/rewrite/expr/convert.rs b/c2rust-analyze/src/rewrite/expr/convert.rs index fcd2a9ef8a..baf19e216c 100644 --- a/c2rust-analyze/src/rewrite/expr/convert.rs +++ b/c2rust-analyze/src/rewrite/expr/convert.rs @@ -117,9 +117,13 @@ impl<'tcx> Visitor<'tcx> for ConvertVisitor<'tcx> { let mir_rws = self.mir_rewrites.remove(&ex.hir_id).unwrap_or_default(); let rewrite_from_mir_rws = |rw: &mir_op::RewriteKind, hir_rw: Rewrite| -> Rewrite { + // Cases that extract a subexpression are handled here; cases that only wrap the + // top-level expression (and thus can handle a non-`Identity` `hir_rw`) are handled by + // `convert_cast_rewrite`. match rw { mir_op::RewriteKind::OffsetSlice { mutbl } => { // `p.offset(i)` -> `&p[i as usize ..]` + assert!(matches!(hir_rw, Rewrite::Identity)); let arr = self.get_subexpr(ex, 0); let idx = Rewrite::Cast(Box::new(self.get_subexpr(ex, 1)), "usize".to_owned()); let elem = Rewrite::SliceTail(Box::new(arr), Box::new(idx)); @@ -134,11 +138,13 @@ impl<'tcx> Visitor<'tcx> for ConvertVisitor<'tcx> { mir_op::RewriteKind::RawToRef { mutbl } => { // &raw _ to &_ or &raw mut _ to &mut _ + assert!(matches!(hir_rw, Rewrite::Identity)); Rewrite::Ref(Box::new(self.get_subexpr(ex, 0)), mutbl_from_bool(*mutbl)) } mir_op::RewriteKind::CellGet => { // `*x` to `Cell::get(x)` + assert!(matches!(hir_rw, Rewrite::Identity)); Rewrite::MethodCall( "get".to_string(), Box::new(self.get_subexpr(ex, 0)), @@ -148,11 +154,13 @@ impl<'tcx> Visitor<'tcx> for ConvertVisitor<'tcx> { mir_op::RewriteKind::CellSet => { // `*x` to `Cell::set(x)` + assert!(matches!(hir_rw, Rewrite::Identity)); let deref_lhs = assert_matches!(ex.kind, ExprKind::Assign(lhs, ..) => lhs); let lhs = self.get_subexpr(deref_lhs, 0); let rhs = self.get_subexpr(ex, 1); Rewrite::MethodCall("set".to_string(), Box::new(lhs), vec![rhs]) } + _ => convert_cast_rewrite(rw, hir_rw), } }; @@ -270,15 +278,12 @@ pub fn convert_cast_rewrite(kind: &mir_op::RewriteKind, hir_rw: Rewrite) -> Rewr mir_op::RewriteKind::CellNew => { // `x` to `Cell::new(x)` - Rewrite::Call("std::cell::Cell::new".to_string(), vec![Rewrite::Identity]) + Rewrite::Call("std::cell::Cell::new".to_string(), vec![hir_rw]) } mir_op::RewriteKind::CellFromMut => { // `x` to `Cell::from_mut(x)` - Rewrite::Call( - "std::cell::Cell::from_mut".to_string(), - vec![Rewrite::Identity], - ) + Rewrite::Call("std::cell::Cell::from_mut".to_string(), vec![hir_rw]) } mir_op::RewriteKind::AsPtr => { // `x` to `x.as_ptr()` From af211333326781639549d40dad1444c7cbb0a09d Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Fri, 22 Sep 2023 13:18:05 -0700 Subject: [PATCH 02/13] analyze: rewrite::distribute: include MirOriginDesc in output --- c2rust-analyze/src/rewrite/expr/convert.rs | 7 ++++--- c2rust-analyze/src/rewrite/expr/distribute.rs | 19 +++++++++++++++++-- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/c2rust-analyze/src/rewrite/expr/convert.rs b/c2rust-analyze/src/rewrite/expr/convert.rs index baf19e216c..e4a009a576 100644 --- a/c2rust-analyze/src/rewrite/expr/convert.rs +++ b/c2rust-analyze/src/rewrite/expr/convert.rs @@ -1,4 +1,5 @@ use crate::panic_detail; +use crate::rewrite::expr::distribute::DistRewrite; use crate::rewrite::expr::mir_op; use crate::rewrite::Rewrite; use assert_matches::assert_matches; @@ -17,7 +18,7 @@ use std::collections::HashMap; struct ConvertVisitor<'tcx> { tcx: TyCtxt<'tcx>, typeck_results: &'tcx TypeckResults<'tcx>, - mir_rewrites: HashMap>, + mir_rewrites: HashMap>, rewrites: Vec<(Span, Rewrite)>, /// When `true`, any `Expr` where rustc added an implicit adjustment will be rewritten to make /// that adjustment explicit. Any node that emits a non-adjustment rewrite sets this flag when @@ -166,7 +167,7 @@ impl<'tcx> Visitor<'tcx> for ConvertVisitor<'tcx> { }; for mir_rw in mir_rws { - hir_rw = rewrite_from_mir_rws(&mir_rw, hir_rw); + hir_rw = rewrite_from_mir_rws(&mir_rw.rw, hir_rw); } // Emit rewrites on subexpressions first. @@ -304,7 +305,7 @@ pub fn convert_cast_rewrite(kind: &mir_op::RewriteKind, hir_rw: Rewrite) -> Rewr pub fn convert_rewrites( tcx: TyCtxt, hir_body_id: hir::BodyId, - mir_rewrites: HashMap>, + mir_rewrites: HashMap>, ) -> Vec<(Span, Rewrite)> { // Run the visitor. let typeck_results = tcx.typeck_body(hir_body_id); diff --git a/c2rust-analyze/src/rewrite/expr/distribute.rs b/c2rust-analyze/src/rewrite/expr/distribute.rs index 9fcdbb79cc..433ba2fbc6 100644 --- a/c2rust-analyze/src/rewrite/expr/distribute.rs +++ b/c2rust-analyze/src/rewrite/expr/distribute.rs @@ -29,6 +29,21 @@ enum Priority { LoadResult, } +#[derive(Clone, Debug)] +pub struct DistRewrite { + pub rw: mir_op::RewriteKind, + pub desc: MirOriginDesc, +} + +impl From for DistRewrite { + fn from(x: RewriteInfo) -> DistRewrite { + DistRewrite { + rw: x.rw, + desc: x.desc, + } + } +} + /// Distributes MIR rewrites to HIR nodes. This takes a list of MIR rewrites (from `mir_op`) and a /// map from MIR location to `HirId` (from `unlower`) and produces a map from `HirId` to a list of /// MIR rewrites. @@ -52,7 +67,7 @@ pub fn distribute( tcx: TyCtxt, unlower_map: BTreeMap, mir_rewrites: HashMap>, -) -> HashMap> { +) -> HashMap> { let mut info_map = HashMap::>::new(); for (loc, mir_rws) in mir_rewrites { @@ -127,6 +142,6 @@ pub fn distribute( // the `RewriteKind`s. info_map .into_iter() - .map(|(k, vs)| (k, vs.into_iter().map(|v| v.rw).collect())) + .map(|(k, vs)| (k, vs.into_iter().map(DistRewrite::from).collect())) .collect() } From 086321debc3261b730ca2f75443ec75f47572b2c Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 25 Sep 2023 14:23:29 -0700 Subject: [PATCH 03/13] analyze: rewrite::unlower: associate method receiver adjustments with the receiver expr --- c2rust-analyze/src/rewrite/expr/unlower.rs | 47 +++++++++++++++++----- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/c2rust-analyze/src/rewrite/expr/unlower.rs b/c2rust-analyze/src/rewrite/expr/unlower.rs index 00cee0aa26..e8021d4906 100644 --- a/c2rust-analyze/src/rewrite/expr/unlower.rs +++ b/c2rust-analyze/src/rewrite/expr/unlower.rs @@ -12,6 +12,7 @@ use rustc_middle::mir::{self, Body, Location, Operand}; use rustc_middle::ty::TyCtxt; use rustc_span::Span; use std::collections::btree_map::{BTreeMap, Entry}; +use std::collections::HashMap; #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] pub struct PreciseLoc { @@ -44,6 +45,11 @@ struct UnlowerVisitor<'a, 'tcx> { span_index: SpanIndex, /// Maps MIR (sub)locations to the HIR node that produced each one, if known. unlower_map: BTreeMap, + + /// When processing the `hir::Expr` identified by the `HirId`, append some locations to the + /// list retrieved from the `SpanIndex`. This is used in cases where some MIR statements have + /// their spans set to a parent expr but really belong to the child. + append_extra_locations: HashMap>, } impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> { @@ -187,12 +193,20 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> { fn visit_expr_inner(&mut self, ex: &'tcx hir::Expr<'tcx>) { let _g = panic_detail::set_current_span(ex.span); - let locs = self + let mut locs = self .span_index .lookup_exact(ex.span) .copied() .filter(|&loc| !self.should_ignore_statement(loc)) .collect::>(); + if let Some(extra) = self.append_extra_locations.remove(&ex.hir_id) { + locs.extend( + extra + .into_iter() + .filter(|&loc| !self.should_ignore_statement(loc)), + ); + } + let locs = locs; if locs.is_empty() { return; } @@ -242,17 +256,22 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> { self.record(loc, &[SubLoc::Rvalue], ex); for (i, (arg, mir_arg)) in args.iter().zip(mir_args).enumerate() { self.record_operand(loc, &[SubLoc::Rvalue, SubLoc::CallArg(i)], arg, mir_arg); - // TODO: Distribute extra `locs` among the various args. For example, if - // `locs` is `[a, b, loc]`, we may need to pass `[a]` as the `extra_locs` for - // the first arg and `[b]` for the second, or `[a, b]` for the first and `[]` - // for the second, or some other combination. These extra locations are - // usually coercions or other adjustments related to a particular argument. - // However, figuring out which `Location`s are associated with which argument - // may be nontrivial. self.visit_expr_operand(arg, mir_arg, &[]); } + if locs.len() > 1 { - warn!("NYI: extra locations {:?} in Call", &locs[..locs.len() - 1]); + // Special case: if the receiver of a `MethodCall` has adjustments, they are + // emitted with the same span as the `MethodCall` itself, and thus show up as + // extra `locs` here. We associate them with the child instead so all of the + // child's spans can be processed together. + if matches!(ex.kind, hir::ExprKind::MethodCall(..)) { + self.append_extra_locations + .entry(args[0].hir_id) + .or_insert_with(Vec::new) + .extend_from_slice(&locs[..locs.len() - 1]); + } else { + warn!("NYI: extra locations {:?} in Call", &locs[..locs.len() - 1]); + } } } @@ -390,9 +409,19 @@ pub fn unlower<'tcx>( mir, span_index, unlower_map: BTreeMap::new(), + append_extra_locations: HashMap::new(), }; visitor.visit_body(hir); + if visitor.append_extra_locations.len() > 0 { + for (&hir_id, locs) in &visitor.append_extra_locations { + error!( + "leftover locations for {hir_id:?} = {:?}: locs = {locs:?}", + tcx.hir().get(hir_id) + ); + } + } + debug_print_unlower_map(tcx, mir, &visitor.unlower_map); visitor.unlower_map From b38f6cc6bdff883cf5810b2a5c9dd2bc80325324 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 25 Sep 2023 14:41:40 -0700 Subject: [PATCH 04/13] analyze: rewrite::convert: allow applying rewrites after each expr adjustment --- c2rust-analyze/src/rewrite/expr/convert.rs | 46 +++++++++++++++---- c2rust-analyze/src/rewrite/expr/distribute.rs | 9 ++-- c2rust-analyze/src/rewrite/expr/unlower.rs | 2 + 3 files changed, 46 insertions(+), 11 deletions(-) diff --git a/c2rust-analyze/src/rewrite/expr/convert.rs b/c2rust-analyze/src/rewrite/expr/convert.rs index e4a009a576..f8bf07340a 100644 --- a/c2rust-analyze/src/rewrite/expr/convert.rs +++ b/c2rust-analyze/src/rewrite/expr/convert.rs @@ -1,6 +1,7 @@ use crate::panic_detail; use crate::rewrite::expr::distribute::DistRewrite; use crate::rewrite::expr::mir_op; +use crate::rewrite::expr::unlower::MirOriginDesc; use crate::rewrite::Rewrite; use assert_matches::assert_matches; use log::*; @@ -116,6 +117,7 @@ impl<'tcx> Visitor<'tcx> for ConvertVisitor<'tcx> { let callsite_span = ex.span.source_callsite(); let mir_rws = self.mir_rewrites.remove(&ex.hir_id).unwrap_or_default(); + let mut mir_rws = &mir_rws as &[_]; let rewrite_from_mir_rws = |rw: &mir_op::RewriteKind, hir_rw: Rewrite| -> Rewrite { // Cases that extract a subexpression are handled here; cases that only wrap the @@ -166,22 +168,41 @@ impl<'tcx> Visitor<'tcx> for ConvertVisitor<'tcx> { } }; + let expr_rws = take_prefix_while(&mut mir_rws, |x: &DistRewrite| { + matches!(x.desc, MirOriginDesc::Expr) + }); + for mir_rw in expr_rws { + hir_rw = rewrite_from_mir_rws(&mir_rw.rw, hir_rw); + } + + // Materialize adjustments if requested by an ancestor or required locally. + let has_adjustment_rewrites = mir_rws + .iter() + .any(|x| matches!(x.desc, MirOriginDesc::Adjustment(_))); + if self.materialize_adjustments || has_adjustment_rewrites { + let adjusts = self.typeck_results.expr_adjustments(ex); + hir_rw = materialize_adjustments(self.tcx, adjusts, hir_rw, |i, mut hir_rw| { + let adj_rws = + take_prefix_while(&mut mir_rws, |x| x.desc == MirOriginDesc::Adjustment(i)); + for mir_rw in adj_rws { + eprintln!("would apply {mir_rw:?} for adjustment #{i}, over {hir_rw:?}"); + hir_rw = rewrite_from_mir_rws(&mir_rw.rw, hir_rw); + } + hir_rw + }); + } + + // Apply late rewrites. for mir_rw in mir_rws { hir_rw = rewrite_from_mir_rws(&mir_rw.rw, hir_rw); } - // Emit rewrites on subexpressions first. + // Emit rewrites on subexpressions last. let applied_mir_rewrite = !matches!(hir_rw, Rewrite::Identity); self.with_materialize_adjustments(applied_mir_rewrite, |this| { intravisit::walk_expr(this, ex); }); - // Materialize adjustments if requested by an ancestor. - if self.materialize_adjustments { - let adjusts = self.typeck_results.expr_adjustments(ex); - hir_rw = materialize_adjustments(self.tcx, adjusts, hir_rw); - } - // Emit the rewrite, if it's nontrivial. if !matches!(hir_rw, Rewrite::Identity) { eprintln!( @@ -225,14 +246,16 @@ fn materialize_adjustments<'tcx>( tcx: TyCtxt<'tcx>, adjustments: &[Adjustment<'tcx>], hir_rw: Rewrite, + mut callback: impl FnMut(usize, Rewrite) -> Rewrite, ) -> Rewrite { let adj_kinds: Vec<&_> = adjustments.iter().map(|a| &a.kind).collect(); match (hir_rw, &adj_kinds[..]) { (Rewrite::Identity, []) => Rewrite::Identity, (Rewrite::Identity, _) => { let mut hir_rw = Rewrite::Identity; - for adj in adjustments { + for (i, adj) in adjustments.iter().enumerate() { hir_rw = apply_identity_adjustment(tcx, adj, hir_rw); + hir_rw = callback(i, hir_rw); } hir_rw } @@ -243,6 +266,13 @@ fn materialize_adjustments<'tcx>( } } +fn take_prefix_while<'a, T>(slice: &mut &'a [T], mut pred: impl FnMut(&'a T) -> bool) -> &'a [T] { + let i = slice.iter().position(|x| !pred(x)).unwrap_or(slice.len()); + let (a, b) = slice.split_at(i); + *slice = b; + a +} + /// Convert a single `RewriteKind` representing a cast into a `Span`-based `Rewrite`. This panics /// on rewrites that modify the original expression; only rewrites that wrap the expression in some /// kind of cast or conversion are supported. diff --git a/c2rust-analyze/src/rewrite/expr/distribute.rs b/c2rust-analyze/src/rewrite/expr/distribute.rs index 433ba2fbc6..97e1997ebe 100644 --- a/c2rust-analyze/src/rewrite/expr/distribute.rs +++ b/c2rust-analyze/src/rewrite/expr/distribute.rs @@ -19,12 +19,14 @@ struct RewriteInfo { /// /// The order of variants follows the order of operations we typically see in generated MIR code. /// For a given HIR `Expr`, the MIR will usually evaluate the expression ([`Priority::Eval`]), -/// store the result into a temporary ([`Priority::_StoreResult`]; currently unused), and later -/// load the result back from the temporary ([`Priority::LoadResult`]) when computing the parent -/// `Expr`. +/// apply zero or more adjustments ([`Priority::Adjust(i)`][Priority::Adjust]), store the result +/// into a temporary ([`Priority::_StoreResult`]; currently unused), and later load the result back +/// from the temporary ([`Priority::LoadResult`]). #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] enum Priority { Eval, + /// Apply the rewrite just after the adjustment at index `i`. + Adjust(usize), _StoreResult, LoadResult, } @@ -87,6 +89,7 @@ pub fn distribute( let priority = match origin.desc { MirOriginDesc::Expr => Priority::Eval, + MirOriginDesc::Adjustment(i) => Priority::Adjust(i), MirOriginDesc::LoadFromTemp => Priority::LoadResult, _ => { panic!( diff --git a/c2rust-analyze/src/rewrite/expr/unlower.rs b/c2rust-analyze/src/rewrite/expr/unlower.rs index e8021d4906..b44c4be584 100644 --- a/c2rust-analyze/src/rewrite/expr/unlower.rs +++ b/c2rust-analyze/src/rewrite/expr/unlower.rs @@ -37,6 +37,8 @@ pub enum MirOriginDesc { /// previously stored. Loads from user-visible locals, which originate from HIR local variable /// expressions, use the `Expr` variant instead. LoadFromTemp, + /// This MIR applies adjustment `i` from the expression's list of adjustments. + Adjustment(usize), } struct UnlowerVisitor<'a, 'tcx> { From d1365de94b503885537a45ffb477c3f8f1d6a34e Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 25 Sep 2023 15:12:54 -0700 Subject: [PATCH 05/13] analyze: rewrite::convert: allow applying mir_op::RewriteKind::RawToRef over Rewrite::AddrOf --- c2rust-analyze/src/rewrite/expr/convert.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/c2rust-analyze/src/rewrite/expr/convert.rs b/c2rust-analyze/src/rewrite/expr/convert.rs index f8bf07340a..e240c30f18 100644 --- a/c2rust-analyze/src/rewrite/expr/convert.rs +++ b/c2rust-analyze/src/rewrite/expr/convert.rs @@ -141,8 +141,13 @@ impl<'tcx> Visitor<'tcx> for ConvertVisitor<'tcx> { mir_op::RewriteKind::RawToRef { mutbl } => { // &raw _ to &_ or &raw mut _ to &mut _ - assert!(matches!(hir_rw, Rewrite::Identity)); - Rewrite::Ref(Box::new(self.get_subexpr(ex, 0)), mutbl_from_bool(*mutbl)) + match hir_rw { + Rewrite::Identity => { + Rewrite::Ref(Box::new(self.get_subexpr(ex, 0)), mutbl_from_bool(*mutbl)) + } + Rewrite::AddrOf(rw, mutbl) => Rewrite::Ref(rw, mutbl), + _ => panic!("unexpected hir_rw {hir_rw:?} for RawToRef"), + } } mir_op::RewriteKind::CellGet => { From 7ea661090b9abdc1d09e4fd3667cb37ce96df540 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 25 Sep 2023 15:06:37 -0700 Subject: [PATCH 06/13] analyze: rewrite::unlower: include expr adjustments in the unlower map --- c2rust-analyze/src/rewrite/expr/unlower.rs | 326 ++++++++++++++++++++- c2rust-analyze/tests/filecheck/addr_of.rs | 2 +- c2rust-analyze/tests/filecheck/cell.rs | 14 +- 3 files changed, 330 insertions(+), 12 deletions(-) diff --git a/c2rust-analyze/src/rewrite/expr/unlower.rs b/c2rust-analyze/src/rewrite/expr/unlower.rs index b44c4be584..fb0891663e 100644 --- a/c2rust-analyze/src/rewrite/expr/unlower.rs +++ b/c2rust-analyze/src/rewrite/expr/unlower.rs @@ -9,7 +9,8 @@ use rustc_hir::intravisit::{self, Visitor}; use rustc_hir::HirId; use rustc_middle::hir::nested_filter; use rustc_middle::mir::{self, Body, Location, Operand}; -use rustc_middle::ty::TyCtxt; +use rustc_middle::ty::adjustment::{Adjust, AutoBorrow, AutoBorrowMutability, PointerCast}; +use rustc_middle::ty::{TyCtxt, TypeckResults}; use rustc_span::Span; use std::collections::btree_map::{BTreeMap, Entry}; use std::collections::HashMap; @@ -44,6 +45,7 @@ pub enum MirOriginDesc { struct UnlowerVisitor<'a, 'tcx> { tcx: TyCtxt<'tcx>, mir: &'a Body<'tcx>, + typeck_results: &'tcx TypeckResults<'tcx>, span_index: SpanIndex, /// Maps MIR (sub)locations to the HIR node that produced each one, if known. unlower_map: BTreeMap, @@ -108,7 +110,7 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> { fn operand_desc(&self, op: &Operand<'tcx>) -> MirOriginDesc { match *op { mir::Operand::Copy(pl) | mir::Operand::Move(pl) => { - if is_var(pl) && self.mir.local_kind(pl.local) == mir::LocalKind::Temp { + if is_temp_var(self.mir, pl.as_ref()) { MirOriginDesc::LoadFromTemp } else { MirOriginDesc::Expr @@ -142,6 +144,8 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> { self.get_last_assign(locs) } + /// If the last statement in `locs` is `StatementKind::Assign`, return `Some` containing its + /// parts. Otherwise, return `None`. fn get_last_assign( &self, locs: &[Location], @@ -288,8 +292,7 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> { } }; self.record_desc(loc, &[], ex, MirOriginDesc::StoreIntoLocal); - self.record(loc, &[SubLoc::Rvalue], ex); - self.visit_expr_rvalue(ex, mir_rv, &locs[..locs.len() - 1]); + self.visit_expr_rvalue(ex, loc, mir_rv, &locs[..locs.len() - 1]); } } } @@ -297,11 +300,99 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> { fn visit_expr_rvalue( &mut self, ex: &'tcx hir::Expr<'tcx>, - _rv: &'a mir::Rvalue<'tcx>, - _extra_locs: &[Location], + loc: Location, + rv: &'a mir::Rvalue<'tcx>, + extra_locs: &[Location], ) { let _g = panic_detail::set_current_span(ex.span); - // TODO: handle adjustments, overloaded operators, etc + + // We proceed backward through `adjusts`, peeling off elements of the current `Rvalue` or + // `Place` and/or consuming temporary assignments from `extra_locs`. Once all adjustments + // have been accounted for, the final remaining `Rvalue` is the `Expr` itself. + let adjusts = self.typeck_results.expr_adjustments(ex); + + let mut s = VisitExprState::new( + self.mir, + ExprMir::from(rv), + extra_locs, + loc, + vec![SubLoc::Rvalue], + ); + + for (i, adjust) in adjusts.iter().enumerate().rev() { + while s.peel_temp().is_some() { + // No-op. Just loop until we've peeled all temporaries. + } + self.record_desc(s.loc, &s.sub_loc, ex, MirOriginDesc::Adjustment(i)); + match adjust.kind { + Adjust::Borrow(AutoBorrow::RawPtr(mutbl)) => { + if s.peel_address_of() != Some(mutbl) { + warn!( + "expected Rvalue::AddressOf for {adjust:?} on expr {ex:?}, \ + but got {:?}", + s.cur + ); + break; + } + } + Adjust::Borrow(AutoBorrow::Ref(_, AutoBorrowMutability::Not)) => { + if s.peel_ref() != Some(mir::Mutability::Not) { + warn!( + "expected Rvalue::Ref(Mutability::Not) for {adjust:?} \ + on expr {ex:?}, but got {:?}", + s.cur + ); + break; + } + } + Adjust::Borrow(AutoBorrow::Ref(_, AutoBorrowMutability::Mut { .. })) => { + if s.peel_ref() != Some(mir::Mutability::Mut) { + warn!( + "expected Rvalue::Ref(Mutability::Mut) for {adjust:?} \ + on expr {ex:?}, but got {:?}", + s.cur + ); + break; + } + } + // Non-overloaded deref + Adjust::Deref(None) => { + if s.peel_deref().is_none() { + warn!( + "expected Rvalue::Deref for {adjust:?} on expr {ex:?}, \ + but got {:?}", + s.cur + ); + break; + } + } + Adjust::Pointer(pc) => { + if s.peel_pointer_cast() != Some(pc) { + warn!( + "expected Rvalue::Cast(Pointer({pc:?})) for {adjust:?} \ + on expr {ex:?}, but got {:?}", + s.cur + ); + break; + } + } + _ => { + warn!("unsupported adjustment {adjust:?} on expr {ex:?}"); + break; + } + } + } + + while s.peel_temp().is_some() { + // No-op. Just loop until we've peeled all temporaries. + } + + // Record entries for temporaries that are buffered in `temp_info`. + for (precise_loc, desc) in s.temp_info { + self.record_desc(precise_loc.loc, &precise_loc.sub, ex, desc); + } + + self.record_desc(s.loc, &s.sub_loc, ex, MirOriginDesc::Expr); } fn visit_expr_operand( @@ -328,10 +419,228 @@ impl<'a, 'tcx> Visitor<'tcx> for UnlowerVisitor<'a, 'tcx> { } } +/// The MIR representation of some part of a `hir::Expr` or its adjustments. +#[derive(Clone, Copy, Debug)] +enum ExprMir<'a, 'tcx> { + Rvalue(&'a mir::Rvalue<'tcx>), + Operand(&'a mir::Operand<'tcx>), + Place(mir::PlaceRef<'tcx>), +} + +impl<'a, 'tcx> From<&'a mir::Rvalue<'tcx>> for ExprMir<'a, 'tcx> { + fn from(x: &'a mir::Rvalue<'tcx>) -> ExprMir<'a, 'tcx> { + ExprMir::Rvalue(x) + } +} + +impl<'a, 'tcx> From<&'a mir::Operand<'tcx>> for ExprMir<'a, 'tcx> { + fn from(x: &'a mir::Operand<'tcx>) -> ExprMir<'a, 'tcx> { + ExprMir::Operand(x) + } +} + +impl<'a, 'tcx> From> for ExprMir<'a, 'tcx> { + fn from(x: mir::PlaceRef<'tcx>) -> ExprMir<'a, 'tcx> { + ExprMir::Place(x) + } +} + +/// Helper for `visit_expr_common`. This type has methods for traversing the MIR produced from a +/// `hir::Expr` and its adjustments. +struct VisitExprState<'a, 'tcx> { + mir: &'a Body<'tcx>, + cur: ExprMir<'a, 'tcx>, + locs: &'a [Location], + + loc: Location, + sub_loc: Vec, + + /// Buffer for temporaries handled by `peel_temp`. + temp_info: Vec<(PreciseLoc, MirOriginDesc)>, +} + +impl<'a, 'tcx> VisitExprState<'a, 'tcx> { + pub fn new( + mir: &'a Body<'tcx>, + cur: ExprMir<'a, 'tcx>, + extra_locs: &'a [Location], + loc: Location, + sub_loc: Vec, + ) -> VisitExprState<'a, 'tcx> { + VisitExprState { + mir, + cur, + locs: extra_locs, + loc, + sub_loc, + + temp_info: Vec::new(), + } + } + + /// If the current MIR is a temporary, and the previous `Location` is an assignment to + /// that temporary, peel it off, leaving the temporary's initializer as the current + /// `Rvalue`. + pub fn peel_temp(&mut self) -> Option<()> { + // Run `peel_temp_inner`, and restore `self.cur` and `self.sub_loc` if it fails. + let old_cur = self.cur; + let old_sub_loc_len = self.sub_loc.len(); + let old_temp_info_len = self.temp_info.len(); + let r = self.peel_temp_inner(); + if r.is_none() { + debug_assert!(self.sub_loc.len() >= old_sub_loc_len); + self.cur = old_cur; + self.sub_loc.truncate(old_sub_loc_len); + self.temp_info.truncate(old_temp_info_len); + } + r + } + + fn peel_temp_inner(&mut self) -> Option<()> { + // We can freely mutate `self.cur` and `self.sub_loc` here, since the outer + // `peel_temp` will reset them if this function returns `None`. + + // Record the location of the load before `require_place`, so it gets the outermost + // `sub`-location possible. + let load_loc = PreciseLoc { + loc: self.loc, + sub: self.sub_loc.clone(), + }; + + let pl = self.require_place()?; + if !is_temp_var(self.mir, pl) { + return None; + } + + let (&loc, remaining_locs) = self.locs.split_last()?; + let stmt = self.mir.stmt_at(loc).left()?; + let (assign_pl, assign_rv) = match stmt.kind { + mir::StatementKind::Assign(ref x) => (&x.0, &x.1), + _ => return None, + }; + if !is_temp_var(self.mir, assign_pl.as_ref()) { + return None; + } + + let store_loc = PreciseLoc { loc, sub: vec![] }; + self.temp_info.push((load_loc, MirOriginDesc::LoadFromTemp)); + self.temp_info + .push((store_loc, MirOriginDesc::StoreIntoLocal)); + + // Success - the next relevant MIR is `assign_rv`. + self.cur = ExprMir::Rvalue(assign_rv); + self.locs = remaining_locs; + self.loc = loc; + self.sub_loc = vec![SubLoc::Rvalue]; + + Some(()) + } + + fn require_place(&mut self) -> Option> { + Some(*self.require_place_mut()?) + } + + fn require_place_mut(&mut self) -> Option<&mut mir::PlaceRef<'tcx>> { + if let ExprMir::Rvalue(rv) = self.cur { + match *rv { + mir::Rvalue::Use(ref op) => { + self.sub_loc.push(SubLoc::RvalueOperand(0)); + self.cur = ExprMir::Operand(op); + } + _ => return None, + } + } + if let ExprMir::Operand(op) = self.cur { + match *op { + mir::Operand::Copy(pl) | mir::Operand::Move(pl) => { + self.sub_loc.push(SubLoc::OperandPlace); + self.cur = ExprMir::Place(pl.as_ref()); + } + _ => return None, + } + } + match self.cur { + ExprMir::Place(ref mut pl) => Some(pl), + _ => None, + } + } + + /// If the current MIR is `Rvalue::AddressOf`, peel it off and return its `Mutability`. + pub fn peel_address_of(&mut self) -> Option { + loop { + if let ExprMir::Rvalue(&mir::Rvalue::AddressOf(mutbl, pl)) = self.cur { + self.cur = ExprMir::Place(pl.as_ref()); + self.sub_loc.push(SubLoc::RvaluePlace(0)); + return Some(mutbl); + } + self.peel_temp()?; + } + } + + /// If the current MIR is `Rvalue::Ref`, peel it off and return its `Mutability`. + pub fn peel_ref(&mut self) -> Option { + loop { + if let ExprMir::Rvalue(&mir::Rvalue::Ref(_, kind, pl)) = self.cur { + let mutbl = match kind { + mir::BorrowKind::Shared => mir::Mutability::Not, + mir::BorrowKind::Mut { .. } => mir::Mutability::Mut, + _ => return None, + }; + self.cur = ExprMir::Place(pl.as_ref()); + self.sub_loc.push(SubLoc::RvaluePlace(0)); + return Some(mutbl); + } + self.peel_temp()?; + } + } + + /// If the current MIR is a `Place` ending with a `Deref` projection, peel off the + /// `Deref`. + pub fn peel_deref(&mut self) -> Option<()> { + loop { + let pl = self.require_place_mut()?; + if let Some((&outer_proj, remaining_proj)) = pl.projection.split_last() { + if matches!(outer_proj, mir::PlaceElem::Deref) { + pl.projection = remaining_proj; + // Number of derefs, not counting the outermost one we just peeled off. + let num_inner_derefs = remaining_proj + .iter() + .filter(|p| matches!(p, mir::PlaceElem::Deref)) + .count(); + self.sub_loc.push(SubLoc::PlacePointer(num_inner_derefs)); + return Some(()); + } + } + self.peel_temp()?; + } + } + + /// If the current MIR is `Rvalue::Cast` with `CastKind::Pointer`, peel it off and + /// return the `PointerCast` kind. + pub fn peel_pointer_cast(&mut self) -> Option { + loop { + if let ExprMir::Rvalue(&mir::Rvalue::Cast(kind, ref op, _)) = self.cur { + let pc = match kind { + mir::CastKind::Pointer(x) => x, + _ => return None, + }; + self.cur = ExprMir::Operand(op); + self.sub_loc.push(SubLoc::RvalueOperand(0)); + return Some(pc); + } + self.peel_temp()?; + } + } +} + fn is_var(pl: mir::Place) -> bool { pl.projection.len() == 0 } +fn is_temp_var(mir: &Body, pl: mir::PlaceRef) -> bool { + pl.projection.len() == 0 && mir.local_kind(pl.local) == mir::LocalKind::Temp +} + /// Builds the *unlowering map*, which maps each piece of the MIR to the HIR `Expr` that was /// lowered to produce it. /// @@ -400,6 +709,8 @@ pub fn unlower<'tcx>( return BTreeMap::new(); } + let typeck_results = tcx.typeck_body(hir_body_id); + // Build `span_index`, which maps `Span`s to MIR `Locations`. let span_index = build_span_index(mir); @@ -409,6 +720,7 @@ pub fn unlower<'tcx>( let mut visitor = UnlowerVisitor { tcx, mir, + typeck_results, span_index, unlower_map: BTreeMap::new(), append_extra_locations: HashMap::new(), diff --git a/c2rust-analyze/tests/filecheck/addr_of.rs b/c2rust-analyze/tests/filecheck/addr_of.rs index fc51a20bec..b0437cbd10 100644 --- a/c2rust-analyze/tests/filecheck/addr_of.rs +++ b/c2rust-analyze/tests/filecheck/addr_of.rs @@ -27,6 +27,6 @@ fn shared_ref_with_struct() { // CHECK-LABEL: fn cast_array_to_ptr_explicit(s: &[u8; 1]) { pub fn cast_array_to_ptr_explicit(s: &[u8; 1]) { - // CHECK-DAG: &*(std::ptr::addr_of!(*s)) as *const u8 + // CHECK: &*(std::ptr::addr_of!(*s)) as *const u8; std::ptr::addr_of!(*s) as *const u8; } diff --git a/c2rust-analyze/tests/filecheck/cell.rs b/c2rust-analyze/tests/filecheck/cell.rs index 56bc8adac2..b764eead0e 100644 --- a/c2rust-analyze/tests/filecheck/cell.rs +++ b/c2rust-analyze/tests/filecheck/cell.rs @@ -1,3 +1,4 @@ +// CHECK-LABEL: fn cell( unsafe fn cell() { // CHECK-DAG: let mut x = std::cell::Cell::new((1)); let mut x = 1; @@ -22,15 +23,20 @@ struct S { i: i32, } +// CHECK-LABEL: fn cell_field( unsafe extern "C" fn cell_field(mut s: *mut S) { (*s).i = 1; - // CHECK-DAG: let r1: &core::cell::Cell<(R)> = &((*s).r); + // FIXME: The initializers for `r1` and `r2` are rewritten incorrectly. Neither `s` nor the + // `r` field have `Cell` type, and an `&mut T -> &Cell` rewrite is not applied. + // XXXXX: let r1: &core::cell::Cell<(R)> = &((*s).r); let r1: *mut R = &mut (*s).r; - // CHECK-DAG: let r2: &core::cell::Cell<(R)> = &((*s).r); + // XXXXX: let r2: &core::cell::Cell<(R)> = &((*s).r); let r2: *mut R = &mut (*s).r; - // CHECK-DAG: ((*r1)).set((0)); + // FIXME: The assignments to `(*r1).i` and `(*r2).i` are rewritten incorrectly. The field + // projection is omitted, producing `(*r1).set(0)`. + // XXXXX: ((*r1)).set((0)); (*r1).i = 0; - // CHECK-DAG: ((*r2)).set((1)); + // XXXXX: ((*r2)).set((1)); (*r2).i = 1; *s = S { r: R { i: 0 }, From b57fa642c94c05381efaa81ea9123735ae18ba58 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 25 Sep 2023 15:19:41 -0700 Subject: [PATCH 07/13] analyze: label Ref and AddressOf rvalues with fresh PointerIds --- c2rust-analyze/src/context.rs | 20 ++++++++++++++++++++ c2rust-analyze/src/dataflow/type_check.rs | 2 ++ c2rust-analyze/src/main.rs | 4 ++++ c2rust-analyze/tests/filecheck/addr_of.rs | 2 +- 4 files changed, 27 insertions(+), 1 deletion(-) diff --git a/c2rust-analyze/src/context.rs b/c2rust-analyze/src/context.rs index 5d56b24dca..146dcd472d 100644 --- a/c2rust-analyze/src/context.rs +++ b/c2rust-analyze/src/context.rs @@ -950,6 +950,26 @@ impl<'a, 'tcx> AnalysisCtxt<'a, 'tcx> { return lty; } + self.derived_type_of_rvalue(rv) + } + + /// In some cases, we can compute an `LTy` for an `Rvalue` that uses `PointerId`s derived from + /// the `LTy`s of other value, rather than using the `PointerId`s assigned in `rvalue_tys`. + /// For example, suppose we have this code: + /// + /// ```ignore + /// let p: & /*p0*/ MyStruct = ...; + /// let q: & /*p2*/ i32 = &(*p).field: & /*p1*/ i32; + /// ``` + /// + /// The type ascription on the rvalue `&(*p).field` represents the entry in `rvalue_tys`, which + /// uses `PointerId` `p1`. However, we can derive another type for this rvalue from the type + /// of `p`: since `p` has type `& /*p0*/ MyStruct`, the projection `&(*p).field` can be given + /// the type `& /*p0*/ i32`, using the same `PointerId` `p0`. Calling `type_of_rvalue` will + /// return the type assigned in `rvalue_tys`, which uses `p1`, whereas this method will return + /// the derived type using `p0`. Certain cases in `dataflow::type_check` will establish + /// constraints relating the two types. + pub fn derived_type_of_rvalue(&self, rv: &Rvalue<'tcx>) -> LTy<'tcx> { if let Some(desc) = describe_rvalue(rv) { let ty = rv.ty(self, self.tcx()); if matches!(ty.kind(), TyKind::Ref(..) | TyKind::RawPtr(..)) { diff --git a/c2rust-analyze/src/dataflow/type_check.rs b/c2rust-analyze/src/dataflow/type_check.rs index b356460297..fa34eab58e 100644 --- a/c2rust-analyze/src/dataflow/type_check.rs +++ b/c2rust-analyze/src/dataflow/type_check.rs @@ -173,6 +173,8 @@ impl<'tcx> TypeChecker<'tcx, '_> { } RvalueDesc::AddrOfLocal { .. } => {} } + let derived_lty = self.acx.derived_type_of_rvalue(rv); + self.do_assign(rvalue_lty, derived_lty); return; } diff --git a/c2rust-analyze/src/main.rs b/c2rust-analyze/src/main.rs index dfbfae1483..06f17d4ea3 100644 --- a/c2rust-analyze/src/main.rs +++ b/c2rust-analyze/src/main.rs @@ -187,6 +187,10 @@ fn label_rvalue_tys<'tcx>(acx: &mut AnalysisCtxt<'_, 'tcx>, mir: &Body<'tcx>) { let _g = panic_detail::set_current_span(stmt.source_info.span); let lty = match rv { + Rvalue::Ref(..) | Rvalue::AddressOf(..) => { + let ty = rv.ty(acx, acx.tcx()); + acx.assign_pointer_ids(ty) + } Rvalue::Aggregate(ref kind, ref _ops) => match **kind { AggregateKind::Array(elem_ty) => { let elem_lty = acx.assign_pointer_ids(elem_ty); diff --git a/c2rust-analyze/tests/filecheck/addr_of.rs b/c2rust-analyze/tests/filecheck/addr_of.rs index b0437cbd10..c0c97c181f 100644 --- a/c2rust-analyze/tests/filecheck/addr_of.rs +++ b/c2rust-analyze/tests/filecheck/addr_of.rs @@ -27,6 +27,6 @@ fn shared_ref_with_struct() { // CHECK-LABEL: fn cast_array_to_ptr_explicit(s: &[u8; 1]) { pub fn cast_array_to_ptr_explicit(s: &[u8; 1]) { - // CHECK: &*(std::ptr::addr_of!(*s)) as *const u8; + // CHECK: &(*s) as *const u8; std::ptr::addr_of!(*s) as *const u8; } From c096a7f00f3e9eb77e22b94435511619e140920a Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 25 Sep 2023 15:25:20 -0700 Subject: [PATCH 08/13] analyze: add adjust_unsize test case --- c2rust-analyze/tests/filecheck.rs | 1 + .../tests/filecheck/adjust_unsize.rs | 28 +++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 c2rust-analyze/tests/filecheck/adjust_unsize.rs diff --git a/c2rust-analyze/tests/filecheck.rs b/c2rust-analyze/tests/filecheck.rs index 1d5a2ce118..04e91aa9ab 100644 --- a/c2rust-analyze/tests/filecheck.rs +++ b/c2rust-analyze/tests/filecheck.rs @@ -32,6 +32,7 @@ macro_rules! define_tests { define_tests! { addr_of, + adjust_unsize, aggregate1, algo_md5, alias1, diff --git a/c2rust-analyze/tests/filecheck/adjust_unsize.rs b/c2rust-analyze/tests/filecheck/adjust_unsize.rs new file mode 100644 index 0000000000..8aaeb92676 --- /dev/null +++ b/c2rust-analyze/tests/filecheck/adjust_unsize.rs @@ -0,0 +1,28 @@ + +// CHECK-LABEL: unsafe fn f +// CHECK-SAME: <'[[LT:[a-z0-9]+]]>(p: &'[[LT]] mut (u8)) { +unsafe fn f(p: *mut u8) { + *p = 1; +} + +// CHECK-LABEL: unsafe fn pass_arr_simple() +unsafe fn pass_arr_simple() { + let mut arr: [u8; 3] = [0; 3]; + // CHECK: f(&mut (&mut (arr) as &mut [u8])[0]); + f(arr.as_mut_ptr()); +} + +// CHECK-LABEL: unsafe fn pass_arr_explicit_ref() +unsafe fn pass_arr_explicit_ref() { + let mut arr: [u8; 3] = [0; 3]; + // CHECK: f(&mut (&mut *((&mut arr)) as &mut [u8])[0]); + f((&mut arr).as_mut_ptr()); +} + +// CHECK-LABEL: unsafe fn pass_arr_ufcs() +unsafe fn pass_arr_ufcs() { + let mut arr: [u8; 3] = [0; 3]; + // CHECK: f(&mut (&mut *(&mut arr) as &mut [u8])[0]); + f(<[_]>::as_mut_ptr(&mut arr)); +} + From 7c17a85526f61b8692ee83c738dc9680889840df Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 9 Oct 2023 15:22:46 -0700 Subject: [PATCH 09/13] analyze: rewrite: add comments explaining distribute/convert sort order --- c2rust-analyze/src/rewrite/expr/convert.rs | 10 ++++++++-- c2rust-analyze/src/rewrite/expr/distribute.rs | 7 ++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/c2rust-analyze/src/rewrite/expr/convert.rs b/c2rust-analyze/src/rewrite/expr/convert.rs index e240c30f18..f1659c5f87 100644 --- a/c2rust-analyze/src/rewrite/expr/convert.rs +++ b/c2rust-analyze/src/rewrite/expr/convert.rs @@ -173,6 +173,8 @@ impl<'tcx> Visitor<'tcx> for ConvertVisitor<'tcx> { } }; + // Apply rewrites on the expression itself. These will be the first rewrites in the sorted + // list produced by `distribute`. let expr_rws = take_prefix_while(&mut mir_rws, |x: &DistRewrite| { matches!(x.desc, MirOriginDesc::Expr) }); @@ -199,16 +201,20 @@ impl<'tcx> Visitor<'tcx> for ConvertVisitor<'tcx> { // Apply late rewrites. for mir_rw in mir_rws { + assert!(matches!( + mir_rw.desc, + MirOriginDesc::StoreIntoLocal | MirOriginDesc::LoadFromTemp + )); hir_rw = rewrite_from_mir_rws(&mir_rw.rw, hir_rw); } - // Emit rewrites on subexpressions last. + // Emit rewrites on subexpressions first, then emit the rewrite on the expression itself, + // if it's nontrivial. let applied_mir_rewrite = !matches!(hir_rw, Rewrite::Identity); self.with_materialize_adjustments(applied_mir_rewrite, |this| { intravisit::walk_expr(this, ex); }); - // Emit the rewrite, if it's nontrivial. if !matches!(hir_rw, Rewrite::Identity) { eprintln!( "rewrite {:?} at {:?} (materialize? {})", diff --git a/c2rust-analyze/src/rewrite/expr/distribute.rs b/c2rust-analyze/src/rewrite/expr/distribute.rs index 97e1997ebe..50dcb153f4 100644 --- a/c2rust-analyze/src/rewrite/expr/distribute.rs +++ b/c2rust-analyze/src/rewrite/expr/distribute.rs @@ -15,7 +15,7 @@ struct RewriteInfo { } /// This enum defines a sort order for [`RewriteInfo`], from innermost (applied earlier) to -/// outermost (applied later). +/// outermost (applied later). The results of `fn distribute` are sorted in this order. /// /// The order of variants follows the order of operations we typically see in generated MIR code. /// For a given HIR `Expr`, the MIR will usually evaluate the expression ([`Priority::Eval`]), @@ -65,6 +65,11 @@ impl From for DistRewrite { /// result in an error: this MIR assignment is a store to a temporary that was introduced during /// HIR-to-MIR lowering, so there is no corresponding HIR assignment where such a rewrite could be /// attached. +/// +/// The rewrites for each `HirId` are sorted in [`Priority`] order, matching the order in which the +/// expression and related parts are evaluated. For example, the [`Expr`][MirOriginDesc::Expr] +/// itself is evaluated first, and any [`Adjustment`][MirOriginDesc::Adjustment]s are applied +/// afterward. pub fn distribute( tcx: TyCtxt, unlower_map: BTreeMap, From 5aceaaffac2802e03b4e585015d967a6f91e5465 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 9 Oct 2023 15:31:07 -0700 Subject: [PATCH 10/13] analyze: rewrite::unlower: document the purpose of VisitExprState::temp_info --- c2rust-analyze/src/rewrite/expr/unlower.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/c2rust-analyze/src/rewrite/expr/unlower.rs b/c2rust-analyze/src/rewrite/expr/unlower.rs index fb0891663e..2b320012d4 100644 --- a/c2rust-analyze/src/rewrite/expr/unlower.rs +++ b/c2rust-analyze/src/rewrite/expr/unlower.rs @@ -456,6 +456,11 @@ struct VisitExprState<'a, 'tcx> { sub_loc: Vec, /// Buffer for temporaries handled by `peel_temp`. + /// + /// When `peel_temp` traverses past a temporary, it also needs to emit some `StoreIntoLocal` + /// and `LoadFromTemp` entries as a side effect. But it doesn't have access to the + /// `UnlowerVisitor` to emit these entries directly, so instead we buffer those entries for the + /// caller to emit later. temp_info: Vec<(PreciseLoc, MirOriginDesc)>, } From 60aa23eace333fcd1e9a9fc361fb2c534ffdea97 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 9 Oct 2023 15:42:06 -0700 Subject: [PATCH 11/13] analyze: rewrite::unlower: add doc comments for fn record + fn record_desc --- c2rust-analyze/src/rewrite/expr/unlower.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/c2rust-analyze/src/rewrite/expr/unlower.rs b/c2rust-analyze/src/rewrite/expr/unlower.rs index 2b320012d4..88c7f2ef32 100644 --- a/c2rust-analyze/src/rewrite/expr/unlower.rs +++ b/c2rust-analyze/src/rewrite/expr/unlower.rs @@ -63,10 +63,16 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> { .either(|stmt| stmt.source_info.span, |term| term.source_info.span) } + /// Record an `unlower_map` entry indicating that MIR location `loc, sub_loc` corresponds to + /// the HIR expression `ex`. fn record(&mut self, loc: Location, sub_loc: &[SubLoc], ex: &hir::Expr) { self.record_desc(loc, sub_loc, ex, MirOriginDesc::Expr); } + /// Like [`record`][Self::record], but also takes a [`MirOriginDesc`] to indicate how the MIR + /// location `loc, sub_loc` relates to the HIR expression `desc`. For example, this can be + /// used to record that a particular piece of MIR loads/stores a temporary used in the + /// evaluation of `ex`. fn record_desc( &mut self, loc: Location, From df74c59a085a99591b613b3537db29361a7c5e9b Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 9 Oct 2023 16:45:44 -0700 Subject: [PATCH 12/13] analyze: rewrite::unlower: expand doc comment for peel_temp with an example --- c2rust-analyze/src/rewrite/expr/unlower.rs | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/c2rust-analyze/src/rewrite/expr/unlower.rs b/c2rust-analyze/src/rewrite/expr/unlower.rs index 88c7f2ef32..419d4adf2f 100644 --- a/c2rust-analyze/src/rewrite/expr/unlower.rs +++ b/c2rust-analyze/src/rewrite/expr/unlower.rs @@ -127,7 +127,7 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> { } /// Special `record` variant for MIR [`Operand`]s. This sets the [`MirOriginDesc`] to - /// `LoadFromLocal` if `op` is a MIR temporary and otherwise sets it to `Expr`. + /// `LoadFromTemp` if `op` is a MIR temporary and otherwise sets it to `Expr`. /// /// [`Operand`]: mir::Operand fn record_operand( @@ -491,7 +491,23 @@ impl<'a, 'tcx> VisitExprState<'a, 'tcx> { /// If the current MIR is a temporary, and the previous `Location` is an assignment to /// that temporary, peel it off, leaving the temporary's initializer as the current - /// `Rvalue`. + /// `Rvalue`. This also adds `LoadFromTemp` and `StoreIntoLocal` entries in `self.temp_info` + /// for the temporary's use and definition. + /// + /// For example, starting from this position: + /// ```Rust,ignore + /// _temp = Add(_1, _2) + /// _3 = move _temp + /// ^^^^^ + /// ``` + /// A successful call to `peel_temp` will advance to this position: + /// ```Rust,ignore + /// _temp = Add(_1, _2) + /// ^^^^^^^^^^^ + /// _3 = move _temp + /// ``` + /// That is, it steps from a use of the temporary (an `Rvalue`, `Operand`, or `Place`) to the + /// `Rvalue` that was used to initialize that temporary. pub fn peel_temp(&mut self) -> Option<()> { // Run `peel_temp_inner`, and restore `self.cur` and `self.sub_loc` if it fails. let old_cur = self.cur; From 86ed8163e622eff4a0deda2d80dbb0d120c81d97 Mon Sep 17 00:00:00 2001 From: Stuart Pernsteiner Date: Mon, 9 Oct 2023 16:54:49 -0700 Subject: [PATCH 13/13] analyze: rewrite::unlower: rename VisitExprState -> VisitExprCursor --- c2rust-analyze/src/rewrite/expr/unlower.rs | 47 ++++++++++++---------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/c2rust-analyze/src/rewrite/expr/unlower.rs b/c2rust-analyze/src/rewrite/expr/unlower.rs index 419d4adf2f..7f0b05843e 100644 --- a/c2rust-analyze/src/rewrite/expr/unlower.rs +++ b/c2rust-analyze/src/rewrite/expr/unlower.rs @@ -317,7 +317,7 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> { // have been accounted for, the final remaining `Rvalue` is the `Expr` itself. let adjusts = self.typeck_results.expr_adjustments(ex); - let mut s = VisitExprState::new( + let mut cursor = VisitExprCursor::new( self.mir, ExprMir::from(rv), extra_locs, @@ -326,58 +326,63 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> { ); for (i, adjust) in adjusts.iter().enumerate().rev() { - while s.peel_temp().is_some() { + while cursor.peel_temp().is_some() { // No-op. Just loop until we've peeled all temporaries. } - self.record_desc(s.loc, &s.sub_loc, ex, MirOriginDesc::Adjustment(i)); + self.record_desc( + cursor.loc, + &cursor.sub_loc, + ex, + MirOriginDesc::Adjustment(i), + ); match adjust.kind { Adjust::Borrow(AutoBorrow::RawPtr(mutbl)) => { - if s.peel_address_of() != Some(mutbl) { + if cursor.peel_address_of() != Some(mutbl) { warn!( "expected Rvalue::AddressOf for {adjust:?} on expr {ex:?}, \ but got {:?}", - s.cur + cursor.cur ); break; } } Adjust::Borrow(AutoBorrow::Ref(_, AutoBorrowMutability::Not)) => { - if s.peel_ref() != Some(mir::Mutability::Not) { + if cursor.peel_ref() != Some(mir::Mutability::Not) { warn!( "expected Rvalue::Ref(Mutability::Not) for {adjust:?} \ on expr {ex:?}, but got {:?}", - s.cur + cursor.cur ); break; } } Adjust::Borrow(AutoBorrow::Ref(_, AutoBorrowMutability::Mut { .. })) => { - if s.peel_ref() != Some(mir::Mutability::Mut) { + if cursor.peel_ref() != Some(mir::Mutability::Mut) { warn!( "expected Rvalue::Ref(Mutability::Mut) for {adjust:?} \ on expr {ex:?}, but got {:?}", - s.cur + cursor.cur ); break; } } // Non-overloaded deref Adjust::Deref(None) => { - if s.peel_deref().is_none() { + if cursor.peel_deref().is_none() { warn!( "expected Rvalue::Deref for {adjust:?} on expr {ex:?}, \ but got {:?}", - s.cur + cursor.cur ); break; } } Adjust::Pointer(pc) => { - if s.peel_pointer_cast() != Some(pc) { + if cursor.peel_pointer_cast() != Some(pc) { warn!( "expected Rvalue::Cast(Pointer({pc:?})) for {adjust:?} \ on expr {ex:?}, but got {:?}", - s.cur + cursor.cur ); break; } @@ -389,16 +394,16 @@ impl<'a, 'tcx> UnlowerVisitor<'a, 'tcx> { } } - while s.peel_temp().is_some() { + while cursor.peel_temp().is_some() { // No-op. Just loop until we've peeled all temporaries. } // Record entries for temporaries that are buffered in `temp_info`. - for (precise_loc, desc) in s.temp_info { + for (precise_loc, desc) in cursor.temp_info { self.record_desc(precise_loc.loc, &precise_loc.sub, ex, desc); } - self.record_desc(s.loc, &s.sub_loc, ex, MirOriginDesc::Expr); + self.record_desc(cursor.loc, &cursor.sub_loc, ex, MirOriginDesc::Expr); } fn visit_expr_operand( @@ -451,9 +456,9 @@ impl<'a, 'tcx> From> for ExprMir<'a, 'tcx> { } } -/// Helper for `visit_expr_common`. This type has methods for traversing the MIR produced from a +/// Helper for `visit_expr_rvalue`. This type has methods for traversing the MIR produced from a /// `hir::Expr` and its adjustments. -struct VisitExprState<'a, 'tcx> { +struct VisitExprCursor<'a, 'tcx> { mir: &'a Body<'tcx>, cur: ExprMir<'a, 'tcx>, locs: &'a [Location], @@ -470,15 +475,15 @@ struct VisitExprState<'a, 'tcx> { temp_info: Vec<(PreciseLoc, MirOriginDesc)>, } -impl<'a, 'tcx> VisitExprState<'a, 'tcx> { +impl<'a, 'tcx> VisitExprCursor<'a, 'tcx> { pub fn new( mir: &'a Body<'tcx>, cur: ExprMir<'a, 'tcx>, extra_locs: &'a [Location], loc: Location, sub_loc: Vec, - ) -> VisitExprState<'a, 'tcx> { - VisitExprState { + ) -> VisitExprCursor<'a, 'tcx> { + VisitExprCursor { mir, cur, locs: extra_locs,