Skip to content

Commit

Permalink
analyze: assign fresh PointerIds to Ref and AddressOf rvalues (#1028)
Browse files Browse the repository at this point in the history
Given code like this:
```Rust
let p: *mut S = ...;
let q: *mut i32 = &mut (*p).field;
```
Currently, we produce two `PointerId`s, one on `p` and one on `q`. The
rvalue `&mut (*p).field` has the same `PointerId` as `p`, on the
assumption that it must always have the same permissions as `p` itself.

In the pointee analysis, `p` and `&mut (*p).field` will usually have
different pointee sets (`{S}` and `{i32}` respectively). But we'd like
to use `PointerId`s to identify pointers in the pointee analysis, rather
than inventing a whole new labeling system. This requires that `p` and
`&mut (*p).field` have different `PointerId`s (though in the dataflow
analysis, we will add a constraint relating their permissions, so that
analysis will still produce identical results).

Actually adding `PointerId`s for the necessary rvalues is fairly trivial
(see b57fa64). The majority of this PR
consists of rewriter improvements to let it handle the new `PointerId`s.
In particular, it can now apply rewrites to MIR code arising from expr
adjustments. For example:

```Rust
let arr: [u8; 3] = [0; 3];
let p: *const u8 = arr.as_ptr();
```

This produces the following MIR:
```Rust
/*1*/   _1 = [const 0_u8; 3];
/*2*/   _4 = &_1;
/*3*/   _3 = move _4 as &[u8] (PointerCoercion(Unsize));
/*4*/   _2 = core::slice::<impl [u8]>::as_ptr(move _3) -> [return: bb1, unwind continue];
```

MIR line 1 is the initialization of `arr`. Line 2 applies a borrow
adjustment to `arr`, line 3 applies an unsizing adjustment, and line 4
actually calls `as_ptr`. The MIR code produced is as if the programmer
had written `(&arr as &[u8]).as_ptr()` rather than `arr.as_ptr()`.
Previously, if the `rewrite::expr::mir_op` module tried to introduce
rewrites on lines 2 or 3, it would result in an error; with this PR,
those rewrites are correctly lifted to HIR (after materializing the
adjustments so there is a place in the source code to apply those
rewrites).
  • Loading branch information
spernsteiner authored Oct 11, 2023
2 parents 0ba4903 + 86ed816 commit b8c4a4c
Show file tree
Hide file tree
Showing 10 changed files with 552 additions and 46 deletions.
20 changes: 20 additions & 0 deletions c2rust-analyze/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -950,6 +950,26 @@ impl<'a, 'tcx> AnalysisCtxt<'a, 'tcx> {
return lty;
}

self.derived_type_of_rvalue(rv)
}

/// In some cases, we can compute an `LTy` for an `Rvalue` that uses `PointerId`s derived from
/// the `LTy`s of other value, rather than using the `PointerId`s assigned in `rvalue_tys`.
/// For example, suppose we have this code:
///
/// ```ignore
/// let p: & /*p0*/ MyStruct = ...;
/// let q: & /*p2*/ i32 = &(*p).field: & /*p1*/ i32;
/// ```
///
/// The type ascription on the rvalue `&(*p).field` represents the entry in `rvalue_tys`, which
/// uses `PointerId` `p1`. However, we can derive another type for this rvalue from the type
/// of `p`: since `p` has type `& /*p0*/ MyStruct`, the projection `&(*p).field` can be given
/// the type `& /*p0*/ i32`, using the same `PointerId` `p0`. Calling `type_of_rvalue` will
/// return the type assigned in `rvalue_tys`, which uses `p1`, whereas this method will return
/// the derived type using `p0`. Certain cases in `dataflow::type_check` will establish
/// constraints relating the two types.
pub fn derived_type_of_rvalue(&self, rv: &Rvalue<'tcx>) -> LTy<'tcx> {
if let Some(desc) = describe_rvalue(rv) {
let ty = rv.ty(self, self.tcx());
if matches!(ty.kind(), TyKind::Ref(..) | TyKind::RawPtr(..)) {
Expand Down
2 changes: 2 additions & 0 deletions c2rust-analyze/src/dataflow/type_check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,8 @@ impl<'tcx> TypeChecker<'tcx, '_> {
}
RvalueDesc::AddrOfLocal { .. } => {}
}
let derived_lty = self.acx.derived_type_of_rvalue(rv);
self.do_assign(rvalue_lty, derived_lty);
return;
}

Expand Down
4 changes: 4 additions & 0 deletions c2rust-analyze/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,10 @@ fn label_rvalue_tys<'tcx>(acx: &mut AnalysisCtxt<'_, 'tcx>, mir: &Body<'tcx>) {
let _g = panic_detail::set_current_span(stmt.source_info.span);

let lty = match rv {
Rvalue::Ref(..) | Rvalue::AddressOf(..) => {
let ty = rv.ty(acx, acx.tcx());
acx.assign_pointer_ids(ty)
}
Rvalue::Aggregate(ref kind, ref _ops) => match **kind {
AggregateKind::Array(elem_ty) => {
let elem_lty = acx.assign_pointer_ids(elem_ty);
Expand Down
83 changes: 65 additions & 18 deletions c2rust-analyze/src/rewrite/expr/convert.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use crate::panic_detail;
use crate::rewrite::expr::distribute::DistRewrite;
use crate::rewrite::expr::mir_op;
use crate::rewrite::expr::unlower::MirOriginDesc;
use crate::rewrite::Rewrite;
use assert_matches::assert_matches;
use log::*;
Expand All @@ -17,7 +19,7 @@ use std::collections::HashMap;
struct ConvertVisitor<'tcx> {
tcx: TyCtxt<'tcx>,
typeck_results: &'tcx TypeckResults<'tcx>,
mir_rewrites: HashMap<HirId, Vec<mir_op::RewriteKind>>,
mir_rewrites: HashMap<HirId, Vec<DistRewrite>>,
rewrites: Vec<(Span, Rewrite)>,
/// When `true`, any `Expr` where rustc added an implicit adjustment will be rewritten to make
/// that adjustment explicit. Any node that emits a non-adjustment rewrite sets this flag when
Expand Down Expand Up @@ -115,11 +117,16 @@ impl<'tcx> Visitor<'tcx> for ConvertVisitor<'tcx> {
let callsite_span = ex.span.source_callsite();

let mir_rws = self.mir_rewrites.remove(&ex.hir_id).unwrap_or_default();
let mut mir_rws = &mir_rws as &[_];

let rewrite_from_mir_rws = |rw: &mir_op::RewriteKind, hir_rw: Rewrite| -> Rewrite {
// Cases that extract a subexpression are handled here; cases that only wrap the
// top-level expression (and thus can handle a non-`Identity` `hir_rw`) are handled by
// `convert_cast_rewrite`.
match rw {
mir_op::RewriteKind::OffsetSlice { mutbl } => {
// `p.offset(i)` -> `&p[i as usize ..]`
assert!(matches!(hir_rw, Rewrite::Identity));
let arr = self.get_subexpr(ex, 0);
let idx = Rewrite::Cast(Box::new(self.get_subexpr(ex, 1)), "usize".to_owned());
let elem = Rewrite::SliceTail(Box::new(arr), Box::new(idx));
Expand All @@ -134,11 +141,18 @@ impl<'tcx> Visitor<'tcx> for ConvertVisitor<'tcx> {

mir_op::RewriteKind::RawToRef { mutbl } => {
// &raw _ to &_ or &raw mut _ to &mut _
Rewrite::Ref(Box::new(self.get_subexpr(ex, 0)), mutbl_from_bool(*mutbl))
match hir_rw {
Rewrite::Identity => {
Rewrite::Ref(Box::new(self.get_subexpr(ex, 0)), mutbl_from_bool(*mutbl))
}
Rewrite::AddrOf(rw, mutbl) => Rewrite::Ref(rw, mutbl),
_ => panic!("unexpected hir_rw {hir_rw:?} for RawToRef"),
}
}

mir_op::RewriteKind::CellGet => {
// `*x` to `Cell::get(x)`
assert!(matches!(hir_rw, Rewrite::Identity));
Rewrite::MethodCall(
"get".to_string(),
Box::new(self.get_subexpr(ex, 0)),
Expand All @@ -148,32 +162,59 @@ impl<'tcx> Visitor<'tcx> for ConvertVisitor<'tcx> {

mir_op::RewriteKind::CellSet => {
// `*x` to `Cell::set(x)`
assert!(matches!(hir_rw, Rewrite::Identity));
let deref_lhs = assert_matches!(ex.kind, ExprKind::Assign(lhs, ..) => lhs);
let lhs = self.get_subexpr(deref_lhs, 0);
let rhs = self.get_subexpr(ex, 1);
Rewrite::MethodCall("set".to_string(), Box::new(lhs), vec![rhs])
}

_ => convert_cast_rewrite(rw, hir_rw),
}
};

// Apply rewrites on the expression itself. These will be the first rewrites in the sorted
// list produced by `distribute`.
let expr_rws = take_prefix_while(&mut mir_rws, |x: &DistRewrite| {
matches!(x.desc, MirOriginDesc::Expr)
});
for mir_rw in expr_rws {
hir_rw = rewrite_from_mir_rws(&mir_rw.rw, hir_rw);
}

// Materialize adjustments if requested by an ancestor or required locally.
let has_adjustment_rewrites = mir_rws
.iter()
.any(|x| matches!(x.desc, MirOriginDesc::Adjustment(_)));
if self.materialize_adjustments || has_adjustment_rewrites {
let adjusts = self.typeck_results.expr_adjustments(ex);
hir_rw = materialize_adjustments(self.tcx, adjusts, hir_rw, |i, mut hir_rw| {
let adj_rws =
take_prefix_while(&mut mir_rws, |x| x.desc == MirOriginDesc::Adjustment(i));
for mir_rw in adj_rws {
eprintln!("would apply {mir_rw:?} for adjustment #{i}, over {hir_rw:?}");
hir_rw = rewrite_from_mir_rws(&mir_rw.rw, hir_rw);
}
hir_rw
});
}

// Apply late rewrites.
for mir_rw in mir_rws {
hir_rw = rewrite_from_mir_rws(&mir_rw, hir_rw);
assert!(matches!(
mir_rw.desc,
MirOriginDesc::StoreIntoLocal | MirOriginDesc::LoadFromTemp
));
hir_rw = rewrite_from_mir_rws(&mir_rw.rw, hir_rw);
}

// Emit rewrites on subexpressions first.
// Emit rewrites on subexpressions first, then emit the rewrite on the expression itself,
// if it's nontrivial.
let applied_mir_rewrite = !matches!(hir_rw, Rewrite::Identity);
self.with_materialize_adjustments(applied_mir_rewrite, |this| {
intravisit::walk_expr(this, ex);
});

// Materialize adjustments if requested by an ancestor.
if self.materialize_adjustments {
let adjusts = self.typeck_results.expr_adjustments(ex);
hir_rw = materialize_adjustments(self.tcx, adjusts, hir_rw);
}

// Emit the rewrite, if it's nontrivial.
if !matches!(hir_rw, Rewrite::Identity) {
eprintln!(
"rewrite {:?} at {:?} (materialize? {})",
Expand Down Expand Up @@ -216,14 +257,16 @@ fn materialize_adjustments<'tcx>(
tcx: TyCtxt<'tcx>,
adjustments: &[Adjustment<'tcx>],
hir_rw: Rewrite,
mut callback: impl FnMut(usize, Rewrite) -> Rewrite,
) -> Rewrite {
let adj_kinds: Vec<&_> = adjustments.iter().map(|a| &a.kind).collect();
match (hir_rw, &adj_kinds[..]) {
(Rewrite::Identity, []) => Rewrite::Identity,
(Rewrite::Identity, _) => {
let mut hir_rw = Rewrite::Identity;
for adj in adjustments {
for (i, adj) in adjustments.iter().enumerate() {
hir_rw = apply_identity_adjustment(tcx, adj, hir_rw);
hir_rw = callback(i, hir_rw);
}
hir_rw
}
Expand All @@ -234,6 +277,13 @@ fn materialize_adjustments<'tcx>(
}
}

fn take_prefix_while<'a, T>(slice: &mut &'a [T], mut pred: impl FnMut(&'a T) -> bool) -> &'a [T] {
let i = slice.iter().position(|x| !pred(x)).unwrap_or(slice.len());
let (a, b) = slice.split_at(i);
*slice = b;
a
}

/// Convert a single `RewriteKind` representing a cast into a `Span`-based `Rewrite`. This panics
/// on rewrites that modify the original expression; only rewrites that wrap the expression in some
/// kind of cast or conversion are supported.
Expand Down Expand Up @@ -270,15 +320,12 @@ pub fn convert_cast_rewrite(kind: &mir_op::RewriteKind, hir_rw: Rewrite) -> Rewr

mir_op::RewriteKind::CellNew => {
// `x` to `Cell::new(x)`
Rewrite::Call("std::cell::Cell::new".to_string(), vec![Rewrite::Identity])
Rewrite::Call("std::cell::Cell::new".to_string(), vec![hir_rw])
}

mir_op::RewriteKind::CellFromMut => {
// `x` to `Cell::from_mut(x)`
Rewrite::Call(
"std::cell::Cell::from_mut".to_string(),
vec![Rewrite::Identity],
)
Rewrite::Call("std::cell::Cell::from_mut".to_string(), vec![hir_rw])
}
mir_op::RewriteKind::AsPtr => {
// `x` to `x.as_ptr()`
Expand All @@ -299,7 +346,7 @@ pub fn convert_cast_rewrite(kind: &mir_op::RewriteKind, hir_rw: Rewrite) -> Rewr
pub fn convert_rewrites(
tcx: TyCtxt,
hir_body_id: hir::BodyId,
mir_rewrites: HashMap<HirId, Vec<mir_op::RewriteKind>>,
mir_rewrites: HashMap<HirId, Vec<DistRewrite>>,
) -> Vec<(Span, Rewrite)> {
// Run the visitor.
let typeck_results = tcx.typeck_body(hir_body_id);
Expand Down
35 changes: 29 additions & 6 deletions c2rust-analyze/src/rewrite/expr/distribute.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,37 @@ struct RewriteInfo {
}

/// This enum defines a sort order for [`RewriteInfo`], from innermost (applied earlier) to
/// outermost (applied later).
/// outermost (applied later). The results of `fn distribute` are sorted in this order.
///
/// The order of variants follows the order of operations we typically see in generated MIR code.
/// For a given HIR `Expr`, the MIR will usually evaluate the expression ([`Priority::Eval`]),
/// store the result into a temporary ([`Priority::_StoreResult`]; currently unused), and later
/// load the result back from the temporary ([`Priority::LoadResult`]) when computing the parent
/// `Expr`.
/// apply zero or more adjustments ([`Priority::Adjust(i)`][Priority::Adjust]), store the result
/// into a temporary ([`Priority::_StoreResult`]; currently unused), and later load the result back
/// from the temporary ([`Priority::LoadResult`]).
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
enum Priority {
Eval,
/// Apply the rewrite just after the adjustment at index `i`.
Adjust(usize),
_StoreResult,
LoadResult,
}

#[derive(Clone, Debug)]
pub struct DistRewrite {
pub rw: mir_op::RewriteKind,
pub desc: MirOriginDesc,
}

impl From<RewriteInfo> for DistRewrite {
fn from(x: RewriteInfo) -> DistRewrite {
DistRewrite {
rw: x.rw,
desc: x.desc,
}
}
}

/// Distributes MIR rewrites to HIR nodes. This takes a list of MIR rewrites (from `mir_op`) and a
/// map from MIR location to `HirId` (from `unlower`) and produces a map from `HirId` to a list of
/// MIR rewrites.
Expand All @@ -48,11 +65,16 @@ enum Priority {
/// result in an error: this MIR assignment is a store to a temporary that was introduced during
/// HIR-to-MIR lowering, so there is no corresponding HIR assignment where such a rewrite could be
/// attached.
///
/// The rewrites for each `HirId` are sorted in [`Priority`] order, matching the order in which the
/// expression and related parts are evaluated. For example, the [`Expr`][MirOriginDesc::Expr]
/// itself is evaluated first, and any [`Adjustment`][MirOriginDesc::Adjustment]s are applied
/// afterward.
pub fn distribute(
tcx: TyCtxt,
unlower_map: BTreeMap<PreciseLoc, MirOrigin>,
mir_rewrites: HashMap<Location, Vec<MirRewrite>>,
) -> HashMap<HirId, Vec<mir_op::RewriteKind>> {
) -> HashMap<HirId, Vec<DistRewrite>> {
let mut info_map = HashMap::<HirId, Vec<RewriteInfo>>::new();

for (loc, mir_rws) in mir_rewrites {
Expand All @@ -72,6 +94,7 @@ pub fn distribute(

let priority = match origin.desc {
MirOriginDesc::Expr => Priority::Eval,
MirOriginDesc::Adjustment(i) => Priority::Adjust(i),
MirOriginDesc::LoadFromTemp => Priority::LoadResult,
_ => {
panic!(
Expand Down Expand Up @@ -127,6 +150,6 @@ pub fn distribute(
// the `RewriteKind`s.
info_map
.into_iter()
.map(|(k, vs)| (k, vs.into_iter().map(|v| v.rw).collect()))
.map(|(k, vs)| (k, vs.into_iter().map(DistRewrite::from).collect()))
.collect()
}
Loading

0 comments on commit b8c4a4c

Please sign in to comment.