Skip to content

Commit

Permalink
analyze: implement Box<T> rewrites (#1106)
Browse files Browse the repository at this point in the history
This branch adds rewrites to convert pointers with the `FREE` permission
into `Box<T>`. This follows the dynamic ownership tracking proposal in
#1097.

Specific changes:
* Adds a new `dyn_owned: bool` field to `TypeDesc`, which indicates that
the pointer type should be wrapped in a dynamic ownership wrapper.
* Extends `ZeroizeType` handling to support zero-initializing some
pointer types (necessary to allow `malloc`/`calloc` of structs that
contain pointers).
* Adds support for `Box` and `dyn_owned` related casts in `mir_op` and
`rewrite::expr::convert`.
* Adds custom rewrite rules for `malloc`, `calloc`, `free`, and
`realloc`.
  • Loading branch information
spernsteiner authored Oct 24, 2024
2 parents 0d2c6c2 + 95d4038 commit 0872f48
Show file tree
Hide file tree
Showing 8 changed files with 786 additions and 127 deletions.
35 changes: 31 additions & 4 deletions c2rust-analyze/src/dataflow/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,18 @@ mod type_check;
#[derive(Clone, Debug)]
enum Constraint {
/// Pointer `.0` must have a subset of the permissions of pointer `.1`.
///
/// `Subset` and `SubsetExcept` have a special case involving `FREE` and `OFFSET` permissions.
/// The rewriter can't produce a cast that converts `Box<[T]>` to `Box<T>`; to avoid needing
/// such casts, we forbid assignment operations from discarding the `OFFSET` permission while
/// keeping `FREE`. We implement this restriction by adding an additional requirement to the
/// definition of `Subset(L, R)`: if `L` contains `FREE` and `R` contains `OFFSET`, then `L`
/// must also contain `OFFSET`. This is sufficient because all assignments and
/// pseudo-assignments generate `Subset` constraints.
///
/// If `L` does not contain `FREE`, then no additional requirement applies, even if `R` does
/// contain `OFFSET`. We allow discarding both `FREE` and `OFFSET` simultaneously during an
/// assignment.
Subset(PointerId, PointerId),
/// Pointer `.0` must have a subset of permissions of pointer `.1`, except
/// for the provided permission set.
Expand Down Expand Up @@ -102,10 +114,25 @@ impl DataflowConstraints {
| PermissionSet::OFFSET_SUB
| PermissionSet::FREE;

(
old_a & !(!old_b & (PROPAGATE_DOWN & !except)),
old_b | (old_a & (PROPAGATE_UP & !except)),
)
let remove_a = !old_b & PROPAGATE_DOWN & !except;
let add_b = old_a & PROPAGATE_UP & !except;

// Special case: as documented on `Constraint::Subset`, if the subset has `FREE`,
// we propagate `OFFSET` in the opposite direction. Specifically, if the superset
// has `OFFSET`, we add it to the subset, propagating "down". (Propagating "up"
// here could allow `OFFSET` and `!OFFSET` to be propagated up into the same
// `PointerId` through two different constraints, creating a conflict.)
let add_a = if old_a.contains(PermissionSet::FREE) {
#[allow(bad_style)]
let PROPAGATE_DOWN_WHEN_FREE =
PermissionSet::OFFSET_ADD | PermissionSet::OFFSET_SUB;
old_b & PROPAGATE_DOWN_WHEN_FREE & !except
} else {
PermissionSet::empty()
};
debug_assert_eq!(add_a & remove_a, PermissionSet::empty());

((old_a | add_a) & !remove_a, old_b | add_b)
}

fn all_perms(
Expand Down
11 changes: 9 additions & 2 deletions c2rust-analyze/src/pointee_type/type_check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -299,8 +299,15 @@ impl<'tcx> TypeChecker<'tcx, '_> {
self.assign(dest_lty.label, arg_lty.label);
}
Callee::Free => {
// No constraints on `free`, since it doesn't reveal anything about the concrete
// type.
// Here we create a fresh inference variable and associate it with the argument
// pointer. This doesn't constrain the type, since `free` doesn't reveal anything
// about the concrete type of the data, but it does ensure that the pointee type of
// the argument operand matches the pointee type of other pointers to the same
// allocation, which lets us remove a `void*` cast during rewriting.
let var = self.constraints.fresh_var();
assert_eq!(args.len(), 1);
let arg_lty = self.acx.type_of(&args[0]);
self.use_pointer_at_type(arg_lty.label, var);
}

Callee::Memcpy => {
Expand Down
171 changes: 166 additions & 5 deletions c2rust-analyze/src/rewrite/expr/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,119 @@ impl<'tcx> ConvertVisitor<'tcx> {
)
}

mir_op::RewriteKind::MallocSafe {
ref zero_ty,
elem_size,
single,
}
| mir_op::RewriteKind::CallocSafe {
ref zero_ty,
elem_size,
single,
} => {
// `malloc(n)` -> `Box::new(z)` or similar
assert!(matches!(hir_rw, Rewrite::Identity));
let zeroize_expr = generate_zeroize_expr(zero_ty);
let mut stmts = match *rw {
mir_op::RewriteKind::MallocSafe { .. } => vec![
Rewrite::Let(vec![("byte_len".into(), self.get_subexpr(ex, 0))]),
Rewrite::Let1(
"n".into(),
Box::new(format_rewrite!("byte_len as usize / {elem_size}")),
),
],
mir_op::RewriteKind::CallocSafe { .. } => vec![
Rewrite::Let(vec![
("count".into(), self.get_subexpr(ex, 0)),
("size".into(), self.get_subexpr(ex, 1)),
]),
format_rewrite!("assert_eq!(size, {elem_size})"),
Rewrite::Let1("n".into(), Box::new(format_rewrite!("count as usize"))),
],
_ => unreachable!(),
};
let expr = if single {
stmts.push(Rewrite::Text("assert_eq!(n, 1)".into()));
format_rewrite!("Box::new({})", zeroize_expr)
} else {
stmts.push(Rewrite::Let1(
"mut v".into(),
Box::new(Rewrite::Text("Vec::with_capacity(n)".into())),
));
stmts.push(format_rewrite!(
"for i in 0..n {{\n v.push({});\n}}",
zeroize_expr,
));
Rewrite::Text("v.into_boxed_slice()".into())
};
Rewrite::Block(stmts, Some(Box::new(expr)))
}

mir_op::RewriteKind::FreeSafe { single: _ } => {
// `free(p)` -> `drop(p)`
assert!(matches!(hir_rw, Rewrite::Identity));
Rewrite::Call("std::mem::drop".to_string(), vec![self.get_subexpr(ex, 0)])
}

mir_op::RewriteKind::ReallocSafe {
ref zero_ty,
elem_size,
src_single,
dest_single,
} => {
// `realloc(p, n)` -> `Box::new(...)`
assert!(matches!(hir_rw, Rewrite::Identity));
let zeroize_expr = generate_zeroize_expr(zero_ty);
let mut stmts = vec![
Rewrite::Let(vec![
("src_ptr".into(), self.get_subexpr(ex, 0)),
("dest_byte_len".into(), self.get_subexpr(ex, 1)),
]),
Rewrite::Let1(
"dest_n".into(),
Box::new(format_rewrite!("dest_byte_len as usize / {elem_size}")),
),
];
if dest_single {
stmts.push(Rewrite::Text("assert_eq!(dest_n, 1)".into()));
}
let expr = match (src_single, dest_single) {
(false, false) => {
stmts.push(Rewrite::Let1(
"mut dest_ptr".into(),
Box::new(Rewrite::Text("Vec::from(src_ptr)".into())),
));
stmts.push(format_rewrite!(
"dest_ptr.resize_with(dest_n, || {})",
zeroize_expr,
));
Rewrite::Text("dest_ptr.into_boxed_slice()".into())
}
(false, true) => {
format_rewrite!(
"src_ptr.into_iter().next().unwrap_or_else(|| {})",
zeroize_expr
)
}
(true, false) => {
stmts.push(Rewrite::Let1(
"mut dest_ptr".into(),
Box::new(Rewrite::Text("Vec::with_capacity(dest_n)".into())),
));
stmts.push(Rewrite::Text(
"if dest_n >= 1 { dest_ptr.push(*src_ptr); }".into(),
));
stmts.push(format_rewrite!(
"dest_ptr.resize_with(dest_n, || {})",
zeroize_expr,
));
Rewrite::Text("dest_ptr.into_boxed_slice()".into())
}
(true, true) => Rewrite::Text("src_ptr".into()),
};
Rewrite::Block(stmts, Some(Box::new(expr)))
}

mir_op::RewriteKind::CellGet => {
// `*x` to `Cell::get(x)`
assert!(matches!(hir_rw, Rewrite::Identity));
Expand Down Expand Up @@ -566,7 +679,7 @@ fn generate_zeroize_code(zero_ty: &ZeroizeType, lv: &str) -> String {
match *zero_ty {
ZeroizeType::Int => format!("{lv} = 0"),
ZeroizeType::Bool => format!("{lv} = false"),
ZeroizeType::Iterable(ref elem_zero_ty) => format!(
ZeroizeType::Array(ref elem_zero_ty) => format!(
"
{{
for elem in {lv}.iter_mut() {{
Expand All @@ -576,7 +689,7 @@ fn generate_zeroize_code(zero_ty: &ZeroizeType, lv: &str) -> String {
",
generate_zeroize_code(elem_zero_ty, "(*elem)")
),
ZeroizeType::Struct(ref fields) => {
ZeroizeType::Struct(_, ref fields) => {
eprintln!("zeroize: {} fields on {lv}: {fields:?}", fields.len());
let mut s = String::new();
writeln!(s, "{{").unwrap();
Expand All @@ -594,6 +707,27 @@ fn generate_zeroize_code(zero_ty: &ZeroizeType, lv: &str) -> String {
}
}

/// Generate the source text of a Rust expression that evaluates to a zeroized value of the
/// type described by `zero_ty`.
///
/// The returned string is spliced into rewritten code (for example as the argument of
/// `Box::new(..)`), so it must be a syntactically valid expression:
///
/// * `Int` yields `0` and `Bool` yields `false`.
/// * `Array` yields a `std::array::from_fn` call whose closure returns the zeroized element.
///   No length is spelled out in the generated text, so it has to be inferred where the
///   expression is used.
/// * `Struct` yields a struct literal `Name { field: <zeroized field>, ... }` with one field
///   per line, recursing into each field's `ZeroizeType`.
fn generate_zeroize_expr(zero_ty: &ZeroizeType) -> String {
    match *zero_ty {
        ZeroizeType::Int => "0".to_string(),
        ZeroizeType::Bool => "false".to_string(),
        // `std::array::from_fn` invokes its callback with the element index, so the generated
        // closure must accept (and here ignore) one argument; a zero-argument closure `|| ..`
        // would make the rewritten code fail to compile.
        ZeroizeType::Array(ref elem_zero_ty) => format!(
            "std::array::from_fn(|_| {})",
            generate_zeroize_expr(elem_zero_ty)
        ),
        ZeroizeType::Struct(ref name, ref fields) => {
            let mut s = String::new();
            writeln!(s, "{} {{", name).unwrap();
            for (field_name, field_zero_ty) in fields {
                writeln!(
                    s,
                    "{}: {},",
                    field_name,
                    generate_zeroize_expr(field_zero_ty)
                )
                .unwrap();
            }
            writeln!(s, "}}").unwrap();
            s
        }
    }
}

fn take_prefix_while<'a, T>(slice: &mut &'a [T], mut pred: impl FnMut(&'a T) -> bool) -> &'a [T] {
let i = slice.iter().position(|x| !pred(x)).unwrap_or(slice.len());
let (a, b) = slice.split_at(i);
Expand All @@ -614,14 +748,14 @@ pub fn convert_cast_rewrite(kind: &mir_op::RewriteKind, hir_rw: Rewrite) -> Rewr
Rewrite::Ref(Box::new(elem), mutbl_from_bool(mutbl))
}

mir_op::RewriteKind::MutToImm => {
// `p` -> `&*p`
mir_op::RewriteKind::Reborrow { mutbl } => {
// `p` -> `&*p` / `&mut *p`
let hir_rw = match fold_mut_to_imm(hir_rw) {
Ok(folded_rw) => return folded_rw,
Err(rw) => rw,
};
let place = Rewrite::Deref(Box::new(hir_rw));
Rewrite::Ref(Box::new(place), hir::Mutability::Not)
Rewrite::Ref(Box::new(place), mutbl_from_bool(mutbl))
}

mir_op::RewriteKind::OptionUnwrap => {
Expand Down Expand Up @@ -661,6 +795,33 @@ pub fn convert_cast_rewrite(kind: &mir_op::RewriteKind, hir_rw: Rewrite) -> Rewr
Rewrite::MethodCall(ref_method, Box::new(hir_rw), vec![])
}

mir_op::RewriteKind::DynOwnedUnwrap => {
Rewrite::MethodCall("unwrap".to_string(), Box::new(hir_rw), vec![])
}
mir_op::RewriteKind::DynOwnedTake => {
// `p` -> `mem::replace(&mut p, Err(()))`
Rewrite::Call(
"std::mem::replace".to_string(),
vec![
Rewrite::Ref(Box::new(hir_rw), hir::Mutability::Mut),
Rewrite::Text("Err(())".into()),
],
)
}
mir_op::RewriteKind::DynOwnedWrap => {
Rewrite::Call("std::result::Result::<_, ()>::Ok".to_string(), vec![hir_rw])
}

mir_op::RewriteKind::DynOwnedDowngrade { mutbl } => {
let ref_method = if mutbl {
"as_deref_mut".into()
} else {
"as_deref".into()
};
let hir_rw = Rewrite::MethodCall(ref_method, Box::new(hir_rw), vec![]);
Rewrite::MethodCall("unwrap".into(), Box::new(hir_rw), vec![])
}

mir_op::RewriteKind::CastRefToRaw { mutbl } => {
// `addr_of!(*p)` is cleaner than `p as *const _`; we don't know the pointee
// type here, so we can't emit `p as *const T`.
Expand Down
Loading

0 comments on commit 0872f48

Please sign in to comment.