Skip to content

Commit

Permalink
analyze: allow pointee analysis to work interprocedurally (#1172)
Browse files Browse the repository at this point in the history
This branch modifies the pointee-type analysis to use a single global
`VarTable` for all functions, which allows pointee inference variables
produced in one function to be resolved in another. This is necessary
for lighttpd's `buffer` module, which allocates a `buffer` object in one
function (producing an inference variable for the `malloc`) and
initializes it in a different function (where the inference variable can
now be resolved).

The pointee analysis previously involved an "export" operation that ran
after processing each function to convert the analysis results to a form
that makes sense in other functions. This meant that the results
couldn't contain any inference variables or mention any local
`PointerId`s. Any entries in the results that violated these constraints
would be erased, and the result set would be marked "incomplete". With
this branch, both constraints are lifted, as both inference variables
and local `PointerId`s are representable in the global scope.
  • Loading branch information
spernsteiner authored Dec 3, 2024
2 parents ee4b3cc + 07a0cb7 commit a0850b1
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 90 deletions.
21 changes: 6 additions & 15 deletions c2rust-analyze/src/analyze.rs
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,8 @@ fn run(tcx: TyCtxt) {
// Infer pointee types
// ----------------------------------

let mut pointee_vars = pointee_type::VarTable::default();

for &ldid in &all_fn_ldids {
if gacx.fn_analysis_invalid(ldid.to_def_id()) {
continue;
Expand All @@ -591,7 +593,7 @@ fn run(tcx: TyCtxt) {
let acx = gacx.function_context_with_data(&mir, info.acx_data.take());

let r = panic_detail::catch_unwind(AssertUnwindSafe(|| {
pointee_type::generate_constraints(&acx, &mir)
pointee_type::generate_constraints(&acx, &mir, &mut pointee_vars)
}));

let local_pointee_types = LocalPointerTable::new(acx.local_ptr_base(), acx.num_pointers());
Expand Down Expand Up @@ -625,12 +627,6 @@ fn run(tcx: TyCtxt) {
assert!(loop_count <= 1000);
let old_global_pointee_types = global_pointee_types.clone();

// Clear the `incomplete` flags for all global pointers. See comment in
// `pointee_type::solve::solve_constraints`.
for (_, tys) in global_pointee_types.iter_mut() {
tys.incomplete = false;
}

for &ldid in &all_fn_ldids {
if gacx.fn_analysis_invalid(ldid.to_def_id()) {
continue;
Expand All @@ -640,7 +636,7 @@ fn run(tcx: TyCtxt) {

let pointee_constraints = info.pointee_constraints.get();
let pointee_types = global_pointee_types.and_mut(info.local_pointee_types.get_mut());
pointee_type::solve_constraints(pointee_constraints, pointee_types);
pointee_type::solve_constraints(pointee_constraints, &pointee_vars, pointee_types);
}

if global_pointee_types == old_global_pointee_types {
Expand Down Expand Up @@ -2274,15 +2270,10 @@ fn print_function_pointee_types<'tcx>(

for ptr in all_pointer_ids {
let tys = &pointee_types[ptr];
if tys.ltys.is_empty() && !tys.incomplete {
if tys.tys.is_empty() {
continue;
}
debug!(
" pointer {:?}: {:?}{}",
ptr,
tys.ltys,
if tys.incomplete { " (INCOMPLETE)" } else { "" }
);
debug!(" pointer {:?}: {:?}", ptr, tys.tys);
}
}
}
Expand Down
5 changes: 0 additions & 5 deletions c2rust-analyze/src/pointee_type/constraint_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ pub enum Constraint<'tcx> {
pub struct ConstraintSet<'tcx> {
pub constraints: Vec<Constraint<'tcx>>,
constraint_dedup: HashSet<Constraint<'tcx>>,
pub var_table: VarTable<'tcx>,
}

impl<'tcx> ConstraintSet<'tcx> {
Expand All @@ -53,10 +52,6 @@ impl<'tcx> ConstraintSet<'tcx> {
pub fn subset(&mut self, p: PointerId, q: PointerId) {
self.add(Constraint::Subset(p, q));
}

pub fn fresh_var(&mut self) -> CTy<'tcx> {
self.var_table.fresh()
}
}

/// A "constraint type", which is either an `LTy` or an inference variable.
Expand Down
5 changes: 3 additions & 2 deletions c2rust-analyze/src/pointee_type/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@ mod constraint_set;
mod solve;
mod type_check;

pub use self::constraint_set::{CTy, Constraint, ConstraintSet};
pub use self::constraint_set::{CTy, Constraint, ConstraintSet, VarTable};
pub use self::solve::{solve_constraints, PointeeTypes};

pub fn generate_constraints<'tcx>(
acx: &AnalysisCtxt<'_, 'tcx>,
mir: &Body<'tcx>,
vars: &mut VarTable<'tcx>,
) -> ConstraintSet<'tcx> {
type_check::visit(acx, mir)
type_check::visit(acx, mir, vars)
}

pub fn remap_pointers_global<'tcx>(
Expand Down
74 changes: 35 additions & 39 deletions c2rust-analyze/src/pointee_type/solve.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ fn index_both<'a, T>(
/// can contain local `CTy::Var`s and refer to local `PointerId`s.
pub fn propagate_types<'tcx>(
cset: &ConstraintSet<'tcx>,
vars: &VarTable<'tcx>,
mut ty_sets: PointerTableMut<HashSet<CTy<'tcx>>>,
) {
// Map from each `PointerId` to the `PointerId`s whose `ty_sets` should be supersets.
Expand Down Expand Up @@ -108,12 +109,12 @@ pub fn propagate_types<'tcx>(
// example.
for constraint in &cset.constraints {
if let Constraint::AllTypesCompatibleWith(ptr, cty) = *constraint {
unify_types(&cset.var_table, &ty_sets[ptr], Some(cty));
unify_types(vars, &ty_sets[ptr], Some(cty));
}
}

for (_, ctys) in ty_sets.iter() {
unify_types(&cset.var_table, ctys, None);
unify_types(vars, ctys, None);
}

#[cfg(debug_assertions)]
Expand All @@ -129,25 +130,42 @@ pub fn propagate_types<'tcx>(
#[derive(Clone, PartialEq, Eq, Debug, Default)]
pub struct PointeeTypes<'tcx> {
/// The possible pointee types for this pointer.
pub ltys: HashSet<LTy<'tcx>>,
/// If set, `ltys` is incomplete - the analysis identified pointee types that couldn't be
/// exported into global scope.
pub incomplete: bool,
pub tys: HashSet<CTy<'tcx>>,
}

impl<'tcx> PointeeTypes<'tcx> {
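/// Get the sole `LTy` in this set, if there is exactly one entry and that entry is a concrete
/// type (`CTy::Ty`) rather than an inference variable (`CTy::Var`).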
pub fn get_sole_lty(&self) -> Option<LTy<'tcx>> {
if self.incomplete || self.ltys.len() != 1 {
if self.tys.len() != 1 {
return None;
}
let lty = *self.ltys.iter().next().unwrap();
Some(lty)
match self.tys.iter().copied().next()? {
CTy::Var(_) => None,
CTy::Ty(lty) => Some(lty),
}
}

pub fn merge(&mut self, other: PointeeTypes<'tcx>) {
self.ltys.extend(other.ltys);
self.incomplete |= other.incomplete;
self.tys.extend(other.tys);
}

pub fn simplify(&mut self, vars: &VarTable<'tcx>) {
let mut add = Vec::new();
let mut remove = Vec::new();
for &cty in &self.tys {
let rep = vars.cty_rep(cty);
if rep != cty {
remove.push(cty);
add.push(rep);
}
}

for cty in remove {
self.tys.remove(&cty);
}
for cty in add {
self.tys.insert(cty);
}
}
}

Expand All @@ -158,9 +176,7 @@ fn import<'tcx>(
) {
for (ptr, tys) in pointee_tys.iter() {
let ty_set = &mut ty_sets[ptr];
for &lty in &tys.ltys {
ty_set.insert(CTy::Ty(lty));
}
ty_set.extend(tys.tys.iter().copied());
}
}

Expand All @@ -170,41 +186,21 @@ fn export<'tcx>(
ty_sets: PointerTable<HashSet<CTy<'tcx>>>,
mut pointee_tys: PointerTableMut<PointeeTypes<'tcx>>,
) {
let local_ptr_range = pointee_tys.local().range();
for (ptr, ctys) in ty_sets.iter() {
let out = &mut pointee_tys[ptr];
for &cty in ctys {
if let CTy::Ty(lty) = var_table.cty_rep(cty) {
let mut ok = true;
lty.for_each_label(&mut |p| {
if local_ptr_range.contains(p) {
ok = false;
}
});
if ok {
out.ltys.insert(lty);
continue;
}
}
// If we failed to export this `CTy`, mark the `PointeeTypes` incomplete.
out.incomplete = true;
}
out.tys.extend(ctys.iter().copied());
out.simplify(var_table);
}
}

pub fn solve_constraints<'tcx>(
cset: &ConstraintSet<'tcx>,
vars: &VarTable<'tcx>,
mut pointee_tys: PointerTableMut<PointeeTypes<'tcx>>,
) {
// Clear the `incomplete` flags for all local pointers. If there are still non-exportable
// types for those pointers, the flag will be set again in `export()`.
for (_, tys) in pointee_tys.local_mut().iter_mut() {
tys.incomplete = false;
}

let mut ty_sets = OwnedPointerTable::with_len_of(&pointee_tys.borrow());
import(pointee_tys.borrow(), ty_sets.borrow_mut());
init_type_sets(cset, ty_sets.borrow_mut());
propagate_types(cset, ty_sets.borrow_mut());
export(&cset.var_table, ty_sets.borrow(), pointee_tys.borrow_mut());
propagate_types(cset, vars, ty_sets.borrow_mut());
export(vars, ty_sets.borrow(), pointee_tys.borrow_mut());
}
16 changes: 11 additions & 5 deletions c2rust-analyze/src/pointee_type/type_check.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use super::constraint_set::{CTy, ConstraintSet};
use super::constraint_set::{CTy, ConstraintSet, VarTable};
use crate::context::{AnalysisCtxt, LTy, PointerId};
use crate::panic_detail;
use crate::util::{describe_rvalue, ty_callee, Callee, RvalueDesc, UnknownDefCallee};
Expand All @@ -13,6 +13,7 @@ struct TypeChecker<'tcx, 'a> {
acx: &'a AnalysisCtxt<'a, 'tcx>,
mir: &'a Body<'tcx>,
constraints: ConstraintSet<'tcx>,
vars: &'a mut VarTable<'tcx>,
}

impl<'tcx> TypeChecker<'tcx, '_> {
Expand Down Expand Up @@ -304,7 +305,7 @@ impl<'tcx> TypeChecker<'tcx, '_> {
// about the concrete type of the data, but it does ensure that the pointee type of
// the argument operand matches the pointee type of other pointers to the same
// allocation, which lets us remove a `void*` cast during rewriting.
let var = self.constraints.fresh_var();
let var = self.vars.fresh();
assert_eq!(args.len(), 1);
let arg_lty = self.acx.type_of(&args[0]);
self.use_pointer_at_type(arg_lty.label, var);
Expand All @@ -317,7 +318,7 @@ impl<'tcx> TypeChecker<'tcx, '_> {
// variable and solve for it later.
//
// In the future, we might check the copy length as described for `malloc`.
let var = self.constraints.fresh_var();
let var = self.vars.fresh();
assert_eq!(args.len(), 3);
let dest_arg_lty = self.acx.type_of(&args[0]);
let src_arg_lty = self.acx.type_of(&args[1]);
Expand All @@ -329,7 +330,7 @@ impl<'tcx> TypeChecker<'tcx, '_> {
// We treat this much like `memcpy`, but with only a store, not a load.
//
// In the future, we might check the length as described for `malloc`.
let var = self.constraints.fresh_var();
let var = self.vars.fresh();
assert_eq!(args.len(), 3);
let dest_arg_lty = self.acx.type_of(&args[0]);
self.use_pointer_at_type(dest_lty.label, var);
Expand All @@ -346,11 +347,16 @@ impl<'tcx> TypeChecker<'tcx, '_> {
}
}

pub fn visit<'tcx>(acx: &AnalysisCtxt<'_, 'tcx>, mir: &Body<'tcx>) -> ConstraintSet<'tcx> {
pub fn visit<'tcx>(
acx: &AnalysisCtxt<'_, 'tcx>,
mir: &Body<'tcx>,
vars: &mut VarTable<'tcx>,
) -> ConstraintSet<'tcx> {
let mut tc = TypeChecker {
acx,
mir,
constraints: ConstraintSet::default(),
vars,
};

for (bb, bb_data) in mir.basic_blocks().iter_enumerated() {
Expand Down
24 changes: 0 additions & 24 deletions c2rust-analyze/src/pointer_id.rs
Original file line number Diff line number Diff line change
Expand Up @@ -268,14 +268,6 @@ impl<T> LocalPointerTable<T> {
ptr.index().wrapping_sub(self.base) < self.len() as u32
}

/// Helper for performing `contains` checks while `self` is mutably borrowed.
pub fn range(&self) -> PointerRange {
PointerRange {
base: self.base,
len: self.len() as u32,
}
}

pub fn iter(&self) -> impl Iterator<Item = (PointerId, &T)> {
let base = self.base;
self.table
Expand Down Expand Up @@ -411,22 +403,6 @@ impl<T> IndexMut<PointerId> for GlobalPointerTable<T> {
}
}

pub struct PointerRange {
base: u32,
len: u32,
}

impl PointerRange {
pub fn contains(&self, ptr: PointerId) -> bool {
// If `ptr.index() < self.base`, the subtraction will wrap to a large number in excess of
// `self.len()`.
//
// Note that `base + len` can't overflow `u32::MAX` due to checks in `LocalPointerTable`
// above.
ptr.index().wrapping_sub(self.base) < self.len
}
}

#[allow(dead_code)]
impl<'a, T> PointerTable<'a, T> {
pub fn new(
Expand Down

0 comments on commit a0850b1

Please sign in to comment.