From a7aac0d9f856ca85724fd863410ad1e3d697230a Mon Sep 17 00:00:00 2001 From: Andrei Homescu Date: Tue, 23 Jul 2024 23:24:09 -0700 Subject: [PATCH] Keep track of unique projection combinations in Project Add an index value to Project events to differentiate between different projections with the same pointer offset, e.g., (*p).x and (*p).x.a if a is the first field of the structure. This is implemented as an IndexSet<Vec<usize>> where each element is a unique combination of field projections. The Project index points to an element in this set. --- Cargo.lock | 1 + analysis/runtime/Cargo.toml | 1 + analysis/runtime/src/events.rs | 8 ++- analysis/runtime/src/handlers.rs | 4 +- analysis/runtime/src/metadata.rs | 6 +- dynamic_instrumentation/src/instrument.rs | 23 +++++- dynamic_instrumentation/src/into_operand.rs | 14 ++++ dynamic_instrumentation/src/point/mod.rs | 7 ++ pdg/src/builder.rs | 2 +- pdg/src/graph.rs | 4 +- pdg/src/info.rs | 80 ++++++++++----------- 11 files changed, 100 insertions(+), 50 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2a8d9e267f..4f5a93ec53 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -198,6 +198,7 @@ dependencies = [ "crossbeam-utils", "enum_dispatch", "fs-err", + "indexmap", "once_cell", "serde", ] diff --git a/analysis/runtime/Cargo.toml b/analysis/runtime/Cargo.toml index 7f2b2bc538..14436d99c4 100644 --- a/analysis/runtime/Cargo.toml +++ b/analysis/runtime/Cargo.toml @@ -19,3 +19,4 @@ enum_dispatch = "0.3" fs-err = "2" crossbeam-queue = "0.3" crossbeam-utils = "0.8" +indexmap = { version = "1.9", features = ["serde"] } diff --git a/analysis/runtime/src/events.rs b/analysis/runtime/src/events.rs index 1dea82d532..e0932177db 100644 --- a/analysis/runtime/src/events.rs +++ b/analysis/runtime/src/events.rs @@ -31,7 +31,11 @@ pub enum EventKind { CopyRef, /// Projection. Used for operations like `_2 = &(*_1).0`.
- Project(Pointer, Pointer), + /// The third value is a "projection index" that points to an element + /// of the projections data structure in the metadata. It is used to + /// disambiguate between different projections with the same pointer, + /// e.g., `(*p).x` and `(*p).x.a` where `a` is at offset 0. + Project(Pointer, Pointer, usize), Alloc { size: usize, @@ -88,7 +92,7 @@ impl Debug for EventKind { use EventKind::*; match *self { CopyPtr(ptr) => write!(f, "copy(0x{:x})", ptr), - Project(ptr, new_ptr) => write!(f, "project(0x{:x}, 0x{:x})", ptr, new_ptr), + Project(ptr, new_ptr, idx) => write!(f, "project(0x{:x}, 0x{:x}, [{}])", ptr, new_ptr, idx), Alloc { size, ptr } => { write!(f, "malloc({}) -> 0x{:x}", size, ptr) } diff --git a/analysis/runtime/src/handlers.rs b/analysis/runtime/src/handlers.rs index 1b121656d2..8876db38b1 100644 --- a/analysis/runtime/src/handlers.rs +++ b/analysis/runtime/src/handlers.rs @@ -110,10 +110,10 @@ pub const HOOK_FUNCTIONS: &[&str] = &[ hook_fn!(offset), ]; -pub fn ptr_project(mir_loc: MirLocId, ptr: usize, new_ptr: usize) { +pub fn ptr_project(mir_loc: MirLocId, ptr: usize, new_ptr: usize, proj_idx: usize) { RUNTIME.send_event(Event { mir_loc, - kind: EventKind::Project(ptr, new_ptr), + kind: EventKind::Project(ptr, new_ptr, proj_idx), }); } diff --git a/analysis/runtime/src/metadata.rs b/analysis/runtime/src/metadata.rs index 0856bf8d6e..d349ece63f 100644 --- a/analysis/runtime/src/metadata.rs +++ b/analysis/runtime/src/metadata.rs @@ -1,3 +1,4 @@ +use indexmap::IndexSet; use std::{ collections::HashMap, fmt::{self, Debug, Formatter}, @@ -13,6 +14,7 @@ use crate::mir_loc::{Func, FuncId, MirLoc, MirLocId}; pub struct Metadata { pub locs: Vec, pub functions: HashMap, + pub projections: IndexSet>, } impl Metadata { @@ -46,11 +48,13 @@ impl FromIterator for Metadata { fn from_iter>(iter: I) -> Self { let mut locs = Vec::new(); let mut functions = HashMap::new(); + let mut projections = IndexSet::new(); for metadata in iter { 
locs.extend(metadata.locs); functions.extend(metadata.functions); + projections.extend(metadata.projections); } - Self { locs, functions } + Self { locs, functions, projections } } } diff --git a/dynamic_instrumentation/src/instrument.rs b/dynamic_instrumentation/src/instrument.rs index bc5280dad5..d03ce496fa 100644 --- a/dynamic_instrumentation/src/instrument.rs +++ b/dynamic_instrumentation/src/instrument.rs @@ -32,12 +32,14 @@ use crate::point::{cast_ptr_to_usize, InstrumentationPriority}; use crate::point::{ CollectAddressTakenLocals, CollectInstrumentationPoints, RewriteAddressTakenLocals, }; +use crate::point::ProjectionSet; use crate::util::Convert; #[derive(Default)] pub struct Instrumenter { mir_locs: Mutex>, functions: Mutex>, + projections: Mutex>>, } impl Instrumenter { @@ -72,9 +74,10 @@ impl Instrumenter { pub fn finalize(&self, metadata_path: &Path) -> anyhow::Result<()> { let mut locs = self.mir_locs.lock().unwrap(); let mut functions = self.functions.lock().unwrap(); + let projections = std::mem::take(&mut *self.projections.lock().unwrap()); let locs = locs.drain(..).collect::>(); let functions = functions.drain().collect::>(); - let metadata = Metadata { locs, functions }; + let metadata = Metadata { locs, functions, projections }; let bytes = bincode::serialize(&metadata).context("Location serialization failed")?; let mut file = OpenOptions::new() .append(true) @@ -115,6 +118,12 @@ impl Instrumenter { } } +impl ProjectionSet for Instrumenter { + fn add_proj(&self, proj: Vec) -> usize { + self.projections.lock().unwrap().insert_full(proj).0 + } +} + fn is_shared_or_unsafe_ptr(ty: Ty) -> bool { ty.is_unsafe_ptr() || (ty.is_region_ptr() && !ty.is_mutable_ptr()) } @@ -374,7 +383,14 @@ impl<'tcx> Visitor<'tcx> for CollectInstrumentationPoints<'_, 'tcx> { while let Some((inner_deref_base, PlaceElem::Deref)) = proj_iter.next() { // Find the next Deref or end of projections + // Meanwhile, collect all `Field`s into a vector that we + // can add to the 
`projections` IndexSet + let mut fields = vec![]; let (outer_deref_base, have_outer_deref) = loop { + if let Some((_, PlaceElem::Field(idx, _))) = proj_iter.peek() { + fields.push(idx.index()); + } + match proj_iter.peek() { Some((base, PlaceElem::Deref)) => break (base.clone(), true), // Reached the end, we can use the full place @@ -387,6 +403,8 @@ impl<'tcx> Visitor<'tcx> for CollectInstrumentationPoints<'_, 'tcx> { // We have some other elements between the two projections if outer_deref_base.projection.len() - inner_deref_base.projection.len() > 1 { + let proj_idx = self.projections.add_proj(fields); + // There are non-deref projection elements between the two derefs. // Add a Project event between the start pointer of those field/index // projections and their final address. @@ -400,6 +418,7 @@ impl<'tcx> Visitor<'tcx> for CollectInstrumentationPoints<'_, 'tcx> { self.loc(location, location, project_fn) .arg_var(inner_deref_base) .arg_addr_of(outer_deref_base.clone()) + .arg_var(proj_idx) .add_to(self); } @@ -749,7 +768,7 @@ fn instrument_body<'a, 'tcx>( // collect instrumentation points let points = { - let mut collector = CollectInstrumentationPoints::new(tcx, hooks, body, local_to_address); + let mut collector = CollectInstrumentationPoints::new(tcx, hooks, body, local_to_address, state); collector.visit_body(body); collector.into_instrumentation_points() }; diff --git a/dynamic_instrumentation/src/into_operand.rs b/dynamic_instrumentation/src/into_operand.rs index 49c9527f5e..1ddfe92172 100644 --- a/dynamic_instrumentation/src/into_operand.rs +++ b/dynamic_instrumentation/src/into_operand.rs @@ -31,6 +31,20 @@ impl<'tcx> IntoOperand<'tcx> for u32 { } } +impl<'tcx> IntoOperand<'tcx> for usize { + fn op(self, tcx: TyCtxt<'tcx>) -> Operand<'tcx> { + Operand::Constant(Box::new(Constant { + span: DUMMY_SP, + user_ty: None, + literal: ConstantKind::Ty(ty::Const::from_bits( + tcx, + self.try_into().unwrap(), + ParamEnv::empty().and(tcx.types.usize), + )), + })) 
+ } +} + impl<'tcx> IntoOperand<'tcx> for Local { fn op(self, tcx: TyCtxt<'tcx>) -> Operand<'tcx> { Place::from(self).op(tcx) diff --git a/dynamic_instrumentation/src/point/mod.rs b/dynamic_instrumentation/src/point/mod.rs index 1b3a069c3f..f15398ab09 100644 --- a/dynamic_instrumentation/src/point/mod.rs +++ b/dynamic_instrumentation/src/point/mod.rs @@ -112,6 +112,10 @@ impl<'tcx> RewriteAddressTakenLocals<'tcx> { } } +pub trait ProjectionSet { + fn add_proj(&self, proj: Vec) -> usize; +} + pub struct CollectInstrumentationPoints<'a, 'tcx: 'a> { tcx: TyCtxt<'tcx>, hooks: Hooks<'tcx>, @@ -119,6 +123,7 @@ pub struct CollectInstrumentationPoints<'a, 'tcx: 'a> { pub instrumentation_points: Vec>, assignment: Option<(Place<'tcx>, Rvalue<'tcx>)>, pub addr_taken_local_addresses: IndexMap, + pub projections: &'a dyn ProjectionSet, } impl<'a, 'tcx: 'a> CollectInstrumentationPoints<'a, 'tcx> { @@ -127,6 +132,7 @@ impl<'a, 'tcx: 'a> CollectInstrumentationPoints<'a, 'tcx> { hooks: Hooks<'tcx>, body: &'a Body<'tcx>, addr_taken_local_addresses: IndexMap, + projections: &'a dyn ProjectionSet, ) -> Self { Self { tcx, @@ -135,6 +141,7 @@ impl<'a, 'tcx: 'a> CollectInstrumentationPoints<'a, 'tcx> { instrumentation_points: Default::default(), assignment: Default::default(), addr_taken_local_addresses, + projections, } } diff --git a/pdg/src/builder.rs b/pdg/src/builder.rs index d14ca1334e..c290b46f87 100644 --- a/pdg/src/builder.rs +++ b/pdg/src/builder.rs @@ -70,7 +70,7 @@ impl EventKindExt for EventKind { Realloc { .. } => NodeKind::Alloc(1), Free { .. } => NodeKind::Free, CopyPtr(..) | CopyRef => NodeKind::Copy, - Project(base_ptr, new_ptr) => NodeKind::Project(new_ptr - base_ptr), + Project(base_ptr, new_ptr, idx) => NodeKind::Project(new_ptr - base_ptr, idx), LoadAddr(..) => NodeKind::LoadAddr, StoreAddr(..) => NodeKind::StoreAddr, StoreAddrTaken(..) 
=> NodeKind::StoreAddr, diff --git a/pdg/src/graph.rs b/pdg/src/graph.rs index 01b9fa3ff6..9269f3d878 100644 --- a/pdg/src/graph.rs +++ b/pdg/src/graph.rs @@ -25,7 +25,7 @@ pub enum NodeKind { /// [`Field`] projection. /// /// Used for operations like `_2 = &(*_1).0`. - Project(usize), + Project(usize, usize), /// Pointer arithmetic. /// @@ -119,7 +119,7 @@ impl Display for NodeKind { use NodeKind::*; match self { Copy => write!(f, "copy"), - Project(offset) => write!(f, "project.{offset}"), + Project(offset, idx) => write!(f, "project.{offset}[{idx}]"), Offset(offset) => write!(f, "offset[{offset}]"), AddrOfLocal(local) => write!(f, "&{local:?}"), _AddrOfStatic(static_) => write!(f, "&'static {static_:?}"), diff --git a/pdg/src/info.rs b/pdg/src/info.rs index 1a247acfba..0538b7604d 100644 --- a/pdg/src/info.rs +++ b/pdg/src/info.rs @@ -113,7 +113,7 @@ fn collect_children(g: &Graph) -> HashMap)>> { .rev() .filter_map(|(child, child_node)| Some((child_node.source?, child, child_node))) { - if let NodeKind::Project(f) = child_node.kind { + if let NodeKind::Project(_, f) = child_node.kind { let my_children = children .remove(&child) @@ -256,8 +256,8 @@ mod test { mk_node(g, NodeKind::StoreAddr, Some(source)) } - fn mk_project(g: &mut Graph, source: NodeId, field: impl Into) -> NodeId { - mk_node(g, NodeKind::Project(field.into()), Some(source)) + fn mk_project(g: &mut Graph, source: NodeId, field: impl Into, idx: usize) -> NodeId { + mk_node(g, NodeKind::Project(field.into(), idx), Some(source)) } fn mk_offset(g: &mut Graph, source: NodeId, i: isize) -> NodeId { @@ -486,10 +486,10 @@ mod test { // let mut a = Point { x: 0, y: 0 }; let a = mk_addr_of_local(&mut g, 0_u32); // let b = &mut a.x; - let b11 = mk_project(&mut g, a, 0_usize); + let b11 = mk_project(&mut g, a, 0_usize, 0); let b1 = mk_copy(&mut g, b11); // let c = &mut a.y; - let c11 = mk_project(&mut g, a, 1_usize); + let c11 = mk_project(&mut g, a, 1_usize, 1); let c1 = mk_copy(&mut g, c11); // *b = 1; let 
b2 = mk_store_addr(&mut g, b1); @@ -536,17 +536,17 @@ mod test { // let j = &mut a; let j = mk_copy(&mut g, a); // let b = &mut j.x; - let b11 = mk_project(&mut g, j, 0_usize); + let b11 = mk_project(&mut g, j, 0_usize, 0); let b1 = mk_copy(&mut g, b11); // let c = &mut j.x; - let c11 = mk_project(&mut g, j, 0_usize); + let c11 = mk_project(&mut g, j, 0_usize, 0); let c1 = mk_copy(&mut g, c11); // *b = 1; let b2 = mk_store_addr(&mut g, b1); // *c = 2; let c2 = mk_store_addr(&mut g, c1); // *(a.y) = 3; - let d1 = mk_project(&mut g, a, 1_usize); + let d1 = mk_project(&mut g, a, 1_usize, 1); let d2 = mk_store_addr(&mut g, d1); let pdg = build_pdg(g); @@ -585,7 +585,7 @@ mod test { // let b = &mut a; let b1 = mk_copy(&mut g, a); // let c = &mut a.y; - let c11 = mk_project(&mut g, a, 1_usize); + let c11 = mk_project(&mut g, a, 1_usize, 0); let c1 = mk_copy(&mut g, c11); // *c = 2; let c2 = mk_store_addr(&mut g, c1); @@ -631,10 +631,10 @@ mod test { // let b = &mut a; let b1 = mk_copy(&mut g, a); // let c = &mut b.y; - let c1 = mk_project(&mut g, a, 1_usize); + let c1 = mk_project(&mut g, a, 1_usize, 0); let c2 = mk_copy(&mut g, c1); // let bb = &mut b.y; - let bb = mk_project(&mut g, b1, 1_usize); + let bb = mk_project(&mut g, b1, 1_usize, 0); let bb1 = mk_copy(&mut g, bb); // *c = 2; let c3 = mk_store_addr(&mut g, c2); @@ -676,14 +676,14 @@ mod test { // let mut a = ColorPoint { x: 0, y: 0, z: Color { r: 100, g: 100, b: 100 } }; let a = mk_addr_of_local(&mut g, 0_u32); // let b = &mut a.x; - let bb1 = mk_project(&mut g, a, x); + let bb1 = mk_project(&mut g, a, x, 0); let b1 = mk_copy(&mut g, bb1); // let c = &mut a.y; - let cc1 = mk_project(&mut g, a, y); + let cc1 = mk_project(&mut g, a, y, 1); let c1 = mk_copy(&mut g, cc1); // a.z.r = 200; - let x1 = mk_project(&mut g, a, z); - let x2 = mk_project(&mut g, x1, red); + let x1 = mk_project(&mut g, a, z, 2); + let x2 = mk_project(&mut g, x1, red, 0); let x3 = mk_store_addr(&mut g, x2); // *b = 4; let b2 = 
mk_store_addr(&mut g, b1); @@ -694,19 +694,19 @@ mod test { // *d = ColorPoint { x: 0, y: 0, z: Color { r: 20, g: 200, b: 20 } }; let d2 = mk_store_addr(&mut g, d1); // let e = &mut a.z; - let ee = mk_project(&mut g, a, z); + let ee = mk_project(&mut g, a, z, 2); let e = mk_copy(&mut g, ee); // let f = &mut e.g; - let ff1 = mk_project(&mut g, e, green); + let ff1 = mk_project(&mut g, e, green, 1); let f1 = mk_copy(&mut g, ff1); // let g = &mut e.g; - let ggg = mk_project(&mut g, e, green); + let ggg = mk_project(&mut g, e, green, 1); let gg = mk_copy(&mut g, ggg); // *f = 3; let f2 = mk_store_addr(&mut g, f1); // a.z.r = 100; - let x4 = mk_project(&mut g, a, z); - let x5 = mk_project(&mut g, x4, green); + let x4 = mk_project(&mut g, a, z, 2); + let x5 = mk_project(&mut g, x4, green, 1); let x6 = mk_store_addr(&mut g, x5); let pdg = build_pdg(g); @@ -759,10 +759,10 @@ mod test { // let mut a = (1, (2, 3)); let a = mk_addr_of_local(&mut g, 0_u32); // let x = &mut a.0; - let x1 = mk_project(&mut g, a, 0_usize); + let x1 = mk_project(&mut g, a, 0_usize, 0); let x2 = mk_copy(&mut g, x1); // let y = &mut a.1; - let y1 = mk_project(&mut g, a, 1_usize); + let y1 = mk_project(&mut g, a, 1_usize, 1); let y2 = mk_copy(&mut g, y1); // *x = 1; let x3 = mk_store_addr(&mut g, x2); @@ -817,12 +817,12 @@ mod test { // let mut a = (1, (2, 3)); let a = mk_addr_of_local(&mut g, 0_u32); // let x = &mut a.1.0; - let x1 = mk_project(&mut g, a, 1_usize); - let x2 = mk_project(&mut g, x1, 0_usize); + let x1 = mk_project(&mut g, a, 1_usize, 0); + let x2 = mk_project(&mut g, x1, 0_usize, 1); let x3 = mk_copy(&mut g, x2); // let y = &mut a.1.1; - let y1 = mk_project(&mut g, a, 1_usize); - let y2 = mk_project(&mut g, y1, 1_usize); + let y1 = mk_project(&mut g, a, 1_usize, 0); + let y2 = mk_project(&mut g, y1, 1_usize, 0); let y3 = mk_copy(&mut g, y2); // *x = 1; let x4 = mk_store_addr(&mut g, x3); @@ -871,16 +871,16 @@ mod test { //let mut a = (1, (2, 3)); let a = mk_addr_of_local(&mut g, 
0_u32); //let mut x = &mut a.1; - let x1 = mk_project(&mut g, a, 1_usize); + let x1 = mk_project(&mut g, a, 1_usize, 0); let x2 = mk_copy(&mut g, x1); //let mut y = &mut a.1; - let y1 = mk_project(&mut g, a, 1_usize); + let y1 = mk_project(&mut g, a, 1_usize, 0); let y2 = mk_copy(&mut g, y1); // *(x.0) = 4; - let x3 = mk_project(&mut g, x2, 0_usize); + let x3 = mk_project(&mut g, x2, 0_usize, 1); let x4 = mk_store_addr(&mut g, x3); // *(y.1) = 2; - let y3 = mk_project(&mut g, y2, 1_usize); + let y3 = mk_project(&mut g, y2, 1_usize, 0); let y4 = mk_store_addr(&mut g, y3); let pdg = build_pdg(g); @@ -925,12 +925,12 @@ mod test { // let mut a = (1, (2, 3)); let a = mk_addr_of_local(&mut g, 0_u32); // let x = &mut a.1.0; - let x1 = mk_project(&mut g, a, 1_usize); - let x2 = mk_project(&mut g, x1, 0_usize); + let x1 = mk_project(&mut g, a, 1_usize, 0); + let x2 = mk_project(&mut g, x1, 0_usize, 1); let x3 = mk_copy(&mut g, x2); // let y = &mut a.1.0; - let y1 = mk_project(&mut g, a, 1_usize); - let y2 = mk_project(&mut g, y1, 0_usize); + let y1 = mk_project(&mut g, a, 1_usize, 0); + let y2 = mk_project(&mut g, y1, 0_usize, 1); let y3 = mk_copy(&mut g, y2); // *x = 1; let x4 = mk_store_addr(&mut g, x3); @@ -987,11 +987,11 @@ mod test { // let mut a = ([1, 2], [3, 4]); let a = mk_addr_of_local(&mut g, 0_u32); // let x = &mut a.0[0]; - let x1 = mk_project(&mut g, a, 1_usize); + let x1 = mk_project(&mut g, a, 1_usize, 0); let x2 = mk_offset(&mut g, x1, 0); let x3 = mk_copy(&mut g, x2); // let y = &mut a.0[1]; - let y1 = mk_project(&mut g, a, 1_usize); + let y1 = mk_project(&mut g, a, 1_usize, 0); let y2 = mk_offset(&mut g, y1, 1); let y3 = mk_copy(&mut g, y2); // *x = 1; @@ -1049,11 +1049,11 @@ mod test { // let mut a = ([1, 2], [3, 4]); let a = mk_addr_of_local(&mut g, 0_u32); // let x = &mut a.0[0]; - let x1 = mk_project(&mut g, a, 0_usize); + let x1 = mk_project(&mut g, a, 0_usize, 0); let x2 = mk_offset(&mut g, x1, 0); let x3 = mk_copy(&mut g, x2); // let y = &mut 
a.1[0]; - let y1 = mk_project(&mut g, a, 1_usize); + let y1 = mk_project(&mut g, a, 1_usize, 1); let y2 = mk_offset(&mut g, y1, 0); let y3 = mk_copy(&mut g, y2); // *x = 1; @@ -1121,11 +1121,11 @@ mod test { let p = mk_addr_of_local(&mut g, 0_u32); // let x = &mut (*p)[0].0; let x1 = mk_offset(&mut g, p, 0); - let x2 = mk_project(&mut g, x1, 0_usize); + let x2 = mk_project(&mut g, x1, 0_usize, 0); let x3 = mk_copy(&mut g, x2); // let y = &mut (*p)[0].1; let y1 = mk_offset(&mut g, p, 0); - let y2 = mk_project(&mut g, y1, 1_usize); + let y2 = mk_project(&mut g, y1, 1_usize, 1); let y3 = mk_copy(&mut g, y2); // *x = 1; let x4 = mk_store_addr(&mut g, x3);