diff --git a/c2rust-analyze/src/analyze.rs b/c2rust-analyze/src/analyze.rs index 84391a23a7..9f81e5ce66 100644 --- a/c2rust-analyze/src/analyze.rs +++ b/c2rust-analyze/src/analyze.rs @@ -1,7 +1,7 @@ use crate::annotate::AnnotationBuffer; use crate::borrowck; use crate::context::{ - self, AnalysisCtxt, AnalysisCtxtData, DontRewriteFieldReason, DontRewriteFnReason, + self, AnalysisCtxt, AnalysisCtxtData, Assignment, DontRewriteFieldReason, DontRewriteFnReason, DontRewriteStaticReason, FlagSet, GlobalAnalysisCtxt, GlobalAssignment, LFnSig, LTy, LTyCtxt, LocalAssignment, PermissionSet, PointerId, PointerInfo, }; @@ -17,6 +17,7 @@ use crate::pointee_type::PointeeTypes; use crate::pointer_id::GlobalPointerTable; use crate::pointer_id::LocalPointerTable; use crate::pointer_id::PointerTable; +use crate::pointer_id::PointerTableMut; use crate::recent_writes::RecentWrites; use crate::rewrite; use crate::type_desc; @@ -26,6 +27,7 @@ use crate::util::Callee; use crate::util::TestAttr; use ::log::warn; use c2rust_pdg::graph::Graphs; +use c2rust_pdg::info::NodeInfo; use rustc_hir::def::DefKind; use rustc_hir::def_id::CrateNum; use rustc_hir::def_id::DefId; @@ -59,6 +61,7 @@ use std::ops::Deref; use std::ops::DerefMut; use std::ops::Index; use std::panic::AssertUnwindSafe; +use std::path::Path; use std::str::FromStr; /// A wrapper around `T` that dynamically tracks whether it's initialized or not. 
@@ -932,140 +935,18 @@ fn run(tcx: TyCtxt) { } // Load permission info from PDG - let mut func_def_path_hash_to_ldid = HashMap::new(); - for &ldid in &all_fn_ldids { - let def_path_hash: (u64, u64) = tcx.def_path_hash(ldid.to_def_id()).0.as_value(); - eprintln!("def_path_hash {:?} = {:?}", def_path_hash, ldid); - func_def_path_hash_to_ldid.insert(def_path_hash, ldid); - } - - if let Some(pdg_file_path) = std::env::var_os("PDG_FILE") { - let f = std::fs::File::open(pdg_file_path).unwrap(); - let graphs: Graphs = bincode::deserialize_from(f).unwrap(); - - let mut known_nulls = HashSet::new(); - for g in &graphs.graphs { - for n in &g.nodes { - let dest_pl = match n.dest.as_ref() { - Some(x) => x, - None => { - continue; - } - }; - if !dest_pl.projection.is_empty() { - continue; - } - let dest = dest_pl.local; - let dest = Local::from_u32(dest.index); - if g.is_null { - known_nulls.insert((n.function.id, dest)); - } - } - } - - let allow_unsound = - env::var("C2RUST_ANALYZE_PDG_ALLOW_UNSOUND").map_or(false, |val| &val == "1"); - - for g in &graphs.graphs { - for n in &g.nodes { - let def_path_hash: (u64, u64) = n.function.id.0.into(); - let ldid = match func_def_path_hash_to_ldid.get(&def_path_hash) { - Some(&x) => x, - None => { - eprintln!( - "pdg: unknown DefPathHash {:?} for function {:?}", - n.function.id, n.function.name - ); - continue; - } - }; - let info = func_info.get_mut(&ldid).unwrap(); - let ldid_const = WithOptConstParam::unknown(ldid); - let mir = tcx.mir_built(ldid_const); - let mir = mir.borrow(); - let acx = gacx.function_context_with_data(&mir, info.acx_data.take()); - let mut asn = gasn.and(&mut info.lasn); - let mut updates_forbidden = - g_updates_forbidden.and_mut(&mut info.l_updates_forbidden); - - let dest_pl = match n.dest.as_ref() { - Some(x) => x, - None => { - info.acx_data.set(acx.into_data()); - continue; - } - }; - if !dest_pl.projection.is_empty() { - info.acx_data.set(acx.into_data()); - continue; - } - let dest = dest_pl.local; - let 
dest = Local::from_u32(dest.index); - - if acx.local_tys.get(dest).is_none() { - eprintln!( - "pdg: {}: local {:?} appears as dest, but is out of bounds", - n.function.name, dest - ); - info.acx_data.set(acx.into_data()); - continue; - } - let ptr = match acx.ptr_of(dest) { - Some(x) => x, - None => { - eprintln!( - "pdg: {}: local {:?} appears as dest, but has no PointerId", - n.function.name, dest - ); - info.acx_data.set(acx.into_data()); - continue; - } - }; - - let old_perms = asn.perms()[ptr]; - let mut perms = old_perms; - if known_nulls.contains(&(n.function.id, dest)) { - perms.remove(PermissionSet::NON_NULL); - } else if allow_unsound { - perms.insert(PermissionSet::NON_NULL); - // Unsound update: if we have never seen a NULL for - // this local in the PDG, prevent the static analysis - // from changing that permission. - updates_forbidden[ptr].insert(PermissionSet::NON_NULL); - } - - if let Some(node_info) = n.info.as_ref() { - if node_info.flows_to.load.is_some() { - perms.insert(PermissionSet::READ); - } - if node_info.flows_to.store.is_some() { - perms.insert(PermissionSet::WRITE); - } - if node_info.flows_to.pos_offset.is_some() { - perms.insert(PermissionSet::OFFSET_ADD); - } - if node_info.flows_to.neg_offset.is_some() { - perms.insert(PermissionSet::OFFSET_SUB); - } - if !node_info.unique { - perms.remove(PermissionSet::UNIQUE); - } - } - - if perms != old_perms { - let added = perms & !old_perms; - let removed = old_perms & !perms; - let kept = old_perms & perms; - eprintln!( - "pdg: changed {:?}: added {:?}, removed {:?}, kept {:?}", - ptr, added, removed, kept - ); - - asn.perms_mut()[ptr] = perms; - } - - info.acx_data.set(acx.into_data()); - } + let pdg_compare = env::var("C2RUST_ANALYZE_COMPARE_PDG").as_deref() == Ok("1"); + // In compare mode, we load the PDG for comparison after analysis, not before. 
+ if !pdg_compare { + if let Some(pdg_file_path) = std::env::var_os("PDG_FILE") { + pdg_update_permissions( + &mut gacx, + &all_fn_ldids, + &mut func_info, + &mut gasn, + &mut g_updates_forbidden, + pdg_file_path, + ); } } @@ -1295,6 +1176,111 @@ fn run(tcx: TyCtxt) { } } + // PDG comparison mode: skip all normal rewriting, and instead add annotations describing + // places where the static analysis and PDG differ. + if pdg_compare { + // For each `PointerId`, this records whether we saw a null pointer stored in a location + // annotated with that `PointerId` and whether we saw at least one non-null pointer stored + // in such a location. + let mut observations = HashMap::<(Option, PointerId), (bool, bool)>::new(); + + let pdg_file_path = std::env::var_os("PDG_FILE") + .unwrap_or_else(|| panic!("must set PDG_FILE for PDG comparison mode")); + pdg_update_permissions_with_callback( + &mut gacx, + &all_fn_ldids, + &mut func_info, + &mut gasn, + &mut g_updates_forbidden, + pdg_file_path, + |_asn, _updates_forbidden, ldid, ptr, _node_info, node_is_non_null| { + let parent = if ptr.is_global() { None } else { Some(ldid) }; + let obs = observations.entry((parent, ptr)).or_insert((false, false)); + if node_is_non_null { + obs.1 = true; + } else { + obs.0 = true; + } + }, + ); + + let mut ann = AnnotationBuffer::new(tcx); + // Generate comparison annotations for all functions. + for ldid in tcx.hir().body_owners() { + // Skip any body owners that aren't present in `func_info`, and also get the info + // itself. 
+ let info = match func_info.get_mut(&ldid) { + Some(x) => x, + None => continue, + }; + + if !info.acx_data.is_set() { + continue; + } + + let ldid_const = WithOptConstParam::unknown(ldid); + let mir = tcx.mir_built(ldid_const); + let mir = mir.borrow(); + let acx = gacx.function_context_with_data(&mir, info.acx_data.take()); + let asn = gasn.and(&mut info.lasn); + + // Generate inline annotations for pointer-typed locals + for (local, decl) in mir.local_decls.iter_enumerated() { + let span = local_span(decl); + let mut ptrs = Vec::new(); + let ty_str = context::print_ty_with_pointer_labels(acx.local_tys[local], |ptr| { + if ptr.is_none() { + return String::new(); + } + ptrs.push(ptr); + format!("{{{}}}", ptr) + }); + // Pointers where static analysis reports `NON_NULL` but dynamic reports nullable. + let mut static_non_null_ptrs = Vec::new(); + // Pointers where dynamic analysis reports `NON_NULL` but static reports nullable. + let mut dynamic_non_null_ptrs = Vec::new(); + for ptr in ptrs { + let static_non_null: bool = asn.perms()[ptr].contains(PermissionSet::NON_NULL); + let parent = if ptr.is_global() { None } else { Some(ldid) }; + let dynamic_non_null: Option = observations + .get(&(parent, ptr)) + .map(|&(saw_null, saw_non_null)| saw_non_null && !saw_null); + if dynamic_non_null.is_none() || dynamic_non_null == Some(static_non_null) { + // No conflict between static and dynamic results. 
+ continue; + } + if static_non_null { + static_non_null_ptrs.push(ptr); + } else { + dynamic_non_null_ptrs.push(ptr); + } + } + if static_non_null_ptrs.is_empty() && dynamic_non_null_ptrs.is_empty() { + continue; + } + ann.emit(span, format_args!("typeof({:?}) = {}", local, ty_str)); + if static_non_null_ptrs.len() > 0 { + ann.emit( + span, + format_args!(" static NON_NULL: {:?}", static_non_null_ptrs), + ); + } + if dynamic_non_null_ptrs.len() > 0 { + ann.emit( + span, + format_args!(" dynamic NON_NULL: {:?}", dynamic_non_null_ptrs), + ); + } + } + + info.acx_data.set(acx.into_data()); + } + + let annotations = ann.finish(); + rewrite::apply_rewrites(tcx, Vec::new(), annotations, rewrite::UpdateFiles::No); + return; + } + if !rewrite_pointwise { run2( None, @@ -1937,6 +1923,192 @@ fn apply_test_attr_force_non_null_args( } } +fn pdg_update_permissions<'tcx>( + gacx: &mut GlobalAnalysisCtxt<'tcx>, + all_fn_ldids: &[LocalDefId], + func_info: &mut HashMap>, + gasn: &mut GlobalAssignment, + g_updates_forbidden: &mut GlobalPointerTable, + pdg_file_path: impl AsRef, +) { + let allow_unsound = + env::var("C2RUST_ANALYZE_PDG_ALLOW_UNSOUND").map_or(false, |val| &val == "1"); + + pdg_update_permissions_with_callback( + gacx, + all_fn_ldids, + func_info, + gasn, + g_updates_forbidden, + pdg_file_path, + |asn, updates_forbidden, _ldid, ptr, node_info, node_is_non_null| { + let old_perms = asn.perms()[ptr]; + let mut perms = old_perms; + if !node_is_non_null { + perms.remove(PermissionSet::NON_NULL); + } else if allow_unsound { + perms.insert(PermissionSet::NON_NULL); + // Unsound update: if we have never seen a NULL for + // this local in the PDG, prevent the static analysis + // from changing that permission. 
+ updates_forbidden[ptr].insert(PermissionSet::NON_NULL); + } + + if let Some(node_info) = node_info { + if node_info.flows_to.load.is_some() { + perms.insert(PermissionSet::READ); + } + if node_info.flows_to.store.is_some() { + perms.insert(PermissionSet::WRITE); + } + if node_info.flows_to.pos_offset.is_some() { + perms.insert(PermissionSet::OFFSET_ADD); + } + if node_info.flows_to.neg_offset.is_some() { + perms.insert(PermissionSet::OFFSET_SUB); + } + if !node_info.unique { + perms.remove(PermissionSet::UNIQUE); + } + } + + if perms != old_perms { + let added = perms & !old_perms; + let removed = old_perms & !perms; + let kept = old_perms & perms; + eprintln!( + "pdg: changed {:?}: added {:?}, removed {:?}, kept {:?}", + ptr, added, removed, kept + ); + + asn.perms_mut()[ptr] = perms; + } + }, + ); +} + +/// Load PDG from `pdg_file_path` and invoke `callback` once per usable PDG node. +/// +/// A node is usable when its function is known and its `dest` is a projection-free local that +/// has a `PointerId`. For each such node this calls `callback(asn, updates_forbidden, ldid, ptr, +/// node_info, node_is_non_null)`; any permission update is left to the callback.
+fn pdg_update_permissions_with_callback<'tcx>( + gacx: &mut GlobalAnalysisCtxt<'tcx>, + all_fn_ldids: &[LocalDefId], + func_info: &mut HashMap>, + gasn: &mut GlobalAssignment, + g_updates_forbidden: &mut GlobalPointerTable, + pdg_file_path: impl AsRef, + mut callback: impl FnMut( + &mut Assignment, + &mut PointerTableMut, + LocalDefId, + PointerId, + Option<&NodeInfo>, + bool, + ), +) { + let tcx = gacx.tcx; + + let f = std::fs::File::open(pdg_file_path).unwrap(); + let graphs: Graphs = bincode::deserialize_from(f).unwrap(); + + let mut known_nulls = HashSet::new(); + for g in &graphs.graphs { + for n in &g.nodes { + let dest_pl = match n.dest.as_ref() { + Some(x) => x, + None => { + continue; + } + }; + if !dest_pl.projection.is_empty() { + continue; + } + let dest = dest_pl.local; + let dest = Local::from_u32(dest.index); + if g.is_null { + known_nulls.insert((n.function.id, dest)); + } + } + } + + let mut func_def_path_hash_to_ldid = HashMap::new(); + for &ldid in all_fn_ldids { + let def_path_hash: (u64, u64) = tcx.def_path_hash(ldid.to_def_id()).0.as_value(); + eprintln!("def_path_hash {:?} = {:?}", def_path_hash, ldid); + func_def_path_hash_to_ldid.insert(def_path_hash, ldid); + } + + for g in &graphs.graphs { + for n in &g.nodes { + let def_path_hash: (u64, u64) = n.function.id.0.into(); + let ldid = match func_def_path_hash_to_ldid.get(&def_path_hash) { + Some(&x) => x, + None => { + eprintln!( + "pdg: unknown DefPathHash {:?} for function {:?}", + n.function.id, n.function.name + ); + continue; + } + }; + let info = func_info.get_mut(&ldid).unwrap(); + let ldid_const = WithOptConstParam::unknown(ldid); + let mir = tcx.mir_built(ldid_const); + let mir = mir.borrow(); + let acx = gacx.function_context_with_data(&mir, info.acx_data.take()); + let mut asn = gasn.and(&mut info.lasn); + let mut updates_forbidden = g_updates_forbidden.and_mut(&mut info.l_updates_forbidden); + + let dest_pl = match n.dest.as_ref() { + Some(x) => x, + None => { + 
info.acx_data.set(acx.into_data()); + continue; + } + }; + if !dest_pl.projection.is_empty() { + info.acx_data.set(acx.into_data()); + continue; + } + let dest = dest_pl.local; + let dest = Local::from_u32(dest.index); + + if acx.local_tys.get(dest).is_none() { + eprintln!( + "pdg: {}: local {:?} appears as dest, but is out of bounds", + n.function.name, dest + ); + info.acx_data.set(acx.into_data()); + continue; + } + let ptr = match acx.ptr_of(dest) { + Some(x) => x, + None => { + eprintln!( + "pdg: {}: local {:?} appears as dest, but has no PointerId", + n.function.name, dest + ); + info.acx_data.set(acx.into_data()); + continue; + } + }; + + callback( + &mut asn, + &mut updates_forbidden, + ldid, + ptr, + n.info.as_ref(), + !known_nulls.contains(&(n.function.id, dest)), + ); + + info.acx_data.set(acx.into_data()); + } + } +} + fn local_span(decl: &LocalDecl) -> Span { let mut span = decl.source_info.span; if let Some(ref info) = decl.local_info { diff --git a/scripts/pdg.sh b/scripts/pdg.sh old mode 100755 new mode 100644 index da81717164..eed6e0355c --- a/scripts/pdg.sh +++ b/scripts/pdg.sh @@ -2,17 +2,15 @@ set -euox pipefail -CWD="${PWD}" -SCRIPT_PATH="${0}" -SCRIPT_DIR="${CWD}/$(dirname "${SCRIPT_PATH}")" +SCRIPT_PATH="$(realpath ${0})" +SCRIPT_DIR="$(dirname "${SCRIPT_PATH}")" +C2RUST_DIR="$(dirname "${SCRIPT_DIR}")" + +source ${SCRIPT_DIR}/pdg_setup.sh # Usage: `./pdg.sh ` # -# Environment Variables: -# * `PROFILE` (default `release`): -# a `cargo` profile as in `target/$PROFILE` -# * `NO_USE_PDG` (default empty): -# if non-empty, do not use the PDG as a starting point for analysis +# Environment Variables: see `scripts/pdg_setup.sh` for a full list. # # Instrument and run a test crate, create its PDG, and then run analysis on it. # @@ -27,80 +25,18 @@ SCRIPT_DIR="${CWD}/$(dirname "${SCRIPT_PATH}")" # A machine-readable PDG is saved to `pdg.bc` in the same directory. # 5.
Using the `pdg.bc` file as an initial state for analysis, run static # analysis using `c2rust-analyze`. -main() { - local test_dir="${1}" - local args=("${@:2}") - - local profile_dir_name="${PROFILE:-release}" - local profile_dir="target/${profile_dir_name}" - local profile="${profile_dir_name}" - if [[ "${profile}" == "debug" ]]; then - profile=dev - fi - local profile_args=(--profile "${profile}") - - local metadata="${test_dir}/metadata.bc" - local pdg="${test_dir}/pdg.bc" - local event_log="${test_dir}/log.bc" - local runtime="analysis/runtime" +main() { + CARGO_INSTRUMENT_COMMAND=run + # set the environment variables for instrumentation + c2rust-set-instrument-vars "${@}" # build and run a test with instrumentation - ( - unset RUSTFLAGS # transpiled code has tons of warnings; don't allow `-D warnings` - export RUST_BACKTRACE=1 - export INSTRUMENT_RUNTIME=bg - export INSTRUMENT_BACKEND=log - export INSTRUMENT_OUTPUT="${event_log}" - export INSTRUMENT_OUTPUT_APPEND=false - export METADATA_FILE="${metadata}" - - cargo run \ - --bin c2rust-instrument \ - "${profile_args[@]}" \ - -- \ - --metadata "${metadata}" \ - --set-runtime \ - --runtime-path "${runtime}" \ - -- run \ - --manifest-path "${test_dir}/Cargo.toml" \ - "${profile_args[@]}" \ - -- "${args[@]}" - ) + c2rust-instrument "${@}" # construct pdg from log events - ( - export RUST_BACKTRACE=full # print sources w/ color-eyre - export RUST_LOG=error - cargo run \ - --bin c2rust-pdg \ - "${profile_args[@]}" \ - -- \ - --event-log "${event_log}" \ - --metadata "${metadata}" \ - --print graphs \ - --print write-permissions \ - --print counts \ - --output "${pdg}" \ - > "${test_dir}/pdg.log" - ) + c2rust-pdg "${@}" # use pdg in analysis - ( - export RUST_BACKTRACE=full # print sources w/ color-eyre - export RUST_LOG=error - if [[ "${NO_USE_PDG:-}" == "" ]]; then - # cargo runs this from a different pwd, so make path absolute - export PDG_FILE="$(realpath ${pdg})" - fi - cargo run \ - --bin c2rust-analyze \ - 
"${profile_args[@]}" \ - -- \ - build \ - -- \ - "${profile_args[@]}" \ - --features=c2rust-analysis-rt \ - --manifest-path "${test_dir}/Cargo.toml" - ) + c2rust-analyze-with-pdg "${@}" } main "${@}" diff --git a/scripts/pdg_setup.sh b/scripts/pdg_setup.sh new file mode 100755 index 0000000000..87fdceaa61 --- /dev/null +++ b/scripts/pdg_setup.sh @@ -0,0 +1,117 @@ +#!/usr/bin/env bash +# +# This script can be source'd to provide a few useful +# shell functions for analysis using the PDG. +# +# Environment Variables: +# * `RUST_PROFILE` (default `release`): +# a `cargo` profile as in `target/$RUST_PROFILE` +# * `NO_USE_PDG` (default empty): +# if non-empty, do not use the PDG as a starting point for analysis +# * `CARGO_INSTRUMENT_COMMAND` (default `build`): +# the `cargo` command to run for instrumentation, e.g., `run` or `build` +# * `INSTRUMENT_OUTPUT_APPEND` (default `false`): +# set to `true` to append instrumentation events to the log instead +# of replacing the existing log on every invocation of the instrumented +# binary + +# Variables that are computed when the script is sourced +SCRIPT_PATH="$(realpath ${BASH_SOURCE[0]})" +SCRIPT_DIR="$(dirname "${SCRIPT_PATH}")" + +# Variables that are exported to the functions below +export C2RUST_DIR="$(dirname "${SCRIPT_DIR}")" + +_c2rust_prepare_vars() { + test_dir="$(realpath ${1})" + args=("${@:2}") + + profile_dir_name="${RUST_PROFILE:-release}" + + profile_dir="target/${profile_dir_name}" + profile="${profile_dir_name}" + if [[ "${profile}" == "debug" ]]; then + profile=dev + fi + profile_args=(--profile "${profile}") + + metadata="${test_dir}/metadata.bc" + pdg="${test_dir}/pdg.bc" + event_log="${test_dir}/log.bc" + runtime="${C2RUST_DIR}/analysis/runtime" +} + +c2rust-set-instrument-vars() { + _c2rust_prepare_vars "${@}" + + # We need these variables to have exactly these values + export INSTRUMENT_RUNTIME=bg + export INSTRUMENT_BACKEND=log + export INSTRUMENT_OUTPUT="${event_log}" + export 
METADATA_FILE="${metadata}" + + # These variables can be overridden by the user + export INSTRUMENT_OUTPUT_APPEND=${INSTRUMENT_OUTPUT_APPEND:-false} +} + +c2rust-instrument() ( + _c2rust_prepare_vars "${@}" + + unset RUSTFLAGS # transpiled code has tons of warnings; don't allow `-D warnings` + export RUST_BACKTRACE=full + + cd "${C2RUST_DIR}" + cargo run \ + --bin c2rust-instrument \ + "${profile_args[@]}" \ + -- \ + --metadata "${metadata}" \ + --set-runtime \ + --runtime-path "${runtime}" \ + -- "${CARGO_INSTRUMENT_COMMAND:-build}" \ + --manifest-path "${test_dir}/Cargo.toml" \ + "${profile_args[@]}" \ + -- "${args[@]}" +) + +c2rust-pdg() ( + _c2rust_prepare_vars "${@}" + + export RUST_BACKTRACE=full # print sources w/ color-eyre + export RUST_LOG=error + + cd "${C2RUST_DIR}" + cargo run \ + --bin c2rust-pdg \ + "${profile_args[@]}" \ + -- \ + --event-log "${event_log}" \ + --metadata "${metadata}" \ + --print graphs \ + --print write-permissions \ + --print counts \ + --output "${pdg}" \ + > "${test_dir}/pdg.log" +) + +c2rust-analyze-with-pdg() ( + _c2rust_prepare_vars "${@}" + + export RUST_BACKTRACE=full # print sources w/ color-eyre + export RUST_LOG=error + if [[ "${NO_USE_PDG:-}" == "" ]]; then + # `pdg` is already absolute (derived from the `realpath`'d `test_dir`), so the `cd` below is safe + export PDG_FILE="${pdg}" + fi + + cd "${C2RUST_DIR}" + cargo run \ + --bin c2rust-analyze \ + "${profile_args[@]}" \ + -- \ + build \ + -- \ + "${profile_args[@]}" \ + --features=c2rust-analysis-rt \ + --manifest-path "${test_dir}/Cargo.toml" +)