diff --git a/c2rust-analyze/README.md b/c2rust-analyze/README.md index e56a7cea25..e76517fe12 100644 --- a/c2rust-analyze/README.md +++ b/c2rust-analyze/README.md @@ -1,7 +1,62 @@ +# Usage + +Build `c2rust-analyze`: + +```sh +cargo build --release +``` + +Then, in the directory of a cargo project you wish to rewrite, run +`c2rust-analyze` on the project: + ```sh -cargo run --bin c2rust-analyze -- tests/filecheck/insertion_sort.rs -L "$(rustc --print target-libdir)" --crate-type rlib +.../path/to/c2rust/target/release/c2rust-analyze build |& tee c2rust-analyze.log ``` -This should produce a large amount of debug output, including a table at the -end listing the type and expression rewrites the analysis has inferred for the -`insertion_sort` function. +`c2rust-analyze` is currently at a prototype stage and produces verbose debug +output by default; the use of `tee` to capture the output to a log file allows +inspecting the results even when they exceed the length of the terminal +scrollback buffer. + +`c2rust-analyze` does not modify the target project's source code by default; +it only prints the rewritten code to standard output. Look for `===== +BEGIN/END =====` markers in the output to see the proposed rewritten code for +each file, or rerun with the `--rewrite-in-place` option (that is, +`c2rust-analyze --rewrite-in-place build`) to apply the rewrites directly to +the source files. + +`c2rust-analyze` may take a long time to run even on medium-sized codebases. +In particular, running the Polonius analysis on very large functions may take +several minutes (though Polonius results are cached after the first run). For +testing, it may be useful to comment out some modules from `lib.rs` to speed up +the analysis. + + +## Known limitations + +The automated safety rewrites in `c2rust-analyze` only apply to a small subset +of unsafe Rust code. When `c2rust-analyze` encounters unsupported code, it +will report an error and skip rewriting the function in question. + +Other notable limitations: + +* `c2rust-analyze` does not remove the `unsafe` keyword from function + definitions, even when it succeeds at removing all unsafe operations from the + function. The user must remove the `unsafe` keyword manually where it is + appropriate to do so. + + Note that even if a function contains only safe operations, it might still + need to be marked `unsafe` if it could break an invariant that other code + relies on for safety. For example, `Vec::set_len` only writes to the + `self.len` field (a safe operation), but it can be used to violate the + invariant `self.len <= self.cap`, which `Vec::as_slice` relies on for safety. + +* In non-amalgamated builds, where cross-module function calls use `extern "C" + { fn foo(); }` in the calling module and `#[no_mangle] fn foo() { ... }` in + the callee, `c2rust-analyze` may rewrite the signature of the `#[no_mangle]` + function definition in a way that's incompatible with the corresponding + `extern "C"` declaration in another module. This can lead to segfaults or + other undefined behavior at run time. This can be avoided by using an + amalgamated build of the C code (where all functions are placed in one + module), or by manually editing the function definition and/or declaration + after rewriting to ensure that the signatures match up. diff --git a/c2rust-analyze/src/analyze.rs b/c2rust-analyze/src/analyze.rs index 14376dc7c6..84f589b65a 100644 --- a/c2rust-analyze/src/analyze.rs +++ b/c2rust-analyze/src/analyze.rs @@ -37,6 +37,7 @@ use rustc_hir::def_id::CrateNum; use rustc_hir::def_id::DefId; use rustc_hir::def_id::DefIndex; use rustc_hir::def_id::LocalDefId; +use rustc_hir::definitions::DefPathData; use rustc_index::vec::IndexVec; use rustc_middle::mir::visit::Visitor; use rustc_middle::mir::AggregateKind; @@ -57,6 +58,7 @@ use rustc_middle::ty::TyCtxt; use rustc_middle::ty::TyKind; use rustc_middle::ty::WithOptConstParam; use rustc_span::Span; +use rustc_span::Symbol; use std::collections::HashMap; use std::collections::HashSet; use std::env; @@ -433,9 +435,8 @@ fn parse_def_id(s: &str) -> Result { Ok(def_id) } -fn read_fixed_defs_list(path: &str) -> io::Result> { +fn read_fixed_defs_list(fixed_defs: &mut HashSet, path: &str) -> io::Result<()> { let f = BufReader::new(File::open(path)?); - let mut def_ids = HashSet::new(); for (i, line) in f.lines().enumerate() { let line = line?; let line = line.trim(); @@ -446,9 +447,77 @@ fn read_fixed_defs_list(path: &str) -> io::Result> { let def_id = parse_def_id(&line).unwrap_or_else(|e| { panic!("failed to parse {} line {}: {}", path, i + 1, e); }); - def_ids.insert(def_id); + fixed_defs.insert(def_id); } - Ok(def_ids) + Ok(()) +} + +/// Examine each `DefId` in the crate, and add to `fixed_defs` any that doesn't match at least one +/// prefix in `prefixes`. For example, if `prefixes` is `foo,bar::baz`, only `foo`, `bar::baz`, +/// and their descendants will be eligible for rewriting; all other `DefId`s will be added to +/// `fixed_defs`. +fn check_rewrite_path_prefixes(tcx: TyCtxt, fixed_defs: &mut HashSet, prefixes: &str) { + let hir = tcx.hir(); + let prefixes: HashSet> = prefixes + .split(',') + // Exclude empty paths. This allows for leading/trailing commas or double commas within + // the list, which may result when building the list programmatically. + .filter(|prefix| prefix.len() > 0) + .map(|prefix| prefix.split("::").map(Symbol::intern).collect::>()) + .collect(); + let sym_impl = Symbol::intern("{impl}"); + // Buffer for accumulating the path to a particular def. + let mut path_buf = Vec::with_capacity(10); + for ldid in tcx.hir_crate_items(()).definitions() { + let def_path = hir.def_path(ldid); + + // Traverse `def_path`, adding each `Symbol` to `path_buf`. We check after each push + // whether `path_buf` matches something in `prefixes`, which has the effect of checking + // every prefix of the path of `ldid`. + path_buf.clear(); + let mut matched = false; + for ddpd in &def_path.data { + match ddpd.data { + // We ignore these when building the `Symbol` path. + DefPathData::CrateRoot + | DefPathData::ForeignMod + | DefPathData::Use + | DefPathData::GlobalAsm + | DefPathData::ClosureExpr + | DefPathData::Ctor + | DefPathData::AnonConst + | DefPathData::ImplTrait => continue, + DefPathData::TypeNs(sym) + | DefPathData::ValueNs(sym) + | DefPathData::MacroNs(sym) + | DefPathData::LifetimeNs(sym) => { + path_buf.push(sym); + } + DefPathData::Impl => { + path_buf.push(sym_impl); + } + } + if prefixes.contains(&path_buf) { + matched = true; + break; + } + } + + if !matched { + fixed_defs.insert(ldid.to_def_id()); + } + } +} + +fn get_fixed_defs(tcx: TyCtxt) -> io::Result> { + let mut fixed_defs = HashSet::new(); + if let Ok(path) = env::var("C2RUST_ANALYZE_FIXED_DEFS_LIST") { + read_fixed_defs_list(&mut fixed_defs, &path)?; + } + if let Ok(prefixes) = env::var("C2RUST_ANALYZE_REWRITE_PATHS") { + check_rewrite_path_prefixes(tcx, &mut fixed_defs, &prefixes); + } + Ok(fixed_defs) } fn run(tcx: TyCtxt) { @@ -458,11 +527,7 @@ fn run(tcx: TyCtxt) { } // Load the list of fixed defs early, so any errors are reported immediately. - let fixed_defs = if let Ok(path) = env::var("C2RUST_ANALYZE_FIXED_DEFS_LIST") { - read_fixed_defs_list(&path).unwrap() - } else { - HashSet::new() - }; + let fixed_defs = get_fixed_defs(tcx).unwrap(); let mut gacx = GlobalAnalysisCtxt::new(tcx); let mut func_info = HashMap::new(); @@ -918,7 +983,14 @@ fn run(tcx: TyCtxt) { // Items in the "fixed defs" list have all pointers in their types set to `FIXED`. For // testing, putting #[c2rust_analyze_test::fixed_signature] on an item has the same effect. + // + // Functions in the list are also added to `gacx.fns_fixed`. for ldid in tcx.hir_crate_items(()).definitions() { + // TODO (HACK): `Clone::clone` impls are omitted from `fn_sigs` and cause a panic below. + if is_impl_clone(tcx, ldid.to_def_id()) { + continue; + } + let def_fixed = fixed_defs.contains(&ldid.to_def_id()) || util::has_test_attr(tcx, ldid, TestAttr::FixedSignature); match tcx.def_kind(ldid.to_def_id()) { @@ -928,6 +1000,7 @@ fn run(tcx: TyCtxt) { None => panic!("missing fn_sig for {:?}", ldid), }; make_sig_fixed(&mut gasn, lsig); + gacx.fns_fixed.insert(ldid.to_def_id()); } DefKind::Struct | DefKind::Enum | DefKind::Union => { @@ -1163,6 +1236,14 @@ fn run(tcx: TyCtxt) { // Generate rewrites for all functions. let mut all_rewrites = Vec::new(); + let mut manual_shim_casts = rewrite::ManualShimCasts::No; + if let Ok(val) = env::var("C2RUST_ANALYZE_USE_MANUAL_SHIMS") { + if val == "1" { + manual_shim_casts = rewrite::ManualShimCasts::Yes; + } + } + let manual_shim_casts = manual_shim_casts; + // It may take multiple tries to reach a state where all rewrites succeed. loop { func_reports.clear(); @@ -1187,7 +1268,7 @@ fn run(tcx: TyCtxt) { } for &ldid in &all_fn_ldids { - if gacx.fn_failed(ldid.to_def_id()) { + if gacx.fn_skip_rewrite(ldid.to_def_id()) { continue; } @@ -1249,7 +1330,12 @@ fn run(tcx: TyCtxt) { let mut any_failed = false; for def_id in shim_fn_def_ids { let r = panic_detail::catch_unwind(AssertUnwindSafe(|| { - all_rewrites.push(rewrite::gen_shim_definition_rewrite(&gacx, &gasn, def_id)); + all_rewrites.push(rewrite::gen_shim_definition_rewrite( + &gacx, + &gasn, + def_id, + manual_shim_casts, + )); })); match r { Ok(()) => {} @@ -1421,7 +1507,13 @@ fn run(tcx: TyCtxt) { // ---------------------------------- // Apply rewrite to all functions at once. - rewrite::apply_rewrites(tcx, all_rewrites); + let mut update_files = rewrite::UpdateFiles::No; + if let Ok(val) = env::var("C2RUST_ANALYZE_REWRITE_IN_PLACE") { + if val == "1" { + update_files = rewrite::UpdateFiles::Yes; + } + } + rewrite::apply_rewrites(tcx, all_rewrites, update_files); // ---------------------------------- // Report caught panics diff --git a/c2rust-analyze/src/context.rs b/c2rust-analyze/src/context.rs index 239973df28..81cecd1fbc 100644 --- a/c2rust-analyze/src/context.rs +++ b/c2rust-analyze/src/context.rs @@ -329,6 +329,9 @@ pub struct GlobalAnalysisCtxt<'tcx> { /// `DefId`s of functions where analysis failed, and a [`PanicDetail`] explaining the reason /// for each failure. pub fns_failed: HashMap, + /// `DefId`s of functions that were marked "fixed" (non-rewritable) through command-line + /// arguments. + pub fns_fixed: HashSet, pub field_ltys: HashMap>, @@ -698,6 +701,7 @@ impl<'tcx> GlobalAnalysisCtxt<'tcx> { .map(|known_fn| (known_fn.name, known_fn)) .collect(), fns_failed: HashMap::new(), + fns_fixed: HashSet::new(), field_ltys: HashMap::new(), static_tys: HashMap::new(), addr_of_static: HashMap::new(), @@ -759,6 +763,7 @@ impl<'tcx> GlobalAnalysisCtxt<'tcx> { ref mut fn_sigs, known_fns: _, fns_failed: _, + fns_fixed: _, ref mut field_ltys, ref mut static_tys, ref mut addr_of_static, @@ -815,7 +820,7 @@ impl<'tcx> GlobalAnalysisCtxt<'tcx> { } } - pub fn fn_failed(&mut self, did: DefId) -> bool { + pub fn fn_failed(&self, did: DefId) -> bool { self.fns_failed.contains_key(&did) } @@ -831,6 +836,25 @@ impl<'tcx> GlobalAnalysisCtxt<'tcx> { self.fns_failed.keys().copied() } + pub fn fn_skip_rewrite(&self, did: DefId) -> bool { + self.fn_failed(did) || self.fns_fixed.contains(&did) + } + + /// Iterate over the `DefId`s of all functions that should skip rewriting. + pub fn iter_fns_skip_rewrite<'a>(&'a self) -> impl Iterator + 'a { + // This let binding avoids a lifetime error with the closure and return-position `impl + // Trait`. + let fns_fixed = &self.fns_fixed; + // If the same `DefId` is in both `fns_failed` and `fns_fixed`, be sure to return it only + // once. + fns_fixed.iter().copied().chain( + self.fns_failed + .keys() + .copied() + .filter(move |did| !fns_fixed.contains(&did)), + ) + } + pub fn known_fn(&self, def_id: DefId) -> Option<&'static KnownFn> { let symbol = self.tcx.symbol_name(Instance::mono(self.tcx, def_id)); self.known_fns.get(symbol.name).copied() diff --git a/c2rust-analyze/src/main.rs b/c2rust-analyze/src/main.rs index 6f8990a309..545d303977 100644 --- a/c2rust-analyze/src/main.rs +++ b/c2rust-analyze/src/main.rs @@ -37,7 +37,7 @@ use analyze::AnalysisCallbacks; use anyhow::anyhow; use anyhow::ensure; use anyhow::Context; -use clap::Parser; +use clap::{ArgAction, Parser}; use rustc_driver::RunCompiler; use rustc_driver::TimePassesCallbacks; use rustc_session::config::CrateType; @@ -73,6 +73,30 @@ struct Args { #[clap(long)] rustflags: Option, + /// Comma-separated list of paths to rewrite. Any item whose path does not start with a prefix + /// from this list will be marked non-rewritable (`FIXED`). + #[clap(long, action(ArgAction::Append))] + rewrite_paths: Vec, + /// Rewrite source files on disk. The default is to print the rewritten source code to stdout + /// as part of the tool's debug output. + #[clap(long)] + rewrite_in_place: bool, + /// Use `todo!()` placeholders in shims for casts that must be implemented manually. + /// + /// When a function requires a shim, and the shim requires a cast that can't be generated + /// automatically, the default is to cancel rewriting of the function. With this option, + /// rewriting proceeds as normal, and shim generation emits `todo!()` in place of each + /// unsupported cast. + #[clap(long)] + use_manual_shims: bool, + + /// Read a list of defs that should be marked non-rewritable (`FIXED`) from this file path. + /// Run `c2rust-analyze` without this option and check the debug output for a full list of defs + /// in the crate being analyzed; the file passed to this option should list a subset of those + /// defs. + #[clap(long)] + fixed_defs_list: Option, + /// `cargo` args. cargo_args: Vec, } @@ -327,6 +351,10 @@ where fn cargo_wrapper(rustc_wrapper: &Path) -> anyhow::Result<()> { let Args { rustflags, + rewrite_paths, + rewrite_in_place, + use_manual_shims, + fixed_defs_list, cargo_args, } = Args::parse(); @@ -362,6 +390,24 @@ fn cargo_wrapper(rustc_wrapper: &Path) -> anyhow::Result<()> { .env(RUSTC_WRAPPER_VAR, rustc_wrapper) .env(RUST_SYSROOT_VAR, &sysroot) .env("RUSTFLAGS", &rustflags); + + if let Some(ref fixed_defs_list) = fixed_defs_list { + cmd.env("C2RUST_ANALYZE_FIXED_DEFS_LIST", fixed_defs_list); + } + + if rewrite_paths.len() > 0 { + let rewrite_paths = rewrite_paths.join(OsStr::new(",")); + cmd.env("C2RUST_ANALYZE_REWRITE_PATHS", rewrite_paths); + } + + if rewrite_in_place { + cmd.env("C2RUST_ANALYZE_REWRITE_IN_PLACE", "1"); + } + + if use_manual_shims { + cmd.env("C2RUST_ANALYZE_USE_MANUAL_SHIMS", "1"); + } + Ok(()) })?; diff --git a/c2rust-analyze/src/rewrite/apply.rs b/c2rust-analyze/src/rewrite/apply.rs index 66034fab03..7944139dfa 100644 --- a/c2rust-analyze/src/rewrite/apply.rs +++ b/c2rust-analyze/src/rewrite/apply.rs @@ -400,6 +400,13 @@ impl Emitter<'_, S> { self.emit_str(")") } + Rewrite::Let1(ref name, ref rw) => { + self.emit_str("let ")?; + self.emit_str(name)?; + self.emit_str(" = ")?; + self.emit(rw, 0) + } + Rewrite::TyPtr(ref rw, mutbl) => { match mutbl { Mutability::Not => self.emit_str("*const ")?, diff --git a/c2rust-analyze/src/rewrite/expr/mir_op.rs b/c2rust-analyze/src/rewrite/expr/mir_op.rs index 8a52ca1d73..cb2027e1e4 100644 --- a/c2rust-analyze/src/rewrite/expr/mir_op.rs +++ b/c2rust-analyze/src/rewrite/expr/mir_op.rs @@ -13,7 +13,6 @@ use crate::pointee_type::PointeeTypes; use crate::pointer_id::{PointerId, PointerTable}; use crate::type_desc::{self, Ownership, Quantity, TypeDesc}; use crate::util::{ty_callee, Callee}; -use log::*; use rustc_ast::Mutability; use rustc_middle::mir::{ BasicBlock, Body, Location, Operand, Place, Rvalue, Statement, StatementKind, Terminator, @@ -751,25 +750,42 @@ where } pub fn build_cast_desc_desc(&mut self, from: TypeDesc<'tcx>, to: TypeDesc<'tcx>) { + self.try_build_cast_desc_desc(from, to).unwrap() + } + + /// Try to build a cast between `from` and `to`, emitting any intermediate rewrites that are + /// necessary through the `self.emit` callback. + /// + /// Note that when cast building fails, this method may still call `self.emit` one or more + /// times before returning `Err`. The caller should be prepared to roll back the effects of + /// any `self.emit` calls if the overall operation fails. + pub fn try_build_cast_desc_desc( + &mut self, + from: TypeDesc<'tcx>, + to: TypeDesc<'tcx>, + ) -> Result<(), String> { let orig_from = from; let mut from = orig_from; // The `from` and `to` pointee types should only differ in their lifetimes. - assert_eq!( - self.tcx.erase_regions(from.pointee_ty), - self.tcx.erase_regions(to.pointee_ty), - ); + let from_pointee_erased = self.tcx.erase_regions(from.pointee_ty); + let to_pointee_erased = self.tcx.erase_regions(to.pointee_ty); + if from_pointee_erased != to_pointee_erased { + return Err(format!( + "pointee type mismatch: {from_pointee_erased:?} != {to_pointee_erased:?}" + )); + } // There might still be differences in lifetimes, which we don't care about here. // Overwriting `from.pointee_ty` allows the final `from == to` check to succeed below. from.pointee_ty = to.pointee_ty; if from == to { - return; + return Ok(()); } // Early `Ownership` casts. We do certain casts here in hopes of reaching an `Ownership` // on which we can safely adjust `Quantity`. - from.own = self.cast_ownership(from, to, true); + from.own = self.cast_ownership(from, to, true)?; // Safe casts that change `Quantity`. while from.qty != to.qty { @@ -787,8 +803,8 @@ where (Quantity::Array, _) => { // `Array` goes only to `Slice` directly. All other `Array` conversions go // through `Slice` first. - error!("TODO: cast Array to {:?}", to.qty); - from.qty = Quantity::Slice; + return Err(format!("TODO: cast Array to {:?}", to.qty)); + //from.qty = Quantity::Slice; } // Bidirectional conversions between `Slice` and `OffsetPtr`. (Quantity::Slice, Quantity::OffsetPtr) | (Quantity::OffsetPtr, Quantity::Slice) => { @@ -819,14 +835,16 @@ where } // Late `Ownership` casts. - from.own = self.cast_ownership(from, to, false); + from.own = self.cast_ownership(from, to, false)?; if from != to { - panic!( + return Err(format!( "unsupported cast kind: {:?} -> {:?} (original input: {:?})", from, to, orig_from - ); + )); } + + Ok(()) } fn cast_ownership( @@ -834,17 +852,17 @@ where from: TypeDesc<'tcx>, to: TypeDesc<'tcx>, early: bool, - ) -> Ownership { + ) -> Result { let mut from = from; while from.own != to.own { - match self.cast_ownership_one_step(from, to, early) { + match self.cast_ownership_one_step(from, to, early)? { Some(new_own) => { from.own = new_own; } None => break, } } - from.own + Ok(from.own) } fn cast_ownership_one_step( @@ -852,23 +870,23 @@ where from: TypeDesc<'tcx>, to: TypeDesc<'tcx>, early: bool, - ) -> Option { - match from.own { + ) -> Result, String> { + Ok(match from.own { Ownership::Box => match to.own { Ownership::Raw | Ownership::Imm => { - error!("TODO: cast Box to Imm"); - Some(Ownership::Imm) + return Err(format!("TODO: cast Box to Imm")); + //Some(Ownership::Imm) } Ownership::RawMut | Ownership::Mut => { - error!("TODO: cast Box to Mut"); - Some(Ownership::Mut) + return Err(format!("TODO: cast Box to Mut")); + //Some(Ownership::Mut) } _ => None, }, Ownership::Rc => match to.own { Ownership::Imm | Ownership::Raw | Ownership::RawMut => { - error!("TODO: cast Rc to Imm"); - Some(Ownership::Imm) + return Err(format!("TODO: cast Rc to Imm")); + //Some(Ownership::Imm) } _ => None, }, @@ -932,7 +950,7 @@ where } _ => None, }, - } + }) } pub fn build_cast_lty_desc(&mut self, from_lty: LTy<'tcx>, to: TypeDesc<'tcx>) { diff --git a/c2rust-analyze/src/rewrite/mod.rs b/c2rust-analyze/src/rewrite/mod.rs index f2c656846a..607870565c 100644 --- a/c2rust-analyze/src/rewrite/mod.rs +++ b/c2rust-analyze/src/rewrite/mod.rs @@ -25,8 +25,9 @@ use rustc_hir::Mutability; use rustc_middle::ty::TyCtxt; -use rustc_span::Span; +use rustc_span::{FileName, Span}; use std::fmt; +use std::fs; mod apply; mod expr; @@ -36,7 +37,7 @@ mod statics; mod ty; pub use self::expr::gen_expr_rewrites; -pub use self::shim::{gen_shim_call_rewrites, gen_shim_definition_rewrite}; +pub use self::shim::{gen_shim_call_rewrites, gen_shim_definition_rewrite, ManualShimCasts}; pub use self::statics::gen_static_rewrites; pub use self::ty::dump_rewritten_local_tys; pub use self::ty::{gen_adt_ty_rewrites, gen_ty_rewrites}; @@ -88,11 +89,14 @@ pub enum Rewrite { /// A multi-variable `let` binding, like `let (x, y) = (rw0, rw1)`. Note that this rewrite /// does not include a trailing semicolon. /// - /// Since these variable bindings are not hygienic, a `StmtBind` can invalidate the expression - /// produced by `Identity` or `Sub` rewrites used later in the same scope. In general, - /// `StmtBind` should only be used inside a `Block`, and `Identity` and `Sub` rewrites should - /// not be used later in that block. + /// Since these variable bindings are not hygienic, a `Let` can invalidate the expression + /// produced by `Identity` or `Sub` rewrites used later in the same scope. In general, `Let` + /// should only be used inside a `Block`, and `Identity` and `Sub` rewrites should not be used + /// later in that block. Let(Vec<(String, Rewrite)>), + /// Single-variable `let` binding. This has the same scoping issues as multi-variable `Let`; + /// because of this, `Let` should generally be used instead of multiple `Let1`s. + Let1(String, Box), // Type builders /// Emit a complete pretty-printed type, discarding the original annotation. @@ -190,6 +194,7 @@ impl Rewrite { } Let(new_vars) } + Let1(ref name, ref rw) => Let1(String::clone(name), try_subst(rw)?), Print(ref s) => Print(String::clone(s)), TyPtr(ref rw, mutbl) => TyPtr(try_subst(rw)?, mutbl), @@ -238,23 +243,42 @@ impl apply::Sink for FormatterSink<'_, '_> { } } -pub fn apply_rewrites(tcx: TyCtxt, rewrites: Vec<(Span, Rewrite)>) { +#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] +pub enum UpdateFiles { + No, + Yes, +} + +pub fn apply_rewrites(tcx: TyCtxt, rewrites: Vec<(Span, Rewrite)>, update_files: UpdateFiles) { // TODO: emit new source code properly instead of just printing let new_src = apply::apply_rewrites(tcx.sess.source_map(), rewrites); for (filename, src) in new_src { - eprintln!("\n\n ===== BEGIN {:?} =====", filename); + println!("\n\n ===== BEGIN {:?} =====", filename); for line in src.lines() { // Omit filecheck directives from the debug output, as filecheck can get confused due // to directives matching themselves (e.g. `// CHECK: foo` will match the `foo` in the // line `// CHECK: foo`). if let Some((pre, _post)) = line.split_once("// CHECK") { - eprintln!("{}// (FileCheck directive omitted)", pre); + println!("{}// (FileCheck directive omitted)", pre); } else { - eprintln!("{}", line); + println!("{}", line); + } + } + println!(" ===== END {:?} =====", filename); + + if update_files == UpdateFiles::Yes { + let mut path_ok = false; + if let FileName::Real(ref rfn) = filename { + if let Some(path) = rfn.local_path() { + fs::write(path, src).unwrap(); + path_ok = true; + } + } + if !path_ok { + log::warn!("couldn't write to non-real file {filename:?}"); } } - eprintln!(" ===== END {:?} =====", filename); } } diff --git a/c2rust-analyze/src/rewrite/shim.rs b/c2rust-analyze/src/rewrite/shim.rs index 34558c3ec4..f6a24dbe72 100644 --- a/c2rust-analyze/src/rewrite/shim.rs +++ b/c2rust-analyze/src/rewrite/shim.rs @@ -1,6 +1,7 @@ use crate::context::LTy; use crate::context::{FlagSet, GlobalAnalysisCtxt, GlobalAssignment}; use crate::rewrite::expr::{self, CastBuilder}; +use crate::rewrite::ty; use crate::rewrite::Rewrite; use crate::type_desc::{self, TypeDesc}; use rustc_hir::def::{DefKind, Res}; @@ -103,9 +104,9 @@ impl<'a, 'tcx> Visitor<'tcx> for ShimCallVisitor<'a, 'tcx> { } } -/// For each failed function that calls or mentions a non-failed function meeting certain criteria, -/// generate rewrites to change calls to `foo` into calls to `foo_shim`. Also produces a set of -/// callee `DefId`s for the calls that were rewritten this way. +/// For each non-rewritable function that calls or mentions a rewritable function meeting certain +/// criteria, generate rewrites to change calls to `foo` into calls to `foo_shim`. Also produces a +/// set of callee `DefId`s for the calls that were rewritten this way. /// /// The criteria we look for are: /// * The callee must be a local function. @@ -121,12 +122,17 @@ pub fn gen_shim_call_rewrites<'tcx>( let mut rewrites = Vec::new(); let mut mentioned_fns = HashSet::new(); - for &failed_def_id in gacx.fns_failed.keys() { - let failed_def_id = match failed_def_id.as_local() { + for skip_def_id in gacx.iter_fns_skip_rewrite() { + let skip_def_id = match skip_def_id.as_local() { + Some(x) => x, + None => continue, + }; + // When using --rewrite-paths, fns in extern blocks may show up here. We can't do anything + // with these, since they don't have a HIR body, so skip them. + let hir_body_id = match tcx.hir().maybe_body_owned_by(skip_def_id) { Some(x) => x, None => continue, }; - let hir_body_id = tcx.hir().body_owned_by(failed_def_id); let hir = tcx.hir().body(hir_body_id); let typeck_results = tcx.typeck_body(hir_body_id); let mut v = ShimCallVisitor { @@ -168,10 +174,19 @@ fn lty_to_desc_pair<'tcx>( Some((desc, fixed_desc)) } +#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] +pub enum ManualShimCasts { + No, + /// Emit `todo!("cast X from T1 to T2")` instead of panicking when a cast can't be generated. + /// The user can then implement the necessary casts manually. + Yes, +} + pub fn gen_shim_definition_rewrite<'tcx>( gacx: &GlobalAnalysisCtxt<'tcx>, gasn: &GlobalAssignment, def_id: DefId, + manual_casts: ManualShimCasts, ) -> (Span, Rewrite) { let tcx = gacx.tcx; @@ -193,35 +208,72 @@ pub fn gen_shim_definition_rewrite<'tcx>( // valid `fn_sigs` entries. let lsig = gacx.fn_sigs[&def_id]; + // 1 cast per arg, 1 call, 1 cast for the result. The final result is returned using the + // trailing expression of the block. + let mut stmts = Vec::with_capacity(arg_tys.len() + 2); + + // Generate `let safe_arg0 = arg0 as ...;` for each argument. let mut arg_exprs = Vec::with_capacity(arg_tys.len()); for (i, arg_lty) in lsig.inputs.iter().enumerate() { let mut hir_rw = Rewrite::FnArg(i); - let (arg_desc, fixed_desc) = match lty_to_desc_pair(tcx, gasn, arg_lty) { - Some(x) => x, - None => { - // `arg_lty` is a FIXED pointer, which doesn't need a cast; the shim argument - // type is the same as the argument type of the wrapped function. - arg_exprs.push(hir_rw); - continue; + if let Some((arg_desc, fixed_desc)) = lty_to_desc_pair(tcx, gasn, arg_lty) { + let mut cast_builder = CastBuilder::new(tcx, &gasn.perms, &gasn.flags, |rk| { + hir_rw = expr::convert_cast_rewrite(&rk, mem::take(&mut hir_rw)); + }); + match cast_builder.try_build_cast_desc_desc(fixed_desc, arg_desc) { + Ok(()) => {} + Err(e) => { + if manual_casts == ManualShimCasts::Yes { + hir_rw = Rewrite::Print(format!( + r#"todo!("cast arg{i} from {} to {}")"#, + ty::desc_to_ty(tcx, fixed_desc), + ty::desc_to_ty(tcx, arg_desc), + )); + } else { + panic!("error generating cast for {:?} arg{}: {}", def_id, i, e); + } + } } - }; + } else { + // No-op. `arg_lty` is a FIXED pointer, which doesn't need a cast; the shim argument + // type is the same as the argument type of the wrapped function. + } - let mut cast_builder = CastBuilder::new(tcx, &gasn.perms, &gasn.flags, |rk| { - hir_rw = expr::convert_cast_rewrite(&rk, mem::take(&mut hir_rw)); - }); - cast_builder.build_cast_desc_desc(fixed_desc, arg_desc); - arg_exprs.push(hir_rw); + let safe_name = format!("safe_arg{}", i); + stmts.push(Rewrite::Let1(safe_name.clone(), Box::new(hir_rw))); + arg_exprs.push(Rewrite::Print(safe_name)); } - let mut body_rw = Rewrite::Call(owner_node.ident().unwrap().as_str().to_owned(), arg_exprs); + // Generate the call: `let safe_result = f(safe_arg0, safe_arg1);` + let call_rw = Rewrite::Call(owner_node.ident().unwrap().as_str().to_owned(), arg_exprs); + stmts.push(Rewrite::Let1("safe_result".into(), Box::new(call_rw))); + // Generate `let result = safe_result as ...;` + let mut result_rw = Rewrite::Print("safe_result".into()); if let Some((return_desc, fixed_desc)) = lty_to_desc_pair(tcx, gasn, lsig.output) { let mut cast_builder = CastBuilder::new(tcx, &gasn.perms, &gasn.flags, |rk| { - body_rw = expr::convert_cast_rewrite(&rk, mem::take(&mut body_rw)); + result_rw = expr::convert_cast_rewrite(&rk, mem::take(&mut result_rw)); }); - cast_builder.build_cast_desc_desc(return_desc, fixed_desc); + match cast_builder.try_build_cast_desc_desc(return_desc, fixed_desc) { + Ok(()) => {} + Err(e) => { + if manual_casts == ManualShimCasts::Yes { + result_rw = Rewrite::Print(format!( + r#"todo!("cast safe_result from {} to {}")"#, + ty::desc_to_ty(tcx, return_desc), + ty::desc_to_ty(tcx, fixed_desc), + )); + } else { + panic!("error generating cast for {:?} result: {}", def_id, e); + } + } + } } + stmts.push(Rewrite::Let1("result".into(), Box::new(result_rw))); + + // Build the function body. + let body_rw = Rewrite::Block(stmts, Some(Box::new(Rewrite::Print("result".into())))); let rw = Rewrite::DefineFn { name: format!("{}_shim", owner_node.ident().unwrap().as_str()), diff --git a/c2rust-analyze/src/rewrite/ty.rs b/c2rust-analyze/src/rewrite/ty.rs index 07807f9dd2..538537cd5f 100644 --- a/c2rust-analyze/src/rewrite/ty.rs +++ b/c2rust-analyze/src/rewrite/ty.rs @@ -17,7 +17,7 @@ use crate::labeled_ty::{LabeledTy, LabeledTyCtxt}; use crate::pointee_type::PointeeTypes; use crate::pointer_id::{GlobalPointerTable, PointerId, PointerTable}; use crate::rewrite::Rewrite; -use crate::type_desc::{self, Ownership, Quantity}; +use crate::type_desc::{self, Ownership, Quantity, TypeDesc}; use hir::{ FnRetTy, GenericParamKind, Generics, ItemKind, Path, PathSegment, VariantData, WherePredicate, }; @@ -373,7 +373,7 @@ fn mk_rewritten_ty<'tcx>( ) -> ty::Ty<'tcx> { let tcx = *lcx; lcx.rewrite_unlabeled(rw_lty, &mut |ptr_ty, args, label| { - let (mut ty, own, qty) = match (label.pointee_ty, label.ty_desc) { + let (ty, own, qty) = match (label.pointee_ty, label.ty_desc) { (Some(pointee_ty), Some((own, qty))) => { // The `ty` should be a pointer. assert_eq!(args.len(), 1); @@ -402,30 +402,45 @@ fn mk_rewritten_ty<'tcx>( } }; - if own == Ownership::Cell { - ty = mk_cell(tcx, ty); - } + desc_parts_to_ty(tcx, own, qty, ty) + }) +} - ty = match qty { - Quantity::Single => ty, - Quantity::Slice => tcx.mk_slice(ty), - // TODO: This should generate `OffsetPtr` rather than `&[T]`, but `OffsetPtr` is NYI - Quantity::OffsetPtr => tcx.mk_slice(ty), - Quantity::Array => panic!("can't mk_rewritten_ty with Quantity::Array"), - }; +pub fn desc_parts_to_ty<'tcx>( + tcx: TyCtxt<'tcx>, + own: Ownership, + qty: Quantity, + pointee_ty: Ty<'tcx>, +) -> Ty<'tcx> { + let mut ty = pointee_ty; - ty = match own { - Ownership::Raw => tcx.mk_imm_ptr(ty), - Ownership::RawMut => tcx.mk_mut_ptr(ty), - Ownership::Imm => tcx.mk_imm_ref(tcx.mk_region(ReErased), ty), - Ownership::Cell => tcx.mk_imm_ref(tcx.mk_region(ReErased), ty), - Ownership::Mut => tcx.mk_mut_ref(tcx.mk_region(ReErased), ty), - Ownership::Rc => todo!(), - Ownership::Box => tcx.mk_box(ty), - }; + if own == Ownership::Cell { + ty = mk_cell(tcx, ty); + } - ty - }) + ty = match qty { + Quantity::Single => ty, + Quantity::Slice => tcx.mk_slice(ty), + // TODO: This should generate `OffsetPtr` rather than `&[T]`, but `OffsetPtr` is NYI + Quantity::OffsetPtr => tcx.mk_slice(ty), + Quantity::Array => panic!("can't mk_rewritten_ty with Quantity::Array"), + }; + + ty = match own { + Ownership::Raw => tcx.mk_imm_ptr(ty), + Ownership::RawMut => tcx.mk_mut_ptr(ty), + Ownership::Imm => tcx.mk_imm_ref(tcx.mk_region(ReErased), ty), + Ownership::Cell => tcx.mk_imm_ref(tcx.mk_region(ReErased), ty), + Ownership::Mut => tcx.mk_mut_ref(tcx.mk_region(ReErased), ty), + Ownership::Rc => todo!(), + Ownership::Box => tcx.mk_box(ty), + }; + + ty +} + +pub fn desc_to_ty<'tcx>(tcx: TyCtxt<'tcx>, desc: TypeDesc<'tcx>) -> Ty<'tcx> { + desc_parts_to_ty(tcx, desc.own, desc.qty, desc.pointee_ty) } struct HirTyVisitor<'a, 'tcx> { diff --git a/c2rust-analyze/tests/common/mod.rs b/c2rust-analyze/tests/common/mod.rs index f63ea1d71c..599e2e3149 100644 --- a/c2rust-analyze/tests/common/mod.rs +++ b/c2rust-analyze/tests/common/mod.rs @@ -1,6 +1,7 @@ use std::{ collections::HashSet, env, + ffi::OsString, fmt::{self, Display, Formatter}, fs::{self, File}, path::{Path, PathBuf}, @@ -56,6 +57,20 @@ struct AnalyzeArgs { /// this flag. #[arg(long)] catch_panics: bool, + + /// Comma-separated list of paths to rewrite. Any item whose path does not start with a prefix + /// from this list will be marked non-rewritable (`FIXED`). + #[clap(long)] + rewrite_paths: Option, + + /// Use `todo!()` placeholders in shims for casts that must be implemented manually. + /// + /// When a function requires a shim, and the shim requires a cast that can't be generated + /// automatically, the default is to cancel rewriting of the function. With this option, + /// rewriting proceeds as normal, and shim generation emits `todo!()` in place of each + /// unsupported cast. + #[clap(long)] + use_manual_shims: bool, } impl AnalyzeArgs { @@ -176,6 +191,12 @@ impl Analyze { if !args.catch_panics { cmd.env("C2RUST_ANALYZE_TEST_DONT_CATCH_PANIC", "1"); } + if args.use_manual_shims { + cmd.env("C2RUST_ANALYZE_USE_MANUAL_SHIMS", "1"); + } + if let Some(ref rewrite_paths) = args.rewrite_paths { + cmd.env("C2RUST_ANALYZE_REWRITE_PATHS", rewrite_paths); + } cmd.arg(&rs_path) .arg("-L") .arg(lib_dir) diff --git a/c2rust-analyze/tests/filecheck.rs b/c2rust-analyze/tests/filecheck.rs index 6e343e15b5..849f602d6a 100644 --- a/c2rust-analyze/tests/filecheck.rs +++ b/c2rust-analyze/tests/filecheck.rs @@ -60,6 +60,8 @@ define_tests! { pointee, ptrptr1, regions_fixed, + rewrite_paths, + rewrite_paths_manual_shim, statics, test_attrs, trivial, diff --git a/c2rust-analyze/tests/filecheck/rewrite_paths.rs b/c2rust-analyze/tests/filecheck/rewrite_paths.rs new file mode 100644 index 0000000000..7b4545f1d2 --- /dev/null +++ b/c2rust-analyze/tests/filecheck/rewrite_paths.rs @@ -0,0 +1,35 @@ +//! --rewrite-paths good1,good2 +#![feature(register_tool)] +#![register_tool(c2rust_analyze_test)] + +// CHECK-LABEL: fn good1<'h0>(x: &'h0 mut (i32)) +unsafe fn good1(x: *mut i32) -> i32 { + // CHECK: let y = &*(bad(core::ptr::addr_of_mut!(*(x)))).cast_const(); + let y = bad(x); + *y +} + +// CHECK-LABEL: fn bad(x: *mut i32) -> *mut i32 +// Since `bad` is not listed in --rewrite-paths, it will not be rewritten (aside from adding shim +// calls). +unsafe fn bad(x: *mut i32) -> *mut i32 { + // CHECK: good2_shim(x) + good2(x) +} + +// CHECK-LABEL: fn good2<'h0,'h1>(x: &'h0 mut (i32)) -> &'h1 (i32) +unsafe fn good2(x: *mut i32) -> *mut i32 { + *x = 1; + // CHECK: &*(x) + x +} +// CHECK-LABEL: unsafe fn good2_shim(arg0: *mut i32) -> *mut i32 +// CHECK: let safe_arg0 = &mut *arg0; +// CHECK: let safe_result = good2(safe_arg0); +// CHECK: let result = core::ptr::addr_of!(*safe_result).cast_mut(); + +// Regression test for an issue where `gen_shim_call_rewrites` would panic upon encountering a +// non-rewritten function that doesn't have a body. +extern "C" { + fn memcpy(_: *mut u8, _: *const u8, _: usize) -> *mut u8; +} diff --git a/c2rust-analyze/tests/filecheck/rewrite_paths_manual_shim.rs b/c2rust-analyze/tests/filecheck/rewrite_paths_manual_shim.rs new file mode 100644 index 0000000000..4da8f67a4d --- /dev/null +++ b/c2rust-analyze/tests/filecheck/rewrite_paths_manual_shim.rs @@ -0,0 +1,29 @@ +//! --rewrite-paths good1,good2 --use-manual-shims +#![feature(register_tool)] +#![register_tool(c2rust_analyze_test)] + +// CHECK-LABEL: fn good1<'h0>(x: &'h0 mut [(i32)]) +unsafe fn good1(x: *mut i32) -> i32 { + // CHECK: let y = &*(bad(core::ptr::addr_of_mut!(*&mut (x)[0]))).cast_const(); + let y = bad(x); + *y +} + +// CHECK-LABEL: fn bad(x: *mut i32) -> *mut i32 +// Since `bad` is not listed in --rewrite-paths, it will not be rewritten (aside from adding shim +// calls). +unsafe fn bad(x: *mut i32) -> *mut i32 { + // CHECK: good2_shim(x) + good2(x) +} + +// CHECK-LABEL: fn good2<'h0,'h1>(x: &'h0 mut [(i32)]) -> &'h1 (i32) +unsafe fn good2(x: *mut i32) -> *mut i32 { + *x.offset(1) = 1; + // CHECK: &*(x) + x +} +// CHECK-LABEL: unsafe fn good2_shim(arg0: *mut i32) -> *mut i32 +// CHECK: let safe_arg0 = todo!("cast arg0 from *mut i32 to &mut [i32]"); +// CHECK: let safe_result = good2(safe_arg0); +// CHECK: let result = core::ptr::addr_of!(*safe_result).cast_mut(); diff --git a/c2rust-analyze/tests/filecheck/unrewritten_calls.rs b/c2rust-analyze/tests/filecheck/unrewritten_calls.rs index 7559787f59..e0b6690197 100644 --- a/c2rust-analyze/tests/filecheck/unrewritten_calls.rs +++ b/c2rust-analyze/tests/filecheck/unrewritten_calls.rs @@ -24,5 +24,7 @@ unsafe fn good2(x: *mut i32) -> *mut i32 { // CHECK: &*(x) x } -// CHECK: unsafe fn good2_shim(arg0: *mut i32) -> *mut i32 -// CHECK: core::ptr::addr_of!(*good2(&mut *arg0)).cast_mut() +// CHECK-LABEL: unsafe fn good2_shim(arg0: *mut i32) -> *mut i32 +// CHECK: let safe_arg0 = &mut *arg0; +// CHECK: let safe_result = good2(safe_arg0); +// CHECK: let result = core::ptr::addr_of!(*safe_result).cast_mut();