diff --git a/Cargo.lock b/Cargo.lock
index e2602535f..83ae6f103 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -567,6 +567,20 @@ dependencies = [
  "lazy_static",
 ]
 
+[[package]]
+name = "miden-hir-transform"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "cranelift-entity",
+ "miden-diagnostics",
+ "miden-hir",
+ "miden-hir-analysis",
+ "miden-hir-pass",
+ "rustc-hash",
+ "smallvec",
+]
+
 [[package]]
 name = "miden-hir-type"
 version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index 74b594be8..2b3697dd7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,6 +5,7 @@ members = [
     "hir-analysis",
     "hir-pass",
     "hir-symbol",
+    "hir-transform",
     "hir-type",
     "tools/*",
 ]
diff --git a/hir-transform/Cargo.toml b/hir-transform/Cargo.toml
new file mode 100644
index 000000000..6ba7a08b8
--- /dev/null
+++ b/hir-transform/Cargo.toml
@@ -0,0 +1,21 @@
+[package]
+name = "miden-hir-transform"
+version.workspace = true
+rust-version.workspace = true
+authors.workspace = true
+repository.workspace = true
+categories.workspace = true
+keywords.workspace = true
+license.workspace = true
+readme.workspace = true
+edition.workspace = true
+
+[dependencies]
+anyhow.workspace = true
+cranelift-entity.workspace = true
+miden-diagnostics.workspace = true
+miden-hir = { path = "../hir" }
+miden-hir-analysis = { path = "../hir-analysis" }
+miden-hir-pass = { path = "../hir-pass" }
+rustc-hash.workspace = true
+smallvec.workspace = true
diff --git a/hir-transform/src/adt/mod.rs b/hir-transform/src/adt/mod.rs
new file mode 100644
index 000000000..38aa84015
--- /dev/null
+++ b/hir-transform/src/adt/mod.rs
@@ -0,0 +1,3 @@
+mod scoped_map;
+
+pub use self::scoped_map::ScopedMap;
diff --git a/hir-transform/src/adt/scoped_map.rs b/hir-transform/src/adt/scoped_map.rs
new file mode 100644
index 000000000..4061920e7
--- /dev/null
+++ b/hir-transform/src/adt/scoped_map.rs
@@ -0,0 +1,57 @@
+use std::borrow::Borrow;
+use std::hash::Hash;
+use std::rc::Rc;
+
+use rustc_hash::FxHashMap;
+
+/// A hash map with parent-chain (lexical-scope) lookup semantics:
+/// a lookup that misses in the current scope falls back to the parent scope.
+/// Insertions always go into the current (innermost) scope.
+#[derive(Clone)]
+pub struct ScopedMap<K, V>
+where
+    K: Eq + Hash,
+{
+    parent: Option<Rc<ScopedMap<K, V>>>,
+    map: FxHashMap<K, V>,
+}
+impl<K, V> Default for ScopedMap<K, V>
+where
+    K: Eq + Hash,
+{
+    fn default() -> Self {
+        Self {
+            parent: None,
+            map: Default::default(),
+        }
+    }
+}
+impl<K, V> ScopedMap<K, V>
+where
+    K: Eq + Hash,
+{
+    pub fn new(parent: Option<Rc<ScopedMap<K, V>>>) -> Self {
+        Self {
+            parent,
+            map: Default::default(),
+        }
+    }
+
+    pub fn get<Q>(&self, k: &Q) -> Option<&V>
+    where
+        K: Borrow<Q>,
+        Q: Hash + Eq + ?Sized,
+    {
+        self.map
+            .get(k)
+            .or_else(|| self.parent.as_ref().and_then(|p| p.get(k)))
+    }
+
+    pub fn insert(&mut self, k: K, v: V) {
+        self.map.insert(k, v);
+    }
+
+    pub fn extend<I>(&mut self, iter: I)
+    where
+        I: IntoIterator<Item = (K, V)>,
+    {
+        self.map.extend(iter);
+    }
+}
diff --git a/hir-transform/src/inline_blocks.rs b/hir-transform/src/inline_blocks.rs
new file mode 100644
index 000000000..3cf59c919
--- /dev/null
+++ b/hir-transform/src/inline_blocks.rs
@@ -0,0 +1,131 @@
+use std::collections::VecDeque;
+
+use rustc_hash::FxHashSet;
+
+use miden_hir::{self as hir, Block as BlockId, *};
+use miden_hir_analysis::{ControlFlowGraph, FunctionAnalysis};
+
+use super::RewritePass;
+
+/// This pass operates on the SSA IR, and inlines superfluous blocks which serve no
+/// purpose. Such blocks have no block arguments, and have a single predecessor.
+///
+/// Blocks like this may have been introduced for the following reasons:
+///
+/// * Due to less than optimal lowering to SSA form
+/// * To split critical edges in preparation for dataflow analysis and related transformations,
+/// but ultimately no code introduced along those edges, and critical edges no longer present
+/// an obstacle to further optimization or codegen.
+/// * During treeification of the CFG, where blocks with multiple predecessors were duplicated
+/// to produce a CFG in tree form, where no blocks (other than loop headers) have multiple
+/// predecessors.
+/// This process removed block arguments from these blocks, and rewrote instructions
+/// dominated by those block arguments to reference the values passed from the original predecessor
+/// to whom the subtree is attached. This transformation can expose a chain of blocks which all have
+/// a single predecessor and successor, introducing branches where none are needed, and by removing
+/// those redundant branches, all of the code from blocks in the chain can be inlined in the first
+/// block of the chain.
+pub struct InlineBlocks;
+impl RewritePass for InlineBlocks {
+    type Error = anyhow::Error;
+
+    fn run(
+        &mut self,
+        function: &mut hir::Function,
+        analysis: &mut FunctionAnalysis,
+    ) -> Result<(), Self::Error> {
+        let cfg = analysis.cfg_mut();
+
+        let mut changed = false;
+        let mut visited = FxHashSet::<BlockId>::default();
+        let mut worklist = VecDeque::<BlockId>::default();
+        worklist.push_back(function.dfg.entry_block());
+
+        // First, search down the CFG for non-loop header blocks with only a single successor.
+        // These blocks form possible roots of a chain of blocks that can be inlined.
+        //
+        // For each such root, we then check if the successor block has a single predecessor,
+        // if so, then we can remove the terminator instruction from the root block, and then
+        // move all of the code from the successor block into the root block. We can then repeat
+        // this process until we inline a terminator instruction that is not an unconditional branch
+        // to a single successor.
+        while let Some(p) = worklist.pop_front() {
+            // If we've already visited a block, skip it
+            if !visited.insert(p) {
+                continue;
+            }
+
+            // If this block has multiple successors, or multiple predecessors, add all of it's
+            // successors to the work queue and move on.
+            if cfg.num_successors(p) > 1 || cfg.num_predecessors(p) > 1 {
+                for b in cfg.succ_iter(p) {
+                    worklist.push_back(b);
+                }
+                continue;
+            }
+
+            // This block is a candidate for inlining
+            //
+            // If inlining can proceed, do so until we reach a point where the inlined terminator
+            // returns from the function, has multiple successors, or branches to a block with
+            // multiple predecessors.
+            while let BranchInfo::SingleDest(b, args) = function
+                .dfg
+                .analyze_branch(function.dfg.last_inst(p).unwrap())
+            {
+                // If this successor has other predecessors, it can't be inlined, so
+                // add it to the work list and move on
+                if cfg.num_predecessors(b) > 1 {
+                    worklist.push_back(b);
+                    break;
+                }
+
+                // Only inline if the successor has no block arguments
+                //
+                // TODO: We can inline blocks with arguments as well, but with higher cost,
+                // as we must visit all uses of the block arguments and update them. This
+                // is left as a future extension of this pass should we find that it is
+                // valuable as an optimization.
+                if !args.is_empty() {
+                    break;
+                }
+
+                inline(b, p, function, cfg);
+
+                // Mark that the control flow graph as modified
+                changed = true;
+            }
+        }
+
+        if changed {
+            analysis.cfg_changed(function);
+        }
+
+        Ok(())
+    }
+}
+
+fn inline(from: BlockId, to: BlockId, function: &mut hir::Function, cfg: &mut ControlFlowGraph) {
+    assert_ne!(from, to);
+    {
+        let mut from_insts = function.dfg.block_mut(from).insts.take();
+        let to_insts = &mut function.dfg.block_mut(to).insts;
+        // Remove the original terminator
+        to_insts.pop_back();
+        // Move all instructions from their original block to the parent,
+        // updating the instruction data along the way to reflect the change
+        // in location
+        while let Some(unsafe_ix_ref) = from_insts.pop_front() {
+            let ix_ptr = UnsafeRef::into_raw(unsafe_ix_ref);
+            unsafe {
+                let ix = &mut *ix_ptr;
+                ix.block = to;
+            }
+            to_insts.push_back(unsafe { UnsafeRef::from_raw(ix_ptr) });
+        }
+    }
+    // Detach the original block from the function
+    function.dfg.detach_block(from);
+    // Update the control flow graph to reflect the changes
+    cfg.detach_block(from);
+    cfg.recompute_block(&function.dfg, to);
+}
diff --git a/hir-transform/src/lib.rs b/hir-transform/src/lib.rs
new file mode 100644
index 000000000..142533380
--- /dev/null
+++ b/hir-transform/src/lib.rs
@@ -0,0 +1,105 @@
+pub(crate) mod adt;
+mod inline_blocks;
+mod split_critical_edges;
+mod treeify;
+
+pub use self::inline_blocks::InlineBlocks;
+pub use self::split_critical_edges::SplitCriticalEdges;
+pub use self::treeify::Treeify;
+
+use miden_hir_analysis::FunctionAnalysis;
+use miden_hir_pass::Pass;
+
+/// A [RewritePass] is a special kind of [Pass] which is designed to perform some
+/// kind of rewrite transformation on a [miden_hir::Function].
+///
+/// Rewrites require one or more control flow analyses to have been computed, as
+/// determined by the requirements of the pass itself.
+/// The [FunctionAnalysis]
+/// structure is designed for this purpose, allowing one to request specific
+/// analysis results, which will be computed on-demand if not yet available.
+pub trait RewritePass {
+    type Error;
+
+    /// Runs the rewrite on `function` with `analyses`.
+    ///
+    /// Rewrites should return `Err` to signal that the pass has failed
+    /// and compilation should be aborted
+    fn run(
+        &mut self,
+        function: &mut miden_hir::Function,
+        analyses: &mut FunctionAnalysis,
+    ) -> Result<(), Self::Error>;
+
+    /// Chains two rewrites together to form a new, fused rewrite
+    fn chain<P>(self, pass: P) -> RewriteChain<Self, P>
+    where
+        Self: Sized,
+        P: RewritePass,
+    {
+        RewriteChain::new(self, pass)
+    }
+}
+
+/// [RewriteChain] is the equivalent of [miden_hir_pass::Chain] for [RewritePass].
+///
+/// This is not meant to be constructed or referenced directly, as the type signature gets out
+/// of hand quickly when combining multiple rewrites. Instead, you should invoke `chain` on a
+/// [RewritePass] implementation, and use it as a trait object. In some cases this may require boxing
+/// the `RewriteChain`, depending on how it is being used.
+pub struct RewriteChain<A, B> {
+    a: A,
+    b: B,
+}
+impl<A, B> RewriteChain<A, B> {
+    fn new(a: A, b: B) -> Self {
+        Self { a, b }
+    }
+}
+impl<A, B> Copy for RewriteChain<A, B>
+where
+    A: Copy,
+    B: Copy,
+{
+}
+impl<A, B> Clone for RewriteChain<A, B>
+where
+    A: Clone,
+    B: Clone,
+{
+    #[inline]
+    fn clone(&self) -> Self {
+        Self::new(self.a.clone(), self.b.clone())
+    }
+}
+impl<A, B> RewritePass for RewriteChain<A, B>
+where
+    A: RewritePass,
+    B: RewritePass<Error = <A as RewritePass>::Error>,
+{
+    type Error = <B as RewritePass>::Error;
+
+    fn run(
+        &mut self,
+        function: &mut miden_hir::Function,
+        analyses: &mut FunctionAnalysis,
+    ) -> Result<(), Self::Error> {
+        self.a.run(function, analyses)?;
+        self.b.run(function, analyses)
+    }
+}
+impl<A, B> Pass for RewriteChain<A, B>
+where
+    A: RewritePass,
+    B: RewritePass<Error = <A as RewritePass>::Error>,
+{
+    type Input<'a> = (&'a mut miden_hir::Function, &'a mut FunctionAnalysis);
+    type Output<'a> = (&'a mut miden_hir::Function, &'a mut FunctionAnalysis);
+    type Error = <B as RewritePass>::Error;
+
+    fn run<'a>(&mut self, input: Self::Input<'a>) -> Result<Self::Output<'a>, Self::Error> {
+        let (function, analyses) = input;
+        self.a.run(function, analyses)?;
+        self.b.run(function, analyses)?;
+        Ok((function, analyses))
+    }
+}
diff --git a/hir-transform/src/split_critical_edges.rs b/hir-transform/src/split_critical_edges.rs
new file mode 100644
index 000000000..1f0e9d662
--- /dev/null
+++ b/hir-transform/src/split_critical_edges.rs
@@ -0,0 +1,135 @@
+use std::collections::VecDeque;
+
+use rustc_hash::FxHashSet;
+use smallvec::SmallVec;
+
+use miden_diagnostics::Spanned;
+use miden_hir::{self as hir, Block as BlockId, *};
+use miden_hir_analysis::FunctionAnalysis;
+
+use super::RewritePass;
+
+/// This pass operates on the SSA IR, and ensures that there are no critical
+/// edges in the control flow graph.
+///
+/// A critical edge occurs when control flow may exit a block, which we'll call `P`, to
+/// more than one successor block, which we'll call `S`, where any `S` has more than one
+/// predecessor from which it may receive control. Put another way, in the control flow graph,
+/// a critical edge is one which connects two nodes where the source node has multiple outgoing
+/// edges, and the destination node has multiple incoming edges.
+///
+/// These types of edges cause unnecessary complications with certain types of dataflow analyses
+/// and transformations, and so we fix this by splitting these edges. This is done by introducing
+/// a new block, `B`, in which we insert a branch to `S` with whatever arguments were originally
+/// provided in `P`, and then rewriting the branch in `P` that went to `S`, to go to `B` instead.
+///
+/// After this pass completes, no node in the control flow graph will have both multiple predecessors
+/// and multiple successors.
+///
+pub struct SplitCriticalEdges;
+impl RewritePass for SplitCriticalEdges {
+    type Error = anyhow::Error;
+
+    fn run(
+        &mut self,
+        function: &mut hir::Function,
+        analysis: &mut FunctionAnalysis,
+    ) -> Result<(), Self::Error> {
+        // Search for blocks with multiple successors with edges to blocks with
+        // multiple predecessors; these blocks form critical edges in the control
+        // flow graph which must be split.
+        //
+        // We split the critical edge by inserting a new block after the predecessor
+        // and updating the predecessor instruction to transfer to the new block
+        // instead. We then insert an unconditional branch in the new block that
+        // passes the block arguments that were meant for the "real" successor.
+        let mut visited = FxHashSet::<BlockId>::default();
+        let mut worklist = VecDeque::<BlockId>::default();
+        worklist.push_back(function.dfg.entry_block());
+
+        let cfg = analysis.cfg_mut();
+
+        while let Some(p) = worklist.pop_front() {
+            // If we've already visited a block, skip it
+            if !visited.insert(p) {
+                continue;
+            }
+
+            // Make sure we visit all of the successors of this block next
+            for b in cfg.succ_iter(p) {
+                worklist.push_back(b);
+            }
+
+            // Unless this block has multiple successors, skip it
+            if cfg.num_successors(p) < 2 {
+                continue;
+            }
+
+            let succs = SmallVec::<[BlockId; 2]>::from_iter(cfg.succ_iter(p));
+            for b in succs.into_iter() {
+                // Unless this successor has multiple predecessors, skip it
+                if cfg.num_predecessors(b) < 2 {
+                    continue;
+                }
+
+                // We found a critical edge, so perform the following steps:
+                //
+                // * Create a new block, placed after the predecessor in the layout
+                // * Rewrite the terminator of the predecessor to refer to the new
+                //   block, but without passing any block arguments
+                // * Insert an unconditional branch to the successor with the block
+                //   arguments of the original terminator
+                // * Recompute the control flow graph for affected blocks
+                let split = function.dfg.create_block_after(p);
+                let terminator = function.dfg.last_inst(p).unwrap();
+                let ix = function.dfg.inst_mut(terminator);
+                let span = ix.span();
+                let args: ValueList;
+                match &mut ix.data.item {
+                    Instruction::Br(hir::Br {
+                        ref mut destination,
+                        args: ref mut orig_args,
+                        ..
+                    }) => {
+                        args = orig_args.take();
+                        *destination = split;
+                    }
+                    Instruction::CondBr(hir::CondBr {
+                        then_dest: (ref mut then_dest, ref mut then_args),
+                        else_dest: (ref mut else_dest, ref mut else_args),
+                        ..
+                    }) => {
+                        if *then_dest == b {
+                            *then_dest = split;
+                            args = then_args.take();
+                        } else {
+                            *else_dest = split;
+                            args = else_args.take();
+                        }
+                    }
+                    Instruction::Switch(_) => unimplemented!(),
+                    _ => unreachable!(),
+                }
+                function.dfg.insert_inst(
+                    InsertionPoint {
+                        at: ProgramPoint::Block(split),
+                        action: Insert::After,
+                    },
+                    Instruction::Br(hir::Br {
+                        op: hir::Opcode::Br,
+                        destination: b,
+                        args,
+                    }),
+                    Type::Unknown,
+                    span,
+                );
+
+                cfg.recompute_block(&function.dfg, split);
+            }
+
+            cfg.recompute_block(&function.dfg, p);
+        }
+
+        Ok(())
+    }
+}
diff --git a/hir-transform/src/treeify.rs b/hir-transform/src/treeify.rs
new file mode 100644
index 000000000..e523e2917
--- /dev/null
+++ b/hir-transform/src/treeify.rs
@@ -0,0 +1,665 @@
+use std::collections::VecDeque;
+use std::rc::Rc;
+
+use miden_hir::{self as hir, Block as BlockId, Value as ValueId, *};
+use miden_hir_analysis::{BlockPredecessor, ControlFlowGraph, FunctionAnalysis, LoopAnalysis};
+use rustc_hash::FxHashSet;
+
+use crate::{adt::ScopedMap, RewritePass};
+
+/// This pass takes as input the SSA form of a function, and ensures that the CFG of
+/// that function is a tree, not a DAG, excepting loop headers.
+///
+/// This transformation splits vertices with multiple predecessors, by duplicating the
+/// subtree of the program rooted at those vertices. As mentioned above, we do not split
+/// vertices representing loop headers, in order to preserve loops in the CFG of the resulting
+/// IR. However, we can consider each loop within the overall CFG of a function to be a single
+/// vertex after this transformation, and with this perspective the CFG forms a tree. Loop
+/// nodes are then handled specially during codegen.
+///
+/// The transformation is performed bottom-up, in CFG postorder.
+///
+/// This pass also computes the set of blocks in each loop which must be terminated with `push.0`
+/// to exit the containing loop.
+///
+/// # Examples
+///
+/// ## Basic DAG
+///
+/// This example demonstrates how the DAG of a function with multiple returns gets transformed:
+///
+/// ```ignore
+/// blk0
+///  |
+///  v
+/// blk1 -> blk3 -> ret
+///  |      /
+///  |     /
+///  |    /
+///  v   v
+///  blk2
+///   |
+///   v
+///  ret
+/// ```
+///
+/// Becomes:
+///
+/// ```ignore
+/// blk0
+///  |
+///  v
+/// blk1 -> blk3 -> ret
+///  |       |
+///  |       |
+///  |       |
+///  v       v
+/// blk2    blk2
+///  |       |
+///  v       v
+/// ret     ret
+/// ```
+///
+/// ## Basic Loop
+///
+/// This is an example of a function with multiple returns and a simple loop:
+///
+/// ```ignore
+/// blk0
+///  |              -------
+///  v              v     |
+/// blk1 -> blk3 -> blk4 -> blk5 -> ret
+///  |      /
+///  |     /
+///  |    /
+///  v   v
+///  blk2
+///   |
+///   v
+///  ret
+/// ```
+///
+/// Becomes:
+///
+/// ```ignore
+/// blk0
+///  |              -------
+///  v              v     |
+/// blk1 -> blk3 -> blk4 -> blk5 -> ret
+///  |       |
+///  |       |
+///  |       |
+///  v       v
+/// blk2    blk2
+///  |       |
+///  v       v
+/// ret     ret
+/// ```
+///
+/// ## Complex Loop
+///
+/// This is an example of a function with a complex loop (i.e. multiple exit points):
+///
+/// ```ignore
+/// blk0
+///  |
+///  v
+/// blk1
+///  |  \
+///  |   blk2 <-----
+///  |    |        |
+///  |   blk3      |
+///  |   /  \      |
+///  |  /    blk4--
+///  | /      |
+///  vv       |
+/// blk5     blk6
+/// ```
+///
+/// Becomes:
+///
+/// ```ignore
+/// blk0
+///  |
+///  v
+/// blk1
+///  |  \
+///  |   \
+///  |    blk2 <---
+///  |     |      |
+///  |     v      |
+///  |    blk3    |
+///  |     | \    |
+///  |     |  blk4--
+///  |     |      |
+///  v     v      v
+/// blk5  blk5   blk6
+/// ```
+///
+/// NOTE: Here, when generating code for `blk5` and `blk6`, the loop depth is 0, so
+/// we will emit a single `push.0` at the end of both blocks which will terminate the
+/// containing loop, and then return from the function as we've reached the bottom
+/// of the tree.
+///
+/// ## Nested Loops
+///
+/// This is an extension of the example above, but with nested loops:
+///
+/// ```ignore
+/// blk0
+///  |
+///  v
+/// blk1
+///  |  \
+///  |   blk2 <-------
+///  |    |          |
+///  |   blk3        |
+///  |   /  \        |
+///  |  /    blk4--  |
+///  | /      |      |
+///  vv       v      |
+/// blk5<-   blk6-->blk7-->blk8
+///  |    ^          |
+///  |    |__________|
+///  |         |
+///  |_________|
+/// ```
+///
+/// We have two loops, the outer one starting at `blk2`:
+///
+/// * `blk2->blk3->blk4->blk2`
+/// * `blk2->blk3->blk4->blk6->blk7->blk2`
+///
+/// And the inner one starting at `blk6`:
+///
+/// * `blk6->blk7->blk8->blk6`
+///
+/// Additionally, there are multiple exits through the loops, depending on the path taken:
+///
+/// * `blk2->blk3->blk5`
+/// * `blk2->blk3->blk4->blk6->blk7->blk8->blk5`
+/// * `blk6->blk7->blk8->blk5`
+///
+/// After transformation, this becomes:
+///
+/// ```ignore
+/// blk0
+///  |
+///  v
+/// blk1
+///  |  \
+///  |   blk2 <-------
+///  |    |          |
+///  |   blk3        |
+///  |    | \        |
+///  |    |  blk4--  |
+///  |    |   |      |
+///  v    v   v      |
+/// blk5 blk5 blk6-->blk7-->blk8
+///            ^      |      |
+///            |______|______|
+///                   |
+///                   v
+///                  blk5
+/// ```
+///
+/// During codegen though, we end up with the following tree of stack machine code.
+///
+/// At each point where control flow either continues a loop or leaves it, we must
+///
+/// * Duplicate loop headers on control flow edges leading to those headers
+/// * Emit N `push.0` instructions on control flow edges exiting the function from a loop depth of N
+/// * Emit a combination of the above on control flow edges exiting an inner loop for an outer loop,
+/// depending on what depths the predecessor and successor blocks are at
+///
+/// ```ignore
+/// blk0
+/// blk1
+/// if.true
+///   blk2
+///   while.true
+///     blk3
+///     if.true
+///       blk4
+///       if.true
+///         blk2 # duplicated outer loop header
+///       else
+///         blk6
+///         while.true
+///           blk7
+///           if.true
+///             blk2   # duplicated outer loop header
+///             push.0 # break out of inner loop
+///           else
+///             blk8
+///             if.true
+///               blk6 # duplicated inner loop header
+///             else
+///               blk5
+///               push.0 # break out of outer loop
+///               push.0 # break out of inner loop
+///             end
+///           end
+///         end
+///       end
+///     else
+///       blk5
+///       push.0 # break out of outer loop
+///     end
+///   end
+/// else
+///   blk5
+/// end
+/// ```
+///
+pub struct Treeify;
+impl RewritePass for Treeify {
+    type Error = anyhow::Error;
+
+    fn run(
+        &mut self,
+        function: &mut hir::Function,
+        analysis: &mut FunctionAnalysis,
+    ) -> Result<(), Self::Error> {
+        // Require the dominator tree and loop analyses
+        analysis.ensure_loops(function);
+
+        let cfg = analysis.cfg();
+        let domtree = analysis.domtree();
+        let loops = analysis.loops();
+        let mut block_q = VecDeque::<CopyBlock>::default();
+        let mut changed = false;
+
+        for b in domtree.cfg_postorder().iter().copied() {
+            if loops.is_loop_header(b).is_some() {
+                // Ignore loop headers
+                continue;
+            }
+
+            // Blocks with multiple predecessors cause the CFG to form a DAG,
+            // we need to duplicate the CFG rooted at this block for all predecessors.
+            //
+            // While we could technically preserve one of the predecessors, we perform
+            // some transformations during the copy that would result in copied vs original
+            // trees to differ slightly, which would inhibit subsequent optimizations.
+            // The original subtree blocks are detached from the function.
+            if cfg.num_predecessors(b) > 1 {
+                for p in cfg.pred_iter(b) {
+                    assert!(block_q.is_empty());
+                    block_q.push_back(CopyBlock::new(b, p));
+                    while let Some(CopyBlock {
+                        b,
+                        ref p,
+                        value_map,
+                        block_map,
+                    }) = block_q.pop_front()
+                    {
+                        // Copy this block and its children
+                        if loops.is_loop_header(b).is_some() {
+                            treeify_loop(
+                                b,
+                                p,
+                                function,
+                                cfg,
+                                loops,
+                                &mut block_q,
+                                value_map,
+                                block_map,
+                            )?;
+                        } else {
+                            treeify(
+                                b,
+                                p,
+                                function,
+                                cfg,
+                                loops,
+                                &mut block_q,
+                                value_map,
+                                block_map,
+                            )?;
+                        }
+                    }
+                }
+
+                // After treeification, the original subtree blocks cannot possibly be
+                // referenced by other blocks in the function, so remove all of them
+                detach_tree(b, function, cfg);
+
+                // Mark the control flow graph as modified
+                changed = true;
+            }
+        }
+
+        // If we made any changes, we need to recompute all analyses
+        if changed {
+            analysis.recompute(function);
+        }
+
+        Ok(())
+    }
+}
+
+fn treeify(
+    b: BlockId,
+    p: &BlockPredecessor,
+    function: &mut hir::Function,
+    cfg: &ControlFlowGraph,
+    loops: &LoopAnalysis,
+    block_q: &mut VecDeque<CopyBlock>,
+    mut value_map: ScopedMap<ValueId, ValueId>,
+    mut block_map: ScopedMap<BlockId, BlockId>,
+) -> anyhow::Result<()> {
+    // 1. Create a new block `b'`, without block arguments,
+    let b_prime = function.dfg.create_block_after(p.block);
+    block_map.insert(b, b_prime);
+    // 2. Initialize a lookup table of old value defs to new value defs, seed it by mapping the
+    //    block arguments of `b` to the values passed from the predecessor
+    match function.dfg.analyze_branch(p.inst) {
+        BranchInfo::NotABranch => {
+            value_map.extend(
+                function
+                    .dfg
+                    .block_args(b)
+                    .iter()
+                    .copied()
+                    .zip(function.dfg.inst_args(p.inst).iter().copied()),
+            );
+        }
+        BranchInfo::SingleDest(_, args) => {
+            value_map.extend(
+                function
+                    .dfg
+                    .block_args(b)
+                    .iter()
+                    .copied()
+                    .zip(args.iter().copied()),
+            );
+        }
+        BranchInfo::MultiDest(ref jts) => {
+            for jt in jts.iter() {
+                if jt.destination == b {
+                    value_map.extend(
+                        function
+                            .dfg
+                            .block_args(b)
+                            .iter()
+                            .copied()
+                            .zip(jt.args.iter().copied()),
+                    );
+                    break;
+                }
+            }
+        }
+    }
+    // 3. Update the predecessor instruction to reference the new block, remove block arguments.
+    update_predecessor(function, p, |dest, dest_args, pool| {
+        if *dest == b {
+            *dest = b_prime;
+            dest_args.clear(pool);
+        }
+    });
+    // 4. Copy contents of `b` to `b'`, inserting defs in the lookup table, and mapping operands
+    //    to their new "corrected" values
+    copy_instructions(b, b_prime, function, &mut value_map, &block_map);
+    // 5. Recursively copy all children of `b` to `b_prime`
+    copy_children(
+        b, b_prime, function, cfg, loops, block_q, value_map, block_map,
+    )
+}
+
+fn treeify_loop(
+    b: BlockId,
+    p: &BlockPredecessor,
+    function: &mut hir::Function,
+    cfg: &ControlFlowGraph,
+    loops: &LoopAnalysis,
+    block_q: &mut VecDeque<CopyBlock>,
+    mut value_map: ScopedMap<ValueId, ValueId>,
+    mut block_map: ScopedMap<BlockId, BlockId>,
+) -> anyhow::Result<()> {
+    // 1. Create new block, b', with a new set of block arguments matching the original,
+    //    populate the value map with rewrites for the original block argument values
+    let b_prime = function.dfg.create_block_after(p.block);
+    block_map.insert(b, b_prime);
+    function.dfg.clone_block_params(b, b_prime);
+    for (src, dest) in function
+        .dfg
+        .block_params(b)
+        .iter()
+        .copied()
+        .zip(function.dfg.block_params(b_prime).iter().copied())
+    {
+        value_map.insert(src, dest);
+    }
+    // 2. Update the predecessor instruction to reference the new block, leave block arguments unchanged
+    update_predecessor(function, p, |dest, _, _| {
+        if *dest == b {
+            *dest = b_prime;
+        }
+    });
+    // 3. Copy contents of `b` to `b'`, inserting defs in the lookup table, and mapping operands
+    //    to their new "corrected" values
+    copy_instructions(b, b_prime, function, &mut value_map, &block_map);
+    // 4. Recursively copy all children of `b` to `b_prime`
+    copy_children(
+        b, b_prime, function, cfg, loops, block_q, value_map, block_map,
+    )
+}
+
+/// Detach `root`, and all of it's reachable children, from the layout of `function`
+///
+/// When called, it is assumed that `root` has been cloned to a new block,
+/// along with all of it's reachable children, and its predecessor rewritten
+/// to refer to the new block instead. As a result, `root` should no longer be
+/// reachable in the CFG, along with its children, as they would have been cloned
+/// as well.
+///
+/// NOTE: This does not delete the block data attached to the function, only the
+/// presence of the block in the layout of the function.
+fn detach_tree(root: BlockId, function: &mut hir::Function, cfg: &ControlFlowGraph) {
+    let mut delete_q = VecDeque::<BlockId>::default();
+    let mut visited = FxHashSet::<BlockId>::default();
+    delete_q.push_back(root);
+    visited.insert(root);
+    while let Some(block) = delete_q.pop_front() {
+        function.dfg.detach_block(block);
+        for b in cfg.succ_iter(block).into_iter() {
+            // Skip blocks we've already seen
+            if visited.insert(b) {
+                delete_q.push_back(b);
+            }
+        }
+    }
+}
+
+fn copy_children(
+    b: BlockId,
+    b_prime: BlockId,
+    function: &mut hir::Function,
+    cfg: &ControlFlowGraph,
+    loops: &LoopAnalysis,
+    block_q: &mut VecDeque<CopyBlock>,
+    value_map: ScopedMap<ValueId, ValueId>,
+    block_map: ScopedMap<BlockId, BlockId>,
+) -> anyhow::Result<()> {
+    let pred = BlockPredecessor {
+        inst: function
+            .dfg
+            .last_inst(b_prime)
+            .expect("expected non-empty block"),
+        block: b_prime,
+    };
+    let value_map = Rc::new(value_map);
+    let block_map = Rc::new(block_map);
+    for succ in cfg.succ_iter(b) {
+        // If we've already seen this successor, and it is a loop header, then
+        // we don't want to copy it, but we do want to replace the reference to
+        // this block with its copy
+        if let Some(succ_prime) = block_map.get(&succ) {
+            if loops.is_loop_header(succ).is_some() {
+                update_predecessor(function, &pred, |dest, _, _| {
+                    if dest == &succ {
+                        *dest = *succ_prime;
+                    }
+                });
+                continue;
+            }
+        }
+
+        block_q.push_back(CopyBlock {
+            b: succ,
+            p: pred,
+            value_map: ScopedMap::new(Some(value_map.clone())),
+            block_map: ScopedMap::new(Some(block_map.clone())),
+        });
+    }
+
+    Ok(())
+}
+
+fn copy_instructions(
+    b: BlockId,
+    b_prime: BlockId,
+    function: &mut hir::Function,
+    value_map: &mut ScopedMap<ValueId, ValueId>,
+    block_map: &ScopedMap<BlockId, BlockId>,
+) {
+    // Initialize the cursor at the first instruction in `b`
+    let mut next = {
+        let cursor = function.dfg.block(b).insts.front();
+        cursor.get().map(|inst_data| inst_data as *const InstNode)
+    };
+
+    while let Some(ptr) = next.take() {
+        // Get the id of the instruction at the current cursor position, then advance the cursor
+        let src_inst = {
+            let mut cursor = unsafe { function.dfg.block(b).insts.cursor_from_ptr(ptr) };
+            let id = cursor.get().unwrap().key;
+            cursor.move_next();
+            next = cursor.get().map(|inst_data| inst_data as *const InstNode);
+            id
+        };
+
+        // Clone the source instruction data
+        let inst = function.dfg.clone_inst(src_inst);
+
+        // We need to fix up the cloned instruction data
+        let data = &mut function.dfg.insts[inst];
+        // First, we're going to be placing it in b', so make sure the instruction is aware of that
+        data.block = b_prime;
+        // Second, we need to rewrite value/block references contained in the instruction
+        match &mut data.item {
+            Instruction::Br(hir::Br {
+                ref mut destination,
+                ref mut args,
+                ..
+            }) => {
+                if let Some(new_dest) = block_map.get(destination) {
+                    *destination = *new_dest;
+                }
+                let args = args.as_mut_slice(&mut function.dfg.value_lists);
+                for arg in args.iter_mut() {
+                    if let Some(arg_prime) = value_map.get(arg) {
+                        *arg = *arg_prime;
+                    }
+                }
+            }
+            Instruction::CondBr(hir::CondBr {
+                ref mut cond,
+                then_dest: (ref mut then_dest, ref mut then_args),
+                else_dest: (ref mut else_dest, ref mut else_args),
+                ..
+            }) => {
+                if let Some(cond_prime) = value_map.get(cond) {
+                    *cond = *cond_prime;
+                }
+                if let Some(new_dest) = block_map.get(then_dest) {
+                    *then_dest = *new_dest;
+                }
+                let then_args = then_args.as_mut_slice(&mut function.dfg.value_lists);
+                for arg in then_args.iter_mut() {
+                    if let Some(arg_prime) = value_map.get(arg) {
+                        *arg = *arg_prime;
+                    }
+                }
+                if let Some(new_dest) = block_map.get(else_dest) {
+                    *else_dest = *new_dest;
+                }
+                let else_args = else_args.as_mut_slice(&mut function.dfg.value_lists);
+                for arg in else_args.iter_mut() {
+                    if let Some(arg_prime) = value_map.get(arg) {
+                        *arg = *arg_prime;
+                    }
+                }
+            }
+            other => {
+                for arg in other
+                    .arguments_mut(&mut function.dfg.value_lists)
+                    .iter_mut()
+                {
+                    if let Some(arg_prime) = value_map.get(arg) {
+                        *arg = *arg_prime;
+                    }
+                }
+            }
+        }
+        // Finally, append the cloned instruction to the block layout
+        let node = unsafe { UnsafeRef::from_raw(data) };
+        function.dfg.block_mut(b_prime).insts.push_back(node);
+        value_map.extend(
+            function
+                .dfg
+                .inst_results(src_inst)
+                .iter()
+                .copied()
+                .zip(function.dfg.inst_results(inst).iter().copied()),
+        );
+    }
+}
+
+struct CopyBlock {
+    b: BlockId,
+    p: BlockPredecessor,
+    value_map: ScopedMap<ValueId, ValueId>,
+    block_map: ScopedMap<BlockId, BlockId>,
+}
+impl CopyBlock {
+    fn new(b: BlockId, p: BlockPredecessor) -> Self {
+        Self {
+            b,
+            p,
+            value_map: Default::default(),
+            block_map: Default::default(),
+        }
+    }
+}
+
+#[inline]
+fn update_predecessor<F>(function: &mut hir::Function, p: &BlockPredecessor, mut callback: F)
+where
+    F: FnMut(&mut BlockId, &mut ValueList, &mut ValueListPool),
+{
+    match &mut function.dfg.insts[p.inst].data.item {
+        Instruction::Br(hir::Br {
+            ref mut destination,
+            ref mut args,
+            ..
+        }) => {
+            callback(destination, args, &mut function.dfg.value_lists);
+        }
+        Instruction::CondBr(hir::CondBr {
+            then_dest: (ref mut then_dest, ref mut then_args),
+            else_dest: (ref mut else_dest, ref mut else_args),
+            ..
+        }) => {
+            assert_ne!(then_dest, else_dest, "unexpected critical edge");
+            let value_lists = &mut function.dfg.value_lists;
+            callback(then_dest, then_args, value_lists);
+            callback(else_dest, else_args, value_lists);
+        }
+        Instruction::Switch(_) => {
+            panic!("expected switch instructions to have been simplified prior to treeification")
+        }
+        _ => unreachable!(),
+    }
+}