feat: implement hir transforms

This commit contains the implementations for a set of transformation passes intended to prepare Miden IR for stackification/code generation. * `SplitCriticalEdges`, does what it says on the tin; it splits critical edges in the control flow graph by introducing new blocks between a predecessor block with multiple successors and a successor with multiple predecessors. This eases analysis of the control flow graph. * `Treeify`, this takes a control flow graph (a directed graph which is acyclic apart from loops) and ensures that it is a tree by duplicating subtrees of the graph as needed, such that no block has more than a single predecessor. This transformation does not modify loop headers however, as by definition those introduce cycles in the graph. That suits us just fine though, as the purpose here is to ensure that the control flow graph for a function can be trivially lowered to Miden Assembly, which does not have jumps, and thus requires programs to form a tree. We handle the translation of loops using the high-level looping ops in Miden Assembly - as long as the body of the loop is a tree, we're good. * `InlineBlocks`, is applied after `Treeify` to simplify the control flow graph, by removing redundant blocks/branches which were either introduced in the original IR, as a result of critical edge splitting, or due to duplicating blocks during treeification that were previously join points in the CFG, but aren't anymore.
0xPolygonMiden · Sep 4, 2023 · 3e72b27 · 3e72b27
1 parent d0c3a1e
commit 3e72b27
Show file tree

Hide file tree

Showing 9 changed files with 1,132 additions and 0 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -5,6 +5,7 @@ members = [
   "hir-analysis",
   "hir-pass",
   "hir-symbol",
+  "hir-transform",
   "hir-type",
   "tools/*",
 ]

diff --git a/hir-transform/Cargo.toml b/hir-transform/Cargo.toml
@@ -0,0 +1,21 @@
+[package]
+name = "miden-hir-transform"
+version.workspace = true
+rust-version.workspace = true
+authors.workspace = true
+repository.workspace = true
+categories.workspace = true
+keywords.workspace = true
+license.workspace = true
+readme.workspace = true
+edition.workspace = true
+
+[dependencies]
+anyhow.workspace = true
+cranelift-entity.workspace = true
+miden-diagnostics.workspace = true
+miden-hir = { path = "../hir" }
+miden-hir-analysis = { path = "../hir-analysis" }
+miden-hir-pass = { path = "../hir-pass" }
+rustc-hash.workspace = true
+smallvec.workspace = true
diff --git a/hir-transform/src/adt/mod.rs b/hir-transform/src/adt/mod.rs
@@ -0,0 +1,3 @@
+mod scoped_map;
+
+pub use self::scoped_map::ScopedMap;
diff --git a/hir-transform/src/adt/scoped_map.rs b/hir-transform/src/adt/scoped_map.rs
@@ -0,0 +1,57 @@
+use std::borrow::Borrow;
+use std::hash::Hash;
+use std::rc::Rc;
+
+use rustc_hash::FxHashMap;
+
+/// A hash map layered on top of an optional, shared parent map.
+///
+/// Entries are always written to this map's own table; lookups which miss
+/// locally are retried against the parent chain (see `get`).
+#[derive(Clone)]
+pub struct ScopedMap<K, V>
+where
+    K: Eq + Hash,
+{
+    /// The enclosing scope, consulted when a key is not found in `map`
+    parent: Option<Rc<ScopedMap<K, V>>>,
+    /// The entries defined directly in this scope
+    map: FxHashMap<K, V>,
+}
+impl<K, V> Default for ScopedMap<K, V>
+where
+    K: Eq + Hash,
+{
+    /// Creates an empty map with no parent scope
+    fn default() -> Self {
+        Self::new(None)
+    }
+}
+impl<K, V> ScopedMap<K, V>
+where
+    K: Eq + Hash,
+{
+    /// Creates an empty scope nested inside `parent`, or a root scope if `parent` is `None`
+    pub fn new(parent: Option<Rc<ScopedMap<K, V>>>) -> Self {
+        let map = FxHashMap::default();
+        Self { parent, map }
+    }
+
+    /// Looks up `k`, starting in this scope and then walking outwards through
+    /// each parent scope in turn.
+    ///
+    /// Returns the value from the innermost scope which defines `k`, or `None`
+    /// if no scope in the chain defines it.
+    pub fn get<Q>(&self, k: &Q) -> Option<&V>
+    where
+        K: Borrow<Q>,
+        Q: Hash + Eq + ?Sized,
+    {
+        let mut scope = Some(self);
+        while let Some(current) = scope {
+            if let Some(value) = current.map.get(k) {
+                return Some(value);
+            }
+            scope = current.parent.as_deref();
+        }
+        None
+    }
+
+    /// Binds `k` to `v` in this scope, replacing any binding this scope already
+    /// had for `k`. Parent scopes are never modified.
+    pub fn insert(&mut self, k: K, v: V) {
+        self.map.insert(k, v);
+    }
+
+    /// Binds every `(key, value)` pair produced by `iter` in this scope
+    pub fn extend<I>(&mut self, iter: I)
+    where
+        I: IntoIterator<Item = (K, V)>,
+    {
+        self.map.extend(iter);
+    }
+}
diff --git a/hir-transform/src/inline_blocks.rs b/hir-transform/src/inline_blocks.rs
@@ -0,0 +1,131 @@
+use std::collections::VecDeque;
+
+use rustc_hash::FxHashSet;
+
+use miden_hir::{self as hir, Block as BlockId, *};
+use miden_hir_analysis::{ControlFlowGraph, FunctionAnalysis};
+
+use super::RewritePass;
+
+/// This pass operates on the SSA IR, and inlines superfluous blocks which serve no
+/// purpose. Such blocks have no block arguments, and have a single predecessor.
+///
+/// Blocks like this may have been introduced for the following reasons:
+///
+/// * Due to less than optimal lowering to SSA form
+/// * To split critical edges in preparation for dataflow analysis and related transformations,
+/// but ultimately no code was introduced along those edges, and critical edges no longer present
+/// an obstacle to further optimization or codegen.
+/// * During treeification of the CFG, where blocks with multiple predecessors were duplicated
+/// to produce a CFG in tree form, where no blocks (other than loop headers) have multiple
+/// predecessors. This process removed block arguments from these blocks, and rewrote instructions
+/// dominated by those block arguments to reference the values passed from the original predecessor
+/// to whom the subtree is attached. This transformation can expose a chain of blocks which all have
+/// a single predecessor and successor, introducing branches where none are needed, and by removing
+/// those redundant branches, all of the code from blocks in the chain can be inlined in the first
+/// block of the chain.
+pub struct InlineBlocks;
+impl RewritePass for InlineBlocks {
+    type Error = anyhow::Error;
+
+    /// Walks the control flow graph of `function` breadth-first from its entry block,
+    /// merging every inlinable successor into its sole predecessor.
+    ///
+    /// When at least one block was inlined, `analysis` is notified via `cfg_changed`.
+    /// As written, this pass never produces an error.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a visited block contains no instructions.
+    fn run(
+        &mut self,
+        function: &mut hir::Function,
+        analysis: &mut FunctionAnalysis,
+    ) -> Result<(), Self::Error> {
+        let cfg = analysis.cfg_mut();
+
+        let mut changed = false;
+        let mut visited = FxHashSet::<BlockId>::default();
+        let mut worklist = VecDeque::<BlockId>::default();
+        worklist.push_back(function.dfg.entry_block());
+
+        // First, search down the CFG for non-loop header blocks with only a single successor.
+        // These blocks form possible roots of a chain of blocks that can be inlined.
+        //
+        // For each such root, we then check if the successor block has a single predecessor,
+        // if so, then we can remove the terminator instruction from the root block, and then
+        // move all of the code from the successor block into the root block. We can then repeat
+        // this process until we inline a terminator instruction that is not an unconditional branch
+        // to a single successor.
+        while let Some(p) = worklist.pop_front() {
+            // If we've already visited a block, skip it
+            if !visited.insert(p) {
+                continue;
+            }
+
+            // If this block has multiple successors, or multiple predecessors, add all of its
+            // successors to the work queue and move on.
+            if cfg.num_successors(p) > 1 || cfg.num_predecessors(p) > 1 {
+                worklist.extend(cfg.succ_iter(p));
+                continue;
+            }
+
+            // This block is a candidate for inlining
+            //
+            // If inlining can proceed, do so until we reach a point where the inlined terminator
+            // returns from the function, has multiple successors, or branches to a block with
+            // multiple predecessors.
+            while let BranchInfo::SingleDest(b, args) = function.dfg.analyze_branch(
+                function
+                    .dfg
+                    .last_inst(p)
+                    .expect("expected block to contain at least one instruction"),
+            ) {
+                // A block cannot be inlined into itself (an unconditional self-branch),
+                // and a successor with other predecessors cannot be inlined at all.
+                if b == p || cfg.num_predecessors(b) > 1 {
+                    break;
+                }
+
+                // Only inline if the successor has no block arguments
+                //
+                // TODO: We can inline blocks with arguments as well, but with higher cost,
+                // as we must visit all uses of the block arguments and update them. This
+                // is left as a future extension of this pass should we find that it is
+                // valuable as an optimization.
+                if !args.is_empty() {
+                    break;
+                }
+
+                inline(b, p, function, cfg);
+
+                // Mark the control flow graph as modified
+                changed = true;
+            }
+
+            // Regardless of why the chain ended, keep walking the graph below it: `inline`
+            // recomputes the CFG entry for `p` after each merge, so its successors are now
+            // those of the last block merged into it (or its original successor, if nothing
+            // could be inlined). Without this, blocks reachable only through `p` would never
+            // be visited. Already-visited blocks are filtered out when popped.
+            worklist.extend(cfg.succ_iter(p));
+        }
+
+        if changed {
+            analysis.cfg_changed(function);
+        }
+
+        Ok(())
+    }
+}
+
+/// Merges the block `from` into the block `to`, which is expected to end with the
+/// branch to `from` that is being eliminated.
+///
+/// The last instruction of `to` is dropped from its instruction list, every instruction
+/// of `from` is appended to `to` in order, and `from` is then detached from both the
+/// function and `cfg`. Finally, the `cfg` entry for `to` is recomputed.
+///
+/// # Panics
+///
+/// Panics if `from` and `to` are the same block.
+fn inline(from: BlockId, to: BlockId, function: &mut hir::Function, cfg: &mut ControlFlowGraph) {
+    assert_ne!(from, to);
+
+    // Take ownership of the instruction list of the block being inlined
+    let mut moved = function.dfg.block_mut(from).insts.take();
+    let dest = &mut function.dfg.block_mut(to).insts;
+    // Remove the original terminator
+    dest.pop_back();
+    // Re-home each instruction, front to back, so that program order is preserved
+    while let Some(inst_ref) = moved.pop_front() {
+        let raw = UnsafeRef::into_raw(inst_ref);
+        // SAFETY (NOTE(review): assumed, confirm against how instructions are allocated):
+        // `raw` was obtained from `UnsafeRef::into_raw` just above and the instruction has
+        // been popped off of its list, so we presume nothing else is accessing it while
+        // its `block` field is updated and the reference is rebuilt.
+        let relocated = unsafe {
+            (*raw).block = to;
+            UnsafeRef::from_raw(raw)
+        };
+        dest.push_back(relocated);
+    }
+
+    // Detach the original block from the function
+    function.dfg.detach_block(from);
+    // Update the control flow graph to reflect the changes
+    cfg.detach_block(from);
+    cfg.recompute_block(&function.dfg, to);
+}
diff --git a/hir-transform/src/lib.rs b/hir-transform/src/lib.rs
@@ -0,0 +1,105 @@
+pub(crate) mod adt;
+mod inline_blocks;
+mod split_critical_edges;
+mod treeify;
+
+pub use self::inline_blocks::InlineBlocks;
+pub use self::split_critical_edges::SplitCriticalEdges;
+pub use self::treeify::Treeify;
+
+use miden_hir_analysis::FunctionAnalysis;
+use miden_hir_pass::Pass;
+
+/// A [RewritePass] is a specialized form of [Pass] whose job is to apply a rewrite
+/// (i.e. an in-place transformation) to a [miden_hir::Function].
+///
+/// A rewrite generally depends on one or more control flow analyses, which ones being
+/// determined by the pass itself. The [FunctionAnalysis] structure exists to serve that
+/// need: a pass can request specific analysis results from it, and they will be computed
+/// on-demand if not yet available.
+pub trait RewritePass {
+    /// The type of error produced when the rewrite fails
+    type Error;
+
+    /// Applies this rewrite to `function`, using `analyses` as the source of analysis results.
+    ///
+    /// An `Err` result signals that the pass has failed, and that compilation
+    /// should be aborted
+    fn run(
+        &mut self,
+        function: &mut miden_hir::Function,
+        analyses: &mut FunctionAnalysis,
+    ) -> Result<(), Self::Error>;
+
+    /// Fuses this rewrite with `pass`, producing a single rewrite which runs
+    /// `self` first, followed by `pass`
+    fn chain<P: RewritePass<Error = Self::Error>>(self, pass: P) -> RewriteChain<Self, P>
+    where
+        Self: Sized,
+    {
+        RewriteChain { a: self, b: pass }
+    }
+}
+
+/// [RewriteChain] is the equivalent of [miden_hir_pass::Chain] for [RewritePass].
+///
+/// This is not meant to be constructed or referenced directly, as the type signature gets out
+/// of hand quickly when combining multiple rewrites. Instead, you should invoke `chain` on a
+/// [RewritePass] implementation, and use it as a trait object. In some cases this may require boxing
+/// the `RewriteChain`, depending on how it is being used.
+///
+/// The chain is `Clone`/`Copy` whenever both of the rewrites it holds are.
+#[derive(Clone, Copy)]
+pub struct RewriteChain<A, B> {
+    /// The rewrite which runs first
+    a: A,
+    /// The rewrite which runs second, and only if `a` succeeded
+    b: B,
+}
+impl<A, B> RewriteChain<A, B> {
+    /// Creates a chain which runs `a`, then `b`
+    fn new(a: A, b: B) -> Self {
+        Self { a, b }
+    }
+}
+impl<A, B, E> RewritePass for RewriteChain<A, B>
+where
+    A: RewritePass<Error = E>,
+    B: RewritePass<Error = E>,
+{
+    type Error = E;
+
+    /// Runs the first rewrite, then the second.
+    ///
+    /// If the first rewrite fails, its error is returned immediately and the
+    /// second rewrite is not run.
+    fn run(
+        &mut self,
+        function: &mut miden_hir::Function,
+        analyses: &mut FunctionAnalysis,
+    ) -> Result<(), Self::Error> {
+        self.a.run(function, analyses)?;
+        self.b.run(function, analyses)
+    }
+}
+impl<A, B, E> Pass for RewriteChain<A, B>
+where
+    A: RewritePass<Error = E>,
+    B: RewritePass<Error = E>,
+{
+    type Input<'a> = (&'a mut miden_hir::Function, &'a mut FunctionAnalysis);
+    type Output<'a> = (&'a mut miden_hir::Function, &'a mut FunctionAnalysis);
+    type Error = E;
+
+    /// Runs the chain as a [RewritePass] over the given function and analyses, and
+    /// hands both back on success so that further passes can be applied to them.
+    fn run<'a>(&mut self, input: Self::Input<'a>) -> Result<Self::Output<'a>, Self::Error> {
+        let (function, analyses) = input;
+        // Delegate to the `RewritePass` implementation so that the sequencing logic
+        // lives in exactly one place
+        <Self as RewritePass>::run(self, &mut *function, &mut *analyses)?;
+        Ok((function, analyses))
+    }
+}