diff --git a/Cargo.lock b/Cargo.lock
index e2602535f..83ae6f103 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -567,6 +567,20 @@ dependencies = [
  "lazy_static",
 ]
 
+[[package]]
+name = "miden-hir-transform"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "cranelift-entity",
+ "miden-diagnostics",
+ "miden-hir",
+ "miden-hir-analysis",
+ "miden-hir-pass",
+ "rustc-hash",
+ "smallvec",
+]
+
 [[package]]
 name = "miden-hir-type"
 version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
index 74b594be8..2b3697dd7 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,6 +5,7 @@ members = [
     "hir-analysis",
     "hir-pass",
     "hir-symbol",
+    "hir-transform",
     "hir-type",
     "tools/*",
 ]
diff --git a/hir-transform/Cargo.toml b/hir-transform/Cargo.toml
new file mode 100644
index 000000000..6ba7a08b8
--- /dev/null
+++ b/hir-transform/Cargo.toml
@@ -0,0 +1,21 @@
+[package]
+name = "miden-hir-transform"
+version.workspace = true
+rust-version.workspace = true
+authors.workspace = true
+repository.workspace = true
+categories.workspace = true
+keywords.workspace = true
+license.workspace = true
+readme.workspace = true
+edition.workspace = true
+
+[dependencies]
+anyhow.workspace = true
+cranelift-entity.workspace = true
+miden-diagnostics.workspace = true
+miden-hir = { path = "../hir" }
+miden-hir-analysis = { path = "../hir-analysis" }
+miden-hir-pass = { path = "../hir-pass" }
+rustc-hash.workspace = true
+smallvec.workspace = true
diff --git a/hir-transform/src/adt/mod.rs b/hir-transform/src/adt/mod.rs
new file mode 100644
index 000000000..38aa84015
--- /dev/null
+++ b/hir-transform/src/adt/mod.rs
@@ -0,0 +1,3 @@
+mod scoped_map;
+
+pub use self::scoped_map::ScopedMap;
diff --git a/hir-transform/src/adt/scoped_map.rs b/hir-transform/src/adt/scoped_map.rs
new file mode 100644
index 000000000..4061920e7
--- /dev/null
+++ b/hir-transform/src/adt/scoped_map.rs
@@ -0,0 +1,57 @@
+use std::borrow::Borrow;
+use std::hash::Hash;
+use std::rc::Rc;
+
+use rustc_hash::FxHashMap;
+
+/// A hash map with parent-chain (lexical-scope) lookup semantics:
+/// a lookup that misses in the current scope falls back to the parent scope.
+/// Insertions always go into the current (innermost) scope.
+#[derive(Clone)]
+pub struct ScopedMap<K, V>
+where
+    K: Eq + Hash,
+{
+    parent: Option<Rc<ScopedMap<K, V>>>,
+    map: FxHashMap<K, V>,
+}
+impl<K, V> Default for ScopedMap<K, V>
+where
+    K: Eq + Hash,
+{
+    fn default() -> Self {
+        Self {
+            parent: None,
+            map: Default::default(),
+        }
+    }
+}
+impl<K, V> ScopedMap<K, V>
+where
+    K: Eq + Hash,
+{
+    pub fn new(parent: Option<Rc<ScopedMap<K, V>>>) -> Self {
+        Self {
+            parent,
+            map: Default::default(),
+        }
+    }
+
+    pub fn get<Q>(&self, k: &Q) -> Option<&V>
+    where
+        K: Borrow<Q>,
+        Q: Hash + Eq + ?Sized,
+    {
+        self.map
+            .get(k)
+            .or_else(|| self.parent.as_ref().and_then(|p| p.get(k)))
+    }
+
+    pub fn insert(&mut self, k: K, v: V) {
+        self.map.insert(k, v);
+    }
+
+    pub fn extend<I>(&mut self, iter: I)
+    where
+        I: IntoIterator<Item = (K, V)>,
+    {
+        self.map.extend(iter);
+    }
+}
diff --git a/hir-transform/src/inline_blocks.rs b/hir-transform/src/inline_blocks.rs
new file mode 100644
index 000000000..3cf59c919
--- /dev/null
+++ b/hir-transform/src/inline_blocks.rs
@@ -0,0 +1,131 @@
+use std::collections::VecDeque;
+
+use rustc_hash::FxHashSet;
+
+use miden_hir::{self as hir, Block as BlockId, *};
+use miden_hir_analysis::{ControlFlowGraph, FunctionAnalysis};
+
+use super::RewritePass;
+
+/// This pass operates on the SSA IR, and inlines superfluous blocks which serve no
+/// purpose. Such blocks have no block arguments, and have a single predecessor.
+///
+/// Blocks like this may have been introduced for the following reasons:
+///
+/// * Due to less than optimal lowering to SSA form
+/// * To split critical edges in preparation for dataflow analysis and related transformations,
+/// but ultimately no code introduced along those edges, and critical edges no longer present
+/// an obstacle to further optimization or codegen.
+/// * During treeification of the CFG, where blocks with multiple predecessors were duplicated
+/// to produce a CFG in tree form, where no blocks (other than loop headers) have multiple
+/// predecessors.
+/// This process removed block arguments from these blocks, and rewrote instructions
+/// dominated by those block arguments to reference the values passed from the original predecessor
+/// to whom the subtree is attached. This transformation can expose a chain of blocks which all have
+/// a single predecessor and successor, introducing branches where none are needed, and by removing
+/// those redundant branches, all of the code from blocks in the chain can be inlined in the first
+/// block of the chain.
+pub struct InlineBlocks;
+impl RewritePass for InlineBlocks {
+    type Error = anyhow::Error;
+
+    fn run(
+        &mut self,
+        function: &mut hir::Function,
+        analysis: &mut FunctionAnalysis,
+    ) -> Result<(), Self::Error> {
+        let cfg = analysis.cfg_mut();
+
+        let mut changed = false;
+        let mut visited = FxHashSet::<BlockId>::default();
+        let mut worklist = VecDeque::<BlockId>::default();
+        worklist.push_back(function.dfg.entry_block());
+
+        // First, search down the CFG for non-loop header blocks with only a single successor.
+        // These blocks form possible roots of a chain of blocks that can be inlined.
+        //
+        // For each such root, we then check if the successor block has a single predecessor,
+        // if so, then we can remove the terminator instruction from the root block, and then
+        // move all of the code from the successor block into the root block. We can then repeat
+        // this process until we inline a terminator instruction that is not an unconditional branch
+        // to a single successor.
+        while let Some(p) = worklist.pop_front() {
+            // If we've already visited a block, skip it
+            if !visited.insert(p) {
+                continue;
+            }
+
+            // If this block has multiple successors, or multiple predecessors, add all of it's
+            // successors to the work queue and move on.
+            if cfg.num_successors(p) > 1 || cfg.num_predecessors(p) > 1 {
+                for b in cfg.succ_iter(p) {
+                    worklist.push_back(b);
+                }
+                continue;
+            }
+
+            // This block is a candidate for inlining
+            //
+            // If inlining can proceed, do so until we reach a point where the inlined terminator
+            // returns from the function, has multiple successors, or branches to a block with
+            // multiple predecessors.
+            while let BranchInfo::SingleDest(b, args) = function
+                .dfg
+                .analyze_branch(function.dfg.last_inst(p).unwrap())
+            {
+                // If this successor has other predecessors, it can't be inlined, so
+                // add it to the work list and move on
+                if cfg.num_predecessors(b) > 1 {
+                    worklist.push_back(b);
+                    break;
+                }
+
+                // Only inline if the successor has no block arguments
+                //
+                // TODO: We can inline blocks with arguments as well, but with higher cost,
+                // as we must visit all uses of the block arguments and update them. This
+                // is left as a future extension of this pass should we find that it is
+                // valuable as an optimization.
+                if !args.is_empty() {
+                    break;
+                }
+
+                inline(b, p, function, cfg);
+
+                // Mark that the control flow graph as modified
+                changed = true;
+            }
+        }
+
+        if changed {
+            analysis.cfg_changed(function);
+        }
+
+        Ok(())
+    }
+}
+
+fn inline(from: BlockId, to: BlockId, function: &mut hir::Function, cfg: &mut ControlFlowGraph) {
+    assert_ne!(from, to);
+    {
+        let mut from_insts = function.dfg.block_mut(from).insts.take();
+        let to_insts = &mut function.dfg.block_mut(to).insts;
+        // Remove the original terminator
+        to_insts.pop_back();
+        // Move all instructions from their original block to the parent,
+        // updating the instruction data along the way to reflect the change
+        // in location
+        while let Some(unsafe_ix_ref) = from_insts.pop_front() {
+            let ix_ptr = UnsafeRef::into_raw(unsafe_ix_ref);
+            unsafe {
+                let ix = &mut *ix_ptr;
+                ix.block = to;
+            }
+            to_insts.push_back(unsafe { UnsafeRef::from_raw(ix_ptr) });
+        }
+    }
+    // Detach the original block from the function
+    function.dfg.detach_block(from);
+    // Update the control flow graph to reflect the changes
+    cfg.detach_block(from);
+    cfg.recompute_block(&function.dfg, to);
+}
diff --git a/hir-transform/src/lib.rs b/hir-transform/src/lib.rs
new file mode 100644
index 000000000..142533380
--- /dev/null
+++ b/hir-transform/src/lib.rs
@@ -0,0 +1,105 @@
+pub(crate) mod adt;
+mod inline_blocks;
+mod split_critical_edges;
+mod treeify;
+
+pub use self::inline_blocks::InlineBlocks;
+pub use self::split_critical_edges::SplitCriticalEdges;
+pub use self::treeify::Treeify;
+
+use miden_hir_analysis::FunctionAnalysis;
+use miden_hir_pass::Pass;
+
+/// A [RewritePass] is a special kind of [Pass] which is designed to perform some
+/// kind of rewrite transformation on a [miden_hir::Function].
+///
+/// Rewrites require one or more control flow analyses to have been computed, as
+/// determined by the requirements of the pass itself.
+/// The [FunctionAnalysis]
+/// structure is designed for this purpose, allowing one to request specific
+/// analysis results, which will be computed on-demand if not yet available.
+pub trait RewritePass {
+    type Error;
+
+    /// Runs the rewrite on `function` with `analyses`.
+    ///
+    /// Rewrites should return `Err` to signal that the pass has failed
+    /// and compilation should be aborted
+    fn run(
+        &mut self,
+        function: &mut miden_hir::Function,
+        analyses: &mut FunctionAnalysis,
+    ) -> Result<(), Self::Error>;
+
+    /// Chains two rewrites together to form a new, fused rewrite
+    fn chain<P>(self, pass: P) -> RewriteChain<Self, P>
+    where
+        Self: Sized,
+        P: RewritePass,
+    {
+        RewriteChain::new(self, pass)
+    }
+}
+
+/// [RewriteChain] is the equivalent of [miden_hir_pass::Chain] for [RewritePass].
+///
+/// This is not meant to be constructed or referenced directly, as the type signature gets out
+/// of hand quickly when combining multiple rewrites. Instead, you should invoke `chain` on a
+/// [RewritePass] implementation, and use it as a trait object. In some cases this may require boxing
+/// the `RewriteChain`, depending on how it is being used.
+pub struct RewriteChain<A, B> {
+    a: A,
+    b: B,
+}
+impl<A, B> RewriteChain<A, B> {
+    fn new(a: A, b: B) -> Self {
+        Self { a, b }
+    }
+}
+impl<A, B> Copy for RewriteChain<A, B>
+where
+    A: Copy,
+    B: Copy,
+{
+}
+impl<A, B> Clone for RewriteChain<A, B>
+where
+    A: Clone,
+    B: Clone,
+{
+    #[inline]
+    fn clone(&self) -> Self {
+        Self::new(self.a.clone(), self.b.clone())
+    }
+}
+impl<A, B> RewritePass for RewriteChain<A, B>
+where
+    A: RewritePass,
+    B: RewritePass<Error = <A as RewritePass>::Error>,
+{
+    type Error = <B as RewritePass>::Error;
+
+    fn run(
+        &mut self,
+        function: &mut miden_hir::Function,
+        analyses: &mut FunctionAnalysis,
+    ) -> Result<(), Self::Error> {
+        self.a.run(function, analyses)?;
+        self.b.run(function, analyses)
+    }
+}
+impl<A, B> Pass for RewriteChain<A, B>
+where
+    A: RewritePass,
+    B: RewritePass<Error = <A as RewritePass>::Error>,
+{
+    type Input<'a> = (&'a mut miden_hir::Function, &'a mut FunctionAnalysis);
+    type Output<'a> = (&'a mut miden_hir::Function, &'a mut FunctionAnalysis);
+    type Error = <B as RewritePass>::Error;
+
+    fn run<'a>(&mut self, input: Self::Input<'a>) -> Result<Self::Output<'a>, Self::Error> {
+        let (function, analyses) = input;
+        self.a.run(function, analyses)?;
+        self.b.run(function, analyses)?;
+        Ok((function, analyses))
+    }
+}
diff --git a/hir-transform/src/split_critical_edges.rs b/hir-transform/src/split_critical_edges.rs
new file mode 100644
index 000000000..1f0e9d662
--- /dev/null
+++ b/hir-transform/src/split_critical_edges.rs
@@ -0,0 +1,135 @@
+use std::collections::VecDeque;
+
+use rustc_hash::FxHashSet;
+use smallvec::SmallVec;
+
+use miden_diagnostics::Spanned;
+use miden_hir::{self as hir, Block as BlockId, *};
+use miden_hir_analysis::FunctionAnalysis;
+
+use super::RewritePass;
+
+/// This pass operates on the SSA IR, and ensures that there are no critical
+/// edges in the control flow graph.
+///
+/// A critical edge occurs when control flow may exit a block, which we'll call `P`, to
+/// more than one successor block, which we'll call `S`, where any `S` has more than one
+/// predecessor from which it may receive control. Put another way, in the control flow graph,
+/// a critical edge is one which connects two nodes where the source node has multiple outgoing
+/// edges, and the destination node has multiple incoming edges.
+///
+/// These types of edges cause unnecessary complications with certain types of dataflow analyses
+/// and transformations, and so we fix this by splitting these edges. This is done by introducing
+/// a new block, `B`, in which we insert a branch to `S` with whatever arguments were originally
+/// provided in `P`, and then rewriting the branch in `P` that went to `S`, to go to `B` instead.
+///
+/// After this pass completes, no node in the control flow graph will have both multiple predecessors
+/// and multiple successors.
+///
+pub struct SplitCriticalEdges;
+impl RewritePass for SplitCriticalEdges {
+    type Error = anyhow::Error;
+
+    fn run(
+        &mut self,
+        function: &mut hir::Function,
+        analysis: &mut FunctionAnalysis,
+    ) -> Result<(), Self::Error> {
+        // Search for blocks with multiple successors with edges to blocks with
+        // multiple predecessors; these blocks form critical edges in the control
+        // flow graph which must be split.
+        //
+        // We split the critical edge by inserting a new block after the predecessor
+        // and updating the predecessor instruction to transfer to the new block
+        // instead. We then insert an unconditional branch in the new block that
+        // passes the block arguments that were meant for the "real" successor.
+        let mut visited = FxHashSet::<BlockId>::default();
+        let mut worklist = VecDeque::<BlockId>::default();
+        worklist.push_back(function.dfg.entry_block());
+
+        let cfg = analysis.cfg_mut();
+
+        while let Some(p) = worklist.pop_front() {
+            // If we've already visited a block, skip it
+            if !visited.insert(p) {
+                continue;
+            }
+
+            // Make sure we visit all of the successors of this block next
+            for b in cfg.succ_iter(p) {
+                worklist.push_back(b);
+            }
+
+            // Unless this block has multiple successors, skip it
+            if cfg.num_successors(p) < 2 {
+                continue;
+            }
+
+            let succs = SmallVec::<[BlockId; 2]>::from_iter(cfg.succ_iter(p));
+            for b in succs.into_iter() {
+                // Unless this successor has multiple predecessors, skip it
+                if cfg.num_predecessors(b) < 2 {
+                    continue;
+                }
+
+                // We found a critical edge, so perform the following steps:
+                //
+                // * Create a new block, placed after the predecessor in the layout
+                // * Rewrite the terminator of the predecessor to refer to the new
+                //   block, but without passing any block arguments
+                // * Insert an unconditional branch to the successor with the block
+                //   arguments of the original terminator
+                // * Recompute the control flow graph for affected blocks
+                let split = function.dfg.create_block_after(p);
+                let terminator = function.dfg.last_inst(p).unwrap();
+                let ix = function.dfg.inst_mut(terminator);
+                let span = ix.span();
+                let args: ValueList;
+                match &mut ix.data.item {
+                    Instruction::Br(hir::Br {
+                        ref mut destination,
+                        args: ref mut orig_args,
+                        ..
+                    }) => {
+                        args = orig_args.take();
+                        *destination = split;
+                    }
+                    Instruction::CondBr(hir::CondBr {
+                        then_dest: (ref mut then_dest, ref mut then_args),
+                        else_dest: (ref mut else_dest, ref mut else_args),
+                        ..
+                    }) => {
+                        if *then_dest == b {
+                            *then_dest = split;
+                            args = then_args.take();
+                        } else {
+                            *else_dest = split;
+                            args = else_args.take();
+                        }
+                    }
+                    Instruction::Switch(_) => unimplemented!(),
+                    _ => unreachable!(),
+                }
+                function.dfg.insert_inst(
+                    InsertionPoint {
+                        at: ProgramPoint::Block(split),
+                        action: Insert::After,
+                    },
+                    Instruction::Br(hir::Br {
+                        op: hir::Opcode::Br,
+                        destination: b,
+                        args,
+                    }),
+                    Type::Unknown,
+                    span,
+                );
+
+                cfg.recompute_block(&function.dfg, split);
+            }
+
+            cfg.recompute_block(&function.dfg, p);
+        }
+
+        Ok(())
+    }
+}
diff --git a/hir-transform/src/treeify.rs b/hir-transform/src/treeify.rs
new file mode 100644
index 000000000..e523e2917
--- /dev/null
+++ b/hir-transform/src/treeify.rs
@@ -0,0 +1,665 @@
+use std::collections::VecDeque;
+use std::rc::Rc;
+
+use miden_hir::{self as hir, Block as BlockId, Value as ValueId, *};
+use miden_hir_analysis::{BlockPredecessor, ControlFlowGraph, FunctionAnalysis, LoopAnalysis};
+use rustc_hash::FxHashSet;
+
+use crate::{adt::ScopedMap, RewritePass};
+
+/// This pass takes as input the SSA form of a function, and ensures that the CFG of
+/// that function is a tree, not a DAG, excepting loop headers.
+///
+/// This transformation splits vertices with multiple predecessors, by duplicating the
+/// subtree of the program rooted at those vertices. As mentioned above, we do not split
+/// vertices representing loop headers, in order to preserve loops in the CFG of the resulting
+/// IR. However, we can consider each loop within the overall CFG of a function to be a single
+/// vertex after this transformation, and with this perspective the CFG forms a tree. Loop
+/// nodes are then handled specially during codegen.
+///
+/// The transformation is performed bottom-up, in CFG postorder.
+///
+/// This pass also computes the set of blocks in each loop which must be terminated with `push.0`
+/// to exit the containing loop.
+///
+/// # Examples
+///
+/// ## Basic DAG
+///
+/// This example demonstrates how the DAG of a function with multiple returns gets transformed:
+///
+/// ```ignore
+/// blk0
+///  |
+///  v
+/// blk1 -> blk3 -> ret
+///  |      /
+///  |     /
+///  |    /
+///  v   v
+///  blk2
+///   |
+///   v
+///  ret
+/// ```
+///
+/// Becomes:
+///
+/// ```ignore
+/// blk0
+///  |
+///  v
+/// blk1 -> blk3 -> ret
+///  |       |
+///  |       |
+///  |       |
+///  v       v
+/// blk2    blk2
+///  |       |
+///  v       v
+/// ret     ret
+/// ```
+///
+/// ## Basic Loop
+///
+/// This is an example of a function with multiple returns and a simple loop:
+///
+/// ```ignore
+/// blk0
+///  |              -------
+///  v              v     |
+/// blk1 -> blk3 -> blk4 -> blk5 -> ret
+///  |      /
+///  |     /
+///  |    /
+///  v   v
+///  blk2
+///   |
+///   v
+///  ret
+/// ```
+///
+/// Becomes:
+///
+/// ```ignore
+/// blk0
+///  |              -------
+///  v              v     |
+/// blk1 -> blk3 -> blk4 -> blk5 -> ret
+///  |       |
+///  |       |
+///  |       |
+///  v       v
+/// blk2    blk2
+///  |       |
+///  v       v
+/// ret     ret
+/// ```
+///
+/// ## Complex Loop
+///
+/// This is an example of a function with a complex loop (i.e. multiple exit points):
+///
+/// ```ignore
+/// blk0
+///  |
+///  v
+/// blk1
+///  |  \
+///  |   blk2 <-----
+///  |    |        |
+///  |   blk3      |
+///  |   /  \      |
+///  |  /    blk4--
+///  | /      |
+///  vv       |
+/// blk5     blk6
+/// ```
+///
+/// Becomes:
+///
+/// ```ignore
+/// blk0
+///  |
+///  v
+/// blk1
+///  |  \
+///  |   \
+///  |    blk2 <---
+///  |     |      |
+///  |     v      |
+///  |    blk3    |
+///  |     | \    |
+///  |     |  blk4--
+///  |     |      |
+///  v     v      v
+/// blk5  blk5   blk6
+/// ```
+///
+/// NOTE: Here, when generating code for `blk5` and `blk6`, the loop depth is 0, so
+/// we will emit a single `push.0` at the end of both blocks which will terminate the
+/// containing loop, and then return from the function as we've reached the bottom
+/// of the tree.
+///
+/// ## Nested Loops
+///
+/// This is an extension of the example above, but with nested loops:
+///
+/// ```ignore
+/// blk0
+///  |
+///  v
+/// blk1
+///  |  \
+///  |   blk2 <-------
+///  |    |          |
+///  |   blk3        |
+///  |   /  \        |
+///  |  /    blk4--  |
+///  | /      |      |
+///  vv       v      |
+/// blk5<-   blk6-->blk7-->blk8
+///  |    ^          |
+///  |    |__________|
+///  |         |
+///  |_________|
+/// ```
+///
+/// We have two loops, the outer one starting at `blk2`:
+///
+/// * `blk2->blk3->blk4->blk2`
+/// * `blk2->blk3->blk4->blk6->blk7->blk2`
+///
+/// And the inner one starting at `blk6`:
+///
+/// * `blk6->blk7->blk8->blk6`
+///
+/// Additionally, there are multiple exits through the loops, depending on the path taken:
+///
+/// * `blk2->blk3->blk5`
+/// * `blk2->blk3->blk4->blk6->blk7->blk8->blk5`
+/// * `blk6->blk7->blk8->blk5`
+///
+/// After transformation, this becomes:
+///
+/// ```ignore
+/// blk0
+///  |
+///  v
+/// blk1
+///  |  \
+///  |   blk2 <-------
+///  |    |          |
+///  |   blk3        |
+///  |    | \        |
+///  |    |  blk4--  |
+///  |    |   |      |
+///  v    v   v      |
+/// blk5 blk5 blk6-->blk7-->blk8
+///            ^      |      |
+///            |______|______|
+///                   |
+///                   v
+///                  blk5
+/// ```
+///
+/// During codegen though, we end up with the following tree of stack machine code.
+///
+/// At each point where control flow either continues a loop or leaves it, we must
+///
+/// * Duplicate loop headers on control flow edges leading to those headers
+/// * Emit N `push.0` instructions on control flow edges exiting the function from a loop depth of N
+/// * Emit a combination of the above on control flow edges exiting an inner loop for an outer loop,
+/// depending on what depths the predecessor and successor blocks are at
+///
+/// ```ignore
+/// blk0
+/// blk1
+/// if.true
+///   blk2
+///   while.true
+///     blk3
+///     if.true
+///       blk4
+///       if.true
+///         blk2 # duplicated outer loop header
+///       else
+///         blk6
+///         while.true
+///           blk7
+///           if.true
+///             blk2   # duplicated outer loop header
+///             push.0 # break out of inner loop
+///           else
+///             blk8
+///             if.true
+///               blk6 # duplicated inner loop header
+///             else
+///               blk5
+///               push.0 # break out of outer loop
+///               push.0 # break out of inner loop
+///             end
+///           end
+///         end
+///       end
+///     else
+///       blk5
+///       push.0 # break out of outer loop
+///     end
+///   end
+/// else
+///   blk5
+/// end
+/// ```
+///
+pub struct Treeify;
+impl RewritePass for Treeify {
+    type Error = anyhow::Error;
+
+    fn run(
+        &mut self,
+        function: &mut hir::Function,
+        analysis: &mut FunctionAnalysis,
+    ) -> Result<(), Self::Error> {
+        // Require the dominator tree and loop analyses
+        analysis.ensure_loops(function);
+
+        let cfg = analysis.cfg();
+        let domtree = analysis.domtree();
+        let loops = analysis.loops();
+        let mut block_q = VecDeque::<CopyBlock>::default();
+        let mut changed = false;
+
+        for b in domtree.cfg_postorder().iter().copied() {
+            if loops.is_loop_header(b).is_some() {
+                // Ignore loop headers
+                continue;
+            }
+
+            // Blocks with multiple predecessors cause the CFG to form a DAG,
+            // we need to duplicate the CFG rooted at this block for all predecessors.
+            //
+            // While we could technically preserve one of the predecessors, we perform
+            // some transformations during the copy that would result in copied vs original
+            // trees to differ slightly, which would inhibit subsequent optimizations.
+            // The original subtree blocks are detached from the function.
+            if cfg.num_predecessors(b) > 1 {
+                for p in cfg.pred_iter(b) {
+                    assert!(block_q.is_empty());
+                    block_q.push_back(CopyBlock::new(b, p));
+                    while let Some(CopyBlock {
+                        b,
+                        ref p,
+                        value_map,
+                        block_map,
+                    }) = block_q.pop_front()
+                    {
+                        // Copy this block and its children
+                        if loops.is_loop_header(b).is_some() {
+                            treeify_loop(
+                                b,
+                                p,
+                                function,
+                                cfg,
+                                loops,
+                                &mut block_q,
+                                value_map,
+                                block_map,
+                            )?;
+                        } else {
+                            treeify(
+                                b,
+                                p,
+                                function,
+                                cfg,
+                                loops,
+                                &mut block_q,
+                                value_map,
+                                block_map,
+                            )?;
+                        }
+                    }
+                }
+
+                // After treeification, the original subtree blocks cannot possibly be
+                // referenced by other blocks in the function, so remove all of them
+                detach_tree(b, function, cfg);
+
+                // Mark the control flow graph as modified
+                changed = true;
+            }
+        }
+
+        // If we made any changes, we need to recompute all analyses
+        if changed {
+            analysis.recompute(function);
+        }
+
+        Ok(())
+    }
+}
+
+fn treeify(
+    b: BlockId,
+    p: &BlockPredecessor,
+    function: &mut hir::Function,
+    cfg: &ControlFlowGraph,
+    loops: &LoopAnalysis,
+    block_q: &mut VecDeque<CopyBlock>,
+    mut value_map: ScopedMap<ValueId, ValueId>,
+    mut block_map: ScopedMap<BlockId, BlockId>,
+) -> anyhow::Result<()> {
+    // 1. Create a new block `b'`, without block arguments,
+    let b_prime = function.dfg.create_block_after(p.block);
+    block_map.insert(b, b_prime);
+    // 2. Initialize a lookup table of old value defs to new value defs, seed it by mapping the
+    //    block arguments of `b` to the values passed from the predecessor
+    match function.dfg.analyze_branch(p.inst) {
+        BranchInfo::NotABranch => {
+            value_map.extend(
+                function
+                    .dfg
+                    .block_args(b)
+                    .iter()
+                    .copied()
+                    .zip(function.dfg.inst_args(p.inst).iter().copied()),
+            );
+        }
+        BranchInfo::SingleDest(_, args) => {
+            value_map.extend(
+                function
+                    .dfg
+                    .block_args(b)
+                    .iter()
+                    .copied()
+                    .zip(args.iter().copied()),
+            );
+        }
+        BranchInfo::MultiDest(ref jts) => {
+            for jt in jts.iter() {
+                if jt.destination == b {
+                    value_map.extend(
+                        function
+                            .dfg
+                            .block_args(b)
+                            .iter()
+                            .copied()
+                            .zip(jt.args.iter().copied()),
+                    );
+                    break;
+                }
+            }
+        }
+    }
+    // 3. Update the predecessor instruction to reference the new block, remove block arguments.
+    update_predecessor(function, p, |dest, dest_args, pool| {
+        if *dest == b {
+            *dest = b_prime;
+            dest_args.clear(pool);
+        }
+    });
+    // 4. Copy contents of `b` to `b'`, inserting defs in the lookup table, and mapping operands
+    //    to their new "corrected" values
+    copy_instructions(b, b_prime, function, &mut value_map, &block_map);
+    // 5. Recursively copy all children of `b` to `b_prime`
+    copy_children(
+        b, b_prime, function, cfg, loops, block_q, value_map, block_map,
+    )
+}
+
+fn treeify_loop(
+    b: BlockId,
+    p: &BlockPredecessor,
+    function: &mut hir::Function,
+    cfg: &ControlFlowGraph,
+    loops: &LoopAnalysis,
+    block_q: &mut VecDeque<CopyBlock>,
+    mut value_map: ScopedMap<ValueId, ValueId>,
+    mut block_map: ScopedMap<BlockId, BlockId>,
+) -> anyhow::Result<()> {
+    // 1. Create new block, b', with a new set of block arguments matching the original,
+    //    populate the value map with rewrites for the original block argument values
+    let b_prime = function.dfg.create_block_after(p.block);
+    block_map.insert(b, b_prime);
+    function.dfg.clone_block_params(b, b_prime);
+    for (src, dest) in function
+        .dfg
+        .block_params(b)
+        .iter()
+        .copied()
+        .zip(function.dfg.block_params(b_prime).iter().copied())
+    {
+        value_map.insert(src, dest);
+    }
+    // 2. Update the predecessor instruction to reference the new block, leave block arguments unchanged
+    update_predecessor(function, p, |dest, _, _| {
+        if *dest == b {
+            *dest = b_prime;
+        }
+    });
+    // 3. Copy contents of `b` to `b'`, inserting defs in the lookup table, and mapping operands
+    //    to their new "corrected" values
+    copy_instructions(b, b_prime, function, &mut value_map, &block_map);
+    // 4. Recursively copy all children of `b` to `b_prime`
+    copy_children(
+        b, b_prime, function, cfg, loops, block_q, value_map, block_map,
+    )
+}
+
+/// Detach `root`, and all of it's reachable children, from the layout of `function`
+///
+/// When called, it is assumed that `root` has been cloned to a new block,
+/// along with all of it's reachable children, and its predecessor rewritten
+/// to refer to the new block instead. As a result, `root` should no longer be
+/// reachable in the CFG, along with its children, as they would have been cloned
+/// as well.
+///
+/// NOTE: This does not delete the block data attached to the function, only the
+/// presence of the block in the layout of the function.
+fn detach_tree(root: BlockId, function: &mut hir::Function, cfg: &ControlFlowGraph) {
+    let mut delete_q = VecDeque::<BlockId>::default();
+    let mut visited = FxHashSet::<BlockId>::default();
+    delete_q.push_back(root);
+    visited.insert(root);
+    while let Some(block) = delete_q.pop_front() {
+        function.dfg.detach_block(block);
+        for b in cfg.succ_iter(block).into_iter() {
+            // Skip blocks we've already seen
+            if visited.insert(b) {
+                delete_q.push_back(b);
+            }
+        }
+    }
+}
+
+fn copy_children(
+    b: BlockId,
+    b_prime: BlockId,
+    function: &mut hir::Function,
+    cfg: &ControlFlowGraph,
+    loops: &LoopAnalysis,
+    block_q: &mut VecDeque<CopyBlock>,
+    value_map: ScopedMap<ValueId, ValueId>,
+    block_map: ScopedMap<BlockId, BlockId>,
+) -> anyhow::Result<()> {
+    let pred = BlockPredecessor {
+        inst: function
+            .dfg
+            .last_inst(b_prime)
+            .expect("expected non-empty block"),
+        block: b_prime,
+    };
+    let value_map = Rc::new(value_map);
+    let block_map = Rc::new(block_map);
+    for succ in cfg.succ_iter(b) {
+        // If we've already seen this successor, and it is a loop header, then
+        // we don't want to copy it, but we do want to replace the reference to
+        // this block with its copy
+        if let Some(succ_prime) = block_map.get(&succ) {
+            if loops.is_loop_header(succ).is_some() {
+                update_predecessor(function, &pred, |dest, _, _| {
+                    if dest == &succ {
+                        *dest = *succ_prime;
+                    }
+                });
+                continue;
+            }
+        }
+
+        block_q.push_back(CopyBlock {
+            b: succ,
+            p: pred,
+            value_map: ScopedMap::new(Some(value_map.clone())),
+            block_map: ScopedMap::new(Some(block_map.clone())),
+        });
+    }
+
+    Ok(())
+}
+
+fn copy_instructions(
+    b: BlockId,
+    b_prime: BlockId,
+    function: &mut hir::Function,
+    value_map: &mut ScopedMap<ValueId, ValueId>,
+    block_map: &ScopedMap<BlockId, BlockId>,
+) {
+    // Initialize the cursor at the first instruction in `b`
+    let mut next = {
+        let cursor = function.dfg.block(b).insts.front();
+        cursor.get().map(|inst_data| inst_data as *const InstNode)
+    };
+
+    while let Some(ptr) = next.take() {
+        // Get the id of the instruction at the current cursor position, then advance the cursor
+        let src_inst = {
+            let mut cursor = unsafe { function.dfg.block(b).insts.cursor_from_ptr(ptr) };
+            let id = cursor.get().unwrap().key;
+            cursor.move_next();
+            next = cursor.get().map(|inst_data| inst_data as *const InstNode);
+            id
+        };
+
+        // Clone the source instruction data
+        let inst = function.dfg.clone_inst(src_inst);
+
+        // We need to fix up the cloned instruction data
+        let data = &mut function.dfg.insts[inst];
+        // First, we're going to be placing it in b', so make sure the instruction is aware of that
+        data.block = b_prime;
+        // Second, we need to rewrite value/block references contained in the instruction
+        match &mut data.item {
+            Instruction::Br(hir::Br {
+                ref mut destination,
+                ref mut args,
+                ..
+            }) => {
+                if let Some(new_dest) = block_map.get(destination) {
+                    *destination = *new_dest;
+                }
+                let args = args.as_mut_slice(&mut function.dfg.value_lists);
+                for arg in args.iter_mut() {
+                    if let Some(arg_prime) = value_map.get(arg) {
+                        *arg = *arg_prime;
+                    }
+                }
+            }
+            Instruction::CondBr(hir::CondBr {
+                ref mut cond,
+                then_dest: (ref mut then_dest, ref mut then_args),
+                else_dest: (ref mut else_dest, ref mut else_args),
+                ..
+            }) => {
+                if let Some(cond_prime) = value_map.get(cond) {
+                    *cond = *cond_prime;
+                }
+                if let Some(new_dest) = block_map.get(then_dest) {
+                    *then_dest = *new_dest;
+                }
+                let then_args = then_args.as_mut_slice(&mut function.dfg.value_lists);
+                for arg in then_args.iter_mut() {
+                    if let Some(arg_prime) = value_map.get(arg) {
+                        *arg = *arg_prime;
+                    }
+                }
+                if let Some(new_dest) = block_map.get(else_dest) {
+                    *else_dest = *new_dest;
+                }
+                let else_args = else_args.as_mut_slice(&mut function.dfg.value_lists);
+                for arg in else_args.iter_mut() {
+                    if let Some(arg_prime) = value_map.get(arg) {
+                        *arg = *arg_prime;
+                    }
+                }
+            }
+            other => {
+                for arg in other
+                    .arguments_mut(&mut function.dfg.value_lists)
+                    .iter_mut()
+                {
+                    if let Some(arg_prime) = value_map.get(arg) {
+                        *arg = *arg_prime;
+                    }
+                }
+            }
+        }
+        // Finally, append the cloned instruction to the block layout
+        let node = unsafe { UnsafeRef::from_raw(data) };
+        function.dfg.block_mut(b_prime).insts.push_back(node);
+        value_map.extend(
+            function
+                .dfg
+                .inst_results(src_inst)
+                .iter()
+                .copied()
+                .zip(function.dfg.inst_results(inst).iter().copied()),
+        );
+    }
+}
+
+struct CopyBlock {
+    b: BlockId,
+    p: BlockPredecessor,
+    value_map: ScopedMap<ValueId, ValueId>,
+    block_map: ScopedMap<BlockId, BlockId>,
+}
+impl CopyBlock {
+    fn new(b: BlockId, p: BlockPredecessor) -> Self {
+        Self {
+            b,
+            p,
+            value_map: Default::default(),
+            block_map: Default::default(),
+        }
+    }
+}
+
+#[inline]
+fn update_predecessor<F>(function: &mut hir::Function, p: &BlockPredecessor, mut callback: F)
+where
+    F: FnMut(&mut BlockId, &mut ValueList, &mut ValueListPool),
+{
+    match &mut function.dfg.insts[p.inst].data.item {
+        Instruction::Br(hir::Br {
+            ref mut destination,
+            ref mut args,
+            ..
+        }) => {
+            callback(destination, args, &mut function.dfg.value_lists);
+        }
+        Instruction::CondBr(hir::CondBr {
+            then_dest: (ref mut then_dest, ref mut then_args),
+            else_dest: (ref mut else_dest, ref mut else_args),
+            ..
+        }) => {
+            assert_ne!(then_dest, else_dest, "unexpected critical edge");
+            let value_lists = &mut function.dfg.value_lists;
+            callback(then_dest, then_args, value_lists);
+            callback(else_dest, else_args, value_lists);
+        }
+        Instruction::Switch(_) => {
+            panic!("expected switch instructions to have been simplified prior to treeification")
+        }
+        _ => unreachable!(),
+    }
+}