From 8ed5b52256437849e193b4f832a94226be286833 Mon Sep 17 00:00:00 2001
From: Jake Fecher <jfecher11@gmail.com>
Date: Fri, 8 Nov 2024 13:12:38 -0600
Subject: [PATCH 1/5] Start reworking die to work on SCCs

---
 Cargo.lock                                    |  1 +
 Cargo.toml                                    |  1 +
 compiler/noirc_evaluator/Cargo.toml           |  1 +
 .../noirc_evaluator/src/ssa/ir/function.rs    | 44 +++++++++++++
 .../noirc_evaluator/src/ssa/ir/post_order.rs  | 66 ++++++++++---------
 compiler/noirc_evaluator/src/ssa/opt/die.rs   | 52 +++++++++++----
 compiler/noirc_frontend/Cargo.toml            |  2 +-
 7 files changed, 124 insertions(+), 43 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index cdbe470cf09..92171129ad2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2935,6 +2935,7 @@ dependencies = [
  "noirc_errors",
  "noirc_frontend",
  "num-bigint",
+ "petgraph",
  "proptest",
  "rayon",
  "serde",
diff --git a/Cargo.toml b/Cargo.toml
index 214ed9a5bcb..4ed8b2626ef 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -163,6 +163,7 @@ tracing = "0.1.40"
 tracing-web = "0.1.3"
 tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
 rust-embed = "6.6.0"
+petgraph = "0.6"
 
 [profile.dev]
 # This is required to be able to run `cargo test` in acvm_js due to the `locals exceeds maximum` error.
diff --git a/compiler/noirc_evaluator/Cargo.toml b/compiler/noirc_evaluator/Cargo.toml
index 1db6af2ae85..c7f8cd8373d 100644
--- a/compiler/noirc_evaluator/Cargo.toml
+++ b/compiler/noirc_evaluator/Cargo.toml
@@ -28,6 +28,7 @@ tracing.workspace = true
 chrono = "0.4.37"
 rayon.workspace = true
 cfg-if.workspace = true
+petgraph.workspace = true
 
 [dev-dependencies]
 proptest.workspace = true
diff --git a/compiler/noirc_evaluator/src/ssa/ir/function.rs b/compiler/noirc_evaluator/src/ssa/ir/function.rs
index e8245ff6036..0412f47018e 100644
--- a/compiler/noirc_evaluator/src/ssa/ir/function.rs
+++ b/compiler/noirc_evaluator/src/ssa/ir/function.rs
@@ -2,8 +2,12 @@ use std::collections::BTreeSet;
 
 use iter_extended::vecmap;
 use noirc_frontend::monomorphization::ast::InlineType;
+use petgraph::graph::NodeIndex;
+use petgraph::prelude::DiGraph;
 use serde::{Deserialize, Serialize};
 
+use fxhash::{FxHashMap as HashMap, FxHashSet as HashSet};
+
 use super::basic_block::BasicBlockId;
 use super::dfg::DataFlowGraph;
 use super::instruction::TerminatorInstruction;
@@ -187,6 +191,46 @@ impl Function {
 
         unreachable!("SSA Function {} has no reachable return instruction!", self.id())
     }
+
+    /// Return each SCC (strongly-connected component) of the CFG of this function
+    /// in postorder. In practice this is identical to a normal CFG except that
+    /// blocks within loops will all be grouped together in the same SCC.
+    pub(crate) fn postorder_scc_cfg(&self) -> Vec<Vec<BasicBlockId>> {
+        let mut cfg = DiGraph::new();
+        let mut stack = vec![self.entry_block];
+
+        let mut visited = HashSet::default();
+        let mut block_to_index = HashMap::default();
+        let mut index_to_block = HashMap::default();
+
+        // Add or create a block node in the cfg
+        let mut get_block_index = |cfg: &mut DiGraph<_, _>, block| {
+            block_to_index.get(&block).copied().unwrap_or_else(|| {
+                let index = cfg.add_node(block);
+                block_to_index.insert(block, index);
+                index_to_block.insert(index, block);
+                index
+            })
+        };
+
+        // Populate each reachable block & edges between them
+        while let Some(block) = stack.pop() {
+            if visited.insert(block) {
+                let block_index = get_block_index(&mut cfg, block);
+
+                for successor in self.dfg[block].successors() {
+                    stack.push(successor);
+                    let successor_index = get_block_index(&mut cfg, successor);
+                    cfg.add_edge(block_index, successor_index, ());
+                }
+            }
+        }
+
+        // Perform tarjan_scc to get strongly connected components.
+        // Lucky for us, this already returns SCCs in postorder.
+        let postorder: Vec<Vec<NodeIndex>> = petgraph::algo::tarjan_scc(&cfg);
+        vecmap(postorder, |indices| vecmap(indices, |index| index_to_block[&index]))
+    }
 }
 
 impl std::fmt::Display for RuntimeType {
diff --git a/compiler/noirc_evaluator/src/ssa/ir/post_order.rs b/compiler/noirc_evaluator/src/ssa/ir/post_order.rs
index 94ff96ba1d7..e98857dcc87 100644
--- a/compiler/noirc_evaluator/src/ssa/ir/post_order.rs
+++ b/compiler/noirc_evaluator/src/ssa/ir/post_order.rs
@@ -15,6 +15,12 @@ enum Visit {
 
 pub(crate) struct PostOrder(Vec<BasicBlockId>);
 
+/// An SCC Post Order is a post-order visit of each strongly-connected
+/// component in a function's control flow graph. In practice this is
+/// identical to a normal post order except that blocks within loops
+/// will all be grouped together within the same visit step.
+pub(crate) struct SccPostOrder(Vec<Vec<BasicBlockId>>);
+
 impl PostOrder {
     pub(crate) fn as_slice(&self) -> &[BasicBlockId] {
         self.0.as_slice()
@@ -24,49 +30,49 @@ impl PostOrder {
 impl PostOrder {
     /// Allocate and compute a function's block post-order. Pos
     pub(crate) fn with_function(func: &Function) -> Self {
-        PostOrder(Self::compute_post_order(func))
+        PostOrder(compute_post_order(func))
     }
 
     pub(crate) fn into_vec(self) -> Vec<BasicBlockId> {
         self.0
     }
+}
 
-    // Computes the post-order of the function by doing a depth-first traversal of the
-    // function's entry block's previously unvisited children. Each block is sequenced according
-    // to when the traversal exits it.
-    fn compute_post_order(func: &Function) -> Vec<BasicBlockId> {
-        let mut stack = vec![(Visit::First, func.entry_block())];
-        let mut visited: HashSet<BasicBlockId> = HashSet::new();
-        let mut post_order: Vec<BasicBlockId> = Vec::new();
-
-        while let Some((visit, block_id)) = stack.pop() {
-            match visit {
-                Visit::First => {
-                    if !visited.contains(&block_id) {
-                        // This is the first time we pop the block, so we need to scan its
-                        // successors and then revisit it.
-                        visited.insert(block_id);
-                        stack.push((Visit::Last, block_id));
-                        // Stack successors for visiting. Because items are taken from the top of the
-                        // stack, we push the item that's due for a visit first to the top.
-                        for successor_id in func.dfg[block_id].successors().rev() {
-                            if !visited.contains(&successor_id) {
-                                // This not visited check would also be cover by the next
-                                // iteration, but checking here two saves an iteration per successor.
-                                stack.push((Visit::First, successor_id));
-                            }
+// Computes the post-order of the function by doing a depth-first traversal of the
+// function's entry block's previously unvisited children. Each block is sequenced according
+// to when the traversal exits it.
+fn compute_post_order(func: &Function) -> Vec<BasicBlockId> {
+    let mut stack = vec![(Visit::First, func.entry_block())];
+    let mut visited: HashSet<BasicBlockId> = HashSet::new();
+    let mut post_order: Vec<BasicBlockId> = Vec::new();
+
+    while let Some((visit, block_id)) = stack.pop() {
+        match visit {
+            Visit::First => {
+                if !visited.contains(&block_id) {
+                    // This is the first time we pop the block, so we need to scan its
+                    // successors and then revisit it.
+                    visited.insert(block_id);
+                    stack.push((Visit::Last, block_id));
+                    // Stack successors for visiting. Because items are taken from the top of the
+                    // stack, we push the item that's due for a visit first to the top.
+                    for successor_id in func.dfg[block_id].successors() {
+                        if !visited.contains(&successor_id) {
+                            // This not visited check would also be cover by the next
+                            // iteration, but checking here two saves an iteration per successor.
+                            stack.push((Visit::First, successor_id));
                         }
                     }
                 }
+            }
 
-                Visit::Last => {
-                    // We've finished all this node's successors.
-                    post_order.push(block_id);
-                }
+            Visit::Last => {
+                // We've finished all this node's successors.
+                post_order.push(block_id);
             }
         }
-        post_order
     }
+    post_order
 }
 
 #[cfg(test)]
diff --git a/compiler/noirc_evaluator/src/ssa/opt/die.rs b/compiler/noirc_evaluator/src/ssa/opt/die.rs
index beca7c41e5c..3e35be955ca 100644
--- a/compiler/noirc_evaluator/src/ssa/opt/die.rs
+++ b/compiler/noirc_evaluator/src/ssa/opt/die.rs
@@ -44,15 +44,19 @@ impl Function {
             context.mark_used_instruction_results(&self.dfg, call_data.array_id);
         }
 
+        let postorder_scc_cfg = self.postorder_scc_cfg();
         let mut inserted_out_of_bounds_checks = false;
 
-        let blocks = PostOrder::with_function(self);
-        for block in blocks.as_slice() {
-            inserted_out_of_bounds_checks |= context.remove_unused_instructions_in_block(
-                self,
-                *block,
-                insert_out_of_bounds_checks,
-            );
+        for scc in postorder_scc_cfg {
+            if scc.len() == 1 {
+                inserted_out_of_bounds_checks |= context.scan_and_remove_unused_instructions_in_block(
+                    self,
+                    scc[0],
+                    insert_out_of_bounds_checks,
+                );
+            } else {
+                context.die_in_scc(scc, self);
+            }
         }
 
         // If we inserted out of bounds check, let's run the pass again with those new
@@ -97,7 +101,7 @@ impl Context {
     /// removing unused instructions and return `true`. The idea then is to later call this
     /// function again with `insert_out_of_bounds_checks` set to false to effectively remove
     /// unused instructions but leave the out of bounds checks.
-    fn remove_unused_instructions_in_block(
+    fn scan_and_remove_unused_instructions_in_block(
         &mut self,
         function: &mut Function,
         block_id: BasicBlockId,
@@ -158,13 +162,37 @@ impl Context {
             }
         }
 
-        function.dfg[block_id]
-            .instructions_mut()
-            .retain(|instruction| !self.instructions_to_remove.contains(instruction));
-
+        self.remove_unused_instructions_in_block(function, block_id);
         false
     }
 
+    fn die_in_scc(&mut self, scc: Vec<BasicBlockId>, function: &mut Function) {
+        let mut used_values_len = self.used_values.len();
+
+        // Continue to mark used values while we have at least one change
+        while {
+            for block in &scc {
+                self.mark_used_values(*block, function);
+            }
+            let len_has_changed = self.used_values.len() != used_values_len;
+            used_values_len = self.used_values.len();
+            len_has_changed
+        } {}
+
+        for block in scc {
+            // remove each unused instruction
+            self.remove_unused_instructions_in_block(function, block);
+        }
+    }
+
+    fn remove_unused_instructions_in_block(&self, function: &mut Function, block: BasicBlockId) {
+        let instructions = function.dfg[block].instructions_mut();
+        instructions.retain(|instruction| !self.instructions_to_remove.contains(instruction));
+    }
+
+    fn mark_used_values(&mut self, block_id: BasicBlockId, function: &mut Function) {
+    }
+
     /// Returns true if an instruction can be removed.
     ///
     /// An instruction can be removed as long as it has no side-effects, and none of its result
diff --git a/compiler/noirc_frontend/Cargo.toml b/compiler/noirc_frontend/Cargo.toml
index 581d7f1b61d..2257df29bfd 100644
--- a/compiler/noirc_frontend/Cargo.toml
+++ b/compiler/noirc_frontend/Cargo.toml
@@ -28,7 +28,7 @@ small-ord-set = "0.1.3"
 regex = "1.9.1"
 cfg-if.workspace = true
 tracing.workspace = true
-petgraph = "0.6"
+petgraph.workspace = true
 rangemap = "1.4.0"
 strum = "0.24"
 strum_macros = "0.24"

From 36826ff775da8c11735bb8b14b83d61cc584f755 Mon Sep 17 00:00:00 2001
From: Jake Fecher <jfecher11@gmail.com>
Date: Fri, 8 Nov 2024 14:04:27 -0600
Subject: [PATCH 2/5] Resolve some issues with side-effects

---
 compiler/noirc_evaluator/src/ssa/opt/die.rs | 60 ++++++++++++++++++---
 1 file changed, 52 insertions(+), 8 deletions(-)

diff --git a/compiler/noirc_evaluator/src/ssa/opt/die.rs b/compiler/noirc_evaluator/src/ssa/opt/die.rs
index 3e35be955ca..3acd6ec6380 100644
--- a/compiler/noirc_evaluator/src/ssa/opt/die.rs
+++ b/compiler/noirc_evaluator/src/ssa/opt/die.rs
@@ -10,7 +10,6 @@ use crate::ssa::{
         dfg::DataFlowGraph,
         function::Function,
         instruction::{BinaryOp, Instruction, InstructionId, Intrinsic},
-        post_order::PostOrder,
         types::Type,
         value::{Value, ValueId},
     },
@@ -49,11 +48,12 @@ impl Function {
 
         for scc in postorder_scc_cfg {
             if scc.len() == 1 {
-                inserted_out_of_bounds_checks |= context.scan_and_remove_unused_instructions_in_block(
-                    self,
-                    scc[0],
-                    insert_out_of_bounds_checks,
-                );
+                inserted_out_of_bounds_checks |= context
+                    .scan_and_remove_unused_instructions_in_block(
+                        self,
+                        scc[0],
+                        insert_out_of_bounds_checks,
+                    );
             } else {
                 context.die_in_scc(scc, self);
             }
@@ -180,7 +180,7 @@ impl Context {
         } {}
 
         for block in scc {
-            // remove each unused instruction
+            self.populate_instructions_to_remove(function, block);
             self.remove_unused_instructions_in_block(function, block);
         }
     }
@@ -190,7 +190,51 @@ impl Context {
         instructions.retain(|instruction| !self.instructions_to_remove.contains(instruction));
     }
 
-    fn mark_used_values(&mut self, block_id: BasicBlockId, function: &mut Function) {
+    fn mark_used_values(&mut self, block_id: BasicBlockId, function: &Function) {
+        let block = &function.dfg[block_id];
+        self.mark_terminator_values_as_used(function, block);
+
+        for instruction_id in block.instructions() {
+            let instruction = &function.dfg[*instruction_id];
+
+            // We're just marking so avoid removing instructions that may need bounds checks
+            if !self.is_unused(*instruction_id, function)
+                || instruction_might_result_in_out_of_bounds(function, instruction)
+            {
+                use Instruction::*;
+                if matches!(instruction, IncrementRc { .. } | DecrementRc { .. }) {
+                    // self.rc_instructions.push((*instruction_id, block_id));
+                } else {
+                    instruction.for_each_value(|value| {
+                        self.mark_used_instruction_results(&function.dfg, value);
+                    });
+                }
+            }
+        }
+    }
+
+    /// Go through a block and populate the instructions to remove list, assuming
+    /// `self.used_values` is already populated.
+    fn populate_instructions_to_remove(&mut self, function: &Function, block_id: BasicBlockId) {
+        let block = &function.dfg[block_id];
+        for instruction_id in block.instructions() {
+            use Instruction::*;
+            let instruction = &function.dfg[*instruction_id];
+
+            // We're just marking so avoid removing instructions that may need bounds checks
+            if self.is_unused(*instruction_id, function)
+                && !instruction_might_result_in_out_of_bounds(function, instruction)
+            {
+                self.instructions_to_remove.insert(*instruction_id);
+
+            // We can remove side-effectful instructions here since we marked beforehand we know
+            // `value` isn't used anywhere by this point.
+            } else if let IncrementRc { value } | DecrementRc { value } = instruction {
+                if !self.used_values.contains(value) {
+                    self.instructions_to_remove.insert(*instruction_id);
+                }
+            }
+        }
     }
 
     /// Returns true if an instruction can be removed.

From cd0c931f00404618a7f2b8a0e152a1794846530e Mon Sep 17 00:00:00 2001
From: Jake Fecher <jfecher11@gmail.com>
Date: Fri, 8 Nov 2024 14:40:23 -0600
Subject: [PATCH 3/5] Remove unused arraygets/sets in loops

---
 compiler/noirc_evaluator/src/ssa/opt/die.rs | 25 ++++++++++-----------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/compiler/noirc_evaluator/src/ssa/opt/die.rs b/compiler/noirc_evaluator/src/ssa/opt/die.rs
index 3acd6ec6380..25944804d08 100644
--- a/compiler/noirc_evaluator/src/ssa/opt/die.rs
+++ b/compiler/noirc_evaluator/src/ssa/opt/die.rs
@@ -110,15 +110,13 @@ impl Context {
         let block = &function.dfg[block_id];
         self.mark_terminator_values_as_used(function, block);
 
-        let instructions_len = block.instructions().len();
-
         let mut rc_tracker = RcTracker::default();
 
         // Indexes of instructions that might be out of bounds.
         // We'll remove those, but before that we'll insert bounds checks for them.
         let mut possible_index_out_of_bounds_indexes = Vec::new();
 
-        for (instruction_index, instruction_id) in block.instructions().iter().rev().enumerate() {
+        for (instruction_index, instruction_id) in block.instructions().iter().enumerate().rev() {
             let instruction = &function.dfg[*instruction_id];
 
             if self.is_unused(*instruction_id, function) {
@@ -127,8 +125,7 @@ impl Context {
                 if insert_out_of_bounds_checks
                     && instruction_might_result_in_out_of_bounds(function, instruction)
                 {
-                    possible_index_out_of_bounds_indexes
-                        .push(instructions_len - instruction_index - 1);
+                    possible_index_out_of_bounds_indexes.push(instruction_index);
                 }
             } else {
                 use Instruction::*;
@@ -195,13 +192,12 @@ impl Context {
         self.mark_terminator_values_as_used(function, block);
 
         for instruction_id in block.instructions() {
+            use Instruction::*;
             let instruction = &function.dfg[*instruction_id];
+            // TODO: Insert OOB checks for unused array operations
 
             // We're just marking so avoid removing instructions that may need bounds checks
-            if !self.is_unused(*instruction_id, function)
-                || instruction_might_result_in_out_of_bounds(function, instruction)
-            {
-                use Instruction::*;
+            if !self.is_unused(*instruction_id, function) {
                 if matches!(instruction, IncrementRc { .. } | DecrementRc { .. }) {
                     // self.rc_instructions.push((*instruction_id, block_id));
                 } else {
@@ -209,6 +205,12 @@ impl Context {
                         self.mark_used_instruction_results(&function.dfg, value);
                     });
                 }
+            } else if instruction_might_result_in_out_of_bounds(function, instruction) {
+                // Even if the array is unused, mark the index as used since we'll insert
+                // an OOB check on it later.
+                if let ArrayGet { index, .. } | ArraySet { index, .. } = instruction {
+                    self.used_values.insert(*index);
+                }
             }
         }
     }
@@ -221,10 +223,7 @@ impl Context {
             use Instruction::*;
             let instruction = &function.dfg[*instruction_id];
 
-            // We're just marking so avoid removing instructions that may need bounds checks
-            if self.is_unused(*instruction_id, function)
-                && !instruction_might_result_in_out_of_bounds(function, instruction)
-            {
+            if self.is_unused(*instruction_id, function) {
                 self.instructions_to_remove.insert(*instruction_id);
 
             // We can remove side-effectful instructions here since we marked beforehand we know

From 01d681ba1bd00334e3972b11f04e6f6153b0b7a8 Mon Sep 17 00:00:00 2001
From: Jake Fecher <jfecher11@gmail.com>
Date: Fri, 8 Nov 2024 15:04:16 -0600
Subject: [PATCH 4/5] Fix post order test

---
 compiler/noirc_evaluator/src/ssa/ir/post_order.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/compiler/noirc_evaluator/src/ssa/ir/post_order.rs b/compiler/noirc_evaluator/src/ssa/ir/post_order.rs
index e98857dcc87..cb0112f32a0 100644
--- a/compiler/noirc_evaluator/src/ssa/ir/post_order.rs
+++ b/compiler/noirc_evaluator/src/ssa/ir/post_order.rs
@@ -110,7 +110,7 @@ mod tests {
         //   D (seen)
         // } -> push(A)
         // Result:
-        // D, F, E, B, A, (C dropped as unreachable)
+        // F, E, B, D, A, (C dropped as unreachable)
 
         let func_id = Id::test_new(0);
         let mut builder = FunctionBuilder::new("func".into(), func_id);
@@ -151,6 +151,6 @@ mod tests {
         let func = ssa.main();
         let post_order = PostOrder::with_function(func);
         let block_a_id = func.entry_block();
-        assert_eq!(post_order.0, [block_d_id, block_f_id, block_e_id, block_b_id, block_a_id]);
+        assert_eq!(post_order.0, [block_f_id, block_e_id, block_b_id, block_d_id, block_a_id]);
     }
 }

From ca935b02abe2443f1ca8530fc9f8eb0d46ca2e96 Mon Sep 17 00:00:00 2001
From: Jake Fecher <jfecher11@gmail.com>
Date: Wed, 13 Nov 2024 14:43:57 -0600
Subject: [PATCH 5/5] Update test

---
 compiler/noirc_evaluator/src/ssa/opt/array_set.rs | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/compiler/noirc_evaluator/src/ssa/opt/array_set.rs b/compiler/noirc_evaluator/src/ssa/opt/array_set.rs
index e7f69f87da8..252e0ee75e8 100644
--- a/compiler/noirc_evaluator/src/ssa/opt/array_set.rs
+++ b/compiler/noirc_evaluator/src/ssa/opt/array_set.rs
@@ -223,15 +223,15 @@ mod tests {
                 jmp b1(u32 0)
               b1(v0: u32):
                 v12 = lt v0, u32 5
-                jmpif v12 then: b3, else: b2
-              b3():
+                jmpif v12 then: b2, else: b5
+              b2():
                 v13 = eq v0, u32 5
-                jmpif v13 then: b4, else: b5
-              b4():
+                jmpif v13 then: b3, else: b4
+              b3():
                 v14 = load v1 -> [[Field; 5]; 2]
                 store v14 at v6
-                jmp b5()
-              b5():
+                jmp b4()
+              b4():
                 v15 = load v1 -> [[Field; 5]; 2]
                 v16 = array_get v15, index Field 0 -> [Field; 5]
                 v18 = array_set v16, index v0, value Field 20
@@ -239,7 +239,7 @@ mod tests {
                 store v19 at v1
                 v21 = add v0, u32 1
                 jmp b1(v21)
-              b2():
+              b5():
                 return
             }
             ";