diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000000..24ec1c5601
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,20 @@
+# Documentation available at editorconfig.org
+
+root=true
+
+[*]
+indent_style = space
+indent_size = 4
+end_of_line = lf
+charset = utf-8
+trim_trailing_whitespace = true
+insert_final_newline = true
+
+[*.rs]
+max_line_length = 100
+
+[*.md]
+trim_trailing_whitespace = false
+
+[*.yml]
+indent_size = 2
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 791a4ef739..53f8242a9f 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -3,4 +3,4 @@ updates:
   - package-ecosystem: "cargo"
     directory: "/"
     schedule:
-      interval: "weekly"
\ No newline at end of file
+      interval: "weekly"
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 571e57a0a5..16cde14641 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -4,7 +4,7 @@ on:
     branches:
       - main
   pull_request:
-    types: [opened, repoened, synchronize]
+    types: [opened, reopened, synchronize]
 
 jobs:
   check:
diff --git a/.gitignore b/.gitignore
index 0cfeb9c381..da16b038fa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,3 +17,9 @@ stdlib/assets/std.masl
 
 # These are files generated by MacOS
 **/.DS_Store
+
+# Files present in IntelliJ IDEs.
+.idea/
+
+# VS Code
+.vscode/
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6c630e1e31..82b3ee81b7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,36 @@
 # Changelog
 
+## 0.8.0 (TBD)
+
+#### Assembly
+- Expanded capabilities of the `debug` decorator. Added `debug.mem` and `debug.local` variations (#1103).
+- Introduced the `emit.` assembly instruction (#1119).
+- Introduced the `procref.` assembly instruction (#1113).
+- Added the ability to use constants as counters in `repeat` loops (#1124).
+- All `checked` versions of the u32 instructions were removed. All `unchecked` versions were renamed: this mode specification was removed from their titles (#1115).
+- Introduced the `u32clz`, `u32ctz`, `u32clo`, `u32cto` and `ilog2` assembly instructions (#1176).
+- Added support for hexadecimal values in constants (#1199).
+- Added the `RCombBase` instruction (#1216).
+
+#### Stdlib
+- Introduced `std::utils` module with `is_empty_word` procedure. Refactored `std::collections::smt`
+  and `std::collections::smt64` to use the procedure (#1107).
+- Removed `checked` versions of the instructions in the `std::math::u64` module (#1142).
+- Introduced `clz`, `ctz`, `clo` and `cto` instructions in the `std::math::u64` module (#1179).
+- Removed `std::collections::smt64` (#1249).
+
+#### VM Internals
+- Introduced the `Event` decorator and an associated `on_event` handler on the `Host` trait (#1119).
+- Updated Winterfell dependency to v0.7 (#1121).
+- Added methods `StackOutputs::get_stack_item()` and `StackOutputs::get_stack_word()` (#1155).
+- Added [Tracing](https://crates.io/crates/tracing) logger to the VM (#1139).
+- Added `on_assert_failed()` method to the Host trait (#1197).
+- Added support for handling `trace` instruction in the `Host` interface (#1198).
+
+#### CLI
+- Introduced the `!use` command for the Miden REPL (#1162).
+- Introduced a `BLAKE3` hashing example (#1180).
+
 ## 0.7.0 (2023-10-11)
 
 #### Assembly
@@ -17,16 +48,16 @@ #### VM Internals
 - Simplified range checker and removed 1 main and 1 auxiliary trace column (#949).
-- Migrated range checker lookups to use LogUp and reduced the number of trace columns to 2 main and +- Migrated range checker lookups to use LogUp and reduced the number of trace columns to 2 main and 1 auxiliary (#1027). - Added `get_mapped_values()` and `get_store_subset()` methods to the `AdviceProvider` trait (#987). - [BREAKING] Added options to specify maximum number of cycles and expected number of cycles for a program (#998). - Improved handling of invalid/incomplete parameters in `StackOutputs` constructors (#1010). - Allowed the assembler to produce programs with "phantom" calls (#1019). - Added `TraceLenSummary` struct which holds information about traces lengths to the `ExecutionTrace` (#1029). -- Imposed the 2^32 limit for the memory addresses used in the memory chiplet (#1049). +- Imposed the 2^32 limit for the memory addresses used in the memory chiplet (#1049). - Supported `PartialMerkleTree` as a secret input in `.input` file (#1072). -- [BREAKING] Refactored `AdviceProvider` interface into [Host] interface (#1082). +- [BREAKING] Refactored `AdviceProvider` interface into `Host` interface (#1082). #### Stdlib - Completed `std::collections::smt` module by implementing `insert` and `set` procedures (#1036, #1038, #1046). diff --git a/Makefile b/Makefile index 6ba366c124..f528581586 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ FEATURES_INTERNALS=--features internals FEATURES_CONCURRENT_EXEC=--features concurrent,executable -FEATURES_GRAVITON_EXEC=--features concurrent,executable,sve +FEATURES_LOG_TREE=--features concurrent,executable,tracing-forest FEATURES_METAL_EXEC=--features concurrent,executable,metal PROFILE_OPTIMIZED=--profile optimized PROFILE_TEST=--profile test-release @@ -14,8 +14,14 @@ exec: exec-metal: cargo build $(PROFILE_OPTIMIZED) $(FEATURES_METAL_EXEC) -exec-graviton: - RUSTFLAGS="-C target-cpu=native" cargo build $(PROFILE_OPTIMIZED) $(FEATURES_GRAVITON_EXEC) +exec-avx2: + RUSTFLAGS="-C target-feature=+avx2" cargo build $(PROFILE_OPTIMIZED) $(FEATURES_CONCURRENT_EXEC) + +exec-sve: + RUSTFLAGS="-C target-feature=+sve" cargo build $(PROFILE_OPTIMIZED) $(FEATURES_CONCURRENT_EXEC) + +exec-info: + cargo build $(PROFILE_OPTIMIZED) $(FEATURES_LOG_TREE) test: cargo test $(PROFILE_TEST) $(FEATURES_INTERNALS) diff --git a/README.md b/README.md index 12e76f342b..13abe7b7c1 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ Miden VM is a zero-knowledge virtual machine written in Rust. For any program ex * If you'd like to learn more about STARKs, check out the [references](#references) section. ### Status and features -Miden VM is currently on release v0.7. In this release, most of the core features of the VM have been stabilized, and most of the STARK proof generation has been implemented. While we expect to keep making changes to the VM internals, the external interfaces should remain relatively stable, and we will do our best to minimize the amount of breaking changes going forward. +Miden VM is currently on release v0.8. In this release, most of the core features of the VM have been stabilized, and most of the STARK proof generation has been implemented. While we expect to keep making changes to the VM internals, the external interfaces should remain relatively stable, and we will do our best to minimize the amount of breaking changes going forward. The next version of the VM is being developed in the [next](https://github.com/0xPolygonMiden/miden-vm/tree/next) branch. 
There is also a documentation for the latest features and changes in the next branch [documentation next branch](https://0xpolygonmiden.github.io/miden-vm/intro/main.html). diff --git a/air/Cargo.toml b/air/Cargo.toml index ed72ddc74a..1e02643ddf 100644 --- a/air/Cargo.toml +++ b/air/Cargo.toml @@ -1,11 +1,12 @@ [package] name = "miden-air" -version = "0.7.0" +version = "0.8.0" description = "Algebraic intermediate representation of Miden VM processor" authors = ["miden contributors"] readme = "README.md" license = "MIT" repository = "https://github.com/0xPolygonMiden/miden-vm" +documentation = "https://docs.rs/miden-air/0.8.0" categories = ["cryptography", "no-std"] keywords = ["air", "arithmetization", "crypto", "miden"] edition = "2021" @@ -26,12 +27,14 @@ harness = false [features] default = ["std"] std = ["vm-core/std", "winter-air/std"] +internals = [] [dependencies] -vm-core = { package = "miden-core", path = "../core", version = "0.7", default-features = false } -winter-air = { package = "winter-air", version = "0.6", default-features = false } +vm-core = { package = "miden-core", path = "../core", version = "0.8", default-features = false } +winter-air = { package = "winter-air", version = "0.8", default-features = false } +winter-prover = { package = "winter-prover", version = "0.8", default-features = false } [dev-dependencies] criterion = "0.5" proptest = "1.3" -rand-utils = { package = "winter-rand-utils", version = "0.6" } +rand-utils = { package = "winter-rand-utils", version = "0.8" } diff --git a/air/src/constraints/chiplets/bitwise/mod.rs b/air/src/constraints/chiplets/bitwise/mod.rs index 87b405f298..c01e10f4d9 100644 --- a/air/src/constraints/chiplets/bitwise/mod.rs +++ b/air/src/constraints/chiplets/bitwise/mod.rs @@ -1,11 +1,11 @@ -use super::{EvaluationFrame, Felt, FieldElement, Vec}; +use super::{EvaluationFrame, Felt, FieldElement}; use crate::{ trace::chiplets::{ bitwise::{NUM_DECOMP_BITS, NUM_SELECTORS, OP_CYCLE_LEN}, BITWISE_A_COL_IDX, BITWISE_A_COL_RANGE, BITWISE_B_COL_IDX, BITWISE_B_COL_RANGE, BITWISE_OUTPUT_COL_IDX, BITWISE_PREV_OUTPUT_COL_IDX, BITWISE_SELECTOR_COL_IDX, }, - utils::{are_equal, binary_not, is_binary, is_zero, EvaluationResult}, + utils::{are_equal, binary_not, collections::*, is_binary, is_zero, EvaluationResult}, ONE, ZERO, }; use winter_air::TransitionConstraintDegree; @@ -232,7 +232,7 @@ pub fn bitwise_and(decomposed_values: &[E]) -> E { for idx in 0..NUM_DECOMP_BITS { let a = decomposed_values[idx]; let b = decomposed_values[idx + NUM_DECOMP_BITS]; - result += E::from(2_u64.pow(idx as u32)) * a * b + result += E::from(2_u32.pow(idx as u32)) * a * b } result } @@ -245,7 +245,7 @@ pub fn bitwise_xor(decomposed_values: &[E]) -> E { for idx in 0..NUM_DECOMP_BITS { let a = decomposed_values[idx]; let b = decomposed_values[idx + NUM_DECOMP_BITS]; - result += E::from(2_u64.pow(idx as u32)) * (a + b - E::from(2_u8) * a * b) + result += E::from(2_u32.pow(idx as u32)) * (a + b - E::from(2_u8) * a * b) } result } @@ -407,7 +407,7 @@ pub fn agg_bits(row: &[E], start_idx: usize) -> E { // thus, in theory, we could just aggregate results in a 128-bit integer and perform only a // single reduction in the end. This works only when we are in the base field." 
for bit_idx in 0..NUM_DECOMP_BITS { - result += E::from(2_u64.pow(bit_idx as u32)) * row[start_idx + bit_idx]; + result += E::from(2_u32.pow(bit_idx as u32)) * row[start_idx + bit_idx]; } result } diff --git a/air/src/constraints/chiplets/hasher/mod.rs b/air/src/constraints/chiplets/hasher/mod.rs index dde05fc5c8..17e4ecc211 100644 --- a/air/src/constraints/chiplets/hasher/mod.rs +++ b/air/src/constraints/chiplets/hasher/mod.rs @@ -1,12 +1,13 @@ -use super::{EvaluationFrame, Felt, FieldElement, TransitionConstraintDegree, Vec}; -use crate::trace::chiplets::{ - hasher::{ - Hasher, CAPACITY_LEN, DIGEST_LEN, DIGEST_RANGE, HASH_CYCLE_LEN, NUM_SELECTORS, STATE_WIDTH, - }, - HASHER_NODE_INDEX_COL_IDX, HASHER_SELECTOR_COL_RANGE, HASHER_STATE_COL_RANGE, -}; +use super::{EvaluationFrame, Felt, FieldElement, TransitionConstraintDegree}; use crate::{ - utils::{are_equal, binary_not, is_binary, EvaluationResult}, + trace::chiplets::{ + hasher::{ + Hasher, CAPACITY_LEN, DIGEST_LEN, DIGEST_RANGE, HASH_CYCLE_LEN, NUM_SELECTORS, + STATE_WIDTH, + }, + HASHER_NODE_INDEX_COL_IDX, HASHER_SELECTOR_COL_RANGE, HASHER_STATE_COL_RANGE, + }, + utils::{are_equal, binary_not, collections::*, is_binary, EvaluationResult}, ONE, ZERO, }; @@ -344,9 +345,11 @@ trait EvaluationFrameExt { // --- Flags ---------------------------------------------------------------------------------- /// Set to 1 on the first 7 steps of every 8-step cycle. This flag is degree 1. + #[allow(dead_code)] fn f_rpr(&self, k: &[E]) -> E; /// Set to 1 when selector flags are (1,0,0) on rows which are multiples of 8. This flag is /// degree 4. + #[allow(dead_code)] fn f_bp(&self, k: &[E]) -> E; /// Set to 1 when selector flags are (1,0,1) on rows which are multiples of 8. This flag is /// degree 4. @@ -359,9 +362,11 @@ trait EvaluationFrameExt { fn f_mu(&self, k: &[E]) -> E; /// Set to 1 when selector flags are (0,0,0) on rows which are 1 less than a multiple of 8. This /// flag is degree 4. + #[allow(dead_code)] fn f_hout(&self, k: &[E]) -> E; /// Set to 1 when selector flags are (0,0,1) on rows which are 1 less than a multiple of 8. This /// flag is degree 4. + #[allow(dead_code)] fn f_sout(&self, k: &[E]) -> E; /// This flag will be set to 1 when either f_hout=1 or f_sout=1 in the current row. This flag is /// degree 3. 
diff --git a/air/src/constraints/chiplets/hasher/tests.rs b/air/src/constraints/chiplets/hasher/tests.rs index 7c183164df..5c278efc3b 100644 --- a/air/src/constraints/chiplets/hasher/tests.rs +++ b/air/src/constraints/chiplets/hasher/tests.rs @@ -4,10 +4,11 @@ use super::{ }; use crate::{ trace::chiplets::hasher::{Selectors, LINEAR_HASH, STATE_WIDTH}, + utils::collections::*, Felt, TRACE_WIDTH, }; use rand_utils::rand_array; -use vm_core::{chiplets::hasher::apply_round, utils::collections::Vec}; +use vm_core::chiplets::hasher::apply_round; use winter_air::EvaluationFrame; // UNIT TESTS diff --git a/air/src/constraints/chiplets/memory/mod.rs b/air/src/constraints/chiplets/memory/mod.rs index e0120aa758..159719428e 100644 --- a/air/src/constraints/chiplets/memory/mod.rs +++ b/air/src/constraints/chiplets/memory/mod.rs @@ -1,10 +1,12 @@ -use super::{EvaluationFrame, FieldElement, Vec}; -use crate::trace::chiplets::{ - memory::NUM_ELEMENTS, MEMORY_ADDR_COL_IDX, MEMORY_CLK_COL_IDX, MEMORY_CTX_COL_IDX, - MEMORY_D0_COL_IDX, MEMORY_D1_COL_IDX, MEMORY_D_INV_COL_IDX, MEMORY_TRACE_OFFSET, - MEMORY_V_COL_RANGE, +use super::{EvaluationFrame, FieldElement}; +use crate::{ + trace::chiplets::{ + memory::NUM_ELEMENTS, MEMORY_ADDR_COL_IDX, MEMORY_CLK_COL_IDX, MEMORY_CTX_COL_IDX, + MEMORY_D0_COL_IDX, MEMORY_D1_COL_IDX, MEMORY_D_INV_COL_IDX, MEMORY_TRACE_OFFSET, + MEMORY_V_COL_RANGE, + }, + utils::{binary_not, collections::*, is_binary, EvaluationResult}, }; -use crate::utils::{binary_not, is_binary, EvaluationResult}; use winter_air::TransitionConstraintDegree; #[cfg(test)] @@ -174,12 +176,16 @@ trait EvaluationFrameExt { /// Gets the value of the specified selector column in the next row. fn selector_next(&self, idx: usize) -> E; /// The current context value. + #[allow(dead_code)] fn ctx(&self) -> E; /// The current address. + #[allow(dead_code)] fn addr(&self) -> E; /// The current clock cycle. + #[allow(dead_code)] fn clk(&self) -> E; /// The next clock cycle. + #[allow(dead_code)] fn clk_next(&self) -> E; /// The value from the specified index of the values (0, 1, 2, 3) in the current row. 
fn v(&self, index: usize) -> E; diff --git a/air/src/constraints/chiplets/memory/tests.rs b/air/src/constraints/chiplets/memory/tests.rs index 41eef4260f..910aabf36d 100644 --- a/air/src/constraints/chiplets/memory/tests.rs +++ b/air/src/constraints/chiplets/memory/tests.rs @@ -9,9 +9,8 @@ use crate::trace::{ }, TRACE_WIDTH, }; -use crate::{chiplets::memory, Felt, FieldElement, ONE, ZERO}; +use crate::{chiplets::memory, utils::collections::*, Felt, FieldElement, ONE, ZERO}; use rand_utils::rand_value; -use vm_core::utils::collections::Vec; // UNIT TESTS // ================================================================================================ diff --git a/air/src/constraints/chiplets/mod.rs b/air/src/constraints/chiplets/mod.rs index 9810f91cec..7dc36f9d26 100644 --- a/air/src/constraints/chiplets/mod.rs +++ b/air/src/constraints/chiplets/mod.rs @@ -1,7 +1,7 @@ use super::super::{ - EvaluationFrame, Felt, FieldElement, TransitionConstraintDegree, Vec, CHIPLETS_OFFSET, + EvaluationFrame, Felt, FieldElement, TransitionConstraintDegree, CHIPLETS_OFFSET, }; -use crate::utils::{are_equal, binary_not, is_binary}; +use crate::utils::{are_equal, binary_not, collections::*, is_binary}; mod bitwise; mod hasher; diff --git a/air/src/constraints/range.rs b/air/src/constraints/range.rs index 1b8e95d9cd..9b3a756a51 100644 --- a/air/src/constraints/range.rs +++ b/air/src/constraints/range.rs @@ -2,10 +2,10 @@ use crate::{ chiplets::ChipletsFrameExt, constraints::MainFrameExt, trace::range::{B_RANGE_COL_IDX, M_COL_IDX, V_COL_IDX}, - utils::are_equal, + utils::{are_equal, collections::*}, Assertion, EvaluationFrame, Felt, FieldElement, TransitionConstraintDegree, }; -use vm_core::{utils::collections::Vec, ExtensionOf, ZERO}; +use vm_core::{ExtensionOf, ZERO}; use winter_air::AuxTraceRandElements; // CONSTANTS diff --git a/air/src/constraints/stack/field_ops/mod.rs b/air/src/constraints/stack/field_ops/mod.rs index 31afeaee4c..b2ec9981f3 100644 --- a/air/src/constraints/stack/field_ops/mod.rs +++ b/air/src/constraints/stack/field_ops/mod.rs @@ -1,10 +1,8 @@ -use super::{op_flags::OpFlags, EvaluationFrame, Vec}; +use super::{op_flags::OpFlags, EvaluationFrame, FieldElement, TransitionConstraintDegree}; use crate::{ stack::EvaluationFrameExt, - utils::{are_equal, is_binary}, + utils::{are_equal, collections::*, is_binary}, }; -use vm_core::FieldElement; -use winter_air::TransitionConstraintDegree; #[cfg(test)] pub mod tests; diff --git a/air/src/constraints/stack/field_ops/tests.rs b/air/src/constraints/stack/field_ops/tests.rs index 0ddde311c3..1a0c6a0240 100644 --- a/air/src/constraints/stack/field_ops/tests.rs +++ b/air/src/constraints/stack/field_ops/tests.rs @@ -6,7 +6,7 @@ use crate::stack::op_flags::{generate_evaluation_frame, OpFlags}; use crate::trace::decoder::USER_OP_HELPERS_OFFSET; use core::ops::Neg; use rand_utils::rand_value; -use vm_core::{Felt, FieldElement, Operation, StarkField, ONE, ZERO}; +use vm_core::{Felt, FieldElement, Operation, ONE, ZERO}; use proptest::prelude::*; diff --git a/air/src/constraints/stack/io_ops/mod.rs b/air/src/constraints/stack/io_ops/mod.rs index 9628bc41f1..3b6179289d 100644 --- a/air/src/constraints/stack/io_ops/mod.rs +++ b/air/src/constraints/stack/io_ops/mod.rs @@ -1,7 +1,8 @@ -use super::{op_flags::OpFlags, EvaluationFrame, Vec}; -use crate::{stack::EvaluationFrameExt, utils::are_equal}; -use vm_core::FieldElement; -use winter_air::TransitionConstraintDegree; +use super::{op_flags::OpFlags, EvaluationFrame, FieldElement, TransitionConstraintDegree}; 
+use crate::{ + stack::EvaluationFrameExt, + utils::{are_equal, collections::*}, +}; #[cfg(test)] pub mod tests; diff --git a/air/src/constraints/stack/mod.rs b/air/src/constraints/stack/mod.rs index 41ddbbcfec..00dbb4f2b2 100644 --- a/air/src/constraints/stack/mod.rs +++ b/air/src/constraints/stack/mod.rs @@ -4,8 +4,8 @@ use super::super::{ STACK_AUX_TRACE_OFFSET, STACK_TRACE_OFFSET, ZERO, }; use crate::decoder::{IS_CALL_FLAG_COL_IDX, IS_SYSCALL_FLAG_COL_IDX, USER_OP_HELPERS_OFFSET}; -use crate::utils::{are_equal, is_binary}; -use vm_core::{stack::STACK_TOP_SIZE, utils::collections::Vec, StackOutputs, StarkField}; +use crate::utils::{are_equal, collections::*, is_binary}; +use vm_core::{stack::STACK_TOP_SIZE, StackOutputs}; pub mod field_ops; pub mod io_ops; @@ -107,7 +107,7 @@ pub fn enforce_constraints>( } /// Enforces unique constraints of all the stack ops. -pub fn enforce_unique_constraints( +pub fn enforce_unique_constraints>( frame: &EvaluationFrame, result: &mut [E], op_flag: &op_flags::OpFlags, @@ -275,7 +275,7 @@ where { let mut value = E::ONE; let mut prev_clk = ZERO; - let mut clk = Felt::from(Felt::MODULUS - init_values.len() as u64); + let mut clk = -Felt::from(init_values.len() as u32); // the values are in the overflow table in reverse order, since the deepest stack // value is added to the overflow table first. @@ -337,12 +337,14 @@ trait EvaluationFrameExt { fn stack_overflow_addr_next(&self) -> E; /// Returns the current value of stack helper column `h0`. + #[allow(dead_code)] fn stack_helper(&self) -> E; /// Gets the current element of the clk register in the trace. fn clk(&self) -> E; /// Gets the next element of the clk register in the trace. + #[allow(dead_code)] fn clk_next(&self) -> E; /// Gets the current element of the fmp register in the trace. diff --git a/air/src/constraints/stack/op_flags/mod.rs b/air/src/constraints/stack/op_flags/mod.rs index a38dee06b5..741d83a871 100644 --- a/air/src/constraints/stack/op_flags/mod.rs +++ b/air/src/constraints/stack/op_flags/mod.rs @@ -624,7 +624,7 @@ impl OpFlags { /// Operation Flag of ASSERT operation. #[inline(always)] pub fn assert(&self) -> E { - self.degree7_op_flags[get_op_index(Operation::Assert(ZERO).op_code())] + self.degree7_op_flags[get_op_index(Operation::Assert(0).op_code())] } /// Operation Flag of EQ operation. @@ -951,7 +951,7 @@ impl OpFlags { } /// Returns ONE when the stack item at the specified depth shifts to the left during an - /// operation, and ZERO otherwise. The left shift is not defined on the first postion in the + /// operation, and ZERO otherwise. The left shift is not defined on the first position in the /// stack and therefore, a ZERO is returned. #[inline(always)] pub fn left_shift_at(&self, index: usize) -> E { @@ -959,7 +959,7 @@ impl OpFlags { } /// Returns ONE when the stack item at the specified depth shifts to the right during an - /// operation, and ZERO otherwise. The right shift is not defined on the last postion in the + /// operation, and ZERO otherwise. The right shift is not defined on the last position in the /// stack and therefore, a ZERO is returned. 
#[inline(always)] pub fn right_shift_at(&self, index: usize) -> E { diff --git a/air/src/constraints/stack/overflow/mod.rs b/air/src/constraints/stack/overflow/mod.rs index 0761a93846..32bd93093a 100644 --- a/air/src/constraints/stack/overflow/mod.rs +++ b/air/src/constraints/stack/overflow/mod.rs @@ -1,7 +1,5 @@ -use super::{op_flags::OpFlags, EvaluationFrame, Vec}; -use crate::stack::EvaluationFrameExt; -use vm_core::FieldElement; -use winter_air::TransitionConstraintDegree; +use super::{op_flags::OpFlags, EvaluationFrame, FieldElement, TransitionConstraintDegree}; +use crate::{stack::EvaluationFrameExt, utils::collections::*}; #[cfg(test)] pub mod tests; diff --git a/air/src/constraints/stack/stack_manipulation/mod.rs b/air/src/constraints/stack/stack_manipulation/mod.rs index e825499d2f..3fdee5cae0 100644 --- a/air/src/constraints/stack/stack_manipulation/mod.rs +++ b/air/src/constraints/stack/stack_manipulation/mod.rs @@ -1,10 +1,8 @@ -use super::{op_flags::OpFlags, EvaluationFrame, Vec}; +use super::{op_flags::OpFlags, EvaluationFrame, FieldElement, TransitionConstraintDegree}; use crate::{ stack::EvaluationFrameExt, - utils::{are_equal, binary_not}, + utils::{are_equal, binary_not, collections::*}, }; -use vm_core::FieldElement; -use winter_air::TransitionConstraintDegree; #[cfg(test)] pub mod tests; diff --git a/air/src/constraints/stack/system_ops/mod.rs b/air/src/constraints/stack/system_ops/mod.rs index 073020edcf..2775f73e22 100644 --- a/air/src/constraints/stack/system_ops/mod.rs +++ b/air/src/constraints/stack/system_ops/mod.rs @@ -1,7 +1,8 @@ -use super::{op_flags::OpFlags, EvaluationFrame, Vec}; -use crate::{stack::EvaluationFrameExt, utils::are_equal}; -use vm_core::FieldElement; -use winter_air::TransitionConstraintDegree; +use super::{op_flags::OpFlags, EvaluationFrame, FieldElement, TransitionConstraintDegree}; +use crate::{ + stack::EvaluationFrameExt, + utils::{are_equal, collections::*}, +}; #[cfg(test)] pub mod tests; diff --git a/air/src/constraints/stack/system_ops/tests.rs b/air/src/constraints/stack/system_ops/tests.rs index 166a41b48a..cd4790999f 100644 --- a/air/src/constraints/stack/system_ops/tests.rs +++ b/air/src/constraints/stack/system_ops/tests.rs @@ -102,7 +102,7 @@ pub fn get_fmpupdate_test_frame(a: u64) -> EvaluationFrame { /// returns an EvaluationFrame for testing. pub fn get_assert_test_frame() -> EvaluationFrame { // frame initialized with a fmpupdate operation using it's unique opcode. - let mut frame = generate_evaluation_frame(Operation::Assert(ZERO).op_code() as usize); + let mut frame = generate_evaluation_frame(Operation::Assert(0).op_code() as usize); // Set the output. The top element in the current frame of the stack should be ONE. 
frame.current_mut()[STACK_TRACE_OFFSET] = ONE; diff --git a/air/src/constraints/stack/u32_ops/mod.rs b/air/src/constraints/stack/u32_ops/mod.rs index c255d3863e..401166f72c 100644 --- a/air/src/constraints/stack/u32_ops/mod.rs +++ b/air/src/constraints/stack/u32_ops/mod.rs @@ -1,10 +1,8 @@ -use super::{op_flags::OpFlags, EvaluationFrame, Vec}; +use super::{op_flags::OpFlags, EvaluationFrame, Felt, FieldElement, TransitionConstraintDegree}; use crate::{ stack::EvaluationFrameExt, - utils::{are_equal, is_binary}, + utils::{are_equal, collections::*, is_binary}, }; -use vm_core::FieldElement; -use winter_air::TransitionConstraintDegree; #[cfg(test)] pub mod tests; @@ -16,16 +14,16 @@ pub mod tests; pub const NUM_CONSTRAINTS: usize = 13; // The co-efficient of the most significant 16-bit limb in the helper register during aggregation. -pub const TWO_48: u64 = 2u64.pow(48); +pub const TWO_48: Felt = Felt::new(2u64.pow(48)); // The co-efficient of the 2nd most significant 16-bit limb in the helper register during aggregation. -pub const TWO_32: u64 = 2u64.pow(32); +pub const TWO_32: Felt = Felt::new(2u64.pow(32)); // The co-efficient of the 3rd significant 16-bit limb in the helper register during aggregation. -pub const TWO_16: u64 = 2u64.pow(16); +pub const TWO_16: Felt = Felt::new(2u64.pow(16)); // The co-efficient of the least significant 16-bit bit in the helper register during aggregation. -pub const TWO_0: u64 = 1; +pub const TWO_0: Felt = Felt::new(1); /// The degrees of constraints in individual u32 operations. pub const CONSTRAINT_DEGREES: [usize; NUM_CONSTRAINTS] = [ @@ -59,7 +57,7 @@ pub fn get_transition_constraint_count() -> usize { } /// Enforces constraints for the u32 operations. -pub fn enforce_constraints( +pub fn enforce_constraints>( frame: &EvaluationFrame, result: &mut [E], op_flag: &OpFlags, @@ -103,7 +101,7 @@ pub fn enforce_constraints( /// Enforces constraints of the U32SPLIT operation. The U32SPLIT operation splits the top element into /// two 32-bit numbers. Therefore, the following constraints are enforced: /// - The aggregation of limbs from the helper registers forms the top element in the stack. -pub fn enforce_u32split_constraints( +pub fn enforce_u32split_constraints>( frame: &EvaluationFrame, result: &mut [E], op_flag: E, @@ -120,7 +118,7 @@ pub fn enforce_u32split_constraints( /// enforced: /// - The aggregation of limbs from the helper registers is equal to the sum of the top two /// element in the stack. -pub fn enforce_u32add_constraints( +pub fn enforce_u32add_constraints>( frame: &EvaluationFrame, result: &mut [E], op_flag: E, @@ -141,7 +139,7 @@ pub fn enforce_u32add_constraints( /// enforced: /// - The aggregation of limbs from the helper registers is equal to the sum of the top three /// elements in the stack. -pub fn enforce_u32add3_constraints( +pub fn enforce_u32add3_constraints>( frame: &EvaluationFrame, result: &mut [E], op_flag: E, @@ -164,7 +162,7 @@ pub fn enforce_u32add3_constraints( /// - The aggregation of limbs from helper registers is equal to the difference of the top /// two elements in the stack. /// - The first element in the next trace should be a binary. -pub fn enforce_u32sub_constraints( +pub fn enforce_u32sub_constraints>( frame: &EvaluationFrame, result: &mut [E], op_flag: E, @@ -191,7 +189,7 @@ pub fn enforce_u32sub_constraints( /// enforced: /// - The aggregation of all the limbs in the helper registers is equal to the product of the /// top two elements in the stack. 
-pub fn enforce_u32mul_constraints(
+pub fn enforce_u32mul_constraints>(
     frame: &EvaluationFrame,
     result: &mut [E],
     op_flag: E,
@@ -212,7 +210,7 @@ pub fn enforce_u32mul_constraints(
 /// following constraints are enforced:
 /// - The aggregation of all the limbs in the helper registers is equal to the sum of the
 /// third element with the product of the first two elements in the current trace.
-pub fn enforce_u32madd_constraints(
+pub fn enforce_u32madd_constraints>(
     frame: &EvaluationFrame,
     result: &mut [E],
     op_flag: E,
@@ -238,7 +236,7 @@ pub fn enforce_u32madd_constraints(
 /// aggregation of the lower 16-bits limbs.
 /// - The difference between the second elements in the current and next trace and one should be equal
 /// to the aggregation of the upper 16-bits limbs.
-pub fn enforce_u32div_constraints(
+pub fn enforce_u32div_constraints>(
     frame: &EvaluationFrame,
     result: &mut [E],
     op_flag: E,
@@ -266,7 +264,7 @@ pub fn enforce_u32div_constraints(
 /// The constraint checks whether the top four elements in the trace, when aggregated, form a
 /// valid field element. This constraint is applicable in `U32SPLIT`, `U32MADD` and `U32MUL`.
-pub fn enforce_check_element_validity(
+pub fn enforce_check_element_validity>(
     frame: &EvaluationFrame,
     result: &mut [E],
     op_flag: &OpFlags,
@@ -291,7 +289,7 @@ pub fn enforce_check_element_validity(
 /// element in the next row.
 /// - The aggregation of lower two upper 16-bits limbs in the helper registers is equal to the first
 /// element in the next row.
-pub fn enforce_limbs_agg(
+pub fn enforce_limbs_agg>(
     frame: &EvaluationFrame,
     result: &mut [E],
     op_flag: &OpFlags,
@@ -324,7 +322,7 @@ pub struct LimbCompositions {
     v64: E,
 }
 
-impl LimbCompositions {
+impl> LimbCompositions {
     // Returns a new instance of [LimbCompositions] instantiated with all the intermediate limbs values.
pub fn new(frame: &EvaluationFrame) -> Self { let v_lo = diff --git a/air/src/constraints/stack/u32_ops/tests.rs b/air/src/constraints/stack/u32_ops/tests.rs index cfe0161c93..0c21711b8e 100644 --- a/air/src/constraints/stack/u32_ops/tests.rs +++ b/air/src/constraints/stack/u32_ops/tests.rs @@ -4,7 +4,7 @@ use super::{ }; use crate::stack::op_flags::{generate_evaluation_frame, OpFlags}; use crate::trace::decoder::USER_OP_HELPERS_OFFSET; -use vm_core::{Felt, FieldElement, Operation, StarkField, ZERO}; +use vm_core::{Felt, FieldElement, Operation, ZERO}; use proptest::prelude::*; diff --git a/air/src/errors.rs b/air/src/errors.rs index ce9c50b8be..feda629ab7 100644 --- a/air/src/errors.rs +++ b/air/src/errors.rs @@ -1,5 +1,4 @@ -use super::String; -use crate::trace::MIN_TRACE_LEN; +use crate::{trace::MIN_TRACE_LEN, utils::string::*}; use core::fmt::{Display, Formatter}; // EXECUTION ERROR diff --git a/air/src/lib.rs b/air/src/lib.rs index d954eef7fa..a597604ce4 100644 --- a/air/src/lib.rs +++ b/air/src/lib.rs @@ -5,13 +5,14 @@ extern crate alloc; use vm_core::{ - utils::{collections::Vec, string::String, ByteWriter, Serializable}, + utils::{collections::*, ByteReader, ByteWriter, Deserializable, Serializable}, ExtensionOf, ProgramInfo, StackInputs, StackOutputs, ONE, ZERO, }; use winter_air::{ Air, AirContext, Assertion, AuxTraceRandElements, EvaluationFrame, ProofOptions as WinterProofOptions, TraceInfo, TransitionConstraintDegree, }; +use winter_prover::matrix::ColMatrix; mod constraints; pub use constraints::stack; @@ -27,7 +28,7 @@ mod proof; mod utils; use utils::TransitionConstraintRange; -// EXPORTS +// RE-EXPORTS // ================================================================================================ pub use errors::ExecutionOptionsError; @@ -271,6 +272,18 @@ impl PublicInputs { } } +impl vm_core::ToElements for PublicInputs { + fn to_elements(&self) -> Vec { + let mut result = self.program_info.to_elements(); + result.append(&mut self.stack_inputs.to_elements()); + result.append(&mut self.stack_outputs.to_elements()); + result + } +} + +// SERIALIZATION +// ================================================================================================ + impl Serializable for PublicInputs { fn write_into(&self, target: &mut W) { self.program_info.write_into(target); @@ -279,11 +292,16 @@ impl Serializable for PublicInputs { } } -impl vm_core::ToElements for PublicInputs { - fn to_elements(&self) -> Vec { - let mut result = self.program_info.to_elements(); - result.append(&mut self.stack_inputs.to_elements()); - result.append(&mut self.stack_outputs.to_elements()); - result +impl Deserializable for PublicInputs { + fn read_from(source: &mut R) -> Result { + let program_info = ProgramInfo::read_from(source)?; + let stack_inputs = StackInputs::read_from(source)?; + let stack_outputs = StackOutputs::read_from(source)?; + + Ok(PublicInputs { + program_info, + stack_inputs, + stack_outputs, + }) } } diff --git a/air/src/options.rs b/air/src/options.rs index 4cbc995a60..1308060bf2 100644 --- a/air/src/options.rs +++ b/air/src/options.rs @@ -1,6 +1,6 @@ -use super::{ExecutionOptionsError, HashFunction}; -use crate::trace::MIN_TRACE_LEN; -use winter_air::{FieldExtension, ProofOptions as WinterProofOptions}; +use super::{ + trace::MIN_TRACE_LEN, ExecutionOptionsError, FieldExtension, HashFunction, WinterProofOptions, +}; // PROVING OPTIONS // ================================================================================================ @@ -8,13 +8,32 @@ use 
winter_air::{FieldExtension, ProofOptions as WinterProofOptions}; /// A set of parameters specifying how Miden VM execution proofs are to be generated. #[derive(Debug, Clone, Eq, PartialEq)] pub struct ProvingOptions { - pub exec_options: ExecutionOptions, - pub proof_options: WinterProofOptions, - pub hash_fn: HashFunction, + exec_options: ExecutionOptions, + proof_options: WinterProofOptions, + hash_fn: HashFunction, } impl ProvingOptions { - // CONSTRUCTOR + // CONSTANTS + // -------------------------------------------------------------------------------------------- + + /// Standard proof parameters for 96-bit conjectured security in non-recursive context. + pub const REGULAR_96_BITS: WinterProofOptions = + WinterProofOptions::new(27, 8, 16, FieldExtension::Quadratic, 8, 255); + + /// Standard proof parameters for 128-bit conjectured security in non-recursive context. + pub const REGULAR_128_BITS: WinterProofOptions = + WinterProofOptions::new(27, 16, 21, FieldExtension::Cubic, 8, 255); + + /// Standard proof parameters for 96-bit conjectured security in recursive context. + pub const RECURSIVE_96_BITS: WinterProofOptions = + WinterProofOptions::new(27, 8, 16, FieldExtension::Quadratic, 4, 7); + + /// Standard proof parameters for 128-bit conjectured security in recursive context. + pub const RECURSIVE_128_BITS: WinterProofOptions = + WinterProofOptions::new(27, 16, 21, FieldExtension::Cubic, 4, 7); + + // CONSTRUCTORS // -------------------------------------------------------------------------------------------- /// Creates a new instance of [ProvingOptions] from the specified parameters. @@ -50,18 +69,15 @@ impl ProvingOptions { /// but may take significantly longer to generate. pub fn with_96_bit_security(recursive: bool) -> Self { if recursive { - let proof_options = WinterProofOptions::new(27, 8, 16, FieldExtension::Quadratic, 4, 7); Self { exec_options: ExecutionOptions::default(), - proof_options, + proof_options: Self::RECURSIVE_96_BITS, hash_fn: HashFunction::Rpo256, } } else { - let proof_options = - WinterProofOptions::new(27, 8, 16, FieldExtension::Quadratic, 8, 255); Self { exec_options: ExecutionOptions::default(), - proof_options, + proof_options: Self::REGULAR_96_BITS, hash_fn: HashFunction::Blake3_192, } } @@ -74,17 +90,15 @@ impl ProvingOptions { /// but may take significantly longer to generate. 
pub fn with_128_bit_security(recursive: bool) -> Self { if recursive { - let proof_options = WinterProofOptions::new(27, 16, 21, FieldExtension::Cubic, 4, 7); Self { exec_options: ExecutionOptions::default(), - proof_options, + proof_options: Self::RECURSIVE_128_BITS, hash_fn: HashFunction::Rpo256, } } else { - let proof_options = WinterProofOptions::new(27, 16, 21, FieldExtension::Cubic, 8, 255); Self { exec_options: ExecutionOptions::default(), - proof_options, + proof_options: Self::REGULAR_128_BITS, hash_fn: HashFunction::Blake3_256, } } @@ -136,6 +150,7 @@ impl From for WinterProofOptions { pub struct ExecutionOptions { max_cycles: u32, expected_cycles: u32, + enable_tracing: bool, } impl Default for ExecutionOptions { @@ -143,6 +158,7 @@ impl Default for ExecutionOptions { ExecutionOptions { max_cycles: u32::MAX, expected_cycles: MIN_TRACE_LEN as u32, + enable_tracing: false, } } } @@ -157,6 +173,7 @@ impl ExecutionOptions { pub fn new( max_cycles: Option, expected_cycles: u32, + enable_tracing: bool, ) -> Result { let max_cycles = max_cycles.unwrap_or(u32::MAX); if max_cycles < MIN_TRACE_LEN as u32 { @@ -173,9 +190,19 @@ impl ExecutionOptions { Ok(ExecutionOptions { max_cycles, expected_cycles, + enable_tracing, }) } + /// Enables Host to handle the `tracing` instructions. + pub fn with_tracing(mut self) -> Self { + self.enable_tracing = true; + self + } + + // PUBLIC ACCESSORS + // -------------------------------------------------------------------------------------------- + /// Returns maximum number of cycles pub fn max_cycles(&self) -> u32 { self.max_cycles @@ -185,4 +212,9 @@ impl ExecutionOptions { pub fn expected_cycles(&self) -> u32 { self.expected_cycles } + + /// Returns a flag indicating whether the Host should handle `trace` instructions + pub fn enable_tracing(&self) -> bool { + self.enable_tracing + } } diff --git a/air/src/proof.rs b/air/src/proof.rs index 7f83908a51..43d8dec278 100644 --- a/air/src/proof.rs +++ b/air/src/proof.rs @@ -1,7 +1,8 @@ -use super::DeserializationError; use vm_core::{ crypto::hash::{Blake3_192, Blake3_256, Hasher, Rpo256}, - utils::collections::Vec, + utils::{ + collections::*, ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable, + }, }; use winter_air::proof::StarkProof; @@ -127,3 +128,34 @@ impl TryFrom for HashFunction { } } } + +// SERIALIZATION +// ================================================================================================ + +impl Serializable for HashFunction { + fn write_into(&self, target: &mut W) { + target.write_u8(*self as u8); + } +} + +impl Deserializable for HashFunction { + fn read_from(source: &mut R) -> Result { + source.read_u8()?.try_into() + } +} + +impl Serializable for ExecutionProof { + fn write_into(&self, target: &mut W) { + self.proof.write_into(target); + self.hash_fn.write_into(target); + } +} + +impl Deserializable for ExecutionProof { + fn read_from(source: &mut R) -> Result { + let proof = StarkProof::read_from(source)?; + let hash_fn = HashFunction::read_from(source)?; + + Ok(ExecutionProof { proof, hash_fn }) + } +} diff --git a/air/src/trace/main_trace.rs b/air/src/trace/main_trace.rs new file mode 100644 index 0000000000..405f5471db --- /dev/null +++ b/air/src/trace/main_trace.rs @@ -0,0 +1,438 @@ +use super::{ + super::ColMatrix, + chiplets::{ + hasher::{DIGEST_LEN, HASH_CYCLE_LEN, STATE_WIDTH}, + BITWISE_A_COL_IDX, BITWISE_B_COL_IDX, BITWISE_OUTPUT_COL_IDX, HASHER_NODE_INDEX_COL_IDX, + HASHER_STATE_COL_RANGE, MEMORY_ADDR_COL_IDX, MEMORY_CLK_COL_IDX, 
MEMORY_CTX_COL_IDX, + MEMORY_V_COL_RANGE, + }, + decoder::{ + GROUP_COUNT_COL_IDX, HASHER_STATE_OFFSET, IN_SPAN_COL_IDX, IS_CALL_FLAG_COL_IDX, + IS_LOOP_BODY_FLAG_COL_IDX, IS_LOOP_FLAG_COL_IDX, IS_SYSCALL_FLAG_COL_IDX, + NUM_HASHER_COLUMNS, NUM_OP_BATCH_FLAGS, OP_BATCH_FLAGS_OFFSET, OP_BITS_EXTRA_COLS_OFFSET, + USER_OP_HELPERS_OFFSET, + }, + stack::{B0_COL_IDX, B1_COL_IDX, H0_COL_IDX}, + CHIPLETS_OFFSET, CLK_COL_IDX, CTX_COL_IDX, DECODER_TRACE_OFFSET, FMP_COL_IDX, FN_HASH_OFFSET, + STACK_TRACE_OFFSET, +}; +use crate::utils::collections::*; +use core::ops::{Deref, Range}; +use vm_core::{utils::range, Felt, ONE, ZERO}; + +// CONSTANTS +// ================================================================================================ + +const DECODER_HASHER_RANGE: Range = + range(DECODER_TRACE_OFFSET + HASHER_STATE_OFFSET, NUM_HASHER_COLUMNS); + +// HELPER STRUCT AND METHODS +// ================================================================================================ + +pub struct MainTrace { + columns: ColMatrix, +} + +impl Deref for MainTrace { + type Target = ColMatrix; + + fn deref(&self) -> &Self::Target { + &self.columns + } +} + +impl MainTrace { + pub fn new(main_trace: ColMatrix) -> Self { + Self { + columns: main_trace, + } + } + + pub fn num_rows(&self) -> usize { + self.columns.num_rows() + } + + #[cfg(any(test, feature = "internals"))] + pub fn get_column_range(&self, range: Range) -> Vec> { + range.fold(vec![], |mut acc, col_idx| { + acc.push(self.get_column(col_idx).to_vec()); + acc + }) + } + + // SYSTEM COLUMNS + // -------------------------------------------------------------------------------------------- + + /// Returns the value of the clk column at row i. + pub fn clk(&self, i: usize) -> Felt { + self.columns.get_column(CLK_COL_IDX)[i] + } + + /// Returns the value of the fmp column at row i. + pub fn fmp(&self, i: usize) -> Felt { + self.columns.get_column(FMP_COL_IDX)[i] + } + + /// Returns the value of the ctx column at row i. + pub fn ctx(&self, i: usize) -> Felt { + self.columns.get_column(CTX_COL_IDX)[i] + } + + // DECODER COLUMNS + // -------------------------------------------------------------------------------------------- + + /// Returns the value in the block address column at the row i. + pub fn addr(&self, i: usize) -> Felt { + self.columns.get_column(DECODER_TRACE_OFFSET)[i] + } + + /// Helper method to detect change of address. + pub fn is_addr_change(&self, i: usize) -> bool { + self.addr(i) != self.addr(i + 1) + } + + /// The i-th decoder helper register at `row`. + pub fn helper_register(&self, i: usize, row: usize) -> Felt { + self.columns.get_column(DECODER_TRACE_OFFSET + USER_OP_HELPERS_OFFSET + i)[row] + } + + /// Returns the hasher state at row i. + pub fn decoder_hasher_state(&self, i: usize) -> [Felt; NUM_HASHER_COLUMNS] { + let mut state = [ZERO; NUM_HASHER_COLUMNS]; + for (idx, col_idx) in DECODER_HASHER_RANGE.enumerate() { + let column = self.columns.get_column(col_idx); + state[idx] = column[i]; + } + state + } + + /// Returns the first half of the hasher state at row i. + pub fn decoder_hasher_state_first_half(&self, i: usize) -> [Felt; DIGEST_LEN] { + let mut state = [ZERO; DIGEST_LEN]; + for (col, s) in state.iter_mut().enumerate() { + *s = self.columns.get_column(DECODER_TRACE_OFFSET + HASHER_STATE_OFFSET + col)[i]; + } + state + } + + /// Returns a specific element from the hasher state at row i. 
+ pub fn decoder_hasher_state_element(&self, element: usize, i: usize) -> Felt { + self.columns.get_column(DECODER_TRACE_OFFSET + HASHER_STATE_OFFSET + element)[i + 1] + } + + /// Returns the current function hash (i.e., root) at row i. + pub fn fn_hash(&self, i: usize) -> [Felt; DIGEST_LEN] { + let mut state = [ZERO; DIGEST_LEN]; + for (col, s) in state.iter_mut().enumerate() { + *s = self.columns.get_column(FN_HASH_OFFSET + col)[i]; + } + state + } + + /// Returns the `is_loop_body` flag at row i. + pub fn is_loop_body_flag(&self, i: usize) -> Felt { + self.columns.get_column(DECODER_TRACE_OFFSET + IS_LOOP_BODY_FLAG_COL_IDX)[i] + } + + /// Returns the `is_loop` flag at row i. + pub fn is_loop_flag(&self, i: usize) -> Felt { + self.columns.get_column(DECODER_TRACE_OFFSET + IS_LOOP_FLAG_COL_IDX)[i] + } + + /// Returns the `is_call` flag at row i. + pub fn is_call_flag(&self, i: usize) -> Felt { + self.columns.get_column(DECODER_TRACE_OFFSET + IS_CALL_FLAG_COL_IDX)[i] + } + + /// Returns the `is_syscall` flag at row i. + pub fn is_syscall_flag(&self, i: usize) -> Felt { + self.columns.get_column(DECODER_TRACE_OFFSET + IS_SYSCALL_FLAG_COL_IDX)[i] + } + + /// Returns the operation batch flags at row i. This indicates the number of op groups in + /// the current batch that is being processed. + pub fn op_batch_flag(&self, i: usize) -> [Felt; NUM_OP_BATCH_FLAGS] { + [ + self.columns.get(DECODER_TRACE_OFFSET + OP_BATCH_FLAGS_OFFSET, i), + self.columns.get(DECODER_TRACE_OFFSET + OP_BATCH_FLAGS_OFFSET + 1, i), + self.columns.get(DECODER_TRACE_OFFSET + OP_BATCH_FLAGS_OFFSET + 2, i), + ] + } + + /// Returns the operation group count. This indicates the number of operation that remain + /// to be executed in the current span block. + pub fn group_count(&self, i: usize) -> Felt { + self.columns.get_column(DECODER_TRACE_OFFSET + GROUP_COUNT_COL_IDX)[i] + } + + /// Returns the delta between the current and next group counts. + pub fn delta_group_count(&self, i: usize) -> Felt { + self.group_count(i) - self.group_count(i + 1) + } + + /// Returns the `in_span` flag at row i. + pub fn is_in_span(&self, i: usize) -> Felt { + self.columns.get_column(DECODER_TRACE_OFFSET + IN_SPAN_COL_IDX)[i] + } + + /// Constructs the i-th op code value from its individual bits. + pub fn get_op_code(&self, i: usize) -> Felt { + let col_b0 = self.columns.get_column(DECODER_TRACE_OFFSET + 1); + let col_b1 = self.columns.get_column(DECODER_TRACE_OFFSET + 2); + let col_b2 = self.columns.get_column(DECODER_TRACE_OFFSET + 3); + let col_b3 = self.columns.get_column(DECODER_TRACE_OFFSET + 4); + let col_b4 = self.columns.get_column(DECODER_TRACE_OFFSET + 5); + let col_b5 = self.columns.get_column(DECODER_TRACE_OFFSET + 6); + let col_b6 = self.columns.get_column(DECODER_TRACE_OFFSET + 7); + let [b0, b1, b2, b3, b4, b5, b6] = + [col_b0[i], col_b1[i], col_b2[i], col_b3[i], col_b4[i], col_b5[i], col_b6[i]]; + b0 + b1.mul_small(2) + + b2.mul_small(4) + + b3.mul_small(8) + + b4.mul_small(16) + + b5.mul_small(32) + + b6.mul_small(64) + } + + /// Returns a flag indicating whether the current operation induces a left shift of the operand + /// stack. 
+ pub fn is_left_shift(&self, i: usize) -> bool { + let b0 = self.columns.get(DECODER_TRACE_OFFSET + 1, i); + let b1 = self.columns.get(DECODER_TRACE_OFFSET + 2, i); + let b2 = self.columns.get(DECODER_TRACE_OFFSET + 3, i); + let b3 = self.columns.get(DECODER_TRACE_OFFSET + 4, i); + let b4 = self.columns.get(DECODER_TRACE_OFFSET + 5, i); + let b5 = self.columns.get(DECODER_TRACE_OFFSET + 6, i); + let b6 = self.columns.get(DECODER_TRACE_OFFSET + 7, i); + let e0 = self.columns.get(DECODER_TRACE_OFFSET + OP_BITS_EXTRA_COLS_OFFSET, i); + let h5 = self.columns.get(DECODER_TRACE_OFFSET + IS_LOOP_FLAG_COL_IDX, i); + + // group with left shift effect grouped by a common prefix + ([b6, b5, b4] == [ZERO, ONE, ZERO])|| + // U32ADD3 or U32MADD + ([b6, b5, b4, b3, b2] == [ONE, ZERO, ZERO, ONE, ONE]) || + // SPLIT or LOOP block + ([e0, b3, b2, b1] == [ONE, ZERO, ONE, ZERO]) || + // REPEAT + ([b6, b5, b4, b3, b2, b1, b0] == [ONE, ONE, ONE, ZERO, ONE, ZERO, ZERO]) || + // END of a loop + ([b6, b5, b4, b3, b2, b1, b0] == [ONE, ONE, ONE, ZERO, ZERO, ZERO, ZERO] && h5 == ONE) + } + + /// Returns a flag indicating whether the current operation induces a right shift of the operand + /// stack. + pub fn is_right_shift(&self, i: usize) -> bool { + let b0 = self.columns.get(DECODER_TRACE_OFFSET + 1, i); + let b1 = self.columns.get(DECODER_TRACE_OFFSET + 2, i); + let b2 = self.columns.get(DECODER_TRACE_OFFSET + 3, i); + let b3 = self.columns.get(DECODER_TRACE_OFFSET + 4, i); + let b4 = self.columns.get(DECODER_TRACE_OFFSET + 5, i); + let b5 = self.columns.get(DECODER_TRACE_OFFSET + 6, i); + let b6 = self.columns.get(DECODER_TRACE_OFFSET + 7, i); + + // group with right shift effect grouped by a common prefix + [b6, b5, b4] == [ZERO, ONE, ONE]|| + // u32SPLIT 100_1000 + ([b6, b5, b4, b3, b2, b1, b0] == [ONE, ZERO, ZERO, ONE, ZERO, ZERO, ZERO]) || + // PUSH i.e., 110_0100 + ([b6, b5, b4, b3, b2, b1, b0] == [ONE, ONE, ZERO, ZERO, ONE, ZERO, ZERO]) + } + + // STACK COLUMNS + // -------------------------------------------------------------------------------------------- + + /// Returns the value of the stack depth column at row i. + pub fn stack_depth(&self, i: usize) -> Felt { + self.columns.get_column(STACK_TRACE_OFFSET + B0_COL_IDX)[i] + } + + /// Returns the element at row i in a given stack trace column. + pub fn stack_element(&self, column: usize, i: usize) -> Felt { + self.columns.get_column(STACK_TRACE_OFFSET + column)[i] + } + + /// Returns the address of the top element in the stack overflow table at row i. + pub fn parent_overflow_address(&self, i: usize) -> Felt { + self.columns.get_column(STACK_TRACE_OFFSET + B1_COL_IDX)[i] + } + + /// Returns a flag indicating whether the overflow stack is non-empty. + pub fn is_non_empty_overflow(&self, i: usize) -> bool { + let b0 = self.columns.get_column(STACK_TRACE_OFFSET + B0_COL_IDX)[i]; + let h0 = self.columns.get_column(STACK_TRACE_OFFSET + H0_COL_IDX)[i]; + (b0 - Felt::new(16)) * h0 == ONE + } + + // CHIPLETS COLUMNS + // -------------------------------------------------------------------------------------------- + + /// Returns chiplet column number 0 at row i. + pub fn chiplet_selector_0(&self, i: usize) -> Felt { + self.columns.get_column(CHIPLETS_OFFSET)[i] + } + + /// Returns chiplet column number 1 at row i. + pub fn chiplet_selector_1(&self, i: usize) -> Felt { + self.columns.get_column(CHIPLETS_OFFSET + 1)[i] + } + + /// Returns chiplet column number 2 at row i. 
+ pub fn chiplet_selector_2(&self, i: usize) -> Felt { + self.columns.get_column(CHIPLETS_OFFSET + 2)[i] + } + + /// Returns chiplet column number 3 at row i. + pub fn chiplet_selector_3(&self, i: usize) -> Felt { + self.columns.get_column(CHIPLETS_OFFSET + 3)[i] + } + + /// Returns chiplet column number 4 at row i. + pub fn chiplet_selector_4(&self, i: usize) -> Felt { + self.columns.get_column(CHIPLETS_OFFSET + 4)[i] + } + + /// Returns the (full) state of the hasher chiplet at row i. + pub fn chiplet_hasher_state(&self, i: usize) -> [Felt; STATE_WIDTH] { + let mut state = [ZERO; STATE_WIDTH]; + for (idx, col_idx) in HASHER_STATE_COL_RANGE.enumerate() { + let column = self.columns.get_column(col_idx); + state[idx] = column[i]; + } + state + } + + /// Returns the hasher's node index column at row i + pub fn chiplet_node_index(&self, i: usize) -> Felt { + self.columns.get(HASHER_NODE_INDEX_COL_IDX, i) + } + + /// Returns the bitwise column holding the aggregated value of input `a` at row i. + pub fn chiplet_bitwise_a(&self, i: usize) -> Felt { + self.columns.get_column(BITWISE_A_COL_IDX)[i] + } + + /// Returns the bitwise column holding the aggregated value of input `b` at row i. + pub fn chiplet_bitwise_b(&self, i: usize) -> Felt { + self.columns.get_column(BITWISE_B_COL_IDX)[i] + } + + /// Returns the bitwise column holding the aggregated value of the output at row i. + pub fn chiplet_bitwise_z(&self, i: usize) -> Felt { + self.columns.get_column(BITWISE_OUTPUT_COL_IDX)[i] + } + + /// Returns the i-th row of the chiplet column containing memory context. + pub fn chiplet_memory_ctx(&self, i: usize) -> Felt { + self.columns.get_column(MEMORY_CTX_COL_IDX)[i] + } + + /// Returns the i-th row of the chiplet column containing memory address. + pub fn chiplet_memory_addr(&self, i: usize) -> Felt { + self.columns.get_column(MEMORY_ADDR_COL_IDX)[i] + } + + /// Returns the i-th row of the chiplet column containing clock cycle. + pub fn chiplet_memory_clk(&self, i: usize) -> Felt { + self.columns.get_column(MEMORY_CLK_COL_IDX)[i] + } + + /// Returns the i-th row of the chiplet column containing the zeroth memory value element. + pub fn chiplet_memory_value_0(&self, i: usize) -> Felt { + self.columns.get_column(MEMORY_V_COL_RANGE.start)[i] + } + + /// Returns the i-th row of the chiplet column containing the first memory value element. + pub fn chiplet_memory_value_1(&self, i: usize) -> Felt { + self.columns.get_column(MEMORY_V_COL_RANGE.start + 1)[i] + } + + /// Returns the i-th row of the chiplet column containing the second memory value element. + pub fn chiplet_memory_value_2(&self, i: usize) -> Felt { + self.columns.get_column(MEMORY_V_COL_RANGE.start + 2)[i] + } + + /// Returns the i-th row of the chiplet column containing the third memory value element. + pub fn chiplet_memory_value_3(&self, i: usize) -> Felt { + self.columns.get_column(MEMORY_V_COL_RANGE.start + 3)[i] + } + + /// Returns the i-th row of the kernel chiplet `addr` column. + pub fn chiplet_kernel_addr(&self, i: usize) -> Felt { + self.columns.get_column(CHIPLETS_OFFSET + 5)[i] + } + + /// Returns the i-th row of the chiplet column containing the zeroth element of the kernel + /// procedure root. + pub fn chiplet_kernel_root_0(&self, i: usize) -> Felt { + self.columns.get_column(CHIPLETS_OFFSET + 6)[i] + } + + /// Returns the i-th row of the chiplet column containing the first element of the kernel + /// procedure root. 
+ pub fn chiplet_kernel_root_1(&self, i: usize) -> Felt { + self.columns.get_column(CHIPLETS_OFFSET + 7)[i] + } + + /// Returns the i-th row of the chiplet column containing the second element of the kernel + /// procedure root. + pub fn chiplet_kernel_root_2(&self, i: usize) -> Felt { + self.columns.get_column(CHIPLETS_OFFSET + 8)[i] + } + + /// Returns the i-th row of the chiplet column containing the third element of the kernel + /// procedure root. + pub fn chiplet_kernel_root_3(&self, i: usize) -> Felt { + self.columns.get_column(CHIPLETS_OFFSET + 9)[i] + } + + /// Returns `true` if a row is part of the kernel chiplet. + pub fn is_kernel_row(&self, i: usize) -> bool { + self.chiplet_selector_0(i) == ONE + && self.chiplet_selector_1(i) == ONE + && self.chiplet_selector_2(i) == ONE + && self.chiplet_selector_3(i) == ZERO + } + + // MERKLE PATH HASHING SELECTORS + // -------------------------------------------------------------------------------------------- + + /// Returns `true` if the hasher chiplet flags indicate the initialization of verifying + /// a Merkle path to an old node during Merkle root update procedure (MRUPDATE). + pub fn f_mv(&self, i: usize) -> bool { + (i % HASH_CYCLE_LEN == 0) + && self.chiplet_selector_0(i) == ZERO + && self.chiplet_selector_1(i) == ONE + && self.chiplet_selector_2(i) == ONE + && self.chiplet_selector_3(i) == ZERO + } + + /// Returns `true` if the hasher chiplet flags indicate the continuation of verifying + /// a Merkle path to an old node during Merkle root update procedure (MRUPDATE). + pub fn f_mva(&self, i: usize) -> bool { + (i % HASH_CYCLE_LEN == HASH_CYCLE_LEN - 1) + && self.chiplet_selector_0(i) == ZERO + && self.chiplet_selector_1(i) == ONE + && self.chiplet_selector_2(i) == ONE + && self.chiplet_selector_3(i) == ZERO + } + + /// Returns `true` if the hasher chiplet flags indicate the initialization of verifying + /// a Merkle path to a new node during Merkle root update procedure (MRUPDATE). + pub fn f_mu(&self, i: usize) -> bool { + (i % HASH_CYCLE_LEN == 0) + && self.chiplet_selector_0(i) == ZERO + && self.chiplet_selector_1(i) == ONE + && self.chiplet_selector_2(i) == ONE + && self.chiplet_selector_3(i) == ONE + } + + /// Returns `true` if the hasher chiplet flags indicate the continuation of verifying + /// a Merkle path to a new node during Merkle root update procedure (MRUPDATE). 
+ pub fn f_mua(&self, i: usize) -> bool { + (i % HASH_CYCLE_LEN == HASH_CYCLE_LEN - 1) + && self.chiplet_selector_0(i) == ZERO + && self.chiplet_selector_1(i) == ONE + && self.chiplet_selector_2(i) == ONE + && self.chiplet_selector_3(i) == ONE + } +} diff --git a/air/src/trace/mod.rs b/air/src/trace/mod.rs index fa908671f4..f61e1cae05 100644 --- a/air/src/trace/mod.rs +++ b/air/src/trace/mod.rs @@ -3,6 +3,7 @@ use vm_core::utils::range; pub mod chiplets; pub mod decoder; +pub mod main_trace; pub mod range; pub mod stack; diff --git a/air/src/utils.rs b/air/src/utils.rs index 7cd7a04a03..687b8432fc 100644 --- a/air/src/utils.rs +++ b/air/src/utils.rs @@ -1,6 +1,10 @@ use super::FieldElement; use core::ops::Range; -use vm_core::{utils::collections::Vec, utils::range as create_range}; +use vm_core::utils::{collections::*, range as create_range}; + +// RE-EXPORTS +// ================================================================================================ +pub use vm_core::utils::{collections, string}; // BASIC CONSTRAINT OPERATORS // ================================================================================================ diff --git a/assembly/Cargo.toml b/assembly/Cargo.toml index fa1df8b0ae..150bd4fa7d 100644 --- a/assembly/Cargo.toml +++ b/assembly/Cargo.toml @@ -1,11 +1,12 @@ [package] name = "miden-assembly" -version = "0.7.0" +version = "0.8.0" description = "Miden VM assembly language" authors = ["miden contributors"] readme = "README.md" license = "MIT" repository = "https://github.com/0xPolygonMiden/miden-vm" +documentation = "https://docs.rs/miden-assembly/0.8.0" categories = ["compilers", "no-std"] keywords = ["assembler", "assembly", "language", "miden"] edition = "2021" @@ -21,4 +22,5 @@ std = ["vm-core/std"] [dependencies] num_enum = "0.7" -vm-core = { package = "miden-core", path = "../core", version = "0.7", default-features = false } +tracing = { version = "0.1", default-features = false, features = ["attributes"] } +vm-core = { package = "miden-core", path = "../core", version = "0.8", default-features = false } diff --git a/assembly/README.md b/assembly/README.md index c78f9c97a1..9d26fca606 100644 --- a/assembly/README.md +++ b/assembly/README.md @@ -43,7 +43,7 @@ use.std::math::u64 begin push.1.0 push.2.0 - exec.u64::checked_add + exec.u64::wrapping_add end ``` diff --git a/assembly/src/assembler/context.rs b/assembly/src/assembler/context.rs index 2f3452bc72..6d30ef70c2 100644 --- a/assembly/src/assembler/context.rs +++ b/assembly/src/assembler/context.rs @@ -1,9 +1,11 @@ use super::{ - AssemblyError, BTreeMap, CallSet, CodeBlock, CodeBlockTable, Kernel, LibraryPath, - NamedProcedure, Procedure, ProcedureCache, ProcedureId, ProcedureName, RpoDigest, ToString, - Vec, + AssemblyError, CallSet, CodeBlock, CodeBlockTable, Kernel, LibraryPath, NamedProcedure, + Procedure, ProcedureCache, ProcedureId, ProcedureName, RpoDigest, +}; +use crate::{ + ast::{ModuleAst, ProgramAst}, + utils::{collections::*, string::*}, }; -use crate::ast::{ModuleAst, ProgramAst}; // ASSEMBLY CONTEXT // ================================================================================================ @@ -43,7 +45,8 @@ impl AssemblyContext { /// by the program, and thus, will be able to determine names of imported procedures for error /// reporting purposes. 
pub fn for_program(program: Option<&ProgramAst>) -> Self { - let program_imports = program.map(|p| p.get_imported_procedures_map()).unwrap_or_default(); + let program_imports = + program.map(|p| p.import_info().get_imported_procedures()).unwrap_or_default(); Self { module_stack: vec![ModuleContext::for_program(program_imports)], is_kernel: false, @@ -80,13 +83,23 @@ impl AssemblyContext { /// Returns the name of the procedure by its ID from the procedure map. pub fn get_imported_procedure_name(&self, id: &ProcedureId) -> Option { - if let Some(module) = self.module_stack.first() { + if let Some(module) = self.module_stack.last() { module.proc_map.get(id).cloned() } else { None } } + /// Returns the [Procedure] by its index from the vector of local procedures. + pub fn get_local_procedure(&self, idx: u16) -> Result<&Procedure, AssemblyError> { + let module_context = self.module_stack.last().expect("no modules"); + module_context + .compiled_procs + .get(idx as usize) + .map(|named_proc| named_proc.inner()) + .ok_or_else(|| AssemblyError::local_proc_not_found(idx, &module_context.path)) + } + // STATE MUTATORS // -------------------------------------------------------------------------------------------- @@ -118,7 +131,7 @@ impl AssemblyContext { } // get the imported procedures map - let proc_map = module_ast.get_imported_procedures_map(); + let proc_map = module_ast.import_info().get_imported_procedures(); // push a new module context onto the module stack and return self.module_stack.push(ModuleContext::for_module(module_path, proc_map)); @@ -129,7 +142,7 @@ impl AssemblyContext { /// /// This pops the module off the module stack and return all local procedures of the module /// (both exported and internal) together with the combined callset of module's procedures. - pub fn complete_module(&mut self) -> (Vec, CallSet) { + pub fn complete_module(&mut self) -> Result<(Vec, CallSet), AssemblyError> { let module_ctx = self.module_stack.pop().expect("no modules"); if self.is_kernel && self.module_stack.is_empty() { // if we are compiling a kernel and this is the last module on the module stack, then @@ -141,11 +154,11 @@ impl AssemblyContext { .filter(|proc| proc.is_export()) .map(|proc| proc.mast_root()) .collect::>(); - self.kernel = Some(Kernel::new(&proc_roots)); + self.kernel = Some(Kernel::new(&proc_roots).map_err(AssemblyError::KernelError)?); } // return compiled procedures and callset from the module - (module_ctx.compiled_procs, module_ctx.callset) + Ok((module_ctx.compiled_procs, module_ctx.callset)) } // PROCEDURE PROCESSORS diff --git a/assembly/src/assembler/instruction/ext2_ops.rs b/assembly/src/assembler/instruction/ext2_ops.rs index a5642035d7..20286440f9 100644 --- a/assembly/src/assembler/instruction/ext2_ops.rs +++ b/assembly/src/assembler/instruction/ext2_ops.rs @@ -1,4 +1,4 @@ -use super::{AssemblyError, CodeBlock, Operation::*, SpanBuilder, ZERO}; +use super::{AssemblyError, CodeBlock, Operation::*, SpanBuilder}; use vm_core::AdviceInjector::Ext2Inv; /// Given a stack in the following initial configuration [b1, b0, a1, a0, ...] where a = (a0, a1) @@ -60,9 +60,9 @@ pub fn ext2_div(span: &mut SpanBuilder) -> Result, AssemblyErr Ext2Mul, // [b1', b0', 0, 1, a1, a0, ...] MovUp2, // [0, b1', b0', 1, a1, a0, ...] Eqz, // [1, b1', b0', 1, a1, a0, ...] - Assert(ZERO), // [b1', b0', 1, a1, a0, ...] + Assert(0), // [b1', b0', 1, a1, a0, ...] MovUp2, // [1, b1', b0', a1, a0, ...] - Assert(ZERO), // [b1', b0', a1, a0, ...] + Assert(0), // [b1', b0', a1, a0, ...] 
Ext2Mul, // [b1', b0', a1*b1', a0*b0', ...] Drop, // [b0', a1*b1', a0*b0'...] Drop // [a1*b1', a0*b0'...] @@ -120,9 +120,9 @@ pub fn ext2_inv(span: &mut SpanBuilder) -> Result, AssemblyErr Ext2Mul, // [a1', a0', 0, 1, ...] MovUp2, // [0, a1', a0', 1, ...] Eqz, // [1, a1', a0', 1, ...] - Assert(ZERO), // [a1', a0', 1, ...] + Assert(0), // [a1', a0', 1, ...] MovUp2, // [1, a1', a0', ...] - Assert(ZERO), // [a1', a0', ...] + Assert(0), // [a1', a0', ...] ]; span.add_ops(ops) } diff --git a/assembly/src/assembler/instruction/field_ops.rs b/assembly/src/assembler/instruction/field_ops.rs index 7246b38be3..1f3be07ac2 100644 --- a/assembly/src/assembler/instruction/field_ops.rs +++ b/assembly/src/assembler/instruction/field_ops.rs @@ -1,8 +1,9 @@ use super::{ - validate_param, AssemblyError, CodeBlock, Felt, FieldElement, Operation::*, SpanBuilder, - StarkField, ONE, ZERO, + validate_param, AssemblyError, CodeBlock, Felt, FieldElement, Operation::*, SpanBuilder, ONE, + ZERO, }; use crate::MAX_EXP_BITS; +use vm_core::AdviceInjector::ILog2; /// Field element representing TWO in the base field of the VM. const TWO: Felt = Felt::new(2); @@ -13,7 +14,7 @@ const TWO: Felt = Felt::new(2); /// Asserts that the top two words in the stack are equal. /// /// VM cycles: 11 cycles -pub fn assertw(span: &mut SpanBuilder, err_code: Felt) -> Result, AssemblyError> { +pub fn assertw(span: &mut SpanBuilder, err_code: u32) -> Result, AssemblyError> { span.add_ops([ MovUp4, Eq, @@ -122,7 +123,7 @@ pub fn append_pow2_op(span: &mut SpanBuilder) { // drop the top two elements bit and exp value of the latest bit. span.push_ops([Drop, Drop]); // taking `b` to the top and asserting if it's equal to ZERO after all the right shifts. - span.push_ops([Swap, Eqz, Assert(ZERO)]); + span.push_ops([Swap, Eqz, Assert(0)]); } // EXPONENTIATION OPERATION @@ -151,7 +152,7 @@ pub fn exp(span: &mut SpanBuilder, num_pow_bits: u8) -> Result span.push_ops([Drop, Drop]); // taking `b` to the top and asserting if it's equal to ZERO after all the right shifts. - span.push_ops([Swap, Eqz, Assert(ZERO)]); + span.push_ops([Swap, Eqz, Assert(0)]); Ok(None) } @@ -235,6 +236,49 @@ fn perform_exp_for_small_power(span: &mut SpanBuilder, pow: u64) { } } +// LOGARITHMIC OPERATIONS +// ================================================================================================ + +/// Appends a sequence of operations to calculate the base 2 integer logarithm of the stack top +/// element, using non-deterministic technique (i.e. it takes help of advice provider). +/// +/// This operation takes 44 VM cycles. +/// +/// # Errors +/// Returns an error if the logarithm argument (top stack element) equals ZERO. +pub fn ilog2(span: &mut SpanBuilder) -> Result, AssemblyError> { + span.push_advice_injector(ILog2); + span.push_op(AdvPop); // [ilog2, n, ...] + + // compute the power-of-two for the value given in the advice tape (17 cycles) + span.push_op(Dup0); + append_pow2_op(span); + // => [pow2, ilog2, n, ...] + + #[rustfmt::skip] + let ops = [ + // split the words into u32 halves to use the bitwise operations (4 cycles) + MovUp2, U32split, MovUp2, U32split, + // => [pow2_high, pow2_low, n_high, n_low, ilog2, ...] + + // only one of the two halves in pow2 has a bit set, drop the other (9 cycles) + Dup1, Eqz, Dup0, MovDn3, + // => [drop_low, pow2_high, pow2_low, drop_low, n_high, n_low, ilog2, ...] + CSwap, Drop, MovDn3, CSwap, Drop, + // => [n_half, pow2_half, ilog2, ...] 
+ + // set all bits to 1 lower than pow2_half (00010000 -> 00011111) + Swap, Pad, Incr, Incr, Mul, Pad, Incr, Neg, Add, + // => [pow2_half * 2 - 1, n_half, ilog2, ...] + Dup1, U32and, + // => [m, n_half, ilog2, ...] if ilog2 calculation was correct, m should be equal to n_half + Eq, Assert(0), + // => [ilog2, ...] + ]; + + span.add_ops(ops) +} + // COMPARISON OPERATIONS // ================================================================================================ @@ -275,11 +319,11 @@ pub fn eqw(span: &mut SpanBuilder) -> Result, AssemblyError> { ]) } -/// Appends a sequence of operations to to pop the top 2 elements off the stack and do a "less +/// Appends a sequence of operations to pop the top 2 elements off the stack and do a "less /// than" comparison. The stack is expected to be arranged as [b, a, ...] (from the top). A value /// of 1 is pushed onto the stack if a < b. Otherwise, 0 is pushed. /// -/// This operation takes 17 VM cycles. +/// This operation takes 14 VM cycles. pub fn lt(span: &mut SpanBuilder) -> Result, AssemblyError> { // Split both elements into high and low bits // 3 cycles @@ -287,7 +331,7 @@ pub fn lt(span: &mut SpanBuilder) -> Result, AssemblyError> { // compare the high bit values and put comparison result flags on the stack for eq and lt // then reorder in preparation for the low-bit comparison (a_lo < b_lo) - // 9 cycles + // 6 cycles check_lt_high_bits(span); // check a_lo < b_lo, resulting in 1 if true and 0 otherwise @@ -305,7 +349,7 @@ pub fn lt(span: &mut SpanBuilder) -> Result, AssemblyError> { /// than or equal" comparison. The stack is expected to be arranged as [b, a, ...] (from the top). /// A value of 1 is pushed onto the stack if a <= b. Otherwise, 0 is pushed. /// -/// This operation takes 18 VM cycles. +/// This operation takes 15 VM cycles. pub fn lte(span: &mut SpanBuilder) -> Result, AssemblyError> { // Split both elements into high and low bits // 3 cycles @@ -313,7 +357,7 @@ pub fn lte(span: &mut SpanBuilder) -> Result, AssemblyError> { // compare the high bit values and put comparison result flags on the stack for eq and lt // then reorder in preparation for the low-bit comparison (a_lo <= b_lo) - // 9 cycles + // 6 cycles check_lt_high_bits(span); // check a_lo <= b_lo, resulting in 1 if true and 0 otherwise @@ -331,7 +375,7 @@ pub fn lte(span: &mut SpanBuilder) -> Result, AssemblyError> { /// than" comparison. The stack is expected to be arranged as [b, a, ...] (from the top). A value /// of 1 is pushed onto the stack if a > b. Otherwise, 0 is pushed. /// -/// This operation takes 18 VM cycles. +/// This operation takes 15 VM cycles. pub fn gt(span: &mut SpanBuilder) -> Result, AssemblyError> { // Split both elements into high and low bits // 3 cycles @@ -339,7 +383,7 @@ pub fn gt(span: &mut SpanBuilder) -> Result, AssemblyError> { // compare the high bit values and put comparison result flags on the stack for eq and gt // then reorder in preparation for the low-bit comparison (b_lo < a_lo) - // 10 cycles + // 7 cycles check_gt_high_bits(span); // check b_lo < a_lo, resulting in 1 if true and 0 otherwise @@ -357,7 +401,7 @@ pub fn gt(span: &mut SpanBuilder) -> Result, AssemblyError> { /// than or equal" comparison. The stack is expected to be arranged as [b, a, ...] (from the top). /// A value of 1 is pushed onto the stack if a >= b. Otherwise, 0 is pushed. /// -/// This operation takes 19 VM cycles. +/// This operation takes 16 VM cycles. 
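Note (illustration only, not part of the diff): the `ilog2` sequence above verifies a non-deterministically supplied logarithm rather than computing one. A plain-Rust sketch of the property the in-VM check enforces — the hint `v` is accepted exactly when `2^v <= n < 2^(v+1)` — is:

```rust
fn verify_ilog2(n: u32, v: u32) -> bool {
    assert!(n != 0, "ilog2 is undefined for zero");
    assert!(v < 32, "a u32 value has at most 31 significant bit positions");
    let pow2 = 1u64 << v;  // 2^v, widened so v = 31 does not overflow
    let upper = pow2 << 1; // 2^(v + 1)
    pow2 <= n as u64 && (n as u64) < upper
}

fn main() {
    let n: u32 = 37;   // 0b100101
    let v = n.ilog2(); // 5 -- the value an advice provider would supply
    assert!(verify_ilog2(n, v));
    assert!(!verify_ilog2(n, v + 1)); // a wrong hint is rejected
    assert!(!verify_ilog2(n, v - 1));
}
```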
pub fn gte(span: &mut SpanBuilder) -> Result, AssemblyError> { // Split both elements into high and low bits // 3 cycles @@ -365,7 +409,7 @@ pub fn gte(span: &mut SpanBuilder) -> Result, AssemblyError> { // compare the high bit values and put comparison result flags on the stack for eq and gt // then reorder in preparation for the low-bit comparison (b_lo <= a_lo) - // 10 cycles + // 7 cycles check_gt_high_bits(span); // check b_lo <= a_lo, resulting in 1 if true and 0 otherwise @@ -413,17 +457,22 @@ fn split_elements(span: &mut SpanBuilder) { /// /// The resulting stack after this operation is: [eq_flag, lt_flag, ...]. /// -/// This operation takes 6 cycles. +/// This operation takes 3 cycles. fn check_lt_and_eq(span: &mut SpanBuilder) { // calculate a - b // stack: [b, a, ...] => [underflow_flag, result, ...] span.push_op(U32sub); - // Put 1 on the stack if the underflow flag was not set (there was no underflow) - span.push_ops([Dup0, Not]); - // move the result to the top of the stack and check if it was zero - span.push_ops([MovUp2, Eqz]); - // set the equality flag to 1 if there was no underflow and the result was zero - span.push_op(And); + // after the u32sub operation we can be in one of 3 states: + // - [1, result > 0] - this means that we underflowed (a < b) + // - [0, result > 0] - this means that we didn't underflow (a > b) + // - [0, result = 0] - this means that comparing values are equal (a = b) + // + // The situation of `[1, 0]` is impossible because we can't reach 0 with underflow: subtracting + // maximum type value from minimum value (which is 0) will result in 1, so to reach 0 we need + // to subtract value that is bigger than the maximum type value, which is impossible. + // For example `0u64.wrapping_sub(u64::MAX) = 1`, but `0u64.wrapping_sub(u64::MAX + 1)` is + // impossible. + span.push_ops([Swap, Eqz]); } /// This is a helper function for comparison operations that perform a less-than check a < b @@ -442,7 +491,7 @@ fn check_lt_and_eq(span: &mut SpanBuilder) { /// - hi_flag_eq: 1 if the high bit values were equal; 0 otherwise /// - hi_flag_lt: 1 if a's high-bit values were less than b's (a_hi < b_hi); 0 otherwise /// -/// This operation takes 9 cycles. +/// This operation takes 6 cycles. fn check_lt_high_bits(span: &mut SpanBuilder) { // reorder the stack to check a_hi < b_hi span.push_op(MovUp2); @@ -533,7 +582,7 @@ fn check_lte(span: &mut SpanBuilder) { /// - hi_flag_eq: 1 if the high bit values were equal; 0 otherwise /// - hi_flag_gt: 1 if a's high-bit values were greater than b's (a_hi > b_hi); 0 otherwise /// -/// This function takes 10 cycles. +/// This function takes 7 cycles. 
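Note (illustration only, not part of the diff): the rewritten `check_lt_and_eq` above relies on the fact that a u32 subtraction can land in only three `(borrow, result)` states, which is what lets the eq flag be derived without extra checks. A plain-Rust sketch of that case analysis:

```rust
/// Returns the (borrow, result) pair of a - b as a u32 subtraction.
fn classify(a: u32, b: u32) -> (bool, u32) {
    let (result, borrow) = a.overflowing_sub(b); // borrow == true means a < b
    (borrow, result)
}

fn main() {
    assert_eq!(classify(3, 5), (true, 3u32.wrapping_sub(5))); // a < b: borrow set, result non-zero
    assert_eq!(classify(5, 3), (false, 2));                   // a > b: no borrow, result non-zero
    assert_eq!(classify(4, 4), (false, 0));                   // a == b: no borrow, result zero
    // (true, 0) cannot occur: a borrow means b > a, so a - b wraps to a non-zero value.
}
```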
fn check_gt_high_bits(span: &mut SpanBuilder) { // reorder the stack to check b_hi < a_hi span.push_ops([Swap, MovDn2]); diff --git a/assembly/src/assembler/instruction/mod.rs b/assembly/src/assembler/instruction/mod.rs index 06875bc411..d2771b2e65 100644 --- a/assembly/src/assembler/instruction/mod.rs +++ b/assembly/src/assembler/instruction/mod.rs @@ -4,7 +4,7 @@ use super::{ }; use crate::utils::bound_into_included_u64; use core::ops::RangeBounds; -use vm_core::{Decorator, FieldElement, StarkField}; +use vm_core::{Decorator, FieldElement}; mod adv_ops; mod crypto_ops; @@ -37,20 +37,14 @@ impl Assembler { } let result = match instruction { - Instruction::Assert => span.add_op(Assert(ZERO)), - Instruction::AssertWithError(err_code) => span.add_op(Assert(Felt::from(*err_code))), - Instruction::AssertEq => span.add_ops([Eq, Assert(ZERO)]), - Instruction::AssertEqWithError(err_code) => { - span.add_ops([Eq, Assert(Felt::from(*err_code))]) - } - Instruction::AssertEqw => field_ops::assertw(span, ZERO), - Instruction::AssertEqwWithError(err_code) => { - field_ops::assertw(span, Felt::from(*err_code)) - } - Instruction::Assertz => span.add_ops([Eqz, Assert(ZERO)]), - Instruction::AssertzWithError(err_code) => { - span.add_ops([Eqz, Assert(Felt::from(*err_code))]) - } + Instruction::Assert => span.add_op(Assert(0)), + Instruction::AssertWithError(err_code) => span.add_op(Assert(*err_code)), + Instruction::AssertEq => span.add_ops([Eq, Assert(0)]), + Instruction::AssertEqWithError(err_code) => span.add_ops([Eq, Assert(*err_code)]), + Instruction::AssertEqw => field_ops::assertw(span, 0), + Instruction::AssertEqwWithError(err_code) => field_ops::assertw(span, *err_code), + Instruction::Assertz => span.add_ops([Eqz, Assert(0)]), + Instruction::AssertzWithError(err_code) => span.add_ops([Eqz, Assert(*err_code)]), Instruction::Add => span.add_op(Add), Instruction::AddImm(imm) => field_ops::add_imm(span, *imm), @@ -68,6 +62,7 @@ impl Assembler { Instruction::Exp => field_ops::exp(span, 64), Instruction::ExpImm(pow) => field_ops::exp_imm(span, *pow), Instruction::ExpBitLength(num_pow_bits) => field_ops::exp(span, *num_pow_bits), + Instruction::ILog2 => field_ops::ilog2(span), Instruction::Not => span.add_op(Not), Instruction::And => span.add_op(And), @@ -112,8 +107,6 @@ impl Assembler { Instruction::U32Cast => span.add_ops([U32split, Drop]), Instruction::U32Split => span.add_op(U32split), - Instruction::U32CheckedAdd => u32_ops::u32add(span, Checked, None), - Instruction::U32CheckedAddImm(v) => u32_ops::u32add(span, Checked, Some(*v)), Instruction::U32OverflowingAdd => u32_ops::u32add(span, Overflowing, None), Instruction::U32OverflowingAddImm(v) => u32_ops::u32add(span, Overflowing, Some(*v)), Instruction::U32WrappingAdd => u32_ops::u32add(span, Wrapping, None), @@ -121,15 +114,11 @@ impl Assembler { Instruction::U32OverflowingAdd3 => span.add_op(U32add3), Instruction::U32WrappingAdd3 => span.add_ops([U32add3, Drop]), - Instruction::U32CheckedSub => u32_ops::u32sub(span, Checked, None), - Instruction::U32CheckedSubImm(v) => u32_ops::u32sub(span, Checked, Some(*v)), Instruction::U32OverflowingSub => u32_ops::u32sub(span, Overflowing, None), Instruction::U32OverflowingSubImm(v) => u32_ops::u32sub(span, Overflowing, Some(*v)), Instruction::U32WrappingSub => u32_ops::u32sub(span, Wrapping, None), Instruction::U32WrappingSubImm(v) => u32_ops::u32sub(span, Wrapping, Some(*v)), - Instruction::U32CheckedMul => u32_ops::u32mul(span, Checked, None), - Instruction::U32CheckedMulImm(v) => u32_ops::u32mul(span, 
Checked, Some(*v)), Instruction::U32OverflowingMul => u32_ops::u32mul(span, Overflowing, None), Instruction::U32OverflowingMulImm(v) => u32_ops::u32mul(span, Overflowing, Some(*v)), Instruction::U32WrappingMul => u32_ops::u32mul(span, Wrapping, None), @@ -137,58 +126,37 @@ impl Assembler { Instruction::U32OverflowingMadd => span.add_op(U32madd), Instruction::U32WrappingMadd => span.add_ops([U32madd, Drop]), - Instruction::U32CheckedDiv => u32_ops::u32div(span, Checked, None), - Instruction::U32CheckedDivImm(v) => u32_ops::u32div(span, Checked, Some(*v)), - Instruction::U32UncheckedDiv => u32_ops::u32div(span, Unchecked, None), - Instruction::U32UncheckedDivImm(v) => u32_ops::u32div(span, Unchecked, Some(*v)), - Instruction::U32CheckedMod => u32_ops::u32mod(span, Checked, None), - Instruction::U32CheckedModImm(v) => u32_ops::u32mod(span, Checked, Some(*v)), - Instruction::U32UncheckedMod => u32_ops::u32mod(span, Unchecked, None), - Instruction::U32UncheckedModImm(v) => u32_ops::u32mod(span, Unchecked, Some(*v)), - Instruction::U32CheckedDivMod => u32_ops::u32divmod(span, Checked, None), - Instruction::U32CheckedDivModImm(v) => u32_ops::u32divmod(span, Checked, Some(*v)), - Instruction::U32UncheckedDivMod => u32_ops::u32divmod(span, Unchecked, None), - Instruction::U32UncheckedDivModImm(v) => u32_ops::u32divmod(span, Unchecked, Some(*v)), - - Instruction::U32CheckedAnd => span.add_op(U32and), - Instruction::U32CheckedOr => span.add_ops([Dup1, Dup1, U32and, Neg, Add, Add]), - Instruction::U32CheckedXor => span.add_op(U32xor), - Instruction::U32CheckedNot => u32_ops::u32not(span), - Instruction::U32CheckedShl => u32_ops::u32shl(span, Checked, None), - Instruction::U32CheckedShlImm(v) => u32_ops::u32shl(span, Checked, Some(*v)), - Instruction::U32UncheckedShl => u32_ops::u32shl(span, Unchecked, None), - Instruction::U32UncheckedShlImm(v) => u32_ops::u32shl(span, Unchecked, Some(*v)), - Instruction::U32CheckedShr => u32_ops::u32shr(span, Checked, None), - Instruction::U32CheckedShrImm(v) => u32_ops::u32shr(span, Checked, Some(*v)), - Instruction::U32UncheckedShr => u32_ops::u32shr(span, Unchecked, None), - Instruction::U32UncheckedShrImm(v) => u32_ops::u32shr(span, Unchecked, Some(*v)), - Instruction::U32CheckedRotl => u32_ops::u32rotl(span, Checked, None), - Instruction::U32CheckedRotlImm(v) => u32_ops::u32rotl(span, Checked, Some(*v)), - Instruction::U32UncheckedRotl => u32_ops::u32rotl(span, Unchecked, None), - Instruction::U32UncheckedRotlImm(v) => u32_ops::u32rotl(span, Unchecked, Some(*v)), - Instruction::U32CheckedRotr => u32_ops::u32rotr(span, Checked, None), - Instruction::U32CheckedRotrImm(v) => u32_ops::u32rotr(span, Checked, Some(*v)), - Instruction::U32UncheckedRotr => u32_ops::u32rotr(span, Unchecked, None), - Instruction::U32UncheckedRotrImm(v) => u32_ops::u32rotr(span, Unchecked, Some(*v)), - Instruction::U32CheckedPopcnt => u32_ops::u32popcnt(span, Checked), - Instruction::U32UncheckedPopcnt => u32_ops::u32popcnt(span, Unchecked), - - Instruction::U32CheckedEq => u32_ops::u32eq(span, None), - Instruction::U32CheckedEqImm(v) => u32_ops::u32eq(span, Some(*v)), - Instruction::U32CheckedNeq => u32_ops::u32neq(span, None), - Instruction::U32CheckedNeqImm(v) => u32_ops::u32neq(span, Some(*v)), - Instruction::U32CheckedLt => u32_ops::u32lt(span, Checked), - Instruction::U32UncheckedLt => u32_ops::u32lt(span, Unchecked), - Instruction::U32CheckedLte => u32_ops::u32lte(span, Checked), - Instruction::U32UncheckedLte => u32_ops::u32lte(span, Unchecked), - Instruction::U32CheckedGt => 
u32_ops::u32gt(span, Checked), - Instruction::U32UncheckedGt => u32_ops::u32gt(span, Unchecked), - Instruction::U32CheckedGte => u32_ops::u32gte(span, Checked), - Instruction::U32UncheckedGte => u32_ops::u32gte(span, Unchecked), - Instruction::U32CheckedMin => u32_ops::u32min(span, Checked), - Instruction::U32UncheckedMin => u32_ops::u32min(span, Unchecked), - Instruction::U32CheckedMax => u32_ops::u32max(span, Checked), - Instruction::U32UncheckedMax => u32_ops::u32max(span, Unchecked), + Instruction::U32Div => u32_ops::u32div(span, None), + Instruction::U32DivImm(v) => u32_ops::u32div(span, Some(*v)), + Instruction::U32Mod => u32_ops::u32mod(span, None), + Instruction::U32ModImm(v) => u32_ops::u32mod(span, Some(*v)), + Instruction::U32DivMod => u32_ops::u32divmod(span, None), + Instruction::U32DivModImm(v) => u32_ops::u32divmod(span, Some(*v)), + + Instruction::U32And => span.add_op(U32and), + Instruction::U32Or => span.add_ops([Dup1, Dup1, U32and, Neg, Add, Add]), + Instruction::U32Xor => span.add_op(U32xor), + Instruction::U32Not => u32_ops::u32not(span), + Instruction::U32Shl => u32_ops::u32shl(span, None), + Instruction::U32ShlImm(v) => u32_ops::u32shl(span, Some(*v)), + Instruction::U32Shr => u32_ops::u32shr(span, None), + Instruction::U32ShrImm(v) => u32_ops::u32shr(span, Some(*v)), + Instruction::U32Rotl => u32_ops::u32rotl(span, None), + Instruction::U32RotlImm(v) => u32_ops::u32rotl(span, Some(*v)), + Instruction::U32Rotr => u32_ops::u32rotr(span, None), + Instruction::U32RotrImm(v) => u32_ops::u32rotr(span, Some(*v)), + Instruction::U32Popcnt => u32_ops::u32popcnt(span), + Instruction::U32Clz => u32_ops::u32clz(span), + Instruction::U32Ctz => u32_ops::u32ctz(span), + Instruction::U32Clo => u32_ops::u32clo(span), + Instruction::U32Cto => u32_ops::u32cto(span), + + Instruction::U32Lt => u32_ops::u32lt(span), + Instruction::U32Lte => u32_ops::u32lte(span), + Instruction::U32Gt => u32_ops::u32gt(span), + Instruction::U32Gte => u32_ops::u32gte(span), + Instruction::U32Min => u32_ops::u32min(span), + Instruction::U32Max => u32_ops::u32max(span), // ----- stack manipulation ----------------------------------------------------------- Instruction::Drop => span.add_op(Drop), @@ -317,6 +285,7 @@ impl Assembler { // ----- STARK proof verification ----------------------------------------------------- Instruction::FriExt2Fold4 => span.add_op(FriE2F4), + Instruction::RCombBase => span.add_op(RCombBase), // ----- exec/call instructions ------------------------------------------------------- Instruction::ExecLocal(idx) => self.exec_local(*idx, ctx), @@ -327,6 +296,8 @@ impl Assembler { Instruction::SysCall(id) => self.syscall(id, ctx), Instruction::DynExec => self.dynexec(), Instruction::DynCall => self.dyncall(), + Instruction::ProcRefLocal(idx) => self.procref_local(*idx, ctx, span), + Instruction::ProcRefImported(id) => self.procref_imported(id, ctx, span), // ----- debug decorators ------------------------------------------------------------- Instruction::Breakpoint => { @@ -343,6 +314,18 @@ impl Assembler { } Ok(None) } + + // ----- emit instruction ------------------------------------------------------------- + Instruction::Emit(event_id) => { + span.push_decorator(Decorator::Event(*event_id)); + Ok(None) + } + + // ----- trace instruction ------------------------------------------------------------ + Instruction::Trace(trace_id) => { + span.push_decorator(Decorator::Trace(*trace_id)); + Ok(None) + } }; // compute and update the cycle count of the instruction which just finished executing 
diff --git a/assembly/src/assembler/instruction/procedures.rs b/assembly/src/assembler/instruction/procedures.rs index 43bcee08b2..1801caac39 100644 --- a/assembly/src/assembler/instruction/procedures.rs +++ b/assembly/src/assembler/instruction/procedures.rs @@ -1,4 +1,8 @@ -use super::{Assembler, AssemblyContext, AssemblyError, CodeBlock, ProcedureId, RpoDigest}; +use super::{ + Assembler, AssemblyContext, AssemblyError, CodeBlock, Operation, ProcedureId, RpoDigest, + SpanBuilder, +}; +use crate::utils::collections::*; // PROCEDURE INVOCATIONS // ================================================================================================ @@ -133,4 +137,43 @@ impl Assembler { // create a new CALL block whose target is DYN Ok(Some(CodeBlock::new_dyncall())) } + + pub(super) fn procref_local( + &self, + proc_idx: u16, + context: &mut AssemblyContext, + span: &mut SpanBuilder, + ) -> Result, AssemblyError> { + // get root of the compiled local procedure and add it to the callset to be able to use + // dynamic instructions with this procedure later + let proc_root = context.register_local_call(proc_idx, false)?.mast_root(); + + // create an array with `Push` operations containing root elements + let ops: Vec = proc_root.iter().map(|elem| Operation::Push(*elem)).collect(); + span.add_ops(ops) + } + + pub(super) fn procref_imported( + &self, + proc_id: &ProcedureId, + context: &mut AssemblyContext, + span: &mut SpanBuilder, + ) -> Result, AssemblyError> { + // make sure the procedure is in procedure cache + self.ensure_procedure_is_in_cache(proc_id, context)?; + + // get the procedure from the assembler + let proc_cache = self.proc_cache.borrow(); + let proc = proc_cache.get_by_id(proc_id).expect("procedure not in cache"); + + // add the root of the procedure to the callset to be able to use dynamic instructions with + // this procedure later + context.register_external_call(proc, false)?; + + // get root of the cimported procedure + let proc_root = proc.mast_root(); + // create an array with `Push` operations containing root elements + let ops: Vec = proc_root.iter().map(|elem| Operation::Push(*elem)).collect(); + span.add_ops(ops) + } } diff --git a/assembly/src/assembler/instruction/u32_ops.rs b/assembly/src/assembler/instruction/u32_ops.rs index 156fe8d3f9..c322dcbe16 100644 --- a/assembly/src/assembler/instruction/u32_ops.rs +++ b/assembly/src/assembler/instruction/u32_ops.rs @@ -5,6 +5,7 @@ use super::{ SpanBuilder, ZERO, }; use crate::{MAX_U32_ROTATE_VALUE, MAX_U32_SHIFT_VALUE}; +use vm_core::AdviceInjector::{U32Clo, U32Clz, U32Cto, U32Ctz}; // ENUMS // ================================================================================================ @@ -12,8 +13,6 @@ use crate::{MAX_U32_ROTATE_VALUE, MAX_U32_SHIFT_VALUE}; /// This enum is intended to determine the mode of operation passed to the parsing function #[derive(PartialEq, Eq)] pub enum U32OpMode { - Checked, - Unchecked, Wrapping, Overflowing, } @@ -77,10 +76,6 @@ pub fn u32assertw( /// inserted. Please refer to the docs of `handle_arithmetic_operation` for more details. /// /// VM cycles per mode: -/// - u32checked_add: 4 cycles -/// - u32checked_add.b: -/// - 6 cycles if b = 1 -/// - 5 cycles if b != 1 /// - u32wrapping_add: 2 cycles /// - u32wrapping_add.b: 3 cycles /// - u32overflowing_add: 1 cycles @@ -99,10 +94,6 @@ pub fn u32add( /// inserted. Please refer to the docs of `handle_arithmetic_operation` for more details. 
/// /// VM cycles per mode: -/// - u32checked_sub: 4 cycles -/// - u32checked_sub.b: -/// - 6 cycles if b = 1 -/// - 5 cycles if b != 1 /// - u32wrapping_sub: 2 cycles /// - u32wrapping_sub.b: 3 cycles /// - u32overflowing_sub: 1 cycles @@ -121,10 +112,6 @@ pub fn u32sub( /// inserted. Please refer to the docs of `handle_arithmetic_operation` for more details. /// /// VM cycles per mode: -/// - u32checked_mul: 4 cycles -/// - u32checked_mul.b: -/// - 6 cycles if b = 1 -/// - 5 cycles if b != 1 /// - u32wrapping_mul: 2 cycles /// - u32wrapping_mul.b: 3 cycles /// - u32overflowing_mul: 1 cycles @@ -140,66 +127,98 @@ pub fn u32mul( /// Translates u32div assembly instructions to VM operations. /// /// VM cycles per mode: -/// - u32checked_div: 3 cycles -/// - u32checked_div.b: -/// - 5 cycles if b is 1 -/// - 4 cycles if b is not 1 -/// - u32unchecked_div: 2 cycles -/// - u32unchecked_div.b: +/// - u32div: 2 cycles +/// - u32div.b: /// - 4 cycles if b is 1 /// - 3 cycles if b is not 1 pub fn u32div( span: &mut SpanBuilder, - op_mode: U32OpMode, imm: Option, ) -> Result, AssemblyError> { - handle_division(span, op_mode, imm)?; + handle_division(span, imm)?; span.add_op(Drop) } /// Translates u32mod assembly instructions to VM operations. /// /// VM cycles per mode: -/// - u32checked_mod: 4 cycles -/// - u32checked_mod.b: -/// - 6 cycles if b is 1 -/// - 5 cycles if b is not 1 -/// - u32unchecked_mod: 3 cycle -/// - u32unchecked_mod.b: +/// - u32mod: 3 cycle +/// - u32mod.b: /// - 5 cycles if b is 1 /// - 4 cycles if b is not 1 pub fn u32mod( span: &mut SpanBuilder, - op_mode: U32OpMode, imm: Option, ) -> Result, AssemblyError> { - handle_division(span, op_mode, imm)?; + handle_division(span, imm)?; span.add_ops([Swap, Drop]) } /// Translates u32divmod assembly instructions to VM operations. /// /// VM cycles per mode: -/// - u32checked_divmod: 2 cycles -/// - u32checked_divmod.b: -/// - 4 cycles if b is 1 -/// - 3 cycles if b is not 1 -/// - u32unchecked_divmod: 1 cycle -/// - u32unchecked_divmod.b: +/// - u32divmod: 1 cycle +/// - u32divmod.b: /// - 3 cycles if b is 1 /// - 2 cycles if b is not 1 pub fn u32divmod( span: &mut SpanBuilder, + imm: Option, +) -> Result, AssemblyError> { + handle_division(span, imm) +} + +// ARITHMETIC OPERATIONS - HELPERS +// ================================================================================================ + +/// Handles U32ADD, U32SUB, and U32MUL operations in wrapping, and overflowing modes, including +/// handling of immediate parameters. +/// +/// Specifically handles these specific inputs per the spec. +/// - Wrapping: does not check if the inputs are u32 values; overflow or underflow bits are +/// discarded. +/// - Overflowing: does not check if the inputs are u32 values; overflow or underflow bits are +/// pushed onto the stack. +fn handle_arithmetic_operation( + span: &mut SpanBuilder, + op: Operation, op_mode: U32OpMode, imm: Option, ) -> Result, AssemblyError> { - handle_division(span, op_mode, imm) + if let Some(imm) = imm { + push_u32_value(span, imm); + } + + span.push_op(op); + + // in the wrapping mode, drop high 32 bits + if matches!(op_mode, U32OpMode::Wrapping) { + span.add_op(Drop) + } else { + Ok(None) + } +} + +/// Handles common parts of u32div, u32mod, and u32divmod operations, including handling of +/// immediate parameters. 
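Note (illustration only, not part of the diff): a plain-Rust sketch of the two u32 arithmetic modes that `handle_arithmetic_operation` above still distinguishes now that the checked mode is gone — wrapping discards the carry limb, overflowing keeps it as an extra output:

```rust
fn u32_add_modes(a: u32, b: u32) -> (u32, (u32, u32)) {
    let wide = a as u64 + b as u64;
    let wrapping = wide as u32;                           // low 32 bits only, carry dropped
    let overflowing = (wide as u32, (wide >> 32) as u32); // (low 32 bits, carry bit)
    (wrapping, overflowing)
}

fn main() {
    let (w, (lo, carry)) = u32_add_modes(u32::MAX, 2);
    assert_eq!(w, 1);                // wrapping: (2^32 - 1) + 2 wraps to 1
    assert_eq!((lo, carry), (1, 1)); // overflowing: same low limb plus a carry of 1
}
```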
+fn handle_division( + span: &mut SpanBuilder, + imm: Option, +) -> Result, AssemblyError> { + if let Some(imm) = imm { + if imm == 0 { + return Err(AssemblyError::division_by_zero()); + } + push_u32_value(span, imm); + } + + span.add_op(U32div) } // BITWISE OPERATIONS // ================================================================================================ -/// Translates u32checked_not assembly instruction to VM operations. +/// Translates u32not assembly instruction to VM operations. /// /// The reason this method works is because 2^32 -1 provides a bit mask of ones, which after /// subtracting the element, flips the bits of the original value to perform a bitwise NOT. @@ -223,135 +242,91 @@ pub fn u32not(span: &mut SpanBuilder) -> Result, AssemblyError /// Translates u32shl assembly instructions to VM operations. /// /// The operation is implemented by putting a power of 2 on the stack, then multiplying it with -/// the value to be shifted and splitting the result. For checked variants, the shift value is -/// asserted to be between 0-31 and the value to be shifted is asserted to be a 32-bit value. +/// the value to be shifted and splitting the result. /// /// VM cycles per mode: -/// - u32checked_shl: 19 cycles -/// - u32checked_shl.b: 4 cycles -/// - u32unchecked_shl: 18 cycles -/// - u32unchecked_shl.b: 3 cycles -pub fn u32shl( - span: &mut SpanBuilder, - op_mode: U32OpMode, - imm: Option, -) -> Result, AssemblyError> { - prepare_bitwise::(span, imm, op_mode, [U32mul, Drop]) +/// - u32shl: 18 cycles +/// - u32shl.b: 3 cycles +pub fn u32shl(span: &mut SpanBuilder, imm: Option) -> Result, AssemblyError> { + prepare_bitwise::(span, imm)?; + if imm != Some(0) { + span.add_ops([U32mul, Drop]) + } else { + Ok(None) + } } /// Translates u32shr assembly instructions to VM operations. /// /// The operation is implemented by putting a power of 2 on the stack, then dividing the value to -/// be shifted by it and returning the quotient. For checked variants, the shift value is asserted -/// to be between 0-31 and the value to be shifted is asserted to be a 32-bit value. +/// be shifted by it and returning the quotient. /// /// VM cycles per mode: -/// - u32checked_shr: 19 cycles -/// - u32checked_shr.b: 4 cycles -/// - u32unchecked_shr: 18 cycles -/// - u32unchecked_shr.b: 3 cycles -pub fn u32shr( - span: &mut SpanBuilder, - op_mode: U32OpMode, - imm: Option, -) -> Result, AssemblyError> { - prepare_bitwise::(span, imm, op_mode, [U32div, Drop]) +/// - u32shr: 18 cycles +/// - u32shr.b: 3 cycles +pub fn u32shr(span: &mut SpanBuilder, imm: Option) -> Result, AssemblyError> { + prepare_bitwise::(span, imm)?; + if imm != Some(0) { + span.add_ops([U32div, Drop]) + } else { + Ok(None) + } } /// Translates u32rotl assembly instructions to VM operations. /// /// The base operation is implemented by putting a power of 2 on the stack, then multiplying the -/// value to be shifted by it and adding the overflow limb to the shifted limb. For the checked -/// variants, the shift value is asserted to be between 0-31 and the value to be shifted is -/// asserted to be a 32-bit value. +/// value to be shifted by it and adding the overflow limb to the shifted limb. 
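Note (illustration only, not part of the diff): the shift and rotate sequences above avoid native shifts by multiplying or dividing by a power of two and then keeping the appropriate 32-bit limb. A plain-Rust sketch of the same arithmetic, under the assumption that `0 < b <= 31`:

```rust
fn shl_via_mul(n: u32, b: u32) -> u32 {
    let wide = (n as u64) * (1u64 << b); // full 64-bit product, as after U32mul
    wide as u32                          // keep the low limb, drop the overflow limb
}

fn rotl_via_mul(n: u32, b: u32) -> u32 {
    let wide = (n as u64) * (1u64 << b);
    (wide as u32).wrapping_add((wide >> 32) as u32) // fold the overflow limb back into the low bits
}

fn shr_via_div(n: u32, b: u32) -> u32 {
    n / (1u32 << b) // quotient is the shifted value; the remainder is dropped
}

fn main() {
    assert_eq!(shl_via_mul(0x8000_0001, 1), 0x8000_0001u32 << 1);
    assert_eq!(rotl_via_mul(0x8000_0001, 1), 0x8000_0001u32.rotate_left(1));
    assert_eq!(shr_via_div(0x8000_0001, 4), 0x8000_0001u32 >> 4);
}
```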
/// /// VM cycles per mode: -/// - u32checked_rotl: 19 cycles -/// - u32checked_rotl.b: 4 cycles -/// - u32unchecked_rotl: 18 cycles -/// - u32unchecked_rotl.b: 3 cycles +/// - u32rotl: 18 cycles +/// - u32rotl.b: 3 cycles pub fn u32rotl( span: &mut SpanBuilder, - op_mode: U32OpMode, imm: Option, ) -> Result, AssemblyError> { - prepare_bitwise::(span, imm, op_mode, [U32mul, Add]) + prepare_bitwise::(span, imm)?; + if imm != Some(0) { + span.add_ops([U32mul, Add]) + } else { + Ok(None) + } } /// Translates u32rotr assembly instructions to VM operations. /// /// The base operation is implemented by multiplying the value to be shifted by 2^(32-b), where -/// b is the shift amount, then adding the overflow limb to the shifted limb. For the checked -/// variants, the shift value is asserted to be between 0-31 and the value to be shifted is -/// asserted to be a 32-bit value. +/// b is the shift amount, then adding the overflow limb to the shifted limb. /// /// VM cycles per mode: -/// - u32checked_rotr: 31 cycles -/// - u32checked_rotr.b: 6 cycles -/// - u32unchecked_rotr: 22 cycles -/// - u32unchecked_rotr.b: 3 cycles +/// - u32rotr: 22 cycles +/// - u32rotr.b: 3 cycles pub fn u32rotr( span: &mut SpanBuilder, - op_mode: U32OpMode, imm: Option, ) -> Result, AssemblyError> { - match (imm, op_mode) { - (Some(0), U32OpMode::Checked) => { - // if rotation is performed by 0, just verify that stack top is u32 - span.push_ops([Pad, U32assert2(ZERO), Drop]); - return Ok(None); - } - (Some(imm), U32OpMode::Checked) => { - validate_param(imm, 1..=MAX_U32_ROTATE_VALUE)?; - span.push_ops([Push(Felt::new(1 << (32 - imm))), U32assert2(ZERO)]); - } - (Some(0), U32OpMode::Unchecked) => { + match imm { + Some(0) => { // if rotation is performed by 0, do nothing (Noop) span.push_op(Noop); return Ok(None); } - (Some(imm), U32OpMode::Unchecked) => { + Some(imm) => { validate_param(imm, 1..=MAX_U32_ROTATE_VALUE)?; span.push_op(Push(Felt::new(1 << (32 - imm)))); } - (None, U32OpMode::Checked) => { - #[rustfmt::skip] - span.push_ops([ - // Verify both b and a are u32. - U32assert2(ZERO), - - // Calculate 32 - b and assert that the shift value b <= 31. - Push(Felt::from(MAX_U32_ROTATE_VALUE)), Dup1, U32sub, Not, Assert(ZERO), Incr, Dup1, - - // If 32-b = 32, replace it with 0. - Eqz, Not, CSwap, Drop, - ]); - append_pow2_op(span); - span.push_op(Swap); - } - (None, U32OpMode::Unchecked) => { + None => { span.push_ops([Push(Felt::new(32)), Swap, U32sub, Drop]); append_pow2_op(span); } - _ => unreachable!("unsupported operation mode"), } span.add_ops([U32mul, Add]) } /// Translates u32popcnt assembly instructions to VM operations. /// -/// VM cycles per mode: -/// - u32checked_popcnt: 36 cycles -/// - u32unchecked_popcnt: 33 cycles -pub fn u32popcnt( - span: &mut SpanBuilder, - op_mode: U32OpMode, -) -> Result, AssemblyError> { - match op_mode { - U32OpMode::Checked => span.push_ops([Pad, U32assert2(ZERO), Drop]), - U32OpMode::Unchecked => (), - _ => unreachable!("unsupported operation mode"), - } +/// This operation takes 33 cycles. +pub fn u32popcnt(span: &mut SpanBuilder) -> Result, AssemblyError> { #[rustfmt::skip] let ops = [ // i = i - ((i >> 1) & 0x55555555); @@ -383,74 +358,52 @@ pub fn u32popcnt( span.add_ops(ops) } -/// Handles U32ADD, U32SUB, and U32MUL operations in checked, wrapping, and overflowing modes, -/// including handling of immediate parameters. +/// Translates `u32clz` assembly instruction to VM operations. 
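Note (illustration only, not part of the diff): the `u32popcnt` sequence above follows the usual SWAR population count that its inline comments describe. A plain-Rust rendering of those steps, for reference rather than an op-for-op transcription:

```rust
fn popcnt_swar(mut i: u32) -> u32 {
    i = i - ((i >> 1) & 0x5555_5555);                 // pairwise bit counts
    i = (i & 0x3333_3333) + ((i >> 2) & 0x3333_3333); // sums per 4-bit group
    i = (i + (i >> 4)) & 0x0f0f_0f0f;                 // sums per byte
    i.wrapping_mul(0x0101_0101) >> 24                 // fold the four byte sums into the top byte
}

fn main() {
    for n in [0u32, 1, 0xffff_ffff, 0x8000_0001, 12345] {
        assert_eq!(popcnt_swar(n), n.count_ones());
    }
}
```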
`u32clz` counts the number of +/// leading zeros of the value using non-deterministic technique (i.e. it takes help of advice +/// provider). /// -/// Specifically handles these specific inputs per the spec. -/// - Checked: fails if either of the inputs or the output is not a u32 value. -/// - Wrapping: does not check if the inputs are u32 values; overflow or underflow bits are -/// discarded. -/// - Overflowing: does not check if the inputs are u32 values; overflow or underflow bits are -/// pushed onto the stack. -fn handle_arithmetic_operation( - span: &mut SpanBuilder, - op: Operation, - op_mode: U32OpMode, - imm: Option, -) -> Result, AssemblyError> { - let mut drop_high_bits = false; - let mut assert_u32_res = false; +/// This operation takes 37 VM cycles. +pub fn u32clz(span: &mut SpanBuilder) -> Result, AssemblyError> { + span.push_advice_injector(U32Clz); + span.push_op(AdvPop); // [clz, n, ...] - if let Some(imm) = imm { - push_u32_value(span, imm); - } - - match op_mode { - U32OpMode::Checked => { - span.push_op(U32assert2(ZERO)); - assert_u32_res = true; - } - U32OpMode::Wrapping => { - drop_high_bits = true; - } - U32OpMode::Overflowing => {} - _ => unreachable!("unsupported operation mode"), - } + calculate_clz(span) +} - span.push_op(op); +/// Translates `u32ctz` assembly instruction to VM operations. `u32ctz` counts the number of +/// trailing zeros of the value using non-deterministic technique (i.e. it takes help of advice +/// provider). +/// +/// This operation takes 34 VM cycles. +pub fn u32ctz(span: &mut SpanBuilder) -> Result, AssemblyError> { + span.push_advice_injector(U32Ctz); + span.push_op(AdvPop); // [ctz, n, ...] - if assert_u32_res { - span.add_ops([Eqz, Assert(ZERO)]) - } else if drop_high_bits { - span.add_op(Drop) - } else { - Ok(None) - } + calculate_ctz(span) } -/// Handles common parts of u32div, u32mod, and u32divmod operations in checked and unchecked modes, -/// including handling of immediate parameters. -fn handle_division( - span: &mut SpanBuilder, - op_mode: U32OpMode, - imm: Option, -) -> Result, AssemblyError> { - if let Some(imm) = imm { - if imm == 0 { - return Err(AssemblyError::division_by_zero()); - } - push_u32_value(span, imm); - } +/// Translates `u32clo` assembly instruction to VM operations. `u32clo` counts the number of +/// leading ones of the value using non-deterministic technique (i.e. it takes help of advice +/// provider). +/// +/// This operation takes 36 VM cycles. +pub fn u32clo(span: &mut SpanBuilder) -> Result, AssemblyError> { + span.push_advice_injector(U32Clo); + span.push_op(AdvPop); // [clo, n, ...] - match op_mode { - U32OpMode::Checked => { - span.push_op(U32assert2(ZERO)); - } - U32OpMode::Unchecked => {} - _ => unreachable!("unsupported operation mode"), - } + calculate_clo(span) +} - span.add_op(U32div) +/// Translates `u32cto` assembly instruction to VM operations. `u32cto` counts the number of +/// trailing ones of the value using non-deterministic technique (i.e. it takes help of advice +/// provider). +/// +/// This operation takes 33 VM cycles. +pub fn u32cto(span: &mut SpanBuilder) -> Result, AssemblyError> { + span.push_advice_injector(U32Cto); + span.push_op(AdvPop); // [cto, n, ...] + + calculate_cto(span) } // BITWISE OPERATIONS - HELPERS @@ -458,99 +411,331 @@ fn handle_division( /// Mutate the first two elements of the stack from `[b, a, ..]` into `[2^b, a, ..]`, with `b` /// either as a provided immediate value, or as an element that already exists in the stack. 
-/// -/// If the used mode is `checked`, the function will assert that both `[b, a]` are valid `u32`. -/// This function is equivalent to a bit shift operation, so the exponent shouldn't cause a number -/// to be greater than `u32::MAX`; therefore, the maximum valid value must be `31`, as defined in -/// the helper constants. -/// -/// This function supports only checked and unchecked modes; if some other mode is provided, it -/// will panic. fn prepare_bitwise( span: &mut SpanBuilder, imm: Option, - op_mode: U32OpMode, - final_ops: [Operation; 2], -) -> Result, AssemblyError> { - match (imm, op_mode) { - (Some(0), U32OpMode::Checked) => { - // if shift/rotation is performed by 0, just verify that stack top is u32 - span.push_ops([Pad, U32assert2(ZERO), Drop]); - return Ok(None); - } - (Some(imm), U32OpMode::Checked) => { - validate_param(imm, 1..=MAX_VALUE)?; - span.push_ops([Push(Felt::new(1 << imm)), U32assert2(ZERO)]); - } - (Some(0), U32OpMode::Unchecked) => { +) -> Result<(), AssemblyError> { + match imm { + Some(0) => { // if shift/rotation is performed by 0, do nothing (Noop) span.push_op(Noop); - return Ok(None); } - (Some(imm), U32OpMode::Unchecked) => { + Some(imm) => { + validate_param(imm, 1..=MAX_VALUE)?; span.push_op(Push(Felt::new(1 << imm))); } - (None, U32OpMode::Checked) => { - // Assume the dynamic shift value b is on top of the stack. + None => { append_pow2_op(span); - span.push_op(U32assert2(ZERO)); } - (None, U32OpMode::Unchecked) => append_pow2_op(span), - _ => unreachable!("unsupported operation mode"), } - span.add_ops(final_ops) + Ok(()) } -// COMPARISON OPERATIONS -// ================================================================================================ +/// Appends relevant operations to the span block for the correctness check of the `U32Clz` +/// injector. +/// The idea is to compare the actual value with a bitmask consisting of `clz` leading ones to +/// check that every bit in `clz` leading bits is zero and `1` additional one to check that +/// `clz + 1`'th leading bit is one: +/// ```text +/// 000000000...000100...10 <-- actual value +/// └─ clz zeros ─┘ +/// +/// 1111111111...11100...00 <-- bitmask +/// └─ clz ones ─┘│ +/// └─ additional one +/// ``` +/// After applying a `u32and` bit operation on this values the result's leading `clz` bits should +/// be zeros, otherwise there were some ones in initial value's `clz` leading bits, and therefore +/// `clz` value is incorrect. `clz + 1`'th leading bit of the result should be one, otherwise this +/// bit in the initial value wasn't one and `clz` value is incorrect: +/// ```text +/// 0000...00|1|10...10 +/// & +/// 1111...11|1|00...00 +/// ↓↓↓↓ ↓↓ ↓ +/// 0000...00|1|00...00 +/// ``` +/// +/// --- +/// The stack is expected to be arranged as follows (from the top): +/// - number of the leading zeros (`clz`), 1 element +/// - value for which we count the number of leading zeros (`n`), 1 element +/// +/// After the operations are executed, the stack will be arranged as follows: +/// - number of the leading zeros (`clz`), 1 element +/// +/// `[clz, n, ... ] -> [clz, ... ]` +/// +/// VM cycles: 36 +fn calculate_clz(span: &mut SpanBuilder) -> Result, AssemblyError> { + // [clz, n, ...] + #[rustfmt::skip] + let ops_group_1 = [ + Swap, Push(32u8.into()), Dup2, Neg, Add // [32 - clz, n, clz, ...] + ]; + span.push_ops(ops_group_1); -/// Translates u32checked_eq assembly instruction to VM operations. -/// -/// Specifically we test the first two numbers to be u32, then perform a EQ to check the equality. 
-/// -/// VM cycles per mode: -/// - u32checked_eq: 2 cycles -/// - u32checked_eq.b: 3 cycles -pub fn u32eq(span: &mut SpanBuilder, imm: Option) -> Result, AssemblyError> { - if let Some(imm) = imm { - push_u32_value(span, imm); - } + append_pow2_op(span); // [pow2(32 - clz), n, clz, ...] + + #[rustfmt::skip] + let ops_group_2 = [ + Push(Felt::new(u32::MAX as u64 + 1)), // [2^32, pow2(32 - clz), n, clz, ...] + + Dup1, Neg, Add, // [2^32 - pow2(32 - clz), pow2(32 - clz), n, clz, ...] + // `2^32 - pow2(32 - clz)` is equal to `clz` leading ones and `32 - clz` + // zeros: + // 1111111111...1110000...0 + // └─ `clz` ones ─┘ + + Swap, Push(2u8.into()), U32div, Drop, // [pow2(32 - clz) / 2, 2^32 - pow2(32 - clz), n, clz, ...] + // pow2(32 - clz) / 2 is equal to `clz` leading + // zeros, `1` one and all other zeros. + + Swap, Dup1, Add, // [bit_mask, pow2(32 - clz) / 2, n, clz, ...] + // 1111111111...111000...0 <-- bitmask + // └─ clz ones ─┘│ + // └─ additional one + + MovUp2, U32and, // [m, pow2(32 - clz) / 2, clz] + // If calcualtion of `clz` is correct, m should be equal to + // pow2(32 - clz) / 2 + + Eq, Assert(0) // [clz, ...] + ]; - span.add_ops([U32assert2(ZERO), Eq]) + span.add_ops(ops_group_2) } -/// Translates u32checked_neq assembly instruction to VM operations. -/// -/// Specifically we test the first two numbers to be u32, then perform a `EQ NOT` to check the -/// equality. -/// -/// VM cycles per mode: -/// - u32checked_neq: 3 cycles -/// - u32checked_neq.b: 4 cycles -pub fn u32neq( - span: &mut SpanBuilder, - imm: Option, -) -> Result, AssemblyError> { - if let Some(imm) = imm { - push_u32_value(span, imm); - } +/// Appends relevant operations to the span block for the correctness check of the `U32Clo` +/// injector. +/// The idea is to compare the actual value with a bitmask consisting of `clo` leading ones to +/// check that every bit in `clo` leading bits is one and `1` additional one to check that +/// `clo + 1`'th leading bit is zero: +/// ```text +/// 11111111...111010...10 <-- actual value +/// └─ clo ones ─┘ +/// +/// 111111111...11100...00 <-- bitmask +/// └─ clo ones ─┘│ +/// └─ additional one +/// ``` +/// After applying a `u32and` bit operation on this values the result's leading `clo` bits should +/// be ones, otherwise there were some zeros in initial value's `clo` leading bits, and therefore +/// `clo` value is incorrect. `clo + 1`'th leading bit of the result should be zero, otherwise this +/// bit in the initial value wasn't zero and `clo` value is incorrect: +/// ```text +/// 1111...11|0|10...10 +/// & +/// 1111...11|1|00...00 +/// ↓↓↓↓ ↓↓ ↓ +/// 1111...11|0|00...00 +/// ``` +/// +/// --- +/// The stack is expected to be arranged as follows (from the top): +/// - number of the leading ones (`clo`), 1 element +/// - value for which we count the number of leading ones (`n`), 1 element +/// +/// After the operations are executed, the stack will be arranged as follows: +/// - number of the leading ones (`clo`), 1 element +/// +/// `[clo, n, ... ] -> [clo, ... ]` +/// +/// VM cycles: 35 +fn calculate_clo(span: &mut SpanBuilder) -> Result, AssemblyError> { + // [clo, n, ...] + #[rustfmt::skip] + let ops_group_1 = [ + Swap, Push(32u8.into()), Dup2, Neg, Add // [32 - clo, n, clo, ...] + ]; + span.push_ops(ops_group_1); + + append_pow2_op(span); // [pow2(32 - clo), n, clo, ...] + + #[rustfmt::skip] + let ops_group_2 = [ + Push(Felt::new(u32::MAX as u64 + 1)), // [2^32, pow2(32 - clo), n, clo, ...] 
- span.add_ops([U32assert2(ZERO), Eq, Not]) + Dup1, Neg, Add, // [2^32 - pow2(32 - clo), pow2(32 - clo), n, clo, ...] + // `2^32 - pow2(32 - clo)` is equal to `clo` leading ones and `32 - clo` + // zeros: + // 11111111...1110000...0 + // └─ clo ones ─┘ + + Swap, Push(2u8.into()), U32div, Drop, // [pow2(32 - clo) / 2, 2^32 - pow2(32 - clo), n, clo, ...] + // pow2(32 - clo) / 2 is equal to `clo` leading + // zeros, `1` one and all other zeros. + + Dup1, Add, // [bit_mask, 2^32 - pow2(32 - clo), n, clo, ...] + // 111111111...111000...0 <-- bitmask + // └─ clo ones ─┘│ + // └─ additional one + + MovUp2, U32and, // [m, 2^32 - pow2(32 - clo), clo] + // If calcualtion of `clo` is correct, m should be equal to + // 2^32 - pow2(32 - clo) + + Eq, Assert(0) // [clo, ...] + ]; + + span.add_ops(ops_group_2) } +/// Appends relevant operations to the span block for the correctness check of the `U32Ctz` +/// injector. +/// The idea is to compare the actual value with a bitmask consisting of `ctz` trailing ones to +/// check that every bit in `ctz` trailing bits is zero and `1` additional one to check that +/// `ctz + 1`'th trailing bit is one: +/// ```text +/// 10..001000000000000000 <-- actual value +/// └─ ctz zeros ─┘ +/// +/// 00..0011111111111...11 <-- bitmask +/// │└─ ctz ones ─┘ +/// └─ additional one +/// ``` +/// After applying a `u32and` bit operation on this values the result's trailing `ctz` bits should +/// be zeros, otherwise there were some ones in initial value's `ctz` trailing bits, and therefore +/// `ctz` value is incorrect. `ctz + 1`'th trailing bit of the result should be one, otherwise this +/// bit in the initial value wasn't one and `ctz` value is incorrect: +/// ```text +/// 10...10|1|00...00 +/// & +/// 00...00|1|11...11 +/// = ↓ ↓↓ ↓↓ +/// 00...00|1|00...00 +/// ``` +/// +/// --- +/// The stack is expected to be arranged as follows (from the top): +/// - number of the trailing zeros (`ctz`), 1 element +/// - value for which we count the number of trailing zeros (`n`), 1 element +/// +/// After the operations are executed, the stack will be arranged as follows: +/// - number of the trailing zeros (`ctz`), 1 element +/// +/// `[ctz, n, ... ] -> [ctz, ... ]` +/// +/// VM cycles: 33 +fn calculate_ctz(span: &mut SpanBuilder) -> Result, AssemblyError> { + // [ctz, n, ...] + #[rustfmt::skip] + let ops_group_1 = [ + Swap, Dup1, // [ctz, n, ctz, ...] + ]; + span.push_ops(ops_group_1); + + append_pow2_op(span); // [pow2(ctz), n, ctz, ...] + + #[rustfmt::skip] + let ops_group_2 = [ + Dup0, // [pow2(ctz), pow2(ctz), n, ctz, ...] + // pow2(ctz) is equal to all zeros with only one on the `ctz`'th trailing position + + Pad, Incr, Neg, Add, // [pow2(ctz) - 1, pow2(ctz), n, ctz, ...] + + Swap, U32split, Drop, // [pow2(ctz), pow2(ctz) - 1, n, ctz, ...] + // We need to drop the high bits of `pow2(ctz)` because if `ctz` + // equals 32 `pow2(ctz)` will exceed the u32. Also in that case there + // is no need to check the dividing one, since it is absent (value is + // all 0's). + + Dup0, MovUp2, Add, // [bit_mask, pow2(ctz), n, ctz] + // 00..001111111111...11 <-- bitmask + // │└─ ctz ones ─┘ + // └─ additional one + + MovUp2, U32and, // [m, pow2(ctz), ctz] + // If calcualtion of `ctz` is correct, m should be equal to + // pow2(ctz) + + Eq, Assert(0), // [ctz, ...] + ]; + + span.add_ops(ops_group_2) +} + +/// Appends relevant operations to the span block for the correctness check of the `U32Cto` +/// injector. 
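Note (illustration only, not part of the diff): a plain-Rust sketch of the consistency check behind the `U32Clz` advice injector described above. Given a claimed count of leading zeros `clz`, the value is ANDed with a mask of `clz` leading ones plus one additional set bit, and only that extra bit may survive; the same shape of check, with the roles of ones/zeros or leading/trailing swapped, backs the `clo`, `ctz`, and `cto` variants.

```rust
fn verify_clz(n: u32, clz: u32) -> bool {
    if clz == 32 {
        return n == 0; // all 32 bits must be zero
    }
    let single_bit = 1u32 << (31 - clz); // the bit just below the clz leading zeros
    let leading_ones = !(single_bit.wrapping_mul(2).wrapping_sub(1)); // clz leading ones
    let mask = leading_ones | single_bit; // leading ones plus the one additional bit
    (n & mask) == single_bit // leading bits are all zero and the next bit is one
}

fn main() {
    let n: u32 = 0x0016_0000;
    assert_eq!(n.leading_zeros(), 11);
    assert!(verify_clz(n, 11));  // the honest hint passes
    assert!(!verify_clz(n, 10)); // wrong hints are rejected
    assert!(!verify_clz(n, 12));
    assert!(verify_clz(0, 32));
}
```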
+/// The idea is to compare the actual value with a bitmask consisting of `cto` trailing ones to +/// check that every bit in `cto` trailing bits is one and `1` additional one to check that +/// `cto + 1`'th trailing bit is zero: +/// ```text +/// 10..01011111111111111 <-- actual value +/// └─ cto ones ─┘ +/// +/// 00..001111111111...11 <-- bitmask +/// │└─ cto ones ─┘ +/// └─ additional one +/// ``` +/// After applying a `u32and` bit operation on this values the result's trailing `cto` bits should +/// be ones, otherwise there were some zeros in initial value's `cto` trailing bits, and therefore +/// `cto` value is incorrect. `cto + 1`'th trailing bit of the result should be zero, otherwise +/// this bit in the initial value wasn't zero and `cto` value is incorrect: +/// ```text +/// 10...11|0|11...11 +/// & +/// 00...00|1|11...11 +/// = ↓ ↓↓ ↓↓ +/// 00...00|0|11...11 +/// ``` +/// +/// --- +/// The stack is expected to be arranged as follows (from the top): +/// - number of the trailing ones (`cto`), 1 element +/// - value for which we count the number of trailing zeros (`n`), 1 element +/// +/// After the operations are executed, the stack will be arranged as follows: +/// - number of the trailing zeros (`cto`), 1 element +/// +/// `[cto, n, ... ] -> [cto, ... ]` +/// +/// VM cycles: 32 +fn calculate_cto(span: &mut SpanBuilder) -> Result, AssemblyError> { + // [cto, n, ...] + #[rustfmt::skip] + let ops_group_1 = [ + Swap, Dup1, // [cto, n, cto, ...] + ]; + span.push_ops(ops_group_1); + + append_pow2_op(span); // [pow2(cto), n, cto, ...] + + #[rustfmt::skip] + let ops_group_2 = [ + Dup0, // [pow2(cto), pow2(cto), n, cto, ...] + // pow2(cto) is equal to all zeros with only one on the `cto`'th trailing position + + Pad, Incr, Neg, Add, // [pow2(cto) - 1, pow2(cto), n, cto, ...] + + Swap, U32split, Drop, // [pow2(cto), pow2(cto) - 1, n, cto, ...] + // We need to drop the high bits of `pow2(cto)` because if `cto` + // equals 32 `pow2(cto)` will exceed the u32. Also in that case there + // is no need to check the dividing zero, since it is absent (value + // is all 1's). + + Dup1, Add, // [bit_mask, pow2(cto) - 1, n, cto] + // 00..001111111111...11 <-- bitmask + // │└─ cto ones ─┘ + // └─ additional one + + MovUp2, U32and, // [m, pow2(cto) - 1, cto] + // If calcualtion of `cto` is correct, m should be equal to + // pow2(cto) - 1 + + Eq, Assert(0), // [cto, ...] + ]; + + span.add_ops(ops_group_2) +} + +// COMPARISON OPERATIONS +// ================================================================================================ + /// Translates u32lt assembly instructions to VM operations. /// -/// Specifically we test the first two numbers to be u32, then perform a `U32SUB EQZ NOT` to check -/// the underflow flag. -/// -/// VM cycles per mode: -/// - u32checked_lt: 6 cycles -/// - u32unchecked_lt 5 cycles -pub fn u32lt( - span: &mut SpanBuilder, - op_mode: U32OpMode, -) -> Result, AssemblyError> { - handle_u32_and_unchecked_mode(span, op_mode); +/// This operation takes 3 cycles. +pub fn u32lt(span: &mut SpanBuilder) -> Result, AssemblyError> { compute_lt(span); Ok(None) @@ -558,18 +743,8 @@ pub fn u32lt( /// Translates u32lte assembly instructions to VM operations. /// -/// Specifically we test the first two numbers to be u32, then perform a gt check and flip the -/// results. 
-/// -/// VM cycles per mode: -/// - u32checked_lte: 8 cycles -/// - u32unchecked_lte: 7 cycles -pub fn u32lte( - span: &mut SpanBuilder, - op_mode: U32OpMode, -) -> Result, AssemblyError> { - handle_u32_and_unchecked_mode(span, op_mode); - +/// This operation takes 5 cycles. +pub fn u32lte(span: &mut SpanBuilder) -> Result, AssemblyError> { // Compute the lt with reversed number to get a gt check span.push_op(Swap); compute_lt(span); @@ -580,18 +755,8 @@ pub fn u32lte( /// Translates u32gt assembly instructions to VM operations. /// -/// Specifically we test the first two numbers to be u32, then perform a lt check with the -/// numbers swapped. -/// -/// VM cycles per mode: -/// - u32checked_gt: 7 cycles -/// - u32unchecked_gt: 6 cycles -pub fn u32gt( - span: &mut SpanBuilder, - op_mode: U32OpMode, -) -> Result, AssemblyError> { - handle_u32_and_unchecked_mode(span, op_mode); - +/// This operation takes 4 cycles. +pub fn u32gt(span: &mut SpanBuilder) -> Result, AssemblyError> { // Reverse the numbers so we can get a gt check. span.push_op(Swap); @@ -602,18 +767,8 @@ pub fn u32gt( /// Translates u32gte assembly instructions to VM operations. /// -/// Specifically we test the first two numbers to be u32, then compute a lt check and flip the -/// results. -/// -/// VM cycles per mode: -/// - u32checked_gte: 7 cycles -/// - u32unchecked_gte: 6 cycles -pub fn u32gte( - span: &mut SpanBuilder, - op_mode: U32OpMode, -) -> Result, AssemblyError> { - handle_u32_and_unchecked_mode(span, op_mode); - +/// This operation takes 4 cycles. +pub fn u32gte(span: &mut SpanBuilder) -> Result, AssemblyError> { compute_lt(span); // Flip the final results to get the gte results. @@ -622,19 +777,13 @@ pub fn u32gte( /// Translates u32min assembly instructions to VM operations. /// -/// Specifically, we test the first two numbers to be u32 (U32SPLIT NOT ASSERT), subtract the top -/// value from the second to the top value (U32SUB), check the underflow flag (EQZ), and perform a -/// conditional swap (CSWAP) to have the max number in front. Then we finally drop the top element -/// to keep the min. +/// Specifically, we subtract the top value from the second to the top value (U32SUB), check the +/// underflow flag (EQZ), and perform a conditional swap (CSWAP) to have the max number in front. +/// Then we finally drop the top element to keep the min. /// -/// VM cycles per mode: -/// - u32checked_min: 9 cycles -/// - u32unchecked_min: 8 cycles -pub fn u32min( - span: &mut SpanBuilder, - op_mode: U32OpMode, -) -> Result, AssemblyError> { - compute_max_and_min(span, op_mode); +/// This operation takes 8 cycles. +pub fn u32min(span: &mut SpanBuilder) -> Result, AssemblyError> { + compute_max_and_min(span); // Drop the max and keep the min span.add_op(Drop) @@ -642,19 +791,13 @@ pub fn u32min( /// Translates u32max assembly instructions to VM operations. /// -/// Specifically, we test the first two values to be u32 (U32SPLIT NOT ASSERT), subtract the top -/// value from the second to the top value (U32SUB), check the underflow flag (EQZ), and perform -/// a conditional swap (CSWAP) to have the max number in front. then we finally drop the 2nd -/// element to keep the max. +/// Specifically, we subtract the top value from the second to the top value (U32SUB), check the +/// underflow flag (EQZ), and perform a conditional swap (CSWAP) to have the max number in front. +/// Then we finally drop the 2nd element to keep the max. 
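Note (illustration only, not part of the diff): the min/max selection described above duplicates the operands, uses the borrow flag of a u32 subtraction, and performs a conditional swap so the maximum ends up on top; `u32min` and `u32max` then simply drop one of the two. A plain-Rust sketch of that selection logic (not the stack ordering):

```rust
fn u32_min_max(a: u32, b: u32) -> (u32, u32) {
    let (_, a_lt_b) = a.overflowing_sub(b); // borrow flag: true exactly when a < b
    if a_lt_b {
        (a, b) // (min, max), mirroring the conditional swap
    } else {
        (b, a)
    }
}

fn main() {
    assert_eq!(u32_min_max(3, 7), (3, 7));
    assert_eq!(u32_min_max(7, 3), (3, 7));
    assert_eq!(u32_min_max(5, 5), (5, 5));
}
```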
/// -/// VM cycles per mode: -/// - u32checked_max: 10 cycles -/// - u32unchecked_max: 9 cycles -pub fn u32max( - span: &mut SpanBuilder, - op_mode: U32OpMode, -) -> Result, AssemblyError> { - compute_max_and_min(span, op_mode); +/// This operation takes 9 cycles. +pub fn u32max(span: &mut SpanBuilder) -> Result, AssemblyError> { + compute_max_and_min(span); // Drop the min and keep the max span.add_ops([Swap, Drop]) @@ -663,32 +806,20 @@ pub fn u32max( // COMPARISON OPERATIONS - HELPERS // ================================================================================================ -/// Handles u32 assertion and unchecked mode for any u32 operation. -fn handle_u32_and_unchecked_mode(span: &mut SpanBuilder, op_mode: U32OpMode) { - if op_mode == U32OpMode::Checked { - span.push_op(U32assert2(ZERO)); - } -} - /// Inserts the VM operations to check if the second element is less than -/// the top element. This takes 5 cycles. +/// the top element. This takes 3 cycles. fn compute_lt(span: &mut SpanBuilder) { span.push_ops([ U32sub, Swap, Drop, // Perform the operations - Eqz, Not, // Check the underflow flag ]) } -/// Duplicate the top two elements in the stack and check both are u32, and determine the min -/// and max between them. +/// Duplicate the top two elements in the stack and determine the min and max between them. /// /// The maximum number will be at the top of the stack and minimum will be at the 2nd index. -fn compute_max_and_min(span: &mut SpanBuilder, op_mode: U32OpMode) { +fn compute_max_and_min(span: &mut SpanBuilder) { // Copy top two elements of the stack. span.push_ops([Dup1, Dup1]); - if op_mode == U32OpMode::Checked { - span.push_op(U32assert2(ZERO)); - } #[rustfmt::skip] span.push_ops([ diff --git a/assembly/src/assembler/mod.rs b/assembly/src/assembler/mod.rs index af92112779..67686db4b4 100644 --- a/assembly/src/assembler/mod.rs +++ b/assembly/src/assembler/mod.rs @@ -1,11 +1,12 @@ use super::{ - ast::{Instruction, ModuleAst, Node, ProcedureAst, ProgramAst}, + ast::{instrument, Instruction, ModuleAst, Node, ProcedureAst, ProgramAst}, btree_map, crypto::hash::RpoDigest, AssemblyError, BTreeMap, CallSet, CodeBlock, CodeBlockTable, Felt, Kernel, Library, LibraryError, LibraryPath, Module, NamedProcedure, Operation, Procedure, ProcedureId, - ProcedureName, Program, ToString, Vec, ONE, ZERO, + ProcedureName, Program, ONE, ZERO, }; +use crate::utils::collections::*; use core::{borrow::Borrow, cell::RefCell}; use vm_core::{utils::group_vector_elements, Decorator, DecoratorList}; @@ -139,6 +140,7 @@ impl Assembler { /// /// # Errors /// Returns an error if the compilation of the specified program fails. + #[instrument("compile_ast", skip_all)] pub fn compile_ast(&self, program: &ProgramAst) -> Result { // compile the program let mut context = AssemblyContext::for_program(Some(program)); @@ -194,6 +196,9 @@ impl Assembler { /// - If a module with the same path already exists in the module stack of the /// [AssemblyContext]. /// - If a lock to the [ProcedureCache] can not be attained. + #[instrument(level = "trace", + name = "compile_module", + fields(module = path.unwrap_or(&LibraryPath::anon_path()).path()), skip_all)] pub fn compile_module( &self, module: &ModuleAst, @@ -240,7 +245,7 @@ impl Assembler { for proc_ast in module.procs().iter() { self.compile_procedure(proc_ast, context)?; } - let (module_procs, module_callset) = context.complete_module(); + let (module_procs, module_callset) = context.complete_module()?; // add the compiled procedures to the assembler's cache. 
the procedures are added to the // cache only if: diff --git a/assembly/src/assembler/module_provider.rs b/assembly/src/assembler/module_provider.rs index 7f2172fa69..f25d0b33d6 100644 --- a/assembly/src/assembler/module_provider.rs +++ b/assembly/src/assembler/module_provider.rs @@ -1,4 +1,5 @@ -use super::{BTreeMap, Library, LibraryError, Module, ProcedureId, Vec}; +use super::{Library, LibraryError, Module, ProcedureId}; +use crate::utils::collections::*; // MODULE PROVIDER // ================================================================================================ diff --git a/assembly/src/assembler/span_builder.rs b/assembly/src/assembler/span_builder.rs index 63db32f057..b767fb2dbb 100644 --- a/assembly/src/assembler/span_builder.rs +++ b/assembly/src/assembler/span_builder.rs @@ -1,7 +1,8 @@ use super::{ AssemblyContext, AssemblyError, BodyWrapper, Borrow, CodeBlock, Decorator, DecoratorList, - Instruction, Operation, ToString, Vec, + Instruction, Operation, }; +use crate::utils::{collections::*, string::*}; use vm_core::{AdviceInjector, AssemblyOp}; // SPAN BUILDER diff --git a/assembly/src/assembler/tests.rs b/assembly/src/assembler/tests.rs index 15ca21bafb..8edd17cb65 100644 --- a/assembly/src/assembler/tests.rs +++ b/assembly/src/assembler/tests.rs @@ -113,26 +113,26 @@ fn nested_blocks() { syscall.foo end"#; - let before = CodeBlock::new_span(vec![Operation::Push(2u64.into())]); + let before = CodeBlock::new_span(vec![Operation::Push(2u32.into())]); - let r#true = CodeBlock::new_span(vec![Operation::Push(3u64.into())]); - let r#false = CodeBlock::new_span(vec![Operation::Push(5u64.into())]); + let r#true = CodeBlock::new_span(vec![Operation::Push(3u32.into())]); + let r#false = CodeBlock::new_span(vec![Operation::Push(5u32.into())]); let r#if = CodeBlock::new_split(r#true, r#false); - let r#true = CodeBlock::new_span(vec![Operation::Push(7u64.into())]); - let r#false = CodeBlock::new_span(vec![Operation::Push(11u64.into())]); + let r#true = CodeBlock::new_span(vec![Operation::Push(7u32.into())]); + let r#false = CodeBlock::new_span(vec![Operation::Push(11u32.into())]); let r#true = CodeBlock::new_split(r#true, r#false); let r#while = CodeBlock::new_span(vec![ - Operation::Push(17u64.into()), - Operation::Push(19u64.into()), - Operation::Push(23u64.into()), + Operation::Push(17u32.into()), + Operation::Push(19u32.into()), + Operation::Push(23u32.into()), ]); let r#while = CodeBlock::new_loop(r#while); - let span = CodeBlock::new_span(vec![Operation::Push(13u64.into())]); + let span = CodeBlock::new_span(vec![Operation::Push(13u32.into())]); let r#false = CodeBlock::new_join([span, r#while]); let nested = CodeBlock::new_split(r#true, r#false); - let exec = CodeBlock::new_span(vec![Operation::Push(29u64.into())]); + let exec = CodeBlock::new_span(vec![Operation::Push(29u32.into())]); let combined = combine_blocks(vec![before, r#if, nested, exec, syscall]); let program = assembler.compile(program).unwrap(); diff --git a/assembly/src/ast/code_body.rs b/assembly/src/ast/code_body.rs index d6a82bc6fb..230035f83c 100644 --- a/assembly/src/ast/code_body.rs +++ b/assembly/src/ast/code_body.rs @@ -1,7 +1,8 @@ use super::{ ByteReader, ByteWriter, Deserializable, DeserializationError, Node, Serializable, - SourceLocation, Vec, + SourceLocation, MAX_BODY_LEN, }; +use crate::utils::collections::*; use core::{iter, slice}; // CODE BODY @@ -22,12 +23,17 @@ impl CodeBody { // -------------------------------------------------------------------------------------------- /// Creates a new 
instance of [CodeBody] populated with the provided `nodes`. + /// + /// # Panics + /// Assumes that the number of nodes is smaller than 2^16 and panics otherwise. pub fn new(nodes: N) -> Self where N: IntoIterator, { + let nodes: Vec<_> = nodes.into_iter().collect(); + assert!(nodes.len() <= MAX_BODY_LEN, "too many nodes"); Self { - nodes: nodes.into_iter().collect(), + nodes, locations: Vec::new(), } } diff --git a/assembly/src/ast/format.rs b/assembly/src/ast/format.rs index a8634fe1dc..2ba9c6fec3 100644 --- a/assembly/src/ast/format.rs +++ b/assembly/src/ast/format.rs @@ -1,7 +1,8 @@ use super::{ CodeBody, FormattableNode, InvokedProcsMap, LibraryPath, ProcedureAst, ProcedureId, - ProcedureName, Vec, + ProcedureName, }; +use crate::utils::collections::*; use core::fmt; const INDENT_STRING: &str = " "; diff --git a/assembly/src/ast/imports.rs b/assembly/src/ast/imports.rs index 8c767d36b6..70912ee7b1 100644 --- a/assembly/src/ast/imports.rs +++ b/assembly/src/ast/imports.rs @@ -1,8 +1,9 @@ use super::{ - BTreeMap, ByteReader, ByteWriter, Deserializable, DeserializationError, InvokedProcsMap, - LibraryPath, ParsingError, ProcedureId, ProcedureName, Serializable, String, ToString, Token, - TokenStream, Vec, MAX_IMPORTS, MAX_INVOKED_IMPORTED_PROCS, + ByteReader, ByteWriter, Deserializable, DeserializationError, InvokedProcsMap, LibraryPath, + ParsingError, ProcedureId, ProcedureName, Serializable, Token, TokenStream, MAX_IMPORTS, + MAX_INVOKED_IMPORTED_PROCS, }; +use crate::utils::{collections::*, string::*}; // TYPE ALIASES // ================================================================================================ @@ -77,18 +78,53 @@ impl ModuleImports { // PUBLIC ACCESSORS // -------------------------------------------------------------------------------------------- + /// Returns true if there are no imports in the containing module + pub fn is_empty(&self) -> bool { + self.imports.is_empty() + } + + /// Returns the number of imports contained in this table + pub fn len(&self) -> usize { + self.imports.len() + } + /// Look up the path of the imported module with the given name. pub fn get_module_path(&self, module_name: &str) -> Option<&LibraryPath> { self.imports.get(&module_name.to_string()) } + /// Look up the actual procedure name and module path associated with the given [ProcedureId], + /// if that procedure was imported and invoked in the current module. + pub fn get_procedure_info(&self, id: &ProcedureId) -> Option<(&ProcedureName, &LibraryPath)> { + self.invoked_procs + .get(id) + .map(|invoked_proc| (&invoked_proc.0, &invoked_proc.1)) + } + + /// Look up the procedure name associated with the given [ProcedureId], + /// if that procedure was imported and invoked in the current module. + pub fn get_procedure_name(&self, id: &ProcedureId) -> Option<&ProcedureName> { + self.invoked_procs.get(id).map(|(name, _)| name) + } + + /// Look up the [LibraryPath] associated with the given [ProcedureId], + /// if that procedure was imported and invoked in the current module. + pub fn get_procedure_path(&self, id: &ProcedureId) -> Option<&LibraryPath> { + self.invoked_procs.get(id).map(|(_, path)| path) + } + /// Return the paths of all imported module pub fn import_paths(&self) -> Vec<&LibraryPath> { self.imports.values().collect() } - /// Returns a reference to the invoked procedure map which maps procedure IDs to their names. - pub fn invoked_procs(&self) -> &InvokedProcsMap { + /// Returns a map containing IDs and names of imported procedures. 
+ pub fn get_imported_procedures(&self) -> BTreeMap { + self.invoked_procs.iter().map(|(id, (name, _))| (*id, name.clone())).collect() + } + + /// Returns a reference to the internal invoked procedure map which maps procedure IDs to their names and paths. + pub(super) fn invoked_procs(&self) -> &InvokedProcsMap { &self.invoked_procs } @@ -123,6 +159,12 @@ impl ModuleImports { } Ok(proc_id) } + + /// Clears all stored information about imported modules and invoked procedures + pub fn clear(&mut self) { + self.imports.clear(); + self.invoked_procs.clear(); + } } impl Serializable for ModuleImports { diff --git a/assembly/src/ast/mod.rs b/assembly/src/ast/mod.rs index 24e2d746b7..9f0652ad44 100644 --- a/assembly/src/ast/mod.rs +++ b/assembly/src/ast/mod.rs @@ -2,18 +2,16 @@ //! //! Structs in this module (specifically [ProgramAst] and [ModuleAst]) can be used to parse source //! code into relevant ASTs. This can be done via their `parse()` methods. - use super::{ - crypto::hash::RpoDigest, BTreeMap, ByteReader, ByteWriter, Deserializable, - DeserializationError, Felt, LabelError, LibraryPath, ParsingError, ProcedureId, ProcedureName, - Serializable, SliceReader, StarkField, String, ToString, Token, TokenStream, Vec, - MAX_LABEL_LEN, + crypto::hash::RpoDigest, ByteReader, ByteWriter, Deserializable, DeserializationError, Felt, + LabelError, LibraryPath, ParsingError, ProcedureId, ProcedureName, Serializable, SliceReader, + StarkField, Token, TokenStream, MAX_LABEL_LEN, }; -use core::{fmt, iter, str::from_utf8}; -#[cfg(feature = "std")] -use std::{fs, io, path::Path}; +use crate::utils::{collections::*, string::*}; use vm_core::utils::bound_into_included_u64; +pub use tracing::{event, info_span, instrument, Level}; + pub use super::tokens::SourceLocation; mod nodes; @@ -33,9 +31,19 @@ mod invocation_target; pub use invocation_target::InvocationTarget; mod parsers; -use parsers::{parse_constants, ParserContext}; -pub(crate) use parsers::{NAMESPACE_LABEL_PARSER, PROCEDURE_LABEL_PARSER}; +mod module; +pub use module::ModuleAst; + +mod procedure; +pub use procedure::{ProcReExport, ProcedureAst}; + +mod program; +pub use program::ProgramAst; + +pub(crate) use parsers::{ + parse_param_with_constant_lookup, NAMESPACE_LABEL_PARSER, PROCEDURE_LABEL_PARSER, +}; mod serde; pub use serde::AstSerdeOptions; @@ -74,896 +82,6 @@ type LocalConstMap = BTreeMap; type ReExportedProcMap = BTreeMap; type InvokedProcsMap = BTreeMap; -// EXECUTABLE PROGRAM AST -// ================================================================================================ - -/// An abstract syntax tree of an executable Miden program. -/// -/// A program AST consists of a body of the program, a list of internal procedure ASTs, a list of -/// imported libraries, a map from procedure ids to procedure names for imported procedures used in -/// the module, and the source location of the program. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct ProgramAst { - body: CodeBody, - local_procs: Vec, - import_info: Option, - start: SourceLocation, -} - -impl ProgramAst { - // CONSTRUCTORS - // -------------------------------------------------------------------------------------------- - /// Returns a new [ProgramAst]. - /// - /// A program consist of a body and a set of internal (i.e., not exported) procedures. 
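The new lookup accessors on ModuleImports can be combined as in the sketch below. It assumes `imports` is the import table of a parsed module and `proc_id` identifies an imported procedure that was actually invoked; both are placeholders for illustration.

fn describe_import(imports: &ModuleImports, proc_id: &ProcedureId) {
    // get_procedure_info() returns both pieces at once; get_procedure_name() and
    // get_procedure_path() cover the individual lookups
    if let Some((name, path)) = imports.get_procedure_info(proc_id) {
        println!("procedure {name} is imported from {path}");
    }
}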
- pub fn new(body: Vec, local_procs: Vec) -> Result { - if local_procs.len() > MAX_LOCAL_PROCS { - return Err(ParsingError::too_many_module_procs(local_procs.len(), MAX_LOCAL_PROCS)); - } - let start = SourceLocation::default(); - let body = CodeBody::new(body); - Ok(Self { - body, - local_procs, - import_info: None, - start, - }) - } - - /// Adds the provided import information to the program. - /// - /// # Panics - /// Panics if import information has already been added. - pub fn with_import_info(mut self, import_info: ModuleImports) -> Self { - assert!(self.import_info.is_none(), "module imports have already been added"); - self.import_info = Some(import_info); - self - } - - /// Binds the provided `locations` to the nodes of this program's body. - /// - /// The `start` location points to the `begin` token which does not have its own node. - /// - /// # Panics - /// Panics if source location information has already been associated with this program. - pub fn with_source_locations(mut self, locations: L, start: SourceLocation) -> Self - where - L: IntoIterator, - { - assert!(!self.body.has_locations(), "source locations have already been loaded"); - self.start = start; - self.body = self.body.with_source_locations(locations); - self - } - - // PUBLIC ACCESSORS - // -------------------------------------------------------------------------------------------- - - /// Returns the [SourceLocation] associated with this program, if present. - pub fn source_locations(&self) -> impl Iterator { - iter::once(&self.start).chain(self.body.source_locations().iter()) - } - - /// Returns a slice over the internal procedures of this program. - pub fn procedures(&self) -> &[ProcedureAst] { - &self.local_procs - } - - /// Returns a reference to the body of this program. - pub fn body(&self) -> &CodeBody { - &self.body - } - - /// Returns a map containing IDs and names of imported procedures. - pub fn get_imported_procedures_map(&self) -> BTreeMap { - if let Some(info) = &self.import_info { - info.invoked_procs().iter().map(|(&id, (name, _))| (id, name.clone())).collect() - } else { - BTreeMap::new() - } - } - - // PARSER - // -------------------------------------------------------------------------------------------- - /// Parses the provided source into a [ProgramAst]. - /// - /// A program consist of a body and a set of internal (i.e., not exported) procedures. 
- pub fn parse(source: &str) -> Result { - let mut tokens = TokenStream::new(source)?; - let mut import_info = ModuleImports::parse(&mut tokens)?; - let local_constants = parse_constants(&mut tokens)?; - - let mut context = ParserContext { - import_info: &mut import_info, - local_procs: LocalProcMap::default(), - reexported_procs: ReExportedProcMap::default(), - local_constants, - }; - - context.parse_procedures(&mut tokens, false)?; - - // make sure program body is present - let next_token = tokens - .read() - .ok_or_else(|| ParsingError::unexpected_eof(*tokens.eof_location()))?; - if next_token.parts()[0] != Token::BEGIN { - return Err(ParsingError::unexpected_token(next_token, Token::BEGIN)); - } - - let program_start = tokens.pos(); - // consume the 'begin' token - let header = tokens.read().expect("missing program header"); - let start = *header.location(); - header.validate_begin()?; - tokens.advance(); - - // make sure there is something to be read - if tokens.eof() { - return Err(ParsingError::unexpected_eof(*tokens.eof_location())); - } - - // parse the sequence of nodes and add each node to the list - let body = context.parse_body(&mut tokens, false)?; - - // consume the 'end' token - match tokens.read() { - None => Err(ParsingError::unmatched_begin( - tokens.read_at(program_start).expect("no begin token"), - )), - Some(token) => match token.parts()[0] { - Token::END => token.validate_end(), - Token::ELSE => Err(ParsingError::dangling_else(token)), - _ => Err(ParsingError::unmatched_begin( - tokens.read_at(program_start).expect("no begin token"), - )), - }, - }?; - tokens.advance(); - - // make sure there are no instructions after the end - if let Some(token) = tokens.read() { - return Err(ParsingError::dangling_ops_after_program(token)); - } - - let local_procs = sort_procs_into_vec(context.local_procs); - let (nodes, locations) = body.into_parts(); - Ok(Self::new(nodes, local_procs)? - .with_source_locations(locations, start) - .with_import_info(import_info)) - } - - // SERIALIZATION / DESERIALIZATION - // -------------------------------------------------------------------------------------------- - - /// Returns byte representation of this [ProgramAst]. - /// - /// The serde options are serialized as header information for the purposes of deserialization. - pub fn to_bytes(&self, options: AstSerdeOptions) -> Vec { - let mut target = Vec::::default(); - - // serialize the options, so that deserialization knows what to do - options.write_into(&mut target); - - // asserts below are OK because we enforce limits on the number of procedure and the - // number of body instructions in relevant parsers - - // serialize imports if required - if options.serialize_imports { - match &self.import_info { - Some(imports) => imports.write_into(&mut target), - None => panic!("imports not initialized"), - } - } - - // serialize procedures - assert!(self.local_procs.len() <= MAX_LOCAL_PROCS, "too many local procs"); - target.write_u16(self.local_procs.len() as u16); - self.local_procs.write_into(&mut target); - - // serialize program body - assert!(self.body.nodes().len() <= MAX_BODY_LEN, "too many body instructions"); - target.write_u16(self.body.nodes().len() as u16); - self.body.nodes().write_into(&mut target); - - target - } - - /// Returns a [ProgramAst] struct deserialized from the provided bytes. - /// - /// This function assumes that the byte array contains a serialized [AstSerdeOptions] struct as - /// a header. 
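To make the ProgramAst (de)serialization flow concrete, here is a sketched round trip through to_bytes/from_bytes. The one-line MASM source is a made-up example, ProgramAst and AstSerdeOptions are assumed to be in scope, and only structural fields are compared because source locations travel separately via write_source_locations.

fn roundtrip_program() {
    let ast = ProgramAst::parse("begin push.1 drop end").expect("valid MASM source");
    let bytes = ast.to_bytes(AstSerdeOptions { serialize_imports: true });
    let restored = ProgramAst::from_bytes(&bytes).expect("bytes written by to_bytes");
    // source locations are not part of to_bytes, so compare structure only
    assert_eq!(ast.procedures().len(), restored.procedures().len());
}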
- pub fn from_bytes(bytes: &[u8]) -> Result { - let mut source = SliceReader::new(bytes); - - // Deserialize the serialization options used when serializing - let options = AstSerdeOptions::read_from(&mut source)?; - - // deserialize imports if required - let mut import_info = None; - if options.serialize_imports { - import_info = Some(ModuleImports::read_from(&mut source)?); - } - - // deserialize local procs - let num_local_procs = source.read_u16()?; - let local_procs = Deserializable::read_batch_from(&mut source, num_local_procs as usize)?; - - // deserialize program body - let body_len = source.read_u16()? as usize; - let nodes = Deserializable::read_batch_from(&mut source, body_len)?; - - match Self::new(nodes, local_procs) { - Err(err) => Err(DeserializationError::UnknownError(err.message().clone())), - Ok(res) => match import_info { - Some(info) => Ok(res.with_import_info(info)), - None => Ok(res), - }, - } - } - - /// Loads the [SourceLocation] from the `source`. - /// - /// It expects the `start` location at the first position, and will subsequently load the - /// body via [CodeBody::load_source_locations]. Finally, it will load the local procedures via - /// [ProcedureAst::load_source_locations]. - pub fn load_source_locations( - &mut self, - source: &mut R, - ) -> Result<(), DeserializationError> { - self.start = SourceLocation::read_from(source)?; - self.body.load_source_locations(source)?; - self.local_procs.iter_mut().try_for_each(|p| p.load_source_locations(source)) - } - - /// Writes the [SourceLocation] into `target`. - /// - /// It will write the `start` location, and then execute the body serialization via - /// [CodeBlock::write_source_locations]. Finally, it will write the local procedures via - /// [ProcedureAst::write_source_locations]. - pub fn write_source_locations(&self, target: &mut W) { - self.start.write_into(target); - self.body.write_source_locations(target); - self.local_procs.iter().for_each(|p| p.write_source_locations(target)) - } - - // DESTRUCTURING - // -------------------------------------------------------------------------------------------- - - /// Returns local procedures and body nodes of this program. - pub fn into_parts(self) -> (Vec, Vec) { - (self.local_procs, self.body.into_parts().0) - } - - /// Clear import info from the program - pub fn clear_imports(&mut self) { - self.import_info = None; - } - - // WRITE TO FILE - // -------------------------------------------------------------------------------------------- - - /// Writes ProgramAst to provided file path - #[cfg(feature = "std")] - pub fn write_to_file

(&self, file_path: P) -> io::Result<()> - where - P: AsRef, - { - let path = file_path.as_ref(); - if let Some(dir) = path.parent() { - fs::create_dir_all(dir)?; - } - - let bytes = self.to_bytes(AstSerdeOptions { - serialize_imports: true, - }); - fs::write(path, bytes) - } -} - -impl fmt::Display for ProgramAst { - /// Writes this [ProgramAst] as formatted MASM code into the formatter. - /// - /// The formatted code puts each instruction on a separate line and preserves correct indentation - /// for instruction blocks. - /// - /// # Panics - /// Panics if import info is not associated with this program. - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - assert!(self.import_info.is_some(), "Program imports not instantiated"); - - // Imports - if let Some(ref info) = self.import_info { - let paths = info.import_paths(); - for path in paths.iter() { - writeln!(f, "use.{path}")?; - } - if !paths.is_empty() { - writeln!(f)?; - } - } - - let tmp_procs = InvokedProcsMap::new(); - let invoked_procs = - self.import_info.as_ref().map(|info| info.invoked_procs()).unwrap_or(&tmp_procs); - - let context = AstFormatterContext::new(&self.local_procs, invoked_procs); - - // Local procedures - for proc in self.local_procs.iter() { - writeln!(f, "{}", FormattableProcedureAst::new(proc, &context))?; - } - - // Main progrma - writeln!(f, "begin")?; - write!(f, "{}", FormattableCodeBody::new(&self.body, &context.inner_scope_context()))?; - writeln!(f, "end") - } -} - -// MODULE AST -// ================================================================================================ - -/// An abstract syntax tree of a Miden module. -/// -/// A module AST consists of a list of procedure ASTs, a list of re-exported procedures, a list of -/// imports, and module documentation. Local procedures could be internal or exported. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct ModuleAst { - local_procs: Vec, - reexported_procs: Vec, - import_info: Option, - docs: Option, -} - -impl ModuleAst { - // AST - // -------------------------------------------------------------------------------------------- - /// Returns a new [ModuleAst]. - /// - /// A module consists of internal and exported procedures but does not contain a body. - pub fn new( - local_procs: Vec, - reexported_procs: Vec, - docs: Option, - ) -> Result { - if local_procs.len() > MAX_LOCAL_PROCS { - return Err(ParsingError::too_many_module_procs(local_procs.len(), MAX_LOCAL_PROCS)); - } - if reexported_procs.len() > MAX_REEXPORTED_PROCS { - return Err(ParsingError::too_many_module_procs( - reexported_procs.len(), - MAX_REEXPORTED_PROCS, - )); - } - if let Some(ref docs) = docs { - if docs.len() > MAX_DOCS_LEN { - return Err(ParsingError::module_docs_too_long(docs.len(), MAX_DOCS_LEN)); - } - } - Ok(Self { - local_procs, - reexported_procs, - import_info: None, - docs, - }) - } - - /// Adds the provided import information to the module. - /// - /// # Panics - /// Panics if import information has already been added. - pub fn with_import_info(mut self, import_info: ModuleImports) -> Self { - assert!(self.import_info.is_none(), "module imports have already been added"); - self.import_info = Some(import_info); - self - } - - // PARSER - // -------------------------------------------------------------------------------------------- - /// Parses the provided source into a [ModuleAst]. - /// - /// A module consists of internal and exported procedures but does not contain a body. 
- pub fn parse(source: &str) -> Result { - let mut tokens = TokenStream::new(source)?; - let mut import_info = ModuleImports::parse(&mut tokens)?; - let local_constants = parse_constants(&mut tokens)?; - let mut context = ParserContext { - import_info: &mut import_info, - local_procs: LocalProcMap::default(), - reexported_procs: ReExportedProcMap::default(), - local_constants, - }; - context.parse_procedures(&mut tokens, true)?; - - // make sure program body is absent and there are no more instructions. - if let Some(token) = tokens.read() { - if token.parts()[0] == Token::BEGIN { - return Err(ParsingError::not_a_library_module(token)); - } else { - return Err(ParsingError::dangling_ops_after_module(token)); - } - } - - // build a list of local procs sorted by their declaration order - let local_procs = sort_procs_into_vec(context.local_procs); - - // build a list of re-exported procedures sorted by procedure name - let reexported_procs = context.reexported_procs.into_values().collect(); - - // get module docs and make sure the size is within the limit - let docs = tokens.take_module_comments(); - - Ok(Self::new(local_procs, reexported_procs, docs)?.with_import_info(import_info)) - } - - // PUBLIC ACCESSORS - // -------------------------------------------------------------------------------------------- - - /// Returns a list of procedures in this module. - pub fn procs(&self) -> &[ProcedureAst] { - &self.local_procs - } - - /// Returns a list of re-exported procedures in this module. - pub fn reexported_procs(&self) -> &[ProcReExport] { - &self.reexported_procs - } - - /// Returns doc comments for this module. - pub fn docs(&self) -> Option<&String> { - self.docs.as_ref() - } - - /// Returns a map of imported modules in this module. - pub fn import_paths(&self) -> Vec<&LibraryPath> { - match &self.import_info { - Some(info) => info.import_paths(), - None => Vec::<&LibraryPath>::new(), - } - } - - /// Returns a map containing IDs and names of imported procedures. - pub fn get_imported_procedures_map(&self) -> BTreeMap { - if let Some(info) = &self.import_info { - info.invoked_procs().iter().map(|(&id, (name, _))| (id, name.clone())).collect() - } else { - BTreeMap::new() - } - } - - // STATE MUTATORS - // -------------------------------------------------------------------------------------------- - - /// Clears the source locations from this module. - pub fn clear_locations(&mut self) { - self.local_procs.iter_mut().for_each(|p| p.clear_locations()) - } - - // SERIALIZATION / DESERIALIZATION - // -------------------------------------------------------------------------------------------- - - /// Returns byte representation of this [ModuleAst]. - /// - /// The serde options are NOT serialized - the caller must keep track of the serialization - /// options used. 
- pub fn write_into(&self, target: &mut R, options: AstSerdeOptions) { - // asserts below are OK because we enforce limits on the number of procedure and length of - // module docs in the module parser - - // serialize docs - match &self.docs { - Some(docs) => { - assert!(docs.len() <= u16::MAX as usize, "docs too long"); - target.write_u16(docs.len() as u16); - target.write_bytes(docs.as_bytes()); - } - None => { - target.write_u16(0); - } - } - - // serialize imports if required - if options.serialize_imports { - match &self.import_info { - Some(imports) => imports.write_into(target), - None => panic!("imports not initialized"), - } - } - - // serialize procedures - assert!(self.local_procs.len() <= u16::MAX as usize, "too many local procs"); - assert!( - self.reexported_procs.len() <= MAX_REEXPORTED_PROCS, - "too many re-exported procs" - ); - target.write_u16((self.reexported_procs.len()) as u16); - self.reexported_procs.write_into(target); - target.write_u16(self.local_procs.len() as u16); - self.local_procs.write_into(target); - } - - /// Returns a [ModuleAst] struct deserialized from the provided source. - /// - /// The serde options must correspond to the options used for serialization. - pub fn read_from( - source: &mut R, - options: AstSerdeOptions, - ) -> Result { - // deserialize docs - let docs_len = source.read_u16()? as usize; - let docs = if docs_len != 0 { - let str = source.read_vec(docs_len)?; - let str = - from_utf8(&str).map_err(|e| DeserializationError::InvalidValue(e.to_string()))?; - Some(str.to_string()) - } else { - None - }; - - // deserialize imports if required - let mut import_info = None; - if options.serialize_imports { - import_info = Some(ModuleImports::read_from(source)?); - } - - // deserialize re-exports - let num_reexported_procs = source.read_u16()? as usize; - let reexported_procs = Deserializable::read_batch_from(source, num_reexported_procs)?; - - // deserialize local procs - let num_local_procs = source.read_u16()? as usize; - let local_procs = Deserializable::read_batch_from(source, num_local_procs)?; - - match Self::new(local_procs, reexported_procs, docs) { - Err(err) => Err(DeserializationError::UnknownError(err.message().clone())), - Ok(res) => match import_info { - Some(info) => Ok(res.with_import_info(info)), - None => Ok(res), - }, - } - } - - /// Returns byte representation of this [ModuleAst]. - /// - /// The serde options are serialized as header information for the purposes of deserialization. - pub fn to_bytes(&self, options: AstSerdeOptions) -> Vec { - let mut target = Vec::::default(); - - // serialize the options, so that deserialization knows what to do - options.write_into(&mut target); - - self.write_into(&mut target, options); - target - } - - /// Returns a [ModuleAst] struct deserialized from the provided bytes. - /// - /// This function assumes that the byte array contains a serialized [AstSerdeOptions] struct as - /// a header. - pub fn from_bytes(bytes: &[u8]) -> Result { - let mut source = SliceReader::new(bytes); - - // Deserialize the serialization options used when serializing - let options = AstSerdeOptions::read_from(&mut source)?; - - Self::read_from(&mut source, options) - } - - /// Loads the [SourceLocation] of the procedures via [ProcedureAst::load_source_locations]. - /// - /// The local procedures are expected to have deterministic order from parse. This way, the - /// serialization can be simplified into a contiguous sequence of locations. 
- pub fn load_source_locations( - &mut self, - source: &mut R, - ) -> Result<(), DeserializationError> { - self.local_procs.iter_mut().try_for_each(|p| p.load_source_locations(source)) - } - - /// Writes the [SourceLocation] of the procedures via [ProcedureAst::write_source_locations]. - /// - /// The local procedures are expected to have deterministic order from parse. This way, the - /// serialization can be simplified into a contiguous sequence of locations. - pub fn write_source_locations(&self, target: &mut W) { - self.local_procs.iter().for_each(|p| p.write_source_locations(target)) - } - - // DESTRUCTURING - // -------------------------------------------------------------------------------------------- - - /// Clear import info from the module - pub fn clear_imports(&mut self) { - self.import_info = None; - } -} - -impl fmt::Display for ModuleAst { - /// Writes this [ModuleAst] as formatted MASM code into the formatter. - /// - /// The formatted code puts each instruction on a separate line and preserves correct indentation - /// for instruction blocks. - /// - /// # Panics - /// Panics if import info is not associated with this module. - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - assert!(self.import_info.is_some(), "Program imports not instantiated"); - - // Docs - if let Some(ref doc) = self.docs { - writeln!(f, "#! {doc}")?; - writeln!(f)?; - } - - // Imports - if let Some(ref info) = self.import_info { - let paths = info.import_paths(); - for path in paths.iter() { - writeln!(f, "use.{path}")?; - } - if !paths.is_empty() { - writeln!(f)?; - } - } - - // Re-exports - for proc in self.reexported_procs.iter() { - writeln!(f, "export.{}", proc.name)?; - writeln!(f)?; - } - - // Local procedures - let tmp_procs = InvokedProcsMap::new(); - let invoked_procs = - self.import_info.as_ref().map(|info| info.invoked_procs()).unwrap_or(&tmp_procs); - - let context = AstFormatterContext::new(&self.local_procs, invoked_procs); - - for proc in self.local_procs.iter() { - writeln!(f, "{}", FormattableProcedureAst::new(proc, &context))?; - } - Ok(()) - } -} - -// PROCEDURE AST -// ================================================================================================ - -/// An abstract syntax tree of a Miden procedure. -/// -/// A procedure AST consists of a list of body nodes and additional metadata about the procedure -/// (e.g., procedure name, number of memory locals used by the procedure, and whether a procedure -/// is exported or internal). -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct ProcedureAst { - pub name: ProcedureName, - pub docs: Option, - pub num_locals: u16, - pub body: CodeBody, - pub start: SourceLocation, - pub is_export: bool, -} - -impl ProcedureAst { - // CONSTRUCTORS - // -------------------------------------------------------------------------------------------- - /// Constructs a [ProcedureAst]. - /// - /// A procedure consists of a name, a number of locals, a body, and a flag to signal whether - /// the procedure is exported. - pub fn new( - name: ProcedureName, - num_locals: u16, - body: Vec, - is_export: bool, - docs: Option, - ) -> Self { - let start = SourceLocation::default(); - let body = CodeBody::new(body); - Self { - name, - docs, - num_locals, - body, - is_export, - start, - } - } - - /// Binds the provided `locations` into the ast nodes. - /// - /// The `start` location points to the first node of this block. 
- pub fn with_source_locations(mut self, locations: L, start: SourceLocation) -> Self - where - L: IntoIterator, - { - self.start = start; - self.body = self.body.with_source_locations(locations); - self - } - - // PUBLIC ACCESSORS - // -------------------------------------------------------------------------------------------- - - /// Returns the [SourceLocation] associated with this procedure, if present. - pub fn source_locations(&self) -> impl Iterator { - iter::once(&self.start).chain(self.body.source_locations().iter()) - } - - // STATE MUTATORS - // -------------------------------------------------------------------------------------------- - - /// Clears the source locations from this Ast. - pub fn clear_locations(&mut self) { - self.start = SourceLocation::default(); - self.body.clear_locations(); - } - - // SERIALIZATION / DESERIALIZATION - // -------------------------------------------------------------------------------------------- - - /// Loads the [SourceLocation] from the `source`. - /// - /// It expects the `start` location at the first position, and will subsequently load the - /// body via [CodeBody::load_source_locations]. - pub fn load_source_locations( - &mut self, - source: &mut R, - ) -> Result<(), DeserializationError> { - self.start = SourceLocation::read_from(source)?; - self.body.load_source_locations(source)?; - Ok(()) - } - - /// Writes the [SourceLocation] into `target`. - /// - /// It will write the `start` location, and then execute the body serialization via - /// [CodeBlock::write_source_locations]. - pub fn write_source_locations(&self, target: &mut W) { - self.start.write_into(target); - self.body.write_source_locations(target); - } -} - -impl Serializable for ProcedureAst { - fn write_into(&self, target: &mut W) { - // asserts below are OK because we enforce limits on the procedure body size and length of - // procedure docs in the procedure parser - - self.name.write_into(target); - match &self.docs { - Some(docs) => { - assert!(docs.len() <= MAX_DOCS_LEN, "docs too long"); - target.write_u16(docs.len() as u16); - target.write_bytes(docs.as_bytes()); - } - None => { - target.write_u16(0); - } - } - - target.write_bool(self.is_export); - target.write_u16(self.num_locals); - assert!(self.body.nodes().len() <= MAX_BODY_LEN, "too many body instructions"); - target.write_u16(self.body.nodes().len() as u16); - self.body.nodes().write_into(target); - } -} - -impl Deserializable for ProcedureAst { - fn read_from(source: &mut R) -> Result { - let name = ProcedureName::read_from(source)?; - let docs_len = source.read_u16()? as usize; - let docs = if docs_len != 0 { - let str = source.read_vec(docs_len)?; - let str = - from_utf8(&str).map_err(|e| DeserializationError::InvalidValue(e.to_string()))?; - Some(str.to_string()) - } else { - None - }; - - let is_export = source.read_bool()?; - let num_locals = source.read_u16()?; - let body_len = source.read_u16()? as usize; - let nodes = Deserializable::read_batch_from(source, body_len)?; - let body = CodeBody::new(nodes); - let start = SourceLocation::default(); - Ok(Self { - name, - num_locals, - body, - start, - is_export, - docs, - }) - } -} - -/// Represents a re-exported procedure. -/// -/// A re-exported procedure is a procedure that is defined in a different module in the same -/// library or a different library and re-exported with the same or a different name. The -/// re-exported procedure is not copied into the module, but rather a reference to it is added to -/// the [ModuleAST]. 
-#[derive(Default, Debug, Clone, PartialEq, Eq)] -pub struct ProcReExport { - proc_id: ProcedureId, - name: ProcedureName, - docs: Option, -} - -impl ProcReExport { - /// Creates a new re-exported procedure. - pub fn new(proc_id: ProcedureId, name: ProcedureName, docs: Option) -> Self { - Self { - proc_id, - name, - docs, - } - } - - // PUBLIC ACCESSORS - // -------------------------------------------------------------------------------------------- - - /// Returns the ID of the re-exported procedure. - pub fn proc_id(&self) -> ProcedureId { - self.proc_id - } - - /// Returns the name of the re-exported procedure. - pub fn name(&self) -> &ProcedureName { - &self.name - } - - /// Returns the documentation of the re-exported procedure, if present. - pub fn docs(&self) -> Option<&str> { - self.docs.as_deref() - } - - /// Returns the ID of the re-exported procedure using the specified module. - pub fn get_alias_id(&self, module_path: &LibraryPath) -> ProcedureId { - ProcedureId::from_name(&self.name, module_path) - } -} - -impl Serializable for ProcReExport { - fn write_into(&self, target: &mut W) { - self.proc_id.write_into(target); - self.name.write_into(target); - match &self.docs { - Some(docs) => { - assert!(docs.len() <= MAX_DOCS_LEN, "docs too long"); - target.write_u16(docs.len() as u16); - target.write_bytes(docs.as_bytes()); - } - None => { - target.write_u16(0); - } - } - } -} - -impl Deserializable for ProcReExport { - fn read_from(source: &mut R) -> Result { - let proc_id = ProcedureId::read_from(source)?; - let name = ProcedureName::read_from(source)?; - let docs_len = source.read_u16()? as usize; - let docs = if docs_len != 0 { - let str = source.read_vec(docs_len)?; - let str = - from_utf8(&str).map_err(|e| DeserializationError::InvalidValue(e.to_string()))?; - Some(str.to_string()) - } else { - None - }; - Ok(Self { - proc_id, - name, - docs, - }) - } -} - // HELPER FUNCTIONS // ================================================================================================ @@ -974,3 +92,16 @@ fn sort_procs_into_vec(proc_map: LocalProcMap) -> Vec { procedures.into_iter().map(|(_idx, proc)| proc).collect() } + +/// Logging a warning message for every imported but unused module. +fn check_unused_imports(import_info: &ModuleImports) { + let import_lib_paths = import_info.import_paths(); + let invoked_procs_paths: Vec<&LibraryPath> = + import_info.invoked_procs().iter().map(|(_id, (_name, path))| path).collect(); + + for lib in import_lib_paths { + if !invoked_procs_paths.contains(&lib) { + event!(Level::WARN, "unused import: \"{}\"", lib); + } + } +} diff --git a/assembly/src/ast/module.rs b/assembly/src/ast/module.rs new file mode 100644 index 0000000000..e15099a401 --- /dev/null +++ b/assembly/src/ast/module.rs @@ -0,0 +1,321 @@ +use super::check_unused_imports; +use super::{ + format::*, + imports::ModuleImports, + parsers::{parse_constants, ParserContext}, + serde::AstSerdeOptions, + sort_procs_into_vec, LocalProcMap, ProcReExport, ProcedureAst, ReExportedProcMap, MAX_DOCS_LEN, + MAX_LOCAL_PROCS, MAX_REEXPORTED_PROCS, + { + ByteReader, ByteWriter, Deserializable, DeserializationError, ParsingError, SliceReader, + Token, TokenStream, + }, +}; +use crate::utils::{collections::*, string::*}; + +use core::{fmt, str::from_utf8}; +use vm_core::utils::Serializable; + +// MODULE AST +// ================================================================================================ + +/// An abstract syntax tree of a Miden module. 
+/// +/// A module AST consists of a list of procedure ASTs, a list of re-exported procedures, a list of +/// imports, and module documentation. Local procedures could be internal or exported. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ModuleAst { + pub(super) local_procs: Vec, + pub(super) reexported_procs: Vec, + pub(super) import_info: ModuleImports, + pub(super) docs: Option, +} + +impl ModuleAst { + // AST + // -------------------------------------------------------------------------------------------- + /// Returns a new [ModuleAst]. + /// + /// A module consists of internal and exported procedures but does not contain a body. + pub fn new( + local_procs: Vec, + reexported_procs: Vec, + docs: Option, + ) -> Result { + if local_procs.len() > MAX_LOCAL_PROCS { + return Err(ParsingError::too_many_module_procs(local_procs.len(), MAX_LOCAL_PROCS)); + } + if reexported_procs.len() > MAX_REEXPORTED_PROCS { + return Err(ParsingError::too_many_module_procs( + reexported_procs.len(), + MAX_REEXPORTED_PROCS, + )); + } + if let Some(ref docs) = docs { + if docs.len() > MAX_DOCS_LEN { + return Err(ParsingError::module_docs_too_long(docs.len(), MAX_DOCS_LEN)); + } + } + Ok(Self { + local_procs, + reexported_procs, + import_info: Default::default(), + docs, + }) + } + + /// Adds the provided import information to the module. + /// + /// # Panics + /// Panics if import information has already been added. + pub fn with_import_info(mut self, import_info: ModuleImports) -> Self { + assert!(self.import_info.is_empty(), "module imports have already been added"); + self.import_info = import_info; + self + } + + // PARSER + // -------------------------------------------------------------------------------------------- + /// Parses the provided source into a [ModuleAst]. + /// + /// A module consists of internal and exported procedures but does not contain a body. + pub fn parse(source: &str) -> Result { + let mut tokens = TokenStream::new(source)?; + let mut import_info = ModuleImports::parse(&mut tokens)?; + let local_constants = parse_constants(&mut tokens)?; + let mut context = ParserContext { + import_info: &mut import_info, + local_procs: LocalProcMap::default(), + reexported_procs: ReExportedProcMap::default(), + local_constants, + num_proc_locals: 0, + }; + context.parse_procedures(&mut tokens, true)?; + + // make sure program body is absent and there are no more instructions. + if let Some(token) = tokens.read() { + if token.parts()[0] == Token::BEGIN { + return Err(ParsingError::not_a_library_module(token)); + } else { + return Err(ParsingError::dangling_ops_after_module(token)); + } + } + + // build a list of local procs sorted by their declaration order + let local_procs = sort_procs_into_vec(context.local_procs); + + // build a list of re-exported procedures sorted by procedure name + let reexported_procs = context.reexported_procs.into_values().collect(); + + // get module docs and make sure the size is within the limit + let docs = tokens.take_module_comments(); + + check_unused_imports(context.import_info); + + Ok(Self::new(local_procs, reexported_procs, docs)?.with_import_info(import_info)) + } + + // PUBLIC ACCESSORS + // -------------------------------------------------------------------------------------------- + + /// Returns a list of procedures in this module. + pub fn procs(&self) -> &[ProcedureAst] { + &self.local_procs + } + + /// Returns a list of re-exported procedures in this module. 
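Because check_unused_imports only emits a tracing event, nothing is printed unless the embedding binary installs a subscriber. A minimal sketch, assuming the tracing-subscriber crate is available and using a made-up module that imports std::math::u64 without invoking it:

fn main() {
    // route tracing events (including the WARN emitted for unused imports) to the console
    tracing_subscriber::fmt::init();

    let source = "use.std::math::u64\n\nexport.foo\n    push.1 drop\nend";
    // parsing this module would emit: unused import: "std::math::u64"
    let _module = ModuleAst::parse(source).expect("valid module source");
}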
+ pub fn reexported_procs(&self) -> &[ProcReExport] { + &self.reexported_procs + } + + /// Returns doc comments for this module. + pub fn docs(&self) -> Option<&String> { + self.docs.as_ref() + } + + /// Returns a reference to the import information for this module + pub fn import_info(&self) -> &ModuleImports { + &self.import_info + } + + // STATE MUTATORS + // -------------------------------------------------------------------------------------------- + + /// Clears the source locations from this module. + pub fn clear_locations(&mut self) { + self.local_procs.iter_mut().for_each(|p| p.clear_locations()) + } + + // SERIALIZATION / DESERIALIZATION + // -------------------------------------------------------------------------------------------- + + /// Returns byte representation of this [ModuleAst]. + /// + /// The serde options are NOT serialized - the caller must keep track of the serialization + /// options used. + pub fn write_into(&self, target: &mut R, options: AstSerdeOptions) { + // asserts below are OK because we enforce limits on the number of procedure and length of + // module docs in the module parser + + // serialize docs + match &self.docs { + Some(docs) => { + assert!(docs.len() <= u16::MAX as usize, "docs too long"); + target.write_u16(docs.len() as u16); + target.write_bytes(docs.as_bytes()); + } + None => { + target.write_u16(0); + } + } + + // serialize imports if required + if options.serialize_imports { + self.import_info.write_into(target); + } + + // serialize procedures + assert!(self.local_procs.len() <= u16::MAX as usize, "too many local procs"); + assert!( + self.reexported_procs.len() <= MAX_REEXPORTED_PROCS, + "too many re-exported procs" + ); + target.write_u16((self.reexported_procs.len()) as u16); + target.write_many(&self.reexported_procs); + target.write_u16(self.local_procs.len() as u16); + target.write_many(&self.local_procs); + } + + /// Returns a [ModuleAst] struct deserialized from the provided source. + /// + /// The serde options must correspond to the options used for serialization. + pub fn read_from( + source: &mut R, + options: AstSerdeOptions, + ) -> Result { + // deserialize docs + let docs_len = source.read_u16()? as usize; + let docs = if docs_len != 0 { + let str = source.read_vec(docs_len)?; + let str = + from_utf8(&str).map_err(|e| DeserializationError::InvalidValue(e.to_string()))?; + Some(str.to_string()) + } else { + None + }; + + // deserialize imports if required + let import_info = if options.serialize_imports { + ModuleImports::read_from(source)? + } else { + ModuleImports::default() + }; + + // deserialize re-exports + let num_reexported_procs = source.read_u16()? as usize; + let reexported_procs = source.read_many::(num_reexported_procs)?; + + // deserialize local procs + let num_local_procs = source.read_u16()? as usize; + let local_procs = source.read_many::(num_local_procs)?; + + match Self::new(local_procs, reexported_procs, docs) { + Err(err) => Err(DeserializationError::UnknownError(err.message().clone())), + Ok(res) => Ok(res.with_import_info(import_info)), + } + } + + /// Returns byte representation of this [ModuleAst]. + /// + /// The serde options are serialized as header information for the purposes of deserialization. 
+ pub fn to_bytes(&self, options: AstSerdeOptions) -> Vec { + let mut target = Vec::::default(); + + // serialize the options, so that deserialization knows what to do + options.write_into(&mut target); + + self.write_into(&mut target, options); + target + } + + /// Returns a [ModuleAst] struct deserialized from the provided bytes. + /// + /// This function assumes that the byte array contains a serialized [AstSerdeOptions] struct as + /// a header. + pub fn from_bytes(bytes: &[u8]) -> Result { + let mut source = SliceReader::new(bytes); + + // Deserialize the serialization options used when serializing + let options = AstSerdeOptions::read_from(&mut source)?; + + Self::read_from(&mut source, options) + } + + /// Loads the [SourceLocation] of the procedures via [ProcedureAst::load_source_locations]. + /// + /// The local procedures are expected to have deterministic order from parse. This way, the + /// serialization can be simplified into a contiguous sequence of locations. + pub fn load_source_locations( + &mut self, + source: &mut R, + ) -> Result<(), DeserializationError> { + self.local_procs.iter_mut().try_for_each(|p| p.load_source_locations(source)) + } + + /// Writes the [SourceLocation] of the procedures via [ProcedureAst::write_source_locations]. + /// + /// The local procedures are expected to have deterministic order from parse. This way, the + /// serialization can be simplified into a contiguous sequence of locations. + pub fn write_source_locations(&self, target: &mut W) { + self.local_procs.iter().for_each(|p| p.write_source_locations(target)) + } + + // DESTRUCTURING + // -------------------------------------------------------------------------------------------- + + /// Clear import info from the module + pub fn clear_imports(&mut self) { + self.import_info.clear(); + } +} + +impl fmt::Display for ModuleAst { + /// Writes this [ModuleAst] as formatted MASM code into the formatter. + /// + /// The formatted code puts each instruction on a separate line and preserves correct indentation + /// for instruction blocks. + /// + /// # Panics + /// Panics if import info is not associated with this module. + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Docs + if let Some(ref doc) = self.docs { + writeln!(f, "#! 
{doc}")?; + writeln!(f)?; + } + + // Imports + let paths = self.import_info.import_paths(); + for path in paths.iter() { + writeln!(f, "use.{path}")?; + } + if !paths.is_empty() { + writeln!(f)?; + } + + // Re-exports + for proc in self.reexported_procs.iter() { + writeln!(f, "export.{}", proc.name())?; + writeln!(f)?; + } + + // Local procedures + let invoked_procs = self.import_info.invoked_procs(); + let context = AstFormatterContext::new(&self.local_procs, invoked_procs); + + for proc in self.local_procs.iter() { + writeln!(f, "{}", FormattableProcedureAst::new(proc, &context))?; + } + Ok(()) + } +} diff --git a/assembly/src/ast/nodes/advice.rs b/assembly/src/ast/nodes/advice.rs index d386377fec..2b0fdc8b86 100644 --- a/assembly/src/ast/nodes/advice.rs +++ b/assembly/src/ast/nodes/advice.rs @@ -1,10 +1,11 @@ use super::{ super::{ - ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable, ToString, + ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable, MAX_STACK_WORD_OFFSET, }, serde::signatures, }; +use crate::utils::string::*; use core::fmt; use vm_core::{AdviceInjector, Felt, SignatureKind, ZERO}; @@ -18,7 +19,7 @@ use vm_core::{AdviceInjector, Felt, SignatureKind, ZERO}; /// - Insert new data into the advice map. #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub enum AdviceInjectorNode { - PushU64div, + PushU64Div, PushExt2intt, PushSmtGet, PushSmtSet, @@ -39,7 +40,7 @@ impl From<&AdviceInjectorNode> for AdviceInjector { fn from(value: &AdviceInjectorNode) -> Self { use AdviceInjectorNode::*; match value { - PushU64div => Self::DivU64, + PushU64Div => Self::U64Div, PushExt2intt => Self::Ext2Intt, PushSmtGet => Self::SmtGet, PushSmtSet => Self::SmtSet, @@ -76,7 +77,7 @@ impl fmt::Display for AdviceInjectorNode { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { use AdviceInjectorNode::*; match self { - PushU64div => write!(f, "push_u64div"), + PushU64Div => write!(f, "push_u64div"), PushExt2intt => write!(f, "push_ext2intt"), PushSmtGet => write!(f, "push_smtget"), PushSmtSet => write!(f, "push_smtset"), @@ -118,7 +119,7 @@ impl Serializable for AdviceInjectorNode { fn write_into(&self, target: &mut W) { use AdviceInjectorNode::*; match self { - PushU64div => target.write_u8(PUSH_U64DIV), + PushU64Div => target.write_u8(PUSH_U64DIV), PushExt2intt => target.write_u8(PUSH_EXT2INTT), PushSmtGet => target.write_u8(PUSH_SMTGET), PushSmtSet => target.write_u8(PUSH_SMTSET), @@ -152,7 +153,7 @@ impl Serializable for AdviceInjectorNode { impl Deserializable for AdviceInjectorNode { fn read_from(source: &mut R) -> Result { match source.read_u8()? { - PUSH_U64DIV => Ok(AdviceInjectorNode::PushU64div), + PUSH_U64DIV => Ok(AdviceInjectorNode::PushU64Div), PUSH_EXT2INTT => Ok(AdviceInjectorNode::PushExt2intt), PUSH_SMTGET => Ok(AdviceInjectorNode::PushSmtGet), PUSH_SMTSET => Ok(AdviceInjectorNode::PushSmtSet), diff --git a/assembly/src/ast/nodes/format.rs b/assembly/src/ast/nodes/format.rs index 0136820fc6..ebbbb68f0a 100644 --- a/assembly/src/ast/nodes/format.rs +++ b/assembly/src/ast/nodes/format.rs @@ -120,6 +120,14 @@ impl fmt::Display for FormattableInstruction<'_> { write!(f, "call.")?; display_hex_bytes(f, &root.as_bytes())?; } + Instruction::ProcRefLocal(index) => { + let proc_name = self.context.local_proc(*index as usize); + write!(f, "procref.{proc_name}")?; + } + Instruction::ProcRefImported(proc_id) => { + let (_, path) = self.context.imported_proc(proc_id); + write!(f, "procref.{path}")?; + } _ => { // Not a procedure call. 
Use the normal formatting write!(f, "{}", self.instruction)?; diff --git a/assembly/src/ast/nodes/mod.rs b/assembly/src/ast/nodes/mod.rs index 127ae16e2b..f10feb087c 100644 --- a/assembly/src/ast/nodes/mod.rs +++ b/assembly/src/ast/nodes/mod.rs @@ -1,6 +1,5 @@ -use super::{ - AstFormatterContext, CodeBody, Felt, FormattableCodeBody, ProcedureId, RpoDigest, ToString, Vec, -}; +use super::{AstFormatterContext, CodeBody, Felt, FormattableCodeBody, ProcedureId, RpoDigest}; +use crate::utils::collections::*; use core::fmt; use vm_core::DebugOptions; @@ -64,6 +63,7 @@ pub enum Instruction { Exp, ExpImm(Felt), ExpBitLength(u8), + ILog2, Not, And, Or, @@ -98,78 +98,51 @@ pub enum Instruction { U32AssertWWithError(ErrorCode), U32Split, U32Cast, - U32CheckedAdd, - U32CheckedAddImm(u32), U32WrappingAdd, U32WrappingAddImm(u32), U32OverflowingAdd, U32OverflowingAddImm(u32), U32OverflowingAdd3, U32WrappingAdd3, - U32CheckedSub, - U32CheckedSubImm(u32), U32WrappingSub, U32WrappingSubImm(u32), U32OverflowingSub, U32OverflowingSubImm(u32), - U32CheckedMul, - U32CheckedMulImm(u32), U32WrappingMul, U32WrappingMulImm(u32), U32OverflowingMul, U32OverflowingMulImm(u32), U32OverflowingMadd, U32WrappingMadd, - U32CheckedDiv, - U32CheckedDivImm(u32), - U32UncheckedDiv, - U32UncheckedDivImm(u32), - U32CheckedMod, - U32CheckedModImm(u32), - U32UncheckedMod, - U32UncheckedModImm(u32), - U32CheckedDivMod, - U32CheckedDivModImm(u32), - U32UncheckedDivMod, - U32UncheckedDivModImm(u32), - U32CheckedAnd, - U32CheckedOr, - U32CheckedXor, - U32CheckedNot, - U32CheckedShr, - U32CheckedShrImm(u8), - U32UncheckedShr, - U32UncheckedShrImm(u8), - U32CheckedShl, - U32CheckedShlImm(u8), - U32UncheckedShl, - U32UncheckedShlImm(u8), - U32CheckedRotr, - U32CheckedRotrImm(u8), - U32UncheckedRotr, - U32UncheckedRotrImm(u8), - U32CheckedRotl, - U32CheckedRotlImm(u8), - U32UncheckedRotl, - U32UncheckedRotlImm(u8), - U32CheckedPopcnt, - U32UncheckedPopcnt, - U32CheckedEq, - U32CheckedEqImm(u32), - U32CheckedNeq, - U32CheckedNeqImm(u32), - U32CheckedLt, - U32UncheckedLt, - U32CheckedLte, - U32UncheckedLte, - U32CheckedGt, - U32UncheckedGt, - U32CheckedGte, - U32UncheckedGte, - U32CheckedMin, - U32UncheckedMin, - U32CheckedMax, - U32UncheckedMax, + U32Div, + U32DivImm(u32), + U32Mod, + U32ModImm(u32), + U32DivMod, + U32DivModImm(u32), + U32And, + U32Or, + U32Xor, + U32Not, + U32Shr, + U32ShrImm(u8), + U32Shl, + U32ShlImm(u8), + U32Rotr, + U32RotrImm(u8), + U32Rotl, + U32RotlImm(u8), + U32Popcnt, + U32Clz, + U32Ctz, + U32Clo, + U32Cto, + U32Lt, + U32Lte, + U32Gt, + U32Gte, + U32Min, + U32Max, // ----- stack manipulation ------------------------------------------------------------------- Drop, @@ -299,6 +272,7 @@ pub enum Instruction { // ----- STARK proof verification ------------------------------------------------------------- FriExt2Fold4, + RCombBase, // ----- exec / call -------------------------------------------------------------------------- ExecLocal(u16), @@ -309,10 +283,16 @@ pub enum Instruction { SysCall(ProcedureId), DynExec, DynCall, + ProcRefLocal(u16), + ProcRefImported(ProcedureId), // ----- debug decorators --------------------------------------------------------------------- Breakpoint, Debug(DebugOptions), + + // ----- event decorators --------------------------------------------------------------------- + Emit(u32), + Trace(u32), } impl Instruction { @@ -348,6 +328,7 @@ impl fmt::Display for Instruction { Self::Exp => write!(f, "exp"), Self::ExpImm(value) => write!(f, "exp.{value}"), Self::ExpBitLength(value) => 
write!(f, "exp.u{value}"), + Self::ILog2 => write!(f, "ilog2"), Self::Not => write!(f, "not"), Self::And => write!(f, "and"), Self::Or => write!(f, "or"), @@ -382,78 +363,51 @@ impl fmt::Display for Instruction { Self::U32AssertWWithError(err_code) => write!(f, "u32assertw.err={err_code}"), Self::U32Split => write!(f, "u32split"), Self::U32Cast => write!(f, "u32cast"), - Self::U32CheckedAdd => write!(f, "u32checked_add"), - Self::U32CheckedAddImm(value) => write!(f, "u32checked_add.{value}"), Self::U32WrappingAdd => write!(f, "u32wrapping_add"), Self::U32WrappingAddImm(value) => write!(f, "u32wrapping_add.{value}"), Self::U32OverflowingAdd => write!(f, "u32overflowing_add"), Self::U32OverflowingAddImm(value) => write!(f, "u32overflowing_add.{value}"), Self::U32OverflowingAdd3 => write!(f, "u32overflowing_add3"), Self::U32WrappingAdd3 => write!(f, "u32wrapping_add3"), - Self::U32CheckedSub => write!(f, "u32checked_sub"), - Self::U32CheckedSubImm(value) => write!(f, "u32checked_sub.{value}"), Self::U32WrappingSub => write!(f, "u32wrapping_sub"), Self::U32WrappingSubImm(value) => write!(f, "u32wrapping_sub.{value}"), Self::U32OverflowingSub => write!(f, "u32overflowing_sub"), Self::U32OverflowingSubImm(value) => write!(f, "u32overflowing_sub.{value}"), - Self::U32CheckedMul => write!(f, "u32checked_mul"), - Self::U32CheckedMulImm(value) => write!(f, "u32checked_mul.{value}"), Self::U32WrappingMul => write!(f, "u32wrapping_mul"), Self::U32WrappingMulImm(value) => write!(f, "u32wrapping_mul.{value}"), Self::U32OverflowingMul => write!(f, "u32overflowing_mul"), Self::U32OverflowingMulImm(value) => write!(f, "u32overflowing_mul.{value}"), Self::U32OverflowingMadd => write!(f, "u32overflowing_madd"), Self::U32WrappingMadd => write!(f, "u32wrapping_madd"), - Self::U32CheckedDiv => write!(f, "u32checked_div"), - Self::U32CheckedDivImm(value) => write!(f, "u32checked_div.{value}"), - Self::U32UncheckedDiv => write!(f, "u32unchecked_div"), - Self::U32UncheckedDivImm(value) => write!(f, "u32unchecked_div.{value}"), - Self::U32CheckedMod => write!(f, "u32checked_mod"), - Self::U32CheckedModImm(value) => write!(f, "u32checked_mod.{value}"), - Self::U32UncheckedMod => write!(f, "u32unchecked_mod"), - Self::U32UncheckedModImm(value) => write!(f, "u32unchecked_mod.{value}"), - Self::U32CheckedDivMod => write!(f, "u32checked_divmod"), - Self::U32CheckedDivModImm(value) => write!(f, "u32checked_divmod.{value}"), - Self::U32UncheckedDivMod => write!(f, "u32unchecked_divmod"), - Self::U32UncheckedDivModImm(value) => write!(f, "u32unchecked_divmod.{value}"), - Self::U32CheckedAnd => write!(f, "u32checked_and"), - Self::U32CheckedOr => write!(f, "u32checked_or"), - Self::U32CheckedXor => write!(f, "u32checked_xor"), - Self::U32CheckedNot => write!(f, "u32checked_not"), - Self::U32CheckedShr => write!(f, "u32checked_shr"), - Self::U32CheckedShrImm(value) => write!(f, "u32checked_shr.{value}"), - Self::U32UncheckedShr => write!(f, "u32unchecked_shr"), - Self::U32UncheckedShrImm(value) => write!(f, "u32unchecked_shr.{value}"), - Self::U32CheckedShl => write!(f, "u32checked_shl"), - Self::U32CheckedShlImm(value) => write!(f, "u32checked_shl.{value}"), - Self::U32UncheckedShl => write!(f, "u32unchecked_shl"), - Self::U32UncheckedShlImm(value) => write!(f, "u32unchecked_shl.{value}"), - Self::U32CheckedRotr => write!(f, "u32checked_rotr"), - Self::U32CheckedRotrImm(value) => write!(f, "u32checked_rotr.{value}"), - Self::U32UncheckedRotr => write!(f, "u32unchecked_rotr"), - Self::U32UncheckedRotrImm(value) => write!(f, 
"u32unchecked_rotr.{value}"), - Self::U32CheckedRotl => write!(f, "u32checked_rotl"), - Self::U32CheckedRotlImm(value) => write!(f, "u32checked_rotl.{value}"), - Self::U32UncheckedRotl => write!(f, "u32unchecked_rotl"), - Self::U32UncheckedRotlImm(value) => write!(f, "u32unchecked_rotl.{value}"), - Self::U32CheckedPopcnt => write!(f, "u32checked_popcnt"), - Self::U32UncheckedPopcnt => write!(f, "u32unchecked_popcnt"), - Self::U32CheckedEq => write!(f, "u32checked_eq"), - Self::U32CheckedEqImm(value) => write!(f, "u32checked_eq.{value}"), - Self::U32CheckedNeq => write!(f, "u32checked_neq"), - Self::U32CheckedNeqImm(value) => write!(f, "u32checked_neq.{value}"), - Self::U32CheckedLt => write!(f, "u32checked_lt"), - Self::U32UncheckedLt => write!(f, "u32unchecked_lt"), - Self::U32CheckedLte => write!(f, "u32checked_lte"), - Self::U32UncheckedLte => write!(f, "u32unchecked_lte"), - Self::U32CheckedGt => write!(f, "u32checked_gt"), - Self::U32UncheckedGt => write!(f, "u32unchecked_gt"), - Self::U32CheckedGte => write!(f, "u32checked_gte"), - Self::U32UncheckedGte => write!(f, "u32unchecked_gte"), - Self::U32CheckedMin => write!(f, "u32checked_min"), - Self::U32UncheckedMin => write!(f, "u32unchecked_min"), - Self::U32CheckedMax => write!(f, "u32checked_max"), - Self::U32UncheckedMax => write!(f, "u32unchecked_max"), + Self::U32Div => write!(f, "u32div"), + Self::U32DivImm(value) => write!(f, "u32div.{value}"), + Self::U32Mod => write!(f, "u32mod"), + Self::U32ModImm(value) => write!(f, "u32mod.{value}"), + Self::U32DivMod => write!(f, "u32divmod"), + Self::U32DivModImm(value) => write!(f, "u32divmod.{value}"), + Self::U32And => write!(f, "u32and"), + Self::U32Or => write!(f, "u32or"), + Self::U32Xor => write!(f, "u32xor"), + Self::U32Not => write!(f, "u32not"), + Self::U32Shr => write!(f, "u32shr"), + Self::U32ShrImm(value) => write!(f, "u32shr.{value}"), + Self::U32Shl => write!(f, "u32shl"), + Self::U32ShlImm(value) => write!(f, "u32shl.{value}"), + Self::U32Rotr => write!(f, "u32rotr"), + Self::U32RotrImm(value) => write!(f, "u32rotr.{value}"), + Self::U32Rotl => write!(f, "u32rotl"), + Self::U32RotlImm(value) => write!(f, "u32rotl.{value}"), + Self::U32Popcnt => write!(f, "u32popcnt"), + Self::U32Clz => write!(f, "u32clz"), + Self::U32Ctz => write!(f, "u32ctz"), + Self::U32Clo => write!(f, "u32clo"), + Self::U32Cto => write!(f, "u32cto"), + Self::U32Lt => write!(f, "u32lt"), + Self::U32Lte => write!(f, "u32lte"), + Self::U32Gt => write!(f, "u32gt"), + Self::U32Gte => write!(f, "u32gte"), + Self::U32Min => write!(f, "u32min"), + Self::U32Max => write!(f, "u32max"), // ----- stack manipulation --------------------------------------------------------------- Self::Drop => write!(f, "drop"), @@ -581,7 +535,10 @@ impl fmt::Display for Instruction { Self::MTreeSet => write!(f, "mtree_set"), Self::MTreeMerge => write!(f, "mtree_merge"), Self::MTreeVerify => write!(f, "mtree_verify"), + + // ----- STARK proof verification ----------------------------------------------------- Self::FriExt2Fold4 => write!(f, "fri_ext2fold4"), + Self::RCombBase => write!(f, "rcomb_base"), // ----- exec / call ------------------------------------------------------------------ Self::ExecLocal(index) => write!(f, "exec.{index}"), @@ -595,10 +552,16 @@ impl fmt::Display for Instruction { Self::SysCall(proc_id) => write!(f, "syscall.{proc_id}"), Self::DynExec => write!(f, "dynexec"), Self::DynCall => write!(f, "dyncall"), + Self::ProcRefLocal(index) => write!(f, "procref.{index}"), + Self::ProcRefImported(proc_id) => 
write!(f, "procref.{proc_id}"), // ----- debug decorators ------------------------------------------------------------- Self::Breakpoint => write!(f, "breakpoint"), Self::Debug(options) => write!(f, "debug.{options}"), + + // ----- event decorators ------------------------------------------------------------- + Self::Emit(value) => write!(f, "emit.{value}"), + Self::Trace(value) => write!(f, "trace.{value}"), } } } diff --git a/assembly/src/ast/nodes/serde/debug.rs b/assembly/src/ast/nodes/serde/debug.rs index 18bcb6a6a7..3b8b30b0b5 100644 --- a/assembly/src/ast/nodes/serde/debug.rs +++ b/assembly/src/ast/nodes/serde/debug.rs @@ -1,7 +1,11 @@ -use super::{super::DebugOptions, ByteReader, ByteWriter, DeserializationError, ToString}; +use super::{super::DebugOptions, ByteReader, ByteWriter, DeserializationError}; +use crate::utils::string::*; const STACK_ALL: u8 = 0; const STACK_TOP: u8 = 1; +const MEM_ALL: u8 = 2; +const MEM_INTERVAL: u8 = 3; +const LOCAL_INTERVAL: u8 = 4; /// Writes the provided [DebugOptions] into the provided target. pub fn write_options_into(target: &mut W, options: &DebugOptions) { @@ -11,6 +15,18 @@ pub fn write_options_into(target: &mut W, options: &DebugOptions) target.write_u8(STACK_TOP); target.write_u16(*n); } + DebugOptions::MemAll => target.write_u8(MEM_ALL), + DebugOptions::MemInterval(n, m) => { + target.write_u8(MEM_INTERVAL); + target.write_u32(*n); + target.write_u32(*m); + } + DebugOptions::LocalInterval(start, end, num_locals) => { + target.write_u8(LOCAL_INTERVAL); + target.write_u16(*start); + target.write_u16(*end); + target.write_u16(*num_locals); + } } } @@ -27,6 +43,18 @@ pub fn read_options_from( } Ok(DebugOptions::StackTop(n)) } + MEM_ALL => Ok(DebugOptions::MemAll), + MEM_INTERVAL => { + let n = source.read_u32()?; + let m = source.read_u32()?; + Ok(DebugOptions::MemInterval(n, m)) + } + LOCAL_INTERVAL => { + let n = source.read_u16()?; + let m = source.read_u16()?; + let num_locals = source.read_u16()?; + Ok(DebugOptions::LocalInterval(n, m, num_locals)) + } val => Err(DeserializationError::InvalidValue(val.to_string())), } } diff --git a/assembly/src/ast/nodes/serde/deserialization.rs b/assembly/src/ast/nodes/serde/deserialization.rs index c6a4209d70..7a86a5caf6 100644 --- a/assembly/src/ast/nodes/serde/deserialization.rs +++ b/assembly/src/ast/nodes/serde/deserialization.rs @@ -1,7 +1,8 @@ use super::{ super::AdviceInjectorNode, debug, ByteReader, CodeBody, Deserializable, DeserializationError, - Felt, Instruction, Node, OpCode, ProcedureId, RpoDigest, ToString, MAX_PUSH_INPUTS, + Felt, Instruction, Node, OpCode, ProcedureId, RpoDigest, MAX_PUSH_INPUTS, }; +use crate::utils::string::*; // NODE DESERIALIZATION // ================================================================================================ @@ -14,11 +15,11 @@ impl Deserializable for Node { source.read_u8()?; let if_block_len = source.read_u16()? as usize; - let nodes = Deserializable::read_batch_from(source, if_block_len)?; + let nodes = source.read_many::(if_block_len)?; let true_case = CodeBody::new(nodes); let else_block_len = source.read_u16()? as usize; - let nodes = Deserializable::read_batch_from(source, else_block_len)?; + let nodes = source.read_many::(else_block_len)?; let false_case = CodeBody::new(nodes); Ok(Node::IfElse { @@ -31,7 +32,7 @@ impl Deserializable for Node { let times = source.read_u32()?; let nodes_len = source.read_u16()? 
as usize; - let nodes = Deserializable::read_batch_from(source, nodes_len)?; + let nodes = source.read_many::(nodes_len)?; let body = CodeBody::new(nodes); Ok(Node::Repeat { times, body }) @@ -39,7 +40,7 @@ impl Deserializable for Node { source.read_u8()?; let nodes_len = source.read_u16()? as usize; - let nodes = Deserializable::read_batch_from(source, nodes_len)?; + let nodes = source.read_many::(nodes_len)?; let body = CodeBody::new(nodes); Ok(Node::While { body }) @@ -81,6 +82,7 @@ impl Deserializable for Instruction { OpCode::Exp => Ok(Instruction::Exp), OpCode::ExpImm => Ok(Instruction::ExpImm(Felt::read_from(source)?)), OpCode::ExpBitLength => Ok(Instruction::ExpBitLength(source.read_u8()?)), + OpCode::ILog2 => Ok(Instruction::ILog2), OpCode::Not => Ok(Instruction::Not), OpCode::And => Ok(Instruction::And), OpCode::Or => Ok(Instruction::Or), @@ -115,8 +117,6 @@ impl Deserializable for Instruction { OpCode::U32AssertWWithError => Ok(Instruction::U32AssertWWithError(source.read_u32()?)), OpCode::U32Split => Ok(Instruction::U32Split), OpCode::U32Cast => Ok(Instruction::U32Cast), - OpCode::U32CheckedAdd => Ok(Instruction::U32CheckedAdd), - OpCode::U32CheckedAddImm => Ok(Instruction::U32CheckedAddImm(source.read_u32()?)), OpCode::U32WrappingAdd => Ok(Instruction::U32WrappingAdd), OpCode::U32WrappingAddImm => Ok(Instruction::U32WrappingAddImm(source.read_u32()?)), OpCode::U32OverflowingAdd => Ok(Instruction::U32OverflowingAdd), @@ -125,16 +125,12 @@ impl Deserializable for Instruction { } OpCode::U32OverflowingAdd3 => Ok(Instruction::U32OverflowingAdd3), OpCode::U32WrappingAdd3 => Ok(Instruction::U32WrappingAdd3), - OpCode::U32CheckedSub => Ok(Instruction::U32CheckedSub), - OpCode::U32CheckedSubImm => Ok(Instruction::U32CheckedSubImm(source.read_u32()?)), OpCode::U32WrappingSub => Ok(Instruction::U32WrappingSub), OpCode::U32WrappingSubImm => Ok(Instruction::U32WrappingSubImm(source.read_u32()?)), OpCode::U32OverflowingSub => Ok(Instruction::U32OverflowingSub), OpCode::U32OverflowingSubImm => { Ok(Instruction::U32OverflowingSubImm(source.read_u32()?)) } - OpCode::U32CheckedMul => Ok(Instruction::U32CheckedMul), - OpCode::U32CheckedMulImm => Ok(Instruction::U32CheckedMulImm(source.read_u32()?)), OpCode::U32WrappingMul => Ok(Instruction::U32WrappingMul), OpCode::U32WrappingMulImm => Ok(Instruction::U32WrappingMulImm(source.read_u32()?)), OpCode::U32OverflowingMul => Ok(Instruction::U32OverflowingMul), @@ -143,58 +139,35 @@ impl Deserializable for Instruction { } OpCode::U32OverflowingMadd => Ok(Instruction::U32OverflowingMadd), OpCode::U32WrappingMadd => Ok(Instruction::U32WrappingMadd), - OpCode::U32CheckedDiv => Ok(Instruction::U32CheckedDiv), - OpCode::U32CheckedDivImm => Ok(Instruction::U32CheckedDivImm(source.read_u32()?)), - OpCode::U32UncheckedDiv => Ok(Instruction::U32UncheckedDiv), - OpCode::U32UncheckedDivImm => Ok(Instruction::U32UncheckedDivImm(source.read_u32()?)), - OpCode::U32CheckedMod => Ok(Instruction::U32CheckedMod), - OpCode::U32CheckedModImm => Ok(Instruction::U32CheckedModImm(source.read_u32()?)), - OpCode::U32UncheckedMod => Ok(Instruction::U32UncheckedMod), - OpCode::U32UncheckedModImm => Ok(Instruction::U32UncheckedModImm(source.read_u32()?)), - OpCode::U32CheckedDivMod => Ok(Instruction::U32CheckedDivMod), - OpCode::U32CheckedDivModImm => Ok(Instruction::U32CheckedDivModImm(source.read_u32()?)), - OpCode::U32UncheckedDivMod => Ok(Instruction::U32UncheckedDivMod), - OpCode::U32UncheckedDivModImm => { - Ok(Instruction::U32UncheckedDivModImm(source.read_u32()?)) - } - 
OpCode::U32CheckedAnd => Ok(Instruction::U32CheckedAnd), - OpCode::U32CheckedOr => Ok(Instruction::U32CheckedOr), - OpCode::U32CheckedXor => Ok(Instruction::U32CheckedXor), - OpCode::U32CheckedNot => Ok(Instruction::U32CheckedNot), - OpCode::U32CheckedShr => Ok(Instruction::U32CheckedShr), - OpCode::U32CheckedShrImm => Ok(Instruction::U32CheckedShrImm(source.read_u8()?)), - OpCode::U32UncheckedShr => Ok(Instruction::U32UncheckedShr), - OpCode::U32UncheckedShrImm => Ok(Instruction::U32UncheckedShrImm(source.read_u8()?)), - OpCode::U32CheckedShl => Ok(Instruction::U32CheckedShl), - OpCode::U32CheckedShlImm => Ok(Instruction::U32CheckedShlImm(source.read_u8()?)), - OpCode::U32UncheckedShl => Ok(Instruction::U32UncheckedShl), - OpCode::U32UncheckedShlImm => Ok(Instruction::U32UncheckedShlImm(source.read_u8()?)), - OpCode::U32CheckedRotr => Ok(Instruction::U32CheckedRotr), - OpCode::U32CheckedRotrImm => Ok(Instruction::U32CheckedRotrImm(source.read_u8()?)), - OpCode::U32UncheckedRotr => Ok(Instruction::U32UncheckedRotr), - OpCode::U32UncheckedRotrImm => Ok(Instruction::U32UncheckedRotrImm(source.read_u8()?)), - OpCode::U32CheckedRotl => Ok(Instruction::U32CheckedRotl), - OpCode::U32CheckedRotlImm => Ok(Instruction::U32CheckedRotlImm(source.read_u8()?)), - OpCode::U32UncheckedRotl => Ok(Instruction::U32UncheckedRotl), - OpCode::U32UncheckedRotlImm => Ok(Instruction::U32UncheckedRotlImm(source.read_u8()?)), - OpCode::U32CheckedPopcnt => Ok(Instruction::U32CheckedPopcnt), - OpCode::U32UncheckedPopcnt => Ok(Instruction::U32UncheckedPopcnt), - OpCode::U32CheckedEq => Ok(Instruction::U32CheckedEq), - OpCode::U32CheckedEqImm => Ok(Instruction::U32CheckedEqImm(source.read_u32()?)), - OpCode::U32CheckedNeq => Ok(Instruction::U32CheckedNeq), - OpCode::U32CheckedNeqImm => Ok(Instruction::U32CheckedNeqImm(source.read_u32()?)), - OpCode::U32CheckedLt => Ok(Instruction::U32CheckedLt), - OpCode::U32UncheckedLt => Ok(Instruction::U32UncheckedLt), - OpCode::U32CheckedLte => Ok(Instruction::U32CheckedLte), - OpCode::U32UncheckedLte => Ok(Instruction::U32UncheckedLte), - OpCode::U32CheckedGt => Ok(Instruction::U32CheckedGt), - OpCode::U32UncheckedGt => Ok(Instruction::U32UncheckedGt), - OpCode::U32CheckedGte => Ok(Instruction::U32CheckedGte), - OpCode::U32UncheckedGte => Ok(Instruction::U32UncheckedGte), - OpCode::U32CheckedMin => Ok(Instruction::U32CheckedMin), - OpCode::U32UncheckedMin => Ok(Instruction::U32UncheckedMin), - OpCode::U32CheckedMax => Ok(Instruction::U32CheckedMax), - OpCode::U32UncheckedMax => Ok(Instruction::U32UncheckedMax), + OpCode::U32Div => Ok(Instruction::U32Div), + OpCode::U32DivImm => Ok(Instruction::U32DivImm(source.read_u32()?)), + OpCode::U32Mod => Ok(Instruction::U32Mod), + OpCode::U32ModImm => Ok(Instruction::U32ModImm(source.read_u32()?)), + OpCode::U32DivMod => Ok(Instruction::U32DivMod), + OpCode::U32DivModImm => Ok(Instruction::U32DivModImm(source.read_u32()?)), + OpCode::U32And => Ok(Instruction::U32And), + OpCode::U32Or => Ok(Instruction::U32Or), + OpCode::U32Xor => Ok(Instruction::U32Xor), + OpCode::U32Not => Ok(Instruction::U32Not), + OpCode::U32Shr => Ok(Instruction::U32Shr), + OpCode::U32ShrImm => Ok(Instruction::U32ShrImm(source.read_u8()?)), + OpCode::U32Shl => Ok(Instruction::U32Shl), + OpCode::U32ShlImm => Ok(Instruction::U32ShlImm(source.read_u8()?)), + OpCode::U32Rotr => Ok(Instruction::U32Rotr), + OpCode::U32RotrImm => Ok(Instruction::U32RotrImm(source.read_u8()?)), + OpCode::U32Rotl => Ok(Instruction::U32Rotl), + OpCode::U32RotlImm => 
Ok(Instruction::U32RotlImm(source.read_u8()?)), + OpCode::U32Popcnt => Ok(Instruction::U32Popcnt), + OpCode::U32Clz => Ok(Instruction::U32Clz), + OpCode::U32Ctz => Ok(Instruction::U32Ctz), + OpCode::U32Clo => Ok(Instruction::U32Clo), + OpCode::U32Cto => Ok(Instruction::U32Cto), + OpCode::U32Lt => Ok(Instruction::U32Lt), + OpCode::U32Lte => Ok(Instruction::U32Lte), + OpCode::U32Gt => Ok(Instruction::U32Gt), + OpCode::U32Gte => Ok(Instruction::U32Gte), + OpCode::U32Min => Ok(Instruction::U32Min), + OpCode::U32Max => Ok(Instruction::U32Max), // ----- stack manipulation ----------------------------------------------------------- OpCode::Drop => Ok(Instruction::Drop), @@ -353,6 +326,7 @@ impl Deserializable for Instruction { // ----- STARK proof verification ----------------------------------------------------- OpCode::FriExt2Fold4 => Ok(Instruction::FriExt2Fold4), + OpCode::RCombBase => Ok(Instruction::RCombBase), // ----- exec / call ------------------------------------------------------------------ OpCode::ExecLocal => Ok(Instruction::ExecLocal(source.read_u16()?)), @@ -363,6 +337,10 @@ impl Deserializable for Instruction { OpCode::SysCall => Ok(Instruction::SysCall(ProcedureId::read_from(source)?)), OpCode::DynExec => Ok(Instruction::DynExec), OpCode::DynCall => Ok(Instruction::DynCall), + OpCode::ProcRefLocal => Ok(Instruction::ProcRefLocal(source.read_u16()?)), + OpCode::ProcRefImported => { + Ok(Instruction::ProcRefImported(ProcedureId::read_from(source)?)) + } // ----- debugging -------------------------------------------------------------------- OpCode::Debug => { @@ -370,6 +348,10 @@ impl Deserializable for Instruction { Ok(Instruction::Debug(options)) } + // ----- event decorators ------------------------------------------------------------- + OpCode::Emit => Ok(Instruction::Emit(source.read_u32()?)), + OpCode::Trace => Ok(Instruction::Trace(source.read_u32()?)), + // ----- control flow ----------------------------------------------------------------- // control flow instructions should be parsed as a part of Node::read_from() and we // should never get here diff --git a/assembly/src/ast/nodes/serde/mod.rs b/assembly/src/ast/nodes/serde/mod.rs index b901e75f4d..a81ddaaa7a 100644 --- a/assembly/src/ast/nodes/serde/mod.rs +++ b/assembly/src/ast/nodes/serde/mod.rs @@ -1,4 +1,5 @@ -use super::{CodeBody, Felt, Instruction, Node, ProcedureId, RpoDigest, ToString}; +use super::{CodeBody, Felt, Instruction, Node, ProcedureId, RpoDigest}; +use crate::utils::string::*; use crate::MAX_PUSH_INPUTS; use num_enum::TryFromPrimitive; use vm_core::utils::{ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable}; @@ -37,254 +38,235 @@ pub enum OpCode { Exp = 20, ExpImm = 21, ExpBitLength = 22, - Not = 23, - And = 24, - Or = 25, - Xor = 26, - Eq = 27, - EqImm = 28, - Neq = 29, - NeqImm = 30, - Eqw = 31, - Lt = 32, - Lte = 33, - Gt = 34, - Gte = 35, - IsOdd = 36, + ILog2 = 23, + Not = 24, + And = 25, + Or = 26, + Xor = 27, + Eq = 28, + EqImm = 29, + Neq = 30, + NeqImm = 31, + Eqw = 32, + Lt = 33, + Lte = 34, + Gt = 35, + Gte = 36, + IsOdd = 37, // ----- ext2 operations ---------------------------------------------------------------------- - Ext2Add = 37, - Ext2Sub = 38, - Ext2Mul = 39, - Ext2Div = 40, - Ext2Neg = 41, - Ext2Inv = 42, + Ext2Add = 38, + Ext2Sub = 39, + Ext2Mul = 40, + Ext2Div = 41, + Ext2Neg = 42, + Ext2Inv = 43, // ----- u32 manipulation --------------------------------------------------------------------- - U32Test = 43, - U32TestW = 44, - U32Assert = 45, - 
U32AssertWithError = 46, - U32Assert2 = 47, - U32Assert2WithError = 48, - U32AssertW = 49, - U32AssertWWithError = 50, - U32Split = 51, - U32Cast = 52, - U32CheckedAdd = 53, - U32CheckedAddImm = 54, - U32WrappingAdd = 55, - U32WrappingAddImm = 56, - U32OverflowingAdd = 57, - U32OverflowingAddImm = 58, - U32OverflowingAdd3 = 59, - U32WrappingAdd3 = 60, - U32CheckedSub = 61, - U32CheckedSubImm = 62, - U32WrappingSub = 63, - U32WrappingSubImm = 64, - U32OverflowingSub = 65, - U32OverflowingSubImm = 66, - U32CheckedMul = 67, - U32CheckedMulImm = 68, - U32WrappingMul = 69, - U32WrappingMulImm = 70, - U32OverflowingMul = 71, - U32OverflowingMulImm = 72, - U32OverflowingMadd = 73, - U32WrappingMadd = 74, - U32CheckedDiv = 75, - U32CheckedDivImm = 76, - U32UncheckedDiv = 77, - U32UncheckedDivImm = 78, - U32CheckedMod = 79, - U32CheckedModImm = 80, - U32UncheckedMod = 81, - U32UncheckedModImm = 82, - U32CheckedDivMod = 83, - U32CheckedDivModImm = 84, - U32UncheckedDivMod = 85, - U32UncheckedDivModImm = 86, - U32CheckedAnd = 87, - U32CheckedOr = 88, - U32CheckedXor = 89, - U32CheckedNot = 90, - U32CheckedShr = 91, - U32CheckedShrImm = 92, - U32UncheckedShr = 93, - U32UncheckedShrImm = 94, - U32CheckedShl = 95, - U32CheckedShlImm = 96, - U32UncheckedShl = 97, - U32UncheckedShlImm = 98, - U32CheckedRotr = 99, - U32CheckedRotrImm = 100, - U32UncheckedRotr = 101, - U32UncheckedRotrImm = 102, - U32CheckedRotl = 103, - U32CheckedRotlImm = 104, - U32UncheckedRotl = 105, - U32UncheckedRotlImm = 106, - U32CheckedPopcnt = 107, - U32UncheckedPopcnt = 108, - U32CheckedEq = 109, - U32CheckedEqImm = 110, - U32CheckedNeq = 111, - U32CheckedNeqImm = 112, - U32CheckedLt = 113, - U32UncheckedLt = 114, - U32CheckedLte = 115, - U32UncheckedLte = 116, - U32CheckedGt = 117, - U32UncheckedGt = 118, - U32CheckedGte = 119, - U32UncheckedGte = 120, - U32CheckedMin = 121, - U32UncheckedMin = 122, - U32CheckedMax = 123, - U32UncheckedMax = 124, + U32Test = 44, + U32TestW = 45, + U32Assert = 46, + U32AssertWithError = 47, + U32Assert2 = 48, + U32Assert2WithError = 49, + U32AssertW = 50, + U32AssertWWithError = 51, + U32Split = 52, + U32Cast = 53, + U32WrappingAdd = 54, + U32WrappingAddImm = 55, + U32OverflowingAdd = 56, + U32OverflowingAddImm = 57, + U32OverflowingAdd3 = 58, + U32WrappingAdd3 = 59, + U32WrappingSub = 60, + U32WrappingSubImm = 61, + U32OverflowingSub = 62, + U32OverflowingSubImm = 63, + U32WrappingMul = 64, + U32WrappingMulImm = 65, + U32OverflowingMul = 66, + U32OverflowingMulImm = 67, + U32OverflowingMadd = 68, + U32WrappingMadd = 69, + U32Div = 70, + U32DivImm = 71, + U32Mod = 72, + U32ModImm = 73, + U32DivMod = 74, + U32DivModImm = 75, + U32And = 76, + U32Or = 77, + U32Xor = 78, + U32Not = 79, + U32Shr = 80, + U32ShrImm = 81, + U32Shl = 82, + U32ShlImm = 83, + U32Rotr = 84, + U32RotrImm = 85, + U32Rotl = 86, + U32RotlImm = 87, + U32Popcnt = 88, + U32Clz = 89, + U32Ctz = 90, + U32Clo = 91, + U32Cto = 92, + U32Lt = 93, + U32Lte = 94, + U32Gt = 95, + U32Gte = 96, + U32Min = 97, + U32Max = 98, // ----- stack manipulation ------------------------------------------------------------------- - Drop = 125, - DropW = 126, - PadW = 127, - Dup0 = 128, - Dup1 = 129, - Dup2 = 130, - Dup3 = 131, - Dup4 = 132, - Dup5 = 133, - Dup6 = 134, - Dup7 = 135, - Dup8 = 136, - Dup9 = 137, - Dup10 = 138, - Dup11 = 139, - Dup12 = 140, - Dup13 = 141, - Dup14 = 142, - Dup15 = 143, - DupW0 = 144, - DupW1 = 145, - DupW2 = 146, - DupW3 = 147, - Swap1 = 148, - Swap2 = 149, - Swap3 = 150, - Swap4 = 151, - Swap5 = 152, - Swap6 = 153, - Swap7 = 
154, - Swap8 = 155, - Swap9 = 156, - Swap10 = 157, - Swap11 = 158, - Swap12 = 159, - Swap13 = 160, - Swap14 = 161, - Swap15 = 162, - SwapW1 = 163, - SwapW2 = 164, - SwapW3 = 165, - SwapDW = 166, - MovUp2 = 167, - MovUp3 = 168, - MovUp4 = 169, - MovUp5 = 170, - MovUp6 = 171, - MovUp7 = 172, - MovUp8 = 173, - MovUp9 = 174, - MovUp10 = 175, - MovUp11 = 176, - MovUp12 = 177, - MovUp13 = 178, - MovUp14 = 179, - MovUp15 = 180, - MovUpW2 = 181, - MovUpW3 = 182, - MovDn2 = 183, - MovDn3 = 184, - MovDn4 = 185, - MovDn5 = 186, - MovDn6 = 187, - MovDn7 = 188, - MovDn8 = 189, - MovDn9 = 190, - MovDn10 = 191, - MovDn11 = 192, - MovDn12 = 193, - MovDn13 = 194, - MovDn14 = 195, - MovDn15 = 196, - MovDnW2 = 197, - MovDnW3 = 198, - CSwap = 199, - CSwapW = 200, - CDrop = 201, - CDropW = 202, + Drop = 99, + DropW = 100, + PadW = 101, + Dup0 = 102, + Dup1 = 103, + Dup2 = 104, + Dup3 = 105, + Dup4 = 106, + Dup5 = 107, + Dup6 = 108, + Dup7 = 109, + Dup8 = 110, + Dup9 = 111, + Dup10 = 112, + Dup11 = 113, + Dup12 = 114, + Dup13 = 115, + Dup14 = 116, + Dup15 = 117, + DupW0 = 118, + DupW1 = 119, + DupW2 = 120, + DupW3 = 121, + Swap1 = 122, + Swap2 = 123, + Swap3 = 124, + Swap4 = 125, + Swap5 = 126, + Swap6 = 127, + Swap7 = 128, + Swap8 = 129, + Swap9 = 130, + Swap10 = 131, + Swap11 = 132, + Swap12 = 133, + Swap13 = 134, + Swap14 = 135, + Swap15 = 136, + SwapW1 = 137, + SwapW2 = 138, + SwapW3 = 139, + SwapDW = 140, + MovUp2 = 141, + MovUp3 = 142, + MovUp4 = 143, + MovUp5 = 144, + MovUp6 = 145, + MovUp7 = 146, + MovUp8 = 147, + MovUp9 = 148, + MovUp10 = 149, + MovUp11 = 150, + MovUp12 = 151, + MovUp13 = 152, + MovUp14 = 153, + MovUp15 = 154, + MovUpW2 = 155, + MovUpW3 = 156, + MovDn2 = 157, + MovDn3 = 158, + MovDn4 = 159, + MovDn5 = 160, + MovDn6 = 161, + MovDn7 = 162, + MovDn8 = 163, + MovDn9 = 164, + MovDn10 = 165, + MovDn11 = 166, + MovDn12 = 167, + MovDn13 = 168, + MovDn14 = 169, + MovDn15 = 170, + MovDnW2 = 171, + MovDnW3 = 172, + CSwap = 173, + CSwapW = 174, + CDrop = 175, + CDropW = 176, // ----- input / output operations ------------------------------------------------------------ - PushU8 = 203, - PushU16 = 204, - PushU32 = 205, - PushFelt = 206, - PushWord = 207, - PushU8List = 208, - PushU16List = 209, - PushU32List = 210, - PushFeltList = 211, + PushU8 = 177, + PushU16 = 178, + PushU32 = 179, + PushFelt = 180, + PushWord = 181, + PushU8List = 182, + PushU16List = 183, + PushU32List = 184, + PushFeltList = 185, - Locaddr = 212, - Sdepth = 213, - Caller = 214, - Clk = 215, + Locaddr = 186, + Sdepth = 187, + Caller = 188, + Clk = 189, - MemLoad = 216, - MemLoadImm = 217, - MemLoadW = 218, - MemLoadWImm = 219, - LocLoad = 220, - LocLoadW = 221, - MemStore = 222, - MemStoreImm = 223, - LocStore = 224, - MemStoreW = 225, - MemStoreWImm = 226, - LocStoreW = 227, + MemLoad = 190, + MemLoadImm = 191, + MemLoadW = 192, + MemLoadWImm = 193, + LocLoad = 194, + LocLoadW = 195, + MemStore = 196, + MemStoreImm = 197, + LocStore = 198, + MemStoreW = 199, + MemStoreWImm = 200, + LocStoreW = 201, - MemStream = 228, - AdvPipe = 229, + MemStream = 202, + AdvPipe = 203, - AdvPush = 230, - AdvLoadW = 231, + AdvPush = 204, + AdvLoadW = 205, - AdvInject = 232, + AdvInject = 206, // ----- cryptographic operations ------------------------------------------------------------- - Hash = 233, - HMerge = 234, - HPerm = 235, - MTreeGet = 236, - MTreeSet = 237, - MTreeMerge = 238, - MTreeVerify = 239, + Hash = 207, + HMerge = 208, + HPerm = 209, + MTreeGet = 210, + MTreeSet = 211, + MTreeMerge = 212, + MTreeVerify = 213, // ----- STARK 
proof verification ------------------------------------------------------------- - FriExt2Fold4 = 240, + FriExt2Fold4 = 214, + RCombBase = 215, // ----- exec / call -------------------------------------------------------------------------- - ExecLocal = 241, - ExecImported = 242, - CallLocal = 243, - CallMastRoot = 244, - CallImported = 245, - SysCall = 246, - DynExec = 247, - DynCall = 248, + ExecLocal = 216, + ExecImported = 217, + CallLocal = 218, + CallMastRoot = 219, + CallImported = 220, + SysCall = 221, + DynExec = 222, + DynCall = 223, + ProcRefLocal = 224, + ProcRefImported = 225, // ----- debugging ---------------------------------------------------------------------------- - Debug = 249, + Debug = 226, + + // ----- event decorators --------------------------------------------------------------------- + Emit = 227, + Trace = 228, // ----- control flow ------------------------------------------------------------------------- IfElse = 253, diff --git a/assembly/src/ast/nodes/serde/serialization.rs b/assembly/src/ast/nodes/serde/serialization.rs index 9e73b67fab..71a0edeb05 100644 --- a/assembly/src/ast/nodes/serde/serialization.rs +++ b/assembly/src/ast/nodes/serde/serialization.rs @@ -21,11 +21,11 @@ impl Serializable for Node { assert!(true_case.nodes().len() <= MAX_BODY_LEN, "too many body nodes"); target.write_u16(true_case.nodes().len() as u16); - true_case.nodes().write_into(target); + target.write_many(true_case.nodes()); assert!(false_case.nodes().len() <= MAX_BODY_LEN, "too many body nodes"); target.write_u16(false_case.nodes().len() as u16); - false_case.nodes().write_into(target); + target.write_many(false_case.nodes()); } Self::Repeat { times, body } => { OpCode::Repeat.write_into(target); @@ -33,14 +33,14 @@ impl Serializable for Node { assert!(body.nodes().len() <= MAX_BODY_LEN, "too many body nodes"); target.write_u16(body.nodes().len() as u16); - body.nodes().write_into(target); + target.write_many(body.nodes()); } Self::While { body } => { OpCode::While.write_into(target); assert!(body.nodes().len() <= MAX_BODY_LEN, "too many body nodes"); target.write_u16(body.nodes().len() as u16); - body.nodes().write_into(target); + target.write_many(body.nodes()); } } } @@ -105,6 +105,7 @@ impl Serializable for Instruction { OpCode::ExpBitLength.write_into(target); target.write_u8(*v); } + Self::ILog2 => OpCode::ILog2.write_into(target), Self::Not => OpCode::Not.write_into(target), Self::And => OpCode::And.write_into(target), Self::Or => OpCode::Or.write_into(target), @@ -154,11 +155,6 @@ impl Serializable for Instruction { } Self::U32Split => OpCode::U32Split.write_into(target), Self::U32Cast => OpCode::U32Cast.write_into(target), - Self::U32CheckedAdd => OpCode::U32CheckedAdd.write_into(target), - Self::U32CheckedAddImm(v) => { - OpCode::U32CheckedAddImm.write_into(target); - target.write_u32(*v); - } Self::U32WrappingAdd => OpCode::U32WrappingAdd.write_into(target), Self::U32WrappingAddImm(v) => { OpCode::U32WrappingAddImm.write_into(target); @@ -171,11 +167,6 @@ impl Serializable for Instruction { } Self::U32OverflowingAdd3 => OpCode::U32OverflowingAdd3.write_into(target), Self::U32WrappingAdd3 => OpCode::U32WrappingAdd3.write_into(target), - Self::U32CheckedSub => OpCode::U32CheckedSub.write_into(target), - Self::U32CheckedSubImm(v) => { - OpCode::U32CheckedSubImm.write_into(target); - target.write_u32(*v); - } Self::U32WrappingSub => OpCode::U32WrappingSub.write_into(target), Self::U32WrappingSubImm(v) => { OpCode::U32WrappingSubImm.write_into(target); @@ -186,11 +177,6 
@@ impl Serializable for Instruction { OpCode::U32OverflowingSubImm.write_into(target); target.write_u32(*v); } - Self::U32CheckedMul => OpCode::U32CheckedMul.write_into(target), - Self::U32CheckedMulImm(v) => { - OpCode::U32CheckedMulImm.write_into(target); - target.write_u32(*v); - } Self::U32WrappingMul => OpCode::U32WrappingMul.write_into(target), Self::U32WrappingMulImm(v) => { OpCode::U32WrappingMulImm.write_into(target); @@ -203,104 +189,56 @@ impl Serializable for Instruction { } Self::U32OverflowingMadd => OpCode::U32OverflowingMadd.write_into(target), Self::U32WrappingMadd => OpCode::U32WrappingMadd.write_into(target), - Self::U32CheckedDiv => OpCode::U32CheckedDiv.write_into(target), - Self::U32CheckedDivImm(v) => { - OpCode::U32CheckedDivImm.write_into(target); - target.write_u32(*v); - } - Self::U32UncheckedDiv => OpCode::U32UncheckedDiv.write_into(target), - Self::U32UncheckedDivImm(v) => { - OpCode::U32UncheckedDivImm.write_into(target); + Self::U32Div => OpCode::U32Div.write_into(target), + Self::U32DivImm(v) => { + OpCode::U32DivImm.write_into(target); target.write_u32(*v); } - Self::U32CheckedMod => OpCode::U32CheckedMod.write_into(target), - Self::U32CheckedModImm(v) => { - OpCode::U32CheckedModImm.write_into(target); + Self::U32Mod => OpCode::U32Mod.write_into(target), + Self::U32ModImm(v) => { + OpCode::U32ModImm.write_into(target); target.write_u32(*v); } - Self::U32UncheckedMod => OpCode::U32UncheckedMod.write_into(target), - Self::U32UncheckedModImm(v) => { - OpCode::U32UncheckedModImm.write_into(target); + Self::U32DivMod => OpCode::U32DivMod.write_into(target), + Self::U32DivModImm(v) => { + OpCode::U32DivModImm.write_into(target); target.write_u32(*v); } - Self::U32CheckedDivMod => OpCode::U32CheckedDivMod.write_into(target), - Self::U32CheckedDivModImm(v) => { - OpCode::U32CheckedDivModImm.write_into(target); - target.write_u32(*v); - } - Self::U32UncheckedDivMod => OpCode::U32UncheckedDivMod.write_into(target), - Self::U32UncheckedDivModImm(v) => { - OpCode::U32UncheckedDivModImm.write_into(target); - target.write_u32(*v); - } - Self::U32CheckedAnd => OpCode::U32CheckedAnd.write_into(target), - Self::U32CheckedOr => OpCode::U32CheckedOr.write_into(target), - Self::U32CheckedXor => OpCode::U32CheckedXor.write_into(target), - Self::U32CheckedNot => OpCode::U32CheckedNot.write_into(target), - Self::U32CheckedShr => OpCode::U32CheckedShr.write_into(target), - Self::U32CheckedShrImm(v) => { - OpCode::U32CheckedShrImm.write_into(target); - target.write_u8(*v); - } - Self::U32UncheckedShr => OpCode::U32UncheckedShr.write_into(target), - Self::U32UncheckedShrImm(v) => { - OpCode::U32UncheckedShrImm.write_into(target); - target.write_u8(*v); - } - Self::U32CheckedShl => OpCode::U32CheckedShl.write_into(target), - Self::U32CheckedShlImm(v) => { - OpCode::U32CheckedShlImm.write_into(target); + Self::U32And => OpCode::U32And.write_into(target), + Self::U32Or => OpCode::U32Or.write_into(target), + Self::U32Xor => OpCode::U32Xor.write_into(target), + Self::U32Not => OpCode::U32Not.write_into(target), + Self::U32Shr => OpCode::U32Shr.write_into(target), + Self::U32ShrImm(v) => { + OpCode::U32ShrImm.write_into(target); target.write_u8(*v); } - Self::U32UncheckedShl => OpCode::U32UncheckedShl.write_into(target), - Self::U32UncheckedShlImm(v) => { - OpCode::U32UncheckedShlImm.write_into(target); + Self::U32Shl => OpCode::U32Shl.write_into(target), + Self::U32ShlImm(v) => { + OpCode::U32ShlImm.write_into(target); target.write_u8(*v); } - Self::U32CheckedRotr => 
OpCode::U32CheckedRotr.write_into(target), - Self::U32CheckedRotrImm(v) => { - OpCode::U32CheckedRotrImm.write_into(target); + Self::U32Rotr => OpCode::U32Rotr.write_into(target), + Self::U32RotrImm(v) => { + OpCode::U32RotrImm.write_into(target); target.write_u8(*v); } - Self::U32UncheckedRotr => OpCode::U32UncheckedRotr.write_into(target), - Self::U32UncheckedRotrImm(v) => { - OpCode::U32UncheckedRotrImm.write_into(target); + Self::U32Rotl => OpCode::U32Rotl.write_into(target), + Self::U32RotlImm(v) => { + OpCode::U32RotlImm.write_into(target); target.write_u8(*v); } - Self::U32CheckedRotl => OpCode::U32CheckedRotl.write_into(target), - Self::U32CheckedRotlImm(v) => { - OpCode::U32CheckedRotlImm.write_into(target); - target.write_u8(*v); - } - Self::U32UncheckedRotl => OpCode::U32UncheckedRotl.write_into(target), - Self::U32UncheckedRotlImm(v) => { - OpCode::U32UncheckedRotlImm.write_into(target); - target.write_u8(*v); - } - Self::U32CheckedPopcnt => OpCode::U32CheckedPopcnt.write_into(target), - Self::U32UncheckedPopcnt => OpCode::U32UncheckedPopcnt.write_into(target), - Self::U32CheckedEq => OpCode::U32CheckedEq.write_into(target), - Self::U32CheckedEqImm(v) => { - OpCode::U32CheckedEqImm.write_into(target); - target.write_u32(*v); - } - Self::U32CheckedNeq => OpCode::U32CheckedNeq.write_into(target), - Self::U32CheckedNeqImm(v) => { - OpCode::U32CheckedNeqImm.write_into(target); - target.write_u32(*v); - } - Self::U32CheckedLt => OpCode::U32CheckedLt.write_into(target), - Self::U32UncheckedLt => OpCode::U32UncheckedLt.write_into(target), - Self::U32CheckedLte => OpCode::U32CheckedLte.write_into(target), - Self::U32UncheckedLte => OpCode::U32UncheckedLte.write_into(target), - Self::U32CheckedGt => OpCode::U32CheckedGt.write_into(target), - Self::U32UncheckedGt => OpCode::U32UncheckedGt.write_into(target), - Self::U32CheckedGte => OpCode::U32CheckedGte.write_into(target), - Self::U32UncheckedGte => OpCode::U32UncheckedGte.write_into(target), - Self::U32CheckedMin => OpCode::U32CheckedMin.write_into(target), - Self::U32UncheckedMin => OpCode::U32UncheckedMin.write_into(target), - Self::U32CheckedMax => OpCode::U32CheckedMax.write_into(target), - Self::U32UncheckedMax => OpCode::U32UncheckedMax.write_into(target), + Self::U32Popcnt => OpCode::U32Popcnt.write_into(target), + Self::U32Clz => OpCode::U32Clz.write_into(target), + Self::U32Ctz => OpCode::U32Ctz.write_into(target), + Self::U32Clo => OpCode::U32Clo.write_into(target), + Self::U32Cto => OpCode::U32Cto.write_into(target), + Self::U32Lt => OpCode::U32Lt.write_into(target), + Self::U32Lte => OpCode::U32Lte.write_into(target), + Self::U32Gt => OpCode::U32Gt.write_into(target), + Self::U32Gte => OpCode::U32Gte.write_into(target), + Self::U32Min => OpCode::U32Min.write_into(target), + Self::U32Max => OpCode::U32Max.write_into(target), // ----- stack manipulation --------------------------------------------------------------- Self::Drop => OpCode::Drop.write_into(target), @@ -493,6 +431,7 @@ impl Serializable for Instruction { // ----- STARK proof verification ----------------------------------------------------- Self::FriExt2Fold4 => OpCode::FriExt2Fold4.write_into(target), + Self::RCombBase => OpCode::RCombBase.write_into(target), // ----- exec / call ------------------------------------------------------------------ Self::ExecLocal(v) => { @@ -521,6 +460,14 @@ impl Serializable for Instruction { } Self::DynExec => OpCode::DynExec.write_into(target), Self::DynCall => OpCode::DynCall.write_into(target), + Self::ProcRefLocal(v) => { + 
OpCode::ProcRefLocal.write_into(target); + target.write_u16(*v) + } + Self::ProcRefImported(imported) => { + OpCode::ProcRefImported.write_into(target); + imported.write_into(target) + } // ----- debug decorators ------------------------------------------------------------- Self::Breakpoint => { @@ -531,6 +478,17 @@ impl Serializable for Instruction { OpCode::Debug.write_into(target); debug::write_options_into(target, options); } + + // ----- event decorators ------------------------------------------------------------- + Self::Emit(event_id) => { + OpCode::Emit.write_into(target); + target.write_u32(*event_id); + } + + Self::Trace(trace_id) => { + OpCode::Trace.write_into(target); + target.write_u32(*trace_id); + } } } } diff --git a/assembly/src/ast/nodes/serde/signatures.rs b/assembly/src/ast/nodes/serde/signatures.rs index 48ba14bb0f..ebaf358562 100644 --- a/assembly/src/ast/nodes/serde/signatures.rs +++ b/assembly/src/ast/nodes/serde/signatures.rs @@ -1,4 +1,5 @@ -use super::{ByteReader, ByteWriter, DeserializationError, ToString}; +use super::{ByteReader, ByteWriter, DeserializationError}; +use crate::utils::string::*; use vm_core::SignatureKind; const RPOFALCON512: u8 = 0; diff --git a/assembly/src/ast/parsers/adv_ops.rs b/assembly/src/ast/parsers/adv_ops.rs index 4d758bf971..2ba6802145 100644 --- a/assembly/src/ast/parsers/adv_ops.rs +++ b/assembly/src/ast/parsers/adv_ops.rs @@ -23,7 +23,7 @@ pub fn parse_adv_inject(op: &Token) -> Result { let injector = match op.parts()[1] { "push_u64div" => match op.num_parts() { - 2 => AdvInject(PushU64div), + 2 => AdvInject(PushU64Div), _ => return Err(ParsingError::extra_param(op)), }, "push_ext2intt" => match op.num_parts() { diff --git a/assembly/src/ast/parsers/constants.rs b/assembly/src/ast/parsers/constants.rs index 339150efae..131aa26cbf 100644 --- a/assembly/src/ast/parsers/constants.rs +++ b/assembly/src/ast/parsers/constants.rs @@ -1,4 +1,5 @@ -use super::{Felt, LocalConstMap, ParsingError, StarkField, String, Token, Vec}; +use super::{Felt, LocalConstMap, ParsingError, Token}; +use crate::utils::{collections::*, string::*}; use core::fmt::Display; // CONSTANT VALUE EXPRESSIONS diff --git a/assembly/src/ast/parsers/context.rs b/assembly/src/ast/parsers/context.rs index f8ac5b1280..41c3dadb9f 100644 --- a/assembly/src/ast/parsers/context.rs +++ b/assembly/src/ast/parsers/context.rs @@ -1,10 +1,10 @@ use super::{ - super::ProcReExport, adv_ops, debug, field_ops, io_ops, stack_ops, sys_ops, u32_ops, CodeBody, - Instruction, InvocationTarget, LibraryPath, LocalConstMap, LocalProcMap, ModuleImports, Node, - ParsingError, ProcedureAst, ProcedureId, ProcedureName, ReExportedProcMap, Token, TokenStream, - MAX_BODY_LEN, MAX_DOCS_LEN, + super::ProcReExport, adv_ops, debug, events, field_ops, io_ops, stack_ops, sys_ops, u32_ops, + CodeBody, Instruction, InvocationTarget, LibraryPath, LocalConstMap, LocalProcMap, + ModuleImports, Node, ParsingError, ProcedureAst, ProcedureId, ProcedureName, ReExportedProcMap, + Token, TokenStream, MAX_BODY_LEN, MAX_DOCS_LEN, }; -use vm_core::utils::{collections::Vec, string::ToString}; +use crate::utils::{collections::*, string::*}; // PARSER CONTEXT // ================================================================================================ @@ -15,6 +15,7 @@ pub struct ParserContext<'a> { pub local_procs: LocalProcMap, pub reexported_procs: ReExportedProcMap, pub local_constants: LocalConstMap, + pub num_proc_locals: u16, } impl ParserContext<'_> { @@ -132,7 +133,7 @@ impl ParserContext<'_> { // record 
start of the repeat block and consume the 'repeat' token let repeat_start = tokens.pos(); let repeat_token = tokens.read().expect("no repeat token"); - let times = repeat_token.parse_repeat()?; + let times = repeat_token.parse_repeat(&self.local_constants)?; tokens.advance(); // read the loop body @@ -213,6 +214,26 @@ impl ParserContext<'_> { } } + // PROCREF PARSERS + // -------------------------------------------------------------------------------------------- + + /// Parse a `procref` token into an instruction node. + pub fn parse_procref(&mut self, token: &Token) -> Result { + match token.parse_invocation(token.parts()[0])? { + InvocationTarget::ProcedureName(proc_name) => { + let index = self.get_local_proc_index(proc_name, token)?; + let inner = Instruction::ProcRefLocal(index); + Ok(Node::Instruction(inner)) + } + InvocationTarget::ProcedurePath { name, module } => { + let proc_id = self.import_info.add_invoked_proc(&name, module, token)?; + let inner = Instruction::ProcRefImported(proc_id); + Ok(Node::Instruction(inner)) + } + _ => Err(ParsingError::invalid_param(token, 1)), + } + } + // PROCEDURE PARSERS // -------------------------------------------------------------------------------------------- @@ -290,9 +311,13 @@ impl ParserContext<'_> { None }; + self.num_proc_locals = num_locals; + // parse procedure body let body = self.parse_body(tokens, false)?; + self.num_proc_locals = 0; + // consume the 'end' token match tokens.read() { None => { @@ -452,6 +477,7 @@ impl ParserContext<'_> { "pow2" => simple_instruction(op, Pow2), "exp" => field_ops::parse_exp(op), + "ilog2" => simple_instruction(op, ILog2), "not" => simple_instruction(op, Not), "and" => simple_instruction(op, And), @@ -484,73 +510,52 @@ impl ParserContext<'_> { "u32cast" => simple_instruction(op, U32Cast), "u32split" => simple_instruction(op, U32Split), - "u32checked_add" => u32_ops::parse_u32checked_add(op), "u32wrapping_add" => u32_ops::parse_u32wrapping_add(op), "u32overflowing_add" => u32_ops::parse_u32overflowing_add(op), "u32overflowing_add3" => simple_instruction(op, U32OverflowingAdd3), "u32wrapping_add3" => simple_instruction(op, U32WrappingAdd3), - "u32checked_sub" => u32_ops::parse_u32checked_sub(op), "u32wrapping_sub" => u32_ops::parse_u32wrapping_sub(op), "u32overflowing_sub" => u32_ops::parse_u32overflowing_sub(op), - "u32checked_mul" => u32_ops::parse_u32checked_mul(op), "u32wrapping_mul" => u32_ops::parse_u32wrapping_mul(op), "u32overflowing_mul" => u32_ops::parse_u32overflowing_mul(op), "u32overflowing_madd" => simple_instruction(op, U32OverflowingMadd), "u32wrapping_madd" => simple_instruction(op, U32WrappingMadd), - "u32checked_div" => u32_ops::parse_u32_div(op, true), - "u32unchecked_div" => u32_ops::parse_u32_div(op, false), - - "u32checked_mod" => u32_ops::parse_u32_mod(op, true), - "u32unchecked_mod" => u32_ops::parse_u32_mod(op, false), - - "u32checked_divmod" => u32_ops::parse_u32_divmod(op, true), - "u32unchecked_divmod" => u32_ops::parse_u32_divmod(op, false), + "u32div" => u32_ops::parse_u32_div(op), - "u32checked_and" => simple_instruction(op, U32CheckedAnd), - "u32checked_or" => simple_instruction(op, U32CheckedOr), - "u32checked_xor" => simple_instruction(op, U32CheckedXor), - "u32checked_not" => simple_instruction(op, U32CheckedNot), + "u32mod" => u32_ops::parse_u32_mod(op), - "u32checked_shr" => u32_ops::parse_u32_shr(op, true), - "u32unchecked_shr" => u32_ops::parse_u32_shr(op, false), + "u32divmod" => u32_ops::parse_u32_divmod(op), - "u32checked_shl" => u32_ops::parse_u32_shl(op, 
true), - "u32unchecked_shl" => u32_ops::parse_u32_shl(op, false), + "u32and" => simple_instruction(op, U32And), + "u32or" => simple_instruction(op, U32Or), + "u32xor" => simple_instruction(op, U32Xor), + "u32not" => simple_instruction(op, U32Not), - "u32checked_rotr" => u32_ops::parse_u32_rotr(op, true), - "u32unchecked_rotr" => u32_ops::parse_u32_rotr(op, false), + "u32shr" => u32_ops::parse_u32_shr(op), + "u32shl" => u32_ops::parse_u32_shl(op), - "u32checked_rotl" => u32_ops::parse_u32_rotl(op, true), - "u32unchecked_rotl" => u32_ops::parse_u32_rotl(op, false), + "u32rotr" => u32_ops::parse_u32_rotr(op), + "u32rotl" => u32_ops::parse_u32_rotl(op), - "u32checked_popcnt" => simple_instruction(op, U32CheckedPopcnt), - "u32unchecked_popcnt" => simple_instruction(op, U32UncheckedPopcnt), + "u32popcnt" => simple_instruction(op, U32Popcnt), + "u32clz" => simple_instruction(op, U32Clz), + "u32ctz" => simple_instruction(op, U32Ctz), + "u32clo" => simple_instruction(op, U32Clo), + "u32cto" => simple_instruction(op, U32Cto), - "u32checked_eq" => u32_ops::parse_u32checked_eq(op), - "u32checked_neq" => u32_ops::parse_u32checked_neq(op), + "u32lt" => simple_instruction(op, U32Lt), + "u32lte" => simple_instruction(op, U32Lte), - "u32checked_lt" => simple_instruction(op, U32CheckedLt), - "u32unchecked_lt" => simple_instruction(op, U32UncheckedLt), + "u32gt" => simple_instruction(op, U32Gt), + "u32gte" => simple_instruction(op, U32Gte), - "u32checked_lte" => simple_instruction(op, U32CheckedLte), - "u32unchecked_lte" => simple_instruction(op, U32UncheckedLte), - - "u32checked_gt" => simple_instruction(op, U32CheckedGt), - "u32unchecked_gt" => simple_instruction(op, U32UncheckedGt), - - "u32checked_gte" => simple_instruction(op, U32CheckedGte), - "u32unchecked_gte" => simple_instruction(op, U32UncheckedGte), - - "u32checked_min" => simple_instruction(op, U32CheckedMin), - "u32unchecked_min" => simple_instruction(op, U32UncheckedMin), - - "u32checked_max" => simple_instruction(op, U32CheckedMax), - "u32unchecked_max" => simple_instruction(op, U32UncheckedMax), + "u32min" => simple_instruction(op, U32Min), + "u32max" => simple_instruction(op, U32Max), // ----- stack manipulation ----------------------------------------------------------- "drop" => simple_instruction(op, Drop), @@ -609,7 +614,9 @@ impl ParserContext<'_> { "mtree_merge" => simple_instruction(op, MTreeMerge), "mtree_verify" => simple_instruction(op, MTreeVerify), + // ----- STARK proof verification ----------------------------------------------------- "fri_ext2fold4" => simple_instruction(op, FriExt2Fold4), + "rcomb_base" => simple_instruction(op, RCombBase), // ----- procedure invocations -------------------------------------------------------- "exec" => self.parse_exec(op), @@ -617,13 +624,18 @@ impl ParserContext<'_> { "syscall" => self.parse_syscall(op), "dynexec" => simple_instruction(op, DynExec), "dyncall" => simple_instruction(op, DynCall), + "procref" => self.parse_procref(op), // ----- constant statements ---------------------------------------------------------- "const" => Err(ParsingError::const_invalid_scope(op)), // ----- debug decorators ------------------------------------------------------------- "breakpoint" => simple_instruction(op, Breakpoint), - "debug" => debug::parse_debug(op), + "debug" => debug::parse_debug(op, self.num_proc_locals), + + // ----- event decorators ------------------------------------------------------------- + "emit" => events::parse_emit(op, &self.local_constants), + "trace" => events::parse_trace(op, 
&self.local_constants), // ----- catch all -------------------------------------------------------------------- _ => Err(ParsingError::invalid_op(op)), diff --git a/assembly/src/ast/parsers/debug.rs b/assembly/src/ast/parsers/debug.rs index ef7158c784..a87f7e9fcf 100644 --- a/assembly/src/ast/parsers/debug.rs +++ b/assembly/src/ast/parsers/debug.rs @@ -14,7 +14,7 @@ use vm_core::DebugOptions; /// # Errors /// Returns an error if the instruction token contains a wrong number of parameters, or if /// the provided parameters are not valid. -pub fn parse_debug(op: &Token) -> Result { +pub fn parse_debug(op: &Token, num_proc_locals: u16) -> Result { debug_assert_eq!(op.parts()[0], "debug"); if op.num_parts() < 2 { return Err(ParsingError::missing_param(op, "debug.stack.")); @@ -29,6 +29,38 @@ pub fn parse_debug(op: &Token) -> Result { } _ => return Err(ParsingError::extra_param(op)), }, + "mem" => match op.num_parts() { + 2 => DebugOptions::MemAll, + 3 => { + let n: u32 = parse_checked_param(op, 2, 1..=u32::MAX)?; + DebugOptions::MemInterval(n, n) + } + 4 => { + let n: u32 = parse_checked_param(op, 2, 0..=u32::MAX)?; + let m: u32 = parse_checked_param(op, 3, 0..=u32::MAX)?; + if m < n { + return Err(ParsingError::invalid_param_with_reason(op, 3, "the index of the end of the interval must be greater than the index of its beginning")); + } + DebugOptions::MemInterval(n, m) + } + _ => return Err(ParsingError::extra_param(op)), + }, + "local" => match op.num_parts() { + 2 => DebugOptions::LocalInterval(0, u16::MAX, num_proc_locals), + 3 => { + let n: u16 = parse_checked_param(op, 2, 0..=u16::MAX)?; + DebugOptions::LocalInterval(n, n, num_proc_locals) + } + 4 => { + let n: u16 = parse_checked_param(op, 2, 0..=u16::MAX)?; + let m: u16 = parse_checked_param(op, 3, 0..=u16::MAX)?; + if m < n { + return Err(ParsingError::invalid_param_with_reason(op, 3, "the index of the end of the interval must be greater than the index of its beginning")); + } + DebugOptions::LocalInterval(n, m, num_proc_locals) + } + _ => return Err(ParsingError::extra_param(op)), + }, _ => return Err(ParsingError::invalid_op(op)), }; diff --git a/assembly/src/ast/parsers/events.rs b/assembly/src/ast/parsers/events.rs new file mode 100644 index 0000000000..344468f5a9 --- /dev/null +++ b/assembly/src/ast/parsers/events.rs @@ -0,0 +1,51 @@ +use super::{ + parse_param_with_constant_lookup, + Instruction::*, + LocalConstMap, + Node::{self, Instruction}, + ParsingError, Token, +}; + +// EMIT PARSER +// ================================================================================================ + +/// Returns `Emit` instruction node with the parsed `event_id`. +/// +/// The `event_id` can be provided as a constant label or as a u32 value. +/// +/// # Errors +/// Returns an error if the constant does not exist or if the value is not a u32. +pub fn parse_emit(op: &Token, constants: &LocalConstMap) -> Result { + debug_assert_eq!(op.parts()[0], "emit"); + match op.num_parts() { + 0 => unreachable!(), + 1 => Err(ParsingError::missing_param(op, "emit.")), + 2 => { + let event_id = parse_param_with_constant_lookup(op, 1, constants)?; + Ok(Instruction(Emit(event_id))) + } + _ => Err(ParsingError::extra_param(op)), + } +} + +// TRACE PARSER +// ================================================================================================ + +/// Returns `Trace` instruction node with the parsed `trace_id`. +/// +/// The `trace_id` can be provided as a constant label or as a u32 value. 
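
Both parsers in this new `events` module accept either a u32 literal or a constant label (resolved through `parse_param_with_constant_lookup`). As a rough sketch of how the new decorators read in Miden assembly source, with the constant and procedure names invented purely for illustration:

// Hypothetical MASM fragment, held in a Rust string for reference only; the
// comments note the AST nodes the parsers in this module are expected to produce.
const EVENT_DECORATOR_EXAMPLE: &str = "
const.WALLET_EVENT=42
proc.notify
    emit.WALLET_EVENT   # -> Instruction::Emit(42)
    trace.100           # -> Instruction::Trace(100)
end
";

fn main() {
    println!("{}", EVENT_DECORATOR_EXAMPLE);
}
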
+/// +/// # Errors +/// Returns an error if the constant does not exist or if the value is not a u32. +pub fn parse_trace(op: &Token, constants: &LocalConstMap) -> Result { + debug_assert_eq!(op.parts()[0], "trace"); + match op.num_parts() { + 0 => unreachable!(), + 1 => Err(ParsingError::missing_param(op, "trace.")), + 2 => { + let trace_id = parse_param_with_constant_lookup(op, 1, constants)?; + Ok(Instruction(Trace(trace_id))) + } + _ => Err(ParsingError::extra_param(op)), + } +} diff --git a/assembly/src/ast/parsers/io_ops.rs b/assembly/src/ast/parsers/io_ops.rs index 9b2eb1f179..2a23a9af68 100644 --- a/assembly/src/ast/parsers/io_ops.rs +++ b/assembly/src/ast/parsers/io_ops.rs @@ -1,21 +1,14 @@ use super::{ - parse_checked_param, parse_param_with_constant_lookup, Felt, + parse_checked_param, parse_hex_value, parse_param_with_constant_lookup, Endianness, Felt, Instruction::*, LocalConstMap, Node::{self, Instruction}, - ParsingError, Token, Vec, CONSTANT_LABEL_PARSER, + ParsingError, Token, CONSTANT_LABEL_PARSER, HEX_CHUNK_SIZE, }; -use crate::{StarkField, ADVICE_READ_LIMIT, HEX_CHUNK_SIZE, MAX_PUSH_INPUTS}; -use core::{convert::TryFrom, ops::RangeBounds}; +use crate::{utils::collections::*, StarkField, ADVICE_READ_LIMIT, MAX_PUSH_INPUTS}; +use core::ops::RangeBounds; use vm_core::WORD_SIZE; -/// Helper enum for endianness determination in the parsing functions. -#[derive(Debug)] -enum Endianness { - Little, - Big, -} - // CONSTANTS // ================================================================================================ @@ -315,67 +308,6 @@ fn parse_long_hex_param(op: &Token, hex_str: &str) -> Result build_push_many_instruction(values) } -/// Parses a hexadecimal parameter value into a u64. -/// -/// # Errors -/// Returns an error if: -/// - The length of a short hex string (big-endian) is not even. -/// - The length of a short hex string (big-endian) is greater than 16. -/// - The length of the chunk of a long hex string (little-endian) is not equal to 16. -/// - If the string does not contain a valid hexadecimal value. -/// - If the parsed value is greater than or equal to the field modulus. -fn parse_hex_value( - op: &Token, - hex_str: &str, - param_idx: usize, - endianness: Endianness, -) -> Result { - let value = match endianness { - Endianness::Big => { - if hex_str.len() % 2 != 0 { - return Err(ParsingError::invalid_param_with_reason( - op, - param_idx, - &format!( - "hex string '{hex_str}' does not contain an even number of characters" - ), - )); - } - if hex_str.len() > HEX_CHUNK_SIZE { - return Err(ParsingError::invalid_param_with_reason( - op, - param_idx, - &format!("hex string '{hex_str}' contains too many characters"), - )); - } - u64::from_str_radix(hex_str, 16) - .map_err(|_| ParsingError::invalid_param(op, param_idx))? - } - Endianness::Little => { - if hex_str.len() != HEX_CHUNK_SIZE { - return Err(ParsingError::invalid_param_with_reason( - op, - param_idx, - &format!("hex string chunk '{hex_str}' must contain exactly 16 characters"), - )); - } - u64::from_str_radix(hex_str, 16) - .map(|v| v.swap_bytes()) - .map_err(|_| ParsingError::invalid_param(op, param_idx))? 
- } - }; - - if value >= Felt::MODULUS { - Err(ParsingError::invalid_param_with_reason( - op, - param_idx, - &format!("hex string '{hex_str}' contains value greater than field modulus"), - )) - } else { - Ok(value) - } -} - /// Determines the minimal type appropriate for provided value and returns appropriate instruction /// for this value fn build_push_one_instruction(value: u64) -> Result { diff --git a/assembly/src/ast/parsers/labels.rs b/assembly/src/ast/parsers/labels.rs index 2d6171ea06..61ea86445e 100644 --- a/assembly/src/ast/parsers/labels.rs +++ b/assembly/src/ast/parsers/labels.rs @@ -1,4 +1,5 @@ -use super::{Deserializable, LabelError, RpoDigest, SliceReader, ToString, Vec, MAX_LABEL_LEN}; +use super::{Deserializable, LabelError, RpoDigest, SliceReader, MAX_LABEL_LEN}; +use crate::utils::{collections::*, string::*}; // LABEL PARSERS // ================================================================================================ diff --git a/assembly/src/ast/parsers/mod.rs b/assembly/src/ast/parsers/mod.rs index 7b86042959..1b8c8f39cd 100644 --- a/assembly/src/ast/parsers/mod.rs +++ b/assembly/src/ast/parsers/mod.rs @@ -2,13 +2,18 @@ use super::{ bound_into_included_u64, AdviceInjectorNode, CodeBody, Deserializable, Felt, Instruction, InvocationTarget, LabelError, LibraryPath, LocalConstMap, LocalProcMap, ModuleImports, Node, ParsingError, ProcedureAst, ProcedureId, ProcedureName, ReExportedProcMap, RpoDigest, - SliceReader, StarkField, String, ToString, Token, TokenStream, Vec, MAX_BODY_LEN, MAX_DOCS_LEN, - MAX_LABEL_LEN, MAX_STACK_WORD_OFFSET, + SliceReader, StarkField, Token, TokenStream, MAX_BODY_LEN, MAX_DOCS_LEN, MAX_LABEL_LEN, + MAX_STACK_WORD_OFFSET, +}; +use crate::{ + utils::{collections::*, string::*}, + HEX_CHUNK_SIZE, }; use core::{fmt::Display, ops::RangeBounds}; mod adv_ops; mod debug; +mod events; mod field_ops; mod io_ops; mod stack_ops; @@ -27,6 +32,13 @@ pub use labels::{ PROCEDURE_LABEL_PARSER, }; +/// Helper enum for endianness determination in the parsing functions. +#[derive(Debug)] +pub enum Endianness { + Little, + Big, +} + // PARSERS FUNCTIONS // ================================================================================================ @@ -108,7 +120,10 @@ fn parse_const_value( ) -> Result { let result = match const_value.parse::() { Ok(value) => value, - Err(_) => calculate_const_value(op, const_value, constants)?.as_int(), + Err(_) => match const_value.strip_prefix("0x") { + Some(param_str) => parse_hex_value(op, param_str, 1, Endianness::Big)?, + None => calculate_const_value(op, const_value, constants)?.as_int(), + }, }; if result >= Felt::MODULUS { @@ -121,7 +136,7 @@ fn parse_const_value( /// Parses a param from the op token with the specified type and index. If the param is a constant /// label, it will be looked up in the provided constant map. -fn parse_param_with_constant_lookup( +pub(crate) fn parse_param_with_constant_lookup( op: &Token, param_idx: usize, constants: &LocalConstMap, @@ -216,3 +231,64 @@ fn parse_error_code(token: &Token, constants: &LocalConstMap) -> Result Err(ParsingError::extra_param(token)), } } + +/// Parses a hexadecimal parameter value into a u64. +/// +/// # Errors +/// Returns an error if: +/// - The length of a short hex string (big-endian) is not even. +/// - The length of a short hex string (big-endian) is greater than 16. +/// - The length of the chunk of a long hex string (little-endian) is not equal to 16. +/// - If the string does not contain a valid hexadecimal value. 
+/// - If the parsed value is greater than or equal to the field modulus. +fn parse_hex_value( + op: &Token, + hex_str: &str, + param_idx: usize, + endianness: Endianness, +) -> Result { + let value = match endianness { + Endianness::Big => { + if hex_str.len() % 2 != 0 { + return Err(ParsingError::invalid_param_with_reason( + op, + param_idx, + &format!( + "hex string '{hex_str}' does not contain an even number of characters" + ), + )); + } + if hex_str.len() > HEX_CHUNK_SIZE { + return Err(ParsingError::invalid_param_with_reason( + op, + param_idx, + &format!("hex string '{hex_str}' contains too many characters"), + )); + } + u64::from_str_radix(hex_str, 16) + .map_err(|_| ParsingError::invalid_param(op, param_idx))? + } + Endianness::Little => { + if hex_str.len() != HEX_CHUNK_SIZE { + return Err(ParsingError::invalid_param_with_reason( + op, + param_idx, + &format!("hex string chunk '{hex_str}' must contain exactly 16 characters"), + )); + } + u64::from_str_radix(hex_str, 16) + .map(|v| v.swap_bytes()) + .map_err(|_| ParsingError::invalid_param(op, param_idx))? + } + }; + + if value >= Felt::MODULUS { + Err(ParsingError::invalid_param_with_reason( + op, + param_idx, + &format!("hex string '{hex_str}' contains value greater than field modulus"), + )) + } else { + Ok(value) + } +} diff --git a/assembly/src/ast/parsers/u32_ops.rs b/assembly/src/ast/parsers/u32_ops.rs index bbce3bfea9..50714b2fc8 100644 --- a/assembly/src/ast/parsers/u32_ops.rs +++ b/assembly/src/ast/parsers/u32_ops.rs @@ -79,25 +79,6 @@ pub fn parse_u32assertw(op: &Token, constants: &LocalConstMap) -> Result Result { - debug_assert_eq!(op.parts()[0], "u32checked_add"); - match op.num_parts() { - 0 => unreachable!(), - 1 => Ok(Instruction(U32CheckedAdd)), - 2 => { - let value = parse_param::(op, 1)?; - Ok(Instruction(U32CheckedAddImm(value))) - } - _ => Err(ParsingError::extra_param(op)), - } -} - /// Returns `U32WrappingAdd` instruction node if no immediate value is provided or /// `U32WrappingAddImm` instruction node otherwise. /// @@ -136,24 +117,6 @@ pub fn parse_u32overflowing_add(op: &Token) -> Result { } } -/// Returns `U32CheckedSub` instruction node if no immediate value is provided or -/// `U32CheckedSubImm` instruction node otherwise. -/// -/// # Errors -/// Returns an error if the instruction token contains wrong number of parameters, or if the -/// provided parameter is not a u32 value. -pub fn parse_u32checked_sub(op: &Token) -> Result { - debug_assert_eq!(op.parts()[0], "u32checked_sub"); - match op.num_parts() { - 1 => Ok(Instruction(U32CheckedSub)), - 2 => { - let value = parse_param::(op, 1)?; - Ok(Instruction(U32CheckedSubImm(value))) - } - _ => Err(ParsingError::extra_param(op)), - } -} - /// Returns `U32WrappingSub` instruction node if no immediate value is provided or /// `U32WrappingSubImm` instruction node otherwise. /// @@ -192,25 +155,6 @@ pub fn parse_u32overflowing_sub(op: &Token) -> Result { } } -/// Returns `U32CheckedMul` instruction node if no immediate value is provided or -/// `U32CheckedMulImm` instruction node otherwise. -/// -/// # Errors -/// Returns an error if the instruction token contains wrong number of parameters, or if the -/// provided parameter is not a u32 value. 
-pub fn parse_u32checked_mul(op: &Token) -> Result { - debug_assert_eq!(op.parts()[0], "u32checked_mul"); - match op.num_parts() { - 0 => unreachable!(), - 1 => Ok(Instruction(U32CheckedMul)), - 2 => { - let value = parse_param::(op, 1)?; - Ok(Instruction(U32CheckedMulImm(value))) - } - _ => Err(ParsingError::extra_param(op)), - } -} - /// Returns `U32WrappingMul` instruction node if no immediate value is provided or /// `U32WrappingMulImm` instruction node otherwise. /// @@ -249,260 +193,137 @@ pub fn parse_u32overflowing_mul(op: &Token) -> Result { } } -/// Returns one of four possible instructions: -/// - checked without parameter: `U32CheckedDiv` -/// - unchecked without parameter: `U32UncheckedDiv` -/// - checked with parameter: `U32CheckedDivImm` -/// - unchecked with parameter: `U32UncheckedDivImm` +/// Returns one of two possible instructions: +/// - division without parameter: `U32Div` +/// - division with parameter: `U32DivImm` /// /// # Errors /// Returns an error if the instruction token contains wrong number of parameters, or if the /// provided parameter is not a u32 value. -pub fn parse_u32_div(op: &Token, checked: bool) -> Result { - //debug_assert_eq!("u32checked_div", op.parts()[0], "not a u32checked_div"); +pub fn parse_u32_div(op: &Token) -> Result { match op.num_parts() { 0 => unreachable!(), - 1 => { - if checked { - Ok(Instruction(U32CheckedDiv)) - } else { - Ok(Instruction(U32UncheckedDiv)) - } - } + 1 => Ok(Instruction(U32Div)), 2 => { let value = parse_param::(op, 1)?; check_div_by_zero(value.into(), op, 1)?; - if checked { - Ok(Instruction(U32CheckedDivImm(value))) - } else { - Ok(Instruction(U32UncheckedDivImm(value))) - } + Ok(Instruction(U32DivImm(value))) } _ => Err(ParsingError::extra_param(op)), } } -/// Returns one of four possible instructions: -/// - checked without parameter: `U32CheckedMod` -/// - unchecked without parameter: `U32UncheckedMod` -/// - checked with parameter: `U32CheckedModImm` -/// - unchecked with parameter: `U32UncheckedModImm` +/// Returns one of two possible instructions: +/// - module without parameter: `U32Mod` +/// - module with parameter: `U32ModImm` /// /// # Errors /// Returns an error if the instruction token contains wrong number of parameters, or if the /// provided parameter is not a u32 value. -pub fn parse_u32_mod(op: &Token, checked: bool) -> Result { +pub fn parse_u32_mod(op: &Token) -> Result { match op.num_parts() { 0 => unreachable!(), - 1 => { - if checked { - Ok(Instruction(U32CheckedMod)) - } else { - Ok(Instruction(U32UncheckedMod)) - } - } + 1 => Ok(Instruction(U32Mod)), 2 => { let value = parse_param::(op, 1)?; check_div_by_zero(value.into(), op, 1)?; - if checked { - Ok(Instruction(U32CheckedModImm(value))) - } else { - Ok(Instruction(U32UncheckedModImm(value))) - } + Ok(Instruction(U32ModImm(value))) } _ => Err(ParsingError::extra_param(op)), } } -/// Returns one of four possible instructions: -/// - checked without parameter: `U32CheckedDivMod` -/// - unchecked without parameter: `U32UncheckedDivMod` -/// - checked with parameter: `U32CheckedDivModImm` -/// - unchecked with parameter: `U32UncheckedDivModImm` +/// Returns one of two possible instructions: +/// - DivMod without parameter: `U32DivMod` +/// - DivMod with parameter: `U32DivModImm` /// /// # Errors /// Returns an error if the instruction token contains wrong number of parameters, or if the /// provided parameter is not a u32 value. 
-pub fn parse_u32_divmod(op: &Token, checked: bool) -> Result { +pub fn parse_u32_divmod(op: &Token) -> Result { match op.num_parts() { 0 => unreachable!(), - 1 => { - if checked { - Ok(Instruction(U32CheckedDivMod)) - } else { - Ok(Instruction(U32UncheckedDivMod)) - } - } + 1 => Ok(Instruction(U32DivMod)), 2 => { let value = parse_param::(op, 1)?; check_div_by_zero(value.into(), op, 1)?; - if checked { - Ok(Instruction(U32CheckedDivModImm(value))) - } else { - Ok(Instruction(U32UncheckedDivModImm(value))) - } + Ok(Instruction(U32DivModImm(value))) } _ => Err(ParsingError::extra_param(op)), } } -/// Returns one of four possible instructions: -/// - checked without parameter: `U32CheckedShr` -/// - unchecked without parameter: `U32UncheckedShr` -/// - checked with parameter: `U32CheckedShrImm` -/// - unchecked with parameter: `U32UncheckedShrImm` +/// Returns one of two possible instructions: +/// - shift right without parameter: `U32Shr` +/// - shift right with parameter: `U32ShrImm` /// /// # Errors /// Returns an error if the instruction token contains wrong number of parameters, or if the /// provided parameter is greater than 31. -pub fn parse_u32_shr(op: &Token, checked: bool) -> Result { +pub fn parse_u32_shr(op: &Token) -> Result { match op.num_parts() { 0 => unreachable!(), - 1 => { - if checked { - Ok(Instruction(U32CheckedShr)) - } else { - Ok(Instruction(U32UncheckedShr)) - } - } + 1 => Ok(Instruction(U32Shr)), 2 => { let n = parse_checked_param::(op, 1, 0..=MAX_U32_SHIFT_VALUE)?; - if checked { - Ok(Instruction(U32CheckedShrImm(n))) - } else { - Ok(Instruction(U32UncheckedShrImm(n))) - } + Ok(Instruction(U32ShrImm(n))) } _ => Err(ParsingError::extra_param(op)), } } -/// Returns one of four possible instructions: -/// - checked without parameter: `U32CheckedShl` -/// - unchecked without parameter: `U32UncheckedShl` -/// - checked with parameter: `U32CheckedShlImm` -/// - unchecked with parameter: `U32UncheckedShlImm` +/// Returns one of two possible instructions: +/// - shift left without parameter: `U32Shl` +/// - shift left with parameter: `U32ShlImm` /// /// # Errors /// Returns an error if the instruction token contains wrong number of parameters, or if the /// provided parameter is greater than 31. -pub fn parse_u32_shl(op: &Token, checked: bool) -> Result { +pub fn parse_u32_shl(op: &Token) -> Result { match op.num_parts() { 0 => unreachable!(), - 1 => { - if checked { - Ok(Instruction(U32CheckedShl)) - } else { - Ok(Instruction(U32UncheckedShl)) - } - } + 1 => Ok(Instruction(U32Shl)), 2 => { let n = parse_checked_param::(op, 1, 0..=MAX_U32_SHIFT_VALUE)?; - if checked { - Ok(Instruction(U32CheckedShlImm(n))) - } else { - Ok(Instruction(U32UncheckedShlImm(n))) - } + Ok(Instruction(U32ShlImm(n))) } _ => Err(ParsingError::extra_param(op)), } } -/// Returns one of four possible instructions: -/// - checked without parameter: `U32CheckedRotr` -/// - unchecked without parameter: `U32UncheckedRotr` -/// - checked with parameter: `U32CheckedRotrImm` -/// - unchecked with parameter: `U32UncheckedRotrImm` +/// Returns one of two possible instructions: +/// - rotation right without parameter: `U32Rotr` +/// - rotation right with parameter: `U32RotrImm` /// /// # Errors /// Returns an error if the instruction token contains wrong number of parameters, or if the /// provided parameter is greater than 31. 
-pub fn parse_u32_rotr(op: &Token, checked: bool) -> Result { +pub fn parse_u32_rotr(op: &Token) -> Result { match op.num_parts() { 0 => unreachable!(), - 1 => { - if checked { - Ok(Instruction(U32CheckedRotr)) - } else { - Ok(Instruction(U32UncheckedRotr)) - } - } + 1 => Ok(Instruction(U32Rotr)), 2 => { let n = parse_checked_param::(op, 1, 0..=MAX_U32_ROTATE_VALUE)?; - if checked { - Ok(Instruction(U32CheckedRotrImm(n))) - } else { - Ok(Instruction(U32UncheckedRotrImm(n))) - } + Ok(Instruction(U32RotrImm(n))) } _ => Err(ParsingError::extra_param(op)), } } -/// Returns one of four possible instructions: -/// - checked without parameter: `U32CheckedRotl` -/// - unchecked without parameter: `U32UncheckedRotl` -/// - checked with parameter: `U32CheckedRotlImm` -/// - unchecked with parameter: `U32UncheckedRotlImm` +/// Returns one of two possible instructions: +/// - rotation left without parameter: `U32Rotl` +/// - rotation left with parameter: `U32RotlImm` /// /// # Errors /// Returns an error if the instruction token contains wrong number of parameters, or if the /// provided parameter is greater than 31. -pub fn parse_u32_rotl(op: &Token, checked: bool) -> Result { +pub fn parse_u32_rotl(op: &Token) -> Result { match op.num_parts() { 0 => unreachable!(), - 1 => { - if checked { - Ok(Instruction(U32CheckedRotl)) - } else { - Ok(Instruction(U32UncheckedRotl)) - } - } + 1 => Ok(Instruction(U32Rotl)), 2 => { let n = parse_checked_param::(op, 1, 0..=MAX_U32_ROTATE_VALUE)?; - if checked { - Ok(Instruction(U32CheckedRotlImm(n))) - } else { - Ok(Instruction(U32UncheckedRotlImm(n))) - } - } - _ => Err(ParsingError::extra_param(op)), - } -} - -/// Returns `U32CheckedEq` instruction node if no immediate value is provided or -/// `U32CheckedEqImm` instruction node otherwise. -/// -/// # Errors -/// Returns an error if the instruction token contains wrong number of parameters, or if the -/// provided parameter is not a u32 value. -pub fn parse_u32checked_eq(op: &Token) -> Result { - debug_assert_eq!(op.parts()[0], "u32checked_eq"); - match op.num_parts() { - 0 => unreachable!(), - 1 => Ok(Instruction(U32CheckedEq)), - 2 => { - let value = parse_param::(op, 1)?; - Ok(Instruction(U32CheckedEqImm(value))) - } - _ => Err(ParsingError::extra_param(op)), - } -} - -/// Returns `U32CheckedNeq` instruction node if no immediate value is provided or -/// `U32CheckedNeqImm` instruction node otherwise. -/// -/// # Errors -/// Returns an error if the instruction token contains wrong number of parameters, or if the -/// provided parameter is not a u32 value. 
-pub fn parse_u32checked_neq(op: &Token) -> Result { - debug_assert_eq!(op.parts()[0], "u32checked_neq"); - match op.num_parts() { - 0 => unreachable!(), - 1 => Ok(Instruction(U32CheckedNeq)), - 2 => { - let value = parse_param::(op, 1)?; - Ok(Instruction(U32CheckedNeqImm(value))) + Ok(Instruction(U32RotlImm(n))) } _ => Err(ParsingError::extra_param(op)), } diff --git a/assembly/src/ast/procedure.rs b/assembly/src/ast/procedure.rs new file mode 100644 index 0000000000..4ad93afce2 --- /dev/null +++ b/assembly/src/ast/procedure.rs @@ -0,0 +1,250 @@ +use crate::ast::{MAX_BODY_LEN, MAX_DOCS_LEN}; + +use super::{ + super::tokens::SourceLocation, code_body::CodeBody, nodes::Node, ByteReader, ByteWriter, + Deserializable, DeserializationError, LibraryPath, ProcedureId, ProcedureName, Serializable, +}; +use crate::utils::{collections::*, string::*}; +use core::{iter, str::from_utf8}; + +// PROCEDURE AST +// ================================================================================================ + +/// An abstract syntax tree of a Miden procedure. +/// +/// A procedure AST consists of a list of body nodes and additional metadata about the procedure +/// (e.g., procedure name, number of memory locals used by the procedure, and whether a procedure +/// is exported or internal). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ProcedureAst { + pub name: ProcedureName, + pub docs: Option, + pub num_locals: u16, + pub body: CodeBody, + pub start: SourceLocation, + pub is_export: bool, +} + +impl ProcedureAst { + // CONSTRUCTORS + // -------------------------------------------------------------------------------------------- + /// Constructs a [ProcedureAst]. + /// + /// A procedure consists of a name, a number of locals, a body, and a flag to signal whether + /// the procedure is exported. + pub fn new( + name: ProcedureName, + num_locals: u16, + body: Vec, + is_export: bool, + docs: Option, + ) -> Self { + let start = SourceLocation::default(); + let body = CodeBody::new(body); + Self { + name, + docs, + num_locals, + body, + is_export, + start, + } + } + + /// Binds the provided `locations` into the ast nodes. + /// + /// The `start` location points to the first node of this block. + pub fn with_source_locations(mut self, locations: L, start: SourceLocation) -> Self + where + L: IntoIterator, + { + self.start = start; + self.body = self.body.with_source_locations(locations); + self + } + + // PUBLIC ACCESSORS + // -------------------------------------------------------------------------------------------- + + /// Returns the [SourceLocation] associated with this procedure, if present. + pub fn source_locations(&self) -> impl Iterator { + iter::once(&self.start).chain(self.body.source_locations().iter()) + } + + // STATE MUTATORS + // -------------------------------------------------------------------------------------------- + + /// Clears the source locations from this Ast. + pub fn clear_locations(&mut self) { + self.start = SourceLocation::default(); + self.body.clear_locations(); + } + + // SERIALIZATION / DESERIALIZATION + // -------------------------------------------------------------------------------------------- + + /// Loads the [SourceLocation] from the `source`. + /// + /// It expects the `start` location at the first position, and will subsequently load the + /// body via [CodeBody::load_source_locations]. 
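+ /// A round-trip sketch mirroring the serialization tests (the `proc` binding is illustrative
+ /// and error handling is elided):
+ ///
+ /// ```ignore
+ /// let mut bytes = Vec::new();
+ /// proc.write_source_locations(&mut bytes);
+ /// proc.clear_locations();
+ /// proc.load_source_locations(&mut SliceReader::new(&bytes)).unwrap();
+ /// ```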
+ pub fn load_source_locations( + &mut self, + source: &mut R, + ) -> Result<(), DeserializationError> { + self.start = SourceLocation::read_from(source)?; + self.body.load_source_locations(source)?; + Ok(()) + } + + /// Writes the [SourceLocation] into `target`. + /// + /// It will write the `start` location, and then execute the body serialization via + /// [CodeBlock::write_source_locations]. + pub fn write_source_locations(&self, target: &mut W) { + self.start.write_into(target); + self.body.write_source_locations(target); + } +} + +impl Serializable for ProcedureAst { + fn write_into(&self, target: &mut W) { + // asserts below are OK because we enforce limits on the procedure body size and length of + // procedure docs in the procedure parser + + self.name.write_into(target); + match &self.docs { + Some(docs) => { + assert!(docs.len() <= MAX_DOCS_LEN, "docs too long"); + target.write_u16(docs.len() as u16); + target.write_bytes(docs.as_bytes()); + } + None => { + target.write_u16(0); + } + } + + target.write_bool(self.is_export); + target.write_u16(self.num_locals); + assert!(self.body.nodes().len() <= MAX_BODY_LEN, "too many body instructions"); + target.write_u16(self.body.nodes().len() as u16); + target.write_many(self.body.nodes()); + } +} + +impl Deserializable for ProcedureAst { + fn read_from(source: &mut R) -> Result { + let name = ProcedureName::read_from(source)?; + let docs_len = source.read_u16()? as usize; + let docs = if docs_len != 0 { + let str = source.read_vec(docs_len)?; + let str = + from_utf8(&str).map_err(|e| DeserializationError::InvalidValue(e.to_string()))?; + Some(str.to_string()) + } else { + None + }; + + let is_export = source.read_bool()?; + let num_locals = source.read_u16()?; + let body_len = source.read_u16()? as usize; + let nodes = source.read_many::(body_len)?; + let body = CodeBody::new(nodes); + let start = SourceLocation::default(); + Ok(Self { + name, + num_locals, + body, + start, + is_export, + docs, + }) + } +} + +// PROCEDURE RE-EXPORT +// ================================================================================================ + +/// Represents a re-exported procedure. +/// +/// A re-exported procedure is a procedure that is defined in a different module in the same +/// library or a different library and re-exported with the same or a different name. The +/// re-exported procedure is not copied into the module, but rather a reference to it is added to +/// the [ModuleAST]. +#[derive(Default, Debug, Clone, PartialEq, Eq)] +pub struct ProcReExport { + pub(crate) proc_id: ProcedureId, + pub(crate) name: ProcedureName, + pub(crate) docs: Option, +} + +impl ProcReExport { + /// Creates a new re-exported procedure. + pub fn new(proc_id: ProcedureId, name: ProcedureName, docs: Option) -> Self { + Self { + proc_id, + name, + docs, + } + } + + // PUBLIC ACCESSORS + // -------------------------------------------------------------------------------------------- + + /// Returns the ID of the re-exported procedure. + pub fn proc_id(&self) -> ProcedureId { + self.proc_id + } + + /// Returns the name of the re-exported procedure. + pub fn name(&self) -> &ProcedureName { + &self.name + } + + /// Returns the documentation of the re-exported procedure, if present. + pub fn docs(&self) -> Option<&str> { + self.docs.as_deref() + } + + /// Returns the ID of the re-exported procedure using the specified module. 
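+ /// A hypothetical sketch of computing the ID a re-exported procedure would get under a new
+ /// module path (the names are illustrative):
+ ///
+ /// ```ignore
+ /// let alias_path = LibraryPath::new("my_lib::math").unwrap();
+ /// let alias_id = reexport.get_alias_id(&alias_path);
+ /// assert_eq!(alias_id, ProcedureId::from_name(reexport.name(), &alias_path));
+ /// ```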
+ pub fn get_alias_id(&self, module_path: &LibraryPath) -> ProcedureId { + ProcedureId::from_name(&self.name, module_path) + } +} + +impl Serializable for ProcReExport { + fn write_into(&self, target: &mut W) { + self.proc_id.write_into(target); + self.name.write_into(target); + match &self.docs { + Some(docs) => { + assert!(docs.len() <= MAX_DOCS_LEN, "docs too long"); + target.write_u16(docs.len() as u16); + target.write_bytes(docs.as_bytes()); + } + None => { + target.write_u16(0); + } + } + } +} + +impl Deserializable for ProcReExport { + fn read_from(source: &mut R) -> Result { + let proc_id = ProcedureId::read_from(source)?; + let name = ProcedureName::read_from(source)?; + let docs_len = source.read_u16()? as usize; + let docs = if docs_len != 0 { + let str = source.read_vec(docs_len)?; + let str = + from_utf8(&str).map_err(|e| DeserializationError::InvalidValue(e.to_string()))?; + Some(str.to_string()) + } else { + None + }; + Ok(Self { + proc_id, + name, + docs, + }) + } +} diff --git a/assembly/src/ast/program.rs b/assembly/src/ast/program.rs new file mode 100644 index 0000000000..26139ab9ad --- /dev/null +++ b/assembly/src/ast/program.rs @@ -0,0 +1,361 @@ +use crate::ast::MAX_BODY_LEN; + +use super::{ + super::tokens::SourceLocation, + check_unused_imports, + code_body::CodeBody, + imports::ModuleImports, + instrument, + nodes::Node, + parsers::{parse_constants, ParserContext}, + serde::AstSerdeOptions, + { + format::*, sort_procs_into_vec, LocalProcMap, ProcedureAst, ReExportedProcMap, + MAX_LOCAL_PROCS, + }, + { + ByteReader, ByteWriter, Deserializable, DeserializationError, ParsingError, Serializable, + SliceReader, Token, TokenStream, + }, +}; +use crate::utils::collections::*; + +use core::{fmt, iter}; +#[cfg(feature = "std")] +use std::{fs, io, path::Path}; +// PROGRAM AST +// ================================================================================================ + +/// An abstract syntax tree of an executable Miden program. +/// +/// A program AST consists of a body of the program, a list of internal procedure ASTs, a list of +/// imported libraries, a map from procedure ids to procedure names for imported procedures used in +/// the module, and the source location of the program. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ProgramAst { + pub(super) body: CodeBody, + pub(super) local_procs: Vec, + pub(super) import_info: ModuleImports, + pub(super) start: SourceLocation, +} + +impl ProgramAst { + // CONSTRUCTORS + // -------------------------------------------------------------------------------------------- + /// Returns a new [ProgramAst]. + /// + /// A program consist of a body and a set of internal (i.e., not exported) procedures. + /// + /// # Errors + /// Returns an error if: + /// - The number of body nodes is greater than or equal to 2^16. + /// - The number of local procedures is greater than or equal to 2^16. + pub fn new(body: Vec, local_procs: Vec) -> Result { + // TODO: instead of ParsingError, this should probably return a different error type: + // e.g., AstError. + if body.len() > MAX_BODY_LEN { + return Err(ParsingError::too_many_body_nodes(body.len(), MAX_BODY_LEN)); + } + if local_procs.len() > MAX_LOCAL_PROCS { + return Err(ParsingError::too_many_module_procs(local_procs.len(), MAX_LOCAL_PROCS)); + } + let start = SourceLocation::default(); + let body = CodeBody::new(body); + Ok(Self { + body, + local_procs, + import_info: Default::default(), + start, + }) + } + + /// Adds the provided import information to the program. 
+ /// + /// # Panics + /// Panics if import information has already been added. + pub fn with_import_info(mut self, import_info: ModuleImports) -> Self { + assert!(self.import_info.is_empty(), "module imports have already been added"); + self.import_info = import_info; + self + } + + /// Binds the provided `locations` to the nodes of this program's body. + /// + /// The `start` location points to the `begin` token which does not have its own node. + /// + /// # Panics + /// Panics if source location information has already been associated with this program. + pub fn with_source_locations(mut self, locations: L, start: SourceLocation) -> Self + where + L: IntoIterator, + { + assert!(!self.body.has_locations(), "source locations have already been loaded"); + self.start = start; + self.body = self.body.with_source_locations(locations); + self + } + + // PUBLIC ACCESSORS + // -------------------------------------------------------------------------------------------- + + /// Returns the [SourceLocation] associated with this program, if present. + pub fn source_locations(&self) -> impl Iterator { + iter::once(&self.start).chain(self.body.source_locations().iter()) + } + + /// Returns a slice over the internal procedures of this program. + pub fn procedures(&self) -> &[ProcedureAst] { + &self.local_procs + } + + /// Returns a reference to the body of this program. + pub fn body(&self) -> &CodeBody { + &self.body + } + + /// Returns a reference to the import info for this program + pub fn import_info(&self) -> &ModuleImports { + &self.import_info + } + + // PARSER + // -------------------------------------------------------------------------------------------- + /// Parses the provided source into a [ProgramAst]. + /// + /// A program consist of a body and a set of internal (i.e., not exported) procedures. 
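+ /// A minimal illustrative call (error handling elided):
+ ///
+ /// ```ignore
+ /// let ast = ProgramAst::parse("begin push.1 push.2 add end").unwrap();
+ /// assert_eq!(ast.body().nodes().len(), 3);
+ /// ```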
+ #[instrument(name = "parse_program", skip_all)] + pub fn parse(source: &str) -> Result { + let mut tokens = TokenStream::new(source)?; + let mut import_info = ModuleImports::parse(&mut tokens)?; + let local_constants = parse_constants(&mut tokens)?; + + let mut context = ParserContext { + import_info: &mut import_info, + local_procs: LocalProcMap::default(), + reexported_procs: ReExportedProcMap::default(), + local_constants, + num_proc_locals: 0, + }; + + context.parse_procedures(&mut tokens, false)?; + + // make sure program body is present + let next_token = tokens + .read() + .ok_or_else(|| ParsingError::unexpected_eof(*tokens.eof_location()))?; + if next_token.parts()[0] != Token::BEGIN { + return Err(ParsingError::unexpected_token(next_token, Token::BEGIN)); + } + + let program_start = tokens.pos(); + // consume the 'begin' token + let header = tokens.read().expect("missing program header"); + let start = *header.location(); + header.validate_begin()?; + tokens.advance(); + + // make sure there is something to be read + if tokens.eof() { + return Err(ParsingError::unexpected_eof(*tokens.eof_location())); + } + + // parse the sequence of nodes and add each node to the list + let body = context.parse_body(&mut tokens, false)?; + + // consume the 'end' token + match tokens.read() { + None => Err(ParsingError::unmatched_begin( + tokens.read_at(program_start).expect("no begin token"), + )), + Some(token) => match token.parts()[0] { + Token::END => token.validate_end(), + Token::ELSE => Err(ParsingError::dangling_else(token)), + _ => Err(ParsingError::unmatched_begin( + tokens.read_at(program_start).expect("no begin token"), + )), + }, + }?; + tokens.advance(); + + // make sure there are no instructions after the end + if let Some(token) = tokens.read() { + return Err(ParsingError::dangling_ops_after_program(token)); + } + + check_unused_imports(context.import_info); + + let local_procs = sort_procs_into_vec(context.local_procs); + let (nodes, locations) = body.into_parts(); + Ok(Self::new(nodes, local_procs)? + .with_source_locations(locations, start) + .with_import_info(import_info)) + } + + // SERIALIZATION / DESERIALIZATION + // -------------------------------------------------------------------------------------------- + + /// Writes byte representation of this [ProgramAst] into the specified target according with + /// the specified serde options. + /// + /// The serde options are serialized as header information for the purposes of deserialization. + pub fn write_into(&self, target: &mut W, options: AstSerdeOptions) { + // serialize the options, so that deserialization knows what to do + options.write_into(target); + + // asserts below are OK because we enforce limits on the number of procedure and the + // number of body instructions in relevant parsers + + // serialize imports if required + if options.serialize_imports { + self.import_info.write_into(target); + } + + // serialize procedures + assert!(self.local_procs.len() <= MAX_LOCAL_PROCS, "too many local procs"); + target.write_u16(self.local_procs.len() as u16); + target.write_many(&self.local_procs); + + // serialize program body + assert!(self.body.nodes().len() <= MAX_BODY_LEN, "too many body instructions"); + target.write_u16(self.body.nodes().len() as u16); + target.write_many(self.body.nodes()); + } + + /// Returns byte representation of this [ProgramAst]. + /// + /// The serde options are serialized as header information for the purposes of deserialization. 
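+ /// A round-trip sketch (assuming `ast` is an existing [ProgramAst]):
+ ///
+ /// ```ignore
+ /// let bytes = ast.to_bytes(AstSerdeOptions::new(true));
+ /// let restored = ProgramAst::from_bytes(&bytes).unwrap();
+ /// // `restored` carries no source locations; load them separately if needed.
+ /// ```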
+ pub fn to_bytes(&self, options: AstSerdeOptions) -> Vec { + let mut target = Vec::::default(); + self.write_into(&mut target, options); + target + } + + /// Returns a [ProgramAst] struct deserialized from the specified reader. + /// + /// This function assumes that the byte array contains a serialized [AstSerdeOptions] struct as + /// a header. + pub fn read_from(source: &mut R) -> Result { + // Deserialize the serialization options used when serializing + let options = AstSerdeOptions::read_from(source)?; + + // deserialize imports if required + let import_info = if options.serialize_imports { + ModuleImports::read_from(source)? + } else { + ModuleImports::default() + }; + + // deserialize local procs + let num_local_procs = source.read_u16()?.into(); + let local_procs = source.read_many::(num_local_procs)?; + + // deserialize program body + let body_len = source.read_u16()? as usize; + let nodes = source.read_many::(body_len)?; + + match Self::new(nodes, local_procs) { + Err(err) => Err(DeserializationError::UnknownError(err.message().clone())), + Ok(res) => Ok(res.with_import_info(import_info)), + } + } + + /// Returns a [ProgramAst] struct deserialized from the provided bytes. + /// + /// This function assumes that the byte array contains a serialized [AstSerdeOptions] struct as + /// a header. + pub fn from_bytes(bytes: &[u8]) -> Result { + let mut source = SliceReader::new(bytes); + Self::read_from(&mut source) + } + + /// Loads the [SourceLocation] from the `source`. + /// + /// It expects the `start` location at the first position, and will subsequently load the + /// body via [CodeBody::load_source_locations]. Finally, it will load the local procedures via + /// [ProcedureAst::load_source_locations]. + pub fn load_source_locations( + &mut self, + source: &mut R, + ) -> Result<(), DeserializationError> { + self.start = SourceLocation::read_from(source)?; + self.body.load_source_locations(source)?; + self.local_procs.iter_mut().try_for_each(|p| p.load_source_locations(source)) + } + + /// Writes the [SourceLocation] into `target`. + /// + /// It will write the `start` location, and then execute the body serialization via + /// [CodeBlock::write_source_locations]. Finally, it will write the local procedures via + /// [ProcedureAst::write_source_locations]. + pub fn write_source_locations(&self, target: &mut W) { + self.start.write_into(target); + self.body.write_source_locations(target); + self.local_procs.iter().for_each(|p| p.write_source_locations(target)) + } + + // DESTRUCTURING + // -------------------------------------------------------------------------------------------- + + /// Returns local procedures and body nodes of this program. + pub fn into_parts(self) -> (Vec, Vec) { + (self.local_procs, self.body.into_parts().0) + } + + /// Clear import info from the program + pub fn clear_imports(&mut self) { + self.import_info.clear(); + } + + // WRITE TO FILE + // -------------------------------------------------------------------------------------------- + + /// Writes ProgramAst to provided file path + #[cfg(feature = "std")] + pub fn write_to_file
<P>
(&self, file_path: P) -> io::Result<()> + where + P: AsRef, + { + let path = file_path.as_ref(); + if let Some(dir) = path.parent() { + fs::create_dir_all(dir)?; + } + + let bytes = self.to_bytes(AstSerdeOptions { + serialize_imports: true, + }); + fs::write(path, bytes) + } +} + +impl fmt::Display for ProgramAst { + /// Writes this [ProgramAst] as formatted MASM code into the formatter. + /// + /// The formatted code puts each instruction on a separate line and preserves correct indentation + /// for instruction blocks. + /// + /// # Panics + /// Panics if import info is not associated with this program. + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Imports + let paths = self.import_info.import_paths(); + for path in paths.iter() { + writeln!(f, "use.{path}")?; + } + if !paths.is_empty() { + writeln!(f)?; + } + + let invoked_procs = self.import_info.invoked_procs(); + let context = AstFormatterContext::new(&self.local_procs, invoked_procs); + + // Local procedures + for proc in self.local_procs.iter() { + writeln!(f, "{}", FormattableProcedureAst::new(proc, &context))?; + } + + // Main progrma + writeln!(f, "begin")?; + write!(f, "{}", FormattableCodeBody::new(&self.body, &context.inner_scope_context()))?; + writeln!(f, "end") + } +} diff --git a/assembly/src/ast/serde.rs b/assembly/src/ast/serde.rs index ec27cf1c15..da5e957240 100644 --- a/assembly/src/ast/serde.rs +++ b/assembly/src/ast/serde.rs @@ -13,7 +13,7 @@ pub struct AstSerdeOptions { } impl AstSerdeOptions { - pub fn new(serialize_imports: bool) -> Self { + pub const fn new(serialize_imports: bool) -> Self { Self { serialize_imports } } } diff --git a/assembly/src/ast/tests.rs b/assembly/src/ast/tests.rs index 5eb5786555..f465ffa1e4 100644 --- a/assembly/src/ast/tests.rs +++ b/assembly/src/ast/tests.rs @@ -1,8 +1,8 @@ use super::{ - AstSerdeOptions, BTreeMap, CodeBody, Felt, Instruction, LocalProcMap, ModuleAst, Node, - ParsingError, ProcedureAst, ProcedureId, ProcedureName, ProgramAst, SourceLocation, String, - ToString, Token, + AstSerdeOptions, CodeBody, Felt, Instruction, LocalProcMap, ModuleAst, Node, ParsingError, + ProcedureAst, ProcedureId, ProcedureName, ProgramAst, SourceLocation, Token, }; +use crate::utils::{collections::*, string::*}; use vm_core::utils::SliceReader; // UNIT TESTS @@ -30,7 +30,7 @@ fn test_ast_parsing_program_push() { push.5.7 \ push.500.700 \ push.70000.90000 \ - push.5000000000.7000000000 + push.5000000000.7000000000 push.0x0000000000000000010000000000000002000000000000000300000000000000 end"; @@ -38,13 +38,13 @@ fn test_ast_parsing_program_push() { Node::Instruction(Instruction::PushU8(10)), Node::Instruction(Instruction::PushU16(500)), Node::Instruction(Instruction::PushU32(70000)), - Node::Instruction(Instruction::PushFelt(Felt::from(5000000000_u64))), + Node::Instruction(Instruction::PushFelt(Felt::new(5000000000_u64))), Node::Instruction(Instruction::PushWord( vec![ - Felt::from(5000000000_u64), - Felt::from(7000000000_u64), - Felt::from(9000000000_u64), - Felt::from(11000000000_u64), + Felt::new(5000000000_u64), + Felt::new(7000000000_u64), + Felt::new(9000000000_u64), + Felt::new(11000000000_u64), ] .try_into() .unwrap(), @@ -53,8 +53,8 @@ fn test_ast_parsing_program_push() { Node::Instruction(Instruction::PushU16List(vec![500, 700])), Node::Instruction(Instruction::PushU32List(vec![70000, 90000])), Node::Instruction(Instruction::PushFeltList(vec![ - Felt::from(5000000000_u64), - Felt::from(7000000000_u64), + Felt::new(5000000000_u64), + Felt::new(7000000000_u64), ])), 
Node::Instruction(Instruction::PushU8List(vec![0, 1, 2, 3])), ]; @@ -86,28 +86,22 @@ fn test_ast_parsing_program_u32() { begin push.3 - u32checked_add.5 u32wrapping_add.5 u32overflowing_add.5 - u32checked_sub.1 u32wrapping_sub.1 u32overflowing_sub.1 - u32checked_mul.2 u32wrapping_mul.2 u32overflowing_mul.2 end"; let nodes: Vec = vec![ Node::Instruction(Instruction::PushU8(3)), - Node::Instruction(Instruction::U32CheckedAddImm(5)), Node::Instruction(Instruction::U32WrappingAddImm(5)), Node::Instruction(Instruction::U32OverflowingAddImm(5)), - Node::Instruction(Instruction::U32CheckedSubImm(1)), Node::Instruction(Instruction::U32WrappingSubImm(1)), Node::Instruction(Instruction::U32OverflowingSubImm(1)), - Node::Instruction(Instruction::U32CheckedMulImm(2)), Node::Instruction(Instruction::U32WrappingMulImm(2)), Node::Instruction(Instruction::U32OverflowingMulImm(2)), ]; @@ -232,7 +226,7 @@ fn test_ast_parsing_adv_injection() { let source = "begin adv.push_u64div adv.push_mapval adv.push_smtget adv.insert_mem end"; let nodes: Vec = vec![ - Node::Instruction(AdvInject(PushU64div)), + Node::Instruction(AdvInject(PushU64Div)), Node::Instruction(AdvInject(PushMapVal)), Node::Instruction(AdvInject(PushSmtGet)), Node::Instruction(AdvInject(InsertMem)), @@ -241,6 +235,28 @@ fn test_ast_parsing_adv_injection() { assert_program_output(source, BTreeMap::new(), nodes); } +#[test] +fn test_ast_parsing_bitwise_counters() { + let source = "begin u32clz u32ctz u32clo u32cto end"; + let nodes: Vec = vec![ + Node::Instruction(Instruction::U32Clz), + Node::Instruction(Instruction::U32Ctz), + Node::Instruction(Instruction::U32Clo), + Node::Instruction(Instruction::U32Cto), + ]; + + assert_program_output(source, BTreeMap::new(), nodes); +} + +#[test] +fn test_ast_parsing_ilog2() { + let source = "begin push.8 ilog2 end"; + let nodes: Vec = + vec![Node::Instruction(Instruction::PushU8(8)), Node::Instruction(Instruction::ILog2)]; + + assert_program_output(source, BTreeMap::new(), nodes); +} + #[test] fn test_ast_parsing_use() { let source = "\ @@ -485,7 +501,7 @@ fn test_missing_import() { let result = ProgramAst::parse(source); match result { - Ok(_) => assert!(false), + Ok(_) => panic!("should have panicked"), Err(err) => assert!(err.to_string().contains("module 'u64' was not imported")), } } @@ -503,7 +519,7 @@ fn test_use_in_proc_body() { let result = ModuleAst::parse(source); match result { - Ok(_) => assert!(false), + Ok(_) => panic!("should have panicked"), Err(err) => assert!(err.to_string().contains("import in procedure body")), } } @@ -514,7 +530,7 @@ fn test_unterminated_proc() { let result = ModuleAst::parse(source); match result { - Ok(_) => assert!(false), + Ok(_) => panic!("should have panicked"), Err(err) => assert!(err.to_string().contains("procedure 'foo' has no matching end")), } } @@ -525,7 +541,7 @@ fn test_unterminated_if() { let result = ModuleAst::parse(source); match result { - Ok(_) => assert!(false), + Ok(_) => panic!("should have panicked"), Err(err) => assert!(err.to_string().contains("if without matching else/end")), } } @@ -810,7 +826,7 @@ fn test_ast_program_serde_control_flow() { while.true push.5.7 - u32checked_add + u32wrapping_add loc_store.1 push.0 end @@ -826,7 +842,7 @@ fn test_ast_program_serde_control_flow() { #[test] fn assert_parsing_line_unmatched_begin() { - let source = format!("\n\nbegin\npush.1.2\n\nadd mul"); + let source = "\n\nbegin\npush.1.2\n\nadd mul".to_string(); let err = ProgramAst::parse(&source).err().unwrap(); let location = SourceLocation::new(3, 1); 
assert_eq!(err, ParsingError::unmatched_begin(&Token::new("begin", location))); @@ -834,7 +850,7 @@ fn assert_parsing_line_unmatched_begin() { #[test] fn assert_parsing_line_extra_param() { - let source = format!("begin add.1.2\nend"); + let source = "begin add.1.2\nend".to_string(); let err = ProgramAst::parse(&source).err().unwrap(); let location = SourceLocation::new(1, 7); assert_eq!(err, ParsingError::extra_param(&Token::new("add.1.2", location))); @@ -863,7 +879,7 @@ fn assert_parsing_line_invalid_op() { while.true push.5.7 - u32checked_add + u32wrapping_add loc_store.1 push.0 end @@ -881,7 +897,7 @@ fn assert_parsing_line_invalid_op() { #[test] fn assert_parsing_line_unexpected_eof() { - let source = format!("proc.foo\nadd\nend"); + let source = "proc.foo\nadd\nend".to_string(); let err = ProgramAst::parse(&source).err().unwrap(); let location = SourceLocation::new(3, 1); assert_eq!(err, ParsingError::unexpected_eof(location)); @@ -889,7 +905,7 @@ fn assert_parsing_line_unexpected_eof() { #[test] fn assert_parsing_line_unexpected_token() { - let source = format!("proc.foo\nadd\nend\n\nmul"); + let source = "proc.foo\nadd\nend\n\nmul".to_string(); let err = ProgramAst::parse(&source).err().unwrap(); let location = SourceLocation::new(5, 1); assert_eq!(err, ParsingError::unexpected_token(&Token::new("mul", location), "begin")); @@ -904,7 +920,7 @@ fn test_ast_program_serde_imports_serialized() { begin push.0 push.1 - exec.u64::checked_add + exec.u64::wrapping_add end"; assert_correct_program_serialization(source, true); } @@ -918,7 +934,7 @@ fn test_ast_program_serde_imports_not_serialized() { begin push.0 push.1 - exec.u64::checked_add + exec.u64::wrapping_add end"; assert_correct_program_serialization(source, false); } @@ -932,7 +948,7 @@ fn test_ast_module_serde_imports_serialized() { proc.foo.2 push.0 push.1 - exec.u64::checked_add + exec.u64::wrapping_add end"; assert_correct_module_serialization(source, true); } @@ -946,11 +962,43 @@ fn test_ast_module_serde_imports_not_serialized() { proc.foo.2 push.0 push.1 - exec.u64::checked_add + exec.u64::wrapping_add end"; assert_correct_module_serialization(source, false); } +#[test] +fn test_repeat_with_constant_count() { + let source = "\ + const.A=3 + const.B=A*3+5 + + begin + repeat.A + push.1 + end + + repeat.B + push.0 + end + end"; + + assert_correct_program_serialization(source, false); + + let nodes: Vec = vec![ + Node::Repeat { + times: 3, + body: CodeBody::new(vec![Node::Instruction(Instruction::PushU8(1))]), + }, + Node::Repeat { + times: 14, + body: CodeBody::new(vec![Node::Instruction(Instruction::PushU8(0))]), + }, + ]; + + assert_program_output(source, BTreeMap::new(), nodes); +} + fn assert_program_output(source: &str, procedures: LocalProcMap, body: Vec) { let program = ProgramAst::parse(source).unwrap(); assert_eq!(program.body.nodes(), body); @@ -1038,9 +1086,13 @@ fn assert_correct_program_serialization(source: &str, serialize_imports: bool) { program_deserialized .load_source_locations(&mut SliceReader::new(&locations)) .unwrap(); - if !serialize_imports { - program_deserialized.import_info = program.import_info.clone(); - } + + let program_deserialized = if !serialize_imports { + program_deserialized.with_import_info(program.import_info().clone()) + } else { + program_deserialized + }; + assert_eq!(program, program_deserialized); } @@ -1069,8 +1121,12 @@ fn assert_correct_module_serialization(source: &str, serialize_imports: bool) { module_deserialized .load_source_locations(&mut SliceReader::new(&locations)) 
.unwrap(); - if !serialize_imports { - module_deserialized.import_info = module.import_info.clone(); - } + + module_deserialized = if !serialize_imports { + module_deserialized.with_import_info(module.import_info().clone()) + } else { + module_deserialized + }; + assert_eq!(module, module_deserialized); } diff --git a/assembly/src/errors.rs b/assembly/src/errors.rs index f11cfc3685..414bcceb72 100644 --- a/assembly/src/errors.rs +++ b/assembly/src/errors.rs @@ -1,7 +1,8 @@ use super::{ - ast::ProcReExport, crypto::hash::RpoDigest, tokens::SourceLocation, LibraryNamespace, - ProcedureId, ProcedureName, String, ToString, Token, Vec, + ast::ProcReExport, crypto::hash::RpoDigest, tokens::SourceLocation, KernelError, + LibraryNamespace, ProcedureId, ProcedureName, Token, }; +use crate::utils::{collections::*, string::*}; use core::fmt; // ASSEMBLY ERROR @@ -11,28 +12,29 @@ use core::fmt; #[derive(Debug, Clone, Eq, PartialEq)] pub enum AssemblyError { CallInKernel(String), - CallerOutOKernel, CallSetProcedureNotFound(RpoDigest), + CallerOutOKernel, CircularModuleDependency(Vec), ConflictingNumLocals(String), DivisionByZero, - DuplicateProcName(String, String), DuplicateProcId(ProcedureId), + DuplicateProcName(String, String), ExportedProcInProgram(String), ImportedProcModuleNotFound(ProcedureId, String), - ReExportedProcModuleNotFound(ProcReExport), ImportedProcNotFoundInModule(ProcedureId, String), - InvalidProgramAssemblyContext, InvalidCacheLock, + InvalidProgramAssemblyContext, + Io(String), + KernelError(KernelError), KernelProcNotFound(ProcedureId), + LibraryError(String), LocalProcNotFound(u16, String), - ParsingError(String), ParamOutOfBounds(u64, u64, u64), + ParsingError(String), PhantomCallsNotAllowed(RpoDigest), ProcedureNameError(String), + ReExportedProcModuleNotFound(ProcReExport), SysCallInKernel(String), - LibraryError(String), - Io(String), } impl AssemblyError { @@ -134,25 +136,26 @@ impl fmt::Display for AssemblyError { use AssemblyError::*; match self { CallInKernel(proc_name) => write!(f, "call instruction used kernel procedure '{proc_name}'"), - CallerOutOKernel => write!(f, "caller instruction used outside of kernel"), CallSetProcedureNotFound(mast_root) => write!(f, "callset procedure not found in assembler cache for procedure with MAST root {mast_root}"), + CallerOutOKernel => write!(f, "caller instruction used outside of kernel"), CircularModuleDependency(dep_chain) => write!(f, "circular module dependency in the following chain: {dep_chain:?}"), ConflictingNumLocals(proc_name) => write!(f, "procedure `{proc_name}` has the same MAST as another procedure but different number of locals"), DivisionByZero => write!(f, "division by zero"), - DuplicateProcName(proc_name, module_path) => write!(f, "duplicate proc name '{proc_name}' in module {module_path}"), DuplicateProcId(proc_id) => write!(f, "duplicate proc id {proc_id}"), + DuplicateProcName(proc_name, module_path) => write!(f, "duplicate proc name '{proc_name}' in module {module_path}"), ExportedProcInProgram(proc_name) => write!(f, "exported procedure '{proc_name}' in executable program"), ImportedProcModuleNotFound(proc_id, proc_name) => write!(f, "module for imported procedure `{proc_name}` with ID {proc_id} not found"), - ReExportedProcModuleNotFound(reexport) => write!(f, "re-exported proc {} with id {} not found", reexport.name(), reexport.proc_id()), ImportedProcNotFoundInModule(proc_id, module_path) => write!(f, "imported procedure {proc_id} not found in module {module_path}"), - InvalidProgramAssemblyContext => 
write!(f, "assembly context improperly initialized for program compilation"), InvalidCacheLock => write!(f, "an attempt was made to lock a borrowed procedures cache"), + InvalidProgramAssemblyContext => write!(f, "assembly context improperly initialized for program compilation"), Io(description) => write!(f, "I/O error: {description}"), + KernelError(error) => write!(f, "{}", error), KernelProcNotFound(proc_id) => write!(f, "procedure {proc_id} not found in kernel"), LibraryError(err) | ParsingError(err) | ProcedureNameError(err) => write!(f, "{err}"), LocalProcNotFound(proc_idx, module_path) => write!(f, "procedure at index {proc_idx} not found in module {module_path}"), ParamOutOfBounds(value, min, max) => write!(f, "parameter value must be greater than or equal to {min} and less than or equal to {max}, but was {value}"), PhantomCallsNotAllowed(mast_root) => write!(f, "cannot call phantom procedure with MAST root {mast_root}: phantom calls not allowed"), + ReExportedProcModuleNotFound(reexport) => write!(f, "re-exported proc {} with id {} not found", reexport.name(), reexport.proc_id()), SysCallInKernel(proc_name) => write!(f, "syscall instruction used in kernel procedure '{proc_name}'"), } } @@ -415,6 +418,16 @@ impl ParsingError { } } + pub fn too_many_body_nodes(num_nodes: usize, max_nodes: usize) -> Self { + ParsingError { + message: format!( + "a code body cannot contain more than {num_nodes} nodes, but had {max_nodes}" + ), + location: SourceLocation::default(), + op: "".to_string(), + } + } + pub fn module_docs_too_long(doc_len: usize, max_len: usize) -> Self { ParsingError { message: format!( diff --git a/assembly/src/lib.rs b/assembly/src/lib.rs index 5c08aabeea..a7cc116704 100644 --- a/assembly/src/lib.rs +++ b/assembly/src/lib.rs @@ -7,9 +7,9 @@ extern crate alloc; use vm_core::{ code_blocks::CodeBlock, crypto, + errors::KernelError, utils::{ - collections::{btree_map, BTreeMap, BTreeSet, Vec}, - string::{String, ToString}, + collections::{btree_map, BTreeMap}, ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable, SliceReader, }, CodeBlockTable, Felt, Kernel, Operation, Program, StarkField, ONE, ZERO, diff --git a/assembly/src/library/masl.rs b/assembly/src/library/masl.rs index 755a78dd73..51eca33fd5 100644 --- a/assembly/src/library/masl.rs +++ b/assembly/src/library/masl.rs @@ -1,8 +1,9 @@ use super::{ - super::BTreeSet, AstSerdeOptions, ByteReader, ByteWriter, Deserializable, DeserializationError, - Library, LibraryError, LibraryNamespace, LibraryPath, Module, ModuleAst, Serializable, Vec, - Version, MAX_DEPENDENCIES, MAX_MODULES, + AstSerdeOptions, ByteReader, ByteWriter, Deserializable, DeserializationError, Library, + LibraryError, LibraryNamespace, LibraryPath, Module, ModuleAst, Serializable, Version, + MAX_DEPENDENCIES, MAX_MODULES, }; +use crate::utils::collections::*; use core::slice::Iter; // CONSTANT DEFINITIONS @@ -73,7 +74,7 @@ impl MaslLibrary { /// # Errors /// Returns an error if the provided `modules` vector is empty or contains more than /// [u16::MAX] elements. - pub(super) fn new( + pub fn new( namespace: LibraryNamespace, version: Version, has_source_locations: bool, @@ -118,8 +119,7 @@ impl MaslLibrary { #[cfg(feature = "std")] mod use_std { - use super::*; - use crate::{ast::ModuleAst, BTreeMap}; + use super::{super::super::ast::instrument, *}; use std::{fs, io, path::Path}; impl MaslLibrary { @@ -184,6 +184,7 @@ mod use_std { } /// Read a library from a file. 
+ #[instrument(name = "read_library_file", fields(path = %path.as_ref().display()))] pub fn read_from_file
<P>
(path: P) -> Result where P: AsRef, @@ -272,7 +273,7 @@ mod use_std { let ast = ModuleAst::parse(&contents)?; // add dependencies of this module to the dependencies of this library - for path in ast.import_paths() { + for path in ast.import_info().import_paths() { let ns = LibraryNamespace::new(path.first())?; deps.insert(ns); } diff --git a/assembly/src/library/mod.rs b/assembly/src/library/mod.rs index 4bc34d7ea8..56608a9bba 100644 --- a/assembly/src/library/mod.rs +++ b/assembly/src/library/mod.rs @@ -1,8 +1,9 @@ use super::{ ast::{AstSerdeOptions, ModuleAst}, ByteReader, ByteWriter, Deserializable, DeserializationError, LibraryError, PathError, - Serializable, String, ToString, Vec, MAX_LABEL_LEN, NAMESPACE_LABEL_PARSER, + Serializable, MAX_LABEL_LEN, NAMESPACE_LABEL_PARSER, }; +use crate::utils::string::*; use core::{cmp::Ordering, fmt, ops::Deref, str::from_utf8}; mod masl; @@ -41,6 +42,11 @@ pub trait Library { /// Returns the dependency libraries of this library. fn dependencies(&self) -> &[LibraryNamespace]; + + /// Returns the AST of the module stored at the provided path. + fn get_module_ast(&self, path: &LibraryPath) -> Option<&ModuleAst> { + self.modules().find(|&module| module.path == *path).map(|module| &module.ast) + } } impl Library for &T @@ -66,6 +72,10 @@ where fn dependencies(&self) -> &[LibraryNamespace] { T::dependencies(self) } + + fn get_module_ast(&self, path: &LibraryPath) -> Option<&ModuleAst> { + T::get_module_ast(self, path) + } } // MODULE diff --git a/assembly/src/library/path.rs b/assembly/src/library/path.rs index 600d81d435..7bb7fb0192 100644 --- a/assembly/src/library/path.rs +++ b/assembly/src/library/path.rs @@ -1,7 +1,8 @@ use super::{ - ByteReader, ByteWriter, Deserializable, DeserializationError, PathError, Serializable, String, - ToString, MAX_LABEL_LEN, + ByteReader, ByteWriter, Deserializable, DeserializationError, PathError, Serializable, + MAX_LABEL_LEN, }; +use crate::utils::string::*; use core::{fmt, ops::Deref, str::from_utf8}; // CONSTANTS @@ -86,6 +87,11 @@ impl LibraryPath { // PUBLIC ACCESSORS // -------------------------------------------------------------------------------------------- + /// Returns the full path of the Library + pub fn path(&self) -> &str { + &self.path + } + /// Returns the first component of the path. /// /// The first component is the leftmost token separated by `::`. 
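As a usage note, a minimal sketch of how the two accessors added above (`Library::get_module_ast` and `LibraryPath::path`) might be combined; the `lib` binding and the module path are illustrative, and the `Library` and `LibraryPath` items are assumed to be in scope:

    fn print_module(lib: &impl Library) {
        let path = LibraryPath::new("test::foo").unwrap();
        if let Some(ast) = lib.get_module_ast(&path) {
            // `path()` returns the full "namespace::module" string.
            println!("module {}:\n{}", path.path(), ast);
        }
    }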
diff --git a/assembly/src/library/tests.rs b/assembly/src/library/tests.rs index d6bcf8ffc9..92886923c4 100644 --- a/assembly/src/library/tests.rs +++ b/assembly/src/library/tests.rs @@ -1,4 +1,4 @@ -use super::{LibraryNamespace, LibraryPath, MaslLibrary, Module, ModuleAst, Version}; +use super::{Library, LibraryNamespace, LibraryPath, MaslLibrary, Module, ModuleAst, Version}; use vm_core::utils::{Deserializable, Serializable, SliceReader}; #[test] @@ -56,3 +56,37 @@ fn masl_locations_serialization() { bundle.clear_locations(); assert_eq!(bundle, deserialized); } + +#[test] +fn get_module_by_path() { + // declare foo module + let foo_source = r#" + export.foo + add + end + "#; + let path = LibraryPath::new("test::foo").unwrap(); + let ast = ModuleAst::parse(foo_source).unwrap(); + let foo = Module::new(path, ast); + + let modules = [foo].to_vec(); + + // create the bundle with locations + let namespace = LibraryNamespace::new("test").unwrap(); + let version = Version::MIN; + let locations = true; + let bundle = + MaslLibrary::new(namespace, version, locations, modules.clone(), Vec::new()).unwrap(); + + // get AST associated with "test::foo" path + let foo_ast = bundle.get_module_ast(&LibraryPath::new("test::foo").unwrap()).unwrap(); + let foo_ast_str = format!("{foo_ast}"); + let foo_expected = "export.foo.0 + add +end + +"; + assert_eq!(foo_ast_str, foo_expected); + + assert!(bundle.get_module_ast(&LibraryPath::new("test::bar").unwrap()).is_none()); +} diff --git a/assembly/src/procedures/mod.rs b/assembly/src/procedures/mod.rs index 5a6591da9d..4519de64cd 100644 --- a/assembly/src/procedures/mod.rs +++ b/assembly/src/procedures/mod.rs @@ -1,8 +1,9 @@ use super::{ crypto::hash::{Blake3_160, RpoDigest}, - BTreeSet, ByteReader, ByteWriter, CodeBlock, Deserializable, DeserializationError, LabelError, - LibraryPath, Serializable, String, ToString, PROCEDURE_LABEL_PARSER, + ByteReader, ByteWriter, CodeBlock, Deserializable, DeserializationError, LabelError, + LibraryPath, Serializable, PROCEDURE_LABEL_PARSER, }; +use crate::utils::{collections::*, string::*}; use core::{ fmt, ops::{self, Deref}, diff --git a/assembly/src/tests.rs b/assembly/src/tests.rs index 10af9bbf8a..f76fbc4e32 100644 --- a/assembly/src/tests.rs +++ b/assembly/src/tests.rs @@ -1,6 +1,7 @@ use crate::{ ast::{ModuleAst, ProgramAst}, - Assembler, AssemblyContext, Library, LibraryNamespace, LibraryPath, Module, Version, + Assembler, AssemblyContext, AssemblyError, Library, LibraryNamespace, LibraryPath, MaslLibrary, + Module, ProcedureName, Version, }; use core::slice::Iter; @@ -124,7 +125,7 @@ fn simple_main_call() { export.account_method_1 push.2.1 add end - + export.account_method_2 push.3.1 sub end @@ -161,7 +162,7 @@ fn call_without_path() { export.account_method_1 push.2.1 add end - + export.account_method_2 push.3.1 sub end @@ -180,7 +181,7 @@ fn call_without_path() { export.account_method_1 push.2.2 add end - + export.account_method_2 push.4.1 sub end @@ -195,15 +196,15 @@ fn call_without_path() { // compile program in which functions from different modules but with equal names are called let source = ProgramAst::parse( - "begin + "begin # call the account_method_1 from the first module (account_code1) - call.0x81e0b1afdbd431e4c9d4b86599b82c3852ecf507ae318b71c099cdeba0169068 + call.0x81e0b1afdbd431e4c9d4b86599b82c3852ecf507ae318b71c099cdeba0169068 # call the account_method_2 from the first module (account_code1) call.0x1bc375fc794af6637af3f428286bf6ac1a24617640ed29f8bc533f48316c6d75 # call the account_method_1 from the 
second module (account_code2) - call.0xcfadd74886ea075d15826a4f59fb4db3a10cde6e6e953603cba96b4dcbb94321 + call.0xcfadd74886ea075d15826a4f59fb4db3a10cde6e6e953603cba96b4dcbb94321 # call the account_method_2 from the second module (account_code2) call.0x1976bf72d457bd567036d3648b7e3f3c22eca4096936931e59796ec05c0ecb10 @@ -216,6 +217,138 @@ fn call_without_path() { .unwrap(); } +// PROGRAM WITH PROCREF +// ================================================================================================ + +#[test] +fn procref_call() { + // instantiate assembler + let assembler = Assembler::default(); + + // compile first module + let module_path1 = LibraryPath::new("module::path::one").unwrap(); + let module_source1 = ModuleAst::parse( + " + export.aaa + push.7.8 + end + + export.foo + push.1.2 + end", + ) + .unwrap(); + + let _roots1 = assembler + .compile_module( + &module_source1, + Some(&module_path1), + &mut AssemblyContext::for_module(false), + ) + .unwrap(); + + // compile second module + let module_path2 = LibraryPath::new("module::path::two").unwrap(); + let module_source2 = ModuleAst::parse( + " + use.module::path::one + export.one::foo + + export.bar + procref.one::aaa + end", + ) + .unwrap(); + + let _roots2 = assembler + .compile_module( + &module_source2, + Some(&module_path2), + &mut AssemblyContext::for_module(false), + ) + .unwrap(); + + // compile program with procref calls + let program_source = ProgramAst::parse( + " + use.module::path::two + + proc.baz.4 + push.3.4 + end + + begin + procref.two::bar + procref.two::foo + procref.baz + end", + ) + .unwrap(); + + let _compiled_program = assembler + .compile_in_context( + &program_source, + &mut AssemblyContext::for_program(Some(&program_source)), + ) + .unwrap(); +} + +#[test] +fn get_proc_name_of_unknown_module() { + // Module `two` is unknown. This program should return + // `AssemblyError::imported_proc_module_not_found` error with `bar` procedure name. 
+ let module_source1 = " + use.module::path::two + + export.foo + procref.two::bar + end"; + let module_ast1 = ModuleAst::parse(module_source1).unwrap(); + let module_path1 = LibraryPath::new("module::path::one").unwrap(); + let module1 = Module::new(module_path1, module_ast1); + + let masl_lib = MaslLibrary::new( + LibraryNamespace::new("module").unwrap(), + Version::default(), + false, + vec![module1], + vec![], + ) + .unwrap(); + + // instantiate assembler + let assembler = Assembler::default().with_library(&masl_lib).unwrap(); + + // compile program with procref calls + let program_source = ProgramAst::parse( + " + use.module::path::one + + begin + procref.one::foo + end", + ) + .unwrap(); + + let compilation_error = assembler + .compile_in_context( + &program_source, + &mut AssemblyContext::for_program(Some(&program_source)), + ) + .err() + .unwrap(); + + let expected_error = AssemblyError::imported_proc_module_not_found( + &crate::ProcedureId([ + 17, 137, 148, 17, 42, 108, 60, 23, 205, 115, 62, 70, 16, 121, 221, 142, 51, 247, 250, + 43, + ]), + ProcedureName::try_from("bar").ok(), + ); + + assert_eq!(compilation_error, expected_error); +} + // CONSTANTS // ================================================================================================ @@ -293,6 +426,24 @@ fn constant_alphanumeric_expression() { assert_eq!(expected, format!("{program}")); } +#[test] +fn constant_hexadecimal_value() { + let assembler = Assembler::default(); + let source = "const.TEST_CONSTANT=0xFF \ + begin \ + push.TEST_CONSTANT \ + end \ + "; + let expected = "\ + begin \ + span \ + push(255) \ + end \ + end"; + let program = assembler.compile(source).unwrap(); + assert_eq!(expected, format!("{program}")); +} + #[test] fn constant_field_division() { let assembler = Assembler::default(); @@ -1084,7 +1235,7 @@ fn program_with_reexported_proc_in_same_library() { let ast = ModuleAst::parse(MODULE_BODY).unwrap(); // check docs - let docs_checked_eqz = ast.reexported_procs().get(0).unwrap().docs().unwrap(); + let docs_checked_eqz = ast.reexported_procs().first().unwrap().docs().unwrap(); assert_eq!( docs_checked_eqz, "checked_eqz checks if the value is u32 and zero and returns 1 if it is, 0 otherwise" diff --git a/assembly/src/tokens/lines.rs b/assembly/src/tokens/lines.rs index 8694ae911e..16d5b2083e 100644 --- a/assembly/src/tokens/lines.rs +++ b/assembly/src/tokens/lines.rs @@ -1,4 +1,5 @@ -use super::{SourceLocation, Token, Vec}; +use super::{SourceLocation, Token}; +use crate::utils::collections::*; use core::{iter, str::Lines}; // LINES STREAM @@ -197,7 +198,7 @@ impl<'a> LineInfo<'a> { /// /// ```masm /// #! doc comments - /// #! for foo procedure + /// #! for foo procedure /// #! with examples /// export.foo /// add @@ -350,7 +351,7 @@ mod tests { #! bar mul - #! end doc comment with trailing spaces + #! end doc comment with trailing spaces #! 
and multiple lines end diff --git a/assembly/src/tokens/mod.rs b/assembly/src/tokens/mod.rs index d82499e8f0..4534cb0bbf 100644 --- a/assembly/src/tokens/mod.rs +++ b/assembly/src/tokens/mod.rs @@ -1,7 +1,9 @@ use super::{ - ast::InvocationTarget, BTreeMap, ByteReader, ByteWriter, Deserializable, DeserializationError, - LibraryPath, ParsingError, ProcedureName, Serializable, String, ToString, Vec, + ast::{parse_param_with_constant_lookup, InvocationTarget}, + ByteReader, ByteWriter, Deserializable, DeserializationError, LibraryPath, ParsingError, + ProcedureName, Serializable, }; +use crate::utils::{collections::*, string::*}; use core::fmt; mod lines; @@ -238,12 +240,12 @@ impl<'a> Token<'a> { } } - pub fn parse_repeat(&self) -> Result { + pub fn parse_repeat(&self, constants: &BTreeMap) -> Result { assert_eq!(Self::REPEAT, self.parts[0], "not a repeat"); match self.num_parts() { 0 => unreachable!(), 1 => Err(ParsingError::missing_param(self, "repeat.")), - 2 => self.parts[1].parse::().map_err(|_| ParsingError::invalid_param(self, 1)), + 2 => parse_param_with_constant_lookup::(self, 1, constants), _ => Err(ParsingError::extra_param(self)), } } diff --git a/assembly/src/tokens/stream.rs b/assembly/src/tokens/stream.rs index 795b21770f..0d81227807 100644 --- a/assembly/src/tokens/stream.rs +++ b/assembly/src/tokens/stream.rs @@ -1,6 +1,5 @@ -use super::{ - BTreeMap, LineTokenizer, LinesStream, ParsingError, SourceLocation, String, Token, Vec, -}; +use super::{LineTokenizer, LinesStream, ParsingError, SourceLocation, Token}; +use crate::utils::{collections::*, string::*}; use core::fmt; // TOKEN STREAM diff --git a/core/Cargo.toml b/core/Cargo.toml index 1c85ae0277..9a5b47438c 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -1,11 +1,12 @@ [package] name = "miden-core" -version = "0.7.0" +version = "0.8.0" description = "Miden VM core components" authors = ["miden contributors"] readme = "README.md" license = "MIT" repository = "https://github.com/0xPolygonMiden/miden-vm" +documentation = "https://docs.rs/miden-core/0.8.0" categories = ["emulators", "no-std"] keywords = ["instruction-set", "miden", "program"] edition = "2021" @@ -18,14 +19,12 @@ doctest = false [features] default = ["std"] std = ["miden-crypto/std", "math/std", "winter-utils/std"] -sve = ["miden-crypto/sve", "std"] [dependencies] -math = { package = "winter-math", version = "0.6", default-features = false } -miden-crypto = { package = "miden-crypto", version = "0.7", default-features = false } -winter-crypto = { package = "winter-crypto", version = "0.6", default-features = false } -winter-utils = { package = "winter-utils", version = "0.6", default-features = false } +math = { package = "winter-math", version = "0.8", default-features = false } +miden-crypto = { version = "0.8", default-features = false } +winter-utils = { package = "winter-utils", version = "0.8", default-features = false } [dev-dependencies] -proptest = "1.1" -rand_utils = { version = "0.6", package = "winter-rand-utils" } +proptest = "1.3" +rand_utils = { version = "0.8", package = "winter-rand-utils" } diff --git a/core/src/errors.rs b/core/src/errors.rs index 1a1e98ceb0..7ab73b9b13 100644 --- a/core/src/errors.rs +++ b/core/src/errors.rs @@ -1,3 +1,4 @@ +use crate::utils::string::*; use core::fmt; // INPUT ERROR @@ -5,7 +6,7 @@ use core::fmt; #[derive(Clone, Debug)] pub enum InputError { - NotFieldElement(u64, &'static str), + NotFieldElement(u64, String), DuplicateAdviceRoot([u8; 32]), } @@ -28,11 +29,13 @@ impl std::error::Error for InputError 
{} // OUTPUT ERROR // ================================================================================================ + #[derive(Clone, Debug)] pub enum OutputError { InvalidOverflowAddress(u64), InvalidOverflowAddressLength(usize, usize), InvalidStackElement(u64), + OutputSizeTooBig(usize), } impl fmt::Display for OutputError { @@ -48,9 +51,37 @@ impl fmt::Display for OutputError { InvalidStackElement(element) => { write!(f, "stack contains {element} that is not a valid field element") } + OutputSizeTooBig(size) => { + write!(f, "too many elements for output stack, {size} elements") + } } } } +// KERNEL ERROR +// ================================================================================================ + #[cfg(feature = "std")] impl std::error::Error for OutputError {} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum KernelError { + DuplicatedProcedures, + TooManyProcedures(usize, usize), +} + +impl fmt::Display for KernelError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + KernelError::DuplicatedProcedures => { + write!(f, "Kernel can not have duplicated procedures",) + } + KernelError::TooManyProcedures(max, count) => { + write!(f, "Kernel can have at most {} procedures, received {}", max, count) + } + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for KernelError {} diff --git a/core/src/lib.rs b/core/src/lib.rs index edf5028864..3653452e4d 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -11,9 +11,9 @@ pub use miden_crypto::{Word, EMPTY_WORD, ONE, WORD_SIZE, ZERO}; pub mod crypto { pub mod merkle { pub use miden_crypto::merkle::{ - DefaultMerkleStore, EmptySubtreeRoots, InnerNodeInfo, MerkleError, MerklePath, - MerkleStore, MerkleTree, Mmr, MmrPeaks, NodeIndex, PartialMerkleTree, - RecordingMerkleStore, SimpleSmt, StoreNode, TieredSmt, + DefaultMerkleStore, EmptySubtreeRoots, InnerNodeInfo, LeafIndex, MerkleError, + MerklePath, MerkleStore, MerkleTree, Mmr, MmrPeaks, NodeIndex, PartialMerkleTree, + RecordingMerkleStore, SimpleSmt, Smt, SmtProof, SmtProofError, StoreNode, SMT_DEPTH, }; } @@ -26,7 +26,9 @@ pub mod crypto { } pub mod random { - pub use crate::random::*; + pub use miden_crypto::rand::{ + RandomCoin, RandomCoinError, RpoRandomCoin, WinterRandomCoin, + }; } pub mod dsa { @@ -51,9 +53,6 @@ pub use operations::{ pub mod stack; pub use stack::{StackInputs, StackOutputs}; -// TODO: this should move to miden-crypto crate -mod random; - pub mod utils; // TYPE ALIASES diff --git a/core/src/operations/decorators/advice.rs b/core/src/operations/decorators/advice.rs index 0a8011d4e5..9f2aef6807 100644 --- a/core/src/operations/decorators/advice.rs +++ b/core/src/operations/decorators/advice.rs @@ -103,7 +103,7 @@ pub enum AdviceInjector { /// respectively (with a0 representing the 32 lest significant bits and a1 representing the /// 32 most significant bits). Similarly, (q0, q1) and (r0, r1) represent the quotient and /// the remainder respectively. - DivU64, + U64Div, /// Given an element in a quadratic extension field on the top of the stack (i.e., a0, b1), /// computes its multiplicative inverse and push the result onto the advice stack. @@ -144,11 +144,17 @@ pub enum AdviceInjector { /// degree coefficients are located at the top of the advice stack. Ext2Intt, - /// Pushes values onto the advice stack which are required for successful retrieval of a - /// value from a Sparse Merkle Tree data structure. 
+ /// Currently unimplemented + SmtGet, + + /// Currently unimplemented + SmtSet, + + /// Pushes onto the advice stack the value associated with the specified key in a Sparse + /// Merkle Tree defined by the specified root. /// - /// The Sparse Merkle Tree is tiered, meaning it will have leaf depths in `{16, 32, 48, 64}`. - /// The depth flags define the tier on which the leaf is located. + /// If no value was previously associated with the specified key, [ZERO; 4] is pushed onto + /// the advice stack. /// /// Inputs: /// Operand stack: [KEY, ROOT, ...] @@ -156,67 +162,62 @@ pub enum AdviceInjector { /// /// Outputs: /// Operand stack: [KEY, ROOT, ...] - /// Advice stack: [f0, f1, K, V, f2] - /// - /// Where: - /// - f0 is a boolean flag set to `1` if the depth is `16` or `48`. - /// - f1 is a boolean flag set to `1` if the depth is `16` or `32`. - /// - K is the remaining key word; will be zeroed if the tree don't contain a mapped value - /// for the key. - /// - V is the value word; will be zeroed if the tree don't contain a mapped value for the key. - /// - f2 is a boolean flag set to `1` if a remaining key is not zero. - SmtGet, + /// Advice stack: [VALUE, ...] + SmtPeek, - /// Pushes values onto the advice stack which are required for successful insertion of a - /// key-value pair into a Sparse Merkle Tree data structure. + /// Pushes the number of the leading zeros of the top stack element onto the advice stack. /// - /// The Sparse Merkle Tree is tiered, meaning it will have leaf depths in `{16, 32, 48, 64}`. + /// Inputs: + /// Operand stack: [n, ...] + /// Advice stack: [...] + /// + /// Outputs: + /// Operand stack: [n, ...] + /// Advice stack: [leading_zeros, ...] + U32Clz, + + /// Pushes the number of the trailing zeros of the top stack element onto the advice stack. /// /// Inputs: - /// Operand stack: [VALUE, KEY, ROOT, ...] + /// Operand stack: [n, ...] /// Advice stack: [...] /// /// Outputs: - /// Operand stack: [OLD_VALUE, NEW_ROOT, ...] - /// Advice stack depends on the type of insert operation as follows: - /// - Update of an existing leaf: [ZERO (padding), d0, d1, ONE (is_update), OLD_VALUE] - /// - Simple insert at depth 16: [d0, d1, ONE (is_simple_insert), ZERO (is_update)] - /// - Simple insert at depth 32 or 48: [d0, d1, ONE (is_simple_insert), ZERO (is_update), P_NODE] - /// - Complex insert: [f0, f1, ZERO (is_simple_insert), ZERO (is_update), E_KEY, E_VALUE] - /// - Delete against an empty subtree: [d0, d1, ZERO (is_leaf), ONE (key_not_set)] - /// - Delete against another leaf: [d0, d1, ONE (is_leaf), ONE (key_not_set), KEY, VALUE] - /// - Delete against own leaf: [ZERO, ZERO, ZERO, ZERO (key_not_set), NEW_ROOT, OLD_VALUE] - /// - /// Where: - /// - ROOT and NEW_ROOT are the roots of the TSMT before and after the insert respectively. - /// - VALUE is the value to be inserted. - /// - OLD_VALUE is the value previously associated with the specified KEY. - /// - d0 is a boolean flag set to `1` if the depth is `16` or `48`. - /// - d1 is a boolean flag set to `1` if the depth is `16` or `32`. - /// - P_NODE is an internal node located at the tier above the insert tier. - /// - f0 and f1 are boolean flags a combination of which determines the source and the target - /// tiers as follows: - /// - (0, 0): depth 16 -> 32 - /// - (0, 1): depth 16 -> 48 - /// - (1, 0): depth 32 -> 48 - /// - (1, 1): depth 16, 32, or 48 -> 64 - /// - E_KEY and E_VALUE are the key-value pair for a leaf which is to be replaced by a subtree. - SmtSet, + /// Operand stack: [n, ...] 
+ /// Advice stack: [trailing_zeros, ...] + U32Ctz, - /// Pushes onto the advice stack the value associated with the specified key in a Sparse - /// Merkle Tree defined by the specified root. + /// Pushes the number of the leading ones of the top stack element onto the advice stack. /// - /// If no value was previously associated with the specified key, [ZERO; 4] is pushed onto - /// the advice stack. + /// Inputs: + /// Operand stack: [n, ...] + /// Advice stack: [...] + /// + /// Outputs: + /// Operand stack: [n, ...] + /// Advice stack: [leading_ones, ...] + U32Clo, + + /// Pushes the number of the trailing ones of the top stack element onto the advice stack. /// /// Inputs: - /// Operand stack: [KEY, ROOT, ...] + /// Operand stack: [n, ...] /// Advice stack: [...] /// /// Outputs: - /// Operand stack: [KEY, ROOT, ...] - /// Advice stack: [VALUE, ...] - SmtPeek, + /// Operand stack: [n, ...] + /// Advice stack: [trailing_ones, ...] + U32Cto, + + /// Pushes the base 2 logarithm of the top stack element, rounded down. + /// Inputs: + /// Operand stack: [n, ...] + /// Advice stack: [...] + /// + /// Outputs: + /// Operand stack: [n, ...] + /// Advice stack: [ilog2(n), ...] + ILog2, // ADVICE MAP INJECTORS // -------------------------------------------------------------------------------------------- @@ -298,12 +299,17 @@ impl fmt::Display for AdviceInjector { write!(f, "map_value_to_stack.{key_offset}") } } - Self::DivU64 => write!(f, "div_u64"), + Self::U64Div => write!(f, "div_u64"), Self::Ext2Inv => write!(f, "ext2_inv"), Self::Ext2Intt => write!(f, "ext2_intt"), Self::SmtGet => write!(f, "smt_get"), Self::SmtSet => write!(f, "smt_set"), Self::SmtPeek => write!(f, "smt_peek"), + Self::U32Clz => write!(f, "u32clz"), + Self::U32Ctz => write!(f, "u32ctz"), + Self::U32Clo => write!(f, "u32clo"), + Self::U32Cto => write!(f, "u32cto"), + Self::ILog2 => write!(f, "ilog2"), Self::MemToMap => write!(f, "mem_to_map"), Self::HdwordToMap { domain } => write!(f, "hdword_to_map.{domain}"), Self::HpermToMap => write!(f, "hperm_to_map"), diff --git a/core/src/operations/decorators/assembly_op.rs b/core/src/operations/decorators/assembly_op.rs index 1ade86fb22..b18b893787 100644 --- a/core/src/operations/decorators/assembly_op.rs +++ b/core/src/operations/decorators/assembly_op.rs @@ -1,4 +1,4 @@ -use crate::utils::string::String; +use crate::utils::string::*; use core::fmt; // ASSEMBLY OP diff --git a/core/src/operations/decorators/debug.rs b/core/src/operations/decorators/debug.rs index b6bdc81334..90f276abc1 100644 --- a/core/src/operations/decorators/debug.rs +++ b/core/src/operations/decorators/debug.rs @@ -13,6 +13,19 @@ pub enum DebugOptions { StackAll, /// Prints out the top n items of the stack for the current context. StackTop(u16), + /// Prints out the entire contents of RAM. + MemAll, + /// Prints out the contents of memory stored in the provided interval. Interval boundaries are + /// both inclusive. + /// + /// First parameter specifies the interval starting address, second -- the ending address. + MemInterval(u32, u32), + /// Prints out locals stored in the provided interval of the currently executing procedure. + /// Interval boundaries are both inclusive. + /// + /// First parameter specifies the starting address, second -- the ending address, and the third + /// specifies the overall number of locals. 
+ LocalInterval(u16, u16, u16), } impl fmt::Display for DebugOptions { @@ -20,6 +33,11 @@ impl fmt::Display for DebugOptions { match self { Self::StackAll => write!(f, "stack"), Self::StackTop(n) => write!(f, "stack.{n}"), + Self::MemAll => write!(f, "mem"), + Self::MemInterval(n, m) => write!(f, "mem.{n}.{m}"), + Self::LocalInterval(start, end, _) => { + write!(f, "local.{start}.{end}") + } } } } diff --git a/core/src/operations/decorators/mod.rs b/core/src/operations/decorators/mod.rs index 70b63c28cb..9fb653c32e 100644 --- a/core/src/operations/decorators/mod.rs +++ b/core/src/operations/decorators/mod.rs @@ -1,4 +1,4 @@ -use crate::utils::collections::Vec; +use crate::utils::collections::*; use core::fmt; mod advice; @@ -30,6 +30,10 @@ pub enum Decorator { /// Prints out information about the state of the VM based on the specified options. This /// decorator is executed only in debug mode. Debug(DebugOptions), + /// Emits an event to the host. + Event(u32), + /// Emmits a trace to the host. + Trace(u32), } impl fmt::Display for Decorator { @@ -40,6 +44,8 @@ impl fmt::Display for Decorator { write!(f, "asmOp({}, {})", assembly_op.op(), assembly_op.num_cycles()) } Self::Debug(options) => write!(f, "debug({options})"), + Self::Event(event_id) => write!(f, "event({})", event_id), + Self::Trace(trace_id) => write!(f, "trace({})", trace_id), } } } diff --git a/core/src/operations/mod.rs b/core/src/operations/mod.rs index fb18a59626..3dd67a184e 100644 --- a/core/src/operations/mod.rs +++ b/core/src/operations/mod.rs @@ -22,7 +22,7 @@ pub enum Operation { /// /// The internal value specifies an error code associated with the error in case when the /// execution fails. - Assert(Felt), + Assert(u32), /// Pops an element off the stack, adds the current value of the `fmp` register to it, and /// pushes the result back onto the stack. @@ -422,6 +422,17 @@ pub enum Operation { /// TODO: add docs FriE2F4, + + /// Performs a single step of a random linear combination defining the DEEP composition + /// polynomial i.e., the input to the FRI protocol. More precisely, the sum in question is: + /// \sum_{i=0}^k{\alpha_i \cdot \left(\frac{T_i(x) - T_i(z)}{x - z} + + /// \frac{T_i(x) - T_i(g \cdot z)}{x - g \cdot z} \right)} + /// + /// and the following instruction computes the numerators $\alpha_i \cdot (T_i(x) - T_i(z))$ + /// and $\alpha_i \cdot (T_i(x) - T_i(g \cdot z))$ and stores the values in two accumulators + /// $r$ and $p$, respectively. This instruction is specialized to main trace columns i.e. + /// the values $T_i(x)$ are base field elements. + RCombBase, } impl Operation { @@ -528,7 +539,7 @@ impl Operation { Self::Span => 0b0101_0110, Self::Join => 0b0101_0111, Self::Dyn => 0b0101_1000, - // => 0b0101_1001, + Self::RCombBase => 0b0101_1001, // => 0b0101_1010, // => 0b0101_1011, // => 0b0101_1100, @@ -696,6 +707,7 @@ impl fmt::Display for Operation { Self::MpVerify => write!(f, "mpverify"), Self::MrUpdate => write!(f, "mrupdate"), Self::FriE2F4 => write!(f, "frie2f4"), + Self::RCombBase => write!(f, "rcomb1"), } } } diff --git a/core/src/program/blocks/call_block.rs b/core/src/program/blocks/call_block.rs index b6e84c36f9..ecd9262e6c 100644 --- a/core/src/program/blocks/call_block.rs +++ b/core/src/program/blocks/call_block.rs @@ -14,7 +14,7 @@ use crate::utils::to_hex; /// > hash(fn_hash || padding, domain=SYSCALL_DOMAIN) # when a syscall is used /// /// Where `fn_hash` is 4 field elements (256 bits), and `padding` is 4 ZERO elements (256 bits). 
-#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct Call { hash: Digest, fn_hash: Digest, diff --git a/core/src/program/blocks/dyn_block.rs b/core/src/program/blocks/dyn_block.rs index 9f93b4069b..a257d93272 100644 --- a/core/src/program/blocks/dyn_block.rs +++ b/core/src/program/blocks/dyn_block.rs @@ -24,7 +24,7 @@ const DYN_CONSTANT: Digest = Digest::new([ /// affect the representation of the Dyn block. Therefore all Dyn blocks are represented by the same /// constant (rather than by unique hashes), which is computed as an RPO hash of two empty words /// ([ZERO, ZERO, ZERO, ZERO]) with a domain value of `DYN_DOMAIN`. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct Dyn {} impl Dyn { diff --git a/core/src/program/blocks/join_block.rs b/core/src/program/blocks/join_block.rs index d5ae4ad88b..711b10d481 100644 --- a/core/src/program/blocks/join_block.rs +++ b/core/src/program/blocks/join_block.rs @@ -1,4 +1,5 @@ -use super::{fmt, hasher, Box, CodeBlock, Digest, Felt, Operation}; +use super::{fmt, hasher, CodeBlock, Digest, Felt, Operation}; +use crate::utils::boxed::*; // JOIN BLOCKS // ================================================================================================ @@ -11,7 +12,7 @@ use super::{fmt, hasher, Box, CodeBlock, Digest, Felt, Operation}; /// > hash(left_block_hash || right_block_hash, domain=JOIN_DOMAIN) /// /// Where `left_block_hash` and `right_block_hash` are 4 field elements (256 bits) each. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct Join { body: Box<[CodeBlock; 2]>, hash: Digest, diff --git a/core/src/program/blocks/loop_block.rs b/core/src/program/blocks/loop_block.rs index 74e1cbb032..eb68df6324 100644 --- a/core/src/program/blocks/loop_block.rs +++ b/core/src/program/blocks/loop_block.rs @@ -1,4 +1,5 @@ -use super::{fmt, hasher, Box, CodeBlock, Digest, Felt, Operation}; +use super::{fmt, hasher, CodeBlock, Digest, Felt, Operation}; +use crate::utils::boxed::*; // LOOP BLOCK // ================================================================================================ @@ -12,7 +13,7 @@ use super::{fmt, hasher, Box, CodeBlock, Digest, Felt, Operation}; /// > hash(body_hash || padding, domain=LOOP_DOMAIN) /// /// Where `body_hash` is 4 field elements (256 bits), and `padding` is 4 ZERO elements (256 bits). -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct Loop { body: Box, hash: Digest, diff --git a/core/src/program/blocks/mod.rs b/core/src/program/blocks/mod.rs index cce75235c0..1fc7328600 100644 --- a/core/src/program/blocks/mod.rs +++ b/core/src/program/blocks/mod.rs @@ -1,4 +1,5 @@ -use super::{hasher, Box, Digest, Felt, Operation, Vec}; +use super::{hasher, Digest, Felt, Operation}; +use crate::utils::collections::*; use crate::DecoratorList; use core::fmt; @@ -24,7 +25,7 @@ pub use split_block::Split; // PROGRAM BLOCK // ================================================================================================ /// TODO: add comments -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub enum CodeBlock { Span(Span), Join(Join), diff --git a/core/src/program/blocks/proxy_block.rs b/core/src/program/blocks/proxy_block.rs index 6d9e04bfa1..ca37816429 100644 --- a/core/src/program/blocks/proxy_block.rs +++ b/core/src/program/blocks/proxy_block.rs @@ -8,7 +8,7 @@ use super::{fmt, Digest}; /// of the program secret. Fails if executed. /// /// Hash of a proxy block is not computed but is rather defined at instantiation time. 
-#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct Proxy { hash: Digest, } diff --git a/core/src/program/blocks/span_block.rs b/core/src/program/blocks/span_block.rs index e9aadf610b..8ba4a8c74b 100644 --- a/core/src/program/blocks/span_block.rs +++ b/core/src/program/blocks/span_block.rs @@ -1,5 +1,5 @@ -use super::{fmt, hasher, Digest, Felt, Operation, Vec}; -use crate::{DecoratorIterator, DecoratorList, ZERO}; +use super::{fmt, hasher, Digest, Felt, Operation}; +use crate::{utils::collections::*, DecoratorIterator, DecoratorList, ZERO}; use winter_utils::flatten_slice_elements; // CONSTANTS @@ -45,7 +45,7 @@ const MAX_OPS_PER_BATCH: usize = GROUP_SIZE * BATCH_SIZE; /// /// Where `batches` is the concatenation of each `batch` in the span, and each batch is 8 field /// elements (512 bits). -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct Span { op_batches: Vec, hash: Digest, @@ -170,7 +170,7 @@ impl fmt::Display for Span { /// /// An operation batch consists of up to 8 operation groups, with each group containing up to 9 /// operations or a single immediate value. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct OpBatch { ops: Vec, groups: [Felt; BATCH_SIZE], diff --git a/core/src/program/blocks/split_block.rs b/core/src/program/blocks/split_block.rs index 873f0d7917..91c4e36f54 100644 --- a/core/src/program/blocks/split_block.rs +++ b/core/src/program/blocks/split_block.rs @@ -1,4 +1,5 @@ -use super::{fmt, hasher, Box, CodeBlock, Digest, Felt, Operation}; +use super::{fmt, hasher, CodeBlock, Digest, Felt, Operation}; +use crate::utils::boxed::*; // SPLIT BLOCK // ================================================================================================ @@ -12,7 +13,7 @@ use super::{fmt, hasher, Box, CodeBlock, Digest, Felt, Operation}; /// > hash(true_branch_hash || false_branch_hash, domain=SPLIT_DOMAIN) /// /// Where `true_branch_hash` and `false_branch_hash` are 4 field elements (256 bits) each. 
-#[derive(Clone, Debug)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct Split { branches: Box<[CodeBlock; 2]>, hash: Digest, diff --git a/core/src/program/info.rs b/core/src/program/info.rs index 5899ba17fa..b51f6405de 100644 --- a/core/src/program/info.rs +++ b/core/src/program/info.rs @@ -1,8 +1,9 @@ use super::{ super::{ToElements, WORD_SIZE}, ByteReader, ByteWriter, Deserializable, DeserializationError, Digest, Felt, Kernel, Program, - Serializable, Vec, + Serializable, }; +use crate::utils::collections::*; // PROGRAM INFO // ================================================================================================ @@ -70,7 +71,7 @@ impl From for ProgramInfo { impl Serializable for ProgramInfo { fn write_into(&self, target: &mut W) { self.program_hash.write_into(target); - ::write_into(&self.kernel, target); + self.kernel.write_into(target); } } diff --git a/core/src/program/mod.rs b/core/src/program/mod.rs index 972d85db99..5e5f2ef88d 100644 --- a/core/src/program/mod.rs +++ b/core/src/program/mod.rs @@ -1,13 +1,11 @@ use super::{ chiplets::hasher::{self, Digest}, - utils::{ - collections::{BTreeMap, Vec}, - Box, - }, - Felt, Operation, + errors, Felt, Operation, +}; +use crate::utils::{ + collections::*, ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable, }; use core::fmt; -use winter_utils::{ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable}; pub mod blocks; use blocks::CodeBlock; @@ -126,15 +124,25 @@ impl CodeBlockTable { #[derive(Debug, Clone, Default, PartialEq, Eq)] pub struct Kernel(Vec); +pub const MAX_KERNEL_PROCEDURES: usize = u8::MAX as usize; + impl Kernel { /// Returns a new [Kernel] instantiated with the specified procedure hashes. - pub fn new(proc_hashes: &[Digest]) -> Self { - // make sure procedure roots are ordered consistently - let mut hash_map: BTreeMap<[u8; 32], Digest> = BTreeMap::new(); - proc_hashes.iter().cloned().for_each(|r| { - hash_map.insert(r.into(), r); - }); - Self(hash_map.values().copied().collect()) + pub fn new(proc_hashes: &[Digest]) -> Result { + if proc_hashes.len() > MAX_KERNEL_PROCEDURES { + Err(errors::KernelError::TooManyProcedures(MAX_KERNEL_PROCEDURES, proc_hashes.len())) + } else { + let mut hashes = proc_hashes.to_vec(); + hashes.sort_by_key(|v| v.as_bytes()); // ensure consistent order + + let duplicated = hashes.windows(2).any(|data| data[0] == data[1]); + + if duplicated { + Err(errors::KernelError::DuplicatedProcedures) + } else { + Ok(Self(hashes)) + } + } } /// Returns true if this kernel does not contain any procedures. @@ -158,20 +166,16 @@ impl Kernel { // this is required by AIR as public inputs will be serialized with the proof impl Serializable for Kernel { fn write_into(&self, target: &mut W) { - // TODO the serialization of MAST will not support values greater than `u16::MAX`, so we - // reflect the same restriction here. however, this should be tweaked in the future. This - // value will likely be capped to `u8::MAX`. 
- - debug_assert!(self.0.len() <= u16::MAX as usize); + debug_assert!(self.0.len() <= MAX_KERNEL_PROCEDURES); target.write_u16(self.0.len() as u16); - Digest::write_batch_into(&self.0, target) + target.write_many(&self.0) } } impl Deserializable for Kernel { fn read_from(source: &mut R) -> Result { - let len = source.read_u16()?; - let kernel = (0..len).map(|_| source.read::()).collect::>()?; + let len = source.read_u16()?.into(); + let kernel = source.read_many::(len)?; Ok(Self(kernel)) } } diff --git a/core/src/program/tests.rs b/core/src/program/tests.rs index 016e2e98a6..ab11bac3e4 100644 --- a/core/src/program/tests.rs +++ b/core/src/program/tests.rs @@ -23,7 +23,7 @@ proptest! { Some(digest_from_seed(*seed)) }) .collect(); - let kernel = Kernel::new(&kernel); + let kernel = Kernel::new(&kernel).unwrap(); let program_info = ProgramInfo::new(program_hash, kernel); let bytes = program_info.to_bytes(); let deser = ProgramInfo::read_from_bytes(&bytes).unwrap(); diff --git a/core/src/random.rs b/core/src/random.rs deleted file mode 100644 index 1484dd4dcf..0000000000 --- a/core/src/random.rs +++ /dev/null @@ -1,161 +0,0 @@ -use super::{crypto::hash::Rpo256, utils::collections::Vec, Felt, FieldElement}; -use math::StarkField; -use miden_crypto::{hash::rpo::RpoDigest, Word, ZERO}; - -// RE-EXPORTS -// ================================================================================================ - -pub use winter_crypto::{DefaultRandomCoin as WinterRandomCoin, RandomCoin, RandomCoinError}; - -// CONSTANTS -// ================================================================================================ - -const STATE_WIDTH: usize = Rpo256::STATE_WIDTH; -const RATE_START: usize = Rpo256::RATE_RANGE.start; -const RATE_END: usize = Rpo256::RATE_RANGE.end; -const HALF_RATE_WIDTH: usize = (Rpo256::RATE_RANGE.end - Rpo256::RATE_RANGE.start) / 2; - -// RPO RANDOM COIN -// ================================================================================================ -/// A simplified version of the `SPONGE_PRG` reseedable pseudo-random number generator algorithm -/// described in https://eprint.iacr.org/2011/499.pdf. The simplification is related to -/// to the following facts: -/// 1. A call to the reseed method implies one and only one call to the permutation function. -/// This is possible because in our case we never reseed with more than 4 field elements. -/// 2. As a result of the previous point, we dont make use of an input buffer to accumulate seed -/// material. -/// It is important to note that the current implementation of `RPORandomCoin` assumes that -/// `draw_integers()` is called immediately after `reseed_with_int()`. 
-pub struct RpoRandomCoin { - state: [Felt; STATE_WIDTH], - current: usize, -} - -impl RpoRandomCoin { - fn draw_basefield(&mut self) -> Felt { - if self.current == RATE_END { - Rpo256::apply_permutation(&mut self.state); - self.current = RATE_START; - } - - self.current += 1; - self.state[self.current - 1] - } -} - -impl RandomCoin for RpoRandomCoin { - type BaseField = Felt; - type Hasher = Rpo256; - - fn new(seed: &[Self::BaseField]) -> Self { - let mut state = [ZERO; STATE_WIDTH]; - - let digest = Rpo256::hash_elements(seed); - let digest_elem = digest.as_elements(); - - for i in 0..HALF_RATE_WIDTH { - state[RATE_START + i] += digest_elem[i]; - } - - // Absorb - Rpo256::apply_permutation(&mut state); - - RpoRandomCoin { - state, - current: RATE_START, - } - } - - fn reseed(&mut self, data: RpoDigest) { - // Reset buffer - self.current = RATE_START; - - // Add the new seed material to the first half of the rate portion of the RPO state - let data: Word = data.into(); - - self.state[RATE_START] += data[0]; - self.state[RATE_START + 1] += data[1]; - self.state[RATE_START + 2] += data[2]; - self.state[RATE_START + 3] += data[3]; - - // Absorb - Rpo256::apply_permutation(&mut self.state); - } - - fn reseed_with_int(&mut self, value: u64) { - // Reset buffer - self.current = RATE_START; - - let value = Felt::new(value); - self.state[RATE_START] += value; - Rpo256::apply_permutation(&mut self.state); - } - - fn leading_zeros(&self) -> u32 { - let first_rate_element = self.state[RATE_START].as_int(); - first_rate_element.trailing_zeros() - } - - fn check_leading_zeros(&self, value: u64) -> u32 { - let value = Felt::new(value); - let mut state_tmp = self.state; - - state_tmp[RATE_START] += value; - - Rpo256::apply_permutation(&mut state_tmp); - - let first_rate_element = state_tmp[RATE_START].as_int(); - first_rate_element.trailing_zeros() - } - - fn draw>(&mut self) -> Result { - let ext_degree = E::EXTENSION_DEGREE; - let mut result = vec![ZERO; ext_degree]; - for r in result.iter_mut().take(ext_degree) { - *r = self.draw_basefield(); - } - - let result = E::slice_from_base_elements(&result); - Ok(result[0]) - } - - fn draw_integers( - &mut self, - num_values: usize, - domain_size: usize, - ) -> Result, RandomCoinError> { - assert!(domain_size.is_power_of_two(), "domain size must be a power of two"); - assert!(num_values < domain_size, "number of values must be smaller than domain size"); - - // Since the first element of the rate portion is used for proof-of-work and thus is not - // random, we need to make sure that it is not used for generating a random index. 
- self.current += 1; - - // determine how many bits are needed to represent valid values in the domain - let v_mask = (domain_size - 1) as u64; - - // draw values from PRNG until we get as many unique values as specified by num_queries - let mut values = Vec::new(); - for _ in 0..1000 { - // get the next pseudo-random field element - let value = self.draw_basefield().as_int(); - - // use the mask to get a value within the range - let value = (value & v_mask) as usize; - - if values.contains(&value) { - continue; - } - values.push(value); - if values.len() == num_values { - break; - } - } - - if values.len() < num_values { - return Err(RandomCoinError::FailedToDrawIntegers(num_values, values.len(), 1000)); - } - - Ok(values) - } -} diff --git a/core/src/stack/inputs.rs b/core/src/stack/inputs.rs index 41245bcd95..fdfc0ce2ef 100644 --- a/core/src/stack/inputs.rs +++ b/core/src/stack/inputs.rs @@ -1,4 +1,6 @@ -use super::{vec, ByteWriter, Felt, InputError, Serializable, ToElements, Vec}; +use crate::utils::{collections::*, ByteReader, Deserializable, DeserializationError}; + +use super::{ByteWriter, Felt, InputError, Serializable, ToElements}; use core::slice; // STACK INPUTS @@ -29,14 +31,12 @@ impl StackInputs { where I: IntoIterator, { - iter.into_iter() - .map(|v| { - Felt::try_from(v).map_err(|_| { - InputError::NotFieldElement(v, "the provided value isn't a valid field element") - }) - }) - .collect::, _>>() - .map(Self::new) + let values = iter + .into_iter() + .map(|v| Felt::try_from(v).map_err(|e| InputError::NotFieldElement(v, e))) + .collect::, _>>()?; + + Ok(Self::new(values)) } // PUBLIC ACCESSORS @@ -66,6 +66,15 @@ impl IntoIterator for StackInputs { } } +impl ToElements for StackInputs { + fn to_elements(&self) -> Vec { + self.values.to_vec() + } +} + +// SERIALIZATION +// ================================================================================================ + impl Serializable for StackInputs { fn write_into(&self, target: &mut W) { // TODO the length of the stack, by design, will not be greater than `u32::MAX`. 
however, @@ -74,12 +83,15 @@ impl Serializable for StackInputs { debug_assert!(self.values.len() <= u32::MAX as usize); target.write_u32(self.values.len() as u32); - self.values.iter().copied().for_each(|v| target.write(v)); + target.write_many(&self.values); } } -impl ToElements for StackInputs { - fn to_elements(&self) -> Vec { - self.values.to_vec() +impl Deserializable for StackInputs { + fn read_from(source: &mut R) -> Result { + let count = source.read_u32()?; + + let values = source.read_many::(count as usize)?; + Ok(StackInputs { values }) } } diff --git a/core/src/stack/mod.rs b/core/src/stack/mod.rs index 56d3147005..7d73207d9d 100644 --- a/core/src/stack/mod.rs +++ b/core/src/stack/mod.rs @@ -2,10 +2,7 @@ use super::{ errors::{InputError, OutputError}, Felt, StackTopState, StarkField, ToElements, }; -use winter_utils::{ - collections::{vec, Vec}, - ByteWriter, Serializable, -}; +use crate::utils::{ByteWriter, Serializable}; mod inputs; pub use inputs::StackInputs; diff --git a/core/src/stack/outputs.rs b/core/src/stack/outputs.rs index a1f7baa028..a69a1f328e 100644 --- a/core/src/stack/outputs.rs +++ b/core/src/stack/outputs.rs @@ -1,5 +1,8 @@ +use crate::utils::{collections::*, range, ByteReader, Deserializable, DeserializationError}; +use miden_crypto::Word; + use super::{ - ByteWriter, Felt, OutputError, Serializable, StackTopState, StarkField, ToElements, Vec, + ByteWriter, Felt, OutputError, Serializable, StackTopState, StarkField, ToElements, STACK_TOP_SIZE, }; @@ -28,6 +31,8 @@ pub struct StackOutputs { overflow_addrs: Vec, } +pub const MAX_STACK_OUTPUTS_SIZE: usize = u16::MAX as usize; + impl StackOutputs { // CONSTRUCTOR // -------------------------------------------------------------------------------------------- @@ -40,6 +45,10 @@ impl StackOutputs { /// - If the number of stack elements is greater than `STACK_TOP_SIZE` (16) and `overflow_addrs` /// does not contain exactly `stack.len() + 1 - STACK_TOP_SIZE` elements. pub fn new(mut stack: Vec, overflow_addrs: Vec) -> Result { + if stack.len() > MAX_STACK_OUTPUTS_SIZE { + return Err(OutputError::OutputSizeTooBig(stack.len())); + } + // Validate stack elements if let Some(element) = find_invalid_elements(&stack) { return Err(OutputError::InvalidStackElement(element)); @@ -84,6 +93,32 @@ impl StackOutputs { // PUBLIC ACCESSORS // -------------------------------------------------------------------------------------------- + /// Returns the element located at the specified position on the stack or `None` if out of + /// bounds. + pub fn get_stack_item(&self, idx: usize) -> Option { + self.stack.get(idx).map(|&felt| { + felt.try_into().expect("value is greater than or equal to the field modulus") + }) + } + + /// Returns the word located starting at the specified Felt position on the stack or `None` if + /// out of bounds. For example, passing in `0` returns the word at the top of the stack, and + /// passing in `4` returns the word starting at element index `4`. + pub fn get_stack_word(&self, idx: usize) -> Option { + let word_elements: Word = { + let word_elements: Vec = range(idx, 4) + .map(|idx| self.get_stack_item(idx)) + // Elements need to be reversed, since a word `[a, b, c, d]` will be stored on the + // stack as `[d, c, b, a]` + .rev() + .collect::>()?; + + word_elements.try_into().expect("a Word contains 4 elements") + }; + + Some(word_elements) + } + /// Returns the stack outputs, which is state of the stack at the end of execution converted to /// integers. 
pub fn stack(&self) -> &[u64] { @@ -164,24 +199,6 @@ fn find_invalid_elements(outputs: &[u64]) -> Option { None } -impl Serializable for StackOutputs { - fn write_into(&self, target: &mut W) { - // TODO the length of the stack, by design, will not be greater than `u32::MAX`. however, - // we must define a common serialization format as we might diverge from the implementation - // here and the one provided by default from winterfell. - - // stack - debug_assert!(self.stack.len() <= u32::MAX as usize); - target.write_u32(self.stack.len() as u32); - self.stack.iter().copied().for_each(|v| target.write_u64(v)); - - // overflow addrs - debug_assert!(self.overflow_addrs.len() <= u32::MAX as usize); - target.write_u32(self.overflow_addrs.len() as u32); - self.overflow_addrs.iter().copied().for_each(|v| target.write_u64(v)); - } -} - impl ToElements for StackOutputs { fn to_elements(&self) -> Vec { // infallible conversion from u64 to Felt is OK here because we check validity of u64 @@ -195,3 +212,33 @@ impl ToElements for StackOutputs { .collect() } } + +// SERIALIZATION +// ================================================================================================ + +impl Serializable for StackOutputs { + fn write_into(&self, target: &mut W) { + debug_assert!(self.stack.len() <= u32::MAX as usize); + target.write_u32(self.stack.len() as u32); + target.write_many(&self.stack); + + debug_assert!(self.overflow_addrs.len() <= u32::MAX as usize); + target.write_u32(self.overflow_addrs.len() as u32); + target.write_many(&self.overflow_addrs); + } +} + +impl Deserializable for StackOutputs { + fn read_from(source: &mut R) -> Result { + let count = source.read_u32()?.try_into().expect("u32 must fit in a usize"); + let stack = source.read_many::(count)?; + + let count = source.read_u32()?.try_into().expect("u32 must fit in a usize"); + let overflow_addrs = source.read_many::(count)?; + + Ok(Self { + stack, + overflow_addrs, + }) + } +} diff --git a/core/src/utils/mod.rs b/core/src/utils/mod.rs index 54b45be4d9..c90ceb8cd1 100644 --- a/core/src/utils/mod.rs +++ b/core/src/utils/mod.rs @@ -1,20 +1,19 @@ -use super::{Felt, StarkField}; -use core::fmt::{self, Write}; +use crate::Felt; use core::{ - fmt::Debug, + fmt::{self, Debug, Write}, ops::{Bound, Range}, }; -use winter_utils::{collections::Vec, string::String}; +use {collections::*, string::*}; // RE-EXPORTS // ================================================================================================ -pub use winter_utils::{ - group_slice_elements, group_vector_elements, string, uninit_vector, Box, ByteReader, - ByteWriter, Deserializable, DeserializationError, Serializable, SliceReader, -}; +pub use winter_utils::{group_slice_elements, group_vector_elements}; -pub use miden_crypto::utils::collections; +pub use miden_crypto::utils::{ + boxed, collections, string, uninit_vector, vec, Box, ByteReader, ByteWriter, Deserializable, + DeserializationError, Serializable, SliceReader, +}; pub mod math { pub use math::{batch_inversion, log2}; diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md index b344da23ee..280e2d013b 100644 --- a/docs/src/SUMMARY.md +++ b/docs/src/SUMMARY.md @@ -17,6 +17,7 @@ - [Stack manipulation](./user_docs/assembly/stack_manipulation.md) - [Input / Output Operations](./user_docs/assembly/io_operations.md) - [Cryptographic Operations](./user_docs/assembly/cryptographic_operations.md) + - [Events](./user_docs/assembly/events.md) - [Debugging](./user_docs/assembly/debugging.md) - [Miden Standard 
Library](./user_docs/stdlib/main.md) - [std::collections](./user_docs/stdlib/collections.md) diff --git a/docs/src/assets/design/stack/crypto_ops/RCOMBBASE.png b/docs/src/assets/design/stack/crypto_ops/RCOMBBASE.png new file mode 100644 index 0000000000..3441a3cac0 Binary files /dev/null and b/docs/src/assets/design/stack/crypto_ops/RCOMBBASE.png differ diff --git a/docs/src/design/chiplets/bitwise.md b/docs/src/design/chiplets/bitwise.md index 65373a08c8..969f91c577 100644 --- a/docs/src/design/chiplets/bitwise.md +++ b/docs/src/design/chiplets/bitwise.md @@ -22,7 +22,7 @@ To perform this operation we will use a table with 12 columns, and computing a s In the above, the columns have the following meanings: -- Periodic columns $k_0$ and $k_1$. These columns contain values needed to switch various constraint on or off. $k_0$ contains a repeating sequence of a single one, followed by seven zeros. $k_1$ contains a repeating sequence of seven ones, followed by a single zero. +- Periodic columns $k_0$ and $k_1$. These columns contain values needed to switch various constraints on or off. $k_0$ contains a single one, followed by a repeating sequence of seven zeros. $k_1$ contains a repeating sequence of seven ones, followed by a single zero. - Input columns $a$ and $b$. On the first row of each 8-row cycle, the prover will set values in these columns to the upper 4 bits of the values to which a bitwise operation is to be applied. For all subsequent rows, we will append the next-most-significant 4-bit limb to each value. Thus, by the final row columns $a$ and $b$ will contain the full input values for the bitwise operation. - Columns $a_0$, $a_1$, $a_2$, $a_3$, $b_0$, $b_1$, $b_2$, $b_3$ will contain lower 4 bits of their corresponding values. - Output column $z_p$. This column represents the value of column $z$ for the prior row. For the first row, it is set to $0$. @@ -32,7 +32,7 @@ In the above, the columns have the following meanings: Let's illustrate the above table on a concrete example. For simplicity, we'll use 16-bit values, and thus, we'll only need 4 rows to complete the operation (rather than 8 for 32-bit values). Let's say $a = 41851$ (`b1010_0011_0111_1011`) and $b = 40426$ (`b1001_1101_1110_1010`), then $and(a, b) = 33130$ (`b1000_0001_0110_1010`). The table for this computation looks like so: -| a | b | x0 | x1 | x2 | x3 | y0 | y1 | y2 | y3 | zp | z | +| a | b | a0 | a1 | a2 | a3 | b0 | b1 | b2 | b3 | zp | z | | :---: | :---: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :----: | :---: | | 10 | 9 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 8 | | 163 | 157 | 1 | 1 | 0 | 0 | 1 | 0 | 1 | 1 | 8 | 129 | diff --git a/docs/src/design/chiplets/hasher.md b/docs/src/design/chiplets/hasher.md index 8176410ebd..7e1f75cbc0 100644 --- a/docs/src/design/chiplets/hasher.md +++ b/docs/src/design/chiplets/hasher.md @@ -344,7 +344,7 @@ $$ In the above: -- $m$ is a _transition label_, composed of the [operation label](main.md#operation-labels) and the periodic columns that uniquely identify each transition function. The values in the $k_0$ and $k_2$ periodic columns are included to identify the row in the hash cycle where the operation occurs. They serve to differentiate between operations that share selectors but occur at different rows in the cycle, such as `BP`, which uses $op_{linhash}$ at the first row in the cycle to initiatiate a linear hash, and `ABP`, which uses $op_{linhash}$ at the last row in the cycle to absorb new elements. 
+- $m$ is a _transition label_, composed of the [operation label](main.md#operation-labels) and the periodic columns that uniquely identify each transition function. The values in the $k_0$ and $k_2$ periodic columns are included to identify the row in the hash cycle where the operation occurs. They serve to differentiate between operations that share selectors but occur at different rows in the cycle, such as `BP`, which uses $op_{linhash}$ at the first row in the cycle to initiate a linear hash, and `ABP`, which uses $op_{linhash}$ at the last row in the cycle to absorb new elements. - $v_h$ is a _common header_ which is a combination of the transition label, a unique row address, and the node index. For the unique row address, the `clk` column from the system component is used, but we add $1$, because the system's `clk` column starts at $0$. - $v_a$, $v_b$, $v_c$ are the first, second, and third words (4 elements) of the hasher state. - $v_d$ is the third word of the hasher state but computed using the same $\alpha$ values as used for the second word. This is needed for computing the value of $v_{leaf}$ below to ensure that the same $\alpha$ values are used for the leaf node regardless of which part of the state the node comes from. diff --git a/docs/src/design/chiplets/kernel_rom.md b/docs/src/design/chiplets/kernel_rom.md index f68c20e818..7c101ddbb9 100644 --- a/docs/src/design/chiplets/kernel_rom.md +++ b/docs/src/design/chiplets/kernel_rom.md @@ -88,4 +88,4 @@ $$ Thus, when $\Delta addr = 0$, the above reduces to $vt'_{chip}=vt_{chip}$, but when $\Delta addr = 1$, the above becomes $vt'_{chip} = vt_{chip} \cdot v$. -We also need to impose boundary constraints to make sure that running product column implementing the kernel procedure table is equal to $1$ when the kernel procedure table begins and to the product of all unique kernel functions when it ends. The last boundary constraint means that the verifier only needs to know which kernel was used, but doesn't need to know which functions were invoked within the kernel. These two constraints are described as part of the [chiplets virtual table constraints](../chiplets/main.md#chiplets-virtual-table-constraints). \ No newline at end of file +We also need to impose boundary constraints to make sure that running product column implementing the kernel procedure table is equal to $1$ when the kernel procedure table begins and to the product of all unique kernel functions when it ends. The last boundary constraint means that the verifier only needs to know which kernel was used, but doesn't need to know which functions were invoked within the kernel. These two constraints are described as part of the [chiplets virtual table constraints](../chiplets/main.md#chiplets-virtual-table-constraints). 
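To make the shape of the kernel procedure table argument above concrete, here is a minimal, hypothetical sketch of how a running-product column of this kind can be built. This is not the VM's prover code: it uses plain `u64` arithmetic modulo a caller-supplied prime instead of the VM's field element type, and the names (`values`, `include`) are illustrative only.

```rust
/// Sketch only: build a running-product column for a virtual table.
/// `values[i]` is the reduced value v of row i, and `include[i]` is true when
/// Δaddr = 1, i.e. when row i introduces a new kernel procedure. The column
/// starts at 1 and ends at the product of all included values, mirroring the
/// boundary constraints described above.
fn running_product(values: &[u64], include: &[bool], modulus: u64) -> Vec<u64> {
    assert_eq!(values.len(), include.len());
    let mut column = Vec::with_capacity(values.len() + 1);
    let mut acc: u64 = 1;
    column.push(acc);
    for (&v, &inc) in values.iter().zip(include.iter()) {
        if inc {
            // vt' = vt * v when Δaddr = 1
            acc = ((acc as u128 * v as u128) % modulus as u128) as u64;
        }
        // vt' = vt when Δaddr = 0 (the value is simply carried forward)
        column.push(acc);
    }
    column
}
```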
diff --git a/docs/src/design/chiplets/main.md b/docs/src/design/chiplets/main.md index 2695b01b0e..a974863d85 100644 --- a/docs/src/design/chiplets/main.md +++ b/docs/src/design/chiplets/main.md @@ -199,4 +199,4 @@ For the kernel procedure table to be properly constrained, the value must be $1$ > $$ s_0 \cdot s_1 \cdot (s'_2 - s_2) \cdot (1 - vt'_{chip}) = 0 \text{ | degree} = 4 -$$ \ No newline at end of file +$$ diff --git a/docs/src/design/decoder/main.md b/docs/src/design/decoder/main.md index c99d7a010e..af2020fe8d 100644 --- a/docs/src/design/decoder/main.md +++ b/docs/src/design/decoder/main.md @@ -501,9 +501,9 @@ We also need to make sure that at most $9$ operations are executed as a part of #### Operation batch flags -Operation batch flags are used to specify how many operation groups comprise in a given operation batch. For most batches, the number of groups will be equal to $8$. However, for the last batch in a block (or for the first batch, if the block consists of only a single batch), the number of groups may be less than $8$. Since processing of new batches starts only on `SPAN` and `RESPAN` operations, only for these operations the flags can be set to non-zero values. +Operation batch flags are used to specify how many operation groups comprise a given operation batch. For most batches, the number of groups will be equal to $8$. However, for the last batch in a block (or for the first batch, if the block consists of only a single batch), the number of groups may be less than $8$. Since processing of new batches starts only on `SPAN` and `RESPAN` operations, only for these operations the flags can be set to non-zero values. -To simplify the constraint system, number of groups in a batch can be only one of the following values: $1$, $2$, $4$, and $8$. If number of groups in a batch does not match one of these values, the batch is simply padded with `NOOP`'s (one `NOOP` per added group). Consider the diagram below. +To simplify the constraint system, the number of groups in a batch can be only one of the following values: $1$, $2$, $4$, and $8$. If the number of groups in a batch does not match one of these values, the batch is simply padded with `NOOP`'s (one `NOOP` per added group). Consider the diagram below. ![decoder_OPERATION_batch_flags](../../assets/design/decoder/decoder_OPERATION_batch_flags.png) @@ -523,7 +523,7 @@ The simplest example of a *span* block is a block with a single batch. This batc ![decoder_single_batch_span](../../assets/design/decoder/decoder_single_batch_span.png) -Before the VM starts processing this *span* block, the prover populates registers $h_0, ..., h_7$ with operation groups $g_0, ..., g_7$. The prover also puts the total number of groups into the `group count` register $gc$. In this case, the total number of groups is $8$. +Before the VM starts processing this *span* block, the prover populates registers $h_0, ..., h_7$ with operation groups $g_0, ..., g_7$. The prover also puts the total number of groups into the `group_count` register $gc$. In this case, the total number of groups is $8$. When the VM executes a `SPAN` operation, it does the following: @@ -537,7 +537,7 @@ When the VM executes a `SPAN` operation, it does the following: ![decoder_op_group_table_after_span_op](../../assets/design/decoder/decoder_op_group_table_after_span_op.png) -Then, with every step the next operation is removed from $g_0$, and by step $9$, value of $g_0$ is $0$. 
Once this happens, the VM does the following: +Then, with every step the next operation is removed from $g_0$, and by step $9$, the value of $g_0$ is $0$. Once this happens, the VM does the following: 1. Decrements `group_count` register by $1$. 2. Sets `op bits` registers at the next step to the first operation of $g_1$. @@ -548,7 +548,7 @@ Note that we rely on the `group_count` column to construct the row to be removed Decoding of $g_1$ is performed in the same manner as decoding of $g_0$: with every subsequent step the next operation is removed from $g_1$ until its value reaches $0$, at which point, decoding of group $g_2$ begins. -The above steps are executed until value of `group_count` reaches $0$. Once `group_count` reaches $0$ and the last operation group $g_7$ is executed, the VM executed the `END` operation. Semantics of the `END` operation are described [here](#end-operation). +The above steps are executed until value of `group_count` reaches $0$. Once `group_count` reaches $0$ and the last operation group $g_7$ is executed, the VM executes the `END` operation. Semantics of the `END` operation are described [here](#end-operation). Notice that by the time we get to the `END` operation, all rows are removed from the op group table. @@ -574,7 +574,7 @@ Executing a `RESPAN` operation also adds groups $g_9, g_{10}, g_{11}$ to the op ![decoder_op_group_table_post_respan](../../assets/design/decoder/decoder_op_group_table_post_respan.png) -Then, the execution of the second batch proceeds in the manner similar to the first batch: we remove operations from the current op group, execute them, and when the value of the op group reaches $0$, we start executing the next group in the batch. Thus, by the time we get to the `END` operation, the op group table should be empty. +Then, the execution of the second batch proceeds in a manner similar to the first batch: we remove operations from the current op group, execute them, and when the value of the op group reaches $0$, we start executing the next group in the batch. Thus, by the time we get to the `END` operation, the op group table should be empty. When executing the `END` operation, the hash of the *span* block will be read from hasher row at address `addr + 7`, which, in our example, will be equal to `blk + 15`. diff --git a/docs/src/design/lookups/logup.md b/docs/src/design/lookups/logup.md index 49fe5b45bb..72a6d07c7f 100644 --- a/docs/src/design/lookups/logup.md +++ b/docs/src/design/lookups/logup.md @@ -1,6 +1,6 @@ # LogUp: multivariate lookups with logarithmic derivatives -The description of LogUp can be found [here](https://eprint.iacr.org/2022/1530.pdf). In MidenVM, LogUp is used to implement efficient [communication buses](./main.md#communication-buses-in-miden-vm). +The description of LogUp can be found [here](https://eprint.iacr.org/2022/1530.pdf). In MidenVM, LogUp is used to implement efficient [communication buses](./main.md#communication-buses-in-miden-vm). Using the LogUp construction instead of a simple [multiset check](./multiset.md) with running products reduces the computational effort for the prover and the verifier. Given two columns $a$ and $b$ in the main trace where $a$ contains duplicates and $b$ does not (i.e. $b$ is part of the lookup table), LogUp allows us to compute two logarithmic derivatives and check their equality. 
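For reference, the core identity behind a LogUp argument, stated generically rather than in terms of Miden's specific columns, is the equality of two logarithmic derivatives:

$$
\sum_{i} \frac{1}{\alpha - a_i} = \sum_{j} \frac{m_j}{\alpha - b_j}
$$

where $\alpha$ is a random challenge drawn after the trace is committed and $m_j$ is the multiplicity with which the table value $b_j$ is looked up by the column $a$. The bus column described below accumulates the two sides of this equality as a single running sum.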
@@ -26,7 +26,7 @@ The generalized trace columns and constraints for this construction are as follo ### Constraints -The diagrams above show running sum columns for computing the logarithmic derivatives for both $X$ and $T$. As an optimization, we can combine these values into a single auxiliary column in the extension field that contains the running sum of values from both logarithmic derivatives. We'll refer to this column as a _communication bus_ $b$, since it communicates the lookup request from the component $X$ to the lookup table $T$. +The diagrams above show running sum columns for computing the logarithmic derivatives for both $X$ and $T$. As an optimization, we can combine these values into a single auxiliary column in the extension field that contains the running sum of values from both logarithmic derivatives. We'll refer to this column as a _communication bus_ $b$, since it communicates the lookup request from the component $X$ to the lookup table $T$. This can be expressed as follows: @@ -62,4 +62,4 @@ Boolean flags can also be used to determine when requests from various component b' = b + \frac{m}{(\alpha - v)} - \frac{f_x}{(\alpha - x)} - \frac{f_y}{(\alpha - y)} \text{ | degree} = 4 $$ -If any of these flags have degree greater than 2 then this will increase the overall degree of the constraint and reduce the number of lookup requests that can be accommodated by the bus per row. \ No newline at end of file +If any of these flags have degree greater than 2 then this will increase the overall degree of the constraint and reduce the number of lookup requests that can be accommodated by the bus per row. diff --git a/docs/src/design/lookups/main.md b/docs/src/design/lookups/main.md index e37309c3e4..bd1236a4cc 100644 --- a/docs/src/design/lookups/main.md +++ b/docs/src/design/lookups/main.md @@ -50,6 +50,6 @@ The auxiliary columns used for buses and virtual tables are computed by includin This is true when the data in the main trace could go all the way to the end of the trace, such as in the case of the range checker. ## Cost of auxiliary columns for lookup arguments -It is important to note that depending on the field in which we operate, an auxilliary column implementing a lookup argument may actually require more than one trace column. This is specifically true for small fields. +It is important to note that depending on the field in which we operate, an auxiliary column implementing a lookup argument may actually require more than one trace column. This is specifically true for small fields. -Since Miden uses a 64-bit field, each auxiliary column needs to be represented by $2$ columns to achieve ~100-bit security and by $3$ columns to achieve ~128-bit security. \ No newline at end of file +Since Miden uses a 64-bit field, each auxiliary column needs to be represented by $2$ columns to achieve ~100-bit security and by $3$ columns to achieve ~128-bit security. 
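As a rough illustration of the sizes involved (an informal estimate, not a formal security analysis): Miden's base field has $p = 2^{64} - 2^{32} + 1$ elements, so

$$
|\mathbb{F}_{p^2}| \approx 2^{128}, \qquad |\mathbb{F}_{p^3}| \approx 2^{192}.
$$

Since the soundness error contributed by a random challenge scales inversely with the size of the field it is drawn from, a challenge from the quadratic extension (two trace columns) lands around the ~100-bit target once other losses are accounted for, while the cubic extension (three trace columns) comfortably exceeds 128 bits.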
diff --git a/docs/src/design/main.md b/docs/src/design/main.md index d967da8457..f96371d596 100644 --- a/docs/src/design/main.md +++ b/docs/src/design/main.md @@ -49,4 +49,4 @@ AIR constraints for the `fmp` column are described in [system operations](./stac >$$ clk' - (clk + 1) = 0 \text{ | degree} = 1 -$$ \ No newline at end of file +$$ diff --git a/docs/src/design/stack/crypto_ops.md b/docs/src/design/stack/crypto_ops.md index 5066135c84..fffad56411 100644 --- a/docs/src/design/stack/crypto_ops.md +++ b/docs/src/design/stack/crypto_ops.md @@ -150,3 +150,56 @@ To keep the degree of the constraints low, a number of intermediate values are u The effect on the rest of the stack is: * **Left shift** starting from position $16$. + +## RCOMBBASE +The `RCOMBBASE` operation performs a single step in the computation of the random linear combination defining the DEEP composition polynomial i.e., the input to the FRI protocol. More precisely, the sum in question is: + $$\sum_{i=0}^k{\alpha_i \cdot \left(\frac{T_i(x) - T_i(z)}{x - z} + \frac{T_i(x) - T_i(g \cdot z)}{x - g \cdot z} \right)}$$ +where $x$ is the current query to the DEEP composition polynomial for which we are computing the above random linear combination. +The `RCOMBBASE` instruction computes the numerators $$\alpha_i \cdot (T_i(x) - T_i(z))$$ and $$\alpha_i \cdot (T_i(x) - T_i(g \cdot z))$$ and stores the values in two accumulators $p$ and $r$, respectively. This instruction is specialized to main trace columns i.e. the values $T_i(x)$ are base field elements. The instruction works in combination with the `mem_stream` instruction where it is called 8 times in a row for each call to `mem_stream`. + +The stack for the operation is expected to be arranged as follows: +- The first $8$ stack elements contain $8$ base field elements $T_0,\cdots , T_7$ representing the values of $T_i(x)$ for the current query $x$ and the current batch of $8$ column values of the main trace for query $x$. +- The next $2$ elements contain the current value of the accumulator $p$ as a quadratic extension field element. +- The next $2$ elements contain the current value of the accumulator $r$ as a quadratic extension field element. +- The next element contains the value of the memory pointer `x_ptr` to the next batch of $8$ column values for query $x$. +- The next element contains the value of the memory pointer `z_ptr` to the $i$-th OOD evaluations at z and gz i.e. $T_i(z) = (T_i(z)_0, T_i(z)_1)$ and $T_i(gz) = (T_i(gz)_0, T_i(gz)_1)$. +- The next element contains the value of the memory pointer `a_ptr` to the $i$-th random value $\alpha_i = (\alpha_{i, 0}, \alpha_{i,1})$. The remaining elements of the word are expected to be empty. + +The diagram below illustrates the stack transition for `RCOMBBASE` operation. + +![rcomb_base](../../assets/design/stack/crypto_ops/RCOMBBASE.png) + +After calling the `mem_stream ` with `x_ptr`, the operation does the following: +- Populates the helper registers with $\left[T_i(z)_0, T_i(z)_1, T_i(gz)_0, T_i(gz)_1, \alpha_{i, 0}, \alpha_{i,1}\right]$ using the pointers `z_ptr` and `a_ptr`. +- Updates the accumulators $$p \mathrel{{+}{=}} \alpha_i\cdot\left(T_i(x) - T_i(z)\right)$$ and $$r \mathrel{{+}{=}} \alpha_i\cdot\left(T_i(x) - T_i(gz)\right).$$ +- Increments the pointers `z_ptr` and `a_ptr` by $1$. +- The top $8$ base field elements $T_0,\cdots , T_7$ are circularly shifted so that `T_0` becomes the element at the top of the operand stack. + +> TODO: add detailed constraint descriptions. 
See discussion [here](https://github.com/0xPolygonMiden/miden-vm/issues/869). + +The effect on the rest of the stack is: +* **No change.** + +The `RCOMBBASE` makes two memory access request. To simplify the description of these, we first define the following variables : + +$$ +v_1 = \sum_{i=0}^3\alpha_{i+5} \cdot h_{i} +$$ + +$$ +v_2 = \sum_{i=0}^1\alpha_{i+5} \cdot h_{i + 4} +$$ + +Using the above variables, we define the values representing the memory access request as follows: + +$$ +u_{mem, 1} = \alpha_0 + \alpha_1 \cdot op_{mem\_read} + \alpha_2 \cdot ctx + \alpha_3 \cdot s_{13} + \alpha_4 \cdot clk + v_1 +$$ + +$$ +u_{mem, 2} = \alpha_0 + \alpha_1 \cdot op_{mem\_read} + \alpha_2 \cdot ctx + \alpha_3 \cdot s_{14} + \alpha_4 \cdot clk + v_2 +$$ + +$$ +u_{mem} = u_{mem, 1} \cdot u_{mem, 2} +$$ \ No newline at end of file diff --git a/docs/src/design/stack/field_ops.md b/docs/src/design/stack/field_ops.md index c4306c7456..4e0a2ad9e1 100644 --- a/docs/src/design/stack/field_ops.md +++ b/docs/src/design/stack/field_ops.md @@ -218,7 +218,7 @@ The effect on the rest of the stack is: * **No change** starting from position $4$. ## EXT2MUL -The `EXT2MUL` operation pops top $4$ values from the top of the stack, performs mulitplication between the two extension field elements, and pushes the resulting $4$ values onto the stack. The diagram below illustrates this graphically. +The `EXT2MUL` operation pops top $4$ values from the top of the stack, performs multiplication between the two extension field elements, and pushes the resulting $4$ values onto the stack. The diagram below illustrates this graphically. ![ext2mul](../../assets/design/stack/field_operations/EXT2MUL.png) diff --git a/docs/src/design/stack/io_ops.md b/docs/src/design/stack/io_ops.md index eaad320462..822d0fa831 100644 --- a/docs/src/design/stack/io_ops.md +++ b/docs/src/design/stack/io_ops.md @@ -209,4 +209,4 @@ In the above: - $clk$ is the current clock cycle of the VM. The effect of this operation on the rest of the stack is: -* **No change** starting from position $8$ except position $12$. \ No newline at end of file +* **No change** starting from position $8$ except position $12$. diff --git a/docs/src/design/stack/op_constraints.md b/docs/src/design/stack/op_constraints.md index 94ea80e559..f4502b2daa 100644 --- a/docs/src/design/stack/op_constraints.md +++ b/docs/src/design/stack/op_constraints.md @@ -188,7 +188,7 @@ This group contains operations which require constraints with degree up to $3$. | `SPAN` | $86$ | `101_0110` | [Flow control ops](../decoder/main.md) | $5$ | | `JOIN` | $87$ | `101_0111` | [Flow control ops](../decoder/main.md) | $5$ | | `DYN` | $88$ | `101_1000` | [Flow control ops](../decoder/main.md) | $5$ | -| `` | $89$ | `101_1001` | | $5$ | +| `RCOMBBASE` | $89$ | `101_1001` | [Crypto ops](./crypto_ops.md) | $5$ | | `` | $90$ | `101_1010` | | $5$ | | `` | $91$ | `101_1011` | | $5$ | | `` | $92$ | `101_1100` | | $5$ | diff --git a/docs/src/intro/main.md b/docs/src/intro/main.md index 05a3d44274..a10c23626a 100644 --- a/docs/src/intro/main.md +++ b/docs/src/intro/main.md @@ -2,7 +2,7 @@ Miden VM is a zero-knowledge virtual machine written in Rust. For any program executed on Miden VM, a STARK-based proof of execution is automatically generated. This proof can then be used by anyone to verify that the program was executed correctly without the need for re-executing the program or even knowing the contents of the program. ## Status and features -Miden VM is currently on release v0.7. 
In this release, most of the core features of the VM have been stabilized, and most of the STARK proof generation has been implemented. While we expect to keep making changes to the VM internals, the external interfaces should remain relatively stable, and we will do our best to minimize the amount of breaking changes going forward. +Miden VM is currently on release v0.8. In this release, most of the core features of the VM have been stabilized, and most of the STARK proof generation has been implemented. While we expect to keep making changes to the VM internals, the external interfaces should remain relatively stable, and we will do our best to minimize the amount of breaking changes going forward. At this point, Miden VM is good enough for experimentation, and even for real-world applications, but it is not yet ready for production use. The codebase has not been audited and contains known and unknown bugs and security flaws. diff --git a/docs/src/intro/usage.md b/docs/src/intro/usage.md index cfba00ade8..10baae805e 100644 --- a/docs/src/intro/usage.md +++ b/docs/src/intro/usage.md @@ -1,5 +1,5 @@ # Usage -Before you can use Miden VM, you'll need to make sure you have Rust [installed](https://www.rust-lang.org/tools/install). Miden VM v0.7 requires Rust version **1.67** or later. +Before you can use Miden VM, you'll need to make sure you have Rust [installed](https://www.rust-lang.org/tools/install). Miden VM v0.8 requires Rust version **1.73** or later. Miden VM consists of several crates, each of which exposes a small set of functionality. The most notable of these crates are: * [miden-processor](https://crates.io/crates/miden-processor), which can be used to execute Miden VM programs. @@ -43,13 +43,21 @@ Similar to `make exec` command, this will place the resulting `miden` executable Currently, GPU acceleration is applicable only to recursive proofs which can be generated using the `-r` flag. ### SIMD acceleration -Miden VM execution and proof generation can be accelerated via vectorized instructions. Currently, SIMD acceleration can be enabled only on platforms supporting [SVE](https://en.wikipedia.org/wiki/AArch64#Scalable_Vector_Extension_(SVE)) instructions (e.g., Graviton 3). To compile Miden VM with SVE acceleration enabled, you can run the following command: +Miden VM execution and proof generation can be accelerated via vectorized instructions. Currently, SIMD acceleration can be enabled on platforms supporting [SVE](https://en.wikipedia.org/wiki/AArch64#Scalable_Vector_Extension_(SVE)) and [AVX2](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#Advanced_Vector_Extensions_2) instructions. + +To compile Miden VM with AVX2 acceleration enabled, you can run the following command: +``` +make exec-avx2 +``` + +To compile Miden VM with SVE acceleration enabled, you can run the following command: ``` -make exec-graviton +make exec-sve ``` + This will place the resulting `miden` executable into the `./target/optimized` directory. -Similar to Metal acceleration, SVE acceleration is currently applicable only to recursive proofs which can be generated using the `-r` flag. +Similar to Metal acceleration, SVE/AVX2 acceleration is currently applicable only to recursive proofs which can be generated using the `-r` flag. 
### Running Miden VM
Once the executable has been compiled, you can run Miden VM like so:
@@ -64,6 +72,7 @@ Currently, Miden VM can be executed with the following subcommands:
 * `debug` - this will instantiate a [Miden debugger](../tools/debugger.md) against the specified Miden assembly program and inputs.
 * `analyze` - this will run a Miden assembly program against specific inputs and will output stats about its execution.
 * `repl` - this will initiate the [Miden REPL](../tools/repl.md) tool.
+* `example` - this will execute a Miden assembly example program, generate a STARK proof of execution and verify it. Currently it is possible to run `blake3` and `fibonacci` examples.
 All of the above subcommands require various parameters to be provided. To get more detailed help on what is needed for a given subcommand, you can run the following:
 ```
@@ -76,6 +85,13 @@ For example:
 To execute a program using the Miden VM there needs to be a `.masm` file containing the Miden Assembly code and a `.inputs` file containing the inputs.
+#### Enabling logging
+You can use the `MIDEN_LOG` environment variable to control how much logging output the VM produces. For example:
+```
+MIDEN_LOG=trace ./target/optimized/miden [subcommand] [parameters]
+```
+If the level is not specified, the `warn` level is used by default.
+
 ### Inputs
 As described [here](https://0xpolygonmiden.github.io/miden-vm/intro/overview.html#inputs-and-outputs) the Miden VM can consume public and secret inputs.
diff --git a/docs/src/tools/debugger.md b/docs/src/tools/debugger.md
index 9735a1b015..83af1e15e6 100644
--- a/docs/src/tools/debugger.md
+++ b/docs/src/tools/debugger.md
@@ -6,13 +6,13 @@ The Miden debugger supports the following commands:
 | Command | Shortcut | Arguments | Description |
 | --- | --- | --- | --- |
-| next | n | count? | Steps `count` clock cycles. Will step `1` cycle of `count` is ommitted. |
+| next | n | count? | Steps `count` clock cycles. Will step `1` cycle if `count` is omitted. |
 | continue | c | - | Executes the program until completion, failure or a breakpoint. |
-| back | b | count? | Backward step `count` clock cycles. Will back-step `1` cycle of `count` is ommitted. |
+| back | b | count? | Backward step `count` clock cycles. Will back-step `1` cycle if `count` is omitted. |
 | rewind | r | - | Executes the program backwards until the beginning, failure or a breakpoint. |
 | print | p | - | Displays the complete state of the virtual machine. |
-| print mem | p m | address? | Displays the memory value at `address`. If `address` is ommitted, didisplays all the memory values. |
-| print stack | p s | index? | Displays the stack value at `index`. If `index` is ommitted, displays all the stack values. |
+| print mem | p m | address? | Displays the memory value at `address`. If `address` is omitted, displays all the memory values. |
+| print stack | p s | index? | Displays the stack value at `index`. If `index` is omitted, displays all the stack values. |
 | clock | c | - | Displays the current clock cycle. |
 | quit | q | - | Quits the debugger. |
 | help | h | - | Displays the help message. |
diff --git a/docs/src/tools/repl.md b/docs/src/tools/repl.md
index c6fdb8e69d..ec90538d56 100644
--- a/docs/src/tools/repl.md
+++ b/docs/src/tools/repl.md
@@ -7,6 +7,11 @@ Miden REPL can be started via the CLI [repl](../intro/usage.md#cli-interface) co
 ./target/optimized/miden repl
 ```
 
+It is also possible to initialize the REPL with libraries.
To initialize it with the Miden standard library, specify the `-s` or `--stdlib` flag; a third-party library can be added by specifying the `-l` or `--libraries` flag with paths to `.masl` library files. For example:
+```Shell
+./target/optimized/miden repl -s -l example/library.masl
+```
+
 ### Miden assembly instruction
 All Miden instructions mentioned in the [Miden Assembly sections](../user_docs/assembly/main.md) are valid. One can either input instructions one by one or multiple instructions in one input.
@@ -47,13 +52,13 @@ The `!program` command prints out the entire Miden program being executed. E.g.,
 ```
 >> push.1.2.3.4
 >> repeat.16 pow2 end
->> u32checked_add
+>> u32wrapping_add
 
 >> !program
 begin
    push.1.2.3.4
    repeat.16 pow2 end
-   u32checked_add
+   u32wrapping_add
 end
 ```
@@ -64,7 +69,7 @@ The `!stack` command prints out the state of the stack at the last executed inst
 ```
 >> push.1 push.2 push.3 push.4 push.5
 >> exp
->> u32checked_mul
+>> u32wrapping_mul
 >> swap
 >> eq.2
 >> assert
@@ -115,6 +120,34 @@ If the `addr` has not been initialized:
 Memory at address 87 is empty
 ```
 
+### !use
+
+The `!use` command prints out the list of all modules available for import.
+
+If the stdlib was added to the available libraries list, the `!use` command will print all of its modules:
+```
+>> !use
+Modules available for importing:
+std::collections::mmr
+std::collections::smt
+...
+std::mem
+std::sys
+std::utils
+```
+
+Using the `!use` command with a module name will add the specified module to the program imports:
+```
+>> !use std::math::u64
+
+>> !program
+use.std::math::u64
+
+begin
+
+end
+```
+
 ### !undo
 The `!undo` command reverts to the previous state of the stack and memory by dropping off the last executed assembly instruction from the program. One could use `!undo` as often as they want to restore the state of a stack and memory $n$ instructions ago (provided there are $n$ instructions in the program). The `!undo` command will result in an error if no remaining instructions are left in the Miden program.
diff --git a/docs/src/user_docs/assembly/code_organization.md b/docs/src/user_docs/assembly/code_organization.md
index 7e2eb2c1d5..a22af8b044 100644
--- a/docs/src/user_docs/assembly/code_organization.md
+++ b/docs/src/user_docs/assembly/code_organization.md
@@ -54,7 +54,7 @@ dynexec
 This causes the VM to do the following:
 1. Read the top 4 elements of the stack to get the hash of the dynamic target (leaving the stack unchanged).
-2. Execute the code block which hashes to the specified target. The VM must know the specified code block and hash (they must be in the CodeBlockTable of the executing Program).
+2. Execute the code block which hashes to the specified target. The VM must know the specified code block and hash: they must be in the CodeBlockTable of the executing Program. Hashes can be put into the CodeBlockTable manually, or by executing `call`, `syscall`, or `procref` instructions.
 
 Dynamic code execution in a new context can be achieved similarly by setting the top $4$ elements of the stack to the hash of the dynamic code block and then executing the following instruction:
@@ -107,7 +107,7 @@ use.std::math::u64
 begin
     push.1.0
     push.2.0
-    exec.u64::checked_add
+    exec.u64::wrapping_add
 end
 ```
 In the above example we import `std::math::u64` module from the [standard library](../stdlib/main.md). We then execute a program which pushes two 64-bit integers onto the stack, and then invokes a 64-bit addition procedure from the imported module.
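Putting the pieces above together, a minimal, hypothetical sketch of driving `dynexec` with a root obtained via `procref` could look as follows; the procedure name `sum` is invented for illustration, and, per the description above, the hash word read by `dynexec` stays on the stack, so the callee drops it first:

```
proc.sum
    dropw        # discard the target hash word that dynexec leaves on the stack
    add          # 3 + 4 = 7
end

begin
    push.3.4     # operands for sum
    procref.sum  # push the MAST root of sum (this also places it in the CodeBlockTable)
    dynexec      # execute the code block whose hash is now on top of the stack
end
```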
@@ -143,9 +143,9 @@ In addition to the locally-defined procedure `foo`, the above module also export
 ### Constants
 Miden assembly supports constant declarations. These constants are scoped to the module they are defined in and can be used as immediate parameters for Miden assembly instructions. Constants are supported as immediate values for the following instructions: `push`, `assert`, `assertz`, `assert_eq`, `assert_eqw`, `locaddr`, `loc_load`, `loc_loadw`, `loc_store`, `loc_storew`, `mem_load`, `mem_loadw`, `mem_store`, `mem_storew`.
-Constants must be declared right after module imports and before any procedures or program bodies. A constant's name must start with an upper-case letter and can contain any combination of numbers, upper-case ASCII letters, and underscores (`_`). The number of characters in a constant name cannot exceed 100.
+Constants must be declared right after module imports and before any procedures or program bodies. A constant's name must start with an upper-case letter and can contain any combination of numbers, upper-case ASCII letters, and underscores (`_`). The number of characters in a constant name cannot exceed 100.
-A constant's value must be in the range between $0$ and $2^{64} - 2^{32}$ (both inclusive) and can be defined by an arithmetic expression using `+`, `-`, `*`, `/`, `//`, `(`, `)` operators and references to the previously defined constants. Here `/` is a field division and `//` is an integer division. Note that the arithmetic expression cannot contain spaces.
+A constant's value must be in a decimal or hexadecimal form and be in the range between $0$ and $2^{64} - 2^{32}$ (both inclusive). The value can be defined by an arithmetic expression using the `+`, `-`, `*`, `/`, `//`, `(`, `)` operators and references to previously defined constants, provided the expression uses only decimal numbers. Here `/` is a field division and `//` is an integer division. Note that the arithmetic expression cannot contain spaces.
 
 ```
 use.std::math::u64
@@ -156,7 +156,7 @@ const.ADDR_1=3
 begin
     push.CONSTANT_1.CONSTANT_2
-    exec.u64::checked_add
+    exec.u64::wrapping_add
     mem_store.ADDR_1
 end
diff --git a/docs/src/user_docs/assembly/debugging.md b/docs/src/user_docs/assembly/debugging.md
index 173acb4272..aec022c303 100644
--- a/docs/src/user_docs/assembly/debugging.md
+++ b/docs/src/user_docs/assembly/debugging.md
@@ -4,7 +4,13 @@ To support basic debugging capabilities, Miden assembly provides a `debug` instr
 
 - `debug.stack` prints out the entire contents of the stack.
 - `debug.stack.<n>` prints out the top $n$ items of the stack. $n$ must be an integer greater than $0$ and smaller than $256$.
+- `debug.mem` prints out the entire contents of RAM.
+- `debug.mem.<n>` prints out the contents of memory at address $n$.
+- `debug.mem.<n>.<m>` prints out the contents of memory starting at address $n$ and ending at address $m$ (both inclusive). $m$ must be greater than or equal to $n$.
+- `debug.local` prints out the whole local memory of the currently executing procedure.
+- `debug.local.<n>` prints out the contents of the local memory at index $n$ for the currently executing procedure. $n$ must be greater than or equal to $0$ and smaller than $65536$.
+- `debug.local.<n>.<m>` prints out the contents of the local memory starting at index $n$ and ending at index $m$ (both inclusive). $m$ must be greater than or equal to $n$. $n$ and $m$ must be greater than or equal to $0$ and smaller than $65536$.
 
 Debug instructions do not affect the VM state and do not change the program hash.
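As a small illustration of the decorators listed above (a hypothetical snippet; as noted right below, it only produces output when the assembler is instantiated in debug mode):

```
begin
    push.1.2.3.4
    debug.stack.4    # print the top 4 items of the stack
    mem_storew.0     # write the top word to memory address 0
    debug.mem.0      # print the contents of memory at address 0
    dropw
end
```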
-To make use of the `debug` instruction, programs must be compiled with an assembler instantiated in the debug mode. Otherwise, the assembler will simply ignore the `debug` instructions.
\ No newline at end of file
+To make use of the `debug` instruction, programs must be compiled with an assembler instantiated in the debug mode. Otherwise, the assembler will simply ignore the `debug` instructions.
diff --git a/docs/src/user_docs/assembly/events.md b/docs/src/user_docs/assembly/events.md
new file mode 100644
index 0000000000..f6951a2d93
--- /dev/null
+++ b/docs/src/user_docs/assembly/events.md
@@ -0,0 +1,23 @@
+## Events
+
+Miden assembly supports the concept of events. Events are simple data structures with a single `event_id` field. When an event is emitted by a program, it is communicated to the host. Events can be emitted at specific points of program execution with the intent of triggering some action on the host. This is useful as the program has contextual information that would be challenging for the host to infer. The emission of events allows the program to communicate this contextual information to the host. The host contains an event handler that is responsible for handling events and taking appropriate actions. The emission of events does not change the state of the VM but it can change the state of the host.
+
+An event can be emitted via the `emit.<event_id>` assembly instruction, where `<event_id>` can be any 32-bit value specified either directly or via a [named constant](./code_organization.md#constants). For example:
+
+```
+emit.EVENT_ID_1
+emit.2
+```
+
+## Tracing
+
+Miden assembly also supports code tracing, which works similarly to event emitting.
+
+A trace can be emitted via the `trace.<trace_id>` assembly instruction, where `<trace_id>` can be any 32-bit value specified either directly or via a [named constant](./code_organization.md#constants). For example:
+
+```
+trace.EVENT_ID_1
+trace.2
+```
+
+To make use of the `trace` instruction, programs should be run with the tracing flag (`-t` or `--tracing`); otherwise, these instructions will be ignored.
diff --git a/docs/src/user_docs/assembly/field_operations.md b/docs/src/user_docs/assembly/field_operations.md
index a0ca6c7b00..d3425c17a3 100644
--- a/docs/src/user_docs/assembly/field_operations.md
+++ b/docs/src/user_docs/assembly/field_operations.md
@@ -23,6 +23,8 @@ If the error code is omitted, the default value of $0$ is assumed.
 
 ### Arithmetic and Boolean operations
 
+The arithmetic operations below are performed in a 64-bit [prime field](https://en.wikipedia.org/wiki/Finite_field) defined by modulus $p = 2^{64} - 2^{32} + 1$. This means that overflow happens after a value exceeds $p$. Also, the result of divisions may appear counter-intuitive because divisions are defined via inversions.
+
 | Instruction | Stack_input | Stack_output | Notes |
 | ------------------------------------------------------------------------------ | ----------- | ------------- | ------------------------------------------------------------------------------------------------------------ |
 | add&#13;
- *(1 cycle)*
add.*b*
- *(1-2 cycle)* | [b, a, ...] | [c, ...] | $c \leftarrow (a + b) \mod p$ | @@ -33,6 +35,7 @@ If the error code is omitted, the default value of $0$ is assumed. | inv
- *(1 cycle)* | [a, ...] | [b, ...] | $b \leftarrow a^{-1} \mod p$
Fails if $a = 0$ | | pow2
- *(16 cycles)* | [a, ...] | [b, ...] | $b \leftarrow 2^a$
Fails if $a > 63$ | | exp.*uxx*
- *(9 + xx cycles)*
exp.*b*
- *(9 + log2(b) cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow a^b$
Fails if xx is outside [0, 63)
exp is equivalent to exp.u64 and needs 73 cycles | +| ilog2
- *(44 cycles)* | [a, ...] | [b, ...] | $b \leftarrow \lfloor \log_2{a} \rfloor$&#13;
Fails if $a = 0$ |
- *(1 cycle)* | [a, ...] | [b, ...] | $b \leftarrow 1 - a$
Fails if $a > 1$ | | and
- *(1 cycle)* | [b, a, ...] | [c, ...] | $c \leftarrow a \cdot b$
Fails if $max(a, b) > 1$ | | or
- *(1 cycle)* | [b, a, ...] | [c, ...] | $c \leftarrow a + b - a \cdot b$
Fails if $max(a, b) > 1$ | @@ -44,10 +47,10 @@ If the error code is omitted, the default value of $0$ is assumed. | ---------------------------------------------------------- | ----------- | -------------- | ---------------------------------------------------------------------------------------------------------------------------- | | eq
- *(1 cycle)*
eq.*b*
- *(1-2 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} 1, & \text{if}\ a=b \\ 0, & \text{otherwise}\ \end{cases}$ | | neq
- *(2 cycle)*
neq.*b*
- *(2-3 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} 1, & \text{if}\ a \ne b \\ 0, & \text{otherwise}\ \end{cases}$ | -| lt
- *(17 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} 1, & \text{if}\ a < b \\ 0, & \text{otherwise}\ \end{cases}$ | -| lte
- *(18 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} 1, & \text{if}\ a \le b \\ 0, & \text{otherwise}\ \end{cases}$ | -| gt
- *(18 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} 1, & \text{if}\ a > b \\ 0, & \text{otherwise}\ \end{cases}$ | -| gte
- *(19 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} 1, & \text{if}\ a \ge b \\ 0, & \text{otherwise}\ \end{cases}$ | +| lt
- *(14 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} 1, & \text{if}\ a < b \\ 0, & \text{otherwise}\ \end{cases}$ | +| lte
- *(15 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} 1, & \text{if}\ a \le b \\ 0, & \text{otherwise}\ \end{cases}$ | +| gt
- *(15 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} 1, & \text{if}\ a > b \\ 0, & \text{otherwise}\ \end{cases}$ | +| gte
- *(16 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} 1, & \text{if}\ a \ge b \\ 0, & \text{otherwise}\ \end{cases}$ | | is_odd
- *(5 cycles)* | [a, ...] | [b, ...] | $b \leftarrow \begin{cases} 1, & \text{if}\ a \text{ is odd} \\ 0, & \text{otherwise}\ \end{cases}$ | | eqw
- *(15 cycles)* | [A, B, ...] | [c, A, B, ...] | $c \leftarrow \begin{cases} 1, & \text{if}\ a_i = b_i \; \forall i \in \{0, 1, 2, 3\} \\ 0, & \text{otherwise}\ \end{cases}$ |
diff --git a/docs/src/user_docs/assembly/flow_control.md b/docs/src/user_docs/assembly/flow_control.md
index 1800ea8700..74f134c15b 100644
--- a/docs/src/user_docs/assembly/flow_control.md
+++ b/docs/src/user_docs/assembly/flow_control.md
@@ -33,7 +33,7 @@ end
 where:
 * `instructions` can be a sequence of any instructions, including nested control structures.
-* `count` is the number of times the `instructions` sequence should be repeated (e.g. `repeat.10`). `count` must be an integer greater than $0$.
+* `count` is the number of times the `instructions` sequence should be repeated (e.g. `repeat.10`). `count` must be an integer or a [constant](./code_organization.md#constants) greater than $0$.
 
 > **Note**: During compilation the `repeat.<count>` blocks are unrolled and expanded into `<count>` copies of its inner block, there is no additional cost for counting variables in this case.
 
@@ -65,4 +65,4 @@ while.true
     # push the boolean false to the stack, finishing the loop for the next iteration
     push.0
 end
-``` \ No newline at end of file
+```
diff --git a/docs/src/user_docs/assembly/io_operations.md b/docs/src/user_docs/assembly/io_operations.md
index f6f267e7b2..f9993c3314 100644
--- a/docs/src/user_docs/assembly/io_operations.md
+++ b/docs/src/user_docs/assembly/io_operations.md
@@ -25,10 +25,11 @@ In both case the values must still encode valid field elements.
 
 | Instruction | Stack_input | Stack_output | Notes |
 | ------------------------------- | ------------ | ------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| clk&#13;
- *(1 cycle)* | [ ... ] | [t, ... ] | $t \leftarrow clock\_value()$
Pushes the current value of the clock cycle counter onto the stack. | -| sdepth
- *(1 cycle)* | [ ... ] | [d, ... ] | $d \leftarrow stack.depth()$
Pushes the current depth of the stack onto the stack. | -| caller
- *(1 cycle)* | [A, b, ... ] | [H, b, ... ] | $H \leftarrow context.fn\_hash()$
Overwrites the top four stack items with the hash of a function which initiated the current SYSCALL.
Executing this instruction outside of SYSCALL context will fail. | -| locaddr.*i*
- *(2 cycles)* | [ ... ] | [a, ... ] | $a \leftarrow address\_of(i)$
Pushes the absolute memory address of local memory at index $i$ onto the stack. | +| clk
- *(1 cycle)* | [ ... ] | [t, ... ] | $t \leftarrow clock\_value()$
Pushes the current value of the clock cycle counter onto the stack. | +| sdepth
- *(1 cycle)* | [ ... ] | [d, ... ] | $d \leftarrow stack.depth()$
Pushes the current depth of the stack onto the stack. | +| caller
- *(1 cycle)* | [A, b, ... ] | [H, b, ... ] | $H \leftarrow context.fn\_hash()$
Overwrites the top four stack items with the hash of a function which initiated the current SYSCALL.
Executing this instruction outside of SYSCALL context will fail. | +| locaddr.*i*
- *(2 cycles)* | [ ... ] | [a, ... ] | $a \leftarrow address\_of(i)$
Pushes the absolute memory address of local memory at index $i$ onto the stack. | +| procref.*name*
- *(4 cycles)* | [ ... ] | [A, ... ] | $A \leftarrow mast\_root()$
Pushes MAST root of the procedure with name $name$ onto the stack. | ### Nondeterministic inputs @@ -54,9 +55,7 @@ Advice injectors fall into two categories: (1) injectors which push new data ont | adv.push_u64div | [b1, b0, a1, a0, ...] | [b1, b0, a1, a0, ...] | Pushes the result of `u64` division $a / b$ onto the advice stack. Both $a$ and $b$ are represented using 32-bit limbs. The result consists of both the quotient and the remainder. | | adv.push_ext2intt | [osize, isize, iptr, ... ] | [osize, isize, iptr, ... ] | Given evaluations of a polynomial over some specified domain, interpolates the evaluations into a polynomial in coefficient form and pushes the result into the advice stack. | | adv.push_sig.*kind* | [K, M, ...] | [K, M, ...] | Pushes values onto the advice stack which are required for verification of a DSA with scheme specified by *kind* against the public key commitment $K$ and message $M$. | -| adv.smt_get | [K, R, ... ] | [K, R, ... ] | Pushes values onto the advice stack which are required for successful retrieval of a value under the key $K$ from a Sparse Merkle Tree with root $R$. | -| adv.smt_set | [V, K, R, ...] | [V, K, R, ...] | Pushes values onto the advice stack which are required for successful insertion of a key-value pair $(K, V)$ into a Sparse Merkle Tree with root $R$. | -| adv.smt_peek | [K, R, ... ] | [K, R, ... ] | Pushes value onto the advice stack which is associated with key $K$ in a Sparse Merkle Tree with root $R$. | +| adv.push_smtpeek | [K, R, ... ] | [K, R, ... ] | Pushes value onto the advice stack which is associated with key $K$ in a Sparse Merkle Tree with root $R$. | | adv.insert_mem | [K, a, b, ... ] | [K, a, b, ... ] | Reads words $data \leftarrow mem[a] .. mem[b]$ from memory, and save the data into $advice\_map[K] \leftarrow data$. | | adv.insert_hdword
adv.insert_hdword.*d* | [B, A, ... ] | [B, A, ... ] | Reads top two words from the stack, computes a key as $K \leftarrow hash(A || b, d)$, and saves the data into $advice\_map[K] \leftarrow [A, B]$. $d$ is an optional domain value which can be between $0$ and $255$, default value $0$. | | adv.insert_hperm | [B, A, C, ...] | [B, A, C, ...] | Reads top three words from the stack, computes a key as $K \leftarrow permute(C, A, B).digest$, and saves data into $advice\_mpa[K] \leftarrow [A, B]$. | diff --git a/docs/src/user_docs/assembly/u32_operations.md b/docs/src/user_docs/assembly/u32_operations.md index c776f942b8..10bb7ae61a 100644 --- a/docs/src/user_docs/assembly/u32_operations.md +++ b/docs/src/user_docs/assembly/u32_operations.md @@ -1,11 +1,7 @@ ## u32 operations Miden assembly provides a set of instructions which can perform operations on regular two-complement 32-bit integers. These instructions are described in the tables below. -Most instructions have _checked_ variants. These variants ensure that input values are 32-bit integers, and fail if that's not the case. All other variants do not perform these checks, and thus, should be used only if the inputs are known to be 32-bit integers. Supplying inputs which are greater than or equal to $2^{32}$ to unchecked operations results in undefined behavior. - -The primary benefit of using unchecked operations is performance: they can frequently be executed $2$ or $3$ times faster than their checked counterparts. In general, vast majority of the unchecked operations listed below can be executed in a single VM cycle. - -For instructions where one or more operands can be provided as immediate parameters (e.g., `u32checked_add` and `u32checked_add.b`), we provide stack transition diagrams only for the non-immediate version. For the immediate version, it can be assumed that the operand with the specified name is not present on the stack. +For instructions where one or more operands can be provided as immediate parameters (e.g., `u32wrapping_add` and `u32wrapping_add.b`), we provide stack transition diagrams only for the non-immediate version. For the immediate version, it can be assumed that the operand with the specified name is not present on the stack. In all the table below, the number of cycles it takes for the VM to execute each instruction is listed beneath the instruction. @@ -32,60 +28,46 @@ If the error code is omitted, the default value of $0$ is assumed. | Instruction | Stack input | Stack output | Notes | | ----------------------------------------------------------------------------------------- | -------------- | ------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| u32checked_add
- *(4 cycles)*
u32checked_add.*b*
- *(5-6 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow a + b$
Fails if $max(a, b, c) \ge 2^{32}$ | | u32overflowing_add
- *(1 cycle)*
u32overflowing_add.*b*
- *(2-3 cycles)* | [b, a, ...] | [d, c, ...] | $c \leftarrow (a + b) \mod 2^{32}$
$d \leftarrow \begin{cases} 1, & \text{if}\ (a + b) \ge 2^{32} \\ 0, & \text{otherwise}\ \end{cases}$
Undefined if $max(a, b) \ge 2^{32}$ | | u32wrapping_add
- *(2 cycles)*
u32wrapping_add.*b*
- *(3-4 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow (a + b) \mod 2^{32}$
Undefined if $max(a, b) \ge 2^{32}$ | | u32overflowing_add3
- *(1 cycle)* | [c, b, a, ...] | [e, d, ...] | $d \leftarrow (a + b + c) \mod 2^{32}$,
$e \leftarrow \lfloor (a + b + c) / 2^{32}\rfloor$
Undefined if $max(a, b, c) \ge 2^{32}$
| | u32wrapping_add3
- *(2 cycles)* | [c, b, a, ...] | [d, ...] | $d \leftarrow (a + b + c) \mod 2^{32}$,
Undefined if $max(a, b, c) \ge 2^{32}$
| -| u32checked_sub
- *(4 cycles)*
u32checked_sub.*b*
- *(5-6 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow (a - b)$
Fails if $max(a, b) \ge 2^{32}$ or $a < b$ | | u32overflowing_sub
- *(1 cycle)*
u32overflowing_sub.*b*
- *(2-3 cycles)* | [b, a, ...] | [d, c, ...] | $c \leftarrow (a - b) \mod 2^{32}$
$d \leftarrow \begin{cases} 1, & \text{if}\ a < b \\ 0, & \text{otherwise}\ \end{cases}$
Undefined if $max(a, b) \ge 2^{32}$ | | u32wrapping_sub
- *(2 cycles)*
u32wrapping_sub.*b*
- *(3-4 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow (a - b) \mod 2^{32}$
Undefined if $max(a, b) \ge 2^{32}$ | -| u32checked_mul
- *(4 cycles)*
u32checked_mul.*b*
- *(5-6 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow a \cdot b$
Fails if $max(a, b, c) \ge 2^{32}$ | | u32overflowing_mul
- *(1 cycle)*
u32overflowing_mul.*b*
- *(2-3 cycles)* | [b, a, ...] | [d, c, ...] | $c \leftarrow (a \cdot b) \mod 2^{32}$
$d \leftarrow \lfloor(a \cdot b) / 2^{32}\rfloor$
Undefined if $max(a, b) \ge 2^{32}$ | | u32wrapping_mul
- *(2 cycles)*
u32wrapping_mul.*b*
- *(3-4 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow (a \cdot b) \mod 2^{32}$
Undefined if $max(a, b) \ge 2^{32}$ | | u32overflowing_madd
- *(1 cycle)* | [b, a, c, ...] | [e, d, ...] | $d \leftarrow (a \cdot b + c) \mod 2^{32}$
$e \leftarrow \lfloor(a \cdot b + c) / 2^{32}\rfloor$
Undefined if $max(a, b, c) \ge 2^{32}$ | | u32wrapping_madd
- *(2 cycles)* | [b, a, c, ...] | [d, ...] | $d \leftarrow (a \cdot b + c) \mod 2^{32}$
Undefined if $max(a, b, c) \ge 2^{32}$ | -| u32checked_div
- *(3 cycles)*
u32checked_div.*b*
- *(4-5 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \lfloor a / b\rfloor$
Fails if $max(a, b) \ge 2^{32}$ or $b = 0$ | -| u32unchecked_div
- *(2 cycles)*
u32unchecked_div.*b*
- *(3-4 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \lfloor a / b\rfloor$
Fails if $b = 0$
Undefined if $max(a, b) \ge 2^{32}$ | -| u32checked_mod
- *(4 cycles)*
u32checked_mod.*b*
- *(5-6 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow a \mod b$
Fails if $max(a, b) \ge 2^{32}$ or $b = 0$ | -| u32unchecked_mod
- *(3 cycles)*
u32unchecked_mod.*b*
- *(4-5 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow a \mod b$
Fails if $b = 0$
Undefined if $max(a, b) \ge 2^{32}$ | -| u32checked_divmod
- *(2 cycles)*
u32checked_divmod.*b*
- *(3-4 cycles)* | [b, a, ...] | [d, c, ...] | $c \leftarrow \lfloor a / b\rfloor$
$d \leftarrow a \mod b$
Fails if $max(a, b) \ge 2^{32}$ or $b = 0$ | -| u32unchecked_divmod
- *(1 cycle)*
u32unchecked_divmod.*b*
- *(2-3 cycles)* | [b, a, ...] | [d, c, ...] | $c \leftarrow \lfloor a / b\rfloor$
$d \leftarrow a \mod b$
Fails if $b = 0$
Undefined if $max(a, b) \ge 2^{32}$ | +| u32div
- *(2 cycles)*
u32div.*b*
- *(3-4 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \lfloor a / b\rfloor$
Fails if $b = 0$
Undefined if $max(a, b) \ge 2^{32}$ | +| u32mod
- *(3 cycles)*
u32mod.*b*
- *(4-5 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow a \mod b$
Fails if $b = 0$
Undefined if $max(a, b) \ge 2^{32}$ | +| u32divmod
- *(1 cycle)*
u32divmod.*b*
- *(2-3 cycles)* | [b, a, ...] | [d, c, ...] | $c \leftarrow \lfloor a / b\rfloor$
$d \leftarrow a \mod b$
Fails if $b = 0$
Undefined if $max(a, b) \ge 2^{32}$ | ### Bitwise operations | Instruction | Stack input | Stack output | Notes | | ------------------------------------------------------------------------------------- | -------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------ | -| u32checked_and
- *(1 cycle)* | [b, a, ...] | [c, ...] | Computes $c$ as a bitwise `AND` of binary representations of $a$ and $b$.
Fails if $max(a,b) \ge 2^{32}$ | -| u32checked_or
- *(6 cycle)s* | [b, a, ...] | [c, ...] | Computes $c$ as a bitwise `OR` of binary representations of $a$ and $b$.
Fails if $max(a,b) \ge 2^{32}$ | -| u32checked_xor
- *(1 cycle)* | [b, a, ...] | [c, ...] | Computes $c$ as a bitwise `XOR` of binary representations of $a$ and $b$.
Fails if $max(a,b) \ge 2^{32}$ | -| u32checked_not
- *(5 cycles)* | [a, ...] | [b, ...] | Computes $b$ as a bitwise `NOT` of binary representation of $a$.
Fails if $a \ge 2^{32}$ | -| u32checked_shl
- *(47 cycles)*
u32checked_shl.*b*
- *(4 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow (a \cdot 2^b) \mod 2^{32}$
Fails if $a \ge 2^{32}$ or $b > 31$ | -| u32unchecked_shl
- *(40 cycles)*
u32unchecked_shl.*b*
- *(3 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow (a \cdot 2^b) \mod 2^{32}$
Undefined if $a \ge 2^{32}$ or $b > 31$ | -| u32checked_shr
- *(47 cycles)*
u32checked_shr.*b*
- *(4 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \lfloor a/2^b \rfloor$
Fails if $a \ge 2^{32}$ or $b > 31$ | -| u32unchecked_shr
- *(40 cycles)*
u32unchecked_shr.*b*
- *(3 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \lfloor a/2^b \rfloor$
Undefined if $a \ge 2^{32}$ or $b > 31$ | -| u32checked_rotl
- *(47 cycles)*
u32checked_rotl.*b*
- *(4 cycles)* | [b, a, ...] | [c, ...] | Computes $c$ by rotating a 32-bit representation of $a$ to the left by $b$ bits.
Fails if $a \ge 2^{32}$ or $b > 31$ | -| u32unchecked_rotl
- *(40 cycles)*
u32unchecked_rotl.*b*
- *(3 cycles)* | [b, a, ...] | [c, ...] | Computes $c$ by rotating a 32-bit representation of $a$ to the left by $b$ bits.
Undefined if $a \ge 2^{32}$ or $b > 31$ | -| u32checked_rotr
- *(59 cycles)*
u32checked_rotr.*b*
- *(6 cycles)* | [b, a, ...] | [c, ...] | Computes $c$ by rotating a 32-bit representation of $a$ to the right by $b$ bits.
Fails if $a \ge 2^{32}$ or $b > 31$ | -| u32unchecked_rotr
- *(44 cycles)*
u32unchecked_rotr.*b*
- *(3 cycles)* | [b, a, ...] | [c, ...] | Computes $c$ by rotating a 32-bit representation of $a$ to the right by $b$ bits.
Undefined if $a \ge 2^{32}$ or $b > 31$ | -| u32checked_popcnt
- *(36 cycles)* | [a, ...] | [b, ...] | Computes $b$ by counting the number of set bits in $a$ (hamming weight of $a$).
Fails if $a \ge 2^{32}$ | -| u32unchecked_popcnt
- *(33 cycles)* | [a, ...] | [b, ...] | Computes $b$ by counting the number of set bits in $a$ (hamming weight of $a$).
Undefined if $a \ge 2^{32}$ | +| u32and
- *(1 cycle)* | [b, a, ...] | [c, ...] | Computes $c$ as a bitwise `AND` of binary representations of $a$ and $b$.
Fails if $max(a,b) \ge 2^{32}$ | +| u32or
- *(6 cycles)* | [b, a, ...] | [c, ...] | Computes $c$ as a bitwise `OR` of binary representations of $a$ and $b$.&#13;
Fails if $max(a,b) \ge 2^{32}$ | +| u32xor
- *(1 cycle)* | [b, a, ...] | [c, ...] | Computes $c$ as a bitwise `XOR` of binary representations of $a$ and $b$.
Fails if $max(a,b) \ge 2^{32}$ | +| u32not
- *(5 cycles)* | [a, ...] | [b, ...] | Computes $b$ as a bitwise `NOT` of binary representation of $a$.
Fails if $a \ge 2^{32}$ | +| u32shl
- *(18 cycles)*
u32shl.*b*
- *(3 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow (a \cdot 2^b) \mod 2^{32}$
Undefined if $a \ge 2^{32}$ or $b > 31$ | +| u32shr
- *(18 cycles)*
u32shr.*b*
- *(3 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \lfloor a/2^b \rfloor$
Undefined if $a \ge 2^{32}$ or $b > 31$ | +| u32rotl
- *(18 cycles)*
u32rotl.*b*
- *(3 cycles)* | [b, a, ...] | [c, ...] | Computes $c$ by rotating a 32-bit representation of $a$ to the left by $b$ bits.
Undefined if $a \ge 2^{32}$ or $b > 31$ | +| u32rotr
- *(22 cycles)*
u32rotr.*b*
- *(3 cycles)* | [b, a, ...] | [c, ...] | Computes $c$ by rotating a 32-bit representation of $a$ to the right by $b$ bits.
Undefined if $a \ge 2^{32}$ or $b > 31$ | +| u32popcnt
- *(33 cycles)* | [a, ...] | [b, ...] | Computes $b$ by counting the number of set bits in $a$ (Hamming weight of $a$).&#13;
Undefined if $a \ge 2^{32}$ | +| u32clz
- *(37 cycles)* | [a, ...] | [b, ...] | Computes $b$ as the number of leading zeros of $a$.&#13;
Undefined if $a \ge 2^{32}$ | +| u32ctz
- *(34 cycles)* | [a, ...] | [b, ...] | Computes $b$ as the number of trailing zeros of $a$.&#13;
Undefined if $a \ge 2^{32}$ | +| u32clo
- *(36 cycles)* | [a, ...] | [b, ...] | Computes $b$ as the number of leading ones of $a$.&#13;
Undefined if $a \ge 2^{32}$ | +| u32cto
- *(33 cycles)* | [a, ...] | [b, ...] | Computes $b$ as the number of trailing ones of $a$.&#13;
Undefined if $a \ge 2^{32}$ | + ### Comparison operations | Instruction | Stack input | Stack output | Notes | | -------------------------------------------------------------------------------- | ------------ | --------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| u32checked_eq
- *(2 cycles)*
u32checked_eq.*b*
- *(3-4 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} 1, & \text{if}\ a=b \\ 0, & \text{otherwise}\ \end{cases}$
Fails if $max(a, b) \ge 2^{32}$
Note: unchecked version is not provided because it is equivalent to simple `eq`. | -| u32checked_neq
- *(3 cycles)*
u32checked_neq.*b*
- *(4-5 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} 1, & \text{if}\ a \ne b \\ 0, & \text{otherwise}\ \end{cases}$
Fails if $max(a, b) \ge 2^{32}$
Note: unchecked version is not provided because it is equivalent to simple `neq`. | -| u32checked_lt
- *(6 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} 1, & \text{if}\ a < b \\ 0, & \text{otherwise}\ \end{cases}$
Fails if $max(a, b) \ge 2^{32}$ | -| u32unchecked_lt
- *(5 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} 1, & \text{if}\ a < b \\ 0, & \text{otherwise}\ \end{cases}$
Undefined if $max(a, b) \ge 2^{32}$ | -| u32checked_lte
- *(8 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} 1, & \text{if}\ a \le b \\ 0, & \text{otherwise}\ \end{cases}$
Fails if $max(a, b) \ge 2^{32}$ | -| u32unchecked_lte
- *(7 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} 1, & \text{if}\ a \le b \\ 0, & \text{otherwise}\ \end{cases}$
Undefined if $max(a, b) \ge 2^{32}$ | -| u32checked_gt
- *(7 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} 1, & \text{if}\ a > b \\ 0, & \text{otherwise}\ \end{cases}$
Fails if $max(a, b) \ge 2^{32}$ | -| u32unchecked_gt
- *(6 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} 1, & \text{if}\ a > b \\ 0, & \text{otherwise}\ \end{cases}$
Undefined if $max(a, b) \ge 2^{32}$ | -| u32checked_gte
- *(7 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} 1, & \text{if}\ a \ge b \\ 0, & \text{otherwise}\ \end{cases}$
Fails if $max(a, b) \ge 2^{32}$ | -| u32unchecked_gte
- *(6 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} 1, & \text{if}\ a \ge b \\ 0, & \text{otherwise}\ \end{cases}$
Undefined if $max(a, b) \ge 2^{32}$ | -| u32checked_min
- *(9 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} a, & \text{if}\ a < b \\ b, & \text{otherwise}\ \end{cases}$
Fails if $max(a, b) \ge 2^{32}$ | -| u32unchecked_min
- *(8 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} a, & \text{if}\ a < b \\ b, & \text{otherwise}\ \end{cases}$
Undefined if $max(a, b) \ge 2^{32}$ | -| u32checked_max
- *(10 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} a, & \text{if}\ a > b \\ b, & \text{otherwise}\ \end{cases}$
Fails if $max(a, b) \ge 2^{32}$ | -| u32unchecked_max
- *(9 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} a, & \text{if}\ a > b \\ b, & \text{otherwise}\ \end{cases}$
Undefined if $max(a, b) \ge 2^{32}$ | +| u32lt
- *(3 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} 1, & \text{if}\ a < b \\ 0, & \text{otherwise}\ \end{cases}$
Undefined if $max(a, b) \ge 2^{32}$ | +| u32lte
- *(5 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} 1, & \text{if}\ a \le b \\ 0, & \text{otherwise}\ \end{cases}$
Undefined if $max(a, b) \ge 2^{32}$ | +| u32gt
- *(4 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} 1, & \text{if}\ a > b \\ 0, & \text{otherwise}\ \end{cases}$
Undefined if $max(a, b) \ge 2^{32}$ | +| u32gte
- *(4 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} 1, & \text{if}\ a \ge b \\ 0, & \text{otherwise}\ \end{cases}$
Undefined if $max(a, b) \ge 2^{32}$ | +| u32min
- *(8 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} a, & \text{if}\ a < b \\ b, & \text{otherwise}\ \end{cases}$
Undefined if $max(a, b) \ge 2^{32}$ | +| u32max
- *(9 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \begin{cases} a, & \text{if}\ a > b \\ b, & \text{otherwise}\ \end{cases}$
Undefined if $max(a, b) \ge 2^{32}$ | diff --git a/docs/src/user_docs/stdlib/collections.md b/docs/src/user_docs/stdlib/collections.md index 3dfcabc4c3..3b3b26138a 100644 --- a/docs/src/user_docs/stdlib/collections.md +++ b/docs/src/user_docs/stdlib/collections.md @@ -17,21 +17,9 @@ The following procedures are available to read data from and make updates to a M | pack | Computes a commitment to the given MMR and copies the MMR to the Advice Map using the commitment as a key.

Inputs: `[mmr_ptr, ...]`
Outputs: `[HASH, ...]`

| | unpack | Load the MMR peak data based on its hash.

Inputs: `[HASH, mmr_ptr, ...]`
Outputs: `[...]`

Where:
- `HASH`: is the MMR peak hash, the hash is expected to be padded to an even length and to have a minimum size of 16 elements.
- The advice map must contain a key with `HASH`, and its value is `num_leaves \|\| hash_data`, and hash_data is the data used to computed `HASH`
- `mmt_ptr`: the memory location where the MMR data will be written, starting with the MMR forest (the total count of its leaves) followed by its peaks. | -## Sparse Merkle Tree (64) +## Sparse Merkle Tree -Module `std::collections::smt64` contains procedures for manipulating key-value maps with single-element keys and 4-element values. The current implementation is a thin wrapper over a simple Sparse Merkle Tree of depth 64. In the future, this will be replaced with a compact Sparse Merkle Tree implementation. - -The following procedures are available to read data from and make updates to a Sparse Merkle Tree. - -| Procedure | Description | -| ----------- | ------------- | -| get | Returns the value located under the specified key in the Sparse Merkle Tree defined by the specified root.

If no values had been previously inserted under the specified key, an empty word is returned.

Inputs: `[key, ROOT, ...]`
Outputs: `[VALUE, ROOT, ...]`

Fails if the tree with the specified root does not exist in the VM's advice provider. | -| set | Inserts the specified value under the specified key in a Sparse Merkle Tree defined by the specified root. If the insert is successful, the old value located under the specified key is returned via the stack.

If `VALUE` is an empty word, the new state of the tree is guaranteed to be equivalent to the state as if the updated value was never inserted.

Inputs: `[VALUE, key, ROOT, ...]`
Outputs: `[OLD_VALUE, NEW_ROOT, ...]`

Fails if the tree with the specified root does not exits in the VM's advice provider. | -| insert | Inserts the specified value under the specified key in a Sparse Merkle Tree defined by the specified root. If the insert is successful, the old value located under the specified key is returned via the stack.

This procedure requires that `VALUE` be a non-empty word.

Inputs: `[VALUE, key, ROOT, ...]`
Outputs: `[OLD_VALUE, NEW_ROOT, ...]`

Fails if:
- The tree with the specified root does not exits in the VM's advice provider.
- The provided value is an empty word. | - -## Sparse Merkle Tree (256) - -Module `std::collections::smt` contains procedures for manipulating key-value maps with 4-element keys and 4-element values. The underlying implementation is a Tiered (compacted) Sparse Merkle where leaves can exist only at specific depths called "tiers". These depths are: 16, 32, 48, and 64. Initially, when a tree is empty, it is equivalent to an empty Sparse Merkle Tree of depth 64 (i.e., leaves at depth 64 are set to [ZERO; 4]). As non-empty values are inserted into the tree, they are added to the first available tier. +Module `std::collections::smt` contains procedures for manipulating key-value maps with 4-element keys and 4-element values. The underlying implementation is a Sparse Merkle Tree where leaves can exist only at depth 64. Initially, when a tree is empty, it is equivalent to an empty Sparse Merkle Tree of depth 64 (i.e., leaves at depth 64 are set and hash to [ZERO; 4]). When inserting non-empty values into the tree, the most significant element of the key is used to identify the corresponding leaf. All key-value pairs that map to a given leaf are inserted (ordered) in the leaf. The following procedures are available to read data from and make updates to a Sparse Merkle Tree. @@ -39,4 +27,3 @@ The following procedures are available to read data from and make updates to a S | ----------- | ------------- | | get | Returns the value located under the specified key in the Sparse Merkle Tree defined by the specified root.

If no values had been previously inserted under the specified key, an empty word is returned.

Inputs: `[KEY, ROOT, ...]`
Outputs: `[VALUE, ROOT, ...]`

Fails if the tree with the specified root does not exist in the VM's advice provider. | | set | Inserts the specified value under the specified key in a Sparse Merkle Tree defined by the specified root. If the insert is successful, the old value located under the specified key is returned via the stack.

If `VALUE` is an empty word, the new state of the tree is guaranteed to be equivalent to the state as if the updated value was never inserted.

Inputs: `[VALUE, KEY, ROOT, ...]`
Outputs: `[OLD_VALUE, NEW_ROOT, ...]`

Fails if the tree with the specified root does not exits in the VM's advice provider. | -| insert | Inserts the specified value under the specified key in a Sparse Merkle Tree defined by the specified root. If the insert is successful, the old value located under the specified key is returned via the stack.

This procedure requires that `VALUE` be a non-empty word.

Inputs: `[VALUE, KEY, ROOT, ...]`
Outputs: `[OLD_VALUE, NEW_ROOT, ...]`

Fails if:
- The tree with the specified root does not exits in the VM's advice provider.
- The provided value is an empty word. | diff --git a/docs/src/user_docs/stdlib/crypto/dsa.md b/docs/src/user_docs/stdlib/crypto/dsa.md index f4c31b7185..d27c0ebcaa 100644 --- a/docs/src/user_docs/stdlib/crypto/dsa.md +++ b/docs/src/user_docs/stdlib/crypto/dsa.md @@ -12,4 +12,3 @@ The module exposes the following procedures: | Procedure | Description | | ----------- | ------------- | | verify | Verifies a signature against a public key and a message. The procedure gets as inputs the hash of the public key and the hash of the message via the operand stack. The signature is expected to be provided via the advice provider.

The signature is valid if and only if the procedure returns.

Inputs: `[PK, MSG, ...]`
Outputs: `[...]`

Where `PK` is the hash of the public key and `MSG` is the hash of the message. Both hashes are expected to be computed using `RPO` hash function.

The procedure relies on the `adv.push_sig` [decorator](../../assembly/io_operations.md#nondeterministic-inputs) to retrieve the signature from the host. The default host implementation assumes that the private-public key pair is loaded into the advice provider, and uses it to generate the signature. However, for production grade implementations, this functionality should be overridden to ensure more secure handling of private keys.| - diff --git a/docs/src/user_docs/stdlib/main.md b/docs/src/user_docs/stdlib/main.md index eb8830a05a..b4782fde71 100644 --- a/docs/src/user_docs/stdlib/main.md +++ b/docs/src/user_docs/stdlib/main.md @@ -27,7 +27,6 @@ Currently, Miden standard library contains just a few modules, which are listed | Module | Description | | ------ | ----------- | | [std::collections::mmr](./collections.md#merkle-mountain-range) | Contains procedures for manipulating [Merkle Mountain Ranges](https://github.com/opentimestamps/opentimestamps-server/blob/master/doc/merkle-mountain-range.md). | -| [std::collections::smt64](./collections.md#sparse-merkle-tree-64) | Contains procedures for manipulating key-value maps with single-element keys and 4-element values. | | [std::crypto::fri::frie2f4](./crypto/fri.md#fri-extension-2-fold-4) | Contains procedures for verifying FRI proofs (field extension = 2, folding factor = 4). | | [std::crypto::hashes::blake3](./crypto/hashes.md#blake3) | Contains procedures for computing hashes using BLAKE3 hash function. | | [std::crypto::hashes::sha256](./crypto/hashes.md#sha256) | Contains procedures for computing hashes using SHA256 hash function. | diff --git a/docs/src/user_docs/stdlib/math/u64.md b/docs/src/user_docs/stdlib/math/u64.md index 8cb6fe1775..5cf9b64337 100644 --- a/docs/src/user_docs/stdlib/math/u64.md +++ b/docs/src/user_docs/stdlib/math/u64.md @@ -10,60 +10,47 @@ All procedures assume that an unsigned 64-bit integer (u64) is encoded using two [a_hi, a_lo, ... ] ``` -Procedures which check whether the input values are encoded correctly are designated with `checked` prefix. For example, `checked_add` would fail if any of the top 4 elements on the stack contains a value greater than $2^{32} - 1$. In contrast, `wrapping_add` and `overflowing_add` would not perform these checks, and therefore, if any of the top 4 stack elements is greater than $2^{32} - 1$, the operation will not fail but rather will produce an undefined result. Thus, when using versions of procedures which are not checked, it is important to be certain that input values are 32-bit limbs encoding valid u64 values. +Many of the procedures listed below (e.g., `overflowing_add`, `wrapping_add`, `lt`) do not check whether the inputs are encoded using valid `u32` values. These procedures do not fail when the inputs are encoded incorrectly, but rather produce undefined results. Thus, it is important to be certain that limbs of input values are valid `u32` values prior to calling such procedures. ## Arithmetic operations | Procedure | Description | | ------------------ | ------------- | -| checked_add | Performs addition of two unsigned 64-bit integers and fails if the result would overflow.
The input values are expected to be represented using 32-bit limbs, and the procedure will fail if they are not.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = (a + b) % 2^64 | -| overflowing_add | Performs addition of two unsigned 64-bit integers preserving the overflow.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [overflow_flag, c_hi, c_lo, ...], where c = (a + b) % 2^64 | -| wrapping_add | Performs addition of two unsigned 64-bit integers discarding the overflow.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = (a + b) % 2^64 | -| checked_sub | Performs subtraction of two unsigned 64-bit integers and fails if the result would underflow.
The input values are expected to be represented using 32-bit limbs, and the procedure will fail if they are not.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = (a - b) % 2^64 | -| overflowing_sub | Performs subtraction of two unsigned 64-bit integers preserving the overflow.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [underflow_flag, c_hi, c_lo, ...], where c = (a - b) % 2^64 | -| wrapping_sub | Performs subtraction of two unsigned 64-bit integers discarding the overflow.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = (a - b) % 2^64 | -| checked_mul | Performs multiplication of two unsigned 64-bit integers and fails if the result would overflow.
The input values are expected to be represented using 32-bit limbs, and the procedure will fail if they are not.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = (a * b) % 2^64 | -| overflowing_mul | Performs multiplication of two unsigned 64-bit integers preserving the overflow.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi_hi, c_hi_lo, c_lo_hi, c_lo_lo, ...], where c = (a * b) % 2^64| -| wrapping_mul | Performs multiplication of two unsigned 64-bit integers discarding the overflow.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = (a * b) % 2^64 | -| checked_div | Performs division of two unsigned 64-bit integers discarding the remainder.
The input values are expected to be represented using 32-bit limbs, and the procedure will fail if they are not.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a // b | -| unchecked_div | Performs division of two unsigned 64-bit integers discarding the remainder.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a // b | -| checked_mod | Performs modulo operation of two unsigned 64-bit integers.
The input values are expected to be represented using 32-bit limbs, and the procedure will fail if they are not.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a % b | -| unchecked_mod | Performs modulo operation of two unsigned 64-bit integers.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a % b | -| checked_divmod | Performs divmod operation of two unsigned 64-bit integers.
The input values are expected to be represented using 32-bit limbs, and the procedure will fail if they are not.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [r_hi, r_lo, q_hi, q_lo ...], where r = a % b, q = a // b | -| unchecked_divmod | Performs divmod operation of two unsigned 64-bit integers.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [r_hi, r_lo, q_hi, q_lo ...], where r = a % b, q = a // b | +| overflowing_add | Performs addition of two unsigned 64-bit integers preserving the overflow.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [overflow_flag, c_hi, c_lo, ...], where c = (a + b) % 2^64
This takes 6 cycles.| +| wrapping_add | Performs addition of two unsigned 64-bit integers discarding the overflow.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = (a + b) % 2^64
This takes 7 cycles.| +| overflowing_sub | Performs subtraction of two unsigned 64-bit integers preserving the overflow.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [underflow_flag, c_hi, c_lo, ...], where c = (a - b) % 2^64
This takes 11 cycles. | +| wrapping_sub | Performs subtraction of two unsigned 64-bit integers discarding the overflow.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = (a - b) % 2^64
This takes 10 cycles. | +| overflowing_mul | Performs multiplication of two unsigned 64-bit integers preserving the overflow.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi_hi, c_hi_lo, c_lo_hi, c_lo_lo, ...], where c = (a * b) % 2^64
This takes 18 cycles.| +| wrapping_mul | Performs multiplication of two unsigned 64-bit integers discarding the overflow.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = (a * b) % 2^64
This takes 11 cycles. | +| div | Performs division of two unsigned 64-bit integers discarding the remainder.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a // b
This takes 54 cycles. | +| mod | Performs modulo operation of two unsigned 64-bit integers.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a % b
This takes 54 cycles. | +| divmod | Performs divmod operation of two unsigned 64-bit integers.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [r_hi, r_lo, q_hi, q_lo ...], where r = a % b, q = a // b
This takes 54 cycles. | ## Comparison operations | Procedure | Description | | ------------------ | ------------- | -| checked_lt | Performs less-than comparison of two unsigned 64-bit integers.
The input values are expected to be represented using 32-bit limbs, and the procedure will fail if they are not.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a < b, and 0 otherwise. | -| unchecked_lt | Performs less-than comparison of two unsigned 64-bit integers.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a < b, and 0 otherwise. | -| checked_gt | Performs greater-than comparison of two unsigned 64-bit integers.
The input values are expected to be represented using 32-bit limbs, and the procedure will fail if they are not.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a > b, and 0 otherwise. | -| unchecked_gt | Performs greater-than comparison of two unsigned 64-bit integers.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a > b, and 0 otherwise.
This takes 11 cycles. | -| checked_lte | Performs less-than-or-equal comparison of two unsigned 64-bit integers.
The input values are expected to be represented using 32-bit limbs, and the procedure will fail if they are not.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a <= b, and 0 otherwise. | -| unchecked_lte | Performs less-than-or-equal comparison of two unsigned 64-bit integers.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a <= b, and 0 otherwise. | -| checked_gte | Performs greater-than-or-equal comparison of two unsigned 64-bit integers.
The input values are expected to be represented using 32-bit limbs, and the procedure will fail if they are not.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a >= b, and 0 otherwise. | -| unchecked_gte | Performs greater-than-or-equal comparison of two unsigned 64-bit integers.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a >= b, and 0 otherwise. | -| checked_eq | Performs equality comparison of two unsigned 64-bit integers.
The input values are expected to be represented using 32-bit limbs, and the procedure will fail if they are not.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a == b, and 0 otherwise. | -| unchecked_eq | Performs equality comparison of two unsigned 64-bit integers.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a == b, and 0 otherwise. | -| checked_neq | Performs inequality comparison of two unsigned 64-bit integers.
The input values are expected to be represented using 32-bit limbs, and the procedure will fail if they are not.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a != b, and 0 otherwise. | -| unchecked_neq | Performs inequality comparison of two unsigned 64-bit integers.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a != b, and 0 otherwise. | -| checked_eqz | Performs comparison to zero of an unsigned 64-bit integer.
The input value is assumed to be represented using 32-bit limbs, fails if it is not.
The stack transition looks as follows:
[a_hi, a_lo, ...] -> [c, ...], where c = 1 when a == 0, and 0 otherwise. | -| unchecked_eqz | Performs comparison to zero of an unsigned 64-bit integer.
The input value is assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[a_hi, a_lo, ...] -> [c, ...], where c = 1 when a == 0, and 0 otherwise. | -| checked_min | Compares two unsigned 64-bit integers and drop the larger one from the stack.
The input values are expected to be represented using 32-bit limbs, and the procedure will fail if they are not.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a when a < b, and b otherwise. | -| unchecked_min | Compares two unsigned 64-bit integers and drop the larger one from the stack.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a when a < b, and b otherwise. | -| checked_max | Compares two unsigned 64-bit integers and drop the smaller one from the stack.
The input values are expected to be represented using 32-bit limbs, and the procedure will fail if they are not.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a when a > b, and b otherwise. | -| unchecked_max | Compares two unsigned 64-bit integers and drop the smaller one from the stack.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a when a > b, and b otherwise. | +| lt | Performs less-than comparison of two unsigned 64-bit integers.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a < b, and 0 otherwise.
This takes 11 cycles. | +| gt | Performs greater-than comparison of two unsigned 64-bit integers.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a > b, and 0 otherwise.
This takes 11 cycles. | +| lte | Performs less-than-or-equal comparison of two unsigned 64-bit integers.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a <= b, and 0 otherwise.
This takes 12 cycles. | +| gte | Performs greater-than-or-equal comparison of two unsigned 64-bit integers.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a >= b, and 0 otherwise.
This takes 12 cycles. | +| eq | Performs equality comparison of two unsigned 64-bit integers.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a == b, and 0 otherwise.
This takes 6 cycles. | +| neq | Performs inequality comparison of two unsigned 64-bit integers.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a != b, and 0 otherwise.
This takes 6 cycles. | +| eqz | Performs comparison to zero of an unsigned 64-bit integer.
The input value is assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[a_hi, a_lo, ...] -> [c, ...], where c = 1 when a == 0, and 0 otherwise.
This takes 4 cycles. | +| min | Compares two unsigned 64-bit integers and drops the larger one from the stack.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a when a < b, and b otherwise.
This takes 23 cycles. | +| max | Compares two unsigned 64-bit integers and drops the smaller one from the stack.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a when a > b, and b otherwise.
This takes 23 cycles. | ## Bitwise operations | Procedure | Description | | ----------- | ------------- | -| checked_and | Performs bitwise AND of two unsigned 64-bit integers.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a AND b. | -| checked_or | Performs bitwise OR of two unsigned 64-bit integers.
The input values are expected to be represented using 32-bit limbs, and the procedure will fail if they are not.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a OR b. | -| checked_xor | Performs bitwise XOR of two unsigned 64-bit integers.
The input values are expected to be represented using 32-bit limbs, and the procedure will fail if they are not.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a XOR b. | -| overflowing_shl | Performs left shift of one unsigned 64-bit integer preserving the overflow and
using the pow2 operation.
The input value to be shifted is assumed to be represented using 32-bit limbs.
The shift value should be in the range [0, 64), otherwise it will result in an error.
The stack transition looks as follows:
[b, a_hi, a_lo, ...] -> [d_hi, d_lo, c_hi, c_lo, ...], where (d,c) = a << b,
which d contains the bits shifted out.
This takes 35 cycles. | -| unchecked_shl | Performs left shift of one unsigned 64-bit integer using the pow2 operation.
The input value to be shifted is assumed to be represented using 32-bit limbs.
The shift value should be in the range [0, 64), otherwise it will result in an error.
The stack transition looks as follows:
[b, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a << b mod 2^64.
This takes 28 cycles. | -| overflowing_shr | Performs right shift of one unsigned 64-bit integer preserving the overflow and
using the pow2 operation.
The input value to be shifted is assumed to be represented using 32-bit limbs.
The shift value should be in the range [0, 64), otherwise it will result in an error.
The stack transition looks as follows:
[b, a_hi, a_lo, ...] -> [d_hi, d_lo, c_hi, c_lo, ...], where c = a >> b, d = a << (64 - b).
This takes 94 cycles. | -| unchecked_shr | Performs right shift of one unsigned 64-bit integer using the pow2 operation.
The input value to be shifted is assumed to be represented using 32-bit limbs.
The shift value should be in the range [0, 64), otherwise it will result in an error.
The stack transition looks as follows:
[b, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a >> b.
This takes 44 cycles. | -| unchecked_rotl | Performs left rotation of one unsigned 64-bit integer using the pow2 operation.
The input value to be shifted is assumed to be represented using 32-bit limbs.
The shift value should be in the range [0, 64), otherwise it will result in an error.
The stack transition looks as follows:
[b, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a << b mod 2^64.
This takes 35 cycles. | -| unchecked_rotr | Performs right rotation of one unsigned 64-bit integer using the pow2 operation.
The input value to be shifted is assumed to be represented using 32-bit limbs.
The shift value should be in the range [0, 64), otherwise it will result in an error.
The stack transition looks as follows:
[b, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a << b mod 2^64.
This takes 40 cycles. | +| and | Performs bitwise AND of two unsigned 64-bit integers.
The input values are assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a AND b.
This takes 6 cycles. | +| or | Performs bitwise OR of two unsigned 64-bit integers.
The input values are expected to be represented using 32-bit limbs, and the procedure will fail if they are not.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a OR b.
This takes 16 cycles. | +| xor | Performs bitwise XOR of two unsigned 64-bit integers.
The input values are expected to be represented using 32-bit limbs, and the procedure will fail if they are not.
The stack transition looks as follows:
[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a XOR b.
This takes 6 cycles. | +| shl | Performs left shift of one unsigned 64-bit integer using the pow2 operation.
The input value to be shifted is assumed to be represented using 32-bit limbs.
The shift value should be in the range [0, 64), otherwise it will result in an error.
The stack transition looks as follows:
[b, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a << b mod 2^64.
This takes 28 cycles.| +| shr | Performs right shift of one unsigned 64-bit integer using the pow2 operation.
The input value to be shifted is assumed to be represented using 32-bit limbs.
The shift value should be in the range [0, 64), otherwise it will result in an error.
The stack transition looks as follows:
[b, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a >> b.
This takes 44 cycles. | +| rotl | Performs left rotation of one unsigned 64-bit integer using the pow2 operation.
The input value to be shifted is assumed to be represented using 32-bit limbs.
The shift value should be in the range [0, 64), otherwise it will result in an error.
The stack transition looks as follows:
[b, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c is the value of a rotated left by b bits.
This takes 35 cycles. | +| rotr | Performs right rotation of one unsigned 64-bit integer using the pow2 operation.
The input value to be shifted is assumed to be represented using 32-bit limbs.
The shift value should be in the range [0, 64), otherwise it will result in an error.
The stack transition looks as follows:
[b, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c is the value of a rotated right by b bits.
This takes 40 cycles. | +| clz | Counts the number of leading zeros of one unsigned 64-bit integer.
The input value is assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows: `[n_hi, n_lo, ...] -> [clz, ...]`, where `clz` is the number of leading zeros of value `n`.
This takes 43 cycles. | +| ctz | Counts the number of trailing zeros of one unsigned 64-bit integer.
The input value is assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows: `[n_hi, n_lo, ...] -> [ctz, ...]`, where `ctz` is the number of trailing zeros of value `n`.
This takes 41 cycles. | +| clo | Counts the number of leading ones of one unsigned 64-bit integer.
The input value is assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows: `[n_hi, n_lo, ...] -> [clo, ...]`, where `clo` is the number of leading ones of value `n`.
This takes 42 cycles. | +| cto | Counts the number of trailing ones of one unsigned 64-bit integer.
The input value is assumed to be represented using 32-bit limbs, but this is not checked.
The stack transition looks as follows: `[n_hi, n_lo, ...] -> [cto, ...]`, where `cto` is the number of trailing ones of value `n`.
This takes 40 cycles. | diff --git a/miden/Cargo.toml b/miden/Cargo.toml index d86607e05a..bb11bd325f 100644 --- a/miden/Cargo.toml +++ b/miden/Cargo.toml @@ -1,11 +1,12 @@ [package] name = "miden-vm" -version = "0.7.0" +version = "0.8.0" description="Miden virtual machine" authors = ["miden contributors"] readme="README.md" license = "MIT" repository = "https://github.com/0xPolygonMiden/miden-vm" +documentation = "https://docs.rs/miden-vm/0.8.0" categories = ["cryptography", "emulators", "no-std"] keywords = ["miden", "stark", "virtual-machine", "zkp"] edition = "2021" @@ -39,25 +40,27 @@ path = "tests/integration/main.rs" [features] concurrent = ["prover/concurrent", "std"] default = ["std"] -executable = ["dep:env_logger", "dep:hex", "hex?/std", "std", "dep:serde", "serde?/std", "dep:serde_derive", "dep:serde_json", "serde_json?/std", "dep:clap", "dep:rustyline"] +executable = ["dep:hex", "hex?/std", "std", "dep:serde", "serde?/std", "dep:serde_derive", "dep:serde_json", "serde_json?/std", "dep:clap", "dep:rustyline", "dep:tracing-subscriber"] metal = ["prover/metal", "std"] -std = ["assembly/std", "log/std", "processor/std", "prover/std", "verifier/std"] -sve = ["processor/sve", "prover/sve", "std"] +std = ["assembly/std", "processor/std", "prover/std", "verifier/std"] [dependencies] -assembly = { package = "miden-assembly", path = "../assembly", version = "0.7", default-features = false } +assembly = { package = "miden-assembly", path = "../assembly", version = "0.8", default-features = false } +blake3 = "1.5" clap = { version = "4.4", features = ["derive"], optional = true } -env_logger = { version = "0.10", default-features = false, optional = true } hex = { version = "0.4", optional = true } -log = { version = "0.4", default-features = false, optional = true } -processor = { package = "miden-processor", path = "../processor", version = "0.7", default-features = false } -prover = { package = "miden-prover", path = "../prover", version = "0.7", default-features = false } -rustyline = { version = "12.0", default-features = false, optional = true } +processor = { package = "miden-processor", path = "../processor", version = "0.8", default-features = false } +prover = { package = "miden-prover", path = "../prover", version = "0.8", default-features = false } +rustyline = { version = "13.0", default-features = false, optional = true } serde = {version = "1.0", optional = true } serde_derive = {version = "1.0", optional = true } serde_json = {version = "1.0", optional = true } -stdlib = { package = "miden-stdlib", path = "../stdlib", version = "0.6", default-features = false } -verifier = { package = "miden-verifier", path = "../verifier", version = "0.7", default-features = false } +stdlib = { package = "miden-stdlib", path = "../stdlib", version = "0.8", default-features = false } +tracing = { version = "0.1", default-features = false, features = ["attributes"] } +tracing-subscriber = { version = "0.3", features = ["std", "env-filter"], optional = true } +tracing-forest = { version = "0.1", features = ["ansi", "smallvec"], optional = true } +verifier = { package = "miden-verifier", path = "../verifier", version = "0.8", default-features = false } +vm-core = { package = "miden-core", path = "../core", version = "0.8", default-features = false } [dev-dependencies] assert_cmd = "2.0" @@ -66,5 +69,5 @@ escargot = "0.5" num-bigint = "0.4" predicates = "3.0" test-utils = { package = "miden-test-utils", path = "../test-utils" } -vm-core = { package = "miden-core", path = "../core", version = 
"0.7" } -winter-fri = { package = "winter-fri", version = "0.6" } +vm-core = { package = "miden-core", path = "../core", version = "0.8" } +winter-fri = { package = "winter-fri", version = "0.8" } diff --git a/miden/README.md b/miden/README.md index 3e452ba032..3f4684542b 100644 --- a/miden/README.md +++ b/miden/README.md @@ -224,8 +224,11 @@ make exec # build an executable for Apple silicon (concurrent+metal) make exec-metal -# built an executable for the Graviton 3 target (concurrent+sve) -make exec-graviton +# built an executable for targets with AVX2 instructions (concurrent) +make exec-avx2 + +# built an executable for targets with SVE instructions (concurrent) +make exec-sve ``` ### Running Miden VM @@ -263,7 +266,6 @@ Miden VM can be compiled with the following features: * `std` - enabled by default and relies on the Rust standard library. * `concurrent` - implies `std` and also enables multi-threaded proof generation. * `executable` - required for building Miden VM binary as described above. Implies `std`. -* `sve` - enables [SVE](https://en.wikipedia.org/wiki/AArch64#Scalable_Vector_Extension_(SVE))-based acceleration of the RPO hash function on supported platforms (e.g., Graviton 3). * `metal` - enables [Metal](https://en.wikipedia.org/wiki/Metal_(API))-based acceleration of proof generation (for recursive proofs) on supported platforms (e.g., Apple silicon). * `no_std` does not rely on the Rust standard library and enables compilation to WebAssembly. diff --git a/miden/benches/program_compilation.rs b/miden/benches/program_compilation.rs index 5f7fafc9a8..da2bec96b9 100644 --- a/miden/benches/program_compilation.rs +++ b/miden/benches/program_compilation.rs @@ -1,4 +1,4 @@ -use assembly::{self, Assembler}; +use assembly::Assembler; use criterion::{criterion_group, criterion_main, Criterion}; use std::time::Duration; use stdlib::StdLibrary; diff --git a/miden/examples/debug/debug.inputs b/miden/examples/debug/debug.inputs new file mode 100644 index 0000000000..bc0dadb254 --- /dev/null +++ b/miden/examples/debug/debug.inputs @@ -0,0 +1,3 @@ +{ + "operand_stack": [] +} diff --git a/miden/examples/debug/debug.masm b/miden/examples/debug/debug.masm new file mode 100644 index 0000000000..0c4c27f237 --- /dev/null +++ b/miden/examples/debug/debug.masm @@ -0,0 +1,42 @@ +proc.foo.3 + push.11 + loc_store.0 + push.101 + loc_store.1 + + debug.local + debug.local.1 + debug.local.0.1 + debug.local.1.5 + # will fail: debug.local.0.65536 + # will fail: debug.local.1.65540 +end + +proc.bar.4 + push.21 + loc_store.0 + push.121 + loc_store.1 + debug.local + debug.local.2 +end + +begin + push.13.2 + mem_store + debug.mem.2 + push.104467440737.1 + mem_store + push.1044674407370.10446744073.10446744073709.10446744073709.1000 + mem_storew + + debug.mem.1000 + debug.mem.1001 + debug.mem.999.1002 + debug.mem + + debug.stack.8 + + exec.foo + exec.bar +end diff --git a/miden/examples/fib/fib.masm b/miden/examples/fib/fib.masm index 1bc29cd5c5..ac8fcc1ac4 100644 --- a/miden/examples/fib/fib.masm +++ b/miden/examples/fib/fib.masm @@ -3,4 +3,4 @@ begin repeat.1000 swap dup.1 add end -end \ No newline at end of file +end diff --git a/miden/examples/hashing/blake3/blake3.inputs b/miden/examples/hashing/blake3/blake3.inputs new file mode 100644 index 0000000000..700e6fb0d5 --- /dev/null +++ b/miden/examples/hashing/blake3/blake3.inputs @@ -0,0 +1,3 @@ +{ + "operand_stack": ["4294967295", "4294967295", "4294967295", "4294967295", "4294967295", "4294967295", "4294967295", "4294967295", "4294967295", "4294967295", 
"4294967295", "4294967295", "4294967295", "4294967295", "4294967295", "4294967295"] +} diff --git a/miden/examples/hashing/blake3/blake3.masm b/miden/examples/hashing/blake3/blake3.masm new file mode 100644 index 0000000000..9378ca14a3 --- /dev/null +++ b/miden/examples/hashing/blake3/blake3.masm @@ -0,0 +1,17 @@ +use.std::crypto::hashes::blake3 + +begin + # hash_2to1 pops top 16 elements (64 bytes) from the stack, compute their hash and puts the + # resulting 8 elements (32 bytes) back to the stack. + exec.blake3::hash_2to1 + + # Check the correctness of the hashing result by comparing it with precomputed correct values. + # This hash is a result of applying a blake3 hashing function to the binary value consisting of + # only ones. + push.0xD9696D27.0xF209D66E.0xD0DFDEB9.0x7D5992E2.0x44DDA9CB.0xD6FFB5E5.0x8CD0CAA6.0xF0270FA9 + + # compare results + movupw.2 + assert_eqw + assert_eqw +end \ No newline at end of file diff --git a/miden/examples/hashing/sha256/sha256.inputs b/miden/examples/hashing/sha256/sha256.inputs new file mode 100644 index 0000000000..700e6fb0d5 --- /dev/null +++ b/miden/examples/hashing/sha256/sha256.inputs @@ -0,0 +1,3 @@ +{ + "operand_stack": ["4294967295", "4294967295", "4294967295", "4294967295", "4294967295", "4294967295", "4294967295", "4294967295", "4294967295", "4294967295", "4294967295", "4294967295", "4294967295", "4294967295", "4294967295", "4294967295"] +} diff --git a/miden/examples/hashing/sha256/sha256.masm b/miden/examples/hashing/sha256/sha256.masm new file mode 100644 index 0000000000..ccde47a3fb --- /dev/null +++ b/miden/examples/hashing/sha256/sha256.masm @@ -0,0 +1,17 @@ +use.std::crypto::hashes::sha256 + +begin + # hash_2to1 pops top 16 elements (64 bytes) from the stack, compute their hash and puts the + # resulting 8 elements (32 bytes) back to the stack. + exec.sha256::hash_2to1 + + # Check the correctness of the hashing result by comparing it with precomputed correct values. + # This hash is a result of applying a sha256 hashing function to the binary value consisting of + # only ones. + push.0x85E1D1F7.0x8643E4A2.0x2DAD7274.0x1F764AAD.0xBA3EEB20.0xF1D30600.0x294E9E0D.0x8667E718 + + # compare results + movupw.2 + assert_eqw + assert_eqw +end \ No newline at end of file diff --git a/miden/examples/merkle_store/merkle_store.inputs b/miden/examples/merkle_store/merkle_store.inputs index 47b5d9215d..9dbaa4612d 100644 --- a/miden/examples/merkle_store/merkle_store.inputs +++ b/miden/examples/merkle_store/merkle_store.inputs @@ -42,4 +42,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/miden/examples/merkle_store/merkle_store.masm b/miden/examples/merkle_store/merkle_store.masm index ba99e31e5d..c7c01d8f52 100644 --- a/miden/examples/merkle_store/merkle_store.masm +++ b/miden/examples/merkle_store/merkle_store.masm @@ -10,7 +10,7 @@ begin assert_eqw dropw - + # push the root of the Sparse Merkle Tree on the stack push.0x444693b04b509c6b69d9ed981e67243342505f3b64aa3d45746211c180d11902 @@ -22,7 +22,7 @@ begin assert_eqw dropw - + # push the root of the Merkle Tree on the stack push.0x0463f7d47758ad94b11dbf9675ffb7b331baa9c150d7fac6d784055c313eab0e diff --git a/miden/examples/nprime/nprime.masm b/miden/examples/nprime/nprime.masm index 5013247c0b..d6af5e2c9d 100644 --- a/miden/examples/nprime/nprime.masm +++ b/miden/examples/nprime/nprime.masm @@ -74,7 +74,7 @@ proc.is_not_prime_should_continue # [remainder, continue loop?, is prime?, prime, j, candidate, i, n, primes..] 
dup.4 dup.3 - u32checked_mod + u32assert2 u32mod # if remainder is zero, then the number is divisible by prime; hence isn't prime # [continue loop?, is prime?, prime, j, candidate, i, n, primes..] diff --git a/miden/src/cli/data.rs b/miden/src/cli/data.rs index aef6b6b691..80ebc3a373 100644 --- a/miden/src/cli/data.rs +++ b/miden/src/cli/data.rs @@ -8,13 +8,17 @@ use miden::{ }; use serde_derive::{Deserialize, Serialize}; use std::{ - collections::HashMap, + collections::{BTreeMap, HashMap}, fs, io::Write, path::{Path, PathBuf}, - time::Instant, }; use stdlib::StdLibrary; +pub use tracing::{event, instrument, Level}; + +// CONSTANTS +// ================================================================================================ +const SIMPLE_SMT_DEPTH: u8 = u64::BITS as u8; // HELPERS // ================================================================================================ @@ -81,6 +85,7 @@ pub struct InputFile { /// Helper methods to interact with the input file impl InputFile { + #[instrument(name = "read_input_file", skip_all)] pub fn read(inputs_path: &Option, program_path: &Path) -> Result { // if file not specified explicitly and corresponding file with same name as program_path // with '.inputs' extension does't exist, set operand_stack to empty vector @@ -100,8 +105,6 @@ impl InputFile { None => program_path.with_extension("inputs"), }; - println!("Reading input file `{}`", path.display()); - // read input file to string let inputs_file = fs::read_to_string(&path) .map_err(|err| format!("Failed to open input file `{}` - {}", path.display(), err))?; @@ -148,13 +151,13 @@ impl InputFile { .iter() .map(|v| { v.parse::() - .map_err(|e| format!("failed to parse advice stack value `{v}` - {e}")) + .map_err(|e| format!("failed to parse advice stack value '{v}': {e}")) }) .collect::, _>>() } /// Parse advice map data from the input file. 
- fn parse_advice_map(&self) -> Result>>, String> { + fn parse_advice_map(&self) -> Result>>, String> { let advice_map = match &self.advice_map { Some(advice_map) => advice_map, None => return Ok(None), @@ -163,23 +166,22 @@ impl InputFile { let map = advice_map .iter() .map(|(k, v)| { - // decode hex key - let mut key = [0u8; 32]; - hex::decode_to_slice(k, &mut key) - .map_err(|e| format!("failed to decode advice map key `{k}` - {e}"))?; + // Convert key to RpoDigest + let key = RpoDigest::try_from(k) + .map_err(|e| format!("failed to decode advice map key '{k}': {e}"))?; // convert values to Felt let values = v .iter() .map(|v| { Felt::try_from(*v).map_err(|e| { - format!("failed to convert advice map value `{v}` to Felt - {e}") + format!("failed to convert advice map value '{v}' to Felt: {e}") }) }) .collect::, _>>()?; Ok((key, values)) }) - .collect::>, String>>()?; + .collect::>, String>>()?; Ok(Some(map)) } @@ -199,14 +201,19 @@ impl InputFile { let tree = MerkleTree::new(leaves) .map_err(|e| format!("failed to parse a Merkle tree: {e}"))?; merkle_store.extend(tree.inner_nodes()); - println!("Added Merkle tree with root {} to the Merkle store", tree.root()); + event!( + Level::TRACE, + "Added Merkle tree with root {} to the Merkle store", + tree.root() + ); } MerkleData::SparseMerkleTree(data) => { let entries = Self::parse_sparse_merkle_tree(data)?; - let tree = SimpleSmt::with_leaves(u64::BITS as u8, entries) + let tree = SimpleSmt::::with_leaves(entries) .map_err(|e| format!("failed to parse a Sparse Merkle Tree: {e}"))?; merkle_store.extend(tree.inner_nodes()); - println!( + event!( + Level::TRACE, "Added Sparse Merkle tree with root {} to the Merkle store", tree.root() ); @@ -216,7 +223,8 @@ impl InputFile { let tree = PartialMerkleTree::with_leaves(entries) .map_err(|e| format!("failed to parse a Partial Merkle Tree: {e}"))?; merkle_store.extend(tree.inner_nodes()); - println!( + event!( + Level::TRACE, "Added Partial Merkle tree with root {} to the Merkle store", tree.root() ); @@ -316,6 +324,8 @@ impl OutputFile { } /// Read the output file + #[instrument(name = "read_output_file", + fields(path = %outputs_path.clone().unwrap_or(program_path.with_extension("outputs")).display()), skip_all)] pub fn read(outputs_path: &Option, program_path: &Path) -> Result { // If outputs_path has been provided then use this as path. Alternatively we will // replace the program_path extension with `.outputs` and use this as a default. 
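The hunks above replace ad-hoc `println!` logging in the CLI data helpers with `tracing` spans and events. For reference, the general pattern (an `#[instrument]` span per I/O helper plus `event!` for notable steps) can be sketched in isolation as follows; the function name, the field, and the subscriber setup below are illustrative only and are not part of this change:

```rust
use tracing::{event, instrument, Level};

/// Hypothetical helper used only to illustrate the span/event pattern.
#[instrument(name = "read_config_file", skip_all, fields(path = %path))]
fn read_config_file(path: &str) -> Result<String, String> {
    // The body runs inside the "read_config_file" span, so the subscriber can
    // report its duration when the span closes.
    let contents = std::fs::read_to_string(path)
        .map_err(|err| format!("failed to open `{path}`: {err}"))?;

    // Emit a TRACE-level event instead of printing to stdout.
    event!(Level::TRACE, "read {} bytes from `{}`", contents.len(), path);
    Ok(contents)
}

fn main() {
    // A minimal subscriber so the span and the event are actually rendered.
    tracing_subscriber::fmt().with_max_level(Level::TRACE).init();
    let _ = read_config_file("example.toml");
}
```

With a subscriber configured in `main.rs` (the `tracing-forest` layer or the compact `fmt` layer with `FmtSpan::CLOSE`), each span's duration is reported when it closes, which is what replaces the `Instant::now()` timing removed above.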
@@ -324,8 +334,6 @@ impl OutputFile { None => program_path.with_extension("outputs"), }; - println!("Reading output file `{}`", path.display()); - // read outputs file to string let outputs_file = fs::read_to_string(&path) .map_err(|err| format!("Failed to open outputs file `{}` - {}", path.display(), err))?; @@ -338,16 +346,13 @@ impl OutputFile { } /// Write the output file + #[instrument(name = "write_data_to_output_file", fields(path = %path.display()), skip_all)] pub fn write(stack_outputs: &StackOutputs, path: &PathBuf) -> Result<(), String> { // if path provided, create output file - println!("Creating output file `{}`", path.display()); - - let file = fs::File::create(&path).map_err(|err| { + let file = fs::File::create(path).map_err(|err| { format!("Failed to create output file `{}` - {}", path.display(), err) })?; - println!("Writing data to output file"); - // write outputs to output file serde_json::to_writer_pretty(file, &Self::new(stack_outputs)) .map_err(|err| format!("Failed to write output data - {}", err)) @@ -379,19 +384,17 @@ pub struct ProgramFile { /// Helper methods to interact with masm program file. impl ProgramFile { /// Reads the masm file at the specified path and parses it into a [ProgramAst]. + #[instrument(name = "read_program_file", fields(path = %path.display()))] pub fn read(path: &PathBuf) -> Result { // read program file to string - println!("Reading program file `{}`", path.display()); - let source = fs::read_to_string(&path) - .map_err(|err| format!("Failed to open program file `{}` - {}", path.display(), err))?; + let source = fs::read_to_string(path).map_err(|err| { + format!("Failed to open program file `{}` - {}\n", path.display(), err) + })?; // parse the program into an AST - print!("Parsing program... "); - let now = Instant::now(); let ast = ProgramAst::parse(&source).map_err(|err| { - format!("Failed to parse program file `{}` - {}", path.display(), err) + format!("Failed to parse program file `{}` - {}\n", path.display(), err) })?; - println!("done ({} ms)", now.elapsed().as_millis()); Ok(Self { ast, @@ -400,14 +403,12 @@ impl ProgramFile { } /// Compiles this program file into a [Program]. + #[instrument(name = "compile_program", skip_all)] pub fn compile(&self, debug: &Debug, libraries: I) -> Result where I: IntoIterator, L: Library, { - print!("Compiling program... 
"); - let now = Instant::now(); - // compile program let mut assembler = Assembler::default() .with_debug_mode(debug.is_on()) @@ -422,8 +423,6 @@ impl ProgramFile { .compile_ast(&self.ast) .map_err(|err| format!("Failed to compile program - {}", err))?; - println!("done ({} ms)", now.elapsed().as_millis()); - Ok(program) } @@ -450,6 +449,8 @@ pub struct ProofFile; /// Helper methods to interact with proof file impl ProofFile { /// Read stark proof from file + #[instrument(name = "read_proof_file", + fields(path = %proof_path.clone().unwrap_or(program_path.with_extension("proof")).display()), skip_all)] pub fn read( proof_path: &Option, program_path: &Path, @@ -461,8 +462,6 @@ impl ProofFile { None => program_path.with_extension("proof"), }; - println!("Reading proof file `{}`", path.display()); - // read the file to bytes let file = fs::read(&path) .map_err(|err| format!("Failed to open proof file `{}` - {}", path.display(), err))?; @@ -473,6 +472,10 @@ impl ProofFile { } /// Write stark proof to file + #[instrument(name = "write_data_to_proof_file", + fields( + path = %proof_path.clone().unwrap_or(program_path.with_extension("proof")).display(), + size = format!("{} KB", proof.to_bytes().len() / 1024)), skip_all)] pub fn write( proof: ExecutionProof, proof_path: &Option, @@ -485,16 +488,12 @@ impl ProofFile { None => program_path.with_extension("proof"), }; - println!("Creating proof file `{}`", path.display()); - // create output fille let mut file = fs::File::create(&path) .map_err(|err| format!("Failed to create proof file `{}` - {}", path.display(), err))?; let proof_bytes = proof.to_bytes(); - println!("Writing data to proof file - size {} KB", proof_bytes.len() / 1024); - // write proof bytes to file file.write_all(&proof_bytes).unwrap(); @@ -509,6 +508,7 @@ pub struct ProgramHash; /// Helper method to parse program hash from hex impl ProgramHash { + #[instrument(name = "read_program_hash", skip_all)] pub fn read(hash_hex_string: &String) -> Result { // decode hex to bytes let program_hash_bytes = hex::decode(hash_hex_string) @@ -533,6 +533,7 @@ pub struct Libraries { impl Libraries { /// Creates a new instance of [Libraries] from a list of library paths. 
+ #[instrument(name = "read_library_files", skip_all)] pub fn new(paths: I) -> Result where P: AsRef, @@ -541,8 +542,6 @@ impl Libraries { let mut libraries = Vec::new(); for path in paths { - println!("Reading library file `{}`", path.as_ref().display()); - let library = MaslLibrary::read_from_file(path) .map_err(|e| format!("Failed to read library: {e}"))?; libraries.push(library); @@ -582,7 +581,7 @@ mod test { } ] }"; - let inputs: InputFile = serde_json::from_str(&program_with_pmt).unwrap(); + let inputs: InputFile = serde_json::from_str(program_with_pmt).unwrap(); let merkle_store = inputs.parse_merkle_store().unwrap(); assert!(merkle_store.is_some()); @@ -608,7 +607,7 @@ mod test { } ] }"; - let inputs: InputFile = serde_json::from_str(&program_with_smt).unwrap(); + let inputs: InputFile = serde_json::from_str(program_with_smt).unwrap(); let merkle_store = inputs.parse_merkle_store().unwrap(); assert!(merkle_store.is_some()); @@ -626,7 +625,7 @@ mod test { } ] }"; - let inputs: InputFile = serde_json::from_str(&program_with_merkle_tree).unwrap(); + let inputs: InputFile = serde_json::from_str(program_with_merkle_tree).unwrap(); let merkle_store = inputs.parse_merkle_store().unwrap(); assert!(merkle_store.is_some()); } diff --git a/miden/src/cli/debug/executor.rs b/miden/src/cli/debug/executor.rs index 80dd021d5a..209b7d10d2 100644 --- a/miden/src/cli/debug/executor.rs +++ b/miden/src/cli/debug/executor.rs @@ -1,7 +1,6 @@ use super::DebugCommand; use miden::{ - math::{Felt, StarkField}, - DefaultHost, MemAdviceProvider, Program, StackInputs, VmState, VmStateIterator, + math::Felt, DefaultHost, MemAdviceProvider, Program, StackInputs, VmState, VmStateIterator, }; /// Holds debugger state and iterator used for debugging. diff --git a/miden/src/cli/debug/mod.rs b/miden/src/cli/debug/mod.rs index bd683fde29..16f71ab79a 100644 --- a/miden/src/cli/debug/mod.rs +++ b/miden/src/cli/debug/mod.rs @@ -40,7 +40,7 @@ impl DebugCmd { ProgramFile::read(&self.assembly_file)?.compile(&Debug::On, libraries.libraries)?; let program_hash: [u8; 32] = program.hash().into(); - println!("Debugging program with hash {}... 
", hex::encode(program_hash)); + println!("Debugging program with hash {}...", hex::encode(program_hash)); // load input data from file let input_data = InputFile::read(&self.input_file, &self.assembly_file)?; diff --git a/miden/src/cli/prove.rs b/miden/src/cli/prove.rs index eb9b7d8d33..fbcc138d95 100644 --- a/miden/src/cli/prove.rs +++ b/miden/src/cli/prove.rs @@ -1,10 +1,10 @@ -use super::data::{Debug, InputFile, Libraries, OutputFile, ProgramFile, ProofFile}; +use super::data::{instrument, Debug, InputFile, Libraries, OutputFile, ProgramFile, ProofFile}; use clap::Parser; use miden::ProvingOptions; -use processor::{DefaultHost, ExecutionOptions, ExecutionOptionsError}; -use std::{io::Write, path::PathBuf, time::Instant}; +use processor::{DefaultHost, ExecutionOptions, ExecutionOptionsError, Program}; + +use std::{path::PathBuf, time::Instant}; -// TODO check if clap is supporting automatic generation of list values of hash function #[derive(Debug, Clone, Parser)] #[clap(about = "Prove a miden program")] pub struct ProveCmd { @@ -47,11 +47,16 @@ pub struct ProveCmd { /// Security level for execution proofs generated by the VM #[clap(short = 's', long = "security", default_value = "96bits")] security: String, + + /// Enable tracing to monitor execution of the VM + #[clap(short = 't', long = "tracing")] + tracing: bool, } impl ProveCmd { pub fn get_proof_options(&self) -> Result { - let exec_options = ExecutionOptions::new(Some(self.max_cycles), self.expected_cycles)?; + let exec_options = + ExecutionOptions::new(Some(self.max_cycles), self.expected_cycles, self.tracing)?; Ok(match self.security.as_str() { "96bits" => ProvingOptions::with_96_bit_security(self.recursive), "128bits" => ProvingOptions::with_128_bit_security(self.recursive), @@ -61,25 +66,11 @@ impl ProveCmd { } pub fn execute(&self) -> Result<(), String> { - println!("============================================================"); - println!("Prove program"); - println!("============================================================"); - - // configure logging - env_logger::Builder::new() - .format(|buf, record| writeln!(buf, "{}", record.args())) - .filter_level(log::LevelFilter::Debug) - .init(); + println!("==============================================================================="); + println!("Prove program: {}", self.assembly_file.display()); + println!("-------------------------------------------------------------------------------"); - // load libraries from files - let libraries = Libraries::new(&self.library_paths)?; - - // load program from file and compile - let program = - ProgramFile::read(&self.assembly_file)?.compile(&Debug::Off, libraries.libraries)?; - - // load input data from file - let input_data = InputFile::read(&self.input_file, &self.assembly_file)?; + let (program, input_data) = load_data(self)?; let program_hash: [u8; 32] = program.hash().into(); println!("Proving program with hash {}...", hex::encode(program_hash)); @@ -123,3 +114,21 @@ impl ProveCmd { Ok(()) } } + +// HELPER FUNCTIONS +// ================================================================================================ + +#[instrument(skip_all)] +fn load_data(params: &ProveCmd) -> Result<(Program, InputFile), String> { + // load libraries from files + let libraries = Libraries::new(¶ms.library_paths)?; + + // load program from file and compile + let program = + ProgramFile::read(¶ms.assembly_file)?.compile(&Debug::Off, libraries.libraries)?; + + // load input data from file + let input_data = InputFile::read(¶ms.input_file, 
¶ms.assembly_file)?; + + Ok((program, input_data)) +} diff --git a/miden/src/cli/repl.rs b/miden/src/cli/repl.rs index 4ce4d3ecce..c2f5570351 100644 --- a/miden/src/cli/repl.rs +++ b/miden/src/cli/repl.rs @@ -1,15 +1,24 @@ use clap::Parser; +use std::path::PathBuf; use crate::repl::start_repl; #[derive(Debug, Clone, Parser)] #[clap(about = "Initiates the Miden REPL tool")] -pub struct ReplCmd {} +pub struct ReplCmd { + /// Paths to .masl library files + #[clap(short = 'l', long = "libraries", value_parser)] + library_paths: Vec, + + /// Usage of standard library + #[clap(short = 's', long = "stdlib")] + use_stdlib: bool, +} impl ReplCmd { pub fn execute(&self) -> Result<(), String> { // initiates repl tool. - start_repl(); + start_repl(&self.library_paths, self.use_stdlib); Ok(()) } } diff --git a/miden/src/cli/run.rs b/miden/src/cli/run.rs index f123093dd2..b9e0243436 100644 --- a/miden/src/cli/run.rs +++ b/miden/src/cli/run.rs @@ -1,6 +1,6 @@ -use super::data::{Debug, InputFile, Libraries, OutputFile, ProgramFile}; +use super::data::{instrument, Debug, InputFile, Libraries, OutputFile, ProgramFile}; use clap::Parser; -use processor::{DefaultHost, ExecutionOptions}; +use processor::{DefaultHost, ExecutionOptions, ExecutionTrace}; use std::{path::PathBuf, time::Instant}; #[derive(Debug, Clone, Parser)] @@ -33,41 +33,27 @@ pub struct RunCmd { /// Path to output file #[clap(short = 'o', long = "output", value_parser)] output_file: Option, + + /// Enable tracing to monitor execution of the VM + #[clap(short = 't', long = "tracing")] + tracing: bool, } impl RunCmd { pub fn execute(&self) -> Result<(), String> { - println!("============================================================"); - println!("Run program"); - println!("============================================================"); - - // load libraries from files - let libraries = Libraries::new(&self.library_paths)?; - - // load program from file and compile - let program = - ProgramFile::read(&self.assembly_file)?.compile(&Debug::Off, libraries.libraries)?; - - // load input data from file - let input_data = InputFile::read(&self.input_file, &self.assembly_file)?; - - // get execution options - let execution_options = ExecutionOptions::new(Some(self.max_cycles), self.expected_cycles) - .map_err(|err| format!("{err}"))?; - - // fetch the stack and program inputs from the arguments - let stack_inputs = input_data.parse_stack_inputs()?; - let host = DefaultHost::new(input_data.parse_advice_provider()?); + println!("==============================================================================="); + println!("Run program: {}", self.assembly_file.display()); + println!("-------------------------------------------------------------------------------"); - let program_hash: [u8; 32] = program.hash().into(); - print!("Executing program with hash {}... 
", hex::encode(program_hash)); let now = Instant::now(); - // execute program and generate outputs - let trace = processor::execute(&program, stack_inputs, host, execution_options) - .map_err(|err| format!("Failed to generate execution trace = {:?}", err))?; + let (trace, program_hash) = run_program(self)?; - println!("done ({} ms)", now.elapsed().as_millis()); + println!( + "Executed the program with hash {} in {} ms", + hex::encode(program_hash), + now.elapsed().as_millis() + ); if let Some(output_path) = &self.output_file { // write outputs to file if one was specified @@ -107,3 +93,36 @@ impl RunCmd { Ok(()) } } + +// HELPER FUNCTIONS +// ================================================================================================ + +#[instrument(name = "run_program", skip_all)] +fn run_program(params: &RunCmd) -> Result<(ExecutionTrace, [u8; 32]), String> { + // load libraries from files + let libraries = Libraries::new(¶ms.library_paths)?; + + // load program from file and compile + let program = + ProgramFile::read(¶ms.assembly_file)?.compile(&Debug::Off, libraries.libraries)?; + + // load input data from file + let input_data = InputFile::read(¶ms.input_file, ¶ms.assembly_file)?; + + // get execution options + let execution_options = + ExecutionOptions::new(Some(params.max_cycles), params.expected_cycles, params.tracing) + .map_err(|err| format!("{err}"))?; + + // fetch the stack and program inputs from the arguments + let stack_inputs = input_data.parse_stack_inputs()?; + let host = DefaultHost::new(input_data.parse_advice_provider()?); + + let program_hash: [u8; 32] = program.hash().into(); + + // execute program and generate outputs + let trace = processor::execute(&program, stack_inputs, host, execution_options) + .map_err(|err| format!("Failed to generate execution trace = {:?}", err))?; + + Ok((trace, program_hash)) +} diff --git a/miden/src/cli/verify.rs b/miden/src/cli/verify.rs index 9e81863b26..35b360d13a 100644 --- a/miden/src/cli/verify.rs +++ b/miden/src/cli/verify.rs @@ -22,9 +22,9 @@ pub struct VerifyCmd { impl VerifyCmd { pub fn execute(&self) -> Result<(), String> { - println!("============================================================"); - println!("Verify program"); - println!("============================================================"); + println!("==============================================================================="); + println!("Verifying proof: {}", self.proof_file.display()); + println!("-------------------------------------------------------------------------------"); // read program hash from input let program_hash = ProgramHash::read(&self.program_hash)?; @@ -41,7 +41,6 @@ impl VerifyCmd { // load proof from file let proof = ProofFile::read(&Some(self.proof_file.clone()), &self.proof_file)?; - println!("verifying program..."); let now = Instant::now(); // TODO accept kernel as CLI argument diff --git a/miden/src/examples/blake3.rs b/miden/src/examples/blake3.rs new file mode 100644 index 0000000000..5dac76c870 --- /dev/null +++ b/miden/src/examples/blake3.rs @@ -0,0 +1,87 @@ +use super::Example; +use miden::{Assembler, DefaultHost, MemAdviceProvider, Program, StackInputs}; +use stdlib::StdLibrary; +use vm_core::utils::group_slice_elements; + +// CONSTANTS +// ================================================================================================ + +const INITIAL_HASH_VALUE: [u32; 8] = [u32::MAX; 8]; + +// EXAMPLE BUILDER +// ================================================================================================ + +pub 
fn get_example(n: usize) -> Example> { + // generate the program and expected results + let program = generate_blake3_program(n); + let expected_result = compute_hash_chain(n); + println!( + "Generated a program to compute {}-th iteration of BLAKE3 1-to-1 hash; expected result: {:?}", + n, expected_result + ); + + Example { + program, + stack_inputs: StackInputs::try_from_values(INITIAL_HASH_VALUE.iter().map(|&v| v as u64)) + .unwrap(), + host: DefaultHost::default(), + expected_result, + num_outputs: 8, + } +} + +/// Generates a program to compute the `n`-th hash of blake3 1-to-1 hash chain +fn generate_blake3_program(n: usize) -> Program { + let program = format!( + " + use.std::crypto::hashes::blake3 + + begin + repeat.{} + exec.blake3::hash_1to1 + end + end", + n + ); + + Assembler::default() + .with_library(&StdLibrary::default()) + .unwrap() + .compile(program) + .unwrap() +} + +/// Computes the `n`-th hash of blake3 1-to-1 hash chain +fn compute_hash_chain(n: usize) -> Vec { + let mut bytes: [u8; 32] = INITIAL_HASH_VALUE + .iter() + .flat_map(|v| v.to_le_bytes()) + .collect::>() + .try_into() + .unwrap(); + + for _ in 0..n { + let hasher = blake3::hash(&bytes); + bytes = *hasher.as_bytes(); + } + + group_slice_elements::(&bytes) + .iter() + .map(|&bytes| u32::from_le_bytes(bytes) as u64) + .collect::>() +} + +// EXAMPLE TESTER +// ================================================================================================ + +#[test] +fn test_blake3_example() { + let example = get_example(2); + super::test_example(example, false); +} + +#[test] +fn test_blake3_example_fail() { + let example = get_example(2); + super::test_example(example, true); +} diff --git a/miden/src/examples/fibonacci.rs b/miden/src/examples/fibonacci.rs index 4f10c37413..c5d7e2b82d 100644 --- a/miden/src/examples/fibonacci.rs +++ b/miden/src/examples/fibonacci.rs @@ -1,8 +1,5 @@ use super::{Example, ONE, ZERO}; -use miden::{ - math::{Felt, StarkField}, - Assembler, DefaultHost, MemAdviceProvider, Program, StackInputs, -}; +use miden::{math::Felt, Assembler, DefaultHost, MemAdviceProvider, Program, StackInputs}; // EXAMPLE BUILDER // ================================================================================================ @@ -42,7 +39,7 @@ fn generate_fibonacci_program(n: usize) -> Program { n - 1 ); - Assembler::default().compile(&program).unwrap() + Assembler::default().compile(program).unwrap() } /// Computes the `n`-th term of Fibonacci sequence diff --git a/miden/src/examples/mod.rs b/miden/src/examples/mod.rs index df1a234184..638b3169bf 100644 --- a/miden/src/examples/mod.rs +++ b/miden/src/examples/mod.rs @@ -1,9 +1,10 @@ use clap::Parser; use miden::{ExecutionProof, Host, Program, ProgramInfo, ProvingOptions, StackInputs}; use processor::{ExecutionOptions, ExecutionOptionsError, ONE, ZERO}; -use std::io::Write; + use std::time::Instant; +pub mod blake3; pub mod fibonacci; // EXAMPLE @@ -41,9 +42,13 @@ pub struct ExampleOptions { #[clap(short = 'r', long = "recursive")] recursive: bool, - /// Security level for execution proofs generated by the VM + /// Security level for execution proofs generated by the VM #[clap(short = 's', long = "security", default_value = "96bits")] security: String, + + /// Enable tracing to monitor execution of the VM + #[clap(short = 't', long = "tracing")] + tracing: bool, } #[derive(Debug, Clone, Parser)] @@ -55,11 +60,19 @@ pub enum ExampleType { #[clap(short = 'n', default_value = "1024")] sequence_length: usize, }, + + /// Compute a chain of the BLAKE3 1-to-1 
hashes + Blake3 { + /// Length of the hash chain + #[clap(short = 'n', default_value = "32")] + chain_length: usize, + }, } impl ExampleOptions { pub fn get_proof_options(&self) -> Result<ProvingOptions, ExecutionOptionsError> { - let exec_options = ExecutionOptions::new(Some(self.max_cycles), self.expected_cycles)?; + let exec_options = + ExecutionOptions::new(Some(self.max_cycles), self.expected_cycles, self.tracing)?; Ok(match self.security.as_str() { "96bits" => ProvingOptions::with_96_bit_security(self.recursive), "128bits" => ProvingOptions::with_128_bit_security(self.recursive), @@ -71,17 +84,12 @@ impl ExampleOptions { pub fn execute(&self) -> Result<(), String> { println!("============================================================"); - // configure logging - env_logger::Builder::new() - .format(|buf, record| writeln!(buf, "{}", record.args())) - .filter_level(log::LevelFilter::Debug) - .init(); - let proof_options = self.get_proof_options().map_err(|err| format!("{err}"))?; // instantiate and prepare the example let example = match self.example { ExampleType::Fib { sequence_length } => fibonacci::get_example(sequence_length), + ExampleType::Blake3 { chain_length } => blake3::get_example(chain_length), }; let Example { diff --git a/miden/src/main.rs b/miden/src/main.rs index cd3e8bb64b..81a3fb428e 100644 --- a/miden/src/main.rs +++ b/miden/src/main.rs @@ -1,6 +1,11 @@ use clap::Parser; use core::fmt; use miden::{AssemblyError, ExecutionError}; +#[cfg(feature = "tracing-forest")] +use tracing_forest::ForestLayer; +#[cfg(not(feature = "tracing-forest"))] +use tracing_subscriber::fmt::format::FmtSpan; +use tracing_subscriber::{prelude::*, EnvFilter}; mod cli; mod examples; @@ -53,6 +58,30 @@ pub fn main() { // read command-line args let cli = Cli::parse(); + // configure logging + // if logging level is not specified, set level to "warn" + if std::env::var("MIDEN_LOG").is_err() { + std::env::set_var("MIDEN_LOG", "warn"); + } + let registry = + tracing_subscriber::registry::Registry::default().with(EnvFilter::from_env("MIDEN_LOG")); + + #[cfg(feature = "tracing-forest")] + registry.with(ForestLayer::default()).init(); + + #[cfg(not(feature = "tracing-forest"))] + { + let format = tracing_subscriber::fmt::layer() + .with_level(false) + .with_target(false) + .with_thread_names(false) + .with_span_events(FmtSpan::CLOSE) + .with_ansi(false) + .compact(); + + registry.with(format).init(); + } + // execute cli action if let Err(error) = cli.execute() { println!("{}", error); diff --git a/miden/src/repl/mod.rs b/miden/src/repl/mod.rs index e51495aa68..5b4245b3e2 100644 --- a/miden/src/repl/mod.rs +++ b/miden/src/repl/mod.rs @@ -1,9 +1,9 @@ -use super::ProgramError; -use miden::{ - math::{Felt, StarkField}, - DefaultHost, StackInputs, Word, -}; +use assembly::{Assembler, Library, MaslLibrary}; +use miden::{math::Felt, DefaultHost, StackInputs, Word}; +use processor::ContextId; use rustyline::{error::ReadlineError, DefaultEditor}; +use std::{collections::BTreeSet, path::PathBuf}; +use stdlib::StdLibrary; /// This work is in continuation to the amazing work done by team `Scribe` /// [here](https://github.com/ControlCplusControlV/Scribe/blob/main/transpiler/src/repl.rs#L8) @@ -53,7 +53,7 @@ use rustyline::{error::ReadlineError, DefaultEditor}; /// if all of them are zeros). 
/// >> push.1 push.2 push.3 push.4 push.5 /// >> exp -/// >> u32checked_mul +/// >> u32wrapping_mul /// >> swap /// >> eq.2 /// >> assert @@ -124,9 +124,24 @@ use rustyline::{error::ReadlineError, DefaultEditor}; /// Memory at address 87 is empty /// Initiates the Miden Repl tool. -pub fn start_repl() { +pub fn start_repl(library_paths: &Vec<PathBuf>, use_stdlib: bool) { let mut program_lines: Vec<String> = Vec::new(); + // set of user imported modules + let mut imported_modules: BTreeSet<String> = BTreeSet::new(); + + // load libraries from files + let mut provided_libraries = Vec::new(); + for path in library_paths { + let library = MaslLibrary::read_from_file(path) + .map_err(|e| format!("Failed to read library: {e}")) + .unwrap(); + provided_libraries.push(library); + } + if use_stdlib { + provided_libraries.push(MaslLibrary::from(StdLibrary::default())); + } + println!("========================== Miden REPL ============================"); println!(); // prints out all the available commands in the Miden Repl tool. @@ -142,16 +157,21 @@ // initializing readline. let mut rl = DefaultEditor::new().expect("Readline couldn't be initialized"); loop { - let program = format!( - "begin\n{}\nend", + let mut program = String::new(); + for module in imported_modules.iter() { + program.push_str(module); + program.push('\n'); + } + program.push_str(&format!( + "\nbegin\n{}\nend", program_lines .iter() .map(|l| format!(" {}", l)) .collect::<Vec<String>>() .join("\n") - ); + )); - let result = execute(program.clone()); + let result = execute(program.clone(), &provided_libraries); if !program_lines.is_empty() { match result { @@ -162,15 +182,14 @@ memory = mem; } Err(e) => { - println!("{}", format!("Error running program: {:?}", e)); + println!("Error running program: {:?}", e); program_lines.pop(); } } - } else { - if should_print_stack { - println!("{}", str::repeat("0 ", 16)); - } + } else if should_print_stack { + println!("{}", str::repeat("0 ", 16)); } + match rl.readline(">> ") { Ok(line) => { if line == "!program" { @@ -182,7 +201,7 @@ should_print_stack = false; } else if line == "!mem" { should_print_stack = false; - if memory.len() == 0 { + if memory.is_empty() { println!("The memory has not been initialized yet"); continue; } @@ -209,7 +228,7 @@ break; } } - // incase the flag has not been initialized. + // in case the flag has not been initialized. if !mem_at_addr_present { println!("Memory at address {} is empty", addr); } @@ -231,6 +250,8 @@ }; } else if line == "!stack" { should_print_stack = true; + } else if line.starts_with("!use") { + handle_use_command(line, &provided_libraries, &mut imported_modules); } else { rl.add_history_entry(line.clone()).expect("Failed to add a history entry"); program_lines.push(line.clone()); @@ -261,10 +282,18 @@ /// Compiles and executes a compiled Miden program, returning the stack, memory and any Miden errors. /// The program is passed in as a String, passed to the Miden Assembler, and then passed into the Miden /// Processor to be executed. 
-fn execute(program: String) -> Result<(Vec<(u64, Word)>, Vec<Felt>), ProgramError> { - let program = assembly::Assembler::default() - .compile(&program) - .map_err(ProgramError::AssemblyError)?; +fn execute( + program: String, + provided_libraries: &[MaslLibrary], +) -> Result<(Vec<(u64, Word)>, Vec<Felt>), String> { + // compile program + let mut assembler = Assembler::default(); + + assembler = assembler + .with_libraries(provided_libraries.iter()) + .map_err(|err| format!("{err}"))?; + + let program = assembler.compile(program).map_err(|err| format!("{err}"))?; let stack_inputs = StackInputs::default(); let host = DefaultHost::default(); @@ -272,11 +301,11 @@ let state_iter = processor::execute_iter(&program, stack_inputs, host); let (system, _, stack, chiplets, err) = state_iter.into_parts(); if let Some(err) = err { - return Err(ProgramError::ExecutionError(err)); + return Err(format!("{err}")); } // loads the memory at the latest clock cycle. - let mem_state = chiplets.get_mem_state_at(0, system.clk()); + let mem_state = chiplets.get_mem_state_at(ContextId::root(), system.clk()); // loads the stack along with the overflow values at the latest clock cycle. let stack_state = stack.get_state_at(system.clk()); @@ -305,16 +334,41 @@ fn read_mem_address(mem_str: &str) -> Result<u64, String> { Ok(*addr) } +/// Parses `!use` command. Adds the provided module to the program imports, or prints the list of +/// all available modules if no module name was provided. +fn handle_use_command( + line: String, + provided_libraries: &Vec<MaslLibrary>, + imported_modules: &mut BTreeSet<String>, +) { + let tokens: Vec<&str> = line.split_whitespace().collect(); + + match tokens.len() { + 1 => { + println!("Modules available for importing:"); + for lib in provided_libraries { + lib.modules().for_each(|module| println!("{}", module.path)); + } + } + 2 => { + imported_modules.insert(format!("use.{}", tokens[1]).to_string()); + } + _ => println!("malformed instruction '!use': too many parameters provided"), + } +} + /// Prints out all the available command present in the Miden Repl tool. 
fn print_instructions() { println!("Available commands:"); println!(); - println!("!stack: displays the complete state of the stack"); - println!("!mem: displays the state of the entire memory"); - println!("!mem[i]: displays the state of the memory at address i"); + println!("!stack: display the complete state of the stack"); + println!("!mem: display the state of the entire memory"); + println!("!mem[i]: display the state of the memory at address i"); println!("!undo: remove the last instruction"); + println!("!use: display a list of modules available for import"); + println!("!use : import the specified module"); println!("!program: display the program"); - println!("!help: prints out all the available commands"); + println!("!help: print out all the available commands"); println!(); } diff --git a/miden/src/tools/mod.rs b/miden/src/tools/mod.rs index 9e9a766293..0441ce67fb 100644 --- a/miden/src/tools/mod.rs +++ b/miden/src/tools/mod.rs @@ -1,7 +1,7 @@ use super::{cli::InputFile, ProgramError}; use clap::Parser; use core::fmt; -use miden::{utils::collections::Vec, Assembler, DefaultHost, Host, Operation, StackInputs}; +use miden::{utils::collections::*, Assembler, DefaultHost, Host, Operation, StackInputs}; use processor::{AsmOpInfo, TraceLenSummary}; use std::{fs, path::PathBuf}; use stdlib::StdLibrary; diff --git a/miden/tests/integration/air/chiplets/bitwise.rs b/miden/tests/integration/air/chiplets/bitwise.rs index f36323024b..213d00e2a6 100644 --- a/miden/tests/integration/air/chiplets/bitwise.rs +++ b/miden/tests/integration/air/chiplets/bitwise.rs @@ -3,7 +3,7 @@ use test_utils::{build_op_test, build_test}; #[test] fn bitwise_and() { // Test all bit input combinations: (1, 1), (1, 0), (0, 0). Then test larger numbers. - let asm_op = "u32checked_and push.0 u32checked_and push.0 u32checked_and push.65535 push.137 u32checked_and"; + let asm_op = "u32and push.0 u32and push.0 u32and push.65535 push.137 u32and"; let pub_inputs = vec![1, 1]; build_op_test!(&asm_op, &pub_inputs).prove_and_verify(pub_inputs, false); @@ -12,7 +12,7 @@ fn bitwise_and() { #[test] fn bitwise_or() { // Test all bit input combinations: (1, 1), (1, 0), (0, 0). Then test larger numbers. - let asm_op = "u32checked_or push.0 u32checked_or not push.0 u32checked_or push.65535 push.137 u32checked_or"; + let asm_op = "u32or push.0 u32or not push.0 u32or push.65535 push.137 u32or"; let pub_inputs = vec![1, 1]; build_op_test!(&asm_op, &pub_inputs).prove_and_verify(pub_inputs, false); @@ -21,7 +21,7 @@ fn bitwise_or() { #[test] fn bitwise_xor() { // Test all bit input combinations: (1, 1), (0, 0), (1, 0). 
Then test larger numbers - let asm_op = "u32checked_xor push.0 u32checked_xor push.1 u32checked_xor push.65535 push.137 u32checked_xor"; + let asm_op = "u32xor push.0 u32xor push.1 u32xor push.65535 push.137 u32xor"; let pub_inputs = vec![1, 1]; build_op_test!(&asm_op, &pub_inputs).prove_and_verify(pub_inputs, false); @@ -29,7 +29,7 @@ fn bitwise_xor() { #[test] fn all_operations() { - let source = "begin u32checked_and push.0 u32checked_or push.0 u32checked_xor end"; + let source = "begin u32and push.0 u32or push.0 u32xor end"; let pub_inputs = vec![1, 1]; build_test!(source, &pub_inputs).prove_and_verify(pub_inputs, false); diff --git a/miden/tests/integration/air/chiplets/hasher.rs b/miden/tests/integration/air/chiplets/hasher.rs index 84e20ed869..6c9e71e32c 100644 --- a/miden/tests/integration/air/chiplets/hasher.rs +++ b/miden/tests/integration/air/chiplets/hasher.rs @@ -2,7 +2,7 @@ use test_utils::{ build_op_test, crypto::{init_merkle_leaf, init_merkle_store, MerkleStore, MerkleTree, Rpo256}, rand::rand_vector, - StarkField, Word, + Word, }; #[test] @@ -86,7 +86,7 @@ fn mtree_merge() { let tree_b = MerkleTree::new(leaves_b.clone()).unwrap(); let root_a = tree_a.root(); let root_b = tree_b.root(); - let root_merged = Rpo256::merge(&[root_a.into(), root_b.into()]); + let root_merged = Rpo256::merge(&[root_a, root_b]); let mut store = MerkleStore::default(); store.extend(tree_a.inner_nodes()); store.extend(tree_b.inner_nodes()); diff --git a/miden/tests/integration/air/chiplets/mod.rs b/miden/tests/integration/air/chiplets/mod.rs index aa59571d73..ca7137832b 100644 --- a/miden/tests/integration/air/chiplets/mod.rs +++ b/miden/tests/integration/air/chiplets/mod.rs @@ -8,9 +8,9 @@ mod memory; fn chiplets() { // Test a program that uses all of the chiplets. let source = "begin - hperm # hasher operation - push.5 push.10 u32checked_or # bitwise operation - mem_load # memory operation + hperm # hasher operation + push.5 push.10 u32or # bitwise operation + mem_load # memory operation end"; let pub_inputs = rand_vector::(8); diff --git a/miden/tests/integration/air/range.rs b/miden/tests/integration/air/range.rs index c6820e788e..9d59cfdfaf 100644 --- a/miden/tests/integration/air/range.rs +++ b/miden/tests/integration/air/range.rs @@ -14,8 +14,10 @@ fn range_check_once() { /// 5 is checked 3 times, 10 is checked twice, and 15 is checked once. 
#[test] fn range_check_multi() { - let source = "begin u32checked_add u32checked_add end"; + let source = + "begin u32assert2 u32overflowing_add assertz u32assert2 u32overflowing_add assertz end"; let stack = vec![5, 5, 5]; + build_test!(source, &stack).prove_and_verify(stack, false); } diff --git a/miden/tests/integration/cli/cli_test.rs b/miden/tests/integration/cli/cli_test.rs index 3743847e2c..f25ad2fd25 100644 --- a/miden/tests/integration/cli/cli_test.rs +++ b/miden/tests/integration/cli/cli_test.rs @@ -18,7 +18,7 @@ fn cli_run() -> Result<(), Box> { cmd.arg("run") .arg("-a") - .arg("examples/fib/fib.masm") + .arg("./examples/fib/fib.masm") .arg("-n") .arg("1") .arg("-m") diff --git a/miden/tests/integration/exec_iters.rs b/miden/tests/integration/exec_iters.rs index 51839d914e..b9efc36c0f 100644 --- a/miden/tests/integration/exec_iters.rs +++ b/miden/tests/integration/exec_iters.rs @@ -1,4 +1,4 @@ -use processor::{AsmOpInfo, VmState}; +use processor::{AsmOpInfo, ContextId, VmState}; use test_utils::{build_debug_test, Felt, ToElements, ONE}; use vm_core::{AssemblyOp, Operation}; @@ -19,7 +19,7 @@ fn test_exec_iter() { let expected_states = vec![ VmState { clk: 0, - ctx: 0, + ctx: ContextId::root(), op: None, asmop: None, stack: [16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1].to_elements(), @@ -28,7 +28,7 @@ fn test_exec_iter() { }, VmState { clk: 1, - ctx: 0, + ctx: ContextId::root(), op: Some(Operation::Span), asmop: None, stack: [16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 1].to_elements(), @@ -37,7 +37,7 @@ fn test_exec_iter() { }, VmState { clk: 2, - ctx: 0, + ctx: ContextId::root(), op: Some(Operation::Pad), asmop: Some(AsmOpInfo::new( AssemblyOp::new("#main".to_string(), 3, "mem_storew.1".to_string(), false), @@ -49,7 +49,7 @@ fn test_exec_iter() { }, VmState { clk: 3, - ctx: 0, + ctx: ContextId::root(), op: Some(Operation::Incr), asmop: Some(AsmOpInfo::new( AssemblyOp::new("#main".to_string(), 3, "mem_storew.1".to_string(), false), @@ -61,7 +61,7 @@ fn test_exec_iter() { }, VmState { clk: 4, - ctx: 0, + ctx: ContextId::root(), op: Some(Operation::MStoreW), asmop: Some(AsmOpInfo::new( AssemblyOp::new("#main".to_string(), 3, "mem_storew.1".to_string(), false), @@ -73,7 +73,7 @@ fn test_exec_iter() { }, VmState { clk: 5, - ctx: 0, + ctx: ContextId::root(), op: Some(Operation::Drop), asmop: Some(AsmOpInfo::new( AssemblyOp::new("#main".to_string(), 4, "dropw".to_string(), false), @@ -85,7 +85,7 @@ fn test_exec_iter() { }, VmState { clk: 6, - ctx: 0, + ctx: ContextId::root(), op: Some(Operation::Drop), asmop: Some(AsmOpInfo::new( AssemblyOp::new("#main".to_string(), 4, "dropw".to_string(), false), @@ -97,7 +97,7 @@ fn test_exec_iter() { }, VmState { clk: 7, - ctx: 0, + ctx: ContextId::root(), op: Some(Operation::Drop), asmop: Some(AsmOpInfo::new( AssemblyOp::new("#main".to_string(), 4, "dropw".to_string(), false), @@ -109,7 +109,7 @@ fn test_exec_iter() { }, VmState { clk: 8, - ctx: 0, + ctx: ContextId::root(), op: Some(Operation::Drop), asmop: Some(AsmOpInfo::new( AssemblyOp::new("#main".to_string(), 4, "dropw".to_string(), false), @@ -121,7 +121,7 @@ fn test_exec_iter() { }, VmState { clk: 9, - ctx: 0, + ctx: ContextId::root(), op: Some(Operation::Push(Felt::new(17))), asmop: Some(AsmOpInfo::new( AssemblyOp::new("#main".to_string(), 1, "push.17".to_string(), false), @@ -133,7 +133,7 @@ fn test_exec_iter() { }, VmState { clk: 10, - ctx: 0, + ctx: ContextId::root(), op: Some(Operation::Noop), asmop: None, stack: [17, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 
0, 0].to_elements(), @@ -142,7 +142,7 @@ fn test_exec_iter() { }, VmState { clk: 11, - ctx: 0, + ctx: ContextId::root(), op: Some(Operation::Push(ONE)), asmop: None, stack: [1, 17, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0].to_elements(), @@ -151,7 +151,7 @@ fn test_exec_iter() { }, VmState { clk: 12, - ctx: 0, + ctx: ContextId::root(), op: Some(Operation::FmpUpdate), asmop: None, stack: [17, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0].to_elements(), @@ -160,7 +160,7 @@ fn test_exec_iter() { }, VmState { clk: 13, - ctx: 0, + ctx: ContextId::root(), op: Some(Operation::Pad), asmop: Some(AsmOpInfo::new( AssemblyOp::new("foo".to_string(), 4, "loc_store.0".to_string(), false), @@ -172,7 +172,7 @@ fn test_exec_iter() { }, VmState { clk: 14, - ctx: 0, + ctx: ContextId::root(), op: Some(Operation::FmpAdd), asmop: Some(AsmOpInfo::new( AssemblyOp::new("foo".to_string(), 4, "loc_store.0".to_string(), false), @@ -185,7 +185,7 @@ fn test_exec_iter() { }, VmState { clk: 15, - ctx: 0, + ctx: ContextId::root(), op: Some(Operation::MStore), asmop: Some(AsmOpInfo::new( AssemblyOp::new("foo".to_string(), 4, "loc_store.0".to_string(), false), @@ -200,7 +200,7 @@ fn test_exec_iter() { }, VmState { clk: 16, - ctx: 0, + ctx: ContextId::root(), op: Some(Operation::Drop), asmop: Some(AsmOpInfo::new( AssemblyOp::new("foo".to_string(), 4, "loc_store.0".to_string(), false), diff --git a/miden/tests/integration/flow_control/mod.rs b/miden/tests/integration/flow_control/mod.rs index aec1e0c046..d81692871b 100644 --- a/miden/tests/integration/flow_control/mod.rs +++ b/miden/tests/integration/flow_control/mod.rs @@ -1,3 +1,7 @@ +use assembly::{Assembler, AssemblyContext, LibraryPath}; +use miden::ModuleAst; +use processor::ExecutionError; +use stdlib::StdLibrary; use test_utils::{build_test, AdviceInputs, StackInputs, Test, TestError}; // SIMPLE FLOW CONTROL TESTS @@ -133,7 +137,7 @@ fn local_fn_call() { call.foo end"; - let expected_err = TestError::ExecutionError("InvalidStackDepthOnReturn(17)"); + let expected_err = TestError::ExecutionError(ExecutionError::InvalidStackDepthOnReturn(17)); build_test!(source, &[1, 2]).expect_error(expected_err); // dropping values from the stack in the current execution context should not affect values @@ -279,13 +283,45 @@ fn simple_dyn_exec() { ); } +#[test] +fn dynexec_with_procref() { + let program_source = " + use.std::math::u64 + + proc.foo + push.1.2 + u32wrapping_add + end + + begin + procref.foo + dynexec + + procref.u64::wrapping_add + dynexec + end"; + + let mut test = build_test!(program_source, &[]); + test.libraries = vec![StdLibrary::default().into()]; + + test.expect_stack(&[ + 1719755471, + 1057995821, + 3, + 12973202366681443424, + 7933716460165146367, + 14382661273226268231, + 15818904913409383971, + ]); +} + #[test] fn simple_dyncall() { let program_source = " proc.foo # drop the top 4 values, since that will be the code hash when we call this dynamically dropw - + # test that the execution context has changed mem_load.0 assertz @@ -352,3 +388,57 @@ fn simple_dyncall() { false, ); } + +// PROCREF INSTRUCTION +// ================================================================================================ + +#[test] +fn procref() { + let assembler = Assembler::default().with_library(&StdLibrary::default()).unwrap(); + + let module_source = " + use.std::math::u64 + export.u64::overflowing_add + + export.foo.4 + push.3.4 + end + "; + + // obtain procedures' MAST roots by compiling them as module + let module_ast = 
ModuleAst::parse(module_source).unwrap(); + let module_path = LibraryPath::new("test::foo").unwrap(); + let mast_roots = assembler + .compile_module(&module_ast, Some(&module_path), &mut AssemblyContext::for_module(false)) + .unwrap(); + + let source = " + use.std::math::u64 + + proc.foo.4 + push.3.4 + end + + begin + procref.u64::overflowing_add + push.0 + procref.foo + end"; + + let mut test = build_test!(source, &[]); + test.libraries = vec![StdLibrary::default().into()]; + + test.expect_stack(&[ + mast_roots[1][3].as_int(), + mast_roots[1][2].as_int(), + mast_roots[1][1].as_int(), + mast_roots[1][0].as_int(), + 0, + mast_roots[0][3].as_int(), + mast_roots[0][2].as_int(), + mast_roots[0][1].as_int(), + mast_roots[0][0].as_int(), + ]); + + test.prove_and_verify(vec![], false); +} diff --git a/miden/tests/integration/operations/crypto_ops.rs b/miden/tests/integration/operations/crypto_ops.rs index 2e5569ce52..cea2f441c1 100644 --- a/miden/tests/integration/operations/crypto_ops.rs +++ b/miden/tests/integration/operations/crypto_ops.rs @@ -2,7 +2,7 @@ use test_utils::{ build_expected_hash, build_expected_perm, build_op_test, crypto::{init_merkle_leaf, init_merkle_store, MerkleTree, NodeIndex}, rand::rand_vector, - Felt, StarkField, + Felt, }; // TESTS diff --git a/miden/tests/integration/operations/decorators/advice.rs b/miden/tests/integration/operations/decorators/advice.rs index 6a4566ca0e..4a00d9322f 100644 --- a/miden/tests/integration/operations/decorators/advice.rs +++ b/miden/tests/integration/operations/decorators/advice.rs @@ -1,4 +1,9 @@ -use test_utils::{build_test, crypto::MerkleStore, rand::rand_value, Felt}; +use test_utils::{ + build_test, + crypto::{MerkleStore, RpoDigest}, + rand::rand_value, + Felt, +}; // ADVICE INJECTION // ================================================================================================ @@ -179,7 +184,7 @@ fn advice_push_mapval() { // --- test simple adv.mapval --------------------------------------------- let source: &str = "begin # stack: [4, 3, 2, 1, ...] - + # load the advice stack with values from the advice map and drop the key adv.push_mapval dropw @@ -191,17 +196,17 @@ fn advice_push_mapval() { let stack_inputs = [1, 2, 3, 4]; let adv_map = [( - key_to_bytes(stack_inputs), + RpoDigest::try_from(stack_inputs).unwrap(), vec![Felt::new(8), Felt::new(7), Felt::new(6), Felt::new(5)], )]; - let test = build_test!(source, &stack_inputs, vec![], MerkleStore::default(), adv_map); + let test = build_test!(source, &stack_inputs, [], MerkleStore::default(), adv_map); test.expect_stack(&[5, 6, 7, 8]); // --- test adv.mapval with offset ---------------------------------------- let source: &str = "begin # stack: [4, 3, 2, 1, ...] - + # shift the key on the stack by 2 slots push.0 push.0 @@ -216,17 +221,17 @@ fn advice_push_mapval() { let stack_inputs = [1, 2, 3, 4]; let adv_map = [( - key_to_bytes(stack_inputs), + RpoDigest::try_from(stack_inputs).unwrap(), vec![Felt::new(8), Felt::new(7), Felt::new(6), Felt::new(5)], )]; - let test = build_test!(source, &stack_inputs, vec![], MerkleStore::default(), adv_map); + let test = build_test!(source, &stack_inputs, [], MerkleStore::default(), adv_map); test.expect_stack(&[5, 6, 7, 8]); // --- test simple adv.mapvaln -------------------------------------------- let source: &str = "begin # stack: [4, 3, 2, 1, ...] 
- + # load the advice stack with values from the advice map (including the number # of elements) and drop the key adv.push_mapvaln @@ -239,17 +244,17 @@ let stack_inputs = [1, 2, 3, 4]; let adv_map = [( - key_to_bytes(stack_inputs), + RpoDigest::try_from(stack_inputs).unwrap(), vec![Felt::new(11), Felt::new(12), Felt::new(13), Felt::new(14), Felt::new(15)], )]; - let test = build_test!(source, &stack_inputs, vec![], MerkleStore::default(), adv_map); + let test = build_test!(source, &stack_inputs, [], MerkleStore::default(), adv_map); test.expect_stack(&[15, 14, 13, 12, 11, 5]); // --- test adv.mapval with offset ---------------------------------------- let source: &str = "begin # stack: [4, 3, 2, 1, ...] - + # shift the key on the stack by 2 slots push.0 push.0 @@ -265,11 +270,11 @@ let stack_inputs = [1, 2, 3, 4]; let adv_map = [( - key_to_bytes(stack_inputs), + RpoDigest::try_from(stack_inputs).unwrap(), vec![Felt::new(11), Felt::new(12), Felt::new(13), Felt::new(14), Felt::new(15)], )]; - let test = build_test!(source, &stack_inputs, vec![], MerkleStore::default(), adv_map); + let test = build_test!(source, &stack_inputs, [], MerkleStore::default(), adv_map); test.expect_stack(&[15, 14, 13, 12, 11, 5]); } @@ -281,7 +286,7 @@ fn advice_insert_hdword() { # hash and insert top two words into the advice map adv.insert_hdword - + # manually compute the hash of the two words hmerge # => [KEY, ...] @@ -304,7 +309,7 @@ # hash and insert top two words into the advice map adv.insert_hdword.3 - + # manually compute the hash of the two words push.0.3.0.0 swapw.2 swapw @@ -324,17 +329,3 @@ let test = build_test!(source, &stack_inputs); test.expect_stack(&[1, 2, 3, 4, 5, 6, 7, 8]); } - -// HELPER FUNCTIONS -// ================================================================================================ - -fn key_to_bytes(key: [u64; 4]) -> [u8; 32] { - let mut result = [0; 32]; - - result[..8].copy_from_slice(&key[0].to_le_bytes()); - result[8..16].copy_from_slice(&key[1].to_le_bytes()); - result[16..24].copy_from_slice(&key[2].to_le_bytes()); - result[24..].copy_from_slice(&key[3].to_le_bytes()); - - result -} diff --git a/miden/tests/integration/operations/decorators/events.rs b/miden/tests/integration/operations/decorators/events.rs new file mode 100644 index 0000000000..7f189e0672 --- /dev/null +++ b/miden/tests/integration/operations/decorators/events.rs @@ -0,0 +1,54 @@ +use super::TestHost; +use assembly::Assembler; +use processor::ExecutionOptions; + +#[test] +fn test_event_handling() { + let source = "\ + begin + push.1 + emit.1 + push.2 + emit.2 + end"; + + // compile and execute program + let program = Assembler::default().compile(source).unwrap(); + let mut host = TestHost::default(); + processor::execute(&program, Default::default(), &mut host, Default::default()).unwrap(); + + // make sure events were handled correctly + let expected = vec![1, 2]; + assert_eq!(host.event_handler, expected); +} + +#[test] +fn test_trace_handling() { + let source = "\ + begin + push.1 + trace.1 + push.2 + trace.2 + end"; + + // compile program + let program = Assembler::default().compile(source).unwrap(); + let mut host = TestHost::default(); + + // execute program with disabled tracing + processor::execute(&program, Default::default(), &mut host, Default::default()).unwrap(); + let expected = Vec::<u32>::new(); + assert_eq!(host.trace_handler, expected); + + // execute program with enabled tracing + 
processor::execute( + &program, + Default::default(), + &mut host, + ExecutionOptions::default().with_tracing(), + ) + .unwrap(); + let expected = vec![1, 2]; + assert_eq!(host.trace_handler, expected); +} diff --git a/miden/tests/integration/operations/decorators/mod.rs b/miden/tests/integration/operations/decorators/mod.rs index 393fe57e67..7212cedd2f 100644 --- a/miden/tests/integration/operations/decorators/mod.rs +++ b/miden/tests/integration/operations/decorators/mod.rs @@ -1,2 +1,63 @@ +use processor::{ + AdviceExtractor, AdviceProvider, ExecutionError, Host, HostResponse, MemAdviceProvider, + ProcessState, +}; +use vm_core::AdviceInjector; + mod advice; mod asmop; +mod events; + +// TEST HOST +// ================================================================================================ +pub struct TestHost<A> { + pub adv_provider: A, + pub event_handler: Vec<u32>, + pub trace_handler: Vec<u32>, +} + +impl Default for TestHost<MemAdviceProvider> { + fn default() -> Self { + Self { + adv_provider: MemAdviceProvider::default(), + event_handler: Vec::new(), + trace_handler: Vec::new(), + } + } +} + +impl<A: AdviceProvider> Host for TestHost<A> { + fn get_advice<S: ProcessState>( + &mut self, + process: &S, + extractor: AdviceExtractor, + ) -> Result<HostResponse, ExecutionError> { + self.adv_provider.get_advice(process, &extractor) + } + + fn set_advice<S: ProcessState>( + &mut self, + process: &S, + injector: AdviceInjector, + ) -> Result<HostResponse, ExecutionError> { + self.adv_provider.set_advice(process, &injector) + } + + fn on_event<S: ProcessState>( + &mut self, + _process: &S, + event_id: u32, + ) -> Result<HostResponse, ExecutionError> { + self.event_handler.push(event_id); + Ok(HostResponse::None) + } + + fn on_trace<S: ProcessState>( + &mut self, + _process: &S, + trace_id: u32, + ) -> Result<HostResponse, ExecutionError> { + self.trace_handler.push(trace_id); + Ok(HostResponse::None) + } +} diff --git a/miden/tests/integration/operations/ext2_ops.rs b/miden/tests/integration/operations/ext2_ops.rs index 216ce851df..cd2b6e48d7 100644 --- a/miden/tests/integration/operations/ext2_ops.rs +++ b/miden/tests/integration/operations/ext2_ops.rs @@ -1,4 +1,4 @@ -use test_utils::{build_op_test, rand::rand_value, FieldElement, QuadFelt, StarkField}; +use test_utils::{build_op_test, rand::rand_value, FieldElement, QuadFelt}; // EXT2 OPS ASSERTIONS - MANUAL TESTS // ================================================================================================ diff --git a/miden/tests/integration/operations/field_ops.rs b/miden/tests/integration/operations/field_ops.rs index ed4e913fc5..b7940dc6fa 100644 --- a/miden/tests/integration/operations/field_ops.rs +++ b/miden/tests/integration/operations/field_ops.rs @@ -1,3 +1,5 @@ +use assembly::AssemblyError; +use processor::ExecutionError; use test_utils::{ build_op_test, prop_randw, proptest::prelude::*, rand::rand_value, Felt, FieldElement, StarkField, TestError, ONE, WORD_SIZE, }; @@ -18,7 +20,7 @@ fn add() { test.expect_stack(&[13]); // --- test overflow -------------------------------------------------------------------------- - let test = build_op_test!(asm_op, &[Felt::MODULUS, 8]); + let test = build_op_test!(asm_op, &[Felt::MODULUS - 1, 9]); test.expect_stack(&[8]); // --- test that the rest of the stack isn't affected ----------------------------------------- @@ -45,7 +47,7 @@ fn add_b() { test.expect_stack(&[13]); // --- test overflow -------------------------------------------------------------------------- - let test = build_op_test!(build_asm_op(8), &[Felt::MODULUS]); + let test = build_op_test!(build_asm_op(9), &[Felt::MODULUS - 1]); test.expect_stack(&[8]); // --- test that the rest of the stack isn't affected ----------------------------------------- @@ 
-179,7 +181,9 @@ fn div_b() { test.expect_stack(&[77]); let test = build_op_test!(build_asm_op(0), &[14]); - test.expect_error(TestError::AssemblyError("division by zero")); + test.expect_error(TestError::AssemblyError(AssemblyError::ParsingError(String::from( + "malformed instruction 'div.0', parameter 0 is invalid: division by zero", + )))); let test = build_op_test!(build_asm_op(2), &[4]); test.expect_stack(&[2]); @@ -201,7 +205,7 @@ fn div_fail() { // --- test divide by zero -------------------------------------------------------------------- let test = build_op_test!(asm_op, &[1, 0]); - test.expect_error(TestError::ExecutionError("DivideByZero")); + test.expect_error(TestError::ExecutionError(ExecutionError::DivideByZero(1))); } #[test] @@ -230,7 +234,9 @@ fn neg_fail() { // --- test illegal argument ------------------------------------------------------------------- let test = build_op_test!(asm_op, &[1]); - test.expect_error(TestError::AssemblyError("neg")); + test.expect_error(TestError::AssemblyError(AssemblyError::ParsingError(String::from( + "malformed instruction 'neg.1': too many parameters provided", + )))); } #[test] @@ -256,13 +262,15 @@ fn inv_fail() { // --- test no inv on 0 ----------------------------------------------------------------------- let test = build_op_test!(asm_op, &[0]); - test.expect_error(TestError::ExecutionError("DivideByZero")); + test.expect_error(TestError::ExecutionError(ExecutionError::DivideByZero(1))); let asm_op = "inv.1"; // --- test illegal argument ----------------------------------------------------------------- let test = build_op_test!(asm_op, &[1]); - test.expect_error(TestError::AssemblyError("inv")); + test.expect_error(TestError::AssemblyError(AssemblyError::ParsingError(String::from( + "malformed instruction 'inv.1': too many parameters provided", + )))); } #[test] @@ -283,7 +291,13 @@ fn pow2_fail() { let mut value = rand_value::() as u64; value += (u32::MAX as u64) + 1; - build_op_test!(asm_op, &[value]).expect_error(TestError::ExecutionError("FailedAssertion")); + build_op_test!(asm_op, &[value]).expect_error(TestError::ExecutionError( + ExecutionError::FailedAssertion { + clk: 16, + err_code: 0, + err_msg: None, + }, + )); } #[test] @@ -309,8 +323,13 @@ fn exp_bits_length_fail() { let base = 9; let pow = 1021; // pow is a 10 bit number - build_op_test!(build_asm_op(9), &[base, pow]) - .expect_error(TestError::ExecutionError("FailedAssertion")); + build_op_test!(build_asm_op(9), &[base, pow]).expect_error(TestError::ExecutionError( + ExecutionError::FailedAssertion { + clk: 18, + err_code: 0, + err_msg: None, + }, + )); //---------------------- exp containing more than 64 bits ------------------------------------- @@ -318,7 +337,7 @@ fn exp_bits_length_fail() { let pow = 1021; // pow is a 10 bit number let test = build_op_test!(build_asm_op(65), &[base, pow]); - test.expect_error(TestError::AssemblyError("parameter")); + test.expect_error(TestError::AssemblyError(AssemblyError::ParsingError(String::from("malformed instruction 'exp.u65', parameter u65 is invalid: parameter can at max be a u64 but found u65")))); } #[test] @@ -333,6 +352,23 @@ fn exp_small_pow() { test.expect_stack(&[expected.as_int()]); } +#[test] +fn ilog2() { + let asm_op = "ilog2"; + build_op_test!(asm_op, &[1]).expect_stack(&[0]); + build_op_test!(asm_op, &[8]).expect_stack(&[3]); + build_op_test!(asm_op, &[15]).expect_stack(&[3]); + build_op_test!(asm_op, &[Felt::MODULUS - 1]).expect_stack(&[63]); +} + +#[test] +fn ilog2_fail() { + let asm_op = "ilog2"; + + 
build_op_test!(asm_op, &[0]) + .expect_error(TestError::ExecutionError(ExecutionError::LogArgumentZero(1))); +} + // FIELD OPS BOOLEAN - MANUAL TESTS // ================================================================================================ @@ -353,7 +389,7 @@ fn not_fail() { // --- test value > 1 -------------------------------------------------------------------- let test = build_op_test!(asm_op, &[2]); - test.expect_error(TestError::ExecutionError("NotBinaryValue")); + test.expect_error(TestError::ExecutionError(ExecutionError::NotBinaryValue(Felt::new(2)))); } #[test] @@ -379,13 +415,13 @@ fn and_fail() { // --- test value > 1 -------------------------------------------------------------------- let test = build_op_test!(asm_op, &[2, 3]); - test.expect_error(TestError::ExecutionError("NotBinaryValue")); + test.expect_error(TestError::ExecutionError(ExecutionError::NotBinaryValue(Felt::new(3)))); let test = build_op_test!(asm_op, &[2, 0]); - test.expect_error(TestError::ExecutionError("NotBinaryValue")); + test.expect_error(TestError::ExecutionError(ExecutionError::NotBinaryValue(Felt::new(2)))); let test = build_op_test!(asm_op, &[0, 2]); - test.expect_error(TestError::ExecutionError("NotBinaryValue")); + test.expect_error(TestError::ExecutionError(ExecutionError::NotBinaryValue(Felt::new(2)))); } #[test] @@ -411,13 +447,13 @@ fn or_fail() { // --- test value > 1 -------------------------------------------------------------------- let test = build_op_test!(asm_op, &[2, 3]); - test.expect_error(TestError::ExecutionError("NotBinaryValue")); + test.expect_error(TestError::ExecutionError(ExecutionError::NotBinaryValue(Felt::new(3)))); let test = build_op_test!(asm_op, &[2, 0]); - test.expect_error(TestError::ExecutionError("NotBinaryValue")); + test.expect_error(TestError::ExecutionError(ExecutionError::NotBinaryValue(Felt::new(2)))); let test = build_op_test!(asm_op, &[0, 2]); - test.expect_error(TestError::ExecutionError("NotBinaryValue")); + test.expect_error(TestError::ExecutionError(ExecutionError::NotBinaryValue(Felt::new(2)))); } #[test] @@ -443,13 +479,13 @@ fn xor_fail() { // --- test value > 1 -------------------------------------------------------------------- let test = build_op_test!(asm_op, &[2, 3]); - test.expect_error(TestError::ExecutionError("NotBinaryValue")); + test.expect_error(TestError::ExecutionError(ExecutionError::NotBinaryValue(Felt::new(2)))); let test = build_op_test!(asm_op, &[2, 0]); - test.expect_error(TestError::ExecutionError("NotBinaryValue")); + test.expect_error(TestError::ExecutionError(ExecutionError::NotBinaryValue(Felt::new(2)))); let test = build_op_test!(asm_op, &[0, 2]); - test.expect_error(TestError::ExecutionError("NotBinaryValue")); + test.expect_error(TestError::ExecutionError(ExecutionError::NotBinaryValue(Felt::new(2)))); } // FIELD OPS COMPARISON - MANUAL TESTS @@ -466,12 +502,6 @@ fn eq() { // --- test when two elements are unequal ---------------------------------------------------- let test = build_op_test!(asm_op, &[25, 100]); test.expect_stack(&[0]); - - // --- test when two u64s are unequal but their felts are equal ------------------------------ - let a = Felt::MODULUS + 1; - let b = 1; - let test = build_op_test!(asm_op, &[a, b]); - test.expect_stack(&[1]); } #[test] @@ -639,34 +669,39 @@ proptest! 
{ let asm_op = "pow2"; let expected = 2_u64.wrapping_pow(b); - build_op_test!(asm_op, &[b as u64]).prop_expect_stack(&[expected])?; + let test = build_op_test!(asm_op, &[b as u64]); + test.prop_expect_stack(&[expected])?; } #[test] fn exp_proptest(a in any::<u64>(), b in any::<u64>()) { - - //---------------------- exp with no parameter ------------------------------------- - + // --- exp with no parameter -------------------------------------------------------------- let asm_op = "exp"; let base = a; let pow = b; let expected = Felt::new(base).exp(pow); let test = build_op_test!(asm_op, &[base, pow]); - test.expect_stack(&[expected.as_int()]); - - //----------------------- exp with parameter containing pow ---------------- + test.prop_expect_stack(&[expected.as_int()])?; + // --- exp with parameter containing pow -------------------------------------------------- let build_asm_op = |param: u64| format!("exp.{param}"); let base = a; let pow = b; let expected = Felt::new(base).exp(pow); let test = build_op_test!(build_asm_op(pow), &[base]); - test.expect_stack(&[expected.as_int()]); - + test.prop_expect_stack(&[expected.as_int()])?; } + #[test] + fn ilog2_proptest(a in 1..Felt::MODULUS) { + let asm_op = "ilog2"; + let expected = a.ilog2(); + + let test = build_op_test!(asm_op, &[a]); + test.prop_expect_stack(&[expected as u64])?; + } } // FIELD OPS COMPARISON - RANDOMIZED TESTS @@ -787,10 +822,6 @@ fn test_felt_comparison_op(asm_op: &str, expect_if_lt: u64, expect_if_eq: u64, e // element with high bits bigger than "smaller" and low bits equal let hi_gt_lo_eq = hi_gt_lo_lt + low_bit; - // unequal integers expected to be equal as field elements - let a = Felt::MODULUS + 1; - let a_mod = 1_u64; - // --- a < b ---------------------------------------------------------------------------------- // a is smaller in the low bits (equal in high bits) let test = build_op_test!(asm_op, &[smaller, hi_eq_lo_gt]); @@ -804,19 +835,11 @@ fn test_felt_comparison_op(asm_op: &str, expect_if_lt: u64, expect_if_eq: u64, e let test = build_op_test!(asm_op, &[smaller, hi_gt_lo_lt]); test.expect_stack(&[expect_if_lt]); - // compare values above and below the field modulus - let test = build_op_test!(asm_op, &[a_mod, a + 1]); - test.expect_stack(&[expect_if_lt]); - // --- a = b ---------------------------------------------------------------------------------- // high and low bits are both set let test = build_op_test!(asm_op, &[hi_gt_lo_eq, hi_gt_lo_eq]); test.expect_stack(&[expect_if_eq]); - // compare values above and below the field modulus - let test = build_op_test!(asm_op, &[a_mod, a]); - test.expect_stack(&[expect_if_eq]); - // --- a > b ---------------------------------------------------------------------------------- // a is bigger in the low bits (equal in high bits) let test = build_op_test!(asm_op, &[hi_eq_lo_gt, smaller]); @@ -829,8 +852,4 @@ fn test_felt_comparison_op(asm_op: &str, expect_if_lt: u64, expect_if_eq: u64, e // a is bigger in the high bits but smaller in the low bits let test = build_op_test!(asm_op, &[hi_gt_lo_lt, smaller]); test.expect_stack(&[expect_if_gt]); - - // compare values above and below the field modulus - let test = build_op_test!(asm_op, &[a_mod + 1, a]); - test.expect_stack(&[expect_if_gt]); } diff --git a/miden/tests/integration/operations/fri_ops.rs b/miden/tests/integration/operations/fri_ops.rs index 3c14c71694..7c1ac6c072 100644 --- a/miden/tests/integration/operations/fri_ops.rs +++ b/miden/tests/integration/operations/fri_ops.rs @@ -1,4 +1,4 @@ -use test_utils::{build_test, 
rand::rand_array, Felt, FieldElement, StarkField}; +use test_utils::{build_test, rand::rand_array, Felt, FieldElement}; // FRI_EXT2FOLD4 // ================================================================================================ diff --git a/miden/tests/integration/operations/io_ops/adv_ops.rs b/miden/tests/integration/operations/io_ops/adv_ops.rs index e4f486de42..58738bf474 100644 --- a/miden/tests/integration/operations/io_ops/adv_ops.rs +++ b/miden/tests/integration/operations/io_ops/adv_ops.rs @@ -1,5 +1,7 @@ use super::{build_op_test, build_test, TestError}; -use vm_core::{chiplets::hasher::apply_permutation, utils::ToElements, Felt, StarkField}; +use processor::ExecutionError; +use processor::ExecutionError::AdviceStackReadFailed; +use vm_core::{chiplets::hasher::apply_permutation, utils::ToElements, Felt}; // PUSHING VALUES ONTO THE STACK (PUSH) // ================================================================================================ @@ -29,7 +31,7 @@ fn adv_push() { fn adv_push_invalid() { // attempting to read from empty advice stack should throw an error let test = build_op_test!("adv_push.1"); - test.expect_error(TestError::ExecutionError("AdviceStackReadFailed")); + test.expect_error(TestError::ExecutionError(ExecutionError::AdviceStackReadFailed(1))); } // OVERWRITING VALUES ON THE STACK (LOAD) @@ -50,7 +52,7 @@ fn adv_loadw() { fn adv_loadw_invalid() { // attempting to read from empty advice stack should throw an error let test = build_op_test!("adv_loadw", &[0, 0, 0, 0]); - test.expect_error(TestError::ExecutionError("AdviceStackReadFailed")); + test.expect_error(TestError::ExecutionError(AdviceStackReadFailed(1))); } // MOVING ELEMENTS TO MEMORY VIA THE STACK (PIPE) diff --git a/miden/tests/integration/operations/io_ops/env_ops.rs b/miden/tests/integration/operations/io_ops/env_ops.rs index 52ea6cc394..0f0323cc3d 100644 --- a/miden/tests/integration/operations/io_ops/env_ops.rs +++ b/miden/tests/integration/operations/io_ops/env_ops.rs @@ -1,6 +1,6 @@ use processor::FMP_MIN; use test_utils::{ - build_op_test, build_test, AdviceInputs, StackInputs, StarkField, Test, Word, STACK_TOP_SIZE, + build_op_test, build_test, AdviceInputs, StackInputs, Test, Word, STACK_TOP_SIZE, }; use vm_core::{code_blocks::CodeBlock, Operation}; diff --git a/miden/tests/integration/operations/io_ops/mem_ops.rs b/miden/tests/integration/operations/io_ops/mem_ops.rs index ae6d1997e4..0238bcff92 100644 --- a/miden/tests/integration/operations/io_ops/mem_ops.rs +++ b/miden/tests/integration/operations/io_ops/mem_ops.rs @@ -1,4 +1,4 @@ -use super::{apply_permutation, build_op_test, build_test, Felt, StarkField, ToElements}; +use super::{apply_permutation, build_op_test, build_test, Felt, ToElements}; // LOADING SINGLE ELEMENT ONTO THE STACK (MLOAD) // ================================================================================================ diff --git a/miden/tests/integration/operations/io_ops/mod.rs b/miden/tests/integration/operations/io_ops/mod.rs index 9b6a5fdc37..676f333df5 100644 --- a/miden/tests/integration/operations/io_ops/mod.rs +++ b/miden/tests/integration/operations/io_ops/mod.rs @@ -1,4 +1,4 @@ -use test_utils::{build_op_test, build_test, Felt, StarkField, TestError, ToElements}; +use test_utils::{build_op_test, build_test, Felt, TestError, ToElements}; use vm_core::chiplets::hasher::apply_permutation; mod adv_ops; diff --git a/miden/tests/integration/operations/stack_ops.rs b/miden/tests/integration/operations/stack_ops.rs index 6b29b4c3cf..908b547dc8 100644 --- 
a/miden/tests/integration/operations/stack_ops.rs +++ b/miden/tests/integration/operations/stack_ops.rs @@ -1,3 +1,4 @@ +use assembly::AssemblyError; use test_utils::{build_op_test, proptest::prelude::*, TestError, STACK_TOP_SIZE, WORD_SIZE}; // STACK OPERATIONS TESTS @@ -54,7 +55,9 @@ fn dupn_fail() { // --- simple case ---------------------------------------------------------------------------- let test = build_op_test!(asm_op, &[16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]); - test.expect_error(TestError::AssemblyError("parameter")); + test.expect_error(TestError::AssemblyError(AssemblyError::ParsingError(String::from( + "malformed instruction `dup.16`: parameter '16' is invalid", + )))); } #[test] @@ -81,7 +84,9 @@ fn dupwn_fail() { // --- simple case ---------------------------------------------------------------------------- let test = build_op_test!(asm_op, &[16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]); - test.expect_error(TestError::AssemblyError("parameter")); + test.expect_error(TestError::AssemblyError(AssemblyError::ParsingError(String::from( + "malformed instruction `dupw.4`: parameter '4' is invalid", + )))); } #[test] @@ -108,9 +113,10 @@ fn swapn_fail() { // --- simple case ---------------------------------------------------------------------------- let test = build_op_test!(asm_op, &[16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]); - test.expect_error(TestError::AssemblyError("parameter")); + test.expect_error(TestError::AssemblyError(AssemblyError::ParsingError(String::from( + "malformed instruction `swap.16`: parameter '16' is invalid", + )))); } - #[test] fn swapw() { let asm_op = "swapw"; @@ -135,7 +141,9 @@ fn swapwn_fail() { // --- simple case ---------------------------------------------------------------------------- let test = build_op_test!(asm_op, &[16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]); - test.expect_error(TestError::AssemblyError("parameter")); + test.expect_error(TestError::AssemblyError(AssemblyError::ParsingError(String::from( + "malformed instruction `swapw.4`: parameter '4' is invalid", + )))); } #[test] @@ -159,15 +167,21 @@ fn movup() { fn movup_fail() { let asm_op = "movup.0"; let test = build_op_test!(asm_op, &[16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]); - test.expect_error(TestError::AssemblyError("parameter")); + test.expect_error(TestError::AssemblyError(AssemblyError::ParsingError(String::from( + "malformed instruction `movup.0`: parameter '0' is invalid", + )))); let asm_op = "movup.1"; let test = build_op_test!(asm_op, &[16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]); - test.expect_error(TestError::AssemblyError("parameter")); + test.expect_error(TestError::AssemblyError(AssemblyError::ParsingError(String::from( + "malformed instruction `movup.1`: parameter '1' is invalid", + )))); let asm_op = "movup.16"; let test = build_op_test!(asm_op, &[16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]); - test.expect_error(TestError::AssemblyError("parameter")); + test.expect_error(TestError::AssemblyError(AssemblyError::ParsingError(String::from( + "malformed instruction `movup.16`: parameter '16' is invalid", + )))); } #[test] @@ -182,15 +196,21 @@ fn movupw() { fn movupw_fail() { let asm_op = "movupw.0"; let test = build_op_test!(asm_op, &[16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]); - test.expect_error(TestError::AssemblyError("parameter")); + test.expect_error(TestError::AssemblyError(AssemblyError::ParsingError(String::from( + "malformed instruction `movupw.0`: 
parameter '0' is invalid", + )))); let asm_op = "movupw.1"; let test = build_op_test!(asm_op, &[16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]); - test.expect_error(TestError::AssemblyError("parameter")); + test.expect_error(TestError::AssemblyError(AssemblyError::ParsingError(String::from( + "malformed instruction `movupw.1`: parameter '1' is invalid", + )))); let asm_op = "movupw.4"; let test = build_op_test!(asm_op, &[16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]); - test.expect_error(TestError::AssemblyError("parameter")); + test.expect_error(TestError::AssemblyError(AssemblyError::ParsingError(String::from( + "malformed instruction `movupw.4`: parameter '4' is invalid", + )))); } #[test] @@ -205,15 +225,21 @@ fn movdn() { fn movdn_fail() { let asm_op = "movdn.0"; let test = build_op_test!(asm_op, &[16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]); - test.expect_error(TestError::AssemblyError("parameter")); + test.expect_error(TestError::AssemblyError(AssemblyError::ParsingError(String::from( + "malformed instruction `movdn.0`: parameter '0' is invalid", + )))); let asm_op = "movdn.1"; let test = build_op_test!(asm_op, &[16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]); - test.expect_error(TestError::AssemblyError("parameter")); + test.expect_error(TestError::AssemblyError(AssemblyError::ParsingError(String::from( + "malformed instruction `movdn.1`: parameter '1' is invalid", + )))); let asm_op = "movdn.16"; let test = build_op_test!(asm_op, &[16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]); - test.expect_error(TestError::AssemblyError("parameter")); + test.expect_error(TestError::AssemblyError(AssemblyError::ParsingError(String::from( + "malformed instruction `movdn.16`: parameter '16' is invalid", + )))); } #[test] @@ -228,15 +254,21 @@ fn movdnw() { fn movdnw_fail() { let asm_op = "movdnw.0"; let test = build_op_test!(asm_op, &[16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]); - test.expect_error(TestError::AssemblyError("parameter")); + test.expect_error(TestError::AssemblyError(AssemblyError::ParsingError(String::from( + "malformed instruction `movdnw.0`: parameter '0' is invalid", + )))); let asm_op = "movdnw.1"; let test = build_op_test!(asm_op, &[16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]); - test.expect_error(TestError::AssemblyError("parameter")); + test.expect_error(TestError::AssemblyError(AssemblyError::ParsingError(String::from( + "malformed instruction `movdnw.1`: parameter '1' is invalid", + )))); let asm_op = "movdnw.4"; let test = build_op_test!(asm_op, &[16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]); - test.expect_error(TestError::AssemblyError("parameter")); + test.expect_error(TestError::AssemblyError(AssemblyError::ParsingError(String::from( + "malformed instruction `movdnw.4`: parameter '4' is invalid", + )))); } #[test] diff --git a/miden/tests/integration/operations/sys_ops.rs b/miden/tests/integration/operations/sys_ops.rs index 987b8185d2..48080dfaa2 100644 --- a/miden/tests/integration/operations/sys_ops.rs +++ b/miden/tests/integration/operations/sys_ops.rs @@ -1,4 +1,5 @@ -use test_utils::{build_op_test, Felt, TestError}; +use processor::ExecutionError; +use test_utils::{build_op_test, TestError}; // SYSTEM OPS ASSERTIONS - MANUAL TESTS // ================================================================================================ @@ -19,9 +20,12 @@ fn assert_with_code() { test.expect_stack(&[]); // triggered assertion captures both the VM cycle and error code - let expected_err = 
format!("FailedAssertion(1, BaseElement({}))", Felt::new(123).inner()); let test = build_op_test!(asm_op, &[0]); - test.expect_error(TestError::ExecutionError(&expected_err)); + test.expect_error(TestError::ExecutionError(ExecutionError::FailedAssertion { + clk: 1, + err_code: 123, + err_msg: None, + })); } #[test] @@ -29,7 +33,11 @@ fn assert_fail() { let asm_op = "assert"; let test = build_op_test!(asm_op, &[2]); - test.expect_error(TestError::ExecutionError("FailedAssertion")); + test.expect_error(TestError::ExecutionError(ExecutionError::FailedAssertion { + clk: 1, + err_code: 0, + err_msg: None, + })); } #[test] @@ -48,8 +56,16 @@ fn assert_eq_fail() { let asm_op = "assert_eq"; let test = build_op_test!(asm_op, &[2, 1]); - test.expect_error(TestError::ExecutionError("FailedAssertion")); + test.expect_error(TestError::ExecutionError(ExecutionError::FailedAssertion { + clk: 2, + err_code: 0, + err_msg: None, + })); let test = build_op_test!(asm_op, &[1, 4]); - test.expect_error(TestError::ExecutionError("FailedAssertion")); + test.expect_error(TestError::ExecutionError(ExecutionError::FailedAssertion { + clk: 2, + err_code: 0, + err_msg: None, + })); } diff --git a/miden/tests/integration/operations/u32_ops/arithmetic_ops.rs b/miden/tests/integration/operations/u32_ops/arithmetic_ops.rs index a7b3bec551..7995d7cdb7 100644 --- a/miden/tests/integration/operations/u32_ops/arithmetic_ops.rs +++ b/miden/tests/integration/operations/u32_ops/arithmetic_ops.rs @@ -1,97 +1,10 @@ -use super::{test_param_out_of_bounds, test_unchecked_execution}; +use super::test_unchecked_execution; +use processor::ExecutionError; use test_utils::{build_op_test, proptest::prelude::*, rand::rand_value, TestError, U32_BOUND}; // U32 OPERATIONS TESTS - MANUAL - ARITHMETIC OPERATIONS // ================================================================================================ -#[test] -fn u32checked_add() { - let asm_op = "u32checked_add"; - - // --- simple case ---------------------------------------------------------------------------- - let test = build_op_test!(asm_op, &[1, 2]); - test.expect_stack(&[3]); - - // --- random values -------------------------------------------------------------------------- - // test using u16 values to ensure there's no overflow so the result is valid. 
- let a = rand_value::() as u16; - let b = rand_value::() as u16; - let expected = a as u64 + b as u64; - - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.expect_stack(&[expected]); - - // --- test that the rest of the stack isn't affected ----------------------------------------- - let c = rand_value::(); - let test = build_op_test!(asm_op, &[c, a as u64, b as u64]); - test.expect_stack(&[expected, c]); -} - -#[test] -fn u32checked_add_fail() { - let asm_op = "u32checked_add"; - - // should fail if a >= 2^32 - let test = build_op_test!(asm_op, &[U32_BOUND, 0]); - test.expect_error(TestError::ExecutionError("NotU32Value")); - - // should fail if b >= 2^32 - let test = build_op_test!(asm_op, &[0, U32_BOUND]); - test.expect_error(TestError::ExecutionError("NotU32Value")); - - // should fail if a + b >= 2^32 - let a = u32::MAX; - let b = 1_u64; - let test = build_op_test!(asm_op, &[a as u64, b]); - test.expect_error(TestError::ExecutionError("FailedAssertion")); -} - -#[test] -fn u32checked_add_b() { - let build_asm_op = |param: u16| format!("u32checked_add.{param}"); - - // --- simple cases ---------------------------------------------------------------------------- - let test = build_op_test!(build_asm_op(2).as_str(), &[1]); - test.expect_stack(&[3]); - - let test = build_op_test!(build_asm_op(0).as_str(), &[1]); - test.expect_stack(&[1]); - - // --- random values -------------------------------------------------------------------------- - // test using u16 values to ensure there's no overflow so the result is valid. - let a = rand_value::() as u16; - let b = rand_value::() as u16; - let expected = a as u64 + b as u64; - - let test = build_op_test!(build_asm_op(b).as_str(), &[a as u64]); - test.expect_stack(&[expected]); - - // --- test that the rest of the stack isn't affected ----------------------------------------- - let c = rand_value::(); - - let test = build_op_test!(build_asm_op(b).as_str(), &[c, a as u64]); - test.expect_stack(&[expected, c]); -} - -#[test] -fn u32checked_add_b_fail() { - let build_asm_op = |param: u64| format!("u32checked_add.{param}"); - - // should fail during execution if a >= 2^32. - let test = build_op_test!(build_asm_op(0).as_str(), &[U32_BOUND]); - test.expect_error(TestError::ExecutionError("NotU32Value")); - - // should fail during compilation if b >= 2^32. - test_param_out_of_bounds("u32checked_add", U32_BOUND); - - // should fail if a + b >= 2^32. - let a = u32::MAX; - let b = 1_u64; - - let test = build_op_test!(build_asm_op(b).as_str(), &[a as u64]); - test.expect_error(TestError::ExecutionError("FailedAssertion")); -} - #[test] fn u32wrapping_add() { let asm_op = "u32wrapping_add"; @@ -269,99 +182,6 @@ fn u32overflowing_add3() { assert!(test.execute().is_ok()); } -#[test] -fn u32checked_sub() { - let asm_op = "u32checked_sub"; - - // --- simple cases --------------------------------------------------------------------------- - let test = build_op_test!(asm_op, &[1, 1]); - test.expect_stack(&[0]); - - let test = build_op_test!(asm_op, &[2, 1]); - test.expect_stack(&[1]); - - // --- random u32 values ---------------------------------------------------------------------- - let val1 = rand_value::(); - let val2 = rand_value::(); - // assign the larger value to a and the smaller value to b. 
- let (a, b) = if val1 >= val2 { (val1, val2) } else { (val2, val1) }; - let expected = a - b; - - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.expect_stack(&[expected as u64]); - - // --- test that the rest of the stack isn't affected ----------------------------------------- - let c = rand_value::(); - - let test = build_op_test!(asm_op, &[c, a as u64, b as u64]); - test.expect_stack(&[expected as u64, c]); -} - -#[test] -fn u32checked_sub_fail() { - let asm_op = "u32checked_sub"; - - // should fail if a >= 2^32. - let test = build_op_test!(asm_op, &[U32_BOUND, 0]); - test.expect_error(TestError::ExecutionError("NotU32Value")); - - // should fail if b >= 2^32. - let test = build_op_test!(asm_op, &[0, U32_BOUND]); - test.expect_error(TestError::ExecutionError("NotU32Value")); - - // should fail if a < b. - let a = 1_u64; - let b = 2_u64; - let test = build_op_test!(asm_op, &[a, b]); - test.expect_error(TestError::ExecutionError("FailedAssertion")); -} - -#[test] -fn u32checked_sub_b() { - let build_asm_op = |param: u32| format!("u32checked_sub.{param}"); - - // --- simple cases --------------------------------------------------------------------------- - let test = build_op_test!(build_asm_op(1).as_str(), &[2]); - test.expect_stack(&[1]); - - let test = build_op_test!(build_asm_op(1).as_str(), &[1]); - test.expect_stack(&[0]); - - // --- random u32 values ---------------------------------------------------------------------- - let val1 = rand_value::(); - let val2 = rand_value::(); - // assign the larger value to a and the smaller value to b. - let (a, b) = if val1 >= val2 { (val1, val2) } else { (val2, val1) }; - let expected = a - b; - - let test = build_op_test!(build_asm_op(b).as_str(), &[a as u64]); - test.expect_stack(&[expected as u64]); - - // --- test that the rest of the stack isn't affected ----------------------------------------- - let c = rand_value::(); - - let test = build_op_test!(build_asm_op(b).as_str(), &[c, a as u64]); - test.expect_stack(&[expected as u64, c]); -} - -#[test] -fn u32checked_sub_b_fail() { - let build_asm_op = |param: u64| format!("u32checked_sub.{param}"); - - // should fail during execution if a >= 2^32. - let test = build_op_test!(build_asm_op(0).as_str(), &[U32_BOUND]); - test.expect_error(TestError::ExecutionError("NotU32Value")); - - // should fail during compilation if b >= 2^32. - test_param_out_of_bounds("u32checked_sub", U32_BOUND); - - // should fail if a < b. - let a = 1_u64; - let b = 2_u64; - let test = build_op_test!(build_asm_op(b).as_str(), &[a]); - test.expect_error(TestError::ExecutionError("FailedAssertion")); -} - #[test] fn u32wrapping_sub() { let asm_op = "u32wrapping_sub"; @@ -465,101 +285,6 @@ fn u32overflowing_sub() { test_unchecked_execution(asm_op, 2); } -#[test] -fn u32checked_mul() { - let asm_op = "u32checked_mul"; - - // --- simple cases --------------------------------------------------------------------------- - let test = build_op_test!(asm_op, &[1, 0]); - test.expect_stack(&[0]); - - let test = build_op_test!(asm_op, &[5, 1]); - test.expect_stack(&[5]); - - let test = build_op_test!(asm_op, &[2, 5]); - test.expect_stack(&[10]); - - // --- random values -------------------------------------------------------------------------- - // test using u16 values to ensure there's no overflow so the result is valid. 
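// A hedged reference for the multiplication variants kept below: `u32wrapping_mul` keeps the low
// 32 bits of the product, while the overflowing form also exposes the high word. The hi/lo split
// mirrors the `u32overflowing_madd` proptest in this file, which checks `madd / 2^32` and
// `madd % 2^32`; treating plain multiplication the same way is an assumption.
fn wrapping_mul_model(a: u32, b: u32) -> u32 {
    a.wrapping_mul(b)
}

fn overflowing_mul_model(a: u32, b: u32) -> (u32, u32) {
    let prod = a as u64 * b as u64;
    ((prod >> 32) as u32, prod as u32) // (high word, low word)
}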
- let a = rand_value::(); - let b = rand_value::(); - - let expected: u64 = a as u64 * b as u64; - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.expect_stack(&[expected]); - - // --- test that the rest of the stack isn't affected ----------------------------------------- - let c = rand_value::(); - let test = build_op_test!(asm_op, &[c, a as u64, b as u64]); - test.expect_stack(&[expected, c]); -} - -#[test] -fn u32checked_mul_fail() { - let asm_op = "u32checked_mul"; - - // should fail if a >= 2^32. - let test = build_op_test!(asm_op, &[U32_BOUND, 0]); - test.expect_error(TestError::ExecutionError("NotU32Value")); - - // should fail if b >= 2^32. - let test = build_op_test!(asm_op, &[0, U32_BOUND]); - test.expect_error(TestError::ExecutionError("NotU32Value")); - - // should fail if a * b >= 2^32. - let a = u32::MAX as u64; - let b = 2_u64; - let test = build_op_test!(asm_op, &[a, b]); - test.expect_error(TestError::ExecutionError("FailedAssertion")); -} - -#[test] -fn u32checked_mul_b() { - let build_asm_op = |param: u16| format!("u32checked_mul.{param}"); - - // --- simple cases --------------------------------------------------------------------------- - let test = build_op_test!(build_asm_op(0).as_str(), &[1]); - test.expect_stack(&[0]); - - let test = build_op_test!(build_asm_op(1).as_str(), &[5]); - test.expect_stack(&[5]); - - let test = build_op_test!(build_asm_op(5).as_str(), &[2]); - test.expect_stack(&[10]); - - // --- random values -------------------------------------------------------------------------- - // test using u16 values to ensure there's no overflow so the result is valid. - let a = rand_value::(); - let b = rand_value::(); - - let expected: u64 = a as u64 * b as u64; - let test = build_op_test!(build_asm_op(b).as_str(), &[a as u64]); - test.expect_stack(&[expected]); - - // --- test that the rest of the stack isn't affected ----------------------------------------- - let c = rand_value::(); - let test = build_op_test!(build_asm_op(5).as_str(), &[c, 10]); - test.expect_stack(&[50, c]); -} - -#[test] -fn u32checked_mul_b_fail() { - let build_asm_op = |param: u64| format!("u32checked_mul.{param}"); - - // should fail during execution if a >= 2^32. - let test = build_op_test!(build_asm_op(0).as_str(), &[U32_BOUND]); - test.expect_error(TestError::ExecutionError("NotU32Value")); - - // should fail during compilation if b >= 2^32. - test_param_out_of_bounds("u32checked_mul", U32_BOUND); - - // should fail if a * b >= 2^32. 
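// A pure-Rust model of the arithmetic exercised by the `u32overflowing_madd` proptest in this
// file (d = madd mod 2^32, e = madd div 2^32, with the stack checked as [e, d]); shown here only
// as an illustration of that computation.
fn overflowing_madd_model(a: u32, b: u32, c: u32) -> (u64, u64) {
    let madd = a as u64 * b as u64 + c as u64; // always fits in u64
    (madd >> 32, madd & 0xFFFF_FFFF) // (e, d)
}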
- let a = u32::MAX as u64; - let b = u32::MAX as u64; - let test = build_op_test!(build_asm_op(b).as_str(), &[a]); - test.expect_error(TestError::ExecutionError("FailedAssertion")); -} - #[test] fn u32wrapping_mul() { let asm_op = "u32wrapping_mul"; @@ -703,45 +428,20 @@ fn u32overflowing_madd() { } #[test] -fn u32checked_div() { - let asm_op = "u32checked_div"; - - test_div(asm_op); -} - -#[test] -fn u32checked_div_fail() { - let asm_op = "u32checked_div"; - - // should fail if a >= 2^32 - let test = build_op_test!(asm_op, &[U32_BOUND, 1]); - test.expect_error(TestError::ExecutionError("NotU32Value")); - - // should fail if b >= 2^32 - let test = build_op_test!(asm_op, &[1, U32_BOUND]); - test.expect_error(TestError::ExecutionError("NotU32Value")); - - // should fail if b == 0 - let test = build_op_test!(asm_op, &[1, 0]); - test.expect_error(TestError::ExecutionError("DivideByZero")); -} - -#[test] -fn u32checked_div_b() { - let build_asm_op = |param: u32| format!("u32checked_div.{param}"); - +fn u32div() { // --- simple cases --------------------------------------------------------------------------- - let test = build_op_test!(build_asm_op(1).as_str(), &[0]); + let test = build_op_test!("u32div", &[0, 1]); test.expect_stack(&[0]); - // division with no remainder - let test = build_op_test!(build_asm_op(1).as_str(), &[2]); + let test = build_op_test!("u32div", &[2, 1]); test.expect_stack(&[2]); - // division with remainder - let test = build_op_test!(build_asm_op(2).as_str(), &[1]); + let test = build_op_test!("u32div", &[1, 2]); test.expect_stack(&[0]); + let test = build_op_test!("u32div", &[3, 2]); + test.expect_stack(&[1]); + // --- random u32 values ---------------------------------------------------------------------- let a = rand_value::(); let mut b = rand_value::(); @@ -749,89 +449,38 @@ fn u32checked_div_b() { // ensure we're not using a failure case. b += 1; } - let expected = (a / b) as u64; - - let test = build_op_test!(build_asm_op(b).as_str(), &[a as u64]); - test.expect_stack(&[expected]); + let quot = (a / b) as u64; + let test = build_op_test!("u32div", &[a as u64, b as u64]); + test.expect_stack(&[quot]); // --- test that the rest of the stack isn't affected ----------------------------------------- - let c = rand_value::(); - let test = build_op_test!(build_asm_op(b).as_str(), &[c, a as u64]); - test.expect_stack(&[expected, c]); -} - -#[test] -fn u32checked_div_b_fail() { - let build_asm_op = |param: u64| format!("u32checked_div.{param}"); - - // should fail during execution if a >= 2^32. - let test = build_op_test!(build_asm_op(1).as_str(), &[U32_BOUND]); - test.expect_error(TestError::ExecutionError("NotU32Value")); - - // should fail during compilation if b >= 2^32. - test_param_out_of_bounds("u32checked_div", U32_BOUND); - - // should fail during compilation if b = 0. - let test = build_op_test!(build_asm_op(0).as_str()); - test.expect_error(TestError::AssemblyError("division by zero")); -} - -#[test] -fn u32unchecked_div() { - let asm_op = "u32unchecked_div"; - - // should push d = (a * b) / 2^32 onto the stack. - test_div(asm_op); + let e = rand_value::(); + let test = build_op_test!("u32div", &[e, a as u64, b as u64]); + test.expect_stack(&[quot, e]); // should not fail when inputs are out of bounds. - test_unchecked_execution(asm_op, 2); + test_unchecked_execution("u32div", 2); } #[test] -fn u32unchecked_div_fail() { - let asm_op = "u32unchecked_div"; +fn u32div_fail() { + let asm_op = "u32div"; // should fail if b == 0. 
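// A hedged reference model for the new `u32div` test above: plain floor division on 32-bit
// values, with b == 0 being the failure case checked by `u32div_fail`.
fn u32div_model(a: u32, b: u32) -> u32 {
    assert_ne!(b, 0, "b == 0 is the DivideByZero failure case");
    a / b
}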
let test = build_op_test!(asm_op, &[1, 0]); - test.expect_error(TestError::ExecutionError("DivideByZero")); -} - -#[test] -fn u32checked_mod() { - let asm_op = "u32checked_mod"; - - test_mod(asm_op); -} - -#[test] -fn u32checked_mod_fail() { - let asm_op = "u32checked_mod"; - - // should fail if a >= 2^32 - let test = build_op_test!(asm_op, &[U32_BOUND, 1]); - test.expect_error(TestError::ExecutionError("NotU32Value")); - - // should fail if b >= 2^32 - let test = build_op_test!(asm_op, &[1, U32_BOUND]); - test.expect_error(TestError::ExecutionError("NotU32Value")); - - // should fail if b == 0 - let test = build_op_test!(asm_op, &[1, 0]); - test.expect_error(TestError::ExecutionError("DivideByZero")); + test.expect_error(TestError::ExecutionError(ExecutionError::DivideByZero(1))); } #[test] -fn u32checked_mod_b() { - let build_asm_op = |param: u32| format!("u32checked_mod.{param}"); - +fn u32mod() { // --- simple cases --------------------------------------------------------------------------- - let test = build_op_test!(build_asm_op(5).as_str(), &[10]); + let test = build_op_test!("u32mod", &[10, 5]); test.expect_stack(&[0]); - let test = build_op_test!(build_asm_op(5).as_str(), &[11]); + let test = build_op_test!("u32mod", &[11, 5]); test.expect_stack(&[1]); - let test = build_op_test!(build_asm_op(11).as_str(), &[5]); + let test = build_op_test!("u32mod", &[5, 11]); test.expect_stack(&[5]); // --- random u32 values ---------------------------------------------------------------------- @@ -842,92 +491,41 @@ fn u32checked_mod_b() { b += 1; } let expected = a % b; - - let test = build_op_test!(build_asm_op(b).as_str(), &[a as u64]); + let test = build_op_test!("u32mod", &[a as u64, b as u64]); test.expect_stack(&[expected as u64]); // --- test that the rest of the stack isn't affected ----------------------------------------- let c = rand_value::(); - - let test = build_op_test!(build_asm_op(b).as_str(), &[c, a as u64]); + let test = build_op_test!("u32mod", &[c, a as u64, b as u64]); test.expect_stack(&[expected as u64, c]); -} - -#[test] -fn u32checked_mod_b_fail() { - let build_asm_op = |param: u64| format!("u32checked_mod.{param}"); - - // should fail during execution if a >= 2^32. - let test = build_op_test!(build_asm_op(1).as_str(), &[U32_BOUND]); - test.expect_error(TestError::ExecutionError("NotU32Value")); - - // should fail during compilation if b >= 2^32. - test_param_out_of_bounds("u32checked_mod", U32_BOUND); - - // should fail during compilation if b = 0. - let test = build_op_test!(build_asm_op(0).as_str()); - test.expect_error(TestError::AssemblyError("division by zero")); -} - -#[test] -fn u32unchecked_mod() { - let asm_op = "u32unchecked_mod"; - - test_mod(asm_op); // should not fail when inputs are out of bounds. 
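// A hedged reference model for `u32mod` as exercised above: the remainder of 32-bit integer
// division, with b == 0 again triggering the DivideByZero failure checked below.
fn u32mod_model(a: u32, b: u32) -> u32 {
    assert_ne!(b, 0, "b == 0 is the DivideByZero failure case");
    a % b
}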
- test_unchecked_execution(asm_op, 2); + test_unchecked_execution("u32mod", 2); } #[test] -fn u32unchecked_mod_fail() { - let asm_op = "u32unchecked_mod"; +fn u32mod_fail() { + let asm_op = "u32mod"; // should fail if b == 0 let test = build_op_test!(asm_op, &[1, 0]); - test.expect_error(TestError::ExecutionError("DivideByZero")); + test.expect_error(TestError::ExecutionError(ExecutionError::DivideByZero(1))); } #[test] -fn u32checked_divmod() { - let asm_op = "u32checked_divmod"; - - test_divmod(asm_op); -} - -#[test] -fn u32checked_divmod_fail() { - let asm_op = "u32checked_divmod"; - - // should fail if a >= 2^32 - let test = build_op_test!(asm_op, &[U32_BOUND, 1]); - test.expect_error(TestError::ExecutionError("NotU32Value")); - - // should fail if b >= 2^32 - let test = build_op_test!(asm_op, &[1, U32_BOUND]); - test.expect_error(TestError::ExecutionError("NotU32Value")); - - // should fail if b == 0 - let test = build_op_test!(asm_op, &[1, 0]); - test.expect_error(TestError::ExecutionError("DivideByZero")); -} - -#[test] -fn u32checked_divmod_b() { - let build_asm_op = |param: u32| format!("u32checked_divmod.{param}"); - +fn u32divmod() { // --- simple cases --------------------------------------------------------------------------- - let test = build_op_test!(build_asm_op(1).as_str(), &[0]); + let test = build_op_test!("u32divmod", &[0, 1]); test.expect_stack(&[0, 0]); // division with no remainder - let test = build_op_test!(build_asm_op(1).as_str(), &[2]); + let test = build_op_test!("u32divmod", &[2, 1]); test.expect_stack(&[0, 2]); // division with remainder - let test = build_op_test!(build_asm_op(2).as_str(), &[1]); + let test = build_op_test!("u32divmod", &[1, 2]); test.expect_stack(&[1, 0]); - let test = build_op_test!(build_asm_op(2).as_str(), &[3]); + let test = build_op_test!("u32divmod", &[3, 2]); test.expect_stack(&[1, 1]); // --- random u32 values ---------------------------------------------------------------------- @@ -939,69 +537,30 @@ fn u32checked_divmod_b() { } let quot = (a / b) as u64; let rem = (a % b) as u64; - let test = build_op_test!(build_asm_op(b).as_str(), &[a as u64]); + let test = build_op_test!("u32divmod", &[a as u64, b as u64]); test.expect_stack(&[rem, quot]); // --- test that the rest of the stack isn't affected ----------------------------------------- let e = rand_value::(); - let test = build_op_test!(build_asm_op(b).as_str(), &[e, a as u64]); + let test = build_op_test!("u32divmod", &[e, a as u64, b as u64]); test.expect_stack(&[rem, quot, e]); -} - -#[test] -fn u32checked_divmod_b_fail() { - let build_asm_op = |param: u64| format!("u32checked_divmod.{param}"); - - // should fail during execution if a >= 2^32. - let test = build_op_test!(build_asm_op(1).as_str(), &[U32_BOUND]); - test.expect_error(TestError::ExecutionError("NotU32Value")); - - // should fail during compilation if b >= 2^32. - test_param_out_of_bounds("u32checked_divmod", U32_BOUND); - - // should fail during compilation if b = 0. - let test = build_op_test!(build_asm_op(0).as_str()); - test.expect_error(TestError::AssemblyError("division by zero")); -} - -#[test] -fn u32unchecked_divmod() { - let asm_op = "u32unchecked_divmod"; - - test_divmod(asm_op); // should not fail when inputs are out of bounds. - test_unchecked_execution(asm_op, 2); + test_unchecked_execution("u32divmod", 2); } #[test] -fn u32unchecked_divmod_fail() { - let asm_op = "u32unchecked_divmod"; +fn u32divmod_fail() { + let asm_op = "u32divmod"; // should fail if b == 0. 
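// A hedged reference model for `u32divmod` as exercised above: it produces both the quotient and
// the remainder, and the tests check the stack as [rem, quot] (remainder on top).
fn u32divmod_model(a: u32, b: u32) -> (u32, u32) {
    assert_ne!(b, 0, "b == 0 is the DivideByZero failure case");
    (a % b, a / b) // (rem, quot), matching the expect_stack order above
}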
let test = build_op_test!(asm_op, &[1, 0]); - test.expect_error(TestError::ExecutionError("DivideByZero")); + test.expect_error(TestError::ExecutionError(ExecutionError::DivideByZero(1))); } // U32 OPERATIONS TESTS - RANDOMIZED - ARITHMETIC OPERATIONS // ================================================================================================ proptest! { - #[test] - fn u32checked_add_proptest(a in any::(), b in any::()) { - let asm_op = "u32checked_add"; - - let expected = a as u64 + b as u64; - - // b provided via the stack. - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.prop_expect_stack(&[expected])?; - - // b provided as a parameter. - let asm_op = format!("{asm_op}.{b}"); - let test = build_op_test!(&asm_op, &[a as u64]); - test.prop_expect_stack(&[expected])?; - } - #[test] fn u32unchecked_add_proptest(a in any::(), b in any::()) { let wrapping_asm_op = "u32wrapping_add"; @@ -1029,29 +588,6 @@ proptest! { test.prop_expect_stack(&[hi, lo])?; } - #[test] - fn u32checked_sub_proptest(val1 in any::(), val2 in any::()) { - let asm_op = "u32checked_sub"; - - // assign the larger value to a and the smaller value to b so all parameters are valid. - let (a, b) = if val1 >= val2 { - (val1, val2) - } else { - (val2, val1) - }; - - let expected = a - b; - // b provided via the stack. - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.prop_expect_stack(&[expected as u64])?; - - // b provided as a parameter. - let asm_op = format!("{asm_op}.{b}"); - let test = build_op_test!(&asm_op, &[a as u64]); - test.prop_expect_stack(&[expected as u64])?; - - } - #[test] fn u32unchecked_sub_proptest(a in any::(), b in any::()) { let wrapping_asm_op = "u32wrapping_sub"; @@ -1068,22 +604,6 @@ proptest! { test.prop_expect_stack(&[d, c as u64])?; } - #[test] - fn u32checked_mul_proptest(a in any::(), b in any::()) { - let asm_op = "u32checked_mul"; - - let expected = a as u64 * b as u64; - - // b provided via the stack - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.prop_expect_stack(&[expected])?; - - // b provided as a parameter - let asm_op = format!("{asm_op}.{b}"); - let test = build_op_test!(&asm_op, &[a as u64]); - test.prop_expect_stack(&[expected])?; - } - #[test] fn u32unchecked_mul_proptest(a in any::(), b in any::()) { let wrapping_asm_op = "u32wrapping_mul"; @@ -1103,38 +623,16 @@ proptest! { test.prop_expect_stack(&[d, c as u64])?; } - #[test] - fn u32overflowing_madd_proptest(a in any::(), b in any::(), c in any::()) { - let asm_op = "u32overflowing_madd"; - - let madd = a as u64 * b as u64 + c as u64; - let d = madd % U32_BOUND; - let e = madd / U32_BOUND; - - let test = build_op_test!(asm_op, &[c as u64, a as u64, b as u64]); - test.prop_expect_stack(&[e, d])?; - } - #[test] fn u32div_proptest(a in any::(), b in 1..u32::MAX) { - let asm_op = "u32checked_div"; - + let asm_op = "u32div"; let expected = (a / b) as u64; // b provided via the stack. - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.prop_expect_stack(&[expected])?; - - // b provided as a parameter. - let asm_op = format!("{asm_op}.{b}"); - let test = build_op_test!(&asm_op, &[a as u64]); - test.prop_expect_stack(&[expected])?; - - // unchecked version should produce the same result for valid values. - let asm_op = "u32unchecked_div"; let test = build_op_test!(&asm_op, &[a as u64, b as u64]); test.prop_expect_stack(&[expected])?; + // b provided as a parameter. 
let asm_op = format!("{asm_op}.{b}"); let test = build_op_test!(&asm_op, &[a as u64]); test.prop_expect_stack(&[expected])?; @@ -1142,157 +640,45 @@ proptest! { #[test] fn u32mod_proptest(a in any::(), b in 1..u32::MAX) { - let base_op = "u32checked_mod"; + let asm_op = "u32mod"; + let expected = (a % b) as u64; - let expected = a % b; - - // b provided via the stack - let test = build_op_test!(base_op, &[a as u64, b as u64]); - test.prop_expect_stack(&[expected as u64])?; - - // b provided as a parameter - let asm_op = format!("{base_op}.{b}"); - let test = build_op_test!(&asm_op, &[a as u64]); - test.prop_expect_stack(&[expected as u64])?; - - // unchecked version should produce the same result for valid values. - let asm_op = "u32unchecked_mod"; + // b provided via the stack. let test = build_op_test!(&asm_op, &[a as u64, b as u64]); - test.prop_expect_stack(&[expected as u64])?; + test.prop_expect_stack(&[expected])?; + // b provided as a parameter. let asm_op = format!("{asm_op}.{b}"); let test = build_op_test!(&asm_op, &[a as u64]); - test.prop_expect_stack(&[expected as u64])?; + test.prop_expect_stack(&[expected])?; } #[test] fn u32divmod_proptest(a in any::(), b in 1..u32::MAX) { - let asm_op = "u32checked_divmod"; + let asm_op = "u32divmod"; let quot = (a / b) as u64; let rem = (a % b) as u64; // b provided via the stack. - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.prop_expect_stack(&[rem, quot])?; - - // b provided as a parameter. - let asm_op = format!("{asm_op}.{b}"); - let test = build_op_test!(&asm_op, &[a as u64]); - test.prop_expect_stack(&[rem, quot])?; - - // unchecked version should produce the same result for valid values. - let asm_op = "u32unchecked_divmod"; let test = build_op_test!(&asm_op, &[a as u64, b as u64]); test.prop_expect_stack(&[rem, quot])?; + // b provided as a parameter. let asm_op = format!("{asm_op}.{b}"); let test = build_op_test!(&asm_op, &[a as u64]); test.prop_expect_stack(&[rem, quot])?; } -} - -// HELPER FUNCTIONS -// ================================================================================================ - -/// This helper function tests division without remainder for two u32 inputs for a number of simple -/// cases as well as for random values. It checks that the floor of a / b is pushed to the -/// stack. Finally, it ensures that the rest of the stack was unaffected. -fn test_div(asm_op: &str) { - // --- simple cases --------------------------------------------------------------------------- - let test = build_op_test!(asm_op, &[0, 1]); - test.expect_stack(&[0]); - - let test = build_op_test!(asm_op, &[2, 1]); - test.expect_stack(&[2]); - - let test = build_op_test!(asm_op, &[1, 2]); - test.expect_stack(&[0]); - - let test = build_op_test!(asm_op, &[3, 2]); - test.expect_stack(&[1]); - - // --- random u32 values ---------------------------------------------------------------------- - let a = rand_value::(); - let mut b = rand_value::(); - if b == 0 { - // ensure we're not using a failure case. - b += 1; - } - let quot = (a / b) as u64; - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.expect_stack(&[quot]); - - // --- test that the rest of the stack isn't affected ----------------------------------------- - let e = rand_value::(); - let test = build_op_test!(asm_op, &[e, a as u64, b as u64]); - test.expect_stack(&[quot, e]); -} - -/// This helper function tests the modulus operation for two u32 inputs for a number of simple -/// cases as well as for random values. 
It checks that a % b is pushed to the stack. Finally, it -/// ensures that the rest of the stack was unaffected. -fn test_mod(asm_op: &str) { - // --- simple cases --------------------------------------------------------------------------- - let test = build_op_test!(asm_op, &[10, 5]); - test.expect_stack(&[0]); - - let test = build_op_test!(asm_op, &[11, 5]); - test.expect_stack(&[1]); - - let test = build_op_test!(asm_op, &[5, 11]); - test.expect_stack(&[5]); - - // --- random u32 values ---------------------------------------------------------------------- - let a = rand_value::(); - let mut b = rand_value::(); - if b == 0 { - // ensure we're not using a failure case. - b += 1; - } - let expected = a % b; - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.expect_stack(&[expected as u64]); - // --- test that the rest of the stack isn't affected ----------------------------------------- - let c = rand_value::(); - let test = build_op_test!(asm_op, &[c, a as u64, b as u64]); - test.expect_stack(&[expected as u64, c]); -} - -/// This helper function tests division with remainder for two u32 inputs for a number of simple -/// cases as well as for random values. It checks that the floor of a / b is pushed to the -/// stack, along with the remainder a % b. Finally, it ensures that the rest of the stack was -/// unaffected. -fn test_divmod(asm_op: &str) { - // --- simple cases --------------------------------------------------------------------------- - let test = build_op_test!(asm_op, &[0, 1]); - test.expect_stack(&[0, 0]); - - // division with no remainder - let test = build_op_test!(asm_op, &[2, 1]); - test.expect_stack(&[0, 2]); + #[test] + fn u32overflowing_madd_proptest(a in any::(), b in any::(), c in any::()) { + let asm_op = "u32overflowing_madd"; - // division with remainder - let test = build_op_test!(asm_op, &[1, 2]); - test.expect_stack(&[1, 0]); - let test = build_op_test!(asm_op, &[3, 2]); - test.expect_stack(&[1, 1]); + let madd = a as u64 * b as u64 + c as u64; + let d = madd % U32_BOUND; + let e = madd / U32_BOUND; - // --- random u32 values ---------------------------------------------------------------------- - let a = rand_value::(); - let mut b = rand_value::(); - if b == 0 { - // ensure we're not using a failure case. 
- b += 1; + let test = build_op_test!(asm_op, &[c as u64, a as u64, b as u64]); + test.prop_expect_stack(&[e, d])?; } - let quot = (a / b) as u64; - let rem = (a % b) as u64; - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.expect_stack(&[rem, quot]); - - // --- test that the rest of the stack isn't affected ----------------------------------------- - let e = rand_value::(); - let test = build_op_test!(asm_op, &[e, a as u64, b as u64]); - test.expect_stack(&[rem, quot, e]); } diff --git a/miden/tests/integration/operations/u32_ops/bitwise_ops.rs b/miden/tests/integration/operations/u32_ops/bitwise_ops.rs index af9fd1a3a9..7b29904d85 100644 --- a/miden/tests/integration/operations/u32_ops/bitwise_ops.rs +++ b/miden/tests/integration/operations/u32_ops/bitwise_ops.rs @@ -1,12 +1,16 @@ -use super::{test_input_out_of_bounds, test_param_out_of_bounds}; -use test_utils::{build_op_test, proptest::prelude::*, rand::rand_value, TestError, U32_BOUND}; +use super::test_input_out_of_bounds; +use processor::math::Felt; +use processor::ExecutionError; +use test_utils::{ + build_op_test, proptest::prelude::*, rand::rand_value, TestError, U32_BOUND, ZERO, +}; // U32 OPERATIONS TESTS - MANUAL - BITWISE OPERATIONS // ================================================================================================ #[test] -fn u32checked_and() { - let asm_op = "u32checked_and"; +fn u32and() { + let asm_op = "u32and"; // --- simple cases --------------------------------------------------------------------------- let test = build_op_test!(asm_op, &[1, 1]); @@ -37,19 +41,25 @@ fn u32checked_and() { } #[test] -fn u32checked_and_fail() { - let asm_op = "u32checked_and"; +fn u32and_fail() { + let asm_op = "u32and"; let test = build_op_test!(asm_op, &[U32_BOUND, 0]); - test.expect_error(TestError::ExecutionError("NotU32Value")); + test.expect_error(TestError::ExecutionError(ExecutionError::NotU32Value( + Felt::new(U32_BOUND), + ZERO, + ))); let test = build_op_test!(asm_op, &[0, U32_BOUND]); - test.expect_error(TestError::ExecutionError("NotU32Value")); + test.expect_error(TestError::ExecutionError(ExecutionError::NotU32Value( + Felt::new(U32_BOUND), + ZERO, + ))); } #[test] -fn u32checked_or() { - let asm_op = "u32checked_or"; +fn u32or() { + let asm_op = "u32or"; // --- simple cases --------------------------------------------------------------------------- let test = build_op_test!(asm_op, &[1, 1]); @@ -80,19 +90,25 @@ fn u32checked_or() { } #[test] -fn u32checked_or_fail() { - let asm_op = "u32checked_or"; +fn u32or_fail() { + let asm_op = "u32or"; let test = build_op_test!(asm_op, &[U32_BOUND, 0]); - test.expect_error(TestError::ExecutionError("NotU32Value")); + test.expect_error(TestError::ExecutionError(ExecutionError::NotU32Value( + Felt::new(U32_BOUND), + ZERO, + ))); let test = build_op_test!(asm_op, &[0, U32_BOUND]); - test.expect_error(TestError::ExecutionError("NotU32Value")); + test.expect_error(TestError::ExecutionError(ExecutionError::NotU32Value( + Felt::new(U32_BOUND), + ZERO, + ))); } #[test] -fn u32checked_xor() { - let asm_op = "u32checked_xor"; +fn u32xor() { + let asm_op = "u32xor"; // --- simple cases --------------------------------------------------------------------------- let test = build_op_test!(asm_op, &[1, 1]); @@ -122,19 +138,25 @@ fn u32checked_xor() { } #[test] -fn u32checked_xor_fail() { - let asm_op = "u32checked_xor"; +fn u32xor_fail() { + let asm_op = "u32xor"; let test = build_op_test!(asm_op, &[U32_BOUND, 0]); - 
test.expect_error(TestError::ExecutionError("NotU32Value")); + test.expect_error(TestError::ExecutionError(ExecutionError::NotU32Value( + Felt::new(U32_BOUND), + ZERO, + ))); let test = build_op_test!(asm_op, &[0, U32_BOUND]); - test.expect_error(TestError::ExecutionError("NotU32Value")); + test.expect_error(TestError::ExecutionError(ExecutionError::NotU32Value( + Felt::new(U32_BOUND), + ZERO, + ))); } #[test] -fn u32checked_not() { - let asm_op = "u32checked_not"; +fn u32not() { + let asm_op = "u32not"; // --- simple cases --------------------------------------------------------------------------- let test = build_op_test!(asm_op, &[U32_BOUND - 1]); @@ -157,104 +179,15 @@ fn u32checked_not() { } #[test] -fn u32checked_not_fail() { - let asm_op = "u32checked_not"; +fn u32not_fail() { + let asm_op = "u32not"; test_input_out_of_bounds(asm_op); } #[test] -fn u32checked_shl() { +fn u32shl() { // left shift: pops a from the stack and pushes (a * 2^b) mod 2^32 for a provided value b - let asm_op = "u32checked_shl"; - - // --- test simple case ----------------------------------------------------------------------- - let a = 1_u32; - let b = 1_u32; - let test = build_op_test!(asm_op, &[5, a as u64, b as u64]); - test.expect_stack(&[2, 5]); - - // --- test max values of a and b ------------------------------------------------------------- - let a = (U32_BOUND - 1) as u32; - let b = 31; - - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.expect_stack(&[a.wrapping_shl(b) as u64]); - - // --- test b = 0 ----------------------------------------------------------------------------- - let a = rand_value::(); - let b = 0; - - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.expect_stack(&[a.wrapping_shl(b) as u64]); - - // --- test random values --------------------------------------------------------------------- - let a = rand_value::(); - let b = rand_value::() % 32; - - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.expect_stack(&[a.wrapping_shl(b) as u64]); -} - -#[test] -fn u32checked_shl_fail() { - let asm_op = "u32checked_shl"; - - // should fail if a >= 2^32 - let test = build_op_test!(asm_op, &[U32_BOUND, 1]); - test.expect_error(TestError::ExecutionError("NotU32Value")); - - // should fail if b >= 32 - let test = build_op_test!(asm_op, &[1, 32]); - // if b >= 32, 2^b >= 2^32 or not a u32 - test.expect_error(TestError::ExecutionError("NotU32Value")); -} - -#[test] -fn u32checked_shl_b() { - // left shift: pops a from the stack and pushes (a * 2^b) mod 2^32 for a provided value b - let op_base = "u32checked_shl"; - let get_asm_op = |b: u32| format!("{op_base}.{b}"); - - // --- test simple case ----------------------------------------------------------------------- - let a = 1_u32; - let b = 1_u32; - let test = build_op_test!(get_asm_op(b).as_str(), &[5, a as u64]); - test.expect_stack(&[2, 5]); - - // --- test max values of a and b ------------------------------------------------------------- - let a = (U32_BOUND - 1) as u32; - let b = 31; - - let test = build_op_test!(get_asm_op(b).as_str(), &[a as u64]); - test.expect_stack(&[a.wrapping_shl(b) as u64]); - - // --- test b = 0 ----------------------------------------------------------------------------- - let a = rand_value::(); - let b = 0; - - let test = build_op_test!(get_asm_op(b).as_str(), &[a as u64]); - test.expect_stack(&[a.wrapping_shl(b) as u64]); - - // --- test random values --------------------------------------------------------------------- - let a = rand_value::(); - let b = 
rand_value::() % 32; - - let test = build_op_test!(get_asm_op(b).as_str(), &[a as u64]); - test.expect_stack(&[a.wrapping_shl(b) as u64]); -} - -#[test] -fn u32checked_shl_b_fail() { - let op_base = "u32checked_shl"; - - test_input_out_of_bounds(format!("{}.{}", op_base, 1).as_str()); - test_param_out_of_bounds(op_base, 32); -} - -#[test] -fn u32unchecked_shl() { - // left shift: pops a from the stack and pushes (a * 2^b) mod 2^32 for a provided value b - let asm_op = "u32unchecked_shl"; + let asm_op = "u32shl"; // --- test simple case ----------------------------------------------------------------------- let a = 1_u32; @@ -289,9 +222,9 @@ fn u32unchecked_shl() { } #[test] -fn u32unchecked_shl_b() { +fn u32shl_b() { // left shift: pops a from the stack and pushes (a * 2^b) mod 2^32 for a provided value b - let op_base = "u32unchecked_shl"; + let op_base = "u32shl"; let get_asm_op = |b: u32| format!("{op_base}.{b}"); // --- test simple case ----------------------------------------------------------------------- @@ -314,111 +247,23 @@ fn u32unchecked_shl_b() { let test = build_op_test!(get_asm_op(b).as_str(), &[a as u64]); test.expect_stack(&[a.wrapping_shl(b) as u64]); - // --- test random values --------------------------------------------------------------------- - let a = rand_value::(); - let b = rand_value::() % 32; + // // --- test random values --------------------------------------------------------------------- + // let a = rand_value::(); + // let b = rand_value::() % 32; - let test = build_op_test!(get_asm_op(b).as_str(), &[a as u64]); - test.expect_stack(&[a.wrapping_shl(b) as u64]); + // let test = build_op_test!(get_asm_op(b).as_str(), &[a as u64]); + // test.expect_stack(&[a.wrapping_shl(b) as u64]); - // --- test out of bounds input (should not fail) -------------------------------------------- - let b = 1; - let test = build_op_test!(get_asm_op(b).as_str(), &[U32_BOUND]); - assert!(test.execute().is_ok()); + // // --- test out of bounds input (should not fail) -------------------------------------------- + // let b = 1; + // let test = build_op_test!(get_asm_op(b).as_str(), &[U32_BOUND]); + // assert!(test.execute().is_ok()); } #[test] -fn u32checked_shr() { +fn u32shr() { // right shift: pops a from the stack and pushes a / 2^b for a provided value b - let asm_op = "u32checked_shr"; - - // --- test simple case ----------------------------------------------------------------------- - let a = 4_u32; - let b = 2_u32; - let test = build_op_test!(asm_op, &[5, a as u64, b as u64]); - test.expect_stack(&[1, 5]); - - // --- test max values of a and b ------------------------------------------------------------- - let a = (U32_BOUND - 1) as u32; - let b = 31; - - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.expect_stack(&[a.wrapping_shr(b) as u64]); - - // --- test b = 0 --------------------------------------------------------------------------- - let a = rand_value::(); - let b = 0; - - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.expect_stack(&[a.wrapping_shr(b) as u64]); - - // --- test random values --------------------------------------------------------------------- - let a = rand_value::(); - let b = rand_value::() % 32; - - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.expect_stack(&[a.wrapping_shr(b) as u64]); -} - -#[test] -fn u32checked_shr_fail() { - let asm_op = "u32checked_shr"; - - // should fail if a >= 2^32 - let test = build_op_test!(asm_op, &[U32_BOUND, 1]); - 
test.expect_error(TestError::ExecutionError("NotU32Value")); - - // should fail if b >= 32 - let test = build_op_test!(asm_op, &[1, 32]); - test.expect_error(TestError::ExecutionError("NotU32Value")); -} - -#[test] -fn u32checked_shr_b() { - // right shift: pops a from the stack and pushes a / 2^b for a provided value b - let op_base = "u32checked_shr"; - let get_asm_op = |b: u32| format!("{op_base}.{b}"); - - // --- test simple case ----------------------------------------------------------------------- - let a = 4_u32; - let b = 2_u32; - let test = build_op_test!(get_asm_op(b).as_str(), &[5, a as u64]); - test.expect_stack(&[1, 5]); - - // --- test max values of a and b ------------------------------------------------------------- - let a = (U32_BOUND - 1) as u32; - let b = 31; - - let test = build_op_test!(get_asm_op(b).as_str(), &[a as u64]); - test.expect_stack(&[a.wrapping_shr(b) as u64]); - - // --- test b = 0 --------------------------------------------------------------------------- - let a = rand_value::(); - let b = 0; - - let test = build_op_test!(get_asm_op(b).as_str(), &[a as u64]); - test.expect_stack(&[a.wrapping_shr(b) as u64]); - - // --- test random values --------------------------------------------------------------------- - let a = rand_value::(); - let b = rand_value::() % 32; - - let test = build_op_test!(get_asm_op(b).as_str(), &[a as u64]); - test.expect_stack(&[a.wrapping_shr(b) as u64]); -} - -#[test] -fn u32checked_shr_b_fail() { - let op_base = "u32checked_shr"; - - test_input_out_of_bounds(format!("{}.{}", op_base, 1).as_str()); - test_param_out_of_bounds(op_base, 32); -} - -#[test] -fn u32unchecked_shr() { - // right shift: pops a from the stack and pushes a / 2^b for a provided value b - let asm_op = "u32unchecked_shr"; + let asm_op = "u32shr"; // --- test simple case ----------------------------------------------------------------------- let a = 4_u32; @@ -453,9 +298,9 @@ fn u32unchecked_shr() { } #[test] -fn u32unchecked_shr_b() { +fn u32shr_b() { // right shift: pops a from the stack and pushes a / 2^b for a provided value b - let op_base = "u32unchecked_shr"; + let op_base = "u32shr"; let get_asm_op = |b: u32| format!("{op_base}.{b}"); // --- test simple case ----------------------------------------------------------------------- @@ -492,106 +337,9 @@ fn u32unchecked_shr_b() { } #[test] -fn u32checked_rotl() { +fn u32rotl() { // Computes c by rotating a 32-bit representation of a to the left by b bits. 
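// A hedged model of the shift semantics described in the comments above: for b in 0..32, `u32shl`
// computes (a * 2^b) mod 2^32 and `u32shr` computes floor(a / 2^b); the tests use Rust's
// `wrapping_shl` / `wrapping_shr` as the oracle, which agree with this for b < 32.
fn u32shl_model(a: u32, b: u32) -> u32 {
    assert!(b < 32);
    ((a as u64 * (1u64 << b)) % (1u64 << 32)) as u32
}

fn u32shr_model(a: u32, b: u32) -> u32 {
    assert!(b < 32);
    a >> b
}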
- let asm_op = "u32checked_rotl"; - - // --- test simple case ----------------------------------------------------------------------- - let a = 1_u32; - let b = 1_u32; - let test = build_op_test!(asm_op, &[5, a as u64, b as u64]); - test.expect_stack(&[2, 5]); - - // --- test simple wraparound case with large a ----------------------------------------------- - let a = (1_u64 << 31) as u32; - let b: u32 = 1; - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.expect_stack(&[1]); - - // --- test simple case wraparound case with max b -------------------------------------------- - let a = 2_u32; - let b: u32 = 31; - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.expect_stack(&[1]); - - // --- no change when a is max value (all 1s) ------------------------------------------------- - let a = (U32_BOUND - 1) as u32; - let b = 2; - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.expect_stack(&[a as u64]); - - // --- test b = 0 ----------------------------------------------------------------------------- - let a = rand_value::(); - let b = 0; - - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.expect_stack(&[a.rotate_left(b) as u64]); - - // --- test random values --------------------------------------------------------------------- - let a = rand_value::(); - let b = rand_value::() % 32; - - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.expect_stack(&[a.rotate_left(b) as u64]); -} - -#[test] -fn u32checked_rotl_b() { - // Computes c by rotating a 32-bit representation of a to the left by b bits. - let op_base = "u32checked_rotl"; - let get_asm_op = |b: u32| format!("{op_base}.{b}"); - - // --- test simple case ----------------------------------------------------------------------- - let a = 1_u32; - let b = 1_u32; - let test = build_op_test!(get_asm_op(b).as_str(), &[5, a as u64]); - test.expect_stack(&[2, 5]); - - // --- test simple wraparound case with large a ----------------------------------------------- - let a = (1_u64 << 31) as u32; - let b: u32 = 1; - let test = build_op_test!(get_asm_op(b).as_str(), &[a as u64]); - test.expect_stack(&[1]); - - // --- test simple case wraparound case with max b -------------------------------------------- - let a = 2_u32; - let b: u32 = 31; - let test = build_op_test!(get_asm_op(b).as_str(), &[a as u64]); - test.expect_stack(&[1]); - - // --- no change when a is max value (all 1s) ------------------------------------------------- - let a = (U32_BOUND - 1) as u32; - let b = 2; - let test = build_op_test!(get_asm_op(b).as_str(), &[a as u64]); - test.expect_stack(&[a as u64]); - - // --- test b = 0 --------------------------------------------------------------------------- - let a = rand_value::(); - let b = 0; - - let test = build_op_test!(get_asm_op(b).as_str(), &[a as u64]); - test.expect_stack(&[a.rotate_left(b) as u64]); - - // --- test random values --------------------------------------------------------------------- - let a = rand_value::(); - let b = rand_value::() % 32; - - let test = build_op_test!(get_asm_op(b).as_str(), &[a as u64]); - test.expect_stack(&[a.rotate_left(b) as u64]); -} - -#[test] -fn u32checked_rotl_fail_b() { - let op_base = "u32checked_rotl"; - - test_input_out_of_bounds(format!("{}.{}", op_base, 1).as_str()); - test_param_out_of_bounds(op_base, 32); -} - -#[test] -fn u32unchecked_rotl() { - // Computes c by rotating a 32-bit representation of a to the left by b bits. 
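// A hedged model of the rotation semantics in the surrounding comments: `u32rotl` / `u32rotr`
// rotate the 32-bit representation of a by b bits, and the tests use Rust's
// `rotate_left` / `rotate_right` as the oracle.
fn u32rotl_model(a: u32, b: u32) -> u32 {
    a.rotate_left(b)
}

fn u32rotr_model(a: u32, b: u32) -> u32 {
    a.rotate_right(b)
}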
- let asm_op = "u32unchecked_rotl"; + let asm_op = "u32rotl"; // --- test simple case ----------------------------------------------------------------------- let a = 1_u32; @@ -637,9 +385,9 @@ fn u32unchecked_rotl() { } #[test] -fn u32checked_rotr() { +fn u32rotr() { // Computes c by rotating a 32-bit representation of a to the right by b bits. - let asm_op = "u32checked_rotr"; + let asm_op = "u32rotr"; // --- test simple case ----------------------------------------------------------------------- let a = 2_u32; @@ -678,147 +426,59 @@ fn u32checked_rotr() { let test = build_op_test!(asm_op, &[a as u64, b as u64]); test.expect_stack(&[a.rotate_right(b) as u64]); -} -#[test] -fn u32checked_rotr_fail() { - let asm_op = "u32checked_rotr"; - - // should fail if a >= 2^32 + // --- test out of bounds inputs (should not fail) -------------------------------------------- let test = build_op_test!(asm_op, &[U32_BOUND, 1]); - test.expect_error(TestError::ExecutionError("NotU32Value")); - - // should fail if b >= 32 - let test = build_op_test!(asm_op, &[1, 32]); - test.expect_error(TestError::ExecutionError("FailedAssertion")); + assert!(test.execute().is_ok()); } #[test] -fn u32checked_rotr_b() { - // Computes c by rotating a 32-bit representation of a to the right by b bits. - let op_base = "u32checked_rotr"; - let get_asm_op = |b: u32| format!("{op_base}.{b}"); - - // --- test simple case ----------------------------------------------------------------------- - let a = 2_u32; - let b = 1_u32; - let test = build_op_test!(get_asm_op(b).as_str(), &[5, a as u64]); - test.expect_stack(&[1, 5]); - - // --- test simple wraparound case with small a ----------------------------------------------- - let a = 1_u32; - let b = 1_u32; - let test = build_op_test!(get_asm_op(b).as_str(), &[a as u64]); - test.expect_stack(&[U32_BOUND >> 1]); - - // --- test simple case wraparound case with max b -------------------------------------------- - let a = 1_u32; - let b: u32 = 31; - let test = build_op_test!(get_asm_op(b).as_str(), &[a as u64]); - test.expect_stack(&[2]); - - // --- no change when a is max value (all 1s) ------------------------------------------------- - let a = (U32_BOUND - 1) as u32; - let b = 2; - let test = build_op_test!(get_asm_op(b).as_str(), &[a as u64]); - test.expect_stack(&[a as u64]); - - // --- test b = 0 --------------------------------------------------------------------------- - let a = rand_value::(); - let b = 0; - - let test = build_op_test!(get_asm_op(b).as_str(), &[a as u64]); - test.expect_stack(&[a.rotate_right(b) as u64]); - - // --- test random values --------------------------------------------------------------------- - let a = rand_value::(); - let b = rand_value::() % 32; - - let test = build_op_test!(get_asm_op(b).as_str(), &[a as u64]); - test.expect_stack(&[a.rotate_right(b) as u64]); +fn u32popcnt() { + let asm_op = "u32popcnt"; + build_op_test!(asm_op, &[0]).expect_stack(&[0]); + build_op_test!(asm_op, &[1]).expect_stack(&[1]); + build_op_test!(asm_op, &[555]).expect_stack(&[5]); + build_op_test!(asm_op, &[65536]).expect_stack(&[1]); + build_op_test!(asm_op, &[4294967295]).expect_stack(&[32]); } #[test] -fn u32checked_rotr_b_fail() { - let op_base = "u32checked_rotr"; - - test_input_out_of_bounds(format!("{}.{}", op_base, 1).as_str()); - test_param_out_of_bounds(op_base, 32); +fn u32clz() { + let asm_op = "u32clz"; + build_op_test!(asm_op, &[0]).expect_stack(&[32]); + build_op_test!(asm_op, &[1]).expect_stack(&[31]); + // bit representation of the 67123567 is 
00000100000000000011100101101111 + build_op_test!(asm_op, &[67123567]).expect_stack(&[5]); + build_op_test!(asm_op, &[4294967295]).expect_stack(&[0]); } #[test] -fn u32unchecked_rotr() { - // Computes c by rotating a 32-bit representation of a to the right by b bits. - let asm_op = "u32unchecked_rotr"; - - // --- test simple case ----------------------------------------------------------------------- - let a = 2_u32; - let b = 1_u32; - let test = build_op_test!(asm_op, &[5, a as u64, b as u64]); - test.expect_stack(&[1, 5]); - - // --- test simple wraparound case with small a ----------------------------------------------- - let a = 1_u32; - let b = 1_u32; - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.expect_stack(&[U32_BOUND >> 1]); - - // --- test simple case wraparound case with max b -------------------------------------------- - let a = 1_u32; - let b: u32 = 31; - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.expect_stack(&[2]); - - // --- no change when a is max value (all 1s) ------------------------------------------------- - let a = (U32_BOUND - 1) as u32; - let b = 2; - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.expect_stack(&[a as u64]); - - // --- test b = 0 --------------------------------------------------------------------------- - let a = rand_value::(); - let b = 0; - - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.expect_stack(&[a.rotate_right(b) as u64]); - - // --- test random values --------------------------------------------------------------------- - let a = rand_value::(); - let b = rand_value::() % 32; - - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.expect_stack(&[a.rotate_right(b) as u64]); - - // --- test out of bounds inputs (should not fail) -------------------------------------------- - let test = build_op_test!(asm_op, &[U32_BOUND, 1]); - assert!(test.execute().is_ok()); +fn u32ctz() { + let asm_op = "u32ctz"; + build_op_test!(asm_op, &[0]).expect_stack(&[32]); + build_op_test!(asm_op, &[1]).expect_stack(&[0]); + // bit representaion of the 14688 is 00000000000000000011100101100000 + build_op_test!(asm_op, &[14688]).expect_stack(&[5]); + build_op_test!(asm_op, &[4294967295]).expect_stack(&[0]); } #[test] -fn u32checked_popcnt() { - let asm_op = "u32checked_popcnt"; +fn u32clo() { + let asm_op = "u32clo"; build_op_test!(asm_op, &[0]).expect_stack(&[0]); - build_op_test!(asm_op, &[1]).expect_stack(&[1]); - build_op_test!(asm_op, &[555]).expect_stack(&[5]); - build_op_test!(asm_op, &[65536]).expect_stack(&[1]); + build_op_test!(asm_op, &[1]).expect_stack(&[0]); + // bit representation of the 4185032762 is 11111001011100101000100000111010 + build_op_test!(asm_op, &[4185032762]).expect_stack(&[5]); build_op_test!(asm_op, &[4294967295]).expect_stack(&[32]); } #[test] -fn u32checked_popcnt_fail() { - let asm_op = "u32checked_popcnt"; - build_op_test!(asm_op, &[4294967296]).expect_error(TestError::ExecutionError("NotU32Value")); - build_op_test!(asm_op, &[281474976710655]) - .expect_error(TestError::ExecutionError("NotU32Value")); -} - -#[test] -fn u32unchecked_popcnt() { - let asm_op = "u32unchecked_popcnt"; +fn u32cto() { + let asm_op = "u32cto"; build_op_test!(asm_op, &[0]).expect_stack(&[0]); build_op_test!(asm_op, &[1]).expect_stack(&[1]); - build_op_test!(asm_op, &[555]).expect_stack(&[5]); - build_op_test!(asm_op, &[65536]).expect_stack(&[1]); + // bit representation of the 4185032735 is 11111001011100101000100000011111 + build_op_test!(asm_op, 
&[4185032735]).expect_stack(&[5]); build_op_test!(asm_op, &[4294967295]).expect_stack(&[32]); } @@ -827,8 +487,8 @@ fn u32unchecked_popcnt() { proptest! { #[test] - fn u32checked_and_proptest(a in any::(), b in any::()) { - let asm_opcode = "u32checked_and"; + fn u32and_proptest(a in any::(), b in any::()) { + let asm_opcode = "u32and"; let values = [a as u64, b as u64]; // should result in bitwise AND let expected = (a & b) as u64; @@ -838,8 +498,8 @@ proptest! { } #[test] - fn u32checked_or_proptest(a in any::(), b in any::()) { - let asm_opcode = "u32checked_or"; + fn u32or_proptest(a in any::(), b in any::()) { + let asm_opcode = "u32or"; let values = [a as u64, b as u64]; // should result in bitwise OR let expected = (a | b) as u64; @@ -849,8 +509,8 @@ proptest! { } #[test] - fn u32checked_xor_proptest(a in any::(), b in any::()) { - let asm_opcode = "u32checked_xor"; + fn u32xor_proptest(a in any::(), b in any::()) { + let asm_opcode = "u32xor"; let values = [a as u64, b as u64]; // should result in bitwise XOR let expected = (a ^ b) as u64; @@ -860,8 +520,8 @@ proptest! { } #[test] - fn u32checked_not_proptest(value in any::()) { - let asm_opcode = "u32checked_not"; + fn u32not_proptest(value in any::()) { + let asm_opcode = "u32not"; // should result in bitwise NOT let test = build_op_test!(asm_opcode, &[value as u64]); @@ -869,28 +529,8 @@ proptest! { } #[test] - fn u32checked_shl_proptest(a in any::(), b in 0_u32..32) { - let asm_opcode = "u32checked_shl"; - - // should execute left shift - let expected = a << b; - let test = build_op_test!(asm_opcode, &[a as u64, b as u64]); - test.prop_expect_stack(&[expected as u64])?; - } - - #[test] - fn u32checked_shl_b_proptest(a in any::(), b in 0_u32..32) { - let asm_opcode = format!("u32checked_shl.{b}"); - - // should execute left shift - let expected = a << b; - let test = build_op_test!(asm_opcode, &[a as u64]); - test.prop_expect_stack(&[expected as u64])?; - } - - #[test] - fn u32unchecked_shl_proptest(a in any::(), b in 0_u32..32) { - let asm_opcode = "u32unchecked_shl"; + fn u32shl_proptest(a in any::(), b in 0_u32..32) { + let asm_opcode = "u32shl"; // should execute left shift let c = a.wrapping_shl(b); @@ -899,8 +539,8 @@ proptest! { } #[test] - fn u32unchecked_shl_b_proptest(a in any::(), b in 0_u32..32) { - let asm_opcode = format!("u32unchecked_shl.{b}"); + fn u32shl_b_proptest(a in any::(), b in 0_u32..32) { + let asm_opcode = format!("u32shl.{b}"); // should execute left shift let c = a.wrapping_shl(b); @@ -909,47 +549,8 @@ proptest! 
{ } #[test] - fn u32checked_shr_proptest(a in any::(), b in 0_u32..32) { - let asm_opcode = "u32checked_shr"; - - // should execute right shift - let expected = a >> b; - let test = build_op_test!(asm_opcode, &[a as u64, b as u64]); - test.prop_expect_stack(&[expected as u64])?; - } - - #[test] - fn u32checked_shr_b_proptest(a in any::(), b in 0_u32..32) { - let asm_opcode = format!("u32checked_shr.{b}"); - - // should execute right shift - let expected = a >> b; - let test = build_op_test!(asm_opcode, &[a as u64]); - test.prop_expect_stack(&[expected as u64])?; - } - - #[test] - fn u32checked_rotl_proptest(a in any::(), b in 0_u32..32) { - let asm_opcode = "u32checked_rotl"; - - // should execute left bit rotation - let test = build_op_test!(asm_opcode, &[a as u64, b as u64]); - test.prop_expect_stack(&[a.rotate_left(b) as u64])?; - } - - #[test] - fn u32checked_rotl_b_proptest(a in any::(), b in 0_u32..32) { - let op_base = "u32checked_rotl"; - let asm_opcode = format!("{op_base}.{b}"); - - // should execute left bit rotation - let test = build_op_test!(asm_opcode, &[a as u64]); - test.prop_expect_stack(&[a.rotate_left(b) as u64])?; - } - - #[test] - fn u32unchecked_rotl_proptest(a in any::(), b in 0_u32..32) { - let asm_opcode = "u32unchecked_rotl"; + fn u32rotl_proptest(a in any::(), b in 0_u32..32) { + let asm_opcode = "u32rotl"; // should execute left bit rotation let test = build_op_test!(asm_opcode, &[a as u64, b as u64]); @@ -957,8 +558,8 @@ proptest! { } #[test] - fn u32unchecked_rotl_b_proptest(a in any::(), b in 0_u32..32) { - let op_base = "u32unchecked_rotl"; + fn u32rotl_b_proptest(a in any::(), b in 0_u32..32) { + let op_base = "u32rotl"; let asm_opcode = format!("{op_base}.{b}"); // should execute left bit rotation @@ -967,8 +568,8 @@ proptest! { } #[test] - fn u32checked_rotr_proptest(a in any::(), b in 0_u32..32) { - let asm_opcode = "u32checked_rotr"; + fn u32rotr_proptest(a in any::(), b in 0_u32..32) { + let asm_opcode = "u32rotr"; // should execute right bit rotation let test = build_op_test!(asm_opcode, &[a as u64, b as u64]); @@ -976,8 +577,8 @@ proptest! { } #[test] - fn u32checked_rotr_b_proptest(a in any::(), b in 0_u32..32) { - let op_base = "u32checked_rotr"; + fn u32rotr_b_proptest(a in any::(), b in 0_u32..32) { + let op_base = "u32rotr"; let asm_opcode = format!("{op_base}.{b}"); // should execute right bit rotation @@ -986,36 +587,41 @@ proptest! 
{ } #[test] - fn u32unchecked_rotr_proptest(a in any::(), b in 0_u32..32) { - let asm_opcode = "u32unchecked_rotr"; - - // should execute right bit rotation - let test = build_op_test!(asm_opcode, &[a as u64, b as u64]); - test.prop_expect_stack(&[a.rotate_right(b) as u64])?; + fn u32popcount_proptest(a in any::()) { + let asm_opcode = "u32popcnt"; + let expected = a.count_ones(); + let test = build_op_test!(asm_opcode, &[a as u64]); + test.prop_expect_stack(&[expected as u64])?; } #[test] - fn u32unchecked_rotr_b_proptest(a in any::(), b in 0_u32..32) { - let op_base = "u32unchecked_rotr"; - let asm_opcode = format!("{op_base}.{b}"); + fn u32clz_proptest(a in any::()) { + let asm_opcode = "u32clz"; + let expected = a.leading_zeros(); + let test = build_op_test!(asm_opcode, &[a as u64]); + test.prop_expect_stack(&[expected as u64])?; + } - // should execute right bit rotation + #[test] + fn u32ctz_proptest(a in any::()) { + let asm_opcode = "u32ctz"; + let expected = a.trailing_zeros(); let test = build_op_test!(asm_opcode, &[a as u64]); - test.prop_expect_stack(&[a.rotate_right(b) as u64])?; + test.prop_expect_stack(&[expected as u64])?; } #[test] - fn u32checked_popcount_proptest(a in any::()) { - let asm_opcode = "u32checked_popcnt"; - let expected = a.count_ones(); + fn u32clo_proptest(a in any::()) { + let asm_opcode = "u32clo"; + let expected = a.leading_ones(); let test = build_op_test!(asm_opcode, &[a as u64]); test.prop_expect_stack(&[expected as u64])?; } #[test] - fn u32unchecked_popcount_proptest(a in any::()) { - let asm_opcode = "u32unchecked_popcnt"; - let expected = a.count_ones(); + fn u32cto_proptest(a in any::()) { + let asm_opcode = "u32cto"; + let expected = a.trailing_ones(); let test = build_op_test!(asm_opcode, &[a as u64]); test.prop_expect_stack(&[expected as u64])?; } diff --git a/miden/tests/integration/operations/u32_ops/comparison_ops.rs b/miden/tests/integration/operations/u32_ops/comparison_ops.rs index c8ce7c6c1b..7213d16660 100644 --- a/miden/tests/integration/operations/u32_ops/comparison_ops.rs +++ b/miden/tests/integration/operations/u32_ops/comparison_ops.rs @@ -1,195 +1,13 @@ -use super::{test_inputs_out_of_bounds, test_param_out_of_bounds, test_unchecked_execution}; +use super::test_unchecked_execution; use core::cmp::Ordering; -use test_utils::{build_op_test, proptest::prelude::*, rand::rand_value, TestError, U32_BOUND}; +use test_utils::{build_op_test, proptest::prelude::*, rand::rand_value}; // U32 OPERATIONS TESTS - MANUAL - COMPARISON OPERATIONS // ================================================================================================ #[test] -fn u32checked_eq() { - let asm_op = "u32checked_eq"; - - // --- simple cases --------------------------------------------------------------------------- - let test = build_op_test!(asm_op, &[1, 1]); - test.expect_stack(&[1]); - - let test = build_op_test!(asm_op, &[0, 1]); - test.expect_stack(&[0]); - - // --- random u32: equality ------------------------------------------------------------------- - let a = rand_value::() as u32; - - let test = build_op_test!(asm_op, &[a as u64, a as u64]); - test.expect_stack(&[1]); - - // --- random u32: probable inequality -------------------------------------------------------- - let b = rand_value::() as u32; - let expected = if a == b { 1 } else { 0 }; - - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.expect_stack(&[expected]); - - // --- test that the rest of the stack isn't affected ----------------------------------------- - let c = 
rand_value::(); - - let test = build_op_test!(asm_op, &[c, a as u64, b as u64]); - test.expect_stack(&[expected, c]); -} - -#[test] -fn u32eq_fail() { - let asm_op = "u32checked_eq"; - - // should fail if either one of 2 inputs is out of bounds - test_inputs_out_of_bounds(asm_op, 2); -} - -#[test] -fn u32checked_eq_b() { - let build_asm_op = |param: u32| format!("u32checked_eq.{param}"); - - // --- simple cases --------------------------------------------------------------------------- - let test = build_op_test!(build_asm_op(1).as_str(), &[1]); - test.expect_stack(&[1]); - - let test = build_op_test!(build_asm_op(0).as_str(), &[1]); - test.expect_stack(&[0]); - - // --- random u32: equality ------------------------------------------------------------------- - let a = rand_value::() as u32; - - let test = build_op_test!(build_asm_op(a).as_str(), &[a as u64]); - test.expect_stack(&[1]); - - // --- random u32: probable inequality -------------------------------------------------------- - let b = rand_value::() as u32; - let expected = if a == b { 1 } else { 0 }; - - let test = build_op_test!(build_asm_op(b).as_str(), &[a as u64]); - test.expect_stack(&[expected]); - - // --- test that the rest of the stack isn't affected ----------------------------------------- - let c = rand_value::(); - - let test = build_op_test!(build_asm_op(b).as_str(), &[c, a as u64]); - test.expect_stack(&[expected, c]); -} - -#[test] -fn u32checked_eq_b_fail() { - let asm_op = "u32checked_eq"; - - // should fail when b is out of bounds and provided as a parameter - test_param_out_of_bounds(asm_op, U32_BOUND); - - // should fail when b is a valid parameter but a is out of bounds - let asm_op = format!("{}.{}", asm_op, 1); - let test = build_op_test!(&asm_op, &[U32_BOUND]); - test.expect_error(TestError::ExecutionError("NotU32Value")); -} - -#[test] -fn u32checked_neq() { - let asm_op = "u32checked_neq"; - - // --- simple cases --------------------------------------------------------------------------- - let test = build_op_test!(asm_op, &[1, 1]); - test.expect_stack(&[0]); - - let test = build_op_test!(asm_op, &[0, 1]); - test.expect_stack(&[1]); - - // --- random u32: equality ------------------------------------------------------------------- - let a = rand_value::() as u32; - - let test = build_op_test!(asm_op, &[a as u64, a as u64]); - test.expect_stack(&[0]); - - // --- random u32: probable inequality -------------------------------------------------------- - let b = rand_value::() as u32; - let expected = if a != b { 1 } else { 0 }; - - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.expect_stack(&[expected]); - - // --- test that the rest of the stack isn't affected ----------------------------------------- - let c = rand_value::(); - - let test = build_op_test!(asm_op, &[c, a as u64, b as u64]); - test.expect_stack(&[expected, c]); -} - -#[test] -fn u32checked_neq_fail() { - let asm_op = "u32checked_neq"; - - // should fail if either one of 2 inputs is out of bounds - test_inputs_out_of_bounds(asm_op, 2); -} - -#[test] -fn u32checked_neq_b() { - let build_asm_op = |param: u32| format!("u32checked_neq.{param}"); - - // --- simple cases --------------------------------------------------------------------------- - let test = build_op_test!(build_asm_op(1).as_str(), &[1]); - test.expect_stack(&[0]); - - let test = build_op_test!(build_asm_op(0).as_str(), &[1]); - test.expect_stack(&[1]); - - // --- random u32: equality ------------------------------------------------------------------- - let a = 
rand_value::() as u32; - - let test = build_op_test!(build_asm_op(a).as_str(), &[a as u64]); - test.expect_stack(&[0]); - - // --- random u32: probable inequality -------------------------------------------------------- - let b = rand_value::() as u32; - let expected = if a != b { 1 } else { 0 }; - - let test = build_op_test!(build_asm_op(b).as_str(), &[a as u64]); - test.expect_stack(&[expected]); - - // --- test that the rest of the stack isn't affected ----------------------------------------- - let c = rand_value::(); - - let test = build_op_test!(build_asm_op(b).as_str(), &[c, a as u64]); - test.expect_stack(&[expected, c]); -} - -#[test] -fn u32checked_neq_b_fail() { - let asm_op = "u32checked_neq"; - - // should fail when b is out of bounds and provided as a parameter - test_param_out_of_bounds(asm_op, U32_BOUND); - - // should fail when b is a valid parameter but a is out of bounds - let asm_op = format!("{}.{}", asm_op, 1); - let test = build_op_test!(&asm_op, &[U32_BOUND]); - test.expect_error(TestError::ExecutionError("NotU32Value")); -} - -#[test] -fn u32checked_lt() { - let asm_op = "u32checked_lt"; - - // should push 1 to the stack when a < b and 0 otherwise - test_comparison_op(asm_op, 1, 0, 0); -} - -#[test] -fn u32checked_lt_fail() { - let asm_op = "u32checked_lt"; - - // should fail if either one of 2 inputs is out of bounds - test_inputs_out_of_bounds(asm_op, 2); -} - -#[test] -fn u32unchecked_lt() { - let asm_op = "u32unchecked_lt"; +fn u32lt() { + let asm_op = "u32lt"; // should push 1 to the stack when a < b and 0 otherwise test_comparison_op(asm_op, 1, 0, 0); @@ -199,24 +17,8 @@ fn u32unchecked_lt() { } #[test] -fn u32checked_lte() { - let asm_op = "u32checked_lte"; - - // should push 1 to the stack when a <= b and 0 otherwise - test_comparison_op(asm_op, 1, 1, 0); -} - -#[test] -fn u32checked_lte_fail() { - let asm_op = "u32checked_lte"; - - // should fail if either one of 2 inputs is out of bounds - test_inputs_out_of_bounds(asm_op, 2); -} - -#[test] -fn u32unchecked_lte() { - let asm_op = "u32unchecked_lte"; +fn u32lte() { + let asm_op = "u32lte"; // should push 1 to the stack when a <= b and 0 otherwise test_comparison_op(asm_op, 1, 1, 0); @@ -226,24 +28,8 @@ fn u32unchecked_lte() { } #[test] -fn u32checked_gt() { - let asm_op = "u32checked_gt"; - - // should push 1 to the stack when a > b and 0 otherwise - test_comparison_op(asm_op, 0, 0, 1); -} - -#[test] -fn u32checked_gt_fail() { - let asm_op = "u32checked_gt"; - - // should fail if either one of 2 inputs is out of bounds - test_inputs_out_of_bounds(asm_op, 2); -} - -#[test] -fn u32unchecked_gt() { - let asm_op = "u32unchecked_gt"; +fn u32gt() { + let asm_op = "u32gt"; // should push 1 to the stack when a > b and 0 otherwise test_comparison_op(asm_op, 0, 0, 1); @@ -253,24 +39,8 @@ fn u32unchecked_gt() { } #[test] -fn u32checked_gte() { - let asm_op = "u32checked_gte"; - - // should push 1 to the stack when a >= b and 0 otherwise - test_comparison_op(asm_op, 0, 1, 1); -} - -#[test] -fn u32checked_gte_fail() { - let asm_op = "u32checked_gte"; - - // should fail if either one of 2 inputs is out of bounds - test_inputs_out_of_bounds(asm_op, 2); -} - -#[test] -fn u32unchecked_gte() { - let asm_op = "u32unchecked_gte"; +fn u32gte() { + let asm_op = "u32gte"; // should push 1 to the stack when a >= b and 0 otherwise test_comparison_op(asm_op, 0, 1, 1); @@ -280,24 +50,8 @@ fn u32unchecked_gte() { } #[test] -fn u32checked_min() { - let asm_op = "u32checked_min"; - - // should put the minimum of the 2 inputs on the 
stack - test_min(asm_op); -} - -#[test] -fn u32checked_min_fail() { - let asm_op = "u32checked_min"; - - // should fail if either one of 2 inputs is out of bounds - test_inputs_out_of_bounds(asm_op, 2); -} - -#[test] -fn u32unchecked_min() { - let asm_op = "u32unchecked_min"; +fn u32min() { + let asm_op = "u32min"; // should put the minimum of the 2 inputs on the stack test_min(asm_op); @@ -307,24 +61,8 @@ fn u32unchecked_min() { } #[test] -fn u32checked_max() { - let asm_op = "u32checked_max"; - - // should put the maximum of the 2 inputs on the stack - test_max(asm_op); -} - -#[test] -fn u32checked_max_fail() { - let asm_op = "u32checked_max"; - - // should fail if either one of 2 inputs is out of bounds - test_inputs_out_of_bounds(asm_op, 2); -} - -#[test] -fn u32unchecked_max() { - let asm_op = "u32unchecked_max"; +fn u32max() { + let asm_op = "u32max"; // should put the maximum of the 2 inputs on the stack test_max(asm_op); @@ -337,136 +75,72 @@ fn u32unchecked_max() { // ================================================================================================ proptest! { - #[test] - fn u32checked_eq_proptest(a in any::(), b in any::()) { - let asm_op = "u32checked_eq"; - let values = [b as u64, a as u64]; - - // should test for equality - let expected = if a == b { 1 } else { 0 }; - // b provided via the stack - let test = build_op_test!(asm_op, &values); - test.prop_expect_stack(&[expected])?; - - // b provided as a parameter - let asm_op = format!("{asm_op}.{b}"); - let test = build_op_test!(&asm_op, &[a as u64]); - test.prop_expect_stack(&[expected])?; - } - - #[test] - fn u32checked_neq_proptest(a in any::(), b in any::()) { - let asm_op = "u32checked_neq"; - let values = [b as u64, a as u64]; - - // should test for inequality - let expected = if a != b { 1 } else { 0 }; - // b provided via the stack - let test = build_op_test!(asm_op, &values); - test.prop_expect_stack(&[expected])?; - - // b provided as a parameter - let asm_op = format!("{asm_op}.{b}"); - let test = build_op_test!(&asm_op, &[a as u64]); - test.prop_expect_stack(&[expected])?; - } - #[test] fn u32lt_proptest(a in any::(), b in any::()) { - let asm_op = "u32checked_lt"; let expected = match a.cmp(&b) { Ordering::Less => 1, Ordering::Equal => 0, Ordering::Greater => 0, }; - // checked and unchecked should produce the same result for valid values - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.prop_expect_stack(&[expected])?; - - let asm_op = "u32unchecked_lt"; + let asm_op = "u32lt"; let test = build_op_test!(&asm_op, &[a as u64, b as u64]); test.prop_expect_stack(&[expected])?; } #[test] fn u32lte_proptest(a in any::(), b in any::()) { - let asm_op = "u32checked_lte"; let expected = match a.cmp(&b) { Ordering::Less => 1, Ordering::Equal => 1, Ordering::Greater => 0, }; - // checked and unchecked should produce the same result for valid values - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.prop_expect_stack(&[expected])?; - - let asm_op = "u32unchecked_lte"; + let asm_op = "u32lte"; let test = build_op_test!(&asm_op, &[a as u64, b as u64]); test.prop_expect_stack(&[expected])?; } #[test] fn u32gt_proptest(a in any::(), b in any::()) { - let asm_op = "u32checked_gt"; let expected = match a.cmp(&b) { Ordering::Less => 0, Ordering::Equal => 0, Ordering::Greater => 1, }; - // checked and unchecked should produce the same result for valid values - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.prop_expect_stack(&[expected])?; - - let asm_op = 
"u32unchecked_gt"; + let asm_op = "u32gt"; let test = build_op_test!(&asm_op, &[a as u64, b as u64]); test.prop_expect_stack(&[expected])?; } #[test] fn u32gte_proptest(a in any::(), b in any::()) { - let asm_op = "u32checked_gte"; let expected = match a.cmp(&b) { Ordering::Less => 0, Ordering::Equal => 1, Ordering::Greater => 1, }; - // checked and unchecked should produce the same result for valid values - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.prop_expect_stack(&[expected])?; - - let asm_op = "u32unchecked_gte"; + let asm_op = "u32gte"; let test = build_op_test!(&asm_op, &[a as u64, b as u64]); test.prop_expect_stack(&[expected])?; } #[test] fn u32min_proptest(a in any::(), b in any::()) { - let asm_op = "u32checked_min"; let expected = if a < b { a } else { b }; - // checked and unchecked should produce the same result for valid values - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.prop_expect_stack(&[expected as u64])?; - - let asm_op = "u32unchecked_min"; + let asm_op = "u32min"; let test = build_op_test!(&asm_op, &[a as u64, b as u64]); test.prop_expect_stack(&[expected as u64])?; } #[test] fn u32max_proptest(a in any::(), b in any::()) { - let asm_op = "u32checked_max"; let expected = if a > b { a } else { b }; - // checked and unchecked should produce the same result for valid values - let test = build_op_test!(asm_op, &[a as u64, b as u64]); - test.prop_expect_stack(&[expected as u64])?; - - let asm_op = "u32unchecked_max"; + let asm_op = "u32max"; let test = build_op_test!(&asm_op, &[a as u64, b as u64]); test.prop_expect_stack(&[expected as u64])?; } @@ -510,8 +184,8 @@ fn test_comparison_op(asm_op: &str, expected_lt: u64, expected_eq: u64, expected test.expect_stack(&[expected, c]); } -/// Tests a u32min assembly operation (u32checked_min or u32unchecked_min) against a number of -/// cases to ensure that the operation puts the minimum of 2 input values on the stack. +/// Tests a u32min assembly operation against a number of cases to ensure that the operation puts +/// the minimum of 2 input values on the stack. fn test_min(asm_op: &str) { // --- simple cases --------------------------------------------------------------------------- // a < b should put a on the stack @@ -545,8 +219,8 @@ fn test_min(asm_op: &str) { test.expect_stack(&[expected as u64, c]); } -/// Tests a u32max assembly operation (u32checked_max or u32unchecked_max) against a number of -/// cases to ensure that the operation puts the maximum of 2 input values on the stack. +/// Tests a u32max assembly operation against a number of cases to ensure that the operation puts +/// the maximum of 2 input values on the stack. 
fn test_max(asm_op: &str) { // --- simple cases --------------------------------------------------------------------------- // a < b should put b on the stack diff --git a/miden/tests/integration/operations/u32_ops/conversion_ops.rs b/miden/tests/integration/operations/u32_ops/conversion_ops.rs index 1aa9c23dab..308440dd67 100644 --- a/miden/tests/integration/operations/u32_ops/conversion_ops.rs +++ b/miden/tests/integration/operations/u32_ops/conversion_ops.rs @@ -1,7 +1,8 @@ use super::{prop_randw, test_inputs_out_of_bounds}; +use processor::ExecutionError; use test_utils::{ build_op_test, proptest::prelude::*, rand::rand_value, Felt, StarkField, TestError, U32_BOUND, - WORD_SIZE, + WORD_SIZE, ZERO, }; // U32 OPERATIONS TESTS - MANUAL - CONVERSIONS AND TESTS @@ -91,7 +92,6 @@ fn u32assert() { fn u32assert_fail() { // assertion fails if a >= 2^32 let asm_op = "u32assert"; - let err = "NotU32Value"; // vars to test let equal = 1_u64 << 32; @@ -99,11 +99,17 @@ fn u32assert_fail() { // --- test when a = 2^32 --------------------------------------------------------------------- let test = build_op_test!(asm_op, &[equal]); - test.expect_error(TestError::ExecutionError(err)); + test.expect_error(TestError::ExecutionError(ExecutionError::NotU32Value( + Felt::new(equal), + ZERO, + ))); // --- test when a > 2^32 --------------------------------------------------------------------- let test = build_op_test!(asm_op, &[larger]); - test.expect_error(TestError::ExecutionError(err)); + test.expect_error(TestError::ExecutionError(ExecutionError::NotU32Value( + Felt::new(larger), + ZERO, + ))); } #[test] @@ -124,26 +130,34 @@ fn u32assert2() { #[test] fn u32assert2_fail() { let asm_op = "u32assert2"; - let err = "NotU32Value"; // vars to test // -------- Case 1: a > 2^32 and b > 2^32 --------------------------------------------------- let value_a = (1_u64 << 32) + 1; let value_b = value_a + 2; let test = build_op_test!(asm_op, &[value_a, value_b]); - test.expect_error(TestError::ExecutionError(err)); + test.expect_error(TestError::ExecutionError(ExecutionError::NotU32Value( + Felt::new(value_b), + ZERO, + ))); // -------- Case 2: a > 2^32 and b < 2^32 --------------------------------------------------- let value_a = (1_u64 << 32) + 1; let value_b = 1_u64; let test = build_op_test!(asm_op, &[value_a, value_b]); - test.expect_error(TestError::ExecutionError(err)); + test.expect_error(TestError::ExecutionError(ExecutionError::NotU32Value( + Felt::new(value_a), + ZERO, + ))); // --------- Case 3: a < 2^32 and b > 2^32 -------------------------------------------------- let value_b = (1_u64 << 32) + 1; let value_a = 1_u64; let test = build_op_test!(asm_op, &[value_a, value_b]); - test.expect_error(TestError::ExecutionError(err)); + test.expect_error(TestError::ExecutionError(ExecutionError::NotU32Value( + Felt::new(value_b), + ZERO, + ))); } #[test] @@ -159,14 +173,16 @@ fn u32assertw() { fn u32assertw_fail() { // fails if any element in the word >= 2^32 let asm_op = "u32assertw"; - let err = "NotU32Value"; // --- any one of the inputs inputs >= 2^32 (out of bounds) ----------------------------------- test_inputs_out_of_bounds(asm_op, WORD_SIZE); // --- all elements out of range -------------------------------------------------------------- let test = build_op_test!(asm_op, &[U32_BOUND; WORD_SIZE]); - test.expect_error(TestError::ExecutionError(err)); + test.expect_error(TestError::ExecutionError(ExecutionError::NotU32Value( + Felt::new(U32_BOUND), + ZERO, + ))); } #[test] diff --git 
a/miden/tests/integration/operations/u32_ops/mod.rs b/miden/tests/integration/operations/u32_ops/mod.rs index 397ae1b95b..3cf6b08986 100644 --- a/miden/tests/integration/operations/u32_ops/mod.rs +++ b/miden/tests/integration/operations/u32_ops/mod.rs @@ -1,4 +1,5 @@ -use test_utils::{build_op_test, prop_randw, TestError, U32_BOUND}; +use processor::ExecutionError; +use test_utils::{build_op_test, prop_randw, Felt, TestError, U32_BOUND, ZERO}; mod arithmetic_ops; mod bitwise_ops; @@ -12,7 +13,10 @@ mod conversion_ops; /// ensure that it fails when the input is >= 2^32. pub fn test_input_out_of_bounds(asm_op: &str) { let test = build_op_test!(asm_op, &[U32_BOUND]); - test.expect_error(TestError::ExecutionError("NotU32Value")); + test.expect_error(TestError::ExecutionError(ExecutionError::NotU32Value( + Felt::new(U32_BOUND), + ZERO, + ))); } /// This helper function tests a provided u32 assembly operation, which takes multiple inputs, to @@ -26,18 +30,13 @@ pub fn test_inputs_out_of_bounds(asm_op: &str, input_count: usize) { i_inputs[i] = U32_BOUND; let test = build_op_test!(asm_op, &i_inputs); - test.expect_error(TestError::ExecutionError("NotU32Value")); + test.expect_error(TestError::ExecutionError(ExecutionError::NotU32Value( + Felt::new(U32_BOUND), + ZERO, + ))); } } -/// This helper function tests a provided assembly operation which takes a single parameter -/// to ensure that it fails when that parameter is over the maximum allowed value (out of bounds). -pub fn test_param_out_of_bounds(asm_op_base: &str, gt_max_value: u64) { - let asm_op = format!("{asm_op_base}.{gt_max_value}"); - let test = build_op_test!(&asm_op); - test.expect_error(TestError::AssemblyError("parameter")); -} - /// This helper function tests that when the given u32 assembly instruction is executed on /// out-of-bounds inputs it does not fail. Each input is tested independently. 
pub fn test_unchecked_execution(asm_op: &str, input_count: usize) { diff --git a/processor/Cargo.toml b/processor/Cargo.toml index 2ea01e56fe..d624167b6c 100644 --- a/processor/Cargo.toml +++ b/processor/Cargo.toml @@ -1,11 +1,12 @@ [package] name = "miden-processor" -version = "0.7.0" +version = "0.8.0" description = "Miden VM processor" authors = ["miden contributors"] readme = "README.md" license = "MIT" repository = "https://github.com/0xPolygonMiden/miden-vm" +documentation = "https://docs.rs/miden-processor/0.8.0" categories = ["emulators", "no-std"] keywords = ["miden", "virtual-machine"] edition = "2021" @@ -18,19 +19,18 @@ doctest = false [features] concurrent = ["std", "winter-prover/concurrent"] default = ["std"] -internals = [] -std = ["log/std", "vm-core/std", "winter-prover/std"] -sve = ["std", "vm-core/sve"] +internals = ["miden-air/internals"] +std = ["vm-core/std", "winter-prover/std"] [dependencies] -log = { version = "0.4", default-features = false, optional = true } -vm-core = { package = "miden-core", path = "../core", version = "0.7", default-features = false } -miden-air = { package = "miden-air", path = "../air", version = "0.7", default-features = false } -winter-prover = { package = "winter-prover", version = "0.6", default-features = false } +tracing = { version = "0.1", default-features = false, features = ["attributes"] } +vm-core = { package = "miden-core", path = "../core", version = "0.8", default-features = false } +miden-air = { package = "miden-air", path = "../air", version = "0.8", default-features = false } +winter-prover = { package = "winter-prover", version = "0.8", default-features = false } [dev-dependencies] -logtest = { version = "2.0", default-features = false } -miden-assembly = { package = "miden-assembly", path = "../assembly", version = "0.7", default-features = false } +logtest = { version = "2.0", default-features = false } +miden-assembly = { package = "miden-assembly", path = "../assembly", version = "0.8", default-features = false } test-utils = { package = "miden-test-utils", path = "../test-utils" } -winter-fri = { package = "winter-fri", version = "0.6" } -winter-utils = { package = "winter-utils", version = "0.6" } +winter-fri = { package = "winter-fri", version = "0.8" } +winter-utils = { package = "winter-utils", version = "0.8" } diff --git a/processor/README.md b/processor/README.md index d176d158e1..b0d9935619 100644 --- a/processor/README.md +++ b/processor/README.md @@ -62,7 +62,6 @@ A much more in-depth description of Miden VM design is available [here](https:// Miden processor can be compiled with the following features: * `std` - enabled by default and relies on the Rust standard library. -* `sve` - enables [SVE](https://en.wikipedia.org/wiki/AArch64#Scalable_Vector_Extension_(SVE))-based acceleration of the RPO hash function on supported platforms (e.g., Graviton 3). * `no_std` does not rely on the Rust standard library and enables compilation to WebAssembly. To compile with `no_std`, disable default features via `--no-default-features` flag. 
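The u32_ops test hunks above replace string-based error matching ("NotU32Value") with the structured `ExecutionError::NotU32Value(Felt, Felt)` variant. Below is a minimal sketch of the same comparison written outside the test macros, assuming only items already imported in those hunks (`processor::ExecutionError`, `Felt`, `ZERO` from `test_utils`); the helper name `is_not_u32_failure` is hypothetical and not part of this patch.

    use processor::ExecutionError;
    use test_utils::{Felt, ZERO};

    // Checks that `err` is the NotU32Value failure for `value`; the second field is
    // expected to be ZERO, mirroring the expectations constructed in the tests above.
    fn is_not_u32_failure(err: &ExecutionError, value: u64) -> bool {
        matches!(
            err,
            ExecutionError::NotU32Value(v, second) if *v == Felt::new(value) && *second == ZERO
        )
    }

Matching on the typed variant keeps the assertions precise even if the error's display string changes, which is the motivation for threading `Felt` and `ZERO` through the test-helper imports above.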
diff --git a/processor/src/chiplets/aux_trace/bus.rs b/processor/src/chiplets/aux_trace/bus.rs deleted file mode 100644 index 50d603572c..0000000000 --- a/processor/src/chiplets/aux_trace/bus.rs +++ /dev/null @@ -1,281 +0,0 @@ -use super::{ - super::{hasher::HasherLookup, BitwiseLookup, KernelProcLookup, MemoryLookup}, - BTreeMap, BusTraceBuilder, ColMatrix, Felt, FieldElement, LookupTableRow, Vec, -}; - -// CHIPLETS BUS -// ================================================================================================ - -/// The Chiplets bus tracks data requested from or provided by chiplets in the Chiplets module. It -/// processes lookup requests from the stack & decoder and response data from the chiplets. -/// -/// For correct execution, the lookup data used by the stack for each chiplet must be a permutation -/// of the lookups executed by that chiplet so that they cancel out. This is ensured by the `b_chip` -/// bus column. When the `b_chip` column is built, requests from the stack must be divided out and -/// lookup results provided by the chiplets must be multiplied in. To ensure that all lookups are -/// attributed to the correct chiplet and operation, a unique chiplet operation label must be -/// included in the lookup row value when it is computed. - -#[derive(Default)] -pub struct ChipletsBus { - lookup_hints: BTreeMap, - requests: Vec, - responses: Vec, - // TODO: remove queued requests by refactoring the hasher/decoder interactions so that the - // lookups are built as they are requested. This will be made easier by removing state info from - // the HasherLookup struct. Primarily it will require a refactor of `hash_span_block`, - // `start_span_block`, `respan`, and `end_span_block`. - queued_requests: Vec, -} - -impl ChipletsBus { - // LOOKUP MUTATORS - // -------------------------------------------------------------------------------------------- - - /// Requests lookups for a single operation at the specified cycle. A Hasher operation request - /// can contain one or more lookups, while Bitwise and Memory requests will only contain a - /// single lookup. - fn request_lookups(&mut self, request_cycle: u32, request_indices: &mut Vec) { - self.lookup_hints - .entry(request_cycle) - .and_modify(|bus_row| { - bus_row.send_requests(request_indices); - }) - .or_insert_with(|| ChipletsBusRow::new(request_indices, None)); - } - - /// Provides lookup data at the specified cycle, which is the row of the Chiplets execution - /// trace that contains this lookup row. - fn provide_lookup(&mut self, response_cycle: u32) { - let response_idx = self.responses.len() as u32; - self.lookup_hints - .entry(response_cycle) - .and_modify(|bus_row| { - bus_row.send_response(response_idx); - }) - .or_insert_with(|| ChipletsBusRow::new(&[], Some(response_idx))); - } - - // HASHER LOOKUPS - // -------------------------------------------------------------------------------------------- - - /// Requests lookups at the specified `cycle` for the initial row and result row of Hash - /// operations in the Hash Chiplet. This request is expected to originate from operation - /// executors requesting one or more hash operations for the Stack where all operation lookups - /// must be included at the same cycle. For simple permutations this will require 2 lookups, - /// while for a Merkle root update it will require 4, since two Hash operations are required. 
- pub(crate) fn request_hasher_operation(&mut self, lookups: &[HasherLookup], cycle: u32) { - debug_assert!( - lookups.len() == 2 || lookups.len() == 4, - "incorrect number of lookup rows for hasher operation request" - ); - let mut request_indices = vec![0; lookups.len()]; - for (idx, lookup) in lookups.iter().enumerate() { - request_indices[idx] = self.requests.len() as u32; - self.requests.push(ChipletLookup::Hasher(*lookup)); - } - self.request_lookups(cycle, &mut request_indices); - } - - /// Requests the specified lookup from the Hash Chiplet at the specified `cycle`. Single lookup - /// requests are expected to originate from the decoder during control block decoding. This - /// lookup can be for either the initial or the final row of the hash operation. - pub(crate) fn request_hasher_lookup(&mut self, lookup: HasherLookup, cycle: u32) { - self.request_lookups(cycle, &mut vec![self.requests.len() as u32]); - self.requests.push(ChipletLookup::Hasher(lookup)); - } - - /// Adds the request for the specified lookup to a queue from which it can be sent later when - /// the cycle of the request is known. Queued requests are expected to originate from the - /// decoder, since the hash is computed at the start of each control block (along with all - /// required lookups), but the decoder does not request intermediate and final lookups until the - /// end of the control block or until a `RESPAN`, in the case of `SPAN` blocks with more than - /// one operation batch. - pub(crate) fn enqueue_hasher_request(&mut self, lookup: HasherLookup) { - self.queued_requests.push(lookup); - } - - /// Pops the top HasherLookup request off the queue and sends it to the bus. This request is - /// expected to originate from the decoder as it continues or finalizes control blocks with - /// `RESPAN` or `END`. - pub(crate) fn send_queued_hasher_request(&mut self, cycle: u32) { - let lookup = self.queued_requests.pop(); - debug_assert!(lookup.is_some(), "no queued requests"); - - if let Some(lookup) = lookup { - self.request_hasher_lookup(lookup, cycle); - } - } - - /// Provides the data of a hash chiplet operation contained in the [Hasher] table. The hash - /// lookup value is provided at cycle `response_cycle`, which is the row of the execution trace - /// that contains this Hasher row. It will always be either the first or last row of a Hasher - /// operation cycle. - pub(crate) fn provide_hasher_lookup(&mut self, lookup: HasherLookup, response_cycle: u32) { - self.provide_lookup(response_cycle); - self.responses.push(ChipletLookup::Hasher(lookup)); - } - - /// Provides multiple hash lookup values and their response cycles, which are the rows of the - /// execution trace which contains the corresponding hasher row for either the start or end of - /// a hasher operation cycle. - pub(crate) fn provide_hasher_lookups(&mut self, lookups: &[HasherLookup]) { - for lookup in lookups.iter() { - self.provide_hasher_lookup(*lookup, lookup.cycle()); - } - } - - // BITWISE LOOKUPS - // -------------------------------------------------------------------------------------------- - - /// Requests the specified bitwise lookup at the specified `cycle`. This request is expected to - /// originate from operation executors. - pub(crate) fn request_bitwise_operation(&mut self, lookup: BitwiseLookup, cycle: u32) { - self.request_lookups(cycle, &mut vec![self.requests.len() as u32]); - self.requests.push(ChipletLookup::Bitwise(lookup)); - } - - /// Provides the data of a bitwise operation contained in the [Bitwise] table. 
The bitwise value - /// is provided at cycle `response_cycle`, which is the row of the execution trace that contains - /// this Bitwise row. It will always be the final row of a Bitwise operation cycle. - pub(crate) fn provide_bitwise_operation(&mut self, lookup: BitwiseLookup, response_cycle: u32) { - self.provide_lookup(response_cycle); - self.responses.push(ChipletLookup::Bitwise(lookup)); - } - - // MEMORY LOOKUPS - // -------------------------------------------------------------------------------------------- - - /// Sends the specified memory access requests. There must be exactly one or two requests. The - /// requests are made at the specified `cycle` and are expected to originate from operation - /// executors. - pub(crate) fn request_memory_operation(&mut self, lookups: &[MemoryLookup], cycle: u32) { - debug_assert!( - lookups.len() == 1 || lookups.len() == 2, - "invalid number of requested memory operations" - ); - let mut request_indices = vec![0; lookups.len()]; - for (idx, lookup) in lookups.iter().enumerate() { - request_indices[idx] = self.requests.len() as u32; - self.requests.push(ChipletLookup::Memory(*lookup)); - } - self.request_lookups(cycle, &mut request_indices); - } - - /// Provides the data of the specified memory access. The memory access data is provided at - /// cycle `response_cycle`, which is the row of the execution trace that contains this Memory - /// row. - pub(crate) fn provide_memory_operation(&mut self, lookup: MemoryLookup, response_cycle: u32) { - self.provide_lookup(response_cycle); - self.responses.push(ChipletLookup::Memory(lookup)); - } - - // KERNEL ROM LOOKUPS - // -------------------------------------------------------------------------------------------- - - /// Requests the specified kernel procedure lookup at the specified `cycle`. This request is - /// expected to originate from operation executors. - pub(crate) fn request_kernel_proc_call(&mut self, lookup: KernelProcLookup, cycle: u32) { - self.request_lookups(cycle, &mut vec![self.requests.len() as u32]); - self.requests.push(ChipletLookup::KernelRom(lookup)); - } - - /// Provides a kernel procedure call contained in the [KernelRom] chiplet. The procedure access - /// is provided at cycle `response_cycle`, which is the row of the execution trace that contains - /// this [KernelRom] row. - pub(crate) fn provide_kernel_proc_call( - &mut self, - lookup: KernelProcLookup, - response_cycle: u32, - ) { - self.provide_lookup(response_cycle); - self.responses.push(ChipletLookup::KernelRom(lookup)); - } - - // AUX TRACE BUILDER GENERATION - // -------------------------------------------------------------------------------------------- - - /// Converts this [ChipletsBus] into an auxiliary trace builder which can be used to construct - /// the auxiliary trace column describing the [Chiplets] lookups at every cycle. - pub(crate) fn into_aux_builder(self) -> BusTraceBuilder { - let lookup_hints = self.lookup_hints.into_iter().collect(); - - BusTraceBuilder::new(lookup_hints, self.requests, self.responses) - } - - // PUBLIC ACCESSORS - // -------------------------------------------------------------------------------------------- - - /// Returns an option with the lookup hint for the specified cycle. - #[cfg(test)] - pub(crate) fn get_lookup_hint(&self, cycle: u32) -> Option<&ChipletsBusRow> { - self.lookup_hints.get(&cycle) - } - - /// Returns the ith lookup response provided by the Chiplets module. 
- #[cfg(test)] - pub(crate) fn get_response_row(&self, i: usize) -> ChipletLookup { - self.responses[i].clone() - } -} - -// CHIPLETS LOOKUPS -// ================================================================================================ - -/// This represents all communication with the Chiplets Bus at a single cycle. Multiple requests can -/// be sent to the bus in any given cycle, but only one response can be provided. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct ChipletsBusRow { - requests: Vec, - response: Option, -} - -impl ChipletsBusRow { - pub(crate) fn new(requests: &[u32], response: Option) -> Self { - ChipletsBusRow { - requests: requests.to_vec(), - response, - } - } - - pub(super) fn requests(&self) -> &[u32] { - &self.requests - } - - pub(super) fn response(&self) -> Option { - self.response - } - - fn send_requests(&mut self, requests: &mut Vec) { - self.requests.append(requests); - } - - fn send_response(&mut self, response: u32) { - debug_assert!(self.response.is_none(), "bus row already contains a response"); - self.response = Some(response); - } -} - -/// Data representing a single lookup row in one of the [Chiplets]. -#[derive(Debug, Clone, PartialEq, Eq)] -pub(crate) enum ChipletLookup { - Bitwise(BitwiseLookup), - Hasher(HasherLookup), - KernelRom(KernelProcLookup), - Memory(MemoryLookup), -} - -impl LookupTableRow for ChipletLookup { - fn to_value>( - &self, - main_trace: &ColMatrix, - alphas: &[E], - ) -> E { - match self { - ChipletLookup::Bitwise(row) => row.to_value(main_trace, alphas), - ChipletLookup::Hasher(row) => row.to_value(main_trace, alphas), - ChipletLookup::KernelRom(row) => row.to_value(main_trace, alphas), - ChipletLookup::Memory(row) => row.to_value(main_trace, alphas), - } - } -} diff --git a/processor/src/chiplets/aux_trace/mod.rs b/processor/src/chiplets/aux_trace/mod.rs index 03d75f877b..7292da2c19 100644 --- a/processor/src/chiplets/aux_trace/mod.rs +++ b/processor/src/chiplets/aux_trace/mod.rs @@ -1,29 +1,55 @@ -use super::{ - trace::{build_lookup_table_row_values, AuxColumnBuilder, LookupTableRow}, - BTreeMap, ColMatrix, Felt, FieldElement, StarkField, Vec, Word, +use super::{super::trace::AuxColumnBuilder, Felt, FieldElement}; +use crate::utils::collections::*; + +use miden_air::trace::{ + chiplets::{ + bitwise::OP_CYCLE_LEN as BITWISE_OP_CYCLE_LEN, + hasher::{ + CAPACITY_LEN, DIGEST_RANGE, HASH_CYCLE_LEN, LINEAR_HASH_LABEL, MP_VERIFY_LABEL, + MR_UPDATE_NEW_LABEL, MR_UPDATE_OLD_LABEL, NUM_ROUNDS, RETURN_HASH_LABEL, + RETURN_STATE_LABEL, STATE_WIDTH, + }, + kernel_rom::KERNEL_PROC_LABEL, + memory::{MEMORY_READ_LABEL, MEMORY_WRITE_LABEL}, + }, + main_trace::MainTrace, }; -mod bus; -pub(crate) use bus::{ChipletLookup, ChipletsBus, ChipletsBusRow}; +use vm_core::{Operation, Word, ONE, ZERO}; -mod virtual_table; -pub(crate) use virtual_table::{ChipletsVTableRow, ChipletsVTableUpdate}; +// CONSTANTS +// ================================================================================================ -/// Contains all relevant information and describes how to construct the execution trace for -/// chiplets-related auxiliary columns (used in multiset checks). 
-pub struct AuxTraceBuilder { - bus_builder: BusTraceBuilder, - table_builder: ChipletsVTableTraceBuilder, -} +const JOIN: u8 = Operation::Join.op_code(); +const SPLIT: u8 = Operation::Split.op_code(); +const LOOP: u8 = Operation::Loop.op_code(); +const DYN: u8 = Operation::Dyn.op_code(); +const CALL: u8 = Operation::Call.op_code(); +const SYSCALL: u8 = Operation::SysCall.op_code(); +const SPAN: u8 = Operation::Span.op_code(); +const RESPAN: u8 = Operation::Respan.op_code(); +const END: u8 = Operation::End.op_code(); +const AND: u8 = Operation::U32and.op_code(); +const XOR: u8 = Operation::U32xor.op_code(); +const MLOADW: u8 = Operation::MLoadW.op_code(); +const MSTOREW: u8 = Operation::MStoreW.op_code(); +const MLOAD: u8 = Operation::MLoad.op_code(); +const MSTORE: u8 = Operation::MStore.op_code(); +const MSTREAM: u8 = Operation::MStream.op_code(); +const RCOMBBASE: u8 = Operation::RCombBase.op_code(); +const HPERM: u8 = Operation::HPerm.op_code(); +const MPVERIFY: u8 = Operation::MpVerify.op_code(); +const MRUPDATE: u8 = Operation::MrUpdate.op_code(); +const NUM_HEADER_ALPHAS: usize = 4; -impl AuxTraceBuilder { - pub fn new(bus_builder: BusTraceBuilder, table_builder: ChipletsVTableTraceBuilder) -> Self { - Self { - bus_builder, - table_builder, - } - } +// CHIPLETS AUXILIARY TRACE BUILDER +// ================================================================================================ + +/// Constructs the execution trace for chiplets-related auxiliary columns (used in multiset checks). +#[derive(Default)] +pub struct AuxTraceBuilder {} +impl AuxTraceBuilder { // COLUMN TRACE CONSTRUCTOR // -------------------------------------------------------------------------------------------- @@ -32,11 +58,13 @@ impl AuxTraceBuilder { /// provided by chiplets in the Chiplets module. pub fn build_aux_columns>( &self, - main_trace: &ColMatrix, + main_trace: &MainTrace, rand_elements: &[E], ) -> Vec> { - let t_chip = self.table_builder.build_aux_column(main_trace, rand_elements); - let b_chip = self.bus_builder.build_aux_column(main_trace, rand_elements); + let v_table_col_builder = ChipletsVTableColBuilder::default(); + let bus_col_builder = BusColumnBuilder::default(); + let t_chip = v_table_col_builder.build_aux_column(main_trace, rand_elements); + let b_chip = bus_col_builder.build_aux_column(main_trace, rand_elements); vec![t_chip, b_chip] } } @@ -45,203 +73,917 @@ impl AuxTraceBuilder { // ================================================================================================ /// Describes how to construct the execution trace of the chiplets bus auxiliary trace column. -pub struct BusTraceBuilder { - pub(super) lookup_hints: Vec<(u32, ChipletsBusRow)>, - pub(super) requests: Vec, - pub(super) responses: Vec, -} - -impl BusTraceBuilder { - pub(crate) fn new( - lookup_hints: Vec<(u32, ChipletsBusRow)>, - requests: Vec, - responses: Vec, - ) -> Self { - Self { - lookup_hints, - requests, - responses, - } - } -} - -impl AuxColumnBuilder for BusTraceBuilder { - /// This method is required, but because it is only called inside `build_row_values` which is - /// overridden below, it is not used here and should not be called. - fn get_table_rows(&self) -> &[ChipletLookup] { - unimplemented!() - } +#[derive(Default)] +pub struct BusColumnBuilder {} - /// Returns hints which describe the [Chiplets] lookup requests and responses during program - /// execution. Each update hint is accompanied by a clock cycle at which the update happened. 
- /// - /// Internally, each update hint also contains an index of the row into the full list of request - /// rows or response rows, depending on whether it is a request, a response, or both (in which - /// case it contains 2 indices). - fn get_table_hints(&self) -> &[(u32, ChipletsBusRow)] { - &self.lookup_hints - } - - /// Returns the value by which the running product column should be multiplied for the provided - /// hint value. - fn get_multiplicand>( - &self, - hint: ChipletsBusRow, - row_values: &[E], - inv_row_values: &[E], - ) -> E { - let mut mult = if let Some(response_idx) = hint.response() { - row_values[response_idx as usize] - } else { - E::ONE - }; +impl> AuxColumnBuilder for BusColumnBuilder { + /// Constructs the requests made by the VM-components to the chiplets at row i. + fn get_requests_at(&self, main_trace: &MainTrace, alphas: &[E], row: usize) -> E + where + E: FieldElement, + { + let op_code_felt = main_trace.get_op_code(row); + let op_code = op_code_felt.as_int() as u8; - for request_idx in hint.requests() { - mult *= inv_row_values[*request_idx as usize]; + match op_code { + JOIN | SPLIT | LOOP | DYN | CALL => { + build_control_block_request(main_trace, op_code_felt, alphas, row) + } + SYSCALL => build_syscall_block_request(main_trace, op_code_felt, alphas, row), + SPAN => build_span_block_request(main_trace, alphas, row), + RESPAN => build_respan_block_request(main_trace, alphas, row), + END => build_end_block_request(main_trace, alphas, row), + AND => build_bitwise_request(main_trace, ZERO, alphas, row), + XOR => build_bitwise_request(main_trace, ONE, alphas, row), + MLOADW => build_mem_request_word(main_trace, MEMORY_READ_LABEL, alphas, row), + MSTOREW => build_mem_request_word(main_trace, MEMORY_WRITE_LABEL, alphas, row), + MLOAD => build_mem_request_element(main_trace, MEMORY_READ_LABEL, alphas, row), + MSTORE => build_mem_request_element(main_trace, MEMORY_WRITE_LABEL, alphas, row), + MSTREAM => build_mstream_request(main_trace, alphas, row), + RCOMBBASE => build_rcomb_base_request(main_trace, alphas, row), + HPERM => build_hperm_request(main_trace, alphas, row), + MPVERIFY => build_mpverify_request(main_trace, alphas, row), + MRUPDATE => build_mrupdate_request(main_trace, alphas, row), + _ => E::ONE, } - - mult } - /// Build the row values and inverse values used to build the auxiliary column. - /// - /// The row values to be included come from the responses and the inverse values come from - /// requests. Since responses are grouped by chiplet, the operation order for the requests and - /// responses will be permutations of each other rather than sharing the same order. Therefore, - /// the `row_values` and `inv_row_values` must be built separately. - fn build_row_values(&self, main_trace: &ColMatrix, alphas: &[E]) -> (Vec, Vec) + /// Constructs the responses from the chiplets to the other VM-components at row i. 
+ fn get_responses_at(&self, main_trace: &MainTrace, alphas: &[E], row: usize) -> E where E: FieldElement, { - // get the row values from the resonse rows - let row_values = self - .responses - .iter() - .map(|response| response.to_value(main_trace, alphas)) - .collect(); - // get the inverse values from the request rows - let (_, inv_row_values) = build_lookup_table_row_values(&self.requests, main_trace, alphas); - - (row_values, inv_row_values) + let selector0 = main_trace.chiplet_selector_0(row); + let selector1 = main_trace.chiplet_selector_1(row); + let selector2 = main_trace.chiplet_selector_2(row); + let selector3 = main_trace.chiplet_selector_3(row); + let selector4 = main_trace.chiplet_selector_4(row); + + if selector0 == ZERO { + build_hasher_chiplet_responses(main_trace, row, alphas, selector1, selector2, selector3) + } else if selector1 == ZERO { + debug_assert_eq!(selector0, ONE); + build_bitwise_chiplet_responses(main_trace, row, selector2, alphas) + } else if selector2 == ZERO { + debug_assert_eq!(selector0, ONE); + debug_assert_eq!(selector1, ONE); + build_memory_chiplet_responses(main_trace, row, selector3, alphas) + } else if selector3 == ZERO { + debug_assert_eq!(selector0, ONE); + debug_assert_eq!(selector1, ONE); + debug_assert_eq!(selector2, ONE); + build_kernel_chiplet_responses(main_trace, row, selector4, alphas) + * build_kernel_procedure_table_responses(main_trace, row, alphas) + } else { + debug_assert_eq!(selector0, ONE); + debug_assert_eq!(selector1, ONE); + debug_assert_eq!(selector2, ONE); + debug_assert_eq!(selector3, ONE); + E::ONE + } } } // VIRTUAL TABLE TRACE BUILDER // ================================================================================================ -/// Describes how to construct the execution trace of the chiplets virtual table, used to manage -/// internal updates and data required by the chiplets. -/// -/// This manages construction of a single column which first represents the state of the sibling -/// table (used in Merkle root update computation), and then is subsequently used to represent the -/// procedures contained in the kernel ROM. Thus, it is expected that the initial value is ONE, the -/// value after all sibling table updates are completed is again ONE, and the value at the end of -/// the trace is the product of the representations of the kernel ROM procedures. -#[derive(Debug, Clone, Default)] -pub struct ChipletsVTableTraceBuilder { - pub(super) hints: Vec<(u32, ChipletsVTableUpdate)>, - pub(super) rows: Vec, -} - -impl ChipletsVTableTraceBuilder { - // STATE MUTATORS - // -------------------------------------------------------------------------------------------- +/// Describes how to construct the execution trace of the chiplets virtual table auxiliary trace +/// column. +#[derive(Default)] +pub struct ChipletsVTableColBuilder {} - /// Specifies that an entry for the provided sibling was added to the chiplets virtual table at - /// the specified step. - /// - /// It is assumed that the table is empty or contains only sibling entries at this point and has - /// not been used for any other chiplet updates. 
- pub fn sibling_added(&mut self, step: u32, index: Felt, sibling: Word) { - let row_index = self.rows.len(); - let update = ChipletsVTableUpdate::SiblingAdded(row_index as u32); - self.hints.push((step, update)); - self.rows.push(ChipletsVTableRow::new_sibling(index, sibling)); +impl> AuxColumnBuilder for ChipletsVTableColBuilder { + fn get_requests_at(&self, main_trace: &MainTrace, alphas: &[E], row: usize) -> E { + chiplets_vtable_remove_sibling(main_trace, alphas, row) } - /// Specifies that an entry for a sibling was removed from the chiplets virtual table. The entry - /// is defined by the provided offset. For example, if row_offset = 2, the second from the last - /// entry was removed from the table. - /// - /// It is assumed that the table contains only sibling entries at this point and has not been - /// used for any other chiplet updates. - pub fn sibling_removed(&mut self, step: u32, row_offset: usize) { - let row_index = self.rows.len() - row_offset - 1; - let update = ChipletsVTableUpdate::SiblingRemoved(row_index as u32); - self.hints.push((step, update)); + fn get_responses_at(&self, main_trace: &MainTrace, alphas: &[E], row: usize) -> E { + chiplets_vtable_add_sibling(main_trace, alphas, row) + * chiplets_kernel_table_include(main_trace, alphas, row) } +} - /// Specifies a kernel procedure that must be added to the virtual table. - /// - /// It is assumed that kernel procedures will only be added after all sibling updates have been - /// completed. - pub fn add_kernel_proc(&mut self, step: u32, addr: Felt, proc_hash: Word) { - let proc_index = self.rows.len(); - let update = ChipletsVTableUpdate::KernelProcAdded(proc_index as u32); - self.hints.push((step, update)); - self.rows.push(ChipletsVTableRow::new_kernel_proc(addr, proc_hash)); - } +// CHIPLETS VIRTUAL TABLE REQUESTS +// ================================================================================================ - // TEST HELPERS - // -------------------------------------------------------------------------------------------- - #[cfg(test)] - pub fn hints(&self) -> &[(u32, ChipletsVTableUpdate)] { - &self.hints - } +/// Constructs the inclusions to the table when the hasher absorbs a new sibling node while +/// computing the old Merkle root. +fn chiplets_vtable_add_sibling(main_trace: &MainTrace, alphas: &[E], row: usize) -> E +where + E: FieldElement, +{ + let f_mv: bool = main_trace.f_mv(row); + let f_mva: bool = if row == 0 { false } else { main_trace.f_mva(row - 1) }; - #[cfg(test)] - pub fn rows(&self) -> &[ChipletsVTableRow] { - &self.rows + if f_mv || f_mva { + let index = if f_mva { + main_trace.chiplet_node_index(row - 1) + } else { + main_trace.chiplet_node_index(row) + }; + let lsb = index.as_int() & 1; + if lsb == 0 { + let sibling = &main_trace.chiplet_hasher_state(row)[DIGEST_RANGE.end..]; + alphas[0] + + alphas[3].mul_base(index) + + alphas[12].mul_base(sibling[0]) + + alphas[13].mul_base(sibling[1]) + + alphas[14].mul_base(sibling[2]) + + alphas[15].mul_base(sibling[3]) + } else { + let sibling = &main_trace.chiplet_hasher_state(row)[DIGEST_RANGE]; + alphas[0] + + alphas[3].mul_base(index) + + alphas[8].mul_base(sibling[0]) + + alphas[9].mul_base(sibling[1]) + + alphas[10].mul_base(sibling[2]) + + alphas[11].mul_base(sibling[3]) + } + } else { + E::ONE } } -impl AuxColumnBuilder for ChipletsVTableTraceBuilder { - /// Returns a list of rows which were added to and then removed from the chiplets virtual table. 
- /// - /// The order of the rows in the list is the same as the order in which the rows were added to - /// the table. - fn get_table_rows(&self) -> &[ChipletsVTableRow] { - &self.rows +/// Constructs the removals from the table when the hasher absorbs a new sibling node while +/// computing the new Merkle root. +fn chiplets_vtable_remove_sibling(main_trace: &MainTrace, alphas: &[E], row: usize) -> E +where + E: FieldElement, +{ + let f_mu: bool = main_trace.f_mu(row); + let f_mua: bool = if row == 0 { false } else { main_trace.f_mua(row - 1) }; + + if f_mu || f_mua { + let index = if f_mua { + main_trace.chiplet_node_index(row - 1) + } else { + main_trace.chiplet_node_index(row) + }; + let lsb = index.as_int() & 1; + if lsb == 0 { + let sibling = &main_trace.chiplet_hasher_state(row)[DIGEST_RANGE.end..]; + alphas[0] + + alphas[3].mul_base(index) + + alphas[12].mul_base(sibling[0]) + + alphas[13].mul_base(sibling[1]) + + alphas[14].mul_base(sibling[2]) + + alphas[15].mul_base(sibling[3]) + } else { + let sibling = &main_trace.chiplet_hasher_state(row)[DIGEST_RANGE]; + alphas[0] + + alphas[3].mul_base(index) + + alphas[8].mul_base(sibling[0]) + + alphas[9].mul_base(sibling[1]) + + alphas[10].mul_base(sibling[2]) + + alphas[11].mul_base(sibling[3]) + } + } else { + E::ONE } +} - /// Returns hints which describe how the chiplets virtual table was updated during program - /// execution. Each update hint is accompanied by a clock cycle at which the update happened. - /// - /// Internally, each update hint also contains an index of the row into the full list of rows - /// which was either added or removed. - fn get_table_hints(&self) -> &[(u32, ChipletsVTableUpdate)] { - &self.hints +/// Constructs the inclusions to the kernel procedure table. +fn chiplets_kernel_table_include(main_trace: &MainTrace, alphas: &[E], row: usize) -> E +where + E: FieldElement, +{ + if main_trace.is_kernel_row(row) && main_trace.is_addr_change(row) { + alphas[0] + + alphas[1].mul_base(main_trace.addr(row)) + + alphas[2].mul_base(main_trace.chiplet_kernel_root_0(row)) + + alphas[3].mul_base(main_trace.chiplet_kernel_root_1(row)) + + alphas[4].mul_base(main_trace.chiplet_kernel_root_2(row)) + + alphas[5].mul_base(main_trace.chiplet_kernel_root_3(row)) + } else { + E::ONE } +} - /// Returns the value by which the running product column should be multiplied for the provided - /// hint value. - fn get_multiplicand>( - &self, - hint: ChipletsVTableUpdate, - row_values: &[E], - inv_row_values: &[E], - ) -> E { - match hint { - ChipletsVTableUpdate::SiblingAdded(inserted_row_idx) => { - row_values[inserted_row_idx as usize] - } - ChipletsVTableUpdate::SiblingRemoved(removed_row_idx) => { - inv_row_values[removed_row_idx as usize] - } - ChipletsVTableUpdate::KernelProcAdded(idx) => row_values[idx as usize], +// CHIPLETS REQUESTS +// ================================================================================================ + +/// Builds requests made to the hasher chiplet at the start of a control block. 
+fn build_control_block_request>( + main_trace: &MainTrace, + op_code_felt: Felt, + alphas: &[E], + row: usize, +) -> E { + let op_label = LINEAR_HASH_LABEL; + let addr_nxt = main_trace.addr(row + 1); + let first_cycle_row = addr_to_row_index(addr_nxt) % HASH_CYCLE_LEN == 0; + let transition_label = if first_cycle_row { op_label + 16 } else { op_label + 32 }; + + let header = + alphas[0] + alphas[1].mul_base(Felt::from(transition_label)) + alphas[2].mul_base(addr_nxt); + + let state = main_trace.decoder_hasher_state(row); + + header + build_value(&alphas[8..16], &state) + alphas[5].mul_base(op_code_felt) +} + +/// Builds requests made to kernel ROM chiplet when initializing a syscall block. +fn build_syscall_block_request>( + main_trace: &MainTrace, + op_code_felt: Felt, + alphas: &[E], + row: usize, +) -> E { + let factor1 = build_control_block_request(main_trace, op_code_felt, alphas, row); + + let op_label = KERNEL_PROC_LABEL; + let state = main_trace.decoder_hasher_state(row); + let factor2 = alphas[0] + + alphas[1].mul_base(op_label) + + alphas[2].mul_base(state[0]) + + alphas[3].mul_base(state[1]) + + alphas[4].mul_base(state[2]) + + alphas[5].mul_base(state[3]); + + factor1 * factor2 +} + +/// Builds requests made to the hasher chiplet at the start of a span block. +fn build_span_block_request>( + main_trace: &MainTrace, + alphas: &[E], + row: usize, +) -> E { + let op_label = LINEAR_HASH_LABEL; + let addr_nxt = main_trace.addr(row + 1); + let first_cycle_row = addr_to_row_index(addr_nxt) % HASH_CYCLE_LEN == 0; + let transition_label = if first_cycle_row { op_label + 16 } else { op_label + 32 }; + + let header = + alphas[0] + alphas[1].mul_base(Felt::from(transition_label)) + alphas[2].mul_base(addr_nxt); + + let state = main_trace.decoder_hasher_state(row); + + header + build_value(&alphas[8..16], &state) +} + +/// Builds requests made to the hasher chiplet at the start of a respan block. +fn build_respan_block_request>( + main_trace: &MainTrace, + alphas: &[E], + row: usize, +) -> E { + let op_label = LINEAR_HASH_LABEL; + let addr_nxt = main_trace.addr(row + 1); + + let first_cycle_row = addr_to_row_index(addr_nxt - ONE) % HASH_CYCLE_LEN == 0; + let transition_label = if first_cycle_row { op_label + 16 } else { op_label + 32 }; + + let header = alphas[0] + + alphas[1].mul_base(Felt::from(transition_label)) + + alphas[2].mul_base(addr_nxt - ONE) + + alphas[3].mul_base(ZERO); + + let state = &main_trace.chiplet_hasher_state(row - 2)[CAPACITY_LEN..]; + let state_nxt = &main_trace.chiplet_hasher_state(row - 1)[CAPACITY_LEN..]; + + header + build_value(&alphas[8..16], state_nxt) - build_value(&alphas[8..16], state) +} + +/// Builds requests made to the hasher chiplet at the end of a block. +fn build_end_block_request>( + main_trace: &MainTrace, + alphas: &[E], + row: usize, +) -> E { + let op_label = RETURN_HASH_LABEL; + let addr = main_trace.addr(row) + Felt::from(NUM_ROUNDS as u8); + + let first_cycle_row = addr_to_row_index(addr) % HASH_CYCLE_LEN == 0; + let transition_label = if first_cycle_row { op_label + 16 } else { op_label + 32 }; + + let header = + alphas[0] + alphas[1].mul_base(Felt::from(transition_label)) + alphas[2].mul_base(addr); + + let state = main_trace.decoder_hasher_state(row); + let digest = &state[..4]; + + header + build_value(&alphas[8..12], digest) +} + +/// Builds requests made to the bitwise chiplet. This can be either a request for the computation +/// of a `XOR` or an `AND` operation. 
+fn build_bitwise_request>( + main_trace: &MainTrace, + is_xor: Felt, + alphas: &[E], + row: usize, +) -> E { + let op_label = get_op_label(ONE, ZERO, is_xor, ZERO); + let a = main_trace.stack_element(1, row); + let b = main_trace.stack_element(0, row); + let z = main_trace.stack_element(0, row + 1); + + alphas[0] + + alphas[1].mul_base(op_label) + + alphas[2].mul_base(a) + + alphas[3].mul_base(b) + + alphas[4].mul_base(z) +} + +/// Builds `MLOAD` and `MSTORE` requests made to the memory chiplet. +fn build_mem_request_element>( + main_trace: &MainTrace, + op_label: u8, + alphas: &[E], + row: usize, +) -> E { + let word = [ + main_trace.stack_element(0, row + 1), + main_trace.helper_register(2, row), + main_trace.helper_register(1, row), + main_trace.helper_register(0, row), + ]; + let addr = main_trace.stack_element(0, row); + + compute_memory_request(main_trace, op_label, alphas, row, addr, word) +} + +/// Builds `MLOADW` and `MSTOREW` requests made to the memory chiplet. +fn build_mem_request_word>( + main_trace: &MainTrace, + op_label: u8, + alphas: &[E], + row: usize, +) -> E { + let word = [ + main_trace.stack_element(3, row + 1), + main_trace.stack_element(2, row + 1), + main_trace.stack_element(1, row + 1), + main_trace.stack_element(0, row + 1), + ]; + let addr = main_trace.stack_element(0, row); + + compute_memory_request(main_trace, op_label, alphas, row, addr, word) +} + +/// Builds `MSTREAM` requests made to the memory chiplet. +fn build_mstream_request>( + main_trace: &MainTrace, + alphas: &[E], + row: usize, +) -> E { + let word1 = [ + main_trace.stack_element(7, row + 1), + main_trace.stack_element(6, row + 1), + main_trace.stack_element(5, row + 1), + main_trace.stack_element(4, row + 1), + ]; + let word2 = [ + main_trace.stack_element(3, row + 1), + main_trace.stack_element(2, row + 1), + main_trace.stack_element(1, row + 1), + main_trace.stack_element(0, row + 1), + ]; + let addr = main_trace.stack_element(12, row); + let op_label = MEMORY_READ_LABEL; + + let factor1 = compute_memory_request(main_trace, op_label, alphas, row, addr, word1); + let factor2 = compute_memory_request(main_trace, op_label, alphas, row, addr + ONE, word2); + + factor1 * factor2 +} + +/// Builds `RCOMBBASE` requests made to the memory chiplet. +fn build_rcomb_base_request>( + main_trace: &MainTrace, + alphas: &[E], + row: usize, +) -> E { + let tz0 = main_trace.helper_register(0, row); + let tz1 = main_trace.helper_register(1, row); + let tzg0 = main_trace.helper_register(2, row); + let tzg1 = main_trace.helper_register(3, row); + let a0 = main_trace.helper_register(4, row); + let a1 = main_trace.helper_register(5, row); + let z_ptr = main_trace.stack_element(13, row); + let a_ptr = main_trace.stack_element(14, row); + let op_label = MEMORY_READ_LABEL; + + let factor1 = + compute_memory_request(main_trace, op_label, alphas, row, z_ptr, [tz0, tz1, tzg0, tzg1]); + let factor2 = + compute_memory_request(main_trace, op_label, alphas, row, a_ptr, [a0, a1, ZERO, ZERO]); + + factor1 * factor2 +} + +/// Builds `HPERM` requests made to the hash chiplet. 
+fn build_hperm_request>( + main_trace: &MainTrace, + alphas: &[E], + row: usize, +) -> E { + let helper_0 = main_trace.helper_register(0, row); + + let s0_s12_cur = [ + main_trace.stack_element(0, row), + main_trace.stack_element(1, row), + main_trace.stack_element(2, row), + main_trace.stack_element(3, row), + main_trace.stack_element(4, row), + main_trace.stack_element(5, row), + main_trace.stack_element(6, row), + main_trace.stack_element(7, row), + main_trace.stack_element(8, row), + main_trace.stack_element(9, row), + main_trace.stack_element(10, row), + main_trace.stack_element(11, row), + ]; + + let s0_s12_nxt = [ + main_trace.stack_element(0, row + 1), + main_trace.stack_element(1, row + 1), + main_trace.stack_element(2, row + 1), + main_trace.stack_element(3, row + 1), + main_trace.stack_element(4, row + 1), + main_trace.stack_element(5, row + 1), + main_trace.stack_element(6, row + 1), + main_trace.stack_element(7, row + 1), + main_trace.stack_element(8, row + 1), + main_trace.stack_element(9, row + 1), + main_trace.stack_element(10, row + 1), + main_trace.stack_element(11, row + 1), + ]; + + let op_label = LINEAR_HASH_LABEL; + let op_label = if addr_to_hash_cycle(helper_0) == 0 { + op_label + 16 + } else { + op_label + 32 + }; + + let sum_input = alphas[4..16] + .iter() + .rev() + .enumerate() + .fold(E::ZERO, |acc, (i, x)| acc + x.mul_base(s0_s12_cur[i])); + let v_input = alphas[0] + + alphas[1].mul_base(Felt::from(op_label)) + + alphas[2].mul_base(helper_0) + + sum_input; + + let op_label = RETURN_STATE_LABEL; + let op_label = if addr_to_hash_cycle(helper_0 + Felt::new(7)) == 0 { + op_label + 16 + } else { + op_label + 32 + }; + + let sum_output = alphas[4..16] + .iter() + .rev() + .enumerate() + .fold(E::ZERO, |acc, (i, x)| acc + x.mul_base(s0_s12_nxt[i])); + let v_output = alphas[0] + + alphas[1].mul_base(Felt::from(op_label)) + + alphas[2].mul_base(helper_0 + Felt::new(7)) + + sum_output; + + v_input * v_output +} + +/// Builds `MPVERIFY` requests made to the hash chiplet. +fn build_mpverify_request>( + main_trace: &MainTrace, + alphas: &[E], + row: usize, +) -> E { + let helper_0 = main_trace.helper_register(0, row); + + let s0_s3 = [ + main_trace.stack_element(0, row), + main_trace.stack_element(1, row), + main_trace.stack_element(2, row), + main_trace.stack_element(3, row), + ]; + let s4 = main_trace.stack_element(4, row); + let s5 = main_trace.stack_element(5, row); + let s6_s9 = [ + main_trace.stack_element(6, row), + main_trace.stack_element(7, row), + main_trace.stack_element(8, row), + main_trace.stack_element(9, row), + ]; + + let op_label = MP_VERIFY_LABEL; + let op_label = if addr_to_hash_cycle(helper_0) == 0 { + op_label + 16 + } else { + op_label + 32 + }; + + let sum_input = alphas[8..12] + .iter() + .rev() + .enumerate() + .fold(E::ZERO, |acc, (i, x)| acc + x.mul_base(s0_s3[i])); + + let v_input = alphas[0] + + alphas[1].mul_base(Felt::from(op_label)) + + alphas[2].mul_base(helper_0) + + alphas[3].mul_base(s5) + + sum_input; + + let op_label = RETURN_HASH_LABEL; + let op_label = if (helper_0).as_int() % 8 == 0 { + op_label + 16 + } else { + op_label + 32 + }; + + let sum_output = alphas[8..12] + .iter() + .rev() + .enumerate() + .fold(E::ZERO, |acc, (i, x)| acc + x.mul_base(s6_s9[i])); + let v_output = alphas[0] + + alphas[1].mul_base(Felt::from(op_label)) + + alphas[2].mul_base(helper_0 + s4.mul_small(8) - ONE) + + sum_output; + + v_input * v_output +} + +/// Builds `MRUPDATE` requests made to the hash chiplet. 
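// --- editor's illustrative sketch (not part of the patch) ---------------------------------------
// The hperm/mpverify requests above pick between two transition labels depending on where the
// hasher row falls inside its 8-row cycle: the op label is offset by 16 on the first row of a
// cycle and by 32 otherwise. A minimal sketch of that rule, assuming only that hasher addresses
// are 1-based (see `addr_to_hash_cycle` further below); the names are hypothetical.
const TOY_HASH_CYCLE_LEN: u64 = 8;

/// Returns the op label adjusted for the row's position within its hash cycle.
fn toy_transition_label(op_label: u64, addr: u64) -> u64 {
    let row = addr - 1; // hasher addresses start at 1
    if row % TOY_HASH_CYCLE_LEN == 0 {
        op_label + 16 // first row of the cycle
    } else {
        op_label + 32 // any other row, including the last row of the cycle
    }
}

#[test]
fn toy_transition_labels() {
    assert_eq!(toy_transition_label(3, 1), 19); // addr 1 -> row 0 -> first cycle row
    assert_eq!(toy_transition_label(3, 8), 35); // addr 8 -> row 7 -> last cycle row
    assert_eq!(toy_transition_label(3, 9), 19); // addr 9 -> row 8 -> start of next cycle
}
// -------------------------------------------------------------------------------------------------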
+fn build_mrupdate_request>( + main_trace: &MainTrace, + alphas: &[E], + row: usize, +) -> E { + let helper_0 = main_trace.helper_register(0, row); + + let s0_s3 = [ + main_trace.stack_element(0, row), + main_trace.stack_element(1, row), + main_trace.stack_element(2, row), + main_trace.stack_element(3, row), + ]; + let s0_s3_nxt = [ + main_trace.stack_element(0, row + 1), + main_trace.stack_element(1, row + 1), + main_trace.stack_element(2, row + 1), + main_trace.stack_element(3, row + 1), + ]; + let s4 = main_trace.stack_element(4, row); + let s5 = main_trace.stack_element(5, row); + let s6_s9 = [ + main_trace.stack_element(6, row), + main_trace.stack_element(7, row), + main_trace.stack_element(8, row), + main_trace.stack_element(9, row), + ]; + let s10_s13 = [ + main_trace.stack_element(10, row), + main_trace.stack_element(11, row), + main_trace.stack_element(12, row), + main_trace.stack_element(13, row), + ]; + + let op_label = MR_UPDATE_OLD_LABEL; + let op_label = if addr_to_hash_cycle(helper_0) == 0 { + op_label + 16 + } else { + op_label + 32 + }; + + let sum_input = alphas[8..12] + .iter() + .rev() + .enumerate() + .fold(E::ZERO, |acc, (i, x)| acc + x.mul_base(s0_s3[i])); + let v_input_old = alphas[0] + + alphas[1].mul_base(Felt::from(op_label)) + + alphas[2].mul_base(helper_0) + + alphas[3].mul_base(s5) + + sum_input; + + let op_label = RETURN_HASH_LABEL; + let op_label = if addr_to_hash_cycle(helper_0 + s4.mul_small(8) - ONE) == 0 { + op_label + 16 + } else { + op_label + 32 + }; + + let sum_output = alphas[8..12] + .iter() + .rev() + .enumerate() + .fold(E::ZERO, |acc, (i, x)| acc + x.mul_base(s6_s9[i])); + let v_output_old = alphas[0] + + alphas[1].mul_base(Felt::from(op_label)) + + alphas[2].mul_base(helper_0 + s4.mul_small(8) - ONE) + + sum_output; + + let op_label = MR_UPDATE_NEW_LABEL; + let op_label = if addr_to_hash_cycle(helper_0 + s4.mul_small(8)) == 0 { + op_label + 16 + } else { + op_label + 32 + }; + let sum_input = alphas[8..12] + .iter() + .rev() + .enumerate() + .fold(E::ZERO, |acc, (i, x)| acc + x.mul_base(s10_s13[i])); + let v_input_new = alphas[0] + + alphas[1].mul_base(Felt::from(op_label)) + + alphas[2].mul_base(helper_0 + s4.mul_small(8)) + + alphas[3].mul_base(s5) + + sum_input; + + let op_label = RETURN_HASH_LABEL; + let op_label = if addr_to_hash_cycle(helper_0 + s4.mul_small(16) - ONE) == 0 { + op_label + 16 + } else { + op_label + 32 + }; + + let sum_output = alphas[8..12] + .iter() + .rev() + .enumerate() + .fold(E::ZERO, |acc, (i, x)| acc + x.mul_base(s0_s3_nxt[i])); + let v_output_new = alphas[0] + + alphas[1].mul_base(Felt::from(op_label)) + + alphas[2].mul_base(helper_0 + s4.mul_small(16) - ONE) + + sum_output; + + v_input_new * v_input_old * v_output_new * v_output_old +} + +// CHIPLETS RESPONSES +// ================================================================================================ + +/// Builds the response from the hasher chiplet at `row`. 
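// --- editor's illustrative sketch (not part of the patch) ---------------------------------------
// The request values above and the response values below meet on the chiplets bus column. One
// common way to enforce that every request is answered is a running product that multiplies in
// responses and divides out requests (via field inversion), ending at ONE when the two multisets
// match. The toy below shows that cancellation; whether the actual bus builder divides requests
// or responses (or uses a LogUp-style sum instead) is an assumption of this sketch.
const TOY_P: u128 = 0xFFFF_FFFF_0000_0001; // Goldilocks prime

fn toy_mul(a: u128, b: u128) -> u128 { (a * b) % TOY_P }

/// Modular inverse via Fermat's little theorem: a^(p-2) mod p.
fn toy_inv(a: u128) -> u128 {
    let (mut base, mut exp, mut acc) = (a % TOY_P, TOY_P - 2, 1u128);
    while exp > 0 {
        if exp & 1 == 1 { acc = toy_mul(acc, base); }
        base = toy_mul(base, base);
        exp >>= 1;
    }
    acc
}

#[test]
fn toy_bus_balances() {
    let requests = [5u128, 7, 11];
    let responses = [11u128, 5, 7]; // same multiset, different order
    let mut running = 1u128;
    for (&req, &resp) in requests.iter().zip(responses.iter()) {
        running = toy_mul(running, toy_mul(resp, toy_inv(req)));
    }
    assert_eq!(running, 1);
}
// -------------------------------------------------------------------------------------------------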
+fn build_hasher_chiplet_responses( + main_trace: &MainTrace, + // TODO: change type of the `row` variable to `u32` + row: usize, + alphas: &[E], + selector1: Felt, + selector2: Felt, + selector3: Felt, +) -> E +where + E: FieldElement, +{ + let mut multiplicand = E::ONE; + let selector0 = main_trace.chiplet_selector_0(row); + let op_label = get_op_label(selector0, selector1, selector2, selector3); + + // f_bp, f_mp, f_mv or f_mu == 1 + if row % HASH_CYCLE_LEN == 0 { + let state = main_trace.chiplet_hasher_state(row); + let alphas_state = &alphas[NUM_HEADER_ALPHAS..(NUM_HEADER_ALPHAS + STATE_WIDTH)]; + let node_index = main_trace.chiplet_node_index(row); + let transition_label = op_label + Felt::from(16_u8); + + // f_bp == 1 + // v_all = v_h + v_a + v_b + v_c + if selector1 == ONE && selector2 == ZERO && selector3 == ZERO { + let header = alphas[0] + + alphas[1].mul_base(transition_label) + + alphas[2].mul_base(Felt::from((row + 1) as u32)) + + alphas[3].mul_base(node_index); + + multiplicand = header + build_value(alphas_state, &state); + } + + // f_mp or f_mv or f_mu == 1 + // v_leaf = v_h + (1 - b) * v_b + b * v_d + if selector1 == ONE && !(selector2 == ZERO && selector3 == ZERO) { + let header = alphas[0] + + alphas[1].mul_base(transition_label) + + alphas[2].mul_base(Felt::from((row + 1) as u32)) + + alphas[3].mul_base(node_index); + + let bit = (node_index.as_int() & 1) as u8; + let left_word = build_value(&alphas_state[DIGEST_RANGE], &state[DIGEST_RANGE]); + let right_word = build_value(&alphas_state[DIGEST_RANGE], &state[DIGEST_RANGE.end..]); + + multiplicand = header + E::from(1 - bit).mul(left_word) + E::from(bit).mul(right_word); } } - /// Returns the final value in the auxiliary column. Default implementation of this method - /// returns ONE. - fn final_column_value>(&self, row_values: &[E]) -> E { - let mut result = E::ONE; - for (_, table_update) in self.hints.iter() { - if let ChipletsVTableUpdate::KernelProcAdded(idx) = table_update { - result *= row_values[*idx as usize]; - } + // f_hout, f_sout, f_abp == 1 + if row % HASH_CYCLE_LEN == HASH_CYCLE_LEN - 1 { + let state = main_trace.chiplet_hasher_state(row); + let alphas_state = &alphas[NUM_HEADER_ALPHAS..(NUM_HEADER_ALPHAS + STATE_WIDTH)]; + let node_index = main_trace.chiplet_node_index(row); + let transition_label = op_label + Felt::from(32_u8); + + // f_hout == 1 + // v_res = v_h + v_b; + if selector1 == ZERO && selector2 == ZERO && selector3 == ZERO { + let header = alphas[0] + + alphas[1].mul_base(transition_label) + + alphas[2].mul_base(Felt::from((row + 1) as u32)) + + alphas[3].mul_base(node_index); + + multiplicand = header + build_value(&alphas_state[DIGEST_RANGE], &state[DIGEST_RANGE]); + } + + // f_sout == 1 + // v_all = v_h + v_a + v_b + v_c + if selector1 == ZERO && selector2 == ZERO && selector3 == ONE { + let header = alphas[0] + + alphas[1].mul_base(transition_label) + + alphas[2].mul_base(Felt::from((row + 1) as u32)) + + alphas[3].mul_base(node_index); + + multiplicand = header + build_value(alphas_state, &state); + } + + // f_abp == 1 + // v_abp = v_h + v_b' + v_c' - v_b - v_c + if selector1 == ONE && selector2 == ZERO && selector3 == ZERO { + let header = alphas[0] + + alphas[1].mul_base(transition_label) + + alphas[2].mul_base(Felt::from((row + 1) as u32)) + + alphas[3].mul_base(node_index); + + let state_nxt = main_trace.chiplet_hasher_state(row + 1); + + // build the value from the difference of the hasher state's just before and right + // after the absorption of new elements. 
+ let next_state_value = + build_value(&alphas_state[CAPACITY_LEN..], &state_nxt[CAPACITY_LEN..]); + let state_value = build_value(&alphas_state[CAPACITY_LEN..], &state[CAPACITY_LEN..]); + + multiplicand = header + next_state_value - state_value; } + } + multiplicand +} + +/// Builds the response from the bitwise chiplet at `row`. +fn build_bitwise_chiplet_responses( + main_trace: &MainTrace, + row: usize, + is_xor: Felt, + alphas: &[E], +) -> E +where + E: FieldElement, +{ + if row % BITWISE_OP_CYCLE_LEN == BITWISE_OP_CYCLE_LEN - 1 { + let op_label = get_op_label(ONE, ZERO, is_xor, ZERO); - result + let a = main_trace.chiplet_bitwise_a(row); + let b = main_trace.chiplet_bitwise_b(row); + let z = main_trace.chiplet_bitwise_z(row); + + alphas[0] + + alphas[1].mul_base(op_label) + + alphas[2].mul_base(a) + + alphas[3].mul_base(b) + + alphas[4].mul_base(z) + } else { + E::ONE } } + +/// Builds the response from the memory chiplet at `row`. +fn build_memory_chiplet_responses( + main_trace: &MainTrace, + row: usize, + is_read: Felt, + alphas: &[E], +) -> E +where + E: FieldElement, +{ + let op_label = get_op_label(ONE, ONE, ZERO, is_read); + + let ctx = main_trace.chiplet_memory_ctx(row); + let clk = main_trace.chiplet_memory_clk(row); + let addr = main_trace.chiplet_memory_addr(row); + let value0 = main_trace.chiplet_memory_value_0(row); + let value1 = main_trace.chiplet_memory_value_1(row); + let value2 = main_trace.chiplet_memory_value_2(row); + let value3 = main_trace.chiplet_memory_value_3(row); + + alphas[0] + + alphas[1].mul_base(op_label) + + alphas[2].mul_base(ctx) + + alphas[3].mul_base(addr) + + alphas[4].mul_base(clk) + + alphas[5].mul_base(value0) + + alphas[6].mul_base(value1) + + alphas[7].mul_base(value2) + + alphas[8].mul_base(value3) +} + +/// Builds the response from the kernel chiplet at `row`. +fn build_kernel_chiplet_responses( + main_trace: &MainTrace, + row: usize, + kernel_chiplet_selector: Felt, + alphas: &[E], +) -> E +where + E: FieldElement, +{ + let op_label = KERNEL_PROC_LABEL; + + let root0 = main_trace.chiplet_kernel_root_0(row); + let root1 = main_trace.chiplet_kernel_root_1(row); + let root2 = main_trace.chiplet_kernel_root_2(row); + let root3 = main_trace.chiplet_kernel_root_3(row); + + let v = alphas[0] + + alphas[1].mul_base(op_label) + + alphas[2].mul_base(root0) + + alphas[3].mul_base(root1) + + alphas[4].mul_base(root2) + + alphas[5].mul_base(root3); + + v.mul_base(kernel_chiplet_selector) + E::from(ONE - kernel_chiplet_selector) +} + +/// Builds the response from the kernel procedure table at `row`. 
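// --- editor's illustrative sketch (not part of the patch) ---------------------------------------
// The response builders above derive their operation labels from the chiplet selector flags via
// `get_op_label`, defined a little further down in this patch as s3*8 + s2*4 + s1*2 + s0 + 1,
// i.e. the four selector bits packed into an integer, plus one. The toy below reproduces that
// packing over plain integers; the concrete label constants (MEMORY_READ_LABEL, KERNEL_PROC_LABEL,
// ...) are intentionally not restated here.
fn toy_op_label(s0: u64, s1: u64, s2: u64, s3: u64) -> u64 {
    debug_assert!(s0 <= 1 && s1 <= 1 && s2 <= 1 && s3 <= 1, "selectors must be bits");
    s3 * 8 + s2 * 4 + s1 * 2 + s0 + 1
}

#[test]
fn toy_op_labels_are_distinct() {
    use std::collections::BTreeSet;
    let mut labels = BTreeSet::new();
    for bits in 0u64..16 {
        let (s0, s1, s2, s3) = (bits & 1, (bits >> 1) & 1, (bits >> 2) & 1, (bits >> 3) & 1);
        labels.insert(toy_op_label(s0, s1, s2, s3));
    }
    // every selector combination maps to a distinct label in 1..=16
    assert_eq!(labels.len(), 16);
    assert_eq!((labels.first().copied(), labels.last().copied()), (Some(1), Some(16)));
}
// -------------------------------------------------------------------------------------------------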
+fn build_kernel_procedure_table_responses<E>(main_trace: &MainTrace, row: usize, alphas: &[E]) -> E
+where
+    E: FieldElement<BaseField = Felt>,
+{
+    let addr = main_trace.chiplet_kernel_addr(row);
+    let addr_nxt = main_trace.chiplet_kernel_addr(row + 1);
+    let addr_delta = addr_nxt - addr;
+    let root0 = main_trace.chiplet_kernel_root_0(row);
+    let root1 = main_trace.chiplet_kernel_root_1(row);
+    let root2 = main_trace.chiplet_kernel_root_2(row);
+    let root3 = main_trace.chiplet_kernel_root_3(row);
+
+    let v = alphas[0]
+        + alphas[1].mul_base(addr)
+        + alphas[2].mul_base(root0)
+        + alphas[3].mul_base(root1)
+        + alphas[4].mul_base(root2)
+        + alphas[5].mul_base(root3);
+
+    v.mul_base(addr_delta) + E::from(ONE - addr_delta)
+}
+
+// HELPER FUNCTIONS
+// ================================================================================================
+
+/// Reduces a slice of elements to a single field element in the field specified by E using a slice
+/// of alphas of matching length. This can be used to build the value for a single word or for an
+/// entire [HasherState].
+fn build_value<E: FieldElement<BaseField = Felt>>(alphas: &[E], elements: &[Felt]) -> E {
+    assert_eq!(alphas.len(), elements.len());
+    let mut value = E::ZERO;
+    for (&alpha, &element) in alphas.iter().zip(elements.iter()) {
+        value += alpha.mul_base(element);
+    }
+    value
+}
+
+/// Returns the unique operation label.
+fn get_op_label(s0: Felt, s1: Felt, s2: Felt, s3: Felt) -> Felt {
+    s3.mul_small(1 << 3) + s2.mul_small(1 << 2) + s1.mul_small(2) + s0 + ONE
+}
+
+/// Returns the hash cycle corresponding to the provided Hasher address.
+fn addr_to_hash_cycle(addr: Felt) -> usize {
+    let row = (addr.as_int() - 1) as usize;
+    let cycle_row = row % HASH_CYCLE_LEN;
+    debug_assert!(cycle_row == 0 || cycle_row == HASH_CYCLE_LEN - 1, "invalid address for hasher");
+
+    cycle_row
+}
+
+/// Convenience method to convert from addresses to rows.
+fn addr_to_row_index(addr: Felt) -> usize {
+    (addr.as_int() - 1) as usize
+}
+
+/// Computes a memory read or write request at `row` given randomness `alphas`, memory address
+/// `addr` and value `value`.
+fn compute_memory_request<E: FieldElement<BaseField = Felt>>(
+    main_trace: &MainTrace,
+    op_label: u8,
+    alphas: &[E],
+    row: usize,
+    addr: Felt,
+    value: Word,
+) -> E {
+    let ctx = main_trace.ctx(row);
+    let clk = main_trace.clk(row);
+
+    alphas[0]
+        + alphas[1].mul_base(Felt::from(op_label))
+        + alphas[2].mul_base(ctx)
+        + alphas[3].mul_base(addr)
+        + alphas[4].mul_base(clk)
+        + alphas[5].mul_base(value[0])
+        + alphas[6].mul_base(value[1])
+        + alphas[7].mul_base(value[2])
+        + alphas[8].mul_base(value[3])
+}
diff --git a/processor/src/chiplets/aux_trace/virtual_table.rs b/processor/src/chiplets/aux_trace/virtual_table.rs
deleted file mode 100644
index 77d0736c99..0000000000
--- a/processor/src/chiplets/aux_trace/virtual_table.rs
+++ /dev/null
@@ -1,160 +0,0 @@
-use super::{ColMatrix, Felt, FieldElement, StarkField, Word};
-use crate::trace::LookupTableRow;
-
-// CHIPLETS VIRTUAL TABLE
-// ================================================================================================
-
-/// Describes updates to the chiplets virtual table. This includes management of the "sibling table"
-/// used by the hasher chiplet and the "kernel procedure table" used by the kernel ROM chiplet.
-///
-/// - The sibling table is used to enforce Merkle root update computations. The internal u32 values
-/// are indices of added/removed rows in a list of rows sorted chronologically (i.e., from first
-/// added row to last).
-/// - The kernel procedure table contains all kernel procedures along with the address where they -/// first appear in the kernel ROM trace. Each kernel procedure is expected to be included exactly -/// once, regardless of whether it is ever called or not. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum ChipletsVTableUpdate { - SiblingAdded(u32), - SiblingRemoved(u32), - KernelProcAdded(u32), -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub struct ChipletsVTableRow { - sibling: Option, - kernel_proc: Option, -} - -impl ChipletsVTableRow { - pub fn new_sibling(index: Felt, sibling: Word) -> Self { - Self { - sibling: Some(SiblingTableRow::new(index, sibling)), - kernel_proc: None, - } - } - - pub fn new_kernel_proc(addr: Felt, proc_hash: Word) -> Self { - Self { - sibling: None, - kernel_proc: Some(KernelProc::new(addr, proc_hash)), - } - } - - #[cfg(test)] - pub fn kernel_proc(&self) -> Option { - self.kernel_proc - } -} - -impl LookupTableRow for ChipletsVTableRow { - /// Reduces this row to a single field element in the field specified by E. This requires - /// at least 6 alpha values. - fn to_value>( - &self, - main_trace: &ColMatrix, - alphas: &[E], - ) -> E { - if let Some(sibling) = self.sibling { - debug_assert!( - self.kernel_proc.is_none(), - "a chiplet virtual table row cannot represent both a sibling and a kernel ROM procedure" - ); - sibling.to_value(main_trace, alphas) - } else if let Some(kernel_proc) = self.kernel_proc { - kernel_proc.to_value(main_trace, alphas) - } else { - E::ONE - } - } -} - -// SIBLING TABLE ROW -// ================================================================================================ - -/// Describes a single entry in the sibling table which consists of a tuple `(index, node)` where -/// index is the index of the node at its depth. For example, assume a leaf has index n. For the -/// leaf's parent the index will be n << 1. For the parent of the parent, the index will be -/// n << 2 etc. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub struct SiblingTableRow { - index: Felt, - sibling: Word, -} - -impl SiblingTableRow { - pub fn new(index: Felt, sibling: Word) -> Self { - Self { index, sibling } - } -} - -impl LookupTableRow for SiblingTableRow { - /// Reduces this row to a single field element in the field specified by E. This requires - /// at least 6 alpha values. - fn to_value>( - &self, - _main_trace: &ColMatrix, - alphas: &[E], - ) -> E { - // when the least significant bit of the index is 0, the sibling will be in the 3rd word - // of the hasher state, and when the least significant bit is 1, it will be in the 2nd - // word. we compute the value in this way to make constraint evaluation a bit easier since - // we need to compute the 2nd and the 3rd word values for other purposes as well. 
- let lsb = self.index.as_int() & 1; - if lsb == 0 { - alphas[0] - + alphas[3].mul_base(self.index) - + alphas[12].mul_base(self.sibling[0]) - + alphas[13].mul_base(self.sibling[1]) - + alphas[14].mul_base(self.sibling[2]) - + alphas[15].mul_base(self.sibling[3]) - } else { - alphas[0] - + alphas[3].mul_base(self.index) - + alphas[8].mul_base(self.sibling[0]) - + alphas[9].mul_base(self.sibling[1]) - + alphas[10].mul_base(self.sibling[2]) - + alphas[11].mul_base(self.sibling[3]) - } - } -} - -// KERNEL ROM PROCEDURES -// ================================================================================================ - -/// Describes a single entry in the kernel rom procedure table which consists of a tuple -/// `(addr, proc_hash)` where `addr` is the address of the first entry of the procedure in the -/// kernel ROM table and `proc_hash` is the 4-element root hash of the procedure. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub struct KernelProc { - addr: Felt, - proc_hash: Word, -} - -impl KernelProc { - pub fn new(addr: Felt, proc_hash: Word) -> Self { - Self { addr, proc_hash } - } - - #[cfg(test)] - pub fn proc_hash(&self) -> Word { - self.proc_hash - } -} - -impl LookupTableRow for KernelProc { - /// Reduces this row to a single field element in the field specified by E. This requires - /// at least 6 alpha values. - fn to_value>( - &self, - _main_trace: &ColMatrix, - alphas: &[E], - ) -> E { - alphas[0] - + alphas[1].mul_base(self.addr) - + alphas[2].mul_base(self.proc_hash[0]) - + alphas[3].mul_base(self.proc_hash[1]) - + alphas[4].mul_base(self.proc_hash[2]) - + alphas[5].mul_base(self.proc_hash[3]) - } -} diff --git a/processor/src/chiplets/bitwise/mod.rs b/processor/src/chiplets/bitwise/mod.rs index b7d84e47a1..bdf18f5c52 100644 --- a/processor/src/chiplets/bitwise/mod.rs +++ b/processor/src/chiplets/bitwise/mod.rs @@ -1,10 +1,8 @@ -use super::{ - trace::LookupTableRow, utils::get_trace_len, ChipletsBus, ColMatrix, ExecutionError, Felt, - FieldElement, StarkField, TraceFragment, Vec, BITWISE_AND_LABEL, BITWISE_XOR_LABEL, ZERO, -}; +use super::{utils::get_trace_len, ExecutionError, Felt, TraceFragment, ZERO}; +use crate::utils::collections::*; use miden_air::trace::chiplets::bitwise::{ - A_COL_IDX, A_COL_RANGE, BITWISE_AND, BITWISE_XOR, B_COL_IDX, B_COL_RANGE, OP_CYCLE_LEN, - OUTPUT_COL_IDX, PREV_OUTPUT_COL_IDX, TRACE_WIDTH, + A_COL_IDX, A_COL_RANGE, BITWISE_AND, BITWISE_XOR, B_COL_IDX, B_COL_RANGE, OUTPUT_COL_IDX, + PREV_OUTPUT_COL_IDX, TRACE_WIDTH, }; #[cfg(test)] @@ -150,43 +148,12 @@ impl Bitwise { // EXECUTION TRACE GENERATION // -------------------------------------------------------------------------------------------- - /// Fills the provided trace fragment with trace data from this bitwise helper instance. Each - /// bitwise operation lookup is also sent to the chiplets bus, along with the cycle at which it - /// was provided, which is calculated as an offset from the first row of the Bitwise chiplet. - /// Lookup values come from the last row of each bitwise operation cycle which contains both the - /// aggregated input values and the output result. - pub fn fill_trace( - self, - trace: &mut TraceFragment, - chiplets_bus: &mut ChipletsBus, - bitwise_start_row: usize, - ) { + /// Fills the provided trace fragment with trace data from this bitwise helper instance. 
+ pub fn fill_trace(self, trace: &mut TraceFragment) { // make sure fragment dimensions are consistent with the dimensions of this trace debug_assert_eq!(self.trace_len(), trace.len(), "inconsistent trace lengths"); debug_assert_eq!(TRACE_WIDTH, trace.width(), "inconsistent trace widths"); - // provide the lookup data from the last row in each bitwise cycle - for row in ((OP_CYCLE_LEN - 1)..self.trace_len()).step_by(OP_CYCLE_LEN) { - let a = self.trace[A_COL_IDX][row]; - let b = self.trace[B_COL_IDX][row]; - let z = self.trace[OUTPUT_COL_IDX][row]; - - // get the operation label. - let op_selector: Felt = self.trace[0][row]; - let label = if op_selector == BITWISE_AND { - BITWISE_AND_LABEL - } else { - assert!( - op_selector == BITWISE_XOR, - "Unrecognized operation selectors in Bitwise chiplet" - ); - BITWISE_XOR_LABEL - }; - - let lookup = BitwiseLookup::new(label, a, b, z); - chiplets_bus.provide_bitwise_operation(lookup, (bitwise_start_row + row) as u32); - } - // copy trace into the fragment column-by-column // TODO: this can be parallelized to copy columns in multiple threads for (out_column, column) in trace.columns().zip(self.trace) { @@ -239,36 +206,3 @@ pub fn assert_u32(value: Felt) -> Result { Ok(value) } } - -// BITWISE LOOKUPS -// ================================================================================================ -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub struct BitwiseLookup { - // unique label identifying the bitwise operation - label: Felt, - a: Felt, - b: Felt, - z: Felt, -} - -impl BitwiseLookup { - pub fn new(label: Felt, a: Felt, b: Felt, z: Felt) -> Self { - Self { label, a, b, z } - } -} - -impl LookupTableRow for BitwiseLookup { - /// Reduces this row to a single field element in the field specified by E. This requires - /// at least 5 alpha values. 
- fn to_value>( - &self, - _main_trace: &ColMatrix, - alphas: &[E], - ) -> E { - alphas[0] - + alphas[1].mul_base(self.label) - + alphas[2].mul_base(self.a) - + alphas[3].mul_base(self.b) - + alphas[4].mul_base(self.z) - } -} diff --git a/processor/src/chiplets/bitwise/tests.rs b/processor/src/chiplets/bitwise/tests.rs index c590359a67..1ea5e89df1 100644 --- a/processor/src/chiplets/bitwise/tests.rs +++ b/processor/src/chiplets/bitwise/tests.rs @@ -1,11 +1,10 @@ -use super::{ - super::aux_trace::{ChipletLookup, ChipletsBusRow}, - Bitwise, BitwiseLookup, ChipletsBus, Felt, StarkField, TraceFragment, Vec, A_COL_IDX, - A_COL_RANGE, BITWISE_AND, BITWISE_AND_LABEL, BITWISE_XOR, BITWISE_XOR_LABEL, B_COL_IDX, - B_COL_RANGE, OP_CYCLE_LEN, OUTPUT_COL_IDX, PREV_OUTPUT_COL_IDX, TRACE_WIDTH, +use super::{Bitwise, Felt, TraceFragment}; +use miden_air::trace::chiplets::bitwise::{ + A_COL_IDX, A_COL_RANGE, BITWISE_AND, BITWISE_XOR, B_COL_IDX, B_COL_RANGE, OP_CYCLE_LEN, + OUTPUT_COL_IDX, PREV_OUTPUT_COL_IDX, TRACE_WIDTH, }; use test_utils::rand::rand_value; -use vm_core::ZERO; +use vm_core::{utils::collections::*, ZERO}; #[test] fn bitwise_init() { @@ -24,7 +23,7 @@ fn bitwise_and() { assert_eq!(a.as_int() & b.as_int(), result.as_int()); // --- check generated trace ---------------------------------------------- - let (trace, chiplets_bus) = build_trace(bitwise, OP_CYCLE_LEN); + let trace = build_trace(bitwise, OP_CYCLE_LEN); // make sure the selector values specify bitwise AND at each step in the trace for row in 0..OP_CYCLE_LEN { @@ -54,11 +53,6 @@ fn bitwise_and() { prev_result = result; } - - // make sure the lookup was sent to the bus correctly - let bitwise_lookup = - BitwiseLookup::new(BITWISE_AND_LABEL, a, b, Felt::new(a.as_int() & b.as_int())); - verify_bus(&chiplets_bus, 0, (OP_CYCLE_LEN - 1) as u32, &bitwise_lookup); } #[test] @@ -72,7 +66,7 @@ fn bitwise_xor() { assert_eq!(a.as_int() ^ b.as_int(), result.as_int()); // --- check generated trace ---------------------------------------------- - let (trace, chiplets_bus) = build_trace(bitwise, OP_CYCLE_LEN); + let trace = build_trace(bitwise, OP_CYCLE_LEN); // make sure the selector values specify bitwise XOR at each step in the trace for row in 0..OP_CYCLE_LEN { @@ -102,11 +96,6 @@ fn bitwise_xor() { prev_result = result; } - - // make sure the lookup was sent to the bus correctly - let bitwise_lookup = - BitwiseLookup::new(BITWISE_XOR_LABEL, a, b, Felt::new(a.as_int() ^ b.as_int())); - verify_bus(&chiplets_bus, 0, (OP_CYCLE_LEN - 1) as u32, &bitwise_lookup); } #[test] @@ -129,7 +118,7 @@ fn bitwise_multiple() { assert_eq!(a[2].as_int() & b[2].as_int(), result2.as_int()); // --- check generated trace ---------------------------------------------- - let (trace, chiplets_bus) = build_trace(bitwise, 3 * OP_CYCLE_LEN); + let trace = build_trace(bitwise, 3 * OP_CYCLE_LEN); // make sure results and results from the trace are the same assert_eq!(result0, trace[OUTPUT_COL_IDX][OP_CYCLE_LEN - 1]); @@ -189,32 +178,18 @@ fn bitwise_multiple() { prev_result = result; } - - // make sure the lookups were sent to the bus correctly - let bitwise_lookup = - BitwiseLookup::new(BITWISE_AND_LABEL, a[0], b[0], Felt::new(a[0].as_int() & b[0].as_int())); - verify_bus(&chiplets_bus, 0, (OP_CYCLE_LEN - 1) as u32, &bitwise_lookup); - - let bitwise_lookup = - BitwiseLookup::new(BITWISE_XOR_LABEL, a[1], b[1], Felt::new(a[1].as_int() ^ b[1].as_int())); - verify_bus(&chiplets_bus, 1, (OP_CYCLE_LEN * 2 - 1) as u32, &bitwise_lookup); - - let bitwise_lookup = - 
BitwiseLookup::new(BITWISE_AND_LABEL, a[2], b[2], Felt::new(a[2].as_int() & b[2].as_int())); - verify_bus(&chiplets_bus, 2, (OP_CYCLE_LEN * 3 - 1) as u32, &bitwise_lookup); } // HELPER FUNCTIONS // ================================================================================================ /// Builds a trace of the specified length and fills it with data from the provided Bitwise instance. -fn build_trace(bitwise: Bitwise, num_rows: usize) -> (Vec>, ChipletsBus) { - let mut chiplets_bus = ChipletsBus::default(); +fn build_trace(bitwise: Bitwise, num_rows: usize) -> Vec> { let mut trace = (0..TRACE_WIDTH).map(|_| vec![ZERO; num_rows]).collect::>(); let mut fragment = TraceFragment::trace_to_fragment(&mut trace); - bitwise.fill_trace(&mut fragment, &mut chiplets_bus, 0); + bitwise.fill_trace(&mut fragment); - (trace, chiplets_bus) + trace } fn check_decomposition(trace: &[Vec], start: usize, a: u64, b: u64) { @@ -253,21 +228,3 @@ fn rand_u32() -> Felt { let value = rand_value::() as u32 as u64; Felt::new(value) } - -/// Verifies that the chiplet bus received the specified BitwiseLookup response at `cycle` which was -/// added to the list of responses at `index`. -fn verify_bus( - chiplets_bus: &ChipletsBus, - index: usize, - cycle: u32, - bitwise_lookup: &BitwiseLookup, -) { - let expected_lookup = ChipletLookup::Bitwise(*bitwise_lookup); - let expected_hint = ChipletsBusRow::new(&[], Some(index as u32)); - - let lookup = chiplets_bus.get_response_row(index); - let hint = chiplets_bus.get_lookup_hint(cycle).unwrap(); - - assert_eq!(expected_lookup, lookup); - assert_eq!(&expected_hint, hint); -} diff --git a/processor/src/chiplets/hasher/lookups.rs b/processor/src/chiplets/hasher/lookups.rs deleted file mode 100644 index 98db0ea177..0000000000 --- a/processor/src/chiplets/hasher/lookups.rs +++ /dev/null @@ -1,197 +0,0 @@ -use super::{ColMatrix, Felt, FieldElement, LookupTableRow, StarkField, Vec, ZERO}; -use core::ops::Range; -use miden_air::trace::chiplets::{ - hasher::{ - CAPACITY_LEN, DIGEST_LEN, DIGEST_RANGE, LINEAR_HASH_LABEL, MP_VERIFY_LABEL, - MR_UPDATE_NEW_LABEL, MR_UPDATE_OLD_LABEL, RATE_LEN, RETURN_HASH_LABEL, RETURN_STATE_LABEL, - STATE_WIDTH, - }, - HASHER_RATE_COL_RANGE, HASHER_STATE_COL_RANGE, -}; - -// CONSTANTS -// ================================================================================================ -const NUM_HEADER_ALPHAS: usize = 4; - -// HASHER LOOKUPS -// ================================================================================================ - -/// Specifies the context of the [HasherLookup], indicating whether it describes the beginning of a -/// hash operation, the return of a specified result, or the absorption of additional elements, -/// initiating a new hash cycle with the provided [HasherState]. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum HasherLookupContext { - Start, - Absorb, - Return, -} - -/// Contains the data required to describe and verify hash operations. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub struct HasherLookup { - // unique label identifying the hash operation - label: u8, - // row address in the Hasher table - addr: u32, - // node index - index: Felt, - // context - context: HasherLookupContext, -} - -impl HasherLookup { - /// Creates a new HasherLookup. - pub(super) fn new(label: u8, addr: u32, index: Felt, context: HasherLookupContext) -> Self { - Self { - label, - addr, - index, - context, - } - } - - /// The cycle at which the lookup is provided by the hasher. 
- pub fn cycle(&self) -> u32 { - // the hasher's addresses start from one instead of zero, so the cycle at which each lookup - // is provided is one less than its address - self.addr - 1 - } - - /// Returns the common header value which describes this hash operation. It is a combination of - /// the transition label, the row address, and the node index. - fn get_header_value>(&self, alphas: &[E]) -> E { - let transition_label = match self.context { - HasherLookupContext::Start => E::from(self.label) + E::from(16_u8), - _ => E::from(self.label) + E::from(32_u8), - }; - - alphas[0] - + alphas[1].mul(transition_label) - + alphas[2].mul(E::from(self.addr)) - + alphas[3].mul_base(self.index) - } -} - -impl LookupTableRow for HasherLookup { - /// Reduces this row to a single field element in the field specified by E. This requires - /// at least 16 alpha values. - fn to_value>( - &self, - main_trace: &ColMatrix, - alphas: &[E], - ) -> E { - let header = self.get_header_value(&alphas[..NUM_HEADER_ALPHAS]); - // computing the rest of the value requires an alpha for each element in the [HasherState] - let alphas = &alphas[NUM_HEADER_ALPHAS..(NUM_HEADER_ALPHAS + STATE_WIDTH)]; - - match self.context { - HasherLookupContext::Start => { - if self.label == LINEAR_HASH_LABEL { - // include the entire state when initializing a linear hash. - header - + build_value( - alphas, - &get_hasher_state_at(self.addr, main_trace, 0..STATE_WIDTH), - ) - } else { - let state = - &get_hasher_state_at(self.addr, main_trace, CAPACITY_LEN..STATE_WIDTH); - assert!( - self.label == MR_UPDATE_OLD_LABEL - || self.label == MR_UPDATE_NEW_LABEL - || self.label == MP_VERIFY_LABEL, - "unrecognized hash operation" - ); - // build the leaf value by selecting from the left and right words of the state. - // the same alphas must be used in both cases, since whichever word is selected - // by the index bit will be the leaf node, and the value must be computed in - // the same way in both cases. - let bit = (self.index.as_int() >> 1) & 1; - let left_word = build_value(&alphas[DIGEST_RANGE], &state[..DIGEST_LEN]); - let right_word = build_value(&alphas[DIGEST_RANGE], &state[DIGEST_LEN..]); - - header + E::from(1 - bit).mul(left_word) + E::from(bit).mul(right_word) - } - } - HasherLookupContext::Absorb => { - assert!(self.label == LINEAR_HASH_LABEL, "unrecognized hash operation"); - let (curr_hasher_rate, next_hasher_rate) = - get_adjacent_hasher_rates(self.addr, main_trace); - // build the value from the delta of the hasher state's rate before and after the - // absorption of new elements. 
- let next_state_value = build_value(&alphas[CAPACITY_LEN..], &next_hasher_rate); - let state_value = build_value(&alphas[CAPACITY_LEN..], &curr_hasher_rate); - - header + next_state_value - state_value - } - HasherLookupContext::Return => { - if self.label == RETURN_STATE_LABEL { - // build the value from the result, which is the entire state - header - + build_value( - alphas, - &get_hasher_state_at(self.addr, main_trace, 0..STATE_WIDTH), - ) - } else { - assert!(self.label == RETURN_HASH_LABEL, "unrecognized hash operation"); - // build the value from the result, which is the digest portion of the state - header - + build_value( - &alphas[DIGEST_RANGE], - &get_hasher_state_at(self.addr, main_trace, DIGEST_RANGE), - ) - } - } - } - } -} - -// HELPER FUNCTIONS -// ================================================================================================ - -/// Reduces a slice of elements to a single field element in the field specified by E using a slice -/// of alphas of matching length. This can be used to build the value for a single word or for an -/// entire [HasherState]. -fn build_value>(alphas: &[E], elements: &[Felt]) -> E { - let mut value = E::ZERO; - for (&alpha, &element) in alphas.iter().zip(elements.iter()) { - value += alpha.mul_base(element); - } - value -} - -/// Returns the portion of the hasher state at the provided address that is within the provided -/// column range. -fn get_hasher_state_at( - addr: u32, - main_trace: &ColMatrix, - col_range: Range, -) -> Vec { - let row = get_row_from_addr(addr); - col_range - .map(|col| main_trace.get(HASHER_STATE_COL_RANGE.start + col, row)) - .collect::>() -} - -/// Returns the rate portion of the hasher state for the provided row and the next row. -fn get_adjacent_hasher_rates( - addr: u32, - main_trace: &ColMatrix, -) -> ([Felt; RATE_LEN], [Felt; RATE_LEN]) { - let row = get_row_from_addr(addr); - - let mut current = [ZERO; RATE_LEN]; - let mut next = [ZERO; RATE_LEN]; - for (idx, col_idx) in HASHER_RATE_COL_RANGE.enumerate() { - let column = main_trace.get_column(col_idx); - current[idx] = column[row]; - next[idx] = column[row + 1]; - } - - (current, next) -} - -/// Gets the row index from the specified row address. 
-fn get_row_from_addr(addr: u32) -> usize { - addr as usize - 1 -} diff --git a/processor/src/chiplets/hasher/mod.rs b/processor/src/chiplets/hasher/mod.rs index 6ffd083abf..cdcf7e2e84 100644 --- a/processor/src/chiplets/hasher/mod.rs +++ b/processor/src/chiplets/hasher/mod.rs @@ -1,19 +1,12 @@ use super::{ - trace::LookupTableRow, BTreeMap, ChipletsVTableTraceBuilder, ColMatrix, Felt, FieldElement, - HasherState, MerklePath, MerkleRootUpdate, OpBatch, StarkField, TraceFragment, Vec, Word, ONE, + BTreeMap, Felt, HasherState, MerklePath, MerkleRootUpdate, OpBatch, TraceFragment, Word, ONE, ZERO, }; use miden_air::trace::chiplets::hasher::{ - Digest, Selectors, DIGEST_LEN, DIGEST_RANGE, HASH_CYCLE_LEN, LINEAR_HASH, LINEAR_HASH_LABEL, - MP_VERIFY, MP_VERIFY_LABEL, MR_UPDATE_NEW, MR_UPDATE_NEW_LABEL, MR_UPDATE_OLD, - MR_UPDATE_OLD_LABEL, RATE_LEN, RETURN_HASH, RETURN_HASH_LABEL, RETURN_STATE, - RETURN_STATE_LABEL, STATE_WIDTH, TRACE_WIDTH, + Digest, Selectors, DIGEST_LEN, DIGEST_RANGE, LINEAR_HASH, MP_VERIFY, MR_UPDATE_NEW, + MR_UPDATE_OLD, RATE_LEN, RETURN_HASH, RETURN_STATE, STATE_WIDTH, TRACE_WIDTH, }; -mod lookups; -pub use lookups::HasherLookup; -use lookups::HasherLookupContext; - mod trace; use trace::HasherTrace; @@ -55,9 +48,6 @@ mod tests; /// In addition to the execution trace, the hash chiplet also maintains: /// - an auxiliary trace builder, which can be used to construct a running product column describing /// the state of the sibling table (used in Merkle root update operations). -/// - a vector of [HasherLookup]s, each of which specifies the data for one of the lookup rows which -/// are required for verification of the communication between the stack/decoder and the Hash -/// Chiplet via the Chiplets Bus. /// - a map of memoized execution trace, which keeps track of start and end rows of the sections of /// the trace of a control or span block that can be copied to be used later for program blocks /// encountered with the same digest instead of building it from scratch everytime. The hash of @@ -65,7 +55,6 @@ mod tests; #[derive(Default)] pub struct Hasher { trace: HasherTrace, - aux_trace: ChipletsVTableTraceBuilder, memoized_trace_map: BTreeMap<[u8; 32], (usize, usize)>, } @@ -78,51 +67,25 @@ impl Hasher { self.trace.trace_len() } - /// Returns the [HasherLookup] from the provided label, index and context inputs. - #[inline(always)] - fn get_lookup(&self, label: u8, index: Felt, context: HasherLookupContext) -> HasherLookup { - let addr = match context { - // when starting a new hash operation, lookups are added before the operation begins. - HasherLookupContext::Start => self.trace.next_row_addr().as_int() as u32, - // in all other cases, they are added after the hash operation has completed. - _ => self.trace_len() as u32, - }; - HasherLookup::new(label, addr, index, context) - } - // HASHING METHODS // -------------------------------------------------------------------------------------------- /// Applies a single permutation of the hash function to the provided state and records the - /// execution trace of this computation as well as the lookups required for verifying the - /// correctness of the permutation so that they can be provided to the Chiplets Bus. + /// execution trace of this computation. /// /// The returned tuple contains the hasher state after the permutation and the row address of /// the execution trace at which the permutation started. 
- pub(super) fn permute( - &mut self, - mut state: HasherState, - lookups: &mut Vec, - ) -> (Felt, HasherState) { + pub(super) fn permute(&mut self, mut state: HasherState) -> (Felt, HasherState) { let addr = self.trace.next_row_addr(); - // add the lookup for the hash initialization. - let lookup = self.get_lookup(LINEAR_HASH_LABEL, ZERO, HasherLookupContext::Start); - lookups.push(lookup); - // perform the hash. self.trace.append_permutation(&mut state, LINEAR_HASH, RETURN_STATE); - // add the lookup for the hash result. - let lookup = self.get_lookup(RETURN_STATE_LABEL, ZERO, HasherLookupContext::Return); - lookups.push(lookup); - (addr, state) } /// Computes the hash of the control block by computing hash(h1, h2) and returns the result. - /// It also records the execution trace of this computation as well as the lookups required for - /// verifying its correctness so that they can be provided to the Chiplets Bus. + /// It also records the execution trace of this computation. /// /// The returned tuple also contains the row address of the execution trace at which the hash /// computation started. @@ -132,15 +95,10 @@ impl Hasher { h2: Word, domain: Felt, expected_hash: Digest, - lookups: &mut Vec, ) -> (Felt, Word) { let addr = self.trace.next_row_addr(); let mut state = init_state_from_words_with_domain(&h1, &h2, domain); - // add the lookup for the hash initialization. - let lookup = self.get_lookup(LINEAR_HASH_LABEL, ZERO, HasherLookupContext::Start); - lookups.push(lookup); - if let Some((start_row, end_row)) = self.get_memoized_trace(expected_hash) { // copy the trace of a block with same hash instead of building it again. self.trace.copy_trace(&mut state, *start_row..*end_row); @@ -151,18 +109,13 @@ impl Hasher { self.insert_to_memoized_trace_map(addr, expected_hash); }; - // add the lookup for the hash result. - let lookup = self.get_lookup(RETURN_HASH_LABEL, ZERO, HasherLookupContext::Return); - lookups.push(lookup); - let result = get_digest(&state); (addr, result) } /// Computes a sequential hash of all operation batches in the list and returns the result. It - /// also records the execution trace of this computation, as well as the lookups required for - /// verifying its correctness so that they can be provided to the Chiplets Bus. + /// also records the execution trace of this computation. /// /// The returned tuple also contains the row address of the execution trace at which the hash /// computation started. @@ -170,17 +123,13 @@ impl Hasher { &mut self, op_batches: &[OpBatch], expected_hash: Digest, - lookups: &mut Vec, ) -> (Felt, Word) { const START: Selectors = LINEAR_HASH; - const START_LABEL: u8 = LINEAR_HASH_LABEL; const RETURN: Selectors = RETURN_HASH; - const RETURN_LABEL: u8 = RETURN_HASH_LABEL; // absorb selectors are the same as linear hash selectors, but absorb selectors are // applied on the last row of a permutation cycle, while linear hash selectors are // applied on the first row of a permutation cycle. const ABSORB: Selectors = LINEAR_HASH; - const ABSORB_LABEL: u8 = LINEAR_HASH_LABEL; // to continue linear hash we need retain the 2nd and 3rd selector flags and set the // 1st flag to ZERO. const CONTINUE: Selectors = [ZERO, LINEAR_HASH[1], LINEAR_HASH[2]]; @@ -190,10 +139,6 @@ impl Hasher { // initialize the state and absorb the first operation batch into it let mut state = init_state(op_batches[0].groups(), ZERO); - // add the lookup for the hash initialization. 
- let lookup = self.get_lookup(START_LABEL, ZERO, HasherLookupContext::Start); - lookups.push(lookup); - // check if a span block with same hash has been encountered before in which case we can // directly copy it's trace. let (start_row, end_row, is_memoized) = @@ -226,53 +171,24 @@ impl Hasher { for batch in op_batches.iter().take(num_batches - 1).skip(1) { absorb_into_state(&mut state, batch.groups()); - // add the lookup for absorbing the next operation batch. - let lookup = self.get_lookup(ABSORB_LABEL, ZERO, HasherLookupContext::Absorb); - lookups.push(lookup); - self.trace.append_permutation(&mut state, CONTINUE, ABSORB); } absorb_into_state(&mut state, op_batches[num_batches - 1].groups()); - // add the lookup for absorbing the final operation batch. - let lookup = self.get_lookup(ABSORB_LABEL, ZERO, HasherLookupContext::Absorb); - lookups.push(lookup); - self.trace.append_permutation(&mut state, CONTINUE, RETURN); } self.insert_to_memoized_trace_map(addr, expected_hash); - } else if num_batches == 1 { - self.trace.copy_trace(&mut state, start_row..end_row); } else { - for i in 1..num_batches { - // add the lookup for absorbing the next operation batch. Here we add the - // lookups before actually copying the memoized trace. - let lookup_addr = self.trace_len() + i * HASH_CYCLE_LEN; - let lookup = HasherLookup::new( - ABSORB_LABEL, - lookup_addr as u32, - ZERO, - HasherLookupContext::Absorb, - ); - lookups.push(lookup); - } - self.trace.copy_trace(&mut state, start_row..end_row); } - // add the lookup for the hash result. - let lookup = self.get_lookup(RETURN_LABEL, ZERO, HasherLookupContext::Return); - lookups.push(lookup); - let result = get_digest(&state); (addr, result) } - /// Performs Merkle path verification computation and records its execution trace, as well as - /// the lookups required for verifying its correctness so that they can be provided to the - /// Chiplets Bus. + /// Performs Merkle path verification computation and records its execution trace. /// /// The computation consists of computing a Merkle root of the specified path for a node with /// the specified value, located at the specified index. @@ -289,24 +205,16 @@ impl Hasher { value: Word, path: &MerklePath, index: Felt, - lookups: &mut Vec, ) -> (Felt, Word) { let addr = self.trace.next_row_addr(); - let root = self.verify_merkle_path( - value, - path, - index.as_int(), - MerklePathContext::MpVerify, - lookups, - ); + let root = + self.verify_merkle_path(value, path, index.as_int(), MerklePathContext::MpVerify); (addr, root) } - /// Performs Merkle root update computation and records its execution trace, as well as the - /// lookups required for verifying its correctness so that they can be provided to the Chiplets - /// Bus. + /// Performs Merkle root update computation and records its execution trace. /// /// The computation consists of two Merkle path verifications, one for the old value of the /// node (value before the update), and another for the new value (value after the update). 
@@ -321,25 +229,14 @@ impl Hasher { new_value: Word, path: &MerklePath, index: Felt, - lookups: &mut Vec, ) -> MerkleRootUpdate { let address = self.trace.next_row_addr(); let index = index.as_int(); - let old_root = self.verify_merkle_path( - old_value, - path, - index, - MerklePathContext::MrUpdateOld, - lookups, - ); - let new_root = self.verify_merkle_path( - new_value, - path, - index, - MerklePathContext::MrUpdateNew, - lookups, - ); + let old_root = + self.verify_merkle_path(old_value, path, index, MerklePathContext::MrUpdateOld); + let new_root = + self.verify_merkle_path(new_value, path, index, MerklePathContext::MrUpdateNew); MerkleRootUpdate { address, @@ -353,10 +250,8 @@ impl Hasher { /// Fills the provided trace fragment with trace data from this hasher trace instance. This /// also returns the trace builder for hasher-related auxiliary trace columns. - pub(super) fn fill_trace(self, trace: &mut TraceFragment) -> ChipletsVTableTraceBuilder { - self.trace.fill_trace(trace); - - self.aux_trace + pub(super) fn fill_trace(self, trace: &mut TraceFragment) { + self.trace.fill_trace(trace) } // HELPER METHODS @@ -365,8 +260,7 @@ impl Hasher { /// Computes a root of the provided Merkle path in the specified context. The path is assumed /// to be for a node with the specified value at the specified index. /// - /// This also records the execution trace of the Merkle path computation and all lookups - /// required for verifying its correctness. + /// This also records the execution trace of the Merkle path computation. /// /// # Panics /// Panics if: @@ -378,7 +272,6 @@ impl Hasher { path: &MerklePath, mut index: u64, context: MerklePathContext, - lookups: &mut Vec, ) -> Word { assert!(!path.is_empty(), "path is empty"); assert!( @@ -386,7 +279,6 @@ impl Hasher { "invalid index for the path" ); let mut root = value; - let mut depth = path.len() - 1; // determine selectors for the specified context let main_selectors = context.main_selectors(); @@ -395,41 +287,22 @@ impl Hasher { if path.len() == 1 { // handle path of length 1 separately because pattern for init and final selectors // is different from other cases - self.update_sibling_hints(context, index, path[0], depth); - self.verify_mp_leg(root, path[0], &mut index, main_selectors, RETURN_HASH, lookups) + self.verify_mp_leg(root, path[0], &mut index, main_selectors, RETURN_HASH) } else { // process the first node of the path; for this node, init and final selectors are // the same let sibling = path[0]; - self.update_sibling_hints(context, index, sibling, depth); - root = self.verify_mp_leg( - root, - sibling, - &mut index, - main_selectors, - main_selectors, - lookups, - ); - depth -= 1; + root = self.verify_mp_leg(root, sibling, &mut index, main_selectors, main_selectors); // process all other nodes, except for the last one for &sibling in &path[1..path.len() - 1] { - self.update_sibling_hints(context, index, sibling, depth); - root = self.verify_mp_leg( - root, - sibling, - &mut index, - part_selectors, - main_selectors, - lookups, - ); - depth -= 1; + root = + self.verify_mp_leg(root, sibling, &mut index, part_selectors, main_selectors); } // process the last node let sibling = path[path.len() - 1]; - self.update_sibling_hints(context, index, sibling, depth); - self.verify_mp_leg(root, sibling, &mut index, part_selectors, RETURN_HASH, lookups) + self.verify_mp_leg(root, sibling, &mut index, part_selectors, RETURN_HASH) } } @@ -437,11 +310,7 @@ impl Hasher { /// /// This function does the following: /// - Builds the initial 
hasher state based on the least significant bit of the index. - /// - Records the lookup required for verification of the hash initialization if the - /// `init_selectors` indicate that it is the beginning of the Merkle path verification. /// - Applies a permutation to this state and records the resulting trace. - /// - Records the lookup required for verification of the hash result if the `final_selectors` - /// indicate that it is the end of the Merkle path verification. /// - Returns the result of the permutation and updates the index by removing its least /// significant bit. fn verify_mp_leg( @@ -451,19 +320,11 @@ impl Hasher { index: &mut u64, init_selectors: Selectors, final_selectors: Selectors, - lookups: &mut Vec, ) -> Word { // build the hasher state based on the value of the least significant bit of the index let index_bit = *index & 1; let mut state = build_merge_state(&root, &sibling, index_bit); - // add the lookup for the hash initialization if this is the beginning. - let context = HasherLookupContext::Start; - if let Some(label) = get_selector_context_label(init_selectors, context) { - let lookup = self.get_lookup(label, Felt::new(*index), context); - lookups.push(lookup); - } - // determine values for the node index column for this permutation. if the first selector // of init_selectors is not ZERO (i.e., we are processing the first leg of the Merkle // path), the index for the first row is different from the index for the other rows; @@ -486,44 +347,9 @@ impl Hasher { // remove the least significant bit from the index and return hash result *index >>= 1; - // add the lookup for the hash result if this is the end. - let context = HasherLookupContext::Return; - if let Some(label) = get_selector_context_label(final_selectors, context) { - let lookup = self.get_lookup(label, Felt::new(*index), context); - lookups.push(lookup); - } - get_digest(&state) } - /// Records an update hint in the auxiliary trace builder to indicate whether the sibling was - /// consumed as a part of computing the new or the old Merkle root. This is relevant only for - /// the Merkle root update computation. - fn update_sibling_hints( - &mut self, - context: MerklePathContext, - index: u64, - sibling: Digest, - depth: usize, - ) { - let step = self.trace.trace_len() as u32; - match context { - MerklePathContext::MrUpdateOld => { - self.aux_trace.sibling_added(step, Felt::new(index), sibling.into()); - } - MerklePathContext::MrUpdateNew => { - // we use node depth as row offset here because siblings are added to the table - // in reverse order of their depth (i.e., the sibling with the greatest depth is - // added first). thus, when removing siblings from the table, we can find the right - // entry by looking at the n-th entry from the end of the table, where n is the - // node's depth (e.g., an entry for the sibling with depth 2, would be in the - // second entry from the end of the table). - self.aux_trace.sibling_removed(step, depth); - } - _ => (), - } - } - /// Checks if a trace for a program block already exists and returns the start and end rows /// of the memoized trace. Returns None otherwise. fn get_memoized_trace(&self, hash: Digest) -> Option<&(usize, usize)> { @@ -590,44 +416,6 @@ fn build_merge_state(a: &Word, b: &Word, index_bit: u64) -> HasherState { } } -/// Gets the label for the hash operation from the provided selectors and the specified context. 
-pub fn get_selector_context_label( - selectors: Selectors, - context: HasherLookupContext, -) -> Option { - match context { - HasherLookupContext::Start => { - if selectors == LINEAR_HASH { - Some(LINEAR_HASH_LABEL) - } else if selectors == MP_VERIFY { - Some(MP_VERIFY_LABEL) - } else if selectors == MR_UPDATE_OLD { - Some(MR_UPDATE_OLD_LABEL) - } else if selectors == MR_UPDATE_NEW { - Some(MR_UPDATE_NEW_LABEL) - } else { - None - } - } - HasherLookupContext::Return => { - if selectors == RETURN_HASH { - Some(RETURN_HASH_LABEL) - } else if selectors == RETURN_STATE { - Some(RETURN_STATE_LABEL) - } else { - None - } - } - _ => { - if selectors == LINEAR_HASH { - Some(LINEAR_HASH_LABEL) - } else { - None - } - } - } -} - // TODO: Move these to another file. // HASHER STATE MUTATORS diff --git a/processor/src/chiplets/hasher/tests.rs b/processor/src/chiplets/hasher/tests.rs index edff2b2a7d..b595a5474b 100644 --- a/processor/src/chiplets/hasher/tests.rs +++ b/processor/src/chiplets/hasher/tests.rs @@ -1,22 +1,19 @@ use super::{ - init_state_from_words, lookups::HasherLookupContext, Digest, Felt, Hasher, HasherLookup, - HasherState, MerklePath, Selectors, TraceFragment, Vec, Word, LINEAR_HASH, MP_VERIFY, - MR_UPDATE_NEW, MR_UPDATE_OLD, RETURN_HASH, RETURN_STATE, TRACE_WIDTH, -}; -use crate::chiplets::aux_trace::{ - ChipletsVTableRow, ChipletsVTableTraceBuilder, ChipletsVTableUpdate, + init_state_from_words, Digest, Felt, Hasher, HasherState, MerklePath, Selectors, TraceFragment, + Word, LINEAR_HASH, MP_VERIFY, MR_UPDATE_NEW, MR_UPDATE_OLD, RETURN_HASH, RETURN_STATE, + TRACE_WIDTH, }; +use crate::utils::collections::*; + use miden_air::trace::chiplets::hasher::{ - DIGEST_LEN, HASH_CYCLE_LEN, LINEAR_HASH_LABEL, MP_VERIFY_LABEL, MR_UPDATE_NEW_LABEL, - MR_UPDATE_OLD_LABEL, NUM_ROUNDS, NUM_SELECTORS, RETURN_HASH_LABEL, RETURN_STATE_LABEL, - STATE_COL_RANGE, + DIGEST_LEN, HASH_CYCLE_LEN, NUM_ROUNDS, NUM_SELECTORS, STATE_COL_RANGE, }; use test_utils::rand::rand_array; use vm_core::{ chiplets::hasher, code_blocks::CodeBlock, crypto::merkle::{MerkleTree, NodeIndex}, - Operation, StarkField, ONE, ZERO, + Operation, ONE, ZERO, }; // LINEAR HASH TESTS @@ -29,27 +26,8 @@ fn hasher_permute() { // initialize the hasher and perform one permutation let mut hasher = Hasher::default(); let init_state: HasherState = rand_array(); - let mut lookups = Vec::new(); - let (addr, final_state) = hasher.permute(init_state, &mut lookups); - - let lookup_start_addr = 1; - // there should be two lookups for start and end rows of hasher operation - let expected_lookups_len = 2; - // make sure the lookups have correct labels, addresses, indices and contexts. 
- let expected_lookup_start = - HasherLookup::new(LINEAR_HASH_LABEL, lookup_start_addr, ZERO, HasherLookupContext::Start); - - let expected_lookup_end = HasherLookup::new( - RETURN_STATE_LABEL, - lookup_start_addr + HASH_CYCLE_LEN as u32 - 1, - ZERO, - HasherLookupContext::Return, - ); - check_lookups_validity( - lookups, - expected_lookups_len, - vec![expected_lookup_start, expected_lookup_end], - ); + + let (addr, final_state) = hasher.permute(init_state); // address of the permutation should be ONE (as hasher address starts at ONE) assert_eq!(ONE, addr); @@ -59,28 +37,23 @@ fn hasher_permute() { assert_eq!(expected_state, final_state); // build the trace - let (trace, aux_hints) = build_trace(hasher, 8); + let trace = build_trace(hasher, 8); // make sure the trace is correct check_selector_trace(&trace, 0, LINEAR_HASH, RETURN_STATE); check_hasher_state_trace(&trace, 0, init_state); assert_eq!(trace.last().unwrap(), &[ZERO; 8]); - // make sure aux hints for sibling table are empty - assert!(aux_hints.hints().is_empty()); - assert!(aux_hints.rows().is_empty()); - // --- test two permutations ---------------------------------------------- // initialize the hasher and perform two permutations let mut hasher = Hasher::default(); let init_state1: HasherState = rand_array(); - let mut lookups1 = Vec::new(); - let (addr1, final_state1) = hasher.permute(init_state1, &mut lookups1); - let mut lookups2 = Vec::new(); + let (addr1, final_state1) = hasher.permute(init_state1); + let init_state2: HasherState = rand_array(); - let (addr2, final_state2) = hasher.permute(init_state2, &mut lookups2); + let (addr2, final_state2) = hasher.permute(init_state2); // make sure the returned addresses are correct (they must be 8 rows apart) assert_eq!(ONE, addr1); @@ -94,7 +67,7 @@ fn hasher_permute() { assert_eq!(expected_state2, final_state2); // build the trace - let (trace, aux_hints) = build_trace(hasher, 16); + let trace = build_trace(hasher, 16); // make sure the trace is correct check_selector_trace(&trace, 0, LINEAR_HASH, RETURN_STATE); @@ -102,10 +75,6 @@ fn hasher_permute() { check_hasher_state_trace(&trace, 0, init_state1); check_hasher_state_trace(&trace, 8, init_state2); assert_eq!(trace.last().unwrap(), &[ZERO; 16]); - - // make sure aux hints for sibling table are empty - assert!(aux_hints.hints().is_empty()); - assert!(aux_hints.rows().is_empty()); } // MERKLE TREE TESTS @@ -117,56 +86,20 @@ fn hasher_build_merkle_root() { // build a Merkle tree let leaves = init_leaves(&[1, 2]); - let tree = MerkleTree::new(leaves.to_vec()).unwrap(); + let tree = MerkleTree::new(&leaves).unwrap(); // initialize the hasher and perform two Merkle branch verifications let mut hasher = Hasher::default(); let path0 = tree.get_path(NodeIndex::new(1, 0).unwrap()).unwrap(); - let mut lookups = Vec::new(); - hasher.build_merkle_root(leaves[0], &path0, ZERO, &mut lookups); - - // there should be two lookups for start and end rows of hasher operation - let expected_lookups_len = 2; - // make sure the lookups have correct labels, addresses, indices and contexts. 
- let lookup_start_addr = 1; - let expected_lookup_start = - HasherLookup::new(MP_VERIFY_LABEL, lookup_start_addr, ZERO, HasherLookupContext::Start); - let expected_lookup_end = HasherLookup::new( - RETURN_HASH_LABEL, - lookup_start_addr + HASH_CYCLE_LEN as u32 - 1, - ZERO, - HasherLookupContext::Return, - ); - check_lookups_validity( - lookups, - expected_lookups_len, - vec![expected_lookup_start, expected_lookup_end], - ); + + hasher.build_merkle_root(leaves[0], &path0, ZERO); let path1 = tree.get_path(NodeIndex::new(1, 1).unwrap()).unwrap(); - let mut lookups = Vec::new(); - hasher.build_merkle_root(leaves[1], &path1, ONE, &mut lookups); - - let lookup_start_addr = 9; - // there should be two lookups for start and end rows of hasher operation - let expected_lookups_len = 2; - // make sure the lookups have correct labels, addresses, indices and contexts. - let expected_lookup_start = - HasherLookup::new(MP_VERIFY_LABEL, lookup_start_addr, ONE, HasherLookupContext::Start); - let expected_lookup_end = HasherLookup::new( - RETURN_HASH_LABEL, - lookup_start_addr + HASH_CYCLE_LEN as u32 - 1, - ZERO, - HasherLookupContext::Return, - ); - check_lookups_validity( - lookups, - expected_lookups_len, - vec![expected_lookup_start, expected_lookup_end], - ); + + hasher.build_merkle_root(leaves[1], &path1, ONE); // build the trace - let (trace, aux_hints) = build_trace(hasher, 16); + let trace = build_trace(hasher, 16); // make sure the trace is correct check_selector_trace(&trace, 0, MP_VERIFY, RETURN_HASH); @@ -178,167 +111,48 @@ fn hasher_build_merkle_root() { assert_eq!(node_idx_column[8], ONE); assert_eq!(&node_idx_column[9..], &[ZERO; 7]); - // make sure aux hints for sibling table are empty - assert!(aux_hints.hints().is_empty()); - assert!(aux_hints.rows().is_empty()); - // --- Merkle tree with 8 leaves ------------------------------------------ // build a Merkle tree let leaves = init_leaves(&[1, 2, 3, 4, 5, 6, 7, 8]); - let tree = MerkleTree::new(leaves.to_vec()).unwrap(); + let tree = MerkleTree::new(&leaves).unwrap(); // initialize the hasher and perform one Merkle branch verifications let mut hasher = Hasher::default(); let path = tree.get_path(NodeIndex::new(3, 5).unwrap()).unwrap(); - let mut lookups = Vec::new(); - hasher.build_merkle_root(leaves[5], &path, Felt::new(5), &mut lookups); - - let lookup_start_addr = 1; - // there should be two lookups for start and end rows of hasher operation - let expected_lookups_len = 2; - // make sure the lookups have correct labels, addresses, indices and contexts. 
- let expected_lookup_start = HasherLookup::new( - MP_VERIFY_LABEL, - lookup_start_addr, - Felt::new(5), - HasherLookupContext::Start, - ); - let expected_lookup_end = HasherLookup::new( - RETURN_HASH_LABEL, - lookup_start_addr + 3 * HASH_CYCLE_LEN as u32 - 1, - ZERO, - HasherLookupContext::Return, - ); - check_lookups_validity( - lookups, - expected_lookups_len, - vec![expected_lookup_start, expected_lookup_end], - ); + hasher.build_merkle_root(leaves[5], &path, Felt::new(5)); // build and check the trace for validity - let (trace, aux_hints) = build_trace(hasher, 24); + let trace = build_trace(hasher, 24); check_merkle_path(&trace, 0, leaves[5], &path, 5, MP_VERIFY); - // make sure aux hints for sibling table are empty - assert!(aux_hints.hints().is_empty()); - assert!(aux_hints.rows().is_empty()); - // --- Merkle tree with 8 leaves (multiple branches) ---------------------- // initialize the hasher and perform one Merkle branch verifications let mut hasher = Hasher::default(); let path0 = tree.get_path(NodeIndex::new(3, 0).unwrap()).unwrap(); - let mut lookups = Vec::new(); - hasher.build_merkle_root(leaves[0], &path0, ZERO, &mut lookups); - - let lookup_start_addr = 1; - // there should be two lookups for start and end rows of hasher operation - let expected_lookups_len = 2; - // make sure the lookups have correct labels, addresses, indices and contexts. - let expected_lookup_start = - HasherLookup::new(MP_VERIFY_LABEL, lookup_start_addr, ZERO, HasherLookupContext::Start); - let expected_lookup_end = HasherLookup::new( - RETURN_HASH_LABEL, - lookup_start_addr + 3 * HASH_CYCLE_LEN as u32 - 1, - ZERO, - HasherLookupContext::Return, - ); - check_lookups_validity( - lookups, - expected_lookups_len, - vec![expected_lookup_start, expected_lookup_end], - ); + + hasher.build_merkle_root(leaves[0], &path0, ZERO); let path3 = tree.get_path(NodeIndex::new(3, 3).unwrap()).unwrap(); - let mut lookups = Vec::new(); - hasher.build_merkle_root(leaves[3], &path3, Felt::new(3), &mut lookups); - - let lookup_start_addr = 25; - // there should be two lookups for start and end rows of hasher operation - let expected_lookups_len = 2; - // make sure the lookups have correct labels, addresses, indices and contexts. - let expected_lookup_start = HasherLookup::new( - MP_VERIFY_LABEL, - lookup_start_addr, - Felt::new(3), - HasherLookupContext::Start, - ); - let expected_lookup_end = HasherLookup::new( - RETURN_HASH_LABEL, - lookup_start_addr + 3 * HASH_CYCLE_LEN as u32 - 1, - ZERO, - HasherLookupContext::Return, - ); - check_lookups_validity( - lookups, - expected_lookups_len, - vec![expected_lookup_start, expected_lookup_end], - ); + + hasher.build_merkle_root(leaves[3], &path3, Felt::new(3)); let path7 = tree.get_path(NodeIndex::new(3, 7).unwrap()).unwrap(); - let mut lookups = Vec::new(); - hasher.build_merkle_root(leaves[7], &path7, Felt::new(7), &mut lookups); - - let lookup_start_addr = 49; - // there should be two lookups for start and end rows of hasher operation - let expected_lookups_len = 2; - // make sure the lookups have correct labels, addresses, indices and contexts. 
- let expected_lookup_start = HasherLookup::new( - MP_VERIFY_LABEL, - lookup_start_addr, - Felt::new(7), - HasherLookupContext::Start, - ); - let expected_lookup_end = HasherLookup::new( - RETURN_HASH_LABEL, - lookup_start_addr + 3 * HASH_CYCLE_LEN as u32 - 1, - ZERO, - HasherLookupContext::Return, - ); - check_lookups_validity( - lookups, - expected_lookups_len, - vec![expected_lookup_start, expected_lookup_end], - ); + + hasher.build_merkle_root(leaves[7], &path7, Felt::new(7)); // path3 again - let mut lookups = Vec::new(); - hasher.build_merkle_root(leaves[3], &path3, Felt::new(3), &mut lookups); - - let lookup_start_addr = 73; - // there should be two lookups for start and end rows of hasher operation - let expected_lookups_len = 2; - // make sure the lookups have correct labels, addresses, indices and contexts. - let expected_lookup_start = HasherLookup::new( - MP_VERIFY_LABEL, - lookup_start_addr, - Felt::new(3), - HasherLookupContext::Start, - ); - let expected_lookup_end = HasherLookup::new( - RETURN_HASH_LABEL, - lookup_start_addr + 3 * HASH_CYCLE_LEN as u32 - 1, - ZERO, - HasherLookupContext::Return, - ); - check_lookups_validity( - lookups, - expected_lookups_len, - vec![expected_lookup_start, expected_lookup_end], - ); + + hasher.build_merkle_root(leaves[3], &path3, Felt::new(3)); // build and check the trace for validity - let (trace, aux_hints) = build_trace(hasher, 96); + let trace = build_trace(hasher, 96); check_merkle_path(&trace, 0, leaves[0], &path0, 0, MP_VERIFY); check_merkle_path(&trace, 24, leaves[3], &path3, 3, MP_VERIFY); check_merkle_path(&trace, 48, leaves[7], &path7, 7, MP_VERIFY); check_merkle_path(&trace, 72, leaves[3], &path3, 3, MP_VERIFY); - - // make sure aux hints for sibling table are empty - assert!(aux_hints.hints().is_empty()); - assert!(aux_hints.rows().is_empty()); } #[test] @@ -347,78 +161,25 @@ fn hasher_update_merkle_root() { // build a Merkle tree let leaves = init_leaves(&[1, 2]); - let mut tree = MerkleTree::new(leaves.to_vec()).unwrap(); + let mut tree = MerkleTree::new(&leaves).unwrap(); // initialize the hasher and update both leaves let mut hasher = Hasher::default(); let path0 = tree.get_path(NodeIndex::new(1, 0).unwrap()).unwrap(); let new_leaf0 = init_leaf(3); - let mut lookups = Vec::new(); - let lookup_start_addr = 1; - hasher.update_merkle_root(leaves[0], new_leaf0, &path0, ZERO, &mut lookups); - tree.update_leaf(0, new_leaf0).unwrap(); - let expected_lookups_len = 4; - // make sure the lookups have correct labels, addresses, indices and contexts. 
- let expected_lookups = vec![ - HasherLookup::new(MR_UPDATE_OLD_LABEL, lookup_start_addr, ZERO, HasherLookupContext::Start), - HasherLookup::new( - RETURN_HASH_LABEL, - lookup_start_addr + HASH_CYCLE_LEN as u32 - 1, - ZERO, - HasherLookupContext::Return, - ), - HasherLookup::new( - MR_UPDATE_NEW_LABEL, - lookup_start_addr + HASH_CYCLE_LEN as u32, - ZERO, - HasherLookupContext::Start, - ), - HasherLookup::new( - RETURN_HASH_LABEL, - lookup_start_addr + 2 * HASH_CYCLE_LEN as u32 - 1, - ZERO, - HasherLookupContext::Return, - ), - ]; - check_lookups_validity(lookups, expected_lookups_len, expected_lookups); + hasher.update_merkle_root(leaves[0], new_leaf0, &path0, ZERO); + tree.update_leaf(0, new_leaf0).unwrap(); let path1 = tree.get_path(NodeIndex::new(1, 1).unwrap()).unwrap(); let new_leaf1 = init_leaf(4); - let mut lookups = Vec::new(); - hasher.update_merkle_root(leaves[1], new_leaf1, &path1, ONE, &mut lookups); + hasher.update_merkle_root(leaves[1], new_leaf1, &path1, ONE); tree.update_leaf(1, new_leaf1).unwrap(); - let lookup_start_addr = 17; - let expected_lookups_len = 4; - // make sure the lookups have correct labels, addresses, indices and contexts. - let expected_lookups = vec![ - HasherLookup::new(MR_UPDATE_OLD_LABEL, lookup_start_addr, ONE, HasherLookupContext::Start), - HasherLookup::new( - RETURN_HASH_LABEL, - lookup_start_addr + HASH_CYCLE_LEN as u32 - 1, - ZERO, - HasherLookupContext::Return, - ), - HasherLookup::new( - MR_UPDATE_NEW_LABEL, - lookup_start_addr + HASH_CYCLE_LEN as u32, - ONE, - HasherLookupContext::Start, - ), - HasherLookup::new( - RETURN_HASH_LABEL, - lookup_start_addr + 2 * HASH_CYCLE_LEN as u32 - 1, - ZERO, - HasherLookupContext::Return, - ), - ]; - check_lookups_validity(lookups, expected_lookups_len, expected_lookups); - // build the trace - let (trace, aux_hints) = build_trace(hasher, 32); + let trace = build_trace(hasher, 32); // make sure the trace is correct check_selector_trace(&trace, 0, MR_UPDATE_OLD, RETURN_HASH); @@ -436,195 +197,41 @@ fn hasher_update_merkle_root() { assert_eq!(node_idx_column[24], ONE); assert_eq!(&node_idx_column[25..], &[ZERO; 7]); - // make sure sibling table hints were built correctly - let expected_hints = vec![ - // first update - (0, ChipletsVTableUpdate::SiblingAdded(0)), - (8, ChipletsVTableUpdate::SiblingRemoved(0)), - // second update - (16, ChipletsVTableUpdate::SiblingAdded(1)), - (24, ChipletsVTableUpdate::SiblingRemoved(1)), - ]; - assert_eq!(expected_hints, aux_hints.hints()); - - let expected_sibling_rows = vec![ - ChipletsVTableRow::new_sibling(ZERO, path0[0].into()), - ChipletsVTableRow::new_sibling(ONE, path1[0].into()), - ]; - assert_eq!(expected_sibling_rows, aux_hints.rows()); - // --- Merkle tree with 8 leaves ------------------------------------------ // build a Merkle tree let leaves = init_leaves(&[1, 2, 3, 4, 5, 6, 7, 8]); - let mut tree = MerkleTree::new(leaves.to_vec()).unwrap(); + let mut tree = MerkleTree::new(&leaves).unwrap(); // initialize the hasher let mut hasher = Hasher::default(); let path3 = tree.get_path(NodeIndex::new(3, 3).unwrap()).unwrap(); let new_leaf3 = init_leaf(23); - let mut lookups = Vec::new(); - hasher.update_merkle_root(leaves[3], new_leaf3, &path3, Felt::new(3), &mut lookups); - tree.update_leaf(3, new_leaf3).unwrap(); - let lookup_start_addr = 1; - let expected_lookups_len = 4; - // make sure the lookups have correct labels, addresses, indices and contexts. 
- let expected_lookups = vec![ - HasherLookup::new( - MR_UPDATE_OLD_LABEL, - lookup_start_addr, - Felt::new(3), - HasherLookupContext::Start, - ), - HasherLookup::new( - RETURN_HASH_LABEL, - lookup_start_addr + 3 * HASH_CYCLE_LEN as u32 - 1, - ZERO, - HasherLookupContext::Return, - ), - HasherLookup::new( - MR_UPDATE_NEW_LABEL, - lookup_start_addr + 3 * HASH_CYCLE_LEN as u32, - Felt::new(3), - HasherLookupContext::Start, - ), - HasherLookup::new( - RETURN_HASH_LABEL, - lookup_start_addr + 3 * HASH_CYCLE_LEN as u32 + 3 * HASH_CYCLE_LEN as u32 - 1, - ZERO, - HasherLookupContext::Return, - ), - ]; - check_lookups_validity(lookups, expected_lookups_len, expected_lookups); + hasher.update_merkle_root(leaves[3], new_leaf3, &path3, Felt::new(3)); + tree.update_leaf(3, new_leaf3).unwrap(); let path6 = tree.get_path(NodeIndex::new(3, 6).unwrap()).unwrap(); let new_leaf6 = init_leaf(25); - let mut lookups = Vec::new(); - hasher.update_merkle_root(leaves[6], new_leaf6, &path6, Felt::new(6), &mut lookups); + hasher.update_merkle_root(leaves[6], new_leaf6, &path6, Felt::new(6)); tree.update_leaf(6, new_leaf6).unwrap(); - let lookup_start_addr = 49; - let expected_lookups_len = 4; - // make sure the lookups have correct labels, addresses, indices and contexts. - let expected_lookups = vec![ - HasherLookup::new( - MR_UPDATE_OLD_LABEL, - lookup_start_addr, - Felt::new(6), - HasherLookupContext::Start, - ), - HasherLookup::new( - RETURN_HASH_LABEL, - lookup_start_addr + 3 * HASH_CYCLE_LEN as u32 - 1, - ZERO, - HasherLookupContext::Return, - ), - HasherLookup::new( - MR_UPDATE_NEW_LABEL, - lookup_start_addr + 3 * HASH_CYCLE_LEN as u32, - Felt::new(6), - HasherLookupContext::Start, - ), - HasherLookup::new( - RETURN_HASH_LABEL, - lookup_start_addr + 3 * HASH_CYCLE_LEN as u32 + 3 * HASH_CYCLE_LEN as u32 - 1, - ZERO, - HasherLookupContext::Return, - ), - ]; - check_lookups_validity(lookups, expected_lookups_len, expected_lookups); - // update leaf 3 again let path3_2 = tree.get_path(NodeIndex::new(3, 3).unwrap()).unwrap(); let new_leaf3_2 = init_leaf(27); - let mut lookups = Vec::new(); - hasher.update_merkle_root(new_leaf3, new_leaf3_2, &path3_2, Felt::new(3), &mut lookups); + hasher.update_merkle_root(new_leaf3, new_leaf3_2, &path3_2, Felt::new(3)); tree.update_leaf(3, new_leaf3_2).unwrap(); assert_ne!(path3, path3_2); - let lookup_start_addr = 97; - let expected_lookups_len = 4; - // make sure the lookups have correct labels, addresses, indices and contexts. 
- let expected_lookups = vec![ - HasherLookup::new( - MR_UPDATE_OLD_LABEL, - lookup_start_addr, - Felt::new(3), - HasherLookupContext::Start, - ), - HasherLookup::new( - RETURN_HASH_LABEL, - lookup_start_addr + 3 * HASH_CYCLE_LEN as u32 - 1, - ZERO, - HasherLookupContext::Return, - ), - HasherLookup::new( - MR_UPDATE_NEW_LABEL, - lookup_start_addr + 3 * HASH_CYCLE_LEN as u32, - Felt::new(3), - HasherLookupContext::Start, - ), - HasherLookup::new( - RETURN_HASH_LABEL, - lookup_start_addr + 3 * HASH_CYCLE_LEN as u32 + 3 * HASH_CYCLE_LEN as u32 - 1, - ZERO, - HasherLookupContext::Return, - ), - ]; - check_lookups_validity(lookups, expected_lookups_len, expected_lookups); - // build and check the trace for validity - let (trace, aux_hints) = build_trace(hasher, 144); + let trace = build_trace(hasher, 144); check_merkle_path(&trace, 0, leaves[3], &path3, 3, MR_UPDATE_OLD); check_merkle_path(&trace, 24, new_leaf3, &path3, 3, MR_UPDATE_NEW); check_merkle_path(&trace, 48, leaves[6], &path6, 6, MR_UPDATE_OLD); check_merkle_path(&trace, 72, new_leaf6, &path6, 6, MR_UPDATE_NEW); check_merkle_path(&trace, 96, new_leaf3, &path3_2, 3, MR_UPDATE_OLD); check_merkle_path(&trace, 120, new_leaf3_2, &path3_2, 3, MR_UPDATE_NEW); - - // make sure sibling table hints were built correctly - let expected_hints = vec![ - // first update - (0, ChipletsVTableUpdate::SiblingAdded(0)), - (8, ChipletsVTableUpdate::SiblingAdded(1)), - (16, ChipletsVTableUpdate::SiblingAdded(2)), - (24, ChipletsVTableUpdate::SiblingRemoved(0)), - (32, ChipletsVTableUpdate::SiblingRemoved(1)), - (40, ChipletsVTableUpdate::SiblingRemoved(2)), - // second update - (48, ChipletsVTableUpdate::SiblingAdded(3)), - (56, ChipletsVTableUpdate::SiblingAdded(4)), - (64, ChipletsVTableUpdate::SiblingAdded(5)), - (72, ChipletsVTableUpdate::SiblingRemoved(3)), - (80, ChipletsVTableUpdate::SiblingRemoved(4)), - (88, ChipletsVTableUpdate::SiblingRemoved(5)), - // third update - (96, ChipletsVTableUpdate::SiblingAdded(6)), - (104, ChipletsVTableUpdate::SiblingAdded(7)), - (112, ChipletsVTableUpdate::SiblingAdded(8)), - (120, ChipletsVTableUpdate::SiblingRemoved(6)), - (128, ChipletsVTableUpdate::SiblingRemoved(7)), - (136, ChipletsVTableUpdate::SiblingRemoved(8)), - ]; - assert_eq!(expected_hints, aux_hints.hints()); - - let expected_sibling_rows = vec![ - // first update - ChipletsVTableRow::new_sibling(Felt::new(3), path3[0].into()), - ChipletsVTableRow::new_sibling(Felt::new(3 >> 1), path3[1].into()), - ChipletsVTableRow::new_sibling(Felt::new(3 >> 2), path3[2].into()), - // second update - ChipletsVTableRow::new_sibling(Felt::new(6), path6[0].into()), - ChipletsVTableRow::new_sibling(Felt::new(6 >> 1), path6[1].into()), - ChipletsVTableRow::new_sibling(Felt::new(6 >> 2), path6[2].into()), - // third update - ChipletsVTableRow::new_sibling(Felt::new(3), path3_2[0].into()), - ChipletsVTableRow::new_sibling(Felt::new(3 >> 1), path3_2[1].into()), - ChipletsVTableRow::new_sibling(Felt::new(3 >> 2), path3_2[2].into()), - ]; - assert_eq!(expected_sibling_rows, aux_hints.rows()); } // MEMOIZATION TESTS @@ -659,23 +266,8 @@ fn hash_memoization_control_blocks() { let expected_hash = join_block.hash(); - let mut lookups = Vec::new(); // builds the trace of the join block. - let (_, final_state) = - hasher.hash_control_block(h1, h2, join_block.domain(), expected_hash, &mut lookups); - - let lookup_start_addr = 1; - let expected_lookups_len = 2; - // make sure the lookups have correct labels, addresses, indices and contexts. 
- let lookup_start = - HasherLookup::new(LINEAR_HASH_LABEL, lookup_start_addr, ZERO, HasherLookupContext::Start); - let lookup_end = HasherLookup::new( - RETURN_HASH_LABEL, - lookup_start_addr + HASH_CYCLE_LEN as u32 - 1, - ZERO, - HasherLookupContext::Return, - ); - check_lookups_validity(lookups, expected_lookups_len, vec![lookup_start, lookup_end]); + let (_, final_state) = hasher.hash_control_block(h1, h2, join_block.domain(), expected_hash); // make sure the hash of the final state is the same as the expected hash. assert_eq!(Digest::new(final_state), expected_hash); @@ -693,23 +285,9 @@ fn hash_memoization_control_blocks() { let expected_hash = split1_block.hash(); - let mut lookups = Vec::new(); // builds the hash execution trace of the first split block from scratch. let (addr, final_state) = - hasher.hash_control_block(h1, h2, split1_block.domain(), expected_hash, &mut lookups); - - let lookup_start_addr = 9; - let expected_lookups_len = 2; - // make sure the lookups have correct labels, addresses, indices and contexts. - let lookup_start = - HasherLookup::new(LINEAR_HASH_LABEL, lookup_start_addr, ZERO, HasherLookupContext::Start); - let lookup_end = HasherLookup::new( - RETURN_HASH_LABEL, - lookup_start_addr + HASH_CYCLE_LEN as u32 - 1, - ZERO, - HasherLookupContext::Return, - ); - check_lookups_validity(lookups, expected_lookups_len, vec![lookup_start, lookup_end]); + hasher.hash_control_block(h1, h2, split1_block.domain(), expected_hash); let first_block_final_state = final_state; @@ -732,24 +310,10 @@ fn hash_memoization_control_blocks() { .expect("Could not convert slice to array"); let expected_hash = split2_block.hash(); - let mut lookups = Vec::new(); // builds the hash execution trace of the second split block by copying it from the trace of // the first split block. let (addr, final_state) = - hasher.hash_control_block(h1, h2, split2_block.domain(), expected_hash, &mut lookups); - - let lookup_start_addr = 17; - let expected_lookups_len = 2; - // make sure the lookups have correct labels, addresses, indices and contexts. - let lookup_start = - HasherLookup::new(LINEAR_HASH_LABEL, lookup_start_addr, ZERO, HasherLookupContext::Start); - let lookup_end = HasherLookup::new( - RETURN_HASH_LABEL, - lookup_start_addr + HASH_CYCLE_LEN as u32 - 1, - ZERO, - HasherLookupContext::Return, - ); - check_lookups_validity(lookups, expected_lookups_len, vec![lookup_start, lookup_end]); + hasher.hash_control_block(h1, h2, split2_block.domain(), expected_hash); // make sure the hash of the final state of the second split block is the same as the expected // hash. @@ -760,7 +324,7 @@ fn hash_memoization_control_blocks() { let copied_start_row = addr.as_int() as usize - 1; let copied_end_row = hasher.trace_len() - 1; - let (trace, _) = build_trace(hasher, copied_end_row + 1); + let trace = build_trace(hasher, copied_end_row + 1); // check the row address at which memoized block starts. let hash_cycle_len: u64 = HASH_CYCLE_LEN.try_into().expect("Could not convert usize to u64"); @@ -857,23 +421,9 @@ fn hash_memoization_span_blocks_check(span_block: CodeBlock) { .expect("Could not convert slice to array"); let expected_hash = join1_block.hash(); - let mut lookups = Vec::new(); // builds the trace of the Join1 block. - let (_, final_state) = - hasher.hash_control_block(h1, h2, join1_block.domain(), expected_hash, &mut lookups); - - let lookup_start_addr = 1; - let expected_lookups_len = 2; - // make sure the lookups have correct labels, addresses, indices and contexts. 
- let lookup_start = - HasherLookup::new(LINEAR_HASH_LABEL, lookup_start_addr, ZERO, HasherLookupContext::Start); - let lookup_end = HasherLookup::new( - RETURN_HASH_LABEL, - lookup_start_addr + HASH_CYCLE_LEN as u32 - 1, - ZERO, - HasherLookupContext::Return, - ); - check_lookups_validity(lookups, expected_lookups_len, vec![lookup_start, lookup_end]); + let (_, final_state) = hasher.hash_control_block(h1, h2, join1_block.domain(), expected_hash); + // make sure the hash of the final state of Join1 is the same as the expected hash. assert_eq!(Digest::new(final_state), expected_hash); @@ -889,22 +439,7 @@ fn hash_memoization_span_blocks_check(span_block: CodeBlock) { .expect("Could not convert slice to array"); let expected_hash = join2_block.hash(); - let mut lookups = Vec::new(); - let (_, final_state) = - hasher.hash_control_block(h1, h2, join2_block.domain(), expected_hash, &mut lookups); - - let lookup_start_addr = 9; - let expected_lookups_len = 2; - // make sure the lookups have correct labels, addresses, indices and contexts. - let lookup_start = - HasherLookup::new(LINEAR_HASH_LABEL, lookup_start_addr, ZERO, HasherLookupContext::Start); - let lookup_end = HasherLookup::new( - RETURN_HASH_LABEL, - lookup_start_addr + HASH_CYCLE_LEN as u32 - 1, - ZERO, - HasherLookupContext::Return, - ); - check_lookups_validity(lookups, expected_lookups_len, vec![lookup_start, lookup_end]); + let (_, final_state) = hasher.hash_control_block(h1, h2, join2_block.domain(), expected_hash); // make sure the hash of the final state of Join2 is the same as the expected hash. assert_eq!(Digest::new(final_state), expected_hash); @@ -916,46 +451,10 @@ fn hash_memoization_span_blocks_check(span_block: CodeBlock) { }; // builds the hash execution trace of the first span block from scratch. - let mut lookups = Vec::new(); let (addr, final_state) = - hasher.hash_span_block(span1_block_val.op_batches(), span1_block.hash(), &mut lookups); - - let num_batches = span1_block_val.op_batches().len(); - let lookup_start_addr = 17; - - let expected_lookups_len = 2 + num_batches - 1; - - let mut expected_lookups = Vec::new(); - - // add lookup for start of span block - let lookup_start = - HasherLookup::new(LINEAR_HASH_LABEL, lookup_start_addr, ZERO, HasherLookupContext::Start); - expected_lookups.push(lookup_start); + hasher.hash_span_block(span1_block_val.op_batches(), span1_block.hash()); - // add lookups for absorbed batches - for i in 1..num_batches { - let lookup = HasherLookup::new( - LINEAR_HASH_LABEL, - lookup_start_addr + (i * HASH_CYCLE_LEN) as u32 - 1, - ZERO, - HasherLookupContext::Absorb, - ); - expected_lookups.push(lookup); - } - - let last_lookup_addr_memoized_block = - lookup_start_addr + (num_batches * HASH_CYCLE_LEN) as u32 - 1; - - // add lookup for end of span block - let lookup_end = HasherLookup::new( - RETURN_HASH_LABEL, - last_lookup_addr_memoized_block, - ZERO, - HasherLookupContext::Return, - ); - expected_lookups.push(lookup_end); - - check_lookups_validity(lookups, expected_lookups_len, expected_lookups); + let _num_batches = span1_block_val.op_batches().len(); let first_span_block_final_state = final_state; @@ -972,45 +471,12 @@ fn hash_memoization_span_blocks_check(span_block: CodeBlock) { unreachable!() }; - let mut lookups = Vec::new(); // builds the hash execution trace of the second span block by copying the sections of the // trace corresponding to the first span block with the same hash. 
let (addr, final_state) = - hasher.hash_span_block(span2_block_val.op_batches(), span2_block.hash(), &mut lookups); - - let num_batches = span2_block_val.op_batches().len(); - let lookup_start_addr = last_lookup_addr_memoized_block + 1; - - let expected_lookups_len = 2 + num_batches - 1; + hasher.hash_span_block(span2_block_val.op_batches(), span2_block.hash()); - let mut expected_lookups = Vec::new(); - - // add lookup for start of span block - let lookup_start = - HasherLookup::new(LINEAR_HASH_LABEL, lookup_start_addr, ZERO, HasherLookupContext::Start); - expected_lookups.push(lookup_start); - - // add lookups for absorbed batches - for i in 1..num_batches { - let lookup = HasherLookup::new( - LINEAR_HASH_LABEL, - lookup_start_addr + (i * HASH_CYCLE_LEN) as u32 - 1, - ZERO, - HasherLookupContext::Absorb, - ); - expected_lookups.push(lookup); - } - - // add lookup for end of span block - let lookup_end = HasherLookup::new( - RETURN_HASH_LABEL, - lookup_start_addr + (num_batches * HASH_CYCLE_LEN) as u32 - 1, - ZERO, - HasherLookupContext::Return, - ); - expected_lookups.push(lookup_end); - - check_lookups_validity(lookups, expected_lookups_len, expected_lookups); + let _num_batches = span2_block_val.op_batches().len(); let expected_hash = span2_block.hash(); // make sure the hash of the final state of Span2 block is the same as the expected hash. @@ -1022,7 +488,7 @@ fn hash_memoization_span_blocks_check(span_block: CodeBlock) { let copied_start_row = addr.as_int() as usize - 1; let copied_end_row = hasher.trace_len() - 1; - let (trace, _) = build_trace(hasher, copied_end_row + 1); + let trace = build_trace(hasher, copied_end_row + 1); // check correct copy after memoization check_memoized_trace(&trace, start_row, end_row, copied_start_row, copied_end_row); @@ -1033,11 +499,11 @@ fn hash_memoization_span_blocks_check(span_block: CodeBlock) { /// Builds an execution trace for the provided hasher. The trace must have the number of rows /// specified by num_rows. -fn build_trace(hasher: Hasher, num_rows: usize) -> (Vec>, ChipletsVTableTraceBuilder) { +fn build_trace(hasher: Hasher, num_rows: usize) -> Vec> { let mut trace = (0..TRACE_WIDTH).map(|_| vec![ZERO; num_rows]).collect::>(); let mut fragment = TraceFragment::trace_to_fragment(&mut trace); - let aux_trace_builder = hasher.fill_trace(&mut fragment); - (trace, aux_trace_builder) + hasher.fill_trace(&mut fragment); + trace } /// Makes sure that the provided trace is consistent with verifying the specified Merkle path @@ -1145,24 +611,6 @@ fn check_memoized_trace( } } -/// Makes sure the lookups are built correctly. -fn check_lookups_validity( - lookups: Vec, - expected_lookups_length: usize, - expected_lookups: Vec, -) { - // make sure the length of the lookups is what we expect. - assert_eq!(expected_lookups_length, lookups.len()); - - // make sure the length of lookups and expected lookups is same. - assert_eq!(expected_lookups.len(), lookups.len()); - - for (lookup, expected_lookup) in lookups.iter().zip(expected_lookups) { - // make sure the lookups match with what we expect. - assert_eq!(expected_lookup, *lookup); - } -} - /// Makes sure that a row in the provided trace is equal to the provided values at the specified /// row index. 
fn assert_row_equal(trace: &[Vec], row_idx: usize, values: &[Felt]) { diff --git a/processor/src/chiplets/hasher/trace.rs b/processor/src/chiplets/hasher/trace.rs index 2efa840104..e19174907a 100644 --- a/processor/src/chiplets/hasher/trace.rs +++ b/processor/src/chiplets/hasher/trace.rs @@ -1,4 +1,5 @@ -use super::{Felt, HasherState, Selectors, TraceFragment, Vec, STATE_WIDTH, TRACE_WIDTH, ZERO}; +use super::{Felt, HasherState, Selectors, TraceFragment, STATE_WIDTH, TRACE_WIDTH, ZERO}; +use crate::utils::collections::*; use core::ops::Range; use miden_air::trace::chiplets::hasher::NUM_ROUNDS; use vm_core::chiplets::hasher::apply_round; diff --git a/processor/src/chiplets/kernel_rom/mod.rs b/processor/src/chiplets/kernel_rom/mod.rs index ee2a4a2422..8e5c0100ae 100644 --- a/processor/src/chiplets/kernel_rom/mod.rs +++ b/processor/src/chiplets/kernel_rom/mod.rs @@ -1,8 +1,5 @@ -use super::{ - trace::LookupTableRow, BTreeMap, ChipletsBus, ChipletsVTableTraceBuilder, ColMatrix, Digest, - ExecutionError, Felt, FieldElement, Kernel, TraceFragment, Word, ONE, ZERO, -}; -use miden_air::trace::chiplets::kernel_rom::{KERNEL_PROC_LABEL, TRACE_WIDTH}; +use super::{BTreeMap, Digest, ExecutionError, Felt, Kernel, TraceFragment, Word, ONE, ZERO}; +use miden_air::trace::chiplets::kernel_rom::TRACE_WIDTH; #[cfg(test)] mod tests; @@ -99,13 +96,7 @@ impl KernelRom { // -------------------------------------------------------------------------------------------- /// Populates the provided execution trace fragment with execution trace of this kernel ROM. - pub fn fill_trace( - self, - trace: &mut TraceFragment, - chiplets_bus: &mut ChipletsBus, - virtual_table: &mut ChipletsVTableTraceBuilder, - kernel_rom_start_row: usize, - ) { + pub fn fill_trace(self, trace: &mut TraceFragment) { debug_assert_eq!(TRACE_WIDTH, trace.width(), "inconsistent trace fragment width"); let mut row = 0; for (idx, access_info) in self.access_map.values().enumerate() { @@ -113,21 +104,13 @@ impl KernelRom { // write at least one row into the trace for each kernel procedure access_info.write_into_trace(trace, row, idx); - // add the procedure to the virtual table - virtual_table.add_kernel_proc(row as u32, idx, access_info.proc_hash); - // provide the kernel procedure to the chiplets bus, if it was accessed at least once - let lookup = KernelProcLookup::new(access_info.proc_hash); - if access_info.num_accesses >= 1 { - chiplets_bus.provide_kernel_proc_call(lookup, (kernel_rom_start_row + row) as u32); - } row += 1; // if the procedure was accessed more than once, we need write a row and provide the // procedure to the bus per additional access for _ in 1..access_info.num_accesses { access_info.write_into_trace(trace, row, idx); - chiplets_bus.provide_kernel_proc_call(lookup, (kernel_rom_start_row + row) as u32); row += 1; } } @@ -171,33 +154,3 @@ impl ProcAccessInfo { trace.set(row, 5, self.proc_hash[3]); } } - -// KERNEL ROM PROCEDURE LOOKUPS -// ================================================================================================ -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub struct KernelProcLookup { - proc_hash: Word, -} - -impl KernelProcLookup { - pub fn new(proc_hash: Word) -> Self { - Self { proc_hash } - } -} - -impl LookupTableRow for KernelProcLookup { - /// Reduces this row to a single field element in the field specified by E. This requires - /// at least 6 alpha values. 
- fn to_value>( - &self, - _main_trace: &ColMatrix, - alphas: &[E], - ) -> E { - alphas[0] - + alphas[1].mul_base(KERNEL_PROC_LABEL) - + alphas[2].mul_base(self.proc_hash[0]) - + alphas[3].mul_base(self.proc_hash[1]) - + alphas[4].mul_base(self.proc_hash[2]) - + alphas[5].mul_base(self.proc_hash[3]) - } -} diff --git a/processor/src/chiplets/kernel_rom/tests.rs b/processor/src/chiplets/kernel_rom/tests.rs index dfe8b89d89..a2cf510b60 100644 --- a/processor/src/chiplets/kernel_rom/tests.rs +++ b/processor/src/chiplets/kernel_rom/tests.rs @@ -1,12 +1,5 @@ -use super::{ - ChipletsBus, Felt, Kernel, KernelProcLookup, KernelRom, TraceFragment, Word, ONE, TRACE_WIDTH, - ZERO, -}; -use crate::chiplets::{ - aux_trace::{ChipletLookup, ChipletsBusRow}, - ChipletsVTableTraceBuilder, -}; -use vm_core::utils::collections::Vec; +use super::{Felt, Kernel, KernelRom, TraceFragment, Word, ONE, TRACE_WIDTH, ZERO}; +use vm_core::utils::collections::*; // CONSTANTS // ================================================================================================ @@ -17,19 +10,6 @@ const PROC2_HASH: Word = [ONE, ONE, ONE, ONE]; // TESTS // ================================================================================================ -#[test] -fn kernel_rom_empty() { - let kernel = Kernel::default(); - let rom = KernelRom::new(kernel); - assert_eq!(0, rom.trace_len()); - - // generate trace - let (_, _, virtual_table) = build_trace(rom, 0); - - // make sure the chiplets table includes no kernel procedures - verify_proc_table(&virtual_table, &[]); -} - #[test] fn kernel_rom_invalid_access() { let kernel = build_kernel(); @@ -51,7 +31,7 @@ fn kernel_rom_no_access() { assert_eq!(expected_trace_len, rom.trace_len()); // generate trace - let (trace, _, virtual_table) = build_trace(rom, expected_trace_len); + let trace = build_trace(rom, expected_trace_len); // first row of the trace should correspond to the first procedure let row = 0; @@ -72,9 +52,6 @@ fn kernel_rom_no_access() { assert_eq!(trace[3][row], PROC2_HASH[1]); assert_eq!(trace[4][row], PROC2_HASH[2]); assert_eq!(trace[5][row], PROC2_HASH[3]); - - // make sure the chiplets table includes each kernel procedure exactly once - verify_proc_table(&virtual_table, &[PROC1_HASH, PROC2_HASH]); } #[test] @@ -93,7 +70,7 @@ fn kernel_rom_with_access() { assert_eq!(expected_trace_len, rom.trace_len()); // generate trace - let (trace, chiplets_bus, virtual_table) = build_trace(rom, expected_trace_len); + let trace = build_trace(rom, expected_trace_len); // first 3 rows of the trace should correspond to the first procedure for row in 0..3 { @@ -114,19 +91,6 @@ fn kernel_rom_with_access() { assert_eq!(trace[4][row], PROC2_HASH[2]); assert_eq!(trace[5][row], PROC2_HASH[3]); } - - // make sure the lookups were sent to the bus correctly from the kernel rom chiplet - let proc1_lookup = KernelProcLookup::new(PROC1_HASH); - let proc2_lookup = KernelProcLookup::new(PROC2_HASH); - - verify_bus(&chiplets_bus, 0, 0, &proc1_lookup); - verify_bus(&chiplets_bus, 1, 1, &proc1_lookup); - verify_bus(&chiplets_bus, 2, 2, &proc1_lookup); - verify_bus(&chiplets_bus, 3, 3, &proc2_lookup); - verify_bus(&chiplets_bus, 4, 4, &proc2_lookup); - - // make sure the chiplets table includes each kernel procedure exactly once - verify_proc_table(&virtual_table, &[PROC1_HASH, PROC2_HASH]); } // HELPER FUNCTIONS @@ -134,51 +98,15 @@ fn kernel_rom_with_access() { /// Creates a kernel with two dummy procedures fn build_kernel() -> Kernel { - Kernel::new(&[PROC1_HASH.into(), PROC2_HASH.into()]) + 
Kernel::new(&[PROC1_HASH.into(), PROC2_HASH.into()]).unwrap() } /// Builds a trace of the specified length and fills it with data from the provided KernelRom /// instance. -fn build_trace( - kernel_rom: KernelRom, - num_rows: usize, -) -> (Vec<Vec<Felt>>, ChipletsBus, ChipletsVTableTraceBuilder) { - let mut chiplets_bus = ChipletsBus::default(); - let mut virtual_table = ChipletsVTableTraceBuilder::default(); +fn build_trace(kernel_rom: KernelRom, num_rows: usize) -> Vec<Vec<Felt>> { let mut trace = (0..TRACE_WIDTH).map(|_| vec![ZERO; num_rows]).collect::<Vec<_>>(); let mut fragment = TraceFragment::trace_to_fragment(&mut trace); - kernel_rom.fill_trace(&mut fragment, &mut chiplets_bus, &mut virtual_table, 0); - - (trace, chiplets_bus, virtual_table) -} - -/// Verifies that the chiplet bus received the specified KernelProcLookup response at `cycle` which -/// was added to the list of responses at `index`. -fn verify_bus( - chiplets_bus: &ChipletsBus, - index: usize, - cycle: u32, - proc_lookup: &KernelProcLookup, -) { - let expected_lookup = ChipletLookup::KernelRom(*proc_lookup); - let expected_hint = ChipletsBusRow::new(&[], Some(index as u32)); - - let lookup = chiplets_bus.get_response_row(index); - let hint = chiplets_bus.get_lookup_hint(cycle).unwrap(); - - assert_eq!(expected_lookup, lookup); - assert_eq!(&expected_hint, hint); -} + kernel_rom.fill_trace(&mut fragment); -/// Verifies that the kernel procedure table contains every procedure in the kernel exactly once. -fn verify_proc_table(virtual_table: &ChipletsVTableTraceBuilder, proc_hashes: &[Word]) { - // these tests are only for the kernel rom chiplet, so the virtual table should not be used by - // other chiplets in these cases - assert_eq!(virtual_table.rows().len(), proc_hashes.len()); - for (row, proc_hash) in virtual_table.rows().iter().zip(proc_hashes) { - assert!(row.kernel_proc().is_some()); - if let Some(proc) = row.kernel_proc() { - assert_eq!(proc.proc_hash(), *proc_hash); - } - } + trace } diff --git a/processor/src/chiplets/memory/mod.rs b/processor/src/chiplets/memory/mod.rs index 98a6a8676b..a2a522b44e 100644 --- a/processor/src/chiplets/memory/mod.rs +++ b/processor/src/chiplets/memory/mod.rs @@ -1,9 +1,8 @@ use super::{ - trace::LookupTableRow, utils::{split_element_u32_into_u16, split_u32_into_u16}, - BTreeMap, ChipletsBus, ColMatrix, Felt, FieldElement, RangeChecker, StarkField, TraceFragment, - Vec, Word, EMPTY_WORD, ONE, + Felt, FieldElement, RangeChecker, TraceFragment, Word, EMPTY_WORD, ONE, }; +use crate::{system::ContextId, utils::collections::*}; use miden_air::trace::chiplets::memory::{ ADDR_COL_IDX, CLK_COL_IDX, CTX_COL_IDX, D0_COL_IDX, D1_COL_IDX, D_INV_COL_IDX, V_COL_RANGE, }; @@ -74,7 +73,7 @@ const INIT_MEM_VALUE: Word = EMPTY_WORD; #[derive(Default)] pub struct Memory { /// Memory segment traces sorted by their execution context ID. - trace: BTreeMap<u32, MemorySegmentTrace>, + trace: BTreeMap<ContextId, MemorySegmentTrace>, /// Total number of entries in the trace (across all contexts); tracked separately so that we /// don't have to sum up lengths of all address trace vectors for all contexts all the time. @@ -96,7 +95,7 @@ impl Memory { /// /// Unlike read() which modifies the memory access trace, this method returns the value at the /// specified address (if one exists) without altering the memory access trace.
- pub fn get_value(&self, ctx: u32, addr: u32) -> Option<Word> { + pub fn get_value(&self, ctx: ContextId, addr: u32) -> Option<Word> { match self.trace.get(&ctx) { Some(segment) => segment.get_value(addr), None => None, @@ -105,7 +104,7 @@ impl Memory { /// Returns the word at the specified context/address which should be used as the "old value" for a /// write request. It will be the previously stored value, if one exists, or initialized memory. - pub fn get_old_value(&self, ctx: u32, addr: u32) -> Word { + pub fn get_old_value(&self, ctx: ContextId, addr: u32) -> Word { // get the stored word or return [0, 0, 0, 0], since the memory is initialized with zeros self.get_value(ctx, addr).unwrap_or(INIT_MEM_VALUE) } @@ -113,7 +112,7 @@ impl Memory { /// Returns the entire memory state for the specified execution context at the specified cycle. /// The state is returned as a vector of (address, value) tuples, and includes addresses which /// have been accessed at least once. - pub fn get_state_at(&self, ctx: u32, clk: u32) -> Vec<(u64, Word)> { + pub fn get_state_at(&self, ctx: ContextId, clk: u32) -> Vec<(u64, Word)> { if clk == 0 { return vec![]; } @@ -131,13 +130,13 @@ impl Memory { /// /// If the specified address hasn't been previously written to, four ZERO elements are /// returned. This effectively implies that memory is initialized to ZERO. - pub fn read(&mut self, ctx: u32, addr: u32, clk: u32) -> Word { + pub fn read(&mut self, ctx: ContextId, addr: u32, clk: u32) -> Word { self.num_trace_rows += 1; self.trace.entry(ctx).or_default().read(addr, Felt::from(clk)) } /// Writes the provided word at the specified context/address. - pub fn write(&mut self, ctx: u32, addr: u32, clk: u32, value: Word) { + pub fn write(&mut self, ctx: ContextId, addr: u32, clk: u32, value: Word) { self.num_trace_rows += 1; self.trace.entry(ctx).or_default().write(addr, Felt::from(clk), value); } @@ -168,7 +167,7 @@ impl Memory { // compute delta as difference between context IDs, addresses, or clock cycles let delta = if prev_ctx != ctx { - (ctx - prev_ctx) as u64 + (u32::from(ctx) - u32::from(prev_ctx)).into() } else if prev_addr != addr { (addr - prev_addr) as u64 } else { @@ -189,12 +188,7 @@ impl Memory { } /// Fills the provided trace fragment with trace data from this memory instance. - pub fn fill_trace( - self, - trace: &mut TraceFragment, - chiplets_bus: &mut ChipletsBus, - memory_start_row: usize, - ) { + pub fn fill_trace(self, trace: &mut TraceFragment) { debug_assert_eq!(self.trace_len(), trace.len(), "inconsistent trace lengths"); // set the previous address and clock cycle to the first address and clock cycle of the @@ -244,17 +238,6 @@ impl Memory { // TODO: switch to batch inversion to improve efficiency. trace.set(row, D_INV_COL_IDX, delta.inv()); - // provide the memory access data to the chiplets bus. - let memory_lookup = MemoryLookup::new( - memory_access.op_label(), - ctx, - Felt::from(addr), - clk, - value, - ); - chiplets_bus - .provide_memory_operation(memory_lookup, (memory_start_row + row) as u32); - // update values for the next iteration of the loop prev_ctx = ctx; prev_addr = felt_addr; @@ -270,7 +253,7 @@ impl Memory { /// Returns the context, address, and clock cycle of the first trace row, or None if the trace /// is empty.
- fn get_first_row_info(&self) -> Option<(u32, u32, Felt)> { + fn get_first_row_info(&self) -> Option<(ContextId, u32, Felt)> { let (ctx, segment) = match self.trace.iter().next() { Some((&ctx, segment)) => (ctx, segment), None => return None, @@ -290,62 +273,3 @@ impl Memory { self.trace.iter().fold(0, |acc, (_, s)| acc + s.size()) } } - -// MEMORY LOOKUPS -// ================================================================================================ - -/// Contains the data required to describe a memory read or write. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub struct MemoryLookup { - // unique label identifying the memory operation - label: u8, - ctx: Felt, - addr: Felt, - clk: Felt, - word: Word, -} - -impl MemoryLookup { - pub fn new(label: u8, ctx: Felt, addr: Felt, clk: Felt, word: Word) -> Self { - Self { - label, - ctx, - addr, - clk, - word, - } - } - - pub fn from_ints(label: u8, ctx: u32, addr: u32, clk: u32, word: Word) -> Self { - Self { - label, - ctx: Felt::from(ctx), - addr: Felt::from(addr), - clk: Felt::from(clk), - word, - } - } -} - -impl LookupTableRow for MemoryLookup { - /// Reduces this row to a single field element in the field specified by E. This requires - /// at least 9 alpha values. - fn to_value>( - &self, - _main_trace: &ColMatrix, - alphas: &[E], - ) -> E { - let word_value = self - .word - .iter() - .enumerate() - .fold(E::ZERO, |acc, (j, element)| acc + alphas[j + 5].mul_base(*element)); - - alphas[0] - + alphas[1].mul_base(Felt::from(self.label)) - + alphas[2].mul_base(self.ctx) - + alphas[3].mul_base(self.addr) - + alphas[4].mul_base(self.clk) - + word_value - } -} diff --git a/processor/src/chiplets/memory/segment.rs b/processor/src/chiplets/memory/segment.rs index 15ceb760ec..e6ee97210c 100644 --- a/processor/src/chiplets/memory/segment.rs +++ b/processor/src/chiplets/memory/segment.rs @@ -1,9 +1,9 @@ use miden_air::trace::chiplets::memory::{ - Selectors, MEMORY_COPY_READ, MEMORY_INIT_READ, MEMORY_READ_LABEL, MEMORY_WRITE, - MEMORY_WRITE_LABEL, + Selectors, MEMORY_COPY_READ, MEMORY_INIT_READ, MEMORY_WRITE, }; -use super::{BTreeMap, Felt, StarkField, Vec, Word, INIT_MEM_VALUE}; +use super::{Felt, Word, INIT_MEM_VALUE}; +use crate::utils::collections::*; // MEMORY SEGMENT TRACE // ================================================================================================ @@ -164,14 +164,6 @@ impl MemorySegmentAccess { } } - /// Returns the operation label of the memory operation used in this memory access. - pub(super) fn op_label(&self) -> u8 { - match self.op { - MemoryOperation::InitRead | MemoryOperation::CopyRead => MEMORY_READ_LABEL, - MemoryOperation::Write => MEMORY_WRITE_LABEL, - } - } - /// Returns the word value for this memory access. 
pub(super) fn value(&self) -> Word { self.value diff --git a/processor/src/chiplets/memory/tests.rs b/processor/src/chiplets/memory/tests.rs index 3e33bfdf05..ebb98db017 100644 --- a/processor/src/chiplets/memory/tests.rs +++ b/processor/src/chiplets/memory/tests.rs @@ -1,13 +1,12 @@ use super::{ - super::aux_trace::{ChipletLookup, ChipletsBusRow}, - super::ZERO, - ChipletsBus, Felt, FieldElement, Memory, MemoryLookup, TraceFragment, Vec, ADDR_COL_IDX, - CLK_COL_IDX, CTX_COL_IDX, D0_COL_IDX, D1_COL_IDX, D_INV_COL_IDX, EMPTY_WORD, ONE, V_COL_RANGE, + super::ZERO, Felt, FieldElement, Memory, TraceFragment, ADDR_COL_IDX, CLK_COL_IDX, CTX_COL_IDX, + D0_COL_IDX, D1_COL_IDX, D_INV_COL_IDX, EMPTY_WORD, ONE, V_COL_RANGE, }; +use crate::ContextId; use miden_air::trace::chiplets::memory::{ - Selectors, MEMORY_COPY_READ, MEMORY_INIT_READ, MEMORY_READ_LABEL, MEMORY_WRITE, - MEMORY_WRITE_LABEL, TRACE_WIDTH as MEMORY_TRACE_WIDTH, + Selectors, MEMORY_COPY_READ, MEMORY_INIT_READ, MEMORY_WRITE, TRACE_WIDTH as MEMORY_TRACE_WIDTH, }; +use vm_core::{utils::collections::*, Word}; #[test] fn mem_init() { @@ -22,53 +21,50 @@ fn mem_read() { // read a value from address 0; clk = 1 let addr0 = 0; - let value = mem.read(0, addr0, 1); + let value = mem.read(ContextId::root(), addr0, 1); assert_eq!(EMPTY_WORD, value); assert_eq!(1, mem.size()); assert_eq!(1, mem.trace_len()); // read a value from address 3; clk = 2 let addr3 = 3; - let value = mem.read(0, addr3, 2); + let value = mem.read(ContextId::root(), addr3, 2); assert_eq!(EMPTY_WORD, value); assert_eq!(2, mem.size()); assert_eq!(2, mem.trace_len()); // read a value from address 0 again; clk = 3 - let value = mem.read(0, addr0, 3); + let value = mem.read(ContextId::root(), addr0, 3); assert_eq!(EMPTY_WORD, value); assert_eq!(2, mem.size()); assert_eq!(3, mem.trace_len()); // read a value from address 2; clk = 4 let addr2 = 2; - let value = mem.read(0, addr2, 4); + let value = mem.read(ContextId::root(), addr2, 4); assert_eq!(EMPTY_WORD, value); assert_eq!(3, mem.size()); assert_eq!(4, mem.trace_len()); // check generated trace and memory data provided to the ChipletsBus; rows should be sorted by // address and then clock cycle - let (trace, chiplets_bus) = build_trace(mem, 4); + let trace = build_trace(mem, 4); // address 0 let mut prev_row = [ZERO; MEMORY_TRACE_WIDTH]; - let memory_access = MemoryLookup::from_ints(MEMORY_READ_LABEL, 0, addr0, 1, EMPTY_WORD); - prev_row = - verify_memory_access(&trace, &chiplets_bus, 0, MEMORY_INIT_READ, &memory_access, prev_row); + let memory_access = MemoryAccess::new(ContextId::root(), addr0, 1, EMPTY_WORD); + prev_row = verify_memory_access(&trace, 0, MEMORY_INIT_READ, &memory_access, prev_row); - let memory_access = MemoryLookup::from_ints(MEMORY_READ_LABEL, 0, addr0, 3, EMPTY_WORD); - prev_row = - verify_memory_access(&trace, &chiplets_bus, 1, MEMORY_COPY_READ, &memory_access, prev_row); + let memory_access = MemoryAccess::new(ContextId::root(), addr0, 3, EMPTY_WORD); + prev_row = verify_memory_access(&trace, 1, MEMORY_COPY_READ, &memory_access, prev_row); // address 2 - let memory_access = MemoryLookup::from_ints(MEMORY_READ_LABEL, 0, addr2, 4, EMPTY_WORD); - prev_row = - verify_memory_access(&trace, &chiplets_bus, 2, MEMORY_INIT_READ, &memory_access, prev_row); + let memory_access = MemoryAccess::new(ContextId::root(), addr2, 4, EMPTY_WORD); + prev_row = verify_memory_access(&trace, 2, MEMORY_INIT_READ, &memory_access, prev_row); // address 3 - let memory_access = MemoryLookup::from_ints(MEMORY_READ_LABEL, 0, addr3, 2, 
EMPTY_WORD); - verify_memory_access(&trace, &chiplets_bus, 3, MEMORY_INIT_READ, &memory_access, prev_row); + let memory_access = MemoryAccess::new(ContextId::root(), addr3, 2, EMPTY_WORD); + verify_memory_access(&trace, 3, MEMORY_INIT_READ, &memory_access, prev_row); } #[test] @@ -78,56 +74,53 @@ fn mem_write() { // write a value into address 0; clk = 1 let addr0 = 0; let value1 = [ONE, ZERO, ZERO, ZERO]; - mem.write(0, addr0, 1, value1); - assert_eq!(value1, mem.get_value(0, addr0).unwrap()); + mem.write(ContextId::root(), addr0, 1, value1); + assert_eq!(value1, mem.get_value(ContextId::root(), addr0).unwrap()); assert_eq!(1, mem.size()); assert_eq!(1, mem.trace_len()); // write a value into address 2; clk = 2 let addr2 = 2; let value5 = [Felt::new(5), ZERO, ZERO, ZERO]; - mem.write(0, addr2, 2, value5); - assert_eq!(value5, mem.get_value(0, addr2).unwrap()); + mem.write(ContextId::root(), addr2, 2, value5); + assert_eq!(value5, mem.get_value(ContextId::root(), addr2).unwrap()); assert_eq!(2, mem.size()); assert_eq!(2, mem.trace_len()); // write a value into address 1; clk = 3 let addr1 = 1; let value7 = [Felt::new(7), ZERO, ZERO, ZERO]; - mem.write(0, addr1, 3, value7); - assert_eq!(value7, mem.get_value(0, addr1).unwrap()); + mem.write(ContextId::root(), addr1, 3, value7); + assert_eq!(value7, mem.get_value(ContextId::root(), addr1).unwrap()); assert_eq!(3, mem.size()); assert_eq!(3, mem.trace_len()); // write a value into address 0; clk = 4 let value9 = [Felt::new(9), ZERO, ZERO, ZERO]; - mem.write(0, addr0, 4, value9); - assert_eq!(value7, mem.get_value(0, addr1).unwrap()); + mem.write(ContextId::root(), addr0, 4, value9); + assert_eq!(value7, mem.get_value(ContextId::root(), addr1).unwrap()); assert_eq!(3, mem.size()); assert_eq!(4, mem.trace_len()); // check generated trace and memory data provided to the ChipletsBus; rows should be sorted by // address and then clock cycle - let (trace, chiplets_bus) = build_trace(mem, 4); + let trace = build_trace(mem, 4); // address 0 let mut prev_row = [ZERO; MEMORY_TRACE_WIDTH]; - let memory_access = MemoryLookup::from_ints(MEMORY_WRITE_LABEL, 0, addr0, 1, value1); - prev_row = - verify_memory_access(&trace, &chiplets_bus, 0, MEMORY_WRITE, &memory_access, prev_row); + let memory_access = MemoryAccess::new(ContextId::root(), addr0, 1, value1); + prev_row = verify_memory_access(&trace, 0, MEMORY_WRITE, &memory_access, prev_row); - let memory_access = MemoryLookup::from_ints(MEMORY_WRITE_LABEL, 0, addr0, 4, value9); - prev_row = - verify_memory_access(&trace, &chiplets_bus, 1, MEMORY_WRITE, &memory_access, prev_row); + let memory_access = MemoryAccess::new(ContextId::root(), addr0, 4, value9); + prev_row = verify_memory_access(&trace, 1, MEMORY_WRITE, &memory_access, prev_row); // address 1 - let memory_access = MemoryLookup::from_ints(MEMORY_WRITE_LABEL, 0, addr1, 3, value7); - prev_row = - verify_memory_access(&trace, &chiplets_bus, 2, MEMORY_WRITE, &memory_access, prev_row); + let memory_access = MemoryAccess::new(ContextId::root(), addr1, 3, value7); + prev_row = verify_memory_access(&trace, 2, MEMORY_WRITE, &memory_access, prev_row); // address 2 - let memory_access = MemoryLookup::from_ints(MEMORY_WRITE_LABEL, 0, addr2, 2, value5); - verify_memory_access(&trace, &chiplets_bus, 3, MEMORY_WRITE, &memory_access, prev_row); + let memory_access = MemoryAccess::new(ContextId::root(), addr2, 2, value5); + verify_memory_access(&trace, 3, MEMORY_WRITE, &memory_access, prev_row); } #[test] @@ -137,142 +130,130 @@ fn mem_write_read() { // write 1 into 
address 5; clk = 1 let addr5 = 5; let value1 = [ONE, ZERO, ZERO, ZERO]; - mem.write(0, addr5, 1, value1); + mem.write(ContextId::root(), addr5, 1, value1); // write 4 into address 2; clk = 2 let addr2 = 2; let value4 = [Felt::new(4), ZERO, ZERO, ZERO]; - mem.write(0, addr2, 2, value4); + mem.write(ContextId::root(), addr2, 2, value4); // read a value from address 5; clk = 3 - mem.read(0, addr5, 3); + mem.read(ContextId::root(), addr5, 3); // write 2 into address 5; clk = 4 let value2 = [Felt::new(2), ZERO, ZERO, ZERO]; - mem.write(0, addr5, 4, value2); + mem.write(ContextId::root(), addr5, 4, value2); // read a value from address 2; clk = 5 - mem.read(0, addr2, 5); + mem.read(ContextId::root(), addr2, 5); // write 7 into address 2; clk = 6 let value7 = [Felt::new(7), ZERO, ZERO, ZERO]; - mem.write(0, addr2, 6, value7); + mem.write(ContextId::root(), addr2, 6, value7); // read a value from address 5; clk = 7 - mem.read(0, addr5, 7); + mem.read(ContextId::root(), addr5, 7); // read a value from address 2; clk = 8 - mem.read(0, addr2, 8); + mem.read(ContextId::root(), addr2, 8); // read a value from address 5; clk = 9 - mem.read(0, addr5, 9); + mem.read(ContextId::root(), addr5, 9); // check generated trace and memory data provided to the ChipletsBus; rows should be sorted by // address and then clock cycle - let (trace, chiplets_bus) = build_trace(mem, 9); + let trace = build_trace(mem, 9); // address 2 let mut prev_row = [ZERO; MEMORY_TRACE_WIDTH]; - let memory_access = MemoryLookup::from_ints(MEMORY_WRITE_LABEL, 0, addr2, 2, value4); - prev_row = - verify_memory_access(&trace, &chiplets_bus, 0, MEMORY_WRITE, &memory_access, prev_row); + let memory_access = MemoryAccess::new(ContextId::root(), addr2, 2, value4); + prev_row = verify_memory_access(&trace, 0, MEMORY_WRITE, &memory_access, prev_row); - let memory_access = MemoryLookup::from_ints(MEMORY_READ_LABEL, 0, addr2, 5, value4); - prev_row = - verify_memory_access(&trace, &chiplets_bus, 1, MEMORY_COPY_READ, &memory_access, prev_row); + let memory_access = MemoryAccess::new(ContextId::root(), addr2, 5, value4); + prev_row = verify_memory_access(&trace, 1, MEMORY_COPY_READ, &memory_access, prev_row); - let memory_access = MemoryLookup::from_ints(MEMORY_WRITE_LABEL, 0, addr2, 6, value7); - prev_row = - verify_memory_access(&trace, &chiplets_bus, 2, MEMORY_WRITE, &memory_access, prev_row); + let memory_access = MemoryAccess::new(ContextId::root(), addr2, 6, value7); + prev_row = verify_memory_access(&trace, 2, MEMORY_WRITE, &memory_access, prev_row); - let memory_access = MemoryLookup::from_ints(MEMORY_READ_LABEL, 0, addr2, 8, value7); - prev_row = - verify_memory_access(&trace, &chiplets_bus, 3, MEMORY_COPY_READ, &memory_access, prev_row); + let memory_access = MemoryAccess::new(ContextId::root(), addr2, 8, value7); + prev_row = verify_memory_access(&trace, 3, MEMORY_COPY_READ, &memory_access, prev_row); // address 5 - let memory_access = MemoryLookup::from_ints(MEMORY_WRITE_LABEL, 0, addr5, 1, value1); - prev_row = - verify_memory_access(&trace, &chiplets_bus, 4, MEMORY_WRITE, &memory_access, prev_row); + let memory_access = MemoryAccess::new(ContextId::root(), addr5, 1, value1); + prev_row = verify_memory_access(&trace, 4, MEMORY_WRITE, &memory_access, prev_row); - let memory_access = MemoryLookup::from_ints(MEMORY_READ_LABEL, 0, addr5, 3, value1); - prev_row = - verify_memory_access(&trace, &chiplets_bus, 5, MEMORY_COPY_READ, &memory_access, prev_row); + let memory_access = MemoryAccess::new(ContextId::root(), addr5, 3, value1); + 
prev_row = verify_memory_access(&trace, 5, MEMORY_COPY_READ, &memory_access, prev_row); - let memory_access = MemoryLookup::from_ints(MEMORY_WRITE_LABEL, 0, addr5, 4, value2); - prev_row = - verify_memory_access(&trace, &chiplets_bus, 6, MEMORY_WRITE, &memory_access, prev_row); + let memory_access = MemoryAccess::new(ContextId::root(), addr5, 4, value2); + prev_row = verify_memory_access(&trace, 6, MEMORY_WRITE, &memory_access, prev_row); - let memory_access = MemoryLookup::from_ints(MEMORY_READ_LABEL, 0, addr5, 7, value2); - prev_row = - verify_memory_access(&trace, &chiplets_bus, 7, MEMORY_COPY_READ, &memory_access, prev_row); + let memory_access = MemoryAccess::new(ContextId::root(), addr5, 7, value2); + prev_row = verify_memory_access(&trace, 7, MEMORY_COPY_READ, &memory_access, prev_row); - let memory_access = MemoryLookup::from_ints(MEMORY_READ_LABEL, 0, addr5, 9, value2); - verify_memory_access(&trace, &chiplets_bus, 8, MEMORY_COPY_READ, &memory_access, prev_row); + let memory_access = MemoryAccess::new(ContextId::root(), addr5, 9, value2); + verify_memory_access(&trace, 8, MEMORY_COPY_READ, &memory_access, prev_row); } #[test] fn mem_multi_context() { let mut mem = Memory::default(); - // write a value into ctx = 0, addr = 0; clk = 1 + // write a value into ctx = ContextId::root(), addr = 0; clk = 1 let value1 = [ONE, ZERO, ZERO, ZERO]; - mem.write(0, 0, 1, value1); - assert_eq!(value1, mem.get_value(0, 0).unwrap()); + mem.write(ContextId::root(), 0, 1, value1); + assert_eq!(value1, mem.get_value(ContextId::root(), 0).unwrap()); assert_eq!(1, mem.size()); assert_eq!(1, mem.trace_len()); // write a value into ctx = 3, addr = 1; clk = 4 let value2 = [ZERO, ONE, ZERO, ZERO]; - mem.write(3, 1, 4, value2); - assert_eq!(value2, mem.get_value(3, 1).unwrap()); + mem.write(3.into(), 1, 4, value2); + assert_eq!(value2, mem.get_value(3.into(), 1).unwrap()); assert_eq!(2, mem.size()); assert_eq!(2, mem.trace_len()); // read a value from ctx = 3, addr = 1; clk = 6 - let value = mem.read(3, 1, 6); + let value = mem.read(3.into(), 1, 6); assert_eq!(value2, value); assert_eq!(2, mem.size()); assert_eq!(3, mem.trace_len()); // write a value into ctx = 3, addr = 0; clk = 7 let value3 = [ZERO, ZERO, ONE, ZERO]; - mem.write(3, 0, 7, value3); - assert_eq!(value3, mem.get_value(3, 0).unwrap()); + mem.write(3.into(), 0, 7, value3); + assert_eq!(value3, mem.get_value(3.into(), 0).unwrap()); assert_eq!(3, mem.size()); assert_eq!(4, mem.trace_len()); // read a value from ctx = 0, addr = 0; clk = 9 - let value = mem.read(0, 0, 9); + let value = mem.read(ContextId::root(), 0, 9); assert_eq!(value1, value); assert_eq!(3, mem.size()); assert_eq!(5, mem.trace_len()); // check generated trace and memory data provided to the ChipletsBus; rows should be sorted by // address and then clock cycle - let (trace, chiplets_bus) = build_trace(mem, 5); + let trace = build_trace(mem, 5); // ctx = 0, addr = 0 let mut prev_row = [ZERO; MEMORY_TRACE_WIDTH]; - let memory_access = MemoryLookup::from_ints(MEMORY_WRITE_LABEL, 0, 0, 1, value1); - prev_row = - verify_memory_access(&trace, &chiplets_bus, 0, MEMORY_WRITE, &memory_access, prev_row); + let memory_access = MemoryAccess::new(ContextId::root(), 0, 1, value1); + prev_row = verify_memory_access(&trace, 0, MEMORY_WRITE, &memory_access, prev_row); - let memory_access = MemoryLookup::from_ints(MEMORY_READ_LABEL, 0, 0, 9, value1); - prev_row = - verify_memory_access(&trace, &chiplets_bus, 1, MEMORY_COPY_READ, &memory_access, prev_row); + let memory_access = 
MemoryAccess::new(ContextId::root(), 0, 9, value1); + prev_row = verify_memory_access(&trace, 1, MEMORY_COPY_READ, &memory_access, prev_row); // ctx = 3, addr = 0 - let memory_access = MemoryLookup::from_ints(MEMORY_WRITE_LABEL, 3, 0, 7, value3); - prev_row = - verify_memory_access(&trace, &chiplets_bus, 2, MEMORY_WRITE, &memory_access, prev_row); + let memory_access = MemoryAccess::new(3.into(), 0, 7, value3); + prev_row = verify_memory_access(&trace, 2, MEMORY_WRITE, &memory_access, prev_row); // ctx = 3, addr = 1 - let memory_access = MemoryLookup::from_ints(MEMORY_WRITE_LABEL, 3, 1, 4, value2); - prev_row = - verify_memory_access(&trace, &chiplets_bus, 3, MEMORY_WRITE, &memory_access, prev_row); + let memory_access = MemoryAccess::new(3.into(), 1, 4, value2); + prev_row = verify_memory_access(&trace, 3, MEMORY_WRITE, &memory_access, prev_row); - let memory_access = MemoryLookup::from_ints(MEMORY_READ_LABEL, 3, 1, 6, value2); - verify_memory_access(&trace, &chiplets_bus, 4, MEMORY_COPY_READ, &memory_access, prev_row); + let memory_access = MemoryAccess::new(3.into(), 1, 6, value2); + verify_memory_access(&trace, 4, MEMORY_COPY_READ, &memory_access, prev_row); } #[test] @@ -282,46 +263,64 @@ fn mem_get_state_at() { // Write 1 into (ctx = 0, addr = 5) at clk = 1. // This means that mem[5] = 1 at the beginning of clk = 2 let value1 = [ONE, ZERO, ZERO, ZERO]; - mem.write(0, 5, 1, value1); + mem.write(ContextId::root(), 5, 1, value1); // Write 4 into (ctx = 0, addr = 2) at clk = 2. // This means that mem[2] = 4 at the beginning of clk = 3 let value4 = [Felt::new(4), ZERO, ZERO, ZERO]; - mem.write(0, 2, 2, value4); + mem.write(ContextId::root(), 2, 2, value4); // write 7 into (ctx = 3, addr = 3) at clk = 4 // This means that mem[3] = 7 at the beginning of clk = 4 let value7 = [Felt::new(7), ZERO, ZERO, ZERO]; - mem.write(3, 3, 4, value7); + mem.write(3.into(), 3, 4, value7); // Check memory state at clk = 2 - assert_eq!(mem.get_state_at(0, 2), vec![(5, value1)]); - assert_eq!(mem.get_state_at(3, 2), vec![]); + assert_eq!(mem.get_state_at(ContextId::root(), 2), vec![(5, value1)]); + assert_eq!(mem.get_state_at(3.into(), 2), vec![]); // Check memory state at clk = 3 - assert_eq!(mem.get_state_at(0, 3), vec![(2, value4), (5, value1)]); - assert_eq!(mem.get_state_at(3, 3), vec![]); + assert_eq!(mem.get_state_at(ContextId::root(), 3), vec![(2, value4), (5, value1)]); + assert_eq!(mem.get_state_at(3.into(), 3), vec![]); // Check memory state at clk = 4 - assert_eq!(mem.get_state_at(0, 4), vec![(2, value4), (5, value1)]); - assert_eq!(mem.get_state_at(3, 4), vec![]); + assert_eq!(mem.get_state_at(ContextId::root(), 4), vec![(2, value4), (5, value1)]); + assert_eq!(mem.get_state_at(3.into(), 4), vec![]); // Check memory state at clk = 5 - assert_eq!(mem.get_state_at(0, 5), vec![(2, value4), (5, value1)]); - assert_eq!(mem.get_state_at(3, 5), vec![(3, value7)]); + assert_eq!(mem.get_state_at(ContextId::root(), 5), vec![(2, value4), (5, value1)]); + assert_eq!(mem.get_state_at(3.into(), 5), vec![(3, value7)]); } // HELPER STRUCT & FUNCTIONS // ================================================================================================ +/// Contains data representing a memory access. 
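// Illustrative sketch (an assumption, not part of this diff): judging from how these tests
// use it -- `ContextId::root()`, `3.into()`, and `ctx.into()` producing a `Felt` in the
// helpers below -- the new `ContextId` type behaves like a thin newtype around a `u32`
// execution-context identifier, with the root context being id 0. A minimal version
// providing just the conversions the tests rely on might look like this:

#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ContextId(u32);

impl ContextId {
    /// Returns the ID of the root execution context.
    pub fn root() -> Self {
        Self(0)
    }
}

impl From<u32> for ContextId {
    fn from(value: u32) -> Self {
        Self(value)
    }
}

impl From<ContextId> for Felt {
    fn from(context_id: ContextId) -> Self {
        Felt::from(context_id.0)
    }
}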
+pub struct MemoryAccess { + ctx: ContextId, + addr: Felt, + clk: Felt, + word: [Felt; 4], +} + +impl MemoryAccess { + pub fn new(ctx: ContextId, addr: u32, clk: u32, word: Word) -> Self { + Self { + ctx, + addr: Felt::from(addr), + clk: Felt::from(clk), + word, + } + } +} + /// Builds a trace of the specified length and fills it with data from the provided Memory instance. -fn build_trace(mem: Memory, num_rows: usize) -> (Vec>, ChipletsBus) { - let mut chiplets_bus = ChipletsBus::default(); +fn build_trace(mem: Memory, num_rows: usize) -> Vec> { let mut trace = (0..MEMORY_TRACE_WIDTH).map(|_| vec![ZERO; num_rows]).collect::>(); let mut fragment = TraceFragment::trace_to_fragment(&mut trace); - mem.fill_trace(&mut fragment, &mut chiplets_bus, 0); + mem.fill_trace(&mut fragment); - (trace, chiplets_bus) + trace } fn read_trace_row(trace: &[Vec], step: usize) -> [Felt; MEMORY_TRACE_WIDTH] { @@ -333,12 +332,11 @@ fn read_trace_row(trace: &[Vec], step: usize) -> [Felt; MEMORY_TRACE_WIDTH } fn build_trace_row( - memory_access: &MemoryLookup, + memory_access: &MemoryAccess, op_selectors: Selectors, prev_row: [Felt; MEMORY_TRACE_WIDTH], ) -> [Felt; MEMORY_TRACE_WIDTH] { - let MemoryLookup { - label: _, + let MemoryAccess { ctx, addr, clk, @@ -349,8 +347,8 @@ fn build_trace_row( row[0] = op_selectors[0]; row[1] = op_selectors[1]; - row[CTX_COL_IDX] = ctx; - row[ADDR_COL_IDX] = Felt::from(addr); + row[CTX_COL_IDX] = ctx.into(); + row[ADDR_COL_IDX] = addr; row[CLK_COL_IDX] = clk; row[V_COL_RANGE.start] = new_val[0]; row[V_COL_RANGE.start + 1] = new_val[1]; @@ -377,22 +375,13 @@ fn build_trace_row( fn verify_memory_access( trace: &[Vec], - chiplets_bus: &ChipletsBus, row: u32, op_selectors: Selectors, - memory_access: &MemoryLookup, + memory_access: &MemoryAccess, prev_row: [Felt; MEMORY_TRACE_WIDTH], ) -> [Felt; MEMORY_TRACE_WIDTH] { let expected_row = build_trace_row(memory_access, op_selectors, prev_row); - let expected_lookup = ChipletLookup::Memory(*memory_access); - let expected_hint = ChipletsBusRow::new(&[], Some(row)); - - let lookup = chiplets_bus.get_response_row(row as usize); - let hint = chiplets_bus.get_lookup_hint(row).unwrap(); - assert_eq!(expected_row, read_trace_row(trace, row as usize)); - assert_eq!(expected_lookup, lookup); - assert_eq!(&expected_hint, hint); expected_row } diff --git a/processor/src/chiplets/mod.rs b/processor/src/chiplets/mod.rs index 2bd9cba103..74654a81a5 100644 --- a/processor/src/chiplets/mod.rs +++ b/processor/src/chiplets/mod.rs @@ -1,61 +1,34 @@ +use crate::system::ContextId; + use super::{ - crypto::MerklePath, trace, utils, BTreeMap, ChipletsTrace, ColMatrix, ExecutionError, Felt, - FieldElement, RangeChecker, StarkField, TraceFragment, Vec, Word, CHIPLETS_WIDTH, EMPTY_WORD, - ONE, ZERO, -}; -use miden_air::trace::chiplets::{ - bitwise::{BITWISE_AND_LABEL, BITWISE_XOR_LABEL}, - hasher::{Digest, HasherState}, - memory::{MEMORY_READ_LABEL, MEMORY_WRITE_LABEL}, + crypto::MerklePath, utils, ChipletsTrace, ExecutionError, Felt, FieldElement, RangeChecker, + TraceFragment, Word, CHIPLETS_WIDTH, EMPTY_WORD, ONE, ZERO, }; +use crate::utils::collections::*; +use miden_air::trace::chiplets::hasher::{Digest, HasherState}; use vm_core::{code_blocks::OpBatch, Kernel}; mod bitwise; -use bitwise::{Bitwise, BitwiseLookup}; +use bitwise::Bitwise; mod hasher; -pub use hasher::init_state_from_words; +#[cfg(test)] +pub(crate) use hasher::init_state_from_words; use hasher::Hasher; mod memory; -use memory::{Memory, MemoryLookup}; +use memory::Memory; mod kernel_rom; -use 
kernel_rom::{KernelProcLookup, KernelRom}; +use kernel_rom::KernelRom; mod aux_trace; -#[cfg(test)] -pub(crate) use aux_trace::ChipletsVTableRow; -pub(crate) use aux_trace::{AuxTraceBuilder, ChipletsBus, ChipletsVTableTraceBuilder}; + +pub(crate) use aux_trace::AuxTraceBuilder; #[cfg(test)] mod tests; -// HELPER STRUCTS -// ================================================================================================ - -/// Result of a merkle tree node update. The result contains the old merkle_root, which -/// corresponding to the old_value, and the new merkle_root, for the updated value. As well as the -/// row address of the execution trace at which the computation started. -#[derive(Debug, Copy, Clone)] -pub struct MerkleRootUpdate { - address: Felt, - old_root: Word, - new_root: Word, -} - -impl MerkleRootUpdate { - pub fn get_address(&self) -> Felt { - self.address - } - pub fn get_old_root(&self) -> Word { - self.old_root - } - pub fn get_new_root(&self) -> Word { - self.new_root - } -} - // CHIPLETS MODULE OF HASHER, BITWISE, MEMORY, AND KERNEL ROM CHIPLETS // ================================================================================================ @@ -99,6 +72,46 @@ impl MerkleRootUpdate { /// exactly enough rows remaining for the specified number of random rows. /// - columns 0-3: selector columns with values set to ONE /// - columns 3-17: unused columns padded with ZERO +/// +/// The following is a pictorial representation of the chiplet module: +/// +---+-------------------------------------------------------+-------------+ +/// | 0 | | |-------------| +/// | . | Hash chiplet | Hash chiplet |-------------| +/// | . | internal | 16 columns |-- Padding --| +/// | . | selectors | constraint degree 8 |-------------| +/// | 0 | | |-------------| +/// +---+---+---------------------------------------------------+-------------+ +/// | 1 | 0 | | |-------------| +/// | . | . | Bitwise | Bitwise chiplet |-------------| +/// | . | . | chiplet | 13 columns |-- Padding --| +/// | . | . | internal | constraint degree 13 |-------------| +/// | . | . | selectors | |-------------| +/// | . | 0 | | |-------------| +/// | . +---+---+-----------------------------------------------+-------------+ +/// | . | 1 | 0 | | |-------------| +/// | . | . | . | Memory chiplet | Memory chiplet |-------------| +/// | . | . | . | internal | 12 columns |-- Padding --| +/// | . | . | . | selectors | constraint degree 9 |-------------| +/// | . | . | 0 | | |-------------| +/// | . + . |---+---+-------------------------------------------+-------------+ +/// | . | . | 1 | 0 | | |-------------| +/// | . | . | . | . | Kernel ROM | Kernel ROM chiplet |-------------| +/// | . | . | . | . | chiplet internal | 6 columns |-- Padding --| +/// | . | . | . | . | selectors | constraint degree 9 |-------------| +/// | . | . | . | 0 | | |-------------| +/// | . + . | . |---+-------------------------------------------+-------------+ +/// | . | . | . | 1 |---------------------------------------------------------| +/// | . | . | . | . |---------------------------------------------------------| +/// | . | . | . | . |---------------------------------------------------------| +/// | . | . | . | . |---------------------------------------------------------| +/// | . | . | . | . |----------------------- Padding -------------------------| +/// | . + . | . | . |---------------------------------------------------------| +/// | . | . | . | . |---------------------------------------------------------| +/// | . | . | . | . 
|---------------------------------------------------------| +/// | . | . | . | . |---------------------------------------------------------| +/// | 1 | 1 | 1 | 1 |---------------------------------------------------------| +/// +---+---+---+---+---------------------------------------------------------+ +/// pub struct Chiplets { /// Current clock cycle of the VM. clk: u32, @@ -106,7 +119,6 @@ pub struct Chiplets { bitwise: Bitwise, memory: Memory, kernel_rom: KernelRom, - bus: ChipletsBus, } impl Chiplets { @@ -120,7 +132,6 @@ impl Chiplets { bitwise: Bitwise::default(), memory: Memory::default(), kernel_rom: KernelRom::new(kernel), - bus: ChipletsBus::default(), } } @@ -172,12 +183,7 @@ impl Chiplets { /// The returned tuple contains the hasher state after the permutation and the row address of /// the execution trace at which the permutation started. pub fn permute(&mut self, state: HasherState) -> (Felt, HasherState) { - let mut lookups = Vec::new(); - let (addr, return_state) = self.hasher.permute(state, &mut lookups); - self.bus.request_hasher_operation(&lookups, self.clk); - - // provide the responses to the bus - self.bus.provide_hasher_lookups(&lookups); + let (addr, return_state) = self.hasher.permute(state); (addr, return_state) } @@ -198,13 +204,7 @@ impl Chiplets { path: &MerklePath, index: Felt, ) -> (Felt, Word) { - let mut lookups = Vec::new(); - let (addr, root) = self.hasher.build_merkle_root(value, path, index, &mut lookups); - - self.bus.request_hasher_operation(&lookups, self.clk); - - // provide the responses to the bus - self.bus.provide_hasher_lookups(&lookups); + let (addr, root) = self.hasher.build_merkle_root(value, path, index); (addr, root) } @@ -222,16 +222,7 @@ impl Chiplets { path: &MerklePath, index: Felt, ) -> MerkleRootUpdate { - let mut lookups = Vec::new(); - - let merkle_root_update = - self.hasher.update_merkle_root(old_value, new_value, path, index, &mut lookups); - self.bus.request_hasher_operation(&lookups, self.clk); - - // provide the responses to the bus - self.bus.provide_hasher_lookups(&lookups); - - merkle_root_update + self.hasher.update_merkle_root(old_value, new_value, path, index) } // HASH CHIPLET ACCESSORS FOR CONTROL BLOCK DECODING @@ -248,22 +239,11 @@ impl Chiplets { domain: Felt, expected_hash: Digest, ) -> Felt { - let mut lookups = Vec::new(); - let (addr, result) = - self.hasher.hash_control_block(h1, h2, domain, expected_hash, &mut lookups); + let (addr, result) = self.hasher.hash_control_block(h1, h2, domain, expected_hash); // make sure the result computed by the hasher is the same as the expected block hash debug_assert_eq!(expected_hash, result.into()); - // send the request for the hash initialization - self.bus.request_hasher_lookup(lookups[0], self.clk); - - // enqueue the request for the hash result - self.bus.enqueue_hasher_request(lookups[1]); - - // provide the responses to the bus - self.bus.provide_hasher_lookups(&lookups); - addr } @@ -272,48 +252,14 @@ impl Chiplets { /// /// It returns the row address of the execution trace at which the hash computation started. 
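// Illustrative sketch (an assumption; no such helper exists in this diff): the stacked layout
// pictured above means that any row of the chiplets trace segment can be attributed to a
// chiplet purely from the offsets at which the bitwise, memory, and kernel ROM segments begin
// (the same offsets used by `fill_trace` further below). The selector columns encode the same
// split: hasher rows start with 0, bitwise rows with (1, 0), memory rows with (1, 1, 0),
// kernel ROM rows with (1, 1, 1, 0), and padding rows with (1, 1, 1, 1).

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ChipletKind {
    Hasher,
    Bitwise,
    Memory,
    KernelRom,
    Padding,
}

fn chiplet_for_row(
    row: usize,
    bitwise_start: usize,
    memory_start: usize,
    kernel_rom_start: usize,
    padding_start: usize,
) -> ChipletKind {
    if row < bitwise_start {
        ChipletKind::Hasher
    } else if row < memory_start {
        ChipletKind::Bitwise
    } else if row < kernel_rom_start {
        ChipletKind::Memory
    } else if row < padding_start {
        ChipletKind::KernelRom
    } else {
        ChipletKind::Padding
    }
}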
pub fn hash_span_block(&mut self, op_batches: &[OpBatch], expected_hash: Digest) -> Felt { - let mut lookups = Vec::new(); - let (addr, result) = self.hasher.hash_span_block(op_batches, expected_hash, &mut lookups); + let (addr, result) = self.hasher.hash_span_block(op_batches, expected_hash); // make sure the result computed by the hasher is the same as the expected block hash debug_assert_eq!(expected_hash, result.into()); - // send the request for the hash initialization - self.bus.request_hasher_lookup(lookups[0], self.clk); - - // enqueue the rest of the requests in reverse order so that the next request is at - // the top of the queue. - for lookup in lookups.iter().skip(1).rev() { - self.bus.enqueue_hasher_request(*lookup); - } - - // provide the responses to the bus - self.bus.provide_hasher_lookups(&lookups); - addr } - /// Sends a request for a [HasherLookup] required for verifying absorption of a new `SPAN` batch - /// to the Chiplets Bus. It's expected to be called by the decoder while processing a `RESPAN`. - /// - /// It's processed by moving the corresponding lookup from the Chiplets bus' queued lookups to - /// its requested lookups. Therefore, the next queued lookup is expected to be a precomputed - /// lookup for absorbing new elements into the hasher state. - pub fn absorb_span_batch(&mut self) { - self.bus.send_queued_hasher_request(self.clk); - } - - /// Sends a request for a control block hash result to the Chiplets Bus. It's expected to be - /// called by the decoder to request the finalization (return hash) of a control block hash - /// computation for the control block it has just finished decoding. - /// - /// It's processed by moving the corresponding lookup from the Chiplets bus' queued lookups to - /// its requested lookups. Therefore, the next queued lookup is expected to be a precomputed - /// lookup for returning a hash result. - pub fn read_hash_result(&mut self) { - self.bus.send_queued_hasher_request(self.clk); - } - // BITWISE CHIPLET ACCESSORS // -------------------------------------------------------------------------------------------- @@ -323,9 +269,6 @@ impl Chiplets { pub fn u32and(&mut self, a: Felt, b: Felt) -> Result { let result = self.bitwise.u32and(a, b)?; - let bitwise_lookup = BitwiseLookup::new(BITWISE_AND_LABEL, a, b, result); - self.bus.request_bitwise_operation(bitwise_lookup, self.clk); - Ok(result) } @@ -335,9 +278,6 @@ impl Chiplets { pub fn u32xor(&mut self, a: Felt, b: Felt) -> Result { let result = self.bitwise.u32xor(a, b)?; - let bitwise_lookup = BitwiseLookup::new(BITWISE_XOR_LABEL, a, b, result); - self.bus.request_bitwise_operation(bitwise_lookup, self.clk); - Ok(result) } @@ -349,15 +289,9 @@ impl Chiplets { /// /// If the specified address hasn't been previously written to, four ZERO elements are /// returned. This effectively implies that memory is initialized to ZERO. 
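// Minimal sketch (an assumption; heavily simplified compared to the real Memory chiplet) of
// the zero-initialization behavior described above: reading an address that has never been
// written returns a word of ZEROs rather than an error.

use std::collections::BTreeMap;

type SketchWord = [u64; 4];

#[derive(Default)]
struct SketchMemory {
    // (context, address) -> most recently written word
    map: BTreeMap<(u32, u32), SketchWord>,
}

impl SketchMemory {
    fn write(&mut self, ctx: u32, addr: u32, word: SketchWord) {
        self.map.insert((ctx, addr), word);
    }

    fn read(&self, ctx: u32, addr: u32) -> SketchWord {
        // unwritten addresses read as [0, 0, 0, 0]
        self.map.get(&(ctx, addr)).copied().unwrap_or([0; 4])
    }
}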
- pub fn read_mem(&mut self, ctx: u32, addr: u32) -> Word { + pub fn read_mem(&mut self, ctx: ContextId, addr: u32) -> Word { // read the word from memory - let value = self.memory.read(ctx, addr, self.clk); - - // send the memory read request to the bus - let lookup = MemoryLookup::from_ints(MEMORY_READ_LABEL, ctx, addr, self.clk, value); - self.bus.request_memory_operation(&[lookup], self.clk); - - value + self.memory.read(ctx, addr, self.clk) } /// Returns two words read from consecutive addresses started with `addr` in the specified @@ -365,68 +299,35 @@ impl Chiplets { /// /// If either of the accessed addresses hasn't been previously written to, ZERO elements are /// returned. This effectively implies that memory is initialized to ZERO. - pub fn read_mem_double(&mut self, ctx: u32, addr: u32) -> [Word; 2] { + pub fn read_mem_double(&mut self, ctx: ContextId, addr: u32) -> [Word; 2] { // read two words from memory: from addr and from addr + 1 let addr2 = addr + 1; - let words = [self.memory.read(ctx, addr, self.clk), self.memory.read(ctx, addr2, self.clk)]; - - // create lookups for both memory reads - let lookups = [ - MemoryLookup::from_ints(MEMORY_READ_LABEL, ctx, addr, self.clk, words[0]), - MemoryLookup::from_ints(MEMORY_READ_LABEL, ctx, addr2, self.clk, words[1]), - ]; - - // send lookups to the bus and return the result - self.bus.request_memory_operation(&lookups, self.clk); - words + [self.memory.read(ctx, addr, self.clk), self.memory.read(ctx, addr2, self.clk)] } /// Writes the provided word at the specified context/address. - /// - /// This also modifies the memory access trace and sends a memory lookup request to the bus. - pub fn write_mem(&mut self, ctx: u32, addr: u32, word: Word) { + pub fn write_mem(&mut self, ctx: ContextId, addr: u32, word: Word) { self.memory.write(ctx, addr, self.clk, word); - - // send the memory write request to the bus - let lookup = MemoryLookup::from_ints(MEMORY_WRITE_LABEL, ctx, addr, self.clk, word); - self.bus.request_memory_operation(&[lookup], self.clk); } /// Writes the provided element into the specified context/address leaving the remaining 3 /// elements of the word previously stored at that address unchanged. - /// - /// This also modifies the memory access trace and sends a memory lookup request to the bus. - pub fn write_mem_element(&mut self, ctx: u32, addr: u32, value: Felt) -> Word { + pub fn write_mem_element(&mut self, ctx: ContextId, addr: u32, value: Felt) -> Word { let old_word = self.memory.get_old_value(ctx, addr); let new_word = [value, old_word[1], old_word[2], old_word[3]]; self.memory.write(ctx, addr, self.clk, new_word); - // send the memory write request to the bus - let lookup = MemoryLookup::from_ints(MEMORY_WRITE_LABEL, ctx, addr, self.clk, new_word); - self.bus.request_memory_operation(&[lookup], self.clk); - old_word } /// Writes the two provided words to two consecutive addresses in memory in the specified /// context, starting at the specified address. - /// - /// This also modifies the memory access trace and sends two memory lookup requests to the bus. 
- pub fn write_mem_double(&mut self, ctx: u32, addr: u32, words: [Word; 2]) { + pub fn write_mem_double(&mut self, ctx: ContextId, addr: u32, words: [Word; 2]) { let addr2 = addr + 1; // write two words to memory at addr and addr + 1 self.memory.write(ctx, addr, self.clk, words[0]); self.memory.write(ctx, addr2, self.clk, words[1]); - - // create lookups for both memory writes - let lookups = [ - MemoryLookup::from_ints(MEMORY_WRITE_LABEL, ctx, addr, self.clk, words[0]), - MemoryLookup::from_ints(MEMORY_WRITE_LABEL, ctx, addr2, self.clk, words[1]), - ]; - - // send lookups to the bus - self.bus.request_memory_operation(&lookups, self.clk); } /// Returns a word located at the specified context/address, or None if the address hasn't @@ -434,14 +335,14 @@ impl Chiplets { /// /// Unlike mem_read() which modifies the memory access trace, this method returns the value at /// the specified address (if one exists) without altering the memory access trace. - pub fn get_mem_value(&self, ctx: u32, addr: u32) -> Option { + pub fn get_mem_value(&self, ctx: ContextId, addr: u32) -> Option { self.memory.get_value(ctx, addr) } /// Returns the entire memory state for the specified execution context at the specified cycle. /// The state is returned as a vector of (address, value) tuples, and includes addresses which /// have been accessed at least once. - pub fn get_mem_state_at(&self, ctx: u32, clk: u32) -> Vec<(u64, Word)> { + pub fn get_mem_state_at(&self, ctx: ContextId, clk: u32) -> Vec<(u64, Word)> { self.memory.get_state_at(ctx, clk) } @@ -462,10 +363,6 @@ impl Chiplets { pub fn access_kernel_proc(&mut self, proc_hash: Digest) -> Result<(), ExecutionError> { self.kernel_rom.access_proc(proc_hash)?; - // record the access in the chiplet bus - let kernel_proc_lookup = KernelProcLookup::new(proc_hash.into()); - self.bus.request_kernel_proc_call(kernel_proc_lookup, self.clk); - Ok(()) } @@ -481,7 +378,7 @@ impl Chiplets { // -------------------------------------------------------------------------------------------- /// Adds all range checks required by the memory chiplet to the provided [RangeChecker] - /// instance, along with the cycle rows at which the processor performs the lookups. + /// instance. pub fn append_range_checks(&self, range_checker: &mut RangeChecker) { self.memory.append_range_checks(self.memory_start(), range_checker); } @@ -501,10 +398,12 @@ impl Chiplets { .collect::>() .try_into() .expect("failed to convert vector to array"); + self.fill_trace(&mut trace); - let aux_builder = self.fill_trace(&mut trace); - - ChipletsTrace { trace, aux_builder } + ChipletsTrace { + trace, + aux_builder: AuxTraceBuilder::default(), + } } // HELPER METHODS @@ -516,7 +415,7 @@ impl Chiplets { /// /// It returns the auxiliary trace builders for generating auxiliary trace columns that depend /// on data from [Chiplets]. - fn fill_trace(self, trace: &mut [Vec; CHIPLETS_WIDTH]) -> AuxTraceBuilder { + fn fill_trace(self, trace: &mut [Vec; CHIPLETS_WIDTH]) { // get the rows where chiplets begin. 
let bitwise_start = self.bitwise_start(); let memory_start = self.memory_start(); @@ -529,7 +428,6 @@ impl Chiplets { bitwise, memory, kernel_rom, - mut bus, } = self; // populate external selector columns for all chiplets @@ -576,16 +474,34 @@ impl Chiplets { // fill the fragments with the execution trace from each chiplet // TODO: this can be parallelized to fill the traces in multiple threads - let mut table_builder = hasher.fill_trace(&mut hasher_fragment); - bitwise.fill_trace(&mut bitwise_fragment, &mut bus, bitwise_start); - memory.fill_trace(&mut memory_fragment, &mut bus, memory_start); - kernel_rom.fill_trace( - &mut kernel_rom_fragment, - &mut bus, - &mut table_builder, - kernel_rom_start, - ); - - AuxTraceBuilder::new(bus.into_aux_builder(), table_builder) + hasher.fill_trace(&mut hasher_fragment); + bitwise.fill_trace(&mut bitwise_fragment); + memory.fill_trace(&mut memory_fragment); + kernel_rom.fill_trace(&mut kernel_rom_fragment); + } +} + +// HELPER STRUCTS +// ================================================================================================ + +/// Result of a Merkle tree node update. The result contains the old Merkle_root, which +/// corresponding to the old_value, and the new merkle_root, for the updated value. As well as the +/// row address of the execution trace at which the computation started. +#[derive(Debug, Copy, Clone)] +pub struct MerkleRootUpdate { + address: Felt, + old_root: Word, + new_root: Word, +} + +impl MerkleRootUpdate { + pub fn get_address(&self) -> Felt { + self.address + } + pub fn get_old_root(&self) -> Word { + self.old_root + } + pub fn get_new_root(&self) -> Word { + self.new_root } } diff --git a/processor/src/chiplets/tests.rs b/processor/src/chiplets/tests.rs index 8816cba2bb..7bbb683f5b 100644 --- a/processor/src/chiplets/tests.rs +++ b/processor/src/chiplets/tests.rs @@ -1,6 +1,6 @@ use crate::{ - utils::get_trace_len, CodeBlock, DefaultHost, ExecutionOptions, ExecutionTrace, Kernel, - Operation, Process, StackInputs, Vec, + utils::collections::*, CodeBlock, DefaultHost, ExecutionOptions, ExecutionTrace, Kernel, + Operation, Process, StackInputs, }; use miden_air::trace::{ chiplets::{ @@ -100,7 +100,7 @@ fn stacked_chiplet_trace() { fn build_kernel() -> Kernel { let proc_hash1: Digest = [ONE, ZERO, ONE, ZERO].into(); let proc_hash2: Digest = [ONE, ONE, ONE, ONE].into(); - Kernel::new(&[proc_hash1, proc_hash2]) + Kernel::new(&[proc_hash1, proc_hash2]).unwrap() } /// Builds a sample trace by executing a span block containing the specified operations. 
This @@ -117,11 +117,11 @@ fn build_trace( process.execute_code_block(&program, &CodeBlockTable::default()).unwrap(); let (trace, _, _) = ExecutionTrace::test_finalize_trace(process); - let trace_len = get_trace_len(&trace) - ExecutionTrace::NUM_RAND_ROWS; + let trace_len = trace.num_rows() - ExecutionTrace::NUM_RAND_ROWS; ( - trace[CHIPLETS_RANGE] - .to_vec() + trace + .get_column_range(CHIPLETS_RANGE) .try_into() .expect("failed to convert vector to array"), trace_len, diff --git a/processor/src/debug.rs b/processor/src/debug.rs index 3a7a9d4d8d..48df4dfde8 100644 --- a/processor/src/debug.rs +++ b/processor/src/debug.rs @@ -1,18 +1,18 @@ use crate::{ - range::RangeChecker, Chiplets, ChipletsLengths, Decoder, ExecutionError, Felt, Host, Process, - Stack, StarkField, System, TraceLenSummary, Vec, + range::RangeChecker, + system::ContextId, + utils::{collections::*, string::*}, + Chiplets, ChipletsLengths, Decoder, ExecutionError, Felt, Host, Process, Stack, System, + TraceLenSummary, }; use core::fmt; -use vm_core::{ - utils::string::{String, ToString}, - AssemblyOp, Operation, StackOutputs, Word, -}; +use vm_core::{AssemblyOp, Operation, StackOutputs, Word}; /// VmState holds a current process state information at a specific clock cycle. #[derive(Clone, Debug, Eq, PartialEq)] pub struct VmState { pub clk: u32, - pub ctx: u32, + pub ctx: ContextId, pub op: Option, pub asmop: Option, pub fmp: Felt, diff --git a/processor/src/decoder/aux_hints.rs b/processor/src/decoder/aux_hints.rs deleted file mode 100644 index 72f1543bf9..0000000000 --- a/processor/src/decoder/aux_hints.rs +++ /dev/null @@ -1,483 +0,0 @@ -use super::{ - super::trace::LookupTableRow, get_num_groups_in_next_batch, BlockInfo, ColMatrix, Felt, - FieldElement, StarkField, Vec, Word, EMPTY_WORD, ONE, ZERO, -}; - -// AUXILIARY TRACE HINTS -// ================================================================================================ - -/// Contains information which can be used to simplify construction of execution traces of -/// decoder-related auxiliary trace segment columns (used in multiset checks). -pub struct AuxTraceHints { - /// A list of updates made to the block stack and block hash tables. Each entry contains a - /// clock cycle at which the update was made, as well as the description of the update. - block_exec_hints: Vec<(u32, BlockTableUpdate)>, - /// A list of rows which were added and then removed from the block stack table. The rows are - /// sorted by `block_id` in ascending order. - block_stack_rows: Vec, - /// A list of rows which were added and then removed from the block hash table. The rows are - /// sorted first by `parent_id` and then by `is_first_child` with the entry where - /// `is_first_child` = true coming first. - block_hash_rows: Vec, - /// A list of updates made to the op group table where each entry is a tuple containing the - /// cycle at which the update was made and the update description. - op_group_hints: Vec<(u32, OpGroupTableUpdate)>, - /// A list of rows which were added to and then removed from the op group table. - op_group_rows: Vec, -} - -impl AuxTraceHints { - // CONSTRUCTOR - // -------------------------------------------------------------------------------------------- - /// Returns an empty [AuxTraceHints] struct. 
- pub fn new() -> Self { - // initialize block hash table with an blank entry, this will be replaced with an entry - // containing the actual program hash at the end of trace generation - let block_hash_rows = vec![BlockHashTableRow::from_program_hash(EMPTY_WORD)]; - - Self { - block_exec_hints: Vec::new(), - block_stack_rows: Vec::new(), - block_hash_rows, - op_group_hints: Vec::new(), - op_group_rows: Vec::new(), - } - } - - // PUBLIC ACCESSORS - // -------------------------------------------------------------------------------------------- - - /// Returns hints which describe how the block stack and block hash tables were updated during - /// program execution. Each hint consists of a clock cycle and the update description for that - /// cycle. The hints are sorted by clock cycle in ascending order. - pub fn block_exec_hints(&self) -> &[(u32, BlockTableUpdate)] { - &self.block_exec_hints - } - - /// Returns a list of table rows which were added to and then removed from the block stack - /// table. We don't specify which cycles these rows were added/removed at because this info - /// can be inferred from execution hints. - /// - /// The rows are sorted by block_id in ascending order. - pub fn block_stack_table_rows(&self) -> &[BlockStackTableRow] { - &self.block_stack_rows - } - - /// Returns a list of table rows which were added to and then removed from the block hash - /// table. We don't specify which cycles these rows were added/removed at because this info - /// can be inferred from execution hints. - /// - /// The rows are sorted first by `parent_id` in ascending order and then by `is_first_child` - /// with the entry where `is_first_child` = true coming first. - pub fn block_hash_table_rows(&self) -> &[BlockHashTableRow] { - &self.block_hash_rows - } - - /// Returns hints which describe how the op group was updated during program execution. Each - /// hint consists of a clock cycle and the update description for that cycle. - pub fn op_group_table_hints(&self) -> &[(u32, OpGroupTableUpdate)] { - &self.op_group_hints - } - - /// Returns a list of table rows which were added to and then removed from the op group table. - /// We don't specify which cycles these rows were added/removed at because this info can be - /// inferred from the op group table hints. - pub fn op_group_table_rows(&self) -> &[OpGroupTableRow] { - &self.op_group_rows - } - - /// Returns an index of the row with the specified block_id in the list of block stack table - /// rows. Since the rows in the list are sorted by block_id, we can use binary search to find - /// the relevant row. - /// - /// If the row for the specified block_id is not found, None is returned. - pub fn get_block_stack_row_idx(&self, block_id: Felt) -> Option { - let block_id = block_id.as_int(); - self.block_stack_rows - .binary_search_by_key(&block_id, |row| row.block_id.as_int()) - .ok() - } - - /// Returns an index of the row with the specified parent_id and is_first_child in the list of - /// block hash table rows. Since the rows in the list are sorted by parent_id, we can use - /// binary search to find the relevant row. - /// - /// If the row for the specified parent_id and is_first_child is not found, None is returned. 
- pub fn get_block_hash_row_idx(&self, parent_id: Felt, is_first_child: bool) -> Option { - let parent_id = parent_id.as_int(); - match self - .block_hash_rows - .binary_search_by_key(&parent_id, |row| row.parent_id.as_int()) - { - Ok(idx) => { - // check if the row for the found index is the right one; we need to do this - // because binary search may return an index for either of the two entries for - // the specified parent_id - if self.block_hash_rows[idx].is_first_child == is_first_child { - Some(idx) - } else if is_first_child { - // if we got here, it means that is_first_child for the row at the found index - // is false. thus, the row with is_first_child = true should be right before it - let row = &self.block_hash_rows[idx - 1]; - debug_assert_eq!(row.parent_id.as_int(), parent_id); - debug_assert_eq!(row.is_first_child, is_first_child); - Some(idx - 1) - } else { - // similarly, if we got here, is_first_child for the row at the found index - // must be true. thus, the row with is_first_child = false should be right - // after it - let row = &self.block_hash_rows[idx + 1]; - debug_assert_eq!(row.parent_id.as_int(), parent_id); - debug_assert_eq!(row.is_first_child, is_first_child); - Some(idx + 1) - } - } - Err(_) => None, - } - } - - // STATE MUTATORS - // -------------------------------------------------------------------------------------------- - - /// Specifies that a new code block started executing at the specified clock cycle. This also - /// records the relevant rows for both, block stack and block hash tables. - pub fn block_started( - &mut self, - clk: u32, - block_info: &BlockInfo, - child1_hash: Option, - child2_hash: Option, - ) { - // insert the hint with the relevant update - let hint = BlockTableUpdate::BlockStarted(block_info.num_children()); - self.block_exec_hints.push((clk, hint)); - - // create a row which would be inserted into the block stack table - let bst_row = BlockStackTableRow::new(block_info); - self.block_stack_rows.push(bst_row); - - // create rows for the block hash table. this may result in creation of 0, 1, or 2 rows: - // - no rows are created for SPAN blocks (both child hashes are None). - // - one row is created with is_first_child=false for SPLIT and LOOP blocks. - // - two rows are created for JOIN blocks with first row having is_first_child=true, and - // the second row having is_first_child=false - if let Some(child1_hash) = child1_hash { - let is_first_child = child2_hash.is_some(); - let bsh_row1 = BlockHashTableRow::from_parent(block_info, child1_hash, is_first_child); - self.block_hash_rows.push(bsh_row1); - - if let Some(child2_hash) = child2_hash { - let bsh_row2 = BlockHashTableRow::from_parent(block_info, child2_hash, false); - self.block_hash_rows.push(bsh_row2); - } - } - } - - /// Specifies that a code block execution was completed at the specified clock cycle. We also - /// need to specify whether the block was the first child of a JOIN block so that we can find - /// correct block hash table row. - pub fn block_ended(&mut self, clk: u32, is_first_child: bool) { - self.block_exec_hints.push((clk, BlockTableUpdate::BlockEnded(is_first_child))); - } - - /// Specifies that another execution of a loop's body started at the specified clock cycle. - /// This is triggered by the REPEAT operation. - pub fn loop_repeat_started(&mut self, clk: u32) { - self.block_exec_hints.push((clk, BlockTableUpdate::LoopRepeated)); - } - - /// Specifies that execution of a SPAN block was extended at the specified clock cycle. 
This - /// is triggered by the RESPAN operation. This also adds a row for the new span batch to the - /// block stack table. - pub fn span_extended(&mut self, clk: u32, block_info: &BlockInfo) { - let row = BlockStackTableRow::new(block_info); - self.block_stack_rows.push(row); - self.block_exec_hints.push((clk, BlockTableUpdate::SpanExtended)) - } - - /// Specifies that an operation batch may have been inserted into the op group table at the - /// specified cycle. Operation groups are inserted into the table only if the number of groups - /// left is greater than 1. - pub fn insert_op_batch(&mut self, clk: u32, num_groups_left: Felt) { - // compute number of op groups in this batch - let num_batch_groups = get_num_groups_in_next_batch(num_groups_left); - debug_assert!(num_batch_groups > 0, "op batch is empty"); - - // the first op group in a batch is not added to the op_group table, so, we subtract 1 here - let num_inserted_groups = num_batch_groups - 1; - - // if at least one group was inserted, mark the current clock cycle with the number of op - // groups added to the op group table - if num_inserted_groups > 0 { - let update = OpGroupTableUpdate::InsertRows(num_inserted_groups as u32); - self.op_group_hints.push((clk, update)); - } - } - - /// Specifies that an entry for an operation group was removed from the op group table at the - /// specified clock cycle. - pub fn remove_op_group( - &mut self, - clk: u32, - batch_id: Felt, - group_pos: Felt, - group_value: Felt, - ) { - self.op_group_hints.push((clk, OpGroupTableUpdate::RemoveRow)); - // we record a row only when it is deleted because rows are added and deleted in the same - // order. thus, a sequence of deleted rows is exactly the same as the sequence of added - // rows. - self.op_group_rows.push(OpGroupTableRow::new(batch_id, group_pos, group_value)); - } - - /// Inserts the first entry into the block hash table. - pub fn set_program_hash(&mut self, program_hash: Word) { - self.block_hash_rows[0] = BlockHashTableRow::from_program_hash(program_hash); - } -} - -impl Default for AuxTraceHints { - fn default() -> Self { - Self::new() - } -} - -// UPDATE HINTS -// ================================================================================================ - -/// Describes updates to both, block stack and block hash tables as follows: -/// - `BlockStarted` and `BlockEnded` are relevant for both tables. -/// - `SpanExtended` is relevant only for the block stack table. -/// - `LoopRepeated` is relevant only for the block hash table. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum BlockTableUpdate { - BlockStarted(u32), // inner value contains the number of children for the block: 0, 1, or 2. - SpanExtended, - LoopRepeated, - BlockEnded(bool), // true indicates that the block was the first child of a JOIN block -} - -/// Describes an update to the op group table. There could be two types of updates: -/// - Some number of rows could be added to the table. In this case, the associated value specifies -/// how many rows were added. -/// - A single row could be removed from the table. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum OpGroupTableUpdate { - InsertRows(u32), - RemoveRow, -} - -// BLOCK STACK TABLE ROW -// ================================================================================================ - -/// Describes a single entry in the block stack table. 
-#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub struct BlockStackTableRow { - block_id: Felt, - parent_id: Felt, - is_loop: bool, - parent_ctx: u32, - parent_fn_hash: Word, - parent_fmp: Felt, - parent_stack_depth: u32, - parent_next_overflow_addr: Felt, -} - -impl BlockStackTableRow { - /// Returns a new [BlockStackTableRow] instantiated from the specified block info. - pub fn new(block_info: &BlockInfo) -> Self { - let ctx_info = block_info.ctx_info.unwrap_or_default(); - Self { - block_id: block_info.addr, - parent_id: block_info.parent_addr, - is_loop: block_info.is_entered_loop() == ONE, - parent_ctx: ctx_info.parent_ctx, - parent_fn_hash: ctx_info.parent_fn_hash, - parent_fmp: ctx_info.parent_fmp, - parent_stack_depth: ctx_info.parent_stack_depth, - parent_next_overflow_addr: ctx_info.parent_next_overflow_addr, - } - } - - /// Returns a new [BlockStackTableRow] instantiated with the specified parameters. This is - /// used for test purpose only. - #[cfg(test)] - pub fn new_test(block_id: Felt, parent_id: Felt, is_loop: bool) -> Self { - Self { - block_id, - parent_id, - is_loop, - parent_ctx: 0, - parent_fn_hash: EMPTY_WORD, - parent_fmp: ZERO, - parent_stack_depth: 0, - parent_next_overflow_addr: ZERO, - } - } - - #[cfg(test)] - /// Returns a new [BlockStackTableRow] corresponding to a CALL code block. This is used for - /// test purpose only. - pub fn new_test_with_ctx( - block_id: Felt, - parent_id: Felt, - is_loop: bool, - ctx_info: super::ExecutionContextInfo, - ) -> Self { - Self { - block_id, - parent_id, - is_loop, - parent_ctx: ctx_info.parent_ctx, - parent_fn_hash: ctx_info.parent_fn_hash, - parent_fmp: ctx_info.parent_fmp, - parent_stack_depth: ctx_info.parent_stack_depth, - parent_next_overflow_addr: ctx_info.parent_next_overflow_addr, - } - } -} - -impl LookupTableRow for BlockStackTableRow { - /// Reduces this row to a single field element in the field specified by E. This requires - /// at least 12 alpha values. - fn to_value>( - &self, - _main_trace: &ColMatrix, - alphas: &[E], - ) -> E { - let is_loop = if self.is_loop { ONE } else { ZERO }; - alphas[0] - + alphas[1].mul_base(self.block_id) - + alphas[2].mul_base(self.parent_id) - + alphas[3].mul_base(is_loop) - + alphas[4].mul_base(Felt::from(self.parent_ctx)) - + alphas[5].mul_base(self.parent_fmp) - + alphas[6].mul_base(Felt::from(self.parent_stack_depth)) - + alphas[7].mul_base(self.parent_next_overflow_addr) - + alphas[8].mul_base(self.parent_fn_hash[0]) - + alphas[9].mul_base(self.parent_fn_hash[1]) - + alphas[10].mul_base(self.parent_fn_hash[2]) - + alphas[11].mul_base(self.parent_fn_hash[3]) - } -} - -// BLOCK HASH TABLE ROW -// ================================================================================================ - -/// Describes a single entry in the block hash table. An entry in the block hash table is a tuple -/// (parent_id, block_hash, is_first_child, is_loop_body). -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub struct BlockHashTableRow { - parent_id: Felt, - block_hash: Word, - is_first_child: bool, - is_loop_body: bool, -} - -impl BlockHashTableRow { - // CONSTRUCTORS - // -------------------------------------------------------------------------------------------- - /// Returns a new [BlockHashTableRow] instantiated with the specified parameters. 
- pub fn from_parent(parent_info: &BlockInfo, block_hash: Word, is_first_child: bool) -> Self { - Self { - parent_id: parent_info.addr, - block_hash, - is_first_child, - is_loop_body: parent_info.is_entered_loop() == ONE, - } - } - - /// Returns a new [BlockHashTableRow] containing the hash of the entire program. - pub fn from_program_hash(program_hash: Word) -> Self { - Self { - parent_id: ZERO, - block_hash: program_hash, - is_first_child: false, - is_loop_body: false, - } - } - - /// Returns a new [BlockHashTableRow] instantiated with the specified parameters. This is - /// used for test purpose only. - #[cfg(test)] - pub fn new_test( - parent_id: Felt, - block_hash: Word, - is_first_child: bool, - is_loop_body: bool, - ) -> Self { - Self { - parent_id, - block_hash, - is_first_child, - is_loop_body, - } - } - - // PUBLIC ACCESSORS - // -------------------------------------------------------------------------------------------- - - /// Returns true if this table row is for a block which is the first child of a JOIN block. - pub fn is_first_child(&self) -> bool { - self.is_first_child - } -} - -impl LookupTableRow for BlockHashTableRow { - /// Reduces this row to a single field element in the field specified by E. This requires - /// at least 8 alpha values. - fn to_value>( - &self, - _main_trace: &ColMatrix, - alphas: &[E], - ) -> E { - let is_first_child = if self.is_first_child { ONE } else { ZERO }; - let is_loop_body = if self.is_loop_body { ONE } else { ZERO }; - alphas[0] - + alphas[1].mul_base(self.parent_id) - + alphas[2].mul_base(self.block_hash[0]) - + alphas[3].mul_base(self.block_hash[1]) - + alphas[4].mul_base(self.block_hash[2]) - + alphas[5].mul_base(self.block_hash[3]) - + alphas[6].mul_base(is_first_child) - + alphas[7].mul_base(is_loop_body) - } -} - -// OP GROUP TABLE ROW -// ================================================================================================ - -/// Describes a single entry in the op group table. An entry in the op group table is a tuple -/// (batch_id, group_pos, group_value). -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub struct OpGroupTableRow { - batch_id: Felt, - group_pos: Felt, - group_value: Felt, -} - -impl OpGroupTableRow { - /// Returns a new [OpGroupTableRow] instantiated with the specified parameters. - pub fn new(batch_id: Felt, group_pos: Felt, group_value: Felt) -> Self { - Self { - batch_id, - group_pos, - group_value, - } - } -} - -impl LookupTableRow for OpGroupTableRow { - /// Reduces this row to a single field element in the field specified by E. This requires - /// at least 4 alpha values. - fn to_value>( - &self, - _main_trace: &ColMatrix, - alphas: &[E], - ) -> E { - alphas[0] - + alphas[1].mul_base(self.batch_id) - + alphas[2].mul_base(self.group_pos) - + alphas[3].mul_base(self.group_value) - } -} diff --git a/processor/src/decoder/aux_trace/block_hash_table.rs b/processor/src/decoder/aux_trace/block_hash_table.rs new file mode 100644 index 0000000000..db3b9ab14f --- /dev/null +++ b/processor/src/decoder/aux_trace/block_hash_table.rs @@ -0,0 +1,203 @@ +use super::{ + AuxColumnBuilder, Felt, FieldElement, MainTrace, DYN, END, HALT, JOIN, LOOP, ONE, REPEAT, SPLIT, +}; + +// BLOCK HASH TABLE COLUMN BUILDER +// ================================================================================================ + +/// Builds the execution trace of the decoder's `p2` column which describes the state of the block +/// hash table via multiset checks. 
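// Illustrative sketch (an assumption; the actual composition is performed by the
// `AuxColumnBuilder` trait, which is outside this diff): a multiset-check column such as `p2`
// is typically a running product in which each row multiplies in its "responses" (table rows
// being added) and divides out its "requests" (table rows being removed). Rows that touch the
// table in neither way contribute a multiplicand of one, and if the table is empty again by
// the end of the trace the product returns to its initial value.

fn running_product_column<E>(init: E, requests: &[E], responses: &[E]) -> Vec<E>
where
    E: Copy + core::ops::Mul<Output = E> + core::ops::Div<Output = E>,
{
    assert_eq!(requests.len(), responses.len());
    let mut column = Vec::with_capacity(responses.len() + 1);
    let mut acc = init;
    column.push(acc);
    for (&request, &response) in requests.iter().zip(responses.iter()) {
        acc = acc * response / request;
        column.push(acc);
    }
    column
}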
+#[derive(Default)] +pub struct BlockHashTableColumnBuilder {} + +impl> AuxColumnBuilder for BlockHashTableColumnBuilder { + fn init_responses(&self, main_trace: &MainTrace, alphas: &[E]) -> E { + let row_index = (0..main_trace.num_rows()) + .find(|row| main_trace.get_op_code(*row) == Felt::from(HALT)) + .expect("execution trace must include at least one occurrence of HALT"); + let program_hash = main_trace.decoder_hasher_state_first_half(row_index); + + // Computes the initialization value for the block hash table. + alphas[0] + + alphas[2].mul_base(program_hash[0]) + + alphas[3].mul_base(program_hash[1]) + + alphas[4].mul_base(program_hash[2]) + + alphas[5].mul_base(program_hash[3]) + } + + /// Removes a row from the block hash table. + fn get_requests_at(&self, main_trace: &MainTrace, alphas: &[E], i: usize) -> E { + let op_code_felt = main_trace.get_op_code(i); + let op_code = op_code_felt.as_int() as u8; + + let op_code_felt_next = main_trace.get_op_code(i + 1); + let op_code_next = op_code_felt_next.as_int() as u8; + + match op_code { + END => get_block_hash_table_removal_multiplicand(main_trace, i, alphas, op_code_next), + _ => E::ONE, + } + } + + /// Adds a row to the block hash table. + fn get_responses_at(&self, main_trace: &MainTrace, alphas: &[E], i: usize) -> E { + let op_code_felt = main_trace.get_op_code(i); + let op_code = op_code_felt.as_int() as u8; + + match op_code { + JOIN => get_block_hash_table_inclusion_multiplicand_join(main_trace, i, alphas), + SPLIT => get_block_hash_table_inclusion_multiplicand_split(main_trace, i, alphas), + LOOP => get_block_hash_table_inclusion_multiplicand_loop(main_trace, i, alphas), + REPEAT => get_block_hash_table_inclusion_multiplicand_repeat(main_trace, i, alphas), + DYN => get_block_hash_table_inclusion_multiplicand_dyn(main_trace, i, alphas), + _ => E::ONE, + } + } +} + +// HELPER FUNCTIONS +// ================================================================================================ + +/// Computes the multiplicand representing the removal of a row from the block hash table. +fn get_block_hash_table_removal_multiplicand>( + main_trace: &MainTrace, + i: usize, + alphas: &[E], + op_code_next: u8, +) -> E { + let a = main_trace.addr(i + 1); + let digest = main_trace.decoder_hasher_state_first_half(i); + let is_loop_body = main_trace.is_loop_body_flag(i); + let next_end_or_repeat = + if op_code_next == END || op_code_next == REPEAT || op_code_next == HALT { + E::ZERO + } else { + alphas[6] + }; + + alphas[0] + + alphas[1].mul_base(a) + + alphas[2].mul_base(digest[0]) + + alphas[3].mul_base(digest[1]) + + alphas[4].mul_base(digest[2]) + + alphas[5].mul_base(digest[3]) + + alphas[7].mul_base(is_loop_body) + + next_end_or_repeat +} + +/// Computes the multiplicand representing the inclusion of a new row representing a JOIN block +/// to the block hash table. 
+fn get_block_hash_table_inclusion_multiplicand_join>( + main_trace: &MainTrace, + i: usize, + alphas: &[E], +) -> E { + let a_prime = main_trace.addr(i + 1); + let state = main_trace.decoder_hasher_state(i); + let ch1 = alphas[0] + + alphas[1].mul_base(a_prime) + + alphas[2].mul_base(state[0]) + + alphas[3].mul_base(state[1]) + + alphas[4].mul_base(state[2]) + + alphas[5].mul_base(state[3]); + let ch2 = alphas[0] + + alphas[1].mul_base(a_prime) + + alphas[2].mul_base(state[4]) + + alphas[3].mul_base(state[5]) + + alphas[4].mul_base(state[6]) + + alphas[5].mul_base(state[7]); + + (ch1 + alphas[6]) * ch2 +} + +/// Computes the multiplicand representing the inclusion of a new row representing a SPLIT block +/// to the block hash table. +fn get_block_hash_table_inclusion_multiplicand_split>( + main_trace: &MainTrace, + i: usize, + alphas: &[E], +) -> E { + let s0 = main_trace.stack_element(0, i); + let a_prime = main_trace.addr(i + 1); + let state = main_trace.decoder_hasher_state(i); + + if s0 == ONE { + alphas[0] + + alphas[1].mul_base(a_prime) + + alphas[2].mul_base(state[0]) + + alphas[3].mul_base(state[1]) + + alphas[4].mul_base(state[2]) + + alphas[5].mul_base(state[3]) + } else { + alphas[0] + + alphas[1].mul_base(a_prime) + + alphas[2].mul_base(state[4]) + + alphas[3].mul_base(state[5]) + + alphas[4].mul_base(state[6]) + + alphas[5].mul_base(state[7]) + } +} + +/// Computes the multiplicand representing the inclusion of a new row representing a LOOP block +/// to the block hash table. +fn get_block_hash_table_inclusion_multiplicand_loop>( + main_trace: &MainTrace, + i: usize, + alphas: &[E], +) -> E { + let s0 = main_trace.stack_element(0, i); + + if s0 == ONE { + let a_prime = main_trace.addr(i + 1); + let state = main_trace.decoder_hasher_state(i); + alphas[0] + + alphas[1].mul_base(a_prime) + + alphas[2].mul_base(state[0]) + + alphas[3].mul_base(state[1]) + + alphas[4].mul_base(state[2]) + + alphas[5].mul_base(state[3]) + + alphas[7] + } else { + E::ONE + } +} + +/// Computes the multiplicand representing the inclusion of a new row representing a REPEAT +/// to the block hash table. +fn get_block_hash_table_inclusion_multiplicand_repeat>( + main_trace: &MainTrace, + i: usize, + alphas: &[E], +) -> E { + let a_prime = main_trace.addr(i + 1); + let state = main_trace.decoder_hasher_state_first_half(i); + + alphas[0] + + alphas[1].mul_base(a_prime) + + alphas[2].mul_base(state[0]) + + alphas[3].mul_base(state[1]) + + alphas[4].mul_base(state[2]) + + alphas[5].mul_base(state[3]) + + alphas[7] +} + +/// Computes the multiplicand representing the inclusion of a new row representing a DYN block +/// to the block hash table. 
+fn get_block_hash_table_inclusion_multiplicand_dyn>( + main_trace: &MainTrace, + i: usize, + alphas: &[E], +) -> E { + let a_prime = main_trace.addr(i + 1); + let s0 = main_trace.stack_element(0, i); + let s1 = main_trace.stack_element(1, i); + let s2 = main_trace.stack_element(2, i); + let s3 = main_trace.stack_element(3, i); + + alphas[0] + + alphas[1].mul_base(a_prime) + + alphas[2].mul_base(s3) + + alphas[3].mul_base(s2) + + alphas[4].mul_base(s1) + + alphas[5].mul_base(s0) +} diff --git a/processor/src/decoder/aux_trace/block_stack_table.rs b/processor/src/decoder/aux_trace/block_stack_table.rs new file mode 100644 index 0000000000..bd21450978 --- /dev/null +++ b/processor/src/decoder/aux_trace/block_stack_table.rs @@ -0,0 +1,150 @@ +use super::{ + AuxColumnBuilder, Felt, FieldElement, MainTrace, CALL, DYN, END, JOIN, LOOP, ONE, RESPAN, SPAN, + SPLIT, SYSCALL, ZERO, +}; + +// BLOCK STACK TABLE COLUMN BUILDER +// ================================================================================================ + +/// Builds the execution trace of the decoder's `p1` column which describes the state of the block +/// stack table via multiset checks. +#[derive(Default)] +pub struct BlockStackColumnBuilder {} + +impl> AuxColumnBuilder for BlockStackColumnBuilder { + /// Removes a row from the block stack table. + fn get_requests_at(&self, main_trace: &MainTrace, alphas: &[E], i: usize) -> E { + let op_code_felt = main_trace.get_op_code(i); + let op_code = op_code_felt.as_int() as u8; + + match op_code { + RESPAN => get_block_stack_table_removal_multiplicand(main_trace, i, true, alphas), + END => get_block_stack_table_removal_multiplicand(main_trace, i, false, alphas), + _ => E::ONE, + } + } + + /// Adds a row to the block stack table. + fn get_responses_at(&self, main_trace: &MainTrace, alphas: &[E], i: usize) -> E { + let op_code_felt = main_trace.get_op_code(i); + let op_code = op_code_felt.as_int() as u8; + + match op_code { + JOIN | SPLIT | SPAN | DYN | LOOP | RESPAN | CALL | SYSCALL => { + get_block_stack_table_inclusion_multiplicand(main_trace, i, alphas, op_code) + } + _ => E::ONE, + } + } +} + +// HELPER FUNCTIONS +// ================================================================================================ + +/// Computes the multiplicand representing the removal of a row from the block stack table. 
+fn get_block_stack_table_removal_multiplicand>( + main_trace: &MainTrace, + i: usize, + is_respan: bool, + alphas: &[E], +) -> E { + let block_id = main_trace.addr(i); + let parent_id = if is_respan { + main_trace.decoder_hasher_state_element(1, i + 1) + } else { + main_trace.addr(i + 1) + }; + let is_loop = main_trace.is_loop_flag(i); + + let elements = if main_trace.is_call_flag(i) == ONE || main_trace.is_syscall_flag(i) == ONE { + let parent_ctx = main_trace.ctx(i + 1); + let parent_fmp = main_trace.fmp(i + 1); + let parent_stack_depth = main_trace.stack_depth(i + 1); + let parent_next_overflow_addr = main_trace.parent_overflow_address(i + 1); + let parent_fn_hash = main_trace.fn_hash(i); + + [ + ONE, + block_id, + parent_id, + is_loop, + parent_ctx, + parent_fmp, + parent_stack_depth, + parent_next_overflow_addr, + parent_fn_hash[0], + parent_fn_hash[1], + parent_fn_hash[2], + parent_fn_hash[0], + ] + } else { + let mut result = [ZERO; 12]; + result[0] = ONE; + result[1] = block_id; + result[2] = parent_id; + result[3] = is_loop; + result + }; + + let mut value = E::ZERO; + + for (&alpha, &element) in alphas.iter().zip(elements.iter()) { + value += alpha.mul_base(element); + } + value +} + +/// Computes the multiplicand representing the inclusion of a new row to the block stack table. +fn get_block_stack_table_inclusion_multiplicand>( + main_trace: &MainTrace, + i: usize, + alphas: &[E], + op_code: u8, +) -> E { + let block_id = main_trace.addr(i + 1); + let parent_id = if op_code == RESPAN { + main_trace.decoder_hasher_state_element(1, i + 1) + } else { + main_trace.addr(i) + }; + let is_loop = if op_code == LOOP { + main_trace.stack_element(0, i) + } else { + ZERO + }; + let elements = if op_code == CALL || op_code == SYSCALL { + let parent_ctx = main_trace.ctx(i); + let parent_fmp = main_trace.fmp(i); + let parent_stack_depth = main_trace.stack_depth(i); + let parent_next_overflow_addr = main_trace.parent_overflow_address(i); + let parent_fn_hash = main_trace.decoder_hasher_state_first_half(i); + [ + ONE, + block_id, + parent_id, + is_loop, + parent_ctx, + parent_fmp, + parent_stack_depth, + parent_next_overflow_addr, + parent_fn_hash[0], + parent_fn_hash[1], + parent_fn_hash[2], + parent_fn_hash[3], + ] + } else { + let mut result = [ZERO; 12]; + result[0] = ONE; + result[1] = block_id; + result[2] = parent_id; + result[3] = is_loop; + result + }; + + let mut value = E::ZERO; + + for (&alpha, &element) in alphas.iter().zip(elements.iter()) { + value += alpha.mul_base(element); + } + value +} diff --git a/processor/src/decoder/aux_trace/mod.rs b/processor/src/decoder/aux_trace/mod.rs new file mode 100644 index 0000000000..8b083f99ba --- /dev/null +++ b/processor/src/decoder/aux_trace/mod.rs @@ -0,0 +1,57 @@ +use super::{Felt, ONE, ZERO}; +use crate::{trace::AuxColumnBuilder, utils::collections::*}; +use miden_air::trace::main_trace::MainTrace; +use vm_core::{FieldElement, Operation}; + +mod block_hash_table; +use block_hash_table::BlockHashTableColumnBuilder; + +mod block_stack_table; +use block_stack_table::BlockStackColumnBuilder; + +mod op_group_table; +use op_group_table::OpGroupTableColumnBuilder; + +// CONSTANTS +// ================================================================================================ + +const JOIN: u8 = Operation::Join.op_code(); +const SPLIT: u8 = Operation::Split.op_code(); +const LOOP: u8 = Operation::Loop.op_code(); +const REPEAT: u8 = Operation::Repeat.op_code(); +const DYN: u8 = Operation::Dyn.op_code(); +const CALL: u8 = 
Operation::Call.op_code();
+const SYSCALL: u8 = Operation::SysCall.op_code();
+const SPAN: u8 = Operation::Span.op_code();
+const RESPAN: u8 = Operation::Respan.op_code();
+const PUSH: u8 = Operation::Push(ZERO).op_code();
+const END: u8 = Operation::End.op_code();
+const HALT: u8 = Operation::Halt.op_code();
+
+// AUXILIARY TRACE BUILDER
+// ================================================================================================
+
+/// Constructs the execution traces of decoder-related auxiliary trace segment columns
+/// (used in multiset checks).
+#[derive(Default, Clone, Copy)]
+pub struct AuxTraceBuilder {}
+
+impl AuxTraceBuilder {
+    /// Builds and returns decoder auxiliary trace columns p1, p2, and p3 describing states of block
+    /// stack, block hash, and op group tables respectively.
+    pub fn build_aux_columns>(
+        &self,
+        main_trace: &MainTrace,
+        rand_elements: &[E],
+    ) -> Vec> {
+        let block_stack_column_builder = BlockStackColumnBuilder::default();
+        let block_hash_column_builder = BlockHashTableColumnBuilder::default();
+        let op_group_table_column_builder = OpGroupTableColumnBuilder::default();
+
+        let p1 = block_stack_column_builder.build_aux_column(main_trace, rand_elements);
+        let p2 = block_hash_column_builder.build_aux_column(main_trace, rand_elements);
+        let p3 = op_group_table_column_builder.build_aux_column(main_trace, rand_elements);
+
+        vec![p1, p2, p3]
+    }
+}
diff --git a/processor/src/decoder/aux_trace/op_group_table.rs b/processor/src/decoder/aux_trace/op_group_table.rs
new file mode 100644
index 0000000000..8e780bc343
--- /dev/null
+++ b/processor/src/decoder/aux_trace/op_group_table.rs
@@ -0,0 +1,98 @@
+use super::{AuxColumnBuilder, Felt, FieldElement, MainTrace, ONE, PUSH, RESPAN, SPAN};
+use miden_air::trace::decoder::{OP_BATCH_2_GROUPS, OP_BATCH_4_GROUPS, OP_BATCH_8_GROUPS};
+
+// OP GROUP TABLE COLUMN
+// ================================================================================================
+
+/// Builds the execution trace of the decoder's `p3` column which describes the state of the op
+/// group table via multiset checks.
+#[derive(Default)]
+pub struct OpGroupTableColumnBuilder {}
+
+impl> AuxColumnBuilder for OpGroupTableColumnBuilder {
+    /// Removes a row from the op group table.
+    fn get_requests_at(&self, main_trace: &MainTrace, alphas: &[E], i: usize) -> E {
+        let delete_group_flag = main_trace.delta_group_count(i) * main_trace.is_in_span(i);
+
+        if delete_group_flag == ONE {
+            get_op_group_table_removal_multiplicand(main_trace, i, alphas)
+        } else {
+            E::ONE
+        }
+    }
+
+    /// Adds a row to the op group table.
+    fn get_responses_at(&self, main_trace: &MainTrace, alphas: &[E], i: usize) -> E {
+        let op_code_felt = main_trace.get_op_code(i);
+        let op_code = op_code_felt.as_int() as u8;
+
+        match op_code {
+            SPAN | RESPAN => get_op_group_table_inclusion_multiplicand(main_trace, i, alphas),
+            _ => E::ONE,
+        }
+    }
+}
+
+// HELPER FUNCTIONS
+// ================================================================================================
+
+/// Computes the multiplicand representing the inclusion of a new row to the op group table.
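// Small worked sketch (the helper name is an assumption; the rule itself comes from the
// removed `insert_op_batch` hint earlier in this diff, which subtracts one because the first
// op group of every batch is decoded directly and never enters the table): a SPAN or RESPAN
// adds one op group table row per group in the batch except the first. This is why the 8- and
// 4-group cases below take a product over k in 1..8 and 1..4, while the 2-group case adds a
// single row.

fn op_group_rows_added(num_groups_in_batch: usize) -> usize {
    assert!(num_groups_in_batch > 0, "op batch is empty");
    num_groups_in_batch - 1
}

// e.g. op_group_rows_added(8) == 7, op_group_rows_added(4) == 3, op_group_rows_added(2) == 1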
+fn get_op_group_table_inclusion_multiplicand<E: FieldElement<BaseField = Felt>>(
+    main_trace: &MainTrace,
+    i: usize,
+    alphas: &[E],
+) -> E {
+    let block_id = main_trace.addr(i + 1);
+    let group_count = main_trace.group_count(i);
+    let op_batch_flag = main_trace.op_batch_flag(i);
+
+    if op_batch_flag == OP_BATCH_8_GROUPS {
+        let h = main_trace.decoder_hasher_state(i);
+        (1..8_u8).fold(E::ONE, |acc, k| {
+            acc * (alphas[0]
+                + alphas[1].mul_base(block_id)
+                + alphas[2].mul_base(group_count - Felt::from(k))
+                + alphas[3].mul_base(h[k as usize]))
+        })
+    } else if op_batch_flag == OP_BATCH_4_GROUPS {
+        let h = main_trace.decoder_hasher_state_first_half(i);
+        (1..4_u8).fold(E::ONE, |acc, k| {
+            acc * (alphas[0]
+                + alphas[1].mul_base(block_id)
+                + alphas[2].mul_base(group_count - Felt::from(k))
+                + alphas[3].mul_base(h[k as usize]))
+        })
+    } else if op_batch_flag == OP_BATCH_2_GROUPS {
+        let h = main_trace.decoder_hasher_state_first_half(i);
+        alphas[0]
+            + alphas[1].mul_base(block_id)
+            + alphas[2].mul_base(group_count - ONE)
+            + alphas[3].mul_base(h[1])
+    } else {
+        E::ONE
+    }
+}
+
+/// Computes the multiplicand representing the removal of a row from the op group table.
+fn get_op_group_table_removal_multiplicand<E: FieldElement<BaseField = Felt>>(
+    main_trace: &MainTrace,
+    i: usize,
+    alphas: &[E],
+) -> E {
+    let group_count = main_trace.group_count(i);
+    let block_id = main_trace.addr(i);
+
+    let op_code = main_trace.get_op_code(i);
+    let tmp = if op_code == Felt::from(PUSH) {
+        main_trace.stack_element(0, i + 1)
+    } else {
+        let h0 = main_trace.decoder_hasher_state_first_half(i + 1)[0];
+
+        let op_prime = main_trace.get_op_code(i + 1);
+        h0.mul_small(1 << 7) + op_prime
+    };
+    alphas[0]
+        + alphas[1].mul_base(block_id)
+        + alphas[2].mul_base(group_count)
+        + alphas[3].mul_base(tmp)
+}
diff --git a/processor/src/decoder/block_stack.rs b/processor/src/decoder/block_stack.rs
index cef32a8bd7..c478829694 100644
--- a/processor/src/decoder/block_stack.rs
+++ b/processor/src/decoder/block_stack.rs
@@ -1,4 +1,5 @@
-use super::{Felt, Vec, Word, ONE, ZERO};
+use super::{Felt, Word, ONE, ZERO};
+use crate::{system::ContextId, utils::collections::*};
 
 // BLOCK STACK
 // ================================================================================================
@@ -138,20 +139,6 @@ impl BlockInfo {
             _ => ZERO,
         }
     }
-
-    /// Returns the number of children a block has. This is an integer between 0 and 2 (both
-    /// inclusive).
-    pub fn num_children(&self) -> u32 {
-        match self.block_type {
-            BlockType::Join(_) => 2,
-            BlockType::Split => 1,
-            BlockType::Loop(is_entered) => u32::from(is_entered),
-            BlockType::Call => 1,
-            BlockType::Dyn => 1,
-            BlockType::SysCall => 1,
-            BlockType::Span => 0,
-        }
-    }
 }
 
 // EXECUTION CONTEXT INFO
@@ -162,7 +149,7 @@ impl BlockInfo {
 #[derive(Debug, Default, Clone, Copy)]
 pub struct ExecutionContextInfo {
     /// Context ID of the block's parent.
-    pub parent_ctx: u32,
+    pub parent_ctx: ContextId,
     /// Hash of the function which initiated execution of the block's parent. If the parent is a
     /// root context, this will be set to [ZERO; 4].
     pub parent_fn_hash: Word,
@@ -177,7 +164,7 @@ pub struct ExecutionContextInfo {
 
 impl ExecutionContextInfo {
     /// Returns an new [ExecutionContextInfo] instantiated with the specified parameters.
pub fn new( - parent_ctx: u32, + parent_ctx: ContextId, parent_fn_hash: Word, parent_fmp: Felt, parent_stack_depth: u32, diff --git a/processor/src/decoder/mod.rs b/processor/src/decoder/mod.rs index fcd423c50c..7b012d27d4 100644 --- a/processor/src/decoder/mod.rs +++ b/processor/src/decoder/mod.rs @@ -1,8 +1,8 @@ use super::{ - Call, ColMatrix, Dyn, ExecutionError, Felt, FieldElement, Host, Join, Loop, OpBatch, Operation, - Process, Span, Split, StarkField, Vec, Word, EMPTY_WORD, MIN_TRACE_LEN, ONE, OP_BATCH_SIZE, - ZERO, + Call, Dyn, ExecutionError, Felt, Host, Join, Loop, OpBatch, Operation, Process, Span, Split, + Word, EMPTY_WORD, MIN_TRACE_LEN, ONE, OP_BATCH_SIZE, ZERO, }; +use crate::utils::collections::*; use miden_air::trace::{ chiplets::hasher::DIGEST_LEN, decoder::{ @@ -15,17 +15,14 @@ use vm_core::{code_blocks::get_span_op_group_count, stack::STACK_TOP_SIZE, Assem mod trace; use trace::DecoderTrace; -#[cfg(test)] -use miden_air::trace::decoder::NUM_USER_OP_HELPERS; +mod aux_trace; +pub use aux_trace::AuxTraceBuilder; mod block_stack; -use block_stack::{BlockInfo, BlockStack, BlockType, ExecutionContextInfo}; +use block_stack::{BlockStack, BlockType, ExecutionContextInfo}; -mod aux_hints; -pub use aux_hints::{ - AuxTraceHints, BlockHashTableRow, BlockStackTableRow, BlockTableUpdate, OpGroupTableRow, - OpGroupTableUpdate, -}; +#[cfg(test)] +use miden_air::trace::decoder::NUM_USER_OP_HELPERS; #[cfg(test)] mod tests; @@ -68,9 +65,6 @@ where // executed the rest of the VM state does not change self.decoder.end_control_block(block.hash().into()); - // send the end of control block to the chiplets bus to handle the final hash request. - self.chiplets.read_hash_result(); - self.execute_op(Operation::Noop) } @@ -93,7 +87,7 @@ where // start decoding the SPLIT block. this appends a row with SPLIT operation to the decoder // trace. we also pop the value off the top of the stack and return it. - self.decoder.start_split(child1_hash, child2_hash, addr, condition); + self.decoder.start_split(child1_hash, child2_hash, addr); self.execute_op(Operation::Drop)?; Ok(condition) } @@ -104,9 +98,6 @@ where // executed the rest of the VM state does not change self.decoder.end_control_block(block.hash().into()); - // send the end of control block to the chiplets bus to handle the final hash request. - self.chiplets.read_hash_result(); - self.execute_op(Operation::Noop) } @@ -147,9 +138,6 @@ where // this appends a row with END operation to the decoder trace. self.decoder.end_control_block(block.hash().into()); - // send the end of control block to the chiplets bus to handle the final hash request. - self.chiplets.read_hash_result(); - // if we are exiting a loop, we also need to pop the top value off the stack (and this // value must be ZERO - otherwise, we should have stayed in the loop). but, if we never // entered the loop in the first place, the stack would have been popped when the LOOP @@ -222,9 +210,6 @@ where .end_control_block(block.hash().into()) .expect("no execution context"); - // send the end of control block to the chiplets bus to handle the final hash request. - self.chiplets.read_hash_result(); - // when returning from a function call or a syscall, restore the context of the system // registers and the operand stack to what it was prior to the call. 
         self.system.restore_context(
@@ -264,9 +249,6 @@
         // executed the rest of the VM state does not change
         self.decoder.end_control_block(block.hash().into());
 
-        // send the end of control block to the chiplets bus to handle the final hash request.
-        self.chiplets.read_hash_result();
-
         self.execute_op(Operation::Noop)
     }
 
@@ -293,9 +275,6 @@
     /// Continues decoding a SPAN block by absorbing the next batch of operations.
     pub(super) fn respan(&mut self, op_batch: &OpBatch) {
         self.decoder.respan(op_batch);
-
-        // send a request to the chiplets to continue the hash and absorb new elements.
-        self.chiplets.absorb_span_batch();
     }
 
     /// Ends decoding a SPAN block.
@@ -304,9 +283,6 @@
         // executed the rest of the VM state does not change
         self.decoder.end_span(block.hash().into());
 
-        // send the end of control block to the chiplets bus to handle the final hash request.
-        self.chiplets.read_hash_result();
-
         self.execute_op(Operation::Noop)
     }
 }
@@ -356,7 +332,6 @@
 /// - `be1` is set when the two most significant op bits are ONE.
 ///
 /// In addition to the execution trace, the decoder also contains the following:
-/// - A set of hints used in construction of decoder-related columns in auxiliary trace segment.
 /// - An instance of [DebugInfo] which is only populated in debug mode. This debug_info instance
 ///   includes operations executed by the VM and AsmOp decorators. AsmOp decorators are populated
 ///   only when both the processor and assembler are in debug mode.
@@ -364,7 +339,6 @@
 pub struct Decoder {
     block_stack: BlockStack,
     span_context: Option<SpanContext>,
     trace: DecoderTrace,
-    aux_hints: AuxTraceHints,
     debug_info: DebugInfo,
 }
 
@@ -377,7 +351,6 @@ impl Decoder {
             block_stack: BlockStack::default(),
             span_context: None,
             trace: DecoderTrace::new(),
-            aux_hints: AuxTraceHints::new(),
             debug_info: DebugInfo::new(in_debug_mode),
         }
     }
@@ -416,24 +389,11 @@
     /// This pushes a block with ID=addr onto the block stack and appends execution of a JOIN
     /// operation to the trace.
     pub fn start_join(&mut self, child1_hash: Word, child2_hash: Word, addr: Felt) {
-        // get the current clock cycle here (before the trace table is updated)
-        let clk = self.trace_len() as u32;
-
         // append a JOIN row to the execution trace
         let parent_addr = self.block_stack.push(addr, BlockType::Join(false), None);
         self.trace
            .append_block_start(parent_addr, Operation::Join, child1_hash, child2_hash);
 
-        // mark this cycle as the cycle at which a new JOIN block began execution (this affects
-        // block stack and block hash tables). Both children of the JOIN block are expected to
-        // be executed, and thus we record both of their hashes.
-        self.aux_hints.block_started(
-            clk,
-            self.block_stack.peek(),
-            Some(child1_hash),
-            Some(child2_hash),
-        );
-
         self.debug_info.append_operation(Operation::Join);
     }
 
@@ -441,28 +401,12 @@
     ///
     /// This pushes a block with ID=addr onto the block stack and appends execution of a SPLIT
     /// operation to the trace.
- pub fn start_split( - &mut self, - child1_hash: Word, - child2_hash: Word, - addr: Felt, - stack_top: Felt, - ) { - // get the current clock cycle here (before the trace table is updated) - let clk = self.trace_len() as u32; - + pub fn start_split(&mut self, child1_hash: Word, child2_hash: Word, addr: Felt) { // append a SPLIT row to the execution trace let parent_addr = self.block_stack.push(addr, BlockType::Split, None); self.trace .append_block_start(parent_addr, Operation::Split, child1_hash, child2_hash); - // mark this cycle as the cycle at which a SPLIT block began execution (this affects block - // stack and block hash tables). Only one child of the SPLIT block is expected to be - // executed, and thus, we record the hash only for that child. - let taken_branch_hash = if stack_top == ONE { child1_hash } else { child2_hash }; - self.aux_hints - .block_started(clk, self.block_stack.peek(), Some(taken_branch_hash), None); - self.debug_info.append_operation(Operation::Split); } @@ -471,22 +415,12 @@ impl Decoder { /// This pushes a block with ID=addr onto the block stack and appends execution of a LOOP /// operation to the trace. A block is marked as a loop block only if is_loop = ONE. pub fn start_loop(&mut self, loop_body_hash: Word, addr: Felt, stack_top: Felt) { - // get the current clock cycle here (before the trace table is updated) - let clk = self.trace_len() as u32; - // append a LOOP row to the execution trace let enter_loop = stack_top == ONE; let parent_addr = self.block_stack.push(addr, BlockType::Loop(enter_loop), None); self.trace .append_block_start(parent_addr, Operation::Loop, loop_body_hash, EMPTY_WORD); - // mark this cycle as the cycle at which a new LOOP block has started (this may affect - // block hash table). A loop block has a single child only if the body of the loop is - // executed at least once. - let executed_loop_body = if enter_loop { Some(loop_body_hash) } else { None }; - self.aux_hints - .block_started(clk, self.block_stack.peek(), executed_loop_body, None); - self.debug_info.append_operation(Operation::Loop); } @@ -494,18 +428,11 @@ impl Decoder { /// /// This appends an execution of a REPEAT operation to the trace. pub fn repeat(&mut self) { - // get the current clock cycle here (before the trace table is updated) - let clk = self.trace_len() as u32; - // append a REPEAT row to the execution trace let block_info = self.block_stack.peek(); debug_assert_eq!(ONE, block_info.is_entered_loop()); self.trace.append_loop_repeat(block_info.addr); - // mark this cycle as the cycle at which a new iteration of a loop started (this affects - // block hash table) - self.aux_hints.loop_repeat_started(clk); - self.debug_info.append_operation(Operation::Repeat); } @@ -514,17 +441,10 @@ impl Decoder { /// This pushes a block with ID=addr onto the block stack and appends execution of a CALL /// operation to the trace. pub fn start_call(&mut self, fn_hash: Word, addr: Felt, ctx_info: ExecutionContextInfo) { - // get the current clock cycle here (before the trace table is updated) - let clk = self.trace_len() as u32; - // push CALL block info onto the block stack and append a CALL row to the execution trace let parent_addr = self.block_stack.push(addr, BlockType::Call, Some(ctx_info)); self.trace.append_block_start(parent_addr, Operation::Call, fn_hash, EMPTY_WORD); - // mark this cycle as the cycle at which a new CALL block began execution (this affects - // block stack and block hash tables). A CALL block has only a single child. 
- self.aux_hints.block_started(clk, self.block_stack.peek(), Some(fn_hash), None); - self.debug_info.append_operation(Operation::Call); } @@ -533,19 +453,12 @@ impl Decoder { /// This pushes a block with ID=addr onto the block stack and appends execution of a SYSCALL /// operation to the trace. pub fn start_syscall(&mut self, fn_hash: Word, addr: Felt, ctx_info: ExecutionContextInfo) { - // get the current clock cycle here (before the trace table is updated) - let clk = self.trace_len() as u32; - // push SYSCALL block info onto the block stack and append a SYSCALL row to the execution // trace let parent_addr = self.block_stack.push(addr, BlockType::SysCall, Some(ctx_info)); self.trace .append_block_start(parent_addr, Operation::SysCall, fn_hash, EMPTY_WORD); - // mark this cycle as the cycle at which a new SYSCALL block began execution (this affects - // block stack and block hash tables). A SYSCALL block has only a single child. - self.aux_hints.block_started(clk, self.block_stack.peek(), Some(fn_hash), None); - self.debug_info.append_operation(Operation::SysCall); } @@ -554,18 +467,10 @@ impl Decoder { /// This pushes a block with ID=addr onto the block stack and appends execution of a DYN /// operation to the trace. pub fn start_dyn(&mut self, dyn_hash: Word, addr: Felt) { - // get the current clock cycle here (before the trace table is updated) - let clk = self.trace_len() as u32; - // push DYN block info onto the block stack and append a DYN row to the execution trace let parent_addr = self.block_stack.push(addr, BlockType::Dyn, None); self.trace.append_block_start(parent_addr, Operation::Dyn, dyn_hash, [ZERO; 4]); - // mark this cycle as the cycle at which a new DYN block began execution (this affects - // block stack and block hash tables). A DYN block has no children but points to the hash - // provided on the stack. - self.aux_hints.block_started(clk, self.block_stack.peek(), Some(dyn_hash), None); - self.debug_info.append_operation(Operation::Dyn); } @@ -578,9 +483,6 @@ impl Decoder { /// execution context and free memory pointers were set before the CALL block started /// executing. For non-CALL blocks these values are set to zeros and should be ignored. 
     pub fn end_control_block(&mut self, block_hash: Word) -> Option<ExecutionContextInfo> {
-        // get the current clock cycle here (before the trace table is updated)
-        let clk = self.trace_len() as u32;
-
         // remove the block from the top of the block stack and add an END row to the trace
         let block_info = self.block_stack.pop();
         self.trace.append_block_end(
@@ -592,9 +494,6 @@
             block_info.is_syscall(),
         );
 
-        // mark this cycle as the cycle at which block execution has ended
-        self.aux_hints.block_ended(clk, block_info.is_first_child);
-
         self.debug_info.append_operation(Operation::End);
 
         block_info.ctx_info
@@ -608,9 +507,6 @@
         debug_assert!(self.span_context.is_none(), "already in span");
         let parent_addr = self.block_stack.push(addr, BlockType::Span, None);
 
-        // get the current clock cycle here (before the trace table is updated)
-        let clk = self.trace_len() as u32;
-
         // add a SPAN row to the trace
         self.trace
             .append_span_start(parent_addr, first_op_batch.groups(), num_op_groups);
@@ -622,22 +518,12 @@
             group_ops_left: first_op_batch.groups()[0],
         });
 
-        // mark the current cycle as a cycle at which an operation batch may have been inserted
-        // into the op_group table
-        self.aux_hints.insert_op_batch(clk, num_op_groups);
-
-        // mark the current cycle as the cycle at which a SPAN block has started; SPAN block has
-        // no children
-        self.aux_hints.block_started(clk, self.block_stack.peek(), None, None);
-
         self.debug_info.append_operation(Operation::Span);
     }
 
     /// Starts decoding of the next operation batch in the current SPAN.
     pub fn respan(&mut self, op_batch: &OpBatch) {
         // get the current clock cycle here (before the trace table is updated)
-        let clk = self.trace_len() as u32;
-
         // add RESPAN row to the trace
         self.trace.append_respan(op_batch.groups());
 
@@ -648,14 +534,6 @@
         let ctx = self.span_context.as_mut().expect("not in span");
 
-        // mark the current cycle as a cycle at which an operation batch may have been inserted
-        // into the op_group table
-        self.aux_hints.insert_op_batch(clk, ctx.num_groups_left);
-
-        // mark the current cycle as a cycle at which the ID of the span block was changed (this
-        // causes an update in the block stack table)
-        self.aux_hints.span_extended(clk, block_info);
-
         // after RESPAN operation is executed, we decrement the number of remaining groups by ONE
         // because executing RESPAN consumes the first group of the batch
         ctx.num_groups_left -= ONE;
@@ -666,16 +544,8 @@
     /// Starts decoding a new operation group.
     pub fn start_op_group(&mut self, op_group: Felt) {
-        let clk = self.trace_len() as u32;
         let ctx = self.span_context.as_mut().expect("not in span");
 
-        // mark the cycle of the last operation as a cycle at which an operation group was
-        // removed from the op_group table. decoding of the removed operation will begin
-        // at the current cycle.
-        let group_pos = ctx.num_groups_left;
-        let batch_id = self.block_stack.peek().addr;
-        self.aux_hints.remove_op_group(clk - 1, batch_id, group_pos, op_group);
-
         // reset the current group value and decrement the number of left groups by ONE
         debug_assert_eq!(ZERO, ctx.group_ops_left, "not all ops executed in current group");
         ctx.group_ops_left = op_group;
@@ -684,9 +554,6 @@
     /// Decodes a user operation (i.e., not a control flow operation).
pub fn execute_user_op(&mut self, op: Operation, op_idx: usize) { - // get the current clock cycle here (before the trace table is updated) - let clk = self.trace_len() as u32; - let block = self.block_stack.peek(); let ctx = self.span_context.as_mut().expect("not in span"); @@ -707,10 +574,7 @@ impl Decoder { // groups left to decode. this number will be inserted into the trace in the next row. // we also mark the current clock cycle as a cycle at which the immediate value was // removed from the op_group table. - if let Some(imm_value) = op.imm_value() { - let group_pos = ctx.num_groups_left; - self.aux_hints.remove_op_group(clk, block.addr, group_pos, imm_value); - + if op.imm_value().is_some() { ctx.num_groups_left -= ONE; } @@ -729,43 +593,30 @@ impl Decoder { /// Ends decoding of a SPAN block. pub fn end_span(&mut self, block_hash: Word) { - // get the current clock cycle here (before the trace table is updated) - let clk = self.trace_len() as u32; - // remove the block from the stack of executing blocks and add an END row to the // execution trace let block_info = self.block_stack.pop(); self.trace.append_span_end(block_hash, block_info.is_loop_body()); self.span_context = None; - // mark this cycle as the cycle at which block execution has ended - self.aux_hints.block_ended(clk, block_info.is_first_child); - self.debug_info.append_operation(Operation::End); } // TRACE GENERATIONS // -------------------------------------------------------------------------------------------- - /// Returns an array of columns containing an execution trace of this decoder together with - /// hints to be used in construction of decoder-related auxiliary trace segment columns. + /// Returns an array of columns containing an execution trace of this decoder. /// /// Trace columns are extended to match the specified trace length. 
- pub fn into_trace(mut self, trace_len: usize, num_rand_rows: usize) -> super::DecoderTrace { - // once we know the hash of the program, we update the auxiliary trace hints so that the - // block hash table could be initialized properly - self.aux_hints.set_program_hash(self.program_hash()); - + pub fn into_trace(self, trace_len: usize, num_rand_rows: usize) -> super::DecoderTrace { let trace = self .trace .into_vec(trace_len, num_rand_rows) .try_into() .expect("failed to convert vector to array"); + let aux_builder = AuxTraceBuilder::default(); - super::DecoderTrace { - trace, - aux_trace_hints: self.aux_hints, - } + super::DecoderTrace { trace, aux_builder } } // HELPERS diff --git a/processor/src/decoder/tests.rs b/processor/src/decoder/tests.rs index 2381ee3aa5..c0897cfd58 100644 --- a/processor/src/decoder/tests.rs +++ b/processor/src/decoder/tests.rs @@ -1,10 +1,8 @@ use super::{ super::{ - utils::get_trace_len, ExecutionOptions, ExecutionTrace, Felt, Kernel, Operation, Process, - StackInputs, Word, + ExecutionOptions, ExecutionTrace, Felt, Kernel, Operation, Process, StackInputs, Word, }, - build_op_group, AuxTraceHints, BlockHashTableRow, BlockStackTableRow, BlockTableUpdate, - ExecutionContextInfo, OpGroupTableRow, OpGroupTableUpdate, + build_op_group, }; use crate::DefaultHost; use miden_air::trace::{ @@ -20,8 +18,8 @@ use miden_air::trace::{ use test_utils::rand::rand_value; use vm_core::{ code_blocks::{CodeBlock, Span, OP_BATCH_SIZE}, - utils::collections::Vec, - CodeBlockTable, StarkField, EMPTY_WORD, ONE, ZERO, + utils::collections::*, + CodeBlockTable, EMPTY_WORD, ONE, ZERO, }; // CONSTANTS @@ -33,7 +31,7 @@ const EIGHT: Felt = Felt::new(8); const INIT_ADDR: Felt = ONE; const FMP_MIN: Felt = Felt::new(crate::FMP_MIN); -const SYSCALL_FMP_MIN: Felt = Felt::new(crate::SYSCALL_FMP_MIN); +const SYSCALL_FMP_MIN: Felt = Felt::new(crate::SYSCALL_FMP_MIN as u64); // TYPE ALIASES // ================================================================================================ @@ -50,7 +48,7 @@ fn span_block_one_group() { let span = Span::new(ops.clone()); let program = CodeBlock::new_span(ops.clone()); - let (trace, aux_hints, trace_len) = build_trace(&[], &program); + let (trace, trace_len) = build_trace(&[], &program); // --- check block address, op_bits, group count, op_index, and in_span columns --------------- check_op_decoding(&trace, 0, ZERO, Operation::Span, 1, 0, 0); @@ -80,24 +78,6 @@ fn span_block_one_group() { assert_eq!(ONE, trace[OP_BITS_EXTRA_COLS_RANGE.start + 1][i]); assert_eq!(program_hash, get_hasher_state1(&trace, i)); } - - // --- check op_group table hints ------------------------------------------------------------- - // op_group table should not have been touched - assert!(&aux_hints.op_group_table_hints().is_empty()); - assert!(aux_hints.op_group_table_rows().is_empty()); - - // --- check block execution hints ------------------------------------------------------------ - let expected_hints = - vec![(0, BlockTableUpdate::BlockStarted(0)), (4, BlockTableUpdate::BlockEnded(false))]; - assert_eq!(expected_hints, aux_hints.block_exec_hints()); - - // --- check block stack table hints ---------------------------------------------------------- - let expected_rows = vec![BlockStackTableRow::new_test(INIT_ADDR, ZERO, false)]; - assert_eq!(expected_rows, aux_hints.block_stack_table_rows()); - - // --- check block hash table hints ---------------------------------------------------------- - let expected_rows = vec![BlockHashTableRow::from_program_hash(program_hash)]; 
- assert_eq!(expected_rows, aux_hints.block_hash_table_rows()); } #[test] @@ -107,7 +87,7 @@ fn span_block_small() { let span = Span::new(ops.clone()); let program = CodeBlock::new_span(ops.clone()); - let (trace, aux_hints, trace_len) = build_trace(&[], &program); + let (trace, trace_len) = build_trace(&[], &program); // --- check block address, op_bits, group count, op_index, and in_span columns --------------- check_op_decoding(&trace, 0, ZERO, Operation::Span, 4, 0, 0); @@ -141,38 +121,6 @@ fn span_block_small() { assert_eq!(ONE, trace[OP_BITS_EXTRA_COLS_RANGE.start + 1][i]); assert_eq!(program_hash, get_hasher_state1(&trace, i)); } - - // --- check op_group table hints ------------------------------------------------------------- - - // 3 op groups should be inserted at cycle 0, and removed one by one in subsequent cycles - let expected_ogt_hints = vec![ - (0, OpGroupTableUpdate::InsertRows(3)), - (1, OpGroupTableUpdate::RemoveRow), - (2, OpGroupTableUpdate::RemoveRow), - (3, OpGroupTableUpdate::RemoveRow), - ]; - assert_eq!(&expected_ogt_hints, aux_hints.op_group_table_hints()); - - // the groups are imm(1), imm(2), and op group with a single NOOP - let expected_ogt_rows = vec![ - OpGroupTableRow::new(INIT_ADDR, Felt::new(3), iv[0]), - OpGroupTableRow::new(INIT_ADDR, TWO, iv[1]), - OpGroupTableRow::new(INIT_ADDR, ONE, ZERO), - ]; - assert_eq!(expected_ogt_rows, aux_hints.op_group_table_rows()); - - // --- check block execution hints ------------------------------------------------------------ - let expected_hints = - vec![(0, BlockTableUpdate::BlockStarted(0)), (5, BlockTableUpdate::BlockEnded(false))]; - assert_eq!(expected_hints, aux_hints.block_exec_hints()); - - // --- check block stack table hints ---------------------------------------------------------- - let expected_rows = vec![BlockStackTableRow::new_test(INIT_ADDR, ZERO, false)]; - assert_eq!(expected_rows, aux_hints.block_stack_table_rows()); - - // --- check block hash table hints ---------------------------------------------------------- - let expected_rows = vec![BlockHashTableRow::from_program_hash(program_hash)]; - assert_eq!(expected_rows, aux_hints.block_hash_table_rows()); } #[test] @@ -194,7 +142,7 @@ fn span_block() { ]; let span = Span::new(ops.clone()); let program = CodeBlock::new_span(ops.clone()); - let (trace, aux_hints, trace_len) = build_trace(&[], &program); + let (trace, trace_len) = build_trace(&[], &program); // --- check block address, op_bits, group count, op_index, and in_span columns --------------- check_op_decoding(&trace, 0, ZERO, Operation::Span, 8, 0, 0); @@ -249,47 +197,6 @@ fn span_block() { assert_eq!(ONE, trace[OP_BITS_EXTRA_COLS_RANGE.start + 1][i]); assert_eq!(program_hash, get_hasher_state1(&trace, i)); } - - // --- check op_group table hints ------------------------------------------------------------- - - let expected_ogt_hints = vec![ - (0, OpGroupTableUpdate::InsertRows(7)), - (1, OpGroupTableUpdate::RemoveRow), - (2, OpGroupTableUpdate::RemoveRow), - (3, OpGroupTableUpdate::RemoveRow), - (8, OpGroupTableUpdate::RemoveRow), - (9, OpGroupTableUpdate::RemoveRow), - (10, OpGroupTableUpdate::RemoveRow), - (13, OpGroupTableUpdate::RemoveRow), - ]; - assert_eq!(&expected_ogt_hints, aux_hints.op_group_table_hints()); - - let batch0_groups = &span.op_batches()[0].groups(); - let expected_ogt_rows = vec![ - OpGroupTableRow::new(INIT_ADDR, Felt::new(7), batch0_groups[1]), - OpGroupTableRow::new(INIT_ADDR, Felt::new(6), batch0_groups[2]), - OpGroupTableRow::new(INIT_ADDR, Felt::new(5), 
batch0_groups[3]), - OpGroupTableRow::new(INIT_ADDR, Felt::new(4), batch0_groups[4]), - OpGroupTableRow::new(INIT_ADDR, Felt::new(3), batch0_groups[5]), - OpGroupTableRow::new(INIT_ADDR, TWO, batch0_groups[6]), - OpGroupTableRow::new(INIT_ADDR, ONE, batch0_groups[7]), - ]; - assert_eq!(expected_ogt_rows, aux_hints.op_group_table_rows()); - - // --- check block execution hints ------------------------------------------------------------ - let expected_hints = vec![ - (0, BlockTableUpdate::BlockStarted(0)), - (15, BlockTableUpdate::BlockEnded(false)), - ]; - assert_eq!(expected_hints, aux_hints.block_exec_hints()); - - // --- check block stack table hints ---------------------------------------------------------- - let expected_rows = vec![BlockStackTableRow::new_test(INIT_ADDR, ZERO, false)]; - assert_eq!(expected_rows, aux_hints.block_stack_table_rows()); - - // --- check block hash table hints ---------------------------------------------------------- - let expected_rows = vec![BlockHashTableRow::from_program_hash(program_hash)]; - assert_eq!(expected_rows, aux_hints.block_hash_table_rows()); } #[test] @@ -320,7 +227,7 @@ fn span_block_with_respan() { ]; let span = Span::new(ops.clone()); let program = CodeBlock::new_span(ops.clone()); - let (trace, aux_hints, trace_len) = build_trace(&[], &program); + let (trace, trace_len) = build_trace(&[], &program); // --- check block address, op_bits, group count, op_index, and in_span columns --------------- check_op_decoding(&trace, 0, ZERO, Operation::Span, 12, 0, 0); @@ -377,60 +284,6 @@ fn span_block_with_respan() { assert_eq!(ONE, trace[OP_BITS_EXTRA_COLS_RANGE.start + 1][i]); assert_eq!(program_hash, get_hasher_state1(&trace, i)); } - - // --- check op_group table hints ------------------------------------------------------------- - - let expected_ogt_hints = vec![ - (0, OpGroupTableUpdate::InsertRows(7)), - (1, OpGroupTableUpdate::RemoveRow), - (2, OpGroupTableUpdate::RemoveRow), - (3, OpGroupTableUpdate::RemoveRow), - (4, OpGroupTableUpdate::RemoveRow), - (5, OpGroupTableUpdate::RemoveRow), - (6, OpGroupTableUpdate::RemoveRow), - (7, OpGroupTableUpdate::RemoveRow), - (9, OpGroupTableUpdate::InsertRows(3)), - (10, OpGroupTableUpdate::RemoveRow), - (12, OpGroupTableUpdate::RemoveRow), - (13, OpGroupTableUpdate::RemoveRow), - ]; - assert_eq!(&expected_ogt_hints, aux_hints.op_group_table_hints()); - - let batch0_groups = &span.op_batches()[0].groups(); - let batch1_groups = &span.op_batches()[1].groups(); - let expected_ogt_rows = vec![ - OpGroupTableRow::new(INIT_ADDR, Felt::new(11), batch0_groups[1]), - OpGroupTableRow::new(INIT_ADDR, Felt::new(10), batch0_groups[2]), - OpGroupTableRow::new(INIT_ADDR, Felt::new(9), batch0_groups[3]), - OpGroupTableRow::new(INIT_ADDR, EIGHT, batch0_groups[4]), - OpGroupTableRow::new(INIT_ADDR, Felt::new(7), batch0_groups[5]), - OpGroupTableRow::new(INIT_ADDR, Felt::new(6), batch0_groups[6]), - OpGroupTableRow::new(INIT_ADDR, Felt::new(5), batch0_groups[7]), - // skipping the first group of batch 1 - OpGroupTableRow::new(batch1_addr, Felt::new(3), batch1_groups[1]), - OpGroupTableRow::new(batch1_addr, TWO, batch1_groups[2]), - OpGroupTableRow::new(batch1_addr, ONE, batch1_groups[3]), - ]; - assert_eq!(expected_ogt_rows, aux_hints.op_group_table_rows()); - - // --- check block execution hints ------------------------------------------------------------ - let expected_hints = vec![ - (0, BlockTableUpdate::BlockStarted(0)), - (9, BlockTableUpdate::SpanExtended), - (15, BlockTableUpdate::BlockEnded(false)), - ]; - 
assert_eq!(expected_hints, aux_hints.block_exec_hints()); - - // --- check block stack table hints ---------------------------------------------------------- - let expected_rows = vec![ - BlockStackTableRow::new_test(INIT_ADDR, ZERO, false), - BlockStackTableRow::new_test(batch1_addr, ZERO, false), - ]; - assert_eq!(expected_rows, aux_hints.block_stack_table_rows()); - - // --- check block hash table hints ---------------------------------------------------------- - let expected_rows = vec![BlockHashTableRow::from_program_hash(program_hash)]; - assert_eq!(expected_rows, aux_hints.block_hash_table_rows()); } // JOIN BLOCK TESTS @@ -442,7 +295,7 @@ fn join_block() { let span2 = CodeBlock::new_span(vec![Operation::Add]); let program = CodeBlock::new_join([span1.clone(), span2.clone()]); - let (trace, aux_hints, trace_len) = build_trace(&[], &program); + let (trace, trace_len) = build_trace(&[], &program); // --- check block address, op_bits, group count, op_index, and in_span columns --------------- check_op_decoding(&trace, 0, ZERO, Operation::Join, 0, 0, 0); @@ -487,38 +340,6 @@ fn join_block() { assert_eq!(ONE, trace[OP_BITS_EXTRA_COLS_RANGE.start + 1][i]); assert_eq!(program_hash, get_hasher_state1(&trace, i)); } - - // --- check op_group table hints ------------------------------------------------------------- - // op_group table should not have been touched - assert!(&aux_hints.op_group_table_hints().is_empty()); - assert!(aux_hints.op_group_table_rows().is_empty()); - - // --- check block execution hints ------------------------------------------------------------ - let expected_hints = vec![ - (0, BlockTableUpdate::BlockStarted(2)), - (1, BlockTableUpdate::BlockStarted(0)), - (3, BlockTableUpdate::BlockEnded(true)), - (4, BlockTableUpdate::BlockStarted(0)), - (6, BlockTableUpdate::BlockEnded(false)), - (7, BlockTableUpdate::BlockEnded(false)), - ]; - assert_eq!(expected_hints, aux_hints.block_exec_hints()); - - // --- check block stack table hints ---------------------------------------------------------- - let expected_rows = vec![ - BlockStackTableRow::new_test(INIT_ADDR, ZERO, false), - BlockStackTableRow::new_test(span1_addr, INIT_ADDR, false), - BlockStackTableRow::new_test(span2_addr, INIT_ADDR, false), - ]; - assert_eq!(expected_rows, aux_hints.block_stack_table_rows()); - - // --- check block hash table hints ---------------------------------------------------------- - let expected_rows = vec![ - BlockHashTableRow::from_program_hash(program_hash), - BlockHashTableRow::new_test(INIT_ADDR, span1_hash, true, false), - BlockHashTableRow::new_test(INIT_ADDR, span2_hash, false, false), - ]; - assert_eq!(expected_rows, aux_hints.block_hash_table_rows()); } // SPLIT BLOCK TESTS @@ -530,7 +351,7 @@ fn split_block_true() { let span2 = CodeBlock::new_span(vec![Operation::Add]); let program = CodeBlock::new_split(span1.clone(), span2.clone()); - let (trace, aux_hints, trace_len) = build_trace(&[1], &program); + let (trace, trace_len) = build_trace(&[1], &program); // --- check block address, op_bits, group count, op_index, and in_span columns --------------- let span_addr = INIT_ADDR + EIGHT; @@ -565,34 +386,6 @@ fn split_block_true() { assert_eq!(ONE, trace[OP_BITS_EXTRA_COLS_RANGE.start + 1][i]); assert_eq!(program_hash, get_hasher_state1(&trace, i)); } - - // --- check op_group table hints ------------------------------------------------------------- - // op_group table should not have been touched - assert!(&aux_hints.op_group_table_hints().is_empty()); - 
assert!(aux_hints.op_group_table_rows().is_empty()); - - // --- check block execution hints ------------------------------------------------------------ - let expected_hints = vec![ - (0, BlockTableUpdate::BlockStarted(1)), - (1, BlockTableUpdate::BlockStarted(0)), - (3, BlockTableUpdate::BlockEnded(false)), - (4, BlockTableUpdate::BlockEnded(false)), - ]; - assert_eq!(expected_hints, aux_hints.block_exec_hints()); - - // --- check block stack table hints ---------------------------------------------------------- - let expected_rows = vec![ - BlockStackTableRow::new_test(INIT_ADDR, ZERO, false), - BlockStackTableRow::new_test(span_addr, INIT_ADDR, false), - ]; - assert_eq!(expected_rows, aux_hints.block_stack_table_rows()); - - // --- check block hash table hints ---------------------------------------------------------- - let expected_rows = vec![ - BlockHashTableRow::from_program_hash(program_hash), - BlockHashTableRow::new_test(INIT_ADDR, span1_hash, false, false), - ]; - assert_eq!(expected_rows, aux_hints.block_hash_table_rows()); } #[test] @@ -601,7 +394,7 @@ fn split_block_false() { let span2 = CodeBlock::new_span(vec![Operation::Add]); let program = CodeBlock::new_split(span1.clone(), span2.clone()); - let (trace, aux_hints, trace_len) = build_trace(&[0], &program); + let (trace, trace_len) = build_trace(&[0], &program); // --- check block address, op_bits, group count, op_index, and in_span columns --------------- let span_addr = INIT_ADDR + EIGHT; @@ -636,34 +429,6 @@ fn split_block_false() { assert_eq!(ONE, trace[OP_BITS_EXTRA_COLS_RANGE.start + 1][i]); assert_eq!(program_hash, get_hasher_state1(&trace, i)); } - - // --- check op_group table hints ------------------------------------------------------------- - // op_group table should not have been touched - assert!(&aux_hints.op_group_table_hints().is_empty()); - assert!(aux_hints.op_group_table_rows().is_empty()); - - // --- check block execution hints ------------------------------------------------------------ - let expected_hints = vec![ - (0, BlockTableUpdate::BlockStarted(1)), - (1, BlockTableUpdate::BlockStarted(0)), - (3, BlockTableUpdate::BlockEnded(false)), - (4, BlockTableUpdate::BlockEnded(false)), - ]; - assert_eq!(expected_hints, aux_hints.block_exec_hints()); - - // --- check block stack table hints ---------------------------------------------------------- - let expected_rows = vec![ - BlockStackTableRow::new_test(INIT_ADDR, ZERO, false), - BlockStackTableRow::new_test(span_addr, INIT_ADDR, false), - ]; - assert_eq!(expected_rows, aux_hints.block_stack_table_rows()); - - // --- check block hash table hints ---------------------------------------------------------- - let expected_rows = vec![ - BlockHashTableRow::from_program_hash(program_hash), - BlockHashTableRow::new_test(INIT_ADDR, span2_hash, false, false), - ]; - assert_eq!(expected_rows, aux_hints.block_hash_table_rows()); } // LOOP BLOCK TESTS @@ -674,7 +439,7 @@ fn loop_block() { let loop_body = CodeBlock::new_span(vec![Operation::Pad, Operation::Drop]); let program = CodeBlock::new_loop(loop_body.clone()); - let (trace, aux_hints, trace_len) = build_trace(&[0, 1], &program); + let (trace, trace_len) = build_trace(&[0, 1], &program); // --- check block address, op_bits, group count, op_index, and in_span columns --------------- let body_addr = INIT_ADDR + EIGHT; @@ -711,34 +476,6 @@ fn loop_block() { assert_eq!(ONE, trace[OP_BITS_EXTRA_COLS_RANGE.start + 1][i]); assert_eq!(program_hash, get_hasher_state1(&trace, i)); } - - // --- check op_group table 
hints ------------------------------------------------------------- - // op_group table should not have been touched - assert!(&aux_hints.op_group_table_hints().is_empty()); - assert!(aux_hints.op_group_table_rows().is_empty()); - - // --- check block execution hints ------------------------------------------------------------ - let expected_hints = vec![ - (0, BlockTableUpdate::BlockStarted(1)), - (1, BlockTableUpdate::BlockStarted(0)), - (4, BlockTableUpdate::BlockEnded(false)), - (5, BlockTableUpdate::BlockEnded(false)), - ]; - assert_eq!(expected_hints, aux_hints.block_exec_hints()); - - // --- check block stack table hints ---------------------------------------------------------- - let expected_rows = vec![ - BlockStackTableRow::new_test(INIT_ADDR, ZERO, true), - BlockStackTableRow::new_test(body_addr, INIT_ADDR, false), - ]; - assert_eq!(expected_rows, aux_hints.block_stack_table_rows()); - - // --- check block hash table hints ---------------------------------------------------------- - let expected_rows = vec![ - BlockHashTableRow::from_program_hash(program_hash), - BlockHashTableRow::new_test(INIT_ADDR, loop_body_hash, false, true), - ]; - assert_eq!(expected_rows, aux_hints.block_hash_table_rows()); } #[test] @@ -746,7 +483,7 @@ fn loop_block_skip() { let loop_body = CodeBlock::new_span(vec![Operation::Pad, Operation::Drop]); let program = CodeBlock::new_loop(loop_body.clone()); - let (trace, aux_hints, trace_len) = build_trace(&[0], &program); + let (trace, trace_len) = build_trace(&[0], &program); // --- check block address, op_bits, group count, op_index, and in_span columns --------------- check_op_decoding(&trace, 0, ZERO, Operation::Loop, 0, 0, 0); @@ -773,24 +510,6 @@ fn loop_block_skip() { assert_eq!(ONE, trace[OP_BITS_EXTRA_COLS_RANGE.start + 1][i]); assert_eq!(program_hash, get_hasher_state1(&trace, i)); } - - // --- check op_group table hints ------------------------------------------------------------- - // op_group table should not have been touched - assert!(&aux_hints.op_group_table_hints().is_empty()); - assert!(aux_hints.op_group_table_rows().is_empty()); - - // --- check block execution hints ------------------------------------------------------------ - let expected_hints = - vec![(0, BlockTableUpdate::BlockStarted(0)), (1, BlockTableUpdate::BlockEnded(false))]; - assert_eq!(expected_hints, aux_hints.block_exec_hints()); - - // --- check block stack table hints ---------------------------------------------------------- - let expected_rows = vec![BlockStackTableRow::new_test(INIT_ADDR, ZERO, false)]; - assert_eq!(expected_rows, aux_hints.block_stack_table_rows()); - - // --- check block hash table hints ---------------------------------------------------------- - let expected_rows = vec![BlockHashTableRow::from_program_hash(program_hash)]; - assert_eq!(expected_rows, aux_hints.block_hash_table_rows()); } #[test] @@ -798,7 +517,7 @@ fn loop_block_repeat() { let loop_body = CodeBlock::new_span(vec![Operation::Pad, Operation::Drop]); let program = CodeBlock::new_loop(loop_body.clone()); - let (trace, aux_hints, trace_len) = build_trace(&[0, 1, 1], &program); + let (trace, trace_len) = build_trace(&[0, 1, 1], &program); // --- check block address, op_bits, group count, op_index, and in_span columns --------------- let iter1_addr = INIT_ADDR + EIGHT; @@ -852,38 +571,6 @@ fn loop_block_repeat() { assert_eq!(ONE, trace[OP_BITS_EXTRA_COLS_RANGE.start + 1][i]); assert_eq!(program_hash, get_hasher_state1(&trace, i)); } - - // --- check op_group table hints 
------------------------------------------------------------- - // op_group table should not have been touched - assert!(&aux_hints.op_group_table_hints().is_empty()); - assert!(aux_hints.op_group_table_rows().is_empty()); - - // --- check block execution hints ------------------------------------------------------------ - let expected_hints = vec![ - (0, BlockTableUpdate::BlockStarted(1)), - (1, BlockTableUpdate::BlockStarted(0)), - (4, BlockTableUpdate::BlockEnded(false)), - (5, BlockTableUpdate::LoopRepeated), - (6, BlockTableUpdate::BlockStarted(0)), - (9, BlockTableUpdate::BlockEnded(false)), - (10, BlockTableUpdate::BlockEnded(false)), - ]; - assert_eq!(expected_hints, aux_hints.block_exec_hints()); - - // --- check block stack table hints ---------------------------------------------------------- - let expected_rows = vec![ - BlockStackTableRow::new_test(INIT_ADDR, ZERO, true), - BlockStackTableRow::new_test(iter1_addr, INIT_ADDR, false), - BlockStackTableRow::new_test(iter2_addr, INIT_ADDR, false), - ]; - assert_eq!(expected_rows, aux_hints.block_stack_table_rows()); - - // --- check block hash table hints ---------------------------------------------------------- - let expected_rows = vec![ - BlockHashTableRow::from_program_hash(program_hash), - BlockHashTableRow::new_test(INIT_ADDR, loop_body_hash, false, true), - ]; - assert_eq!(expected_rows, aux_hints.block_hash_table_rows()); } // CALL BLOCK TESTS @@ -916,7 +603,7 @@ fn call_block() { let join1 = CodeBlock::new_join([first_span.clone(), foo_call.clone()]); let program = CodeBlock::new_join([join1.clone(), last_span.clone()]); - let (sys_trace, dec_trace, aux_hints, trace_len) = + let (sys_trace, dec_trace, trace_len) = build_call_trace(&program, foo_root.clone(), None); // --- check block address, op_bits, group count, op_index, and in_span columns --------------- @@ -929,10 +616,6 @@ fn call_block() { check_op_decoding(&dec_trace, 2, join1_addr, Operation::Span, 2, 0, 0); check_op_decoding(&dec_trace, 3, first_span_addr, Operation::Push(TWO), 1, 0, 1); check_op_decoding(&dec_trace, 4, first_span_addr, Operation::FmpUpdate, 0, 1, 1); - // as PAD operation is executed, the last item from the stack top moves to the overflow table. - // thus, the overflow address for the top row in the table will be set to the clock cycle at - // which PAD was executed - which is 5. 
- let overflow_addr_after_pad = Felt::new(5); check_op_decoding(&dec_trace, 5, first_span_addr, Operation::Pad, 0, 2, 1); check_op_decoding(&dec_trace, 6, first_span_addr, Operation::End, 0, 0, 0); // starting CALL block @@ -1078,47 +761,6 @@ fn call_block() { for i in 13..trace_len { assert_eq!(get_fn_hash(&sys_trace, i), EMPTY_WORD); } - - // --- check block execution hints ------------------------------------------------------------ - let expected_hints = vec![ - (0, BlockTableUpdate::BlockStarted(2)), - (1, BlockTableUpdate::BlockStarted(2)), - (2, BlockTableUpdate::BlockStarted(0)), - (6, BlockTableUpdate::BlockEnded(true)), - (7, BlockTableUpdate::BlockStarted(1)), - (8, BlockTableUpdate::BlockStarted(0)), - (11, BlockTableUpdate::BlockEnded(false)), - (12, BlockTableUpdate::BlockEnded(false)), - (13, BlockTableUpdate::BlockEnded(true)), - (14, BlockTableUpdate::BlockStarted(0)), - (16, BlockTableUpdate::BlockEnded(false)), - (17, BlockTableUpdate::BlockEnded(false)), - ]; - assert_eq!(expected_hints, aux_hints.block_exec_hints()); - - // --- check block stack table rows ----------------------------------------------------------- - let call_ctx = - ExecutionContextInfo::new(0, EMPTY_WORD, FMP_MIN + TWO, 17, overflow_addr_after_pad); - let expected_rows = vec![ - BlockStackTableRow::new_test(INIT_ADDR, ZERO, false), - BlockStackTableRow::new_test(join1_addr, INIT_ADDR, false), - BlockStackTableRow::new_test(first_span_addr, join1_addr, false), - BlockStackTableRow::new_test_with_ctx(foo_call_addr, join1_addr, false, call_ctx), - BlockStackTableRow::new_test(foo_root_addr, foo_call_addr, false), - BlockStackTableRow::new_test(last_span_addr, INIT_ADDR, false), - ]; - assert_eq!(expected_rows, aux_hints.block_stack_table_rows()); - - // --- check block hash table hints ---------------------------------------------------------- - let expected_rows = vec![ - BlockHashTableRow::from_program_hash(program_hash), - BlockHashTableRow::new_test(INIT_ADDR, join1_hash, true, false), - BlockHashTableRow::new_test(INIT_ADDR, last_span_hash, false, false), - BlockHashTableRow::new_test(join1_addr, first_span_hash, true, false), - BlockHashTableRow::new_test(join1_addr, foo_call_hash, false, false), - BlockHashTableRow::new_test(foo_call_addr, foo_root_hash, false, false), - ]; - assert_eq!(expected_rows, aux_hints.block_hash_table_rows()); } // SYSCALL BLOCK TESTS @@ -1166,7 +808,7 @@ fn syscall_block() { let inner_join = CodeBlock::new_join([first_span.clone(), bar_call.clone()]); let program = CodeBlock::new_join([inner_join.clone(), last_span.clone()]); - let (sys_trace, dec_trace, aux_hints, trace_len) = + let (sys_trace, dec_trace, trace_len) = build_call_trace(&program, bar_root.clone(), Some(foo_root.clone())); // --- check block address, op_bits, group count, op_index, and in_span columns --------------- @@ -1179,10 +821,6 @@ fn syscall_block() { check_op_decoding(&dec_trace, 2, inner_join_addr, Operation::Span, 2, 0, 0); check_op_decoding(&dec_trace, 3, first_span_addr, Operation::Push(TWO), 1, 0, 1); check_op_decoding(&dec_trace, 4, first_span_addr, Operation::FmpUpdate, 0, 1, 1); - // as PAD operation is executed, the last item from the stack top moves to the overflow table. - // thus, the overflow address for the top row in the table will be set to the clock cycle at - // which PAD was executed - which is 5. 
- let overflow_addr_after_pad = Felt::new(5); check_op_decoding(&dec_trace, 5, first_span_addr, Operation::Pad, 0, 2, 1); check_op_decoding(&dec_trace, 6, first_span_addr, Operation::End, 0, 0, 0); @@ -1409,60 +1047,6 @@ fn syscall_block() { for i in 21..trace_len { assert_eq!(get_fn_hash(&sys_trace, i), EMPTY_WORD); } - - // --- check block execution hints ------------------------------------------------------------ - let expected_hints = vec![ - (0, BlockTableUpdate::BlockStarted(2)), // join0 - (1, BlockTableUpdate::BlockStarted(2)), // join1 - (2, BlockTableUpdate::BlockStarted(0)), // span0 - (6, BlockTableUpdate::BlockEnded(true)), // end span0 - (7, BlockTableUpdate::BlockStarted(1)), // call - (8, BlockTableUpdate::BlockStarted(2)), // join2 - (9, BlockTableUpdate::BlockStarted(0)), // span1 - (12, BlockTableUpdate::BlockEnded(true)), // end span1 - (13, BlockTableUpdate::BlockStarted(1)), // syscall - (14, BlockTableUpdate::BlockStarted(0)), // span2 - (17, BlockTableUpdate::BlockEnded(false)), // end span2 - (18, BlockTableUpdate::BlockEnded(false)), // end syscall - (19, BlockTableUpdate::BlockEnded(false)), // end join2 - (20, BlockTableUpdate::BlockEnded(false)), // end join1 - (21, BlockTableUpdate::BlockEnded(true)), // end join0 - (22, BlockTableUpdate::BlockStarted(0)), // span3 - (24, BlockTableUpdate::BlockEnded(false)), // end span3 - (25, BlockTableUpdate::BlockEnded(false)), // end program - ]; - assert_eq!(expected_hints, aux_hints.block_exec_hints()); - - // --- check block stack table rows ----------------------------------------------------------- - let call_ctx = - ExecutionContextInfo::new(0, EMPTY_WORD, FMP_MIN + ONE, 17, overflow_addr_after_pad); - let syscall_ctx = ExecutionContextInfo::new(8, bar_root_hash, FMP_MIN + TWO, 16, ZERO); - let expected_rows = vec![ - BlockStackTableRow::new_test(INIT_ADDR, ZERO, false), - BlockStackTableRow::new_test(inner_join_addr, INIT_ADDR, false), - BlockStackTableRow::new_test(first_span_addr, inner_join_addr, false), - BlockStackTableRow::new_test_with_ctx(call_addr, inner_join_addr, false, call_ctx), - BlockStackTableRow::new_test(bar_join_addr, call_addr, false), - BlockStackTableRow::new_test(bar_span_addr, bar_join_addr, false), - BlockStackTableRow::new_test_with_ctx(syscall_addr, bar_join_addr, false, syscall_ctx), - BlockStackTableRow::new_test(syscall_span_addr, syscall_addr, false), - BlockStackTableRow::new_test(last_span_addr, INIT_ADDR, false), - ]; - assert_eq!(expected_rows, aux_hints.block_stack_table_rows()); - - // --- check block hash table hints ---------------------------------------------------------- - let expected_rows = vec![ - BlockHashTableRow::from_program_hash(program_hash), - BlockHashTableRow::new_test(INIT_ADDR, inner_join_hash, true, false), - BlockHashTableRow::new_test(INIT_ADDR, last_span_hash, false, false), - BlockHashTableRow::new_test(inner_join_addr, first_span_hash, true, false), - BlockHashTableRow::new_test(inner_join_addr, bar_call_hash, false, false), - BlockHashTableRow::new_test(call_addr, bar_root_hash, false, false), - BlockHashTableRow::new_test(bar_join_addr, bar_span_hash, true, false), - BlockHashTableRow::new_test(bar_join_addr, foo_call_hash, false, false), - BlockHashTableRow::new_test(syscall_addr, foo_root_hash, false, false), - ]; - assert_eq!(expected_rows, aux_hints.block_hash_table_rows()); } // DYN BLOCK TESTS @@ -1480,7 +1064,7 @@ fn dyn_block() { let dyn_block = CodeBlock::new_dyn(); let program = CodeBlock::new_join([join.clone(), dyn_block.clone()]); - let 
(trace, aux_hints, trace_len) = build_dyn_trace( + let (trace, trace_len) = build_dyn_trace( &[ foo_root.hash()[0].as_int(), foo_root.hash()[1].as_int(), @@ -1574,55 +1158,6 @@ fn dyn_block() { assert_eq!(ONE, trace[OP_BITS_EXTRA_COLS_RANGE.start + 1][i]); assert_eq!(program_hash, get_hasher_state1(&trace, i)); } - - // --- check op_group table hints ------------------------------------------------------------- - // 1 op group should be inserted at cycle 10, and removed in the subsequent cycle - let expected_ogt_hints = - vec![(10, OpGroupTableUpdate::InsertRows(1)), (11, OpGroupTableUpdate::RemoveRow)]; - assert_eq!(&expected_ogt_hints, aux_hints.op_group_table_hints()); - - // the group is an op group with a single ADD - let expected_ogt_rows = vec![OpGroupTableRow::new(add_span_addr, ONE, ONE)]; - assert_eq!(expected_ogt_rows, aux_hints.op_group_table_rows()); - - // --- check block execution hints ------------------------------------------------------------ - let expected_hints = vec![ - (0, BlockTableUpdate::BlockStarted(2)), // outer join start - (1, BlockTableUpdate::BlockStarted(2)), // inner join start - (2, BlockTableUpdate::BlockStarted(0)), // mul span start - (4, BlockTableUpdate::BlockEnded(true)), // mul span end - (5, BlockTableUpdate::BlockStarted(0)), // save span start - (7, BlockTableUpdate::BlockEnded(false)), // save span end - (8, BlockTableUpdate::BlockEnded(true)), // inner join end - (9, BlockTableUpdate::BlockStarted(1)), // dyn start - (10, BlockTableUpdate::BlockStarted(0)), // foo span start - (13, BlockTableUpdate::BlockEnded(false)), // foo span end - (14, BlockTableUpdate::BlockEnded(false)), // dyn end - (15, BlockTableUpdate::BlockEnded(false)), // outer join end - ]; - assert_eq!(expected_hints, aux_hints.block_exec_hints()); - - // --- check block stack table hints ---------------------------------------------------------- - let expected_rows = vec![ - BlockStackTableRow::new_test(INIT_ADDR, ZERO, false), // join - BlockStackTableRow::new_test(join_addr, INIT_ADDR, false), // inner join - BlockStackTableRow::new_test(mul_span_addr, join_addr, false), // mul span - BlockStackTableRow::new_test(save_span_addr, join_addr, false), // save span - BlockStackTableRow::new_test(dyn_addr, INIT_ADDR, false), // dyn - BlockStackTableRow::new_test(add_span_addr, dyn_addr, false), // foo span - ]; - assert_eq!(expected_rows, aux_hints.block_stack_table_rows()); - - // --- check block hash table hints ---------------------------------------------------------- - let expected_rows = vec![ - BlockHashTableRow::from_program_hash(program_hash), - BlockHashTableRow::new_test(INIT_ADDR, join_hash, true, false), - BlockHashTableRow::new_test(INIT_ADDR, dyn_hash, false, false), - BlockHashTableRow::new_test(join_addr, mul_span_hash, true, false), - BlockHashTableRow::new_test(join_addr, save_span_hash, false, false), - BlockHashTableRow::new_test(dyn_addr, foo_hash, false, false), - ]; - assert_eq!(expected_rows, aux_hints.block_hash_table_rows()); } // HELPER REGISTERS TESTS @@ -1658,22 +1193,21 @@ fn set_user_op_helpers_many() { // HELPER FUNCTIONS // ================================================================================================ -fn build_trace(stack_inputs: &[u64], program: &CodeBlock) -> (DecoderTrace, AuxTraceHints, usize) { +fn build_trace(stack_inputs: &[u64], program: &CodeBlock) -> (DecoderTrace, usize) { let stack_inputs = StackInputs::try_from_values(stack_inputs.iter().copied()).unwrap(); let host = DefaultHost::default(); let mut process = 
         Process::new(Kernel::default(), stack_inputs, host, ExecutionOptions::default());
     process.execute_code_block(program, &CodeBlockTable::default()).unwrap();
 
-    let (trace, aux_hints, _) = ExecutionTrace::test_finalize_trace(process);
-    let trace_len = get_trace_len(&trace) - ExecutionTrace::NUM_RAND_ROWS;
+    let (trace, _, _) = ExecutionTrace::test_finalize_trace(process);
+    let trace_len = trace.num_rows() - ExecutionTrace::NUM_RAND_ROWS;
 
     (
-        trace[DECODER_TRACE_RANGE]
-            .to_vec()
+        trace
+            .get_column_range(DECODER_TRACE_RANGE)
             .try_into()
             .expect("failed to convert vector to array"),
-        aux_hints.decoder,
         trace_len,
     )
 }
@@ -1682,7 +1216,7 @@ fn build_dyn_trace(
     stack_inputs: &[u64],
     program: &CodeBlock,
     fn_block: CodeBlock,
-) -> (DecoderTrace, AuxTraceHints, usize) {
+) -> (DecoderTrace, usize) {
     let stack_inputs = StackInputs::try_from_values(stack_inputs.iter().copied()).unwrap();
     let host = DefaultHost::default();
     let mut process =
@@ -1694,15 +1228,14 @@ fn build_dyn_trace(
     process.execute_code_block(program, &cb_table).unwrap();
 
-    let (trace, aux_hints, _) = ExecutionTrace::test_finalize_trace(process);
-    let trace_len = get_trace_len(&trace) - ExecutionTrace::NUM_RAND_ROWS;
+    let (trace, _, _) = ExecutionTrace::test_finalize_trace(process);
+    let trace_len = trace.num_rows() - ExecutionTrace::NUM_RAND_ROWS;
 
     (
-        trace[DECODER_TRACE_RANGE]
-            .to_vec()
+        trace
+            .get_column_range(DECODER_TRACE_RANGE)
             .try_into()
             .expect("failed to convert vector to array"),
-        aux_hints.decoder,
         trace_len,
     )
 }
@@ -1711,9 +1244,9 @@ fn build_call_trace(
     program: &CodeBlock,
     fn_block: CodeBlock,
     kernel_proc: Option<CodeBlock>,
-) -> (SystemTrace, DecoderTrace, AuxTraceHints, usize) {
+) -> (SystemTrace, DecoderTrace, usize) {
     let kernel = match kernel_proc {
-        Some(ref proc) => Kernel::new(&[proc.hash()]),
+        Some(ref proc) => Kernel::new(&[proc.hash()]).unwrap(),
         None => Kernel::default(),
     };
     let host = DefaultHost::default();
@@ -1729,20 +1262,20 @@
     process.execute_code_block(program, &cb_table).unwrap();
 
-    let (trace, aux_hints, _) = ExecutionTrace::test_finalize_trace(process);
-    let trace_len = get_trace_len(&trace) - ExecutionTrace::NUM_RAND_ROWS;
+    let (trace, _, _) = ExecutionTrace::test_finalize_trace(process);
+    let trace_len = trace.num_rows() - ExecutionTrace::NUM_RAND_ROWS;
 
-    let sys_trace = trace[SYS_TRACE_RANGE]
-        .to_vec()
+    let sys_trace = trace
+        .get_column_range(SYS_TRACE_RANGE)
         .try_into()
         .expect("failed to convert vector to array");
 
-    let decoder_trace = trace[DECODER_TRACE_RANGE]
-        .to_vec()
+    let decoder_trace = trace
+        .get_column_range(DECODER_TRACE_RANGE)
         .try_into()
         .expect("failed to convert vector to array");
 
-    (sys_trace, decoder_trace, aux_hints.decoder, trace_len)
+    (sys_trace, decoder_trace, trace_len)
 }
 
 // OPCODES
diff --git a/processor/src/decoder/trace.rs b/processor/src/decoder/trace.rs
index 286601264b..59d0f4c9c5 100644
--- a/processor/src/decoder/trace.rs
+++ b/processor/src/decoder/trace.rs
@@ -1,9 +1,10 @@
 use super::{
-    super::utils::get_trace_len, get_num_groups_in_next_batch, Felt, Operation, StarkField, Vec,
-    Word, DIGEST_LEN, MIN_TRACE_LEN, NUM_HASHER_COLUMNS, NUM_OP_BATCH_FLAGS, NUM_OP_BITS,
-    NUM_OP_BITS_EXTRA_COLS, ONE, OP_BATCH_1_GROUPS, OP_BATCH_2_GROUPS, OP_BATCH_4_GROUPS,
-    OP_BATCH_8_GROUPS, OP_BATCH_SIZE, ZERO,
+    super::utils::get_trace_len, get_num_groups_in_next_batch, Felt, Operation, Word, DIGEST_LEN,
+    MIN_TRACE_LEN, NUM_HASHER_COLUMNS, NUM_OP_BATCH_FLAGS, NUM_OP_BITS, NUM_OP_BITS_EXTRA_COLS,
+    ONE, OP_BATCH_1_GROUPS, OP_BATCH_2_GROUPS, OP_BATCH_4_GROUPS, OP_BATCH_8_GROUPS, OP_BATCH_SIZE,
OP_BATCH_2_GROUPS, OP_BATCH_4_GROUPS, OP_BATCH_8_GROUPS, OP_BATCH_SIZE, + ZERO, }; +use crate::utils::collections::*; use core::ops::Range; use vm_core::utils::new_array_vec; diff --git a/processor/src/errors.rs b/processor/src/errors.rs index 8730456ca5..46587b2424 100644 --- a/processor/src/errors.rs +++ b/processor/src/errors.rs @@ -3,6 +3,7 @@ use super::{ system::{FMP_MAX, FMP_MIN}, CodeBlock, Digest, Felt, QuadFelt, Word, }; +use crate::utils::string::*; use core::fmt::{Display, Formatter}; use vm_core::{stack::STACK_TOP_SIZE, utils::to_hex}; use winter_prover::{math::FieldElement, ProverError}; @@ -13,37 +14,57 @@ use std::error::Error; // EXECUTION ERROR // ================================================================================================ -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq)] pub enum ExecutionError { AdviceMapKeyNotFound(Word), - AdviceMapValueInvalidLength(Word, usize, usize), AdviceStackReadFailed(u32), CallerNotInSyscall, CodeBlockNotFound(Digest), - DynamicCodeBlockNotFound(Digest), CycleLimitExceeded(u32), DivideByZero(u32), + DynamicCodeBlockNotFound(Digest), + EventError(String), Ext2InttError(Ext2InttError), - FailedAssertion(u32, Felt), + FailedAssertion { + clk: u32, + err_code: u32, + err_msg: Option, + }, + FailedSignatureGeneration(&'static str), InvalidFmpValue(Felt, Felt), InvalidFriDomainSegment(u64), InvalidFriLayerFolding(QuadFelt, QuadFelt), - InvalidMemoryRange { start_addr: u64, end_addr: u64 }, + InvalidMemoryRange { + start_addr: u64, + end_addr: u64, + }, InvalidStackDepthOnReturn(usize), InvalidStackWordOffset(usize), - InvalidTreeDepth { depth: Felt }, - InvalidTreeNodeIndex { depth: Felt, value: Felt }, + InvalidTreeDepth { + depth: Felt, + }, + InvalidTreeNodeIndex { + depth: Felt, + value: Felt, + }, + LogArgumentZero(u32), + MalformedSignatureKey(&'static str), MemoryAddressOutOfBounds(u64), - MerkleStoreMergeFailed(MerkleError), + MerklePathVerificationFailed { + value: Word, + index: Felt, + root: Digest, + }, MerkleStoreLookupFailed(MerkleError), + MerkleStoreMergeFailed(MerkleError), MerkleStoreUpdateFailed(MerkleError), NotBinaryValue(Felt), NotU32Value(Felt, Felt), ProverError(ProverError), + SmtNodeNotFound(Word), + SmtNodePreImageNotValid(Word, usize), SyscallTargetNotInKernel(Digest), UnexecutableCodeBlock(CodeBlock), - MalformedSignatureKey(&'static str), - FailedSignatureGeneration(&'static str), } impl Display for ExecutionError { @@ -55,13 +76,6 @@ impl Display for ExecutionError { let hex = to_hex(Felt::elements_as_bytes(key))?; write!(f, "Value for key {hex} not present in the advice map") } - AdviceMapValueInvalidLength(key, expected, actual) => { - let hex = to_hex(Felt::elements_as_bytes(key))?; - write!( - f, - "Expected value for key {hex} to contain {expected} elements, but was {actual}" - ) - } AdviceStackReadFailed(step) => write!(f, "Advice stack read failed at step {step}"), CallerNotInSyscall => { write!(f, "Instruction `caller` used outside of kernel context") @@ -73,6 +87,10 @@ impl Display for ExecutionError { "Failed to execute code block with root {hex}; the block could not be found" ) } + CycleLimitExceeded(max_cycles) => { + write!(f, "Exceeded the allowed number of cycles (max cycles = {max_cycles})") + } + DivideByZero(clk) => write!(f, "Division by zero at clock cycle {clk}"), DynamicCodeBlockNotFound(digest) => { let hex = to_hex(&digest.as_bytes())?; write!( @@ -80,13 +98,24 @@ impl Display for ExecutionError { "Failed to execute the dynamic code block provided by the stack with root 
{hex}; the block could not be found" ) } - CycleLimitExceeded(max_cycles) => { - write!(f, "Exceeded the allowed number of cycles (max cycles = {max_cycles})") - } - DivideByZero(clk) => write!(f, "Division by zero at clock cycle {clk}"), + EventError(error) => write!(f, "Failed to process event - {error}"), Ext2InttError(err) => write!(f, "Failed to execute Ext2Intt operation: {err}"), - FailedAssertion(clk, err_code) => { - write!(f, "Assertion failed at clock cycle {clk} with error code {err_code}") + FailedAssertion { + clk, + err_code, + err_msg, + } => { + if let Some(err_msg) = err_msg { + write!( + f, + "Assertion failed at clock cycle {clk} with error code {err_code}: {err_msg}" + ) + } else { + write!(f, "Assertion failed at clock cycle {clk} with error code {err_code}") + } + } + FailedSignatureGeneration(signature) => { + write!(f, "Failed to generate signature: {signature}") } InvalidFmpValue(old, new) => { write!(f, "Updating FMP register from {old} to {new} failed because {new} is outside of {FMP_MIN}..{FMP_MAX}") @@ -115,9 +144,21 @@ impl Display for ExecutionError { InvalidTreeNodeIndex { depth, value } => { write!(f, "The provided index {value} is out of bounds for a node at depth {depth}") } + LogArgumentZero(clk) => { + write!( + f, + "Calculating of the integer logarithm with zero argument at clock cycle {clk}" + ) + } + MalformedSignatureKey(signature) => write!(f, "Malformed signature key: {signature}"), MemoryAddressOutOfBounds(addr) => { write!(f, "Memory address cannot exceed 2^32 but was {addr}") } + MerklePathVerificationFailed { value, index, root } => { + let value = to_hex(Felt::elements_as_bytes(value))?; + let root = to_hex(&root.as_bytes())?; + write!(f, "Merkle path verification failed for value {value} at index {index}, in the Merkle tree with root {root}") + } MerkleStoreLookupFailed(reason) => { write!(f, "Advice provider Merkle store backend lookup failed: {reason}") } @@ -136,6 +177,14 @@ impl Display for ExecutionError { "An operation expected a u32 value, but received {v} (error code: {err_code})" ) } + SmtNodeNotFound(node) => { + let node_hex = to_hex(Felt::elements_as_bytes(node))?; + write!(f, "Smt node {node_hex} not found") + } + SmtNodePreImageNotValid(node, preimage_len) => { + let node_hex = to_hex(Felt::elements_as_bytes(node))?; + write!(f, "Invalid pre-image for node {node_hex}. Expected pre-image length to be a multiple of 8, but was {preimage_len}") + } ProverError(error) => write!(f, "Proof generation failed: {error}"), SyscallTargetNotInKernel(proc) => { let hex = to_hex(&proc.as_bytes())?; @@ -144,10 +193,6 @@ impl Display for ExecutionError { UnexecutableCodeBlock(block) => { write!(f, "Execution reached unexecutable code block {block:?}") } - MalformedSignatureKey(signature) => write!(f, "Malformed signature key: {signature}"), - FailedSignatureGeneration(signature) => { - write!(f, "Failed to generate signature: {signature}") - } } } } @@ -164,7 +209,7 @@ impl From for ExecutionError { // EXT2INTT ERROR // ================================================================================================ -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq)] pub enum Ext2InttError { DomainSizeNotPowerOf2(u64), DomainSizeTooSmall(u64), diff --git a/processor/src/host/advice/extractors.rs b/processor/src/host/advice/extractors.rs index a7e8e7b871..ed9175015d 100644 --- a/processor/src/host/advice/extractors.rs +++ b/processor/src/host/advice/extractors.rs @@ -39,7 +39,7 @@ pub enum AdviceExtractor { /// Advice stack: [d, c, b, a, ...] 
/// Advice map: {...} /// Merkle store: {...} - /// + /// /// Outputs: /// Operand stack: [...] /// Advice stack: [...] diff --git a/processor/src/host/advice/injectors/adv_map_injectors.rs b/processor/src/host/advice/injectors/adv_map_injectors.rs index e5712e0ef3..6f5d08a920 100644 --- a/processor/src/host/advice/injectors/adv_map_injectors.rs +++ b/processor/src/host/advice/injectors/adv_map_injectors.rs @@ -1,8 +1,7 @@ -use super::super::{AdviceProvider, ExecutionError, Felt, HostResponse, StarkField}; -use crate::ProcessState; +use super::super::{AdviceProvider, ExecutionError, Felt, HostResponse}; +use crate::{utils::collections::*, ProcessState}; use vm_core::{ crypto::hash::{Rpo256, RpoDigest}, - utils::collections::Vec, EMPTY_WORD, WORD_SIZE, }; diff --git a/processor/src/host/advice/injectors/adv_stack_injectors.rs b/processor/src/host/advice/injectors/adv_stack_injectors.rs index 6804a71595..028901600f 100644 --- a/processor/src/host/advice/injectors/adv_stack_injectors.rs +++ b/processor/src/host/advice/injectors/adv_stack_injectors.rs @@ -1,5 +1,7 @@ -use super::super::{AdviceSource, ExecutionError, Felt, HostResponse, StarkField}; -use crate::{AdviceProvider, Ext2InttError, FieldElement, ProcessState, Vec}; +use super::super::{AdviceSource, ExecutionError, Felt, HostResponse}; +use crate::{ + utils::collections::*, AdviceProvider, Ext2InttError, FieldElement, ProcessState, ZERO, +}; use vm_core::{QuadExtension, SignatureKind}; use winter_prover::math::fft; @@ -296,11 +298,124 @@ pub(crate) fn push_signature( Ok(HostResponse::None) } +/// Pushes the number of the leading zeros of the top stack element onto the advice stack. +/// +/// Inputs: +/// Operand stack: [n, ...] +/// Advice stack: [...] +/// +/// Outputs: +/// Operand stack: [n, ...] +/// Advice stack: [leading_zeros, ...] +pub(crate) fn push_leading_zeros( + advice_provider: &mut A, + process: &S, +) -> Result { + push_transformed_stack_top(advice_provider, process, |stack_top| { + Felt::from(stack_top.leading_zeros()) + }) +} + +/// Pushes the number of the trailing zeros of the top stack element onto the advice stack. +/// +/// Inputs: +/// Operand stack: [n, ...] +/// Advice stack: [...] +/// +/// Outputs: +/// Operand stack: [n, ...] +/// Advice stack: [trailing_zeros, ...] +pub(crate) fn push_trailing_zeros( + advice_provider: &mut A, + process: &S, +) -> Result { + push_transformed_stack_top(advice_provider, process, |stack_top| { + Felt::from(stack_top.trailing_zeros()) + }) +} + +/// Pushes the number of the leading ones of the top stack element onto the advice stack. +/// +/// Inputs: +/// Operand stack: [n, ...] +/// Advice stack: [...] +/// +/// Outputs: +/// Operand stack: [n, ...] +/// Advice stack: [leading_ones, ...] +pub(crate) fn push_leading_ones( + advice_provider: &mut A, + process: &S, +) -> Result { + push_transformed_stack_top(advice_provider, process, |stack_top| { + Felt::from(stack_top.leading_ones()) + }) +} + +/// Pushes the number of the trailing ones of the top stack element onto the advice stack. +/// +/// Inputs: +/// Operand stack: [n, ...] +/// Advice stack: [...] +/// +/// Outputs: +/// Operand stack: [n, ...] +/// Advice stack: [trailing_ones, ...] +pub(crate) fn push_trailing_ones( + advice_provider: &mut A, + process: &S, +) -> Result { + push_transformed_stack_top(advice_provider, process, |stack_top| { + Felt::from(stack_top.trailing_ones()) + }) +} + +/// Pushes the base 2 logarithm of the top stack element, rounded down. +/// Inputs: +/// Operand stack: [n, ...] 
+/// Advice stack: [...] +/// +/// Outputs: +/// Operand stack: [n, ...] +/// Advice stack: [ilog2(n), ...] +/// +/// # Errors +/// Returns an error if the logarithm argument (top stack element) equals ZERO. +pub(crate) fn push_ilog2( + advice_provider: &mut A, + process: &S, +) -> Result { + let n = process.get_stack_item(0).as_int(); + if n == 0 { + return Err(ExecutionError::LogArgumentZero(process.clk())); + } + let ilog2 = Felt::from(n.ilog2()); + advice_provider.push_stack(AdviceSource::Value(ilog2))?; + Ok(HostResponse::None) +} + // HELPER FUNCTIONS // ================================================================================================ fn u64_to_u32_elements(value: u64) -> (Felt, Felt) { - let hi = Felt::new(value >> 32); - let lo = Felt::new((value as u32) as u64); + let hi = Felt::from((value >> 32) as u32); + let lo = Felt::from(value as u32); (hi, lo) } + +/// Gets the top stack element, applies a provided function to it and pushes it to the advice +/// provider. +fn push_transformed_stack_top( + advice_provider: &mut A, + process: &S, + f: impl FnOnce(u32) -> Felt, +) -> Result { + let stack_top = process.get_stack_item(0); + let stack_top: u32 = stack_top + .as_int() + .try_into() + .map_err(|_| ExecutionError::NotU32Value(stack_top, ZERO))?; + let transformed_stack_top = f(stack_top); + advice_provider.push_stack(AdviceSource::Value(transformed_stack_top))?; + Ok(HostResponse::None) +} diff --git a/processor/src/host/advice/injectors/dsa.rs b/processor/src/host/advice/injectors/dsa.rs index 83376593a7..64083cfa17 100644 --- a/processor/src/host/advice/injectors/dsa.rs +++ b/processor/src/host/advice/injectors/dsa.rs @@ -1,4 +1,5 @@ -use super::super::{ExecutionError, Felt, StarkField, Vec, Word}; +use super::super::{ExecutionError, Felt, Word}; +use crate::utils::collections::*; use vm_core::{ crypto::dsa::rpo_falcon512::{KeyPair, Polynomial}, utils::Deserializable, diff --git a/processor/src/host/advice/injectors/smt.rs b/processor/src/host/advice/injectors/smt.rs index b82f5e9be1..f7c96d7f62 100644 --- a/processor/src/host/advice/injectors/smt.rs +++ b/processor/src/host/advice/injectors/smt.rs @@ -1,95 +1,16 @@ -use super::super::{AdviceSource, ExecutionError, Felt, HostResponse, StarkField, Word}; -use crate::{AdviceProvider, ProcessState}; +use super::super::{AdviceSource, ExecutionError, Felt, HostResponse, Word}; +use crate::{utils::collections::*, AdviceProvider, ProcessState}; use vm_core::{ crypto::{ - hash::{Rpo256, RpoDigest}, - merkle::{EmptySubtreeRoots, NodeIndex, TieredSmt}, + hash::RpoDigest, + merkle::{EmptySubtreeRoots, Smt, SMT_DEPTH}, }, - utils::collections::{btree_map::Entry, BTreeMap, Vec}, - ONE, WORD_SIZE, ZERO, + WORD_SIZE, }; -// CONSTANTS -// ================================================================================================ - -/// Maximum depth of a Sparse Merkle Tree -const SMT_MAX_TREE_DEPTH: Felt = Felt::new(64); - -/// Lookup table for Sparse Merkle Tree depth normalization -const SMT_NORMALIZED_DEPTHS: [u8; 65] = [ - 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, - 48, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, -]; - // SMT INJECTORS // ================================================================================================ -/// Pushes values onto the advice stack which are required for successful retrieval of a -/// value from a Sparse Merkle 
Tree data structure. -/// -/// The Sparse Merkle Tree is tiered, meaning it will have leaf depths in `{16, 32, 48, 64}`. -/// The depth flags define the tier on which the leaf is located. -/// -/// Inputs: -/// Operand stack: [KEY, ROOT, ...] -/// Advice stack: [...] -/// -/// Outputs: -/// Operand stack: [KEY, ROOT, ...] -/// Advice stack: [f0, f1, K, V, f2] -/// -/// Where: -/// - f0 is a boolean flag set to `1` if the depth is `16` or `48`. -/// - f1 is a boolean flag set to `1` if the depth is `16` or `32`. -/// - K is the key; will be zeroed if the tree don't contain a mapped value for the key. -/// - V is the value word; will be zeroed if the tree don't contain a mapped value for the key. -/// - f2 is a boolean flag set to `1` if the key is not zero. -/// -/// # Errors -/// Returns an error if the provided Merkle root doesn't exist on the advice provider. -/// -/// # Panics -/// Will panic as unimplemented if the target depth is `64`. -pub(crate) fn push_smtget_inputs( - advice_provider: &mut A, - process: &S, -) -> Result { - // fetch the arguments from the operand stack - let key = process.get_stack_word(0); - let root = process.get_stack_word(1); - - // get the node from the SMT for the specified key; this node can be either a leaf node, - // or a root of an empty subtree at the returned depth - let (node, depth, _) = get_smt_node(advice_provider, root, key)?; - - // set the node value; zeroed if empty sub-tree - let empty = EmptySubtreeRoots::empty_hashes(64); - if Word::from(empty[depth as usize]) == node { - // push zeroes for remaining key, value & empty remaining key flag - for _ in 0..9 { - advice_provider.push_stack(AdviceSource::Value(ZERO))?; - } - } else { - // push a flag indicating that a remaining key exists - advice_provider.push_stack(AdviceSource::Value(ONE))?; - - // map is expected to contain `node |-> {K, V}` - advice_provider.push_stack(AdviceSource::Map { - key: node, - include_len: false, - })?; - } - - // set the flags - let is_16_or_32 = if depth == 16 || depth == 32 { ONE } else { ZERO }; - let is_16_or_48 = if depth == 16 || depth == 48 { ONE } else { ZERO }; - advice_provider.push_stack(AdviceSource::Value(is_16_or_32))?; - advice_provider.push_stack(AdviceSource::Value(is_16_or_48))?; - - Ok(HostResponse::None) -} - /// Pushes onto the advice stack the value associated with the specified key in a Sparse /// Merkle Tree defined by the specified root. /// @@ -106,495 +27,83 @@ pub(crate) fn push_smtget_inputs( /// /// # Errors /// Returns an error if the provided Merkle root doesn't exist on the advice provider. -/// -/// # Panics -/// Will panic as unimplemented if the target depth is `64`. 
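// Illustrative sketch (not part of the diff): the reworked `push_smtpeek_result` that
// follows resolves a key against a single leaf of the new `Smt` type by scanning the
// leaf pre-image, a list of (KEY, VALUE) word pairs stored in the advice map. The
// standalone sketch below mirrors that lookup using plain `[u64; 4]` words instead of
// the crate's `Word`/`Felt` types.

type RawWord = [u64; 4];

/// Returns the value stored for `key` in the leaf pre-image, or an all-zero word
/// (the analogue of `Smt::EMPTY_VALUE`) if the key is not present in the leaf.
fn lookup_in_leaf(preimage: &[(RawWord, RawWord)], key: RawWord) -> RawWord {
    for (key_in_leaf, value_in_leaf) in preimage {
        if *key_in_leaf == key {
            return *value_in_leaf;
        }
    }
    [0; 4]
}

fn main() {
    let preimage = vec![([1, 2, 3, 4], [10, 20, 30, 40])];
    assert_eq!(lookup_in_leaf(&preimage, [1, 2, 3, 4]), [10, 20, 30, 40]);
    assert_eq!(lookup_in_leaf(&preimage, [5, 6, 7, 8]), [0; 4]);
}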
pub(crate) fn push_smtpeek_result( advice_provider: &mut A, process: &S, ) -> Result { + let empty_leaf = EmptySubtreeRoots::entry(SMT_DEPTH, SMT_DEPTH); // fetch the arguments from the operand stack let key = process.get_stack_word(0); let root = process.get_stack_word(1); // get the node from the SMT for the specified key; this node can be either a leaf node, // or a root of an empty subtree at the returned depth - let (node, depth, _) = get_smt_node(advice_provider, root, key)?; + let node = advice_provider.get_tree_node(root, &Felt::new(SMT_DEPTH as u64), &key[3])?; - let empty = EmptySubtreeRoots::empty_hashes(64)[depth as usize]; - if node == Word::from(empty) { + if node == Word::from(empty_leaf) { // if the node is a root of an empty subtree, then there is no value associated with // the specified key - advice_provider.push_stack(AdviceSource::Word(TieredSmt::EMPTY_VALUE))?; + advice_provider.push_stack(AdviceSource::Word(Smt::EMPTY_VALUE))?; } else { - // get the key and value stored in the current leaf - let (leaf_key, leaf_value) = get_smt_upper_leaf_preimage(advice_provider, node)?; - - // if the leaf is for a different key, then there is no value associated with the - // specified key - if leaf_key == key { - advice_provider.push_stack(AdviceSource::Word(leaf_value))?; - } else { - advice_provider.push_stack(AdviceSource::Word(TieredSmt::EMPTY_VALUE))?; - } - } - - Ok(HostResponse::None) -} - -/// Pushes values onto the advice stack which are required for successful insertion of a -/// key-value pair into a Sparse Merkle Tree data structure. -/// -/// The Sparse Merkle Tree is tiered, meaning it will have leaf depths in `{16, 32, 48, 64}`. -/// -/// Inputs: -/// Operand stack: [VALUE, KEY, ROOT, ...] -/// Advice stack: [...] -/// -/// Outputs: -/// Operand stack: [OLD_VALUE, NEW_ROOT, ...] -/// Advice stack: see comments for specialized handlers below. -/// -/// Where: -/// - ROOT and NEW_ROOT are the roots of the TSMT before and after the insert respectively. -/// - VALUE is the value to be inserted. -/// - OLD_VALUE is the value previously associated with the specified KEY. -/// -/// # Errors -/// Returns an error if: -/// - The Merkle store does not contain a node with the specified root. -/// - The Merkle store does not contain all nodes needed to validate the path between the root -/// and the relevant TSMT nodes. -/// - The advice map does not contain required data about TSMT leaves to be modified. -/// -/// # Panics -/// Will panic as unimplemented if the target depth is `64`. -pub(crate) fn push_smtset_inputs( - advice_provider: &mut A, - process: &S, -) -> Result { - // get the key, value, and tree root from the stack - let value = process.get_stack_word(0); - let key = process.get_stack_word(1); - let root = process.get_stack_word(2); - - // get the node from the SMT for the specified key; this node can be either a leaf node, - // or a root of an empty subtree at the returned depth - let (node, depth, index) = get_smt_node(advice_provider, root, key)?; + let leaf_preimage = get_smt_leaf_preimage(advice_provider, node)?; - // if the value to be inserted is an empty word, we need to process it as a delete - if value == TieredSmt::EMPTY_VALUE { - return handle_smt_delete(advice_provider, root, node, depth, index, key); - } - - // figure out what kind of insert we are doing; possible options are: - // - if the node is a root of an empty subtree, this is a simple insert. 
- // - if the node is a leaf, this could be either an update (for the same key), or a - // complex insert (i.e., the existing leaf needs to be moved to a lower tier). - let empty = EmptySubtreeRoots::empty_hashes(64)[depth as usize]; - if node == Word::from(empty) { - handle_smt_simple_insert(advice_provider, root, depth, index) - } else { - // get the key and value stored in the current leaf - let (leaf_key, leaf_value) = get_smt_upper_leaf_preimage(advice_provider, node)?; + for (key_in_leaf, value_in_leaf) in leaf_preimage { + if key == key_in_leaf { + // Found key - push value associated with key, and return + advice_provider.push_stack(AdviceSource::Word(value_in_leaf))?; - // if the key for the value to be inserted is the same as the leaf's key, we are - // dealing with a simple update; otherwise, we are dealing with a complex insert - if leaf_key == key { - handle_smt_update(advice_provider, depth, leaf_value) - } else { - handle_smt_complex_insert(advice_provider, depth, key, leaf_key, leaf_value) + return Ok(HostResponse::None); + } } - } -} - -// TSMT UPDATE HELPER METHODS -// -------------------------------------------------------------------------------------------- -/// Returns first leaf or an empty tree node for the provided key in the Sparse Merkle tree -/// with the specified root. -/// -/// Also returns the depth and index of the returned node at this depth. -fn get_smt_node( - advice_provider: &A, - root: Word, - key: Word, -) -> Result<(Word, u8, Felt), ExecutionError> { - // determine the depth of the first leaf or an empty tree node - let index = &key[3]; - let depth = advice_provider.get_leaf_depth(root, &SMT_MAX_TREE_DEPTH, index)?; - debug_assert!(depth < 65); - - // map the depth value to its tier; this rounds up depth to 16, 32, 48, or 64 - let depth = SMT_NORMALIZED_DEPTHS[depth as usize]; - if depth == 64 { - unimplemented!("handling of depth=64 tier hasn't been implemented yet"); + // if we can't find any key in the leaf that matches `key`, it means no value is associated + // with `key` + advice_provider.push_stack(AdviceSource::Word(Smt::EMPTY_VALUE))?; } - // get the value of the node at this index/depth - let index = index.as_int() >> (64 - depth); - let index = Felt::new(index); - let node = advice_provider.get_tree_node(root, &Felt::from(depth), &index)?; - - Ok((node, depth, index)) -} - -/// Retrieves a key-value pair for the specified leaf node from the advice map. -/// -/// # Errors -/// Returns an error if the value under the specified node does not exist or does not consist -/// of exactly 8 elements. -fn get_smt_upper_leaf_preimage( - advice_provider: &A, - node: Word, -) -> Result<(Word, Word), ExecutionError> { - let node_bytes = RpoDigest::from(node).as_bytes(); - let kv = advice_provider - .get_mapped_values(&node_bytes) - .ok_or(ExecutionError::AdviceMapKeyNotFound(node))?; - - if kv.len() != WORD_SIZE * 2 { - return Err(ExecutionError::AdviceMapValueInvalidLength(node, WORD_SIZE * 2, kv.len())); - } - - let key = [kv[0], kv[1], kv[2], kv[3]]; - let val = [kv[4], kv[5], kv[6], kv[7]]; - Ok((key, val)) -} - -/// Prepares the advice stack for a TSMT update operation. Specifically, the advice stack will -/// be arranged as follows: -/// -/// - [ZERO (padding), d0, d1, ONE (is_update), OLD_VALUE] -/// -/// Where: -/// - d0 is a boolean flag set to `1` if the depth is `16` or `48`. -/// - d1 is a boolean flag set to `1` if the depth is `16` or `32`. -/// - OLD_VALUE is the current value in the leaf to be updated. 
-fn handle_smt_update( - advice_provider: &mut A, - depth: u8, - old_value: Word, -) -> Result { - // put the old value onto the advice stack - advice_provider.push_stack(AdviceSource::Word(old_value))?; - - // set is_update flag to ONE - advice_provider.push_stack(AdviceSource::Value(ONE))?; - - // set depth flags based on leaf's depth - let (is_16_or_32, is_16_or_48) = get_depth_flags(depth); - advice_provider.push_stack(AdviceSource::Value(is_16_or_32))?; - advice_provider.push_stack(AdviceSource::Value(is_16_or_48))?; - - // pad the advice stack with an extra value to make it consistent with other cases when - // we expect 4 flag values on the top of the advice stack - advice_provider.push_stack(AdviceSource::Value(ZERO))?; - Ok(HostResponse::None) } -/// Prepares the advice stack for a TSMT simple insert operation (i.e., when we are replacing -/// an empty node). Specifically, the advice stack will be arranged as follows: -/// -/// - Simple insert at depth 16: [d0, d1, ONE (is_simple_insert), ZERO (is_update)] -/// - Simple insert at depth 32 or 48: [d0, d1, ONE (is_simple_insert), ZERO (is_update), P_NODE] -/// -/// Where: -/// - d0 is a boolean flag set to `1` if the depth is `16` or `48`. -/// - d1 is a boolean flag set to `1` if the depth is `16` or `32`. -/// - P_NODE is an internal node located at the tier above the insert tier. -fn handle_smt_simple_insert( - advice_provider: &mut A, - root: Word, - depth: u8, - index: Felt, +/// Currently unimplemented +pub(crate) fn push_smtget_inputs( + _advice_provider: &mut A, + _process: &S, ) -> Result { - // put additional data onto the advice stack as needed - match depth { - 16 => (), // nothing to do; all the required data is already in the VM - 32 | 48 => { - // for depth 32 and 48, we need to provide the internal node located on the tier - // above the insert tier - let p_index = Felt::from(index.as_int() >> 16); - let p_depth = Felt::from(depth - 16); - let p_node = advice_provider.get_tree_node(root, &p_depth, &p_index)?; - advice_provider.push_stack(AdviceSource::Word(p_node))?; - } - 64 => unimplemented!("insertions at depth 64 are not yet implemented"), - _ => unreachable!("invalid depth {depth}"), - } - - // push is_update and is_simple_insert flags onto the advice stack - advice_provider.push_stack(AdviceSource::Value(ZERO))?; - advice_provider.push_stack(AdviceSource::Value(ONE))?; - - // set depth flags based on node's depth - let (is_16_or_32, is_16_or_48) = get_depth_flags(depth); - advice_provider.push_stack(AdviceSource::Value(is_16_or_32))?; - advice_provider.push_stack(AdviceSource::Value(is_16_or_48))?; - - Ok(HostResponse::None) + unimplemented!() } -/// Prepares the advice stack for a TSMT complex insert operation (i.e., when a leaf node needs -/// to be replaced with a subtree of nodes at a lower tier). Specifically, the advice stack -/// will be arranged as follows: -/// -/// - [d0, d1, ZERO (is_simple_insert), ZERO (is_update), E_KEY, E_VALUE] -/// -/// Where: -/// - d0 and d1 are boolean flags a combination of which determines the source and the target -/// tiers as follows: -/// - (0, 0): depth 16 -> 32 -/// - (0, 1): depth 16 -> 48 -/// - (1, 0): depth 32 -> 48 -/// - (1, 1): depth 16, 32, or 48 -> 64 -/// - E_KEY and E_VALUE are the key-value pair for a leaf which is to be replaced by a subtree. 
-fn handle_smt_complex_insert( - advice_provider: &mut A, - depth: u8, - key: Word, - leaf_key: Word, - leaf_value: Word, +/// Currently unimplemented +pub(crate) fn push_smtset_inputs( + _advice_provider: &mut A, + _process: &S, ) -> Result { - // push the key and value onto the advice stack - advice_provider.push_stack(AdviceSource::Word(leaf_value))?; - advice_provider.push_stack(AdviceSource::Word(leaf_key))?; - - // push is_update and is_simple_insert flags onto the advice stack - advice_provider.push_stack(AdviceSource::Value(ZERO))?; - advice_provider.push_stack(AdviceSource::Value(ZERO))?; - - // determine the combination of the source and target tiers for the insert - // and populate the depth flags accordingly - let common_prefix = get_common_prefix(&key, &leaf_key); - let target_depth = SMT_NORMALIZED_DEPTHS[common_prefix as usize + 1]; - match target_depth { - 32 if depth == 16 => { - advice_provider.push_stack(AdviceSource::Value(ONE))?; - advice_provider.push_stack(AdviceSource::Value(ONE))?; - } - 48 if depth == 16 => { - advice_provider.push_stack(AdviceSource::Value(ONE))?; - advice_provider.push_stack(AdviceSource::Value(ZERO))?; - } - 48 if depth == 32 => { - advice_provider.push_stack(AdviceSource::Value(ZERO))?; - advice_provider.push_stack(AdviceSource::Value(ONE))?; - } - 64 => unimplemented!("insertions at depth 64 are not yet implemented"), - _ => unreachable!("invalid source/target tier combination: {depth} -> {target_depth}"), - } - - Ok(HostResponse::None) + unimplemented!() } -/// Prepares the advice stack for a TSMT deletion operation. Specifically, the advice stack -/// will be arranged as follows (depending on the type of the node which occupies the location -/// at which the node for the specified key should be present): -/// -/// - Root of empty subtree: [d0, d1, ZERO (is_leaf), ONE (key_not_set)] -/// - Leaf for another key: [d0, d1, ONE (is_leaf), ONE (key_not_set), KEY, VALUE] -/// - Leaf for the provided key: [ZERO, ZERO, ZERO, ZERO (key_not_set), NEW_ROOT, OLD_VALUE] -/// -/// Where: -/// - d0 is a boolean flag set to `1` if the depth is `16` or `48`. -/// - d1 is a boolean flag set to `1` if the depth is `16` or `32`. -/// - KEY and VALUE is the key-value pair of a leaf node occupying the location of the node -/// for the specified key. Note that KEY may be the same as the specified key or different -/// from the specified key if the location is occupied by a different key-value pair. -/// - NEW_ROOT is the new root of the TSMT post deletion. -/// - OLD_VALUE is the value which is to be replaced with [ZERO; 4]. -fn handle_smt_delete( - advice_provider: &mut A, - root: Word, - node: Word, - depth: u8, - index: Felt, - key: Word, -) -> Result { - let empty = EmptySubtreeRoots::empty_hashes(TieredSmt::MAX_DEPTH)[depth as usize]; - - if node == Word::from(empty) { - // if the node to be replaced is already an empty node, we set key_not_set = ONE, - // and is_leaf = ZERO - advice_provider.push_stack(AdviceSource::Value(ONE))?; - advice_provider.push_stack(AdviceSource::Value(ZERO))?; - - // set depth flags based on node's depth - let (is_16_or_32, is_16_or_48) = get_depth_flags(depth); - advice_provider.push_stack(AdviceSource::Value(is_16_or_32))?; - advice_provider.push_stack(AdviceSource::Value(is_16_or_48))?; - - Ok(HostResponse::None) - } else { - // if the node is not a root of an empty subtree, it must be a leaf; thus we can get - // the key and the value stored in the leaf. 
- let (leaf_key, leaf_value) = get_smt_upper_leaf_preimage(advice_provider, node)?; - - if leaf_key != key { - // if the node to be replaced is a leaf for different key, we push that key-value - // pair onto the advice stack and set key_not_set = ONE and is_leaf = ONE - - advice_provider.push_stack(AdviceSource::Word(leaf_value))?; - advice_provider.push_stack(AdviceSource::Word(leaf_key))?; - - advice_provider.push_stack(AdviceSource::Value(ONE))?; - advice_provider.push_stack(AdviceSource::Value(ONE))?; - - // set depth flags based on node's depth - let (is_16_or_32, is_16_or_48) = get_depth_flags(depth); - advice_provider.push_stack(AdviceSource::Value(is_16_or_32))?; - advice_provider.push_stack(AdviceSource::Value(is_16_or_48))?; - } else { - // if the key which we want to set to [ZERO; 4] does have an associated value, - // we update the tree in the advice provider to get the new root, then push the root - // and the old value onto the advice stack, key_not_set = ZERO, and also push 3 - // ZERO values for padding - let new_root = match find_lone_sibling(advice_provider, root, depth, &index)? { - Some((sibling, new_index)) => { - // if the node to be deleted has a lone sibling, we need to move it to a - // higher tier. - - // first, we compute the value of the new node on the higher tier - let (leaf_key, leaf_val) = - get_smt_upper_leaf_preimage(advice_provider, *sibling)?; - let new_node = Rpo256::merge_in_domain( - &[leaf_key.into(), leaf_val.into()], - new_index.depth().into(), - ); - - // then we insert the node and its pre-image into the advice provider - let mut elements = leaf_key.to_vec(); - elements.extend_from_slice(&leaf_val); - advice_provider.insert_into_map(new_node.into(), elements)?; - - // and finally we update the tree in the advice provider - let (_, new_root) = advice_provider.update_merkle_node( - root, - &new_index.depth().into(), - &new_index.value().into(), - new_node.into(), - )?; - new_root - } - None => { - // if the node does not have a lone sibling, we just replace it with an - // empty node - let (_, new_root) = advice_provider.update_merkle_node( - root, - &Felt::from(depth), - &index, - empty.into(), - )?; - new_root - } - }; - - advice_provider.push_stack(AdviceSource::Word(leaf_value))?; - advice_provider.push_stack(AdviceSource::Word(new_root))?; - - advice_provider.push_stack(AdviceSource::Value(ZERO))?; - advice_provider.push_stack(AdviceSource::Value(ZERO))?; - - advice_provider.push_stack(AdviceSource::Value(ZERO))?; - advice_provider.push_stack(AdviceSource::Value(ZERO))?; - } - Ok(HostResponse::None) - } -} +// HELPER METHODS +// -------------------------------------------------------------------------------------------- -/// Returns info about a lone sibling of a leaf specified by depth and index parameters in the -/// Tiered Sparse Merkle tree defined by the specified root. If no lone siblings exist for the -/// specified parameters, None is returned. -/// -/// A lone sibling is defined as a leaf which has a common root with the specified leaf at a -/// higher tier such that the subtree starting at this root contains only these two leaves. -/// -/// In addition to the leaf node itself, this also returns the index of the common root at a -/// higher tier. 
-fn find_lone_sibling( +fn get_smt_leaf_preimage( advice_provider: &A, - root: Word, - depth: u8, - index: &Felt, -) -> Result, ExecutionError> { - debug_assert!(matches!(depth, 16 | 32 | 48)); - - // if the leaf is on the first tier (depth=16), we don't care about lone siblings as they - // cannot be moved to a higher tier. - if depth == TieredSmt::TIER_SIZE { - return Ok(None); - } - - let empty = &EmptySubtreeRoots::empty_hashes(TieredSmt::MAX_DEPTH)[..=depth as usize]; - - // get the path to the leaf node - let path: Vec<_> = advice_provider.get_merkle_path(root, &depth.into(), index)?.into(); - - // traverse the path from the leaf up to the root, keeping track of all non-empty nodes; - // here we ignore the top 16 depths because lone siblings cannot be moved to a higher tier - // from tier at depth 16. - let mut non_empty_nodes = BTreeMap::new(); - for (depth, sibling) in (TieredSmt::TIER_SIZE..=depth).rev().zip(path.iter()) { - // map the depth of each node to the tier it would "round up" to. For example, 17 maps - // to tier 1, 32 also maps to tier 1, but 33 maps to tier 2. - let tier = (depth - 1) / TieredSmt::TIER_SIZE; - - // if the node is non-empty, insert it into the map, but if a node for the same tier - // is already in the map, stop traversing the tree. we do this because if two nodes in - // a given tier are non-empty a lone sibling cannot exist at this tier or any higher - // tier. to indicate the the tier cannot contain a lone sibling, we set the value in - // the map to None. - if sibling != &empty[depth as usize] { - match non_empty_nodes.entry(tier) { - Entry::Vacant(entry) => { - entry.insert(Some((depth, *sibling))); - } - Entry::Occupied(mut entry) => { - entry.insert(None); - break; - } - } - } - } - - // take the deepest non-empty node and check if its subtree contains just a single leaf - if let Some((_, Some((node_depth, node)))) = non_empty_nodes.pop_last() { - let mut node_index = NodeIndex::new(depth, index.as_int()).expect("invalid node index"); - node_index.move_up_to(node_depth); - let node_index = node_index.sibling(); + node: Word, +) -> Result, ExecutionError> { + let node_bytes = RpoDigest::from(node); - if let Some((mut leaf_index, leaf)) = - advice_provider.find_lone_leaf(node.into(), node_index, TieredSmt::MAX_DEPTH)? - { - // if the node's subtree does contain a single leaf, figure out to which depth - // we can move it up to. we do this by taking the next tier down from the tier - // which contained at least one non-empty node on the path from the original leaf - // up to the root. if there were no non-empty nodes on this path, we default to - // the first tier (i.e., depth 16). 
- let target_tier = non_empty_nodes.keys().last().map(|&t| t + 1).unwrap_or(1); - leaf_index.move_up_to(target_tier * TieredSmt::TIER_SIZE); + let kv_pairs = advice_provider + .get_mapped_values(&node_bytes) + .ok_or(ExecutionError::SmtNodeNotFound(node))?; - return Ok(Some((leaf.into(), leaf_index))); - } + if kv_pairs.len() % WORD_SIZE * 2 != 0 { + return Err(ExecutionError::SmtNodePreImageNotValid(node, kv_pairs.len())); } - Ok(None) -} - -// HELPER FUNCTIONS -// ================================================================================================ - -fn get_common_prefix(key1: &Word, key2: &Word) -> u8 { - let k1 = key1[3].as_int(); - let k2 = key2[3].as_int(); - (k1 ^ k2).leading_zeros() as u8 -} + Ok(kv_pairs + .chunks_exact(WORD_SIZE * 2) + .map(|kv_chunk| { + let key = [kv_chunk[0], kv_chunk[1], kv_chunk[2], kv_chunk[3]]; + let value = [kv_chunk[4], kv_chunk[5], kv_chunk[6], kv_chunk[7]]; -fn get_depth_flags(depth: u8) -> (Felt, Felt) { - let is_16_or_32 = if depth == 16 || depth == 32 { ONE } else { ZERO }; - let is_16_or_48 = if depth == 16 || depth == 48 { ONE } else { ZERO }; - (is_16_or_32, is_16_or_48) + (key, value) + }) + .collect()) } diff --git a/processor/src/host/advice/inputs.rs b/processor/src/host/advice/inputs.rs index 175cc36b0e..c289222aae 100644 --- a/processor/src/host/advice/inputs.rs +++ b/processor/src/host/advice/inputs.rs @@ -1,4 +1,7 @@ -use super::{BTreeMap, Felt, InnerNodeInfo, InputError, MerkleStore, Vec}; +use vm_core::crypto::hash::RpoDigest; + +use super::{AdviceMap, Felt, InnerNodeInfo, InputError, MerkleStore}; +use crate::utils::collections::*; // ADVICE INPUTS // ================================================================================================ @@ -18,7 +21,7 @@ use super::{BTreeMap, Felt, InnerNodeInfo, InputError, MerkleStore, Vec}; #[derive(Clone, Debug, Default)] pub struct AdviceInputs { stack: Vec, - map: BTreeMap<[u8; 32], Vec>, + map: AdviceMap, store: MerkleStore, } @@ -34,13 +37,10 @@ impl AdviceInputs { { let stack = iter .into_iter() - .map(|v| { - Felt::try_from(v).map_err(|_| { - InputError::NotFieldElement(v, "the provided value isn't a valid field element") - }) - }) + .map(|v| Felt::try_from(v).map_err(|e| InputError::NotFieldElement(v, e))) .collect::, _>>()?; - self.stack.extend(stack); + + self.stack.extend(stack.iter()); Ok(self) } @@ -56,7 +56,7 @@ impl AdviceInputs { /// Extends the map of values with the given argument, replacing previously inserted items. pub fn with_map(mut self, iter: I) -> Self where - I: IntoIterator)>, + I: IntoIterator)>, { self.map.extend(iter); self @@ -82,7 +82,7 @@ impl AdviceInputs { /// Extends the map of values with the given argument, replacing previously inserted items. pub fn extend_map(&mut self, iter: I) where - I: IntoIterator)>, + I: IntoIterator)>, { self.map.extend(iter); } @@ -95,6 +95,13 @@ impl AdviceInputs { self.store.extend(iter); } + /// Extends the contents of this instance with the contents of the other instance. + pub fn extend(&mut self, other: Self) { + self.stack.extend(other.stack); + self.map.extend(other.map); + self.store.extend(other.store.inner_nodes()); + } + // PUBLIC ACCESSORS // -------------------------------------------------------------------------------------------- @@ -104,8 +111,8 @@ impl AdviceInputs { } /// Fetch a values set mapped by the given key. 
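// Hypothetical usage of the updated `AdviceInputs` API from this hunk, assuming the
// crate imports are in scope: stack values go through `Felt::try_from` (propagating
// the conversion error), the map is now keyed by `RpoDigest`, and the new `extend`
// method merges another instance's stack, map, and Merkle store. The builder name
// `with_stack_values` and the `InputError` return type are assumptions (not fully
// visible in this hunk); the concrete values are made up for illustration.
fn combine_inputs() -> Result<AdviceInputs, InputError> {
    let mut inputs = AdviceInputs::default().with_stack_values([1u64, 2, 3])?;
    let extra = AdviceInputs::default().with_stack_values([4u64, 5])?;
    // merge the second set of advice inputs (stack, map, and Merkle store) into the first
    inputs.extend(extra);
    Ok(inputs)
}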
- pub fn mapped_values(&self, key: &[u8; 32]) -> Option<&[Felt]> { - self.map.get(key).map(Vec::as_slice) + pub fn mapped_values(&self, key: &RpoDigest) -> Option<&[Felt]> { + self.map.get(key) } /// Returns the underlying [MerkleStore]. @@ -118,7 +125,7 @@ impl AdviceInputs { /// Decomposes these `[Self]` into their raw components. #[allow(clippy::type_complexity)] - pub(crate) fn into_parts(self) -> (Vec, BTreeMap<[u8; 32], Vec>, MerkleStore) { + pub(crate) fn into_parts(self) -> (Vec, AdviceMap, MerkleStore) { let Self { stack, map, store } = self; (stack, map, store) } @@ -131,6 +138,6 @@ impl AdviceInputs { #[derive(Clone, Debug, Default)] pub struct AdviceInputs { pub stack: Vec, - pub map: BTreeMap<[u8; 32], Vec>, + pub map: AdviceMap, pub store: MerkleStore, } diff --git a/processor/src/host/advice/map.rs b/processor/src/host/advice/map.rs new file mode 100644 index 0000000000..62a0b393ce --- /dev/null +++ b/processor/src/host/advice/map.rs @@ -0,0 +1,57 @@ +use super::Felt; +use crate::utils::collections::*; +use vm_core::{crypto::hash::RpoDigest, utils::collections::btree_map::IntoIter}; + +// ADVICE MAP +// ================================================================================================ + +/// Defines a set of non-deterministic (advice) inputs which the VM can access by their keys. +/// +/// Each key maps to one or more field element. To access the elements, the VM can move the values +/// associated with a given key onto the advice stack using `adv.push_mapval` instruction. The VM +/// can also insert new values into the advice map during execution. +#[derive(Debug, Clone, Default)] +pub struct AdviceMap(BTreeMap>); + +impl AdviceMap { + /// Creates a new advice map. + pub fn new() -> Self { + Self(BTreeMap::>::new()) + } + + /// Returns the values associated with given key. + pub fn get(&self, key: &RpoDigest) -> Option<&[Felt]> { + self.0.get(key).map(|v| v.as_slice()) + } + + /// Inserts a key value pair in the advice map and returns the inserted value. + pub fn insert(&mut self, key: RpoDigest, value: Vec) -> Option> { + self.0.insert(key, value) + } + + /// Removes the value associated with the key and returns the removed element. 
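// Hypothetical usage of the new `AdviceMap` type defined in this file: keys are now
// `RpoDigest` values rather than `[u8; 32]` arrays. Assumes `AdviceMap`, `RpoDigest`,
// and `Felt` are in scope and that `RpoDigest` implements `Default` (an assumption,
// not shown here); the values are made up for illustration.
fn advice_map_example() {
    let mut map = AdviceMap::new();
    map.insert(RpoDigest::default(), vec![Felt::new(1), Felt::new(2)]);
    // values are retrieved as a borrowed slice of field elements
    assert_eq!(map.get(&RpoDigest::default()).map(|v| v.len()), Some(2));
    // removing the entry hands the owned vector back to the caller
    assert_eq!(map.remove(RpoDigest::default()).map(|v| v.len()), Some(2));
}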
+ pub fn remove(&mut self, key: RpoDigest) -> Option> { + self.0.remove(&key) + } +} + +impl From>> for AdviceMap { + fn from(value: BTreeMap>) -> Self { + Self(value) + } +} + +impl IntoIterator for AdviceMap { + type Item = (RpoDigest, Vec); + type IntoIter = IntoIter>; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +impl Extend<(RpoDigest, Vec)> for AdviceMap { + fn extend)>>(&mut self, iter: T) { + self.0.extend(iter) + } +} diff --git a/processor/src/host/advice/mod.rs b/processor/src/host/advice/mod.rs index f7bb0e2579..c28c53c00b 100644 --- a/processor/src/host/advice/mod.rs +++ b/processor/src/host/advice/mod.rs @@ -1,15 +1,12 @@ use super::HostResponse; -use crate::{ExecutionError, Felt, InputError, ProcessState, StarkField, Word}; +use crate::{ExecutionError, Felt, InputError, ProcessState, Word}; use core::borrow::Borrow; use vm_core::{ crypto::{ hash::RpoDigest, merkle::{InnerNodeInfo, MerklePath, MerkleStore, NodeIndex, StoreNode}, }, - utils::{ - collections::{BTreeMap, KvMap, RecordingMap, Vec}, - IntoBytes, - }, + utils::collections::*, AdviceInjector, SignatureKind, }; @@ -27,6 +24,9 @@ pub use providers::{MemAdviceProvider, RecAdviceProvider}; mod source; pub use source::AdviceSource; +mod map; +pub use map::AdviceMap; + // ADVICE PROVIDER // ================================================================================================ @@ -63,12 +63,18 @@ pub trait AdviceProvider: Sized { key_offset, } => self.copy_map_value_to_adv_stack(process, *include_len, *key_offset), AdviceInjector::UpdateMerkleNode => self.update_operand_stack_merkle_node(process), - AdviceInjector::DivU64 => self.push_u64_div_result(process), + AdviceInjector::U64Div => self.push_u64_div_result(process), AdviceInjector::Ext2Inv => self.push_ext2_inv_result(process), AdviceInjector::Ext2Intt => self.push_ext2_intt_result(process), AdviceInjector::SmtGet => self.push_smtget_inputs(process), AdviceInjector::SmtSet => self.push_smtset_inputs(process), AdviceInjector::SmtPeek => self.push_smtpeek_result(process), + AdviceInjector::U32Clz => self.push_leading_zeros(process), + AdviceInjector::U32Ctz => self.push_trailing_zeros(process), + AdviceInjector::U32Clo => self.push_leading_ones(process), + AdviceInjector::U32Cto => self.push_trailing_ones(process), + AdviceInjector::ILog2 => self.push_ilog2(process), + AdviceInjector::MemToMap => self.insert_mem_values_into_adv_map(process), AdviceInjector::HdwordToMap { domain } => { self.insert_hdword_into_adv_map(process, *domain) @@ -362,6 +368,85 @@ pub trait AdviceProvider: Sized { injectors::adv_stack_injectors::push_signature(self, process, kind) } + /// Pushes the number of the leading zeros of the top stack element onto the advice stack. + /// + /// Inputs: + /// Operand stack: [n, ...] + /// Advice stack: [...] + /// + /// Outputs: + /// Operand stack: [n, ...] + /// Advice stack: [leading_zeros, ...] + fn push_leading_zeros( + &mut self, + process: &S, + ) -> Result { + injectors::adv_stack_injectors::push_leading_zeros(self, process) + } + + /// Pushes the number of the trailing zeros of the top stack element onto the advice stack. + /// + /// Inputs: + /// Operand stack: [n, ...] + /// Advice stack: [...] + /// + /// Outputs: + /// Operand stack: [n, ...] + /// Advice stack: [trailing_zeros, ...] 
+ fn push_trailing_zeros( + &mut self, + process: &S, + ) -> Result { + injectors::adv_stack_injectors::push_trailing_zeros(self, process) + } + + /// Pushes the number of the leading ones of the top stack element onto the advice stack. + /// + /// Inputs: + /// Operand stack: [n, ...] + /// Advice stack: [...] + /// + /// Outputs: + /// Operand stack: [n, ...] + /// Advice stack: [leading_ones, ...] + fn push_leading_ones( + &mut self, + process: &S, + ) -> Result { + injectors::adv_stack_injectors::push_leading_ones(self, process) + } + + /// Pushes the number of the trailing ones of the top stack element onto the advice stack. + /// + /// Inputs: + /// Operand stack: [n, ...] + /// Advice stack: [...] + /// + /// Outputs: + /// Operand stack: [n, ...] + /// Advice stack: [trailing_ones, ...] + fn push_trailing_ones( + &mut self, + process: &S, + ) -> Result { + injectors::adv_stack_injectors::push_trailing_ones(self, process) + } + + /// Pushes the base 2 logarithm of the top stack element, rounded down. + /// Inputs: + /// Operand stack: [n, ...] + /// Advice stack: [...] + /// + /// Outputs: + /// Operand stack: [n, ...] + /// Advice stack: [ilog2(n), ...] + /// + /// # Errors + /// Returns an error if the logarithm argument (top stack element) equals ZERO. + fn push_ilog2(&mut self, process: &S) -> Result { + injectors::adv_stack_injectors::push_ilog2(self, process) + } + // DEFAULT MERKLE STORE INJECTORS // -------------------------------------------------------------------------------------------- @@ -424,39 +509,6 @@ pub trait AdviceProvider: Sized { // DEFAULT SMT INJECTORS // -------------------------------------------------------------------------------------------- - /// Pushes values onto the advice stack which are required for successful retrieval of a - /// value from a Sparse Merkle Tree data structure. - /// - /// The Sparse Merkle Tree is tiered, meaning it will have leaf depths in `{16, 32, 48, 64}`. - /// The depth flags define the tier on which the leaf is located. - /// - /// Inputs: - /// Operand stack: [KEY, ROOT, ...] - /// Advice stack: [...] - /// - /// Outputs: - /// Operand stack: [KEY, ROOT, ...] - /// Advice stack: [f0, f1, K, V, f2] - /// - /// Where: - /// - f0 is a boolean flag set to `1` if the depth is `16` or `48`. - /// - f1 is a boolean flag set to `1` if the depth is `16` or `32`. - /// - K is the key; will be zeroed if the tree don't contain a mapped value for the key. - /// - V is the value word; will be zeroed if the tree don't contain a mapped value for the key. - /// - f2 is a boolean flag set to `1` if the key is not zero. - /// - /// # Errors - /// Returns an error if the provided Merkle root doesn't exist on the advice provider. - /// - /// # Panics - /// Will panic as unimplemented if the target depth is `64`. - fn push_smtget_inputs( - &mut self, - process: &S, - ) -> Result { - injectors::smt::push_smtget_inputs(self, process) - } - /// Pushes onto the advice stack the value associated with the specified key in a Sparse /// Merkle Tree defined by the specified root. /// @@ -483,33 +535,15 @@ pub trait AdviceProvider: Sized { injectors::smt::push_smtpeek_result(self, process) } - /// Pushes values onto the advice stack which are required for successful insertion of a - /// key-value pair into a Sparse Merkle Tree data structure. - /// - /// The Sparse Merkle Tree is tiered, meaning it will have leaf depths in `{16, 32, 48, 64}`. - /// - /// Inputs: - /// Operand stack: [VALUE, KEY, ROOT, ...] - /// Advice stack: [...] 
- /// - /// Outputs: - /// Operand stack: [OLD_VALUE, NEW_ROOT, ...] - /// Advice stack: see comments for specialized handlers below. - /// - /// Where: - /// - ROOT and NEW_ROOT are the roots of the TSMT before and after the insert respectively. - /// - VALUE is the value to be inserted. - /// - OLD_VALUE is the value previously associated with the specified KEY. - /// - /// # Errors - /// Returns an error if: - /// - The Merkle store does not contain a node with the specified root. - /// - The Merkle store does not contain all nodes needed to validate the path between the root - /// and the relevant TSMT nodes. - /// - The advice map does not contain required data about TSMT leaves to be modified. - /// - /// # Panics - /// Will panic as unimplemented if the target depth is `64`. + /// Currently unimplemented + fn push_smtget_inputs( + &mut self, + process: &S, + ) -> Result { + injectors::smt::push_smtget_inputs(self, process) + } + + /// Currently unimplemented fn push_smtset_inputs( &mut self, process: &S, @@ -579,7 +613,7 @@ pub trait AdviceProvider: Sized { // -------------------------------------------------------------------------------------------- /// Returns a reference to the value(s) associated with the specified key in the advice map. - fn get_mapped_values(&self, key: &[u8; 32]) -> Option<&[Felt]>; + fn get_mapped_values(&self, key: &RpoDigest) -> Option<&[Felt]>; /// Inserts the provided value into the advice map under the specified key. /// @@ -736,7 +770,7 @@ where T::get_signature(self, kind, pub_key, msg) } - fn get_mapped_values(&self, key: &[u8; 32]) -> Option<&[Felt]> { + fn get_mapped_values(&self, key: &RpoDigest) -> Option<&[Felt]> { T::get_mapped_values(self, key) } diff --git a/processor/src/host/advice/providers.rs b/processor/src/host/advice/providers.rs index 21ee51e373..3088d90526 100644 --- a/processor/src/host/advice/providers.rs +++ b/processor/src/host/advice/providers.rs @@ -1,9 +1,8 @@ use super::{ - injectors, AdviceInputs, AdviceProvider, AdviceSource, BTreeMap, ExecutionError, Felt, - IntoBytes, KvMap, MerklePath, MerkleStore, NodeIndex, RecordingMap, RpoDigest, StarkField, - StoreNode, Vec, Word, + injectors, AdviceInputs, AdviceProvider, AdviceSource, ExecutionError, Felt, MerklePath, + MerkleStore, NodeIndex, RpoDigest, StoreNode, Word, }; -use crate::ProcessState; +use crate::{utils::collections::*, ProcessState}; use vm_core::SignatureKind; // TYPE ALIASES @@ -12,8 +11,8 @@ use vm_core::SignatureKind; type SimpleMerkleMap = BTreeMap; type RecordingMerkleMap = RecordingMap; -type SimpleAdviceMap = BTreeMap<[u8; 32], Vec>; -type RecordingAdviceMap = RecordingMap<[u8; 32], Vec>; +type SimpleAdviceMap = BTreeMap>; +type RecordingAdviceMap = RecordingMap>; // BASE ADVICE PROVIDER // ================================================================================================ @@ -23,7 +22,7 @@ type RecordingAdviceMap = RecordingMap<[u8; 32], Vec>; #[derive(Debug, Clone, Default)] pub struct BaseAdviceProvider where - M: KvMap<[u8; 32], Vec>, + M: KvMap>, S: KvMap, { stack: Vec, @@ -33,7 +32,7 @@ where impl From for BaseAdviceProvider where - M: KvMap<[u8; 32], Vec>, + M: KvMap>, S: KvMap, { fn from(inputs: AdviceInputs) -> Self { @@ -49,7 +48,7 @@ where impl AdviceProvider for BaseAdviceProvider where - M: KvMap<[u8; 32], Vec>, + M: KvMap>, S: KvMap, { // ADVICE STACK @@ -92,14 +91,13 @@ where self.stack.extend(word.iter().rev()); } AdviceSource::Map { key, include_len } => { - let values = self - .map - .get(&key.into_bytes()) - 
.ok_or(ExecutionError::AdviceMapKeyNotFound(key))?; + let values = + self.map.get(&key.into()).ok_or(ExecutionError::AdviceMapKeyNotFound(key))?; self.stack.extend(values.iter().rev()); if include_len { - self.stack.push(Felt::from(values.len() as u64)); + self.stack + .push(Felt::try_from(values.len() as u64).expect("value length too big")); } } } @@ -115,7 +113,7 @@ where ) -> Result, ExecutionError> { let pk_sk = self .map - .get(&pub_key.into_bytes()) + .get(&pub_key.into()) .ok_or(ExecutionError::AdviceMapKeyNotFound(pub_key))?; match kind { @@ -126,12 +124,12 @@ where // ADVICE MAP // -------------------------------------------------------------------------------------------- - fn get_mapped_values(&self, key: &[u8; 32]) -> Option<&[Felt]> { + fn get_mapped_values(&self, key: &RpoDigest) -> Option<&[Felt]> { self.map.get(key).map(|v| v.as_slice()) } fn insert_into_map(&mut self, key: Word, values: Vec) -> Result<(), ExecutionError> { - self.map.insert(key.into_bytes(), values); + self.map.insert(key.into(), values); Ok(()) } @@ -275,7 +273,7 @@ impl MemAdviceProvider { } /// Pass-through implementations of [AdviceProvider] methods. -/// +/// /// TODO: potentially do this via a macro. #[rustfmt::skip] impl AdviceProvider for MemAdviceProvider { @@ -303,7 +301,7 @@ impl AdviceProvider for MemAdviceProvider { self.provider.get_signature(kind, pub_key, msg) } - fn get_mapped_values(&self, key: &[u8; 32]) -> Option<&[Felt]> { + fn get_mapped_values(&self, key: &RpoDigest) -> Option<&[Felt]> { self.provider.get_mapped_values(key) } @@ -400,7 +398,7 @@ impl RecAdviceProvider { } /// Pass-through implementations of [AdviceProvider] methods. -/// +/// /// TODO: potentially do this via a macro. #[rustfmt::skip] impl AdviceProvider for RecAdviceProvider { @@ -423,12 +421,12 @@ impl AdviceProvider for RecAdviceProvider { fn insert_into_map(&mut self, key: Word, values: Vec) -> Result<(), ExecutionError> { self.provider.insert_into_map(key, values) } - + fn get_signature(&self, kind: SignatureKind, pub_key: Word, msg: Word) -> Result, ExecutionError> { self.provider.get_signature(kind, pub_key, msg) } - fn get_mapped_values(&self, key: &[u8; 32]) -> Option<&[Felt]> { + fn get_mapped_values(&self, key: &RpoDigest) -> Option<&[Felt]> { self.provider.get_mapped_values(key) } diff --git a/processor/src/host/debug.rs b/processor/src/host/debug.rs index 0b90230ac3..1cd9810e13 100644 --- a/processor/src/host/debug.rs +++ b/processor/src/host/debug.rs @@ -1,52 +1,233 @@ -use super::{Felt, ProcessState}; -use crate::Vec; -use vm_core::DebugOptions; +use super::ProcessState; +use crate::{system::ContextId, utils::collections::*}; +use vm_core::{DebugOptions, Word}; // DEBUG HANDLER // ================================================================================================ /// Prints the info about the VM state specified by the provided options to stdout. 
pub fn print_debug_info(process: &S, options: &DebugOptions) { - let clk = process.clk(); + let printer = Printer::new(process.clk(), process.ctx(), process.fmp()); match options { DebugOptions::StackAll => { - let stack = process.get_stack_state(); - let n = stack.len(); - print_vm_stack(clk, stack, n); + printer.print_vm_stack(process, None); } DebugOptions::StackTop(n) => { - let stack = process.get_stack_state(); - print_vm_stack(clk, stack, *n as usize); + printer.print_vm_stack(process, Some(*n as usize)); } + DebugOptions::MemAll => { + printer.print_mem_all(process); + } + DebugOptions::MemInterval(n, m) => { + printer.print_mem_interval(process, *n, *m); + } + DebugOptions::LocalInterval(n, m, num_locals) => { + printer.print_local_interval(process, (*n as u32, *m as u32), *num_locals as u32); + } + } +} + +// HELPER FUNCTIONS +// ================================================================================================ + +struct Printer { + clk: u32, + ctx: ContextId, + fmp: u32, +} + +impl Printer { + fn new(clk: u32, ctx: ContextId, fmp: u64) -> Self { + Self { + clk, + ctx, + fmp: fmp as u32, + } + } + + /// Prints the number of stack items specified by `n` if it is provided, otherwise prints + /// the whole stack. + fn print_vm_stack(&self, process: &S, n: Option) { + let stack = process.get_stack_state(); + + // determine how many items to print out + let num_items = core::cmp::min(stack.len(), n.unwrap_or(stack.len())); + + // print all items except for the last one + println!("Stack state before step {}:", self.clk); + for (i, element) in stack.iter().take(num_items - 1).enumerate() { + println!("├── {i:>2}: {element}"); + } + + // print the last item, and in case the stack has more items, print the total number of + // un-printed items + let i = num_items - 1; + if num_items == stack.len() { + println!("└── {i:>2}: {}\n", stack[i]); + } else { + println!("├── {i:>2}: {}", stack[i]); + println!("└── ({} more items)\n", stack.len() - num_items); + } + } + + /// Prints the whole memory state at the cycle `clk` in context `ctx`. + fn print_mem_all(&self, process: &S) { + let mem = process.get_mem_state(self.ctx); + let padding = + mem.iter().fold(0, |max, value| word_elem_max_len(Some(value.1)).max(max)) as usize; + + println!("Memory state before step {} for the context {}:", self.clk, self.ctx); + + // print the main part of the memory (wihtout the last value) + for (addr, value) in mem.iter().take(mem.len() - 1) { + print_mem_address(*addr as u32, Some(*value), false, false, padding); + } + + // print the last memory value + if let Some((addr, value)) = mem.last() { + print_mem_address(*addr as u32, Some(*value), true, false, padding); + } + } + + /// Prints memory values in the provided addresses interval. + fn print_mem_interval(&self, process: &S, n: u32, m: u32) { + let mut mem_interval = Vec::new(); + for addr in n..m + 1 { + mem_interval.push((addr, process.get_mem_value(self.ctx, addr))); + } + + if n == m { + println!( + "Memory state before step {} for the context {} at address {}:", + self.clk, self.ctx, n + ) + } else { + println!( + "Memory state before step {} for the context {} in the interval [{}, {}]:", + self.clk, self.ctx, n, m + ) + }; + + print_interval(mem_interval, false); + } + + /// Prints locals in provided indexes interval. 
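// Sketch of the address arithmetic used by `print_local_interval` below: procedure
// locals occupy the `num_locals` consecutive memory addresses ending at `fmp`, so
// local `index` is read from `fmp - num_locals + 1 + index`. The sketch uses plain
// integers; the real code reads the values through `ProcessState::get_mem_value`.
fn local_addr(fmp: u32, num_locals: u32, index: u32) -> u32 {
    let local_memory_offset = fmp - num_locals + 1;
    index + local_memory_offset
}

fn main() {
    let fmp = (1u32 << 30) + 4; // hypothetical frame pointer with 5 locals allocated
    assert_eq!(local_addr(fmp, 5, 0), fmp - 4); // first local
    assert_eq!(local_addr(fmp, 5, 4), fmp); // last local
}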
+ fn print_local_interval<S: ProcessState>( + &self, + process: &S, + interval: (u32, u32), + num_locals: u32, + ) { + let mut local_mem_interval = Vec::new(); + let local_memory_offset = self.fmp - num_locals + 1; + + // in case the start index is 0 and the end index is 2^16 - 1, we should print all available locals. + let (start, end) = if interval.0 == 0 && interval.1 == u16::MAX as u32 { + (0, num_locals - 1) + } else { + interval + }; + for index in start..end + 1 { + local_mem_interval + .push((index, process.get_mem_value(self.ctx, index + local_memory_offset))) + } + + if interval.0 == 0 && interval.1 == u16::MAX as u32 { + println!("State of procedure locals before step {}:", self.clk) + } else if interval.0 == interval.1 { + println!("State of procedure local at index {} before step {}:", interval.0, self.clk,) + } else { + println!( + "State of procedure locals [{}, {}] before step {}:", + interval.0, interval.1, self.clk, + ) + }; + + print_interval(local_mem_interval, true); } } // HELPER FUNCTIONS // ================================================================================================ -#[cfg(feature = "std")] -fn print_vm_stack(clk: u32, stack: Vec<Felt>, n: usize) { - // determine how many items to print out - let num_items = core::cmp::min(stack.len(), n); +/// Prints the provided memory interval. +/// +/// If `is_local` is true, the output addresses are formatted as decimal values, otherwise as hex +/// strings. +fn print_interval(mem_interval: Vec<(u32, Option<Word>)>, is_local: bool) { + let padding = + mem_interval.iter().fold(0, |max, value| word_elem_max_len(value.1).max(max)) as usize; + + // print the main part of the memory (without the last value) + for (addr, value) in mem_interval.iter().take(mem_interval.len() - 1) { + print_mem_address(*addr, *value, false, is_local, padding) + } - // print all items except for the last one - println!("Stack state before step {clk}:"); - for (i, element) in stack.iter().take(num_items - 1).enumerate() { - println!("├── {i:>2}: {element}"); + // print the last memory value + if let Some((addr, value)) = mem_interval.last() { + print_mem_address(*addr, *value, true, is_local, padding); } +} - // print the last item, and in case the stack has more items, print the total number of - // un-printed items - let i = num_items - 1; - if num_items == stack.len() { - println!("└── {i:>2}: {}", stack[i]); +/// Prints a single memory value with its address. +/// +/// If `is_local` is true, the output address is formatted as a decimal value, otherwise as a hex +/// string. +fn print_mem_address( + addr: u32, + value: Option<Word>, + is_last: bool, + is_local: bool, + padding: usize, +) { + if let Some(value) = value { + if is_last { + if is_local { + print!("└── {addr:>5}: "); + } else { + print!("└── {addr:#010x}: "); + } + print_word(value, padding); + println!(); + } else { + if is_local { + print!("├── {addr:>5}: "); + } else { + print!("├── {addr:#010x}: "); + } + print_word(value, padding); + } + } else if is_last { + if is_local { + println!("└── {addr:>5}: EMPTY\n"); + } else { + println!("└── {addr:#010x}: EMPTY\n"); + } + } else if is_local { + println!("├── {addr:>5}: EMPTY"); + } else { - println!("├── {i:>2}: {}", stack[i]); - println!("└── ({} more items)", stack.len() - num_items); + println!("├── {addr:#010x}: EMPTY"); } } -#[cfg(not(feature = "std"))] -fn print_vm_stack(_clk: u32, _stack: Vec<Felt>, _n: usize) { - // in no_std environments, this is a NOOP +/// Prints the provided Word with specified padding.
+fn print_word(value: Word, padding: usize) { + println!( + "[{:>width$}, {:>width$}, {:>width$}, {:>width$}]", + value[0].as_int(), + value[1].as_int(), + value[2].as_int(), + value[3].as_int(), + width = padding + ) +} + +/// Returns the maximum length among the word elements. +fn word_elem_max_len(word: Option<Word>) -> u32 { + if let Some(word) = word { + word.iter() + .fold(0, |max, value| (value.as_int().checked_ilog10().unwrap_or(1) + 1).max(max)) + } else { + 0 + } } diff --git a/processor/src/host/mod.rs b/processor/src/host/mod.rs index d1bf11e0c5..c2817ff55b 100644 --- a/processor/src/host/mod.rs +++ b/processor/src/host/mod.rs @@ -5,6 +5,7 @@ use vm_core::{crypto::merkle::MerklePath, AdviceInjector, DebugOptions, Word}; pub(super) mod advice; use advice::{AdviceExtractor, AdviceProvider}; +#[cfg(feature = "std")] mod debug; // HOST TRAIT @@ -37,6 +38,9 @@ pub trait Host { injector: AdviceInjector, ) -> Result<HostResponse, ExecutionError>; + // PROVIDED METHODS + // -------------------------------------------------------------------------------------------- + /// Creates a "by reference" host for this instance. /// /// The returned adapter also implements [Host] and will simply mutably borrow this @@ -51,8 +55,21 @@ pub trait Host { self } - // PROVIDED METHODS - // -------------------------------------------------------------------------------------------- + /// Handles the event emitted from the VM. + fn on_event<S: ProcessState>( + &mut self, + process: &S, + event_id: u32, + ) -> Result<HostResponse, ExecutionError> { + #[cfg(feature = "std")] + println!( + "Event with id {} emitted at step {} in context {}", + event_id, + process.clk(), + process.ctx() + ); + Ok(HostResponse::None) + } /// Handles the debug request from the VM. fn on_debug<S: ProcessState>( @@ -60,10 +77,36 @@ process: &S, options: &DebugOptions, ) -> Result<HostResponse, ExecutionError> { + #[cfg(feature = "std")] debug::print_debug_info(process, options); Ok(HostResponse::None) } + /// Handles the trace emitted from the VM. + fn on_trace<S: ProcessState>( + &mut self, + process: &S, + trace_id: u32, + ) -> Result<HostResponse, ExecutionError> { + #[cfg(feature = "std")] + println!( + "Trace with id {} emitted at step {} in context {}", + trace_id, + process.clk(), + process.ctx() + ); + Ok(HostResponse::None) + } + + /// Handles the failure of the assertion instruction. + fn on_assert_failed<S: ProcessState>(&mut self, process: &S, err_code: u32) -> ExecutionError { + ExecutionError::FailedAssertion { + clk: process.clk(), + err_code, + err_msg: None, + } + } + /// Pops an element from the advice stack and returns it. /// /// # Errors @@ -138,6 +181,34 @@ where ) -> Result<HostResponse, ExecutionError> { H::set_advice(self, process, injector) } + + fn on_debug<S: ProcessState>( + &mut self, + process: &S, + options: &DebugOptions, + ) -> Result<HostResponse, ExecutionError> { + H::on_debug(self, process, options) + } + + fn on_event<S: ProcessState>( + &mut self, + process: &S, + event_id: u32, + ) -> Result<HostResponse, ExecutionError> { + H::on_event(self, process, event_id) + } + + fn on_trace<S: ProcessState>( + &mut self, + process: &S, + trace_id: u32, + ) -> Result<HostResponse, ExecutionError> { + H::on_trace(self, process, trace_id) + } + + fn on_assert_failed<S: ProcessState>(&mut self, process: &S, err_code: u32) -> ExecutionError { + H::on_assert_failed(self, process, err_code) + } } // HOST RESPONSE @@ -192,7 +263,7 @@ impl From<HostResponse> for Felt { // DEFAULT HOST IMPLEMENTATION // ================================================================================================ -/// TODO: add comments +/// A default [Host] implementation that provides the essential functionality required by the VM.
pub struct DefaultHost { adv_provider: A, } diff --git a/processor/src/lib.rs b/processor/src/lib.rs index 45dc1fed8e..ec5412c4a9 100644 --- a/processor/src/lib.rs +++ b/processor/src/lib.rs @@ -12,25 +12,25 @@ use miden_air::trace::{ }; pub use miden_air::{ExecutionOptions, ExecutionOptionsError}; pub use vm_core::{ - chiplets::hasher::Digest, errors::InputError, utils::DeserializationError, AdviceInjector, - AssemblyOp, Kernel, Operation, Program, ProgramInfo, QuadExtension, StackInputs, StackOutputs, - Word, EMPTY_WORD, ONE, ZERO, + chiplets::hasher::Digest, crypto::merkle::SMT_DEPTH, errors::InputError, + utils::DeserializationError, AdviceInjector, AssemblyOp, Kernel, Operation, Program, + ProgramInfo, QuadExtension, StackInputs, StackOutputs, Word, EMPTY_WORD, ONE, ZERO, }; use vm_core::{ code_blocks::{ Call, CodeBlock, Dyn, Join, Loop, OpBatch, Span, Split, OP_BATCH_SIZE, OP_GROUP_SIZE, }, - utils::collections::{BTreeMap, Vec}, - CodeBlockTable, Decorator, DecoratorIterator, Felt, FieldElement, StackTopState, StarkField, + utils::collections::*, + CodeBlockTable, Decorator, DecoratorIterator, Felt, FieldElement, StackTopState, }; -use winter_prover::ColMatrix; +pub use winter_prover::matrix::ColMatrix; mod operations; mod system; use system::System; -pub use system::{FMP_MIN, SYSCALL_FMP_MIN}; +pub use system::{ContextId, FMP_MIN, SYSCALL_FMP_MIN}; mod decoder; use decoder::Decoder; @@ -43,8 +43,11 @@ use range::RangeChecker; mod host; pub use host::{ - advice::{AdviceInputs, AdviceProvider, AdviceSource, MemAdviceProvider, RecAdviceProvider}, - DefaultHost, Host, + advice::{ + AdviceExtractor, AdviceInputs, AdviceMap, AdviceProvider, AdviceSource, MemAdviceProvider, + RecAdviceProvider, + }, + DefaultHost, Host, HostResponse, }; mod chiplets; @@ -90,7 +93,7 @@ type SysTrace = [Vec; SYS_TRACE_WIDTH]; pub struct DecoderTrace { trace: [Vec; DECODER_TRACE_WIDTH], - aux_trace_hints: decoder::AuxTraceHints, + aux_builder: decoder::AuxTraceBuilder, } pub struct StackTrace { @@ -113,6 +116,7 @@ pub struct ChipletsTrace { /// Returns an execution trace resulting from executing the provided program against the provided /// inputs. +#[tracing::instrument("execute_program", skip_all)] pub fn execute( program: &Program, stack_inputs: StackInputs, @@ -162,6 +166,7 @@ where chiplets: Chiplets, host: RefCell, max_cycles: u32, + enable_tracing: bool, } impl Process @@ -182,7 +187,13 @@ where /// Creates a new process with provided inputs and debug options enabled. pub fn new_debug(kernel: Kernel, stack_inputs: StackInputs, host: H) -> Self { - Self::initialize(kernel, stack_inputs, host, true, ExecutionOptions::default()) + Self::initialize( + kernel, + stack_inputs, + host, + true, + ExecutionOptions::default().with_tracing(), + ) } fn initialize( @@ -200,6 +211,7 @@ where chiplets: Chiplets::new(kernel), host: RefCell::new(host), max_cycles: execution_options.max_cycles(), + enable_tracing: execution_options.enable_tracing(), } } @@ -387,7 +399,7 @@ where // can happen for decorators appearing after all operations in a block. these decorators // are executed after SPAN block is closed to make sure the VM clock cycle advances beyond // the last clock cycle of the SPAN block ops. 
- if let Some(decorator) = decorators.next() { + for decorator in decorators { self.execute_decorator(decorator)?; } @@ -495,6 +507,14 @@ where self.decoder.append_asmop(self.system.clk(), assembly_op.clone()); } } + Decorator::Event(id) => { + self.host.borrow_mut().on_event(self, *id)?; + } + Decorator::Trace(id) => { + if self.enable_tracing { + self.host.borrow_mut().on_trace(self, *id)?; + } + } } Ok(()) } @@ -527,7 +547,10 @@ pub trait ProcessState { fn clk(&self) -> u32; /// Returns the current execution context ID. - fn ctx(&self) -> u32; + fn ctx(&self) -> ContextId; + + /// Returns the current value of the free memory pointer. + fn fmp(&self) -> u64; /// Returns the value located at the specified position on the stack at the current clock cycle. fn get_stack_item(&self, pos: usize) -> Felt; @@ -550,7 +573,14 @@ pub trait ProcessState { /// Returns a word located at the specified context/address, or None if the address hasn't /// been accessed previously. - fn get_mem_value(&self, ctx: u32, addr: u32) -> Option; + fn get_mem_value(&self, ctx: ContextId, addr: u32) -> Option; + + /// Returns the entire memory state for the specified execution context at the current clock + /// cycle. + /// + /// The state is returned as a vector of (address, value) tuples, and includes addresses which + /// have been accessed at least once. + fn get_mem_state(&self, ctx: ContextId) -> Vec<(u64, Word)>; } impl ProcessState for Process { @@ -558,10 +588,14 @@ impl ProcessState for Process { self.system.clk() } - fn ctx(&self) -> u32 { + fn ctx(&self) -> ContextId { self.system.ctx() } + fn fmp(&self) -> u64 { + self.system.fmp().as_int() + } + fn get_stack_item(&self, pos: usize) -> Felt { self.stack.get(pos) } @@ -574,9 +608,13 @@ impl ProcessState for Process { self.stack.get_state_at(self.system.clk()) } - fn get_mem_value(&self, ctx: u32, addr: u32) -> Option { + fn get_mem_value(&self, ctx: ContextId, addr: u32) -> Option { self.chiplets.get_mem_value(ctx, addr) } + + fn get_mem_state(&self, ctx: ContextId) -> Vec<(u64, Word)> { + self.chiplets.get_mem_state_at(ctx, self.system.clk()) + } } // INTERNALS @@ -594,4 +632,5 @@ where pub chiplets: Chiplets, pub host: RefCell, pub max_cycles: u32, + pub enable_tracing: bool, } diff --git a/processor/src/operations/comb_ops.rs b/processor/src/operations/comb_ops.rs new file mode 100644 index 0000000000..f64b95e3f8 --- /dev/null +++ b/processor/src/operations/comb_ops.rs @@ -0,0 +1,367 @@ +use vm_core::{Felt, Operation, ONE, ZERO}; + +use crate::{ExecutionError, Host, Process, QuadFelt}; + +// RANDOM LINEAR COMBINATION OPERATIONS +// ================================================================================================ + +impl Process +where + H: Host, +{ + // COMBINE VALUES USING RANDOMNESS + // -------------------------------------------------------------------------------------------- + /// Performs a single step in the computation of the random linear combination: + /// + /// \sum_{i=0}^k{\alpha_i \cdot \left(\frac{T_i(x) - T_i(z)}{x - z} + + /// \frac{T_i(x) - T_i(g \cdot z)}{x - g \cdot z} \right)} + /// + /// The instruction computes the numerators $\alpha_i \cdot (T_i(x) - T_i(z))$ and + /// $\alpha_i \cdot (T_i(x) - T_i(g \cdot z))$ and stores the values in two accumulators $p$ + /// and $r$, respectively. This instruction is specialized to main trace columns i.e. + /// the values $T_i(x)$ are base field elements. 
+ /// + /// The instruction is used in the context of STARK proof verification in order to compute + /// the queries of the DEEP composition polynomial for FRI. It works in combination with + /// the `mem_stream` instruction where it is called 8 times in a row for each call to + /// `mem_stream`. + /// + /// The stack transition of the instruction can be visualized as follows: + /// + /// Input: + /// + /// +------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+---+ + /// | T7 | T6 | T5 | T4 | T3 | T2 | T1 | T0 | p1 | p0 | r1 | r0 |x_addr|z_addr|a_addr| - | + /// +------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+---+ + /// + /// + /// Output: + /// + /// +------+------+------+------+------+------+------+------+------+------+------+------+------+--------+--------+---+ + /// | T0 | T7 | T6 | T5 | T4 | T3 | T2 | T1 | p1' | p0' | r1' | r0' |x_addr|z_addr+1|a_addr+1| - | + /// +------+------+------+------+------+------+------+------+------+------+------+------+------+--------+--------+---+ + /// + /// + /// Here: + /// + /// 1. Ti for i in 0..=7 stands for the value of the i-th trace polynomial for the current + /// query i.e. T_i(x). + /// 2. (p0, p1) stands for an extension field element accumulating the values for the quotients + /// with common denominator (x - z). + /// 3. (r0, r1) stands for an extension field element accumulating the values for the quotients + /// with common denominator (x - gz). + /// 4. x_addr is the memory address from which we are loading the Ti's using the MSTREAM + /// instruction. + /// 5. z_addr is the memory address of the i-th OOD evaluations at z and gz + /// i.e. T_i(z):= (T_i(z)0, T_i(z)1) and T_i(gz):= (T_i(gz)0, T_i(gz)1). + /// 6. a_addr is the memory address of the i-th random element alpha_i used in batching + /// the trace polynomial quotients. + /// + /// The instruction also makes use of the helper registers to hold the values of T_i(z), T_i(gz) + /// and alpha_i during the course of its execution.
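In isolation, each `rcomb_base` step is just two multiply-accumulate updates over the quadratic extension field: p' = p + alpha * (T(x) - T(z)) and r' = r + alpha * (T(x) - T(gz)). The standalone sketch below illustrates only that update; the `Felt` and `QuadFelt` types here are simplified stand-ins with an illustrative extension polynomial (phi^2 = phi + 1), not the Winterfell-backed types the VM actually uses, and `rcomb_step` is a hypothetical helper name.

const P: u128 = 0xffff_ffff_0000_0001; // the Goldilocks prime, 2^64 - 2^32 + 1

#[derive(Clone, Copy, Debug)]
struct Felt(u64);

impl Felt {
    fn new(v: u64) -> Self { Self((v as u128 % P) as u64) }
    fn add(self, o: Self) -> Self { Self(((self.0 as u128 + o.0 as u128) % P) as u64) }
    fn sub(self, o: Self) -> Self { Self(((P + self.0 as u128 - o.0 as u128) % P) as u64) }
    fn mul(self, o: Self) -> Self { Self(((self.0 as u128 * o.0 as u128) % P) as u64) }
}

/// Quadratic extension element a0 + a1 * phi (extension polynomial chosen for illustration only).
#[derive(Clone, Copy, Debug)]
struct QuadFelt(Felt, Felt);

impl QuadFelt {
    fn from_base(v: Felt) -> Self { Self(v, Felt(0)) }
    fn add(self, o: Self) -> Self { Self(self.0.add(o.0), self.1.add(o.1)) }
    fn sub(self, o: Self) -> Self { Self(self.0.sub(o.0), self.1.sub(o.1)) }
    fn mul(self, o: Self) -> Self {
        // (a0 + a1*phi) * (b0 + b1*phi) reduced with phi^2 = phi + 1
        let a0b0 = self.0.mul(o.0);
        let a1b1 = self.1.mul(o.1);
        let cross = self.0.mul(o.1).add(self.1.mul(o.0));
        Self(a0b0.add(a1b1), cross.add(a1b1))
    }
}

/// One accumulator update: fold the trace value `tx` into `p` and `r` using the
/// OOD evaluations `tz`, `tgz` and the batching randomness `alpha`.
fn rcomb_step(
    p: QuadFelt, r: QuadFelt, tx: Felt, tz: QuadFelt, tgz: QuadFelt, alpha: QuadFelt,
) -> (QuadFelt, QuadFelt) {
    let tx = QuadFelt::from_base(tx);
    // p' = p + alpha * (T(x) - T(z));  r' = r + alpha * (T(x) - T(gz))
    (p.add(alpha.mul(tx.sub(tz))), r.add(alpha.mul(tx.sub(tgz))))
}

fn main() {
    let zero = QuadFelt(Felt(0), Felt(0));
    let alpha = QuadFelt(Felt::new(7), Felt::new(11));
    let (tz, tgz) = (QuadFelt(Felt::new(3), Felt::new(5)), QuadFelt(Felt::new(2), Felt::new(9)));
    // fold a single trace value into both accumulators, as one `rcomb_base` step does
    let (p, r) = rcomb_step(zero, zero, Felt::new(42), tz, tgz, alpha);
    println!("p' = {p:?}, r' = {r:?}");
}

The real instruction additionally rotates the top 8 stack elements and advances z_addr and a_addr, which is what makes eight consecutive calls line up with one `mem_stream`.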
+ pub(super) fn op_rcomb_base(&mut self) -> Result<(), ExecutionError> { + // --- read the T_i(x) value from the stack ----------------------------------------------- + let [t7, t6, t5, t4, t3, t2, t1, t0] = self.get_trace_values(); + + // --- read the randomness from memory ---------------------------------------------------- + let alpha = self.get_randomness(); + + // --- read the OOD values from memory ---------------------------------------------------- + let [tz, tgz] = self.get_ood_values(); + + // --- read the accumulator values from stack --------------------------------------------- + let [p, r] = self.read_accumulators(); + + // --- compute the updated accumulator values --------------------------------------------- + let v0 = self.stack.get(7); + let tx = QuadFelt::new(v0, ZERO); + let [p_new, r_new] = [p + alpha * (tx - tz), r + alpha * (tx - tgz)]; + + // --- rotate the top 8 elements of the stack --------------------------------------------- + self.stack.set(0, t0); + self.stack.set(1, t7); + self.stack.set(2, t6); + self.stack.set(3, t5); + self.stack.set(4, t4); + self.stack.set(5, t3); + self.stack.set(6, t2); + self.stack.set(7, t1); + + // --- update the accumulators ------------------------------------------------------------ + self.stack.set(8, p_new.to_base_elements()[1]); + self.stack.set(9, p_new.to_base_elements()[0]); + self.stack.set(10, r_new.to_base_elements()[1]); + self.stack.set(11, r_new.to_base_elements()[0]); + + // --- update the memory pointers --------------------------------------------------------- + self.stack.set(12, self.stack.get(12)); + self.stack.set(13, self.stack.get(13) + ONE); + self.stack.set(14, self.stack.get(14) + ONE); + + // --- copy the rest of the stack --------------------------------------------------------- + self.stack.copy_state(15); + + // --- set the helper registers ----------------------------------------------------------- + self.set_helper_reg(alpha, tz, tgz); + + Ok(()) + } + + //// HELPER METHODS + //// ------------------------------------------------------------------------------------------ + + /// Returns the top 8 elements of the operand stack. + fn get_trace_values(&self) -> [Felt; 8] { + let v7 = self.stack.get(0); + let v6 = self.stack.get(1); + let v5 = self.stack.get(2); + let v4 = self.stack.get(3); + let v3 = self.stack.get(4); + let v2 = self.stack.get(5); + let v1 = self.stack.get(6); + let v0 = self.stack.get(7); + + [v7, v6, v5, v4, v3, v2, v1, v0] + } + + /// Returns randomness. + fn get_randomness(&mut self) -> QuadFelt { + let ctx = self.system.ctx(); + let addr = self.stack.get(14); + let word = self.chiplets.read_mem(ctx, addr.as_int() as u32); + let a0 = word[0]; + let a1 = word[1]; + QuadFelt::new(a0, a1) + } + + /// Returns the OOD values. + fn get_ood_values(&mut self) -> [QuadFelt; 2] { + let ctx = self.system.ctx(); + let addr = self.stack.get(13); + let word = self.chiplets.read_mem(ctx, addr.as_int() as u32); + + [QuadFelt::new(word[0], word[1]), QuadFelt::new(word[2], word[3])] + } + + /// Reads the accumulator values. + fn read_accumulators(&self) -> [QuadFelt; 2] { + let p1 = self.stack.get(8); + let p0 = self.stack.get(9); + let p = QuadFelt::new(p0, p1); + + let r1 = self.stack.get(10); + let r0 = self.stack.get(11); + let r = QuadFelt::new(r0, r1); + + [p, r] + } + + /// Populates helper registers with OOD values and randomness. 
+ fn set_helper_reg(&mut self, alpha: QuadFelt, tz: QuadFelt, tgz: QuadFelt) { + let [a0, a1] = alpha.to_base_elements(); + let [tz0, tz1] = tz.to_base_elements(); + let [tgz0, tgz1] = tgz.to_base_elements(); + let values = [tz0, tz1, tgz0, tgz1, a0, a1]; + self.decoder.set_user_op_helpers(Operation::RCombBase, &values); + } +} + +// TESTS +// ================================================================================================ + +#[cfg(test)] +mod tests { + use crate::utils::collections::*; + use test_utils::{build_test, rand::rand_array}; + use vm_core::{Felt, FieldElement, Operation, StackInputs, ONE, ZERO}; + + use crate::{ContextId, Process, QuadFelt}; + + #[test] + fn rcombine_main() { + // --- build stack inputs ----------------------------------------------------------------- + let mut inputs = rand_array::(); + + // set x_addr to + inputs[12] = Felt::ZERO; + + // set z_addr pointer to x_addr + offset, where offset is a large enough value. + let offset = Felt::new(1000); + inputs[13] = inputs[12] + offset; + + // set a_addr to z_addr + offset + inputs[14] = inputs[13] + offset; + inputs[15] = ZERO; + inputs.reverse(); + + // --- setup the operand stack ------------------------------------------------------------ + let stack_inputs = StackInputs::new(inputs.to_vec()); + let mut process = Process::new_dummy_with_decoder_helpers(stack_inputs); + + // --- setup memory ----------------------------------------------------------------------- + let ctx = ContextId::root(); + let tztgz = rand_array::(); + process.chiplets.write_mem( + ctx, + inputs[2].as_int().try_into().expect("Shouldn't fail by construction"), + tztgz, + ); + + let a = rand_array::(); + process.chiplets.write_mem( + ctx, + inputs[1].as_int().try_into().expect("Shouldn't fail by construction"), + a, + ); + + // --- execute RCOMB1 operation ----------------------------------------------------------- + process.execute_op(Operation::RCombBase).unwrap(); + + // --- check that the top 8 stack elements are correctly rotated -------------------------- + let stack_state = process.stack.trace_state(); + inputs.reverse(); + assert_eq!(stack_state[1], inputs[0]); + assert_eq!(stack_state[2], inputs[1]); + assert_eq!(stack_state[3], inputs[2]); + assert_eq!(stack_state[4], inputs[3]); + assert_eq!(stack_state[5], inputs[4]); + assert_eq!(stack_state[6], inputs[5]); + assert_eq!(stack_state[7], inputs[6]); + assert_eq!(stack_state[0], inputs[7]); + + // --- check that the accumulator was updated correctly ----------------------------------- + let p1 = inputs[8]; + let p0 = inputs[9]; + let p = QuadFelt::new(p0, p1); + + let r1 = inputs[10]; + let r0 = inputs[11]; + let r = QuadFelt::new(r0, r1); + + let tz0 = tztgz[0]; + let tz1 = tztgz[1]; + let tz = QuadFelt::new(tz0, tz1); + let tgz0 = tztgz[2]; + let tgz1 = tztgz[3]; + let tgz = QuadFelt::new(tgz0, tgz1); + + let tx = QuadFelt::new(inputs[7], ZERO); + + let a0 = a[0]; + let a1 = a[1]; + let alpha = QuadFelt::new(a0, a1); + + let p_new = p + alpha * (tx - tz); + let r_new = r + alpha * (tx - tgz); + + assert_eq!(p_new.to_base_elements()[1], stack_state[8]); + assert_eq!(p_new.to_base_elements()[0], stack_state[9]); + assert_eq!(r_new.to_base_elements()[1], stack_state[10]); + assert_eq!(r_new.to_base_elements()[0], stack_state[11]); + + // --- check that memory pointers were updated -------------------------------------------- + assert_eq!(inputs[12], stack_state[12]); + assert_eq!(inputs[13] + ONE, stack_state[13]); + assert_eq!(inputs[14] + ONE, stack_state[14]); + + // 
--- check that the helper registers were updated correctly ----------------------------- + let helper_reg_expected = [tz0, tz1, tgz0, tgz1, a0, a1]; + assert_eq!(helper_reg_expected, process.decoder.get_user_op_helpers()); + } + + #[test] + fn prove_verify() { + let source = " begin + # I) Prepare memory and stack + + # 1) Load T_i(x) for i=0,..,7 + push.0 padw + adv_pipe + + # 2) Load [T_i(z), T_i(gz)] for i=0,..,7 + repeat.4 + adv_pipe + end + + # 3) Load [a0, a1, 0, 0] for i=0,..,7 + repeat.4 + adv_pipe + end + + # 4) Clean up stack + dropw dropw dropw drop + + # 5) Prepare stack + + ## a) Push pointers + push.10 # a_ptr + push.2 # z_ptr + push.0 # x_ptr + + ## b) Push accumulators + padw + + ## c) Add padding for mem_stream + padw padw + + # II) Execute `rcomb_base` op + mem_stream + repeat.8 + rcomb_base + end + end + "; + + // generate the data + let tx: [Felt; 8] = rand_array(); + let tz_tgz: [QuadFelt; 16] = rand_array(); + let a: [QuadFelt; 8] = rand_array(); + + // compute the expected values of the accumulators + let mut p = QuadFelt::ZERO; + let mut r = QuadFelt::ZERO; + let tz: Vec = tz_tgz.iter().step_by(2).map(|e| e.to_owned()).collect(); + let tgz: Vec = tz_tgz.iter().skip(1).step_by(2).map(|e| e.to_owned()).collect(); + for i in 0..8 { + p += a[i] * (QuadFelt::from(tx[i]) - tz[i]); + r += a[i] * (QuadFelt::from(tx[i]) - tgz[i]); + } + + // prepare the advice stack with the generated data + let mut adv_stack = Vec::new(); + let tz_tgz: Vec = tz_tgz.iter().flat_map(|e| e.to_base_elements()).collect(); + let a: Vec = a + .iter() + .flat_map(|e| { + let element = e.to_base_elements(); + [element[0], element[1], ZERO, ZERO] + }) + .collect(); + adv_stack.extend_from_slice(&tx); + adv_stack.extend_from_slice(&tz_tgz); + adv_stack.extend_from_slice(&a); + let adv_stack: Vec = adv_stack.iter().map(|e| e.as_int()).collect(); + + // create the expected operand stack + let mut expected = Vec::new(); + // updated pointers + expected.extend_from_slice(&[ZERO, Felt::from(18_u8), Felt::from(10_u8), Felt::from(2_u8)]); + // updated accumulators + expected.extend_from_slice(&[ + r.to_base_elements()[0], + r.to_base_elements()[1], + p.to_base_elements()[0], + p.to_base_elements()[1], + ]); + // the top 8 stack elements should equal tx since 8 calls to `rcomb_base` implies 8 circular + // shifts of the top 8 elements i.e., the identity map on the top 8 element. 
+ expected.extend_from_slice(&tx); + let expected: Vec = expected.iter().rev().map(|e| e.as_int()).collect(); + + let test = build_test!(source, &[], &adv_stack); + test.expect_stack(&expected); + + let pub_inputs: Vec = Vec::new(); + test.prove_and_verify(pub_inputs, false); + } +} diff --git a/processor/src/operations/crypto_ops.rs b/processor/src/operations/crypto_ops.rs index 11f8adcec0..cec3477461 100644 --- a/processor/src/operations/crypto_ops.rs +++ b/processor/src/operations/crypto_ops.rs @@ -1,6 +1,6 @@ use super::{ExecutionError, Host, Operation, Process}; use crate::crypto::MerklePath; -use vm_core::{AdviceInjector, StarkField}; +use vm_core::AdviceInjector; // CRYPTOGRAPHIC OPERATIONS // ================================================================================================ @@ -33,8 +33,8 @@ where self.stack.get(0), ]; - let (_addr, output_state) = self.chiplets.permute(input_state); - + let (addr, output_state) = self.chiplets.permute(input_state); + self.decoder.set_user_op_helpers(Operation::HPerm, &[addr]); for (i, &value) in output_state.iter().rev().enumerate() { self.stack.set(i, value); } @@ -84,9 +84,15 @@ where // helper registers. self.decoder.set_user_op_helpers(Operation::MpVerify, &[addr]); - // Asserting the computed root of the Merkle path from the advice provider is consistent with - // the input root. - assert_eq!(root, computed_root, "inconsistent Merkle tree root"); + if root != computed_root { + // If the hasher chiplet doesn't compute the same root (using the same path), + // then it means that `node` is not the value currently in the tree at `index` + return Err(ExecutionError::MerklePathVerificationFailed { + value: node, + index, + root: root.into(), + }); + } // The same state is copied over to the next clock cycle with no changes. 
self.stack.copy_state(0); @@ -175,7 +181,7 @@ where #[cfg(test)] mod tests { use super::{ - super::{Felt, Operation, StarkField}, + super::{Felt, Operation}, Process, }; use crate::{AdviceInputs, StackInputs, Word, ZERO}; @@ -183,7 +189,7 @@ mod tests { use vm_core::{ chiplets::hasher::{apply_permutation, STATE_WIDTH}, crypto::merkle::{MerkleStore, MerkleTree, NodeIndex}, - utils::collections::Vec, + utils::collections::*, }; #[test] @@ -196,7 +202,7 @@ mod tests { 1, 0, 0, 0, 0, 0 // padding: ONE followed by the necessary ZEROs ]; let stack = StackInputs::try_from_values(inputs).unwrap(); - let mut process = Process::new_dummy(stack); + let mut process = Process::new_dummy_with_decoder_helpers(stack); let expected: [Felt; STATE_WIDTH] = build_expected_perm(&inputs); process.execute_op(Operation::HPerm).unwrap(); @@ -207,7 +213,7 @@ mod tests { let mut inputs: Vec = vec![values.len() as u64, 0, 0, 0]; inputs.extend_from_slice(&values); let stack = StackInputs::try_from_values(inputs.clone()).unwrap(); - let mut process = Process::new_dummy(stack); + let mut process = Process::new_dummy_with_decoder_helpers(stack); // add the capacity to prepare the input vector let expected: [Felt; STATE_WIDTH] = build_expected_perm(&inputs); @@ -221,7 +227,7 @@ mod tests { inputs.extend_from_slice(&values); let stack = StackInputs::try_from_values(inputs).unwrap(); - let mut process = Process::new_dummy(stack); + let mut process = Process::new_dummy_with_decoder_helpers(stack); process.execute_op(Operation::HPerm).unwrap(); assert_eq!(expected, &process.stack.trace_state()[12..16]); } @@ -230,7 +236,7 @@ mod tests { fn op_mpverify() { let index = 5usize; let nodes = init_leaves(&[1, 2, 3, 4, 5, 6, 7, 8]); - let tree = MerkleTree::new(nodes.to_vec()).unwrap(); + let tree = MerkleTree::new(&nodes).unwrap(); let store = MerkleStore::from(&tree); let root = tree.root(); let node = nodes[index]; diff --git a/processor/src/operations/field_ops.rs b/processor/src/operations/field_ops.rs index ef5810fa59..878efb4b8c 100644 --- a/processor/src/operations/field_ops.rs +++ b/processor/src/operations/field_ops.rs @@ -1,5 +1,5 @@ use super::{utils::assert_binary, ExecutionError, Felt, FieldElement, Host, Process}; -use vm_core::{Operation, StarkField, ONE, ZERO}; +use vm_core::{Operation, ONE, ZERO}; // FIELD OPERATIONS // ================================================================================================ @@ -221,7 +221,7 @@ where #[cfg(test)] mod tests { use super::{ - super::{Felt, FieldElement, Operation, StarkField, STACK_TOP_SIZE}, + super::{Felt, FieldElement, Operation, STACK_TOP_SIZE}, Process, }; use crate::{AdviceInputs, StackInputs}; diff --git a/processor/src/operations/fri_ops.rs b/processor/src/operations/fri_ops.rs index c0beeafda4..32d7f58e0c 100644 --- a/processor/src/operations/fri_ops.rs +++ b/processor/src/operations/fri_ops.rs @@ -244,7 +244,7 @@ mod tests { ExtensionOf, Felt, FieldElement, Operation, Process, QuadFelt, StarkField, TWO, TWO_INV, }; use test_utils::rand::{rand_array, rand_value, rand_vector}; - use vm_core::{utils::collections::Vec, StackInputs}; + use vm_core::{utils::collections::*, StackInputs}; use winter_prover::math::{fft, get_power_series_with_offset}; use winter_utils::transpose_slice; diff --git a/processor/src/operations/io_ops.rs b/processor/src/operations/io_ops.rs index 1a01e2f23b..aa4f7e1ac5 100644 --- a/processor/src/operations/io_ops.rs +++ b/processor/src/operations/io_ops.rs @@ -1,6 +1,5 @@ use super::{ExecutionError, Felt, Host, Operation, Process}; 
use crate::Word; -use vm_core::StarkField; // INPUT / OUTPUT OPERATIONS // ================================================================================================ @@ -275,7 +274,7 @@ mod tests { super::{super::AdviceProvider, Operation, STACK_TOP_SIZE}, Felt, Host, Process, }; - use crate::AdviceSource; + use crate::{AdviceSource, ContextId}; use vm_core::{utils::ToElements, Word, ONE, ZERO}; #[test] @@ -332,10 +331,10 @@ mod tests { // check memory state assert_eq!(1, process.chiplets.get_mem_size()); - assert_eq!(word, process.chiplets.get_mem_value(0, 1).unwrap()); + assert_eq!(word, process.chiplets.get_mem_value(ContextId::root(), 1).unwrap()); // --- calling MLOADW with address greater than u32::MAX leads to an error ---------------- - process.execute_op(Operation::Push(Felt::from(u64::MAX / 2))).unwrap(); + process.execute_op(Operation::Push(Felt::new(u64::MAX / 2))).unwrap(); assert!(process.execute_op(Operation::MLoadW).is_err()); // --- calling MLOADW with a stack of minimum depth is ok ---------------- @@ -361,10 +360,10 @@ mod tests { // check memory state assert_eq!(1, process.chiplets.get_mem_size()); - assert_eq!(word, process.chiplets.get_mem_value(0, 2).unwrap()); + assert_eq!(word, process.chiplets.get_mem_value(ContextId::root(), 2).unwrap()); // --- calling MLOAD with address greater than u32::MAX leads to an error ----------------- - process.execute_op(Operation::Push(Felt::from(u64::MAX / 2))).unwrap(); + process.execute_op(Operation::Push(Felt::new(u64::MAX / 2))).unwrap(); assert!(process.execute_op(Operation::MLoad).is_err()); // --- calling MLOAD with a stack of minimum depth is ok ---------------- @@ -386,8 +385,8 @@ mod tests { // check memory state assert_eq!(2, process.chiplets.get_mem_size()); - assert_eq!(word1_felts, process.chiplets.get_mem_value(0, 1).unwrap()); - assert_eq!(word2_felts, process.chiplets.get_mem_value(0, 2).unwrap()); + assert_eq!(word1_felts, process.chiplets.get_mem_value(ContextId::root(), 1).unwrap()); + assert_eq!(word2_felts, process.chiplets.get_mem_value(ContextId::root(), 2).unwrap()); // clear the stack for _ in 0..8 { @@ -433,7 +432,7 @@ mod tests { // check memory state assert_eq!(1, process.chiplets.get_mem_size()); - assert_eq!(word1, process.chiplets.get_mem_value(0, 0).unwrap()); + assert_eq!(word1, process.chiplets.get_mem_value(ContextId::root(), 0).unwrap()); // push the second word onto the stack and save it at address 3 let word2 = [2, 4, 6, 8].to_elements().try_into().unwrap(); @@ -445,11 +444,11 @@ mod tests { // check memory state assert_eq!(2, process.chiplets.get_mem_size()); - assert_eq!(word1, process.chiplets.get_mem_value(0, 0).unwrap()); - assert_eq!(word2, process.chiplets.get_mem_value(0, 3).unwrap()); + assert_eq!(word1, process.chiplets.get_mem_value(ContextId::root(), 0).unwrap()); + assert_eq!(word2, process.chiplets.get_mem_value(ContextId::root(), 3).unwrap()); // --- calling MSTOREW with address greater than u32::MAX leads to an error ---------------- - process.execute_op(Operation::Push(Felt::from(u64::MAX / 2))).unwrap(); + process.execute_op(Operation::Push(Felt::new(u64::MAX / 2))).unwrap(); assert!(process.execute_op(Operation::MStoreW).is_err()); // --- calling STOREW with a stack of minimum depth is ok ---------------- @@ -474,7 +473,7 @@ mod tests { // check memory state let mem_0 = [element, ZERO, ZERO, ZERO]; assert_eq!(1, process.chiplets.get_mem_size()); - assert_eq!(mem_0, process.chiplets.get_mem_value(0, 0).unwrap()); + assert_eq!(mem_0, 
process.chiplets.get_mem_value(ContextId::root(), 0).unwrap()); // push the word onto the stack and save it at address 2 let word_2 = [1, 3, 5, 7].to_elements().try_into().unwrap(); @@ -491,10 +490,10 @@ mod tests { // check memory state to make sure the other 3 elements were not affected let mem_2 = [element, Felt::new(3), Felt::new(5), Felt::new(7)]; assert_eq!(2, process.chiplets.get_mem_size()); - assert_eq!(mem_2, process.chiplets.get_mem_value(0, 2).unwrap()); + assert_eq!(mem_2, process.chiplets.get_mem_value(ContextId::root(), 2).unwrap()); // --- calling MSTORE with address greater than u32::MAX leads to an error ---------------- - process.execute_op(Operation::Push(Felt::from(u64::MAX / 2))).unwrap(); + process.execute_op(Operation::Push(Felt::new(u64::MAX / 2))).unwrap(); assert!(process.execute_op(Operation::MStore).is_err()); // --- calling MSTORE with a stack of minimum depth is ok ---------------- @@ -538,8 +537,8 @@ mod tests { // check memory state contains the words from the advice stack assert_eq!(2, process.chiplets.get_mem_size()); - assert_eq!(word1_felts, process.chiplets.get_mem_value(0, 1).unwrap()); - assert_eq!(word2_felts, process.chiplets.get_mem_value(0, 2).unwrap()); + assert_eq!(word1_felts, process.chiplets.get_mem_value(ContextId::root(), 1).unwrap()); + assert_eq!(word2_felts, process.chiplets.get_mem_value(ContextId::root(), 2).unwrap()); // the first 8 values should be the values from the advice stack. the next 4 values should // remain unchanged, and the address should be incremented by 2 (i.e., 1 -> 3). diff --git a/processor/src/operations/mod.rs b/processor/src/operations/mod.rs index e18179c288..db0c45e973 100644 --- a/processor/src/operations/mod.rs +++ b/processor/src/operations/mod.rs @@ -1,6 +1,7 @@ -use super::{ExecutionError, Felt, FieldElement, Host, Operation, Process, StarkField}; +use super::{ExecutionError, Felt, FieldElement, Host, Operation, Process}; use vm_core::stack::STACK_TOP_SIZE; +mod comb_ops; mod crypto_ops; mod ext2_ops; mod field_ops; @@ -148,6 +149,7 @@ where Operation::MpVerify => self.op_mpverify()?, Operation::MrUpdate => self.op_mrupdate()?, Operation::FriE2F4 => self.op_fri_ext2fold4()?, + Operation::RCombBase => self.op_rcomb_base()?, } self.advance_clock()?; diff --git a/processor/src/operations/stack_ops.rs b/processor/src/operations/stack_ops.rs index 14267c2cf5..6a5d28fd77 100644 --- a/processor/src/operations/stack_ops.rs +++ b/processor/src/operations/stack_ops.rs @@ -1,4 +1,4 @@ -use super::{ExecutionError, Host, Process, StarkField, STACK_TOP_SIZE}; +use super::{ExecutionError, Host, Process, STACK_TOP_SIZE}; use crate::ZERO; impl Process diff --git a/processor/src/operations/sys_ops.rs b/processor/src/operations/sys_ops.rs index ba62fdb4c8..90ecb8f962 100644 --- a/processor/src/operations/sys_ops.rs +++ b/processor/src/operations/sys_ops.rs @@ -3,7 +3,7 @@ use super::{ system::{FMP_MAX, FMP_MIN}, ONE, }, - ExecutionError, Felt, Host, Process, StarkField, + ExecutionError, Felt, Host, Process, }; // SYSTEM OPERATIONS @@ -17,9 +17,9 @@ where /// /// # Errors /// Returns an error if the popped value is not ONE. 
- pub(super) fn op_assert(&mut self, err_code: Felt) -> Result<(), ExecutionError> { + pub(super) fn op_assert(&mut self, err_code: u32) -> Result<(), ExecutionError> { if self.stack.get(0) != ONE { - return Err(ExecutionError::FailedAssertion(self.system.clk(), err_code)); + return Err(self.host.borrow_mut().on_assert_failed(self, err_code)); } self.stack.shift_left(1); Ok(()) @@ -128,7 +128,7 @@ mod tests { process.execute_op(Operation::Swap).unwrap(); process.execute_op(Operation::Drop).unwrap(); - assert!(process.execute_op(Operation::Assert(ZERO)).is_ok()); + assert!(process.execute_op(Operation::Assert(0)).is_ok()); } #[test] diff --git a/processor/src/operations/u32_ops.rs b/processor/src/operations/u32_ops.rs index 6a9524d4f2..458a4346b2 100644 --- a/processor/src/operations/u32_ops.rs +++ b/processor/src/operations/u32_ops.rs @@ -1,6 +1,6 @@ use super::{ super::utils::{split_element, split_u32_into_u16}, - ExecutionError, Felt, FieldElement, Host, Operation, Process, StarkField, + ExecutionError, Felt, FieldElement, Host, Operation, Process, }; use crate::ZERO; diff --git a/processor/src/range/aux_trace.rs b/processor/src/range/aux_trace.rs index a0d9c258bd..cd93359f45 100644 --- a/processor/src/range/aux_trace.rs +++ b/processor/src/range/aux_trace.rs @@ -1,6 +1,7 @@ -use super::{uninit_vector, BTreeMap, ColMatrix, Felt, FieldElement, Vec, NUM_RAND_ROWS}; +use super::{uninit_vector, Felt, FieldElement, NUM_RAND_ROWS}; +use crate::utils::collections::*; +use miden_air::trace::main_trace::MainTrace; use miden_air::trace::range::{M_COL_IDX, V_COL_IDX}; -use vm_core::StarkField; // AUXILIARY TRACE BUILDER // ================================================================================================ @@ -36,24 +37,24 @@ impl AuxTraceBuilder { // AUX COLUMN BUILDERS // -------------------------------------------------------------------------------------------- - /// Builds and returns range checker auxiliary trace columns. Currently this consists of two - /// columns: + /// Builds and returns range checker auxiliary trace columns. Currently this consists of one + /// column: /// - `b_range`: ensures that the range checks performed by the Range Checker match those /// requested by the Stack and Memory processors. pub fn build_aux_columns>( &self, - main_trace: &ColMatrix, + main_trace: &MainTrace, rand_elements: &[E], ) -> Vec> { let b_range = self.build_aux_col_b_range(main_trace, rand_elements); vec![b_range] } - /// Builds the execution trace of the range check `b_range` and `q` columns which ensure that the - /// range check lookups performed by user operations match those executed by the Range Checker. + /// Builds the execution trace of the range check `b_range` column which ensure that the range + /// check lookups performed by user operations match those executed by the Range Checker. fn build_aux_col_b_range>( &self, - main_trace: &ColMatrix, + main_trace: &MainTrace, rand_elements: &[E], ) -> Vec { // run batch inversion on the lookup values @@ -138,7 +139,7 @@ impl AuxTraceBuilder { } } -/// Runs batch inversion on all range check lookup values and returns a map which maps of each value +/// Runs batch inversion on all range check lookup values and returns a map which maps each value /// to the divisor used for including it in the LogUp lookup. In other words, the map contains /// mappings of x to 1/(alpha - x). 
fn get_divisors>( diff --git a/processor/src/range/mod.rs b/processor/src/range/mod.rs index b22b2169a8..51f4a220cb 100644 --- a/processor/src/range/mod.rs +++ b/processor/src/range/mod.rs @@ -1,7 +1,5 @@ -use super::{ - trace::NUM_RAND_ROWS, utils::uninit_vector, BTreeMap, ColMatrix, Felt, FieldElement, - RangeCheckTrace, Vec, ZERO, -}; +use super::{trace::NUM_RAND_ROWS, Felt, FieldElement, RangeCheckTrace, ZERO}; +use crate::utils::{collections::*, uninit_vector}; mod aux_trace; pub use aux_trace::AuxTraceBuilder; @@ -99,7 +97,7 @@ impl RangeChecker { /// Converts this [RangeChecker] into an execution trace with 2 columns and the number of rows /// specified by the `target_len` parameter. /// - /// If the number of rows need to represent execution trace of this range checker is smaller + /// If the number of rows needed to represent execution trace of this range checker is smaller /// than `target_len` parameter, the trace is padded with extra rows. /// /// `num_rand_rows` indicates the number of rows at the end of the trace which will be diff --git a/processor/src/range/request.rs b/processor/src/range/request.rs index 28cf9c0704..c71256eb2e 100644 --- a/processor/src/range/request.rs +++ b/processor/src/range/request.rs @@ -54,7 +54,7 @@ impl CycleRangeChecks { /// element in the field specified by E. pub fn to_stack_value>( &self, - main_trace: &ColMatrix, + main_trace: &MainTrace, alphas: &[E], ) -> E { let mut value = E::ONE; @@ -70,7 +70,7 @@ impl CycleRangeChecks { /// element in the field specified by E. fn to_mem_value>( &self, - main_trace: &ColMatrix, + main_trace: &MainTrace, alphas: &[E], ) -> E { let mut value = E::ONE; @@ -88,7 +88,7 @@ impl LookupTableRow for CycleRangeChecks { /// at least 1 alpha value. Includes all values included at this cycle from all processors. fn to_value>( &self, - main_trace: &ColMatrix, + main_trace: &MainTrace, alphas: &[E], ) -> E { let stack_value = self.to_stack_value(main_trace, alphas); @@ -115,7 +115,7 @@ impl LookupTableRow for RangeCheckRequest { /// at least 1 alpha value. 
fn to_value>( &self, - _main_trace: &ColMatrix, + _main_trace: &MainTrace, alphas: &[E], ) -> E { let alpha: E = alphas[0]; diff --git a/processor/src/range/tests.rs b/processor/src/range/tests.rs index 109734f6f9..650ff58c03 100644 --- a/processor/src/range/tests.rs +++ b/processor/src/range/tests.rs @@ -1,7 +1,7 @@ -use super::{BTreeMap, Felt, RangeChecker, Vec, ZERO}; +use super::{Felt, RangeChecker, ZERO}; use crate::{utils::get_trace_len, RangeCheckTrace}; use test_utils::rand::rand_array; -use vm_core::{utils::ToElements, StarkField}; +use vm_core::utils::{collections::*, ToElements}; #[test] fn range_checks() { @@ -67,8 +67,8 @@ fn range_checks_rand() { // ================================================================================================ fn validate_row(trace: &[Vec], row_idx: &mut usize, value: u64, num_lookups: u64) { - assert_eq!(trace[0][*row_idx], Felt::from(num_lookups)); - assert_eq!(trace[1][*row_idx], Felt::from(value)); + assert_eq!(trace[0][*row_idx], Felt::try_from(num_lookups).unwrap()); + assert_eq!(trace[1][*row_idx], Felt::try_from(value).unwrap()); *row_idx += 1; } diff --git a/processor/src/stack/aux_trace.rs b/processor/src/stack/aux_trace.rs index d040a63b83..325a642227 100644 --- a/processor/src/stack/aux_trace.rs +++ b/processor/src/stack/aux_trace.rs @@ -1,7 +1,6 @@ -use super::{ - super::trace::AuxColumnBuilder, ColMatrix, Felt, FieldElement, OverflowTableRow, - OverflowTableUpdate, Vec, -}; +use super::{Felt, FieldElement, OverflowTableRow}; +use crate::{trace::AuxColumnBuilder, utils::collections::*}; +use miden_air::trace::main_trace::MainTrace; // AUXILIARY TRACE BUILDER // ================================================================================================ @@ -9,16 +8,10 @@ use super::{ /// Describes how to construct execution traces of stack-related auxiliary trace segment columns /// (used in multiset checks). pub struct AuxTraceBuilder { - /// A list of updates made to the overflow table during program execution. For each update we - /// also track the cycle at which the update happened. - pub(super) overflow_hints: Vec<(u64, OverflowTableUpdate)>, /// A list of all rows that were added to and then removed from the overflow table. pub(super) overflow_table_rows: Vec, /// The number of rows in the overflow table when execution begins. pub(super) num_init_rows: usize, - /// A list of indices into the `all_rows` vector which describes the rows remaining in the - /// overflow table at the end of execution. - pub(super) final_rows: Vec, } impl AuxTraceBuilder { @@ -26,7 +19,7 @@ impl AuxTraceBuilder { /// column p1 describing states of the stack overflow table. pub fn build_aux_columns>( &self, - main_trace: &ColMatrix, + main_trace: &MainTrace, rand_elements: &[E], ) -> Vec> { let p1 = self.build_aux_column(main_trace, rand_elements); @@ -34,71 +27,47 @@ impl AuxTraceBuilder { } } -// OVERFLOW TABLE -// ================================================================================================ - -impl AuxColumnBuilder for AuxTraceBuilder { - /// Returns a list of rows which were added to and then removed from the stack overflow table. - /// - /// The order of the rows in the list is the same as the order in which the rows were added to - /// the table. - fn get_table_rows(&self) -> &[OverflowTableRow] { - &self.overflow_table_rows +impl> AuxColumnBuilder for AuxTraceBuilder { + /// Initializes the overflow stack auxiliary column. 
+ fn init_responses(&self, _main_trace: &MainTrace, alphas: &[E]) -> E { + let mut initial_column_value = E::ONE; + for row in self.overflow_table_rows.iter().take(self.num_init_rows) { + let value = (*row).to_value(alphas); + initial_column_value *= value; + } + initial_column_value } - /// Returns hints which describe how the stack overflow table was updated during program - /// execution. Each update hint is accompanied by a clock cycle at which the update happened. - /// - /// Internally, each update hint also contains an index of the row into the full list of rows - /// which was either added or removed. - fn get_table_hints(&self) -> &[(u64, OverflowTableUpdate)] { - &self.overflow_hints[self.num_init_rows..] - } + /// Removes a row from the stack overflow table. + fn get_requests_at(&self, main_trace: &MainTrace, alphas: &[E], i: usize) -> E { + let is_left_shift = main_trace.is_left_shift(i); + let is_non_empty_overflow = main_trace.is_non_empty_overflow(i); - /// Returns the value by which the running product column should be multiplied for the provided - /// hint value. - fn get_multiplicand>( - &self, - hint: OverflowTableUpdate, - row_values: &[E], - inv_row_values: &[E], - ) -> E { - match hint { - OverflowTableUpdate::RowInserted(inserted_row_idx) => { - row_values[inserted_row_idx as usize] - } - OverflowTableUpdate::RowRemoved(removed_row_idx) => { - inv_row_values[removed_row_idx as usize] - } + if is_left_shift && is_non_empty_overflow { + let b1 = main_trace.parent_overflow_address(i); + let s15_prime = main_trace.stack_element(15, i + 1); + let b1_prime = main_trace.parent_overflow_address(i + 1); + + let row = OverflowTableRow::new(b1, s15_prime, b1_prime); + row.to_value(alphas) + } else { + E::ONE } } - /// Returns the initial value in the auxiliary column. - fn init_column_value>(&self, row_values: &[E]) -> E { - let mut init_column_value = E::ONE; - // iterate through the elements in the initial table - for (_, hint) in &self.overflow_hints[..self.num_init_rows] { - // no rows should have been removed from the table before execution begins. - if let OverflowTableUpdate::RowInserted(row) = hint { - init_column_value *= row_values[*row as usize]; - } else { - debug_assert!( - false, - "overflow table row incorrectly removed before execution started" - ) - } - } + /// Adds a row to the stack overflow table. + fn get_responses_at(&self, main_trace: &MainTrace, alphas: &[E], i: usize) -> E { + let is_right_shift = main_trace.is_right_shift(i); - init_column_value - } + if is_right_shift { + let k0 = main_trace.clk(i); + let s15 = main_trace.stack_element(15, i); + let b1 = main_trace.parent_overflow_address(i); - /// Returns the final value in the auxiliary column. 
- fn final_column_value>(&self, row_values: &[E]) -> E { - let mut final_column_value = E::ONE; - for &row in &self.final_rows { - final_column_value *= row_values[row]; + let row = OverflowTableRow::new(k0, s15, b1); + row.to_value(alphas) + } else { + E::ONE } - - final_column_value } } diff --git a/processor/src/stack/mod.rs b/processor/src/stack/mod.rs index bbfee3de17..8027c21b3f 100644 --- a/processor/src/stack/mod.rs +++ b/processor/src/stack/mod.rs @@ -1,7 +1,5 @@ -use super::{ - BTreeMap, ColMatrix, Felt, FieldElement, StackInputs, StackOutputs, Vec, ONE, - STACK_TRACE_WIDTH, ZERO, -}; +use super::{Felt, FieldElement, StackInputs, StackOutputs, ONE, STACK_TRACE_WIDTH, ZERO}; +use crate::utils::collections::*; use core::cmp; use vm_core::{stack::STACK_TOP_SIZE, Word, WORD_SIZE}; @@ -10,7 +8,7 @@ use trace::StackTrace; mod overflow; use overflow::OverflowTable; -pub use overflow::{OverflowTableRow, OverflowTableUpdate}; +pub use overflow::OverflowTableRow; mod aux_trace; pub use aux_trace::AuxTraceBuilder; @@ -190,7 +188,9 @@ impl Stack { self.trace.copy_stack_state_at( self.clk, start_pos, - Felt::from(self.active_depth as u64), + // TODO: change type of `active_depth` to `u32` + Felt::try_from(self.active_depth as u64) + .expect("value is greater than or equal to the field modulus"), self.overflow.last_row_addr(), ); } @@ -240,7 +240,7 @@ impl Stack { // Update the overflow table. let to_overflow = self.trace.get_stack_value_at(self.clk, MAX_TOP_IDX); - self.overflow.push(to_overflow, self.clk as u64); + self.overflow.push(to_overflow, Felt::from(self.clk)); // Stack depth always increases on right shift. self.active_depth += 1; diff --git a/processor/src/stack/overflow.rs b/processor/src/stack/overflow.rs index e4d1cc4872..96be4bf725 100644 --- a/processor/src/stack/overflow.rs +++ b/processor/src/stack/overflow.rs @@ -1,7 +1,5 @@ -use super::{ - super::trace::LookupTableRow, AuxTraceBuilder, BTreeMap, ColMatrix, Felt, FieldElement, Vec, - ZERO, -}; +use super::{AuxTraceBuilder, Felt, FieldElement, ZERO}; +use crate::utils::collections::*; use vm_core::{utils::uninit_vector, StarkField}; // OVERFLOW TABLE @@ -19,9 +17,6 @@ pub struct OverflowTable { /// A list of indices into the `all_rows` vector which describes the rows currently in the /// overflow table. active_rows: Vec, - /// A list of updates made to the overflow table during program execution. For each update we - /// also track the cycle at which the update happened. - update_trace: Vec<(u64, OverflowTableUpdate)>, /// A map which records the full state of the overflow table at every cycle during which an /// update happened. This map is populated only when `trace_enabled` = true. trace: BTreeMap>, @@ -45,7 +40,6 @@ impl OverflowTable { Self { all_rows: Vec::new(), active_rows: Vec::new(), - update_trace: Vec::new(), trace: BTreeMap::new(), trace_enabled: enable_trace, num_init_rows: 0, @@ -65,7 +59,7 @@ impl OverflowTable { let mut clk = Felt::MODULUS - init_values.len() as u64; for &val in init_values.iter().rev() { - overflow_table.push(val, clk); + overflow_table.push(val, Felt::new(clk)); clk += 1; } @@ -78,12 +72,12 @@ impl OverflowTable { /// Pushes the specified value into the overflow table. /// /// Parameter clk specifies the clock cycle at which the value is added to the table. 
- pub fn push(&mut self, value: Felt, clk: u64) { + pub fn push(&mut self, value: Felt, clk: Felt) { // ZERO address indicates that the overflow table is empty, and thus, no actual value // should be inserted into the table with this address. This is not a problem since for // every real program, we first execute an operation marking the start of a code block, // and thus, no operation can shift the stack to the right at clk = 0. - debug_assert_ne!(clk, 0, "cannot add value to overflow at clk=0"); + debug_assert_ne!(clk, ZERO, "cannot add value to overflow at clk=0"); // create and record the new row, and also put it at the top of the overflow table let row_idx = self.all_rows.len() as u32; @@ -92,14 +86,11 @@ impl OverflowTable { self.active_rows.push(row_idx as usize); // set the last row address to the address of the newly added row - self.last_row_addr = Felt::from(clk); - - // mark this clock cycle as the cycle at which a new row was inserted into the table - self.update_trace.push((clk, OverflowTableUpdate::RowInserted(row_idx))); + self.last_row_addr = clk; if self.trace_enabled { // insert a copy of the current table state into the trace - self.save_current_state(clk); + self.save_current_state(clk.as_int()); } } @@ -122,10 +113,6 @@ impl OverflowTable { let removed_value = last_row.val; self.last_row_addr = last_row.prev; - // mark this clock cycle as the clock cycle at which a row was removed from the table - self.update_trace - .push((clk, OverflowTableUpdate::RowRemoved(last_row_idx as u32))); - if self.trace_enabled { // insert a copy of the current table state into the trace self.save_current_state(clk); @@ -211,9 +198,7 @@ impl OverflowTable { pub fn into_aux_builder(self) -> AuxTraceBuilder { AuxTraceBuilder { num_init_rows: self.num_init_rows, - overflow_hints: self.update_trace, overflow_table_rows: self.all_rows, - final_rows: self.active_rows, } } @@ -257,40 +242,18 @@ pub struct OverflowTableRow { } impl OverflowTableRow { - pub fn new(clk: u64, val: Felt, prev: Felt) -> Self { - Self { - val, - clk: Felt::from(clk), - prev, - } + pub fn new(clk: Felt, val: Felt, prev: Felt) -> Self { + Self { val, clk, prev } } } -impl LookupTableRow for OverflowTableRow { +impl OverflowTableRow { /// Reduces this row to a single field element in the field specified by E. This requires /// at least 4 alpha values. - fn to_value>( - &self, - _main_trace: &ColMatrix, - alphas: &[E], - ) -> E { + pub fn to_value>(&self, alphas: &[E]) -> E { alphas[0] + alphas[1].mul_base(self.clk) + alphas[2].mul_base(self.val) + alphas[3].mul_base(self.prev) } } - -// OVERFLOW TABLE UPDATES -// ================================================================================================ - -/// Describes an update to the stack overflow table. There could be two types of updates: -/// - A single row can be added to the table. This happens during a right shift. -/// - A single row can be removed from the table. This happens during a left shift. -/// -/// For each update we also record the index of the row that was added/removed from the table. 
-#[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum OverflowTableUpdate { - RowInserted(u32), - RowRemoved(u32), -} diff --git a/processor/src/stack/tests.rs b/processor/src/stack/tests.rs index 4319ca4326..f12302fd5d 100644 --- a/processor/src/stack/tests.rs +++ b/processor/src/stack/tests.rs @@ -1,12 +1,11 @@ use super::{ - super::StackTopState, Felt, OverflowTableRow, Stack, StackInputs, Vec, ONE, STACK_TOP_SIZE, - ZERO, + super::StackTopState, Felt, OverflowTableRow, Stack, StackInputs, ONE, STACK_TOP_SIZE, ZERO, }; use miden_air::trace::{ stack::{B0_COL_IDX, B1_COL_IDX, H0_COL_IDX, NUM_STACK_HELPER_COLS}, STACK_TRACE_WIDTH, }; -use vm_core::{FieldElement, StarkField}; +use vm_core::{utils::collections::*, FieldElement, StarkField}; // TYPE ALIASES // ================================================================================================ @@ -53,9 +52,9 @@ fn initialize_overflow() { ]; let init_addr = Felt::MODULUS - 3; let expected_overflow_rows = vec![ - OverflowTableRow::new(init_addr, ONE, ZERO), - OverflowTableRow::new(init_addr + 1, Felt::new(2), Felt::new(init_addr)), - OverflowTableRow::new(init_addr + 2, Felt::new(3), Felt::new(init_addr + 1)), + OverflowTableRow::new(Felt::new(init_addr), ONE, ZERO), + OverflowTableRow::new(Felt::new(init_addr + 1), Felt::new(2), Felt::new(init_addr)), + OverflowTableRow::new(Felt::new(init_addr + 2), Felt::new(3), Felt::new(init_addr + 1)), ]; let expected_overflow_active_rows = vec![0, 1, 2]; diff --git a/processor/src/stack/trace.rs b/processor/src/stack/trace.rs index f90faccfe3..a4fbee690b 100644 --- a/processor/src/stack/trace.rs +++ b/processor/src/stack/trace.rs @@ -1,8 +1,8 @@ use super::{ - super::utils::get_trace_len, Felt, FieldElement, Vec, MAX_TOP_IDX, ONE, STACK_TRACE_WIDTH, ZERO, + super::utils::get_trace_len, Felt, FieldElement, MAX_TOP_IDX, ONE, STACK_TRACE_WIDTH, ZERO, }; +use crate::utils::{collections::*, math::batch_inversion}; use miden_air::trace::stack::{H0_COL_IDX, NUM_STACK_HELPER_COLS, STACK_TOP_SIZE}; -use vm_core::utils::math::batch_inversion; // STACK TRACE // ================================================================================================ @@ -256,7 +256,9 @@ fn init_helper_columns( // if the overflow table is not empty, set h0 to (init_depth - 16) let mut h0 = Felt::zeroed_vector(init_trace_capacity); - h0[0] = Felt::from((init_depth - STACK_TOP_SIZE) as u64); + // TODO: change type of `init_depth` to `u32` + h0[0] = Felt::try_from((init_depth - STACK_TOP_SIZE) as u64) + .expect("value is greater than or equal to the field modulus"); [b0, b1, h0] } diff --git a/processor/src/system/mod.rs b/processor/src/system/mod.rs index b6a568f09c..7c4cceb05c 100644 --- a/processor/src/system/mod.rs +++ b/processor/src/system/mod.rs @@ -1,6 +1,6 @@ -use super::{ - ExecutionError, Felt, FieldElement, StarkField, SysTrace, Vec, Word, EMPTY_WORD, ONE, ZERO, -}; +use super::{ExecutionError, Felt, FieldElement, SysTrace, Word, EMPTY_WORD, ONE, ZERO}; +use crate::utils::collections::*; +use core::fmt::{self, Display}; #[cfg(test)] mod tests; @@ -20,7 +20,7 @@ mod tests; /// Memory addresses for procedure locals start at 2^30. pub const FMP_MIN: u64 = 2_u64.pow(30); /// Memory address for procedure locals within a SYSCALL starts at 2^31. -pub const SYSCALL_FMP_MIN: u64 = 2_u64.pow(31); +pub const SYSCALL_FMP_MIN: u32 = 2_u32.pow(31); /// Value of FMP register should not exceed 3 * 2^30 - 1. 
pub const FMP_MAX: u64 = 3 * 2_u64.pow(30) - 1; @@ -39,7 +39,7 @@ pub const FMP_MAX: u64 = 3 * 2_u64.pow(30) - 1; /// initiated from the root context, this will be set to ZEROs. pub struct System { clk: u32, - ctx: u32, + ctx: ContextId, fmp: Felt, in_syscall: bool, fn_hash: Word, @@ -58,13 +58,13 @@ impl System { /// Initializes the free memory pointer `fmp` used for local memory offsets to 2^30. pub fn new(init_trace_capacity: usize) -> Self { // set the first value of the fmp trace to 2^30. - let fmp = Felt::from(FMP_MIN); + let fmp = Felt::new(FMP_MIN); let mut fmp_trace = Felt::zeroed_vector(init_trace_capacity); fmp_trace[0] = fmp; Self { clk: 0, - ctx: 0, + ctx: ContextId::root(), fmp, in_syscall: false, fn_hash: EMPTY_WORD, @@ -92,7 +92,7 @@ impl System { /// Returns the current execution context ID. #[inline(always)] - pub fn ctx(&self) -> u32 { + pub fn ctx(&self) -> ContextId { self.ctx } @@ -123,8 +123,8 @@ impl System { /// Returns execution context ID at the specified clock cycle. #[inline(always)] - pub fn get_ctx_at(&self, clk: u32) -> u32 { - self.ctx_trace[clk as usize].as_int() as u32 + pub fn get_ctx_at(&self, clk: u32) -> ContextId { + (self.ctx_trace[clk as usize].as_int() as u32).into() } /// Returns free memory pointer at the specified clock cycle. @@ -178,8 +178,8 @@ impl System { /// A CALL cannot be started when the VM is executing a SYSCALL. pub fn start_call(&mut self, fn_hash: Word) { debug_assert!(!self.in_syscall, "call in syscall"); - self.ctx = self.clk + 1; - self.fmp = Felt::from(FMP_MIN); + self.ctx = (self.clk + 1).into(); + self.fmp = Felt::new(FMP_MIN); self.fn_hash = fn_hash; } @@ -198,7 +198,7 @@ impl System { /// for SYSCALLs this remains set to the hash of the last invoked function. pub fn start_syscall(&mut self) { debug_assert!(!self.in_syscall, "already in syscall"); - self.ctx = 0; + self.ctx = ContextId::root(); self.fmp = Felt::from(SYSCALL_FMP_MIN); self.in_syscall = true; } @@ -208,7 +208,7 @@ impl System { /// /// Note that we set in_syscall flag to true regardless of whether we return from a CALL or a /// SYSCALL. - pub fn restore_context(&mut self, ctx: u32, fmp: Felt, fn_hash: Word) { + pub fn restore_context(&mut self, ctx: ContextId, fmp: Felt, fn_hash: Word) { self.ctx = ctx; self.fmp = fmp; self.in_syscall = false; @@ -249,7 +249,7 @@ impl System { // complete the ctx column by filling all values after the last clock cycle with ZEROs as // the last context must be zero context. 
- debug_assert_eq!(0, self.ctx); + debug_assert!(self.ctx.is_root()); self.ctx_trace.resize(trace_len, ZERO); // complete the fmp column by filling in all values after the last clock cycle with the @@ -296,3 +296,52 @@ impl System { } } } + +// EXECUTION CONTEXT +// ================================================================================================ + +/// Represents the ID of an execution context +#[derive(Clone, Copy, Debug, Default, Eq, Ord, PartialEq, PartialOrd)] +pub struct ContextId(u32); + +impl ContextId { + /// Returns the root context ID + pub fn root() -> Self { + Self(0) + } + + /// Returns true if the context ID represents the root context + pub fn is_root(&self) -> bool { + self.0 == 0 + } +} + +impl From for ContextId { + fn from(value: u32) -> Self { + Self(value) + } +} + +impl From for u32 { + fn from(context_id: ContextId) -> Self { + context_id.0 + } +} + +impl From for u64 { + fn from(context_id: ContextId) -> Self { + context_id.0.into() + } +} + +impl From for Felt { + fn from(context_id: ContextId) -> Self { + context_id.0.into() + } +} + +impl Display for ContextId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} diff --git a/processor/src/system/tests.rs b/processor/src/system/tests.rs index 1b7fe9c1f5..7fcf59fbf7 100644 --- a/processor/src/system/tests.rs +++ b/processor/src/system/tests.rs @@ -1,21 +1,18 @@ -#[cfg(test)] -mod tests { - use crate::{DefaultHost, ExecutionOptions, Kernel, Operation, Process, StackInputs}; +use crate::{DefaultHost, ExecutionOptions, Kernel, Operation, Process, StackInputs}; - // Check that process returns an error if a maximum number of cycles is exceeded. - #[test] - fn cycles_num_exceeded() { - let stack = StackInputs::default(); - let host = DefaultHost::default(); - let mut process = Process::new( - Kernel::default(), - stack, - host, - ExecutionOptions::new(Some(64), 64).unwrap(), - ); - for _ in 0..64 { - process.execute_op(Operation::Noop).unwrap(); - } - assert!(process.execute_op(Operation::Noop).is_err()); +// Check that process returns an error if a maximum number of cycles is exceeded. +#[test] +fn cycles_num_exceeded() { + let stack = StackInputs::default(); + let host = DefaultHost::default(); + let mut process = Process::new( + Kernel::default(), + stack, + host, + ExecutionOptions::new(Some(64), 64, false).unwrap(), + ); + for _ in 0..64 { + process.execute_op(Operation::Noop).unwrap(); } + assert!(process.execute_op(Operation::Noop).is_err()); } diff --git a/processor/src/trace/decoder/mod.rs b/processor/src/trace/decoder/mod.rs deleted file mode 100644 index d101665fed..0000000000 --- a/processor/src/trace/decoder/mod.rs +++ /dev/null @@ -1,306 +0,0 @@ -use super::{ - super::decoder::{AuxTraceHints, BlockTableUpdate, OpGroupTableUpdate}, - utils::build_lookup_table_row_values, - ColMatrix, Felt, FieldElement, Vec, DECODER_TRACE_OFFSET, -}; -use vm_core::utils::uninit_vector; - -#[cfg(test)] -mod tests; - -// CONSTANTS -// ================================================================================================ - -const ADDR_COL_IDX: usize = DECODER_TRACE_OFFSET + miden_air::trace::decoder::ADDR_COL_IDX; - -// DECODER AUXILIARY TRACE COLUMNS -// ================================================================================================ - -/// Builds and returns decoder auxiliary trace columns p1, p2, and p3 describing states of block -/// stack, block hash, and op group tables respectively. 
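(Illustrative usage sketch, not part of the patch: how the `ContextId` API introduced above is intended to be used. The helper name `call_context_for` is invented for illustration; only the conversions and the `root`/`is_root` methods defined above are assumed.)

// Hypothetical helper mirroring `start_call` above: a new CALL context gets the ID clk + 1.
fn call_context_for(clk: u32) -> ContextId {
    ContextId::from(clk + 1)
}

fn context_id_example() {
    let ctx = call_context_for(7);
    assert!(!ctx.is_root());
    // widening conversions are available where the ID feeds trace columns or memory addressing
    assert_eq!(u32::from(ctx), 8);
    assert_eq!(u64::from(ctx), 8);
    assert!(ContextId::root().is_root());
}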
-pub fn build_aux_columns>( - main_trace: &ColMatrix, - aux_trace_hints: &AuxTraceHints, - rand_elements: &[E], -) -> Vec> { - let p1 = build_aux_col_p1(main_trace, aux_trace_hints, rand_elements); - let p2 = build_aux_col_p2(main_trace, aux_trace_hints, rand_elements); - let p3 = build_aux_col_p3(main_trace, main_trace.num_rows(), aux_trace_hints, rand_elements); - vec![p1, p2, p3] -} - -// BLOCK STACK TABLE COLUMN -// ================================================================================================ - -/// Builds the execution trace of the decoder's `p1` column which describes the state of the block -/// stack table via multiset checks. -fn build_aux_col_p1>( - main_trace: &ColMatrix, - aux_trace_hints: &AuxTraceHints, - alphas: &[E], -) -> Vec { - // compute row values and their inverses for all rows that were added to the block stack table - let table_rows = aux_trace_hints.block_stack_table_rows(); - let (row_values, inv_row_values) = - build_lookup_table_row_values(table_rows, main_trace, alphas); - - // allocate memory for the running product column and set the initial value to ONE - let mut result = unsafe { uninit_vector(main_trace.num_rows()) }; - result[0] = E::ONE; - - // keep track of the index into the list of block stack table rows for started blocks; we can - // use this index because the sequence in which blocks are started is exactly the same as the - // sequence in which the rows are added to the block stack table. - let mut started_block_idx = 0; - - // keep track of the last updated row in the running product column - let mut result_idx = 0_usize; - - // iterate through the list of updates and apply them one by one - for (clk, update) in aux_trace_hints.block_exec_hints() { - let clk = *clk as usize; - - // if we skipped some cycles since the last update was processed, values in the last - // updated row should by copied over until the current cycle. - if result_idx < clk { - let last_value = result[result_idx]; - result[(result_idx + 1)..=clk].fill(last_value); - } - - // move the result pointer to the next row - result_idx = clk + 1; - - // apply the relevant updates to the column - match update { - BlockTableUpdate::BlockStarted(_) => { - // when a new block is started, multiply the running product by the value - // representing the entry for the block in the block stack table. - result[result_idx] = result[clk] * row_values[started_block_idx]; - started_block_idx += 1; - } - BlockTableUpdate::SpanExtended => { - // when a RESPAN operation is executed, we need to remove the entry for - // the last batch from the block stack table and also add an entry for the - // new batch. - let old_row_value_inv = inv_row_values[started_block_idx - 1]; - let new_row_value = row_values[started_block_idx]; - result[result_idx] = result[clk] * old_row_value_inv * new_row_value; - started_block_idx += 1; - } - BlockTableUpdate::BlockEnded(_) => { - // when a block is ended, we need to remove the entry for the block from the - // block stack table; we can look up the index of the entry using the block's - // ID which we get from the current row of the execution trace. 
- let block_id = get_block_addr(main_trace, clk as u32); - let row_idx = aux_trace_hints - .get_block_stack_row_idx(block_id) - .expect("block stack row not found"); - result[result_idx] = result[clk] * inv_row_values[row_idx]; - } - // REPEAT operation has no effect on the block stack table - BlockTableUpdate::LoopRepeated => result[result_idx] = result[clk], - } - } - - // at this point, block stack table must be empty - so, the last value must be ONE; - // we also fill in all the remaining values in the column with ONE's. - let last_value = result[result_idx]; - assert_eq!(last_value, E::ONE); - if result_idx < result.len() - 1 { - result[(result_idx + 1)..].fill(E::ONE); - } - - result -} - -// BLOCK HASH TABLE COLUMN -// ================================================================================================ - -/// Builds the execution trace of the decoder's `p2` column which describes the state of the block -/// hash table via multiset checks. -fn build_aux_col_p2>( - main_trace: &ColMatrix, - aux_trace_hints: &AuxTraceHints, - alphas: &[E], -) -> Vec { - // compute row values and their inverses for all rows that were added to the block hash table - let table_rows = aux_trace_hints.block_hash_table_rows(); - let (row_values, inv_row_values) = - build_lookup_table_row_values(table_rows, main_trace, alphas); - - // initialize memory for the running product column, and set the first value in the column to - // the value of the first row (which represents an entry for the root block of the program) - let mut result = unsafe { uninit_vector(main_trace.num_rows()) }; - result[0] = row_values[0]; - - // keep track of the index into the list of block hash table rows for started blocks; we can - // use this index because the sequence in which blocks are started is exactly the same as the - // sequence in which the rows are added to the block hash table. we start at 1 because the - // first row is already included in the running product above. - let mut started_block_idx = 1; - - // keep track of the last updated row in the running product column - let mut result_idx = 0_usize; - - // iterate through the list of updates and apply them one by one - for (clk, update) in aux_trace_hints.block_exec_hints() { - let clk = *clk as usize; - - // if we skipped some cycles since the last update was processed, values in the last - // updated row should by copied over until the current cycle. - if result_idx < clk { - let last_value = result[result_idx]; - result[(result_idx + 1)..=clk].fill(last_value); - } - - // move the result pointer to the next row - result_idx = clk + 1; - - // apply relevant updates - match update { - BlockTableUpdate::BlockStarted(num_children) => { - // if a new block was started, entries for the block's children are added to the - // table; in case this was a JOIN block with two children, the first child should - // have is_first_child set to true. 
- match *num_children { - 0 => result[result_idx] = result[clk], - 1 => { - debug_assert!(!table_rows[started_block_idx].is_first_child()); - result[result_idx] = result[clk] * row_values[started_block_idx]; - } - 2 => { - debug_assert!(table_rows[started_block_idx].is_first_child()); - debug_assert!(!table_rows[started_block_idx + 1].is_first_child()); - result[result_idx] = result[clk] - * row_values[started_block_idx] - * row_values[started_block_idx + 1]; - } - _ => panic!("invalid number of children for a block"), - } - - // move pointer into the table row list by the number of children - started_block_idx += *num_children as usize; - } - BlockTableUpdate::LoopRepeated => { - // When a REPEAT operation is executed, we need to add an entry for the loop's - // body to the table. Entries for blocks in the block hash table can be identified - // by their parent ID (which is the ID of the executing LOOP block). Parent ID is - // always the address value in the next row of the execution trace after a REPEAT - // operation is executed. Therefore, we can get the parent ID from the execution - // trace at the next row: clk + 1 (which is the same as result_idx), and use it to - // find this entry. - let parent_id = get_block_addr(main_trace, result_idx as u32); - let row_idx = aux_trace_hints - .get_block_hash_row_idx(parent_id, false) - .expect("block hash row not found"); - result[result_idx] = result[clk] * row_values[row_idx]; - } - BlockTableUpdate::BlockEnded(is_first_child) => { - // when END operation is executed, we need to remove an entry for the block from - // the block hash table. we can find the entry by its parent_id, which we can get - // from the trace in the same way as described above. we also need to know whether - // this block is the first or the second child of its parent, because for JOIN - // block, the same parent ID would map to two children. - let parent_id = get_block_addr(main_trace, result_idx as u32); - let row_idx = aux_trace_hints - .get_block_hash_row_idx(parent_id, *is_first_child) - .expect("block hash row not found"); - result[result_idx] = result[clk] * inv_row_values[row_idx]; - } - // RESPAN operation has no effect on the block hash table - BlockTableUpdate::SpanExtended => result[result_idx] = result[clk], - } - } - - // at this point, block hash table must be empty - so, the last value must be ONE; - // we also fill in all the remaining values in the column with ONE's. - let last_value = result[result_idx]; - assert_eq!(last_value, E::ONE); - if result_idx < result.len() - 1 { - result[(result_idx + 1)..].fill(E::ONE); - } - - result -} - -// OP GROUP TABLE COLUMN -// ================================================================================================ - -/// Builds the execution trace of the decoder's `p3` column which describes the state of the op -/// group table via multiset checks. 
-fn build_aux_col_p3>( - main_trace: &ColMatrix, - trace_len: usize, - aux_trace_hints: &AuxTraceHints, - alphas: &[E], -) -> Vec { - // allocate memory for the column and set the starting value to ONE - let mut result = unsafe { uninit_vector(trace_len) }; - result[0] = E::ONE; - - // compute row values and their inverses for all rows which were added to the op group table - let (row_values, inv_row_values) = - build_lookup_table_row_values(aux_trace_hints.op_group_table_rows(), main_trace, alphas); - - // keep track of indexes into the list of op group table rows separately for inserted and - // removed rows - let mut inserted_group_idx = 0_usize; - let mut removed_group_idx = 0_usize; - - // keep track of the last updated row in the running product column - let mut result_idx = 0_usize; - - for (clk, update) in aux_trace_hints.op_group_table_hints() { - let clk = *clk as usize; - - // if we skipped some cycles since the last update was processed, values in the last - // updated row should by copied over until the current cycle. - if result_idx < clk { - let last_value = result[result_idx]; - result[(result_idx + 1)..=clk].fill(last_value); - } - - // apply the relevant updates to the column - result_idx = clk + 1; - match update { - OpGroupTableUpdate::InsertRows(num_op_groups) => { - // if the rows were added, multiply the current value in the column by the values - // of all added rows - let mut value = row_values[inserted_group_idx]; - for i in 1..(*num_op_groups as usize) { - value *= row_values[inserted_group_idx + i]; - } - result[result_idx] = result[clk] * value; - - // advance the inserted group pointer by the number of inserted rows - inserted_group_idx += *num_op_groups as usize; - } - OpGroupTableUpdate::RemoveRow => { - // if a row was removed, divide the current value in the column by the value - // of the row - result[result_idx] = result[clk] * inv_row_values[removed_group_idx]; - - // advance the removed group pointer by one - removed_group_idx += 1; - } - } - } - - // at this point, op group table must be empty - so, the last value must be ONE; - // we also fill in all the remaining values in the column with ONE's. - let last_value = result[result_idx]; - assert_eq!(last_value, E::ONE); - if result_idx < result.len() - 1 { - result[(result_idx + 1)..].fill(E::ONE); - } - - result -} - -// HELPER FUNCTIONS -// ================================================================================================ - -/// Returns the value in the block address column at the specified row. 
-fn get_block_addr(main_trace: &ColMatrix, row_idx: u32) -> Felt { - main_trace.get(ADDR_COL_IDX, row_idx as usize) -} diff --git a/processor/src/trace/mod.rs b/processor/src/trace/mod.rs index 88f9ee1236..0124e41d9e 100644 --- a/processor/src/trace/mod.rs +++ b/processor/src/trace/mod.rs @@ -1,28 +1,22 @@ use super::{ chiplets::AuxTraceBuilder as ChipletsAuxTraceBuilder, crypto::RpoRandomCoin, - decoder::AuxTraceHints as DecoderAuxTraceHints, + decoder::AuxTraceBuilder as DecoderAuxTraceBuilder, range::AuxTraceBuilder as RangeCheckerAuxTraceBuilder, stack::AuxTraceBuilder as StackAuxTraceBuilder, ColMatrix, Digest, Felt, FieldElement, Host, - Process, StackTopState, Vec, + Process, StackTopState, }; +use crate::utils::collections::*; use miden_air::trace::{ decoder::{NUM_USER_OP_HELPERS, USER_OP_HELPERS_OFFSET}, + main_trace::MainTrace, AUX_TRACE_RAND_ELEMENTS, AUX_TRACE_WIDTH, DECODER_TRACE_OFFSET, MIN_TRACE_LEN, STACK_TRACE_OFFSET, TRACE_WIDTH, }; use vm_core::{stack::STACK_TOP_SIZE, ProgramInfo, StackOutputs, ZERO}; use winter_prover::{crypto::RandomCoin, EvaluationFrame, Trace, TraceLayout}; -#[cfg(feature = "std")] -use vm_core::StarkField; - mod utils; -pub use utils::{ - build_lookup_table_row_values, AuxColumnBuilder, ChipletsLengths, LookupTableRow, - TraceFragment, TraceLenSummary, -}; - -mod decoder; +pub use utils::{AuxColumnBuilder, ChipletsLengths, TraceFragment, TraceLenSummary}; #[cfg(test)] mod tests; @@ -38,8 +32,8 @@ pub const NUM_RAND_ROWS: usize = 1; // VM EXECUTION TRACE // ================================================================================================ -pub struct AuxTraceHints { - pub(crate) decoder: DecoderAuxTraceHints, +pub struct AuxTraceBuilders { + pub(crate) decoder: DecoderAuxTraceBuilder, pub(crate) stack: StackAuxTraceBuilder, pub(crate) range: RangeCheckerAuxTraceBuilder, pub(crate) chiplets: ChipletsAuxTraceBuilder, @@ -55,8 +49,8 @@ pub struct AuxTraceHints { pub struct ExecutionTrace { meta: Vec, layout: TraceLayout, - main_trace: ColMatrix, - aux_trace_hints: AuxTraceHints, + main_trace: MainTrace, + aux_trace_builders: AuxTraceBuilders, program_info: ProgramInfo, stack_outputs: StackOutputs, trace_len_summary: TraceLenSummary, @@ -80,19 +74,19 @@ impl ExecutionTrace { // to inject random values at the end of the trace; using program hash here is OK because // we are using random values only to stabilize constraint degrees, and not to achieve // perfect zero knowledge. 
- let program_hash: Digest = process.decoder.program_hash().into(); - let rng = RpoRandomCoin::new(program_hash.as_elements()); + let program_hash = process.decoder.program_hash(); + let rng = RpoRandomCoin::new(program_hash); // create a new program info instance with the underlying kernel let kernel = process.kernel().clone(); - let program_info = ProgramInfo::new(program_hash, kernel); + let program_info = ProgramInfo::new(program_hash.into(), kernel); let (main_trace, aux_trace_hints, trace_len_summary) = finalize_trace(process, rng); Self { meta: Vec::new(), layout: TraceLayout::new(TRACE_WIDTH, [AUX_TRACE_WIDTH], [AUX_TRACE_RAND_ELEMENTS]), - main_trace: ColMatrix::new(main_trace), - aux_trace_hints, + aux_trace_builders: aux_trace_hints, + main_trace, program_info, stack_outputs, trace_len_summary, @@ -178,11 +172,11 @@ impl ExecutionTrace { #[cfg(test)] pub fn test_finalize_trace( process: Process, - ) -> (Vec>, AuxTraceHints, TraceLenSummary) + ) -> (MainTrace, AuxTraceBuilders, TraceLenSummary) where H: Host, { - let rng = RpoRandomCoin::new(&EMPTY_WORD); + let rng = RpoRandomCoin::new(EMPTY_WORD); finalize_trace(process, rng) } } @@ -222,23 +216,24 @@ impl Trace for ExecutionTrace { // TODO: build auxiliary columns in multiple threads // add decoder's running product columns - let decoder_aux_columns = decoder::build_aux_columns( - &self.main_trace, - &self.aux_trace_hints.decoder, - rand_elements, - ); + let decoder_aux_columns = self + .aux_trace_builders + .decoder + .build_aux_columns(&self.main_trace, rand_elements); // add stack's running product columns let stack_aux_columns = - self.aux_trace_hints.stack.build_aux_columns(&self.main_trace, rand_elements); + self.aux_trace_builders.stack.build_aux_columns(&self.main_trace, rand_elements); // add the range checker's running product columns let range_aux_columns = - self.aux_trace_hints.range.build_aux_columns(&self.main_trace, rand_elements); + self.aux_trace_builders.range.build_aux_columns(&self.main_trace, rand_elements); // add the running product columns for the chiplets - let chiplets = - self.aux_trace_hints.chiplets.build_aux_columns(&self.main_trace, rand_elements); + let chiplets = self + .aux_trace_builders + .chiplets + .build_aux_columns(&self.main_trace, rand_elements); // combine all auxiliary columns into a single vector let mut aux_columns = decoder_aux_columns @@ -249,7 +244,7 @@ impl Trace for ExecutionTrace { .collect::>(); // inject random values into the last rows of the trace - let mut rng = RpoRandomCoin::new(self.program_hash().as_elements()); + let mut rng = RpoRandomCoin::new(self.program_hash().into()); for i in self.length() - NUM_RAND_ROWS..self.length() { for column in aux_columns.iter_mut() { column[i] = rng.draw().expect("failed to draw a random value"); @@ -280,7 +275,7 @@ impl Trace for ExecutionTrace { fn finalize_trace( process: Process, mut rng: RpoRandomCoin, -) -> (Vec>, AuxTraceHints, TraceLenSummary) +) -> (MainTrace, AuxTraceBuilders, TraceLenSummary) where H: Host, { @@ -338,12 +333,14 @@ where } } - let aux_trace_hints = AuxTraceHints { - decoder: decoder_trace.aux_trace_hints, + let aux_trace_hints = AuxTraceBuilders { + decoder: decoder_trace.aux_builder, stack: stack_trace.aux_builder, range: range_check_trace.aux_builder, chiplets: chiplets_trace.aux_builder, }; - (trace, aux_trace_hints, trace_len_summary) + let main_trace = MainTrace::new(ColMatrix::new(trace)); + + (main_trace, aux_trace_hints, trace_len_summary) } diff --git a/processor/src/trace/tests/chiplets/hasher.rs 
b/processor/src/trace/tests/chiplets/hasher.rs index 2cbc15d0d4..65b8fc7595 100644 --- a/processor/src/trace/tests/chiplets/hasher.rs +++ b/processor/src/trace/tests/chiplets/hasher.rs @@ -22,8 +22,8 @@ use vm_core::{ chiplets::hasher::apply_permutation, code_blocks::CodeBlock, crypto::merkle::{MerkleStore, MerkleTree, NodeIndex}, - utils::{collections::Vec, range}, - StarkField, Word, + utils::{collections::*, range}, + Word, }; // CONSTANTS @@ -408,7 +408,7 @@ pub fn b_chip_permutation() { fn b_chip_mpverify() { let index = 5usize; let leaves = init_leaves(&[1, 2, 3, 4, 5, 6, 7, 8]); - let tree = MerkleTree::new(leaves.to_vec()).unwrap(); + let tree = MerkleTree::new(&leaves).unwrap(); let stack_inputs = [ tree.root()[0].as_int(), @@ -538,6 +538,219 @@ fn b_chip_mpverify() { } } +/// Tests the generation of the `b_chip` bus column when the hasher performs a Merkle root update +/// requested by the `MrUpdate` user operation. +#[test] +#[allow(clippy::needless_range_loop)] +fn b_chip_mrupdate() { + let index = 5usize; + let leaves = init_leaves(&[1, 2, 3, 4, 5, 6, 7, 8]); + let mut tree = MerkleTree::new(&leaves).unwrap(); + + let old_root = tree.root(); + let old_leaf_value = leaves[index]; + + let new_leaf_value = leaves[0]; + + let stack_inputs = [ + new_leaf_value[0].as_int(), + new_leaf_value[1].as_int(), + new_leaf_value[2].as_int(), + new_leaf_value[3].as_int(), + old_root[0].as_int(), + old_root[1].as_int(), + old_root[2].as_int(), + old_root[3].as_int(), + index as u64, + tree.depth() as u64, + old_leaf_value[0].as_int(), + old_leaf_value[1].as_int(), + old_leaf_value[2].as_int(), + old_leaf_value[3].as_int(), + ]; + let stack_inputs = StackInputs::try_from_values(stack_inputs).unwrap(); + let store = MerkleStore::from(&tree); + let advice_inputs = AdviceInputs::default().with_merkle_store(store); + + let mut trace = + build_trace_from_ops_with_inputs(vec![Operation::MrUpdate], stack_inputs, advice_inputs); + let alphas = rand_array::(); + let aux_columns = trace.build_aux_segment(&[], &alphas).unwrap(); + let b_chip = aux_columns.get_column(CHIPLETS_AUX_TRACE_OFFSET); + + assert_eq!(trace.length(), b_chip.len()); + assert_eq!(ONE, b_chip[0]); + + // at cycle 0 the following are added for inclusion in the next row: + // - the initialization of the span hash is requested by the decoder + // - the initialization of the span hash is provided by the hasher + + // initialize the request state. + let mut span_state = [ZERO; STATE_WIDTH]; + fill_state_from_decoder_with_domain(&trace, &mut span_state, 0); + // request the initialization of the span hash + let span_init = + build_expected(&alphas, LINEAR_HASH_LABEL, span_state, [ZERO; STATE_WIDTH], ONE, ZERO); + let mut expected = span_init.inv(); + // provide the initialization of the span hash + expected *= build_expected_from_trace(&trace, &alphas, 0); + assert_eq!(expected, b_chip[1]); + + // at cycle 1 a merkle path verification is executed and the initialization and result of the + // hash are both requested by the stack. 
+ let path = tree + .get_path(NodeIndex::new(tree.depth(), index as u64).unwrap()) + .expect("failed to get Merkle tree path"); + let mp_state = init_state_from_words( + &[path[0][0], path[0][1], path[0][2], path[0][3]], + &[leaves[index][0], leaves[index][1], leaves[index][2], leaves[index][3]], + ); + let mp_init_old = build_expected( + &alphas, + MR_UPDATE_OLD_LABEL, + mp_state, + [ZERO; STATE_WIDTH], + Felt::new(9), + Felt::new(index as u64), + ); + // request the initialization of the (first) Merkle path verification + expected *= mp_init_old.inv(); + + let mp_old_verify_complete = HASH_CYCLE_LEN + (tree.depth() as usize) * HASH_CYCLE_LEN; + let mp_result_old = build_expected( + &alphas, + RETURN_HASH_LABEL, + // for the return hash, only the state digest matters, and it should match the root + [ + ZERO, + ZERO, + ZERO, + ZERO, + tree.root()[0], + tree.root()[1], + tree.root()[2], + tree.root()[3], + ZERO, + ZERO, + ZERO, + ZERO, + ], + [ZERO; STATE_WIDTH], + Felt::new(mp_old_verify_complete as u64), + Felt::new(index as u64 >> tree.depth()), + ); + + // request the result of the first Merkle path verification + expected *= mp_result_old.inv(); + + let new_leaf_value = leaves[0]; + tree.update_leaf(index as u64, new_leaf_value).unwrap(); + let new_root = tree.root(); + + // a second merkle path verification is executed and the initialization and result of the + // hash are both requested by the stack. + let path = tree + .get_path(NodeIndex::new(tree.depth(), index as u64).unwrap()) + .expect("failed to get Merkle tree path"); + let mp_state = init_state_from_words( + &[path[0][0], path[0][1], path[0][2], path[0][3]], + &[new_leaf_value[0], new_leaf_value[1], new_leaf_value[2], new_leaf_value[3]], + ); + + let mp_new_verify_complete = mp_old_verify_complete + (tree.depth() as usize) * HASH_CYCLE_LEN; + let mp_init_new = build_expected( + &alphas, + MR_UPDATE_NEW_LABEL, + mp_state, + [ZERO; STATE_WIDTH], + Felt::new(mp_old_verify_complete as u64 + 1), + Felt::new(index as u64), + ); + + // request the initialization of the second Merkle path verification + expected *= mp_init_new.inv(); + + let mp_result_new = build_expected( + &alphas, + RETURN_HASH_LABEL, + // for the return hash, only the state digest matters, and it should match the root + [ + ZERO, + ZERO, + ZERO, + ZERO, + new_root[0], + new_root[1], + new_root[2], + new_root[3], + ZERO, + ZERO, + ZERO, + ZERO, + ], + [ZERO; STATE_WIDTH], + Felt::new(mp_new_verify_complete as u64), + Felt::new(index as u64 >> tree.depth()), + ); + + // request the result of the second Merkle path verification + expected *= mp_result_new.inv(); + assert_eq!(expected, b_chip[2]); + + // at cycle 2 the result of the span hash is requested by the decoder + apply_permutation(&mut span_state); + let span_result = build_expected( + &alphas, + RETURN_HASH_LABEL, + span_state, + [ZERO; STATE_WIDTH], + Felt::new(8), + ZERO, + ); + expected *= span_result.inv(); + assert_eq!(expected, b_chip[3]); + + // Nothing changes when there is no communication with the hash chiplet. + for row in 3..8 { + assert_eq!(expected, b_chip[row]); + } + + // at cycle 7 the result of the span hash is provided by the hasher + expected *= build_expected_from_trace(&trace, &alphas, 7); + assert_eq!(expected, b_chip[8]); + + // at cycle 8 the initialization of the first merkle path is provided by the hasher + expected *= build_expected_from_trace(&trace, &alphas, 8); + assert_eq!(expected, b_chip[9]); + + // Nothing changes when there is no communication with the hash chiplet. 
+ for row in 10..(mp_old_verify_complete) { + assert_eq!(expected, b_chip[row]); + } + + // when the first merkle path verification has been completed the hasher provides the result + expected *= build_expected_from_trace(&trace, &alphas, mp_old_verify_complete - 1); + assert_eq!(expected, b_chip[mp_old_verify_complete]); + + // at cycle 32 the initialization of the second merkle path is provided by the hasher + expected *= build_expected_from_trace(&trace, &alphas, mp_old_verify_complete); + assert_eq!(expected, b_chip[mp_old_verify_complete + 1]); + + // Nothing changes when there is no communication with the hash chiplet. + for row in (mp_old_verify_complete + 1)..(mp_new_verify_complete) { + assert_eq!(expected, b_chip[row]); + } + + // when the merkle path verification has been completed the hasher provides the result + expected *= build_expected_from_trace(&trace, &alphas, mp_new_verify_complete - 1); + assert_eq!(expected, b_chip[mp_new_verify_complete]); + + // The value in b_chip should be ONE now and for the rest of the trace. + for row in (mp_new_verify_complete)..trace.length() - NUM_RAND_ROWS { + assert_eq!(ONE, b_chip[row]); + } +} + // TEST HELPERS // ================================================================================================ @@ -572,7 +785,7 @@ fn build_expected( || label == MR_UPDATE_NEW_LABEL || label == MR_UPDATE_OLD_LABEL ); - let bit = (index.as_int() >> 1) & 1; + let bit = index.as_int() & 1; let left_word = build_value(&alphas[8..12], &state[DIGEST_RANGE]); let right_word = build_value(&alphas[8..12], &state[DIGEST_RANGE.end..]); diff --git a/processor/src/trace/decoder/tests.rs b/processor/src/trace/tests/decoder.rs similarity index 80% rename from processor/src/trace/decoder/tests.rs rename to processor/src/trace/tests/decoder.rs index 71c806a1ef..4763ce01af 100644 --- a/processor/src/trace/decoder/tests.rs +++ b/processor/src/trace/tests/decoder.rs @@ -2,17 +2,17 @@ use super::{ super::{ tests::{build_trace_from_block, build_trace_from_ops}, utils::build_span_with_respan_ops, - LookupTableRow, Trace, NUM_RAND_ROWS, + Trace, NUM_RAND_ROWS, }, Felt, }; -use crate::decoder::{build_op_group, BlockHashTableRow, BlockStackTableRow, OpGroupTableRow}; +use crate::{decoder::build_op_group, ContextId}; use miden_air::trace::{ decoder::{P1_COL_IDX, P2_COL_IDX, P3_COL_IDX}, AUX_TRACE_RAND_ELEMENTS, }; use test_utils::rand::rand_array; -use vm_core::{code_blocks::CodeBlock, FieldElement, Operation, ONE, ZERO}; +use vm_core::{code_blocks::CodeBlock, FieldElement, Operation, Word, ONE, ZERO}; // BLOCK STACK TABLE TESTS // ================================================================================================ @@ -27,9 +27,8 @@ fn decoder_p1_span_with_respan() { let p1 = aux_columns.get_column(P1_COL_IDX); let row_values = [ - BlockStackTableRow::new_test(ONE, ZERO, false).to_value(&trace.main_trace, &alphas), - BlockStackTableRow::new_test(Felt::new(9), ZERO, false) - .to_value(&trace.main_trace, &alphas), + BlockStackTableRow::new(ONE, ZERO, false).to_value(&alphas), + BlockStackTableRow::new(Felt::new(9), ZERO, false).to_value(&alphas), ]; // make sure the first entry is ONE @@ -76,9 +75,9 @@ fn decoder_p1_join() { let a_9 = Felt::new(9); let a_17 = Felt::new(17); let row_values = [ - BlockStackTableRow::new_test(ONE, ZERO, false).to_value(&trace.main_trace, &alphas), - BlockStackTableRow::new_test(a_9, ONE, false).to_value(&trace.main_trace, &alphas), - BlockStackTableRow::new_test(a_17, ONE, false).to_value(&trace.main_trace, &alphas), + 
BlockStackTableRow::new(ONE, ZERO, false).to_value(&alphas), + BlockStackTableRow::new(a_9, ONE, false).to_value(&alphas), + BlockStackTableRow::new(a_17, ONE, false).to_value(&alphas), ]; // make sure the first entry is ONE @@ -135,8 +134,8 @@ fn decoder_p1_split() { let a_9 = Felt::new(9); let row_values = [ - BlockStackTableRow::new_test(ONE, ZERO, false).to_value(&trace.main_trace, &alphas), - BlockStackTableRow::new_test(a_9, ONE, false).to_value(&trace.main_trace, &alphas), + BlockStackTableRow::new(ONE, ZERO, false).to_value(&alphas), + BlockStackTableRow::new(a_9, ONE, false).to_value(&alphas), ]; // make sure the first entry is ONE @@ -188,13 +187,13 @@ fn decoder_p1_loop_with_repeat() { let a_41 = Felt::new(41); // address of the first SPAN block in the second iteration let a_49 = Felt::new(49); // address of the second SPAN block in the second iteration let row_values = [ - BlockStackTableRow::new_test(ONE, ZERO, true).to_value(&trace.main_trace, &alphas), - BlockStackTableRow::new_test(a_9, ONE, false).to_value(&trace.main_trace, &alphas), - BlockStackTableRow::new_test(a_17, a_9, false).to_value(&trace.main_trace, &alphas), - BlockStackTableRow::new_test(a_25, a_9, false).to_value(&trace.main_trace, &alphas), - BlockStackTableRow::new_test(a_33, ONE, false).to_value(&trace.main_trace, &alphas), - BlockStackTableRow::new_test(a_41, a_33, false).to_value(&trace.main_trace, &alphas), - BlockStackTableRow::new_test(a_49, a_33, false).to_value(&trace.main_trace, &alphas), + BlockStackTableRow::new(ONE, ZERO, true).to_value(&alphas), + BlockStackTableRow::new(a_9, ONE, false).to_value(&alphas), + BlockStackTableRow::new(a_17, a_9, false).to_value(&alphas), + BlockStackTableRow::new(a_25, a_9, false).to_value(&alphas), + BlockStackTableRow::new(a_33, ONE, false).to_value(&alphas), + BlockStackTableRow::new(a_41, a_33, false).to_value(&alphas), + BlockStackTableRow::new(a_49, a_33, false).to_value(&alphas), ]; // make sure the first entry is ONE @@ -295,8 +294,8 @@ fn decoder_p2_span_with_respan() { let aux_columns = trace.build_aux_segment(&[], &alphas).unwrap(); let p2 = aux_columns.get_column(P2_COL_IDX); - let row_values = [BlockHashTableRow::new_test(ZERO, span.hash().into(), false, false) - .to_value(&trace.main_trace, &alphas)]; + let row_values = + [BlockHashTableRow::new_test(ZERO, span.hash().into(), false, false).to_value(&alphas)]; // make sure the first entry is initialized to program hash let mut expected_value = row_values[0]; @@ -328,12 +327,9 @@ fn decoder_p2_join() { let p2 = aux_columns.get_column(P2_COL_IDX); let row_values = [ - BlockHashTableRow::new_test(ZERO, program.hash().into(), false, false) - .to_value(&trace.main_trace, &alphas), - BlockHashTableRow::new_test(ONE, span1.hash().into(), true, false) - .to_value(&trace.main_trace, &alphas), - BlockHashTableRow::new_test(ONE, span2.hash().into(), false, false) - .to_value(&trace.main_trace, &alphas), + BlockHashTableRow::new_test(ZERO, program.hash().into(), false, false).to_value(&alphas), + BlockHashTableRow::new_test(ONE, span1.hash().into(), true, false).to_value(&alphas), + BlockHashTableRow::new_test(ONE, span2.hash().into(), false, false).to_value(&alphas), ]; // make sure the first entry is initialized to program hash @@ -384,10 +380,8 @@ fn decoder_p2_split_true() { let p2 = aux_columns.get_column(P2_COL_IDX); let row_values = [ - BlockHashTableRow::new_test(ZERO, program.hash().into(), false, false) - .to_value(&trace.main_trace, &alphas), - BlockHashTableRow::new_test(ONE, span1.hash().into(), 
false, false) - .to_value(&trace.main_trace, &alphas), + BlockHashTableRow::new_test(ZERO, program.hash().into(), false, false).to_value(&alphas), + BlockHashTableRow::new_test(ONE, span1.hash().into(), false, false).to_value(&alphas), ]; // make sure the first entry is initialized to program hash @@ -430,10 +424,8 @@ fn decoder_p2_split_false() { let p2 = aux_columns.get_column(P2_COL_IDX); let row_values = [ - BlockHashTableRow::new_test(ZERO, program.hash().into(), false, false) - .to_value(&trace.main_trace, &alphas), - BlockHashTableRow::new_test(ONE, span2.hash().into(), false, false) - .to_value(&trace.main_trace, &alphas), + BlockHashTableRow::new_test(ZERO, program.hash().into(), false, false).to_value(&alphas), + BlockHashTableRow::new_test(ONE, span2.hash().into(), false, false).to_value(&alphas), ]; // make sure the first entry is initialized to program hash @@ -479,18 +471,12 @@ fn decoder_p2_loop_with_repeat() { let a_9 = Felt::new(9); // address of the JOIN block in the first iteration let a_33 = Felt::new(33); // address of the JOIN block in the second iteration let row_values = [ - BlockHashTableRow::new_test(ZERO, program.hash().into(), false, false) - .to_value(&trace.main_trace, &alphas), - BlockHashTableRow::new_test(ONE, body.hash().into(), false, true) - .to_value(&trace.main_trace, &alphas), - BlockHashTableRow::new_test(a_9, span1.hash().into(), true, false) - .to_value(&trace.main_trace, &alphas), - BlockHashTableRow::new_test(a_9, span2.hash().into(), false, false) - .to_value(&trace.main_trace, &alphas), - BlockHashTableRow::new_test(a_33, span1.hash().into(), true, false) - .to_value(&trace.main_trace, &alphas), - BlockHashTableRow::new_test(a_33, span2.hash().into(), false, false) - .to_value(&trace.main_trace, &alphas), + BlockHashTableRow::new_test(ZERO, program.hash().into(), false, false).to_value(&alphas), + BlockHashTableRow::new_test(ONE, body.hash().into(), false, true).to_value(&alphas), + BlockHashTableRow::new_test(a_9, span1.hash().into(), true, false).to_value(&alphas), + BlockHashTableRow::new_test(a_9, span2.hash().into(), false, false).to_value(&alphas), + BlockHashTableRow::new_test(a_33, span1.hash().into(), true, false).to_value(&alphas), + BlockHashTableRow::new_test(a_33, span2.hash().into(), false, false).to_value(&alphas), ]; // make sure the first entry is initialized to program hash @@ -616,12 +602,9 @@ fn decoder_p3_trace_one_batch() { // make sure 3 groups were inserted at clock cycle 1; these entries are for the two immediate // values and the second operation group consisting of [SWAP, MUL, ADD] - let g1_value = - OpGroupTableRow::new(ONE, Felt::new(3), ONE).to_value(&trace.main_trace, &alphas); - let g2_value = - OpGroupTableRow::new(ONE, Felt::new(2), Felt::new(2)).to_value(&trace.main_trace, &alphas); - let g3_value = OpGroupTableRow::new(ONE, ONE, build_op_group(&ops[9..])) - .to_value(&trace.main_trace, &alphas); + let g1_value = OpGroupTableRow::new(ONE, Felt::new(3), ONE).to_value(&alphas); + let g2_value = OpGroupTableRow::new(ONE, Felt::new(2), Felt::new(2)).to_value(&alphas); + let g3_value = OpGroupTableRow::new(ONE, ONE, build_op_group(&ops[9..])).to_value(&alphas); let expected_value = g1_value * g2_value * g3_value; assert_eq!(expected_value, p3[1]); @@ -672,13 +655,13 @@ fn decoder_p3_trace_two_batches() { // --- first batch ---------------------------------------------------------------------------- // make sure entries for 7 groups were inserted at clock cycle 1 let b0_values = [ - OpGroupTableRow::new(ONE, 
Felt::new(11), iv[0]).to_value(&trace.main_trace, &alphas), - OpGroupTableRow::new(ONE, Felt::new(10), iv[1]).to_value(&trace.main_trace, &alphas), - OpGroupTableRow::new(ONE, Felt::new(9), iv[2]).to_value(&trace.main_trace, &alphas), - OpGroupTableRow::new(ONE, Felt::new(8), iv[3]).to_value(&trace.main_trace, &alphas), - OpGroupTableRow::new(ONE, Felt::new(7), iv[4]).to_value(&trace.main_trace, &alphas), - OpGroupTableRow::new(ONE, Felt::new(6), iv[5]).to_value(&trace.main_trace, &alphas), - OpGroupTableRow::new(ONE, Felt::new(5), iv[6]).to_value(&trace.main_trace, &alphas), + OpGroupTableRow::new(ONE, Felt::new(11), iv[0]).to_value(&alphas), + OpGroupTableRow::new(ONE, Felt::new(10), iv[1]).to_value(&alphas), + OpGroupTableRow::new(ONE, Felt::new(9), iv[2]).to_value(&alphas), + OpGroupTableRow::new(ONE, Felt::new(8), iv[3]).to_value(&alphas), + OpGroupTableRow::new(ONE, Felt::new(7), iv[4]).to_value(&alphas), + OpGroupTableRow::new(ONE, Felt::new(6), iv[5]).to_value(&alphas), + OpGroupTableRow::new(ONE, Felt::new(5), iv[6]).to_value(&alphas), ]; let mut expected_value: Felt = b0_values.iter().fold(ONE, |acc, &val| acc * val); assert_eq!(expected_value, p3[1]); @@ -701,9 +684,9 @@ fn decoder_p3_trace_two_batches() { let batch1_addr = ONE + Felt::new(8); let op_group3 = build_op_group(&[Operation::Drop; 2]); let b1_values = [ - OpGroupTableRow::new(batch1_addr, Felt::new(3), iv[7]).to_value(&trace.main_trace, &alphas), - OpGroupTableRow::new(batch1_addr, Felt::new(2), iv[8]).to_value(&trace.main_trace, &alphas), - OpGroupTableRow::new(batch1_addr, ONE, op_group3).to_value(&trace.main_trace, &alphas), + OpGroupTableRow::new(batch1_addr, Felt::new(3), iv[7]).to_value(&alphas), + OpGroupTableRow::new(batch1_addr, Felt::new(2), iv[8]).to_value(&alphas), + OpGroupTableRow::new(batch1_addr, ONE, op_group3).to_value(&alphas), ]; let mut expected_value: Felt = b1_values.iter().fold(ONE, |acc, &val| acc * val); assert_eq!(expected_value, p3[10]); @@ -730,3 +713,133 @@ fn decoder_p3_trace_two_batches() { assert_eq!(ONE, p3[i]); } } + +// HELPER STRUCTS AND METHODS +// ================================================================================================ + +/// Describes a single entry in the block stack table. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct BlockStackTableRow { + block_id: Felt, + parent_id: Felt, + is_loop: bool, + parent_ctx: ContextId, + parent_fn_hash: Word, + parent_fmp: Felt, + parent_stack_depth: u32, + parent_next_overflow_addr: Felt, +} + +impl BlockStackTableRow { + /// Returns a new [BlockStackTableRow] instantiated with the specified parameters. This is + /// used for test purpose only. + #[cfg(test)] + pub fn new(block_id: Felt, parent_id: Felt, is_loop: bool) -> Self { + Self { + block_id, + parent_id, + is_loop, + parent_ctx: ContextId::root(), + parent_fn_hash: vm_core::EMPTY_WORD, + parent_fmp: ZERO, + parent_stack_depth: 0, + parent_next_overflow_addr: ZERO, + } + } +} + +impl BlockStackTableRow { + /// Reduces this row to a single field element in the field specified by E. This requires + /// at least 12 alpha values. 
+ pub fn to_value<E: FieldElement<BaseField = Felt>>(&self, alphas: &[E]) -> E { + let is_loop = if self.is_loop { ONE } else { ZERO }; + alphas[0] + + alphas[1].mul_base(self.block_id) + + alphas[2].mul_base(self.parent_id) + + alphas[3].mul_base(is_loop) + + alphas[4].mul_base(Felt::from(self.parent_ctx)) + + alphas[5].mul_base(self.parent_fmp) + + alphas[6].mul_base(Felt::from(self.parent_stack_depth)) + + alphas[7].mul_base(self.parent_next_overflow_addr) + + alphas[8].mul_base(self.parent_fn_hash[0]) + + alphas[9].mul_base(self.parent_fn_hash[1]) + + alphas[10].mul_base(self.parent_fn_hash[2]) + + alphas[11].mul_base(self.parent_fn_hash[3]) + } +} + +/// Describes a single entry in the block hash table. An entry in the block hash table is a tuple +/// (parent_id, block_hash, is_first_child, is_loop_body). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct BlockHashTableRow { + parent_id: Felt, + block_hash: Word, + is_first_child: bool, + is_loop_body: bool, +} + +impl BlockHashTableRow { + /// Returns a new [BlockHashTableRow] instantiated with the specified parameters. This is + /// used for test purposes only. + pub fn new_test( + parent_id: Felt, + block_hash: Word, + is_first_child: bool, + is_loop_body: bool, + ) -> Self { + Self { + parent_id, + block_hash, + is_first_child, + is_loop_body, + } + } +} + +impl BlockHashTableRow { + /// Reduces this row to a single field element in the field specified by E. This requires + /// at least 8 alpha values. + pub fn to_value<E: FieldElement<BaseField = Felt>>(&self, alphas: &[E]) -> E { + let is_first_child = if self.is_first_child { ONE } else { ZERO }; + let is_loop_body = if self.is_loop_body { ONE } else { ZERO }; + alphas[0] + + alphas[1].mul_base(self.parent_id) + + alphas[2].mul_base(self.block_hash[0]) + + alphas[3].mul_base(self.block_hash[1]) + + alphas[4].mul_base(self.block_hash[2]) + + alphas[5].mul_base(self.block_hash[3]) + + alphas[6].mul_base(is_first_child) + + alphas[7].mul_base(is_loop_body) + } +} + +/// Describes a single entry in the op group table. An entry in the op group table is a tuple +/// (batch_id, group_pos, group_value). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct OpGroupTableRow { + batch_id: Felt, + group_pos: Felt, + group_value: Felt, +} + +impl OpGroupTableRow { + /// Returns a new [OpGroupTableRow] instantiated with the specified parameters. + pub fn new(batch_id: Felt, group_pos: Felt, group_value: Felt) -> Self { + Self { + batch_id, + group_pos, + group_value, + } + } +} + +impl OpGroupTableRow { + /// Reduces this row to a single field element in the field specified by E. This requires + /// at least 4 alpha values.
+ pub fn to_value>(&self, alphas: &[E]) -> E { + alphas[0] + + alphas[1].mul_base(self.batch_id) + + alphas[2].mul_base(self.group_pos) + + alphas[3].mul_base(self.group_value) + } +} diff --git a/processor/src/trace/tests/hasher.rs b/processor/src/trace/tests/hasher.rs index bcb54b7451..af84f1c885 100644 --- a/processor/src/trace/tests/hasher.rs +++ b/processor/src/trace/tests/hasher.rs @@ -1,13 +1,15 @@ use super::{ super::{Trace, NUM_RAND_ROWS}, - build_trace_from_ops_with_inputs, rand_array, AdviceInputs, Felt, LookupTableRow, Operation, - Vec, Word, ONE, ZERO, + build_trace_from_ops_with_inputs, rand_array, AdviceInputs, Felt, Operation, Word, ONE, ZERO, +}; + +use crate::{utils::collections::*, StackInputs}; +use miden_air::trace::{ + chiplets::hasher::P1_COL_IDX, main_trace::MainTrace, AUX_TRACE_RAND_ELEMENTS, }; -use crate::{chiplets::ChipletsVTableRow, StackInputs}; -use miden_air::trace::{chiplets::hasher::P1_COL_IDX, AUX_TRACE_RAND_ELEMENTS}; use vm_core::{ crypto::merkle::{MerkleStore, MerkleTree, NodeIndex}, - FieldElement, StarkField, + FieldElement, }; // SIBLING TABLE TESTS @@ -72,11 +74,10 @@ fn hasher_p1_mr_update() { let p1 = aux_columns.get_column(P1_COL_IDX); let row_values = [ - ChipletsVTableRow::new_sibling(Felt::new(index), path[0].into()) - .to_value(&trace.main_trace, &alphas), - ChipletsVTableRow::new_sibling(Felt::new(index >> 1), path[1].into()) + SiblingTableRow::new(Felt::new(index), path[0].into()).to_value(&trace.main_trace, &alphas), + SiblingTableRow::new(Felt::new(index >> 1), path[1].into()) .to_value(&trace.main_trace, &alphas), - ChipletsVTableRow::new_sibling(Felt::new(index >> 2), path[2].into()) + SiblingTableRow::new(Felt::new(index >> 2), path[2].into()) .to_value(&trace.main_trace, &alphas), ]; @@ -148,7 +149,7 @@ fn hasher_p1_mr_update() { } } -// HELPER FUNCTIONS +// HELPER STRUCTS, METHODS AND FUNCTIONS // ================================================================================================ fn build_merkle_tree() -> (MerkleTree, Vec) { @@ -168,3 +169,48 @@ fn init_leaf(value: u64) -> Word { fn append_word(target: &mut Vec, word: Word) { word.iter().rev().for_each(|v| target.push(v.as_int())); } + +/// Describes a single entry in the sibling table which consists of a tuple `(index, node)` where +/// index is the index of the node at its depth. For example, assume a leaf has index n. For the +/// leaf's parent the index will be n << 1. For the parent of the parent, the index will be +/// n << 2 etc. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct SiblingTableRow { + index: Felt, + sibling: Word, +} + +impl SiblingTableRow { + pub fn new(index: Felt, sibling: Word) -> Self { + Self { index, sibling } + } + + /// Reduces this row to a single field element in the field specified by E. This requires + /// at least 6 alpha values. + pub fn to_value>( + &self, + _main_trace: &MainTrace, + alphas: &[E], + ) -> E { + // when the least significant bit of the index is 0, the sibling will be in the 3rd word + // of the hasher state, and when the least significant bit is 1, it will be in the 2nd + // word. we compute the value in this way to make constraint evaluation a bit easier since + // we need to compute the 2nd and the 3rd word values for other purposes as well. 
+ let lsb = self.index.as_int() & 1; + if lsb == 0 { + alphas[0] + + alphas[3].mul_base(self.index) + + alphas[12].mul_base(self.sibling[0]) + + alphas[13].mul_base(self.sibling[1]) + + alphas[14].mul_base(self.sibling[2]) + + alphas[15].mul_base(self.sibling[3]) + } else { + alphas[0] + + alphas[3].mul_base(self.index) + + alphas[8].mul_base(self.sibling[0]) + + alphas[9].mul_base(self.sibling[1]) + + alphas[10].mul_base(self.sibling[2]) + + alphas[11].mul_base(self.sibling[3]) + } + } +} diff --git a/processor/src/trace/tests/mod.rs b/processor/src/trace/tests/mod.rs index 8aeac004a9..70af6682ee 100644 --- a/processor/src/trace/tests/mod.rs +++ b/processor/src/trace/tests/mod.rs @@ -1,14 +1,18 @@ use super::{ - super::chiplets::init_state_from_words, ExecutionTrace, Felt, FieldElement, LookupTableRow, - Process, Trace, Vec, NUM_RAND_ROWS, + super::chiplets::init_state_from_words, ExecutionTrace, Felt, FieldElement, Process, Trace, + NUM_RAND_ROWS, +}; +use crate::{ + utils::collections::*, AdviceInputs, DefaultHost, ExecutionOptions, MemAdviceProvider, + StackInputs, }; -use crate::{AdviceInputs, DefaultHost, ExecutionOptions, MemAdviceProvider, StackInputs}; use test_utils::rand::rand_array; use vm_core::{ code_blocks::CodeBlock, CodeBlockTable, Kernel, Operation, StackOutputs, Word, ONE, ZERO, }; mod chiplets; +mod decoder; mod hasher; mod range; mod stack; diff --git a/processor/src/trace/tests/stack.rs b/processor/src/trace/tests/stack.rs index 09c22cb09a..ec32f2e3b1 100644 --- a/processor/src/trace/tests/stack.rs +++ b/processor/src/trace/tests/stack.rs @@ -1,8 +1,8 @@ use super::{ - build_trace_from_ops, rand_array, Felt, FieldElement, LookupTableRow, Operation, Trace, Vec, - NUM_RAND_ROWS, ONE, ZERO, + build_trace_from_ops, rand_array, Felt, FieldElement, Operation, Trace, NUM_RAND_ROWS, ONE, + ZERO, }; -use crate::stack::OverflowTableRow; +use crate::{stack::OverflowTableRow, utils::collections::*}; use miden_air::trace::{AUX_TRACE_RAND_ELEMENTS, STACK_AUX_TRACE_OFFSET}; // CONSTANTS @@ -37,10 +37,10 @@ fn p1_trace() { let p1 = aux_columns.get_column(P1_COL_IDX); let row_values = [ - OverflowTableRow::new(2, ONE, ZERO).to_value(&trace.main_trace, &alphas), - OverflowTableRow::new(3, TWO, TWO).to_value(&trace.main_trace, &alphas), - OverflowTableRow::new(6, TWO, TWO).to_value(&trace.main_trace, &alphas), - OverflowTableRow::new(10, ZERO, ZERO).to_value(&trace.main_trace, &alphas), + OverflowTableRow::new(Felt::new(2), ONE, ZERO).to_value(&alphas), + OverflowTableRow::new(Felt::new(3), TWO, TWO).to_value(&alphas), + OverflowTableRow::new(Felt::new(6), TWO, TWO).to_value(&alphas), + OverflowTableRow::new(Felt::new(10), ZERO, ZERO).to_value(&alphas), ]; // make sure the first entry is ONE diff --git a/processor/src/trace/utils.rs b/processor/src/trace/utils.rs index 705fa314ba..848e63f8a5 100644 --- a/processor/src/trace/utils.rs +++ b/processor/src/trace/utils.rs @@ -1,7 +1,13 @@ -use super::{ColMatrix, Felt, FieldElement, Vec, NUM_RAND_ROWS}; -use crate::chiplets::Chiplets; +use super::{Felt, FieldElement, NUM_RAND_ROWS}; +use crate::{ + chiplets::Chiplets, + utils::{collections::*, uninit_vector}, +}; use core::slice; -use vm_core::utils::uninit_vector; +use miden_air::trace::main_trace::MainTrace; + +#[cfg(test)] +use vm_core::{utils::ToElements, Operation}; // TRACE FRAGMENT // ================================================================================================ @@ -68,176 +74,6 @@ impl<'a> TraceFragment<'a> { } } -// LOOKUP TABLES -// 
================================================================================================ - -/// Defines a single row in a lookup table defined via multiset checks. -pub trait LookupTableRow { - /// Returns a single element representing the row in the field defined by E. The value is - /// computed using the provided random values. - fn to_value>( - &self, - main_trace: &ColMatrix, - rand_values: &[E], - ) -> E; -} - -/// Computes values as well as inverse value for all specified lookup table rows. -/// -/// To compute the inverses of row values we use a modified version of batch inversion algorithm. -/// The main modification is that we don't need to check for ZERO values, because, assuming -/// random values are drawn from a large enough field, coming across a ZERO value should be -/// computationally infeasible. -pub fn build_lookup_table_row_values, R: LookupTableRow>( - rows: &[R], - main_trace: &ColMatrix, - rand_values: &[E], -) -> (Vec, Vec) { - let mut row_values = unsafe { uninit_vector(rows.len()) }; - let mut inv_row_values = unsafe { uninit_vector(rows.len()) }; - - // compute row values and compute their product - let mut acc = E::ONE; - for ((row, value), inv_value) in - rows.iter().zip(row_values.iter_mut()).zip(inv_row_values.iter_mut()) - { - *inv_value = acc; - *value = row.to_value(main_trace, rand_values); - debug_assert_ne!(*value, E::ZERO, "row value cannot be ZERO"); - - acc *= *value; - } - - // invert the accumulated product - acc = acc.inv(); - - // multiply the accumulated value by original values to compute inverses - for i in (0..row_values.len()).rev() { - inv_row_values[i] *= acc; - acc *= row_values[i]; - } - - (row_values, inv_row_values) -} - -// AUX COLUMN BUILDER -// ================================================================================================ - -/// Defines a builder responsible for building a single column in an auxiliary segment of the -/// execution trace. -pub trait AuxColumnBuilder { - // REQUIRED METHODS - // -------------------------------------------------------------------------------------------- - - /// Returns an exhaustive list of rows which are present in the table. - fn get_table_rows(&self) -> &[R]; - - /// Returns a sequence of hints which indicate how the table was updated. Each hint consists - /// of a clock cycle at which the update happened as well as the hint describing the update. - fn get_table_hints(&self) -> &[(U, H)]; - - /// Returns a value by which the current value of the column should be multiplied to get the - /// next value. It is expected that this value should never be ZERO in practice. - fn get_multiplicand>( - &self, - hint: H, - row_values: &[E], - inv_row_values: &[E], - ) -> E; - - // PROVIDED METHODS - // -------------------------------------------------------------------------------------------- - - /// Builds and returns the auxiliary trace column managed by this builder. 
- fn build_aux_column(&self, main_trace: &ColMatrix, alphas: &[E]) -> Vec - where - E: FieldElement, - { - // compute row values and their inverses for all rows that were added to the table - let (row_values, inv_row_values) = self.build_row_values(main_trace, alphas); - - // allocate memory for the running product column and set its initial value - let mut result = unsafe { uninit_vector(main_trace.num_rows()) }; - result[0] = self.init_column_value(&row_values); - - // keep track of the last updated row in the running product column - let mut result_idx = 0_usize; - - // iterate through the list of updates and apply them one by one - for (clk, hint) in self.get_table_hints() { - let clk = clk.as_index(); - - // if we skipped some cycles since the last update was processed, values in the last - // updated row should by copied over until the current cycle. - if result_idx < clk { - let last_value = result[result_idx]; - result[(result_idx + 1)..=clk].fill(last_value); - } - - // move the result pointer to the next row - result_idx = clk + 1; - - // apply the relevant updates to the column; since the multiplicand value should be - // generated by "mixing-in" random values from a large field, the probability that we - // get a ZERO should be negligible (i.e., it should never come up in practice). - let multiplicand = self.get_multiplicand(hint.clone(), &row_values, &inv_row_values); - debug_assert_ne!(E::ZERO, multiplicand); - result[result_idx] = result[clk] * multiplicand; - } - - // after all updates have been processed, the table should not change; we make sure that - // the last value in the column is equal to the expected value, and fill in all the - // remaining column values with the last value - let last_value = result[result_idx]; - assert_eq!(last_value, self.final_column_value(&row_values)); - if result_idx < result.len() - 1 { - result[(result_idx + 1)..].fill(last_value); - } - - result - } - - /// Builds and returns row values and their inverses for all rows which were added to the - /// lookup table managed by this column builder. - fn build_row_values(&self, main_trace: &ColMatrix, alphas: &[E]) -> (Vec, Vec) - where - E: FieldElement, - { - build_lookup_table_row_values(self.get_table_rows(), main_trace, alphas) - } - - /// Returns the initial value in the auxiliary column. Default implementation of this method - /// returns ONE. - fn init_column_value>(&self, _row_values: &[E]) -> E { - E::ONE - } - - /// Returns the final value in the auxiliary column. Default implementation of this method - /// returns ONE. - fn final_column_value>(&self, _row_values: &[E]) -> E { - E::ONE - } -} - -/// Defines a simple trait to recognize the possible types of clock cycles associated with auxiliary -/// column update hints. -pub trait HintCycle { - /// Returns the cycle as a `usize` for indexing. - fn as_index(&self) -> usize; -} - -impl HintCycle for u32 { - fn as_index(&self) -> usize { - *self as usize - } -} - -impl HintCycle for u64 { - fn as_index(&self) -> usize { - *self as usize - } -} - // TRACE LENGTH SUMMARY // ================================================================================================ @@ -267,12 +103,12 @@ impl TraceLenSummary { } } - /// Returns length of the main trace + /// Returns length of the main trace. pub fn main_trace_len(&self) -> usize { self.main_trace_len } - /// Returns length of the range table + /// Returns length of the range checker trace. 
 pub fn range_trace_len(&self) -> usize {
        self.range_trace_len
    }
@@ -293,6 +129,12 @@ impl TraceLenSummary {
    pub fn padded_trace_len(&self) -> usize {
        (self.trace_len() + NUM_RAND_ROWS).next_power_of_two()
    }
+
+    /// Returns the percent (0 - 100) of the steps that were added to the trace to pad it to the
+    /// next power of two.
+    pub fn padding_percentage(&self) -> usize {
+        (self.padded_trace_len() - self.trace_len()) * 100 / self.padded_trace_len()
+    }
 }
 /// Contains trace lengths of all chilplets: hash, bitwise, memory and kernel ROM trace
@@ -361,10 +203,59 @@ impl ChipletsLengths {
     }
 }
+// AUXILIARY COLUMN BUILDER
+// ================================================================================================
+
+/// Defines a builder responsible for building a single column in an auxiliary segment of the
+/// execution trace.
+pub trait AuxColumnBuilder> {
+    // REQUIRED METHODS
+    // --------------------------------------------------------------------------------------------
+
+    fn get_requests_at(&self, main_trace: &MainTrace, alphas: &[E], row_idx: usize) -> E;
+
+    fn get_responses_at(&self, main_trace: &MainTrace, alphas: &[E], row_idx: usize) -> E;
+
+    // PROVIDED METHODS
+    // --------------------------------------------------------------------------------------------
+
+    fn init_requests(&self, _main_trace: &MainTrace, _alphas: &[E]) -> E {
+        E::ONE
+    }
+
+    fn init_responses(&self, _main_trace: &MainTrace, _alphas: &[E]) -> E {
+        E::ONE
+    }
+
+    /// Builds the chiplets bus auxiliary trace column.
+    fn build_aux_column(&self, main_trace: &MainTrace, alphas: &[E]) -> Vec {
+        let mut responses_prod: Vec = unsafe { uninit_vector(main_trace.num_rows()) };
+        let mut requests: Vec = unsafe { uninit_vector(main_trace.num_rows()) };
+
+        responses_prod[0] = self.init_responses(main_trace, alphas);
+        requests[0] = self.init_requests(main_trace, alphas);
+
+        let mut requests_running_prod = E::ONE;
+        for row_idx in 0..main_trace.num_rows() - 1 {
+            responses_prod[row_idx + 1] =
+                responses_prod[row_idx] * self.get_responses_at(main_trace, alphas, row_idx);
+            requests[row_idx + 1] = self.get_requests_at(main_trace, alphas, row_idx);
+            requests_running_prod *= requests[row_idx + 1];
+        }
+
+        let mut requests_running_divisor = requests_running_prod.inv();
+        let mut result_aux_column = responses_prod;
+        for i in (0..main_trace.num_rows()).rev() {
+            result_aux_column[i] *= requests_running_divisor;
+            requests_running_divisor *= requests[i];
+        }
+        result_aux_column
+    }
+}
+
 // TEST HELPERS
 // ================================================================================================
-#[cfg(test)]
-use vm_core::{utils::ToElements, Operation};
+
 #[cfg(test)]
 pub fn build_span_with_respan_ops() -> (Vec, Vec) {
     let iv = [1, 3, 5, 7, 9, 11, 13, 15, 17].to_elements();
diff --git a/processor/src/utils.rs b/processor/src/utils.rs
index 0a0e42c871..7c316a1699 100644
--- a/processor/src/utils.rs
+++ b/processor/src/utils.rs
@@ -1,4 +1,5 @@
-use super::{Felt, StarkField, Vec};
+use super::Felt;
+use collections::*;

 // RE-EXPORTS
 // ================================================================================================
diff --git a/prover/Cargo.toml b/prover/Cargo.toml
index 32ddd66462..be3c7c1802 100644
--- a/prover/Cargo.toml
+++ b/prover/Cargo.toml
@@ -1,11 +1,12 @@
 [package]
 name = "miden-prover"
-version = "0.7.0"
+version = "0.8.0"
 description = "Miden VM prover"
 authors = ["miden contributors"]
 readme = "README.md"
 license = "MIT"
 repository = "https://github.com/0xPolygonMiden/miden-vm"
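The `AuxColumnBuilder::build_aux_column` method introduced above builds the bus column as a running product of responses divided by a running product of requests, paying for only a single field inversion. Below is a minimal, self-contained sketch of that two-pass construction over a toy 61-bit prime field; `ToyFelt`, `build_bus_column`, and the sample data are illustrative stand-ins for the VM's `Felt`/`FieldElement` types and are not part of this codebase.

```rust
// Sketch only: a toy prime field standing in for the VM's field element types.
const P: u128 = 2_305_843_009_213_693_951; // 2^61 - 1 (Mersenne prime), toy modulus

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct ToyFelt(u128);

impl ToyFelt {
    const ONE: Self = ToyFelt(1);

    fn mul(self, rhs: Self) -> Self {
        ToyFelt(self.0 * rhs.0 % P)
    }

    // Inversion via Fermat's little theorem: x^(P-2) mod P.
    fn inv(self) -> Self {
        let (mut base, mut acc, mut exp) = (self.0, 1u128, P - 2);
        while exp > 0 {
            if exp & 1 == 1 {
                acc = acc * base % P;
            }
            base = base * base % P;
            exp >>= 1;
        }
        ToyFelt(acc)
    }
}

/// Mirrors the two passes of `build_aux_column`: a forward pass accumulating the product of
/// responses (and the total product of requests), then a backward pass that divides every
/// entry by the requests seen so far, reusing the single inversion computed up front.
fn build_bus_column(response_at: &[ToyFelt], request_at: &[ToyFelt]) -> Vec<ToyFelt> {
    let n = response_at.len();
    assert_eq!(n, request_at.len());

    let mut responses_prod = vec![ToyFelt::ONE; n];
    let mut requests = vec![ToyFelt::ONE; n];
    let mut requests_running_prod = ToyFelt::ONE;
    for row in 0..n - 1 {
        responses_prod[row + 1] = responses_prod[row].mul(response_at[row]);
        requests[row + 1] = request_at[row];
        requests_running_prod = requests_running_prod.mul(requests[row + 1]);
    }

    let mut divisor = requests_running_prod.inv();
    let mut column = responses_prod;
    for i in (0..n).rev() {
        column[i] = column[i].mul(divisor);
        divisor = divisor.mul(requests[i]);
    }
    column
}

fn main() {
    // A balanced "bus": every response is matched by an identical request.
    let responses = [ToyFelt(3), ToyFelt(5), ToyFelt(7), ToyFelt(1)];
    let requests = responses;
    let column = build_bus_column(&responses, &requests);
    // For a balanced bus the running product cancels out on every row.
    assert!(column.iter().all(|&v| v == ToyFelt::ONE));
}
```

For a balanced bus the column returns to ONE, which is essentially the boundary condition the corresponding constraint checks on the real auxiliary column.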
+documentation = "https://docs.rs/miden-prover/0.8.0" categories = ["cryptography", "emulators", "no-std"] keywords = ["miden", "prover", "stark", "zkp"] edition = "2021" @@ -15,16 +16,15 @@ rust-version = "1.73" concurrent = ["processor/concurrent", "std", "winter-prover/concurrent"] default = ["std"] metal = ["dep:ministark-gpu", "dep:elsa", "dep:pollster", "concurrent", "std"] -std = ["air/std", "processor/std", "log/std", "winter-prover/std"] -sve = ["processor/sve", "std"] +std = ["air/std", "processor/std", "winter-prover/std"] [dependencies] -air = { package = "miden-air", path = "../air", version = "0.7", default-features = false } -log = { version = "0.4", default-features = false, optional = true } -processor = { package = "miden-processor", path = "../processor", version = "0.7", default-features = false } -winter-prover = { package = "winter-prover", version = "0.6", default-features = false } +air = { package = "miden-air", path = "../air", version = "0.8", default-features = false } +processor = { package = "miden-processor", path = "../processor", version = "0.8", default-features = false } +tracing = { version = "0.1", default-features = false, features = ["attributes"] } +winter-prover = { package = "winter-prover", version = "0.8", default-features = false } [target.'cfg(all(target_arch = "aarch64", target_os = "macos"))'.dependencies] elsa = { version = "1.9", optional = true } -ministark-gpu = { version = "0.1", features = [ "winterfell" ], optional = true } +ministark-gpu = { version = "0.3", features = [ "winterfell" ], optional = true } pollster = { version = "0.3", optional = true } diff --git a/prover/README.md b/prover/README.md index f707bb5f63..87e84bac44 100644 --- a/prover/README.md +++ b/prover/README.md @@ -44,7 +44,6 @@ Miden prover can be compiled with the following features: * `std` - enabled by default and relies on the Rust standard library. * `concurrent` - implies `std` and also enables multi-threaded proof generation. -* `sve` - enables [SVE](https://en.wikipedia.org/wiki/AArch64#Scalable_Vector_Extension_(SVE))-based acceleration of the RPO hash function on supported platforms (e.g., Graviton 3). * `metal` - enables [Metal](https://en.wikipedia.org/wiki/Metal_(API))-based acceleration of proof generation (for recursive proofs) on supported platforms (e.g., Apple silicon). * `no_std` does not rely on the Rust standard library and enables compilation to WebAssembly. diff --git a/prover/src/gpu.rs b/prover/src/gpu.rs index 3f61c71475..af04c4cee3 100644 --- a/prover/src/gpu.rs +++ b/prover/src/gpu.rs @@ -1,42 +1,55 @@ //! This module contains GPU acceleration logic for Apple Silicon devices. For now the //! logic is limited to GPU accelerating RPO 256 trace commitments. 
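The prover now emits its timing information through `tracing` (added as a dependency above) rather than `log`, using `event!(Level::INFO, ...)` inside spans opened by `#[instrument]`. Below is a minimal, hypothetical sketch of how a host binary might surface those events; the `tracing_subscriber` crate and the `prove_program` stub are illustrative assumptions, not part of this diff.

```rust
use tracing::{event, instrument, Level};

// A stand-in for the instrumented `prove()` entry point; only the logging shape matters here.
#[instrument("prove_program", skip_all)]
fn prove_program(num_cycles: usize) {
    let now = std::time::Instant::now();
    // ... execute the program and generate the proof ...
    event!(
        Level::INFO,
        "proved a {}-cycle program in {} ms",
        num_cycles,
        now.elapsed().as_millis()
    );
}

fn main() {
    // Print INFO-level events (including the span opened by `#[instrument]`) to stdout.
    tracing_subscriber::fmt().with_max_level(Level::INFO).init();
    prove_program(1 << 10);
}
```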
-use crate::{ExecutionProver, WinterProofOptions}; -use air::{FieldElement, PublicInputs}; + +use super::{ + crypto::{RandomCoin, Rpo256, RpoDigest}, + event, + math::fft, + ExecutionProver, ExecutionTrace, Felt, FieldElement, Level, ProcessorAir, PublicInputs, + WinterProofOptions, +}; use elsa::FrozenVec; -use log::debug; use ministark_gpu::{ plan::{gen_rpo_merkle_tree, GpuRpo256RowMajor}, utils::page_aligned_uninit_vector, }; use pollster::block_on; -use processor::{ - crypto::{RandomCoin, Rpo256, RpoDigest}, - math::{fft, Felt}, - ExecutionTrace, ONE, -}; +use processor::ONE; use std::time::Instant; use winter_prover::{ crypto::MerkleTree, - matrix::{build_segments, get_evaluation_offsets, Segment}, - ColMatrix, CompositionPoly, ConstraintCommitment, Prover, RowMatrix, StarkDomain, + matrix::{build_segments, get_evaluation_offsets, ColMatrix, RowMatrix, Segment}, + proof::Queries, + AuxTraceRandElements, CompositionPoly, CompositionPolyTrace, ConstraintCommitment, + ConstraintCompositionCoefficients, DefaultConstraintEvaluator, EvaluationFrame, Prover, + StarkDomain, TraceInfo, TraceLayout, TraceLde, TracePolyTable, }; +// CONSTANTS +// ================================================================================================ + const RPO_RATE: usize = Rpo256::RATE_RANGE.end - Rpo256::RATE_RANGE.start; +// METAL RPO PROVER +// ================================================================================================ + /// Wraps an [ExecutionProver] and provides GPU acceleration for building Rpo256 trace commitments. -pub(crate) struct GpuRpoExecutionProver(pub ExecutionProver) +pub(crate) struct MetalRpoExecutionProver(pub ExecutionProver) where R: RandomCoin; -impl Prover for GpuRpoExecutionProver +impl Prover for MetalRpoExecutionProver where R: RandomCoin, { - type Air = as Prover>::Air; type BaseField = Felt; - type Trace = as Prover>::Trace; + type Air = ProcessorAir; + type Trace = ExecutionTrace; type HashFn = Rpo256; type RandomCoin = R; + type TraceLde> = MetalRpoTraceLde; + type ConstraintEvaluator<'a, E: FieldElement> = + DefaultConstraintEvaluator<'a, ProcessorAir, E>; fn options(&self) -> &WinterProofOptions { self.0.options() @@ -46,93 +59,22 @@ where self.0.get_pub_inputs(trace) } - /// Computes a low-degree extension (LDE) of the provided execution trace over the specified - /// domain and builds a commitment to the extended trace. - /// - /// The extension is performed by interpolating each column of the execution trace into a - /// polynomial of degree = trace_length - 1, and then evaluating the polynomial over the LDE - /// domain. - /// - /// Trace commitment is computed by hashing each row of the extended execution trace, and then - /// building a Merkle tree from the resulting hashes. - /// - /// Interpolations and evaluations are computed on the CPU while hashes are simultaneously - /// computed on the GPU: - /// - /// ```text - /// ────────────────────────────────────────────────────── - /// ┌───┐ ┌────┐ ┌───┐ ┌────┐ ┌───┐ - /// CPU: ... ──┤fft├─┬─┤ifft├───┤fft├─┬─┤ifft├───┤fft├─┬─ ... - /// └───┘ │ └────┘ └───┘ │ └────┘ └───┘ │ - /// ╴╴╴╴╴╴╴╴╴╴╴╴╴┼╴╴╴╴╴╴╴╴╴╴╴╴╴╴╴╴┼╴╴╴╴╴╴╴╴╴╴╴╴╴╴╴╴┼╴╴╴╴╴╴ - /// │ ┌──────────┐ │ ┌──────────┐ │ - /// GPU: └─┤ hash │ └─┤ hash │ └─ ... 
- /// └──────────┘ └──────────┘ - /// ────┼────────┼────────┼────────┼────────┼────────┼──── - /// t=n t=n+1 t=n+2 t=n+3 t=n+4 t=n+5 - /// ``` - fn build_trace_commitment( + fn new_trace_lde>( &self, - trace: &ColMatrix, + trace_info: &TraceInfo, + main_trace: &ColMatrix, domain: &StarkDomain, - ) -> (RowMatrix, MerkleTree, ColMatrix) - where - E: air::FieldElement, - { - // interpolate the execution trace - let now = Instant::now(); - let inv_twiddles = fft::get_inv_twiddles::(trace.num_rows()); - let trace_polys = trace.columns().map(|col| { - let mut poly = col.to_vec(); - fft::interpolate_poly(&mut poly, &inv_twiddles); - poly - }); - - // extend the execution trace and generate hashes on the gpu - let lde_segments = FrozenVec::new(); - let lde_domain_size = domain.lde_domain_size(); - let num_base_columns = trace.num_base_cols(); - let rpo_requires_padding = num_base_columns % RPO_RATE != 0; - let rpo_padded_segment_idx = rpo_requires_padding.then_some(num_base_columns / RPO_RATE); - let mut row_hasher = GpuRpo256RowMajor::::new(lde_domain_size, rpo_requires_padding); - let mut rpo_padded_segment: Vec<[Felt; RPO_RATE]>; - let mut lde_segment_generator = SegmentGenerator::new(trace_polys, domain); - let mut lde_segment_iter = lde_segment_generator.gen_segment_iter().enumerate(); - for (segment_idx, segment) in &mut lde_segment_iter { - let segment = lde_segments.push_get(Box::new(segment)); - // check if the segment requires padding - if rpo_padded_segment_idx.map_or(false, |pad_idx| pad_idx == segment_idx) { - // duplicate and modify the last segment with Rpo256's padding - // rule ("1" followed by "0"s). Our segments are already - // padded with "0"s we only need to add the "1"s. - let rpo_pad_column = num_base_columns % RPO_RATE; - rpo_padded_segment = unsafe { page_aligned_uninit_vector(lde_domain_size) }; - rpo_padded_segment.copy_from_slice(segment); - rpo_padded_segment.iter_mut().for_each(|row| row[rpo_pad_column] = ONE); - row_hasher.update(&rpo_padded_segment); - assert!(lde_segment_iter.next().is_none(), "padded segment should be the last"); - break; - } - row_hasher.update(segment); - } - let row_hashes = block_on(row_hasher.finish()); - let tree_nodes = gen_rpo_merkle_tree(&row_hashes); - // aggregate segments at the same time as the GPU generates the merkle tree nodes - let lde_segments = lde_segments.into_vec().into_iter().map(|p| *p).collect(); - let trace_lde = RowMatrix::from_segments(lde_segments, num_base_columns); - let trace_polys = lde_segment_generator.into_polys().unwrap(); - let nodes = block_on(tree_nodes).into_iter().map(RpoDigest::new).collect(); - let leaves = row_hashes.into_iter().map(RpoDigest::new).collect(); - let trace_tree = MerkleTree::from_raw_parts(nodes, leaves).unwrap(); - debug!( - "Extended (on CPU) and committed (on GPU) to an execution trace of {} columns from 2^{} to 2^{} steps in {} ms", - trace_polys.num_cols(), - trace_polys.num_rows().ilog2(), - trace_lde.num_rows().ilog2(), - now.elapsed().as_millis() - ); + ) -> (Self::TraceLde, TracePolyTable) { + MetalRpoTraceLde::new(trace_info, main_trace, domain) + } - (trace_lde, trace_tree, trace_polys) + fn new_evaluator<'a, E: FieldElement>( + &self, + air: &'a ProcessorAir, + aux_rand_elements: AuxTraceRandElements, + composition_coefficients: ConstraintCompositionCoefficients, + ) -> Self::ConstraintEvaluator<'a, E> { + self.0.new_evaluator(air, aux_rand_elements, composition_coefficients) } /// Evaluates constraint composition polynomial over the LDE domain and builds a commitment @@ 
-159,26 +101,24 @@ where /// ────┼────────┼────────┼────────┼────────┼────────┼─── /// t=n t=n+1 t=n+2 t=n+3 t=n+4 t=n+5 /// ``` - // TODO: consider merging build_constraint_commitment and build_trace_commitment in Winterfell - // * https://github.com/facebook/winterfell/pull/192 - // * https://github.com/0xPolygonMiden/miden-vm/issues/877 - fn build_constraint_commitment( + fn build_constraint_commitment>( &self, - composition_poly: &CompositionPoly, - domain: &StarkDomain, - ) -> ConstraintCommitment - where - E: FieldElement, - { + composition_poly_trace: CompositionPolyTrace, + num_trace_poly_columns: usize, + domain: &StarkDomain, + ) -> (ConstraintCommitment, CompositionPoly) { // evaluate composition polynomial columns over the LDE domain let now = Instant::now(); - let polys = composition_poly.data(); + let composition_poly = + CompositionPoly::new(composition_poly_trace, domain, num_trace_poly_columns); let blowup = domain.trace_to_lde_blowup(); - let offsets = get_evaluation_offsets::(polys.num_rows(), blowup, domain.offset()); + let offsets = + get_evaluation_offsets::(composition_poly.column_len(), blowup, domain.offset()); let segments = build_segments(composition_poly.data(), domain.trace_twiddles(), &offsets); - debug!( + event!( + Level::INFO, "Evaluated {} composition polynomial columns over LDE domain (2^{} elements) in {} ms", - polys.num_cols(), + composition_poly.num_columns(), offsets.len().ilog2(), now.elapsed().as_millis() ); @@ -186,7 +126,8 @@ where // build constraint evaluation commitment let now = Instant::now(); let lde_domain_size = domain.lde_domain_size(); - let num_base_columns = polys.num_base_cols(); + let num_base_columns = + composition_poly.num_columns() * ::EXTENSION_DEGREE; let rpo_requires_padding = num_base_columns % RPO_RATE != 0; let rpo_padded_segment_idx = rpo_requires_padding.then_some(num_base_columns / RPO_RATE); let mut row_hasher = GpuRpo256RowMajor::::new(lde_domain_size, rpo_requires_padding); @@ -215,15 +156,292 @@ where let leaves = row_hashes.into_iter().map(RpoDigest::new).collect(); let commitment = MerkleTree::::from_raw_parts(nodes, leaves).unwrap(); let constraint_commitment = ConstraintCommitment::new(composed_evaluations, commitment); - debug!( + event!( + Level::INFO, "Computed constraint evaluation commitment on the GPU (Merkle tree of depth {}) in {} ms", constraint_commitment.tree_depth(), now.elapsed().as_millis() ); - constraint_commitment + (constraint_commitment, composition_poly) + } +} + +// TRACE LOW DEGREE EXTENSION (METAL) +// ================================================================================================ + +/// Contains all segments of the extended execution trace, the commitments to these segments, the +/// LDE blowup factor, and the [TraceInfo]. +/// +/// Segments are stored in two groups: +/// - Main segment: this is the first trace segment generated by the prover. Values in this segment +/// will always be elements in the base field (even when an extension field is used). +/// - Auxiliary segments: a list of 0 or more segments for traces generated after the prover +/// commits to the first trace segment. Currently, at most 1 auxiliary segment is possible. 
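Both the trace and constraint commitments above hash the extended rows with RPO-256, whose rate is 8 base field elements, so whenever the number of base columns is not a multiple of `RPO_RATE` the last segment is duplicated and given RPO's "1 followed by 0s" padding. The helper below is a small illustrative sketch (not part of the codebase) of that bookkeeping: which segment gets padded and which lane receives the 1.

```rust
// Illustrative only: mirrors the `rpo_requires_padding` / `rpo_padded_segment_idx` logic above.
const RPO_RATE: usize = 8;

/// Returns `Some((segment_idx, lane))` when padding is needed: `segment_idx` is the segment
/// that must carry RPO-256's padding and `lane` is the column inside it where the 1 goes.
fn plan_rpo_padding(num_base_columns: usize) -> Option<(usize, usize)> {
    (num_base_columns % RPO_RATE != 0)
        .then(|| (num_base_columns / RPO_RATE, num_base_columns % RPO_RATE))
}

fn main() {
    // 72 columns fill exactly 9 segments of 8 lanes each: no padding required.
    assert_eq!(plan_rpo_padding(72), None);
    // 73 columns leave a single occupied lane in segment 9; the padding 1 goes into lane 1
    // of that segment and the remaining lanes stay 0.
    assert_eq!(plan_rpo_padding(73), Some((9, 1)));
}
```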
+pub struct MetalRpoTraceLde> {
+    // low-degree extension of the main segment of the trace
+    main_segment_lde: RowMatrix,
+    // commitment to the main segment of the trace
+    main_segment_tree: MerkleTree,
+    // low-degree extensions of the auxiliary segments of the trace
+    aux_segment_ldes: Vec>,
+    // commitment to the auxiliary segments of the trace
+    aux_segment_trees: Vec>,
+    blowup: usize,
+    trace_info: TraceInfo,
+}
+
+impl> MetalRpoTraceLde {
+    /// Takes the main trace segment columns as input, interpolates them into polynomials in
+    /// coefficient form, evaluates the polynomials over the LDE domain, commits to the
+    /// polynomial evaluations, and creates a new [MetalRpoTraceLde] with the LDE of the main
+    /// trace segment and the commitment.
+    ///
+    /// Returns a tuple containing a [TracePolyTable] with the trace polynomials for the main
+    /// trace segment and the new [MetalRpoTraceLde].
+    pub fn new(
+        trace_info: &TraceInfo,
+        main_trace: &ColMatrix,
+        domain: &StarkDomain,
+    ) -> (Self, TracePolyTable) {
+        // extend the main execution trace and build a Merkle tree from the extended trace
+        let (main_segment_lde, main_segment_tree, main_segment_polys) =
+            build_trace_commitment(main_trace, domain);
+
+        let trace_poly_table = TracePolyTable::new(main_segment_polys);
+        let trace_lde = MetalRpoTraceLde {
+            main_segment_lde,
+            main_segment_tree,
+            aux_segment_ldes: Vec::new(),
+            aux_segment_trees: Vec::new(),
+            blowup: domain.trace_to_lde_blowup(),
+            trace_info: trace_info.clone(),
+        };
+
+        (trace_lde, trace_poly_table)
+    }
+
+    // TEST HELPERS
+    // --------------------------------------------------------------------------------------------
+
+    /// Returns number of columns in the main segment of the execution trace.
+    #[cfg(test)]
+    pub fn main_segment_width(&self) -> usize {
+        self.main_segment_lde.num_cols()
+    }
+
+    /// Returns a reference to [RowMatrix] representing the main trace segment.
+    #[cfg(test)]
+    pub fn get_main_segment(&self) -> &RowMatrix {
+        &self.main_segment_lde
+    }
+
+    /// Returns the entire trace for the column at the specified index.
+    #[cfg(test)]
+    pub fn get_main_segment_column(&self, col_idx: usize) -> Vec {
+        (0..self.main_segment_lde.num_rows())
+            .map(|row_idx| self.main_segment_lde.get(col_idx, row_idx))
+            .collect()
+    }
+}
+
+impl> TraceLde for MetalRpoTraceLde {
+    type HashFn = Rpo256;
+
+    /// Returns the commitment to the low-degree extension of the main trace segment.
+    fn get_main_trace_commitment(&self) -> RpoDigest {
+        let root_hash = self.main_segment_tree.root();
+        *root_hash
+    }
+
+    /// Takes auxiliary trace segment columns as input, interpolates them into polynomials in
+    /// coefficient form, evaluates the polynomials over the LDE domain, and commits to the
+    /// polynomial evaluations.
+    ///
+    /// Returns a tuple containing the column polynomials in coefficient form and the commitment
+    /// to the polynomial evaluations over the LDE domain.
+    ///
+    /// # Panics
+    ///
+    /// This function will panic if any of the following are true:
+    /// - the number of rows in the provided `aux_trace` does not match the main trace.
+    /// - this segment would exceed the number of segments specified by the trace layout.
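The `read_main_trace_frame_into` and `read_aux_trace_frame_into` implementations that follow locate an evaluation frame's "next" row by stepping `blowup` positions ahead in the extended domain and wrapping around at the end. A tiny sketch of that index arithmetic, using assumed toy parameters rather than real trace dimensions:

```rust
// Illustrative index arithmetic only; `blowup` and `trace_len` are assumed toy values.
fn next_lde_step(lde_step: usize, blowup: usize, lde_trace_len: usize) -> usize {
    (lde_step + blowup) % lde_trace_len
}

fn main() {
    let (trace_len, blowup) = (8, 8);
    let lde_trace_len = trace_len * blowup; // 64 rows after low-degree extension
    // consecutive rows of the original trace are `blowup` steps apart in the LDE domain
    assert_eq!(next_lde_step(0, blowup, lde_trace_len), 8);
    // the frame anchored at the last original row wraps back around to the first one
    assert_eq!(next_lde_step(56, blowup, lde_trace_len), 0);
}
```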
+ fn add_aux_segment( + &mut self, + aux_trace: &ColMatrix, + domain: &StarkDomain, + ) -> (ColMatrix, RpoDigest) { + // extend the auxiliary trace segment and build a Merkle tree from the extended trace + let (aux_segment_lde, aux_segment_tree, aux_segment_polys) = + build_trace_commitment::(aux_trace, domain); + + // check errors + assert!( + self.aux_segment_ldes.len() < self.trace_info.layout().num_aux_segments(), + "the specified number of auxiliary segments has already been added" + ); + assert_eq!( + self.main_segment_lde.num_rows(), + aux_segment_lde.num_rows(), + "the number of rows in the auxiliary segment must be the same as in the main segment" + ); + + // save the lde and commitment + self.aux_segment_ldes.push(aux_segment_lde); + let root_hash = *aux_segment_tree.root(); + self.aux_segment_trees.push(aux_segment_tree); + + (aux_segment_polys, root_hash) + } + + /// Reads current and next rows from the main trace segment into the specified frame. + fn read_main_trace_frame_into(&self, lde_step: usize, frame: &mut EvaluationFrame) { + // at the end of the trace, next state wraps around and we read the first step again + let next_lde_step = (lde_step + self.blowup()) % self.trace_len(); + + // copy main trace segment values into the frame + frame.current_mut().copy_from_slice(self.main_segment_lde.row(lde_step)); + frame.next_mut().copy_from_slice(self.main_segment_lde.row(next_lde_step)); + } + + /// Reads current and next rows from the auxiliary trace segment into the specified frame. + /// + /// # Panics + /// This currently assumes that there is exactly one auxiliary trace segment, and will panic + /// otherwise. + fn read_aux_trace_frame_into(&self, lde_step: usize, frame: &mut EvaluationFrame) { + // at the end of the trace, next state wraps around and we read the first step again + let next_lde_step = (lde_step + self.blowup()) % self.trace_len(); + + // copy auxiliary trace segment values into the frame + let segment = &self.aux_segment_ldes[0]; + frame.current_mut().copy_from_slice(segment.row(lde_step)); + frame.next_mut().copy_from_slice(segment.row(next_lde_step)); + } + + /// Returns trace table rows at the specified positions along with Merkle authentication paths + /// from the commitment root to these rows. + fn query(&self, positions: &[usize]) -> Vec { + // build queries for the main trace segment + let mut result = vec![build_segment_queries( + &self.main_segment_lde, + &self.main_segment_tree, + positions, + )]; + + // build queries for auxiliary trace segments + for (i, segment_tree) in self.aux_segment_trees.iter().enumerate() { + let segment_lde = &self.aux_segment_ldes[i]; + result.push(build_segment_queries(segment_lde, segment_tree, positions)); + } + + result } + + /// Returns the number of rows in the execution trace. + fn trace_len(&self) -> usize { + self.main_segment_lde.num_rows() + } + + /// Returns blowup factor which was used to extend original execution trace into trace LDE. + fn blowup(&self) -> usize { + self.blowup + } + + /// Returns the trace layout of the execution trace. + fn trace_layout(&self) -> &TraceLayout { + self.trace_info.layout() + } +} + +/// Computes a low-degree extension (LDE) of the provided execution trace over the specified +/// domain and builds a commitment to the extended trace. +/// +/// The extension is performed by interpolating each column of the execution trace into a +/// polynomial of degree = trace_length - 1, and then evaluating the polynomial over the LDE +/// domain. 
+/// +/// Trace commitment is computed by hashing each row of the extended execution trace, and then +/// building a Merkle tree from the resulting hashes. +/// +/// Interpolations and evaluations are computed on the CPU while hashes are simultaneously +/// computed on the GPU: +/// +/// ```text +/// ────────────────────────────────────────────────────── +/// ┌───┐ ┌────┐ ┌───┐ ┌────┐ ┌───┐ +/// CPU: ... ──┤fft├─┬─┤ifft├───┤fft├─┬─┤ifft├───┤fft├─┬─ ... +/// └───┘ │ └────┘ └───┘ │ └────┘ └───┘ │ +/// ╴╴╴╴╴╴╴╴╴╴╴╴╴┼╴╴╴╴╴╴╴╴╴╴╴╴╴╴╴╴┼╴╴╴╴╴╴╴╴╴╴╴╴╴╴╴╴┼╴╴╴╴╴╴ +/// │ ┌──────────┐ │ ┌──────────┐ │ +/// GPU: └─┤ hash │ └─┤ hash │ └─ ... +/// └──────────┘ └──────────┘ +/// ────┼────────┼────────┼────────┼────────┼────────┼──── +/// t=n t=n+1 t=n+2 t=n+3 t=n+4 t=n+5 +/// ``` +fn build_trace_commitment>( + trace: &ColMatrix, + domain: &StarkDomain, +) -> (RowMatrix, MerkleTree, ColMatrix) { + // interpolate the execution trace + let now = Instant::now(); + let inv_twiddles = fft::get_inv_twiddles::(trace.num_rows()); + let trace_polys = trace.columns().map(|col| { + let mut poly = col.to_vec(); + fft::interpolate_poly(&mut poly, &inv_twiddles); + poly + }); + + // extend the execution trace and generate hashes on the gpu + let lde_segments = FrozenVec::new(); + let lde_domain_size = domain.lde_domain_size(); + let num_base_columns = trace.num_base_cols(); + let rpo_requires_padding = num_base_columns % RPO_RATE != 0; + let rpo_padded_segment_idx = rpo_requires_padding.then_some(num_base_columns / RPO_RATE); + let mut row_hasher = GpuRpo256RowMajor::::new(lde_domain_size, rpo_requires_padding); + let mut rpo_padded_segment: Vec<[Felt; RPO_RATE]>; + let mut lde_segment_generator = SegmentGenerator::new(trace_polys, domain); + let mut lde_segment_iter = lde_segment_generator.gen_segment_iter().enumerate(); + for (segment_idx, segment) in &mut lde_segment_iter { + let segment = lde_segments.push_get(Box::new(segment)); + // check if the segment requires padding + if rpo_padded_segment_idx.map_or(false, |pad_idx| pad_idx == segment_idx) { + // duplicate and modify the last segment with Rpo256's padding + // rule ("1" followed by "0"s). Our segments are already + // padded with "0"s we only need to add the "1"s. 
+ let rpo_pad_column = num_base_columns % RPO_RATE; + rpo_padded_segment = unsafe { page_aligned_uninit_vector(lde_domain_size) }; + rpo_padded_segment.copy_from_slice(segment); + rpo_padded_segment.iter_mut().for_each(|row| row[rpo_pad_column] = ONE); + row_hasher.update(&rpo_padded_segment); + assert!(lde_segment_iter.next().is_none(), "padded segment should be the last"); + break; + } + row_hasher.update(segment); + } + let row_hashes = block_on(row_hasher.finish()); + let tree_nodes = gen_rpo_merkle_tree(&row_hashes); + // aggregate segments at the same time as the GPU generates the merkle tree nodes + let lde_segments = lde_segments.into_vec().into_iter().map(|p| *p).collect(); + let trace_lde = RowMatrix::from_segments(lde_segments, num_base_columns); + let trace_polys = lde_segment_generator.into_polys().unwrap(); + let nodes = block_on(tree_nodes).into_iter().map(RpoDigest::new).collect(); + let leaves = row_hashes.into_iter().map(RpoDigest::new).collect(); + let trace_tree = MerkleTree::from_raw_parts(nodes, leaves).unwrap(); + event!( + Level::INFO, + "Extended (on CPU) and committed (on GPU) to an execution trace of {} columns from 2^{} to 2^{} steps in {} ms", + trace_polys.num_cols(), + trace_polys.num_rows().ilog2(), + trace_lde.num_rows().ilog2(), + now.elapsed().as_millis() + ); + + (trace_lde, trace_tree, trace_polys) } +// SEGMENT GENERATOR +// ================================================================================================ + struct SegmentGenerator<'a, E, I, const N: usize> where E: FieldElement, @@ -232,8 +450,8 @@ where poly_iter: I::IntoIter, polys: Option>, poly_offset: usize, - offsets: Vec, - domain: &'a StarkDomain, + offsets: Vec, + domain: &'a StarkDomain, } impl<'a, E, I, const N: usize> SegmentGenerator<'a, E, I, N> @@ -266,7 +484,7 @@ where } /// Generates the next segment if it exists otherwise returns None. 
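The `SegmentGenerator` above (with `gen_next_segment` just below) pulls trace polynomials lazily and groups their evaluations into fixed-width segments so the GPU can hash one segment while the CPU produces the next. The sketch below strips this down to the chunking pattern alone, with plain `u64` columns instead of field elements and no FFT; all names here are illustrative and not part of the codebase.

```rust
/// Illustrative stand-in for `SegmentGenerator`: groups an iterator of columns into segments
/// of N columns each, stored row-major, zero-padding the last segment if columns run out.
struct SegmentChunks<I: Iterator<Item = Vec<u64>>, const N: usize> {
    columns: I,
}

impl<I: Iterator<Item = Vec<u64>>, const N: usize> Iterator for SegmentChunks<I, N> {
    type Item = Vec<[u64; N]>;

    fn next(&mut self) -> Option<Self::Item> {
        // no more columns means no more segments
        let first = self.columns.next()?;
        let num_rows = first.len();
        let mut segment = vec![[0u64; N]; num_rows];
        let rest = (&mut self.columns).take(N - 1);
        for (col_idx, column) in std::iter::once(first).chain(rest).enumerate() {
            for (row_idx, value) in column.into_iter().enumerate() {
                segment[row_idx][col_idx] = value;
            }
        }
        Some(segment)
    }
}

fn main() {
    // 5 columns of 4 rows each; with N = 4 this yields one full segment and one padded one.
    let columns = (0u64..5).map(|c| vec![c; 4]);
    let chunker: SegmentChunks<_, 4> = SegmentChunks { columns };
    let segments: Vec<_> = chunker.collect();
    assert_eq!(segments.len(), 2);
    assert_eq!(segments[0][0], [0, 1, 2, 3]);
    assert_eq!(segments[1][0], [4, 0, 0, 0]);
}
```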
- fn gen_next_segment(&mut self) -> Option> { + fn gen_next_segment(&mut self) -> Option> { // initialize our col matrix if self.polys.is_none() { self.polys = Some(ColMatrix::new(vec![self.poly_iter.next()?])); @@ -291,7 +509,7 @@ where let mut data = unsafe { page_aligned_uninit_vector(domain_size) }; if polys.num_base_cols() < offset + N { // the segment will remain unfilled so we pad it with zeros - data.fill([E::BaseField::ZERO; N]); + data.fill([Felt::ZERO; N]); } let twiddles = self.domain.trace_twiddles(); @@ -301,6 +519,24 @@ where } } +fn build_segment_queries>( + segment_lde: &RowMatrix, + segment_tree: &MerkleTree, + positions: &[usize], +) -> Queries { + // for each position, get the corresponding row from the trace segment LDE and put all these + // rows into a single vector + let trace_states = + positions.iter().map(|&pos| segment_lde.row(pos).to_vec()).collect::>(); + + // build Merkle authentication paths to the leaves specified by positions + let trace_proof = segment_tree + .prove_batch(positions) + .expect("failed to generate a Merkle proof for trace queries"); + + Queries::new(trace_proof, trace_states) +} + struct SegmentIterator<'a, 'b, E, I, const N: usize>(&'b mut SegmentGenerator<'a, E, I, N>) where E: FieldElement, @@ -311,7 +547,7 @@ where E: FieldElement, I: IntoIterator>, { - type Item = Segment; + type Item = Segment; fn next(&mut self) -> Option { self.0.gen_next_segment() @@ -328,80 +564,112 @@ mod tests { use processor::{crypto::RpoRandomCoin, StackInputs, StackOutputs}; use winter_prover::math::fields::CubeExtension; + type CubeFelt = CubeExtension; + #[test] fn build_trace_commitment_on_gpu_with_padding_matches_cpu() { let cpu_prover = create_test_prover(); - let gpu_prover = GpuRpoExecutionProver(create_test_prover()); + let gpu_prover = MetalRpoExecutionProver(create_test_prover()); let num_rows = 1 << 8; + let trace_info = get_trace_info(1, num_rows); let trace = gen_random_trace(num_rows, RPO_RATE + 1); let domain = StarkDomain::from_twiddles(fft::get_twiddles(num_rows), 8, Felt::GENERATOR); - let (cpu_lde, cpu_mt, cpu_polys) = cpu_prover.build_trace_commitment(&trace, &domain); - let (gpu_lde, gpu_mt, gpu_polys) = gpu_prover.build_trace_commitment(&trace, &domain); + let (cpu_trace_lde, cpu_polys) = + cpu_prover.new_trace_lde::(&trace_info, &trace, &domain); + let (gpu_trace_lde, gpu_polys) = + gpu_prover.new_trace_lde::(&trace_info, &trace, &domain); - assert_eq!(cpu_lde.data(), gpu_lde.data()); - assert_eq!(cpu_mt.root(), gpu_mt.root()); - assert_eq!(cpu_polys.into_columns(), gpu_polys.into_columns()); + assert_eq!( + cpu_trace_lde.get_main_trace_commitment(), + gpu_trace_lde.get_main_trace_commitment() + ); + assert_eq!( + cpu_polys.main_trace_polys().collect::>(), + gpu_polys.main_trace_polys().collect::>() + ); } #[test] fn build_trace_commitment_on_gpu_without_padding_matches_cpu() { let cpu_prover = create_test_prover(); - let gpu_prover = GpuRpoExecutionProver(create_test_prover()); + let gpu_prover = MetalRpoExecutionProver(create_test_prover()); let num_rows = 1 << 8; + let trace_info = get_trace_info(1, num_rows); let trace = gen_random_trace(num_rows, RPO_RATE); let domain = StarkDomain::from_twiddles(fft::get_twiddles(num_rows), 8, Felt::GENERATOR); - let (cpu_lde, cpu_mt, cpu_polys) = cpu_prover.build_trace_commitment(&trace, &domain); - let (gpu_lde, gpu_mt, gpu_polys) = gpu_prover.build_trace_commitment(&trace, &domain); + let (cpu_trace_lde, cpu_polys) = + cpu_prover.new_trace_lde::(&trace_info, &trace, &domain); + let 
(gpu_trace_lde, gpu_polys) = + gpu_prover.new_trace_lde::(&trace_info, &trace, &domain); - assert_eq!(cpu_lde.data(), gpu_lde.data()); - assert_eq!(cpu_mt.root(), gpu_mt.root()); - assert_eq!(cpu_polys.into_columns(), gpu_polys.into_columns()); + assert_eq!( + cpu_trace_lde.get_main_trace_commitment(), + gpu_trace_lde.get_main_trace_commitment() + ); + assert_eq!( + cpu_polys.main_trace_polys().collect::>(), + gpu_polys.main_trace_polys().collect::>() + ); } #[test] fn build_constraint_commitment_on_gpu_with_padding_matches_cpu() { let cpu_prover = create_test_prover(); - let gpu_prover = GpuRpoExecutionProver(create_test_prover()); + let gpu_prover = MetalRpoExecutionProver(create_test_prover()); let num_rows = 1 << 8; let ce_blowup_factor = 2; - let coeffs = gen_random_coeffs::>(num_rows * ce_blowup_factor); - let composition_poly = CompositionPoly::new(coeffs, num_rows); + let values = get_random_values::(num_rows * ce_blowup_factor); let domain = StarkDomain::from_twiddles(fft::get_twiddles(num_rows), 8, Felt::GENERATOR); - let commitment_cpu = cpu_prover.build_constraint_commitment(&composition_poly, &domain); - let commitment_gpu = gpu_prover.build_constraint_commitment(&composition_poly, &domain); + let (commitment_cpu, composition_poly_cpu) = cpu_prover.build_constraint_commitment( + CompositionPolyTrace::new(values.clone()), + 2, + &domain, + ); + let (commitment_gpu, composition_poly_gpu) = + gpu_prover.build_constraint_commitment(CompositionPolyTrace::new(values), 2, &domain); assert_eq!(commitment_cpu.root(), commitment_gpu.root()); - assert_ne!(0, composition_poly.data().num_base_cols() % RPO_RATE); + assert_ne!(0, composition_poly_cpu.data().num_base_cols() % RPO_RATE); + assert_eq!(composition_poly_cpu.into_columns(), composition_poly_gpu.into_columns()); } #[test] fn build_constraint_commitment_on_gpu_without_padding_matches_cpu() { let cpu_prover = create_test_prover(); - let gpu_prover = GpuRpoExecutionProver(create_test_prover()); + let gpu_prover = MetalRpoExecutionProver(create_test_prover()); let num_rows = 1 << 8; let ce_blowup_factor = 8; - let coeffs = gen_random_coeffs::(num_rows * ce_blowup_factor); - let composition_poly = CompositionPoly::new(coeffs, num_rows); + let values = get_random_values::(num_rows * ce_blowup_factor); let domain = StarkDomain::from_twiddles(fft::get_twiddles(num_rows), 8, Felt::GENERATOR); - let commitment_cpu = cpu_prover.build_constraint_commitment(&composition_poly, &domain); - let commitment_gpu = gpu_prover.build_constraint_commitment(&composition_poly, &domain); + let (commitment_cpu, composition_poly_cpu) = cpu_prover.build_constraint_commitment( + CompositionPolyTrace::new(values.clone()), + 8, + &domain, + ); + let (commitment_gpu, composition_poly_gpu) = + gpu_prover.build_constraint_commitment(CompositionPolyTrace::new(values), 8, &domain); assert_eq!(commitment_cpu.root(), commitment_gpu.root()); - assert_eq!(0, composition_poly.data().num_base_cols() % RPO_RATE); + assert_eq!(0, composition_poly_cpu.data().num_base_cols() % RPO_RATE); + assert_eq!(composition_poly_cpu.into_columns(), composition_poly_gpu.into_columns()); } fn gen_random_trace(num_rows: usize, num_cols: usize) -> ColMatrix { ColMatrix::new((0..num_cols as u64).map(|col| vec![Felt::new(col); num_rows]).collect()) } - fn gen_random_coeffs(num_rows: usize) -> Vec { + fn get_random_values(num_rows: usize) -> Vec { (0..num_rows).map(|i| E::from(i as u32)).collect() } + fn get_trace_info(num_cols: usize, num_rows: usize) -> TraceInfo { + TraceInfo::new(num_cols, 
num_rows) + } + fn create_test_prover() -> ExecutionProver { ExecutionProver::new( ProvingOptions::with_128_bit_security(true), diff --git a/prover/src/lib.rs b/prover/src/lib.rs index e773d8a4c0..d3435d9b5c 100644 --- a/prover/src/lib.rs +++ b/prover/src/lib.rs @@ -6,17 +6,18 @@ use processor::{ crypto::{ Blake3_192, Blake3_256, ElementHasher, RandomCoin, Rpo256, RpoRandomCoin, WinterRandomCoin, }, - math::Felt, + math::{Felt, FieldElement}, ExecutionTrace, }; -use winter_prover::{ProofOptions as WinterProofOptions, Prover}; +use tracing::{event, instrument, Level}; +use winter_prover::{ + matrix::ColMatrix, AuxTraceRandElements, ConstraintCompositionCoefficients, + DefaultConstraintEvaluator, DefaultTraceLde, ProofOptions as WinterProofOptions, Prover, + StarkDomain, TraceInfo, TracePolyTable, +}; #[cfg(feature = "std")] -use log::debug; -#[cfg(feature = "std")] -use std::time::Instant; -#[cfg(feature = "std")] -use winter_prover::Trace; +use {std::time::Instant, winter_prover::Trace}; #[cfg(all(feature = "metal", target_arch = "aarch64", target_os = "macos"))] mod gpu; @@ -43,6 +44,7 @@ pub use winter_prover::StarkProof; /// /// # Errors /// Returns an error if program execution or STARK proof generation fails for any reason. +#[instrument("prove_program", skip_all)] pub fn prove( program: &Program, stack_inputs: StackInputs, @@ -58,16 +60,12 @@ where let trace = processor::execute(program, stack_inputs.clone(), host, *options.execution_options())?; #[cfg(feature = "std")] - let padding_percentage = (trace.trace_len_summary().padded_trace_len() - - trace.trace_len_summary().trace_len()) - * 100 - / trace.trace_len_summary().padded_trace_len(); - #[cfg(feature = "std")] - debug!( + event!( + Level::INFO, "Generated execution trace of {} columns and {} steps ({}% padded) in {} ms", trace.layout().main_trace_width(), trace.trace_len_summary().padded_trace_len(), - padding_percentage, + trace.trace_len_summary().padding_percentage(), now.elapsed().as_millis() ); @@ -95,7 +93,7 @@ where stack_outputs.clone(), ); #[cfg(all(feature = "metal", target_arch = "aarch64", target_os = "macos"))] - let prover = gpu::GpuRpoExecutionProver(prover); + let prover = gpu::MetalRpoExecutionProver(prover); prover.prove(trace) } } @@ -164,11 +162,14 @@ where H: ElementHasher, R: RandomCoin, { - type Air = ProcessorAir; type BaseField = Felt; + type Air = ProcessorAir; type Trace = ExecutionTrace; type HashFn = H; type RandomCoin = R; + type TraceLde> = DefaultTraceLde; + type ConstraintEvaluator<'a, E: FieldElement> = + DefaultConstraintEvaluator<'a, ProcessorAir, E>; fn options(&self) -> &WinterProofOptions { &self.options @@ -188,4 +189,22 @@ where let program_info = trace.program_info().clone(); PublicInputs::new(program_info, self.stack_inputs.clone(), self.stack_outputs.clone()) } + + fn new_trace_lde>( + &self, + trace_info: &TraceInfo, + main_trace: &ColMatrix, + domain: &StarkDomain, + ) -> (Self::TraceLde, TracePolyTable) { + DefaultTraceLde::new(trace_info, main_trace, domain) + } + + fn new_evaluator<'a, E: FieldElement>( + &self, + air: &'a ProcessorAir, + aux_rand_elements: AuxTraceRandElements, + composition_coefficients: ConstraintCompositionCoefficients, + ) -> Self::ConstraintEvaluator<'a, E> { + DefaultConstraintEvaluator::new(air, aux_rand_elements, composition_coefficients) + } } diff --git a/stdlib/Cargo.toml b/stdlib/Cargo.toml index b25adc7388..10e5c311dd 100644 --- a/stdlib/Cargo.toml +++ b/stdlib/Cargo.toml @@ -1,11 +1,12 @@ [package] name = "miden-stdlib" -version = "0.6.0" +version 
= "0.8.0" description = "Miden VM standard library" authors = ["miden contributors"] readme = "README.md" license = "MIT" repository = "https://github.com/0xPolygonMiden/miden-vm" +documentation = "https://docs.rs/miden-stdlib/0.8.0" categories = ["cryptography", "mathematics"] keywords = ["miden", "program", "stdlib"] edition = "2021" @@ -21,22 +22,22 @@ path = "tests/main.rs" [features] default = ["std"] -std = ["test-utils/std"] +std = [] [dependencies] -assembly = { package = "miden-assembly", default-features = false, path = "../assembly", version = "0.7" } +assembly = { package = "miden-assembly", path = "../assembly", version = "0.8", default-features = false } [dev-dependencies] blake3 = "1.5" -miden-air = { package = "miden-air", path = "../air", version = "0.7", default-features = false } +miden-air = { package = "miden-air", path = "../air", version = "0.8", default-features = false } num-bigint = "0.4" -processor = { package = "miden-processor", path = "../processor", version = "0.7", features = ["internals"], default-features = false } +processor = { package = "miden-processor", path = "../processor", version = "0.8", features = ["internals"], default-features = false } serde_json = "1.0" sha2 = "0.10" sha3 = "0.10" test-utils = { package = "miden-test-utils", path = "../test-utils" } -winter-air = { package = "winter-air", version = "0.6" } -winter-fri = { package = "winter-fri", version = "0.6" } +winter-air = { package = "winter-air", version = "0.8" } +winter-fri = { package = "winter-fri", version = "0.8" } [build-dependencies] -assembly = { package = "miden-assembly", path = "../assembly", version = "0.7" } +assembly = { package = "miden-assembly", path = "../assembly", version = "0.8" } diff --git a/stdlib/asm/collections/mmr.masm b/stdlib/asm/collections/mmr.masm index 5a32ce3ef8..06df3a23c2 100644 --- a/stdlib/asm/collections/mmr.masm +++ b/stdlib/asm/collections/mmr.masm @@ -17,11 +17,11 @@ export.u32unchecked_trailing_ones while.true # update the flag (2 cycles) - dup.2 u32checked_and + dup.2 u32and # => [flag, count, number, ...] # update the number (4 cycles) - movup.2 u32unchecked_div.2 + movup.2 u32div.2 # => [number/2, flag, count, ...] # update the counter (3 cycles) @@ -85,12 +85,12 @@ export.ilog2_checked push.0 # bit_pos from the most-signficant, `31-bit_pos` equals to ilog2 # stack: [bit_pos, power_of_two, number, ...] - dup.1 dup.3 u32checked_and eq.0 # (4 cycles) + dup.1 dup.3 u32and eq.0 # (4 cycles) # find the first most-significant true bit (9 * leading_zeros cycles) while.true add.1 swap div.2 swap # (5 cycles) - dup.1 dup.3 u32checked_and eq.0 # (4 cycles) + dup.1 dup.3 u32and eq.0 # (4 cycles) end # compute ilog2 (4 cycles) @@ -117,7 +117,7 @@ export.get # stack: [num_leaves, pos, mmr_ptr, ...] # compute `num_leaves & pos`, this contains all peaks before `pos` (and maybe some after the owning peak) (3 cycles) - dup.1 dup.1 u32checked_and + dup.1 dup.1 u32and # stack: [before_candidates, num_leaves, pos, mmr_ptr, ...] # compute `num_leaves - before_candidates`, this removes every peak before the owner (result may include peaks after owner) (4 cycles) @@ -133,7 +133,7 @@ export.get # stack: [after_mask, owner_peak, depth, num_leaves, pos, mmr_ptr, ...] # compute `num_leaves & after_mask`, uses the mask to compute the actual after peaks (2 cycles) - dup.3 u32checked_and + dup.3 u32and # stack: [after_peaks, owner_peak, depth, num_leaves, pos, mmr_ptr, ...] 
# compute `num_leaves - (after_peaks + owner_peak)`, this computes the before peaks (5 cycles) @@ -145,7 +145,7 @@ export.get # stack: [relative_pos, peaks_before, depth, mmr_ptr, ...] # compute `popcount(peaks_before)`, the count peaks before the target to be skipped when loading from mem (2 cycles) - swap u32checked_popcnt + swap u32assert u32popcnt # stack: [peak_count, relative_pos, depth, mmr_ptr, ...] # compute `mmr_ptr + peak_count + 1` the target tree index (3 cycles) @@ -184,7 +184,7 @@ end #! Cycles: 69 export.num_leaves_to_num_peaks # count number of peaks (69 cycles) - u32split u32unchecked_popcnt swap u32unchecked_popcnt add + u32split u32popcnt swap u32popcnt add # => [count, ...] end @@ -196,7 +196,7 @@ end #! Cycles: 17 export.num_peaks_to_message_size # the peaks are padded to a minimum length of 16 (10 cycles) - push.16 u32unchecked_max + push.16 u32max # => [count_min, ...] # when the number of peaks is greater than 16, then they are padded to an even number (7 cycles) diff --git a/stdlib/asm/collections/smt.masm b/stdlib/asm/collections/smt.masm index 9bb402e426..3d0ce9421a 100644 --- a/stdlib/asm/collections/smt.masm +++ b/stdlib/asm/collections/smt.masm @@ -1,1437 +1,439 @@ -# Constant value for empty sub-tree root at depth 16 -const.EMPTY_16_0=17483286922353768131 -const.EMPTY_16_1=353378057542380712 -const.EMPTY_16_2=1935183237414585408 -const.EMPTY_16_3=4820339620987989650 - -# Constant value for empty sub-tree root at depth 32 -const.EMPTY_32_0=11677748883385181208 -const.EMPTY_32_1=15891398395707500576 -const.EMPTY_32_2=3790704659934033620 -const.EMPTY_32_3=2126099371106695189 - -# Constant value for empty sub-tree root at depth 48 -const.EMPTY_48_0=10650694022550988030 -const.EMPTY_48_1=5634734408638476525 -const.EMPTY_48_2=9233115969432897632 -const.EMPTY_48_3=1437907447409278328 - -# HELPER METHODS -# ================================================================================================= - -#! Extracts 16 most significant bits from the passed-in value. -#! -#! Input: [v, ...] -#! Output: [v >> 48, ...] -#! -#! Cycles: 6 -proc.get_top_16_bits - u32split - swap drop - u32unchecked_shr.16 -end - -#! Extracts 32 most significant bits from the passed-in value. -#! -#! Input: [v, ...] -#! Output: [v >> 32, ...] -#! -#! Cycles: 3 -proc.get_top_32_bits - u32split swap drop -end - -#! Extracts 48 most significant bits from the passed-in value. -#! -#! Input: [v, ...] -#! Output: [v >> 16, ...] -#! -#! Cycles: 9 -proc.get_top_48_bits - u32split - swap - u32unchecked_shr.16 - swap - mul.65536 - add -end +# Constant value for the depth at which leaves sit +const.LEAF_DEPTH=64 -#! Extracts top 16 and the next 16 bits from the most significant elements of U and V. -#! -#! Also verifies that the top 16 bits of these elements are the same, while the next 16 bits are -#! different. -#! -#! Input: [U, V, ...] -#! Output: [(u3 << 16) >> 48, (v3 << 16) >> 48, v3 >> 48, U, V, ...] -#! -#! Cycles: 20 -proc.extract_index_16_16 - # extract the top 16 and the next 16 bits from the most significant element of V (6 cycles) - dup.4 u32split swap drop u32unchecked_divmod.65536 - # => [v3_hi_lo, v3_hi_hi, U, V, ...] - - # extract the top 16 and the next 16 bits from the most significant element of U (4 cycles) - dup.2 u32split u32unchecked_divmod.65536 - # => [u3_hi_lo, u3_hi_hi, u3_lo, v3_hi_lo, v3_hi_hi, U, V, ...] - - # make sure the lower 16 bits are different (5 cycles) - dup dup.4 neq assert - # => [u3_hi_lo, u3_hi_hi, u3_lo, v3_hi_lo, v3_hi_hi, U, V, ...] 
- - # make sure the top 16 bits are the same (5 cycles) - movdn.2 dup.4 assert_eq drop - # => [u3_hi_lo, v3_hi_lo, v3_hi_hi, U, V, ...] -end +# SET +# ================================================================================================= -#! Extracts top 16 and the next 32 bits from the most significant elements of U and V. +#! Inserts or removes a value associated with the given key. The leaf to which we're inserting is +#! guaranteed to be empty. #! -#! Also verifies that the top 32 bits of these elements are the same, while the next 16 bits are -#! different. +#! Inputs: +#! Operand stack: [V, K, R, ...] #! -#! Input: [U, V, ...] -#! Output: [(u3 << 16) >> 32, (v3 << 16) >> 32, v3 >> 48, U, V, ...] +#! Outputs: +#! Operand stack: [V_old, R_new, ...] #! -#! Cycles: 30 -proc.extract_index_16_32 - # split the most significant elements of U and V into 32-bit chunks and make sure the top - # 32 bit chunks are the same (i.e. u3_hi = v3_hi) - (8 cycles) - dup.4 u32split dup.2 u32split dup movup.3 assert_eq - # => [u3_hi, u3_lo, v3_lo, U, V, ...] - - u32unchecked_divmod.65536 mul.65536 - # => [idx_mid, idx_hi, u3_lo, v3_lo, U, V, ...] - - movup.3 u32unchecked_shr.16 - # => [v3_lo_hi, idx_mid, idx_hi, u3_lo, U, V, ...] - - dup dup.2 add - # => [idx_lo_v, v3_lo_hi, idx_mid, idx_hi, u3_lo, U, V, ...] +#! Cycles +#! Insert empty value: X cycles +#! Insert non-empty value: X cycles +proc.set_empty_leaf + # Check if we're inserting the empty value (X cycles) + padw eqw + #=> [V == ZERO, ZERO, V, K, R] - movup.4 u32unchecked_shr.16 - # => [u3_lo_hi, idx_lo_v, v3_lo_hi, idx_mid, idx_hi, U, V, ...] - - dup movup.3 neq assert - # => [u3_lo_hi, idx_lo_v, idx_mid, idx_hi, U, V, ...] - - movup.2 add - # => [idx_lo_u, idx_lo_v, idx_hi, U, V, ...] + if.true + # Inserting an empty value; this is a no-op (4 cycles) + dropw + #=> [V (=ZERO), K, R, ...] + + # Prepare stack: verify that the leaf is actually empty + # (X cycles) + movupw.2 swapw dup.8 movdn.4 push.LEAF_DEPTH movdn.4 + #=> [V (=ZERO), depth, K[3], R, K, ...] + + # (1 cycle) + mtree_verify + #=> [V (=ZERO), depth, K[3], R, K, ...] + + # Prepare stack for return (X cycles) + movup.4 drop movup.4 drop movupw.2 dropw + #=> [V (=ZERO), R, ...] + else + # Inserting a non-empty value (4 cycles) + dropw + #=> [V, K, R, ...] + + # Update advice map + adv.insert_hdword + #=> [V, K, R, ...] + + # Compute hash([K, V]); the new node value (NV) + # (21 cycles) + dupw.1 swapw hmerge + # => [NV, K, R] + + # Prepare stack for `mtree_set` (5 cycles) + movupw.2 dup.8 push.LEAF_DEPTH + #=> [depth, K[3], R, NV, K] + + # Insert node in Merkle store (29 cycles) + mtree_set + #=> [V_in_leaf, R_new, K] + + # Check that V_in_leaf is indeed empty (15 cycles) + padw assert_eqw + #=> [R_new, K] + + # Prepare stack for return (9 cycles) + swapw dropw padw + #=> [ZERO, R_new] + end end -#! Extracts top 32 bits and the next 16 bits from the most significant elements of U and V. +#! Inserts a value at the given key. The leaf to which we're inserting is +#! guaranteed to hold a single key-value pair (provided on the advice stack). #! -#! Also verifies that the top 32 bits of these elements are the same, while the next 16 bits are -#! different. +#! Inputs: +#! Operand stack: [V, K, R, ...] +#! Advice stack: [K_in_leaf, V_in_leaf] #! -#! Input: [U, V, ...] -#! Output: [(u3 << 32) >> 48, (v3 << 32) >> 48, v3 >> 32, U, V, ...] +#! Outputs: +#! Operand stack: [V_old, R_new, ...] #! -#! 
Cycles: 20 -proc.extract_index_32_16 - # split the most significant elements of U and V into 32-bit chunks (4 cycles) - dup.4 u32split dup.2 u32split - # => [u3_hi, u3_lo, v3_hi, v3_lo, U, V, ...] - - # make sure that the top 32 bit chunks are the same (3 cycles) - dup.2 assert_eq - # => [u3_lo, idx_hi, v3_lo, U, V, ...] - - # drop the least significant 16 bits from the lower 32-bit chunks (8 cycles) - u32unchecked_shr.16 movup.2 u32unchecked_shr.16 swap - # => [idx_lo_u, idx_lo_v, idx_hi, U, V, ...] - - # make sure the lower 16-bit chunks are different (5 cycles) - dup dup.2 neq assert - # => [idx_lo_u, idx_lo_v, idx_hi, U, V, ...] -end - -# GET -# ================================================================================================= +#! Cycles: +#! Leaf single after insertion: X cycles +#! Leaf multiple after insertion: unimplemented +proc.insert_single_leaf + # Push the leaf pre-image on stack + # (X cycles) + adv_push.8 + # => [V_in_leaf, K_in_leaf, V, K, R] + + # Check if the key stored in the leaf is the same as K + # (X cycles) + movupw.3 movupw.2 eqw + # => [K_in_leaf==K, K_in_leaf, K, V_in_leaf, V, R] -#! Get the leaf value for depth 16. -#! -#! Input: [K, R, ...] -#! Output: [V, R, ...] -#! -#! Cycles: 85 -proc.get_16.2 - # compute index of the node by extracting top 16 bits from the key (8 cycles) - dup exec.get_top_16_bits movdn.4 - # => [K, i, R, ...] - - # save [0, 0, 0, 0] into loc[0] (7 cycles) - padw loc_storew.0 - - # load Ka from advice provider and compare it to K (16 cycles) - # Ka is expected to be the key for the node stored at depth 16; it could be either equal - # to K, or could be something different - adv_loadw eqw - # => [Ka ?= K, Ka, K, i, R, ...] - - # move the comparison result out of the way (1 cycle) - movdn.8 - # => [Ka, K, Ka ?= K, i, R, ...] - - # load the value from adv provider and prepare hash (6 cycles) - push.0.16.0.0 swapw.2 adv_loadw - # => [V, Ka, D, Ka ?= K, i, R, ...] - - # save the value into loc[1] (4 cycles) - loc_storew.1 - # => [V, Ka, D, Ka ?= K, i, R, ...] - - # compute the value of the node as hash(K, V, domain=16) (10 cycles) - hperm dropw swapw dropw - # => [N, Ka ?= K, i, R, ...] - - # push the root of the empty subtree (5 cycles) - push.EMPTY_16_0.EMPTY_16_1.EMPTY_16_2.EMPTY_16_3 swapw - # => [N, E, Ka ?= K, i, R, ...] - - # read the flag if the node is empty subtree (5 cycles) - adv_push.1 movdn.8 dup.8 - # => [not_empty?, N, E, not_empty?, Ka ?= K, i, R, ...] - - # conditionally select node (5 cycles) - cdropw - # => [N', not_empty?, Ka ?= K, i, R, ...] - - # compute the flag indicating if value is not zero (3 cycles) - movup.5 movup.5 and - # => [take_val?, N', i, R, ...] - - # move take_val out of the way (4 cycles) - movdn.9 - # => [N', i, R, take_val?, ...] - - # verify Merkle path from N' to R (3 cycles) - push.16 movdn.4 mtree_verify - # => [N', 16, i, R, take_val?, ...] - - # reorganize stack (4 cycles) - movup.4 drop movup.4 drop movup.8 - # => [take_val?, N', R, ...] - - # compute the address of the return value based on `take_val` and return it, being either - # zero or V (3 cycles) - locaddr.0 add - # => [addr, N', R, ...] - - # load the selected value and return (1 cycle) - mem_loadw - # => [V, R, ...] 
+ if.true + # Leaf stays a "single" variant + + # (4 cycles) + dropw + # => [K, V_in_leaf, V, R] + + # Update advice map (3 cycles) + movupw.2 adv.insert_hdword + # => [V, K, V_in_leaf, R] + + # Compute hash([K, V]); the new node value (NV) + # (X cycles) + dupw.1 swapw hmerge + # => [NV, K, V_in_leaf, R] + + # Prepare stack to update Merkle store + # (X cycles) + movupw.3 dup.8 push.LEAF_DEPTH + # => [depth, K[3], R, NV, K, V_in_leaf] + + # Update Merkle store (29 cycles) + mtree_set + # => [NV_old, R_new, K, V_in_leaf] + + # Confirm that claimed `V_in_leaf` from advice provider is correct by checking if + # `[K, V_in_leaf]` hashes to `NV_old` + # (33 cycles) + movupw.2 dupw.3 hmerge assert_eqw + # => [R_new, V_in_leaf] + + # Clean up stack for return + # (1 cycle) + swapw + # => [V_in_leaf, R_new] + else + # Leaf becomes a Multiple kv-pair case + # TODO (fail for now) + push.1 assertz + end end -#! Get the leaf value for depth 32. +#! Removes the provided key/value pair from the leaf. The leaf to which we're inserting is +#! guaranteed to hold a single key-value pair (provided on the advice stack). Hence, after the +#! operation, the leaf will be empty. #! -#! Input: [K, R, ...] -#! Output: [V, R, ...] +#! Inputs: +#! Operand stack: [V (=ZERO), K, R, ...] +#! Advice stack: [K_in_leaf, V_in_leaf] #! -#! Cycles: 81 -proc.get_32.2 - # compute index of the node by extracting top 16 bits from the key (4 cycles) - dup u32split movdn.5 drop - # => [K, i, R, ...] - - # save [0, 0, 0, 0] into loc[0] (7 cycles) - padw loc_storew.0 - - # load Ka from advice provider and compare it to K (16 cycles) - # Ka is expected to be the key for the node stored at depth 32; it could be either equal - # to K, or could be something different - adv_loadw eqw - # => [Ka ?= K, Ka, K, i, R, ...] - - # move the comparison result out of the way (1 cycle) - movdn.8 - # => [Ka, K, Ka ?= K, i, R, ...] - - # load the value from adv provider and prepare hash (6 cycles) - push.0.32.0.0 swapw.2 adv_loadw - # => [V, Ka, D, Ka ?= K, i, R, ...] - - # save the value into loc[1] (4 cycles) - loc_storew.1 - # => [V, Ka, D, Ka ?= K, i, R, ...] - - # compute the value of the node as hash(K, V, domain=32) (10 cycles) - hperm dropw swapw dropw - # => [N, Ka ?= K, i, R, ...] - - # push the root of the empty subtree (5 cycles) - push.EMPTY_32_0.EMPTY_32_1.EMPTY_32_2.EMPTY_32_3 swapw - # => [N, E, Ka ?= K, i, R, ...] - - # read the flag if the node is empty subtree (5 cycles) - adv_push.1 movdn.8 dup.8 - # => [not_empty?, N, E, not_empty?, Ka ?= K, i, R, ...] - - # conditionally select node (5 cycles) - cdropw - # => [N', not_empty?, Ka ?= K, i, R, ...] - - # compute the flag indicating if value is not zero (3 cycles) - movup.5 movup.5 and - # => [take_val?, N', i, R, ...] - - # move take_val out of the way (4 cycles) - movdn.9 - # => [N', i, R, take_val?, ...] - - # verify Merkle path from N' to R (3 cycles) - push.32 movdn.4 mtree_verify - # => [N', 32, i, R, take_val?, ...] - - # reorganize stack (4 cycles) - movup.4 drop movup.4 drop movup.8 - # => [take_val?, N', R, ...] - - # compute the address of the return value based on `take_val` and return it, being either - # zero or V (3 cycles) - locaddr.0 add - # => [addr, N', R, ...] - - # load the selected value and return (1 cycle) - mem_loadw - # => [V, R, ...] -end - -#! Get the leaf value for depth 48. +#! Outputs: +#! Operand stack: [V_old, R_new, ...] #! -#! Input: [K, R, ...] -#! Output: [V, R, ...] -#! -#! 
Cycles: 88 -proc.get_48.2 - # compute index of the node by extracting top 48 bits from the key (11 cycles) - dup exec.get_top_48_bits movdn.4 - # => [K, i, R, ...] - - # save [0, 0, 0, 0] into loc[0] (7 cycles) - padw loc_storew.0 - - # load Ka from advice provider and compare it to K (16 cycles) - # Ka is expected to be the remaining key for the node stored at depth 48; it could be either - # equal to K, or could be something different - adv_loadw eqw - # => [Ka ?= K, Ka, K, i, R, ...] - - # move the comparison result out of the way (1 cycle) - movdn.8 - # => [Ka, K, Ka ?= K, i, R, ...] - - # load the value from adv provider and prepare hash (6 cycles) - push.0.48.0.0 swapw.2 adv_loadw - # => [V, Ka, D, Ka ?= K, i, R, ...] - - # save the value into loc[1] (4 cycles) - loc_storew.1 - # => [V, Ka, D, Ka ?= K, i, R, ...] - - # compute the value of the node as hash(K, V, domain=48) (10 cycles) - hperm dropw swapw dropw - # => [N, Ka ?= K, i, R, ...] - - # push the root of the empty subtree (5 cycles) - push.EMPTY_48_0.EMPTY_48_1.EMPTY_48_2.EMPTY_48_3 swapw - # => [N, E, Ka ?= K, i, R, ...] - - # read the flag if the node is empty subtree (5 cycles) - adv_push.1 movdn.8 dup.8 - # => [not_empty?, N, E, not_empty?, Ka ?= K, i, R, ...] - - # conditionally select node (5 cycles) - cdropw - # => [N', not_empty?, Ka ?= K, i, R, ...] - - # compute the flag indicating if value is not zero (3 cycles) - movup.5 movup.5 and - # => [take_val?, N', i, R, ...] - - # move take_val out of the way (4 cycles) - movdn.9 - # => [N', i, R, take_val?, ...] - - # verify Merkle path from N' to R (3 cycles) - push.48 movdn.4 mtree_verify - # => [N', 48, i, R, take_val?, ...] - - # reorganize stack (4 cycles) - movup.4 drop movup.4 drop movup.8 - # => [take_val?, N', R, ...] - - # compute the address of the return value based on `take_val` and return it, being either - # zero or V (3 cycles) - locaddr.0 add - # => [addr, N', R, ...] - - # load the selected value and return (1 cycle) - mem_loadw - # => [V, R, ...] -end +#! Cycles: X +proc.remove_single_leaf + # Push the leaf pre-image on stack + # (0 cycles) + adv_push.8 + # => [V_in_leaf, K_in_leaf, V, K, R] -#! Returns the value stored under the specified key in a Sparse Merkle Tree with the specified root. -#! -#! If the value for a given key has not been set, the returned `V` will consist of all zeroes. -#! -#! Input: [K, R, ...] -#! Output: [V, R, ...] -#! -#! Depth 16: 91 cycles -#! Depth 32: 87 cycles -#! Depth 48: 94 cycles -#! Depth 64: unimplemented -export.get - # invoke adv and fetch target depth flags - adv.push_smtget adv_push.2 - # => [d ∈ {16, 32}, d ∈ {16, 48}, K, R, ...] + # Check if the key stored in the leaf is the same as K + # (X cycles) + movupw.3 movupw.2 eqw + # => [K_in_leaf==K, K_in_leaf, K, V_in_leaf, V, R] - # call the inner procedure depending on the depth if.true - if.true - # depth 16 - exec.get_16 - else - # depth 32 - exec.get_32 - end + # Keys match; we're removing the value associated with K + + # (4 cycles) + dropw + # => [K, V_in_leaf, V, R] + + # Update advice map (3 cycles) + movupw.2 adv.insert_hdword + # => [V, K, V_in_leaf, R] + + # Prepare the stack for `mtree_set` + # Note that the new node value will be the empty word, so we can use `V` + # as the node value (since we confirmed that it's `ZERO`) + # (7 cycles) + movupw.3 dup.8 push.LEAF_DEPTH + # => [depth, K[3], R, V, K, V_in_leaf] + + # (29 cycles) + mtree_set + # => [NV_old, R_new, K, V_in_leaf, ...] 
+ + # Confirm that hmerge([K, V_in_leaf]) = NV_old + # (33 cycles) + movupw.2 dupw.3 hmerge assert_eqw + # => [R_new, V_in_leaf, ...] + + # Cleanup stack for return (1 cycle) + swapw + # => [V_in_leaf, R_new, ...] else - if.true - # depth 48 - exec.get_48 - else - # depth 64 - # currently not implemented - push.0 assert - end - end - # => [V, R, ...] -end + # Keys don't match; this is a no-op + # We need to ensure that hash([K_in_leaf, V_in_leaf]) = NV; + # that is, we need to verify the advice provider's claims. + # If all checks pass, we're done. -# INSERT -# ================================================================================================= + # => [K_in_leaf, K, V_in_leaf, V, R] -#! Updates a leaf node at depths 16, 32, or 48. -#! -#! Input: [d, idx, V, K, R, ...] -#! Output: [V_old, R_new, ...] -#! -#! Where: -#! - R is the initial root of the TSMT, and R_new is the new root of the TSMT. -#! - d, idx are the depth and index (at that depth) of the leaf node to be updated. -#! - K, V are the key-value pair for the leaf node where V is a new value for key K. -#! - V_old is the value previously stored under key K. -#! -#! This procedure succeeds only if: -#! - Node to be replaced at (d, idx) is a leaf node for the same key K. -#! -#! Cycles: 101 -proc.update_16_32_48.2 - # save [idx, d, 0, 0] in loc[0] (5 cycles) - push.0.0 loc_storew.0 - # => [0, 0, d, idx, V, K, R, ...] - - # prepare the stack for computing N = hash([K, V], domain=d), and also save K into loc[1] - # (10 cycles) - movdn.3 movup.2 drop push.0 swapw.2 loc_storew.1 swapw - # => [V, K, 0, 0, d, 0, R, ...] - - # insert N |-> [K, V] into the advice map (0 cycles) - adv.insert_hperm - - # compute the hash of the node N = hash([K, V], domain=d) - (1 cycle) - hperm - # => [X, N, X, R, ...] - - # prepare the stack for the mtree_set operation (8 cycles) - swapw.3 swapw swapw.2 loc_loadw.0 drop drop - # => [d, idx, R, N, X, ...] - - # insert the new leaf node into the tree at the specified index/depth; this also leaves the - # previous value of the node on the stack (29 cycle) - mtree_set - # => [N_old, R_new, X, ...] - - # verify that N_old is a leaf node for the same key K - - # prepare the stack for computing E = hash([K, V_old], domain=d); value of V_old is read - # from the advice provider and is saved into loc[0] (21 cycles) - swapw.2 loc_loadw.0 movdn.3 push.0 movup.3 push.0.0.0 loc_loadw.1 adv_push.4 loc_storew.0 - # => [V_old, K, 0, 0, d, 0, R_new, N_old, ...] - - # compute E = hash([K, V_old], domain=d) - # (10 cycle) - hperm dropw swapw dropw - # => [E, R_new, N_old, ...] - - # make sure E and N_old are the same (14 cycles) - swapw swapw.2 - repeat.4 - dup.4 assert_eq - end - # => [E, R_new, ...] + # We no longer need V, since we're not removing anything + movupw.3 dropw + # => [K_in_leaf, K, V_in_leaf, R] - # load the old value (which we saved previously) onto the stack (3 cycles) - loc_loadw.0 - # => [V_old, R_new, ...] -end + # Prepare stack for mtree_get + movupw.3 dup.8 push.LEAF_DEPTH + # => [depth, K[3], R, K_in_leaf, K, V_in_leaf] -#! Inserts a new leaf node at depth 16. -#! -#! Input: [V, K, R, ...] -#! Output: [0, 0, 0, 0, R_new, ...] -#! -#! Where: -#! - R is the initial root of the TSMT, and R_new is the new root of the TSMT. -#! - K and V is the key-value pair for the leaf node to be inserted. -#! -#! This procedure succeeds only if: -#! - Node to be replaced at depth 16 is a root of an empty subtree. -#! -#! 
Cycles: 73 -proc.insert_16 - # extract 16-bit index from the key (8 cycles) - swapw dup exec.get_top_16_bits - # => [idx, K, V, R, ...] - - # prepare the stack for computing N = hash([K, V], domain=16) (6 cycles) - movdn.8 push.0.16.0.0 swapw.2 - # => [V, K, 0, 0, 16, 0, idx, R, ...] - - # insert N |-> [K, V] into the advice map (0 cycles) - adv.insert_hperm - - # compute leaf node value as N = hash([K, V], domain=16) (10 cycles) - hperm dropw swapw dropw - # => [N, idx, R, ...] - - # prepare the stack for mtree_set operation (4 cycles) - swapw movup.8 movdn.4 push.16 - # => [16, idx, R, N, ...] - - # insert the node into the tree at depth 16; this also leaves the old value of the node on the - # stack (29 cycle) - mtree_set - # => [N_old, R_new, ...] - - # verify that the old value of the node was a root of an empty subtree for depth 16 (12 cycles) - push.EMPTY_16_3 assert_eq - push.EMPTY_16_2 assert_eq - push.EMPTY_16_1 assert_eq - push.EMPTY_16_0 assert_eq - - # put the return value onto the stack and return (4 cycles) - padw - # => [0, 0, 0, 0, R_new, ...] -end + # Retrieve node value (NV) from merkle tree + mtree_get + # => [NV, R, K_in_leaf, K, V_in_leaf] -#! Inserts a new leaf node at depth 32. -#! -#! Input: [V, K, R, ...] -#! Output: [0, 0, 0, 0, R_new, ...] -#! -#! Where: -#! - R is the initial root of the TSMT, and R_new is the new root of the TSMT. -#! - K, V is the key-value pair for the leaf node to be inserted into the TSMT. -#! -#! This procedure consists of two high-level steps: -#! - First, insert N = hash([K, V], domain=32) into a subtree with root P, where P is the -#! internal node at depth 16 on the path to the new leaf node. This outputs the new root -#! of the subtree P_new. -#! - Then, insert P_new into the TSMT with root R. -#! -#! We do this to minimize the number of hashes consumed by the procedure for Merkle path -#! verification. Specifically, Merkle path verification will require exactly 64 hashes. -#! -#! This procedure succeeds only if: -#! - Node at depth 16 is an internal node. -#! - Node at depth 32 is a root of an empty subtree. -#! -#! Cycles: 154 -proc.insert_32.2 - # load the value of P from the advice provider (5 cycles) - adv_push.4 swapw.2 - # => [K, V, P, R, ...] - - # save k3 into loc[0][0] (4 cycles) - dup loc_store.0 - # => [K, V, P, R, ...] - - # prepare the stack for computing N = hash([K, V], domain=32) - (5 cycles) - push.0.32.0.0 swapw.2 - # => [V, K, 0, 0, 32, 0, P, R, ...] - - # insert N |-> [K, V] into the advice map (0 cycles) - adv.insert_hperm - - # compute N = hash([K, V], domain=32) (1 cycle) - hperm - # => [X, N, X, P, R, ...] - - # save P into loc[1] to be used later (5 cycles) - swapw.3 loc_storew.1 - # => [P, N, X, X, R, ...] - - # make sure P is not a root of an empty subtree at depth 16 (17 cycles) - dup push.EMPTY_16_3 eq - dup.2 push.EMPTY_16_2 eq - dup.4 push.EMPTY_16_1 eq - dup.6 push.EMPTY_16_0 eq - and and and assertz - # => [P, N, X, X, R, ...] - - # load k3 from memory, extract upper 32 bits from it and split them into two 16-bit values - # such that the top 16-bits are in idx_hi and the next 16 bits are in idx_lo (9 cycles) - loc_load.0 exec.get_top_32_bits u32unchecked_divmod.65536 - # => [idx_lo, idx_hi, P, N, X, X, R, ...] - - # save idx_hi into loc[0][0] to be used later (5 cycles) - swap loc_store.0 - # => [idx_lo, P, N, X, X, R, ...] 
- - # replace node at idx_lo in P with N, the old value of the node is left on the stack; this also - # proves that P is a leaf node because a leaf node cannot have children at depth 16 (30 cycles) - push.16 mtree_set - # => [N_old, P_new, X, X, R, ...] - - # make sure that N_old is a root of an empty subtree at depth 32 (12 cycles) - push.EMPTY_32_3 assert_eq - push.EMPTY_32_2 assert_eq - push.EMPTY_32_1 assert_eq - push.EMPTY_32_0 assert_eq - # => [P_new, X, X, R, ...] - - # prepare the stack for mtree_set operation against R; here we load idx_hi from loc[0][0] - # (11 cycles) - swapw.2 dropw swapw.2 loc_load.0 push.16 - # => [16, idx_hi, R, P_new, X, ...] - - # insert P_new into tree with root R at depth 16 and idx_hi index (29 cycles) - mtree_set - # => [P_old, R_new, X, ...] - - # load previously saved P to compare it with P_old (6 cycles) - swapw swapw.2 loc_loadw.1 - # => [P, P_old, R_new, ...] - - # make sure P and P_old are the same (11 cycles) - assert_eqw - # => [R_new, ...] - - # put the return value onto the stack and return (4 cycles) - padw - # => [0, 0, 0, 0, R_new, ...] -end + # Cleanup stack (we no longer need K) + movupw.3 dropw + # => [NV, R, K_in_leaf, V_in_leaf] -#! Inserts a new leaf node at depth 48. -#! -#! Input: [V, K, R, ...] -#! Output: [0, 0, 0, 0, R_new, ...] -#! -#! This procedure is nearly identical to the insert_32 procedure above, adjusted for the use of -#! constants and idx_hi/idx_lo computation. It may be possible to combine the two at the expense -#! of extra 10 - 20 cycles. -proc.insert_48.2 - # load the value of P from the advice provider (5 cycles) - adv_push.4 swapw.2 - # => [K, V, P, R, ...] - - # save k3 into loc[0][0] (4 cycles) - dup loc_store.0 - # => [K, V, P, R, ...] - - # prepare the stack for computing N = hash([K, V], domain=48) - (5 cycles) - push.0.48.0.0 swapw.2 - # => [V, K, 0, 0, 48, 0, P, R, ...] - - # insert N |-> [K, V] into the advice map (0 cycles) - adv.insert_hperm - - # compute N = hash([K, V], domain=48) (1 cycle) - hperm - # => [X, N, X, P, R, ...] - - # save P into loc[1] to be used later (5 cycles) - swapw.3 loc_storew.1 - # => [P, N, X, X, R, ...] - - # make sure P is not a root of an empty subtree at depth 32 (17 cycles) - dup push.EMPTY_32_3 eq - dup.2 push.EMPTY_32_2 eq - dup.4 push.EMPTY_32_1 eq - dup.6 push.EMPTY_32_0 eq - and and and assertz - # => [P, N, X, X, R, ...] - - # load k3 from memory, extract upper 48 bits from it and split them into two values such that - # the top 32-bits are in idx_hi and the next 16 bits are in idx_lo (9 cycles) - loc_load.0 u32split swap u32unchecked_divmod.65536 drop - # => [idx_lo, idx_hi, P, N, X, X, R, ...] - - # save idx_hi into loc[0][0] to be used later (5 cycles) - swap loc_store.0 - # => [idx_lo, P, N, X, X, R, ...] - - # replace node at idx_lo in P with N, the old value of the node is left on the stack; this also - # proves that P is a leaf node because a leaf node cannot have children at depth 16 (30 cycles) - push.16 mtree_set - # => [N_old, P_new, X, X, R, ...] - - # make sure that N_old is a root of an empty subtree at depth 48 (12 cycles) - push.EMPTY_48_3 assert_eq - push.EMPTY_48_2 assert_eq - push.EMPTY_48_1 assert_eq - push.EMPTY_48_0 assert_eq - # => [P_new, X, X, R, ...] - - # prepare the stack for mtree_set operation against R; here we load idx_hi from loc[0][0] - # (11 cycles) - swapw.2 dropw swapw.2 loc_load.0 push.32 - # => [32, idx_hi, R, P_new, X, ...] 
- - # insert P_new into tree with root R at depth 32 and idx_hi index (29 cycles) - mtree_set - # => [P_old, R_new, X, ...] - - # load previously saved P with P_old to make sure they are the same (6 cycles) - swapw swapw.2 loc_loadw.1 - # => [P, P_old, R_new, ...] - - # make sure P and P_old are the same (11 cycles) - assert_eqw - # => [R_new, ...] - - # put the return value onto the stack and return (4 cycles) - padw - # => [0, 0, 0, 0, R_new, ...] -end + # Ensure that hash([K_in_leaf, V_in_leaf]) == NV + movupw.2 movupw.3 hmerge assert_eqw + # => [R] -#! Replaces a leaf node at depth 16 with a subtree containing two leaf nodes at depth 32 such that -#! one of the leaf nodes commits to a key-value pair equal to the leaf node at depth 16, and the -#! other leaf node commits to the key-value pair being inserted. -#! -#! Input: [idx_lo_e, idx_lo_n, idx_hi, K_e, K, V, R, ...] -#! Output: [0, 0, 0, 0, R_new, ...] -#! -#! Where: -#! - R is the initial root of the TSMT, and R_new is the new root of the TSMT. -#! - K, V is the key-value pair for the leaf node to be inserted into the TSMT. -#! -#! This procedure consists of three high-level steps: -#! - First, insert M = hash([K_e, V_e], domain=32) into an empty subtree at depth 16, where K_e -#! and V_e are the key-value pair for the existing leaf node. This outputs the new root -#! of the subtree T. -#! - Then, insert N = hash([K, V], domain=32) into a subtree with root T. This outputs the new -#! root of the subtree P_new. -#! - Then, insert P_new into the TSMT with root R. -#! -#! This procedure succeeds only if: -#! - Node at depth 16 is a leaf node. -#! - The key in this node has a common prefix with the key to be inserted. This common prefix -#! must be greater or equal to 16, but smaller than 32. -#! -#! Cycles: 188 -proc.replace_32.3 - # save [idx_hi, idx_lo_n, idx_lo_e, 16] into loc[0] - (4 cycles) - push.16 loc_storew.0 - # => [16, idx_lo_e, idx_lo_n, idx_hi, K_e, K, V, R, ...] - - # load V_e from the advice provider (1 cycle) - adv_loadw - # => [V_e, K_e, K, V, R, ...] - - # save K_e and V_e into loc[1] and loc[2] respectively (13 cycles) - push.0.16.0.0 swapw.2 loc_storew.1 swapw loc_storew.2 - - # compute P = hash([K_e, V_e], domain=16) - (1 cycles) - hperm - # => [X, P, X, K, V, R, ...] - - # prepare the stack for computing M = hash([K_e, V_e], domain=32) - (13 cycles) - loc_loadw.1 push.0.32.0.0 swapw.2 swapw.3 loc_loadw.2 - # => [V_e, K_e, 0, 0, 32, 0, P, K, V, R, ...] - - # insert M |-> [K_e, V_e] into the advice map (0 cycles) - adv.insert_hperm - - # compute M = hash([K_e, V_e], domain=32) - 1 cycle - hperm - # => [X, M, X, P, K, V, R, ...] - - # push the root of an empty subtree at depth 16 onto the stack (4 cycles) - push.EMPTY_16_0.EMPTY_16_1.EMPTY_16_2.EMPTY_16_3 - # => [E, X, M, X, P, K, V, R, ...] - - # prepare the stack for inserting M into E (8 cycles) - swapw loc_loadw.0 movup.2 drop movup.2 drop - # => [16, idx_lo_e, E, M, X, P, K, V, R, ...] - - # insert M into an empty subtree rooted at E; this leaves a root of empty subtree at depth 32 - # on the stack - though, we don't need to verify this (29 cycles) - mtree_set - # => [E32, T, X, P, K, V, R, ...] - - # prepare the stack for computing N = hash([K, V], domain=32) - (15 cycles) - dropw swapw dropw swapdw push.0.32.0.0 swapw.2 - # => [V, K, 0, 0, 32, 0, T, P, R, ...] - - # insert N |-> [K, V] into the advice map (0 cycles) - adv.insert_hperm - - # compute N = hash([K, V], domain=32) - 1 cycle - hperm - # => [X, N, X, T, P, R, ...] 
- - # prepare the stack for inserting N into T (13 cycles) - dropw swapw.2 swapw loc_loadw.0 swap drop movup.2 drop - # => [16, idx_lo_n, T, N, P, R, ...] - - # insert N into an empty subtree rooted at T; this leaves a root of empty subtree at depth 32 - # on the stack - though, we don't need to verify this (29 cycles) - mtree_set - # => [E32, P_new, P, R, ...] - - # prepare the stack for inserting P_new into R (10 cycles) - swapw.3 swapw swapw.2 swapw.3 loc_loadw.0 movdn.2 drop drop - # => [16, idx_hi, R, P_new, P, ...] - - # insert P_new into the tree rooted at R; this also leaves P_old (the old value of the node) - # on the stack (29 cycles) - mtree_set - # => [P_old, R_new, P, ...] - - # make sure P and P_old are the same (13 cycles) - swapw swapw.2 assert_eqw - # => [R_new, ...] - - # put the return value onto the stack and return (4 cycles) - padw - # => [0, 0, 0, 0, R_new, ...] + # Prepare stack for return + padw + # => [ZERO, R] + end end -#! Replaces a leaf node at depth 16 or 32 with a subtree containing two leaf nodes at depth 48 -#! such that one of the leaf nodes commits to a key-value pair equal to the leaf node at the -#! original depth, and the other leaf node commits to the key-value pair being inserted. -#! -#! Input: [E, idx_lo_e, idx_lo_n, idx_hi, d, K_e, K, V, R, ...] -#! Output: [0, 0, 0, 0, R_new, ...] -#! -#! Where: -#! - R is the initial root of the TSMT, and R_new is the new root of the TSMT. -#! - K, V is the key-value pair for the leaf node to be inserted into the TSMT. -#! - d is the depth of the current leaf node (i.e., depth 16 or 32). -#! - idx_hi is the index of the last common node on the path from R to the leaves at depth 48. -#! - idx_lo_e and idx_lo_n are the indexes of the new leaf nodes in a subtree rooted in the -#! last common node. -#! - E is a root of an empty subtree at depth d. -#! -#! This procedure consists of three high-level steps: -#! - First, insert M = hash([K_e, V_e], domain=48) into an empty subtree at depth 48 - d, where -#! K_e and V_e is the key-value pair for the existing leaf node at depth d. This outputs the -#! new root of the subtree T. -#! - Then, insert N = hash([K, V], domain=48) into a subtree with root T. This outputs the new -#! root of the subtree P_new. -#! - Then, insert P_new into the TSMT with root R at depth d. +#! Inserts or removes a value associated with the given key. The leaf to which we're inserting is +#! guaranteed to hold a single key-value pair (provided on the advice stack). #! -#! This procedure succeeds only if: -#! - Node at depth d is a leaf node. +#! Inputs: +#! Operand stack: [V, K, R, ...] +#! Advice stack: [K_in_leaf, V_in_leaf] #! -#! The procedure assumes but does not check that: -#! - d is either 16 or 32. -#! - idx_hi is within range valid for depth d. -#! - idx_lo_e and idx_hi_e are different values. -#! - idx_lo_e and idx_hi_e are within range valid for depth 48 - d. -#! -#! Cycles: 195 -proc.replace_48.4 - # save E into loc[3] and drop it from the stack (7 cycles) - loc_storew.3 dropw - # => [idx_lo_e, idx_lo_n, idx_hi, d, K_e, K, V, R, ...] - - # save [d, idx_hi, idx_lo_n, idx_lo_e] into loc[0] (3 cycles) - loc_storew.0 - # => [idx_lo_e, idx_lo_n, idx_hi, d, K_e, K, V, R, ...] - - # prepare the stack for computing P = hash([K_e, V_e], domain=d) - - # load V_e from the advice provider and save it into loc[1] (5 cycles) - adv_loadw loc_storew.1 - # => [V_e, K_e, K, V, R, ...] - - # (6 cycles) - push.0 loc_load.0 push.0.0 - # => [0, 0, d, 0, V_e, K_e, K, V, R, ...] 
- - # save K_e into loc[2] - (5 cycles) - swapw.2 loc_storew.2 swapw - # => [V_e, K_e, 0, 0, d, 0, K, V, R, ...] - - # compute P = hash([K_e, V_e], domain=d) (1 cycle) - hperm - # => [X, P, X, K, V, R, ...] - - # prepare the stack for computing M = hash([K_e, V_e], domain=48) - - # load K_e and V_e from loc[2] and loc[1] respectively (13 cycles) - loc_loadw.2 push.0.48.0.0 swapw.2 swapw.3 loc_loadw.1 - # => [V_e, K_e, 0, 0, 48, 0, P, K, V, R, ...] - - # insert M |-> [K_e, V_e] into the advice map (0 cycles) - adv.insert_hperm - - # compute M = hash([K_e, V_e], domain=48) (1 cycle) - hperm - # => [X, M, X, P, K, V, R, ...] - - # load the root of empty subtree at depth d from loc[3] (3 cycles) - loc_loadw.3 - # => [E, M, X, P, K, V, R, ...] - - # prepare the stack for inserting M into E - - # (5 cycles) - swapw swapw.2 loc_loadw.0 - # => [idx_lo_e, idx_lo_n, idx_hi, d, E, M, P, K, V, R, ...] - - # (6 cycles) - movdn.3 drop drop neg add.48 - # => [48 - d, idx_lo_e, E, M, P, K, V, R, ...] - - # insert M into an empty subtree rooted at E; this leaves a root of empty subtree at depth 48 - # on the stack - though, we don't need to verify this (29 cycles) - mtree_set - # => [E48, T, P, K, V, R, ...] - - # prepare the stack for computing N = hash([K, V], domain=48) - - # (5 cycles) - dropw swapdw - # => [K, V, T, P, R, ...] +#! Outputs: +#! Operand stack: [V_old, R_new, ...] +#! Cycles: +#! Remove: X cycles +#! Insert; leaf single after insertion: X cycles +#! Insert; leaf multiple after insertion: unimplemented +proc.set_single_leaf + # Check if we're inserting or removing a value + # (X cycles) + padw eqw + # => [V==ZERO, ZERO, V, K, R, ...] + if.true + # we're removing the value associated with K (if any) - # (5 cycles) - push.0.48.0.0 swapw.2 - # => [V, K, 0, 0, 48, 0, T, P, R, ...] + # (4 cycles) + dropw + # => [V, K, R, ...] - # insert N |-> [K, V] into the advice map (0 cycles) - adv.insert_hperm + # (X cycles) + exec.remove_single_leaf + # => [V_old, R_new] + else + # we're inserting the key/value pair - # compute N = hash([K, V], domain=48) - (1 cycles) - hperm - # => [X, N, X, T, P, R, ...] + # (4 cycles) + dropw + # => [V, K, R, ...] - # prepare the stack for inserting N into T + # (X cycles) + exec.insert_single_leaf + # => [V_old, R_new] + end +end - # (6 cycles) - dropw swapw.2 swapw - # => [X, T, N, P, R, ...] +#! Inserts the specified value under the specified key in a Sparse Merkle Tree defined by the +#! specified root. If the insert is successful, the old value located under the specified key +#! is returned via the stack. +#! +#! If the VALUE is an empty word (i.e., [ZERO; 4]), the new state of the tree is guaranteed to +#! be equivalent to the state as if the updated value was never inserted. +#! +#! Inputs: +#! Operand stack: [V, K, R, ...] +#! Outputs: +#! Operand stack: [V_old, R_new, ...] +#! +#! Fails if the tree with the specified root does not exits in the VM's advice provider. +#! +#! Cycles +#! Leaf empty +#! removal: 74 cycles +#! insertion: 133 cycles +#! Leaf single +#! removal: 227 cycles +#! insertion (leaf remains single): 205 +#! insertion (leaf becomes multiple): unimplemented +#! Leaf multiple +#! unimplemented +export.set + # Prepare stack for adv.push_mtnode + # (X cycles) + movupw.2 dup.8 push.LEAF_DEPTH + # => [depth, leaf_index, R, V, K] - # (3 cycles) - loc_loadw.0 - # => [idx_lo_e, idx_lo_n, idx_hi, d, T, N, P, R, ...] 
+ # Push MT node on advice stack, cleanup operand stack, and then + # push MT node on operand stack (NV) + # (X cycles) + adv.push_mtnode drop drop movdnw.2 adv_push.4 + # => [NV, V, K, R] - # (6 cycles) - drop movdn.2 drop neg add.48 - # => [48 - d, idx_lo_n, T, N, P, R, ...] + # (X cycles) + padw eqw + # => [NV == ZERO, ZERO, NV, V, K, R] - # insert N into a subtree with root T; this leaves a root of an empty subtree at depth 48 - # on the stack - though, we don't need to verify this (29 cycles) - mtree_set - # => [E48, P_new, P, R, ...] + if.true + # empty leaf - # prepare the stack for inserting P_new into R + # (8 cycles) + dropw dropw + #=> [V, K, R] - # (4 cycles) - swapw.3 swapw swapw.2 swapw.3 - # => [E48, R, P_new, P, ...] + # (insert empty value: X cycles) + # (insert non-empty value: X cycles) + exec.set_empty_leaf + else + # Single or Multiple leaf - # (3 cycles) - loc_loadw.0 - # => [idx_lo_e, idx_lo_n, idx_hi, d, R, P_new, P, ...] + # (X cycles) + dropw + # => [NV, V, K, R] - # (3 cycles) - drop drop swap - # => [d, idx_hi, R, P_new, P, ...] + # Retrieve leaf pre-image on advice stack, and push leaf size on stack + # Note: the rest of the leaf pre-image will be pulled out later + # (4 cycles) + adv.push_mapvaln dropw adv_push.1 + # => [leaf_size, V, K, R] - # insert P_new into the tree rooted at R; this also leaves P_old (the old value of the node) - # on the stack (29 cycles) - mtree_set - # => [P_old, R_new, P, ...] + # Leaf size will be a multiple of 8 (each kv-pair in a leaf is 8 elements) + # (3 cycles) + dup eq.8 + # => [is_single_kv_pair, leaf_size, V, K, R] - # make sure P and P_old are the same (13 cycles) - swapw swapw.2 assert_eqw - # => [R_new, ...] + if.true + # Single kv-pair case - # put the return value onto the stack and return (4 cycles) - padw - # => [0, 0, 0, 0, R_new, ...] -end + # (1 cycle) + drop + # => [V, K, R] -#! Inserts the specified value into a Sparse Merkle Tree with the specified root under the -#! specified key. -#! -#! This is the actual implementation of the `insert` procedure below, except for two things: -#! - This procedure assumes that value V is not [ZERO; 4], but this is not checked. -#! - This procedure assumes that the relevant flags have already been read from the advice provider. -#! -#! Input: [is_update, f0, f1, f2, V, K, R, ...] -#! Output: [V_old, R_new, ...] -#! -#! Where: -#! - is_update is a flag specifying whether the insert is just an update of a value under an -#! existing key. -#! - Meaning of the flags f0, f1, and f2 depends on what type of insert is being executed. -#! -#! Cycles: -#! - Update existing leaf: -#! - Depth 16: 116 -#! - Depth 32: 113 -#! - Depth 48: 118 -#! - Insert new leaf: -#! - Depth 16: 81 -#! - Depth 32: 162 -#! - Depth 48: 162 -#! - Replace a leaf with a subtree: -#! - Depth 16 -> 32: 221 -#! - Depth 16 -> 48: 244 -#! - Depth 32 -> 48: 234 -proc.insert_internal - # call the inner procedure depending on the type of insert and depth - if.true # --- update leaf --------------------------------------------------------------------- - # => [is_16_or_32, is_16_or_48, ZERO, V, K, R, ...] - if.true - if.true # --- update a leaf node at depth 16 --- - drop - # => [V, K, R, ...] - - # (cycles 8) - dup.4 exec.get_top_16_bits - push.16 - # => [16, idx, V, K, R, ...] - - exec.update_16_32_48 - else # --- update a leaf node at depth 32 --- - drop - # => [V, K, R, ...] - - #(5 cycles) - dup.4 exec.get_top_32_bits - push.32 - # => [32, idx, V, K, R, ...] 
- - exec.update_16_32_48 - end + # (remove key/value: X cycles) + # (insert; leaf single after insertion: X cycles) + exec.set_single_leaf else - if.true # --- update a leaf node at depth 48 --- - drop - # => [V, K, R, ...] - - # (10 cycles) - dup.4 exec.get_top_48_bits - push.48 - # => [48, idx, V, K, R, ...] - - exec.update_16_32_48 - else - # depth 64 - currently not implemented - push.0 assert - end - end - else - # => [is_simple_insert, is_16_or_32, is_16_or_48, V, K, R, ...] - if.true # --- insert new leaf ------------------------------------------------------------- - if.true - if.true - exec.insert_16 - else - exec.insert_32 - end - else - if.true - exec.insert_48 - else - # depth 64 - currently not implemented - push.0 assert - end - end - else # --- replace leaf with subtree ------------------------------------------------------ - if.true - if.true # --- replace a leaf at depth 16 with two leaves at depth 32 --- - # load K_e from the advice provider (5 cycles) - swapw adv_push.4 - # => [K_e, K, V, R, ...] - - # (20 cycles) - exec.extract_index_16_16 - # => [idx_lo_e, idx_lo, idx_hi, K_e, K, V, R, ...] - - # (188 cycles) - exec.replace_32 - else # --- replace a leaf at depth 16 with two leaves at depth 48 --- - # load K_e from the advice provider (5 cycles) - swapw adv_push.4 - # => [K_e, K, V, R, ...] - - # (30 cycles) - exec.extract_index_16_32 - # => [idx_lo_e, idx_lo, idx_hi, K_e, K, V, R, ...] - - # (2 cycles) - push.16 movdn.3 - # => [idx_lo_e, idx_lo, idx_hi, 16, K_e, K, V, R, ...] - - # (4 cycles) - push.EMPTY_16_0.EMPTY_16_1.EMPTY_16_2.EMPTY_16_3 - # => [E, idx_lo_e, idx_lo, idx_hi, 16, K_e, K, V, R, ...] - - # (195 cycles) - exec.replace_48 - end - else - if.true # --- replace a leaf at depth 32 with two leaves at depth 48 --- - # load K_e from the advice provider (5 cycles) - swapw adv_push.4 - # => [K_e, K, V, R, ...] - - # (20 cycles) - exec.extract_index_32_16 - # => [idx_lo_e, idx_lo, idx_hi, K_e, K, V, R, ...] - - # (2 cycles) - push.32 movdn.3 - # => [idx_lo_e, idx_lo, idx_hi, 16, K_e, K, V, R, ...] - - # (4 cycles) - push.EMPTY_32_0.EMPTY_32_1.EMPTY_32_2.EMPTY_32_3 - # => [E, idx_lo_e, idx_lo, idx_hi, 16, K_e, K, V, R, ...] - - # (195 cycles) - exec.replace_48 - else # --- replace a leaf at depth 16, 32, or 48 with two leaves at depth 64 --- - # depth 64 - currently not implemented - push.0 assert - end - end + # Multiple kv-pair case + # TODO (fail for now) + push.1 assertz end end - - # => [V_old, R_new, ...] end -#! Inserts the specified value into a Sparse Merkle Tree with the specified root under the -#! specified key. -#! -#! The value previously stored in the SMT under this key is left on the stack together with -#! the updated tree root. -#! -#! This assumes that the value is not [ZERO; 4]. If it is, the procedure fails. -#! -#! Input: [V, K, R, ...] -#! Output: [V_old, R_new, ...] -#! -#! Cycles: -#! - Update existing leaf: -#! - Depth 16: 137 -#! - Depth 32: 134 -#! - Depth 48: 139 -#! - Insert new leaf: -#! - Depth 16: 102 -#! - Depth 32: 183 -#! - Depth 48: 183 -#! - Replace a leaf with a subtree: -#! - Depth 16 -> 32: 242 -#! - Depth 16 -> 48: 265 -#! - Depth 32 -> 48: 255 -export.insert - # make sure the value is not [ZERO; 4] (17 cycles) - repeat.4 - dup.3 eq.0 - end - and and and assertz - # => [V, K, R, ...] 
- - # arrange the data needed for the insert procedure on the advice stack and move the - # first 4 flags onto the operand stack; meaning of the flags f0, f1, and f2 depends - # on what type of insert is being executed (4 cycles) - adv.push_smtset adv_push.4 - # => [is_update, f0, f1, f2, V, K, R, ...] - - # execute the actual insert procedure - exec.insert_internal - # => [V_old, R_new, ...] -end - -# DELETE +# GET # ================================================================================================= -#! Verifies that a node at depth 16 and index defined by the most significant element of K in a -#! tree with root R is a root of an empty subtree. +#! Returns the value located under the specified key in the Sparse Merkle Tree defined by the +#! specified root. #! -#! Input: [Z, K, R, ...] -#! Output: [Z, R, ...] +#! If no values had been previously inserted under the specified key, an empty word (i.e., +#! [ZERO; 4]) is returned. #! -#! Where Z is [ZERO; 4]. +#! Inputs: +#! Operand stack: [K, R, ...] #! -#! Cycles: 26 -proc.verify_empty_node_16 - # (9 cycles) - swapw.2 dup.4 exec.get_top_16_bits push.16 - # => [16, idx, R, K, Z, ...] - - # (4 cycles) - push.EMPTY_16_0.EMPTY_16_1.EMPTY_16_2.EMPTY_16_3 - # => [E16, 16, idx, R, K, Z, ...] - - # (1 cycle) - mtree_verify - # => [E16, 16, idx, R, K, Z, ...] - - # (12 cycles) - dropw drop drop swapw dropw swapw - # => [Z, R, ...] -end - -#! Verifies that a node at depth 32 and index defined by the most significant element of K in a -#! tree with root R is a root of an empty subtree. -#! -#! Input: [Z, K, R, ...] -#! Output: [Z, R, ...] +#! Outputs: +#! Operand stack: [V, R, ...] #! -#! Where Z is [ZERO; 4]. +#! Fails if the tree with the specified root does not exits in the VM's advice provider. #! -#! Cycles: 23 -proc.verify_empty_node_32 +#! Cycles +#! Leaf empty: 48 cycles +#! Leaf single: 99 cycles +#! Leaf multiple: unimplemented +export.get + # Prepare for `mtree_get` # (6 cycles) - swapw.2 dup.4 exec.get_top_32_bits push.32 - # => [32, idx, R, K, Z, ...] - - # (4 cycles) - push.EMPTY_32_0.EMPTY_32_1.EMPTY_32_2.EMPTY_32_3 - # => [E32, 32, idx, R, K, Z, ...] - - # (1 cycle) - mtree_verify - # => [E32, 32, idx, R, K, Z, ...] - - # (12 cycles) - dropw drop drop swapw dropw swapw - # => [Z, K, ...] -end - -#! Verifies that a node at depth 48 and index defined by the most significant element of K in a -#! tree with root R is a root of an empty subtree. -#! -#! Input: [Z, K, R, ...] -#! Output: [Z, R, ...] -#! -#! Where Z is [ZERO; 4]. -#! -#! Cycles: 29 -proc.verify_empty_node_48 - # (12 cycles) - swapw.2 dup.4 exec.get_top_48_bits push.48 - # => [48, idx, R, K, Z, ...] - - # (4 cycles) - push.EMPTY_48_0.EMPTY_48_1.EMPTY_48_2.EMPTY_48_3 - # => [E48, 48, idx, R, K, Z, ...] - - # (1 cycle) - mtree_verify - # => [E48, 48, idx, R, K, Z, ...] - - # (12 cycles) - dropw drop drop swapw dropw swapw - # => [Z, K, ...] -end + dupw.1 dup.4 push.LEAF_DEPTH + # => [depth, K[3], R, K, R] -#! Verifies that a leaf node located at depth d and index idx in the tree defined by root R is -#! contained key which is different from K. -#! -#! Input: [d, idx, K, R, Z, ...] -#! Output: [Z, R, ...] -#! -#! Where Z is [ZERO; 4]. -#! -#! Cycles: 50 -proc.verify_leaf_with_another_key - # load the leaf key K_e from the advice provider (11 cycles) - movdn.5 movdn.5 push.0 dup.5 push.0.0 swapw adv_push.4 - # => [K_e, K, 0, 0, d, 0, d, idx, R, Z, ...] 
- - # make sure K_e and K are not the same (17 cycles) - repeat.4 - dup.3 movup.8 eq - end - and and and assertz - # => [K_e, 0, 0, d, 0, d, idx, R, Z, ...] + # Retrieve node value from merkle store + # (14 cycles) + mtree_get swapw dropw + # => [NV, K, R] - # load leaf value V_e from the advice provider (4 cycles) - adv_push.4 - # => [V_e, K_e, 0, 0, d, 0, d, idx, R, Z, ...] + # Check if value is empty; if so, return empty value + # (19 cycles) + padw eqw + # => [NV == 0, ZERO, V, K, R] - # compute N = hash([K_e, K_v], domain=d) - (10 cycles) - hperm dropw swapw dropw - # => [N, d, idx, R, Z, ...] + if.true + # Return empty value + # (9 cycles) + dropw swapw dropw + # => [NV, R] + else + # Drop extra ZERO word + # (4 cycles) + dropw + # => [NV, K, R] - # verify that node N exists in the tree with root R at the specified index and depth - mtree_verify - # => [N, d, idx, R, Z, ...] + # Get leaf pre-image from advice map. Push the leaf preimage size on the stack + # (0 cycles) + adv.push_mapvaln adv_push.1 + # => [leaf_size, NV, K, R] - # clean up the stack and return (7 cycles) - dropw drop drop swapw - # => [Z, R, ...] -end + # Leaf size will be a multiple of 8 (each kv-pair in a leaf is 8 elements) + # (3 cycles) + dup eq.8 + # => [is_single_kv_pair, leaf_size, NV, K, R] -#! Removes a key-value under key K from the Tiered Sparse Merkle tree defined by root R, and -#! returns the new tree root together with the value previously associated with key K. -#! -#! If the key-value pair is not in the tree, this proves that the key-value pair is not in the -#! tree but does not modify the tree itself. -#! -#! Input: [key_not_in_tree, f0, f1, f2, Z, K, R, ...] -#! Output: [V_old, R_new, ...] -#! -#! Where: -#! - key_not_in_tree is a flag specifying whether the specified key is present in the tree defined -#! by R. -#! - Meaning of the flags f0, f1, and f2 depends on what type of delete is being executed. -#! - Z is [ZERO; 4]. This is assumed but not checked. -#! -#! Cycles: -#! - Key not in the tree (key with common prefix in the tree): -#! - Depth 16: 68 -#! - Depth 32: 65 -#! - Depth 48: 71 -#! - Key not in the tree (no key with common prefix): -#! - Depth 16: 34 -#! - Depth 32: 31 -#! - Depth 48: 37 -#! - Remove a leaf from the tree: -#! - 121 - 284 cycles -proc.delete - if.true # --- key is not in the tree ---------------------------------------------------------- - if.true # --- key with common prefix in the tree --- - if.true - if.true - # (10 cycles) - swapw.2 swapw dup exec.get_top_16_bits push.16 - # => [16, idx, K, R, Z, ...] - - exec.verify_leaf_with_another_key - else - # (7 cycles) - swapw.2 swapw dup exec.get_top_32_bits push.32 - # => [32, idx, K, R, Z, ...] - - exec.verify_leaf_with_another_key - end - else - if.true - # (13 cycles) - swapw.2 swapw dup exec.get_top_48_bits push.48 - # => [48, idx, K, R, Z, ...] 
- - exec.verify_leaf_with_another_key - else - # depth 64 - currently not implemented - push.0 assert - end - end - else # --- no key with common prefix in the tree --- - if.true - if.true - exec.verify_empty_node_16 - else - exec.verify_empty_node_32 - end - else - if.true - exec.verify_empty_node_48 - else - # depth 64 - currently not implemented - push.0 assert - end - end + if.true + # Single kv-pair case + + # Push leaf pre-image on stack (single K-V pair) + # (1 cycle) + drop adv_push.8 + # => [V, K, NV, K, R] + + # Confirm that the key stored in the leaf is as expected + # (18 cycles) + movupw.3 dupw.2 assert_eqw + # => [V, K, NV, R] + + # Duplicate V to return it after hash check + # (7 cycles) + dupw movdnw.3 + # => [V, K, NV, V, R] + + # Hash leaf preimage and ensure that it equals node value + # (27 cycles) + hmerge assert_eqw + # => [V, R] + else + # Multiple kv-pair case + # TODO (fail for now) + push.1 assertz end - else # --- key is in the tree --------------------------------------------------------------- - # load from the advice provider the root of the tree with the leaf removed (2 cycles) - push.0 adv_loadw - # => [R_new, Z, K, R, ...] - - # load the value to be removed from the advice provider (6 cycles) - dupw swapw.2 adv_loadw - # => [V_old, R_new, R_new, K, R, ...] - - # prepare the stack for TSMT insert operation (9 cycles) - swapw.3 dupw.3 adv.push_smtset adv_push.4 - # => [f0, f1, f2, f3, V_old, K, R_new, R_new, V_old, R, ...] - - # insert key-pair (K, V_old) into TSMT with root R_new - i.e., insert the key-value pair - # to be removed into the TSMT with value already removed. if all values were provided - # correctly, we should get TSMT with the root prior to the key-pair's removal. - # (81 - 244 cycles) - exec.insert_internal - # => [Z, R_old, R_new, V_old, R, ...] - - # make sure the value we got back is an empty node (8 cycles) - assertz assertz assertz assertz - # => [R_old, R_new, V_old, R, ...] - - # Make sure R and R_old are the same (13 cycles) - swapw swapw.3 assert_eqw - # => [V_old, R_new, ...] - end -end - -# SET -# ================================================================================================= - -#! Sets the value associated with key K to V in a Sparse Merkle tree with root R. Returns the new -#! root of the tree together with the value previously associated with key K. -#! -#! If no value was previously associated with K, [ZERO; 4] is returned. -#! -#! Unlike the `insert` procedure defined above, this procedure allows for values to be set to -#! [ZERO; 4]. -#! -#! Input: [V, K, R, ...] -#! Output: [V_old, R_new, ...] -#! -#! Cycles: -#! - Update existing leaf: -#! - Depth 16: 137 -#! - Depth 32: 133 -#! - Depth 48: 139 -#! - Insert new leaf: -#! - Depth 16: 102 -#! - Depth 32: 183 -#! - Depth 48: 183 -#! - Replace a leaf with a subtree: -#! - Depth 16 -> 32: 242 -#! - Depth 16 -> 48: 265 -#! - Depth 32 -> 48: 255 -#! - Remove a key-value pair: -#! - Key-value pair not in tree: 52 - 93 -#! - Key-value pair is in tree: 142 - 305 -export.set - # arrange the data needed for the update procedure on the advice stack and move the first - # 4 flags onto the operand stack; meaning of the flags f0, f1, f2, and f3 depends on what - # type of update is being executed (4 cycles) - adv.push_smtset adv_push.4 - # => [f0, f1, f2, f3, V, K, R, ...] - - # determine if the value is an empty word (15 cycles) - repeat.4 - dup.7 push.0 eq - end - and and and - # => [is_empty_value, f0, f1, f2, f3, V, K, R, ...] 
- - # the value is an empty word execute the delete procedure; otherwise execute the internal - # insert procedure because we already have all required data on the advice provider and know - # that the value being inserted is not an empty word. - if.true - exec.delete - else - exec.insert_internal end end diff --git a/stdlib/asm/collections/smt64.masm b/stdlib/asm/collections/smt64.masm deleted file mode 100644 index 295950ddfa..0000000000 --- a/stdlib/asm/collections/smt64.masm +++ /dev/null @@ -1,75 +0,0 @@ -#! A key-value map with single-element keys and 4-element values. -#! -#! Current implementation is a thin wrapper over a simple Sparse Merkle Tree of depth 64. In the -#! future, this will be replaced with a compact Sparse Merkle Tree implementation. - -#! Returns the value located under the specified key in the Sparse Merkle Tree defined by the -#! specified root. -#! -#! If no values had been previously inserted under the specified key, an empty word (i.e., -#! [ZERO; 4]) is returned. -#! -#! Inputs: -#! Operand stack: [key, ROOT, ...] -#! -#! Outputs: -#! Operand stack: [VALUE, ROOT, ...] -#! -#! Fails if the tree with the specified root does not exits in the VM's advice provider. -export.get - push.64 - mtree_get -end - -#! Inserts the specified value under the specified key in a Sparse Merkle Tree defined by the -#! specified root. If the insert is successful, the old value located under the specified key -#! is returned via the stack. -#! -#! This procedure assumes that VALUE is a non-empty word (i.e., not [ZERO; 4]). -#! -#! Inputs: -#! Operand stack: [VALUE, key, ROOT, ...] -#! -#! Outputs: -#! Operand stack: [OLD_VALUE, NEW_ROOT, ...] -#! -#! Fails if: -#! - The tree with the specified root does not exits in the VM's advice provider. -#! - The provided value is an empty word. -export.insert - # make sure the value is not [ZERO; 4] (17 cycles) - repeat.4 - dup.3 eq.0 - end - and and and assertz - - # prepare the stack for mtree_set operation - movup.4 movdn.8 swapw movup.8 push.64 - # => [64, key, ROOT, VALUE, ...] - - mtree_set - # => [OLD_VALUE, NEW_ROOT, ...] -end - -#! Inserts the specified value under the specified key in a Sparse Merkle Tree defined by the -#! specified root. If the insert is successful, the old value located under the specified key -#! is returned via the stack. -#! -#! If the VALUE is an empty word (i.e., [ZERO; 4]), the new state of the tree is guaranteed to -#! be equivalent to the state as if the updated value was never inserted. -#! -#! Inputs: -#! Operand stack: [VALUE, key, ROOT, ...] -#! -#! Outputs: -#! Operand stack: [OLD_VALUE, NEW_ROOT, ...] -#! -#! Fails if the tree with the specified root does not exits in the VM's advice provider. -export.set - # prepare the stack for mtree_set operation - movup.4 movdn.8 swapw movup.8 push.64 - # => [64, key, ROOT, VALUE, ...] - - mtree_set - # => [OLD_VALUE, NEW_ROOT, ...] 
-end diff --git a/stdlib/asm/crypto/dsa/rpo_falcon512.masm b/stdlib/asm/crypto/dsa/rpo_falcon512.masm index 7dd00b59e9..710147e6ce 100644 --- a/stdlib/asm/crypto/dsa/rpo_falcon512.masm +++ b/stdlib/asm/crypto/dsa/rpo_falcon512.masm @@ -1,5 +1,3 @@ -use.std::crypto::stark::deep_queries - # CONSTANTS # ================================================================================================= @@ -7,7 +5,7 @@ const.J=77321994752 const.M=12289 const.SQUARE_NORM_BOUND=34034726 -# MODULAR REDUCTION FALCON PRIME +# MODULAR REDUCTION FALCON PRIME # ============================================================================================= #! Given dividend ( i.e. field element a ) on stack top, this routine computes c = a % 12289 @@ -61,15 +59,15 @@ export.mod_12289 drop end -# HASH-TO-POINT +# HASH-TO-POINT # ============================================================================================= #! Takes as input a message digest, a nonce of size 40 bytes represented as 8 field elements #! and a pointer. The procedure absorbs MSG and NONCE into a fresh RPO state and squeezes the #! coefficients of a polynomial c representing the hash-to-point of (MSG || NONCE). The coefficients #! are then saved in the memory region [c_ptr, c_ptr + 128). -#! This implementation of the `hash_to_point` procedure avoids the rejection-sampling step -#! required in the per-the-spec algorithm by using the observation on page 31 in +#! This implementation of the `hash_to_point` procedure avoids the rejection-sampling step +#! required in the per-the-spec algorithm by using the observation on page 31 in #! https://falcon-sign.info/falcon.pdf #! #! Input: [c_ptr, MSG, NONCE1, NONCE0, ...] @@ -108,7 +106,7 @@ export.hash_to_point.2 end -# PROBABILISTIC POLYNOMIAL MULTIPLICATION IN Z_Q[x] +# PROBABILISTIC POLYNOMIAL MULTIPLICATION IN Z_Q[x] # ============================================================================================= #! For an element `tau := (tau0, tau1)` in the quadratic extension field, computes all its powers @@ -120,7 +118,7 @@ end #! #! Cycles: 8323 export.powers_of_tau - + # 1) Save tau^0 i.e. (0, 1) push.1 push.0.0.0 dup.6 add.1 swap.7 @@ -133,9 +131,9 @@ export.powers_of_tau dupw ext2mul movup.3 movup.3 - + dup.6 add.1 swap.7 mem_storew - + drop drop end @@ -164,7 +162,7 @@ end #! public key, and a pointer to the memory location where the coefficients of the polynomial `h` #! will be stored. #! The procedure loads `h` from the advice stack and compares its hash with the provided hash `PK`. -#! It then loads the polynomial `s2` representing the signature from the advice stack and lays it +#! It then loads the polynomial `s2` representing the signature from the advice stack and lays it #! in memory right after `h`. #! It then loads the claimed polynomial `h * s2` in Z_Q[x] where Q is the Miden VM prime from #! the advice stack and lays it right after `s2`. @@ -183,37 +181,37 @@ export.load_h_s2_and_product.1 loc_storew.0 # 2) Prepare stack and load h polynomial. We also range check the coefficients of h. 
- padw swapw + padw swapw padw repeat.64 adv_pipe dupw.1 u32assert2 - push.M u32unchecked_lt assert - push.M u32unchecked_lt assert + push.M u32lt assert + push.M u32lt assert u32assert2 - push.M u32unchecked_lt assert - push.M u32unchecked_lt assert + push.M u32lt assert + push.M u32lt assert dupw u32assert2 - push.M u32unchecked_lt assert - push.M u32unchecked_lt assert + push.M u32lt assert + push.M u32lt assert u32assert2 - push.M u32unchecked_lt assert - push.M u32unchecked_lt assert + push.M u32lt assert + push.M u32lt assert hperm end - + # 3) Load saved claimed hash of h and compare loc_loadw.0 movup.4 assert_eq movup.3 assert_eq movup.2 assert_eq assert_eq - + # 4) Load s2 (Due to the final norm test we do not need to range check the s2 coefficents) padw padw repeat.64 @@ -227,13 +225,13 @@ export.load_h_s2_and_product.1 # 6) Return the challenge point and the incremented pointer dropw swapw dropw - drop drop + drop drop #=> [tau1, tau0, ptr + 512] end #! Checks that pi == h * s2 in Z_Q[x] by evaluating both sides at a random point. #! The procedure takes as input a pointer h_ptr to h. The other two polynomials -#! are located at h_ptr + 128, for s2, and h_ptr + 256, for pi. The procedure takes +#! are located at h_ptr + 128, for s2, and h_ptr + 256, for pi. The procedure takes #! also a pointer zeros_ptr to a region of memory [zeros_ptr, zeros_ptr + 1024) #! and a pointer tau_ptr to powers of the random point we are evaluating at stored #! as [a_i, b_i, x, x] where (a_i, b_i) := tau^i for i in [0, 1023]. @@ -257,11 +255,11 @@ export.probablistic_product.4 # For mem_stream padw padw - # Compute h(tau) + # Compute h(tau) repeat.64 mem_stream repeat.8 - exec.deep_queries::combine_main + rcomb_base end end @@ -284,7 +282,7 @@ export.probablistic_product.4 repeat.64 mem_stream repeat.8 - exec.deep_queries::combine_main + rcomb_base end end @@ -313,12 +311,12 @@ export.probablistic_product.4 repeat.64 mem_stream repeat.8 - exec.deep_queries::combine_main + rcomb_base end end #=> [X, X, ev1, ev0, ev1, ev0, pi_ptr, zeros_ptr, tau_ptr, 0, ...] # where (ev0, ev1) := pi1(tau) - + # Save pi_1(tau) swapw.2 loc_storew.3 @@ -333,7 +331,7 @@ export.probablistic_product.4 repeat.64 mem_stream repeat.8 - exec.deep_queries::combine_main + rcomb_base end end #=> [X, X, ev1, ev0, ev1, ev0, pi_ptr, zeros_ptr, tau_ptr, 0, ...] @@ -369,7 +367,7 @@ export.probablistic_product.4 ext2mul ## d) assert equality - movup.2 + movup.2 assert_eq assert_eq @@ -379,7 +377,7 @@ export.probablistic_product.4 end -# SQUARE NORM OF Z_q[x]/(phi) POLYNOMIALS +# SQUARE NORM OF Z_q[x]/(phi) POLYNOMIALS # ============================================================================================= #! Normalizes an `e` in [0, q) to be in [-(q-1) << 1, (q-1) << 1) and returns its square norm. @@ -392,7 +390,7 @@ end #! if e > (q-1)/2: #! return (q - e)^2 #! else: -#! return e^2 +#! return e^2 #! #! The use of the formula avoids using the if-else block. #! @@ -401,7 +399,7 @@ end #! #! Cycles: 21 export.norm_sq - + dup dup mul swap @@ -409,11 +407,11 @@ export.norm_sq dup push.6144 - u32unchecked_gt + u32gt #=> [phi, e, e^2, ...] swap - mul.24578 # 2*q + mul.24578 # 2*q push.151019521 # q^2 sub #=> [2*q*e - q^2, phi, e^2, ...] @@ -424,8 +422,8 @@ end #! On input a tuple (u, w, v), the following computes (v - (u + (- w % q) % q) % q). #! We can avoid doing three modular reductions by using the following facts: -#! -#! 1. q is much smaller than the Miden prime. Precisely, q * 2^50 < Q +#! +#! 1. 
q is much smaller than the Miden prime. Precisely, q * 2^50 < Q #! 2. The coefficients of the product polynomial, u and w, are less than J := 512 * q^2 #! 3. The coefficients of c are less than q. #! @@ -440,7 +438,7 @@ end #! Cycles: 44 export.diff_mod_q - # 1) v + w + J + # 1) v + w + J add push.J add #=> [v + w + J, u] @@ -467,16 +465,16 @@ end #! We can compute s1 in a single pass by delaying the q-modular reduction til the end. This can #! be achieved through a careful analysis of the computation of the difference between pi and c. #! -#! The i-th coefficient s1_i of s1 is equal to c_i - (pi_i - pi_{512 + i}) which is equal to +#! The i-th coefficient s1_i of s1 is equal to c_i - (pi_i - pi_{512 + i}) which is equal to #! c_i + pi_{512 + i} - pi_i. Now, we know that the size of the pi_i coefficients is bounded by -#! J := 512 * q^2 and this means that J + pi_{512 + i} - pi_i does not Q-underflow and since -#! J = 0 modulo q, the addition of J does not affect the final result. It is also important to +#! J := 512 * q^2 and this means that J + pi_{512 + i} - pi_i does not Q-underflow and since +#! J = 0 modulo q, the addition of J does not affect the final result. It is also important to #! note that adding J does not Q-overflow by virtue of q * 2^50 < Q. #! All of the above implies that we can compute s1_i with only one modular reduction at the end, #! in addition to one modular reduction applied to c_i. #! Moreover, since we are only interested in the square norm of s1_i, we do not have to store #! s1_i and then load it at a later point, and instead we can immediatly follow the computation -#! of s1_i with computing its square norm. +#! of s1_i with computing its square norm. #! After computing the square norm of s1_i, we can accumulate into an accumulator to compute the #! sum of the square norms of all the coefficients of polynomial c. Using the overflow stack, this #! can be delayed til the end. @@ -485,7 +483,7 @@ end #! Output: [norm_sq(s1), ...] #! #! Cycles: 58888 -export.compute_s1_norm_sq +export.compute_s1_norm_sq repeat.128 # 1) Load the next 4 * 3 coefficients # load c_i @@ -500,7 +498,7 @@ export.compute_s1_norm_sq # load pi_i padw - dup.12 + dup.12 mem_loadw #=> [PI, PI_{i+512}, C, pi_ptr, ...] @@ -519,7 +517,7 @@ export.compute_s1_norm_sq # Move the result out of the way so that we can process the remaining coefficents movdn.10 - + # 3) Compute the squared norm of (i + 1)-th coefficient of s1 movup.6 exec.mod_12289 @@ -527,7 +525,7 @@ export.compute_s1_norm_sq exec.diff_mod_q exec.norm_sq movdn.7 - + # 4) Compute the squared norm of (i + 2)-th coefficient of s1 movup.4 exec.mod_12289 @@ -584,7 +582,7 @@ end # FALCON SIGNATURE VERIFICATION ALGORITHM # ============================================================================================= -#! Verifies a signature against a public key and a message. The procedure gets as inputs the hash +#! Verifies a signature against a public key and a message. The procedure gets as inputs the hash #! of the public key and the hash of the message via the operand stack. The signature is provided #! via the advice stack. #! The signature is valid if and only if the procedure returns. @@ -638,19 +636,19 @@ export.verify.1665 #=> [...] (Cycles: 2504) # 6) Compute the squared norm of s1 := c - h * s2 (in Z_q[x]/(phi)) - + locaddr.256 #=> [pi_ptr, ...] exec.compute_s1_norm_sq - #=> [norm_sq(s1), ...] (Cycles: 58888) + #=> [norm_sq(s1), ...] (Cycles: 58888) # 7) Compute the squared norm of s2 locaddr.128 - #=> [s2_ptr, norm_sq(s1), ...] 
+ #=> [s2_ptr, norm_sq(s1), ...] - exec.compute_s2_norm_sq + exec.compute_s2_norm_sq #=> [norm_sq(s2), norm_sq(s1), ...] (Cycles: 13322) # 8) Check that ||(s1, s2)||^2 < K @@ -659,6 +657,6 @@ export.verify.1665 #=> [norm_sq(s1) + norm_sq(s2), ...] push.SQUARE_NORM_BOUND - u32checked_lt assert + u32assert2 u32lt assert #=> [...] (Cycles: 8) -end \ No newline at end of file +end diff --git a/stdlib/asm/crypto/elgamal_ecgfp5.masm b/stdlib/asm/crypto/elgamal_ecgfp5.masm index e3f5fff24c..d3b51ba2bc 100644 --- a/stdlib/asm/crypto/elgamal_ecgfp5.masm +++ b/stdlib/asm/crypto/elgamal_ecgfp5.masm @@ -1,8 +1,8 @@ use.std::math::ecgfp5::group #! Generates the public key, point H -#! the private key is expected as input and is a 319-bit random -#! number of 10 32-bit limbs. +#! the private key is expected as input and is a 319-bit random +#! number of 10 32-bit limbs. export.gen_privatekey.8 exec.group::gen_mul end diff --git a/stdlib/asm/crypto/fri/ext2fri.masm b/stdlib/asm/crypto/fri/ext2fri.masm index beabc12f80..dc558d1c44 100644 --- a/stdlib/asm/crypto/fri/ext2fri.masm +++ b/stdlib/asm/crypto/fri/ext2fri.masm @@ -564,7 +564,7 @@ proc.compute_alpha_64 ext2add #=> [acc1, acc0, τ1, τ0, p_ptr-1, ...] - movup.4 dup sub.1 movdn.5 + movup.4 dup sub.1 movdn.5 padw movup.4 mem_loadw #=> [a11, a10, a01, a00, acc1, acc0, τ1, τ0, p_ptr-1, ...] @@ -580,7 +580,7 @@ proc.compute_alpha_64 ext2add #=> [acc1, acc0, τ1, τ0, p_ptr-1, ...] - movup.4 dup sub.1 movdn.5 + movup.4 dup sub.1 movdn.5 padw movup.4 mem_loadw #=> [a11, a10, a01, a00, acc1, acc0, τ1, τ0, p_ptr-1, ...] @@ -627,10 +627,10 @@ proc.compute_alpha_32 padw dup.6 sub.1 swap.7 mem_loadw dup.5 dup.5 - ext2mul + ext2mul ext2add - padw movup.8 + padw movup.8 mem_loadw movup.5 movup.5 dup.7 dup.7 ext2mul @@ -667,11 +667,11 @@ export.verify_remainder_64 #=> [β1, β0, τ1, τ0, q_ptr, ...] # Pointer to the last word of the remainder polynomial for Horner evaluation. - movup.4 add.4 + movup.4 add.4 #=> [p_ptr, β1, β0, τ1, τ0, ...] # We need to multiply τ by the domain offset before evaluation. - movup.4 mul.7 + movup.4 mul.7 movup.4 mul.7 exec.compute_alpha_64 @@ -714,11 +714,11 @@ export.verify_remainder_32 #=> [β1, β0, τ1, τ0, q_ptr, ...] # Pointer to the last word of the remainder polynomial for Horner evaluation. - movup.4 add.2 + movup.4 add.2 #=> [p_ptr, β1, β0, τ1, τ0, ...] # We need to multiply τ by the domain offset before evaluation. - movup.4 mul.7 + movup.4 mul.7 movup.4 mul.7 exec.compute_alpha_32 @@ -732,4 +732,4 @@ export.verify_remainder_32 and assert # [...] -end \ No newline at end of file +end diff --git a/stdlib/asm/crypto/fri/frie2f4.masm b/stdlib/asm/crypto/fri/frie2f4.masm index 9d2b16778f..c9c13fb775 100644 --- a/stdlib/asm/crypto/fri/frie2f4.masm +++ b/stdlib/asm/crypto/fri/frie2f4.masm @@ -109,7 +109,7 @@ export.verify_query_layer.3 swapw.2 # [poe, p, e1, e0, d_size, t_depth, a1, a0, C, layer_ptr, rem_ptr, ...] swap # [p, poe, e1, e0, d_size, t_depth, a1, a0, C, layer_ptr, rem_ptr, ...] movup.4 # [d_size, p, poe, e1, e0, t_depth, a1, a0, C, layer_ptr, rem_ptr, ...] - u32unchecked_divmod # p and d_size must be u32 values + u32divmod # p and d_size must be u32 values movup.5 movupw.2 dup.5 @@ -205,7 +205,7 @@ export.verify_query # of the two elements we should compare against. 
(7 cycles) movup.3 push.2 - u32unchecked_divmod # f_pos must be a u32 value + u32divmod # f_pos must be a u32 value movdn.4 dup.1 dup.1 diff --git a/stdlib/asm/crypto/fri/helper.masm b/stdlib/asm/crypto/fri/helper.masm index 60b581b4a9..bf7b4ffeb5 100644 --- a/stdlib/asm/crypto/fri/helper.masm +++ b/stdlib/asm/crypto/fri/helper.masm @@ -20,7 +20,7 @@ export.generate_fri_parameters # TODO: move to somewhere else # --------------------------------------------------------------------------------------------- - # load z from memory + # load z from memory padw exec.constants::z_ptr mem_loadw #=> [(z1, z0)^n, z1, z0, lde_size, log2(lde_size), lde_g, 0, ...] (6 cycles) diff --git a/stdlib/asm/crypto/hashes/blake3.masm b/stdlib/asm/crypto/hashes/blake3.masm index 33316ddc39..326023d1c6 100644 --- a/stdlib/asm/crypto/hashes/blake3.masm +++ b/stdlib/asm/crypto/hashes/blake3.masm @@ -120,41 +120,41 @@ end #! that's because it doesn't dictate what output of 2-to-1 hash will be. proc.finalize movup.8 - u32checked_xor + u32xor swap movup.8 - u32checked_xor + u32xor swap movup.2 movup.8 - u32checked_xor + u32xor movdn.2 movup.3 movup.8 - u32checked_xor + u32xor movdn.3 movup.4 movup.8 - u32checked_xor + u32xor movdn.4 movup.5 movup.8 - u32checked_xor + u32xor movdn.5 movup.6 movup.8 - u32checked_xor + u32xor movdn.6 movup.7 movup.8 - u32checked_xor + u32xor movdn.7 end @@ -227,25 +227,25 @@ proc.columnar_mixing.1 mem_loadw dup.4 - u32checked_xor - u32unchecked_rotr.16 + u32xor + u32rotr.16 swap dup.5 - u32checked_xor - u32unchecked_rotr.16 + u32xor + u32rotr.16 swap movup.2 dup.6 - u32checked_xor - u32unchecked_rotr.16 + u32xor + u32rotr.16 movdn.2 movup.3 dup.7 - u32checked_xor - u32unchecked_rotr.16 + u32xor + u32rotr.16 movdn.3 movup.12 @@ -274,25 +274,25 @@ proc.columnar_mixing.1 movupw.3 dup.4 - u32checked_xor - u32unchecked_rotr.12 + u32xor + u32rotr.12 swap dup.5 - u32checked_xor - u32unchecked_rotr.12 + u32xor + u32rotr.12 swap movup.2 dup.6 - u32checked_xor - u32unchecked_rotr.12 + u32xor + u32rotr.12 movdn.2 movup.3 dup.7 - u32checked_xor - u32unchecked_rotr.12 + u32xor + u32rotr.12 movdn.3 movupw.3 @@ -329,25 +329,25 @@ proc.columnar_mixing.1 movupw.3 dup.4 - u32checked_xor - u32unchecked_rotr.8 + u32xor + u32rotr.8 swap dup.5 - u32checked_xor - u32unchecked_rotr.8 + u32xor + u32rotr.8 swap movup.2 dup.6 - u32checked_xor - u32unchecked_rotr.8 + u32xor + u32rotr.8 movdn.2 movup.3 dup.7 - u32checked_xor - u32unchecked_rotr.8 + u32xor + u32rotr.8 movdn.3 movupw.3 @@ -373,25 +373,25 @@ proc.columnar_mixing.1 movupw.3 dup.4 - u32checked_xor - u32unchecked_rotr.7 + u32xor + u32rotr.7 swap dup.5 - u32checked_xor - u32unchecked_rotr.7 + u32xor + u32rotr.7 swap movup.2 dup.6 - u32checked_xor - u32unchecked_rotr.7 + u32xor + u32rotr.7 movdn.2 movup.3 dup.7 - u32checked_xor - u32unchecked_rotr.7 + u32xor + u32rotr.7 movdn.3 movupw.3 @@ -467,24 +467,24 @@ proc.diagonal_mixing.1 movup.3 dup.4 - u32checked_xor - u32unchecked_rotr.16 + u32xor + u32rotr.16 movdn.3 dup.5 - u32checked_xor - u32unchecked_rotr.16 + u32xor + u32rotr.16 swap dup.6 - u32checked_xor - u32unchecked_rotr.16 + u32xor + u32rotr.16 swap movup.2 dup.7 - u32checked_xor - u32unchecked_rotr.16 + u32xor + u32rotr.16 movdn.2 movup.12 @@ -514,25 +514,25 @@ proc.diagonal_mixing.1 swap dup.6 - u32checked_xor - u32unchecked_rotr.12 + u32xor + u32rotr.12 swap movup.2 dup.7 - u32checked_xor - u32unchecked_rotr.12 + u32xor + u32rotr.12 movdn.2 movup.3 dup.4 - u32checked_xor - u32unchecked_rotr.12 + u32xor + u32rotr.12 movdn.3 dup.5 - u32checked_xor - 
u32unchecked_rotr.12 + u32xor + u32rotr.12 movupw.3 push.0.0.0.0 @@ -569,24 +569,24 @@ proc.diagonal_mixing.1 movup.3 dup.4 - u32checked_xor - u32unchecked_rotr.8 + u32xor + u32rotr.8 movdn.3 dup.5 - u32checked_xor - u32unchecked_rotr.8 + u32xor + u32rotr.8 swap dup.6 - u32checked_xor - u32unchecked_rotr.8 + u32xor + u32rotr.8 swap movup.2 dup.7 - u32checked_xor - u32unchecked_rotr.8 + u32xor + u32rotr.8 movdn.2 movupw.3 @@ -613,25 +613,25 @@ proc.diagonal_mixing.1 swap dup.6 - u32checked_xor - u32unchecked_rotr.7 + u32xor + u32rotr.7 swap movup.2 dup.7 - u32checked_xor - u32unchecked_rotr.7 + u32xor + u32rotr.7 movdn.2 movup.3 dup.4 - u32checked_xor - u32unchecked_rotr.7 + u32xor + u32rotr.7 movdn.3 dup.5 - u32checked_xor - u32unchecked_rotr.7 + u32xor + u32rotr.7 movupw.3 end diff --git a/stdlib/asm/crypto/hashes/keccak256.masm b/stdlib/asm/crypto/hashes/keccak256.masm index 07ca58be54..a342ee646d 100644 --- a/stdlib/asm/crypto/hashes/keccak256.masm +++ b/stdlib/asm/crypto/hashes/keccak256.masm @@ -46,12 +46,12 @@ proc.theta.3 drop movup.3 - u32checked_xor + u32xor swap movup.3 - u32checked_xor + u32xor swap @@ -70,12 +70,12 @@ proc.theta.3 drop movup.3 - u32checked_xor + u32xor swap movup.3 - u32checked_xor + u32xor swap @@ -92,12 +92,12 @@ proc.theta.3 drop movup.3 - u32checked_xor + u32xor swap movup.3 - u32checked_xor + u32xor swap @@ -115,12 +115,12 @@ proc.theta.3 drop movup.2 - u32checked_xor + u32xor swap movup.2 - u32checked_xor + u32xor swap @@ -154,12 +154,12 @@ proc.theta.3 drop movup.3 - u32checked_xor + u32xor swap movup.3 - u32checked_xor + u32xor swap @@ -176,12 +176,12 @@ proc.theta.3 drop movup.3 - u32checked_xor + u32xor swap movup.3 - u32checked_xor + u32xor swap @@ -200,12 +200,12 @@ proc.theta.3 drop movup.3 - u32checked_xor + u32xor swap movup.3 - u32checked_xor + u32xor swap @@ -221,12 +221,12 @@ proc.theta.3 drop movup.2 - u32checked_xor + u32xor swap movup.2 - u32checked_xor + u32xor swap @@ -269,12 +269,12 @@ proc.theta.3 drop movup.3 - u32checked_xor + u32xor swap movup.3 - u32checked_xor + u32xor swap @@ -293,12 +293,12 @@ proc.theta.3 drop movup.3 - u32checked_xor + u32xor swap movup.3 - u32checked_xor + u32xor swap @@ -315,12 +315,12 @@ proc.theta.3 drop movup.3 - u32checked_xor + u32xor swap movup.3 - u32checked_xor + u32xor swap @@ -338,12 +338,12 @@ proc.theta.3 drop movup.2 - u32checked_xor + u32xor swap movup.2 - u32checked_xor + u32xor swap @@ -378,12 +378,12 @@ proc.theta.3 drop movup.3 - u32checked_xor + u32xor swap movup.3 - u32checked_xor + u32xor swap @@ -400,12 +400,12 @@ proc.theta.3 drop movup.3 - u32checked_xor + u32xor swap movup.3 - u32checked_xor + u32xor swap @@ -424,12 +424,12 @@ proc.theta.3 drop movup.3 - u32checked_xor + u32xor swap movup.3 - u32checked_xor + u32xor swap @@ -445,12 +445,12 @@ proc.theta.3 drop movup.2 - u32checked_xor + u32xor swap movup.2 - u32checked_xor + u32xor swap @@ -493,12 +493,12 @@ proc.theta.3 drop movup.3 - u32checked_xor + u32xor swap movup.3 - u32checked_xor + u32xor swap @@ -517,12 +517,12 @@ proc.theta.3 drop movup.3 - u32checked_xor + u32xor swap movup.3 - u32checked_xor + u32xor swap @@ -539,12 +539,12 @@ proc.theta.3 drop movup.3 - u32checked_xor + u32xor swap movup.3 - u32checked_xor + u32xor swap @@ -562,12 +562,12 @@ proc.theta.3 drop movup.2 - u32checked_xor + u32xor swap movup.2 - u32checked_xor + u32xor swap @@ -586,48 +586,48 @@ proc.theta.3 dup.8 dup.4 - u32unchecked_rotl.1 - u32checked_xor + u32rotl.1 + u32xor dup.10 dup.4 - u32checked_xor + u32xor dup.2 dup.8 - u32unchecked_rotl.1 - 
u32checked_xor + u32rotl.1 + u32xor dup.4 dup.8 - u32checked_xor + u32xor movup.6 dup.11 - u32unchecked_rotl.1 - u32checked_xor + u32rotl.1 + u32xor movup.7 dup.10 - u32checked_xor + u32xor movup.8 movup.13 - u32unchecked_rotl.1 - u32checked_xor + u32rotl.1 + u32xor movup.9 movup.12 - u32checked_xor + u32xor movup.10 movup.10 - u32unchecked_rotl.1 - u32checked_xor + u32rotl.1 + u32xor movup.10 movup.10 - u32checked_xor + u32xor # stack = [d9, d8, d7, d6, d5, d4, d3, d2, d1, d0] @@ -654,21 +654,21 @@ proc.theta.3 mem_loadw dup.5 - u32checked_xor + u32xor swap dup.6 - u32checked_xor + u32xor swap movup.2 dup.7 - u32checked_xor + u32xor movdn.2 movup.3 dup.8 - u32checked_xor + u32xor movdn.3 dup.4 @@ -685,21 +685,21 @@ proc.theta.3 mem_loadw dup.9 - u32checked_xor + u32xor swap dup.10 - u32checked_xor + u32xor swap movup.2 dup.11 - u32checked_xor + u32xor movdn.2 movup.3 dup.12 - u32checked_xor + u32xor movdn.3 dup.4 @@ -716,21 +716,21 @@ proc.theta.3 mem_loadw dup.13 - u32checked_xor + u32xor swap dup.14 - u32checked_xor + u32xor swap movup.2 dup.5 - u32checked_xor + u32xor movdn.2 movup.3 dup.6 - u32checked_xor + u32xor movdn.3 dup.4 @@ -747,21 +747,21 @@ proc.theta.3 mem_loadw dup.7 - u32checked_xor + u32xor swap dup.8 - u32checked_xor + u32xor swap movup.2 dup.9 - u32checked_xor + u32xor movdn.2 movup.3 dup.10 - u32checked_xor + u32xor movdn.3 dup.4 @@ -778,21 +778,21 @@ proc.theta.3 mem_loadw dup.11 - u32checked_xor + u32xor swap dup.12 - u32checked_xor + u32xor swap movup.2 dup.13 - u32checked_xor + u32xor movdn.2 movup.3 dup.14 - u32checked_xor + u32xor movdn.3 dup.4 @@ -809,21 +809,21 @@ proc.theta.3 mem_loadw dup.5 - u32checked_xor + u32xor swap dup.6 - u32checked_xor + u32xor swap movup.2 dup.7 - u32checked_xor + u32xor movdn.2 movup.3 dup.8 - u32checked_xor + u32xor movdn.3 dup.4 @@ -840,21 +840,21 @@ proc.theta.3 mem_loadw dup.9 - u32checked_xor + u32xor swap dup.10 - u32checked_xor + u32xor swap movup.2 dup.11 - u32checked_xor + u32xor movdn.2 movup.3 dup.12 - u32checked_xor + u32xor movdn.3 dup.4 @@ -871,21 +871,21 @@ proc.theta.3 mem_loadw dup.13 - u32checked_xor + u32xor swap dup.14 - u32checked_xor + u32xor swap movup.2 dup.5 - u32checked_xor + u32xor movdn.2 movup.3 dup.6 - u32checked_xor + u32xor movdn.3 dup.4 @@ -902,21 +902,21 @@ proc.theta.3 mem_loadw dup.7 - u32checked_xor + u32xor swap dup.8 - u32checked_xor + u32xor swap movup.2 dup.9 - u32checked_xor + u32xor movdn.2 movup.3 dup.10 - u32checked_xor + u32xor movdn.3 dup.4 @@ -933,21 +933,21 @@ proc.theta.3 mem_loadw dup.11 - u32checked_xor + u32xor swap dup.12 - u32checked_xor + u32xor swap movup.2 dup.13 - u32checked_xor + u32xor movdn.2 movup.3 dup.14 - u32checked_xor + u32xor movdn.3 dup.4 @@ -964,21 +964,21 @@ proc.theta.3 mem_loadw movup.5 - u32checked_xor + u32xor swap movup.5 - u32checked_xor + u32xor swap movup.2 movup.5 - u32checked_xor + u32xor movdn.2 movup.3 movup.5 - u32checked_xor + u32xor movdn.3 dup.4 @@ -995,21 +995,21 @@ proc.theta.3 mem_loadw movup.5 - u32checked_xor + u32xor swap movup.5 - u32checked_xor + u32xor swap movup.2 movup.5 - u32checked_xor + u32xor movdn.2 movup.3 movup.5 - u32checked_xor + u32xor movdn.3 dup.4 @@ -1026,11 +1026,11 @@ proc.theta.3 mem_loadw movup.5 - u32checked_xor + u32xor swap movup.5 - u32checked_xor + u32xor swap movup.4 @@ -1066,7 +1066,7 @@ proc.rho.1 mem_loadw movup.3 - u32unchecked_rotl.1 + u32rotl.1 movdn.2 movup.4 @@ -1079,16 +1079,16 @@ proc.rho.1 dup.4 mem_loadw - u32unchecked_rotl.31 + u32rotl.31 swap - u32unchecked_rotl.31 + u32rotl.31 swap movup.2 - 
u32unchecked_rotl.14 + u32rotl.14 movdn.2 movup.3 - u32unchecked_rotl.14 + u32rotl.14 movdn.3 movup.4 @@ -1101,15 +1101,15 @@ proc.rho.1 dup.4 mem_loadw - u32unchecked_rotl.13 + u32rotl.13 swap - u32unchecked_rotl.14 + u32rotl.14 movup.2 - u32unchecked_rotl.18 + u32rotl.18 movdn.2 movup.3 - u32unchecked_rotl.18 + u32rotl.18 movdn.3 movup.4 @@ -1122,16 +1122,16 @@ proc.rho.1 dup.4 mem_loadw - u32unchecked_rotl.22 + u32rotl.22 swap - u32unchecked_rotl.22 + u32rotl.22 swap movup.2 - u32unchecked_rotl.3 + u32rotl.3 movdn.2 movup.3 - u32unchecked_rotl.3 + u32rotl.3 movdn.3 movup.4 @@ -1144,15 +1144,15 @@ proc.rho.1 dup.4 mem_loadw - u32unchecked_rotl.27 + u32rotl.27 swap - u32unchecked_rotl.28 + u32rotl.28 movup.2 - u32unchecked_rotl.10 + u32rotl.10 movdn.2 movup.3 - u32unchecked_rotl.10 + u32rotl.10 movdn.3 movup.4 @@ -1165,15 +1165,15 @@ proc.rho.1 dup.4 mem_loadw - u32unchecked_rotl.1 + u32rotl.1 swap - u32unchecked_rotl.2 + u32rotl.2 movup.2 - u32unchecked_rotl.5 + u32rotl.5 movdn.2 movup.3 - u32unchecked_rotl.5 + u32rotl.5 movdn.3 movup.4 @@ -1186,15 +1186,15 @@ proc.rho.1 dup.4 mem_loadw - u32unchecked_rotl.21 + u32rotl.21 swap - u32unchecked_rotl.22 + u32rotl.22 movup.2 - u32unchecked_rotl.12 + u32rotl.12 movdn.3 movup.2 - u32unchecked_rotl.13 + u32rotl.13 movdn.2 movup.4 @@ -1207,15 +1207,15 @@ proc.rho.1 dup.4 mem_loadw - u32unchecked_rotl.19 + u32rotl.19 swap - u32unchecked_rotl.20 + u32rotl.20 movup.2 - u32unchecked_rotl.20 + u32rotl.20 movdn.3 movup.2 - u32unchecked_rotl.21 + u32rotl.21 movdn.2 movup.4 @@ -1228,15 +1228,15 @@ proc.rho.1 dup.4 mem_loadw - u32unchecked_rotl.22 + u32rotl.22 swap - u32unchecked_rotl.23 + u32rotl.23 movup.2 - u32unchecked_rotl.7 + u32rotl.7 movdn.3 movup.2 - u32unchecked_rotl.8 + u32rotl.8 movdn.2 movup.4 @@ -1249,15 +1249,15 @@ proc.rho.1 dup.4 mem_loadw - u32unchecked_rotl.10 + u32rotl.10 swap - u32unchecked_rotl.11 + u32rotl.11 movup.2 - u32unchecked_rotl.4 + u32rotl.4 movdn.2 movup.3 - u32unchecked_rotl.4 + u32rotl.4 movdn.3 movup.4 @@ -1270,16 +1270,16 @@ proc.rho.1 dup.4 mem_loadw - u32unchecked_rotl.9 + u32rotl.9 swap - u32unchecked_rotl.9 + u32rotl.9 swap movup.2 - u32unchecked_rotl.1 + u32rotl.1 movdn.2 movup.3 - u32unchecked_rotl.1 + u32rotl.1 movdn.3 movup.4 @@ -1292,15 +1292,15 @@ proc.rho.1 dup.4 mem_loadw - u32unchecked_rotl.30 + u32rotl.30 swap - u32unchecked_rotl.31 + u32rotl.31 movup.2 - u32unchecked_rotl.28 + u32rotl.28 movdn.2 movup.3 - u32unchecked_rotl.28 + u32rotl.28 movdn.3 movup.4 @@ -1313,9 +1313,9 @@ proc.rho.1 dup.4 mem_loadw - u32unchecked_rotl.7 + u32rotl.7 swap - u32unchecked_rotl.7 + u32rotl.7 swap movup.4 @@ -1773,9 +1773,9 @@ proc.chi.4 drop drop - u32checked_not + u32not swap - u32checked_not + u32not swap movup.2 @@ -1791,25 +1791,25 @@ proc.chi.4 dup.1 movup.6 - u32checked_and + u32and swap movup.6 - u32checked_and + u32and swap movup.3 - u32checked_not + u32not movup.3 - u32checked_not + u32not movup.4 - u32checked_and + u32and swap movup.4 - u32checked_and + u32and swap movup.3 @@ -1824,9 +1824,9 @@ proc.chi.4 drop drop - u32checked_not + u32not swap - u32checked_not + u32not swap movup.2 @@ -1847,10 +1847,10 @@ proc.chi.4 dup.1 movup.4 - u32checked_and + u32and swap movup.4 - u32checked_and + u32and swap movup.3 @@ -1863,15 +1863,15 @@ proc.chi.4 mem_loadw movup.5 - u32checked_not + u32not movup.5 - u32checked_not + u32not dup.2 - u32checked_and + u32and swap dup.3 - u32checked_and + u32and swap movup.7 @@ -1881,16 +1881,16 @@ proc.chi.4 mem_storew dropw - u32checked_not + u32not swap - u32checked_not + u32not swap 
movup.2 - u32checked_and + u32and swap movup.2 - u32checked_and + u32and swap locaddr.0 @@ -1907,21 +1907,21 @@ proc.chi.4 mem_loadw movup.4 - u32checked_xor + u32xor swap movup.4 - u32checked_xor + u32xor swap movup.2 movup.4 - u32checked_xor + u32xor movdn.2 movup.3 movup.4 - u32checked_xor + u32xor movdn.3 dup.4 @@ -1940,21 +1940,21 @@ proc.chi.4 mem_loadw movup.4 - u32checked_xor + u32xor swap movup.4 - u32checked_xor + u32xor swap movup.2 movup.4 - u32checked_xor + u32xor movdn.2 movup.3 movup.4 - u32checked_xor + u32xor movdn.3 dup.4 @@ -1968,10 +1968,10 @@ proc.chi.4 mem_loadw movup.5 - u32checked_xor + u32xor swap movup.5 - u32checked_xor + u32xor swap dup.4 @@ -1985,19 +1985,19 @@ proc.chi.4 dup.4 mem_loadw - u32checked_not + u32not swap - u32checked_not + u32not swap dup.3 dup.3 movup.2 - u32checked_and + u32and swap movup.2 - u32checked_and + u32and swap push.0.0 @@ -2014,31 +2014,31 @@ proc.chi.4 movup.5 movup.5 - u32checked_not + u32not swap - u32checked_not + u32not swap dup.2 - u32checked_and + u32and swap dup.3 - u32checked_and + u32and swap movup.3 movup.3 - u32checked_not + u32not swap - u32checked_not + u32not swap dup.4 - u32checked_and + u32and swap dup.5 - u32checked_and + u32and swap movup.3 @@ -2061,16 +2061,16 @@ proc.chi.4 dup.1 movup.4 - u32checked_not + u32not movup.5 - u32checked_not + u32not swap movup.2 - u32checked_and + u32and swap movup.2 - u32checked_and + u32and swap movup.3 @@ -2090,16 +2090,16 @@ proc.chi.4 movup.3 movup.3 - u32checked_not + u32not swap - u32checked_not + u32not swap movup.2 - u32checked_and + u32and swap movup.2 - u32checked_and + u32and swap movup.3 @@ -2120,21 +2120,21 @@ proc.chi.4 loc_loadw.1 movup.4 - u32checked_xor + u32xor swap movup.4 - u32checked_xor + u32xor swap movup.2 movup.4 - u32checked_xor + u32xor movdn.2 movup.3 movup.4 - u32checked_xor + u32xor movdn.3 dup.4 @@ -2151,21 +2151,21 @@ proc.chi.4 loc_loadw.2 movup.4 - u32checked_xor + u32xor swap movup.4 - u32checked_xor + u32xor swap movup.2 movup.4 - u32checked_xor + u32xor movdn.2 movup.3 movup.4 - u32checked_xor + u32xor movdn.3 dup.4 @@ -2182,21 +2182,21 @@ proc.chi.4 loc_loadw.3 movup.4 - u32checked_xor + u32xor swap movup.4 - u32checked_xor + u32xor swap movup.2 movup.4 - u32checked_xor + u32xor movdn.2 movup.3 movup.4 - u32checked_xor + u32xor movdn.3 dup.4 @@ -2213,9 +2213,9 @@ proc.chi.4 drop drop - u32checked_not + u32not swap - u32checked_not + u32not swap movup.2 @@ -2231,25 +2231,25 @@ proc.chi.4 dup.1 movup.6 - u32checked_and + u32and swap movup.6 - u32checked_and + u32and swap movup.3 movup.3 - u32checked_not + u32not swap - u32checked_not + u32not swap dup.4 - u32checked_and + u32and swap dup.5 - u32checked_and + u32and swap movup.3 @@ -2275,16 +2275,16 @@ proc.chi.4 movup.5 movup.5 - u32checked_not + u32not swap - u32checked_not + u32not swap movup.2 - u32checked_and + u32and swap movup.2 - u32checked_and + u32and swap movup.4 @@ -2299,19 +2299,19 @@ proc.chi.4 movup.7 movup.7 - u32checked_not + u32not swap - u32checked_not + u32not swap dup.3 dup.3 movup.2 - u32checked_and + u32and swap movup.2 - u32checked_and + u32and swap movup.7 @@ -2320,16 +2320,16 @@ proc.chi.4 loc_storew.2 dropw - u32checked_not + u32not swap - u32checked_not + u32not swap movup.2 - u32checked_and + u32and swap movup.2 - u32checked_and + u32and swap push.0.0 @@ -2345,21 +2345,21 @@ proc.chi.4 loc_loadw.1 movup.4 - u32checked_xor + u32xor swap movup.4 - u32checked_xor + u32xor swap movup.2 movup.4 - u32checked_xor + u32xor movdn.2 movup.3 movup.4 - u32checked_xor + u32xor 
movdn.3 dup.4 @@ -2376,21 +2376,21 @@ proc.chi.4 loc_loadw.2 movup.4 - u32checked_xor + u32xor swap movup.4 - u32checked_xor + u32xor swap movup.2 movup.4 - u32checked_xor + u32xor movdn.2 movup.3 movup.4 - u32checked_xor + u32xor movdn.3 dup.4 @@ -2407,21 +2407,21 @@ proc.chi.4 loc_loadw.3 movup.4 - u32checked_xor + u32xor swap movup.4 - u32checked_xor + u32xor swap movup.2 movup.4 - u32checked_xor + u32xor movdn.2 movup.3 movup.4 - u32checked_xor + u32xor movdn.3 dup.4 @@ -2435,19 +2435,19 @@ proc.chi.4 dup.4 mem_loadw - u32checked_not + u32not swap - u32checked_not + u32not swap dup.3 dup.3 movup.2 - u32checked_and + u32and swap movup.2 - u32checked_and + u32and swap push.0.0 @@ -2463,37 +2463,37 @@ proc.chi.4 movup.5 movup.5 - u32checked_not + u32not swap - u32checked_not + u32not swap dup.3 dup.3 movup.2 - u32checked_and + u32and swap movup.2 - u32checked_and + u32and swap movup.3 movup.3 - u32checked_not + u32not swap - u32checked_not + u32not swap dup.5 dup.5 movup.2 - u32checked_and + u32and swap movup.2 - u32checked_and + u32and swap movup.3 @@ -2514,19 +2514,19 @@ proc.chi.4 movup.3 movup.3 - u32checked_not + u32not swap - u32checked_not + u32not swap dup.3 dup.3 movup.2 - u32checked_and + u32and swap movup.2 - u32checked_and + u32and swap movup.4 @@ -2546,16 +2546,16 @@ proc.chi.4 movup.5 movup.5 - u32checked_not + u32not swap - u32checked_not + u32not swap movup.2 - u32checked_and + u32and swap movup.2 - u32checked_and + u32and swap movup.3 @@ -2574,21 +2574,21 @@ proc.chi.4 loc_loadw.1 movup.4 - u32checked_xor + u32xor swap movup.4 - u32checked_xor + u32xor swap movup.2 movup.4 - u32checked_xor + u32xor movdn.2 movup.3 movup.4 - u32checked_xor + u32xor movdn.3 dup.4 @@ -2605,21 +2605,21 @@ proc.chi.4 loc_loadw.2 movup.4 - u32checked_xor + u32xor swap movup.4 - u32checked_xor + u32xor swap movup.2 movup.4 - u32checked_xor + u32xor movdn.2 movup.3 movup.4 - u32checked_xor + u32xor movdn.3 dup.4 @@ -2636,21 +2636,21 @@ proc.chi.4 loc_loadw.3 movup.4 - u32checked_xor + u32xor swap movup.4 - u32checked_xor + u32xor swap movup.2 movup.4 - u32checked_xor + u32xor movdn.2 movup.3 movup.4 - u32checked_xor + u32xor movdn.3 dup.4 @@ -2679,37 +2679,37 @@ proc.chi.4 movup.5 movup.5 - u32checked_not + u32not swap - u32checked_not + u32not swap dup.3 dup.3 movup.2 - u32checked_and + u32and swap movup.2 - u32checked_and + u32and swap movup.3 movup.3 - u32checked_not + u32not swap - u32checked_not + u32not swap dup.5 dup.5 movup.2 - u32checked_and + u32and swap movup.2 - u32checked_and + u32and swap movup.3 @@ -2732,19 +2732,19 @@ proc.chi.4 movup.3 movup.3 - u32checked_not + u32not swap - u32checked_not + u32not swap dup.3 dup.3 movup.2 - u32checked_and + u32and swap movup.2 - u32checked_and + u32and swap movup.4 @@ -2759,19 +2759,19 @@ proc.chi.4 movup.7 movup.7 - u32checked_not + u32not swap - u32checked_not + u32not swap dup.3 dup.3 movup.2 - u32checked_and + u32and swap movup.2 - u32checked_and + u32and swap movup.7 @@ -2780,16 +2780,16 @@ proc.chi.4 loc_storew.2 dropw - u32checked_not + u32not swap - u32checked_not + u32not swap movup.2 - u32checked_and + u32and swap movup.2 - u32checked_and + u32and swap push.0.0 @@ -2805,21 +2805,21 @@ proc.chi.4 loc_loadw.1 movup.4 - u32checked_xor + u32xor swap movup.4 - u32checked_xor + u32xor swap movup.2 movup.4 - u32checked_xor + u32xor movdn.2 movup.3 movup.4 - u32checked_xor + u32xor movdn.3 dup.4 @@ -2836,21 +2836,21 @@ proc.chi.4 loc_loadw.2 movup.4 - u32checked_xor + u32xor swap movup.4 - u32checked_xor + u32xor swap movup.2 movup.4 - 
u32checked_xor + u32xor movdn.2 movup.3 movup.4 - u32checked_xor + u32xor movdn.3 dup.4 @@ -2867,21 +2867,21 @@ proc.chi.4 loc_loadw.3 movup.4 - u32checked_xor + u32xor swap movup.4 - u32checked_xor + u32xor swap movup.2 movup.4 - u32checked_xor + u32xor movdn.2 movup.3 movup.4 - u32checked_xor + u32xor movdn.3 dup.4 @@ -2915,12 +2915,12 @@ proc.iota mem_loadw movup.5 - u32checked_xor + u32xor swap movup.5 - u32checked_xor + u32xor swap @@ -3197,9 +3197,9 @@ export.to_bit_interleaved push.0.0 repeat.16 - u32unchecked_shr.1 + u32shr.1 swap - u32unchecked_shr.1 + u32shr.1 swap # --- @@ -3208,19 +3208,19 @@ export.to_bit_interleaved dup.3 push.1 - u32checked_and + u32and swap push.1 - u32checked_and + u32and swap - u32unchecked_shl.31 + u32shl.31 swap - u32unchecked_shl.15 + u32shl.15 swap - u32checked_xor - u32checked_xor + u32xor + u32xor # --- @@ -3228,30 +3228,30 @@ export.to_bit_interleaved dup.3 push.2 - u32checked_and + u32and swap push.2 - u32checked_and + u32and swap - u32unchecked_shl.30 + u32shl.30 swap - u32unchecked_shl.14 + u32shl.14 swap movup.3 - u32checked_xor - u32checked_xor + u32xor + u32xor swap # --- movup.2 - u32unchecked_shr.2 + u32shr.2 movdn.2 movup.3 - u32unchecked_shr.2 + u32shr.2 movdn.3 end @@ -3285,9 +3285,9 @@ export.from_bit_interleaved push.0.0 repeat.16 - u32unchecked_shr.2 + u32shr.2 swap - u32unchecked_shr.2 + u32shr.2 swap # --- @@ -3296,18 +3296,18 @@ export.from_bit_interleaved dup.3 push.1 - u32checked_and + u32and swap push.1 - u32checked_and + u32and - u32unchecked_shl.31 + u32shl.31 swap - u32unchecked_shl.30 - u32checked_xor + u32shl.30 + u32xor movup.2 - u32checked_xor + u32xor swap # --- @@ -3316,26 +3316,26 @@ export.from_bit_interleaved dup.3 push.65536 - u32checked_and + u32and swap push.65536 - u32checked_and + u32and - u32unchecked_shl.15 + u32shl.15 swap - u32unchecked_shl.14 - u32checked_xor + u32shl.14 + u32xor - u32checked_xor + u32xor # --- movup.2 - u32unchecked_shr.1 + u32shr.1 movdn.2 movup.3 - u32unchecked_shr.1 + u32shr.1 movdn.3 end diff --git a/stdlib/asm/crypto/hashes/native.masm b/stdlib/asm/crypto/hashes/native.masm index 4b2ec0d8f0..278a2da2bf 100644 --- a/stdlib/asm/crypto/hashes/native.masm +++ b/stdlib/asm/crypto/hashes/native.masm @@ -46,7 +46,7 @@ end #! odd words: 60 cycles + 3 * words export.hash_memory # enforce `start_addr < end_addr` - dup.1 dup.1 u32checked_gt assert + dup.1 dup.1 u32assert2 u32gt assert # figure out if the range is for an odd number of words (9 cycles) dup.1 dup.1 sub is_odd diff --git a/stdlib/asm/crypto/hashes/sha256.masm b/stdlib/asm/crypto/hashes/sha256.masm index 197d966aee..7b627c1114 100644 --- a/stdlib/asm/crypto/hashes/sha256.masm +++ b/stdlib/asm/crypto/hashes/sha256.masm @@ -7,19 +7,19 @@ #! See https://github.com/itzmeanjan/merklize-sha/blob/8a2c006/include/sha2.hpp#L73-L79 proc.small_sigma_0 dup - u32unchecked_rotr.7 + u32rotr.7 swap dup - u32unchecked_rotr.18 + u32rotr.18 swap - u32unchecked_shr.3 + u32shr.3 - u32checked_xor - u32checked_xor + u32xor + u32xor end #! Computes SHA2 small sigma 1. @@ -31,19 +31,19 @@ end #! See https://github.com/itzmeanjan/merklize-sha/blob/8a2c006/include/sha2.hpp#L81-L87 proc.small_sigma_1 dup - u32unchecked_rotr.17 + u32rotr.17 swap dup - u32unchecked_rotr.19 + u32rotr.19 swap - u32unchecked_shr.10 + u32shr.10 - u32checked_xor - u32checked_xor + u32xor + u32xor end #! Computes SHA2 big sigma 0. @@ -55,19 +55,19 @@ end #! 
See https://github.com/itzmeanjan/merklize-sha/blob/8a2c006/include/sha2.hpp#L57-L63 proc.cap_sigma_0 dup - u32unchecked_rotr.2 + u32rotr.2 swap dup - u32unchecked_rotr.13 + u32rotr.13 swap - u32unchecked_rotr.22 + u32rotr.22 - u32checked_xor - u32checked_xor + u32xor + u32xor end #! Computes SHA2 big sigma 1. @@ -79,19 +79,19 @@ end #! See https://github.com/itzmeanjan/merklize-sha/blob/8a2c006/include/sha2.hpp#L65-L71 proc.cap_sigma_1 dup - u32unchecked_rotr.6 + u32rotr.6 swap dup - u32unchecked_rotr.11 + u32rotr.11 swap - u32unchecked_rotr.25 + u32rotr.25 - u32checked_xor - u32checked_xor + u32xor + u32xor end #! Computes SHA2 ch. @@ -104,15 +104,15 @@ end proc.ch swap dup.1 - u32checked_and + u32and swap - u32checked_not + u32not movup.2 - u32checked_and + u32and - u32checked_xor + u32xor end #! Computes SHA2 maj. @@ -125,18 +125,18 @@ end proc.maj dup.1 dup.1 - u32checked_and + u32and swap dup.3 - u32checked_and + u32and movup.2 movup.3 - u32checked_and + u32and - u32checked_xor - u32checked_xor + u32xor + u32xor end #! Reverses order of first four elements on stack @@ -1571,23 +1571,23 @@ export.hash_memory.12 loc_store.1 # loc.2 (padded length): input_length + (55 - input_length) % 64 + 9 - push.55 loc_load.1 u32wrapping_sub push.63 u32checked_and - loc_load.1 u32checked_add u32checked_add.9 loc_store.2 + push.55 loc_load.1 u32wrapping_sub push.63 u32and + loc_load.1 u32assert2 u32overflowing_add assertz u32assert u32overflowing_add.9 assertz loc_store.2 # loc.3 (last memory address in padding): input_address + padded_length / 16 - 1 - loc_load.2 u32checked_div.16 loc_load.0 u32wrapping_add u32wrapping_sub.1 loc_store.3 + loc_load.2 u32assert u32div.16 loc_load.0 u32wrapping_add u32wrapping_sub.1 loc_store.3 # loc.4 (u32 aligned padding byte): 0x80000000 >> ((input_length % 4) * 8) - loc_load.1 u32checked_mod.4 u32checked_mul.8 push.0x80000000 swap u32checked_shr loc_store.4 + loc_load.1 u32assert u32mod.4 u32assert u32overflowing_mul.8 assertz push.0x80000000 swap u32shr loc_store.4 # loc.5 (memory offset of first padding byte): (input_length / 4) % 4 - loc_load.1 u32checked_div.4 u32checked_mod.4 loc_store.5 + loc_load.1 u32assert u32div.4 u32mod.4 loc_store.5 # loc.6 (memory address of first padding byte): input_address + (len / 16) - loc_load.0 loc_load.1 u32checked_div.16 u32checked_add loc_store.6 + loc_load.0 loc_load.1 u32assert u32div.16 u32assert2 u32overflowing_add assertz loc_store.6 # loc.7 (number of remaining 512-bit blocks to consume): padded_length / 64 - loc_load.2 u32checked_div.64 loc_store.7 + loc_load.2 u32assert u32div.64 loc_store.7 # Set the first byte after the message to 0x80 padw loc_load.6 mem_loadw loc_store.8 loc_store.9 loc_store.10 loc_store.11 @@ -1596,7 +1596,7 @@ export.hash_memory.12 # Set message length in bits at end of padding padw loc_load.3 mem_loadw - movup.3 drop loc_load.1 u32checked_mul.8 movdn.3 + movup.3 drop loc_load.1 u32assert u32overflowing_mul.8 assertz movdn.3 loc_load.3 mem_storew dropw # Sha256 init @@ -1604,16 +1604,16 @@ export.hash_memory.12 push.0xa54ff53a.0x3c6ef372.0xbb67ae85.0x6a09e667 # Consume sha256 blocks - loc_load.7 u32checked_neq.0 + loc_load.7 u32assert neq.0 while.true - padw loc_load.0 u32checked_add.3 mem_loadw movdnw.2 - padw loc_load.0 u32checked_add.2 mem_loadw movdnw.2 - padw loc_load.0 u32checked_add.1 mem_loadw movdnw.2 - padw loc_load.0 u32checked_add.0 mem_loadw movdnw.2 + padw loc_load.0 u32assert u32overflowing_add.3 assertz mem_loadw movdnw.2 + padw loc_load.0 u32assert u32overflowing_add.2 assertz 
mem_loadw movdnw.2 + padw loc_load.0 u32assert u32overflowing_add.1 assertz mem_loadw movdnw.2 + padw loc_load.0 u32assert u32overflowing_add.0 assertz mem_loadw movdnw.2 exec.prepare_message_schedule_and_consume - loc_load.0 u32checked_add.4 loc_store.0 - loc_load.7 u32checked_sub.1 dup loc_store.7 - u32checked_neq.0 + loc_load.0 u32assert u32overflowing_add.4 assertz loc_store.0 + loc_load.7 u32assert u32overflowing_sub.1 assertz dup loc_store.7 + u32assert neq.0 end end diff --git a/stdlib/asm/crypto/stark/constants.masm b/stdlib/asm/crypto/stark/constants.masm index f292f2dc21..d062e2007d 100644 --- a/stdlib/asm/crypto/stark/constants.masm +++ b/stdlib/asm/crypto/stark/constants.masm @@ -17,19 +17,19 @@ const.TRACE_DOMAIN_GENERATOR_PTR=4294799999 const.PUBLIC_INPUTS_PTR=4294800000 # OOD Frames -# (72 + 9 + 8) * 2 * 2 Felt for current and next trace rows and 8 * 2 Felt for constraint composition +# (72 + 9 + 8) * 2 * 2 Felt for current and next trace rows and 8 * 2 Felt for constraint composition # polynomials. Total memory slots required: ((72 + 9 + 8) * 2 * 2 + 8 * 2) / 4 = 93 const.OOD_TRACE_PTR=4294900000 const.OOD_CONSTRAINT_EVALS_PTR=4294900081 # Current trace row # 72 Felt for main portion of trace, 9 * 2 Felt for auxiliary portion of trace and 8 * 2 Felt for -# constraint composition polynomials. Since we store these with the padding to make each of the +# constraint composition polynomials. Since we store these with the padding to make each of the # three portions a multiple of 8, the number of slots required is (80 + 24 + 16) / 4 = 30 const.CURRENT_TRACE_ROW_PTR=4294900100 # Random elements -# There are are currently 16 ExtFelt for a total of 32 Felt. Thus the number of slots required is 8. +# There are are currently 16 ExtFelt for a total of 32 Felt. Thus the number of slots required is 8. const.AUX_RAND_ELEM_PTR=4294900150 # We need 2 Felt for each constraint. We take 2800 slots as an upper bound @@ -41,28 +41,28 @@ const.DEEP_RAND_CC_PTR=4294903000 # FRI # -# (FRI_COM_PTR - 100) ---| +# (FRI_COM_PTR - 100) ---| # . # . | <- FRI queries # . # FRI_COM_PTR ---| -# . +# . # . | <- FRI layer commitments and folding challenges # . -# (FRI_COM_PTR + 32) ---| +# (FRI_COM_PTR + 32) ---| # . # . | <- Remainder codeword and polynomial # . -# (FRI_COM_PTR + 66-1) ---| +# (FRI_COM_PTR + 66-1) ---| # -# For each FRI layer, we need 2 memory slots, one for storing the FRI layer commitment and one for +# For each FRI layer, we need 2 memory slots, one for storing the FRI layer commitment and one for # storing the word [a0, a1, log2(lde_size), lde_size] where a := (a0, a1) is the folding randomness -# and lde_size is the size of the LDE domain. Since we are using a folding factor of 4 and the +# and lde_size is the size of the LDE domain. Since we are using a folding factor of 4 and the # maximal degree of the remainder polynomial that we allow is 7, an upper limit of 16 FRI layers is -# ample and the number of memory slots we thus allocate for this is 32. Moreover, we allocate -# an additional 32 slots for the remainder codeword and 2 for the remainder polynomial. These are -# expected to be laid out right after the FRI commitments. -# The total number of slots thus becomes 66. +# ample and the number of memory slots we thus allocate for this is 32. Moreover, we allocate +# an additional 32 slots for the remainder codeword and 2 for the remainder polynomial. These are +# expected to be laid out right after the FRI commitments. +# The total number of slots thus becomes 66. 
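A quick arithmetic check of the slot budget described in the comment above (an illustrative Rust sketch, not code from this repository): 16 layers at 2 slots each, plus 32 slots for the remainder codeword and 2 for the remainder polynomial, gives the 66 slots reserved for the FRI region.

// Illustrative slot-budget arithmetic for the FRI memory region described above.
const MAX_FRI_LAYERS: usize = 16;            // upper bound for folding factor 4, remainder degree <= 7
const SLOTS_PER_LAYER: usize = 2;            // layer commitment + [a0, a1, log2(lde_size), lde_size]
const REMAINDER_CODEWORD_SLOTS: usize = 32;
const REMAINDER_POLY_SLOTS: usize = 2;
const TOTAL_FRI_SLOTS: usize =
    MAX_FRI_LAYERS * SLOTS_PER_LAYER + REMAINDER_CODEWORD_SLOTS + REMAINDER_POLY_SLOTS; // = 66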
const.FRI_COM_PTR=4294903200 # Commitment to main, auxiliary and composition polynomials traces @@ -136,7 +136,7 @@ const.TMP8=4294903322 # | TMP8 | 4294903322 | # +------------------------------------------+-------------------------+ -# ACCESSORS +# ACCESSORS # ================================================================================================= export.root_unity @@ -180,7 +180,7 @@ export.aux_rand_elem_ptr push.AUX_RAND_ELEM_PTR end -export.composition_coef_ptr +export.composition_coef_ptr push.COMPOSITION_COEF_PTR end diff --git a/stdlib/asm/crypto/stark/deep_queries.masm b/stdlib/asm/crypto/stark/deep_queries.masm index 06c89a8468..ce8ef49e30 100644 --- a/stdlib/asm/crypto/stark/deep_queries.masm +++ b/stdlib/asm/crypto/stark/deep_queries.masm @@ -4,7 +4,7 @@ use.std::crypto::stark::constants #! Computes a single step of the random linear combination defining the DEEP composition polynomial #! that is the input to the FRI protocol. More precisely, the sum in question is: #! $$ -#! \sum_{i=0}^k{\alpha_i \cdot \left(\frac{T_i(x) - T_i(z)}{x - z} + +#! \sum_{i=0}^k{\alpha_i \cdot \left(\frac{T_i(x) - T_i(z)}{x - z} + #! \frac{T_i(x) - T_i(z \cdot g)}{x - z \cdot g} \right)} #! $$ #! @@ -18,15 +18,15 @@ use.std::crypto::stark::constants #! +------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+---+ #! | T7 | T6 | T5 | T4 | T3 | T2 | T1 | T0 | p1 | p0 | r1 | r0 |x_addr|z_addr|a_addr| - | #! +------+------+------+------+------+------+------+------+------+------+------+------+------+------+------+---+ -#! +#! #! || #! \/ -#! +#! #! +------+------+------+------+------+------+------+------+------+------+------+------+------+--------+--------+---+ #! | T0 | T7 | T6 | T5 | T4 | T3 | T2 | T1 | p1' | p0' | r1' | r0' |x_addr|z_addr+1|a_addr+1| - | #! +------+------+------+------+------+------+------+------+------+------+------+------+------+--------+--------+---+ -#! -#! +#! +#! #! Here: #! 1- Ti for i in 0..=7 stands for the the value of the i-th trace polynomial for the current query i.e. T_i(x). #! 2- (p0, p1) stands for an extension field element accumulating the values for the quotients with common denominator (x - gz). @@ -34,15 +34,15 @@ use.std::crypto::stark::constants #! 4- x_addr is the memory address from which we are loading the Ti's using the MSTREAM instruction. #! 5- z_addr is the memory address to the i-th OOD evaluation frame at z and gz i.e. T_i(z):= (T_i(z)0, T_i(z)1) #! and T_i(gz):= (T_i(gz)0, T_i(gz)1) -#! 6- a_addr is the memory address of the i-th random element used in batching the trace polynomial quotients. +#! 6- a_addr is the memory address of the i-th random element used in batching the trace polynomial quotients. #! The random elements a := (a0, a1) are stored in memory as [0, 0, a0, a1]. -#! +#! #! Input: [T7, T6, T5, T4, T3, T2, T1, T0, p1, p0, r1, r0, x_addr, z_addr, a_addr, 0] #! Output: [T0, T7, T6, T5, T4, T3, T2, T1, p1', p0', r1', r0', x_addr, z_addr+1, a_addr+1, 0] export.combine_main # 1) Shift trace columns values left - movup.7 + movup.7 #=> [T0, T7, T6, T5, T4, T3, T2, T1, p1, p0, r1, r0, x_addr, z_addr, a_addr, 0] # 2) Get a_addr and update it. This is done here before the element becomes inaccessible. 
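A rough Rust sketch of what a single combination step of `combine_main` accumulates (the element type `E` is a stand-in for a quadratic extension field element and is not the repository's actual type): each trace value T_i(x) contributes alpha_i * (T_i(x) - T_i(z)) to the (x - z) accumulator and alpha_i * (T_i(x) - T_i(gz)) to the (x - gz) accumulator, and the divisions by the two denominators are applied once after all columns have been folded in.

use core::ops::{Add, Mul, Sub};

// Sketch of one DEEP-combination step for a single trace column (illustrative only).
// `alpha` is the batching randomness a := (a0, a1) read from a_addr.
fn combine_step<E>(t_x: E, t_z: E, t_gz: E, alpha: E, p: &mut E, r: &mut E)
where
    E: Copy + Add<Output = E> + Sub<Output = E> + Mul<Output = E>,
{
    *r = *r + alpha * (t_x - t_z);   // numerators with common denominator (x - z)
    *p = *p + alpha * (t_x - t_gz);  // numerators with common denominator (x - g*z)
}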
@@ -108,13 +108,13 @@ export.combine_main movupw.3 # a) Accumulate into (r0, r1) - movup.7 movup.7 + movup.7 movup.7 #=> [prod1, prod0, p1, p0, r1, r0, prodg1, prodg0, T0, T7, T6, T5, T4, T3, T2, T1, x_addr, z_addr', a_addr', 0] movup.5 movup.5 ext2add #=> [r1', r0', p1, p0, prodg1, prodg0, T0, T7, T6, T5, T4, T3, T2, T1, x_addr, z_addr', a_addr', 0] # b) Accumulate into (p0, p1) - movdn.5 movdn.5 ext2add + movdn.5 movdn.5 ext2add #=> [p1', p0', r1', r0', T0, T7, T6, T5, T4, T3, T2, T1, x_addr, z_addr', a_addr', 0] # c) Prepare for next iteration. @@ -125,7 +125,7 @@ end #! Computes a single step of the random linear combination defining the DEEP composition polynomial #! that is the input to the FRI protocol. More precisely, the sum in question is: #! $$ -#! \sum_{i=0}^k{\alpha_i \cdot \left(\frac{T_i(x) - T_i(z)}{x - z} + +#! \sum_{i=0}^k{\alpha_i \cdot \left(\frac{T_i(x) - T_i(z)}{x - z} + #! \frac{T_i(x) - T_i(z \cdot g)}{x - z \cdot g} \right)} #! $$ #! @@ -139,15 +139,15 @@ end #! +-------+-------+-------+-------+-------+-------+-------+-------+------+------+------+------+------+------+------+---+ #! | T31 | T30 | T21 | T20 | T11 | T10 | T01 | T00 | p1 | p0 | r1 | r0 |x_addr|z_addr|a_addr| - | #! +-------+-------+-------+-------+-------+-------+-------+-------+------+------+------+------+------+------+------+---+ -#! +#! #! || #! \/ -#! +#! #! +-------+-------+-------+-------+-------+-------+-------+-------+------+------+------+------+------+--------+--------+-----+ #! | T31 | T30 | T21 | T20 | T11 | T10 | T01 | T00 | p1' | p0' | r1' | r0' |x_addr|z_addr+1|a_addr+b| - | #! +-------+-------+-------+-------+-------+-------+-------+-------+------+------+------+------+------+--------+--------------+ -#! -#! +#! +#! #! Here: #! 1- Tij for i in 0..=3 and j=0,1 stands for the the value of the j-th coordinate in the quadratic extension field #! of the i-th auxiliary trace polynomial for the current query i.e. $T_i(x)$. @@ -163,7 +163,7 @@ end export.combine_aux # 1) Shift trace columns values (as quadratic extension field element) left - movup.7 movup.7 + movup.7 movup.7 #=> [T01, T00, T31, T30, T21, T20, T11, T10, p1, p0, r1, r0, x_addr, z_addr, a_addr, 0] # 2) Get a_addr and update it. This is done here before it becomes inaccessible. @@ -232,13 +232,13 @@ export.combine_aux movupw.3 # a) Accumulate into (r0, r1) - movup.7 movup.7 + movup.7 movup.7 #=> [prod1, prod0, p1, p0, r1, r0, prodg1, prodg0, T01, T00, T31, T30, T21, T20, T11, T10, x_addr, z_addr', a_addr', 0] movup.5 movup.5 ext2add #=> [r1', r0', p1, p0, prodg1, prodg0, T01, T00, T31, T30, T21, T20, T11, T10, x_addr, z_addr', a_addr', 0] # b) Accumulate into (p0, p1) - movdn.5 movdn.5 ext2add + movdn.5 movdn.5 ext2add #=> [p1', p0', r1', r0', T01, T00, T31, T30, T21, T20, T11, T10, x_addr, z_addr', a_addr', 0] # c) Prepare for next iteration @@ -248,7 +248,7 @@ end #! Loads the next query rows in the main, auxiliary and constraint composition polynomials traces. #! It takes a pointer to the current random query index and returns that index. -#! +#! #! Input: [query_ptr, ...] #! Output: [index, query_ptr, ...] #! @@ -268,7 +268,7 @@ proc.load_query_row push.0.0 exec.constants::main_trace_com_ptr mem_loadw #=>[R, depth, index, query_ptr, ...] - + ## Get the leaf in the main trace commitment and save it dup.5 dup.5 mtree_get @@ -323,9 +323,9 @@ proc.load_query_row adv_push.1 push.1 push.0 - + ## Store the 9-th auxiliary column - dup.12 mem_storew + dup.12 mem_storew ## Since combine_aux follows a mem_stream we need to store (i.e. 
pad with) the all zero word in ## order to avoid over-stepping into the constraint polynomial columns. @@ -380,7 +380,7 @@ proc.load_query_row assert_eq assert_eq #=> [Y, ptr, y, y, y, depth, index, query_ptr, ...] - + dropw dropw drop #=> [index, query_ptr, ...] end @@ -488,7 +488,7 @@ proc.combine_aux_trace_columns exec.combine_aux end end - + # and the 9th aux column mem_stream exec.combine_aux @@ -503,9 +503,9 @@ end #! 2. [Y, Y] is a "garbage" double-word used to mem_stream data referenced by CURRENT_TRACE_ROW_PTR. #! 3. Acc =: [Acc3, Acc2, Acc1, Acc0] is the accumulator holding the current numerator values. #! -#! The procedure then outputs the final accumulator value including main and auxiliary trace columns +#! The procedure then outputs the final accumulator value including main and auxiliary trace columns #! as well as constraint composition polynomial columns. -#! The procedure uses the `combine_aux` by discarding its effect on the second half of the +#! The procedure uses the `combine_aux` by discarding its effect on the second half of the #! accumulator (i.e. the "gz" part). To do this, we save the value of the accumulator before calling #! `combine_aux` and then restore the second half of the accumulator after the call. #! @@ -578,7 +578,7 @@ export.compute_deep_composition_polynomial_queries push.1 while.true - # I) + # I) # # Load the (main, aux, constraint)-traces rows associated with the current query and get # the index of the query. @@ -589,10 +589,10 @@ export.compute_deep_composition_polynomial_queries # II) - # + # # Compute x := offset * domain_gen^index and denominators (x - z) and (x - gz) # - # Cycles: 68 + # Cycles: 68 exec.compute_denominators #=> [Z, x, index, query_ptr, query_end_ptr, ...] where Z := [-gz1, x - gz0, -z1, x - z0] @@ -611,7 +611,7 @@ export.compute_deep_composition_polynomial_queries exec.constants::deep_rand_coef_ptr exec.constants::ood_trace_ptr exec.constants::current_trace_row_ptr - #=> [P, Z, x, index, query_ptr, query_end_ptr, ...] + #=> [P, Z, x, index, query_ptr, query_end_ptr, ...] # where P := [CURRENT_TRACE_ROW_PTR, OOD_TRACE_PTR, DEEP_RAND_CC_PTR, 0] ## b) Push the accumulators @@ -620,7 +620,7 @@ export.compute_deep_composition_polynomial_queries padw #=> [Acc, P, Z, x, index, query_ptr, query_end_ptr, ...] #=> where Acc =: [Acc3, Acc2, Acc1, Acc0] - + ## c) This will be used to mstream the elements T_i(x) ## ## Cycles: 8 @@ -634,11 +634,11 @@ export.compute_deep_composition_polynomial_queries exec.combine_aux_trace_columns exec.combine_constraint_poly_columns #=> [Acc, Z, x, index, query_ptr, query_end_ptr, ...] - + ## e) Divide by denominators and sum to get final result ## ## Cycles: 38 - exec.divide_by_denominators_and_sum + exec.divide_by_denominators_and_sum #=> [eval1, eval0, x, index, query_ptr, query_end_ptr, ...] @@ -646,7 +646,7 @@ export.compute_deep_composition_polynomial_queries # # Store [poe, index, eval_1, eval_0] where poe := g^index = x / offset and prepare stack # for next iteration. - + ## a) Compute poe ## ## Cycles: 4 diff --git a/stdlib/asm/crypto/stark/mod.masm b/stdlib/asm/crypto/stark/mod.masm index f047cf1f19..492f6da59d 100644 --- a/stdlib/asm/crypto/stark/mod.masm +++ b/stdlib/asm/crypto/stark/mod.masm @@ -23,4 +23,4 @@ use.std::crypto::stark::verifier #! 5000 + num_queries * (40 + num_fri_layers * 76 + 26 + 463) + 83 * num_fri_layers + 10 * log(trace_length) + 1633 #! 2- Remainder codeword size 64: #! 
5000 + num_queries * (40 + num_fri_layers * 76 + 26 + 463) + 83 * num_fri_layers + 10 * log(trace_length) + 3109 -export.verifier::verify \ No newline at end of file +export.verifier::verify diff --git a/stdlib/asm/crypto/stark/ood_frames.masm b/stdlib/asm/crypto/stark/ood_frames.masm index 061a78fb1c..c6b9e50f26 100644 --- a/stdlib/asm/crypto/stark/ood_frames.masm +++ b/stdlib/asm/crypto/stark/ood_frames.masm @@ -3,7 +3,7 @@ use.std::crypto::stark::constants #! Loads OOD evaluation frame, with current and next rows interleaved, into memory. This ouputs #! the hash of the OOD for reseeding the random coin. -#! +#! #! Input: [...] #! Output: [OOD_FRAME_HASH, ...] #! Cycles: 106 @@ -14,16 +14,16 @@ export.load_evaluation_frame # 324 = 40 * 8 + 4 # The elements are stored from the stack as (a1_1, a1_0, a0_1, a0_0) where a0 is from the # current row and a1 from the next row. - + exec.constants::ood_trace_ptr push.1.0.0.0 - padw padw + padw padw repeat.40 adv_pipe hperm end - + # Load the last remaining word and pad with 1 followed by three 0 adv_loadw dup.12 mem_storew @@ -50,7 +50,7 @@ export.load_constraint_evaluations # is the execution trace length. # In order to facilitate the computation of the DEEP composition polynomial queries, we lay out # the values in memory as [v0, v1, 0, 0] where v := (v0, v1) ranges over the 8 values `value_i`. - + # Load value_0 and value_1 padw padw diff --git a/stdlib/asm/crypto/stark/random_coin.masm b/stdlib/asm/crypto/stark/random_coin.masm index 20264f7d53..e66b024a40 100644 --- a/stdlib/asm/crypto/stark/random_coin.masm +++ b/stdlib/asm/crypto/stark/random_coin.masm @@ -91,7 +91,7 @@ export.init_seed # # Cycles: 22 padw - exec.constants::zero_word mem_storew + exec.constants::zero_word mem_storew exec.constants::c_ptr mem_storew exec.constants::r1_ptr mem_storew exec.constants::r2_ptr mem_storew @@ -223,7 +223,7 @@ export.reseed dropw exec.constants::r1_ptr mem_storew dropw - exec.constants::c_ptr mem_storew + exec.constants::c_ptr mem_storew dropw # => [...] (18 cycles) end @@ -247,7 +247,7 @@ proc.generate_random_coefficients # If we use field division and num_tuples is not a multiple of 4 then we will enter into # a very large loop with high probability. push.0 dup movup.2 movup.3 - u32checked_divmod.4 + u32assert u32divmod.4 assertz neg #=> [loop_ctr, dest_ptr, x, x, ...] @@ -298,7 +298,7 @@ proc.generate_random_coefficients dropw exec.constants::r1_ptr mem_storew dropw - exec.constants::c_ptr mem_storew + exec.constants::c_ptr mem_storew dropw exec.constants::r2_ptr mem_storew dropw @@ -320,7 +320,7 @@ proc.generate_random_coefficients_pad # If we use field division and num_tuples is not a multiple of 4 then we will enter into # a very large loop with high probability. push.0 dup movup.2 movup.3 - u32checked_divmod.4 + u32assert u32divmod.4 assertz neg #=> [loop_ctr, dest_ptr, x, x, ...] @@ -410,7 +410,7 @@ proc.generate_random_coefficients_pad dropw exec.constants::r1_ptr mem_storew dropw - exec.constants::c_ptr mem_storew + exec.constants::c_ptr mem_storew dropw exec.constants::r2_ptr mem_storew dropw @@ -520,7 +520,7 @@ proc.generate_four_integers dup.3 # [r0, R1, ptr, mask, depth, ...] u32split swap # [r0_lo, r0_hi, R1, ptr, mask, depth, ...] dup.7 # [mask, r0_lo, r0_hi, R1, ptr, mask, depth, ...] - u32checked_and # [r, r0_hi, R1, ptr, mask, depth, ...] + u32and # [r, r0_hi, R1, ptr, mask, depth, ...] dup.8 swap # [r, depth, r0_hi, R1, ptr, mask, depth, ...] push.0 movdn.3 # [r, depth, r0_hi, 0, R1, ptr, mask, depth, ...] 
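A small Rust sketch (illustrative, not repository code) of how one query index is derived from a random field element in the steps above, assuming the LDE domain size is a power of two so that the mask equals lde_size - 1:

// Derive one query index and its depth from a random element (illustrative only).
fn query_index(random_felt: u64, lde_size: u32) -> (u32, u32) {
    debug_assert!(lde_size.is_power_of_two());
    let mask = lde_size - 1;
    let lo = random_felt as u32;            // keep the low 32-bit limb produced by u32split
    let index = lo & mask;                  // mask down to [0, lde_size), as the u32and step does
    let depth = lde_size.trailing_zeros();  // log2(lde_size), stored alongside the index
    (index, depth)
}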
@@ -533,7 +533,7 @@ proc.generate_four_integers dup.2 # [r1, R1, ptr, mask, depth, ...] u32split swap # [r1_lo, r1_hi, R1, ptr, mask, depth, ...] dup.7 # [mask, r1_lo, r1_hi, R1, ptr, mask, depth, ...] - u32checked_and # [r, r1_hi, R1, ptr, mask, depth, ...] + u32and # [r, r1_hi, R1, ptr, mask, depth, ...] dup.8 swap # [r, depth, r1_hi, R1, ptr, mask, depth, ...] push.0 movdn.3 # [r, depth, r1_hi, 0, R1, ptr, mask, depth, ...] @@ -546,7 +546,7 @@ proc.generate_four_integers dup.1 u32split swap dup.7 - u32checked_and + u32and dup.8 swap push.0 movdn.3 @@ -559,7 +559,7 @@ proc.generate_four_integers dup u32split swap dup.7 - u32checked_and + u32and dup.8 swap push.0 movdn.3 @@ -585,7 +585,7 @@ proc.generate_three_integers dup.2 # [r0, R1, ptr, mask, depth, ...] u32split swap # [r0_lo, r0_hi, R1, ptr, mask, depth, ...] dup.7 # [mask, r0_lo, r0_hi, R1, ptr, mask, depth, ...] - u32checked_and # [r, r0_hi, R1, ptr, mask, depth, ...] + u32and # [r, r0_hi, R1, ptr, mask, depth, ...] dup.8 swap # [r, depth, r0_hi, R1, ptr, mask, depth, ...] push.0 movdn.3 # [r, depth, r0_hi, 0, R1, ptr, mask, depth, ...] @@ -598,7 +598,7 @@ proc.generate_three_integers dup.1 # [r1, R1, ptr, mask, depth, ...] u32split swap # [r1_lo, r1_hi, R1, ptr, mask, depth, ...] dup.7 # [mask, r1_lo, r1_hi, R1, ptr, mask, depth, ...] - u32checked_and # [r, r1_hi, R1, ptr, mask, depth, ...] + u32and # [r, r1_hi, R1, ptr, mask, depth, ...] dup.8 swap # [r, depth, r1_hi, R1, ptr, mask, depth, ...] push.0 movdn.3 # [r, depth, r1_hi, 0, R1, ptr, mask, depth, ...] @@ -611,7 +611,7 @@ proc.generate_three_integers dup.0 u32split swap dup.7 - u32checked_and + u32and dup.8 swap push.0 movdn.3 @@ -623,7 +623,7 @@ end #! Generate a list of `num_queries` number of random indices in the range #! [0, lde_size] and store it in memory starting from `query_ptr`. -#! The list is stored as `(r, depth, y, y)` where `depth` is `log(lde_domain_size)`. +#! The list is stored as `(r, depth, y, y)` where `depth` is `log(lde_domain_size)`. #!`depth` is needed when computing the deep queries. #! TODO: the case of duplicate queries #! @@ -668,15 +668,15 @@ export.generate_list_indices dropw swapw dropw #=> [R1, query_ptr, mask, depth, num_queries, ...] - + # Use `num_queries` to iterate. - ## Subtract the 7 elements we have already generated above. + ## Subtract the 7 elements we have already generated above. movup.7 push.7 sub ## Divide by 8 to get the number of iterations - u32checked_divmod.8 + u32assert u32divmod.8 #=> [remainder, quotient, X, query_ptr, mask, depth, ...] ## Save remainder for later use @@ -706,9 +706,9 @@ export.generate_list_indices ## Use remainder - + ### Put the remaining number of queries to generate in the appropriate stack position - movup.8 movdn.7 + movup.8 movdn.7 ### Load the second half of the rate portion of the state of the random coin. padw exec.constants::r2_ptr mem_loadw @@ -718,10 +718,10 @@ export.generate_list_indices dup.11 sub.1 swap.12 neq.0 while.true - movup.7 + movup.7 u32split swap # [r0_lo, r0_hi, R2, r3, r2, r1, ptr, mask, depth, ...] dup.10 # [mask, r0_lo, r0_hi, R2, r3, r2, r1, ptr, mask, depth, ...] - u32checked_and # [r, r0_hi, R2, r3, r2, r1, ptr, mask, depth, ...] + u32and # [r, r0_hi, R2, r3, r2, r1, ptr, mask, depth, ...] dup.11 swap # [r, depth, r0_hi, R2, r3, r2, r1, ptr, mask, depth, ...] push.0 movdn.3 # [r, depth, r0_hi, 0, R2, r3, r2, r1, ptr, mask, depth, ...] @@ -750,7 +750,7 @@ export.check_pow # Compute the mask. 
pow2 - u32checked_sub.1 + u32assert u32overflowing_sub.1 assertz #=> [mask, ...] # Load Capacity portion @@ -783,7 +783,7 @@ export.check_pow # Make sure the PoW is valid u32split drop - u32checked_and + u32and assertz drop #=> [...] diff --git a/stdlib/asm/crypto/stark/verifier.masm b/stdlib/asm/crypto/stark/verifier.masm index d7991f8f54..8a9461efb0 100644 --- a/stdlib/asm/crypto/stark/verifier.masm +++ b/stdlib/asm/crypto/stark/verifier.masm @@ -205,7 +205,7 @@ export.verify # 6) Compute evaluations of DEEP composition # polynomial at randomly chosen query positions #============================================ - + # Compute the pointer to the first query using the pointer to # the first layer commitment and total number of queries. exec.constants::fri_com_ptr diff --git a/stdlib/asm/math/ecgfp5/group.masm b/stdlib/asm/math/ecgfp5/group.masm index a9967baffb..ddecc813cd 100644 --- a/stdlib/asm/math/ecgfp5/group.masm +++ b/stdlib/asm/math/ecgfp5/group.masm @@ -634,7 +634,7 @@ export.mul.10 repeat.32 dup push.1 - u32checked_and + u32and if.true # bring base @@ -697,7 +697,7 @@ export.mul.10 loc_store.4 - u32unchecked_shr.1 + u32shr.1 end drop @@ -1094,10 +1094,10 @@ export.gen_mul.8 dup push.1 - u32checked_and + u32and movdn.4 - u32unchecked_shr.1 + u32shr.1 loc_storew.0 dropw @@ -1159,10 +1159,10 @@ export.gen_mul.8 dup push.1 - u32checked_and + u32and movdn.4 - u32unchecked_shr.1 + u32shr.1 loc_storew.2 dropw diff --git a/stdlib/asm/math/ecgfp5/scalar_field.masm b/stdlib/asm/math/ecgfp5/scalar_field.masm index e9a37da8c8..2572fb3cdd 100644 --- a/stdlib/asm/math/ecgfp5/scalar_field.masm +++ b/stdlib/asm/math/ecgfp5/scalar_field.masm @@ -149,82 +149,82 @@ proc.select dup movup.12 dup.3 - u32checked_xor - u32checked_and + u32xor + u32and movup.2 - u32checked_xor + u32xor dup.1 movup.12 dup.4 - u32checked_xor - u32checked_and + u32xor + u32and movup.3 - u32checked_xor + u32xor dup.2 movup.12 dup.5 - u32checked_xor - u32checked_and + u32xor + u32and movup.4 - u32checked_xor + u32xor dup.3 movup.12 dup.6 - u32checked_xor - u32checked_and + u32xor + u32and movup.5 - u32checked_xor + u32xor dup.4 movup.12 dup.7 - u32checked_xor - u32checked_and + u32xor + u32and movup.6 - u32checked_xor + u32xor dup.5 movup.12 dup.8 - u32checked_xor - u32checked_and + u32xor + u32and movup.7 - u32checked_xor + u32xor dup.6 movup.12 dup.9 - u32checked_xor - u32checked_and + u32xor + u32and movup.8 - u32checked_xor + u32xor dup.7 movup.12 dup.10 - u32checked_xor - u32checked_and + u32xor + u32and movup.9 - u32checked_xor + u32xor dup.8 movup.12 dup.11 - u32checked_xor - u32checked_and + u32xor + u32and movup.10 - u32checked_xor + u32xor movup.9 movup.11 dup.11 - u32checked_xor - u32checked_and + u32xor + u32and movup.10 - u32checked_xor + u32xor swap movup.2 @@ -3857,7 +3857,7 @@ export.inv.6 dropw dup - u32unchecked_shr.31 + u32shr.31 if.true # bring base back to stack push.0.0.0.0.0.0.0.0.0.0.0.0 @@ -3897,7 +3897,7 @@ export.inv.6 dropw end - u32unchecked_shl.1 + u32shl.1 end drop diff --git a/stdlib/asm/math/secp256k1/base_field.masm b/stdlib/asm/math/secp256k1/base_field.masm index 5b5fde8250..66b2d7f76f 100644 --- a/stdlib/asm/math/secp256k1/base_field.masm +++ b/stdlib/asm/math/secp256k1/base_field.masm @@ -608,7 +608,7 @@ export.inv.4 dropw dup - u32unchecked_shr.31 + u32shr.31 if.true push.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0 @@ -630,7 +630,7 @@ export.inv.4 dropw end - u32unchecked_shl.1 + u32shl.1 end drop diff --git a/stdlib/asm/math/secp256k1/group.masm b/stdlib/asm/math/secp256k1/group.masm index 
5478223347..d92994f713 100644 --- a/stdlib/asm/math/secp256k1/group.masm +++ b/stdlib/asm/math/secp256k1/group.masm @@ -1029,7 +1029,7 @@ export.mul.18 repeat.32 dup push.1 - u32checked_and + u32and if.true # res = base + res @@ -1135,7 +1135,7 @@ export.mul.18 dropw - u32unchecked_shr.1 + u32shr.1 end drop @@ -3289,9 +3289,9 @@ export.gen_mul.20 loc_loadw.18 dup push.1 - u32checked_and + u32and movdn.4 - u32unchecked_shr.1 + u32shr.1 loc_storew.18 dropw diff --git a/stdlib/asm/math/secp256k1/scalar_field.masm b/stdlib/asm/math/secp256k1/scalar_field.masm index 6347b5816c..4e5a7cc44a 100644 --- a/stdlib/asm/math/secp256k1/scalar_field.masm +++ b/stdlib/asm/math/secp256k1/scalar_field.masm @@ -468,7 +468,7 @@ export.inv.4 dropw dup - u32unchecked_shr.31 + u32shr.31 if.true push.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0 @@ -490,7 +490,7 @@ export.inv.4 dropw end - u32unchecked_shl.1 + u32shl.1 end drop diff --git a/stdlib/asm/math/u256.masm b/stdlib/asm/math/u256.masm index 9dd5c49086..7598009cd3 100644 --- a/stdlib/asm/math/u256.masm +++ b/stdlib/asm/math/u256.masm @@ -93,87 +93,87 @@ export.and swapw.3 movup.3 movup.7 - u32checked_and + u32and movup.3 movup.6 - u32checked_and + u32and movup.3 movup.5 - u32checked_and + u32and movup.3 movup.4 - u32checked_and + u32and swapw.2 movup.3 movup.7 - u32checked_and + u32and movup.3 movup.6 - u32checked_and + u32and movup.3 movup.5 - u32checked_and + u32and movup.3 movup.4 - u32checked_and + u32and end export.or swapw.3 movup.3 movup.7 - u32checked_or + u32or movup.3 movup.6 - u32checked_or + u32or movup.3 movup.5 - u32checked_or + u32or movup.3 movup.4 - u32checked_or + u32or swapw.2 movup.3 movup.7 - u32checked_or + u32or movup.3 movup.6 - u32checked_or + u32or movup.3 movup.5 - u32checked_or + u32or movup.3 movup.4 - u32checked_or + u32or end export.xor swapw.3 movup.3 movup.7 - u32checked_xor + u32xor movup.3 movup.6 - u32checked_xor + u32xor movup.3 movup.5 - u32checked_xor + u32xor movup.3 movup.4 - u32checked_xor + u32xor swapw.2 movup.3 movup.7 - u32checked_xor + u32xor movup.3 movup.6 - u32checked_xor + u32xor movup.3 movup.5 - u32checked_xor + u32xor movup.3 movup.4 - u32checked_xor + u32xor end export.iszero_unsafe diff --git a/stdlib/asm/math/u64.masm b/stdlib/asm/math/u64.masm index 4e6cabf229..63cc2ae957 100644 --- a/stdlib/asm/math/u64.masm +++ b/stdlib/asm/math/u64.masm @@ -2,6 +2,7 @@ #! Asserts that both values at the top of the stack are u64 values. #! The input values are assumed to be represented using 32 bit limbs, fails if they are not. +#! This takes 6 cycles. proc.u32assert4 u32assert2 movup.3 @@ -17,6 +18,7 @@ end #! The input values are assumed to be represented using 32 bit limbs, but this is not checked. #! Stack transition looks as follows: #! [b_hi, b_lo, a_hi, a_lo, ...] -> [overflowing_flag, c_hi, c_lo, ...], where c = (a + b) % 2^64 +#! This takes 6 cycles. export.overflowing_add swap movup.3 @@ -30,34 +32,19 @@ end #! The input values are assumed to be represented using 32 bit limbs, but this is not checked. #! Stack transition looks as follows: #! [b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = (a + b) % 2^64 +#! This takes 7 cycles. export.wrapping_add exec.overflowing_add drop end -#! Performs addition of two unsigned 64 bit integers, fails when overflowing. -#! The input values are assumed to be represented using 32 bit limbs, fails if they are not. -#! Stack transition looks as follows: -#! [b_hi, b_lo, a_hi, a_lo, ...] 
-> [c_hi, c_lo, ...], where c = (a + b) % 2^64 -export.checked_add - swap - movup.3 - u32assert2 - u32overflowing_add - movup.3 - movup.3 - u32assert2 - u32overflowing_add3 - eq.0 - assert -end - # ===== SUBTRACTION =============================================================================== #! Performs subtraction of two unsigned 64 bit integers discarding the overflow. #! The input values are assumed to be represented using 32 bit limbs, but this is not checked. #! Stack transition looks as follows: #! [b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = (a - b) % 2^64 +#! This takes 10 cycles. export.wrapping_sub movup.3 movup.2 @@ -71,31 +58,11 @@ export.wrapping_sub drop end -#! Performs subtraction of two unsigned 64 bit integers, fails when underflowing. -#! The input values are assumed to be represented using 32 bit limbs, fails if they are not. -#! Stack transition looks as follows: -#! [b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = (a - b) % 2^64 -export.checked_sub - movup.3 - movup.2 - u32assert2 - u32overflowing_sub - movup.3 - movup.3 - u32assert2 - u32overflowing_sub - eq.0 - assert - swap - u32overflowing_sub - eq.0 - assert -end - #! Performs subtraction of two unsigned 64 bit integers preserving the overflow. #! The input values are assumed to be represented using 32 bit limbs, but this is not checked. #! Stack transition looks as follows: #! [b_hi, b_lo, a_hi, a_lo, ...] -> [underflowing_flag, c_hi, c_lo, ...], where c = (a - b) % 2^64 +#! This takes 11 cycles. export.overflowing_sub movup.3 movup.2 @@ -116,6 +83,7 @@ end #! The input values are assumed to be represented using 32 bit limbs, but this is not checked. #! Stack transition looks as follows: #! [b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = (a * b) % 2^64 +#! This takes 11 cycles. export.wrapping_mul dup.3 dup.2 @@ -156,67 +124,19 @@ export.overflowing_mul add end -#! Performs multiplication of two unsigned 64 bit integers, fails when overflowing. -#! The input values are assumed to be represented using 32 bit limbs, fails if they are not. -#! Stack transition looks as follows: -#! [b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = (a * b) % 2^64 -export.checked_mul - dup.3 - dup.2 - u32assert2 # make sure lower limbs of operands are 32-bit - u32overflowing_mul - dup.4 - movup.4 - u32overflowing_madd - swap - movup.5 - dup.4 - u32overflowing_madd - movup.5 - movup.5 - u32assert2 # make sure higher limbs of operands are 32-bit - u32overflowing_madd - movup.3 - movup.2 - u32overflowing_add - add - add - eq.0 - assert -end - # ===== COMPARISONS =============================================================================== #! Performs less-than comparison of two unsigned 64 bit integers. #! The input values are assumed to be represented using 32 bit limbs, but this is not checked. #! Stack transition looks as follows: #! [b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a < b, and 0 otherwise. -export.unchecked_lt - movup.3 - movup.2 - u32overflowing_sub - movdn.3 - drop - u32overflowing_sub - swap - eq.0 - movup.2 - and - or -end - -#! Performs less-than comparison of two unsigned 64 bit integers. -#! The input values are assumed to be represented using 32 bit limbs, fails if they are not. -#! Stack transition looks as follows: -#! [b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a < b, and 0 otherwise. -export.checked_lt +#! This takes 11 cycles. 
+export.lt movup.3 movup.2 - u32assert2 u32overflowing_sub movdn.3 drop - u32assert2 u32overflowing_sub swap eq.0 @@ -230,7 +150,7 @@ end #! Stack transition looks as follows: #! [b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a > b, and 0 otherwise. #! This takes 11 cycles. -export.unchecked_gt +export.gt movup.2 u32overflowing_sub movup.2 @@ -244,41 +164,13 @@ export.unchecked_gt or end -#! Performs greater-than comparison of two unsigned 64 bit integers. -#! The input values are assumed to be represented using 32 bit limbs, fails if they are not. -#! Stack transition looks as follows: -#! [b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a > b, and 0 otherwise. -export.checked_gt - movup.2 - u32assert2 - u32overflowing_sub - movup.2 - movup.3 - u32assert2 - u32overflowing_sub - swap - drop - movup.2 - eq.0 - and - or -end - #! Performs less-than-or-equal comparison of two unsigned 64 bit integers. #! The input values are assumed to be represented using 32 bit limbs, but this is not checked. #! Stack transition looks as follows: #! [b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a <= b, and 0 otherwise. -export.unchecked_lte - exec.unchecked_gt - not -end - -#! Performs less-than-or-equal comparison of two unsigned 64 bit integers. -#! The input values are assumed to be represented using 32 bit limbs, fails if they are not. -#! Stack transition looks as follows: -#! [b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a <= b, and 0 otherwise. -export.checked_lte - exec.checked_gt +#! This takes 12 cycles. +export.lte + exec.gt not end @@ -286,17 +178,9 @@ end #! The input values are assumed to be represented using 32 bit limbs, but this is not checked. #! Stack transition looks as follows: #! [b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a >= b, and 0 otherwise. -export.unchecked_gte - exec.unchecked_lt - not -end - -#! Performs greater-than-or-equal comparison of two unsigned 64 bit integers. -#! The input values are assumed to be represented using 32 bit limbs, fails if they are not. -#! Stack transition looks as follows: -#! [b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a >= b, and 0 otherwise. -export.checked_gte - exec.checked_lt +#! This takes 12 cycles. +export.gte + exec.lt not end @@ -304,25 +188,13 @@ end #! The input values are assumed to be represented using 32 bit limbs, but this is not checked. #! Stack transition looks as follows: #! [b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a == b, and 0 otherwise. -export.unchecked_eq - movup.2 - u32checked_eq - swap - movup.2 - u32checked_eq - and -end - -#! Performs equality comparison of two unsigned 64 bit integers. -#! The input values are assumed to be represented using 32 bit limbs, fails if they are not. -#! Stack transition looks as follows: -#! [b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a == b, and 0 otherwise. -export.checked_eq +#! This takes 6 cycles. +export.eq movup.2 - u32checked_eq + eq swap movup.2 - u32checked_eq + eq and end @@ -330,41 +202,22 @@ end #! The input values are assumed to be represented using 32 bit limbs, but this is not checked. #! Stack transition looks as follows: #! [b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a != b, and 0 otherwise. -export.unchecked_neq +#! This takes 6 cycles. +export.neq movup.2 - u32checked_neq + neq swap movup.2 - u32checked_neq + neq or end -#! Performs inequality comparison of two unsigned 64 bit integers. -#! 
The input values are assumed to be represented using 32 bit limbs, fails if they are not. -#! Stack transition looks as follows: -#! [b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a == b, and 0 otherwise. -export.checked_neq - exec.checked_eq - not -end - #! Performs comparison to zero of an unsigned 64 bit integer. #! The input value is assumed to be represented using 32 bit limbs, but this is not checked. #! Stack transition looks as follows: #! [a_hi, a_lo, ...] -> [c, ...], where c = 1 when a == 0, and 0 otherwise. -export.unchecked_eqz - eq.0 - swap - eq.0 - and -end - -#! Performs comparison to zero of an unsigned 64 bit integer. -#! The input value is assumed to be represented using 32 bit limbs, fails if it is not. -#! Stack transition looks as follows: -#! [a_hi, a_lo, ...] -> [c, ...], where c = 1 when a == 0, and 0 otherwise. -export.checked_eqz - u32assert2 +#! This takes 4 cycles. +export.eqz eq.0 swap eq.0 @@ -375,9 +228,10 @@ end #! The input values are assumed to be represented using 32 bit limbs, but this is not checked. #! Stack transition looks as follows: #! [b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a when a < b, and b otherwise. -export.unchecked_min +#! This takes 23 cycles. +export.min dupw - exec.unchecked_gt + exec.gt movup.4 movup.3 dup.2 @@ -386,22 +240,14 @@ export.unchecked_min cdrop end -#! Compares two unsigned 64 bit integers and drop the larger one from the stack. -#! The input values are assumed to be represented using 32 bit limbs, fails if they are not. -#! Stack transition looks as follows: -#! [b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a when a < b, and b otherwise. -export.checked_min - exec.u32assert4 - exec.unchecked_min -end - #! Compares two unsigned 64 bit integers and drop the smaller one from the stack. #! The input values are assumed to be represented using 32 bit limbs, but this is not checked. #! Stack transition looks as follows: #! [b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a when a > b, and b otherwise. -export.unchecked_max +#! This takes 23 cycles. +export.max dupw - exec.unchecked_lt + exec.lt movup.4 movup.3 dup.2 @@ -410,23 +256,14 @@ export.unchecked_max cdrop end -#! Compares two unsigned 64 bit integers and drop the smaller one from the stack. -#! The input values are assumed to be represented using 32 bit limbs, fails if they are not. -#! Stack transition looks as follows: -#! [b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a when a > b, and b otherwise. -export.checked_max - exec.u32assert4 - exec.unchecked_max -end - - # ===== DIVISION ================================================================================== #! Performs division of two unsigned 64 bit integers discarding the remainder. #! The input values are assumed to be represented using 32 bit limbs, but this is not checked. #! Stack transition looks as follows: #! [b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a // b -export.unchecked_div +#! This takes 54 cycles. +export.div adv.push_u64div # push the quotient and the remainder onto the advice stack adv_push.2 # pop the quotient from the advice stack and assert it consists of @@ -458,7 +295,7 @@ export.unchecked_div movup.7 # the divisor dup.3 dup.3 - exec.unchecked_gt + exec.gt assert swap # add remainder to the previous result; this also consumes the remainder @@ -476,22 +313,14 @@ export.unchecked_div assert_eq # quotient remains on the stack end -#! 
Performs division of two unsigned 64 bit integers discarding the remainder. -#! The input values are assumed to be represented using 32 bit limbs, fails if they are not. -#! Stack transition looks as follows: -#! [b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a // b -export.checked_div - exec.u32assert4 - exec.unchecked_div -end - # ===== MODULO OPERATION ========================================================================== #! Performs modulo operation of two unsigned 64 bit integers. #! The input values are assumed to be represented using 32 bit limbs, but this is not checked. #! Stack transition looks as follows: #! [b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a % b -export.unchecked_mod +#! This takes 54 cycles. +export.mod adv.push_u64div # push the quotient and the remainder onto the advice stack adv_push.2 # pop the quotient from the advice stack and assert it consists of @@ -523,7 +352,7 @@ export.unchecked_mod movup.5 # the divisor dup.3 dup.3 - exec.unchecked_gt + exec.gt assert dup.1 # add remainder to the previous result @@ -541,22 +370,14 @@ export.unchecked_mod assert_eq # remainder remains on the stack end -#! Performs modulo operation of two unsigned 64 bit integers. -#! The input values are assumed to be represented using 32 bit limbs, fails if they are not. -#! Stack transition looks as follows: -#! [b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a % b -export.checked_mod - exec.u32assert4 - exec.unchecked_mod -end - # ===== DIVMOD OPERATION ========================================================================== #! Performs divmod operation of two unsigned 64 bit integers. #! The input values are assumed to be represented using 32 bit limbs, but this is not checked. #! Stack transition looks as follows: #! [b_hi, b_lo, a_hi, a_lo, ...] -> [r_hi, r_lo, q_hi, q_lo ...], where r = a % b, q = a / b -export.unchecked_divmod +#! This takes 54 cycles. +export.divmod adv.push_u64div # push the quotient and the remainder onto the advice stack adv_push.2 # pop the quotient from the advice stack and assert it consists of @@ -588,7 +409,7 @@ export.unchecked_divmod movup.7 # the divisor dup.3 dup.3 - exec.unchecked_gt + exec.gt assert dup.1 # add remainder to the previous result @@ -606,54 +427,48 @@ export.unchecked_divmod assert_eq # remainder remains on the stack end -#! Performs divmod operation of two unsigned 64 bit integers. -#! The input values are assumed to be represented using 32 bit limbs, fails if they are not. -#! Stack transition looks as follows: -#! [b_hi, b_lo, a_hi, a_lo, ...] -> [r_hi, r_lo, q_hi, q_lo ...], where r = a % b, q = a / b -export.checked_divmod - exec.u32assert4 - exec.unchecked_divmod -end - # ===== BITWISE OPERATIONS ======================================================================== #! Performs bitwise AND of two unsigned 64-bit integers. #! The input values are assumed to be represented using 32 bit limbs, but this is not checked. #! Stack transition looks as follows: #! [b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a AND b. -export.checked_and +#! This takes 6 cycles. +export.and swap movup.3 - u32checked_and + u32and swap movup.2 - u32checked_and + u32and end #! Performs bitwise OR of two unsigned 64 bit integers. #! The input values are assumed to be represented using 32 bit limbs, fails if they are not. #! Stack transition looks as follows: #! [b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a OR b. -export.checked_or +#! This takes 16 cycles. 
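+#! The operation is applied to each 32-bit limb independently: c_hi = a_hi OR b_hi and
+#! c_lo = a_lo OR b_lo. Illustrative example (values chosen here for exposition, not part of the
+#! original spec): a = 0x00000001_F0F00000 OR b = 0x00000002_0000FFFF gives c = 0x00000003_F0F0FFFF.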
+export.or swap movup.3 - u32checked_or + u32or swap movup.2 - u32checked_or + u32or end #! Performs bitwise XOR of two unsigned 64 bit integers. #! The input values are assumed to be represented using 32 bit limbs, fails if they are not. #! Stack transition looks as follows: #! [b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a XOR b. -export.checked_xor +#! This takes 6 cycles. +export.xor swap movup.3 - u32checked_xor + u32xor swap movup.2 - u32checked_xor + u32xor end #! Performs left shift of one unsigned 64-bit integer using the pow2 operation. @@ -663,7 +478,7 @@ end #! Stack transition looks as follows: #! [b, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a << b mod 2^64. #! This takes 28 cycles. -export.unchecked_shl +export.shl pow2 u32split exec.wrapping_mul @@ -677,7 +492,7 @@ end #! Stack transition looks as follows: #! [b, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a >> b. #! This takes 44 cycles. -export.unchecked_shr +export.shr pow2 u32split @@ -685,7 +500,7 @@ export.unchecked_shr add movup.2 swap - u32unchecked_divmod + u32divmod movup.3 movup.3 dup @@ -695,7 +510,7 @@ export.unchecked_shr movdn.4 dup movdn.4 - u32unchecked_divmod + u32divmod drop push.4294967296 dup.5 @@ -709,54 +524,6 @@ export.unchecked_shr cswap end -#! Performs left shift of one unsigned 64-bit integer preserving the overflow and -#! using the pow2 operation. -#! The input value to be shifted is assumed to be represented using 32 bit limbs. -#! The shift value should be in the range [0, 64), otherwise it will result in an -#! error. -#! Stack transition looks as follows: -#! [b, a_hi, a_lo, ...] -> [d_hi, d_lo, c_hi, c_lo, ...], where (d,c) = a << b, -#! which d contains the bits shifted out. -#! This takes 35 cycles. -export.overflowing_shl - pow2 - u32split - exec.overflowing_mul -end - -#! Performs right shift of one unsigned 64-bit integer preserving the overflow and -#! using the pow2 operation. -#! The input value to be shifted is assumed to be represented using 32 bit limbs. -#! The shift value should be in the range [0, 64), otherwise it will result in an -#! error. -#! Stack transition looks as follows: -#! [b, a_hi, a_lo, ...] -> [d_hi, d_lo, c_hi, c_lo, ...], where c = a >> b, d = a << (64 - b). -#! This takes 94 cycles. -export.overflowing_shr - push.64 # (64 - b) - dup.1 - sub - - dup.3 # dup [b, a_hi, a_lo] - dup.3 - dup.3 - exec.unchecked_shr # c = a >> b - - movdn.5 # move result [c_hi, c_lo] to be in the format [d_hi, d_lo, c_hi, c_lo, ...] - movdn.5 - - padw # padding positions 0, 1, 2, 3 and 4 to be able to use cdropw - push.0 - - movup.6 # bring and b - eq.0 - cdropw # if b is 0, swap the positions 0, 1, 2 and 3 with 0, (64 - b), a_hi, a_lo - # regardless of this condition, drop 0, 1, 2 and 3 - drop # drop the last added 0 or dup b to keep the format [b, a_hi, a_lo, ....] - - exec.unchecked_shl # d = a << (64 - b) -end - #! Performs left rotation of one unsigned 64-bit integer using the pow2 operation. #! The input value to be shifted is assumed to be represented using 32 bit limbs. #! The shift value should be in the range [0, 64), otherwise it will result in an @@ -764,7 +531,7 @@ end #! Stack transition looks as follows: #! [b, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a << b mod 2^64. #! This takes 35 cycles. -export.unchecked_rotl +export.rotl push.31 dup.1 u32overflowing_sub @@ -774,7 +541,7 @@ export.unchecked_rotl # Shift the low limb. push.31 - u32checked_and + u32and pow2 dup movup.3 @@ -802,7 +569,7 @@ end #! 
Stack transition looks as follows: #! [b, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a << b mod 2^64. #! This takes 40 cycles. -export.unchecked_rotr +export.rotr push.31 dup.1 u32overflowing_sub @@ -812,7 +579,7 @@ export.unchecked_rotr # Shift the low limb left by 32-b. push.31 - u32checked_and + u32and push.32 swap u32overflowing_sub @@ -837,3 +604,85 @@ export.unchecked_rotr not cswap end + +#! Counts the number of leading zeros of one unsigned 64-bit integer. +#! The input value is assumed to be represented using 32 bit limbs, but this is not checked. +#! Stack transition looks as follows: +#! [n_hi, n_lo, ...] -> [clz, ...], where clz is a number of leading zeros of value n. +#! This takes 43 cycles. +export.clz + dup.0 + eq.0 + + if.true # if n_hi == 0 + drop + u32clz + add.32 # clz(n_lo) + 32 + else + swap + drop + u32clz # clz(n_hi) + end +end + +#! Counts the number of trailing zeros of one unsigned 64-bit integer. +#! The input value is assumed to be represented using 32 bit limbs, but this is not checked. +#! Stack transition looks as follows: +#! [n_hi, n_lo, ...] -> [ctz, ...], where ctz is a number of trailing zeros of value n. +#! This takes 41 cycles. +export.ctz + swap + dup.0 + eq.0 + + if.true # if n_lo == 0 + drop + u32ctz + add.32 # ctz(n_hi) + 32 + else + swap + drop + u32ctz # ctz(n_lo) + end +end + +#! Counts the number of leading ones of one unsigned 64-bit integer. +#! The input value is assumed to be represented using 32 bit limbs, but this is not checked. +#! Stack transition looks as follows: +#! [n_hi, n_lo, ...] -> [clo, ...], where clo is a number of leading ones of value n. +#! This takes 42 cycles. +export.clo + dup.0 + eq.4294967295 + + if.true # if n_hi == 11111111111111111111111111111111 + drop + u32clo + add.32 # clo(n_lo) + 32 + else + swap + drop + u32clo # clo(n_hi) + end +end + +#! Counts the number of trailing ones of one unsigned 64-bit integer. +#! The input value is assumed to be represented using 32 bit limbs, but this is not checked. +#! Stack transition looks as follows: +#! [n_hi, n_lo, ...] -> [cto, ...], where cto is a number of trailing ones of value n. +#! This takes 40 cycles. +export.cto + swap + dup.0 + eq.4294967295 + + if.true # if n_lo == 11111111111111111111111111111111 + drop + u32cto + add.32 # cto(n_hi) + 32 + else + swap + drop + u32cto # ctz(n_lo) + end +end diff --git a/stdlib/asm/utils.masm b/stdlib/asm/utils.masm new file mode 100644 index 0000000000..39709b5b16 --- /dev/null +++ b/stdlib/asm/utils.masm @@ -0,0 +1,15 @@ +#! Returns a boolean indicating whether the input word is an empty word. +#! +#! Inputs: [INPUT_WORD] +#! Outputs: [is_empty_word, INPUT_WORD] +#! +#! - INPUT_WORD is the word whose emptiness is to be determined. +#! - is_empty_word is a boolean indicating whether INPUT_WORD is empty. +#! +#! Cycles: 11 +export.is_empty_word + repeat.4 + dup.3 eq.0 + end + and and and +end diff --git a/stdlib/build.rs b/stdlib/build.rs index c792a6f7f3..75f780020a 100644 --- a/stdlib/build.rs +++ b/stdlib/build.rs @@ -23,7 +23,7 @@ type ModuleMap = BTreeMap; /// `assets` folder under `std` namespace. 
#[cfg(not(feature = "docs-rs"))] fn main() -> io::Result<()> { - // re-build the `./assets/std.masl` file iff something in the `./asm` directory + // re-build the `[OUT_DIR]/assets/std.masl` file iff something in the `./asm` directory // or its builder changed: println!("cargo:rerun-if-changed=asm"); println!("cargo:rerun-if-changed=../assembly/src"); @@ -42,7 +42,7 @@ fn main() -> io::Result<()> { stdlib.write_to_dir(Path::new(&build_dir).join(ASL_DIR_PATH))?; // updates the documentation of these modules - build_stdlib_docs(&docs, DOC_DIR_PATH); + build_stdlib_docs(&docs, DOC_DIR_PATH)?; Ok(()) } @@ -57,12 +57,24 @@ trait Renderer { } /// Writes Miden standard library modules documentation markdown files based on the available modules and comments. -pub fn build_stdlib_docs(module_map: &ModuleMap, output_dir: &str) { - // Remove functions folder to re-generate - fs::remove_dir_all(output_dir).unwrap(); - fs::create_dir(output_dir).unwrap(); +pub fn build_stdlib_docs(module_map: &ModuleMap, output_dir: &str) -> io::Result<()> { + // Clean the output folder. This only deletes the folder's content, and not the folder itself, + // because removing the folder fails on docs.rs + for entry in fs::read_dir(output_dir)? { + let entry = entry?; + let metadata = entry.metadata()?; + + if metadata.is_dir() { + fs::remove_dir_all(entry.path())?; + } else { + assert!(metadata.is_file()); + fs::remove_file(entry.path())?; + } + } // Render the stdlib struct into markdown // TODO: Make the renderer choice pluggable. MarkdownRenderer::render(module_map, output_dir); + + Ok(()) } diff --git a/stdlib/docs/collections/smt.md b/stdlib/docs/collections/smt.md index bed60441bb..d4083b7bdd 100644 --- a/stdlib/docs/collections/smt.md +++ b/stdlib/docs/collections/smt.md @@ -2,6 +2,5 @@ ## std::collections::smt | Procedure | Description | | ----------- | ------------- | -| get | Returns the value stored under the specified key in a Sparse Merkle Tree with the specified root.

If the value for a given key has not been set, the returned `V` will consist of all zeroes.

Input: [K, R, ...]

Output: [V, R, ...]

Depth 16: 91 cycles

Depth 32: 87 cycles

Depth 48: 94 cycles

Depth 64: unimplemented | -| insert | Inserts the specified value into a Sparse Merkle Tree with the specified root under the

specified key.

The value previously stored in the SMT under this key is left on the stack together with

the updated tree root.

This assumes that the value is not [ZERO; 4]. If it is, the procedure fails.

Input: [V, K, R, ...]

Output: [V_old, R_new, ...]

Cycles:

- Update existing leaf:

- Depth 16: 137

- Depth 32: 134

- Depth 48: 139

- Insert new leaf:

- Depth 16: 102

- Depth 32: 183

- Depth 48: 183

- Replace a leaf with a subtree:

- Depth 16 -> 32: 242

- Depth 16 -> 48: 265

- Depth 32 -> 48: 255 | -| set | Sets the value associated with key K to V in a Sparse Merkle tree with root R. Returns the new

root of the tree together with the value previously associated with key K.

If no value was previously associated with K, [ZERO; 4] is returned.

Unlike the `insert` procedure defined above, this procedure allows for values to be set to

[ZERO; 4].

Input: [V, K, R, ...]

Output: [V_old, R_new, ...]

Cycles:

- Update existing leaf:

- Depth 16: 137

- Depth 32: 133

- Depth 48: 139

- Insert new leaf:

- Depth 16: 102

- Depth 32: 183

- Depth 48: 183

- Replace a leaf with a subtree:

- Depth 16 -> 32: 242

- Depth 16 -> 48: 265

- Depth 32 -> 48: 255

- Remove a key-value pair:

- Key-value pair not in tree: 52 - 93

- Key-value pair is in tree: 142 - 305 | +| set | Inserts the specified value under the specified key in a Sparse Merkle Tree defined by the

specified root. If the insert is successful, the old value located under the specified key

is returned via the stack.

If the VALUE is an empty word (i.e., [ZERO; 4]), the new state of the tree is guaranteed to

be equivalent to the state as if the updated value was never inserted.

Inputs:

Operand stack: [V, K, R, ...]

Outputs:

Operand stack: [V_old, R_new, ...]

Fails if the tree with the specified root does not exist in the VM's advice provider.

Cycles

Leaf empty

removal: 74 cycles

insertion: 133 cycles

Leaf single

removal: 227 cycles

insertion (leaf remains single): 205

insertion (leaf becomes multiple): unimplemented

Leaf multiple

unimplemented | +| get | Returns the value located under the specified key in the Sparse Merkle Tree defined by the

specified root.

If no values had been previously inserted under the specified key, an empty word (i.e.,

[ZERO; 4]) is returned.

Inputs:

Operand stack: [K, R, ...]

Outputs:

Operand stack: [V, R, ...]

Fails if the tree with the specified root does not exist in the VM's advice provider.

Cycles

Leaf empty: 48 cycles

Leaf single: 99 cycles

Leaf multiple: unimplemented | diff --git a/stdlib/docs/collections/smt64.md b/stdlib/docs/collections/smt64.md deleted file mode 100644 index 3b789ea33f..0000000000 --- a/stdlib/docs/collections/smt64.md +++ /dev/null @@ -1,7 +0,0 @@ -A key-value map with single-element keys and 4-element values.
Current implementation is a thin wrapper over a simple Sparse Merkle Tree of depth 64. In the
future, this will be replaced with a compact Sparse Merkle Tree implementation. -## std::collections::smt64 -| Procedure | Description | -| ----------- | ------------- | -| get | Returns the value located under the specified key in the Sparse Merkle Tree defined by the

specified root.

If no values had been previously inserted under the specified key, an empty word (i.e.,

[ZERO; 4]) is returned.

Inputs:

Operand stack: [key, ROOT, ...]

Outputs:

Operand stack: [VALUE, ROOT, ...]

Fails if the tree with the specified root does not exits in the VM's advice provider. | -| insert | Inserts the specified value under the specified key in a Sparse Merkle Tree defined by the

specified root. If the insert is successful, the old value located under the specified key

is returned via the stack.

This procedure assumes that VALUE is a non-empty word (i.e., not [ZERO; 4]).

Inputs:

Operand stack: [VALUE, key, ROOT, ...]

Outputs:

Operand stack: [OLD_VALUE, NEW_ROOT, ...]

Fails if:

- The tree with the specified root does not exits in the VM's advice provider.

- The provided value is an empty word. | -| set | Inserts the specified value under the specified key in a Sparse Merkle Tree defined by the

specified root. If the insert is successful, the old value located under the specified key

is returned via the stack.

If the VALUE is an empty word (i.e., [ZERO; 4]), the new state of the tree is guaranteed to

be equivalent to the state as if the updated value was never inserted.

Inputs:

Operand stack: [VALUE, key, ROOT, ...]

Outputs:

Operand stack: [OLD_VALUE, NEW_ROOT, ...]

Fails if the tree with the specified root does not exits in the VM's advice provider. | diff --git a/stdlib/docs/math/u64.md b/stdlib/docs/math/u64.md index 9745204e41..453ca6b438 100644 --- a/stdlib/docs/math/u64.md +++ b/stdlib/docs/math/u64.md @@ -2,45 +2,32 @@ ## std::math::u64 | Procedure | Description | | ----------- | ------------- | -| overflowing_add | Performs addition of two unsigned 64 bit integers preserving the overflow.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [overflowing_flag, c_hi, c_lo, ...], where c = (a + b) % 2^64 | -| wrapping_add | Performs addition of two unsigned 64 bit integers discarding the overflow.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = (a + b) % 2^64 | -| checked_add | Performs addition of two unsigned 64 bit integers, fails when overflowing.

The input values are assumed to be represented using 32 bit limbs, fails if they are not.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = (a + b) % 2^64 | -| wrapping_sub | Performs subtraction of two unsigned 64 bit integers discarding the overflow.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = (a - b) % 2^64 | -| checked_sub | Performs subtraction of two unsigned 64 bit integers, fails when underflowing.

The input values are assumed to be represented using 32 bit limbs, fails if they are not.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = (a - b) % 2^64 | -| overflowing_sub | Performs subtraction of two unsigned 64 bit integers preserving the overflow.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [underflowing_flag, c_hi, c_lo, ...], where c = (a - b) % 2^64 | -| wrapping_mul | Performs multiplication of two unsigned 64 bit integers discarding the overflow.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = (a * b) % 2^64 | +| overflowing_add | Performs addition of two unsigned 64 bit integers preserving the overflow.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [overflowing_flag, c_hi, c_lo, ...], where c = (a + b) % 2^64

This takes 6 cycles. | +| wrapping_add | Performs addition of two unsigned 64 bit integers discarding the overflow.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = (a + b) % 2^64

This takes 7 cycles. | +| wrapping_sub | Performs subtraction of two unsigned 64 bit integers discarding the overflow.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = (a - b) % 2^64

This takes 10 cycles. | +| overflowing_sub | Performs subtraction of two unsigned 64 bit integers preserving the overflow.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [underflowing_flag, c_hi, c_lo, ...], where c = (a - b) % 2^64

This takes 11 cycles. | +| wrapping_mul | Performs multiplication of two unsigned 64 bit integers discarding the overflow.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = (a * b) % 2^64

This takes 11 cycles. | | overflowing_mul | Performs multiplication of two unsigned 64 bit integers preserving the overflow.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_mid_hi, c_mid_lo, c_lo, ...], where c = (a * b) % 2^64

This takes 18 cycles. | -| checked_mul | Performs multiplication of two unsigned 64 bit integers, fails when overflowing.

The input values are assumed to be represented using 32 bit limbs, fails if they are not.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = (a * b) % 2^64 | -| unchecked_lt | Performs less-than comparison of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a < b, and 0 otherwise. | -| checked_lt | Performs less-than comparison of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, fails if they are not.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a < b, and 0 otherwise. | -| unchecked_gt | Performs greater-than comparison of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a > b, and 0 otherwise.

This takes 11 cycles. | -| checked_gt | Performs greater-than comparison of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, fails if they are not.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a > b, and 0 otherwise. | -| unchecked_lte | Performs less-than-or-equal comparison of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a <= b, and 0 otherwise. | -| checked_lte | Performs less-than-or-equal comparison of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, fails if they are not.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a <= b, and 0 otherwise. | -| unchecked_gte | Performs greater-than-or-equal comparison of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a >= b, and 0 otherwise. | -| checked_gte | Performs greater-than-or-equal comparison of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, fails if they are not.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a >= b, and 0 otherwise. | -| unchecked_eq | Performs equality comparison of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a == b, and 0 otherwise. | -| checked_eq | Performs equality comparison of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, fails if they are not.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a == b, and 0 otherwise. | -| unchecked_neq | Performs inequality comparison of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a != b, and 0 otherwise. | -| checked_neq | Performs inequality comparison of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, fails if they are not.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a == b, and 0 otherwise. | -| unchecked_eqz | Performs comparison to zero of an unsigned 64 bit integer.

The input value is assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[a_hi, a_lo, ...] -> [c, ...], where c = 1 when a == 0, and 0 otherwise. | -| checked_eqz | Performs comparison to zero of an unsigned 64 bit integer.

The input value is assumed to be represented using 32 bit limbs, fails if it is not.

Stack transition looks as follows:

[a_hi, a_lo, ...] -> [c, ...], where c = 1 when a == 0, and 0 otherwise. | -| unchecked_min | Compares two unsigned 64 bit integers and drop the larger one from the stack.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a when a < b, and b otherwise. | -| checked_min | Compares two unsigned 64 bit integers and drop the larger one from the stack.

The input values are assumed to be represented using 32 bit limbs, fails if they are not.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a when a < b, and b otherwise. | -| unchecked_max | Compares two unsigned 64 bit integers and drop the smaller one from the stack.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a when a > b, and b otherwise. | -| checked_max | Compares two unsigned 64 bit integers and drop the smaller one from the stack.

The input values are assumed to be represented using 32 bit limbs, fails if they are not.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a when a > b, and b otherwise. | -| unchecked_div | Performs division of two unsigned 64 bit integers discarding the remainder.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a // b | -| checked_div | Performs division of two unsigned 64 bit integers discarding the remainder.

The input values are assumed to be represented using 32 bit limbs, fails if they are not.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a // b | -| unchecked_mod | Performs modulo operation of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a % b | -| checked_mod | Performs modulo operation of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, fails if they are not.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a % b | -| unchecked_divmod | Performs divmod operation of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [r_hi, r_lo, q_hi, q_lo ...], where r = a % b, q = a / b | -| checked_divmod | Performs divmod operation of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, fails if they are not.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [r_hi, r_lo, q_hi, q_lo ...], where r = a % b, q = a / b | -| checked_and | Performs bitwise AND of two unsigned 64-bit integers.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a AND b. | -| checked_or | Performs bitwise OR of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, fails if they are not.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a OR b. | -| checked_xor | Performs bitwise XOR of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, fails if they are not.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a XOR b. | -| unchecked_shl | Performs left shift of one unsigned 64-bit integer using the pow2 operation.

The input value to be shifted is assumed to be represented using 32 bit limbs.

The shift value should be in the range [0, 64), otherwise it will result in an

error.

Stack transition looks as follows:

[b, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a << b mod 2^64.

This takes 28 cycles. | -| unchecked_shr | Performs right shift of one unsigned 64-bit integer using the pow2 operation.

The input value to be shifted is assumed to be represented using 32 bit limbs.

The shift value should be in the range [0, 64), otherwise it will result in an

error.

Stack transition looks as follows:

[b, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a >> b.

This takes 44 cycles. | -| overflowing_shl | Performs left shift of one unsigned 64-bit integer preserving the overflow and

using the pow2 operation.

The input value to be shifted is assumed to be represented using 32 bit limbs.

The shift value should be in the range [0, 64), otherwise it will result in an

error.

Stack transition looks as follows:

[b, a_hi, a_lo, ...] -> [d_hi, d_lo, c_hi, c_lo, ...], where (d,c) = a << b,

which d contains the bits shifted out.

This takes 35 cycles. | -| overflowing_shr | Performs right shift of one unsigned 64-bit integer preserving the overflow and

using the pow2 operation.

The input value to be shifted is assumed to be represented using 32 bit limbs.

The shift value should be in the range [0, 64), otherwise it will result in an

error.

Stack transition looks as follows:

[b, a_hi, a_lo, ...] -> [d_hi, d_lo, c_hi, c_lo, ...], where c = a >> b, d = a << (64 - b).

This takes 94 cycles. | -| unchecked_rotl | Performs left rotation of one unsigned 64-bit integer using the pow2 operation.

The input value to be shifted is assumed to be represented using 32 bit limbs.

The shift value should be in the range [0, 64), otherwise it will result in an

error.

Stack transition looks as follows:

[b, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a << b mod 2^64.

This takes 35 cycles. | -| unchecked_rotr | Performs right rotation of one unsigned 64-bit integer using the pow2 operation.

The input value to be shifted is assumed to be represented using 32 bit limbs.

The shift value should be in the range [0, 64), otherwise it will result in an

error.

Stack transition looks as follows:

[b, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a << b mod 2^64.

This takes 40 cycles. | +| lt | Performs less-than comparison of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a < b, and 0 otherwise.

This takes 11 cycles. | +| gt | Performs greater-than comparison of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a > b, and 0 otherwise.

This takes 11 cycles. | +| lte | Performs less-than-or-equal comparison of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a <= b, and 0 otherwise.

This takes 12 cycles. | +| gte | Performs greater-than-or-equal comparison of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a >= b, and 0 otherwise.

This takes 12 cycles. | +| eq | Performs equality comparison of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a == b, and 0 otherwise.

This takes 6 cycles. | +| neq | Performs inequality comparison of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c, ...], where c = 1 when a != b, and 0 otherwise.

This takes 6 cycles. | +| eqz | Performs comparison to zero of an unsigned 64 bit integer.

The input value is assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[a_hi, a_lo, ...] -> [c, ...], where c = 1 when a == 0, and 0 otherwise.

This takes 4 cycles. | +| min | Compares two unsigned 64 bit integers and drops the larger one from the stack.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a when a < b, and b otherwise.

This takes 23 cycles. | +| max | Compares two unsigned 64 bit integers and drops the smaller one from the stack.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a when a > b, and b otherwise.

This takes 23 cycles. | +| div | Performs division of two unsigned 64 bit integers discarding the remainder.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a // b

This takes 54 cycles. | +| mod | Performs modulo operation of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a % b

This takes 54 cycles. | +| divmod | Performs divmod operation of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [r_hi, r_lo, q_hi, q_lo ...], where r = a % b, q = a / b

This takes 54 cycles. | +| and | Performs bitwise AND of two unsigned 64-bit integers.

The input values are assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a AND b.

This takes 6 cycles. | +| or | Performs bitwise OR of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, fails if they are not.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a OR b.

This takes 16 cycles. | +| xor | Performs bitwise XOR of two unsigned 64 bit integers.

The input values are assumed to be represented using 32 bit limbs, fails if they are not.

Stack transition looks as follows:

[b_hi, b_lo, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a XOR b.

This takes 6 cycles. | +| shl | Performs left shift of one unsigned 64-bit integer using the pow2 operation.

The input value to be shifted is assumed to be represented using 32 bit limbs.

The shift value should be in the range [0, 64), otherwise it will result in an

error.

Stack transition looks as follows:

[b, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a << b mod 2^64.

This takes 28 cycles. | +| shr | Performs right shift of one unsigned 64-bit integer using the pow2 operation.

The input value to be shifted is assumed to be represented using 32 bit limbs.

The shift value should be in the range [0, 64), otherwise it will result in an

error.

Stack transition looks as follows:

[b, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c = a >> b.

This takes 44 cycles. | +| rotl | Performs left rotation of one unsigned 64-bit integer using the pow2 operation.

The input value to be shifted is assumed to be represented using 32 bit limbs.

The shift value should be in the range [0, 64), otherwise it will result in an

error.

Stack transition looks as follows:

[b, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c is the value of a rotated left by b bits.

This takes 35 cycles. | +| rotr | Performs right rotation of one unsigned 64-bit integer using the pow2 operation.

The input value to be shifted is assumed to be represented using 32 bit limbs.

The shift value should be in the range [0, 64), otherwise it will result in an

error.

Stack transition looks as follows:

[b, a_hi, a_lo, ...] -> [c_hi, c_lo, ...], where c is the value of a rotated right by b bits.

This takes 40 cycles. | +| clz | Counts the number of leading zeros of one unsigned 64-bit integer.

The input value is assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[n_hi, n_lo, ...] -> [clz, ...], where clz is a number of leading zeros of value n.

This takes 43 cycles. | +| ctz | Counts the number of trailing zeros of one unsigned 64-bit integer.

The input value is assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[n_hi, n_lo, ...] -> [ctz, ...], where ctz is a number of trailing zeros of value n.

This takes 41 cycles. | +| clo | Counts the number of leading ones of one unsigned 64-bit integer.

The input value is assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[n_hi, n_lo, ...] -> [clo, ...], where clo is a number of leading ones of value n.

This takes 42 cycles. | +| cto | Counts the number of trailing ones of one unsigned 64-bit integer.

The input value is assumed to be represented using 32 bit limbs, but this is not checked.

Stack transition looks as follows:

[n_hi, n_lo, ...] -> [cto, ...], where cto is a number of trailing ones of value n.

This takes 40 cycles. | diff --git a/stdlib/docs/utils.md b/stdlib/docs/utils.md new file mode 100644 index 0000000000..cde94250ef --- /dev/null +++ b/stdlib/docs/utils.md @@ -0,0 +1,5 @@ + +## std::utils +| Procedure | Description | +| ----------- | ------------- | +| is_empty_word | Returns a boolean indicating whether the input word is an empty word.

Inputs: [INPUT_WORD]

Outputs: [is_empty_word, INPUT_WORD]

- INPUT_WORD is the word whose emptiness is to be determined.

- is_empty_word is a boolean indicating whether INPUT_WORD is empty.

Cycles: 11 | diff --git a/stdlib/md_renderer.rs b/stdlib/md_renderer.rs index 43273033e1..4db270f296 100644 --- a/stdlib/md_renderer.rs +++ b/stdlib/md_renderer.rs @@ -70,8 +70,8 @@ impl Renderer for MarkdownRenderer { let f = fs::OpenOptions::new() .write(true) - .append(true) .create(true) + .truncate(true) .open(file_path) .expect("unable to open stdlib markdown file"); diff --git a/stdlib/src/lib.rs b/stdlib/src/lib.rs index f84a4e45c1..d4fea9c6ee 100644 --- a/stdlib/src/lib.rs +++ b/stdlib/src/lib.rs @@ -1,6 +1,9 @@ #![no_std] -use assembly::{utils::Deserializable, Library, LibraryNamespace, MaslLibrary, Version}; +use assembly::{ + ast::ModuleAst, utils::Deserializable, Library, LibraryNamespace, LibraryPath, MaslLibrary, + Version, +}; // STANDARD LIBRARY // ================================================================================================ @@ -40,6 +43,10 @@ impl Library for StdLibrary { fn dependencies(&self) -> &[assembly::LibraryNamespace] { self.0.dependencies() } + + fn get_module_ast(&self, path: &LibraryPath) -> Option<&ModuleAst> { + self.0.get_module_ast(path) + } } #[test] diff --git a/stdlib/tests/collections/mmr.rs b/stdlib/tests/collections/mmr.rs index c56cc5b129..cb7f8f34dc 100644 --- a/stdlib/tests/collections/mmr.rs +++ b/stdlib/tests/collections/mmr.rs @@ -1,4 +1,5 @@ use test_utils::{ + collections::*, crypto::{ init_merkle_leaf, init_merkle_leaves, MerkleError, MerkleStore, MerkleTree, Mmr, NodeIndex, RpoDigest, @@ -376,12 +377,12 @@ fn test_mmr_unpack() { let store = MerkleStore::new(); let mut map_data: Vec = Vec::with_capacity(hash_data.len() + 1); - map_data.extend_from_slice(&[number_of_leaves.into(), ZERO, ZERO, ZERO]); + map_data.extend_from_slice(&[number_of_leaves.try_into().unwrap(), ZERO, ZERO, ZERO]); map_data.extend_from_slice(&hash_data.as_slice().concat()); - let advice_map: &[([u8; 32], Vec)] = &[ + let advice_map: &[(RpoDigest, Vec)] = &[ // Under the MMR key is the number_of_leaves, followed by the MMR peaks, and any padding - (hash.as_bytes(), map_data), + (hash, map_data), ]; let source = " @@ -444,9 +445,9 @@ fn test_mmr_unpack_invalid_hash() { map_data.extend_from_slice(&[Felt::new(0b10101), ZERO, ZERO, ZERO]); // 3 peaks, 21 leaves map_data.extend_from_slice(&hash_data.as_slice().concat()); - let advice_map: &[([u8; 32], Vec)] = &[ + let advice_map: &[(RpoDigest, Vec)] = &[ // Under the MMR key is the number_of_leaves, followed by the MMR peaks, and any padding - (hash.as_bytes(), map_data), + (hash, map_data), ]; let source = " @@ -500,12 +501,12 @@ fn test_mmr_unpack_large_mmr() { let store = MerkleStore::new(); let mut map_data: Vec = Vec::with_capacity(hash_data.len() + 1); - map_data.extend_from_slice(&[number_of_leaves.into(), ZERO, ZERO, ZERO]); + map_data.extend_from_slice(&[number_of_leaves.try_into().unwrap(), ZERO, ZERO, ZERO]); map_data.extend_from_slice(&hash_data.as_slice().concat()); - let advice_map: &[([u8; 32], Vec)] = &[ + let advice_map: &[(RpoDigest, Vec)] = &[ // Under the MMR key is the number_of_leaves, followed by the MMR peaks, and any padding - (hash.as_bytes(), map_data), + (hash, map_data), ]; let source = " @@ -546,11 +547,11 @@ fn test_mmr_pack_roundtrip() { mmr.add(init_merkle_leaf(2).into()); mmr.add(init_merkle_leaf(3).into()); - let accumulator = mmr.accumulator(); + let accumulator = mmr.peaks(mmr.forest()).unwrap(); let hash = accumulator.hash_peaks(); // Set up the VM stack with the MMR hash, and its target address - let mut stack = stack_to_ints(&hash); + let mut stack = 
stack_to_ints(hash.as_elements()); let mmr_ptr = 1000; stack.insert(0, mmr_ptr); // first value is used by unpack, to load data to memory stack.insert(0, mmr_ptr); // second is used by pack, to load data from memory @@ -560,20 +561,15 @@ fn test_mmr_pack_roundtrip() { let advice_stack = &[]; let store = MerkleStore::new(); - let mut hash_data = accumulator.peaks.clone(); + let mut hash_data = accumulator.peaks().to_vec(); hash_data.resize(16, RpoDigest::default()); let mut map_data: Vec = Vec::with_capacity(hash_data.len() + 1); - map_data.extend_from_slice(&[ - Felt::new(accumulator.num_leaves.try_into().unwrap()), - ZERO, - ZERO, - ZERO, - ]); + map_data.extend_from_slice(&[Felt::new(accumulator.num_leaves() as u64), ZERO, ZERO, ZERO]); map_data.extend_from_slice(digests_to_elements(&hash_data).as_ref()); - let advice_map: &[([u8; 32], Vec)] = &[ + let advice_map: &[(RpoDigest, Vec)] = &[ // Under the MMR key is the number_of_leaves, followed by the MMR peaks, and any padding - (RpoDigest::new(hash).as_bytes(), map_data), + (hash, map_data), ]; let source = " @@ -589,9 +585,9 @@ fn test_mmr_pack_roundtrip() { let mut expect_memory: Vec = Vec::new(); // first the number of leaves - expect_memory.extend_from_slice(&[accumulator.num_leaves as u64, 0, 0, 0]); + expect_memory.extend_from_slice(&[accumulator.num_leaves() as u64, 0, 0, 0]); // followed by the peaks - expect_memory.extend(digests_to_ints(&accumulator.peaks)); + expect_memory.extend(digests_to_ints(accumulator.peaks())); // followed by padding data let size = 4 + 16 * 4; expect_memory.resize(size, 0); @@ -623,7 +619,7 @@ fn test_mmr_pack() { hash_data.resize(16 * 4, ZERO); // padding data let hash = hash_elements(&hash_data); - let hash_u8 = hash.as_bytes(); + let hash_u8 = hash; let mut expect_data: Vec = Vec::new(); expect_data.extend_from_slice(&[Felt::new(3), ZERO, ZERO, ZERO]); // num_leaves @@ -684,10 +680,10 @@ fn test_mmr_two() { mmr.add([ONE, Felt::new(2), Felt::new(3), Felt::new(4)].into()); mmr.add([Felt::new(5), Felt::new(6), Felt::new(7), Felt::new(8)].into()); - let accumulator = mmr.accumulator(); - let peak = accumulator.peaks[0]; + let accumulator = mmr.peaks(mmr.forest()).unwrap(); + let peak = accumulator.peaks()[0]; - let num_leaves = accumulator.num_leaves.try_into().unwrap(); + let num_leaves = accumulator.num_leaves() as u64; let mut expected_memory = vec![num_leaves, 0, 0, 0]; expected_memory.extend(peak.iter().map(|v| v.as_int())); @@ -724,11 +720,11 @@ fn test_mmr_large() { mmr.add([ZERO, ZERO, ZERO, Felt::new(6)].into()); mmr.add([ZERO, ZERO, ZERO, Felt::new(7)].into()); - let accumulator = mmr.accumulator(); + let accumulator = mmr.peaks(mmr.forest()).unwrap(); - let num_leaves = accumulator.num_leaves.try_into().unwrap(); + let num_leaves = accumulator.num_leaves() as u64; let mut expected_memory = vec![num_leaves, 0, 0, 0]; - expected_memory.extend(digests_to_ints(&accumulator.peaks)); + expected_memory.extend(digests_to_ints(accumulator.peaks())); let expect_stack: Vec = accumulator.hash_peaks().iter().rev().map(|v| v.as_int()).collect(); @@ -749,11 +745,11 @@ fn test_mmr_large_add_roundtrip() { [ZERO, ZERO, ZERO, Felt::new(7)].into(), ]); - let old_accumulator = mmr.accumulator(); + let old_accumulator = mmr.peaks(mmr.forest()).unwrap(); let hash = old_accumulator.hash_peaks(); // Set up the VM stack with the MMR hash, and its target address - let mut stack = stack_to_ints(&hash); + let mut stack = stack_to_ints(hash.as_elements()); stack.insert(0, mmr_ptr as u64); // both the advice stack and 
merkle store start empty (data is available in @@ -761,17 +757,17 @@ fn test_mmr_large_add_roundtrip() { let advice_stack = &[]; let store = MerkleStore::new(); - let mut hash_data = old_accumulator.peaks.clone(); + let mut hash_data = old_accumulator.peaks().to_vec(); hash_data.resize(16, RpoDigest::default()); let mut map_data: Vec = Vec::with_capacity(hash_data.len() + 1); - let num_leaves: u64 = old_accumulator.num_leaves as u64; - map_data.extend_from_slice(&[Felt::from(num_leaves), ZERO, ZERO, ZERO]); + let num_leaves = old_accumulator.num_leaves() as u64; + map_data.extend_from_slice(&[Felt::try_from(num_leaves).unwrap(), ZERO, ZERO, ZERO]); map_data.extend_from_slice(&digests_to_elements(&hash_data)); - let advice_map: &[([u8; 32], Vec)] = &[ + let advice_map: &[(RpoDigest, Vec)] = &[ // Under the MMR key is the number_of_leaves, followed by the MMR peaks, and any padding - (RpoDigest::new(hash).as_bytes(), map_data), + (hash, map_data), ]; let source = format!( @@ -788,10 +784,10 @@ fn test_mmr_large_add_roundtrip() { mmr.add([ZERO, ZERO, ZERO, Felt::new(8)].into()); - let new_accumulator = mmr.accumulator(); - let num_leaves = new_accumulator.num_leaves.try_into().unwrap(); + let new_accumulator = mmr.peaks(mmr.forest()).unwrap(); + let num_leaves = new_accumulator.num_leaves() as u64; let mut expected_memory = vec![num_leaves, 0, 0, 0]; - let mut new_peaks = new_accumulator.peaks.clone(); + let mut new_peaks = new_accumulator.peaks().to_vec(); // make sure the old peaks are zeroed new_peaks.resize(16, RpoDigest::default()); expected_memory.extend(digests_to_ints(&new_peaks)); diff --git a/stdlib/tests/collections/mod.rs b/stdlib/tests/collections/mod.rs index 33bc04890c..f49ddd50dc 100644 --- a/stdlib/tests/collections/mod.rs +++ b/stdlib/tests/collections/mod.rs @@ -1,8 +1,7 @@ use test_utils::{ - crypto::{MerkleStore, SimpleSmt}, - Felt, StarkField, TestError, Word, EMPTY_WORD, ONE, ZERO, + crypto::{MerkleStore, RpoDigest, Smt}, + Felt, Word, EMPTY_WORD, }; mod mmr; mod smt; -mod smt64; diff --git a/stdlib/tests/collections/smt.rs b/stdlib/tests/collections/smt.rs index 366d6bbbd1..85c1d56a3d 100644 --- a/stdlib/tests/collections/smt.rs +++ b/stdlib/tests/collections/smt.rs @@ -1,664 +1,308 @@ -use crate::build_test; -use test_utils::{ - crypto::{MerkleStore, Rpo256, RpoDigest, TieredSmt}, - stack_to_ints, stack_top_to_ints, Felt, StarkField, Word, ONE, ZERO, -}; +use super::*; -type AdvMapEntry = ([u8; 32], Vec); - -// CONSTANTS +// TEST DATA // ================================================================================================ -const EMPTY_VALUE: Word = TieredSmt::EMPTY_VALUE; +/// Note: We never insert at the same key twice. This is so that the `smt::get` test can loop over +/// leaves, get the associated value, and compare. We test inserting at the same key twice in tests +/// that use different data. 
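+/// The keys are also chosen so that their most significant Felt differs, which is expected to
+/// place the two entries under distinct leaves of the tree.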
+const LEAVES: [(RpoDigest, Word); 2] = [ + ( + RpoDigest::new([Felt::new(101), Felt::new(102), Felt::new(103), Felt::new(104)]), + [Felt::new(1_u64), Felt::new(2_u64), Felt::new(3_u64), Felt::new(4_u64)], + ), + // Most significant Felt differs from previous + ( + RpoDigest::new([Felt::new(105), Felt::new(106), Felt::new(107), Felt::new(108)]), + [Felt::new(5_u64), Felt::new(6_u64), Felt::new(7_u64), Felt::new(8_u64)], + ), +]; + +/// Tests `get` on every key present in the SMT, as well as an empty leaf +#[test] +fn test_smt_get() { + fn expect_value_from_get(key: RpoDigest, value: Word, smt: &Smt) { + let source = " + use.std::collections::smt + begin + exec.smt::get + end + "; + let mut initial_stack = Vec::new(); + append_word_to_vec(&mut initial_stack, smt.root().into()); + append_word_to_vec(&mut initial_stack, key.into()); + let expected_output = build_expected_stack(value, smt.root().into()); + + let (store, advice_map) = build_advice_inputs(smt); + build_test!(source, &initial_stack, &[], store, advice_map).expect_stack(&expected_output); + } -// RETRIEVAL TESTS -// ================================================================================================ + let smt = Smt::with_entries(LEAVES).unwrap(); -#[test] -fn tsmt_get_16() { - let mut smt = TieredSmt::default(); - - // create a key - let raw_a = 0b_01010101_01101100_00011111_11111111_10010110_10010011_11100000_00000000_u64; - let key_a = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_a)]); - - // make sure we get an empty value for this key - assert_get(&smt, key_a, EMPTY_VALUE); - - // insert a value under this key and make sure we get it back when queried - let val_a = [ONE, ONE, ONE, ONE]; - smt.insert(key_a, val_a); - assert_get(&smt, key_a, val_a); - - // make sure that another key still returns empty value - let raw_b = 0b_01111101_01101100_00011111_11111111_10010110_10010011_11100000_00000000_u64; - let key_b = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_b)]); - assert_get(&smt, key_b, EMPTY_VALUE); - - // make sure that another key with the same 16-bit prefix returns an empty value - let raw_c = 0b_01010101_01101100_11111111_11111111_10010110_10010011_11100000_00000000_u64; - let key_c = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_c)]); - assert_get(&smt, key_c, EMPTY_VALUE); -} + // Get all leaves present in tree + for (key, value) in LEAVES { + expect_value_from_get(key, value, &smt); + } -#[test] -fn tsmt_get_32() { - let mut smt = TieredSmt::default(); - - // populate the tree with two key-value pairs sharing the same 16-bit prefix for the keys - let raw_a = 0b_01010101_01010101_00011111_11111111_10010110_10010011_11100000_00000000_u64; - let key_a = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_a)]); - let val_a = [ONE, ONE, ONE, ONE]; - smt.insert(key_a, val_a); - - let raw_b = 0b_01010101_01010101_11100000_11111111_10010110_10010011_11100000_00000000_u64; - let key_b = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_b)]); - let val_b = [ZERO, ONE, ONE, ONE]; - smt.insert(key_b, val_b); - - // make sure the values for these keys are retrieved correctly - assert_get(&smt, key_a, val_a); - assert_get(&smt, key_b, val_b); - - // make sure another key with the same 16-bit prefix returns an empty value - let raw_c = 0b_01010101_01010101_11100111_11111111_10010110_10010011_11100000_00000000_u64; - let key_c = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_c)]); - assert_get(&smt, key_c, EMPTY_VALUE); - - // make sure keys with the same 32-bit prefixes return empty value - let raw_d = 
0b_01010101_01010101_00011111_11111111_11111110_10010011_11100000_00000000_u64; - let key_d = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_d)]); - assert_get(&smt, key_d, EMPTY_VALUE); - - // make sure keys with the same 32-bit prefixes return empty value - let raw_e = 0b_01010101_01010101_11100000_11111111_10011111_10010011_11100000_00000000_u64; - let key_e = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_e)]); - assert_get(&smt, key_e, EMPTY_VALUE); + // Get an empty leaf + expect_value_from_get( + RpoDigest::new([42_u32.into(), 42_u32.into(), 42_u32.into(), 42_u32.into()]), + EMPTY_WORD, + &smt, + ); } +/// Tests inserting and removing key-value pairs to an SMT. We do the insert/removal twice to ensure +/// that the removal properly updates the advice map/stack. #[test] -fn tsmt_get_48() { - let mut smt = TieredSmt::default(); - - // populate the tree with two key-value pairs sharing the same 32-bit prefix for the keys - let raw_a = 0b_01010101_01010101_00011111_11111111_10010110_10010011_11100000_00000000_u64; - let key_a = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_a)]); - let val_a = [ONE, ONE, ONE, ONE]; - smt.insert(key_a, val_a); - - let raw_b = 0b_01010101_01010101_00011111_11111111_11111111_10010011_11100000_00000000_u64; - let key_b = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_b)]); - let val_b = [ZERO, ONE, ONE, ONE]; - smt.insert(key_b, val_b); - - // make sure the values for these keys are retrieved correctly - assert_get(&smt, key_a, val_a); - assert_get(&smt, key_b, val_b); - - // make sure another key with the same 32-bit prefix returns an empty value - let raw_c = 0b_01010101_01010101_00011111_11111111_00000000_10010011_11100000_00000000_u64; - let key_c = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_c)]); - assert_get(&smt, key_c, EMPTY_VALUE); - - // make sure keys with the same 48-bit prefixes return empty value - let raw_d = 0b_01010101_01010101_00011111_11111111_10010110_10010011_00000111_00000000_u64; - let key_d = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_d)]); - assert_get(&smt, key_d, EMPTY_VALUE); - - // make sure keys with the same 48-bit prefixes return empty value - let raw_e = 0b_01010101_01010101_00011111_11111111_11111111_10010011_000001011_00000000_u64; - let key_e = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_e)]); - assert_get(&smt, key_e, EMPTY_VALUE); -} - -/// Asserts key/value opens to root for the provided Tiered Sparse Merkle tree. 
-fn assert_get(smt: &TieredSmt, key: RpoDigest, value: Word) { - let root = smt.root(); - let source = r#" - use.std::collections::smt +fn test_smt_set() { + fn assert_insert_and_remove(smt: &mut Smt) { + let empty_tree_root = smt.root(); - begin - exec.smt::get - end - "#; - let initial_stack = [ - root[0].as_int(), - root[1].as_int(), - root[2].as_int(), - root[3].as_int(), - key[0].as_int(), - key[1].as_int(), - key[2].as_int(), - key[3].as_int(), - ]; - let expected_output = [ - value[3].as_int(), - value[2].as_int(), - value[1].as_int(), - value[0].as_int(), - root[3].as_int(), - root[2].as_int(), - root[1].as_int(), - root[0].as_int(), - ]; - - let (store, advice_map) = build_advice_inputs(smt); - let advice_stack = []; - build_test!(source, &initial_stack, &advice_stack, store, advice_map.into_iter()) - .expect_stack(&expected_output); -} + let source = " + use.std::collections::smt + begin + exec.smt::set + end + "; + + // insert values one-by-one into the tree + let mut old_roots = Vec::new(); + for (key, value) in LEAVES { + old_roots.push(smt.root()); + let (init_stack, final_stack, store, advice_map) = + prepare_insert_or_set(key, value, smt); + build_test!(source, &init_stack, &[], store, advice_map).expect_stack(&final_stack); + } + + // setting to [ZERO; 4] should return the tree to the prior state + for (key, old_value) in LEAVES.iter().rev() { + let value = EMPTY_WORD; + let (init_stack, final_stack, store, advice_map) = + prepare_insert_or_set(*key, value, smt); + + let expected_final_stack = + build_expected_stack(*old_value, old_roots.pop().unwrap().into()); + assert_eq!(expected_final_stack, final_stack); + build_test!(source, &init_stack, &[], store, advice_map).expect_stack(&final_stack); + } + + assert_eq!(smt.root(), empty_tree_root); + } -fn build_advice_inputs(smt: &TieredSmt) -> (MerkleStore, Vec<([u8; 32], Vec)>) { - let store = MerkleStore::from(smt); - let advice_map = smt - .upper_leaves() - .map(|(node, key, value)| { - let mut elements = key.as_elements().to_vec(); - elements.extend(&value); - (node.as_bytes(), elements) - }) - .collect::>(); + let mut smt = Smt::new(); - (store, advice_map) + assert_insert_and_remove(&mut smt); + assert_insert_and_remove(&mut smt); } -// INSERTION TESTS -// ================================================================================================ - +/// Tests updating an existing key with a different value #[test] -fn tsmt_insert_16() { - let mut smt = TieredSmt::default(); - - let raw_a = 0b00000000_00000000_11111111_11111111_11111111_11111111_11111111_11111111_u64; - let key_a = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_a)]); - let val_a1 = [ONE, ZERO, ZERO, ZERO]; - let val_a2 = [ONE, ONE, ZERO, ZERO]; - - // insert a value under key_a into an empty tree; this inserts one entry into the advice map - let init_smt = smt.clone(); - smt.insert(key_a.into(), val_a1); - let new_map_entries = [build_node_entry(key_a, val_a1, 16)]; - assert_insert(&init_smt, key_a, EMPTY_VALUE, val_a1, smt.root().into(), &new_map_entries); - - // update a value under key_a; this inserts one entry into the advice map - let init_smt = smt.clone(); - smt.insert(key_a.into(), val_a2); - let new_map_entries = [build_node_entry(key_a, val_a2, 16)]; - assert_insert(&init_smt, key_a, val_a1, val_a2, smt.root().into(), &new_map_entries); +fn test_smt_set_same_key() { + let mut smt = Smt::with_entries(LEAVES).unwrap(); + + let source = " + use.std::collections::smt + begin + exec.smt::set + end + "; + + let key = LEAVES[0].0; + let value = 
[42323_u32.into(); 4]; + let (init_stack, final_stack, store, advice_map) = prepare_insert_or_set(key, value, &mut smt); + build_test!(source, &init_stack, &[], store, advice_map).expect_stack(&final_stack); } +/// Tests inserting an empty value to an empty tree #[test] -fn tsmt_insert_32() { - let mut smt = TieredSmt::default(); - - // insert a value under key_a into an empty tree - let raw_a = 0b00000000_00000000_11111111_11111111_11111111_11111111_11111111_11111111_u64; - let key_a = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_a)]); - let val_a = [ONE, ZERO, ZERO, ZERO]; - smt.insert(key_a.into(), val_a); - - // insert a value under key_b which has the same 16-bit prefix as A - let raw_b = 0b00000000_00000000_01111111_11111111_11111111_11111111_11111111_11111111_u64; - let key_b = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_b)]); - let val_b = [ONE, ONE, ZERO, ZERO]; - - // this tests a complex insertion when a leaf node moves from depth 16 to depth 32; this - // moves the original node to depth 32, and thus two new entries are added to the advice map - let init_smt = smt.clone(); - smt.insert(key_b.into(), val_b); - let new_map_entries = [build_node_entry(key_a, val_a, 32), build_node_entry(key_b, val_b, 32)]; - assert_insert(&init_smt, key_b, EMPTY_VALUE, val_b, smt.root().into(), &new_map_entries); - - // update a value under key_a; this adds one new entry to the advice map - let init_smt = smt.clone(); - let val_a2 = [ONE, ZERO, ZERO, ONE]; - smt.insert(key_a.into(), val_a2); - let new_map_entries = [build_node_entry(key_a, val_a2, 32)]; - assert_insert(&init_smt, key_a, val_a, val_a2, smt.root().into(), &new_map_entries); - - // insert a value under key_c which has the same 16-bit prefix as A and B; this inserts a new - // node at depth 32, and thus adds one entry to the advice map - let raw_c = 0b00000000_00000000_00111111_11111111_11111111_11111111_11111111_11111111_u64; - let key_c = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_c)]); - let val_c = [ONE, ONE, ONE, ZERO]; - - let init_smt = smt.clone(); - smt.insert(key_c.into(), val_c); - let new_map_entries = [build_node_entry(key_c, val_c, 32)]; - assert_insert(&init_smt, key_c, EMPTY_VALUE, val_c, smt.root().into(), &new_map_entries); +fn test_smt_set_empty_value_to_empty_leaf() { + let mut smt = Smt::new(); + let empty_tree_root = smt.root(); + + let source = " + use.std::collections::smt + begin + exec.smt::set + end + "; + + let key = RpoDigest::new([41_u32.into(), 42_u32.into(), 43_u32.into(), 44_u32.into()]); + let value = EMPTY_WORD; + let (init_stack, final_stack, store, advice_map) = prepare_insert_or_set(key, value, &mut smt); + build_test!(source, &init_stack, &[], store, advice_map).expect_stack(&final_stack); + + assert_eq!(smt.root(), empty_tree_root); } +/// Tests that the advice map is properly updated after a `set` on an empty key #[test] -fn tsmt_insert_48() { - let mut smt = TieredSmt::default(); - - // insert a value under key_a into an empty tree - let raw_a = 0b00000000_00000000_11111111_11111111_11111111_11111111_11111111_11111111_u64; - let key_a = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_a)]); - let val_a = [ONE, ZERO, ZERO, ZERO]; - smt.insert(key_a.into(), val_a); - - // insert a value under key_b which has the same 32-bit prefix as A - let raw_b = 0b00000000_00000000_11111111_11111111_01111111_11111111_11111111_11111111_u64; - let key_b = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_b)]); - let val_b = [ONE, ONE, ZERO, ZERO]; - - // this tests a complex insertion when a leaf moves from 
depth 16 to depth 48; this moves - // node at depth 16 to depth 48 and inserts a new node at depth 48 - let init_smt = smt.clone(); - smt.insert(key_b.into(), val_b); - let new_map_entries = [build_node_entry(key_a, val_a, 48), build_node_entry(key_b, val_b, 48)]; - assert_insert(&init_smt, key_b, EMPTY_VALUE, val_b, smt.root().into(), &new_map_entries); - - // update a value under key_a; this inserts one entry into the advice map - let init_smt = smt.clone(); - let val_a2 = [ONE, ZERO, ZERO, ONE]; - smt.insert(key_a.into(), val_a2); - let new_map_entries = [build_node_entry(key_a, val_a2, 48)]; - assert_insert(&init_smt, key_a, val_a, val_a2, smt.root().into(), &new_map_entries); - - // insert a value under key_c which has the same 32-bit prefix as A and B; this inserts - // one entry into the advice map - let raw_c = 0b00000000_00000000_11111111_11111111_00111111_11111111_11111111_11111111_u64; - let key_c = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_c)]); - let val_c = [ONE, ONE, ONE, ZERO]; - - let init_smt = smt.clone(); - smt.insert(key_c.into(), val_c); - let new_map_entries = [build_node_entry(key_c, val_c, 48)]; - assert_insert(&init_smt, key_c, EMPTY_VALUE, val_c, smt.root().into(), &new_map_entries); +fn test_set_advice_map_empty_key() { + let mut smt = Smt::new(); + + let source = " + use.std::collections::smt + # Stack: [V, K, R] + begin + # copy V and K, and save lower on stack + dupw.1 movdnw.3 dupw movdnw.3 + # => [V, K, R, V, K] + + # Sets the advice map + exec.smt::set + # => [V_old, R_new, V, K] + + # Prepare for peek + dropw movupw.2 + # => [K, R_new, V] + + # Fetch what was stored on advice map and clean stack + adv.push_smtpeek dropw dropw + # => [V] + + # Push advice map values on stack + adv_push.4 + # => [V_in_map, V] + + # Check for equality of V's + assert_eqw + # => [K] + end + "; + + let key = RpoDigest::new([41_u32.into(), 42_u32.into(), 43_u32.into(), 44_u32.into()]); + let value: [Felt; 4] = [42323_u32.into(); 4]; + let (init_stack, _, store, advice_map) = prepare_insert_or_set(key, value, &mut smt); + + // assert is checked in MASM + build_test!(source, &init_stack, &[], store, advice_map).execute().unwrap(); } +/// Tests that the advice map is properly updated after a `set` on a key that has existing value #[test] -fn tsmt_insert_48_from_32() { - let mut smt = TieredSmt::default(); - - // insert a value under key_a into an empty tree - let raw_a = 0b00000000_00000000_11111111_11111111_11111111_11111111_11111111_11111111_u64; - let key_a = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_a)]); - let val_a = [ONE, ZERO, ZERO, ZERO]; - smt.insert(key_a.into(), val_a); - - // insert a value under key_b which has the same 16-bit prefix as A - let raw_b = 0b00000000_00000000_01111111_11111111_01111111_11111111_11111111_11111111_u64; - let key_b = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_b)]); - let val_b = [ONE, ONE, ZERO, ZERO]; - smt.insert(key_b.into(), val_b); - - // insert a value under key_c which has the same 32-bit prefix as A - let raw_c = 0b00000000_00000000_11111111_11111111_00111111_11111111_11111111_11111111_u64; - let key_c = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_c)]); - let val_c = [ONE, ONE, ONE, ZERO]; - - // this tests a complex insertion when a leaf moves from depth 32 to depth 48; two new - // entries are added to the advice map - let init_smt = smt.clone(); - smt.insert(key_c.into(), val_c); - let new_map_entries = [build_node_entry(key_a, val_a, 48), build_node_entry(key_c, val_c, 48)]; - assert_insert(&init_smt, key_c, 
EMPTY_VALUE, val_c, smt.root().into(), &new_map_entries); +fn test_set_advice_map_single_key() { + let mut smt = Smt::with_entries(LEAVES).unwrap(); + + let source = " + use.std::collections::smt + # Stack: [V, K, R] + begin + # copy V and K, and save lower on stack + dupw.1 movdnw.3 dupw movdnw.3 + # => [V, K, R, V, K] + + # Sets the advice map + exec.smt::set + # => [V_old, R_new, V, K] + + # Prepare for peek + dropw movupw.2 + # => [K, R_new, V] + + # Fetch what was stored on advice map and clean stack + adv.push_smtpeek dropw dropw + # => [V] + + # Push advice map values on stack + adv_push.4 + # => [V_in_map, V] + + # Check for equality of V's + assert_eqw + # => [K] + end + "; + + let key = LEAVES[0].0; + let value: [Felt; 4] = [42323_u32.into(); 4]; + let (init_stack, _, store, advice_map) = prepare_insert_or_set(key, value, &mut smt); + + // assert is checked in MASM + build_test!(source, &init_stack, &[], store, advice_map).execute().unwrap(); } -fn assert_insert( - init_smt: &TieredSmt, - key: RpoDigest, - old_value: Word, - new_value: Word, - new_root: RpoDigest, - new_map_entries: &[AdvMapEntry], -) { - let old_root = init_smt.root(); - let source = r#" - use.std::collections::smt - - begin - exec.smt::insert - end - "#; - let initial_stack = [ - old_root[0].as_int(), - old_root[1].as_int(), - old_root[2].as_int(), - old_root[3].as_int(), - key[0].as_int(), - key[1].as_int(), - key[2].as_int(), - key[3].as_int(), - new_value[0].as_int(), - new_value[1].as_int(), - new_value[2].as_int(), - new_value[3].as_int(), - ]; - let expected_output = stack_top_to_ints(&[ - old_value[3].as_int(), - old_value[2].as_int(), - old_value[1].as_int(), - old_value[0].as_int(), - new_root[3].as_int(), - new_root[2].as_int(), - new_root[1].as_int(), - new_root[0].as_int(), - ]); - let (store, adv_map) = build_advice_inputs(init_smt); - let process = build_test!(source, &initial_stack, &[], store, adv_map.clone()) - .execute_process() - .unwrap(); - - // check the returned values - let stack = stack_to_ints(&process.stack.trace_state()); - assert_eq!(stack, expected_output); - - // remove the initial key-value pairs from the advice map - let mut new_adv_map = process.host.borrow().advice_provider().map().clone(); - for (key, value) in adv_map.iter() { - let init_value = new_adv_map.remove(key).unwrap(); - assert_eq!(value, &init_value); - } - - // make sure the remaining values in the advice map are the same as expected new entries - assert_eq!(new_adv_map.len(), new_map_entries.len()); - for (key, val) in new_map_entries { - let old_val = new_adv_map.get(key).unwrap(); - assert_eq!(old_val, val); - } +/// Tests setting an empty value to an empty key, but that maps to a leaf with another key +/// (i.e. 
removing a value that's already empty) +#[test] +fn test_set_empty_key_in_non_empty_leaf() { + let key_mse = Felt::new(42); + + let leaves: [(RpoDigest, Word); 1] = [( + RpoDigest::new([Felt::new(101), Felt::new(102), Felt::new(103), key_mse]), + [Felt::new(1_u64), Felt::new(2_u64), Felt::new(3_u64), Felt::new(4_u64)], + )]; + + let mut smt = Smt::with_entries(leaves).unwrap(); + + // This key has same most significant element as key in the existing leaf, so will map to that + // leaf + let new_key = RpoDigest::new([Felt::new(1), Felt::new(12), Felt::new(3), key_mse]); + + let source = " + use.std::collections::smt + begin + exec.smt::set + end + "; + let (init_stack, final_stack, store, advice_map) = + prepare_insert_or_set(new_key, EMPTY_WORD, &mut smt); + + build_test!(source, &init_stack, &[], store, advice_map).expect_stack(&final_stack); } -// SET TESTS +// HELPER FUNCTIONS // ================================================================================================ -#[test] -fn tsmt_set_16() { - let mut smt = TieredSmt::default(); - - let raw_a = 0b00000000_00000000_11111111_11111111_11111111_11111111_11111111_11111111_u64; - let key_a = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_a)]); - let val_a1 = [ONE, ZERO, ZERO, ZERO]; - let val_a2 = [ONE, ONE, ZERO, ZERO]; - - // set a value under key_a into an empty tree; this inserts one entry into the advice map - let init_smt = smt.clone(); - smt.insert(key_a.into(), val_a1); - let new_map_entries = [build_node_entry(key_a, val_a1, 16)]; - assert_set(&init_smt, key_a, EMPTY_VALUE, val_a1, smt.root().into(), &new_map_entries); - - // update a value under key_a; this inserts one entry into the advice map - let init_smt = smt.clone(); - smt.insert(key_a.into(), val_a2); - let new_map_entries = [build_node_entry(key_a, val_a2, 16)]; - assert_set(&init_smt, key_a, val_a1, val_a2, smt.root().into(), &new_map_entries); - - // set an empty value for a previously un-set key; this should not change the tree - let raw_b = 0b00000000_10000000_11111111_11111111_11111111_11111111_11111111_11111111_u64; - let key_b = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_b)]); - assert_set(&smt, key_b, EMPTY_VALUE, EMPTY_VALUE, smt.root().into(), &[]); - - // set an empty value for a previously un-set key which shares 16-bit prefix with A; - // this should not change the tree - let raw_c = 0b00000000_00000000_01111111_11111111_11111111_11111111_11111111_11111111_u64; - let key_c = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_c)]); - assert_set(&smt, key_c, EMPTY_VALUE, EMPTY_VALUE, smt.root().into(), &[]); - - // set the value at key A to an empty word; this removes node A from the tree - let init_smt = smt.clone(); - smt.insert(key_a.into(), EMPTY_VALUE); - assert_set(&init_smt, key_a, val_a2, EMPTY_VALUE, smt.root().into(), &[]); -} +fn prepare_insert_or_set( + key: RpoDigest, + value: Word, + smt: &mut Smt, +) -> (Vec, Vec, MerkleStore, Vec<(RpoDigest, Vec)>) { + // set initial state of the stack to be [VALUE, KEY, ROOT, ...] 
+ let mut initial_stack = Vec::new(); + append_word_to_vec(&mut initial_stack, smt.root().into()); + append_word_to_vec(&mut initial_stack, key.into()); + append_word_to_vec(&mut initial_stack, value); + + // build a Merkle store for the test before the tree is updated, and then update the tree + let (store, advice_map) = build_advice_inputs(smt); + let old_value = smt.insert(key, value); -#[test] -fn tsmt_set_32() { - let mut smt = TieredSmt::default(); - - // insert a value under key_a into an empty tree - let raw_a = 0b00000000_00000000_11111111_11111111_11111111_11111111_11111111_11111111_u64; - let key_a = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_a)]); - let val_a = [ONE, ZERO, ZERO, ZERO]; - smt.insert(key_a.into(), val_a); - - // insert a value under key_b which has the same 16-bit prefix as A - let raw_b = 0b00000000_00000000_01111111_11111111_11111111_11111111_11111111_11111111_u64; - let key_b = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_b)]); - let val_b = [ONE, ONE, ZERO, ZERO]; - - // this tests a complex insertion when a leaf node moves from depth 16 to depth 32; this - // moves the original node to depth 32, and thus two new entries are added to the advice map - let init_smt = smt.clone(); - smt.insert(key_b.into(), val_b); - let new_map_entries = [build_node_entry(key_a, val_a, 32), build_node_entry(key_b, val_b, 32)]; - assert_set(&init_smt, key_b, EMPTY_VALUE, val_b, smt.root().into(), &new_map_entries); - - // update a value under key_a; this adds one new entry to the advice map - let init_smt = smt.clone(); - let val_a2 = [ONE, ZERO, ZERO, ONE]; - smt.insert(key_a.into(), val_a2); - let new_map_entries = [build_node_entry(key_a, val_a2, 32)]; - assert_set(&init_smt, key_a, val_a, val_a2, smt.root().into(), &new_map_entries); - - // insert a value under key_c which has the same 16-bit prefix as A and B; this inserts a new - // node at depth 32, and thus adds one entry to the advice map - let raw_c = 0b00000000_00000000_00111111_11111111_11111111_11111111_11111111_11111111_u64; - let key_c = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_c)]); - let val_c = [ONE, ONE, ONE, ZERO]; - - let init_smt = smt.clone(); - smt.insert(key_c.into(), val_c); - let new_map_entries = [build_node_entry(key_c, val_c, 32)]; - assert_set(&init_smt, key_c, EMPTY_VALUE, val_c, smt.root().into(), &new_map_entries); - - // remove C from the tree - let init_smt = smt.clone(); - smt.insert(key_c.into(), EMPTY_VALUE); - assert_set(&init_smt, key_c, val_c, EMPTY_VALUE, smt.root().into(), &[]); - - // remove A from the tree; this should move B to depth 16, and thus inserts a new entry into - // the advice map for node B at depth 16 - let init_smt = smt.clone(); - smt.insert(key_a.into(), EMPTY_VALUE); - let new_map_entries = [build_node_entry(key_b, val_b, 16)]; - assert_set(&init_smt, key_a, val_a2, EMPTY_VALUE, smt.root().into(), &new_map_entries); -} + // after insert or set, the stack should be [OLD_VALUE, ROOT, ...] 
+ let expected_output = build_expected_stack(old_value, smt.root().into()); -#[test] -fn tsmt_set_48() { - let mut smt = TieredSmt::default(); - - // insert a value under key_a into an empty tree - let raw_a = 0b00000000_00000000_11111111_11111111_11111111_11111111_11111111_11111111_u64; - let key_a = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_a)]); - let val_a = [ONE, ZERO, ZERO, ZERO]; - smt.insert(key_a.into(), val_a); - - // insert a value under key_b which has the same 32-bit prefix as A - let raw_b = 0b00000000_00000000_11111111_11111111_00111111_11111111_11111111_11111111_u64; - let key_b = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_b)]); - let val_b = [ONE, ONE, ZERO, ZERO]; - - // this tests a complex insertion when a leaf moves from depth 16 to depth 48; this moves - // node at depth 16 to depth 48 and inserts a new node at depth 48 - let init_smt = smt.clone(); - smt.insert(key_b.into(), val_b); - let new_map_entries = [build_node_entry(key_a, val_a, 48), build_node_entry(key_b, val_b, 48)]; - assert_set(&init_smt, key_b, EMPTY_VALUE, val_b, smt.root().into(), &new_map_entries); - - // update a value under key_a; this inserts one entry into the advice map - let init_smt = smt.clone(); - let val_a2 = [ONE, ZERO, ZERO, ONE]; - smt.insert(key_a.into(), val_a2); - let new_map_entries = [build_node_entry(key_a, val_a2, 48)]; - assert_set(&init_smt, key_a, val_a, val_a2, smt.root().into(), &new_map_entries); - - // insert a value under key_c which has the same 32-bit prefix as A and B; this inserts - // one entry into the advice map - let raw_c = 0b00000000_00000000_11111111_11111111_00111111_01111111_11111111_11111111_u64; - let key_c = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_c)]); - let val_c = [ONE, ONE, ONE, ZERO]; - - let init_smt = smt.clone(); - smt.insert(key_c.into(), val_c); - let new_map_entries = [build_node_entry(key_c, val_c, 48)]; - assert_set(&init_smt, key_c, EMPTY_VALUE, val_c, smt.root().into(), &new_map_entries); - - // at this point the tree has 3 nodes A, B, C, all 3 are a depth 48 and share the same 32-bit - // prefix; also B and C share the same 34-bit prefix. 
- - // remove node A from the tree; since B and C share the same 34-bit prefix, they remain at - // depth 48 - let init_smt = smt.clone(); - smt.insert(key_a.into(), EMPTY_VALUE); - assert_set(&init_smt, key_a, val_a2, EMPTY_VALUE, smt.root().into(), &[]); - - // remove node B from the tree; this will move node C to depth 16, and thus inserts a new - // entry into the advice map for node C at depth 16 - let init_smt = smt.clone(); - smt.insert(key_b.into(), EMPTY_VALUE); - let new_map_entries = [build_node_entry(key_c, val_c, 16)]; - assert_set(&init_smt, key_b, val_b, EMPTY_VALUE, smt.root().into(), &new_map_entries); + (initial_stack, expected_output, store, advice_map) } -#[test] -fn tsmt_set_48_from_32() { - let mut smt = TieredSmt::default(); - - // insert a value under key_a into an empty tree - let raw_a = 0b00000000_00000000_11111111_11111111_11111111_11111111_11111111_11111111_u64; - let key_a = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_a)]); - let val_a = [ONE, ZERO, ZERO, ZERO]; - smt.insert(key_a.into(), val_a); - - // insert a value under key_b which has the same 16-bit prefix as A - let raw_b = 0b00000000_00000000_01111111_11111111_01111111_11111111_11111111_11111111_u64; - let key_b = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_b)]); - let val_b = [ONE, ONE, ZERO, ZERO]; - smt.insert(key_b.into(), val_b); - - // insert a value under key_c which has the same 32-bit prefix as A - let raw_c = 0b00000000_00000000_11111111_11111111_00111111_11111111_11111111_11111111_u64; - let key_c = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_c)]); - let val_c = [ONE, ONE, ONE, ZERO]; - - // this tests a complex insertion when a leaf moves from depth 32 to depth 48; two new - // entries are added to the advice map - let init_smt = smt.clone(); - smt.insert(key_c.into(), val_c); - let new_map_entries = [build_node_entry(key_a, val_a, 48), build_node_entry(key_c, val_c, 48)]; - assert_set(&init_smt, key_c, EMPTY_VALUE, val_c, smt.root().into(), &new_map_entries); - - // remove C from the tree; this should cause leaf A to move to depth 32 - let init_smt = smt.clone(); - smt.insert(key_c.into(), EMPTY_VALUE); - let new_map_entries = [build_node_entry(key_a, val_a, 32)]; - assert_set(&init_smt, key_c, val_c, EMPTY_VALUE, smt.root().into(), &new_map_entries); - - // remove B from the tree; this should cause leaf A to move to depth 16 - let init_smt = smt.clone(); - smt.insert(key_b.into(), EMPTY_VALUE); - let new_map_entries = [build_node_entry(key_a, val_a, 16)]; - assert_set(&init_smt, key_b, val_b, EMPTY_VALUE, smt.root().into(), &new_map_entries); -} +fn build_advice_inputs(smt: &Smt) -> (MerkleStore, Vec<(RpoDigest, Vec)>) { + let store = MerkleStore::from(smt); + let advice_map = smt + .leaves() + .map(|(_, leaf)| { + let leaf_hash = leaf.hash(); + (leaf_hash, leaf.to_elements()) + }) + .collect::>(); -#[test] -fn tsmt_set_48_lone_sibling_move_to_32() { - let mut smt = TieredSmt::default(); - - // depth 48 leaf - let raw_a = 0b00000000_00000000_11111111_11111111_11111111_11111111_11111111_11111111_u64; - let key_a = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_a)]); - let val_a = [ONE, ZERO, ZERO, ZERO]; - smt.insert(key_a.into(), val_a); - - // depth 48 leaf - let raw_b = 0b00000000_00000000_11111111_11111111_11111111_00111111_11111111_11111111_u64; - let key_b = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_b)]); - let val_b = [ONE, ONE, ZERO, ZERO]; - smt.insert(key_b.into(), val_b); - - // depth 32 leaf - let raw_c = 
0b00000000_00000000_11111111_11111110_11111111_11111111_11111111_11111111_u64; - let key_c = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_c)]); - let val_c = [ONE, ZERO, ZERO, ZERO]; - smt.insert(key_c.into(), val_c); - - // depth 32 leaf - let raw_d = 0b00000000_00000000_11111111_11111101_11111111_11111111_11111111_11111111_u64; - let key_d = RpoDigest::from([ONE, ONE, ONE, Felt::new(raw_d)]); - let val_d = [ONE, ONE, ZERO, ZERO]; - smt.insert(key_d, val_d); - - // remove leaf a such that it key_b, val_b should move to depth 32 - let init_smt = smt.clone(); - smt.insert(key_a.into(), EMPTY_VALUE); - let new_map_entries = [build_node_entry(key_b, val_b, 32)]; - assert_set(&init_smt, key_a, val_a, EMPTY_VALUE, smt.root().into(), &new_map_entries); + (store, advice_map) } -fn assert_set( - init_smt: &TieredSmt, - key: RpoDigest, - old_value: Word, - new_value: Word, - new_root: RpoDigest, - new_map_entries: &[AdvMapEntry], -) { - let old_root = init_smt.root(); - let source = r#" - use.std::collections::smt - - begin - exec.smt::set - end - "#; - let initial_stack = [ - old_root[0].as_int(), - old_root[1].as_int(), - old_root[2].as_int(), - old_root[3].as_int(), - key[0].as_int(), - key[1].as_int(), - key[2].as_int(), - key[3].as_int(), - new_value[0].as_int(), - new_value[1].as_int(), - new_value[2].as_int(), - new_value[3].as_int(), - ]; - let expected_output = stack_top_to_ints(&[ - old_value[3].as_int(), - old_value[2].as_int(), - old_value[1].as_int(), - old_value[0].as_int(), - new_root[3].as_int(), - new_root[2].as_int(), - new_root[1].as_int(), - new_root[0].as_int(), - ]); - let (store, adv_map) = build_advice_inputs(init_smt); - let process = build_test!(source, &initial_stack, &[], store, adv_map.clone()) - .execute_process() - .unwrap(); - - // check the returned values - let stack = stack_to_ints(&process.stack.trace_state()); - assert_eq!(stack, expected_output); - - // remove the initial key-value pairs from the advice map - let mut new_adv_map = process.host.borrow().advice_provider().map().clone(); - for (key, value) in adv_map.iter() { - let init_value = new_adv_map.remove(key).unwrap(); - assert_eq!(value, &init_value); - } - - // make sure the remaining values in the advice map are the same as expected new entries - assert_eq!(new_adv_map.len(), new_map_entries.len()); - for (key, val) in new_map_entries { - let old_val = new_adv_map.get(key).unwrap(); - assert_eq!(old_val, val); - } +fn build_expected_stack(word0: Word, word1: Word) -> Vec { + vec![ + word0[3].as_int(), + word0[2].as_int(), + word0[1].as_int(), + word0[0].as_int(), + word1[3].as_int(), + word1[2].as_int(), + word1[1].as_int(), + word1[0].as_int(), + ] } -// HELPER FUNCTIONS -// ================================================================================================ - -fn build_node_entry(key: RpoDigest, value: Word, depth: u8) -> AdvMapEntry { - let digest = Rpo256::merge_in_domain(&[key.into(), value.into()], depth.into()); - let mut elements = key.to_vec(); - elements.extend_from_slice(&value); - (digest.into(), elements) +fn append_word_to_vec(target: &mut Vec, word: Word) { + target.push(word[0].as_int()); + target.push(word[1].as_int()); + target.push(word[2].as_int()); + target.push(word[3].as_int()); } diff --git a/stdlib/tests/collections/smt64.rs b/stdlib/tests/collections/smt64.rs deleted file mode 100644 index 3aa2aade6f..0000000000 --- a/stdlib/tests/collections/smt64.rs +++ /dev/null @@ -1,172 +0,0 @@ -use super::{Felt, MerkleStore, SimpleSmt, StarkField, TestError, Word, 
EMPTY_WORD, ONE, ZERO}; -use crate::build_test; - -// TEST DATA -// ================================================================================================ - -const LEAVES: [(u64, Word); 5] = [ - ( - 0b00000000_00000000_11111111_11111111_11111111_11111111_11111111_11111111_u64, - [ONE, ZERO, ZERO, ZERO], - ), - ( - // different from the first key starting from the first bit - 0b10000000_00000000_11111111_11111111_11111111_11111111_11111111_11111111_u64, - [Felt::new(2), ZERO, ZERO, ZERO], - ), - ( - // same 16-bit prefix as the first key - 0b00000000_00000000_01111111_11111111_11111111_11111111_11111111_11111111_u64, - [Felt::new(2), ZERO, ZERO, ZERO], - ), - ( - // same 32-bit prefix as the first key - 0b00000000_00000000_11111111_11111111_01111111_11111111_11111111_11111111_u64, - [Felt::new(3), ZERO, ZERO, ZERO], - ), - ( - // same 48-bit prefix as the first key - 0b00000000_00000000_11111111_11111111_11111111_11111111_01111111_11111111_u64, - [Felt::new(4), ZERO, ZERO, ZERO], - ), -]; - -// TESTS -// ================================================================================================ - -#[test] -fn get() { - let smt = SimpleSmt::with_leaves(64, LEAVES).unwrap(); - - let source = " - use.std::collections::smt64 - begin - exec.smt64::get - end - "; - - for (index, value) in LEAVES { - let mut initial_stack = Vec::new(); - append_word_to_vec(&mut initial_stack, smt.root().into()); - initial_stack.push(index); - let expected_output = build_expected_stack(value, smt.root().into()); - - let store = MerkleStore::from(&smt); - build_test!(source, &initial_stack, &[], store, vec![]).expect_stack(&expected_output); - } -} - -#[test] -fn insert() { - let mut smt = SimpleSmt::new(64).unwrap(); - - let source = " - use.std::collections::smt64 - begin - exec.smt64::insert - end - "; - - // insert values one-by-one into the tree - for (index, value) in LEAVES { - let (init_stack, final_stack, store) = prepare_insert_or_set(index, value, &mut smt); - build_test!(source, &init_stack, &[], store, vec![]).expect_stack(&final_stack); - } - - // update one of the previously inserted values - let index = LEAVES[0].0; - let value = [ONE; 4]; - let (init_stack, final_stack, store) = prepare_insert_or_set(index, value, &mut smt); - build_test!(source, &init_stack, &[], store, vec![]).expect_stack(&final_stack); - - // try to insert an invalid value - let value = EMPTY_WORD; - let (init_stack, _, store) = prepare_insert_or_set(index, value, &mut smt); - build_test!(source, &init_stack, &[], store, vec![]) - .expect_error(TestError::ExecutionError("FailedAssertion")); -} - -#[test] -fn set() { - let mut smt = SimpleSmt::new(64).unwrap(); - let empty_tree_root = smt.root(); - - let source = " - use.std::collections::smt64 - begin - exec.smt64::set - end - "; - - // insert values one-by-one into the tree - let mut old_roots = Vec::new(); - for (index, value) in LEAVES { - old_roots.push(smt.root()); - let (init_stack, final_stack, store) = prepare_insert_or_set(index, value, &mut smt); - build_test!(source, &init_stack, &[], store, vec![]).expect_stack(&final_stack); - } - - // update one of the previously inserted values - let mut smt2 = smt.clone(); - let index = LEAVES[0].0; - let value = [ONE; 4]; - let (init_stack, final_stack, store) = prepare_insert_or_set(index, value, &mut smt2); - build_test!(source, &init_stack, &[], store, vec![]).expect_stack(&final_stack); - - // setting to [ZERO; 4] should return the tree to the prior state - for (index, old_value) in LEAVES.iter().rev() { - let 
value = EMPTY_WORD; - let (init_stack, final_stack, store) = prepare_insert_or_set(*index, value, &mut smt); - - let expected_final_stack = - build_expected_stack(*old_value, old_roots.pop().unwrap().into()); - assert_eq!(expected_final_stack, final_stack); - build_test!(source, &init_stack, &[], store, vec![]).expect_stack(&final_stack); - } - - assert_eq!(smt.root(), empty_tree_root); -} - -// HELPER FUNCTIONS -// ================================================================================================ - -fn prepare_insert_or_set( - index: u64, - value: Word, - smt: &mut SimpleSmt, -) -> (Vec, Vec, MerkleStore) { - // set initial state of the stack to be [VALUE, key, ROOT, ...] - let mut initial_stack = Vec::new(); - append_word_to_vec(&mut initial_stack, smt.root().into()); - initial_stack.push(index); - append_word_to_vec(&mut initial_stack, value); - - // build a Merkle store for the test before the tree is updated, and then update the tree - let store: MerkleStore = (&*smt).into(); - let old_value = smt.update_leaf(index, value).unwrap(); - - // after insert or set, the stack should be [OLD_VALUE, ROOT, ...] - let expected_output = build_expected_stack(old_value, smt.root().into()); - - (initial_stack, expected_output, store) -} - -fn build_expected_stack(word0: Word, word1: Word) -> Vec { - vec![ - word0[3].as_int(), - word0[2].as_int(), - word0[1].as_int(), - word0[0].as_int(), - word1[3].as_int(), - word1[2].as_int(), - word1[1].as_int(), - word1[0].as_int(), - ] -} - -fn append_word_to_vec(target: &mut Vec, word: Word) { - target.push(word[0].as_int()); - target.push(word[1].as_int()); - target.push(word[2].as_int()); - target.push(word[3].as_int()); -} diff --git a/stdlib/tests/crypto/blake3.rs b/stdlib/tests/crypto/blake3.rs index 2d23a38692..633d81610a 100644 --- a/stdlib/tests/crypto/blake3.rs +++ b/stdlib/tests/crypto/blake3.rs @@ -1,4 +1,3 @@ -use crate::build_test; use test_utils::{group_slice_elements, rand::rand_array, Felt, IntoBytes}; #[test] diff --git a/stdlib/tests/crypto/ecdsa_secp256k1.rs b/stdlib/tests/crypto/ecdsa_secp256k1.rs index efdfda11a9..32cf16f457 100644 --- a/stdlib/tests/crypto/ecdsa_secp256k1.rs +++ b/stdlib/tests/crypto/ecdsa_secp256k1.rs @@ -1,4 +1,3 @@ -use crate::build_test; use test_utils::test_case; // Wrapper types for ease of writing parameterized test cases diff --git a/stdlib/tests/crypto/elgamal.rs b/stdlib/tests/crypto/elgamal.rs index 0e598be50f..77d307b0d4 100644 --- a/stdlib/tests/crypto/elgamal.rs +++ b/stdlib/tests/crypto/elgamal.rs @@ -1,8 +1,6 @@ -use crate::build_test; -use crate::math::ecgfp5::base_field::Ext5; -use crate::math::ecgfp5::group::ECExt5; +use crate::math::ecgfp5::{base_field::Ext5, group::ECExt5}; use std::ops::Add; -use test_utils::{rand::rand_array, Felt, FieldElement, StarkField}; +use test_utils::{rand::rand_array, Felt, FieldElement}; fn gen_random_private_key() -> [u32; 10] { rand_array::() diff --git a/stdlib/tests/crypto/falcon.rs b/stdlib/tests/crypto/falcon.rs index 7ad94ba06c..8ec911acc0 100644 --- a/stdlib/tests/crypto/falcon.rs +++ b/stdlib/tests/crypto/falcon.rs @@ -1,25 +1,56 @@ -use assembly::utils::Serializable; -use miden_air::{Felt, StarkField}; -use processor::Digest; - -use std::vec; +use assembly::{utils::Serializable, Assembler}; +use miden_air::{Felt, ProvingOptions}; +use miden_stdlib::StdLibrary; +use processor::{AdviceInputs, DefaultHost, Digest, MemAdviceProvider, StackInputs}; use test_utils::{ crypto::{rpo_falcon512::KeyPair, MerkleStore}, rand::rand_vector, - Test, Word, + 
ProgramInfo, Word, }; #[test] -fn test_falcon() { +fn falcon_execution() { let keypair = KeyPair::new().unwrap(); - let message = rand_vector::(4).try_into().unwrap(); + let (source, op_stack, adv_stack, store, advice_map) = generate_test(keypair, message); - let test = generate_test(keypair, message); + let test = build_test!(source, &op_stack, &adv_stack, store, advice_map.into_iter()); test.expect_stack(&[]) } -fn generate_test(keypair: KeyPair, message: Word) -> Test { +#[test] +#[ignore] +fn falcon_prove_verify() { + let keypair = KeyPair::new().unwrap(); + let message = rand_vector::(4).try_into().unwrap(); + let (source, op_stack, _, _, advice_map) = generate_test(keypair, message); + + let program = Assembler::default() + .with_library(&StdLibrary::default()) + .expect("failed to load stdlib") + .compile(source) + .expect("failed to compile test source"); + + let stack_inputs = + StackInputs::try_from_values(op_stack).expect("failed to create stack inputs"); + let advice_inputs = AdviceInputs::default().with_map(advice_map); + let advice_provider = MemAdviceProvider::from(advice_inputs); + let host = DefaultHost::new(advice_provider); + + let options = ProvingOptions::with_96_bit_security(false); + let (stack_outputs, proof) = test_utils::prove(&program, stack_inputs.clone(), host, options) + .expect("failed to generate proof"); + + let program_info = ProgramInfo::from(program); + let result = test_utils::verify(program_info, stack_inputs, stack_outputs, proof); + + assert!(result.is_ok(), "error: {result:?}"); +} + +fn generate_test( + keypair: KeyPair, + message: Word, +) -> (&'static str, Vec, Vec, MerkleStore, Vec<(Digest, Vec)>) { let source = " use.std::crypto::dsa::rpo_falcon512 @@ -31,18 +62,18 @@ fn generate_test(keypair: KeyPair, message: Word) -> Test { let pk: Word = keypair.public_key().into(); let pk: Digest = pk.into(); let pk_sk_bytes = keypair.to_bytes(); - let to_adv_map = pk_sk_bytes.iter().map(|a| Felt::new(*a as u64)).collect::>(); - let advice_map: Vec<([u8; 32], Vec)> = vec![(pk.as_bytes(), to_adv_map.into())]; + let to_adv_map = pk_sk_bytes.iter().map(|a| Felt::new(*a as u64)).collect::>(); - let message = message.into_iter().map(|a| a.as_int() as u64).collect::>(); + let advice_map: Vec<(Digest, Vec)> = vec![(pk, to_adv_map)]; let mut op_stack = vec![]; + let message = message.into_iter().map(|a| a.as_int()).collect::>(); op_stack.extend_from_slice(&message); op_stack.extend_from_slice(&pk.as_elements().iter().map(|a| a.as_int()).collect::>()); + let adv_stack = vec![]; let store = MerkleStore::new(); - let test = build_test!(source, &op_stack, &adv_stack, store, advice_map.into_iter()); - test + (source, op_stack, adv_stack, store, advice_map) } diff --git a/stdlib/tests/crypto/fri/channel.rs b/stdlib/tests/crypto/fri/channel.rs index 749bdfb7dd..fd10128bfa 100644 --- a/stdlib/tests/crypto/fri/channel.rs +++ b/stdlib/tests/crypto/fri/channel.rs @@ -1,3 +1,4 @@ +use processor::Digest; use test_utils::{ crypto::{BatchMerkleProof, ElementHasher, Hasher as HasherTrait, PartialMerkleTree}, math::fft, @@ -12,7 +13,7 @@ pub trait UnBatch { positions: &[usize], domain_size: usize, layer_commitments: Vec<::Digest>, - ) -> (Vec, Vec<([u8; 32], Vec)>); + ) -> (Vec, Vec<(Digest, Vec)>); } pub struct MidenFriVerifierChannel> { diff --git a/stdlib/tests/crypto/fri/mod.rs b/stdlib/tests/crypto/fri/mod.rs index 9a7ff60196..69b6350235 100644 --- a/stdlib/tests/crypto/fri/mod.rs +++ b/stdlib/tests/crypto/fri/mod.rs @@ -1,8 +1,7 @@ -use crate::build_test; +use 
processor::Digest; use test_utils::{collections::BTreeMap, crypto::MerkleStore, Felt, StarkField}; mod channel; -pub use channel::*; pub(crate) mod verifier_fri_e2f4; pub use verifier_fri_e2f4::*; @@ -45,7 +44,7 @@ fn fri_fold4_ext2_remainder32() { remainder, ); - let advice_map: BTreeMap<[u8; 32], Vec> = BTreeMap::from_iter(advice_maps); + let advice_map: BTreeMap> = BTreeMap::from_iter(advice_maps); let domain_generator = Felt::get_root_of_unity(domain_size.ilog2()).as_int(); let mut store = MerkleStore::new(); @@ -93,7 +92,7 @@ fn fri_fold4_ext2_remainder64() { remainder, ); - let advice_map: BTreeMap<[u8; 32], Vec> = BTreeMap::from_iter(advice_maps); + let advice_map: BTreeMap> = BTreeMap::from_iter(advice_maps); let domain_generator = Felt::get_root_of_unity(domain_size.ilog2()).as_int(); let mut store = MerkleStore::new(); @@ -133,7 +132,7 @@ fn prepare_advice_stack( stack.extend_from_slice(&com[(4 * i)..(4 * i + 4)]); stack.extend_from_slice(&alphas[(4 * i)..(4 * i + 2)]); // - 2 is due to the fact that we are folding by 4 - stack.extend_from_slice(&vec![current_depth - 2, current_domain_size]); + stack.extend_from_slice(&[current_depth - 2, current_domain_size]); current_depth -= 2; } diff --git a/stdlib/tests/crypto/fri/remainder.rs b/stdlib/tests/crypto/fri/remainder.rs index cb6429a9fb..6056aa9d6c 100644 --- a/stdlib/tests/crypto/fri/remainder.rs +++ b/stdlib/tests/crypto/fri/remainder.rs @@ -1,4 +1,3 @@ -use crate::build_test; use test_utils::{ math::fft, rand::rand_vector, test_case, Felt, FieldElement, QuadFelt, StarkField, ONE, }; diff --git a/stdlib/tests/crypto/fri/verifier_fri_e2f4.rs b/stdlib/tests/crypto/fri/verifier_fri_e2f4.rs index d5e35fbdd0..9f5f65bb94 100644 --- a/stdlib/tests/crypto/fri/verifier_fri_e2f4.rs +++ b/stdlib/tests/crypto/fri/verifier_fri_e2f4.rs @@ -8,7 +8,7 @@ use test_utils::{ crypto::{MerklePath, NodeIndex, PartialMerkleTree, Rpo256 as MidenHasher}, group_vector_elements, math::fft, - Felt, FieldElement, IntoBytes, QuadFelt as QuadExt, StarkField, EMPTY_WORD, + Felt, FieldElement, QuadFelt as QuadExt, StarkField, EMPTY_WORD, }; use winter_fri::{ folding::fold_positions, DefaultProverChannel, FriOptions, FriProof, FriProver, VerifierError, @@ -19,7 +19,7 @@ pub struct FriResult { pub partial_trees: Vec, /// used to unhash Merkle nodes to a sequence of field elements representing the query-values. 
- pub advice_maps: Vec<([u8; 32], Vec)>, + pub advice_maps: Vec<(RpoDigest, Vec)>, /// A vector of consecutive quadruples of the form (poe, p, e1, e0) where p is index of the /// query at the first layer and (e1, e0) is its corresponding evaluation and poe is g^p with g @@ -55,6 +55,7 @@ pub fn fri_prove_verify_fold4_ext2(trace_length_e: usize) -> Result Result Result Result { - return Ok(FriResult { - partial_trees, - advice_maps, - positions: all_position_evaluation, - alphas, - commitments, - remainder, - num_queries: positions.len(), - }); - } - Err(err) => return Err(err), + Ok(((partial_trees, advice_maps), all_position_evaluation, alphas)) => Ok(FriResult { + partial_trees, + advice_maps, + positions: all_position_evaluation, + alphas, + commitments, + remainder, + num_queries: positions.len(), + }), + Err(err) => Err(err), } } @@ -125,7 +124,7 @@ pub fn build_prover_channel( } pub fn build_evaluations(trace_length: usize, lde_blowup: usize) -> Vec { - let mut p = (0..trace_length as u64) + let mut p = (0..trace_length as u32) .map(|i| (i, i)) .map(|(i, j)| QuadExt::new(i.into(), j.into())) .collect::>(); @@ -146,8 +145,10 @@ fn verify_proof( domain_size: usize, positions: &[usize], options: &FriOptions, -) -> Result<((Vec, Vec<([u8; 32], Vec)>), Vec, Vec), VerifierError> -{ +) -> Result< + ((Vec, Vec<(RpoDigest, Vec)>), Vec, Vec), + VerifierError, +> { let mut channel = MidenFriVerifierChannel::::new( proof, commitments.clone(), @@ -163,7 +164,7 @@ fn verify_proof( let queried_evaluations = positions.iter().map(|&p| evaluations[p]).collect::>(); let result = - miden_verifier.verify_fold_4_ext_2(&mut channel, &queried_evaluations, &positions)?; + miden_verifier.verify_fold_4_ext_2(&mut channel, &queried_evaluations, positions)?; Ok(result) } @@ -245,7 +246,7 @@ impl FriVerifierFold4Ext2 { evaluations: &[QuadExt], positions: &[usize], ) -> Result< - ((Vec, Vec<([u8; 32], Vec)>), Vec, Vec), + ((Vec, Vec<(RpoDigest, Vec)>), Vec, Vec), VerifierError, > { // 1 ----- verify the recursive components of the FRI proof ----------------------------------- @@ -293,9 +294,9 @@ impl FriVerifierFold4Ext2 { } fn iterate_query_fold_4_quad_ext( - layer_alphas: &Vec, - partial_trees: &Vec, - key_val_map: &Vec<([u8; 32], Vec)>, + layer_alphas: &[QuadExt], + partial_trees: &[PartialMerkleTree], + key_val_map: &[(RpoDigest, Vec)], position: usize, number_of_layers: usize, initial_domain_size: usize, @@ -309,13 +310,13 @@ fn iterate_query_fold_4_quad_ext( let initial_domain_generator = *domain_generator; let norm_cst = Felt::get_root_of_unity(2).inv(); - let mut init_exp = initial_domain_generator.exp((position as u64).into()); + let mut init_exp = initial_domain_generator.exp(position as u64); let arr = vec![evaluation]; let a = QuadExt::slice_as_base_elements(&arr); let position_evaluation = - vec![a[0].as_int(), a[1].as_int(), (position as u64).into(), init_exp.as_int()]; + vec![a[0].as_int(), a[1].as_int(), position as u64, init_exp.as_int()]; let mut alphas = vec![]; for depth in 0..number_of_layers { @@ -333,7 +334,7 @@ fn iterate_query_fold_4_quad_ext( .unwrap(); let query_values = &key_val_map .iter() - .filter(|(k, _)| *k == query_nodes.into_bytes()) + .filter(|(k, _)| *k == query_nodes) .next() .expect("must contain the leaf values") .1; @@ -384,7 +385,7 @@ fn iterate_query_fold_4_quad_ext( alphas.push(0); alphas.push(0); - *domain_generator = (*domain_generator).exp((4 as u32).into()); + *domain_generator = (*domain_generator).exp((4_u32).into()); cur_pos = folded_pos; domain_size /= 4; } @@ 
-398,7 +399,7 @@ impl UnBatch for MidenFriVerifierChannel, - ) -> (Vec, Vec<([u8; 32], Vec)>) { + ) -> (Vec, Vec<(RpoDigest, Vec)>) { let queries = self.layer_queries().clone(); let mut current_domain_size = domain_size; let mut positions = positions_.to_vec(); @@ -447,7 +448,7 @@ impl UnBatch for MidenFriVerifierChannel, ) -> Result { - let program = Assembler::default().compile(&source).unwrap(); + let program = Assembler::default().compile(source).unwrap(); let stack_inputs = StackInputs::try_from_values(stack_inputs).unwrap(); let advice_inputs = AdviceInputs::default(); let advice_provider = MemAdviceProvider::from(advice_inputs); diff --git a/stdlib/tests/crypto/stark/verifier_recursive/channel.rs b/stdlib/tests/crypto/stark/verifier_recursive/channel.rs index cecf54aa6f..61864c34f3 100644 --- a/stdlib/tests/crypto/stark/verifier_recursive/channel.rs +++ b/stdlib/tests/crypto/stark/verifier_recursive/channel.rs @@ -3,11 +3,11 @@ use miden_air::ProcessorAir; use test_utils::{ - collections::Vec, + collections::*, crypto::{BatchMerkleProof, MerklePath, PartialMerkleTree, Rpo256, RpoDigest}, group_vector_elements, math::{FieldElement, QuadExtension, StarkField}, - Felt, IntoBytes, VerifierError, EMPTY_WORD, + Felt, VerifierError, EMPTY_WORD, }; use winter_air::{ proof::{Queries, StarkProof, Table}, @@ -54,6 +54,7 @@ impl VerifierChannel { ood_frame, fri_proof, pow_nonce, + num_unique_queries, } = proof; // make AIR and proof base fields are the same @@ -73,8 +74,9 @@ impl VerifierChannel { .parse::(num_trace_segments, fri_options.num_fri_layers(lde_domain_size)) .map_err(|err| VerifierError::ProofDeserializationError(err.to_string()))?; // --- parse trace and constraint queries ------------------------------------------------- - let trace_queries = TraceQueries::new(trace_queries, air)?; - let constraint_queries = ConstraintQueries::new(constraint_queries, air)?; + let trace_queries = TraceQueries::new(trace_queries, air, num_unique_queries as usize)?; + let constraint_queries = + ConstraintQueries::new(constraint_queries, air, num_unique_queries as usize)?; // --- parse FRI proofs ------------------------------------------------------------------- let fri_num_partitions = fri_proof.num_partitions(); @@ -160,7 +162,7 @@ impl VerifierChannel { pub fn read_queried_trace_states( &mut self, positions: &[usize], - ) -> Result<(Vec<([u8; 32], Vec)>, Vec), VerifierError> { + ) -> Result<(Vec<(RpoDigest, Vec)>, Vec), VerifierError> { let queries = self.trace_queries.take().expect("already read"); let mut trees = Vec::new(); @@ -190,7 +192,7 @@ impl VerifierChannel { pub fn read_constraint_evaluations( &mut self, positions: &[usize], - ) -> Result<(Vec<([u8; 32], Vec)>, PartialMerkleTree), VerifierError> { + ) -> Result<(Vec<(RpoDigest, Vec)>, PartialMerkleTree), VerifierError> { let queries = self.constraint_queries.take().expect("already read"); let proof = queries.query_proofs; @@ -224,7 +226,7 @@ impl VerifierChannel { positions_: &[usize], domain_size: usize, layer_commitments: Vec, - ) -> (Vec, Vec<([u8; 32], Vec)>) { + ) -> (Vec, Vec<(RpoDigest, Vec)>) { let queries = self.fri_layer_queries.clone(); let mut current_domain_size = domain_size; let mut positions = positions_.to_vec(); @@ -274,7 +276,7 @@ impl VerifierChannel { let mut value = QuadExt::slice_as_base_elements(b).to_owned(); value.extend(EMPTY_WORD); - adv_key_map.push((a.to_owned().into_bytes(), value)); + adv_key_map.push((a.to_owned().into(), value)); }) .collect(); @@ -330,7 +332,11 @@ struct TraceQueries { impl 
TraceQueries { /// Parses the provided trace queries into trace states in the specified field and /// corresponding Merkle authentication paths. - pub fn new(mut queries: Vec, air: &ProcessorAir) -> Result { + pub fn new( + mut queries: Vec, + air: &ProcessorAir, + num_queries: usize, + ) -> Result { assert_eq!( queries.len(), air.trace_layout().num_segments(), @@ -339,8 +345,6 @@ impl TraceQueries { queries.len() ); - let num_queries = air.options().num_queries(); - // parse main trace segment queries; parsing also validates that hashes of each table row // form the leaves of Merkle authentication paths in the proofs let main_segment_width = air.trace_layout().main_trace_width(); @@ -403,8 +407,11 @@ struct ConstraintQueries { impl ConstraintQueries { /// Parses the provided constraint queries into evaluations in the specified field and /// corresponding Merkle authentication paths. - pub fn new(queries: Queries, air: &ProcessorAir) -> Result { - let num_queries = air.options().num_queries(); + pub fn new( + queries: Queries, + air: &ProcessorAir, + num_queries: usize, + ) -> Result { let (query_proofs, evaluations) = queries .parse::(air.lde_domain_size(), num_queries, air.ce_blowup_factor()) .map_err(|err| { @@ -477,7 +484,7 @@ pub fn unbatch_to_partial_mt( mut positions: Vec, queries: Vec>, proof: BatchMerkleProof, -) -> (PartialMerkleTree, Vec<([u8; 32], Vec)>) { +) -> (PartialMerkleTree, Vec<(RpoDigest, Vec)>) { let mut unbatched_proof = proof.into_paths(&positions).unwrap(); let mut adv_key_map = Vec::new(); let nodes: Vec<[Felt; 4]> = unbatched_proof @@ -505,7 +512,7 @@ pub fn unbatch_to_partial_mt( .zip(queries.iter()) .map(|(a, b)| { let data = b.to_owned(); - adv_key_map.push((a.to_owned().into_bytes(), data)); + adv_key_map.push((a.to_owned().into(), data)); }) .collect(); diff --git a/stdlib/tests/crypto/stark/verifier_recursive/mod.rs b/stdlib/tests/crypto/stark/verifier_recursive/mod.rs index ae280a2b4d..e698b9092a 100644 --- a/stdlib/tests/crypto/stark/verifier_recursive/mod.rs +++ b/stdlib/tests/crypto/stark/verifier_recursive/mod.rs @@ -1,7 +1,7 @@ use miden_air::ProcessorAir; use processor::crypto::RpoRandomCoin; use test_utils::{ - collections::Vec, + collections::*, crypto::{MerkleStore, RandomCoin, Rpo256, RpoDigest}, math::{fft, FieldElement, QuadExtension, StarkField, ToElements}, Felt, VerifierError, @@ -19,7 +19,7 @@ pub struct VerifierData { pub initial_stack: Vec, pub tape: Vec, pub store: MerkleStore, - pub advice_map: Vec<([u8; 32], Vec)>, + pub advice_map: Vec<(RpoDigest, Vec)>, } pub fn generate_advice_inputs( @@ -46,8 +46,8 @@ pub fn generate_advice_inputs( // create AIR instance for the computation specified in the proof let air = ProcessorAir::new(proof.get_trace_info(), pub_inputs, proof.options().clone()); - - let mut public_coin: RpoRandomCoin = RpoRandomCoin::new(&public_coin_seed); + let seed_digest = Rpo256::hash_elements(&public_coin_seed); + let mut public_coin: RpoRandomCoin = RpoRandomCoin::new(seed_digest.into()); let mut channel = VerifierChannel::new(&air, proof)?; // 1 ----- trace commitment ------------------------------------------------------------------- @@ -55,7 +55,7 @@ pub fn generate_advice_inputs( // reseed the coin with the commitment to the main trace segment public_coin.reseed(trace_commitments[0]); - tape.extend_from_slice(&digest_to_int_vec(&trace_commitments)); + tape.extend_from_slice(&digest_to_int_vec(trace_commitments)); // process auxiliary trace segments, to build a set of random elements for each segment let mut 
aux_trace_rand_elements = AuxTraceRandElements::::new(); @@ -81,7 +81,7 @@ pub fn generate_advice_inputs( let _ood_main_trace_frame = ood_trace_frame.main_frame(); let _ood_aux_trace_frame = ood_trace_frame.aux_frame(); - tape.extend_from_slice(&to_int_vec(&ood_trace_frame.values())); + tape.extend_from_slice(&to_int_vec(ood_trace_frame.values())); public_coin.reseed(Rpo256::hash_elements(ood_trace_frame.values())); // read evaluations of composition polynomial columns @@ -112,20 +112,13 @@ pub fn generate_advice_inputs( } // 5 ----- trace and constraint queries ------------------------------------------------------- - // read proof-of-work nonce sent by the prover and update the public coin with it - let pow_nonce = channel.read_pow_nonce(); - tape.extend_from_slice(&[pow_nonce]); - public_coin.reseed_with_int(pow_nonce); - - // make sure the proof-of-work specified by the grinding factor is satisfied - if public_coin.leading_zeros() < air.options().grinding_factor() { - return Err(VerifierError::QuerySeedProofOfWorkVerificationFailed); - } - // draw pseudo-random query positions for the LDE domain from the public coin. - // this is needed in order to construct Partial Merkle Trees + // read proof-of-work nonce sent by the prover and draw pseudo-random query positions for + // the LDE domain from the public coin. + // This is needed in order to construct Partial Merkle Trees + let pow_nonce = channel.read_pow_nonce(); let query_positions = public_coin - .draw_integers(air.options().num_queries(), air.lde_domain_size()) + .draw_integers(air.options().num_queries(), air.lde_domain_size(), pow_nonce) .map_err(|_| VerifierError::RandomCoinError)?; // read advice maps and Merkle paths related to trace and constraint composition polynomial evaluations @@ -158,14 +151,11 @@ pub fn generate_advice_inputs( pub fn digest_to_int_vec(digest: &[RpoDigest]) -> Vec { digest .iter() - .map(|digest| digest.as_elements().into_iter().map(|e| e.as_int())) + .map(|digest| digest.as_elements().iter().map(|e| e.as_int())) .flatten() .collect() } pub fn to_int_vec(ext_felts: &[QuadExt]) -> Vec { - QuadExt::slice_as_base_elements(ext_felts) - .into_iter() - .map(|e| e.as_int()) - .collect() + QuadExt::slice_as_base_elements(ext_felts).iter().map(|e| e.as_int()).collect() } diff --git a/stdlib/tests/math/ecgfp5/base_field.rs b/stdlib/tests/math/ecgfp5/base_field.rs index 649124d420..c60889ff49 100644 --- a/stdlib/tests/math/ecgfp5/base_field.rs +++ b/stdlib/tests/math/ecgfp5/base_field.rs @@ -1,8 +1,4 @@ -use crate::build_test; -use core::{ - cmp::PartialEq, - ops::{Add, Div, Mul, Neg, Sub}, -}; +use core::ops::{Add, Div, Mul, Neg, Sub}; use test_utils::{rand::rand_value, Felt, FieldElement, StarkField, ONE, ZERO}; // Given an element v ∈ Z_q | q = 2^64 - 2^32 + 1, this routine raises diff --git a/stdlib/tests/math/ecgfp5/group.rs b/stdlib/tests/math/ecgfp5/group.rs index b1c649923f..a61a6fdee8 100644 --- a/stdlib/tests/math/ecgfp5/group.rs +++ b/stdlib/tests/math/ecgfp5/group.rs @@ -1,7 +1,6 @@ use super::base_field::{bv_or, Ext5}; -use crate::build_test; use std::ops::Add; -use test_utils::{test_case, Felt, StarkField, ONE, ZERO}; +use test_utils::{test_case, Felt, ONE, ZERO}; #[derive(PartialEq, Eq, Copy, Clone, Debug)] pub struct ECExt5 { diff --git a/stdlib/tests/math/ecgfp5/scalar_field.rs b/stdlib/tests/math/ecgfp5/scalar_field.rs index 416eb1f6eb..8ded33e5b6 100644 --- a/stdlib/tests/math/ecgfp5/scalar_field.rs +++ b/stdlib/tests/math/ecgfp5/scalar_field.rs @@ -1,6 +1,5 @@ -use crate::build_test; -use 
std::{cmp::PartialEq, ops::Mul}; -use test_utils::{rand::rand_value, StarkField}; +use std::ops::Mul; +use test_utils::rand::rand_value; #[derive(Copy, Clone, Debug)] struct Scalar { diff --git a/stdlib/tests/math/secp256k1/base_field.rs b/stdlib/tests/math/secp256k1/base_field.rs index 69c997582f..10400dcf2e 100644 --- a/stdlib/tests/math/secp256k1/base_field.rs +++ b/stdlib/tests/math/secp256k1/base_field.rs @@ -1,5 +1,3 @@ -use crate::build_test; -use core::cmp::PartialEq; use core::ops::{Add, Mul, Neg, Sub}; use test_utils::rand::rand_array; diff --git a/stdlib/tests/math/secp256k1/group.rs b/stdlib/tests/math/secp256k1/group.rs index ec926ba7c3..9837028184 100644 --- a/stdlib/tests/math/secp256k1/group.rs +++ b/stdlib/tests/math/secp256k1/group.rs @@ -1,4 +1,3 @@ -use crate::build_test; use test_utils::test_case; // Wrapper types introduced for parameterized testing @@ -64,26 +63,26 @@ fn test_secp256k1_point_doubling(src: Point, dst: Point) { movup.4 mem_loadw - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert push.0.0.0.0 movup.4 mem_loadw - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert # --- end asserting X3 --- @@ -92,26 +91,26 @@ fn test_secp256k1_point_doubling(src: Point, dst: Point) { movup.4 mem_loadw - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert push.0.0.0.0 movup.4 mem_loadw - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert # --- end asserting Y3 --- @@ -120,26 +119,26 @@ fn test_secp256k1_point_doubling(src: Point, dst: Point) { movup.4 mem_loadw - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert push.0.0.0.0 movup.4 mem_loadw - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert # --- end asserting Z3 --- end @@ -291,26 +290,26 @@ fn test_secp256k1_point_addition(src0: Point, src1: Point, dst: Point) { movup.4 mem_loadw - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert push.0.0.0.0 movup.4 mem_loadw - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert # --- end asserting X3 --- @@ -319,26 +318,26 @@ fn test_secp256k1_point_addition(src0: Point, src1: Point, dst: Point) { movup.4 mem_loadw - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert push.0.0.0.0 movup.4 mem_loadw - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert # --- end asserting Y3 --- @@ -347,26 +346,26 @@ fn test_secp256k1_point_addition(src0: Point, src1: Point, dst: Point) { 
movup.4 mem_loadw - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert push.0.0.0.0 movup.4 mem_loadw - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert # --- end asserting Z3 --- end @@ -517,26 +516,26 @@ fn test_secp256k1_point_multiplication(src_point: Point, scalar: FieldElement, d movup.4 mem_loadw - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert push.0.0.0.0 movup.4 mem_loadw - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert # --- end asserting X --- @@ -545,26 +544,26 @@ fn test_secp256k1_point_multiplication(src_point: Point, scalar: FieldElement, d movup.4 mem_loadw - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert push.0.0.0.0 movup.4 mem_loadw - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert # --- end asserting Y --- @@ -573,26 +572,26 @@ fn test_secp256k1_point_multiplication(src_point: Point, scalar: FieldElement, d movup.4 mem_loadw - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert push.0.0.0.0 movup.4 mem_loadw - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert # --- end asserting Z --- end @@ -693,26 +692,26 @@ fn test_secp256k1_generator_multiplication(scalar: FieldElement, point: Point) { movup.4 mem_loadw - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert push.0.0.0.0 movup.4 mem_loadw - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert # --- end asserting X --- @@ -721,26 +720,26 @@ fn test_secp256k1_generator_multiplication(scalar: FieldElement, point: Point) { movup.4 mem_loadw - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert push.0.0.0.0 movup.4 mem_loadw - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert # --- end asserting Y --- @@ -749,26 +748,26 @@ fn test_secp256k1_generator_multiplication(scalar: FieldElement, point: Point) { movup.4 mem_loadw - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert push.0.0.0.0 movup.4 mem_loadw - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - u32checked_eq.{} + u32assert eq.{} assert - 
u32checked_eq.{} + u32assert eq.{} assert # --- end asserting Z --- end diff --git a/stdlib/tests/math/secp256k1/scalar_field.rs b/stdlib/tests/math/secp256k1/scalar_field.rs index 60d22dd0a9..2314dcc107 100644 --- a/stdlib/tests/math/secp256k1/scalar_field.rs +++ b/stdlib/tests/math/secp256k1/scalar_field.rs @@ -1,5 +1,3 @@ -use crate::build_test; -use core::cmp::PartialEq; use core::ops::Mul; use test_utils::rand::rand_array; diff --git a/stdlib/tests/math/u256_mod.rs b/stdlib/tests/math/u256_mod.rs index 596eeff0be..14150c321c 100644 --- a/stdlib/tests/math/u256_mod.rs +++ b/stdlib/tests/math/u256_mod.rs @@ -1,4 +1,3 @@ -use crate::build_test; use num_bigint::BigUint; use test_utils::rand::rand_vector; diff --git a/stdlib/tests/math/u64_mod.rs b/stdlib/tests/math/u64_mod.rs index 1c5a7e5333..28ac1aa7c9 100644 --- a/stdlib/tests/math/u64_mod.rs +++ b/stdlib/tests/math/u64_mod.rs @@ -1,6 +1,6 @@ -use crate::build_test; use core::cmp; -use test_utils::{proptest::prelude::*, rand::rand_value, TestError, U32_BOUND}; +use processor::ExecutionError; +use test_utils::{proptest::prelude::*, rand::rand_value, Felt, TestError, U32_BOUND, ZERO}; // ADDITION // ------------------------------------------------------------------------------------------------ @@ -25,58 +25,6 @@ fn wrapping_add() { test.expect_stack(&[c1, c0]); } -#[test] -fn checked_add() { - let source = " - use.std::math::u64 - begin - exec.u64::checked_add - end"; - - // --- simple case ---------------------------------------------------------------------------- - let test = build_test!(source, &[1, 2, 3, 4]); - test.expect_stack(&[6, 4]); - - // --- random values -------------------------------------------------------------------------- - // test using u16 values to ensure there's no overflow so the result is valid - let a0 = rand_value::() as u16 as u64; - let b0 = rand_value::() as u16 as u64; - let a1 = rand_value::() as u16 as u64; - let b1 = rand_value::() as u16 as u64; - let c0 = a0 + b0; - let c1 = a1 + b1; - - let test = build_test!(source, &[a0, a1, b0, b1]); - test.expect_stack(&[c1, c0]); -} - -#[test] -fn checked_add_fail() { - let source = " - use.std::math::u64 - begin - exec.u64::checked_add - end"; - - // result overflow - let a0 = rand_value::() as u32 as u64; - let b0 = rand_value::() as u32 as u64; - let a1 = u32::MAX as u64; - let b1 = u32::MAX as u64; - - let test = build_test!(source, &[a0, a1, b0, b1]); - test.expect_error(TestError::ExecutionError("FailedAssertion")); - - // u32 limb assertion failure - let a0 = rand_value::(); - let b0 = rand_value::(); - let a1 = U32_BOUND; - let b1 = U32_BOUND; - - let test = build_test!(source, &[a0, a1, b0, b1]); - test.expect_error(TestError::ExecutionError("NotU32Value")); -} - #[test] fn overflowing_add() { let source = " @@ -131,61 +79,6 @@ fn wrapping_sub() { test.expect_stack(&[c1, c0]); } -#[test] -fn checked_sub() { - let source = " - use.std::math::u64 - begin - exec.u64::checked_sub - end"; - - // --- simple case ---------------------------------------------------------------------------- - let test = build_test!(source, &[3, 4, 1, 2]); - test.expect_stack(&[2, 2]); - - // --- random values -------------------------------------------------------------------------- - let common = rand_value::(); - let dif = rand_value::() as u16 as u64; - - let a = common + dif; - let b = common; - let c = a - b; - - let (a1, a0) = split_u64(a); - let (b1, b0) = split_u64(b); - let (c1, c0) = split_u64(c); - - let test = build_test!(source, &[a0, a1, b0, b1]); - 
test.expect_stack(&[c1, c0]); -} - -#[test] -fn checked_sub_fail() { - let source = " - use.std::math::u64 - begin - exec.u64::checked_sub - end"; - - // result underflow - let a0 = rand_value::() as u32 as u64; - let b0 = rand_value::() as u32 as u64; - let a1 = u16::MAX as u64; - let b1 = u32::MAX as u64; - - let test = build_test!(source, &[a0, a1, b0, b1]); - test.expect_error(TestError::ExecutionError("FailedAssertion")); - - // u32 limb assertion failure - let a0 = rand_value::(); - let b0 = rand_value::(); - let a1 = U32_BOUND; - let b1 = U32_BOUND; - - let test = build_test!(source, &[a0, a1, b0, b1]); - test.expect_error(TestError::ExecutionError("NotU32Value")); -} - #[test] fn overflowing_sub() { let a: u64 = rand_value(); @@ -257,75 +150,6 @@ fn wrapping_mul() { test.expect_stack(&[c1, c0]); } -#[test] -fn checked_mul() { - let source = " - use.std::math::u64 - begin - exec.u64::checked_mul - end"; - - // --- simple cases --------------------------------------------------------------------------- - let test = build_test!(source, &[1, 2, 0, 0]); - test.expect_stack(&[0, 0]); - - let test = build_test!(source, &[0, 0, 1, 2]); - test.expect_stack(&[0, 0]); - - let test = build_test!(source, &[5, 1, 1, 0]); - test.expect_stack(&[1, 5]); - - let test = build_test!(source, &[5, 2, 2, 0]); - test.expect_stack(&[4, 10]); - - // --- random values -------------------------------------------------------------------------- - let a0 = rand_value::() as u16 as u64; - let a1 = rand_value::() as u16 as u64; - let b0 = rand_value::() as u16 as u64; - let b1 = 0u64; - let c0 = a0 * b0; - let c1 = a1 * b0; - - let test = build_test!(source, &[a0, a1, b0, b1]); - test.expect_stack(&[c1, c0]); -} - -#[test] -fn checked_mul_fail() { - let source = " - use.std::math::u64 - begin - exec.u64::checked_mul - end"; - - // u32 limb assertion failure - for i in 0..4 { - let mut stack_init = [1, 2, 3, 4]; - stack_init[i] = U32_BOUND; - let test = build_test!(source, &stack_init); - test.expect_error(TestError::ExecutionError("NotU32Value")); - } - - // Higher bits assertion failure (a_hi * b_hi != 0) - - let a0 = rand_value::() as u16 as u64; - let a1 = 2u64; - let b0 = rand_value::() as u16 as u64; - let b1 = 3u64; - - let test = build_test!(source, &[a0, a1, b0, b1]); - test.expect_error(TestError::ExecutionError("FailedAssertion")); - - // result overflow - let a0 = rand_value::() as u32 as u64; - let a1 = u16::MAX as u64 + rand_value::() as u16 as u64; - let b0 = u16::MAX as u64 + rand_value::() as u16 as u64; - let b1 = 0u64; - - let test = build_test!(source, &[a0, a1, b0, b1]); - test.expect_error(TestError::ExecutionError("FailedAssertion")); -} - #[test] fn overflowing_mul() { let source = " @@ -372,7 +196,7 @@ fn unchecked_lt() { let source = " use.std::math::u64 begin - exec.u64::unchecked_lt + exec.u64::lt end"; // a = 0, b = 0 @@ -390,7 +214,7 @@ fn unchecked_lte() { let source = " use.std::math::u64 begin - exec.u64::unchecked_lte + exec.u64::lte end"; // a = 0, b = 0 @@ -418,7 +242,7 @@ fn unchecked_gt() { let source = " use.std::math::u64 begin - exec.u64::unchecked_gt + exec.u64::gt end"; // a = 0, b = 0 @@ -436,7 +260,7 @@ fn unchecked_gte() { let source = " use.std::math::u64 begin - exec.u64::unchecked_gte + exec.u64::gte end"; // a = 0, b = 0 @@ -464,7 +288,7 @@ fn unchecked_min() { let source = " use.std::math::u64 begin - exec.u64::unchecked_min + exec.u64::min end"; // a = 0, b = 0 @@ -483,7 +307,7 @@ fn unchecked_max() { let source = " use.std::math::u64 begin - 
exec.u64::unchecked_max + exec.u64::max end"; // a = 0, b = 0 @@ -501,7 +325,7 @@ fn unchecked_eq() { let source = " use.std::math::u64 begin - exec.u64::unchecked_eq + exec.u64::eq end"; // a = 0, b = 0 @@ -528,7 +352,7 @@ fn unchecked_neq() { let source = " use.std::math::u64 begin - exec.u64::unchecked_neq + exec.u64::neq end"; // a = 0, b = 0 @@ -555,7 +379,7 @@ fn unchecked_eqz() { let source = " use.std::math::u64 begin - exec.u64::unchecked_eqz + exec.u64::eqz end"; // a = 0 @@ -584,7 +408,7 @@ fn unchecked_div() { let source = " use.std::math::u64 begin - exec.u64::unchecked_div + exec.u64::div end"; let (a1, a0) = split_u64(a); @@ -601,23 +425,6 @@ fn unchecked_div() { test.expect_stack(&[d1, d0]); } -#[test] -fn checked_div_fail() { - let source = " - use.std::math::u64 - begin - exec.u64::checked_div - end"; - - // u32 limb assertion failure - for i in 0..4 { - let mut stack_init = [1, 2, 3, 4]; - stack_init[i] = U32_BOUND; - let test = build_test!(source, &stack_init); - test.expect_error(TestError::ExecutionError("NotU32Value")); - } -} - // MODULO OPERATION // ------------------------------------------------------------------------------------------------ @@ -630,7 +437,7 @@ fn unchecked_mod() { let source = " use.std::math::u64 begin - exec.u64::unchecked_mod + exec.u64::mod end"; let (a1, a0) = split_u64(a); @@ -647,23 +454,6 @@ fn unchecked_mod() { test.expect_stack(&[d1, d0]); } -#[test] -fn checked_mod_fail() { - let source = " - use.std::math::u64 - begin - exec.u64::checked_mod - end"; - - // u32 limb assertion failure - for i in 0..4 { - let mut stack_init = [1, 2, 3, 4]; - stack_init[i] = U32_BOUND; - let test = build_test!(source, &stack_init); - test.expect_error(TestError::ExecutionError("NotU32Value")); - } -} - // DIVMOD OPERATION // ------------------------------------------------------------------------------------------------ @@ -677,7 +467,7 @@ fn unchecked_divmod() { let source = " use.std::math::u64 begin - exec.u64::unchecked_divmod + exec.u64::divmod end"; let (a1, a0) = split_u64(a); @@ -689,23 +479,6 @@ fn unchecked_divmod() { test.expect_stack(&[r1, r0, q1, q0]); } -#[test] -fn checked_divmod_fail() { - let source = " - use.std::math::u64 - begin - exec.u64::checked_divmod - end"; - - // u32 limb assertion failure - for i in 0..4 { - let mut stack_init = [1, 2, 3, 4]; - stack_init[i] = U32_BOUND; - let test = build_test!(source, &stack_init); - test.expect_error(TestError::ExecutionError("NotU32Value")); - } -} - // BITWISE OPERATIONS // ------------------------------------------------------------------------------------------------ @@ -718,7 +491,7 @@ fn checked_and() { let source = " use.std::math::u64 begin - exec.u64::checked_and + exec.u64::and end"; let (a1, a0) = split_u64(a); @@ -740,11 +513,11 @@ fn checked_and_fail() { let source = " use.std::math::u64 begin - exec.u64::checked_and + exec.u64::and end"; let test = build_test!(source, &[a0, a1, b0, b1]); - test.expect_error(TestError::ExecutionError("NotU32Value")); + test.expect_error(TestError::ExecutionError(ExecutionError::NotU32Value(Felt::new(b0), ZERO))); } #[test] @@ -756,7 +529,7 @@ fn checked_or() { let source = " use.std::math::u64 begin - exec.u64::checked_or + exec.u64::or end"; let (a1, a0) = split_u64(a); @@ -778,11 +551,11 @@ fn checked_or_fail() { let source = " use.std::math::u64 begin - exec.u64::checked_or + exec.u64::or end"; let test = build_test!(source, &[a0, a1, b0, b1]); - test.expect_error(TestError::ExecutionError("NotU32Value")); + 
test.expect_error(TestError::ExecutionError(ExecutionError::NotU32Value(Felt::new(b0), ZERO))); } #[test] @@ -794,7 +567,7 @@ fn checked_xor() { let source = " use.std::math::u64 begin - exec.u64::checked_xor + exec.u64::xor end"; let (a1, a0) = split_u64(a); @@ -816,19 +589,20 @@ fn checked_xor_fail() { let source = " use.std::math::u64 begin - exec.u64::checked_xor + exec.u64::xor end"; let test = build_test!(source, &[a0, a1, b0, b1]); - test.expect_error(TestError::ExecutionError("NotU32Value")); + test.expect_error(TestError::ExecutionError(ExecutionError::NotU32Value(Felt::new(b0), ZERO))); } #[test] +#[ignore] fn unchecked_shl() { let source = " use.std::math::u64 begin - exec.u64::unchecked_shl + exec.u64::shl end"; // shift by 0 @@ -878,7 +652,7 @@ fn unchecked_shr() { let source = " use.std::math::u64 begin - exec.u64::unchecked_shr + exec.u64::shr end"; // shift by 0 @@ -929,141 +703,12 @@ fn unchecked_shr() { build_test!(source, &[5, a0, a1, b as u64]).expect_stack(&[c1, c0, 5]); } -#[test] -fn overflowing_shl() { - let source = " - use.std::math::u64 - begin - exec.u64::overflowing_shl - end"; - - // shl u64 to u128 to avoid overflowing - let shl_to_u128 = |a: u64, b: u32| -> u128 { (a as u128) << b }; - - // shift by 0 - let a: u64 = rand_value(); - let (a1, a0) = split_u64(a); - let b: u32 = 0; - - let c = shl_to_u128(a, b); - let (d1, d0, c1, c0) = split_u128(c); - - build_test!(source, &[5, a0, a1, b as u64]).expect_stack(&[d1, d0, c1, c0, 5]); - - // shift by 31 (max lower limb of b) - let b: u32 = 31; - let c = shl_to_u128(a, b); - let (d1, d0, c1, c0) = split_u128(c); - - build_test!(source, &[5, a0, a1, b as u64]).expect_stack(&[d1, d0, c1, c0, 5]); - - // shift by 32 (min for upper limb of b) - let a = 1_u64; - let (a1, a0) = split_u64(a); - let b: u32 = 32; - let c = shl_to_u128(a, b); - let (d1, d0, c1, c0) = split_u128(c); - - build_test!(source, &[5, a0, a1, b as u64]).expect_stack(&[d1, d0, c1, c0, 5]); - - // shift by 33 - let a = 1_u64; - let (a1, a0) = split_u64(a); - let b: u32 = 33; - let c = shl_to_u128(a, b); - let (d1, d0, c1, c0) = split_u128(c); - - build_test!(source, &[5, a0, a1, b as u64]).expect_stack(&[d1, d0, c1, c0, 5]); - - // shift 64 by 58 - let a = 64_u64; - let (a1, a0) = split_u64(a); - let b: u32 = 58; - let c = shl_to_u128(a, b); - let (d1, d0, c1, c0) = split_u128(c); - - build_test!(source, &[5, a0, a1, b as u64]).expect_stack(&[d1, d0, c1, c0, 5]); -} - -#[test] -fn overflowing_shr() { - let source = " - use.std::math::u64 - begin - exec.u64::overflowing_shr - end"; - - // get bits shifted out and return 0 if b is 0 or 64 - let bits_shifted_out = |a: u64, b: u32| -> u64 { - if b % 64 == 0 { - 0_u64 - } else { - a.wrapping_shl(64 - b) - } - }; - - // shift by 0 - let a: u64 = rand_value(); - let (a1, a0) = split_u64(a); - let b: u32 = 0; - - let c = a.wrapping_shr(b); - let (c1, c0) = split_u64(c); - let d = bits_shifted_out(a, b); - let (d1, d0) = split_u64(d); - - build_test!(source, &[5, a0, a1, b as u64]).expect_stack(&[d1, d0, c1, c0, 5]); - - // shift by 31 (max lower limb of b) - let b: u32 = 31; - - let c = a.wrapping_shr(b); - let (c1, c0) = split_u64(c); - let d = bits_shifted_out(a, b); - let (d1, d0) = split_u64(d); - - build_test!(source, &[5, a0, a1, b as u64]).expect_stack(&[d1, d0, c1, c0, 5]); - - // shift by 32 (min for upper limb of b) - let a = 1_u64; - let (a1, a0) = split_u64(a); - let b: u32 = 32; - let c = a.wrapping_shr(b); - let (c1, c0) = split_u64(c); - let d = bits_shifted_out(a, b); - let (d1, d0) = 
split_u64(d); - - build_test!(source, &[5, a0, a1, b as u64]).expect_stack(&[d1, d0, c1, c0, 5]); - - // shift by 33 - let a = 1_u64; - let (a1, a0) = split_u64(a); - let b: u32 = 33; - let c = a.wrapping_shr(b); - let (c1, c0) = split_u64(c); - let d = bits_shifted_out(a, b); - let (d1, d0) = split_u64(d); - - build_test!(source, &[5, a0, a1, b as u64]).expect_stack(&[d1, d0, c1, c0, 5]); - - // shift 64 by 58 - let a = 64_u64; - let (a1, a0) = split_u64(a); - let b: u32 = 58; - let c = a.wrapping_shr(b); - let (c1, c0) = split_u64(c); - let d = bits_shifted_out(a, b); - let (d1, d0) = split_u64(d); - - build_test!(source, &[5, a0, a1, b as u64]).expect_stack(&[d1, d0, c1, c0, 5]); -} - #[test] fn unchecked_rotl() { let source = " use.std::math::u64 begin - exec.u64::unchecked_rotl + exec.u64::rotl end"; // shift by 0 @@ -1113,7 +758,7 @@ fn unchecked_rotr() { let source = " use.std::math::u64 begin - exec.u64::unchecked_rotr + exec.u64::rotr end"; // shift by 0 @@ -1158,6 +803,66 @@ fn unchecked_rotr() { build_test!(source, &[5, a0, a1, b as u64]).expect_stack(&[c1, c0, 5]); } +#[test] +fn clz() { + let source = " + use.std::math::u64 + begin + exec.u64::clz + end"; + + build_test!(source, &[0, 0]).expect_stack(&[64]); + build_test!(source, &[492665065, 0]).expect_stack(&[35]); + build_test!(source, &[3941320520, 0]).expect_stack(&[32]); + build_test!(source, &[3941320520, 492665065]).expect_stack(&[3]); + build_test!(source, &[492665065, 492665065]).expect_stack(&[3]); +} + +#[test] +fn ctz() { + let source = " + use.std::math::u64 + begin + exec.u64::ctz + end"; + + build_test!(source, &[0, 0]).expect_stack(&[64]); + build_test!(source, &[0, 3668265216]).expect_stack(&[40]); + build_test!(source, &[0, 3668265217]).expect_stack(&[32]); + build_test!(source, &[3668265216, 3668265217]).expect_stack(&[8]); + build_test!(source, &[3668265216, 3668265216]).expect_stack(&[8]); +} + +#[test] +fn clo() { + let source = " + use.std::math::u64 + begin + exec.u64::clo + end"; + + build_test!(source, &[4294967295, 4294967295]).expect_stack(&[64]); + build_test!(source, &[4278190080, 4294967295]).expect_stack(&[40]); + build_test!(source, &[0, 4294967295]).expect_stack(&[32]); + build_test!(source, &[0, 4278190080]).expect_stack(&[8]); + build_test!(source, &[4278190080, 4278190080]).expect_stack(&[8]); +} + +#[test] +fn cto() { + let source = " + use.std::math::u64 + begin + exec.u64::cto + end"; + + build_test!(source, &[4294967295, 4294967295]).expect_stack(&[64]); + build_test!(source, &[4294967295, 255]).expect_stack(&[40]); + build_test!(source, &[4294967295, 0]).expect_stack(&[32]); + build_test!(source, &[255, 0]).expect_stack(&[8]); + build_test!(source, &[255, 255]).expect_stack(&[8]); +} + // RANDOMIZED TESTS // ================================================================================================ @@ -1172,7 +877,7 @@ proptest! { let source = " use.std::math::u64 begin - exec.u64::unchecked_lt + exec.u64::lt end"; build_test!(source, &[a0, a1, b0, b1]).prop_expect_stack(&[c])?; @@ -1188,7 +893,7 @@ proptest! { let source = " use.std::math::u64 begin - exec.u64::unchecked_gt + exec.u64::gt end"; build_test!(source, &[a0, a1, b0, b1]).prop_expect_stack(&[c])?; @@ -1204,7 +909,7 @@ proptest! { let source = " use.std::math::u64 begin - exec.u64::unchecked_min + exec.u64::min end"; build_test!(source, &[a0, a1, b0, b1]).prop_expect_stack(&[c1, c0])?; @@ -1220,7 +925,7 @@ proptest! 
{ let source = " use.std::math::u64 begin - exec.u64::unchecked_max + exec.u64::max end"; build_test!(source, &[a0, a1, b0, b1]).prop_expect_stack(&[c1, c0])?; @@ -1238,7 +943,7 @@ proptest! { let source = " use.std::math::u64 begin - exec.u64::unchecked_div + exec.u64::div end"; build_test!(source, &[a0, a1, b0, b1]).prop_expect_stack(&[c1, c0])?; @@ -1256,12 +961,13 @@ proptest! { let source = " use.std::math::u64 begin - exec.u64::unchecked_mod + exec.u64::mod end"; build_test!(source, &[a0, a1, b0, b1]).prop_expect_stack(&[c1, c0])?; } + #[test] fn shl_proptest(a in any::(), b in 0_u32..64) { let c = a.wrapping_shl(b); @@ -1272,7 +978,7 @@ proptest! { let source = " use.std::math::u64 begin - exec.u64::unchecked_shl + exec.u64::shl end"; build_test!(source, &[5, a0, a1, b as u64]).prop_expect_stack(&[c1, c0, 5])?; @@ -1289,7 +995,7 @@ proptest! { let source = " use.std::math::u64 begin - exec.u64::unchecked_shr + exec.u64::shr end"; build_test!(source, &[5, a0, a1, b as u64]).prop_expect_stack(&[c1, c0, 5])?; @@ -1306,7 +1012,7 @@ proptest! { let source = " use.std::math::u64 begin - exec.u64::unchecked_rotl + exec.u64::rotl end"; build_test!(source, &[5, a0, a1, b as u64]).prop_expect_stack(&[c1, c0, 5])?; @@ -1323,11 +1029,71 @@ proptest! { let source = " use.std::math::u64 begin - exec.u64::unchecked_rotr + exec.u64::rotr end"; build_test!(source, &[5, a0, a1, b as u64]).prop_expect_stack(&[c1, c0, 5])?; } + + #[test] + fn clz_proptest(a in any::()) { + + let (a1, a0) = split_u64(a); + let c = a.leading_zeros() as u64; + + let source = " + use.std::math::u64 + begin + exec.u64::clz + end"; + + build_test!(source, &[a0, a1]).prop_expect_stack(&[c])?; + } + + #[test] + fn ctz_proptest(a in any::()) { + + let (a1, a0) = split_u64(a); + let c = a.trailing_zeros() as u64; + + let source = " + use.std::math::u64 + begin + exec.u64::ctz + end"; + + build_test!(source, &[a0, a1]).prop_expect_stack(&[c])?; + } + + #[test] + fn clo_proptest(a in any::()) { + + let (a1, a0) = split_u64(a); + let c = a.leading_ones() as u64; + + let source = " + use.std::math::u64 + begin + exec.u64::clo + end"; + + build_test!(source, &[a0, a1]).prop_expect_stack(&[c])?; + } + + #[test] + fn cto_proptest(a in any::()) { + + let (a1, a0) = split_u64(a); + let c = a.trailing_ones() as u64; + + let source = " + use.std::math::u64 + begin + exec.u64::cto + end"; + + build_test!(source, &[a0, a1]).prop_expect_stack(&[c])?; + } } // HELPER FUNCTIONS diff --git a/stdlib/tests/mem/mod.rs b/stdlib/tests/mem/mod.rs index 24d5845273..fd89168d2b 100644 --- a/stdlib/tests/mem/mod.rs +++ b/stdlib/tests/mem/mod.rs @@ -1,4 +1,4 @@ -use processor::{DefaultHost, ProcessState}; +use processor::{ContextId, DefaultHost, ProcessState}; use test_utils::{ build_expected_hash, build_expected_perm, stack_to_ints, ExecutionOptions, Process, StackInputs, ONE, ZERO, @@ -36,17 +36,57 @@ fn test_memcopy() { ); process.execute(&program).unwrap(); - assert_eq!(process.get_mem_value(0, 1000), Some([ZERO, ZERO, ZERO, ONE]), "Address 1000"); - assert_eq!(process.get_mem_value(0, 1001), Some([ZERO, ZERO, ONE, ZERO]), "Address 1001"); - assert_eq!(process.get_mem_value(0, 1002), Some([ZERO, ZERO, ONE, ONE]), "Address 1002"); - assert_eq!(process.get_mem_value(0, 1003), Some([ZERO, ONE, ZERO, ZERO]), "Address 1003"); - assert_eq!(process.get_mem_value(0, 1004), Some([ZERO, ONE, ZERO, ONE]), "Address 1004"); - - assert_eq!(process.get_mem_value(0, 2000), Some([ZERO, ZERO, ZERO, ONE]), "Address 2000"); - assert_eq!(process.get_mem_value(0, 2001), 
Some([ZERO, ZERO, ONE, ZERO]), "Address 2001"); - assert_eq!(process.get_mem_value(0, 2002), Some([ZERO, ZERO, ONE, ONE]), "Address 2002"); - assert_eq!(process.get_mem_value(0, 2003), Some([ZERO, ONE, ZERO, ZERO]), "Address 2003"); - assert_eq!(process.get_mem_value(0, 2004), Some([ZERO, ONE, ZERO, ONE]), "Address 2004"); + assert_eq!( + process.get_mem_value(ContextId::root(), 1000), + Some([ZERO, ZERO, ZERO, ONE]), + "Address 1000" + ); + assert_eq!( + process.get_mem_value(ContextId::root(), 1001), + Some([ZERO, ZERO, ONE, ZERO]), + "Address 1001" + ); + assert_eq!( + process.get_mem_value(ContextId::root(), 1002), + Some([ZERO, ZERO, ONE, ONE]), + "Address 1002" + ); + assert_eq!( + process.get_mem_value(ContextId::root(), 1003), + Some([ZERO, ONE, ZERO, ZERO]), + "Address 1003" + ); + assert_eq!( + process.get_mem_value(ContextId::root(), 1004), + Some([ZERO, ONE, ZERO, ONE]), + "Address 1004" + ); + + assert_eq!( + process.get_mem_value(ContextId::root(), 2000), + Some([ZERO, ZERO, ZERO, ONE]), + "Address 2000" + ); + assert_eq!( + process.get_mem_value(ContextId::root(), 2001), + Some([ZERO, ZERO, ONE, ZERO]), + "Address 2001" + ); + assert_eq!( + process.get_mem_value(ContextId::root(), 2002), + Some([ZERO, ZERO, ONE, ONE]), + "Address 2002" + ); + assert_eq!( + process.get_mem_value(ContextId::root(), 2003), + Some([ZERO, ONE, ZERO, ZERO]), + "Address 2003" + ); + assert_eq!( + process.get_mem_value(ContextId::root(), 2004), + Some([ZERO, ONE, ZERO, ONE]), + "Address 2004" + ); } #[test] @@ -171,7 +211,7 @@ fn test_pipe_preimage_to_memory_invalid_preimage() { let data = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; let mut advice_stack = stack_to_ints(&build_expected_hash(data)); advice_stack.reverse(); - advice_stack[0] = advice_stack[0] + 1; // corrupt the expected hash + advice_stack[0] += 1; // corrupt the expected hash advice_stack.extend(data); let res = build_test!(three_words, operand_stack, &advice_stack).execute(); assert!(res.is_err()); diff --git a/stdlib/tests/sys/mod.rs b/stdlib/tests/sys/mod.rs index 7479e4986e..50aee4de8c 100644 --- a/stdlib/tests/sys/mod.rs +++ b/stdlib/tests/sys/mod.rs @@ -1,4 +1,3 @@ -use crate::build_test; use test_utils::{proptest::prelude::*, rand::rand_vector, STACK_TOP_SIZE}; #[test] diff --git a/test-utils/Cargo.toml b/test-utils/Cargo.toml index a7a816f055..4c070bd809 100644 --- a/test-utils/Cargo.toml +++ b/test-utils/Cargo.toml @@ -16,14 +16,14 @@ default = ["std"] std = ["assembly/std", "processor/std", "prover/std", "verifier/std", "vm-core/std", "winter-prover/std"] [dependencies] -assembly = { package = "miden-assembly", path = "../assembly", version = "0.7", default-features = false } -processor = { package = "miden-processor", path = "../processor", version = "0.7", features = ["internals"], default-features = false } -prover = { package = "miden-prover", path = "../prover", version = "0.7", default-features = false } +assembly = { package = "miden-assembly", path = "../assembly", version = "0.8", default-features = false } +processor = { package = "miden-processor", path = "../processor", version = "0.8", features = ["internals"], default-features = false } +prover = { package = "miden-prover", path = "../prover", version = "0.8", default-features = false } test-case = "3.2" -verifier = { package = "miden-verifier", path = "../verifier", version = "0.7", default-features = false } -vm-core = { package = "miden-core", path = "../core", version = "0.7", default-features = false } -winter-prover = { package = "winter-prover", version = 
"0.6", default-features = false } +verifier = { package = "miden-verifier", path = "../verifier", version = "0.8", default-features = false } +vm-core = { package = "miden-core", path = "../core", version = "0.8", default-features = false } +winter-prover = { package = "winter-prover", version = "0.8", default-features = false } [target.'cfg(not(target_family = "wasm"))'.dependencies] -proptest = { version = "1.3" } -rand-utils = { package = "winter-rand-utils", version = "0.6" } +proptest = "1.3" +rand-utils = { package = "winter-rand-utils", version = "0.8" } diff --git a/test-utils/src/crypto.rs b/test-utils/src/crypto.rs index 6aded79b56..c4ffe45c6c 100644 --- a/test-utils/src/crypto.rs +++ b/test-utils/src/crypto.rs @@ -1,4 +1,4 @@ -use super::{Felt, Vec, Word, ZERO}; +use super::{collections::*, Felt, Word, ZERO}; // RE-EXPORTS // ================================================================================================ @@ -7,8 +7,8 @@ pub use vm_core::crypto::{ dsa::*, hash::{Rpo256, RpoDigest}, merkle::{ - EmptySubtreeRoots, MerkleError, MerklePath, MerkleStore, MerkleTree, Mmr, MmrPeaks, - NodeIndex, PartialMerkleTree, SimpleSmt, TieredSmt, + EmptySubtreeRoots, LeafIndex, MerkleError, MerklePath, MerkleStore, MerkleTree, Mmr, + MmrPeaks, NodeIndex, PartialMerkleTree, SimpleSmt, Smt, }, }; diff --git a/test-utils/src/lib.rs b/test-utils/src/lib.rs index 255fc021a2..d0aa78f9b6 100644 --- a/test-utils/src/lib.rs +++ b/test-utils/src/lib.rs @@ -9,23 +9,24 @@ extern crate alloc; #[cfg(not(target_family = "wasm"))] use proptest::prelude::{Arbitrary, Strategy}; -use vm_core::chiplets::hasher::apply_permutation; -use vm_core::utils::{collections::Vec, string::String}; +use vm_core::{ + chiplets::hasher::apply_permutation, + utils::{collections::*, string::*}, +}; // EXPORTS // ================================================================================================ -pub use vm_core::chiplets::hasher::{hash_elements, STATE_WIDTH}; - pub use assembly::{Library, MaslLibrary}; pub use processor::{ - AdviceInputs, AdviceProvider, DefaultHost, ExecutionError, ExecutionOptions, ExecutionTrace, - Process, ProcessState, StackInputs, VmStateIterator, + AdviceInputs, AdviceProvider, ContextId, DefaultHost, ExecutionError, ExecutionOptions, + ExecutionTrace, Process, ProcessState, StackInputs, VmStateIterator, }; pub use prover::{prove, MemAdviceProvider, ProvingOptions}; pub use test_case::test_case; -pub use verifier::{ProgramInfo, VerifierError}; +pub use verifier::{verify, AcceptableOptions, ProgramInfo, VerifierError}; pub use vm_core::{ + chiplets::hasher::{hash_elements, STATE_WIDTH}, stack::STACK_TOP_SIZE, utils::{collections, group_slice_elements, group_vector_elements, IntoBytes, ToElements}, Felt, FieldElement, Program, StarkField, Word, EMPTY_WORD, ONE, WORD_SIZE, ZERO, @@ -49,8 +50,8 @@ pub mod crypto; pub mod rand; mod test_builders; -pub use test_builders::*; +use assembly::AssemblyError; #[cfg(not(target_family = "wasm"))] pub use proptest; @@ -72,9 +73,10 @@ pub const U32_BOUND: u64 = u32::MAX as u64 + 1; /// `Test::expect_error` will try to either compile or execute the test data, according to the /// provided TestError variant. Then it will validate that the resulting error contains the /// TestError variant's string slice. 
-pub enum TestError<'a> { - AssemblyError(&'a str), - ExecutionError(&'a str), +#[derive(Debug, PartialEq)] +pub enum TestError { + AssemblyError(AssemblyError), + ExecutionError(ExecutionError), } /// This is a container for the data required to run tests, which allows for running several @@ -118,28 +120,19 @@ impl Test { // TEST METHODS // -------------------------------------------------------------------------------------------- - /// Asserts that running the test for the expected TestError variant will result in an error - /// that contains the TestError's error substring in its error message. + /// Asserts that running the test will result in the expected error. #[cfg(all(feature = "std", not(target_family = "wasm")))] - pub fn expect_error(&self, error: TestError) { - match error { - TestError::AssemblyError(substr) => { - assert_eq!( - std::panic::catch_unwind(|| self.compile()) - .err() - .and_then(|a| { a.downcast_ref::().map(|s| s.contains(substr)) }), - Some(true) - ); + pub fn expect_error(&self, expected_error: TestError) { + match expected_error { + TestError::AssemblyError(assembly_error) => { + let actual_error = self.compile().err().unwrap(); + assert_eq!(assembly_error, actual_error); } - TestError::ExecutionError(substr) => { - assert_eq!( - std::panic::catch_unwind(|| self.execute().unwrap()) - .err() - .and_then(|a| { a.downcast_ref::().map(|s| s.contains(substr)) }), - Some(true) - ); + TestError::ExecutionError(execution_error) => { + let actual_error = self.execute().err().unwrap(); + assert_eq!(execution_error, actual_error); } - } + }; } /// Builds a final stack from the provided stack-ordered array and asserts that executing the @@ -160,7 +153,7 @@ expected_mem: &[u64], ) { // compile the program - let program = self.compile(); + let program = self.compile().expect("Failed to compile test source."); let host = DefaultHost::new(MemAdviceProvider::from(self.advice_inputs.clone())); // execute the test @@ -175,7 +168,8 @@ // validate the memory state for data in expected_mem.chunks(WORD_SIZE) { // Main memory is zeroed by default, use zeros as a fallback when unwrapping to make testing easier - let mem_state = process.get_mem_value(0, mem_start_addr).unwrap_or(EMPTY_WORD); + let mem_state = + process.get_mem_value(ContextId::root(), mem_start_addr).unwrap_or(EMPTY_WORD); let mem_state = stack_to_ints(&mem_state); assert_eq!( @@ -207,8 +201,8 @@ // UTILITY METHODS // -------------------------------------------------------------------------------------------- - /// Compiles a test's source and returns the resulting Program. - pub fn compile(&self) -> Program { + /// Compiles a test's source and returns the resulting Program or an AssemblyError. + pub fn compile(&self) -> Result { let assembler = assembly::Assembler::default() .with_debug_mode(self.in_debug_mode) .with_libraries(self.libraries.iter()) @@ -219,13 +213,12 @@ None => assembler, } .compile(&self.source) - .expect("Failed to compile test source.") } /// Compiles the test's source to a Program and executes it with the test's inputs. Returns a /// resulting execution trace or error.
pub fn execute(&self) -> Result { - let program = self.compile(); + let program = self.compile().expect("Failed to compile test source."); let host = DefaultHost::new(MemAdviceProvider::from(self.advice_inputs.clone())); processor::execute(&program, self.stack_inputs.clone(), host, ExecutionOptions::default()) } @@ -235,7 +228,7 @@ impl Test { pub fn execute_process( &self, ) -> Result>, ExecutionError> { - let program = self.compile(); + let program = self.compile().expect("Failed to compile test source."); let host = DefaultHost::new(MemAdviceProvider::from(self.advice_inputs.clone())); let mut process = Process::new( program.kernel().clone(), @@ -252,7 +245,7 @@ impl Test { /// is true, this function will force a failure by modifying the first output. pub fn prove_and_verify(&self, pub_inputs: Vec, test_fail: bool) { let stack_inputs = StackInputs::try_from_values(pub_inputs).unwrap(); - let program = self.compile(); + let program = self.compile().expect("Failed to compile test source."); let host = DefaultHost::new(MemAdviceProvider::from(self.advice_inputs.clone())); let (mut stack_outputs, proof) = prover::prove(&program, stack_inputs.clone(), host, ProvingOptions::default()).unwrap(); @@ -271,7 +264,7 @@ impl Test { /// VmStateIterator that allows us to iterate through each clock cycle and inspect the process /// state. pub fn execute_iter(&self) -> VmStateIterator { - let program = self.compile(); + let program = self.compile().expect("Failed to compile test source."); let host = DefaultHost::new(MemAdviceProvider::from(self.advice_inputs.clone())); processor::execute_iter(&program, self.stack_inputs.clone(), host) } diff --git a/verifier/Cargo.toml b/verifier/Cargo.toml index 32b47f06b8..9fcb1bc966 100644 --- a/verifier/Cargo.toml +++ b/verifier/Cargo.toml @@ -1,15 +1,16 @@ [package] name = "miden-verifier" -version = "0.7.0" +version = "0.8.0" description="Miden VM execution verifier" authors = ["miden contributors"] readme="README.md" license = "MIT" repository = "https://github.com/0xPolygonMiden/miden-vm" +documentation = "https://docs.rs/miden-verifier/0.8.0" categories = ["cryptography", "no-std"] keywords = ["miden", "stark", "verifier", "zkp"] edition = "2021" -rust-version = "1.67" +rust-version = "1.73" [lib] bench = false @@ -20,6 +21,7 @@ default = ["std"] std = ["air/std", "vm-core/std", "winter-verifier/std"] [dependencies] -air = { package = "miden-air", path = "../air", version = "0.7", default-features = false } -vm-core = { package = "miden-core", path = "../core", version = "0.7", default-features = false } -winter-verifier = { package = "winter-verifier", version = "0.6", default-features = false } +air = { package = "miden-air", path = "../air", version = "0.8", default-features = false } +tracing = { version = "0.1", default-features = false, features = ["attributes"] } +vm-core = { package = "miden-core", path = "../core", version = "0.8", default-features = false } +winter-verifier = { package = "winter-verifier", version = "0.8", default-features = false } diff --git a/verifier/src/lib.rs b/verifier/src/lib.rs index b321e84f07..f704758ec6 100644 --- a/verifier/src/lib.rs +++ b/verifier/src/lib.rs @@ -1,10 +1,13 @@ #![cfg_attr(not(feature = "std"), no_std)] -use air::{HashFunction, ProcessorAir, PublicInputs}; +use air::{HashFunction, ProcessorAir, ProvingOptions, PublicInputs}; use core::fmt; -use vm_core::crypto::{ - hash::{Blake3_192, Blake3_256, Rpo256}, - random::{RpoRandomCoin, WinterRandomCoin}, +use vm_core::{ + crypto::{ + hash::{Blake3_192, 
Blake3_256, Rpo256}, + random::{RpoRandomCoin, WinterRandomCoin}, + }, + utils::vec, }; use winter_verifier::verify as verify_proof; @@ -12,7 +15,7 @@ // ================================================================================================ pub use vm_core::{chiplets::hasher::Digest, Kernel, ProgramInfo, StackInputs, StackOutputs, Word}; -pub use winter_verifier::VerifierError; +pub use winter_verifier::{AcceptableOptions, VerifierError}; pub mod math { pub use vm_core::{Felt, FieldElement, StarkField}; } @@ -36,8 +39,19 @@ pub use air::ExecutionProof; /// `stack_outputs` slice, and the order of the rest of the output elements will also match the /// order on the stack. This is the reverse of the order of the `stack_inputs` slice. /// +/// The verifier accepts proofs generated using a parameter set defined in [ProvingOptions]. +/// Specifically, parameter sets targeting the following are accepted: +/// - 96-bit security level, non-recursive context (BLAKE3 hash function). +/// - 96-bit security level, recursive context (RPO hash function). +/// - 128-bit security level, non-recursive context (BLAKE3 hash function). +/// - 128-bit security level, recursive context (RPO hash function). +/// /// # Errors -/// Returns an error if the provided proof does not prove a correct execution of the program. +/// Returns an error if: +/// - The provided proof does not prove a correct execution of the program. +/// - The protocol parameters used to generate the proof are not in the set of acceptable +/// parameters. +#[tracing::instrument("verify_program", skip_all)] pub fn verify( program_info: ProgramInfo, stack_inputs: StackInputs, @@ -52,13 +66,19 @@ let (hash_fn, proof) = proof.into_parts(); match hash_fn { HashFunction::Blake3_192 => { - verify_proof::>(proof, pub_inputs) + let opts = AcceptableOptions::OptionSet(vec![ProvingOptions::REGULAR_96_BITS]); + verify_proof::>(proof, pub_inputs, &opts) } HashFunction::Blake3_256 => { - verify_proof::>(proof, pub_inputs) + let opts = AcceptableOptions::OptionSet(vec![ProvingOptions::REGULAR_128_BITS]); + verify_proof::>(proof, pub_inputs, &opts) } HashFunction::Rpo256 => { - verify_proof::(proof, pub_inputs) + let opts = AcceptableOptions::OptionSet(vec![ + ProvingOptions::RECURSIVE_96_BITS, + ProvingOptions::RECURSIVE_128_BITS, + ]); + verify_proof::(proof, pub_inputs, &opts) } } .map_err(VerificationError::VerifierError)?;
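
The `verify` function above now rejects proofs whose proving parameters fall outside an explicit allow-list, in addition to checking the proof itself. Below is a minimal caller-side sketch of that behavior. It assumes the crate is consumed as `miden_verifier`, that `VerificationError` is exported at the crate root alongside the re-exported `ExecutionProof`, `ProgramInfo`, `StackInputs`, and `StackOutputs` types visible in this file, and that the remaining parameters of `verify` are the stack outputs and the execution proof, as the surrounding doc comment indicates; the wrapper name `check_proof` is illustrative only.

    use miden_verifier::{
        verify, ExecutionProof, ProgramInfo, StackInputs, StackOutputs, VerificationError,
    };

    // Thin wrapper around `verify`. The `?` discards whatever `verify` returns on success,
    // so the sketch stays agnostic about its exact Ok type. The call fails both when the
    // proof is invalid and when the proof was generated with a parameter set outside the
    // four accepted ones listed in the doc comment above.
    fn check_proof(
        program_info: ProgramInfo,
        stack_inputs: StackInputs,
        stack_outputs: StackOutputs,
        proof: ExecutionProof,
    ) -> Result<(), VerificationError> {
        verify(program_info, stack_inputs, stack_outputs, proof)?;
        Ok(())
    }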
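
Similarly, the test-utils changes replace substring matching in `Test::expect_error` with exact comparison against typed errors, and `Test::compile` now returns a `Result` instead of panicking. A minimal sketch of the resulting test pattern follows, mirroring the `checked_and_fail` tests above; it assumes the `build_test!` macro resolves without an explicit import (the `use crate::build_test;` lines were removed in this diff) and that `Felt`, `TestError`, `U32_BOUND`, and `ZERO` are re-exported as shown. The test name and operand values are illustrative.

    use processor::ExecutionError;
    use test_utils::{Felt, TestError, U32_BOUND, ZERO};

    #[test]
    fn and_fails_on_non_u32_limb() {
        let source = "
            use.std::math::u64
            begin
                exec.u64::and
            end";

        // Only b0 lies outside the u32 range, so it is the limb the error is expected
        // to report, paired with a zero error code as in the tests above.
        let (a0, a1, b1) = (1_u64, 2_u64, 3_u64);
        let b0 = U32_BOUND;

        let test = build_test!(source, &[a0, a1, b0, b1]);
        test.expect_error(TestError::ExecutionError(ExecutionError::NotU32Value(
            Felt::new(b0),
            ZERO,
        )));
    }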