From f42469bf122f0364d0a5d455ce285e54216d9ad6 Mon Sep 17 00:00:00 2001
From: Alex Ostrovski <aov@matterlabs.dev>
Date: Mon, 14 Oct 2024 12:34:28 +0300
Subject: [PATCH 01/17] Sketch `yab` benchmarking

---
 Cargo.lock                             | 49 +++++++++++++++++-------
 core/tests/vm-benchmark/Cargo.toml     |  2 +
 core/tests/vm-benchmark/benches/iai.rs | 53 +++++++++++---------------
 3 files changed, 61 insertions(+), 43 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 774471d3d6c..f927bcb64a3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -945,7 +945,7 @@ name = "block_reverter"
 version = "0.1.0"
 dependencies = [
  "anyhow",
- "clap 4.5.18",
+ "clap 4.5.20",
  "serde_json",
  "tokio",
  "zksync_block_reverter",
@@ -1431,9 +1431,9 @@ dependencies = [
 
 [[package]]
 name = "clap"
-version = "4.5.18"
+version = "4.5.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b0956a43b323ac1afaffc053ed5c4b7c1f1800bacd1683c353aabbb752515dd3"
+checksum = "b97f376d85a664d5837dbae44bf546e6477a679ff6610010f17276f686d867e8"
 dependencies = [
  "clap_builder",
  "clap_derive",
@@ -1441,14 +1441,15 @@ dependencies = [
 
 [[package]]
 name = "clap_builder"
-version = "4.5.18"
+version = "4.5.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4d72166dd41634086d5803a47eb71ae740e61d84709c36f3c34110173db3961b"
+checksum = "19bc80abd44e4bed93ca373a0704ccbd1b710dc5749406201bb018272808dc54"
 dependencies = [
  "anstream",
  "anstyle",
  "clap_lex 0.7.2",
  "strsim 0.11.1",
+ "terminal_size",
 ]
 
 [[package]]
@@ -2782,7 +2783,7 @@ name = "genesis_generator"
 version = "0.1.0"
 dependencies = [
  "anyhow",
- "clap 4.5.18",
+ "clap 4.5.20",
  "futures 0.3.30",
  "serde",
  "serde_json",
@@ -4108,7 +4109,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4"
 dependencies = [
  "cfg-if",
- "windows-targets 0.52.6",
+ "windows-targets 0.48.5",
 ]
 
 [[package]]
@@ -4378,7 +4379,7 @@ name = "merkle_tree_consistency_checker"
 version = "0.1.0"
 dependencies = [
  "anyhow",
- "clap 4.5.18",
+ "clap 4.5.20",
  "tracing",
  "zksync_config",
  "zksync_env_config",
@@ -5484,7 +5485,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4"
 dependencies = [
  "bytes",
- "heck 0.5.0",
+ "heck 0.4.1",
  "itertools 0.12.1",
  "log",
  "multimap",
@@ -6568,7 +6569,7 @@ name = "selector_generator"
 version = "0.1.0"
 dependencies = [
  "anyhow",
- "clap 4.5.18",
+ "clap 4.5.20",
  "ethabi",
  "glob",
  "hex",
@@ -7892,6 +7893,16 @@ dependencies = [
  "winapi-util",
 ]
 
+[[package]]
+name = "terminal_size"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4f599bd7ca042cfdf8f4512b277c02ba102247820f9d9d4a9f521f496751a6ef"
+dependencies = [
+ "rustix",
+ "windows-sys 0.59.0",
+]
+
 [[package]]
 name = "test-casing"
 version = "0.1.3"
@@ -8738,6 +8749,7 @@ dependencies = [
  "rand 0.8.5",
  "tokio",
  "vise",
+ "yab",
  "zksync_contracts",
  "zksync_multivm",
  "zksync_types",
@@ -9221,6 +9233,17 @@ dependencies = [
  "zeroize",
 ]
 
+[[package]]
+name = "yab"
+version = "0.1.0"
+source = "git+https://github.com/slowli/yab.git?rev=01e8ed2e4c4021bacdddfaa25f8a6e92b3439a9a#01e8ed2e4c4021bacdddfaa25f8a6e92b3439a9a"
+dependencies = [
+ "anes",
+ "clap 4.5.20",
+ "num_cpus",
+ "thiserror",
+]
+
 [[package]]
 name = "yansi"
 version = "1.0.1"
@@ -10207,7 +10230,7 @@ dependencies = [
  "anyhow",
  "assert_matches",
  "async-trait",
- "clap 4.5.18",
+ "clap 4.5.20",
  "envy",
  "futures 0.3.30",
  "rustc_version",
@@ -10415,7 +10438,7 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "assert_matches",
- "clap 4.5.18",
+ "clap 4.5.20",
  "insta",
  "leb128",
  "once_cell",
@@ -10993,7 +11016,7 @@ name = "zksync_server"
 version = "0.1.0"
 dependencies = [
  "anyhow",
- "clap 4.5.18",
+ "clap 4.5.20",
  "futures 0.3.30",
  "serde_json",
  "tikv-jemallocator",
diff --git a/core/tests/vm-benchmark/Cargo.toml b/core/tests/vm-benchmark/Cargo.toml
index 59c1e21493b..248c3248bd1 100644
--- a/core/tests/vm-benchmark/Cargo.toml
+++ b/core/tests/vm-benchmark/Cargo.toml
@@ -22,6 +22,8 @@ tokio.workspace = true
 [dev-dependencies]
 assert_matches.workspace = true
 iai.workspace = true
+# FIXME: use workspace dep
+yab = { version = "0.1.0", git = "https://github.com/slowli/yab.git", rev = "01e8ed2e4c4021bacdddfaa25f8a6e92b3439a9a" }
 
 [[bench]]
 name = "oneshot"
diff --git a/core/tests/vm-benchmark/benches/iai.rs b/core/tests/vm-benchmark/benches/iai.rs
index 8cbb9f10dd8..309292dfee7 100644
--- a/core/tests/vm-benchmark/benches/iai.rs
+++ b/core/tests/vm-benchmark/benches/iai.rs
@@ -1,35 +1,28 @@
-use iai::black_box;
-use vm_benchmark::{BenchmarkingVm, BenchmarkingVmFactory, Bytecode, Fast, Legacy};
+use vm_benchmark::{BenchmarkingVm, BenchmarkingVmFactory, Fast, Legacy, BYTECODES};
+use yab::{Bencher, BenchmarkId};
 
-fn run_bytecode<VM: BenchmarkingVmFactory>(name: &str) {
-    let tx = Bytecode::get(name).deploy_tx();
-    black_box(BenchmarkingVm::<VM>::default().run_transaction(&tx));
-}
-
-macro_rules! make_functions_and_main {
-    ($($file:ident => $legacy_name:ident,)+) => {
-        $(
-        fn $file() {
-            run_bytecode::<Fast>(stringify!($file));
-        }
+fn benchmarks_for_vm<VM: BenchmarkingVmFactory>(bencher: &mut Bencher) {
+    bencher.bench(
+        BenchmarkId::new("init", VM::LABEL.as_str()),
+        BenchmarkingVm::<VM>::default,
+    );
 
-        fn $legacy_name() {
-            run_bytecode::<Legacy>(stringify!($file));
-        }
-        )+
+    for bytecode in BYTECODES {
+        let tx = bytecode.deploy_tx();
+        bencher.bench_with_capture(
+            BenchmarkId::new(bytecode.name, VM::LABEL.as_str()),
+            |capture| {
+                let mut vm = BenchmarkingVm::<VM>::default();
+                capture.measure(|| vm.run_transaction(yab::black_box(&tx)));
+            },
+        );
+    }
+}
 
-        iai::main!($($file, $legacy_name,)+);
-    };
+fn benchmarks(bencher: &mut Bencher) {
+    // FIXME: integrate reporting here
+    benchmarks_for_vm::<Fast>(bencher);
+    benchmarks_for_vm::<Legacy>(bencher);
 }
 
-make_functions_and_main!(
-    access_memory => access_memory_legacy,
-    call_far => call_far_legacy,
-    decode_shl_sub => decode_shl_sub_legacy,
-    deploy_simple_contract => deploy_simple_contract_legacy,
-    finish_eventful_frames => finish_eventful_frames_legacy,
-    write_and_decode => write_and_decode_legacy,
-    event_spam => event_spam_legacy,
-    slot_hash_collision => slot_hash_collision_legacy,
-    heap_read_write => heap_read_write_legacy,
-);
+yab::main!(benchmarks);

From 88d23b3cbe318c7fbb584273544ab78ce9666a4f Mon Sep 17 00:00:00 2001
From: Alex Ostrovski <aov@matterlabs.dev>
Date: Mon, 14 Oct 2024 17:07:39 +0300
Subject: [PATCH 02/17] Add benchmark reporters

---
 core/tests/vm-benchmark/Cargo.toml            |   2 +-
 core/tests/vm-benchmark/benches/iai.rs        |  28 ---
 .../vm-benchmark/benches/instructions.rs      | 182 ++++++++++++++++++
 core/tests/vm-benchmark/src/criterion.rs      |   6 +-
 4 files changed, 186 insertions(+), 32 deletions(-)
 delete mode 100644 core/tests/vm-benchmark/benches/iai.rs
 create mode 100644 core/tests/vm-benchmark/benches/instructions.rs

diff --git a/core/tests/vm-benchmark/Cargo.toml b/core/tests/vm-benchmark/Cargo.toml
index 248c3248bd1..070b078160b 100644
--- a/core/tests/vm-benchmark/Cargo.toml
+++ b/core/tests/vm-benchmark/Cargo.toml
@@ -34,5 +34,5 @@ name = "batch"
 harness = false
 
 [[bench]]
-name = "iai"
+name = "instructions"
 harness = false
diff --git a/core/tests/vm-benchmark/benches/iai.rs b/core/tests/vm-benchmark/benches/iai.rs
deleted file mode 100644
index 309292dfee7..00000000000
--- a/core/tests/vm-benchmark/benches/iai.rs
+++ /dev/null
@@ -1,28 +0,0 @@
-use vm_benchmark::{BenchmarkingVm, BenchmarkingVmFactory, Fast, Legacy, BYTECODES};
-use yab::{Bencher, BenchmarkId};
-
-fn benchmarks_for_vm<VM: BenchmarkingVmFactory>(bencher: &mut Bencher) {
-    bencher.bench(
-        BenchmarkId::new("init", VM::LABEL.as_str()),
-        BenchmarkingVm::<VM>::default,
-    );
-
-    for bytecode in BYTECODES {
-        let tx = bytecode.deploy_tx();
-        bencher.bench_with_capture(
-            BenchmarkId::new(bytecode.name, VM::LABEL.as_str()),
-            |capture| {
-                let mut vm = BenchmarkingVm::<VM>::default();
-                capture.measure(|| vm.run_transaction(yab::black_box(&tx)));
-            },
-        );
-    }
-}
-
-fn benchmarks(bencher: &mut Bencher) {
-    // FIXME: integrate reporting here
-    benchmarks_for_vm::<Fast>(bencher);
-    benchmarks_for_vm::<Legacy>(bencher);
-}
-
-yab::main!(benchmarks);
diff --git a/core/tests/vm-benchmark/benches/instructions.rs b/core/tests/vm-benchmark/benches/instructions.rs
new file mode 100644
index 00000000000..2b04d6b7dad
--- /dev/null
+++ b/core/tests/vm-benchmark/benches/instructions.rs
@@ -0,0 +1,182 @@
+//! Measures instruction counts for the .
+
+use std::{
+    collections::HashMap,
+    env,
+    sync::{Arc, Mutex},
+};
+
+use vise::{Gauge, LabeledFamily, Metrics};
+use vm_benchmark::{
+    criterion::PrometheusRuntime, BenchmarkingVm, BenchmarkingVmFactory, Fast, Legacy, BYTECODES,
+};
+use yab::{
+    reporter::{BenchmarkOutput, BenchmarkReporter, Reporter},
+    AccessSummary, Bencher, BenchmarkId,
+};
+
+fn benchmarks_for_vm<VM: BenchmarkingVmFactory>(bencher: &mut Bencher) {
+    bencher.bench(
+        BenchmarkId::new("init", VM::LABEL.as_str()),
+        BenchmarkingVm::<VM>::default,
+    );
+
+    for bytecode in BYTECODES {
+        bencher.bench_with_capture(
+            BenchmarkId::new(bytecode.name, VM::LABEL.as_str()),
+            |capture| {
+                let mut vm = yab::black_box(BenchmarkingVm::<VM>::default());
+                let tx = yab::black_box(bytecode.deploy_tx());
+                capture.measure(|| vm.run_transaction(&tx));
+            },
+        );
+    }
+}
+
+/// Reporter that pushes cachegrind metrics to Prometheus.
+#[derive(Debug)]
+struct MetricsReporter {
+    _runtime: Option<PrometheusRuntime>,
+}
+
+impl Default for MetricsReporter {
+    fn default() -> Self {
+        Self {
+            _runtime: PrometheusRuntime::new(),
+        }
+    }
+}
+
+impl Reporter for MetricsReporter {
+    fn new_benchmark(&mut self, id: &BenchmarkId) -> Box<dyn BenchmarkReporter> {
+        Box::new(MetricsBenchmarkReporter(id.clone()))
+    }
+}
+
+#[derive(Debug)]
+struct MetricsBenchmarkReporter(BenchmarkId);
+
+impl BenchmarkReporter for MetricsBenchmarkReporter {
+    fn ok(self: Box<Self>, output: &BenchmarkOutput) {
+        #[derive(Debug, Metrics)]
+        #[metrics(prefix = "vm_cachegrind")]
+        struct VmCachegrindMetrics {
+            #[metrics(labels = ["benchmark"])]
+            instructions: LabeledFamily<String, Gauge<u64>>,
+            #[metrics(labels = ["benchmark"])]
+            l1_accesses: LabeledFamily<String, Gauge<u64>>,
+            #[metrics(labels = ["benchmark"])]
+            l2_accesses: LabeledFamily<String, Gauge<u64>>,
+            #[metrics(labels = ["benchmark"])]
+            ram_accesses: LabeledFamily<String, Gauge<u64>>,
+            #[metrics(labels = ["benchmark"])]
+            cycles: LabeledFamily<String, Gauge<u64>>,
+        }
+
+        #[vise::register]
+        static VM_CACHEGRIND_METRICS: vise::Global<VmCachegrindMetrics> = vise::Global::new();
+
+        let id = self.0.to_string();
+        VM_CACHEGRIND_METRICS.instructions[&id].set(output.stats.total_instructions());
+        if let Some(&full) = output.stats.as_full() {
+            let summary = AccessSummary::from(full);
+            VM_CACHEGRIND_METRICS.l1_accesses[&id].set(summary.l1_hits);
+            VM_CACHEGRIND_METRICS.l2_accesses[&id].set(summary.l3_hits);
+            VM_CACHEGRIND_METRICS.ram_accesses[&id].set(summary.ram_accesses);
+            VM_CACHEGRIND_METRICS.cycles[&id].set(summary.estimated_cycles());
+        }
+    }
+}
+
+#[derive(Debug, Clone, Copy)]
+struct Comparison {
+    est_cycles: u64,
+    est_cycles_diff: f64,
+}
+
+impl Comparison {
+    fn percent_difference(a: u64, b: u64) -> f64 {
+        ((b as f64) - (a as f64)) / (a as f64) * 100.0
+    }
+
+    fn new(output: &BenchmarkOutput) -> Option<Self> {
+        let current_cycles = AccessSummary::from(*output.stats.as_full()?).estimated_cycles();
+        let prev_cycles = AccessSummary::from(*output.prev_stats?.as_full()?).estimated_cycles();
+        Some(Self {
+            est_cycles: current_cycles,
+            est_cycles_diff: Self::percent_difference(prev_cycles, current_cycles),
+        })
+    }
+}
+
+#[derive(Debug, Default)]
+struct ComparisonReporter {
+    comparisons: Arc<Mutex<HashMap<String, Comparison>>>,
+}
+
+impl Drop for ComparisonReporter {
+    fn drop(&mut self) {
+        const ENV_VAR: &str = "BENCHMARK_DIFF_THRESHOLD_PERCENT";
+
+        let diff_threshold = env::var(ENV_VAR).unwrap_or_else(|_| "1.0".into());
+        let diff_threshold: f64 = diff_threshold.parse().unwrap_or_else(|err| {
+            panic!("incorrect `{ENV_VAR}` value: {err}");
+        });
+
+        let mut comparisons = self.comparisons.lock().expect("poisoned").clone();
+        comparisons.retain(|_, diff| diff.est_cycles_diff.abs() > diff_threshold);
+        if comparisons.is_empty() {
+            return;
+        }
+
+        println!("\n## Detected VM performance changes");
+        println!("Benchmark name | est. cycles | change in est. cycles |");
+        println!("|:---|---:|---:|");
+        for (name, comparison) in &comparisons {
+            println!(
+                "| {name} | {} | {:+.1}% |",
+                comparison.est_cycles, comparison.est_cycles_diff
+            );
+        }
+    }
+}
+
+impl Reporter for ComparisonReporter {
+    fn new_benchmark(&mut self, id: &BenchmarkId) -> Box<dyn BenchmarkReporter> {
+        Box::new(BenchmarkComparison {
+            comparisons: self.comparisons.clone(),
+            id: id.clone(),
+        })
+    }
+}
+
+#[derive(Debug)]
+struct BenchmarkComparison {
+    comparisons: Arc<Mutex<HashMap<String, Comparison>>>,
+    id: BenchmarkId,
+}
+
+impl BenchmarkReporter for BenchmarkComparison {
+    fn ok(self: Box<Self>, output: &BenchmarkOutput) {
+        if let Some(diff) = Comparison::new(output) {
+            self.comparisons
+                .lock()
+                .expect("poisoned")
+                .insert(self.id.to_string(), diff);
+        }
+    }
+}
+
+fn benchmarks(bencher: &mut Bencher) {
+    if env::args().any(|arg| arg == "--print") {
+        // Only customize reporting if outputting previously collected benchmark result in order to prevent
+        // reporters influencing cachegrind stats.
+        bencher
+            .add_reporter(MetricsReporter::default())
+            .add_reporter(ComparisonReporter::default());
+    }
+    benchmarks_for_vm::<Fast>(bencher);
+    benchmarks_for_vm::<Legacy>(bencher);
+}
+
+yab::main!(benchmarks);
diff --git a/core/tests/vm-benchmark/src/criterion.rs b/core/tests/vm-benchmark/src/criterion.rs
index 9515ac4ef98..024ccf14139 100644
--- a/core/tests/vm-benchmark/src/criterion.rs
+++ b/core/tests/vm-benchmark/src/criterion.rs
@@ -57,7 +57,7 @@ struct VmBenchmarkMetrics {
 static METRICS: vise::Global<VmBenchmarkMetrics> = vise::Global::new();
 
 #[derive(Debug)]
-struct PrometheusRuntime {
+pub struct PrometheusRuntime {
     stop_sender: watch::Sender<bool>,
     _runtime: tokio::runtime::Runtime,
 }
@@ -72,7 +72,7 @@ impl Drop for PrometheusRuntime {
 }
 
 impl PrometheusRuntime {
-    fn new() -> Option<Self> {
+    pub fn new() -> Option<Self> {
         const PUSH_INTERVAL: Duration = Duration::from_millis(100);
 
         let gateway_url = env::var("BENCHMARK_PROMETHEUS_PUSHGATEWAY_URL").ok()?;
@@ -164,7 +164,7 @@ thread_local! {
 
 static BIN_NAME: SyncOnceCell<&'static str> = SyncOnceCell::new();
 
-/// Measurement for criterion that exports .
+/// Measurement for criterion that exports timing-related metrics.
 #[derive(Debug)]
 pub struct MeteredTime {
     _prometheus: Option<PrometheusRuntime>,

From 8fc1a23e6e1f73edca57477fb8bdabe704ad491f Mon Sep 17 00:00:00 2001
From: Alex Ostrovski <aov@matterlabs.dev>
Date: Mon, 14 Oct 2024 17:08:03 +0300
Subject: [PATCH 03/17] Update benchmark CI workflows

---
 .github/workflows/vm-perf-comparison.yml    | 22 +++++++++++----------
 .github/workflows/vm-perf-to-prometheus.yml |  4 ++--
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/vm-perf-comparison.yml b/.github/workflows/vm-perf-comparison.yml
index 49830a30cc1..66cfd07d925 100644
--- a/.github/workflows/vm-perf-comparison.yml
+++ b/.github/workflows/vm-perf-comparison.yml
@@ -10,6 +10,9 @@ jobs:
     name: Run VM benchmarks
     runs-on: [ matterlabs-ci-runner-highmem-long ]
 
+    env:
+      BENCHMARK_INSTRUCTIONS_DIFF: target/instructions-diff.md
+
     steps:
       - name: checkout base branch
         uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4
@@ -40,6 +43,8 @@ jobs:
           echo "SCCACHE_GCS_SERVICE_ACCOUNT=gha-ci-runners@matterlabs-infra.iam.gserviceaccount.com" >> .env
           echo "SCCACHE_GCS_RW_MODE=READ_WRITE" >> .env
           echo "RUSTC_WRAPPER=sccache" >> .env
+          # Set the minimum reported instruction count difference to reduce noise 
+          echo "BENCHMARK_DIFF_THRESHOLD_PERCENT=2" >> .env
 
       - name: init
         run: |
@@ -51,8 +56,7 @@ jobs:
         run: |
           ci_run zkstackup -g --local
           ci_run zkstack dev contracts --system-contracts
-          ci_run cargo bench --package vm-benchmark --bench iai | tee base-iai
-          ci_run cargo run --package vm-benchmark --release --bin instruction_counts | tee base-opcodes || touch base-opcodes
+          ci_run cargo bench --package vm-benchmark --bench instructions
 
       - name: checkout PR
         run: |
@@ -60,24 +64,22 @@ jobs:
 
       - name: run benchmarks on PR
         shell: bash
+        id: comparison
         run: |
           ci_run zkstackup -g --local
           ci_run zkstack dev contracts --system-contracts
-          ci_run cargo bench --package vm-benchmark --bench iai | tee pr-iai
-          ci_run cargo run --package vm-benchmark --release --bin instruction_counts | tee pr-opcodes || touch pr-opcodes
+          ci_run cargo bench --package vm-benchmark --bench instructions -- --verbose
 
           EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
           echo "speedup<<$EOF" >> $GITHUB_OUTPUT
-          ci_run cargo run --package vm-benchmark --release --bin compare_iai_results base-iai pr-iai base-opcodes pr-opcodes >> $GITHUB_OUTPUT
+          # Output all lines starting from the "## ..." comparison header
+          ci_run cargo bench --package vm-benchmark --bench instructions -- --print | grep -A 1000 --color=never -e '^## ' >> $GITHUB_OUTPUT
           echo "$EOF" >> $GITHUB_OUTPUT
-        id: comparison
 
       - name: Comment on PR
         uses: thollander/actions-comment-pull-request@fabd468d3a1a0b97feee5f6b9e499eab0dd903f6 # v2.5.0
         with:
-          message: |
-            ${{ steps.comparison.outputs.speedup == '' && '## No performance difference detected (anymore)' || '## Detected VM performance changes' }}
-            ${{ steps.comparison.outputs.speedup }}
+          message: ${{ steps.comparison.outputs.speedup }}
           comment_tag: vm-performance-changes
-          mode: recreate
+          mode: ${{ steps.comparison.outputs.speedup == '' && 'delete' || 'recreate' }}
           create_if_not_exists: ${{ steps.comparison.outputs.speedup != '' }}
diff --git a/.github/workflows/vm-perf-to-prometheus.yml b/.github/workflows/vm-perf-to-prometheus.yml
index d336a1472e4..93d33116794 100644
--- a/.github/workflows/vm-perf-to-prometheus.yml
+++ b/.github/workflows/vm-perf-to-prometheus.yml
@@ -48,5 +48,5 @@ jobs:
           ci_run cargo bench --package vm-benchmark --bench oneshot
           # Run only benches with 1,000 transactions per batch to not spend too much time
           ci_run cargo bench --package vm-benchmark --bench batch '/1000$'
-          ci_run cargo bench --package vm-benchmark --bench iai | tee iai-result
-          ci_run cargo run --package vm-benchmark --bin iai_results_to_prometheus --release < iai-result
+          ci_run cargo bench --package vm-benchmark --bench instructions -- --verbose
+          ci_run cargo bench --package vm-benchmark --bench instructions -- --print

From 6355f95a1b1c17fc2e81cdfa3e418b2ed844d3e6 Mon Sep 17 00:00:00 2001
From: Alex Ostrovski <aov@matterlabs.dev>
Date: Mon, 14 Oct 2024 17:11:12 +0300
Subject: [PATCH 04/17] Remove `iai` from workspace

---
 Cargo.lock                                    |   7 --
 Cargo.toml                                    |   1 -
 core/tests/vm-benchmark/Cargo.toml            |   1 -
 core/tests/vm-benchmark/src/bin/common/mod.rs |  54 ---------
 .../src/bin/compare_iai_results.rs            | 108 ------------------
 .../src/bin/iai_results_to_prometheus.rs      |  52 ---------
 6 files changed, 223 deletions(-)
 delete mode 100644 core/tests/vm-benchmark/src/bin/common/mod.rs
 delete mode 100644 core/tests/vm-benchmark/src/bin/compare_iai_results.rs
 delete mode 100644 core/tests/vm-benchmark/src/bin/iai_results_to_prometheus.rs

diff --git a/Cargo.lock b/Cargo.lock
index f927bcb64a3..74101c4527e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3459,12 +3459,6 @@ dependencies = [
  "tracing",
 ]
 
-[[package]]
-name = "iai"
-version = "0.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "71a816c97c42258aa5834d07590b718b4c9a598944cd39a52dc25b351185d678"
-
 [[package]]
 name = "iana-time-zone"
 version = "0.1.61"
@@ -8744,7 +8738,6 @@ version = "0.1.0"
 dependencies = [
  "assert_matches",
  "criterion",
- "iai",
  "once_cell",
  "rand 0.8.5",
  "tokio",
diff --git a/Cargo.toml b/Cargo.toml
index 60b5628f419..e44abfa52af 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -131,7 +131,6 @@ hex = "0.4"
 http = "1.1"
 httpmock = "0.7.0"
 hyper = "1.3"
-iai = "0.1"
 insta = "1.29.0"
 itertools = "0.10"
 jsonrpsee = { version = "0.23", default-features = false }
diff --git a/core/tests/vm-benchmark/Cargo.toml b/core/tests/vm-benchmark/Cargo.toml
index 070b078160b..09b142dbb24 100644
--- a/core/tests/vm-benchmark/Cargo.toml
+++ b/core/tests/vm-benchmark/Cargo.toml
@@ -21,7 +21,6 @@ tokio.workspace = true
 
 [dev-dependencies]
 assert_matches.workspace = true
-iai.workspace = true
 # FIXME: use workspace dep
 yab = { version = "0.1.0", git = "https://github.com/slowli/yab.git", rev = "01e8ed2e4c4021bacdddfaa25f8a6e92b3439a9a" }
 
diff --git a/core/tests/vm-benchmark/src/bin/common/mod.rs b/core/tests/vm-benchmark/src/bin/common/mod.rs
deleted file mode 100644
index a92c9d5f710..00000000000
--- a/core/tests/vm-benchmark/src/bin/common/mod.rs
+++ /dev/null
@@ -1,54 +0,0 @@
-use std::io::BufRead;
-
-#[derive(Debug)]
-pub struct IaiResult {
-    pub name: String,
-    pub instructions: u64,
-    pub l1_accesses: u64,
-    pub l2_accesses: u64,
-    pub ram_accesses: u64,
-    pub cycles: u64,
-}
-
-pub fn parse_iai<R: BufRead>(iai_output: R) -> impl Iterator<Item = IaiResult> {
-    IaiResultParser {
-        lines: iai_output.lines().map(|x| x.unwrap()),
-    }
-}
-
-struct IaiResultParser<I: Iterator<Item = String>> {
-    lines: I,
-}
-
-impl<I: Iterator<Item = String>> Iterator for IaiResultParser<I> {
-    type Item = IaiResult;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        self.lines.next().map(|name| {
-            let result = IaiResult {
-                name,
-                instructions: self.parse_stat(),
-                l1_accesses: self.parse_stat(),
-                l2_accesses: self.parse_stat(),
-                ram_accesses: self.parse_stat(),
-                cycles: self.parse_stat(),
-            };
-            self.lines.next();
-            result
-        })
-    }
-}
-
-impl<I: Iterator<Item = String>> IaiResultParser<I> {
-    fn parse_stat(&mut self) -> u64 {
-        let line = self.lines.next().unwrap();
-        let number = line
-            .split(':')
-            .nth(1)
-            .unwrap()
-            .split_whitespace()
-            .next()
-            .unwrap();
-        number.parse().unwrap()
-    }
-}
diff --git a/core/tests/vm-benchmark/src/bin/compare_iai_results.rs b/core/tests/vm-benchmark/src/bin/compare_iai_results.rs
deleted file mode 100644
index c274b039c9b..00000000000
--- a/core/tests/vm-benchmark/src/bin/compare_iai_results.rs
+++ /dev/null
@@ -1,108 +0,0 @@
-use std::{
-    collections::{HashMap, HashSet},
-    fs::File,
-    io::{BufRead, BufReader},
-};
-
-pub use crate::common::parse_iai;
-
-mod common;
-
-fn main() {
-    let [iai_before, iai_after, opcodes_before, opcodes_after] = std::env::args()
-        .skip(1)
-        .take(4)
-        .collect::<Vec<_>>()
-        .try_into()
-        .expect("expected four arguments");
-
-    let iai_before = get_name_to_cycles(&iai_before);
-    let iai_after = get_name_to_cycles(&iai_after);
-    let opcodes_before = get_name_to_opcodes(&opcodes_before);
-    let opcodes_after = get_name_to_opcodes(&opcodes_after);
-
-    let perf_changes = iai_before
-        .keys()
-        .collect::<HashSet<_>>()
-        .intersection(&iai_after.keys().collect())
-        .map(|&name| (name, percent_difference(iai_before[name], iai_after[name])))
-        .collect::<HashMap<_, _>>();
-
-    let duration_changes = opcodes_before
-        .keys()
-        .collect::<HashSet<_>>()
-        .intersection(&opcodes_after.keys().collect())
-        .map(|&name| {
-            let opcodes_abs_diff = (opcodes_after[name] as i64) - (opcodes_before[name] as i64);
-            (name, opcodes_abs_diff)
-        })
-        .collect::<HashMap<_, _>>();
-
-    let mut nonzero_diff = false;
-
-    for name in perf_changes
-        .iter()
-        .filter_map(|(key, value)| (value.abs() > 2.).then_some(key))
-        .collect::<HashSet<_>>()
-        .union(
-            &duration_changes
-                .iter()
-                .filter_map(|(key, value)| (*value != 0).then_some(key))
-                .collect(),
-        )
-    {
-        // write the header before writing the first line of diff
-        if !nonzero_diff {
-            println!("Benchmark name | change in estimated runtime | change in number of opcodes executed \n--- | --- | ---");
-            nonzero_diff = true;
-        }
-
-        let n_a = "N/A".to_string();
-        println!(
-            "{} | {} | {}",
-            name,
-            perf_changes
-                .get(**name)
-                .map(|percent| format!("{:+.1}%", percent))
-                .unwrap_or(n_a.clone()),
-            duration_changes
-                .get(**name)
-                .map(|abs_diff| format!(
-                    "{:+} ({:+.1}%)",
-                    abs_diff,
-                    percent_difference(opcodes_before[**name], opcodes_after[**name])
-                ))
-                .unwrap_or(n_a),
-        );
-    }
-
-    if nonzero_diff {
-        println!("\n Changes in number of opcodes executed indicate that the gas price of the benchmark has changed, which causes it run out of gas at a different time. Or that it is behaving completely differently.");
-    }
-}
-
-fn percent_difference(a: u64, b: u64) -> f64 {
-    ((b as f64) - (a as f64)) / (a as f64) * 100.0
-}
-
-fn get_name_to_cycles(filename: &str) -> HashMap<String, u64> {
-    parse_iai(BufReader::new(
-        File::open(filename).expect("failed to open file"),
-    ))
-    .map(|x| (x.name, x.cycles))
-    .collect()
-}
-
-fn get_name_to_opcodes(filename: &str) -> HashMap<String, u64> {
-    BufReader::new(File::open(filename).expect("failed to open file"))
-        .lines()
-        .map(|line| {
-            let line = line.unwrap();
-            let mut it = line.split_whitespace();
-            (
-                it.next().unwrap().to_string(),
-                it.next().unwrap().parse().unwrap(),
-            )
-        })
-        .collect()
-}
diff --git a/core/tests/vm-benchmark/src/bin/iai_results_to_prometheus.rs b/core/tests/vm-benchmark/src/bin/iai_results_to_prometheus.rs
deleted file mode 100644
index 3b3aa05bf69..00000000000
--- a/core/tests/vm-benchmark/src/bin/iai_results_to_prometheus.rs
+++ /dev/null
@@ -1,52 +0,0 @@
-use std::{env, io::BufReader, time::Duration};
-
-use tokio::sync::watch;
-use vise::{Gauge, LabeledFamily, Metrics};
-use zksync_vlog::prometheus::PrometheusExporterConfig;
-
-use crate::common::{parse_iai, IaiResult};
-
-mod common;
-
-#[derive(Debug, Metrics)]
-#[metrics(prefix = "vm_cachegrind")]
-pub(crate) struct VmCachegrindMetrics {
-    #[metrics(labels = ["benchmark"])]
-    pub instructions: LabeledFamily<String, Gauge<u64>>,
-    #[metrics(labels = ["benchmark"])]
-    pub l1_accesses: LabeledFamily<String, Gauge<u64>>,
-    #[metrics(labels = ["benchmark"])]
-    pub l2_accesses: LabeledFamily<String, Gauge<u64>>,
-    #[metrics(labels = ["benchmark"])]
-    pub ram_accesses: LabeledFamily<String, Gauge<u64>>,
-    #[metrics(labels = ["benchmark"])]
-    pub cycles: LabeledFamily<String, Gauge<u64>>,
-}
-
-#[vise::register]
-pub(crate) static VM_CACHEGRIND_METRICS: vise::Global<VmCachegrindMetrics> = vise::Global::new();
-
-#[tokio::main]
-async fn main() {
-    let results: Vec<IaiResult> = parse_iai(BufReader::new(std::io::stdin())).collect();
-
-    let endpoint = env::var("BENCHMARK_PROMETHEUS_PUSHGATEWAY_URL")
-        .expect("`BENCHMARK_PROMETHEUS_PUSHGATEWAY_URL` env var is not set");
-    let (stop_sender, stop_receiver) = watch::channel(false);
-    let prometheus_config =
-        PrometheusExporterConfig::push(endpoint.to_owned(), Duration::from_millis(100));
-    tokio::spawn(prometheus_config.run(stop_receiver));
-
-    for result in results {
-        let name = result.name;
-        VM_CACHEGRIND_METRICS.instructions[&name.clone()].set(result.instructions);
-        VM_CACHEGRIND_METRICS.l1_accesses[&name.clone()].set(result.l1_accesses);
-        VM_CACHEGRIND_METRICS.l2_accesses[&name.clone()].set(result.l2_accesses);
-        VM_CACHEGRIND_METRICS.ram_accesses[&name.clone()].set(result.ram_accesses);
-        VM_CACHEGRIND_METRICS.cycles[&name].set(result.cycles);
-    }
-
-    println!("Waiting for push to happen...");
-    tokio::time::sleep(Duration::from_secs(1)).await;
-    stop_sender.send_replace(true);
-}

From 991f76ad2e7bf73e2eee169c1d0a1e950f197fad Mon Sep 17 00:00:00 2001
From: Alex Ostrovski <aov@matterlabs.dev>
Date: Wed, 16 Oct 2024 09:32:29 +0300
Subject: [PATCH 05/17] Improve instruction counting

---
 core/lib/multivm/src/versions/vm_fast/mod.rs |   2 +-
 core/lib/vm_executor/src/batch/factory.rs    |   2 +-
 core/tests/vm-benchmark/src/vm.rs            | 118 ++++++++++++-------
 3 files changed, 75 insertions(+), 47 deletions(-)

diff --git a/core/lib/multivm/src/versions/vm_fast/mod.rs b/core/lib/multivm/src/versions/vm_fast/mod.rs
index bb5a342bff2..35789c6cdc9 100644
--- a/core/lib/multivm/src/versions/vm_fast/mod.rs
+++ b/core/lib/multivm/src/versions/vm_fast/mod.rs
@@ -1,4 +1,4 @@
-pub use zksync_vm2::interface::Tracer;
+pub use zksync_vm2::interface;
 
 pub use self::{circuits_tracer::CircuitsTracer, vm::Vm};
 
diff --git a/core/lib/vm_executor/src/batch/factory.rs b/core/lib/vm_executor/src/batch/factory.rs
index bc19086c969..860ff6e77c1 100644
--- a/core/lib/vm_executor/src/batch/factory.rs
+++ b/core/lib/vm_executor/src/batch/factory.rs
@@ -35,7 +35,7 @@ pub trait BatchTracer: fmt::Debug + 'static + Send + Sealed {
     const TRACE_CALLS: bool;
     /// Tracer for the fast VM.
     #[doc(hidden)]
-    type Fast: vm_fast::Tracer + Default + 'static;
+    type Fast: vm_fast::interface::Tracer + Default + 'static;
 }
 
 impl Sealed for () {}
diff --git a/core/tests/vm-benchmark/src/vm.rs b/core/tests/vm-benchmark/src/vm.rs
index 30e2321298f..f68fdc6b989 100644
--- a/core/tests/vm-benchmark/src/vm.rs
+++ b/core/tests/vm-benchmark/src/vm.rs
@@ -72,19 +72,21 @@ pub trait BenchmarkingVmFactory {
         system_env: SystemEnv,
         storage: &'static InMemoryStorage,
     ) -> Self::Instance;
+}
 
+pub trait CountInstructions {
     /// Counts instructions executed by the VM while processing the transaction.
     fn count_instructions(tx: &Transaction) -> usize;
 }
 
 /// Factory for the new / fast VM.
 #[derive(Debug)]
-pub struct Fast<Tr = ()>(Tr);
+pub struct Fast;
 
-impl<Tr: vm_fast::Tracer + Default + 'static> BenchmarkingVmFactory for Fast<Tr> {
+impl BenchmarkingVmFactory for Fast {
     const LABEL: VmLabel = VmLabel::Fast;
 
-    type Instance = vm_fast::Vm<&'static InMemoryStorage, Tr>;
+    type Instance = vm_fast::Vm<&'static InMemoryStorage>;
 
     fn create(
         batch_env: L1BatchEnv,
@@ -93,27 +95,30 @@ impl<Tr: vm_fast::Tracer + Default + 'static> BenchmarkingVmFactory for Fast<Tr>
     ) -> Self::Instance {
         vm_fast::Vm::custom(batch_env, system_env, storage)
     }
+}
 
+impl CountInstructions for Fast {
     fn count_instructions(tx: &Transaction) -> usize {
-        let mut vm = BenchmarkingVm::<Fast<InstructionCount>>::default();
-        vm.0.push_transaction(tx.clone());
+        use vm_fast::interface as vm2;
 
         #[derive(Default)]
         struct InstructionCount(usize);
-        impl vm_fast::Tracer for InstructionCount {
-            fn before_instruction<
-                OP: zksync_vm2::interface::OpcodeType,
-                S: zksync_vm2::interface::StateInterface,
-            >(
+
+        impl vm2::Tracer for InstructionCount {
+            fn before_instruction<OP: vm2::OpcodeType, S: vm2::StateInterface>(
                 &mut self,
                 _: &mut S,
             ) {
                 self.0 += 1;
             }
         }
-        let mut tracer = InstructionCount(0);
 
-        vm.0.inspect(&mut tracer, VmExecutionMode::OneTx);
+        let (system_env, l1_batch_env) = test_env();
+        let mut vm =
+            vm_fast::Vm::<_, InstructionCount>::custom(l1_batch_env, system_env, &*STORAGE);
+        vm.push_transaction(tx.clone());
+        let mut tracer = InstructionCount(0);
+        vm.inspect(&mut tracer, VmExecutionMode::OneTx);
         tracer.0
     }
 }
@@ -135,7 +140,12 @@ impl BenchmarkingVmFactory for Legacy {
         let storage = StorageView::new(storage).to_rc_ptr();
         vm_latest::Vm::new(batch_env, system_env, storage)
     }
+}
 
+#[derive(Debug)]
+pub struct BenchmarkingVm<VM: BenchmarkingVmFactory>(VM::Instance);
+
+impl CountInstructions for Legacy {
     fn count_instructions(tx: &Transaction) -> usize {
         let mut vm = BenchmarkingVm::<Self>::default();
         vm.0.push_transaction(tx.clone());
@@ -150,41 +160,41 @@ impl BenchmarkingVmFactory for Legacy {
     }
 }
 
-#[derive(Debug)]
-pub struct BenchmarkingVm<VM: BenchmarkingVmFactory>(VM::Instance);
+fn test_env() -> (SystemEnv, L1BatchEnv) {
+    let timestamp = unix_timestamp_ms();
+    let system_env = SystemEnv {
+        zk_porter_available: false,
+        version: ProtocolVersionId::latest(),
+        base_system_smart_contracts: SYSTEM_CONTRACTS.clone(),
+        bootloader_gas_limit: BATCH_COMPUTATIONAL_GAS_LIMIT,
+        execution_mode: TxExecutionMode::VerifyExecute,
+        default_validation_computational_gas_limit: BATCH_COMPUTATIONAL_GAS_LIMIT,
+        chain_id: L2ChainId::from(270),
+    };
+    let l1_batch_env = L1BatchEnv {
+        previous_batch_hash: None,
+        number: L1BatchNumber(1),
+        timestamp,
+        fee_input: BatchFeeInput::l1_pegged(
+            50_000_000_000, // 50 gwei
+            250_000_000,    // 0.25 gwei
+        ),
+        fee_account: Address::random(),
+        enforced_base_fee: None,
+        first_l2_block: L2BlockEnv {
+            number: 1,
+            timestamp,
+            prev_block_hash: L2BlockHasher::legacy_hash(L2BlockNumber(0)),
+            max_virtual_blocks_to_create: 100,
+        },
+    };
+    (system_env, l1_batch_env)
+}
 
 impl<VM: BenchmarkingVmFactory> Default for BenchmarkingVm<VM> {
     fn default() -> Self {
-        let timestamp = unix_timestamp_ms();
-        Self(VM::create(
-            L1BatchEnv {
-                previous_batch_hash: None,
-                number: L1BatchNumber(1),
-                timestamp,
-                fee_input: BatchFeeInput::l1_pegged(
-                    50_000_000_000, // 50 gwei
-                    250_000_000,    // 0.25 gwei
-                ),
-                fee_account: Address::random(),
-                enforced_base_fee: None,
-                first_l2_block: L2BlockEnv {
-                    number: 1,
-                    timestamp,
-                    prev_block_hash: L2BlockHasher::legacy_hash(L2BlockNumber(0)),
-                    max_virtual_blocks_to_create: 100,
-                },
-            },
-            SystemEnv {
-                zk_porter_available: false,
-                version: ProtocolVersionId::latest(),
-                base_system_smart_contracts: SYSTEM_CONTRACTS.clone(),
-                bootloader_gas_limit: BATCH_COMPUTATIONAL_GAS_LIMIT,
-                execution_mode: TxExecutionMode::VerifyExecute,
-                default_validation_computational_gas_limit: BATCH_COMPUTATIONAL_GAS_LIMIT,
-                chain_id: L2ChainId::from(270),
-            },
-            &STORAGE,
-        ))
+        let (system_env, l1_batch_env) = test_env();
+        Self(VM::create(l1_batch_env, system_env, &STORAGE))
     }
 }
 
@@ -231,7 +241,7 @@ mod tests {
     use super::*;
     use crate::{
         get_deploy_tx, get_heavy_load_test_tx, get_load_test_deploy_tx, get_load_test_tx,
-        get_realistic_load_test_tx, get_transfer_tx, LoadTestParams,
+        get_realistic_load_test_tx, get_transfer_tx, LoadTestParams, BYTECODES,
     };
 
     #[test]
@@ -282,4 +292,22 @@ mod tests {
         let res = vm.run_transaction(&get_heavy_load_test_tx(1));
         assert_matches!(res.result, ExecutionResult::Success { .. });
     }
+
+    #[test]
+    fn instruction_count_matches_on_both_vms_for_transfer() {
+        let tx = get_transfer_tx(0);
+        let legacy_count = Legacy::count_instructions(&tx);
+        let fast_count = Fast::count_instructions(&tx);
+        assert_eq!(legacy_count, fast_count);
+    }
+
+    #[test]
+    fn instruction_count_matches_on_both_vms_for_fuzzed_bytecodes() {
+        for bytecode in BYTECODES {
+            let tx = bytecode.deploy_tx();
+            let legacy_count = Legacy::count_instructions(&tx);
+            let fast_count = Fast::count_instructions(&tx);
+            assert_eq!(legacy_count, fast_count, "bytecode: {}", bytecode.name);
+        }
+    }
 }

From d5c3592fc04fdd8763b62fe6f4b18ce812b540b2 Mon Sep 17 00:00:00 2001
From: Alex Ostrovski <aov@matterlabs.dev>
Date: Wed, 16 Oct 2024 10:03:18 +0300
Subject: [PATCH 06/17] Update `instruction_counts` binary

---
 .../src/bin/instruction_counts.rs             | 102 ++++++++++++++++--
 core/tests/vm-benchmark/src/lib.rs            |   2 +-
 2 files changed, 92 insertions(+), 12 deletions(-)

diff --git a/core/tests/vm-benchmark/src/bin/instruction_counts.rs b/core/tests/vm-benchmark/src/bin/instruction_counts.rs
index 96208007fd9..2b03e49151e 100644
--- a/core/tests/vm-benchmark/src/bin/instruction_counts.rs
+++ b/core/tests/vm-benchmark/src/bin/instruction_counts.rs
@@ -1,16 +1,96 @@
 //! Runs all benchmarks and prints out the number of zkEVM opcodes each one executed.
 
-use vm_benchmark::{BenchmarkingVmFactory, Fast, Legacy, BYTECODES};
+use std::{collections::HashMap, env, fs, io, path::PathBuf};
 
-fn main() {
-    for bytecode in BYTECODES {
-        let tx = bytecode.deploy_tx();
-        let name = bytecode.name;
-        println!("{name} {}", Fast::<()>::count_instructions(&tx));
-        println!(
-            "{} {}",
-            name.to_string() + "_legacy",
-            Legacy::count_instructions(&tx)
-        );
+use vm_benchmark::{CountInstructions, Fast, Legacy, BYTECODES};
+
+#[derive(Debug)]
+enum Command {
+    Print,
+    Diff { old: PathBuf },
+}
+
+impl Command {
+    fn from_env() -> Self {
+        let mut args = env::args().skip(1);
+        let Some(first) = args.next() else {
+            return Self::Print;
+        };
+        assert_eq!(first, "--diff", "Unsupported command-line arg");
+        let old = args.next().expect("`--diff` requires a path to old file");
+        Self::Diff { old: old.into() }
     }
+
+    fn print_instructions(counts: &HashMap<&str, usize>) {
+        for (bytecode_name, count) in counts {
+            println!("{bytecode_name} {count}");
+        }
+    }
+
+    fn parse_counts(reader: impl io::BufRead) -> HashMap<String, usize> {
+        let mut counts = HashMap::new();
+        for line in reader.lines() {
+            let line = line.unwrap();
+            if line.is_empty() {
+                continue;
+            }
+            let (name, count) = line.split_once(' ').expect("invalid output format");
+            let count = count.parse().unwrap_or_else(|err| {
+                panic!("invalid count for `{name}`: {err}");
+            });
+            counts.insert(name.to_owned(), count);
+        }
+        counts
+    }
+
+    fn run(self) {
+        let counts: HashMap<_, _> = BYTECODES
+            .iter()
+            .map(|bytecode| {
+                let tx = bytecode.deploy_tx();
+                // We have a unit test comparing stats, but do it here as well just in case.
+                let fast_count = Fast::count_instructions(&tx);
+                let legacy_count = Legacy::count_instructions(&tx);
+                assert_eq!(
+                    fast_count, legacy_count,
+                    "mismatch on number of instructions on bytecode `{}`",
+                    bytecode.name
+                );
+
+                (bytecode.name, fast_count)
+            })
+            .collect();
+
+        match self {
+            Self::Print => Self::print_instructions(&counts),
+            Self::Diff { old } => {
+                let file = fs::File::open(&old).unwrap_or_else(|err| {
+                    panic!("failed opening `{}`: {err}", old.display());
+                });
+                let reader = io::BufReader::new(file);
+                let old_counts = Self::parse_counts(reader);
+
+                let differing_counts: Vec<_> = counts
+                    .iter()
+                    .filter_map(|(&name, &new_count)| {
+                        let old_count = *old_counts.get(name)?;
+                        (old_count != new_count).then_some((name, old_count, new_count))
+                    })
+                    .collect();
+
+                if !differing_counts.is_empty() {
+                    println!("## Detected differing instruction counts");
+                    println!("| Benchmark | Old count | New count |");
+                    println!("|-----------|----------:|----------:|");
+                    for (name, old_count, new_count) in differing_counts {
+                        println!("| {name} | {old_count} | {new_count} |");
+                    }
+                }
+            }
+        }
+    }
+}
+
+fn main() {
+    Command::from_env().run();
 }
diff --git a/core/tests/vm-benchmark/src/lib.rs b/core/tests/vm-benchmark/src/lib.rs
index 4bd008d3319..9c4f547c1de 100644
--- a/core/tests/vm-benchmark/src/lib.rs
+++ b/core/tests/vm-benchmark/src/lib.rs
@@ -6,7 +6,7 @@ pub use crate::{
         get_load_test_deploy_tx, get_load_test_tx, get_realistic_load_test_tx, get_transfer_tx,
         LoadTestParams,
     },
-    vm::{BenchmarkingVm, BenchmarkingVmFactory, Fast, Legacy, VmLabel},
+    vm::{BenchmarkingVm, BenchmarkingVmFactory, CountInstructions, Fast, Legacy, VmLabel},
 };
 
 pub mod criterion;

From 7c6bbfa0e517bf2d61363c4c4eda9bca039cea25 Mon Sep 17 00:00:00 2001
From: Alex Ostrovski <aov@matterlabs.dev>
Date: Wed, 16 Oct 2024 10:11:28 +0300
Subject: [PATCH 07/17] Run `instruction_counts` in CI

---
 .github/workflows/vm-perf-comparison.yml | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/vm-perf-comparison.yml b/.github/workflows/vm-perf-comparison.yml
index 66cfd07d925..bf7e7832da3 100644
--- a/.github/workflows/vm-perf-comparison.yml
+++ b/.github/workflows/vm-perf-comparison.yml
@@ -43,8 +43,8 @@ jobs:
           echo "SCCACHE_GCS_SERVICE_ACCOUNT=gha-ci-runners@matterlabs-infra.iam.gserviceaccount.com" >> .env
           echo "SCCACHE_GCS_RW_MODE=READ_WRITE" >> .env
           echo "RUSTC_WRAPPER=sccache" >> .env
-          # Set the minimum reported instruction count difference to reduce noise 
-          echo "BENCHMARK_DIFF_THRESHOLD_PERCENT=2" >> .env
+          # FIXME: Set the minimum reported instruction count difference to reduce noise
+          echo "BENCHMARK_DIFF_THRESHOLD_PERCENT=-1" >> .env
 
       - name: init
         run: |
@@ -57,6 +57,7 @@ jobs:
           ci_run zkstackup -g --local
           ci_run zkstack dev contracts --system-contracts
           ci_run cargo bench --package vm-benchmark --bench instructions
+          ci_run cargo run --package vm-benchmark --release --bin instruction_counts | tee base-opcodes
 
       - name: checkout PR
         run: |
@@ -75,11 +76,17 @@ jobs:
           # Output all lines starting from the "## ..." comparison header
           ci_run cargo bench --package vm-benchmark --bench instructions -- --print | grep -A 1000 --color=never -e '^## ' >> $GITHUB_OUTPUT
           echo "$EOF" >> $GITHUB_OUTPUT
+          
+          echo "opcodes<<$EOF" >> $GITHUB_OUTPUT
+          ci_run cargo run --package vm-benchmark --release --bin instruction_counts -- --diff base-opcodes | grep -A 1000 --color=never -e '^## ' >> $GITHUB_OUTPUT
+          echo "$EOF" >> $GITHUB_OUTPUT
 
       - name: Comment on PR
         uses: thollander/actions-comment-pull-request@fabd468d3a1a0b97feee5f6b9e499eab0dd903f6 # v2.5.0
         with:
-          message: ${{ steps.comparison.outputs.speedup }}
+          message: |
+            ${{ steps.comparison.outputs.speedup }}
+            ${{ steps.comparison.outputs.opcodes }}
           comment_tag: vm-performance-changes
-          mode: ${{ steps.comparison.outputs.speedup == '' && 'delete' || 'recreate' }}
-          create_if_not_exists: ${{ steps.comparison.outputs.speedup != '' }}
+          mode: ${{ (steps.comparison.outputs.speedup == '' && steps.comparison.outputs.opcodes == '') && 'delete' || 'recreate' }}
+          create_if_not_exists: ${{ steps.comparison.outputs.speedup != '' || steps.comparison.outputs.opcodes != '' }}

From c1fb81d403e12be5b18cdd7cb2bff2f3f5a9c680 Mon Sep 17 00:00:00 2001
From: Alex Ostrovski <aov@matterlabs.dev>
Date: Wed, 16 Oct 2024 10:25:13 +0300
Subject: [PATCH 08/17] Fix / improve docs

---
 .github/workflows/vm-perf-comparison.yml        | 3 ---
 core/tests/vm-benchmark/benches/instructions.rs | 7 +++++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/vm-perf-comparison.yml b/.github/workflows/vm-perf-comparison.yml
index bf7e7832da3..d04cd756211 100644
--- a/.github/workflows/vm-perf-comparison.yml
+++ b/.github/workflows/vm-perf-comparison.yml
@@ -10,9 +10,6 @@ jobs:
     name: Run VM benchmarks
     runs-on: [ matterlabs-ci-runner-highmem-long ]
 
-    env:
-      BENCHMARK_INSTRUCTIONS_DIFF: target/instructions-diff.md
-
     steps:
       - name: checkout base branch
         uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4
diff --git a/core/tests/vm-benchmark/benches/instructions.rs b/core/tests/vm-benchmark/benches/instructions.rs
index 2b04d6b7dad..52251194783 100644
--- a/core/tests/vm-benchmark/benches/instructions.rs
+++ b/core/tests/vm-benchmark/benches/instructions.rs
@@ -1,4 +1,4 @@
-//! Measures instruction counts for the .
+//! Measures instruction counts for the fuzzed bytecodes.
 
 use std::{
     collections::HashMap,
@@ -109,6 +109,9 @@ impl Comparison {
     }
 }
 
+/// Reporter that outputs diffs in a Markdown table to stdout after all benchmarks are completed.
+///
+/// Significant diff level can be changed via `BENCHMARK_DIFF_THRESHOLD_PERCENT` env var; it is set to 1% by default.
 #[derive(Debug, Default)]
 struct ComparisonReporter {
     comparisons: Arc<Mutex<HashMap<String, Comparison>>>,
@@ -130,7 +133,7 @@ impl Drop for ComparisonReporter {
         }
 
         println!("\n## Detected VM performance changes");
-        println!("Benchmark name | est. cycles | change in est. cycles |");
+        println!("Benchmark name | Est. cycles | Change in est. cycles |");
         println!("|:---|---:|---:|");
         for (name, comparison) in &comparisons {
             println!(

From 86e64dfbbc394eef900a9fcdfc6477c6a5187a10 Mon Sep 17 00:00:00 2001
From: Alex Ostrovski <aov@matterlabs.dev>
Date: Wed, 16 Oct 2024 11:03:21 +0300
Subject: [PATCH 09/17] Handle case with missing benchmark in base

---
 .github/workflows/vm-perf-comparison.yml      |  2 +-
 .../vm-benchmark/benches/instructions.rs      | 33 +++++++++++++------
 2 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/vm-perf-comparison.yml b/.github/workflows/vm-perf-comparison.yml
index d04cd756211..b4fbcc2c21a 100644
--- a/.github/workflows/vm-perf-comparison.yml
+++ b/.github/workflows/vm-perf-comparison.yml
@@ -53,7 +53,7 @@ jobs:
         run: |
           ci_run zkstackup -g --local
           ci_run zkstack dev contracts --system-contracts
-          ci_run cargo bench --package vm-benchmark --bench instructions
+          ci_run cargo bench --package vm-benchmark --bench instructions -- --verbose || echo "Instructions benchmark is missing"
           ci_run cargo run --package vm-benchmark --release --bin instruction_counts | tee base-opcodes
 
       - name: checkout PR
diff --git a/core/tests/vm-benchmark/benches/instructions.rs b/core/tests/vm-benchmark/benches/instructions.rs
index 52251194783..9635e0c7fed 100644
--- a/core/tests/vm-benchmark/benches/instructions.rs
+++ b/core/tests/vm-benchmark/benches/instructions.rs
@@ -90,8 +90,8 @@ impl BenchmarkReporter for MetricsBenchmarkReporter {
 
 #[derive(Debug, Clone, Copy)]
 struct Comparison {
-    est_cycles: u64,
-    est_cycles_diff: f64,
+    current_cycles: u64,
+    prev_cycles: Option<u64>,
 }
 
 impl Comparison {
@@ -101,12 +101,22 @@ impl Comparison {
 
     fn new(output: &BenchmarkOutput) -> Option<Self> {
         let current_cycles = AccessSummary::from(*output.stats.as_full()?).estimated_cycles();
-        let prev_cycles = AccessSummary::from(*output.prev_stats?.as_full()?).estimated_cycles();
+        let prev_cycles = if let Some(prev_stats) = &output.prev_stats {
+            Some(AccessSummary::from(*prev_stats.as_full()?).estimated_cycles())
+        } else {
+            None
+        };
+
         Some(Self {
-            est_cycles: current_cycles,
-            est_cycles_diff: Self::percent_difference(prev_cycles, current_cycles),
+            current_cycles,
+            prev_cycles,
         })
     }
+
+    fn cycles_diff(&self) -> Option<f64> {
+        self.prev_cycles
+            .map(|prev_cycles| Self::percent_difference(prev_cycles, self.current_cycles))
+    }
 }
 
 /// Reporter that outputs diffs in a Markdown table to stdout after all benchmarks are completed.
@@ -127,7 +137,10 @@ impl Drop for ComparisonReporter {
         });
 
         let mut comparisons = self.comparisons.lock().expect("poisoned").clone();
-        comparisons.retain(|_, diff| diff.est_cycles_diff.abs() > diff_threshold);
+        comparisons.retain(|_, diff| {
+            // Output all stats if `diff_threshold <= 0.0` since this is what the user expects
+            diff.cycles_diff().unwrap_or(0.0) >= diff_threshold
+        });
         if comparisons.is_empty() {
             return;
         }
@@ -136,10 +149,10 @@ impl Drop for ComparisonReporter {
         println!("Benchmark name | Est. cycles | Change in est. cycles |");
         println!("|:---|---:|---:|");
         for (name, comparison) in &comparisons {
-            println!(
-                "| {name} | {} | {:+.1}% |",
-                comparison.est_cycles, comparison.est_cycles_diff
-            );
+            let diff = comparison
+                .cycles_diff()
+                .map_or_else(|| "N/A".to_string(), |diff| format!("{diff:+.1}%"));
+            println!("| {name} | {} | {diff} |", comparison.current_cycles);
         }
     }
 }

From 8c55d2e1ae4e1c8d60fa28f7f35a910288994a73 Mon Sep 17 00:00:00 2001
From: Alex Ostrovski <aov@matterlabs.dev>
Date: Wed, 16 Oct 2024 11:42:09 +0300
Subject: [PATCH 10/17] Fix bench ordering & empty outputs

---
 .github/workflows/vm-perf-comparison.yml              |  6 +++---
 core/tests/vm-benchmark/benches/instructions.rs       |  6 +++---
 core/tests/vm-benchmark/src/bin/instruction_counts.rs | 10 +++++-----
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/vm-perf-comparison.yml b/.github/workflows/vm-perf-comparison.yml
index b4fbcc2c21a..5cfd9fc7829 100644
--- a/.github/workflows/vm-perf-comparison.yml
+++ b/.github/workflows/vm-perf-comparison.yml
@@ -70,12 +70,12 @@ jobs:
 
           EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
           echo "speedup<<$EOF" >> $GITHUB_OUTPUT
-          # Output all lines starting from the "## ..." comparison header
-          ci_run cargo bench --package vm-benchmark --bench instructions -- --print | grep -A 1000 --color=never -e '^## ' >> $GITHUB_OUTPUT
+          # Output all lines starting from the "## ..." comparison header. Ignore the `grep` error if no lines were matched
+          ci_run cargo bench --package vm-benchmark --bench instructions -- --print | grep -A 1000 --color=never -e '^## ' >> $GITHUB_OUTPUT || [ "$?" = 1 ]
           echo "$EOF" >> $GITHUB_OUTPUT
           
           echo "opcodes<<$EOF" >> $GITHUB_OUTPUT
-          ci_run cargo run --package vm-benchmark --release --bin instruction_counts -- --diff base-opcodes | grep -A 1000 --color=never -e '^## ' >> $GITHUB_OUTPUT
+          ci_run cargo run --package vm-benchmark --release --bin instruction_counts -- --diff base-opcodes | grep -A 1000 --color=never -e '^## ' >> $GITHUB_OUTPUT || [ "$?" = 1 ]
           echo "$EOF" >> $GITHUB_OUTPUT
 
       - name: Comment on PR
diff --git a/core/tests/vm-benchmark/benches/instructions.rs b/core/tests/vm-benchmark/benches/instructions.rs
index 9635e0c7fed..0920de8d35b 100644
--- a/core/tests/vm-benchmark/benches/instructions.rs
+++ b/core/tests/vm-benchmark/benches/instructions.rs
@@ -1,7 +1,7 @@
 //! Measures instruction counts for the fuzzed bytecodes.
 
 use std::{
-    collections::HashMap,
+    collections::BTreeMap,
     env,
     sync::{Arc, Mutex},
 };
@@ -124,7 +124,7 @@ impl Comparison {
 /// Significant diff level can be changed via `BENCHMARK_DIFF_THRESHOLD_PERCENT` env var; it is set to 1% by default.
 #[derive(Debug, Default)]
 struct ComparisonReporter {
-    comparisons: Arc<Mutex<HashMap<String, Comparison>>>,
+    comparisons: Arc<Mutex<BTreeMap<String, Comparison>>>,
 }
 
 impl Drop for ComparisonReporter {
@@ -168,7 +168,7 @@ impl Reporter for ComparisonReporter {
 
 #[derive(Debug)]
 struct BenchmarkComparison {
-    comparisons: Arc<Mutex<HashMap<String, Comparison>>>,
+    comparisons: Arc<Mutex<BTreeMap<String, Comparison>>>,
     id: BenchmarkId,
 }
 
diff --git a/core/tests/vm-benchmark/src/bin/instruction_counts.rs b/core/tests/vm-benchmark/src/bin/instruction_counts.rs
index 2b03e49151e..b536d08acea 100644
--- a/core/tests/vm-benchmark/src/bin/instruction_counts.rs
+++ b/core/tests/vm-benchmark/src/bin/instruction_counts.rs
@@ -1,6 +1,6 @@
 //! Runs all benchmarks and prints out the number of zkEVM opcodes each one executed.
 
-use std::{collections::HashMap, env, fs, io, path::PathBuf};
+use std::{collections::BTreeMap, env, fs, io, path::PathBuf};
 
 use vm_benchmark::{CountInstructions, Fast, Legacy, BYTECODES};
 
@@ -21,14 +21,14 @@ impl Command {
         Self::Diff { old: old.into() }
     }
 
-    fn print_instructions(counts: &HashMap<&str, usize>) {
+    fn print_instructions(counts: &BTreeMap<&str, usize>) {
         for (bytecode_name, count) in counts {
             println!("{bytecode_name} {count}");
         }
     }
 
-    fn parse_counts(reader: impl io::BufRead) -> HashMap<String, usize> {
-        let mut counts = HashMap::new();
+    fn parse_counts(reader: impl io::BufRead) -> BTreeMap<String, usize> {
+        let mut counts = BTreeMap::new();
         for line in reader.lines() {
             let line = line.unwrap();
             if line.is_empty() {
@@ -44,7 +44,7 @@ impl Command {
     }
 
     fn run(self) {
-        let counts: HashMap<_, _> = BYTECODES
+        let counts: BTreeMap<_, _> = BYTECODES
             .iter()
             .map(|bytecode| {
                 let tx = bytecode.deploy_tx();

From 9b6d7488268c18d4e8d195a4ed01e3af7880da4c Mon Sep 17 00:00:00 2001
From: Alex Ostrovski <aov@matterlabs.dev>
Date: Wed, 16 Oct 2024 14:32:29 +0300
Subject: [PATCH 11/17] Set reported diff threshold to 2%

---
 .github/workflows/vm-perf-comparison.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/vm-perf-comparison.yml b/.github/workflows/vm-perf-comparison.yml
index 5cfd9fc7829..98862ed670f 100644
--- a/.github/workflows/vm-perf-comparison.yml
+++ b/.github/workflows/vm-perf-comparison.yml
@@ -40,8 +40,8 @@ jobs:
           echo "SCCACHE_GCS_SERVICE_ACCOUNT=gha-ci-runners@matterlabs-infra.iam.gserviceaccount.com" >> .env
           echo "SCCACHE_GCS_RW_MODE=READ_WRITE" >> .env
           echo "RUSTC_WRAPPER=sccache" >> .env
-          # FIXME: Set the minimum reported instruction count difference to reduce noise
-          echo "BENCHMARK_DIFF_THRESHOLD_PERCENT=-1" >> .env
+          # Set the minimum reported instruction count difference to reduce noise
+          echo "BENCHMARK_DIFF_THRESHOLD_PERCENT=2" >> .env
 
       - name: init
         run: |

From 4dc0f13cd678bc95c4e41c51bc92be7cd28e8f0d Mon Sep 17 00:00:00 2001
From: Alex Ostrovski <aov@matterlabs.dev>
Date: Wed, 16 Oct 2024 15:18:43 +0300
Subject: [PATCH 12/17] Fix commenting steps in CI

---
 .github/workflows/vm-perf-comparison.yml | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/vm-perf-comparison.yml b/.github/workflows/vm-perf-comparison.yml
index 98862ed670f..a0220933c82 100644
--- a/.github/workflows/vm-perf-comparison.yml
+++ b/.github/workflows/vm-perf-comparison.yml
@@ -80,10 +80,18 @@ jobs:
 
       - name: Comment on PR
         uses: thollander/actions-comment-pull-request@fabd468d3a1a0b97feee5f6b9e499eab0dd903f6 # v2.5.0
+        if: steps.comparison.outputs.speedup != '' || steps.comparison.outputs.opcodes != ''
         with:
           message: |
             ${{ steps.comparison.outputs.speedup }}
             ${{ steps.comparison.outputs.opcodes }}
           comment_tag: vm-performance-changes
-          mode: ${{ (steps.comparison.outputs.speedup == '' && steps.comparison.outputs.opcodes == '') && 'delete' || 'recreate' }}
-          create_if_not_exists: ${{ steps.comparison.outputs.speedup != '' || steps.comparison.outputs.opcodes != '' }}
+          mode: recreate
+          create_if_not_exists: true
+      - name: Remove PR comment
+        uses: thollander/actions-comment-pull-request@fabd468d3a1a0b97feee5f6b9e499eab0dd903f6 # v2.5.0
+        if: steps.comparison.outputs.speedup == '' && steps.comparison.outputs.opcodes == ''
+        with:
+          comment_tag: vm-performance-changes
+          message: 'No performance difference detected (anymore)'
+          mode: delete

From 5631a0f56ce13019ed8f908ec3b99cf52d6728f2 Mon Sep 17 00:00:00 2001
From: Alex Ostrovski <aov@matterlabs.dev>
Date: Thu, 17 Oct 2024 16:27:47 +0300
Subject: [PATCH 13/17] Clarify differing instruction counts message

---
 core/tests/vm-benchmark/src/bin/instruction_counts.rs | 6 +++++-
 core/tests/vm-benchmark/src/vm.rs                     | 2 +-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/core/tests/vm-benchmark/src/bin/instruction_counts.rs b/core/tests/vm-benchmark/src/bin/instruction_counts.rs
index b536d08acea..ece30a66cee 100644
--- a/core/tests/vm-benchmark/src/bin/instruction_counts.rs
+++ b/core/tests/vm-benchmark/src/bin/instruction_counts.rs
@@ -79,12 +79,16 @@ impl Command {
                     .collect();
 
                 if !differing_counts.is_empty() {
-                    println!("## Detected differing instruction counts");
+                    println!("## ⚠ Detected differing instruction counts");
                     println!("| Benchmark | Old count | New count |");
                     println!("|-----------|----------:|----------:|");
                     for (name, old_count, new_count) in differing_counts {
                         println!("| {name} | {old_count} | {new_count} |");
                     }
+                    println!(
+                        "\nChanges in number of opcodes executed indicate that the gas price of the benchmark has changed, \
+                         which causes it to run out of gas at a different time."
+                    );
                 }
             }
         }
diff --git a/core/tests/vm-benchmark/src/vm.rs b/core/tests/vm-benchmark/src/vm.rs
index f68fdc6b989..42de2e7be71 100644
--- a/core/tests/vm-benchmark/src/vm.rs
+++ b/core/tests/vm-benchmark/src/vm.rs
@@ -302,7 +302,7 @@ mod tests {
     }
 
     #[test]
-    fn instruction_count_matches_on_both_vms_for_fuzzed_bytecodes() {
+    fn instruction_count_matches_on_both_vms_for_benchmark_bytecodes() {
         for bytecode in BYTECODES {
             let tx = bytecode.deploy_tx();
             let legacy_count = Legacy::count_instructions(&tx);

From 48496b96c71ee2443ddf833e1e02f07f0e2782c9 Mon Sep 17 00:00:00 2001
From: Alex Ostrovski <aov@matterlabs.dev>
Date: Mon, 21 Oct 2024 14:11:44 +0300
Subject: [PATCH 14/17] Fix misc issues

---
 Cargo.lock                                    |  2 +-
 core/tests/vm-benchmark/Cargo.toml            |  2 +-
 .../vm-benchmark/benches/instructions.rs      | 64 +++++++++++--------
 core/tests/vm-benchmark/src/vm.rs             |  6 +-
 4 files changed, 41 insertions(+), 33 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 74101c4527e..966f9c7a115 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -9229,7 +9229,7 @@ dependencies = [
 [[package]]
 name = "yab"
 version = "0.1.0"
-source = "git+https://github.com/slowli/yab.git?rev=01e8ed2e4c4021bacdddfaa25f8a6e92b3439a9a#01e8ed2e4c4021bacdddfaa25f8a6e92b3439a9a"
+source = "git+https://github.com/slowli/yab.git?rev=46b5b14e952964cc931afdd66776a073afdd37d5#46b5b14e952964cc931afdd66776a073afdd37d5"
 dependencies = [
  "anes",
  "clap 4.5.20",
diff --git a/core/tests/vm-benchmark/Cargo.toml b/core/tests/vm-benchmark/Cargo.toml
index 09b142dbb24..3ecd2b9a54b 100644
--- a/core/tests/vm-benchmark/Cargo.toml
+++ b/core/tests/vm-benchmark/Cargo.toml
@@ -22,7 +22,7 @@ tokio.workspace = true
 [dev-dependencies]
 assert_matches.workspace = true
 # FIXME: use workspace dep
-yab = { version = "0.1.0", git = "https://github.com/slowli/yab.git", rev = "01e8ed2e4c4021bacdddfaa25f8a6e92b3439a9a" }
+yab = { version = "0.1.0", git = "https://github.com/slowli/yab.git", rev = "46b5b14e952964cc931afdd66776a073afdd37d5" }
 
 [[bench]]
 name = "oneshot"
diff --git a/core/tests/vm-benchmark/benches/instructions.rs b/core/tests/vm-benchmark/benches/instructions.rs
index 0920de8d35b..1e44b28113b 100644
--- a/core/tests/vm-benchmark/benches/instructions.rs
+++ b/core/tests/vm-benchmark/benches/instructions.rs
@@ -1,10 +1,6 @@
-//! Measures instruction counts for the fuzzed bytecodes.
+//! Measures the number of host instructions required to run the benchmark bytecodes.
 
-use std::{
-    collections::BTreeMap,
-    env,
-    sync::{Arc, Mutex},
-};
+use std::{env, sync::mpsc};
 
 use vise::{Gauge, LabeledFamily, Metrics};
 use vm_benchmark::{
@@ -12,7 +8,7 @@ use vm_benchmark::{
 };
 use yab::{
     reporter::{BenchmarkOutput, BenchmarkReporter, Reporter},
-    AccessSummary, Bencher, BenchmarkId,
+    AccessSummary, BenchMode, Bencher, BenchmarkId,
 };
 
 fn benchmarks_for_vm<VM: BenchmarkingVmFactory>(bencher: &mut Bencher) {
@@ -122,13 +118,31 @@ impl Comparison {
 /// Reporter that outputs diffs in a Markdown table to stdout after all benchmarks are completed.
 ///
 /// Significant diff level can be changed via `BENCHMARK_DIFF_THRESHOLD_PERCENT` env var; it is set to 1% by default.
-#[derive(Debug, Default)]
+#[derive(Debug)]
 struct ComparisonReporter {
-    comparisons: Arc<Mutex<BTreeMap<String, Comparison>>>,
+    comparisons_sender: mpsc::Sender<(String, Comparison)>,
+    comparisons_receiver: mpsc::Receiver<(String, Comparison)>,
+}
+
+impl Default for ComparisonReporter {
+    fn default() -> Self {
+        let (comparisons_sender, comparisons_receiver) = mpsc::channel();
+        Self {
+            comparisons_sender,
+            comparisons_receiver,
+        }
+    }
 }
 
-impl Drop for ComparisonReporter {
-    fn drop(&mut self) {
+impl Reporter for ComparisonReporter {
+    fn new_benchmark(&mut self, id: &BenchmarkId) -> Box<dyn BenchmarkReporter> {
+        Box::new(BenchmarkComparison {
+            comparisons_sender: self.comparisons_sender.clone(),
+            id: id.clone(),
+        })
+    }
+
+    fn ok(self: Box<Self>) {
         const ENV_VAR: &str = "BENCHMARK_DIFF_THRESHOLD_PERCENT";
 
         let diff_threshold = env::var(ENV_VAR).unwrap_or_else(|_| "1.0".into());
@@ -136,8 +150,10 @@ impl Drop for ComparisonReporter {
             panic!("incorrect `{ENV_VAR}` value: {err}");
         });
 
-        let mut comparisons = self.comparisons.lock().expect("poisoned").clone();
-        comparisons.retain(|_, diff| {
+        // Drop the sender to not hang on the iteration below.
+        drop(self.comparisons_sender);
+        let mut comparisons: Vec<_> = self.comparisons_receiver.iter().collect();
+        comparisons.retain(|(_, diff)| {
             // Output all stats if `diff_threshold <= 0.0` since this is what the user expects
             diff.cycles_diff().unwrap_or(0.0) >= diff_threshold
         });
@@ -145,6 +161,8 @@ impl Drop for ComparisonReporter {
             return;
         }
 
+        comparisons.sort_unstable_by(|(name, _), (other_name, _)| name.cmp(other_name));
+
         println!("\n## Detected VM performance changes");
         println!("Benchmark name | Est. cycles | Change in est. cycles |");
         println!("|:---|---:|---:|");
@@ -157,34 +175,24 @@ impl Drop for ComparisonReporter {
     }
 }
 
-impl Reporter for ComparisonReporter {
-    fn new_benchmark(&mut self, id: &BenchmarkId) -> Box<dyn BenchmarkReporter> {
-        Box::new(BenchmarkComparison {
-            comparisons: self.comparisons.clone(),
-            id: id.clone(),
-        })
-    }
-}
-
 #[derive(Debug)]
 struct BenchmarkComparison {
-    comparisons: Arc<Mutex<BTreeMap<String, Comparison>>>,
+    comparisons_sender: mpsc::Sender<(String, Comparison)>,
     id: BenchmarkId,
 }
 
 impl BenchmarkReporter for BenchmarkComparison {
     fn ok(self: Box<Self>, output: &BenchmarkOutput) {
         if let Some(diff) = Comparison::new(output) {
-            self.comparisons
-                .lock()
-                .expect("poisoned")
-                .insert(self.id.to_string(), diff);
+            self.comparisons_sender
+                .send((self.id.to_string(), diff))
+                .ok();
         }
     }
 }
 
 fn benchmarks(bencher: &mut Bencher) {
-    if env::args().any(|arg| arg == "--print") {
+    if bencher.mode() == BenchMode::PrintResults {
         // Only customize reporting if outputting previously collected benchmark result in order to prevent
         // reporters influencing cachegrind stats.
         bencher
diff --git a/core/tests/vm-benchmark/src/vm.rs b/core/tests/vm-benchmark/src/vm.rs
index 42de2e7be71..5424239edc7 100644
--- a/core/tests/vm-benchmark/src/vm.rs
+++ b/core/tests/vm-benchmark/src/vm.rs
@@ -142,9 +142,6 @@ impl BenchmarkingVmFactory for Legacy {
     }
 }
 
-#[derive(Debug)]
-pub struct BenchmarkingVm<VM: BenchmarkingVmFactory>(VM::Instance);
-
 impl CountInstructions for Legacy {
     fn count_instructions(tx: &Transaction) -> usize {
         let mut vm = BenchmarkingVm::<Self>::default();
@@ -191,6 +188,9 @@ fn test_env() -> (SystemEnv, L1BatchEnv) {
     (system_env, l1_batch_env)
 }
 
+#[derive(Debug)]
+pub struct BenchmarkingVm<VM: BenchmarkingVmFactory>(VM::Instance);
+
 impl<VM: BenchmarkingVmFactory> Default for BenchmarkingVm<VM> {
     fn default() -> Self {
         let (system_env, l1_batch_env) = test_env();

From 8c53de500e55fb49dcc5ae610ca5986b69af132f Mon Sep 17 00:00:00 2001
From: Alex Ostrovski <aov@matterlabs.dev>
Date: Thu, 24 Oct 2024 21:20:29 +0300
Subject: [PATCH 15/17] Improve conversions in `percent_difference`

---
 core/tests/vm-benchmark/benches/instructions.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/tests/vm-benchmark/benches/instructions.rs b/core/tests/vm-benchmark/benches/instructions.rs
index 1e44b28113b..654dfef71b2 100644
--- a/core/tests/vm-benchmark/benches/instructions.rs
+++ b/core/tests/vm-benchmark/benches/instructions.rs
@@ -92,7 +92,7 @@ struct Comparison {
 
 impl Comparison {
     fn percent_difference(a: u64, b: u64) -> f64 {
-        ((b as f64) - (a as f64)) / (a as f64) * 100.0
+        ((b as i64) - (a as i64)) as f64 / (a as f64) * 100.0
     }
 
     fn new(output: &BenchmarkOutput) -> Option<Self> {

From 93536a75b59a18dd6fd5b094dfd70836bb4cc694 Mon Sep 17 00:00:00 2001
From: Alex Ostrovski <aov@matterlabs.dev>
Date: Mon, 28 Oct 2024 15:55:14 +0200
Subject: [PATCH 16/17] Use published `yab` version

---
 Cargo.lock                         | 3 ++-
 Cargo.toml                         | 3 ++-
 core/tests/vm-benchmark/Cargo.toml | 3 +--
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 07b6a917787..4cbb5b5ea70 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -9247,7 +9247,8 @@ dependencies = [
 [[package]]
 name = "yab"
 version = "0.1.0"
-source = "git+https://github.com/slowli/yab.git?rev=46b5b14e952964cc931afdd66776a073afdd37d5#46b5b14e952964cc931afdd66776a073afdd37d5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4b06cc62d4cec617d3c259537be0fcaa8a5bcf72ddf2983823d9528605f36ed3"
 dependencies = [
  "anes",
  "clap 4.5.20",
diff --git a/Cargo.toml b/Cargo.toml
index e75186f1ae7..9e8c66ecfb2 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -122,6 +122,7 @@ derive_more = "1.0.0"
 envy = "0.4"
 ethabi = "18.0.0"
 flate2 = "1.0.28"
+fraction = "0.15.3"
 futures = "0.3"
 glob = "0.3"
 google-cloud-auth = "0.16.0"
@@ -189,7 +190,7 @@ tracing-opentelemetry = "0.25.0"
 time = "0.3.36"                                                               # Has to be same as used by `tracing-subscriber`
 url = "2"
 web3 = "0.19.0"
-fraction = "0.15.3"
+yab = "0.1.0"
 
 # Proc-macro
 syn = "2.0"
diff --git a/core/tests/vm-benchmark/Cargo.toml b/core/tests/vm-benchmark/Cargo.toml
index 3ecd2b9a54b..892bcf1c105 100644
--- a/core/tests/vm-benchmark/Cargo.toml
+++ b/core/tests/vm-benchmark/Cargo.toml
@@ -21,8 +21,7 @@ tokio.workspace = true
 
 [dev-dependencies]
 assert_matches.workspace = true
-# FIXME: use workspace dep
-yab = { version = "0.1.0", git = "https://github.com/slowli/yab.git", rev = "46b5b14e952964cc931afdd66776a073afdd37d5" }
+yab.workspace = true
 
 [[bench]]
 name = "oneshot"

From d6aff266c1b7234d0a250f5fc4ce0ff03d867473 Mon Sep 17 00:00:00 2001
From: Alex Ostrovski <aov@matterlabs.dev>
Date: Mon, 28 Oct 2024 15:56:39 +0200
Subject: [PATCH 17/17] Use `sed` to process bench output

---
 .github/workflows/vm-perf-comparison.yml | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/vm-perf-comparison.yml b/.github/workflows/vm-perf-comparison.yml
index a0220933c82..3520419f133 100644
--- a/.github/workflows/vm-perf-comparison.yml
+++ b/.github/workflows/vm-perf-comparison.yml
@@ -68,14 +68,17 @@ jobs:
           ci_run zkstack dev contracts --system-contracts
           ci_run cargo bench --package vm-benchmark --bench instructions -- --verbose
 
+          ci_run cargo bench --package vm-benchmark --bench instructions -- --print > instructions.log 2>/dev/null
+          # Output all lines from the benchmark result starting from the "## ..." comparison header.
+          # Since the output spans multiple lines, we use a heredoc declaration.
           EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
           echo "speedup<<$EOF" >> $GITHUB_OUTPUT
-          # Output all lines starting from the "## ..." comparison header. Ignore the `grep` error if no lines were matched
-          ci_run cargo bench --package vm-benchmark --bench instructions -- --print | grep -A 1000 --color=never -e '^## ' >> $GITHUB_OUTPUT || [ "$?" = 1 ]
+          sed -n '/^## /,$p' instructions.log >> $GITHUB_OUTPUT
           echo "$EOF" >> $GITHUB_OUTPUT
           
+          ci_run cargo run --package vm-benchmark --release --bin instruction_counts -- --diff base-opcodes > opcodes.log
           echo "opcodes<<$EOF" >> $GITHUB_OUTPUT
-          ci_run cargo run --package vm-benchmark --release --bin instruction_counts -- --diff base-opcodes | grep -A 1000 --color=never -e '^## ' >> $GITHUB_OUTPUT || [ "$?" = 1 ]
+          sed -n '/^## /,$p' opcodes.log >> $GITHUB_OUTPUT
           echo "$EOF" >> $GITHUB_OUTPUT
 
       - name: Comment on PR