feat: Mega memory benchmarks (#9858)

It would be better to actually use Google Benchmark's memory manager functionality and count allocations. We already have something similar implemented for Tracy. After striking out with that approach for a bit, I fell back to just manually counting the size of the biggest vectors. The PR uncovered this issue: some trace structures have unusable capacity, not just due to using fewer than a dyadic number of gates, but also because of coupling of certain gate types (AztecProtocol/barretenberg#1149). See #9858 for logs of benchmarks.
AztecProtocol · Nov 14, 2024 · 7e587d6 · 7e587d6 · AztecBot · Nov 14, 2024
1 parent e07cac7
commit 7e587d6
Show file tree

Hide file tree

Showing 8 changed files with 422 additions and 4 deletions.
diff --git a/barretenberg/cpp/src/barretenberg/benchmark/CMakeLists.txt b/barretenberg/cpp/src/barretenberg/benchmark/CMakeLists.txt
@@ -17,3 +17,4 @@ add_subdirectory(append_only_tree_bench)
 add_subdirectory(ultra_bench)
 add_subdirectory(stdlib_hash)
 add_subdirectory(circuit_construction_bench)
+add_subdirectory(mega_memory_bench)
diff --git a/barretenberg/cpp/src/barretenberg/benchmark/mega_memory_bench/CMakeLists.txt b/barretenberg/cpp/src/barretenberg/benchmark/mega_memory_bench/CMakeLists.txt
@@ -0,0 +1,5 @@
+barretenberg_module(
+    mega_memory_bench
+    ultra_honk
+    stdlib_primitives
+)
diff --git a/barretenberg/cpp/src/barretenberg/benchmark/mega_memory_bench/mega_memory.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/mega_memory_bench/mega_memory.bench.cpp
diff --git a/barretenberg/cpp/src/barretenberg/plonk_honk_shared/arithmetization/mega_arithmetization.hpp b/barretenberg/cpp/src/barretenberg/plonk_honk_shared/arithmetization/mega_arithmetization.hpp
@@ -38,6 +38,14 @@ template <typename FF_> class MegaArith {
         T lookup;
         T overflow; // block gates of arbitrary type that overflow their designated block
 
+        /**
+         * @brief Human-readable names for the trace blocks, one per block.
+         * @details Callers pair these labels element-by-element with the blocks returned by get(), so the
+         * list must contain exactly one entry per block, including the trailing overflow block.
+         * NOTE(review): the ordering is assumed to mirror get(); only its first and last entries are
+         * visible from this hunk - confirm the middle entries line up.
+         * @return The block labels, ending with "overflow".
+         */
+        std::vector<std::string_view> get_labels() const
+        {
+            return { "ecc_op",     "pub_inputs",         "busread",
+                     "arithmetic", "delta_range",        "elliptic",
+                     "aux",        "poseidon2_external", "poseidon2_internal",
+                     "lookup",     "overflow" };
+        }
+
         auto get()
         {
             return RefArray{ ecc_op,
@@ -52,6 +60,7 @@ template <typename FF_> class MegaArith {
                              lookup,
                              overflow };
         }
+
         auto get() const
         {
             return RefArray{ ecc_op,
@@ -297,7 +306,11 @@ template <typename FF_> class MegaArith {
         {
             info("Gate blocks summary: (actual gates / fixed capacity)");
             info("goblin ecc op :\t", this->ecc_op.size(), "/", this->ecc_op.get_fixed_size());
-            info("pub inputs    :\t", this->pub_inputs.size(), "/", this->pub_inputs.get_fixed_size());
+            info("pub inputs    :\t",
+                 this->pub_inputs.size(),
+                 "/",
+                 this->pub_inputs.get_fixed_size(),
+                 " (populated in decider pk constructor)");
             info("busread       :\t", this->busread.size(), "/", this->busread.get_fixed_size());
             info("arithmetic    :\t", this->arithmetic.size(), "/", this->arithmetic.get_fixed_size());
             info("delta range   :\t", this->delta_range.size(), "/", this->delta_range.get_fixed_size());

diff --git a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/mega_circuit_builder.hpp b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/mega_circuit_builder.hpp
@@ -236,6 +236,44 @@ template <typename FF> class MegaCircuitBuilder_ : public UltraCircuitBuilder_<M
     const BusVector& get_calldata() const { return databus[static_cast<size_t>(BusId::CALLDATA)]; }
     const BusVector& get_secondary_calldata() const { return databus[static_cast<size_t>(BusId::SECONDARY_CALLDATA)]; }
     const BusVector& get_return_data() const { return databus[static_cast<size_t>(BusId::RETURNDATA)]; }
+    /**
+     * @brief Estimate, in bytes, the memory held by the builder's largest vectors.
+     * @details Adds up capacity() times element size for the wires and selectors of every gate block, the
+     * variables, the public inputs and the four variable-index vectors. A breakdown is logged via vinfo:
+     * per-block figures are printed in KiB, the remaining figures in bytes.
+     * @return The summed estimate in bytes.
+     */
+    uint64_t estimate_memory() const
+    {
+        vinfo("++Estimating builder memory++");
+
+        uint64_t total_bytes = 0;
+
+        // Gate blocks: wires are stored as uint32_t entries, selectors as field elements.
+        for (auto [gate_block, block_name] : zip_view(this->blocks.get(), this->blocks.get_labels())) {
+            uint64_t block_bytes = 0;
+            for (const auto& wire_column : gate_block.wires) {
+                block_bytes += wire_column.capacity() * sizeof(uint32_t);
+            }
+            for (const auto& selector_column : gate_block.selectors) {
+                block_bytes += selector_column.capacity() * sizeof(FF);
+            }
+            total_bytes += block_bytes;
+            vinfo(block_name, " size ", block_bytes >> 10, " KiB");
+        }
+
+        // Variables (field elements).
+        const size_t variables_bytes = this->variables.capacity() * sizeof(FF);
+        vinfo("variables: ", variables_bytes);
+        total_bytes += variables_bytes;
+
+        // Public inputs (uint32_t entries).
+        const size_t public_inputs_bytes = this->public_inputs.capacity() * sizeof(uint32_t);
+        vinfo("public inputs: ", public_inputs_bytes);
+        total_bytes += public_inputs_bytes;
+
+        // Remaining per-variable index vectors, all counted as uint32_t entries.
+        const size_t index_entries = this->next_var_index.capacity() + this->prev_var_index.capacity() +
+                                     this->real_variable_index.capacity() + this->real_variable_tags.capacity();
+        const size_t index_bytes = index_entries * sizeof(uint32_t);
+        vinfo("variable indices: ", index_bytes);
+        total_bytes += index_bytes;
+
+        return total_bytes;
+    }
 };
 using MegaCircuitBuilder = MegaCircuitBuilder_<bb::fr>;
 } // namespace bb
diff --git a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/mega_flavor.hpp b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/mega_flavor.hpp
@@ -520,6 +520,24 @@ class MegaFlavor {
             compute_grand_product<MegaFlavor, UltraPermutationRelation<FF>>(
                 this->polynomials, relation_parameters, size_override);
         }
+
+        /**
+         * @brief Estimate, in bytes, the memory held by the proving key.
+         * @details Logs (via vinfo) the element count and KiB size of every polynomial returned by
+         * polynomials.get_all(). The returned total covers only polynomials.get_unshifted(), at
+         * size() * sizeof(FF) each, plus the capacity of public_inputs.
+         * NOTE(review): shifted polynomials are presumably left out of the total because they share
+         * storage with their unshifted counterparts - confirm.
+         * @return The summed estimate in bytes.
+         */
+        uint64_t estimate_memory()
+        {
+            vinfo("++Estimating proving key memory++");
+
+            // Per-polynomial breakdown; logging only, it does not feed into the returned total.
+            for (auto [poly, poly_name] : zip_view(polynomials.get_all(), polynomials.get_labels())) {
+                const uint64_t num_elements = poly.size();
+                vinfo(poly_name, " num: ", num_elements, " size: ", (num_elements * sizeof(FF)) >> 10, " KiB");
+            }
+
+            uint64_t total_bytes = public_inputs.capacity() * sizeof(FF);
+            for (auto& poly : polynomials.get_unshifted()) {
+                total_bytes += poly.size() * sizeof(FF);
+            }
+
+            return total_bytes;
+        }
     };
 
     /**

diff --git a/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/plookup_tables/types.hpp b/barretenberg/cpp/src/barretenberg/stdlib_circuit_builders/plookup_tables/types.hpp
@@ -111,7 +111,6 @@ enum MultiTableId {
     BLAKE_XOR_ROTATE_16,
     BLAKE_XOR_ROTATE_8,
     BLAKE_XOR_ROTATE_7,
-    PEDERSEN_IV,
     HONK_DUMMY_MULTI,
     KECCAK_THETA_OUTPUT,
     KECCAK_CHI_OUTPUT,

diff --git a/barretenberg/cpp/src/barretenberg/ultra_honk/decider_proving_key.hpp b/barretenberg/cpp/src/barretenberg/ultra_honk/decider_proving_key.hpp
@@ -30,12 +30,12 @@ template <IsHonkFlavor Flavor> class DeciderProvingKey_ {
     using Polynomial = typename Flavor::Polynomial;
     using RelationSeparator = typename Flavor::RelationSeparator;
 
-    using Trace = ExecutionTrace_<Flavor>;
-
     // Flag indicating whether the polynomials will be constructed with fixed block sizes for each gate type
     bool is_structured;
 
   public:
+    using Trace = ExecutionTrace_<Flavor>;
+
     ProvingKey proving_key;
 
     bool is_accumulator = false;
Benchmark suite	Current: `7e587d6`	Previous: `f566503`	Ratio
`nativeconstruct_proof_ultrahonk_power_of_2/20`	`5348.5002859999895` ms/iter	`4945.2317020000155` ms/iter	`1.08`
`Goblin::merge(t)`	`143841826` ns/iter	`136194915` ns/iter	`1.06`