Skip to content

Commit

Permalink
feat: Mega memory benchmarks (#9858)
Browse files Browse the repository at this point in the history
It would be better to use Google Benchmark's memory manager
functionality and count allocations directly. We already have something similar
implemented for Tracy. After striking out with that approach for a while, I
reverted to manually counting the size of the biggest vectors.

The PR uncovered this issue: some trace structures have unusable
capacity, not just due to using fewer than a dyadic number of gates, but
also because of the coupling of certain gate types
(see AztecProtocol/barretenberg#1149).

See #9858 for logs of benchmarks.
  • Loading branch information
codygunton authored Nov 14, 2024
1 parent e07cac7 commit 7e587d6
Show file tree
Hide file tree
Showing 8 changed files with 422 additions and 4 deletions.
1 change: 1 addition & 0 deletions barretenberg/cpp/src/barretenberg/benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@ add_subdirectory(append_only_tree_bench)
add_subdirectory(ultra_bench)
add_subdirectory(stdlib_hash)
add_subdirectory(circuit_construction_bench)
add_subdirectory(mega_memory_bench)
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Declares the mega_memory_bench module through the project's barretenberg_module() helper.
# NOTE(review): ultra_honk and stdlib_primitives are presumably the modules this benchmark
# links against -- confirm against the definition of barretenberg_module().
barretenberg_module(
mega_memory_bench
ultra_honk
stdlib_primitives
)

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,14 @@ template <typename FF_> class MegaArith {
T lookup;
T overflow; // block gates of arbitrary type that overflow their designated block

/**
 * @brief Human-readable names for the trace blocks, one label per block.
 *
 * @details Callers pair these labels element-by-element with the blocks returned by get()
 * (e.g. when reporting per-block memory usage), so the list must contain exactly one entry
 * per block. get() ends with `lookup` followed by `overflow`, hence the trailing "overflow"
 * label; without it the overflow block has no label to be paired with.
 * NOTE(review): the label order is assumed to mirror the order used by get() -- confirm when
 * adding or reordering blocks.
 *
 * @return Block labels as string views over string literals (static storage duration).
 */
std::vector<std::string_view> get_labels() const
{
    return { "ecc_op",     "pub_inputs",         "busread",
             "arithmetic", "delta_range",        "elliptic",
             "aux",        "poseidon2_external", "poseidon2_internal",
             "lookup",     "overflow" };
}

auto get()
{
return RefArray{ ecc_op,
Expand All @@ -52,6 +60,7 @@ template <typename FF_> class MegaArith {
lookup,
overflow };
}

auto get() const
{
return RefArray{ ecc_op,
Expand Down Expand Up @@ -297,7 +306,11 @@ template <typename FF_> class MegaArith {
{
info("Gate blocks summary: (actual gates / fixed capacity)");
info("goblin ecc op :\t", this->ecc_op.size(), "/", this->ecc_op.get_fixed_size());
info("pub inputs :\t", this->pub_inputs.size(), "/", this->pub_inputs.get_fixed_size());
info("pub inputs :\t",
this->pub_inputs.size(),
"/",
this->pub_inputs.get_fixed_size(),
" (populated in decider pk constructor)");
info("busread :\t", this->busread.size(), "/", this->busread.get_fixed_size());
info("arithmetic :\t", this->arithmetic.size(), "/", this->arithmetic.get_fixed_size());
info("delta range :\t", this->delta_range.size(), "/", this->delta_range.get_fixed_size());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,44 @@ template <typename FF> class MegaCircuitBuilder_ : public UltraCircuitBuilder_<M
/// Read-only access to the databus entry stored at index BusId::CALLDATA.
const BusVector& get_calldata() const
{
    const auto bus_idx = static_cast<size_t>(BusId::CALLDATA);
    return databus[bus_idx];
}

/// Read-only access to the databus entry stored at index BusId::SECONDARY_CALLDATA.
const BusVector& get_secondary_calldata() const
{
    const auto bus_idx = static_cast<size_t>(BusId::SECONDARY_CALLDATA);
    return databus[bus_idx];
}

/// Read-only access to the databus entry stored at index BusId::RETURNDATA.
const BusVector& get_return_data() const
{
    const auto bus_idx = static_cast<size_t>(BusId::RETURNDATA);
    return databus[bus_idx];
}
/**
 * @brief Estimate, in bytes, the storage reserved by the builder's largest vectors.
 *
 * @details The estimate is the sum of capacity() * element size over:
 *   - every wire (counted as uint32_t entries) and selector (counted as FF entries) of each
 *     trace block,
 *   - the variables vector (FF entries),
 *   - the public inputs vector (uint32_t entries),
 *   - the next/prev/real variable index vectors and the variable tags (uint32_t entries).
 * Intermediate figures are reported through vinfo(): per-block gate sizes are logged in KiB,
 * the remaining figures are logged as raw byte counts.
 * NOTE(review): the per-block loop pairs blocks.get() with blocks.get_labels(); it relies on
 * there being one label per block -- confirm.
 *
 * @return Total estimated size in bytes.
 */
uint64_t estimate_memory() const
{
    vinfo("++Estimating builder memory++");
    uint64_t total_bytes{ 0 };

    // Gate data: wires and selectors of every labelled trace block.
    for (auto [block, label] : zip_view(this->blocks.get(), this->blocks.get_labels())) {
        uint64_t wire_bytes{ 0 };
        for (const auto& wire : block.wires) {
            wire_bytes += wire.capacity() * sizeof(uint32_t);
        }

        uint64_t selector_bytes{ 0 };
        for (const auto& selector : block.selectors) {
            selector_bytes += selector.capacity() * sizeof(FF);
        }

        const uint64_t block_bytes = wire_bytes + selector_bytes;
        vinfo(label, " size ", block_bytes >> 10, " KiB");
        total_bytes += block_bytes;
    }

    // Witness values.
    const size_t variables_bytes = this->variables.capacity() * sizeof(FF);
    vinfo("variables: ", variables_bytes);
    total_bytes += variables_bytes;

    // Public input indices.
    const size_t public_inputs_bytes = this->public_inputs.capacity() * sizeof(uint32_t);
    vinfo("public inputs: ", public_inputs_bytes);
    total_bytes += public_inputs_bytes;

    // Remaining per-variable index and tag vectors, reported as a single figure.
    const size_t index_bytes = this->next_var_index.capacity() * sizeof(uint32_t) +
                               this->prev_var_index.capacity() * sizeof(uint32_t) +
                               this->real_variable_index.capacity() * sizeof(uint32_t) +
                               this->real_variable_tags.capacity() * sizeof(uint32_t);
    vinfo("variable indices: ", index_bytes);
    total_bytes += index_bytes;

    return total_bytes;
}
};
using MegaCircuitBuilder = MegaCircuitBuilder_<bb::fr>;
} // namespace bb
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,24 @@ class MegaFlavor {
compute_grand_product<MegaFlavor, UltraPermutationRelation<FF>>(
this->polynomials, relation_parameters, size_override);
}

uint64_t estimate_memory()
{
vinfo("++Estimating proving key memory++");
for (auto [polynomial, label] : zip_view(polynomials.get_all(), polynomials.get_labels())) {
uint64_t size = polynomial.size();
vinfo(label, " num: ", size, " size: ", (size * sizeof(FF)) >> 10, " KiB");
}

uint64_t result(0);
for (auto& polynomial : polynomials.get_unshifted()) {
result += polynomial.size() * sizeof(FF);
}

result += public_inputs.capacity() * sizeof(FF);

return result;
}
};

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,6 @@ enum MultiTableId {
BLAKE_XOR_ROTATE_16,
BLAKE_XOR_ROTATE_8,
BLAKE_XOR_ROTATE_7,
PEDERSEN_IV,
HONK_DUMMY_MULTI,
KECCAK_THETA_OUTPUT,
KECCAK_CHI_OUTPUT,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ template <IsHonkFlavor Flavor> class DeciderProvingKey_ {
using Polynomial = typename Flavor::Polynomial;
using RelationSeparator = typename Flavor::RelationSeparator;

using Trace = ExecutionTrace_<Flavor>;

// Flag indicating whether the polynomials will be constructed with fixed block sizes for each gate type
bool is_structured;

public:
using Trace = ExecutionTrace_<Flavor>;

ProvingKey proving_key;

bool is_accumulator = false;
Expand Down

1 comment on commit 7e587d6

@AztecBot
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Performance Alert ⚠️

Possible performance regression was detected for benchmark 'C++ Benchmark'.
Benchmark result of this commit is worse than the previous benchmark result exceeding threshold 1.05.

Benchmark suite Current: 7e587d6 Previous: f566503 Ratio
nativeconstruct_proof_ultrahonk_power_of_2/20 5348.5002859999895 ms/iter 4945.2317020000155 ms/iter 1.08
Goblin::merge(t) 143841826 ns/iter 136194915 ns/iter 1.06

This comment was automatically generated by workflow using github-action-benchmark.

CC: @ludamad @codygunton

Please sign in to comment.