From 77be6cb90faf783597ab9b04949e49287eef2b87 Mon Sep 17 00:00:00 2001 From: aldenhu Date: Mon, 15 Apr 2024 21:47:43 +0000 Subject: [PATCH] add counters --- Cargo.lock | 1 + crates/aptos-infallible/src/rwlock.rs | 4 + crates/aptos-metrics-core/src/lib.rs | 10 ++ types/Cargo.toml | 1 + types/src/state_store/metrics.rs | 27 +++++ types/src/state_store/mod.rs | 1 + types/src/state_store/state_key.rs | 151 ++++++++++++-------------- 7 files changed, 115 insertions(+), 80 deletions(-) create mode 100644 types/src/state_store/metrics.rs diff --git a/Cargo.lock b/Cargo.lock index 87b70d6778d5a1..91e68c9840fbd4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3918,6 +3918,7 @@ dependencies = [ "aptos-dkg", "aptos-experimental-runtimes", "aptos-infallible", + "aptos-metrics-core", "ark-bn254", "ark-ff", "ark-groth16", diff --git a/crates/aptos-infallible/src/rwlock.rs b/crates/aptos-infallible/src/rwlock.rs index f7052098028813..63840e8fb995c2 100644 --- a/crates/aptos-infallible/src/rwlock.rs +++ b/crates/aptos-infallible/src/rwlock.rs @@ -36,6 +36,10 @@ impl RwLock { .into_inner() .expect("Cannot currently handle a poisoned lock") } + + pub fn inner(&self) -> &StdRwLock { + &self.0 + } } #[cfg(test)] diff --git a/crates/aptos-metrics-core/src/lib.rs b/crates/aptos-metrics-core/src/lib.rs index 793bb72d717664..3b7c7b63b2d615 100644 --- a/crates/aptos-metrics-core/src/lib.rs +++ b/crates/aptos-metrics-core/src/lib.rs @@ -35,3 +35,13 @@ impl IntGaugeHelper for IntGaugeVec { self.with_label_values(labels).set(val) } } + +pub trait IntCounterHelper { + fn inc_with(&self, labels: &[&str]); +} + +impl IntCounterHelper for IntCounterVec { + fn inc_with(&self, labels: &[&str]) { + self.with_label_values(labels).inc() + } +} diff --git a/types/Cargo.toml b/types/Cargo.toml index 68da0c8b85e172..6531977f7c3ebe 100644 --- a/types/Cargo.toml +++ b/types/Cargo.toml @@ -20,6 +20,7 @@ aptos-crypto-derive = { workspace = true } aptos-dkg = { workspace = true } aptos-experimental-runtimes = { workspace = true } aptos-infallible = { workspace = true } +aptos-metrics-core = { workspace = true } ark-bn254 = { workspace = true } ark-ff = { workspace = true } ark-groth16 = { workspace = true } diff --git a/types/src/state_store/metrics.rs b/types/src/state_store/metrics.rs new file mode 100644 index 00000000000000..64eb9491cfe3f5 --- /dev/null +++ b/types/src/state_store/metrics.rs @@ -0,0 +1,27 @@ +// Copyright © Aptos Foundation +// SPDX-License-Identifier: Apache-2.0 + +use aptos_metrics_core::{ + exponential_buckets, register_histogram_vec, register_int_counter_vec, HistogramVec, + IntCounterVec, +}; +use once_cell::sync::Lazy; + +pub static STATE_KEY_COUNTERS: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "aptos_state_key_counters", + "Aptos storage state key counters", + &["key_type", "event"] + ) + .unwrap() +}); + +pub static STATE_KEY_TIMER: Lazy = Lazy::new(|| { + register_histogram_vec!( + "aptos_state_key_timer", + "Various timers for performance analysis.", + &["key_type", "event"], + exponential_buckets(/*start=*/ 1e-6, /*factor=*/ 2.0, /*count=*/ 22).unwrap(), + ) + .unwrap() +}); diff --git a/types/src/state_store/mod.rs b/types/src/state_store/mod.rs index 096119e00bdef9..3ad06ddf890926 100644 --- a/types/src/state_store/mod.rs +++ b/types/src/state_store/mod.rs @@ -18,6 +18,7 @@ use std::{collections::HashMap, ops::Deref}; pub mod errors; pub mod in_memory_state_view; +mod metrics; pub mod state_key; pub mod state_key_prefix; pub mod state_storage_usage; diff --git a/types/src/state_store/state_key.rs b/types/src/state_store/state_key.rs index 58e7003e90b248..b14d9467e780b4 100644 --- a/types/src/state_store/state_key.rs +++ b/types/src/state_store/state_key.rs @@ -2,13 +2,15 @@ // SPDX-License-Identifier: Apache-2.0 #![allow(clippy::non_canonical_partial_ord_impl)] -// FIXME(aldenhu): remove -#![allow(dead_code)] -#![allow(unused_variables)] use crate::{ - access_path, access_path::AccessPath, on_chain_config::OnChainConfig, - state_store::table::TableHandle, + access_path, + access_path::AccessPath, + on_chain_config::OnChainConfig, + state_store::{ + metrics::{STATE_KEY_COUNTERS, STATE_KEY_TIMER}, + table::TableHandle, + }, }; use anyhow::Result; use aptos_crypto::{ @@ -17,7 +19,7 @@ use aptos_crypto::{ }; use aptos_crypto_derive::CryptoHasher; use aptos_infallible::RwLock; -use bytes::Bytes; +use aptos_metrics_core::{IntCounterHelper, TimerHelper}; use move_core_types::{ account_address::AccountAddress, identifier::{IdentStr, Identifier}, @@ -55,7 +57,7 @@ pub enum StateKeyInner { Raw(Vec), } -impl fmt::Debug for StateKeyInner { +impl Debug for StateKeyInner { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { StateKeyInner::AccessPath(ap) => { @@ -184,35 +186,16 @@ impl Entry { hash_value, } } - - pub fn from_serialized(serialized: Bytes) -> Result { - let deserialized = bcs::from_bytes(&serialized)?; - let hash_value = CryptoHash::hash(&deserialized); - Ok(Self { - deserialized, - hash_value, - }) - } - - pub fn from_encoded(encoded: Bytes) -> Result { - let decoded = StateKeyInner::decode(&encoded)?; - let hash_value = CryptoHash::hash(&decoded); - Ok(Self { - deserialized: decoded, - hash_value, - }) - } } impl Drop for Entry { fn drop(&mut self) { - use access_path::Path; - // FIXME(aldenhu): implement: remove entry in the registry - // todo!() match &self.deserialized { StateKeyInner::AccessPath(AccessPath { address, path }) => { - // FIXME(aldenhu): maybe hold reference to the map(s)? - // FIXME(aldenhu): maybe let Inner carry the deserialized Path? + use access_path::Path; + + // TODO(aldenhu): maybe hold reference to the map(s)? + // TODO(aldenhu): maybe let Inner carry the deserialized Path? match &bcs::from_bytes::(path).expect("Failed to deserialize Path.") { Path::Code(module_id) => GLOBAL_REGISTRY .module_keys @@ -234,22 +217,22 @@ impl Drop for Entry { } struct TwoLevelRegistry { + key_type: &'static str, inner: RwLock>>>, } -impl Default for TwoLevelRegistry { - fn default() -> Self { - Self { - inner: RwLock::new(HashMap::new()), - } - } -} - impl TwoLevelRegistry where Key1: Clone + Eq + Hash, Key2: Clone + Eq + Hash, { + fn new_empty(key_type: &'static str) -> Self { + Self { + key_type, + inner: RwLock::new(HashMap::new()), + } + } + fn try_get(&self, key1: &Q1, key2: &Q2) -> Option> where Key1: Borrow, @@ -257,8 +240,16 @@ where Q1: Eq + Hash + ?Sized, Q2: Eq + Hash + ?Sized, { - self.inner - .read() + let locked = match self.inner.inner().try_read() { + Ok(locked) => locked, + Err(..) => { + STATE_KEY_COUNTERS.inc_with(&[self.key_type, "read_blocked_by_write"]); + + self.inner.read() + }, + }; + + locked .get(key1) .and_then(|m| m.get(key2)) .and_then(|weak| weak.upgrade()) @@ -271,6 +262,8 @@ where Q1: Eq + Hash + ToOwned + ?Sized, Q2: Eq + Hash + ToOwned + ?Sized, { + let _timer = STATE_KEY_TIMER.timer_with(&[self.key_type, "lock_and_get_or_add"]); + const MAX_TRIES: usize = 100; for _ in 0..MAX_TRIES { @@ -282,19 +275,23 @@ where .entry(key2.to_owned()) { hash_map::Entry::Occupied(occupied) => { - if let Some(key_info) = occupied.get().upgrade() { + if let Some(entry) = occupied.get().upgrade() { // some other thread has added it - return key_info; + STATE_KEY_COUNTERS.inc_with(&[self.key_type, "entry_create_collision"]); + return entry; } else { - // FIXME(aldenhu): make sure drop is implemented properly // the key is being dropped, release lock and retry + STATE_KEY_COUNTERS + .inc_with(&[self.key_type, "entry_create_while_dropping"]); continue; } }, hash_map::Entry::Vacant(vacant) => { - let key_info = Arc::new(maybe_add); - vacant.insert(Arc::downgrade(&key_info)); - return key_info; + STATE_KEY_COUNTERS.inc_with(&[self.key_type, "entry_create"]); + + let entry = Arc::new(maybe_add); + vacant.insert(Arc::downgrade(&entry)); + return entry; }, } } @@ -302,7 +299,26 @@ where } fn lock_and_remove(&self, key1: &Key1, key2: &Key2) { - // FIXME(aldenhu): remove empty first level entries + match self.inner.write().entry(key1.to_owned()) { + hash_map::Entry::Occupied(mut occupied) => { + match occupied.get_mut().remove(key2) { + Some(..) => { + STATE_KEY_COUNTERS.inc_with(&[self.key_type, "entry_remove"]); + }, + None => { + unreachable!("Entry missing in registry when dropping.") + }, + } + if occupied.get().is_empty() { + occupied.remove(); + } + }, + hash_map::Entry::Vacant(_) => { + // This should not happen + unreachable!("level 1 map must exist when an entry is supposed to be in it."); + }, + } + self.inner .write() .entry(key1.to_owned()) @@ -311,9 +327,8 @@ where } } -static GLOBAL_REGISTRY: Lazy = Lazy::new(StateKeyRegistry::default); +static GLOBAL_REGISTRY: Lazy = Lazy::new(StateKeyRegistry::new_empty); -#[derive(Default)] pub struct StateKeyRegistry { // FIXME(aldenhu): reverse dimensions to save memory? resource_keys: TwoLevelRegistry, @@ -324,38 +339,14 @@ pub struct StateKeyRegistry { } impl StateKeyRegistry { - fn get_or_add_resource(&self, address: &AccountAddress, struct_tag: &StructTag) -> Arc { - if let Some(key_info) = self.resource_keys.try_get(address, struct_tag) { - return key_info; - } - - let inner = StateKeyInner::AccessPath( - AccessPath::resource_access_path(*address, struct_tag.clone()) - .expect("Failed to create access path"), - ); - let key_info = Entry::from_deserialized(inner); - - self.resource_keys - .lock_and_get_or_add(address, struct_tag, key_info) - } - - fn get_or_add_resource_group( - &self, - address: &AccountAddress, - struct_tag: &StructTag, - ) -> Arc { - if let Some(key_info) = self.resource_group_keys.try_get(address, struct_tag) { - return key_info; + fn new_empty() -> Self { + Self { + resource_keys: TwoLevelRegistry::new_empty("resource"), + resource_group_keys: TwoLevelRegistry::new_empty("resource_group"), + module_keys: TwoLevelRegistry::new_empty("module"), + table_item_keys: TwoLevelRegistry::new_empty("table_item"), + raw_keys: TwoLevelRegistry::new_empty("raw"), } - - let inner = StateKeyInner::AccessPath(AccessPath::resource_group_access_path( - *address, - struct_tag.clone(), - )); - let key_info = Entry::from_deserialized(inner); - - self.resource_group_keys - .lock_and_get_or_add(address, struct_tag, key_info) } }