Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

more prometheus metrics #1612

Merged
merged 28 commits into from
Oct 15, 2024
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
3ba456b
add rpc metrics
brenzi Oct 13, 2024
006fe82
update metrics as batches to reduce ocalls
brenzi Oct 13, 2024
aa69c5e
count all rpc requests and TC submissions
brenzi Oct 13, 2024
b7251bd
add histogram metric for stf execution duration
brenzi Oct 13, 2024
7dbe47f
add histogram metric for stf execution call counts for success and fa…
brenzi Oct 13, 2024
096aac9
add total issuance metric
brenzi Oct 13, 2024
0a9a3c4
add parentchains processed block metrics
brenzi Oct 13, 2024
6e6328a
measure time to load and write state
brenzi Oct 13, 2024
c93c606
add metric for remaining duration in AURA slots
brenzi Oct 13, 2024
6a7edc6
fmt
brenzi Oct 13, 2024
65b9e0d
taplo fmt
brenzi Oct 13, 2024
42a9b5f
clippy
brenzi Oct 13, 2024
f8a97c3
fix teeracle build
brenzi Oct 13, 2024
cdf2659
add metric for state size
brenzi Oct 13, 2024
7ca88dd
add account balances for enclave on all parentchains and shard vault
brenzi Oct 14, 2024
4ec85ab
balance metrics with decimals applied
brenzi Oct 14, 2024
4c2efbf
add enclave fingerprint and version as labels
brenzi Oct 14, 2024
44d3780
add sidechain peer count metric
brenzi Oct 14, 2024
dfa9959
add metric for last finalized sidechain block
brenzi Oct 14, 2024
8c51390
refactor to use more lables
brenzi Oct 14, 2024
9a7189c
cosmetics
brenzi Oct 14, 2024
b7108a9
fix and simplify bogus top-pool-size metric
brenzi Oct 14, 2024
248930a
clippy
brenzi Oct 14, 2024
8cf60fb
fix tests
brenzi Oct 14, 2024
8fb55dd
review fixes
brenzi Oct 15, 2024
9d8fa29
nicen up
brenzi Oct 15, 2024
f470ae1
change top pool size to a priori
brenzi Oct 15, 2024
be591f4
clippy
brenzi Oct 15, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3235,6 +3235,7 @@ dependencies = [
name = "itp-enclave-metrics"
version = "0.9.0"
dependencies = [
"itp-types",
"parity-scale-codec",
"sgx_tstd",
"substrate-fixed",
Expand Down Expand Up @@ -3453,6 +3454,7 @@ version = "0.9.0"
dependencies = [
"hex",
"itc-parentchain-test",
"itp-enclave-metrics",
"itp-node-api",
"itp-ocall-api",
"itp-sgx-crypto",
Expand All @@ -3461,6 +3463,7 @@ dependencies = [
"itp-stf-primitives",
"itp-stf-state-handler",
"itp-stf-state-observer",
"itp-storage",
"itp-test",
"itp-time-utils",
"itp-top-pool",
Expand Down Expand Up @@ -3848,7 +3851,9 @@ dependencies = [
name = "its-rpc-handler"
version = "0.9.0"
dependencies = [
"itp-enclave-metrics",
"itp-import-queue",
"itp-ocall-api",
"itp-rpc",
"itp-stf-primitives",
"itp-top-pool-author",
Expand Down
22 changes: 12 additions & 10 deletions app-libs/oracle/src/metrics_exporter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use crate::types::{ExchangeRate, TradingPair};
use itp_enclave_metrics::{EnclaveMetric, ExchangeRateOracleMetric, OracleMetric};
use itp_ocall_api::EnclaveMetricsOCallApi;
use log::error;
use std::{string::String, sync::Arc, time::Instant};
use std::{string::String, sync::Arc, time::Instant, vec, vec::Vec};

/// Trait to export metrics for any Teeracle.
pub trait ExportMetrics<MetricsInfo> {
Expand All @@ -38,7 +38,7 @@ pub trait ExportMetrics<MetricsInfo> {
}

pub trait UpdateMetric<MetricInfo> {
fn update_metric(&self, metric: OracleMetric<MetricInfo>);
fn update_metrics(&self, metric: OracleMetric<MetricInfo>);
}

/// Metrics exporter implementation.
Expand All @@ -50,7 +50,7 @@ impl<OCallApi, MetricInfo> UpdateMetric<MetricInfo> for MetricsExporter<OCallApi
where
OCallApi: EnclaveMetricsOCallApi,
{
fn update_metric(&self, _metric: OracleMetric<MetricInfo>) {
fn update_metrics(&self, _metric: OracleMetric<MetricInfo>) {
// TODO: Implement me
}
}
Expand All @@ -63,8 +63,10 @@ where
MetricsExporter { ocall_api }
}

fn update_metric(&self, metric: ExchangeRateOracleMetric) {
if let Err(e) = self.ocall_api.update_metric(EnclaveMetric::ExchangeRateOracle(metric)) {
fn update_metrics(&self, metrics: Vec<ExchangeRateOracleMetric>) {
let wrapped_metrics =
metrics.iter().map(|m| EnclaveMetric::ExchangeRateOracle(m.clone())).collect();
if let Err(e) = self.ocall_api.update_metrics(wrapped_metrics) {
error!("Failed to update enclave metric, sgx_status_t: {}", e)
}
}
Expand All @@ -75,14 +77,14 @@ where
OCallApi: EnclaveMetricsOCallApi,
{
fn increment_number_requests(&self, source: String) {
self.update_metric(ExchangeRateOracleMetric::NumberRequestsIncrement(source));
self.update_metrics(vec![ExchangeRateOracleMetric::NumberRequestsIncrement(source)]);
}

fn record_response_time(&self, source: String, timer: Instant) {
self.update_metric(ExchangeRateOracleMetric::ResponseTime(
self.update_metrics(vec![ExchangeRateOracleMetric::ResponseTime(
source,
timer.elapsed().as_millis(),
));
)]);
}

fn update_exchange_rate(
Expand All @@ -91,11 +93,11 @@ where
exchange_rate: ExchangeRate,
trading_pair: TradingPair,
) {
self.update_metric(ExchangeRateOracleMetric::ExchangeRate(
self.update_metrics(vec![ExchangeRateOracleMetric::ExchangeRate(
source,
trading_pair.key(),
exchange_rate,
));
)]);
}

fn update_weather(&self, _source: String, _metrics_info: MetricsInfo) {
Expand Down
1 change: 1 addition & 0 deletions app-libs/stf/src/stf_sgx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ where
}
fn on_finalize(_state: &mut State) -> Result<(), Self::Error> {
trace!("on_finalize called");

Ok(())
}
}
Expand Down
2 changes: 1 addition & 1 deletion core-primitives/enclave-metrics/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ edition = "2021"

[dependencies]
# sgx
itp-types = { path = "../types", default-features = false }
sgx_tstd = { branch = "master", git = "https://github.com/apache/teaclave-sgx-sdk.git", optional = true }

# no-std dependencies
codec = { package = "parity-scale-codec", version = "3.0.0", default-features = false, features = ["derive", "full"] }
substrate-fixed = { default-features = false, git = "https://github.com/encointer/substrate-fixed", tag = "v0.5.9" }
Expand Down
20 changes: 17 additions & 3 deletions core-primitives/enclave-metrics/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,27 +20,41 @@
#[cfg(all(feature = "std", feature = "sgx"))]
compile_error!("feature \"std\" and feature \"sgx\" cannot be enabled at the same time");

extern crate core;
brenzi marked this conversation as resolved.
Show resolved Hide resolved
#[cfg(all(not(feature = "std"), feature = "sgx"))]
extern crate sgx_tstd as std;

use codec::{Decode, Encode};
use core::time::Duration;
use itp_types::ShardIdentifier;
use std::string::String;
use substrate_fixed::types::U32F32;

// FIXME: Copied from ita-oracle because of cyclic deps. Should be removed after integritee-network/pallets#71
pub type ExchangeRate = U32F32;

#[derive(Encode, Decode, Debug)]
#[derive(Encode, Decode, Debug, Clone)]
/// A single metric update emitted by the enclave.
///
/// Values of this type are collected into a `Vec` and handed to
/// `EnclaveMetricsOCallApi::update_metrics`, so several updates can share one call.
/// The type derives `Encode`/`Decode`, so variant order is part of the encoded form.
pub enum EnclaveMetric {
SetSidechainBlockHeight(u64),
TopPoolSizeSet(u64),
TopPoolSizeIncrement,
TopPoolSizeDecrement,
RpcRequestsIncrement,
RpcTrustedCallsIncrement,
// NOTE(review): presumably (label, remaining time in the AURA slot) - confirm against the producer.
SidechainAuraSlotRemainingTimes(String, Duration),
/// Time elapsed between the start of a state update proposal and the end of its execution.
StfStateUpdateExecutionDuration(Duration),
/// Number of executed calls in a state update proposal that report success.
StfStateUpdateExecutedCallsSuccessfulCount(u32),
/// Number of executed calls in a state update proposal that do not report success.
StfStateUpdateExecutedCallsFailedCount(u32),
/// State size of the given shard, as reported by the state's `size()`.
StfStateSizeSet(ShardIdentifier, u32),
/// Value of `Balances::TotalIssuance` as a float; `-1.0` when it could not be read.
StfRuntimeTotalIssuanceSet(f64),
/// Value of `ParentchainIntegritee::Number`; `0` when it could not be read.
StfRuntimeParentchainIntegriteeProcessedBlockNumberSet(u32),
/// Value of `ParentchainTargetA::Number`; `0` when it could not be read.
StfRuntimeParentchainTargetAProcessedBlockNumberSet(u32),
/// Value of `ParentchainTargetB::Number`; `0` when it could not be read.
StfRuntimeParentchainTargetBProcessedBlockNumberSet(u32),
/// Wrapper for metrics of the exchange rate oracle.
ExchangeRateOracle(ExchangeRateOracleMetric),
// OracleMetric(OracleMetric<MetricsInfo>),
}

#[derive(Encode, Decode, Debug)]
#[derive(Encode, Decode, Debug, Clone)]
pub enum ExchangeRateOracleMetric {
/// Exchange Rate from CoinGecko - (Source, TradingPair, ExchangeRate)
ExchangeRate(String, String, ExchangeRate),
Expand All @@ -50,7 +64,7 @@ pub enum ExchangeRateOracleMetric {
NumberRequestsIncrement(String),
}

#[derive(Encode, Decode, Debug)]
#[derive(Encode, Decode, Debug, Clone)]
pub enum OracleMetric<MetricsInfo> {
OracleSpecificMetric(MetricsInfo),
ResponseTime(String, u128),
Expand Down
2 changes: 1 addition & 1 deletion core-primitives/ocall-api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ pub trait EnclaveOnChainOCallApi: Clone + Send + Sync {

/// Trait for sending metric updates.
pub trait EnclaveMetricsOCallApi: Clone + Send + Sync {
fn update_metric<Metric: Encode>(&self, metric: Metric) -> SgxResult<()>;
fn update_metrics<Metric: Encode>(&self, metric: Vec<Metric>) -> SgxResult<()>;
}

pub trait EnclaveSidechainOCallApi: Clone + Send + Sync {
Expand Down
2 changes: 2 additions & 0 deletions core-primitives/stf-executor/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ sgx_tstd = { branch = "master", git = "https://github.com/apache/teaclave-sgx-sd
sgx_types = { branch = "master", git = "https://github.com/apache/teaclave-sgx-sdk.git" }

# local dependencies
itp-enclave-metrics = { path = "../enclave-metrics", default-features = false }
itp-node-api = { path = "../node-api", default-features = false }
itp-ocall-api = { path = "../ocall-api", default-features = false }
itp-sgx-crypto = { path = "../sgx/crypto", default-features = false }
Expand All @@ -20,6 +21,7 @@ itp-stf-interface = { path = "../stf-interface", default-features = false }
itp-stf-primitives = { path = "../stf-primitives", default-features = false }
itp-stf-state-handler = { path = "../stf-state-handler", default-features = false }
itp-stf-state-observer = { path = "../stf-state-observer", default-features = false }
itp-storage = { path = "../storage", default-features = false }
itp-time-utils = { path = "../time-utils", default-features = false }
itp-top-pool-author = { path = "../top-pool-author", default-features = false }
itp-types = { path = "../types", default-features = false }
Expand Down
82 changes: 76 additions & 6 deletions core-primitives/stf-executor/src/executor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ use crate::{
BatchExecutionResult, ExecutedOperation,
};
use codec::{Decode, Encode};
use itp_enclave_metrics::EnclaveMetric;
use itp_node_api::metadata::{provider::AccessNodeMetadata, NodeMetadataTrait};
use itp_ocall_api::{EnclaveAttestationOCallApi, EnclaveOnChainOCallApi};
use itp_ocall_api::{EnclaveAttestationOCallApi, EnclaveMetricsOCallApi, EnclaveOnChainOCallApi};
use itp_sgx_externalities::{SgxExternalitiesTrait, StateHash};
use itp_stf_interface::{
parentchain_pallet::ParentchainPalletInstancesInterface, StateCallInterface, UpdateState,
Expand All @@ -32,11 +33,12 @@ use itp_stf_primitives::{
types::{ShardIdentifier, TrustedOperation, TrustedOperationOrHash},
};
use itp_stf_state_handler::{handle_state::HandleState, query_shard_state::QueryShardState};
use itp_storage::keys::storage_value_key;
use itp_time_utils::{duration_now, now_as_millis};
use itp_types::{
parentchain::{Header as ParentchainHeader, ParentchainCall, ParentchainId},
parentchain::{BlockNumber, Header as ParentchainHeader, ParentchainCall, ParentchainId},
storage::StorageEntryVerified,
H256,
Balance, H256,
};
use log::*;
use sp_runtime::traits::Header as HeaderTrait;
Expand All @@ -59,7 +61,7 @@ where
impl<OCallApi, StateHandler, NodeMetadataRepository, Stf, TCS, G>
StfExecutor<OCallApi, StateHandler, NodeMetadataRepository, Stf, TCS, G>
where
OCallApi: EnclaveAttestationOCallApi + EnclaveOnChainOCallApi,
OCallApi: EnclaveAttestationOCallApi + EnclaveOnChainOCallApi + EnclaveMetricsOCallApi,
StateHandler: HandleState<HashType = H256>,
StateHandler::StateT: SgxExternalitiesTrait + Encode,
NodeMetadataRepository: AccessNodeMetadata,
Expand Down Expand Up @@ -278,7 +280,7 @@ where
impl<OCallApi, StateHandler, NodeMetadataRepository, Stf, TCS, G> StateUpdateProposer<TCS, G>
for StfExecutor<OCallApi, StateHandler, NodeMetadataRepository, Stf, TCS, G>
where
OCallApi: EnclaveAttestationOCallApi + EnclaveOnChainOCallApi,
OCallApi: EnclaveAttestationOCallApi + EnclaveOnChainOCallApi + EnclaveMetricsOCallApi,
StateHandler: HandleState<HashType = H256>,
StateHandler::StateT: SgxExternalitiesTrait + Encode + StateHash,
<StateHandler::StateT as SgxExternalitiesTrait>::SgxExternalitiesType: Encode,
Expand Down Expand Up @@ -310,7 +312,8 @@ where
PH: HeaderTrait<Hash = H256>,
F: FnOnce(Self::Externalities) -> Self::Externalities,
{
let ends_at = duration_now() + max_exec_duration;
let started_at = duration_now();
let ends_at = started_at + max_exec_duration;

let (state, state_hash_before_execution) = self.state_handler.load_cloned(shard)?;

Expand Down Expand Up @@ -350,6 +353,33 @@ where
error!("on_finalize failed: {:?}", e);
});

let state_size_bytes = state.size();
let runtime_metrics = gather_runtime_metrics(&state);

let propsing_duration = duration_now() - started_at;
let successful_call_count =
executed_and_failed_calls.iter().filter(|call| call.is_success()).count();
let failed_call_count = executed_and_failed_calls.len() - successful_call_count;
self.ocall_api
.update_metrics(vec![
EnclaveMetric::StfStateUpdateExecutionDuration(propsing_duration),
EnclaveMetric::StfStateUpdateExecutedCallsSuccessfulCount(
successful_call_count as u32,
),
EnclaveMetric::StfStateUpdateExecutedCallsFailedCount(failed_call_count as u32),
EnclaveMetric::StfStateSizeSet(*shard, state_size_bytes as u32),
EnclaveMetric::StfRuntimeTotalIssuanceSet(runtime_metrics.total_issuance),
EnclaveMetric::StfRuntimeParentchainIntegriteeProcessedBlockNumberSet(
runtime_metrics.parentchain_integritee_processed_block_number,
),
EnclaveMetric::StfRuntimeParentchainTargetAProcessedBlockNumberSet(
runtime_metrics.parentchain_target_a_processed_block_number,
),
EnclaveMetric::StfRuntimeParentchainTargetBProcessedBlockNumberSet(
runtime_metrics.parentchain_target_b_processed_block_number,
),
])
.unwrap_or_else(|e| error!("failed to update prometheus metric: {:?}", e));
Ok(BatchExecutionResult {
executed_operations: executed_and_failed_calls,
state_hash_before_execution,
Expand All @@ -374,3 +404,43 @@ pub fn shards_key_hash() -> Vec<u8> {
// ShardIdentifiers the enclave uses this to autosubscribe to no shards
vec![]
}

/// assumes a common structure of sgx_runtime and extracts interesting metrics
/// while this may not be the best abstraction, it avoids circular dependencies
/// with app-libs and will be suitable in 99% of cases
fn gather_runtime_metrics<State>(state: &State) -> RuntimeMetrics
brenzi marked this conversation as resolved.
Show resolved Hide resolved
where
State: SgxExternalitiesTrait + Encode,
{
// prometheus has no support for NaN, therefore we fall back to -1
let total_issuance: f64 = state
.get(&storage_value_key("Balances", "TotalIssuance"))
.map(|v| Balance::decode(&mut v.as_slice()).map(|b| b as f64).unwrap_or(-1.0))
.unwrap_or(-1.0);
// fallback to zero is fine here
let parentchain_integritee_processed_block_number: u32 = state
.get(&storage_value_key("ParentchainIntegritee", "Number"))
.map(|v| BlockNumber::decode(&mut v.as_slice()).unwrap_or_default())
.unwrap_or_default();
let parentchain_target_a_processed_block_number: u32 = state
.get(&storage_value_key("ParentchainTargetA", "Number"))
.map(|v| BlockNumber::decode(&mut v.as_slice()).unwrap_or_default())
.unwrap_or_default();
let parentchain_target_b_processed_block_number: u32 = state
.get(&storage_value_key("ParentchainTargetB", "Number"))
.map(|v| BlockNumber::decode(&mut v.as_slice()).unwrap_or_default())
.unwrap_or_default();
RuntimeMetrics {
total_issuance,
parentchain_integritee_processed_block_number,
parentchain_target_a_processed_block_number,
parentchain_target_b_processed_block_number,
}
}

/// Runtime-level values read from state storage by `gather_runtime_metrics`.
struct RuntimeMetrics {
/// Decoded `Balances::TotalIssuance` converted to `f64`; `-1.0` if the entry is missing or fails to decode.
total_issuance: f64,
/// Decoded `ParentchainIntegritee::Number`; `0` if the entry is missing or fails to decode.
parentchain_integritee_processed_block_number: u32,
/// Decoded `ParentchainTargetA::Number`; `0` if the entry is missing or fails to decode.
parentchain_target_a_processed_block_number: u32,
/// Decoded `ParentchainTargetB::Number`; `0` if the entry is missing or fails to decode.
parentchain_target_b_processed_block_number: u32,
}
13 changes: 10 additions & 3 deletions core-primitives/stf-state-handler/src/file_io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ pub mod sgx {
use itp_sgx_crypto::{key_repository::AccessKey, StateCrypto};
use itp_sgx_externalities::SgxExternalitiesTrait;
use itp_sgx_io::{read as io_read, write as io_write};
use itp_time_utils::duration_now;
use itp_types::H256;
use log::*;
use std::{fs, marker::PhantomData, path::Path, sync::Arc};
Expand Down Expand Up @@ -241,14 +242,17 @@ pub mod sgx {
}

let state_path = self.state_dir.state_file_path(shard_identifier, state_id);
let started_at = duration_now();
trace!("loading state from: {:?}", state_path);
let state_encoded = self.read(&state_path)?;
let load_duration = duration_now() - started_at;

// State is now decrypted.
debug!(
"State loaded from {:?} with size {}B, deserializing...",
"State loaded from {:?} with size {}B in {:.4}s, deserializing...",
state_path,
state_encoded.len()
state_encoded.len(),
load_duration.as_secs_f32(),
);
let state = <State as SgxExternalitiesTrait>::SgxExternalitiesType::decode(
&mut state_encoded.as_slice(),
Expand Down Expand Up @@ -290,14 +294,17 @@ pub mod sgx {
) -> Result<Self::HashType> {
let state_path = self.state_dir.state_file_path(shard_identifier, state_id);
trace!("writing state to: {:?}", state_path);

let started_at = duration_now();
// Only save the state, the state diff is pruned.
let cyphertext = self.encrypt(state.state().encode())?;

let state_hash = state.hash();

io_write(&cyphertext, &state_path)?;

let write_duration = duration_now() - started_at;
trace!("state encrypted and stored in {:.4}s", write_duration.as_secs_f32());

Ok(state_hash)
}

Expand Down
Loading
Loading