Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validator health status #3207

Open
wants to merge 3 commits into
base: albatross
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions primitives/account/src/account/staking_contract/validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ use crate::{
/// in the first place.
/// (**) The validator may be set to automatically reactivate itself upon inactivation.
/// If this setting is not enabled the state change can only be triggered manually.
/// However, there is a delay incurred if the validator is deactivated multiple consecutive times
/// in the current epoch. The delay is a function of the number of deactivations and is reduced
/// if the validator starts producing blocks in time.
///
/// Create, Update, Deactivate, Retire and Re-activate are incoming transactions to the staking contract.
/// Delete is an outgoing transaction from the staking contract.
Expand Down
3 changes: 2 additions & 1 deletion test-utils/src/validator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ pub async fn build_validators<N: TestNetwork + NetworkInterface>(
peer_ids: &[u64],
hub: &mut Option<MockHub>,
is_prover_active: bool,
automatic_reactivate: bool,
) -> Vec<Validator<ValidatorNetworkImpl<N>>>
where
N::Error: Send + Sync,
Expand Down Expand Up @@ -113,7 +114,7 @@ where
let (v, c) = build_validator(
peer_ids[i],
Address::from(&validator_keys[i]),
false,
automatic_reactivate,
signing_keys[i].clone(),
voting_keys[i].clone(),
fee_keys[i].clone(),
Expand Down
156 changes: 156 additions & 0 deletions validator/src/health.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
use std::cmp;

use nimiq_keys::Address;

/// Enum that represents the overall health of a validator
/// - Green means the Validator is working as expected.
/// - Yellow means there has been more than 'VALIDATOR_YELLOW_HEALTH_INACTIVATIONS' consecutive inactivations
/// in the current epoch.
/// - Red means there has been more than 'VALIDATOR_RED_HEALTH_INACTIVATIONS' consecutive inactivations
/// in the current epoch.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum ValidatorHealthState {
Green,
Yellow,
Red,
}

/// Struct that represents the overall Validator Health
pub struct ValidatorHealth {
/// The current validator health
health: ValidatorHealthState,
/// Validator address
address: Address,
/// Only used for testing purposes controls whether blocks are published by the validator
publish: bool,
/// Number of consecutive inactivations that have occurred in the current epoch
inactivations: u32,
/// Next block number indicating when the re-activate transaction should be sent
reactivate_block_number: u32,
/// Flag that indicates an ongoing reactivation
pending_reactivate: bool,
}

impl ValidatorHealth {
/// The number of consecutive inactivations from which a validator is considered with yellow health
const VALIDATOR_YELLOW_HEALTH_INACTIVATIONS: u32 = 2;
/// The number of consecutive inactivations from which a validator is considered with red health
const VALIDATOR_RED_HEALTH_INACTIVATIONS: u32 = 4;
// The maximum number of blocks the reactivate transaction can be delayed
const MAX_REACTIVATE_DELAY: u32 = 10_000;

/// Creates a new instance of the validator health structure
pub fn new(validator_address: &Address) -> ValidatorHealth {
ValidatorHealth {
health: ValidatorHealthState::Green,
publish: true,
inactivations: 0,
reactivate_block_number: 0,
pending_reactivate: false,
address: validator_address.clone(),
}
}

/// Computes the associated delay based on the current number of inactivations
fn get_reactivate_delay(&self) -> u32 {
cmp::min(self.inactivations.pow(2), Self::MAX_REACTIVATE_DELAY)
}

/// Returns the current health state of the validator
pub fn health(&self) -> ValidatorHealthState {
self.health
}

/// This flag should only be set in testing environments.
pub fn _set_publish_flag(&mut self, publish: bool) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The method can be annotated with #[cfg(test)] such that it's only included in cargo test and not in cargo build.

self.publish = publish
}

/// Decides if blocks should be published by the validator (only used for testing)
pub fn publish_block(&self) -> bool {
self.publish
}

/// Recomputes the current validator health state
pub fn refresh_validator_health_status(&mut self) {
log::debug!(
address=%self.address,
self.inactivations,
"Current validator Inactivations counter"
);

match self.health {
ValidatorHealthState::Green => {}
ValidatorHealthState::Yellow => {
if self.inactivations < Self::VALIDATOR_YELLOW_HEALTH_INACTIVATIONS {
log::info!(self.inactivations, "Changed validator health to green");
self.health = ValidatorHealthState::Green;
}
}
ValidatorHealthState::Red => {
if self.inactivations < Self::VALIDATOR_RED_HEALTH_INACTIVATIONS {
log::info!(self.inactivations, "Changed validator health to yellow");
self.health = ValidatorHealthState::Yellow;
}
}
}
}

/// Decreases the number of consecutive deactivations
pub fn block_produced(&mut self) {
self.inactivations = self.inactivations.saturating_sub(1);
}

/// Increases the number of consecutive deactivations
pub fn inactivate(&mut self, block_number: u32) {
if !self.pending_reactivate {
self.inactivations = self.inactivations.saturating_add(1);
self.reactivate_block_number = block_number + self.get_reactivate_delay();
self.pending_reactivate = true;

match self.health {
ValidatorHealthState::Green => {
if self.inactivations >= Self::VALIDATOR_YELLOW_HEALTH_INACTIVATIONS {
log::warn!(self.inactivations, "Changed validator health to yellow");
self.health = ValidatorHealthState::Yellow;
}
}
ValidatorHealthState::Yellow => {
if self.inactivations >= Self::VALIDATOR_RED_HEALTH_INACTIVATIONS {
log::warn!(self.inactivations, "Changed validator health to red");
self.health = ValidatorHealthState::Red;
}
}
ValidatorHealthState::Red => {
log::warn!("Validator health is still red")
}
}

log::debug!(
address=%self.address,
self.inactivations,
self.reactivate_block_number,
block_number,
"New inactivation, current status",
);
}
}

/// Resets the internal counters and validator health state for a new epoch
pub fn reset_epoch(&mut self) {
// Reset the inactivations counter
self.inactivations = 0;
// Reset the validator health every epoch
self.health = ValidatorHealthState::Green;
}

/// Resets the pending reactivation flag
pub fn reset_reactivation(&mut self) {
self.pending_reactivate = false;
}

/// Returns the block number for the next re-activate transaction
pub fn get_reactivate_block_number(&self) -> u32 {
self.reactivate_block_number
}
}
1 change: 1 addition & 0 deletions validator/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
extern crate log;

pub mod aggregation;
pub mod health;
mod jail;
pub mod key_utils;
mod r#macro;
Expand Down
25 changes: 24 additions & 1 deletion validator/src/micro.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use futures::{future::BoxFuture, ready, FutureExt, Stream};
use nimiq_block::{Block, EquivocationProof, MicroBlock, SkipBlockInfo};
use nimiq_blockchain::{BlockProducer, BlockProducerError, Blockchain};
use nimiq_blockchain_interface::AbstractBlockchain;
use nimiq_keys::Address;
use nimiq_mempool::mempool::Mempool;
use nimiq_primitives::policy::Policy;
use nimiq_time::sleep;
Expand All @@ -18,7 +19,9 @@ use nimiq_validator_network::ValidatorNetwork;
use nimiq_vrf::VrfSeed;
use parking_lot::RwLock;

use crate::{aggregation::skip_block::SkipBlockAggregation, validator::Validator};
use crate::{
aggregation::skip_block::SkipBlockAggregation, health::ValidatorHealth, validator::Validator,
};

pub(crate) enum ProduceMicroBlockEvent {
MicroBlock,
Expand All @@ -36,6 +39,8 @@ struct NextProduceMicroBlockEvent<TValidatorNetwork> {
block_number: u32,
producer_timeout: Duration,
block_separation_time: Duration,
validator_address: Address,
validator_health: Arc<RwLock<ValidatorHealth>>,
}

impl<TValidatorNetwork: ValidatorNetwork + 'static> NextProduceMicroBlockEvent<TValidatorNetwork> {
Expand All @@ -53,6 +58,8 @@ impl<TValidatorNetwork: ValidatorNetwork + 'static> NextProduceMicroBlockEvent<T
block_number: u32,
producer_timeout: Duration,
block_separation_time: Duration,
validator_address: Address,
validator_health: Arc<RwLock<ValidatorHealth>>,
) -> Self {
Self {
blockchain,
Expand All @@ -65,6 +72,8 @@ impl<TValidatorNetwork: ValidatorNetwork + 'static> NextProduceMicroBlockEvent<T
block_number,
producer_timeout,
block_separation_time,
validator_address,
validator_health,
}
}

Expand Down Expand Up @@ -117,6 +126,7 @@ impl<TValidatorNetwork: ValidatorNetwork + 'static> NextProduceMicroBlockEvent<T
info!(
block_number = self.block_number,
slot_band = self.validator_slot_band,
address = %self.validator_address,
"Our turn, producing micro block #{}",
self.block_number,
);
Expand Down Expand Up @@ -153,6 +163,11 @@ impl<TValidatorNetwork: ValidatorNetwork + 'static> NextProduceMicroBlockEvent<T
num_transactions
);

if !self.validator_health.read().publish_block() {
log::warn!(block = block.block_number(), "Not publishing block");
break Some(Some(ProduceMicroBlockEvent::MicroBlock));
}

// Publish the block. It is valid as we have just created it.
Validator::publish_block(Arc::clone(&self.network), block.clone());

Expand Down Expand Up @@ -181,6 +196,9 @@ impl<TValidatorNetwork: ValidatorNetwork + 'static> NextProduceMicroBlockEvent<T
continue;
}

// Each successful block will decrease the number of inactivations
self.validator_health.write().block_produced();

let event = result
.map(move |_result| ProduceMicroBlockEvent::MicroBlock)
.ok();
Expand All @@ -194,6 +212,7 @@ impl<TValidatorNetwork: ValidatorNetwork + 'static> NextProduceMicroBlockEvent<T
debug!(
block_number = self.block_number,
slot_band = self.validator_slot_band,
address = %self.validator_address,
"Not our turn, waiting for micro block #{}",
self.block_number,
);
Expand Down Expand Up @@ -404,6 +423,8 @@ impl<TValidatorNetwork: ValidatorNetwork + 'static> ProduceMicroBlock<TValidator
block_number: u32,
producer_timeout: Duration,
block_separation_time: Duration,
validator_address: Address,
validator_health: Arc<RwLock<ValidatorHealth>>,
) -> Self {
let next_event = NextProduceMicroBlockEvent::new(
blockchain,
Expand All @@ -416,6 +437,8 @@ impl<TValidatorNetwork: ValidatorNetwork + 'static> ProduceMicroBlock<TValidator
block_number,
producer_timeout,
block_separation_time,
validator_address,
validator_health,
)
.next()
.boxed();
Expand Down
Loading