diff --git a/.github/workflows/bitcoin-tests.yml b/.github/workflows/bitcoin-tests.yml index 3f3781cf03..b359063c7e 100644 --- a/.github/workflows/bitcoin-tests.yml +++ b/.github/workflows/bitcoin-tests.yml @@ -125,6 +125,7 @@ jobs: - tests::signer::v0::continue_after_tenure_extend - tests::signer::v0::tenure_extend_after_idle_signers - tests::signer::v0::tenure_extend_after_idle_miner + - tests::signer::v0::tenure_extend_after_failed_miner - tests::signer::v0::tenure_extend_succeeds_after_rejected_attempt - tests::signer::v0::stx_transfers_dont_effect_idle_timeout - tests::signer::v0::idle_tenure_extend_active_mining @@ -141,6 +142,9 @@ jobs: - tests::signer::v0::incoming_signers_ignore_block_proposals - tests::signer::v0::outgoing_signers_ignore_block_proposals - tests::signer::v0::injected_signatures_are_ignored_across_boundaries + - tests::signer::v0::fast_sortition + - tests::signer::v0::single_miner_empty_sortition + - tests::signer::v0::multiple_miners_empty_sortition - tests::signer::v0::block_proposal_timeout - tests::signer::v0::rejected_blocks_count_towards_miner_validity - tests::nakamoto_integrations::burn_ops_integration_test @@ -162,6 +166,7 @@ jobs: - tests::nakamoto_integrations::sip029_coinbase_change - tests::nakamoto_integrations::clarity_cost_spend_down - tests::nakamoto_integrations::v3_blockbyheight_api_endpoint + - tests::nakamoto_integrations::test_tenure_extend_from_flashblocks # TODO: enable these once v1 signer is supported by a new nakamoto epoch # - tests::signer::v1::dkg # - tests::signer::v1::sign_request_rejected diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e2fc5c172..6fb699d1e3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,15 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to the versioning scheme outlined in the [README.md](README.md). 
+## [Unreleased] + +### Added + +- The stacks-node miner now performs accurate tenure-extensions in certain bitcoin block production + cases: when a bitcoin block is produced before the previous bitcoin block's Stacks tenure started. + Previously, the miner had difficulty restarting their missed tenure and extending into the new + bitcoin block, leading to 1-2 bitcoin blocks of missed Stacks block production. + ## [3.1.0.0.3] ### Added diff --git a/libstackerdb/src/libstackerdb.rs b/libstackerdb/src/libstackerdb.rs index 714ef838c4..36d7dd3643 100644 --- a/libstackerdb/src/libstackerdb.rs +++ b/libstackerdb/src/libstackerdb.rs @@ -135,6 +135,12 @@ pub struct StackerDBChunkAckData { pub code: Option, } +impl fmt::Display for StackerDBChunkAckData { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:?}", self) + } +} + impl SlotMetadata { /// Make a new unsigned slot metadata pub fn new_unsigned( diff --git a/stacks-signer/src/chainstate.rs b/stacks-signer/src/chainstate.rs index 9003debff7..3e59e58850 100644 --- a/stacks-signer/src/chainstate.rs +++ b/stacks-signer/src/chainstate.rs @@ -200,6 +200,7 @@ impl SortitionsView { info!( "Current miner timed out, marking as invalid."; "block_height" => block.header.chain_length, + "block_proposal_timeout" => ?self.config.block_proposal_timeout, "current_sortition_consensus_hash" => ?self.cur_sortition.consensus_hash, ); self.cur_sortition.miner_status = SortitionMinerStatus::InvalidatedBeforeFirstBlock; @@ -320,7 +321,7 @@ impl SortitionsView { return Ok(false); } } - ProposedBy::LastSortition(_last_sortition) => { + ProposedBy::LastSortition(last_sortition) => { // should only consider blocks from the last sortition if the new sortition was invalidated // before we signed their first block. 
if self.cur_sortition.miner_status @@ -331,6 +332,7 @@ impl SortitionsView { "proposed_block_consensus_hash" => %block.header.consensus_hash, "proposed_block_signer_sighash" => %block.header.signer_signature_hash(), "current_sortition_miner_status" => ?self.cur_sortition.miner_status, + "last_sortition" => %last_sortition.consensus_hash ); return Ok(false); } diff --git a/stackslib/src/config/mod.rs b/stackslib/src/config/mod.rs index 80874d1c48..6d2d5e4389 100644 --- a/stackslib/src/config/mod.rs +++ b/stackslib/src/config/mod.rs @@ -94,6 +94,8 @@ const DEFAULT_FIRST_REJECTION_PAUSE_MS: u64 = 5_000; const DEFAULT_SUBSEQUENT_REJECTION_PAUSE_MS: u64 = 10_000; const DEFAULT_BLOCK_COMMIT_DELAY_MS: u64 = 20_000; const DEFAULT_TENURE_COST_LIMIT_PER_BLOCK_PERCENTAGE: u8 = 25; +const DEFAULT_TENURE_EXTEND_POLL_SECS: u64 = 1; + // This should be greater than the signers' timeout. This is used for issuing fallback tenure extends const DEFAULT_TENURE_TIMEOUT_SECS: u64 = 420; @@ -2149,6 +2151,9 @@ pub struct MinerConfig { pub block_commit_delay: Duration, /// The percentage of the remaining tenure cost limit to consume each block. pub tenure_cost_limit_per_block_percentage: Option, + /// The number of seconds to wait in-between polling the sortition DB to see if we need to + /// extend the ongoing tenure (e.g. because the current sortition is empty or invalid). 
+ pub tenure_extend_poll_secs: Duration, /// Duration to wait before attempting to issue a tenure extend pub tenure_timeout: Duration, } @@ -2187,6 +2192,7 @@ impl Default for MinerConfig { tenure_cost_limit_per_block_percentage: Some( DEFAULT_TENURE_COST_LIMIT_PER_BLOCK_PERCENTAGE, ), + tenure_extend_poll_secs: Duration::from_secs(DEFAULT_TENURE_EXTEND_POLL_SECS), tenure_timeout: Duration::from_secs(DEFAULT_TENURE_TIMEOUT_SECS), } } @@ -2582,6 +2588,7 @@ pub struct MinerConfigFile { pub subsequent_rejection_pause_ms: Option, pub block_commit_delay_ms: Option, pub tenure_cost_limit_per_block_percentage: Option, + pub tenure_extend_poll_secs: Option, pub tenure_timeout_secs: Option, } @@ -2723,6 +2730,7 @@ impl MinerConfigFile { subsequent_rejection_pause_ms: self.subsequent_rejection_pause_ms.unwrap_or(miner_default_config.subsequent_rejection_pause_ms), block_commit_delay: self.block_commit_delay_ms.map(Duration::from_millis).unwrap_or(miner_default_config.block_commit_delay), tenure_cost_limit_per_block_percentage, + tenure_extend_poll_secs: self.tenure_extend_poll_secs.map(Duration::from_secs).unwrap_or(miner_default_config.tenure_extend_poll_secs), tenure_timeout: self.tenure_timeout_secs.map(Duration::from_secs).unwrap_or(miner_default_config.tenure_timeout), }) } diff --git a/stackslib/src/net/api/postblock_proposal.rs b/stackslib/src/net/api/postblock_proposal.rs index 65bb589c39..3fab1b478a 100644 --- a/stackslib/src/net/api/postblock_proposal.rs +++ b/stackslib/src/net/api/postblock_proposal.rs @@ -190,12 +190,15 @@ impl BlockValidateResponse { } #[cfg(any(test, feature = "testing"))] -fn inject_validation_delay() { +fn fault_injection_validation_delay() { let delay = TEST_VALIDATE_DELAY_DURATION_SECS.get(); warn!("Sleeping for {} seconds to simulate slow processing", delay); thread::sleep(Duration::from_secs(delay)); } +#[cfg(not(any(test, feature = "testing")))] +fn fault_injection_validation_delay() {} + /// Represents a block proposed to the 
`v3/block_proposal` endpoint for validation #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct NakamotoBlockProposal { @@ -389,8 +392,7 @@ impl NakamotoBlockProposal { } let start = Instant::now(); - #[cfg(any(test, feature = "testing"))] - inject_validation_delay(); + fault_injection_validation_delay(); let mainnet = self.chain_id == CHAIN_ID_MAINNET; if self.chain_id != chainstate.chain_id || mainnet != chainstate.mainnet { diff --git a/testnet/stacks-node/src/nakamoto_node.rs b/testnet/stacks-node/src/nakamoto_node.rs index 09f8c7285f..c49e0bbc73 100644 --- a/testnet/stacks-node/src/nakamoto_node.rs +++ b/testnet/stacks-node/src/nakamoto_node.rs @@ -23,6 +23,7 @@ use stacks::burnchains::{BurnchainSigner, Txid}; use stacks::chainstate::burn::db::sortdb::SortitionDB; use stacks::chainstate::burn::BlockSnapshot; use stacks::chainstate::stacks::Error as ChainstateError; +use stacks::libstackerdb::StackerDBChunkAckData; use stacks::monitoring; use stacks::monitoring::update_active_miners_count_gauge; use stacks::net::atlas::AtlasConfig; @@ -130,6 +131,9 @@ pub enum Error { /// An error occurred while operating as the signing coordinator #[error("An error occurred while operating as the signing coordinator: {0}")] SigningCoordinatorFailure(String), + /// An error occurred on StackerDB post + #[error("An error occurred while uploading data to StackerDB: {0}")] + StackerDBUploadError(StackerDBChunkAckData), // The thread that we tried to send to has closed #[error("The thread that we tried to send to has closed")] ChannelClosed, diff --git a/testnet/stacks-node/src/nakamoto_node/miner.rs b/testnet/stacks-node/src/nakamoto_node/miner.rs index 1e89c3292c..6309f0f0d5 100644 --- a/testnet/stacks-node/src/nakamoto_node/miner.rs +++ b/testnet/stacks-node/src/nakamoto_node/miner.rs @@ -13,10 +13,11 @@ // // You should have received a copy of the GNU General Public License // along with this program. If not, see . 
+use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; #[cfg(test)] use std::sync::LazyLock; use std::thread; -use std::thread::JoinHandle; use std::time::{Duration, Instant}; use clarity::boot_util::boot_code_id; @@ -39,25 +40,25 @@ use stacks::chainstate::stacks::{ TenureChangeCause, TenureChangePayload, TransactionAnchorMode, TransactionPayload, TransactionVersion, }; +use stacks::net::api::poststackerdbchunk::StackerDBErrorCodes; use stacks::net::p2p::NetworkHandle; use stacks::net::stackerdb::StackerDBs; use stacks::net::{NakamotoBlocksData, StacksMessageType}; use stacks::util::get_epoch_time_secs; use stacks::util::secp256k1::MessageSignature; -#[cfg(test)] -use stacks::util::tests::TestFlag; use stacks_common::types::chainstate::{StacksAddress, StacksBlockId}; use stacks_common::types::{PrivateKey, StacksEpochId}; +#[cfg(test)] +use stacks_common::util::tests::TestFlag; use stacks_common::util::vrf::VRFProof; -use super::relayer::RelayerThread; +use super::relayer::{MinerStopHandle, RelayerThread}; use super::{Config, Error as NakamotoNodeError, EventDispatcher, Keychain}; use crate::nakamoto_node::signer_coordinator::SignerCoordinator; use crate::nakamoto_node::VRF_MOCK_MINER_KEY; use crate::neon_node; use crate::run_loop::nakamoto::Globals; use crate::run_loop::RegisteredKey; - #[cfg(test)] /// Test flag to stall the miner thread pub static TEST_MINE_STALL: LazyLock> = LazyLock::new(TestFlag::default); @@ -65,20 +66,31 @@ pub static TEST_MINE_STALL: LazyLock> = LazyLock::new(TestFlag::d /// Test flag to stall block proposal broadcasting pub static TEST_BROADCAST_STALL: LazyLock> = LazyLock::new(TestFlag::default); #[cfg(test)] -pub static TEST_BLOCK_ANNOUNCE_STALL: std::sync::Mutex> = std::sync::Mutex::new(None); +pub static TEST_BLOCK_ANNOUNCE_STALL: LazyLock> = LazyLock::new(TestFlag::default); #[cfg(test)] -pub static TEST_SKIP_P2P_BROADCAST: std::sync::Mutex> = std::sync::Mutex::new(None); +pub static TEST_SKIP_P2P_BROADCAST: LazyLock> = 
LazyLock::new(TestFlag::default); /// If the miner was interrupted while mining a block, how long should the /// miner thread sleep before trying again? const ABORT_TRY_AGAIN_MS: u64 = 200; #[allow(clippy::large_enum_variant)] +#[derive(Debug)] pub enum MinerDirective { /// The miner won sortition so they should begin a new tenure BeginTenure { + /// This is the block ID of the first block in the parent tenure parent_tenure_start: StacksBlockId, + /// This is the snapshot that this miner won, and will produce a tenure for + election_block: BlockSnapshot, + /// This is the snapshot that caused the relayer to initiate this event (may be different + /// than the election block in the case where the miner is trying to mine a late block). burnchain_tip: BlockSnapshot, + /// This is `true` if the snapshot above is known not to be the latest burnchain tip, + /// but an ancestor of it (for example, the burnchain tip could be an empty flash block, but the + /// miner may nevertheless need to produce a Stacks block with a BlockFound tenure-change + /// transaction for the tenure begun by winning `burnchain_tip`'s sortition). + late: bool, }, /// The miner should try to continue their tenure if they are the active miner ContinueTenure { new_burn_view: ConsensusHash }, @@ -108,28 +120,46 @@ struct ParentStacksBlockInfo { #[derive(PartialEq, Clone, Debug)] pub enum MinerReason { /// The miner thread was spawned to begin a new tenure - BlockFound, + BlockFound { + /// `late` indicates whether or not the tenure that is about to be started corresponds to + /// an ancestor of the canonical tip. This can happen if this miner won the highest + /// sortition, but that sortition's snapshot is not the canonical tip (e.g. the canonical + /// tip may have no sortition, but its parent (or Nth ancestor) would have had a sortition + /// that this miner won, and it would be the latest non-empty sortition ancestor of the + /// tip). 
This indication is important because the miner would issue a BlockFound + /// tenure-change, and then issue an Extended tenure-change right afterwards in order to + /// update the burnchain view exposed to Clarity for the highest sortition. + late: bool, + }, /// The miner thread was spawned to extend an existing tenure Extended { /// Current consensus hash on the underlying burnchain. Corresponds to the last-seen /// sortition. burn_view_consensus_hash: ConsensusHash, }, - /// The miner thread was spawned to initialize a prior empty tenure - EmptyTenure, +} + +impl MinerReason { + pub fn is_late_block(&self) -> bool { + match self { + Self::BlockFound { ref late } => *late, + Self::Extended { .. } => false, + } + } } impl std::fmt::Display for MinerReason { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - MinerReason::BlockFound => write!(f, "BlockFound"), + MinerReason::BlockFound { late } => { + write!(f, "BlockFound({})", if *late { "late" } else { "current" }) + } MinerReason::Extended { burn_view_consensus_hash, } => write!( f, "Extended: burn_view_consensus_hash = {burn_view_consensus_hash:?}", ), - MinerReason::EmptyTenure => write!(f, "EmptyTenure"), } } } @@ -153,7 +183,7 @@ pub struct BlockMinerThread { burn_election_block: BlockSnapshot, /// Current burnchain tip as of the last TenureChange /// * if the last tenure-change was a BlockFound, then this is the same as the - /// `burn_election_block`. + /// `burn_election_block` (and it is also the `burn_view`) /// * otherwise, if the last tenure-change is an Extend, then this is the sortition of the burn /// view consensus hash in the TenureChange burn_block: BlockSnapshot, @@ -168,6 +198,14 @@ pub struct BlockMinerThread { signer_set_cache: Option, /// The time at which tenure change/extend was attempted tenure_change_time: Instant, + /// The current tip when this miner thread was started. 
+ /// This *should not* be passed into any block building code, as it + /// is not necessarily the burn view for the block being constructed. + /// Rather, this burn block is used to determine whether or not a new + /// burn block has arrived since this thread started. + burn_tip_at_start: ConsensusHash, + /// flag to indicate an abort driven from the relayer + abort_flag: Arc, } impl BlockMinerThread { @@ -178,6 +216,7 @@ impl BlockMinerThread { burn_election_block: BlockSnapshot, burn_block: BlockSnapshot, parent_tenure_id: StacksBlockId, + burn_tip_at_start: &ConsensusHash, reason: MinerReason, ) -> BlockMinerThread { BlockMinerThread { @@ -195,10 +234,16 @@ impl BlockMinerThread { reason, p2p_handle: rt.get_p2p_handle(), signer_set_cache: None, + burn_tip_at_start: burn_tip_at_start.clone(), tenure_change_time: Instant::now(), + abort_flag: Arc::new(AtomicBool::new(false)), } } + pub fn get_abort_flag(&self) -> Arc { + self.abort_flag.clone() + } + #[cfg(test)] fn fault_injection_block_broadcast_stall(new_block: &NakamotoBlock) { if TEST_BROADCAST_STALL.get() { @@ -225,7 +270,7 @@ impl BlockMinerThread { #[cfg(test)] fn fault_injection_block_announce_stall(new_block: &NakamotoBlock) { - if *TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap() == Some(true) { + if TEST_BLOCK_ANNOUNCE_STALL.get() { // Do an extra check just so we don't log EVERY time. 
warn!("Fault injection: Block announcement is stalled due to testing directive."; "stacks_block_id" => %new_block.block_id(), @@ -233,7 +278,7 @@ impl BlockMinerThread { "height" => new_block.header.chain_length, "consensus_hash" => %new_block.header.consensus_hash ); - while *TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap() == Some(true) { + while TEST_BLOCK_ANNOUNCE_STALL.get() { std::thread::sleep(std::time::Duration::from_millis(10)); } info!("Fault injection: Block announcement is no longer stalled due to testing directive."; @@ -249,7 +294,7 @@ impl BlockMinerThread { #[cfg(test)] fn fault_injection_skip_block_broadcast() -> bool { - if *TEST_SKIP_P2P_BROADCAST.lock().unwrap() == Some(true) { + if TEST_SKIP_P2P_BROADCAST.get() { return true; } false @@ -261,32 +306,24 @@ impl BlockMinerThread { } /// Stop a miner tenure by blocking the miner and then joining the tenure thread - pub fn stop_miner( - globals: &Globals, - prior_miner: JoinHandle>, - ) -> Result<(), NakamotoNodeError> { - debug!( - "Stopping prior miner thread ID {:?}", - prior_miner.thread().id() - ); - globals.block_miner(); - let prior_miner_result = prior_miner - .join() - .map_err(|_| ChainstateError::MinerAborted)?; - if let Err(e) = prior_miner_result { - // it's okay if the prior miner thread exited with an error. - // in many cases this is expected (i.e., a burnchain block occurred) - // if some error condition should be handled though, this is the place - // to do that handling. - debug!("Prior mining thread exited with: {e:?}"); + #[cfg(test)] + fn fault_injection_stall_miner() { + if TEST_MINE_STALL.get() { + // Do an extra check just so we don't log EVERY time. + warn!("Mining is stalled due to testing directive"); + while TEST_MINE_STALL.get() { + std::thread::sleep(std::time::Duration::from_millis(10)); + } + warn!("Mining is no longer stalled due to testing directive. 
Continuing..."); } - globals.unblock_miner(); - Ok(()) } + #[cfg(not(test))] + fn fault_injection_stall_miner() {} + pub fn run_miner( mut self, - prior_miner: Option>>, + prior_miner: Option, ) -> Result<(), NakamotoNodeError> { // when starting a new tenure, block the mining thread if its currently running. // the new mining thread will join it (so that the new mining thread stalls, not the relayer) @@ -296,10 +333,16 @@ impl BlockMinerThread { "parent_tenure_id" => %self.parent_tenure_id, "thread_id" => ?thread::current().id(), "burn_block_consensus_hash" => %self.burn_block.consensus_hash, + "burn_election_block_consensus_hash" => %self.burn_election_block.consensus_hash, "reason" => %self.reason, ); if let Some(prior_miner) = prior_miner { - Self::stop_miner(&self.globals, prior_miner)?; + debug!( + "Miner thread {:?}: will try and stop prior miner {:?}", + thread::current().id(), + prior_miner.inner_thread().id() + ); + prior_miner.stop(&self.globals)?; } let mut stackerdbs = StackerDBs::connect(&self.config.get_stacker_db_file_path(), true)?; let mut last_block_rejected = false; @@ -316,18 +359,17 @@ impl BlockMinerThread { self.burnchain.pox_constants.clone(), ) .expect("FATAL: could not open sortition DB"); - let burn_tip = SortitionDB::get_canonical_burn_chain_tip(sortdb.conn()) - .expect("FATAL: failed to query sortition DB for canonical burn chain tip"); // Start the signer coordinator let mut coordinator = SignerCoordinator::new( self.event_dispatcher.stackerdb_channel.clone(), self.globals.should_keep_running.clone(), &reward_set, - &burn_tip, + &self.burn_election_block, &self.burnchain, miner_privkey, &self.config, + &self.burn_tip_at_start, ) .map_err(|e| { NakamotoNodeError::SigningCoordinatorFailure(format!( @@ -351,6 +393,29 @@ impl BlockMinerThread { } } + /// Pause the miner thread and retry to mine + fn pause_and_retry( + &self, + new_block: &NakamotoBlock, + last_block_rejected: &mut bool, + e: NakamotoNodeError, + ) { + // Sleep for a bit 
to allow signers to catch up + let pause_ms = if *last_block_rejected { + self.config.miner.subsequent_rejection_pause_ms + } else { + self.config.miner.first_rejection_pause_ms + }; + + error!("Error while gathering signatures: {e:?}. Will try mining again in {pause_ms}."; + "signer_sighash" => %new_block.header.signer_signature_hash(), + "block_height" => new_block.header.chain_length, + "consensus_hash" => %new_block.header.consensus_hash, + ); + thread::sleep(Duration::from_millis(pause_ms)); + *last_block_rejected = true; + } + /// The main loop for the miner thread. This is where the miner will mine /// blocks and then attempt to sign and broadcast them. fn miner_main_loop( @@ -361,15 +426,23 @@ impl BlockMinerThread { last_block_rejected: &mut bool, reward_set: &RewardSet, ) -> Result<(), NakamotoNodeError> { - #[cfg(test)] - if TEST_MINE_STALL.get() { - // Do an extra check just so we don't log EVERY time. - warn!("Mining is stalled due to testing directive"); - while TEST_MINE_STALL.get() { - std::thread::sleep(std::time::Duration::from_millis(10)); - } - warn!("Mining is no longer stalled due to testing directive. Continuing..."); + Self::fault_injection_stall_miner(); + let mut chain_state = + neon_node::open_chainstate_with_faults(&self.config).map_err(|e| { + NakamotoNodeError::SigningCoordinatorFailure(format!( + "Failed to open chainstate DB. Cannot mine! {e:?}" + )) + })?; + // Late block tenures are initiated only to issue the BlockFound + // tenure change tx (because they can be immediately extended to + // the next burn view). This checks whether or not we're in such a + // tenure and have produced a block already. If so, it exits the + // mining thread to allow the tenure extension thread to take over. 
+ if self.last_block_mined.is_some() && self.reason.is_late_block() { + info!("Miner: finished mining a late tenure"); + return Err(NakamotoNodeError::StacksTipChanged); } + let new_block = loop { // If we're mock mining, we may not have processed the block that the // actual tenure winner committed to yet. So, before attempting to @@ -380,8 +453,6 @@ impl BlockMinerThread { SortitionDB::open(&burn_db_path, true, self.burnchain.pox_constants.clone()) .expect("FATAL: could not open sortition DB"); let burn_tip_changed = self.check_burn_tip_changed(&burn_db); - let mut chain_state = neon_node::open_chainstate_with_faults(&self.config) - .expect("FATAL: could not open chainstate DB"); match burn_tip_changed .and_then(|_| self.load_block_parent_info(&mut burn_db, &mut chain_state)) { @@ -409,6 +480,13 @@ impl BlockMinerThread { break Some(x); } Err(NakamotoNodeError::MiningFailure(ChainstateError::MinerAborted)) => { + if self.abort_flag.load(Ordering::SeqCst) { + info!("Miner interrupted while mining in order to shut down"); + self.globals + .raise_initiative(format!("MiningFailure: aborted by node")); + return Err(ChainstateError::MinerAborted.into()); + } + info!("Miner interrupted while mining, will try again"); // sleep, and try again. if the miner was interrupted because the burnchain // view changed, the next `mine_block()` invocation will error @@ -432,6 +510,7 @@ impl BlockMinerThread { if let Some(mut new_block) = new_block { Self::fault_injection_block_broadcast_stall(&new_block); + let signer_signature = match self.propose_block( coordinator, &mut new_block, @@ -456,21 +535,20 @@ impl BlockMinerThread { ); return Err(e); } + NakamotoNodeError::StackerDBUploadError(ref ack) => { + if ack.code == Some(StackerDBErrorCodes::BadSigner.code()) { + error!("Error while gathering signatures: failed to upload miner StackerDB data: {ack:?}. 
Giving up."; + "signer_sighash" => %new_block.header.signer_signature_hash(), + "block_height" => new_block.header.chain_length, + "consensus_hash" => %new_block.header.consensus_hash, + ); + return Err(e); + } + self.pause_and_retry(&new_block, last_block_rejected, e); + return Ok(()); + } _ => { - // Sleep for a bit to allow signers to catch up - let pause_ms = if *last_block_rejected { - self.config.miner.subsequent_rejection_pause_ms - } else { - self.config.miner.first_rejection_pause_ms - }; - - error!("Error while gathering signatures: {e:?}. Will try mining again in {pause_ms}."; - "signer_sighash" => %new_block.header.signer_signature_hash(), - "block_height" => new_block.header.chain_length, - "consensus_hash" => %new_block.header.consensus_hash, - ); - thread::sleep(Duration::from_millis(pause_ms)); - *last_block_rejected = true; + self.pause_and_retry(&new_block, last_block_rejected, e); return Ok(()); } }, @@ -494,7 +572,7 @@ impl BlockMinerThread { // update mined-block counters and mined-tenure counters self.globals.counters.bump_naka_mined_blocks(); - if self.last_block_mined.is_some() { + if self.last_block_mined.is_none() { // this is the first block of the tenure, bump tenure counter self.globals.counters.bump_naka_mined_tenures(); } @@ -547,13 +625,12 @@ impl BlockMinerThread { })?; coordinator.propose_block( new_block, - &self.burn_block, &self.burnchain, sortdb, &mut chain_state, stackerdbs, &self.globals.counters, - &self.burn_election_block.consensus_hash, + &self.burn_election_block, ) } @@ -656,7 +733,12 @@ impl BlockMinerThread { return Ok(()); } - let mut sortition_handle = sort_db.index_handle_at_ch(&block.header.consensus_hash)?; + let parent_block_info = + NakamotoChainState::get_block_header(chain_state.db(), &block.header.parent_block_id)? 
+ .ok_or_else(|| ChainstateError::NoSuchBlockError)?; + let burn_view_ch = + NakamotoChainState::get_block_burn_view(sort_db, &block, &parent_block_info)?; + let mut sortition_handle = sort_db.index_handle_at_ch(&burn_view_ch)?; let chainstate_config = chain_state.config(); let (headers_conn, staging_tx) = chain_state.headers_conn_and_staging_tx_begin()?; let accepted = NakamotoChainState::accept_block( @@ -756,7 +838,6 @@ impl BlockMinerThread { &mut miners_session, &self.burn_election_block.consensus_hash, ) - .map_err(NakamotoNodeError::SigningCoordinatorFailure) } /// Get the coinbase recipient address, if set in the config and if allowed in this epoch @@ -947,6 +1028,7 @@ impl BlockMinerThread { miner_address, &self.parent_tenure_id, stacks_tip_header, + &self.reason, ) { Ok(parent_info) => Ok(parent_info), Err(NakamotoNodeError::BurnchainTipChanged) => { @@ -1053,12 +1135,12 @@ impl BlockMinerThread { SortitionDB::open(&burn_db_path, true, self.burnchain.pox_constants.clone()) .expect("FATAL: could not open sortition DB"); - self.check_burn_tip_changed(&burn_db)?; - neon_node::fault_injection_long_tenure(); - let mut chain_state = neon_node::open_chainstate_with_faults(&self.config) .expect("FATAL: could not open chainstate DB"); + self.check_burn_tip_changed(&burn_db)?; + neon_node::fault_injection_long_tenure(); + let mut mem_pool = self .config .connect_mempool_db() @@ -1225,7 +1307,7 @@ impl BlockMinerThread { }; let (tenure_change_tx, coinbase_tx) = match &self.reason { - MinerReason::BlockFound | MinerReason::EmptyTenure => { + MinerReason::BlockFound { .. 
} => { let tenure_change_tx = self.generate_tenure_change_tx(current_miner_nonce, payload)?; let coinbase_tx = @@ -1245,6 +1327,8 @@ impl BlockMinerThread { "parent_block_id" => %parent_block_id, "num_blocks_so_far" => num_blocks_so_far, ); + + // NOTE: this switches payload.cause to TenureChangeCause::Extend payload = payload.extend( *burn_view_consensus_hash, parent_block_id, @@ -1256,6 +1340,11 @@ impl BlockMinerThread { } }; + debug!( + "make_tenure_start_info: reason = {:?}, burn_view = {:?}, tenure_change_tx = {:?}", + &self.reason, &self.burn_block.consensus_hash, &tenure_change_tx + ); + Ok(NakamotoTenureInfo { coinbase_tx, tenure_change_tx, @@ -1263,13 +1352,16 @@ impl BlockMinerThread { } /// Check if the tenure needs to change -- if so, return a BurnchainTipChanged error - /// The tenure should change if there is a new burnchain tip with a valid sortition + /// The tenure should change if there is a new burnchain tip with a valid sortition, + /// or if the stacks chain state's burn view has advanced beyond our burn view. fn check_burn_tip_changed(&self, sortdb: &SortitionDB) -> Result<(), NakamotoNodeError> { let cur_burn_chain_tip = SortitionDB::get_canonical_burn_chain_tip(sortdb.conn()) .expect("FATAL: failed to query sortition DB for canonical burn chain tip"); - if cur_burn_chain_tip.consensus_hash != self.burn_block.consensus_hash { - info!("Miner: Cancel block assembly; burnchain tip has changed"); + if cur_burn_chain_tip.consensus_hash != self.burn_tip_at_start { + info!("Miner: Cancel block assembly; burnchain tip has changed"; + "new_tip" => %cur_burn_chain_tip.consensus_hash, + "local_tip" => %self.burn_tip_at_start); self.globals.counters.bump_missed_tenures(); Err(NakamotoNodeError::BurnchainTipChanged) } else { @@ -1290,7 +1382,7 @@ impl ParentStacksBlockInfo { // TODO: add tests from mutation testing results #4869 #[cfg_attr(test, mutants::skip)] /// Determine where in the set of forks to attempt to mine the next anchored block. 
- /// `mine_tip_ch` and `mine_tip_bhh` identify the parent block on top of which to mine. + /// `parent_tenure_id` and `stacks_tip_header` identify the parent block on top of which to mine. /// `check_burn_block` identifies what we believe to be the burn chain's sortition history tip. /// This is used to mitigate (but not eliminate) a TOCTTOU issue with mining: the caller's /// conception of the sortition history tip may have become stale by the time they call this @@ -1302,6 +1394,7 @@ impl ParentStacksBlockInfo { miner_address: StacksAddress, parent_tenure_id: &StacksBlockId, stacks_tip_header: StacksHeaderInfo, + reason: &MinerReason, ) -> Result { // the stacks block I'm mining off of's burn header hash and vtxindex: let parent_snapshot = SortitionDB::get_block_snapshot_consensus( @@ -1311,11 +1404,15 @@ .expect("Failed to look up block's parent snapshot") .expect("Failed to look up block's parent snapshot"); - // don't mine off of an old burnchain block + // don't mine off of an old burnchain block, unless we're late let burn_chain_tip = SortitionDB::get_canonical_burn_chain_tip(burn_db.conn()) .expect("FATAL: failed to query sortition DB for canonical burn chain tip"); - if burn_chain_tip.consensus_hash != check_burn_block.consensus_hash { + // if we're mining a tenure that we were late to initialize, allow the burn tip + // to be slightly stale + if !reason.is_late_block() + && burn_chain_tip.consensus_hash != check_burn_block.consensus_hash + { info!( "New canonical burn chain tip detected. 
Will not try to mine."; "new_consensus_hash" => %burn_chain_tip.consensus_hash, @@ -1389,6 +1486,10 @@ impl ParentStacksBlockInfo { "stacks_tip_consensus_hash" => %parent_snapshot.consensus_hash, "stacks_tip_burn_hash" => %parent_snapshot.burn_header_hash, "stacks_tip_burn_height" => parent_snapshot.block_height, + "parent_tenure_info" => ?parent_tenure_info, + "stacks_tip_header.consensus_hash" => %stacks_tip_header.consensus_hash, + "parent_tenure_header.consensus_hash" => %parent_tenure_header.consensus_hash, + "reason" => %reason ); let coinbase_nonce = { diff --git a/testnet/stacks-node/src/nakamoto_node/relayer.rs b/testnet/stacks-node/src/nakamoto_node/relayer.rs index 8cc1293acd..f3dc2c09a6 100644 --- a/testnet/stacks-node/src/nakamoto_node/relayer.rs +++ b/testnet/stacks-node/src/nakamoto_node/relayer.rs @@ -15,12 +15,17 @@ // along with this program. If not, see . use core::fmt; use std::collections::HashSet; -use std::fs; use std::io::Read; +use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::mpsc::{Receiver, RecvTimeoutError}; +use std::sync::Arc; +#[cfg(test)] +use std::sync::LazyLock; use std::thread::JoinHandle; use std::time::{Duration, Instant}; +use std::{fs, thread}; +use rand::{thread_rng, Rng}; use stacks::burnchains::{Burnchain, Txid}; use stacks::chainstate::burn::db::sortdb::SortitionDB; use stacks::chainstate::burn::operations::leader_block_commit::{ @@ -37,6 +42,7 @@ use stacks::chainstate::stacks::db::StacksChainState; use stacks::chainstate::stacks::miner::{ get_mining_spend_amount, signal_mining_blocked, signal_mining_ready, }; +use stacks::chainstate::stacks::Error as ChainstateError; use stacks::core::mempool::MemPoolDB; use stacks::core::STACKS_EPOCH_3_1_MARKER; use stacks::monitoring::increment_stx_blocks_mined_counter; @@ -50,6 +56,8 @@ use stacks_common::types::chainstate::{ use stacks_common::types::StacksEpochId; use stacks_common::util::get_epoch_time_ms; use stacks_common::util::hash::Hash160; +#[cfg(test)] +use 
stacks_common::util::tests::TestFlag; use stacks_common::util::vrf::VRFPublicKey; use super::miner::MinerReason; @@ -66,6 +74,15 @@ use crate::run_loop::nakamoto::{Globals, RunLoop}; use crate::run_loop::RegisteredKey; use crate::BitcoinRegtestController; +#[cfg(test)] +/// Mutex to stall the relayer thread right before it creates a miner thread. +pub static TEST_MINER_THREAD_STALL: LazyLock> = LazyLock::new(TestFlag::default); + +#[cfg(test)] +/// Mutex to stall the miner thread right after it starts up (does not block the relayer thread) +pub static TEST_MINER_THREAD_START_STALL: LazyLock> = + LazyLock::new(TestFlag::default); + /// Command types for the Nakamoto relayer thread, issued to it by other threads #[allow(clippy::large_enum_variant)] pub enum RelayerDirective { @@ -172,6 +189,67 @@ impl LastCommit { } } +pub type MinerThreadJoinHandle = JoinHandle>; + +/// Miner thread join handle, as well as an "abort" flag to force the miner thread to exit when it +/// is blocked. +pub struct MinerStopHandle { + /// The join handle itself + join_handle: MinerThreadJoinHandle, + /// The relayer-set abort flag + abort_flag: Arc, +} + +impl MinerStopHandle { + pub fn new(join_handle: MinerThreadJoinHandle, abort_flag: Arc) -> Self { + Self { + join_handle, + abort_flag, + } + } + + /// Get a ref to the inner thread object + pub fn inner_thread(&self) -> &std::thread::Thread { + self.join_handle.thread() + } + + /// Destroy this stop handle to get the thread join handle + pub fn into_inner(self) -> MinerThreadJoinHandle { + self.join_handle + } + + /// Stop the inner miner thread. + /// Blocks the miner, and sets the abort flag so that a blocked miner will error out. 
+ pub fn stop(self, globals: &Globals) -> Result<(), NakamotoNodeError> { + let my_id = thread::current().id(); + let prior_thread_id = self.inner_thread().id(); + debug!( + "[Thread {:?}]: Stopping prior miner thread ID {:?}", + &my_id, &prior_thread_id + ); + + self.abort_flag.store(true, Ordering::SeqCst); + globals.block_miner(); + + let prior_miner = self.into_inner(); + let prior_miner_result = prior_miner.join().map_err(|_| { + error!("Miner: failed to join prior miner"); + ChainstateError::MinerAborted + })?; + debug!("Stopped prior miner thread ID {:?}", &prior_thread_id); + if let Err(e) = prior_miner_result { + // it's okay if the prior miner thread exited with an error. + // in many cases this is expected (i.e., a burnchain block occurred) + // if some error condition should be handled though, this is the place + // to do that handling. + debug!("Prior mining thread exited with: {e:?}"); + } + + globals.unblock_miner(); + Ok(()) + } +} + /// Relayer thread /// * accepts network results and stores blocks and microblocks /// * forwards new blocks, microblocks, and transactions to the p2p thread @@ -228,7 +306,10 @@ pub struct RelayerThread { relayer: Relayer, /// handle to the subordinate miner thread - miner_thread: Option>>, + miner_thread: Option, + /// miner thread's burn view + miner_thread_burn_view: Option, + /// The relayer thread reads directives from the relay_rcv, but it also periodically wakes up /// to check if it should issue a block commit or try to register a VRF key next_initiative: Instant, @@ -238,6 +319,8 @@ pub struct RelayerThread { last_committed: Option, /// Timeout for waiting for the first block in a tenure before submitting a block commit new_tenure_timeout: Option, + /// Timeout for waiting for a BlockFound in a subsequent tenure before trying to extend our own + tenure_extend_timeout: Option, } impl RelayerThread { @@ -292,10 +375,12 @@ impl RelayerThread { relayer, miner_thread: None, + miner_thread_burn_view: None, is_miner, 
next_initiative: Instant::now() + Duration::from_millis(next_initiative_delay), last_committed: None, new_tenure_timeout: None, + tenure_extend_timeout: None, } } @@ -378,89 +463,228 @@ impl RelayerThread { } /// Choose a miner directive based on the outcome of a sortition. + /// + /// The decision process is a little tricky, because the right decision depends on: + /// * whether or not we won the _given_ sortition (`sn`) + /// * whether or not we won the sortition that started the ongoing Stacks tenure + /// * whether or not we won the last sortition with a winner + /// * whether or not the last sortition winner has produced a Stacks block + /// * whether or not the ongoing Stacks tenure is at or descended from the last-winning + /// sortition + /// + /// Specifically: + /// + /// If we won the given sortition `sn`, then we can start mining immediately with a `BlockFound` + /// tenure-change. Otherwise, if we won the tenure which started the ongoing Stacks tenure + /// (i.e. we're the active miner), then we _may_ start mining after a timeout _if_ the winning + /// miner (not us) fails to submit a `BlockFound` tenure-change block for `sn`. + /// + /// Otherwise, if the given sortition `sn` has no winner, the find out who won the last sortition + /// with a winner. If it was us, and if we haven't yet submitted a `BlockFound` tenure-change + /// for it (which can happen if this given sortition is from a flash block), then start mining + /// immediately with a "late" `BlockFound` tenure, _and_ prepare to start mining right afterwards + /// with an `Extended` tenure-change so as to represent the given sortition `sn`'s burn view in + /// the Stacks chain. + /// + /// Otherwise, if this sortition has no winner, and we did not win the last-winning sortition, + /// then check to see if we're the ongoing Stack's tenure's miner. 
If so, then we _may_ start + /// mining after a timeout _if_ the winner of the last-good sortition (not us) fails to submit + /// a `BlockFound` tenure-change block. This can happen if `sn` was a flash block, and the + /// remote miner has yet to process it. + /// /// We won't always be able to mine -- for example, this could be an empty sortition, but the /// parent block could be an epoch 2 block. In this case, the right thing to do is to wait for /// the next block-commit. pub(crate) fn choose_miner_directive( - &self, + &mut self, sn: BlockSnapshot, won_sortition: bool, committed_index_hash: StacksBlockId, ) -> Option { - let (cur_stacks_tip_ch, cur_stacks_tip_bh) = + let (cur_stacks_tip_ch, _) = SortitionDB::get_canonical_stacks_chain_tip_hash(self.sortdb.conn()) .expect("FATAL: failed to query sortition DB for stacks tip"); - let stacks_tip = StacksBlockId::new(&cur_stacks_tip_ch, &cur_stacks_tip_bh); - let highest_tenure_start_block_header = NakamotoChainState::get_tenure_start_block_header( - &mut self.chainstate.index_conn(), - &stacks_tip, - &cur_stacks_tip_ch, - ) - .expect( - "Relayer: Failed to get tenure-start block header for stacks tip {stacks_tip}: {e:?}", - ) - .expect("Relayer: Failed to find tenure-start block header for stacks tip {stacks_tip}"); - - let directive = if sn.sortition { - Some( - if won_sortition || self.config.get_node_config(false).mock_mining { - info!("Relayer: Won sortition; begin tenure."); - MinerDirective::BeginTenure { - parent_tenure_start: committed_index_hash, - burnchain_tip: sn, - } - } else if committed_index_hash - != highest_tenure_start_block_header.index_block_hash() - { - info!( - "Relayer: Winner of sortition {} did not commit to the correct parent tenure. Attempt to continue tenure.", - &sn.consensus_hash - ); - // We didn't win the sortition, but the miner that did win - // did not commit to the correct parent tenure. 
This means - // it will be unable to produce a valid block, so we should - // continue our tenure. - MinerDirective::ContinueTenure { - new_burn_view: sn.consensus_hash, - } - } else { - info!("Relayer: Stop tenure"); - MinerDirective::StopTenure - }, - ) - } else { - // find out what epoch the Stacks tip is in. - // If it's in epoch 2.x, then we must always begin a new tenure, but we can't do so - // right now since this sortition has no winner. - let (cur_stacks_tip_ch, _cur_stacks_tip_bh) = - SortitionDB::get_canonical_stacks_chain_tip_hash(self.sortdb.conn()) - .expect("FATAL: failed to query sortition DB for stacks tip"); - - let stacks_tip_sn = - SortitionDB::get_block_snapshot_consensus(self.sortdb.conn(), &cur_stacks_tip_ch) - .expect("FATAL: failed to query sortiiton DB for epoch") - .expect("FATAL: no sortition for canonical stacks tip"); - - let cur_epoch = - SortitionDB::get_stacks_epoch(self.sortdb.conn(), stacks_tip_sn.block_height) - .expect("FATAL: failed to query sortition DB for epoch") - .expect("FATAL: no epoch defined for existing sortition"); - - if cur_epoch.epoch_id < StacksEpochId::Epoch30 { - debug!( - "As of sortition {}, there has not yet been a Nakamoto tip. Cannot mine.", + self.tenure_extend_timeout = None; + + if sn.sortition { + // a sortition happened + if won_sortition || self.config.get_node_config(false).mock_mining { + // a sortition happenend, and we won + info!("Relayer: Won sortition; begin tenure."; + "winning_sortition" => %sn.consensus_hash); + return Some(MinerDirective::BeginTenure { + parent_tenure_start: committed_index_hash, + burnchain_tip: sn.clone(), + election_block: sn, + late: false, + }); + } + + // a sortition happened, but we didn't win. 
+ debug!( + "Relayer: did not win sortition {}, so stopping tenure", + &sn.sortition + ); + + let mining_pkh_opt = self.get_mining_key_pkh(); + + match Self::can_continue_tenure( + &self.sortdb, + &mut self.chainstate, + sn.consensus_hash, + mining_pkh_opt, + ) { + Ok(Some(_)) => { + // we can continue our ongoing tenure, but we should give the new winning miner + // a chance to send their BlockFound first. + debug!("Relayer: Did not win sortition, but am mining the ongoing tenure. Allowing the new miner some time to come online before trying to continue."); + self.tenure_extend_timeout = Some(Instant::now()); + return Some(MinerDirective::StopTenure); + } + Ok(None) => { + return Some(MinerDirective::StopTenure); + } + Err(e) => { + warn!("Relayer: failed to check to see if we can continue tenure: {e:?}"); + return Some(MinerDirective::StopTenure); + } + } + } + + // no sortition happened. + // find out what epoch the Stacks tip is in. + // If it's in epoch 2.x, then we must always begin a new tenure, but we can't do so + // right now since this sortition has no winner. + let stacks_tip_sn = + SortitionDB::get_block_snapshot_consensus(self.sortdb.conn(), &cur_stacks_tip_ch) + .expect("FATAL: failed to query sortiiton DB for epoch") + .expect("FATAL: no sortition for canonical stacks tip"); + + let cur_epoch = + SortitionDB::get_stacks_epoch(self.sortdb.conn(), stacks_tip_sn.block_height) + .expect("FATAL: failed to query sortition DB for epoch") + .expect("FATAL: no epoch defined for existing sortition"); + + if cur_epoch.epoch_id < StacksEpochId::Epoch30 { + debug!( + "As of sortition {}, there has not yet been a Nakamoto tip. Cannot mine.", + &stacks_tip_sn.consensus_hash + ); + return None; + } + + // find out who won the last non-empty sortition. It may have been us. 
+ let Ok(last_winning_snapshot) = Self::get_last_winning_snapshot(&self.sortdb, &sn) + .inspect_err(|e| { + warn!("Relayer: Failed to load last winning snapshot: {e:?}"); + }) + else { + // this should be unreachable, but don't tempt fate. + info!("Relayer: No prior snapshots have a winning sortition. Will not try to mine."); + return None; + }; + + if last_winning_snapshot.miner_pk_hash == self.get_mining_key_pkh() { + debug!( + "Relayer: we won the last winning sortition {}", + &last_winning_snapshot.consensus_hash + ); + + // we won the last non-empty sortition. Has there been a BlockFound issued for it? + // This would be true if the stacks tip's tenure is at or descends from this snapshot. + // If there has _not_ been a BlockFound, then we should issue one. + let ih = self + .sortdb + .index_handle(&last_winning_snapshot.sortition_id); + let need_blockfound = if stacks_tip_sn.block_height > last_winning_snapshot.block_height + { + // stacks tip is ahead of this snapshot, so no BlockFound can be issued. + test_debug!("Relayer: stacks_tip_sn.block_height ({}) > last_winning_snapshot.block_height ({})", stacks_tip_sn.block_height, last_winning_snapshot.block_height); + false + } else if stacks_tip_sn.block_height == last_winning_snapshot.block_height + && stacks_tip_sn.consensus_hash == last_winning_snapshot.consensus_hash + { + // this is the ongoing tenure snapshot. A BlockFound has already been issued. We + // can instead opt to Extend + test_debug!( + "Relayer: ongoing tenure {} already represents last-winning snapshot", &stacks_tip_sn.consensus_hash ); - None + self.tenure_extend_timeout = Some(Instant::now()); + false } else { - info!("Relayer: No sortition; continue tenure."); - Some(MinerDirective::ContinueTenure { - new_burn_view: sn.consensus_hash, + // stacks tip's snapshot may be an ancestor of the last-won sortition. + // If so, then we can issue a BlockFound. 
+ SortitionDB::get_ancestor_snapshot( + &ih, + stacks_tip_sn.block_height, + &last_winning_snapshot.sortition_id, + ) + .map_err(|e| { + error!("Relayer: Failed to load ancestor snapshot: {e:?}"); + e + }) + .ok() + .flatten() + .map(|sn| { + let need_blockfound = sn.consensus_hash == stacks_tip_sn.consensus_hash; + if !need_blockfound { + test_debug!( + "Relayer: stacks_tip_sn.consensus_hash ({}) != sn.consensus_hash ({})", + &stacks_tip_sn.consensus_hash, + &sn.consensus_hash + ); + } + need_blockfound }) + .unwrap_or_else(|| { + test_debug!( + "Relayer: no ancestor at height {} off of sortition {} height {}", + stacks_tip_sn.block_height, + &last_winning_snapshot.consensus_hash, + last_winning_snapshot.block_height + ); + false + }) + }; + if need_blockfound { + info!( + "Relayer: will submit late BlockFound for {}", + &last_winning_snapshot.consensus_hash + ); + // prepare to extend after our BlockFound gets mined. + self.tenure_extend_timeout = Some(Instant::now()); + return Some(MinerDirective::BeginTenure { + parent_tenure_start: StacksBlockId( + last_winning_snapshot.winning_stacks_block_hash.clone().0, + ), + burnchain_tip: sn, + election_block: last_winning_snapshot, + late: true, + }); } - }; - directive + } + + // try to continue our tenure if we produced the canonical Stacks tip. + if stacks_tip_sn.miner_pk_hash == self.get_mining_key_pkh() { + info!("Relayer: No sortition, but we produced the canonical Stacks tip. Will continue tenure."); + + if last_winning_snapshot.miner_pk_hash != self.get_mining_key_pkh() { + // delay trying to continue since the last snapshot with a sortition was won + // by someone else -- there's a chance that this other miner will produce a + // BlockFound in the interim. 
+ debug!("Relayer: Did not win last winning snapshot despite mining the ongoing tenure, so allowing the new miner some time to come online."); + self.tenure_extend_timeout = Some(Instant::now()); + return None; + } + return Some(MinerDirective::ContinueTenure { + new_burn_view: sn.consensus_hash, + }); + } + + info!("Relayer: No sortition, and we did not produce the last Stacks tip. Will not mine."); + return None; } /// Given the pointer to a recently processed sortition, see if we won the sortition, and @@ -481,12 +705,14 @@ impl RelayerThread { .expect("FATAL: unknown consensus hash"); // always clear this even if this isn't the latest sortition - let won_sortition = sn.sortition && self.last_commits.remove(&sn.winning_block_txid); + let cleared = self.last_commits.remove(&sn.winning_block_txid); + let won_sortition = sn.sortition && cleared; if won_sortition { increment_stx_blocks_mined_counter(); } self.globals.set_last_sortition(sn.clone()); self.globals.counters.bump_blocks_processed(); + self.globals.counters.bump_sortitions_processed(); // there may be a bufferred stacks block to process, so wake up the coordinator to check self.globals.coord_comms.announce_new_stacks_block(); @@ -512,6 +738,10 @@ impl RelayerThread { } let directive_opt = self.choose_miner_directive(sn, won_sortition, committed_index_hash); + debug!( + "Relayer: Processed sortition {}: Miner directive is {:?}", + &consensus_hash, &directive_opt + ); Ok(directive_opt) } @@ -767,6 +997,40 @@ impl RelayerThread { )) } + #[cfg(test)] + fn fault_injection_stall_miner_startup() { + if TEST_MINER_THREAD_STALL.get() { + // Do an extra check just so we don't log EVERY time. + warn!("Relayer miner thread startup is stalled due to testing directive to stall the miner"); + while TEST_MINER_THREAD_STALL.get() { + std::thread::sleep(std::time::Duration::from_millis(10)); + } + warn!( + "Relayer miner thread startup is no longer stalled due to testing directive. Continuing..." 
+ ); + } + } + + #[cfg(not(test))] + fn fault_injection_stall_miner_startup() {} + + #[cfg(test)] + fn fault_injection_stall_miner_thread_startup() { + if TEST_MINER_THREAD_START_STALL.get() { + // Do an extra check just so we don't log EVERY time. + warn!("Miner thread startup is stalled due to testing directive"); + while TEST_MINER_THREAD_START_STALL.get() { + std::thread::sleep(std::time::Duration::from_millis(10)); + } + warn!( + "Miner thread startup is no longer stalled due to testing directive. Continuing..." + ); + } + } + + #[cfg(not(test))] + fn fault_injection_stall_miner_thread_startup() {} + /// Create the block miner thread state. /// Only proceeds if all of the following are true: /// * the miner is not blocked @@ -782,6 +1046,7 @@ impl RelayerThread { burn_tip: BlockSnapshot, parent_tenure_id: StacksBlockId, reason: MinerReason, + burn_tip_at_start: &ConsensusHash, ) -> Result { if fault_injection_skip_mining(&self.config.node.rpc_bind, burn_tip.block_height) { debug!( @@ -790,6 +1055,7 @@ impl RelayerThread { ); return Err(NakamotoNodeError::FaultInjection); } + Self::fault_injection_stall_miner_startup(); let burn_header_hash = burn_tip.burn_header_hash; let burn_chain_sn = SortitionDB::get_canonical_burn_chain_tip(self.sortdb.conn()) @@ -797,8 +1063,8 @@ impl RelayerThread { let burn_chain_tip = burn_chain_sn.burn_header_hash; - if burn_chain_tip != burn_header_hash { - debug!( + if &burn_chain_sn.consensus_hash != burn_tip_at_start { + info!( "Relayer: Drop stale RunTenure for {burn_header_hash}: current sortition is for {burn_chain_tip}" ); self.globals.counters.bump_missed_tenures(); @@ -821,6 +1087,7 @@ impl RelayerThread { burn_election_block, burn_tip, parent_tenure_id, + burn_tip_at_start, reason, ); Ok(miner_thread_state) @@ -832,10 +1099,13 @@ impl RelayerThread { block_election_snapshot: BlockSnapshot, burn_tip: BlockSnapshot, reason: MinerReason, + burn_tip_at_start: &ConsensusHash, ) -> Result<(), NakamotoNodeError> { // when 
starting a new tenure, block the mining thread if its currently running. // the new mining thread will join it (so that the new mining thread stalls, not the relayer) let prior_tenure_thread = self.miner_thread.take(); + self.miner_thread_burn_view = None; + let vrf_key = self .globals .get_leader_key_registration_state() @@ -847,17 +1117,26 @@ impl RelayerThread { let new_miner_state = self.create_block_miner( vrf_key, block_election_snapshot, - burn_tip, + burn_tip.clone(), parent_tenure_start, reason, + burn_tip_at_start, )?; + let miner_abort_flag = new_miner_state.get_abort_flag(); debug!("Relayer: starting new tenure thread"); + let rand_id = thread_rng().gen::(); + let new_miner_handle = std::thread::Builder::new() - .name(format!("miner.{parent_tenure_start}",)) + .name(format!("miner.{parent_tenure_start}.{rand_id}",)) .stack_size(BLOCK_PROCESSOR_STACK_SIZE) .spawn(move || { + debug!( + "New block miner thread ID is {:?}", + std::thread::current().id() + ); + Self::fault_injection_stall_miner_thread_startup(); if let Err(e) = new_miner_state.run_miner(prior_tenure_thread) { info!("Miner thread failed: {e:?}"); Err(e) @@ -873,8 +1152,9 @@ impl RelayerThread { "Relayer: started tenure thread ID {:?}", new_miner_handle.thread().id() ); - self.miner_thread.replace(new_miner_handle); - + self.miner_thread + .replace(MinerStopHandle::new(new_miner_handle, miner_abort_flag)); + self.miner_thread_burn_view.replace(burn_tip); Ok(()) } @@ -885,18 +1165,25 @@ impl RelayerThread { debug!("Relayer: no tenure thread to stop"); return Ok(()); }; - let id = prior_tenure_thread.thread().id(); + self.miner_thread_burn_view = None; + + let id = prior_tenure_thread.inner_thread().id(); + let abort_flag = prior_tenure_thread.abort_flag.clone(); let globals = self.globals.clone(); let stop_handle = std::thread::Builder::new() - .name(format!("tenure-stop-{}", self.local_peer.data_url)) - .spawn(move || BlockMinerThread::stop_miner(&globals, prior_tenure_thread)) + 
.name(format!( + "tenure-stop({:?})-{}", + id, self.local_peer.data_url + )) + .spawn(move || prior_tenure_thread.stop(&globals)) .map_err(|e| { error!("Relayer: Failed to spawn a stop-tenure thread: {e:?}"); NakamotoNodeError::SpawnError(e) })?; - self.miner_thread.replace(stop_handle); + self.miner_thread + .replace(MinerStopHandle::new(stop_handle, abort_flag)); debug!("Relayer: stopped tenure thread ID {id:?}"); Ok(()) } @@ -911,112 +1198,246 @@ impl RelayerThread { )) } - /// Get the tenure-start block header hash of a given consensus hash. - /// For Nakamoto blocks, this is the first block in the tenure identified by the consensus - /// hash. - /// For epoch2 blocks, this is simply the block whose winning sortition happened in the - /// sortition identified by the consensus hash. - /// - /// `tip_block_id` is the chain tip from which to perform the query. - fn get_tenure_bhh( - &self, - tip_block_id: &StacksBlockId, - ch: &ConsensusHash, - ) -> Result { - let highest_tenure_start_block_header = NakamotoChainState::get_tenure_start_block_header( - &mut self.chainstate.index_conn(), - tip_block_id, - &ch, - )? - .ok_or_else(|| { - error!( - "Relayer: Failed to find tenure-start block header for stacks tip {tip_block_id}" - ); - NakamotoNodeError::ParentNotFound - })?; - Ok(BlockHeaderHash( - highest_tenure_start_block_header.index_block_hash().0, - )) + /// Helper method to get the last snapshot with a winner + fn get_last_winning_snapshot( + sortdb: &SortitionDB, + sort_tip: &BlockSnapshot, + ) -> Result { + let ih = sortdb.index_handle(&sort_tip.sortition_id); + Ok(ih.get_last_snapshot_with_sortition(sort_tip.block_height)?) } - /// Determine the type of tenure change to issue based on whether this - /// miner was the last successful miner (miner of the canonical tip). 
- fn determine_tenure_type( - &self, - canonical_snapshot: BlockSnapshot, - last_snapshot: BlockSnapshot, - new_burn_view: ConsensusHash, - mining_pkh: Hash160, - ) -> (StacksBlockId, BlockSnapshot, MinerReason) { - if canonical_snapshot.miner_pk_hash != Some(mining_pkh) { - debug!("Relayer: Miner was not the last successful miner. Issue a new tenure change payload."); - ( - StacksBlockId(last_snapshot.winning_stacks_block_hash.0), - last_snapshot, - MinerReason::EmptyTenure, - ) - } else { - debug!("Relayer: Miner was the last successful miner. Issue a tenure extend from the chain tip."); - ( - self.sortdb.get_canonical_stacks_tip_block_id(), - canonical_snapshot, - MinerReason::Extended { - burn_view_consensus_hash: new_burn_view, - }, - ) + /// Is the given sortition a valid sortition? + /// I.e. whose winning commit's parent tenure ID is on the canonical Stacks history, + /// and whose consensus hash corresponds to the ongoing tenure or a confirmed tenure? + fn is_valid_sortition( + chain_state: &mut StacksChainState, + stacks_tip_id: &StacksBlockId, + stacks_tip_sn: &BlockSnapshot, + burn_tip_ch: &ConsensusHash, + sn: &BlockSnapshot, + ) -> Result { + if !sn.sortition { + // definitely not a valid sortition + debug!("Relayer: Sortition {} is empty", &sn.consensus_hash); + return Ok(false); } - } - /// Get the block snapshot of the most recent sortition that committed to - /// the canonical tip. If the latest sortition did not commit to the - /// canonical tip, then the tip's tenure is the last good sortition. 
- fn get_last_good_block_snapshot( - &self, - burn_tip: &BlockSnapshot, - highest_tenure_bhh: &BlockHeaderHash, - canonical_stacks_tip_ch: &ConsensusHash, - ) -> Result { - let ih = self.sortdb.index_handle(&burn_tip.sortition_id); - let sn = ih - .get_last_snapshot_with_sortition(burn_tip.block_height) - .map_err(|e| { - error!("Relayer: failed to get last snapshot with sortition: {e:?}"); - NakamotoNodeError::SnapshotNotFoundForChainTip + // check that this commit's parent tenure ID is on the history tipped at + // `stacks_tip_id` + let mut ic = chain_state.index_conn(); + let parent_tenure_id = StacksBlockId(sn.winning_stacks_block_hash.clone().0); + let height_opt = ic.get_ancestor_block_height(&parent_tenure_id, stacks_tip_id)?; + if height_opt.is_none() { + // parent_tenure_id is not an ancestor of stacks_tip_id + debug!( + "Relayer: Sortition {} has winning commit hash {}, which is not canonical", + &sn.consensus_hash, &parent_tenure_id + ); + return Ok(false); + } + + if sn.consensus_hash == *burn_tip_ch { + // sn is the sortition tip, so this sortition must commit to the tenure start block of + // the ongoing Stacks tenure. + let highest_tenure_start_block_header = NakamotoChainState::get_tenure_start_block_header( + &mut ic, + stacks_tip_id, + &stacks_tip_sn.consensus_hash + )? 
+ .ok_or_else(|| { + error!( + "Relayer: Failed to find tenure-start block header for stacks tip {stacks_tip_id}" + ); + NakamotoNodeError::ParentNotFound })?; - if &sn.winning_stacks_block_hash != highest_tenure_bhh { - info!( - "Relayer: Sortition winner is not committed to the canonical tip; allowing last miner to extend"; - "burn_block_height" => burn_tip.block_height, - "consensus_hash" => %burn_tip.consensus_hash, + + let highest_tenure_start_block_id = + highest_tenure_start_block_header.index_block_hash(); + if highest_tenure_start_block_id != parent_tenure_id { + debug!("Relayer: Sortition {} is at the tip, but does not commit to {} so cannot be valid", &sn.consensus_hash, &parent_tenure_id; + "highest_tenure_start_block_header.block_id()" => %highest_tenure_start_block_id); + return Ok(false); + } + } + + Ok(true) + } + + /// Determine the highest valid sortition higher than `elected_tenure_id`, but no higher than + /// `sort_tip`. + /// + /// This is the highest non-empty sortition (up to and including `sort_tip`) + /// whose winning commit's parent tenure ID matches the + /// Stacks tip, and whose consensus hash matches the Stacks tip's tenure ID. + /// + /// Returns Ok(Some(..)) if such a sortition is found, and is higher than that of + /// `elected_tenure_id`. + /// Returns Ok(None) if no such sortition is found. + /// Returns Err(..) on DB errors. + fn find_highest_valid_sortition( + sortdb: &SortitionDB, + chain_state: &mut StacksChainState, + sort_tip: &BlockSnapshot, + elected_tenure_id: &ConsensusHash, + ) -> Result, NakamotoNodeError> { + // sanity check -- if sort_tip is the elected_tenure_id sortition, then there are no higher + // valid sortitions. 
+ if sort_tip.consensus_hash == *elected_tenure_id { + return Ok(None); + } + + let mut cursor = sort_tip.clone(); + let (canonical_stacks_tip_ch, canonical_stacks_tip_bh) = + SortitionDB::get_canonical_stacks_chain_tip_hash(sortdb.conn()).unwrap(); + let canonical_stacks_tip = + StacksBlockId::new(&canonical_stacks_tip_ch, &canonical_stacks_tip_bh); + + let Ok(Some(canonical_stacks_tip_sn)) = + SortitionDB::get_block_snapshot_consensus(sortdb.conn(), &canonical_stacks_tip_ch) + else { + return Err(NakamotoNodeError::ParentNotFound); + }; + + loop { + debug!( + "Relayer: check sortition {} to see if it is valid", + &cursor.consensus_hash ); - SortitionDB::get_block_snapshot_consensus(self.sortdb.conn(), canonical_stacks_tip_ch) - .map_err(|e| { - error!("Relayer: failed to get block snapshot for canonical tip: {e:?}"); - NakamotoNodeError::SnapshotNotFoundForChainTip - })? + // is this a valid sortiton? + if Self::is_valid_sortition( + chain_state, + &canonical_stacks_tip, + &canonical_stacks_tip_sn, + &sort_tip.consensus_hash, + &cursor, + )? { + return Ok(Some(cursor)); + } + + // nope. continue the search + let Some(cursor_parent) = + SortitionDB::get_block_snapshot(sortdb.conn(), &cursor.parent_sortition_id)? + else { + return Ok(None); + }; + + if cursor_parent.consensus_hash == *elected_tenure_id { + return Ok(None); + } + + cursor = cursor_parent; + } + } + + /// Determine if the miner can contine an existing tenure with the new sortition (identified + /// by `new_burn_view`) + /// + /// Assumes that the caller has already checked that the given miner has _not_ won the new + /// sortition. + /// + /// Returns Ok(Some(stacks-tip-election-snapshot)) if the last-winning miner needs to extend. + /// For now, this only happens if the miner's election snapshot was the last-known valid and + /// non-empty snapshot. In the future, this function may return Ok(Some(..)) if the node + /// determines that a subsequent miner won sortition, but never came online. 
+ /// + /// Returns OK(None) if the last-winning miner should not extend its tenure. + /// + /// Returns Err(..) on DB error + pub(crate) fn can_continue_tenure( + sortdb: &SortitionDB, + chain_state: &mut StacksChainState, + new_burn_view: ConsensusHash, + mining_key_opt: Option, + ) -> Result, NakamotoNodeError> { + let Some(mining_pkh) = mining_key_opt else { + return Ok(None); + }; + + // Get the necessary snapshots and state + let (canonical_stacks_tip_ch, canonical_stacks_tip_bh) = + SortitionDB::get_canonical_stacks_chain_tip_hash(sortdb.conn()).unwrap(); + + let canonical_stacks_tip = + StacksBlockId::new(&canonical_stacks_tip_ch, &canonical_stacks_tip_bh); + + let canonical_stacks_snapshot = + SortitionDB::get_block_snapshot_consensus(sortdb.conn(), &canonical_stacks_tip_ch)? .ok_or_else(|| { error!("Relayer: failed to get block snapshot for canonical tip"); NakamotoNodeError::SnapshotNotFoundForChainTip - }) - } else { - Ok(sn) + })?; + + let won_ongoing_tenure_sortition = + canonical_stacks_snapshot.miner_pk_hash == Some(mining_pkh); + + let sort_tip = SortitionDB::get_canonical_burn_chain_tip(sortdb.conn()).unwrap(); + let won_current_tip = sort_tip.miner_pk_hash == Some(mining_pkh); + + info!( + "Relayer: Checking for tenure continuation."; + "won_ongoing_tenure_sortition" => won_ongoing_tenure_sortition, + "won_current_tip" => won_current_tip, + "current_mining_pkh" => %mining_pkh, + "canonical_stacks_tip_id" => %canonical_stacks_tip, + "canonical_stacks_tip_ch" => %canonical_stacks_tip_ch, + "canonical_stacks_tip_miner" => ?canonical_stacks_snapshot.miner_pk_hash, + "burn_view_ch" => %new_burn_view, + ); + + if !won_ongoing_tenure_sortition { + info!("Relayer: Did not win the last sortition that commits to our Stacks fork. Cannot continue tenure."); + return Ok(None); + } + + if won_current_tip { + info!("Relayer: Won current sortition, so no need to continue tenure. 
Just start a new one."); + return Ok(None); + } + + // For now, only allow the miner to extend its tenure if won the highest valid sortition. + // There cannot be any higher sortitions that are valid (as defined above). + // + // In the future, the miner will be able to extend its tenure even if there are higher + // valid sortitions, but only if it determines that the miners of those sortitions are + // offline. + if let Some(highest_valid_sortition) = Self::find_highest_valid_sortition( + sortdb, + chain_state, + &sort_tip, + &canonical_stacks_snapshot.consensus_hash, + )? { + info!("Relayer: will not extend tenure -- we won sortition {}, but the highest valid sortition is {}", &canonical_stacks_snapshot.consensus_hash, &highest_valid_sortition.consensus_hash); + return Ok(None); } + + Ok(Some(canonical_stacks_snapshot)) } /// Attempt to continue a miner's tenure into the next burn block. - /// This is allowed if the miner won the last good sortition and one of the - /// following conditions is met: - /// - There was no sortition in the latest burn block - /// - The winner of the latest sortition did not commit to the canonical tip - /// - The winner of the latest sortition did not mine any blocks within the - /// timeout period (not yet implemented) + /// This is allowed if the miner won the last good sortition -- that is, the sortition which + /// elected the local view of the canonical Stacks fork's ongoing tenure. + /// + /// This function assumes that the caller has checked that the sortition referred to by + /// `new_burn_view` does not have a sortition winner. 
fn continue_tenure(&mut self, new_burn_view: ConsensusHash) -> Result<(), NakamotoNodeError> { if let Err(e) = self.stop_tenure() { error!("Relayer: Failed to stop tenure: {e:?}"); return Ok(()); } - debug!("Relayer: successfully stopped tenure."); + debug!("Relayer: successfully stopped tenure; will try to continue."); + + let mining_pkh_opt = self.get_mining_key_pkh(); + let Some(canonical_stacks_tip_election_snapshot) = Self::can_continue_tenure( + &self.sortdb, + &mut self.chainstate, + new_burn_view.clone(), + mining_pkh_opt, + )? + else { + return Ok(()); + }; // Get the necessary snapshots and state let burn_tip = @@ -1029,59 +1450,26 @@ impl RelayerThread { SortitionDB::get_canonical_stacks_chain_tip_hash(self.sortdb.conn()).unwrap(); let canonical_stacks_tip = StacksBlockId::new(&canonical_stacks_tip_ch, &canonical_stacks_tip_bh); - let Some(mining_pkh) = self.get_mining_key_pkh() else { - return Ok(()); - }; - let highest_tenure_bhh = - self.get_tenure_bhh(&canonical_stacks_tip, &canonical_stacks_tip_ch)?; - let last_good_block_election_snapshot = self.get_last_good_block_snapshot( - &burn_tip, - &highest_tenure_bhh, - &canonical_stacks_tip_ch, - )?; - let won_last_sortition = - last_good_block_election_snapshot.miner_pk_hash == Some(mining_pkh); - info!( - "Relayer: Current burn block had no sortition or a bad sortition. Checking for tenure continuation."; - "won_last_sortition" => won_last_sortition, - "current_mining_pkh" => %mining_pkh, - "last_good_block_election_snapshot.consensus_hash" => %last_good_block_election_snapshot.consensus_hash, - "last_good_block_election_snapshot.miner_pk_hash" => ?last_good_block_election_snapshot.miner_pk_hash, - "canonical_stacks_tip_id" => %canonical_stacks_tip, - "canonical_stacks_tip_ch" => %canonical_stacks_tip_ch, - "burn_view_ch" => %new_burn_view, - ); - - if !won_last_sortition { - info!("Relayer: Did not win the last sortition. 
Cannot continue tenure."); - return Ok(()); - } - - let canonical_snapshot = SortitionDB::get_block_snapshot_consensus( - self.sortdb.conn(), - &canonical_stacks_tip_ch, - )? - .ok_or_else(|| { - error!("Relayer: failed to get block snapshot for canonical tip"); - NakamotoNodeError::SnapshotNotFoundForChainTip - })?; - let (parent_tenure_start, block_election_snapshot, reason) = self.determine_tenure_type( - canonical_snapshot, - last_good_block_election_snapshot, - new_burn_view, - mining_pkh, - ); + let reason = MinerReason::Extended { + burn_view_consensus_hash: new_burn_view.clone(), + }; if let Err(e) = self.start_new_tenure( - parent_tenure_start, - block_election_snapshot, - burn_tip, - reason, + canonical_stacks_tip.clone(), + canonical_stacks_tip_election_snapshot.clone(), + burn_tip.clone(), + reason.clone(), + &new_burn_view, ) { error!("Relayer: Failed to start new tenure: {e:?}"); } else { - debug!("Relayer: successfully started new tenure."); + debug!("Relayer: successfully started new tenure."; + "parent_tenure_start" => %canonical_stacks_tip, + "burn_tip" => %burn_tip.consensus_hash, + "burn_view_snapshot" => %burn_tip.consensus_hash, + "block_election_snapshot" => %canonical_stacks_tip_election_snapshot.consensus_hash, + "reason" => %reason); } Ok(()) } @@ -1108,14 +1496,22 @@ impl RelayerThread { MinerDirective::BeginTenure { parent_tenure_start, burnchain_tip, + election_block, + late, } => match self.start_new_tenure( parent_tenure_start, - burnchain_tip.clone(), - burnchain_tip, - MinerReason::BlockFound, + election_block.clone(), + election_block.clone(), + MinerReason::BlockFound { late }, + &burnchain_tip.consensus_hash, ) { Ok(()) => { - debug!("Relayer: successfully started new tenure."); + debug!("Relayer: successfully started new tenure."; + "parent_tenure_start" => %parent_tenure_start, + "burn_tip" => %burnchain_tip.consensus_hash, + "burn_view_snapshot" => %burnchain_tip.consensus_hash, + "block_election_snapshot" => 
%burnchain_tip.consensus_hash, + "reason" => %MinerReason::BlockFound { late }); } Err(e) => { error!("Relayer: Failed to start new tenure: {e:?}"); @@ -1142,6 +1538,7 @@ impl RelayerThread { }, } + self.globals.counters.bump_naka_miner_directives(); true } @@ -1239,6 +1636,8 @@ impl RelayerThread { /// * If this isn't a miner, then it's always nothing. /// * Otherwise, if we haven't done so already, go register a VRF public key /// * If the stacks chain tip or burnchain tip has changed, then issue a block-commit + /// * If the last burn view we started a miner for is not the canonical burn view, then + /// try and start a new tenure (or continue an existing one). fn initiative(&mut self) -> Option { if !self.is_miner { return None; @@ -1345,16 +1744,82 @@ impl RelayerThread { )) } + /// Try to start up a tenure-extend. + /// Only do this if the miner won the highest valid sortition but the burn view has changed. + /// In the future, the miner will also try to extend its tenure if a subsequent miner appears + /// to be offline. + fn try_continue_tenure(&mut self) { + if self.tenure_extend_timeout.is_none() { + return; + } + + // time to poll to see if we should begin a tenure-extend? + let deadline_passed = self + .tenure_extend_timeout + .map(|tenure_extend_timeout| { + let deadline_passed = + tenure_extend_timeout.elapsed() > self.config.miner.tenure_extend_poll_secs; + if !deadline_passed { + test_debug!( + "Relayer: will not try to tenure-extend yet ({} <= {})", + tenure_extend_timeout.elapsed().as_secs(), + self.config.miner.tenure_extend_poll_secs.as_secs() + ); + } + deadline_passed + }) + .unwrap_or(false); + + if !deadline_passed { + return; + } + + // reset timer so we can try again if for some reason a miner was already running (e.g. a + // blockfound from earlier). 
+ self.tenure_extend_timeout = Some(Instant::now()); + + // try to extend, but only if we aren't already running a thread for the current or newer + // burnchain view + let Ok(sn) = + SortitionDB::get_canonical_burn_chain_tip(self.sortdb.conn()).inspect_err(|e| { + error!("Relayer: failed to read canonical burnchain sortition: {e:?}"); + }) + else { + return; + }; + + if let Some(miner_thread_burn_view) = self.miner_thread_burn_view.as_ref() { + // a miner thread is already running. If its burn view is the same as the canonical + // tip, then do nothing + if sn.consensus_hash == miner_thread_burn_view.consensus_hash { + info!("Relayer: will not try to start a tenure extend -- the current miner thread's burn view matches the sortition tip"; "sortition tip" => %sn.consensus_hash); + return; + } + } + + if let Err(e) = self.continue_tenure(sn.consensus_hash.clone()) { + warn!( + "Relayer: failed to continue tenure for burn view {}: {e:?}", + &sn.consensus_hash + ); + } + } + /// Main loop of the relayer. /// Runs in a separate thread. - /// Continuously receives + /// Continuously receives from `relay_rcv`. + /// Wakes up once per second to see if we need to continue mining an ongoing tenure. 
pub fn main(mut self, relay_rcv: Receiver) { debug!("relayer thread ID is {:?}", std::thread::current().id()); self.next_initiative = Instant::now() + Duration::from_millis(self.config.node.next_initiative_delay); + // how often we perform a loop pass below + let poll_frequency_ms = 1_000; + while self.globals.keep_running() { + self.try_continue_tenure(); let raised_initiative = self.globals.take_initiative(); let timed_out = Instant::now() >= self.next_initiative; let mut initiative_directive = if raised_initiative.is_some() || timed_out { @@ -1365,33 +1830,31 @@ impl RelayerThread { None }; - let directive = if let Some(directive) = initiative_directive.take() { - directive + let directive_opt = if let Some(directive) = initiative_directive.take() { + Some(directive) } else { // channel was drained, so do a time-bound recv - match relay_rcv.recv_timeout(Duration::from_millis( - self.config.node.next_initiative_delay, - )) { + match relay_rcv.recv_timeout(Duration::from_millis(poll_frequency_ms)) { Ok(directive) => { // only do this once, so we can call .initiative() again - directive - } - Err(RecvTimeoutError::Timeout) => { - continue; + Some(directive) } + Err(RecvTimeoutError::Timeout) => None, Err(RecvTimeoutError::Disconnected) => { break; } } }; - debug!("Relayer: main loop directive"; - "directive" => %directive, - "raised_initiative" => ?raised_initiative, - "timed_out" => %timed_out); + if let Some(directive) = directive_opt { + debug!("Relayer: main loop directive"; + "directive" => %directive, + "raised_initiative" => ?raised_initiative, + "timed_out" => %timed_out); - if !self.handle_directive(directive) { - break; + if !self.handle_directive(directive) { + break; + } } } diff --git a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs index f3df78c66b..06aaec8147 100644 --- a/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs +++ 
b/testnet/stacks-node/src/nakamoto_node/signer_coordinator.rs @@ -60,6 +60,12 @@ pub struct SignerCoordinator { keep_running: Arc, /// Handle for the signer DB listener thread listener_thread: Option>, + /// The current tip when this miner thread was started. + /// This *should not* be passed into any block building code, as it + /// is not necessarily the burn view for the block being constructed. + /// Rather, this burn block is used to determine whether or not a new + /// burn block has arrived since this thread started. + burn_tip_at_start: ConsensusHash, } impl SignerCoordinator { @@ -69,10 +75,11 @@ impl SignerCoordinator { stackerdb_channel: Arc>, node_keep_running: Arc, reward_set: &RewardSet, - burn_tip: &BlockSnapshot, + election_block: &BlockSnapshot, burnchain: &Burnchain, message_key: StacksPrivateKey, config: &Config, + burn_tip_at_start: &ConsensusHash, ) -> Result { info!("SignerCoordinator: starting up"); let keep_running = Arc::new(AtomicBool::new(true)); @@ -83,7 +90,7 @@ impl SignerCoordinator { node_keep_running.clone(), keep_running.clone(), reward_set, - burn_tip, + election_block, burnchain, )?; let is_mainnet = config.is_mainnet(); @@ -103,11 +110,15 @@ impl SignerCoordinator { stackerdb_comms: listener.get_comms(), keep_running, listener_thread: None, + burn_tip_at_start: burn_tip_at_start.clone(), }; // Spawn the signer DB listener thread let listener_thread = std::thread::Builder::new() - .name(format!("stackerdb_listener_{}", burn_tip.block_height)) + .name(format!( + "stackerdb_listener_{}", + election_block.block_height + )) .spawn(move || { if let Err(e) = listener.run() { error!("StackerDBListener: exited with error: {e:?}"); @@ -135,18 +146,26 @@ impl SignerCoordinator { is_mainnet: bool, miners_session: &mut StackerDBSession, election_sortition: &ConsensusHash, - ) -> Result<(), String> { + ) -> Result<(), NakamotoNodeError> { let Some(slot_range) = NakamotoChainState::get_miner_slot(sortdb, tip, election_sortition) - .map_err(|e| 
format!("Failed to read miner slot information: {e:?}"))? + .map_err(|e| { + NakamotoNodeError::SigningCoordinatorFailure(format!( + "Failed to read miner slot information: {e:?}" + )) + })? else { - return Err("No slot for miner".into()); + return Err(NakamotoNodeError::SigningCoordinatorFailure( + "No slot for miner".into(), + )); }; let slot_id = slot_range .start .saturating_add(miner_slot_id.to_u8().into()); if !slot_range.contains(&slot_id) { - return Err("Not enough slots for miner messages".into()); + return Err(NakamotoNodeError::SigningCoordinatorFailure( + "Not enough slots for miner messages".into(), + )); } // Get the LAST slot version number written to the DB. If not found, use 0. // Add 1 to get the NEXT version number @@ -154,13 +173,19 @@ impl SignerCoordinator { let miners_contract_id = boot_code_id(MINERS_NAME, is_mainnet); let slot_version = stackerdbs .get_slot_version(&miners_contract_id, slot_id) - .map_err(|e| format!("Failed to read slot version: {e:?}"))? + .map_err(|e| { + NakamotoNodeError::SigningCoordinatorFailure(format!( + "Failed to read slot version: {e:?}" + )) + })? 
.unwrap_or(0) .saturating_add(1); let mut chunk = StackerDBChunkData::new(slot_id, slot_version, message.serialize_to_vec()); - chunk - .sign(miner_sk) - .map_err(|_| "Failed to sign StackerDB chunk")?; + chunk.sign(miner_sk).map_err(|e| { + NakamotoNodeError::SigningCoordinatorFailure(format!( + "Failed to sign StackerDB chunk: {e:?}" + )) + })?; match miners_session.put_chunk(&chunk) { Ok(ack) => { @@ -168,10 +193,12 @@ impl SignerCoordinator { debug!("Wrote message to stackerdb: {ack:?}"); Ok(()) } else { - Err(format!("{ack:?}")) + Err(NakamotoNodeError::StackerDBUploadError(ack)) } } - Err(e) => Err(format!("{e:?}")), + Err(e) => Err(NakamotoNodeError::SigningCoordinatorFailure(format!( + "{e:?}" + ))), } } @@ -191,24 +218,23 @@ impl SignerCoordinator { pub fn propose_block( &mut self, block: &NakamotoBlock, - burn_tip: &BlockSnapshot, burnchain: &Burnchain, sortdb: &SortitionDB, chain_state: &mut StacksChainState, stackerdbs: &StackerDBs, counters: &Counters, - election_sortition: &ConsensusHash, + election_sortition: &BlockSnapshot, ) -> Result, NakamotoNodeError> { // Add this block to the block status map. 
self.stackerdb_comms.insert_block(&block.header); let reward_cycle_id = burnchain - .block_height_to_reward_cycle(burn_tip.block_height) + .block_height_to_reward_cycle(election_sortition.block_height) .expect("FATAL: tried to initialize coordinator before first burn block height"); let block_proposal = BlockProposal { block: block.clone(), - burn_height: burn_tip.block_height, + burn_height: election_sortition.block_height, reward_cycle: reward_cycle_id, }; @@ -219,15 +245,14 @@ impl SignerCoordinator { Self::send_miners_message::( &self.message_key, sortdb, - burn_tip, + election_sortition, stackerdbs, block_proposal_message, MinerSlotID::BlockProposal, self.is_mainnet, &mut self.miners_session, - election_sortition, - ) - .map_err(NakamotoNodeError::SigningCoordinatorFailure)?; + &election_sortition.consensus_hash, + )?; counters.bump_naka_proposed_blocks(); #[cfg(test)] @@ -251,7 +276,6 @@ impl SignerCoordinator { &block.block_id(), chain_state, sortdb, - burn_tip, counters, ) } @@ -267,7 +291,6 @@ impl SignerCoordinator { block_id: &StacksBlockId, chain_state: &mut StacksChainState, sortdb: &SortitionDB, - burn_tip: &BlockSnapshot, counters: &Counters, ) -> Result, NakamotoNodeError> { loop { @@ -308,7 +331,7 @@ impl SignerCoordinator { return Ok(stored_block.header.signer_signature); } - if Self::check_burn_tip_changed(sortdb, burn_tip) { + if self.check_burn_tip_changed(sortdb) { debug!("SignCoordinator: Exiting due to new burnchain tip"); return Err(NakamotoNodeError::BurnchainTipChanged); } @@ -350,12 +373,12 @@ impl SignerCoordinator { } /// Check if the tenure needs to change - fn check_burn_tip_changed(sortdb: &SortitionDB, burn_block: &BlockSnapshot) -> bool { + fn check_burn_tip_changed(&self, sortdb: &SortitionDB) -> bool { let cur_burn_chain_tip = SortitionDB::get_canonical_burn_chain_tip(sortdb.conn()) .expect("FATAL: failed to query sortition DB for canonical burn chain tip"); - if cur_burn_chain_tip.consensus_hash != burn_block.consensus_hash { - 
info!("SignerCoordinator: Cancel signature aggregation; burnchain tip has changed"); + if cur_burn_chain_tip.consensus_hash != self.burn_tip_at_start { + info!("SignCoordinator: Cancel signature aggregation; burnchain tip has changed"); true } else { false diff --git a/testnet/stacks-node/src/neon_node.rs b/testnet/stacks-node/src/neon_node.rs index 2d4dc7fadd..070837997d 100644 --- a/testnet/stacks-node/src/neon_node.rs +++ b/testnet/stacks-node/src/neon_node.rs @@ -2376,7 +2376,7 @@ impl BlockMinerThread { ) .map_err(|e| { warn!("Failed to write mock proposal to stackerdb."); - e + e.to_string() })?; // Retrieve any MockSignatures from stackerdb @@ -2404,7 +2404,7 @@ impl BlockMinerThread { ) .map_err(|e| { warn!("Failed to write mock block to stackerdb."); - e + e.to_string() })?; Ok(()) } diff --git a/testnet/stacks-node/src/run_loop/neon.rs b/testnet/stacks-node/src/run_loop/neon.rs index 4ecc84b73b..deead51066 100644 --- a/testnet/stacks-node/src/run_loop/neon.rs +++ b/testnet/stacks-node/src/run_loop/neon.rs @@ -104,6 +104,8 @@ pub struct Counters { pub missed_microblock_tenures: RunLoopCounter, pub cancelled_commits: RunLoopCounter, + pub sortitions_processed: RunLoopCounter, + pub naka_submitted_vrfs: RunLoopCounter, pub naka_submitted_commits: RunLoopCounter, pub naka_mined_blocks: RunLoopCounter, @@ -111,6 +113,7 @@ pub struct Counters { pub naka_proposed_blocks: RunLoopCounter, pub naka_mined_tenures: RunLoopCounter, pub naka_signer_pushed_blocks: RunLoopCounter, + pub naka_miner_directives: RunLoopCounter, #[cfg(test)] pub naka_skip_commit_op: TestFlag, @@ -141,6 +144,10 @@ impl Counters { Counters::inc(&self.blocks_processed); } + pub fn bump_sortitions_processed(&self) { + Counters::inc(&self.sortitions_processed); + } + pub fn bump_microblocks_processed(&self) { Counters::inc(&self.microblocks_processed); } @@ -185,6 +192,10 @@ impl Counters { Counters::inc(&self.naka_mined_tenures); } + pub fn bump_naka_miner_directives(&self) { + 
Counters::inc(&self.naka_miner_directives); + } + pub fn set_microblocks_processed(&self, value: u64) { Counters::set(&self.microblocks_processed, value) } diff --git a/testnet/stacks-node/src/tests/nakamoto_integrations.rs b/testnet/stacks-node/src/tests/nakamoto_integrations.rs index 28122d5fcf..7e7355e489 100644 --- a/testnet/stacks-node/src/tests/nakamoto_integrations.rs +++ b/testnet/stacks-node/src/tests/nakamoto_integrations.rs @@ -99,6 +99,7 @@ use super::bitcoin_regtest::BitcoinCoreController; use crate::nakamoto_node::miner::{ TEST_BLOCK_ANNOUNCE_STALL, TEST_BROADCAST_STALL, TEST_MINE_STALL, TEST_SKIP_P2P_BROADCAST, }; +use crate::nakamoto_node::relayer::{RelayerThread, TEST_MINER_THREAD_STALL}; use crate::neon::{Counters, RunLoopCounter}; use crate::operations::BurnchainOpSigner; use crate::run_loop::boot_nakamoto; @@ -195,9 +196,7 @@ lazy_static! { pub static TEST_SIGNING: Mutex> = Mutex::new(None); pub struct TestSigningChannel { - // pub recv: Option>, pub recv: Option>>, - // pub send: Sender, pub send: Sender>, } @@ -207,8 +206,6 @@ impl TestSigningChannel { /// Returns None if the singleton isn't instantiated and the miner should coordinate /// a real signer set signature. /// Panics if the blind-signer times out. 
- /// - /// TODO: update to use signatures vec pub fn get_signature() -> Option> { let mut signer = TEST_SIGNING.lock().unwrap(); let sign_channels = signer.as_mut()?; @@ -732,11 +729,29 @@ pub fn next_block_and_mine_commit( timeout_secs, &[coord_channels], &[commits_submitted], + true, + ) +} + +/// Mine a bitcoin block, and wait until a block-commit has been issued, **or** a timeout occurs +/// (timeout_secs) +pub fn next_block_and_commits_only( + btc_controller: &mut BitcoinRegtestController, + timeout_secs: u64, + coord_channels: &Arc>, + commits_submitted: &Arc, +) -> Result<(), String> { + next_block_and_wait_for_commits( + btc_controller, + timeout_secs, + &[coord_channels], + &[commits_submitted], + false, ) } /// Mine a bitcoin block, and wait until: -/// (1) a new block has been processed by the coordinator +/// (1) a new block has been processed by the coordinator (if `wait_for_stacks_block` is true) /// (2) 2 block commits have been issued ** or ** more than 10 seconds have /// passed since (1) occurred /// This waits for this check to pass on *all* supplied channels @@ -745,6 +760,7 @@ pub fn next_block_and_wait_for_commits( timeout_secs: u64, coord_channels: &[&Arc>], commits_submitted: &[&Arc], + wait_for_stacks_block: bool, ) -> Result<(), String> { let commits_submitted: Vec<_> = commits_submitted.to_vec(); let blocks_processed_before: Vec<_> = coord_channels @@ -778,6 +794,24 @@ pub fn next_block_and_wait_for_commits( } } + if !wait_for_stacks_block { + for i in 0..commits_submitted.len() { + // just wait for the commit + let commits_sent = commits_submitted[i].load(Ordering::SeqCst); + if commits_sent <= commits_before[i] { + return Ok(false); + } + + // if two commits have been sent, one of them must have been after + if commits_sent >= commits_before[i] + 1 { + continue; + } + return Ok(false); + } + return Ok(true); + } + + // waiting for both commit and stacks block for i in 0..commits_submitted.len() { let blocks_processed = 
coord_channels[i] .lock() @@ -786,15 +820,17 @@ pub fn next_block_and_wait_for_commits( let commits_sent = commits_submitted[i].load(Ordering::SeqCst); if blocks_processed > blocks_processed_before[i] { + // either we don't care about the stacks block count, or the block count advanced. + // Check the block-commits. let block_processed_time = block_processed_time[i] .as_ref() - .ok_or("TEST-ERROR: Processed time wasn't set")?; + .ok_or("TEST-ERROR: Processed block time wasn't set")?; if commits_sent <= commits_before[i] { return Ok(false); } let commit_sent_time = commit_sent_time[i] .as_ref() - .ok_or("TEST-ERROR: Processed time wasn't set")?; + .ok_or("TEST-ERROR: Processed commit time wasn't set")?; // try to ensure the commit was sent after the block was processed if commit_sent_time > block_processed_time { continue; @@ -5036,7 +5072,8 @@ fn forked_tenure_is_ignored() { // For the next tenure, submit the commit op but do not allow any stacks blocks to be broadcasted. // Stall the miner thread; only wait until the number of submitted commits increases. TEST_BROADCAST_STALL.set(true); - TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap().replace(true); + TEST_BLOCK_ANNOUNCE_STALL.set(true); + let blocks_before = mined_blocks.load(Ordering::SeqCst); let commits_before = commits_submitted.load(Ordering::SeqCst); @@ -5106,7 +5143,7 @@ fn forked_tenure_is_ignored() { .get_stacks_blocks_processed(); next_block_and(&mut btc_regtest_controller, 60, || { test_skip_commit_op.set(false); - TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap().replace(false); + TEST_BLOCK_ANNOUNCE_STALL.set(false); let commits_count = commits_submitted.load(Ordering::SeqCst); let blocks_count = mined_blocks.load(Ordering::SeqCst); let blocks_processed = coord_channel @@ -7061,6 +7098,15 @@ fn continue_tenure_extend() { ) .unwrap(); + // wait for the extended miner to include the tx in a block + // before we produce the next bitcoin block (this test will assert + // that this is the case at the end of the test). 
+ wait_for(60, || { + let nonce = get_account(&http_origin, &to_addr(&sender_sk)).nonce; + Ok(nonce > transfer_nonce) + }) + .unwrap(); + let blocks_processed_before = coord_channel .lock() .expect("Mutex poisoned") @@ -9763,7 +9809,7 @@ fn skip_mining_long_tx() { }) .unwrap(); - TEST_SKIP_P2P_BROADCAST.lock().unwrap().replace(true); + TEST_SKIP_P2P_BROADCAST.set(true); let tx = make_contract_publish( &sender_2_sk, 0, @@ -9790,7 +9836,7 @@ fn skip_mining_long_tx() { }) .unwrap(); - TEST_SKIP_P2P_BROADCAST.lock().unwrap().replace(false); + TEST_SKIP_P2P_BROADCAST.set(false); } else { let transfer_tx = make_stacks_transfer( &sender_1_sk, @@ -10628,7 +10674,457 @@ fn consensus_hash_event_dispatcher() { .lock() .expect("Mutex poisoned") .stop_chains_coordinator(); + run_loop_stopper.store(false, Ordering::SeqCst); run_loop_thread.join().unwrap(); } + +/// Miner wins sortition at Bitcoin height N +/// Relayer processes sortition N +/// Miner wins sortition at Bitcoin height N+1 +/// Transactions that depend on the burn view get submitted to the mempool +/// A flash block at height N+2 happens before the miner can publish its block-found for N+1 +/// The miner mines these transactions with a burn view for height N+2 +/// Result: the miner issues a tenure-extend from N+1 with burn view for N+2 +#[test] +#[ignore] +fn test_tenure_extend_from_flashblocks() { + if env::var("BITCOIND_TEST") != Ok("1".into()) { + return; + } + + let mut account_keys: Vec<_> = (0..11) + .map(|i| StacksPrivateKey::from_seed(&[6, 6, 6, i as u8])) + .collect(); + let initial_balances: Vec<_> = account_keys + .iter() + .map(|privk| { + let address = to_addr(&privk).into(); + (address, 1_000_000) + }) + .collect(); + + let deployer_sk = account_keys.pop().unwrap(); + let deployer_addr = tests::to_addr(&deployer_sk); + + let mut signer_test: SignerTest = SignerTest::new_with_config_modifications( + 1, + initial_balances, + |_| {}, + |_config| {}, + None, + None, + ); + signer_test.boot_to_epoch_3(); 
+ + let naka_conf = signer_test.running_nodes.conf.clone(); + let mining_key = naka_conf.miner.mining_key.clone().unwrap(); + let mining_key_pkh = Hash160::from_node_public_key(&StacksPublicKey::from_private(&mining_key)); + + let http_origin = format!("http://{}", &naka_conf.node.rpc_bind); + let btc_regtest_controller = &mut signer_test.running_nodes.btc_regtest_controller; + let coord_channel = signer_test.running_nodes.coord_channel.clone(); + let commits_submitted = signer_test.running_nodes.commits_submitted.clone(); + let sortitions_processed = signer_test.running_nodes.sortitions_processed.clone(); + let nakamoto_test_skip_commit_op = signer_test + .running_nodes + .nakamoto_test_skip_commit_op + .clone(); + let nakamoto_miner_directives = signer_test.running_nodes.nakamoto_miner_directives.clone(); + + let tx_fee = 1_000; + + let burnchain = naka_conf.get_burnchain(); + let sortdb = burnchain.open_sortition_db(true).unwrap(); + let (mut chainstate, _) = StacksChainState::open( + naka_conf.is_mainnet(), + naka_conf.burnchain.chain_id, + &naka_conf.get_chainstate_path_str(), + None, + ) + .unwrap(); + + for _ in 0..3 { + next_block_and_mine_commit( + btc_regtest_controller, + 60, + &coord_channel, + &commits_submitted, + ) + .unwrap(); + } + + let burn_view_contract = r#" +(define-data-var my-var uint u0) +(define-data-var my-counter uint u0) + +(define-public (f) + (begin + (var-set my-var burn-block-height) + (if (is-eq u0 (mod burn-block-height u2)) + (var-set my-counter (+ u1 (var-get my-counter))) + (var-set my-counter (+ u2 (var-get my-counter)))) + (print burn-block-height) + (ok 1) + ) +) + +(begin (f)) +"# + .to_string(); + + let contract_tx = make_contract_publish( + &deployer_sk, + 0, + tx_fee, + naka_conf.burnchain.chain_id, + "burn-view-contract", + &burn_view_contract, + ); + submit_tx(&http_origin, &contract_tx); + + let blocks_processed_before = coord_channel + .lock() + .expect("Mutex poisoned") + .get_stacks_blocks_processed(); + + 
 wait_for(120, || { + let blocks_processed = coord_channel + .lock() + .expect("Mutex poisoned") + .get_stacks_blocks_processed(); + Ok(blocks_processed > blocks_processed_before) + }) + .expect("Timed out waiting for interim blocks to be mined"); + + next_block_and_mine_commit( + btc_regtest_controller, + 60, + &coord_channel, + &commits_submitted, + ) + .unwrap(); + + // stall miner and relayer + + // make tenure but don't wait for a stacks block + next_block_and_commits_only( + btc_regtest_controller, + 60, + &coord_channel, + &commits_submitted, + ) + .unwrap(); + + // prevent the miner from sending another block-commit + nakamoto_test_skip_commit_op.set(true); + + // make sure we get a block-found tenure change + let blocks_processed_before = coord_channel + .lock() + .expect("Mutex poisoned") + .get_stacks_blocks_processed(); + + // make sure the relayer processes both sortitions + let sortitions_processed_before = sortitions_processed.load(Ordering::SeqCst); + + // mine another Bitcoin block right away, since it will contain a block-commit + btc_regtest_controller.bootstrap_chain(1); + + wait_for(60, || { + sleep_ms(100); + let sortitions_cnt = sortitions_processed.load(Ordering::SeqCst); + Ok(sortitions_cnt > sortitions_processed_before) + }) + .unwrap(); + + wait_for(120, || { + let blocks_processed = coord_channel + .lock() + .expect("Mutex poisoned") + .get_stacks_blocks_processed(); + Ok(blocks_processed > blocks_processed_before) + }) + .expect("Timed out waiting for interim blocks to be mined"); + + let (canonical_stacks_tip_ch, _) = + SortitionDB::get_canonical_stacks_chain_tip_hash(&sortdb.conn()).unwrap(); + let election_tip = + SortitionDB::get_block_snapshot_consensus(&sortdb.conn(), &canonical_stacks_tip_ch) + .unwrap() + .unwrap(); + let sort_tip = SortitionDB::get_canonical_burn_chain_tip(&sortdb.conn()).unwrap(); + + // Stacks chain tip originates from the tenure started at the burnchain tip + assert!(sort_tip.sortition); 
assert_eq!(sort_tip.consensus_hash, election_tip.consensus_hash); + + // stop the relayer thread from starting a miner thread, and stop the miner thread from mining + TEST_MINE_STALL.set(true); + TEST_MINER_THREAD_STALL.set(true); + + // mine another Bitcoin block right away, and force it to be a flash block + btc_regtest_controller.bootstrap_chain(1); + + let miner_directives_before = nakamoto_miner_directives.load(Ordering::SeqCst); + + // unblock the relayer so it can process the flash block sortition. + // Given the above, this will be an `Extend` tenure. + TEST_MINER_THREAD_STALL.set(false); + + let sortitions_processed_before = sortitions_processed.load(Ordering::SeqCst); + wait_for(60, || { + sleep_ms(100); + let sortitions_cnt = sortitions_processed.load(Ordering::SeqCst); + Ok(sortitions_cnt > sortitions_processed_before) + }) + .unwrap(); + + let (new_canonical_stacks_tip_ch, _) = + SortitionDB::get_canonical_stacks_chain_tip_hash(&sortdb.conn()).unwrap(); + let election_tip = + SortitionDB::get_block_snapshot_consensus(&sortdb.conn(), &new_canonical_stacks_tip_ch) + .unwrap() + .unwrap(); + let sort_tip = SortitionDB::get_canonical_burn_chain_tip(&sortdb.conn()).unwrap(); + + // this was a flash block -- no sortition + assert!(!sort_tip.sortition); + // canonical stacks tip burn view has not advanced + assert_eq!(new_canonical_stacks_tip_ch, canonical_stacks_tip_ch); + // the sortition that elected the ongoing tenure is not the canonical sortition tip + assert_ne!(sort_tip.consensus_hash, election_tip.consensus_hash); + + // we can, however, continue the tenure + let canonical_stacks_tip = RelayerThread::can_continue_tenure( + &sortdb, + &mut chainstate, + sort_tip.consensus_hash.clone(), + Some(mining_key_pkh.clone()), + ) + .unwrap() + .unwrap(); + assert_eq!(canonical_stacks_tip, election_tip); + + // if we didn't win the last block -- tantamount to the sortition winner miner key being + // different -- then we can't continue the tenure. 
+ assert!(RelayerThread::can_continue_tenure( + &sortdb, + &mut chainstate, + sort_tip.consensus_hash.clone(), + Some(Hash160([0x11; 20])) + ) + .unwrap() + .is_none()); + + let mut accounts_before = vec![]; + let mut sent_txids = vec![]; + + // fill mempool with transactions that depend on the burn view + for sender_sk in account_keys.iter() { + let sender_addr = tests::to_addr(&sender_sk); + let account = loop { + let Ok(account) = get_account_result(&http_origin, &sender_addr) else { + debug!("follower_bootup: Failed to load miner account"); + thread::sleep(Duration::from_millis(100)); + continue; + }; + break account; + }; + + // Fill up the mempool with contract calls + let contract_tx = make_contract_call( + &sender_sk, + account.nonce, + tx_fee, + naka_conf.burnchain.chain_id, + &deployer_addr, + "burn-view-contract", + "f", + &[], + ); + let txid = submit_tx(&http_origin, &contract_tx); + sent_txids.push(format!("0x{}", &txid.to_string())); + accounts_before.push(account); + } + + // unstall miner thread and allow block-commits again + nakamoto_test_skip_commit_op.set(false); + TEST_MINE_STALL.set(false); + + // wait for the miner directive to be processed + wait_for(60, || { + sleep_ms(30_000); + let directives_cnt = nakamoto_miner_directives.load(Ordering::SeqCst); + Ok(directives_cnt > miner_directives_before) + }) + .unwrap(); + + // wait for all of the aforementioned transactions to get mined + wait_for(120, || { + // fill mempool with transactions that depend on the burn view + for (sender_sk, account_before) in account_keys.iter().zip(accounts_before.iter()) { + let sender_addr = tests::to_addr(&sender_sk); + let account = loop { + let Ok(account) = get_account_result(&http_origin, &sender_addr) else { + thread::sleep(Duration::from_millis(100)); + continue; + }; + break account; + }; + + if account.nonce > account_before.nonce { + continue; + } + return Ok(false); + } + Ok(true) + }) + .unwrap(); + + // transactions are all mined, and all reflect 
the flash block's burn view. + // we had a tenure-extend as well. + let mut blocks = test_observer::get_blocks(); + blocks.sort_by_key(|block| block["block_height"].as_u64().unwrap()); + + let mut included_txids = HashSet::new(); + let mut has_extend = false; + for block in blocks.iter() { + for tx in block.get("transactions").unwrap().as_array().unwrap() { + let txid_str = tx.get("txid").unwrap().as_str().unwrap().to_string(); + included_txids.insert(txid_str); + + let raw_tx = tx.get("raw_tx").unwrap().as_str().unwrap(); + if raw_tx == "0x00" { + continue; + } + let tx_bytes = hex_bytes(&raw_tx[2..]).unwrap(); + let parsed = StacksTransaction::consensus_deserialize(&mut &tx_bytes[..]).unwrap(); + + if let TransactionPayload::TenureChange(payload) = &parsed.payload { + if payload.cause == TenureChangeCause::Extended { + has_extend = true; + } + } + } + } + + assert!(has_extend); + + let expected_txids: HashSet<_> = sent_txids.clone().into_iter().collect(); + for expected_txid in expected_txids.iter() { + if !included_txids.contains(expected_txid) { + panic!("Missing {}", expected_txid); + } + } + + // mine one additional tenure, to verify that we're on track + let commits_before = commits_submitted.load(Ordering::SeqCst); + let node_info_before = get_chain_info_opt(&naka_conf).unwrap(); + + btc_regtest_controller.bootstrap_chain(1); + + wait_for(20, || { + Ok(commits_submitted.load(Ordering::SeqCst) > commits_before) + }) + .unwrap(); + + // there was a sortition winner + let sort_tip = SortitionDB::get_canonical_burn_chain_tip(&sortdb.conn()).unwrap(); + assert!(sort_tip.sortition); + + wait_for(20, || { + let node_info = get_chain_info_opt(&naka_conf).unwrap(); + Ok( + node_info.burn_block_height > node_info_before.burn_block_height + && node_info.stacks_tip_height > node_info_before.stacks_tip_height, + ) + }) + .unwrap(); + + // boot a follower. 
it should reach the chain tip + info!("----- BEGIN FOLLOWER BOOTUP ------"); + + // see if we can boot a follower off of this node now + let mut follower_conf = naka_conf.clone(); + follower_conf.node.miner = false; + follower_conf.events_observers.clear(); + follower_conf.node.working_dir = format!("{}-follower", &naka_conf.node.working_dir); + follower_conf.node.seed = vec![0x01; 32]; + follower_conf.node.local_peer_seed = vec![0x02; 32]; + + let rpc_port = gen_random_port(); + let p2p_port = gen_random_port(); + + let localhost = "127.0.0.1"; + follower_conf.node.rpc_bind = format!("{localhost}:{rpc_port}"); + follower_conf.node.p2p_bind = format!("{localhost}:{p2p_port}"); + follower_conf.node.data_url = format!("http://{localhost}:{rpc_port}"); + follower_conf.node.p2p_address = format!("{localhost}:{p2p_port}"); + follower_conf.node.pox_sync_sample_secs = 30; + + let node_info = get_chain_info(&naka_conf); + follower_conf.node.add_bootstrap_node( + &format!( + "{}@{}", + &node_info.node_public_key.unwrap(), + naka_conf.node.p2p_bind + ), + naka_conf.burnchain.chain_id, + PEER_VERSION_TESTNET, + ); + + let mut follower_run_loop = boot_nakamoto::BootRunLoop::new(follower_conf.clone()).unwrap(); + let follower_run_loop_stopper = follower_run_loop.get_termination_switch(); + let follower_coord_channel = follower_run_loop.coordinator_channels(); + + debug!( + "Booting follower-thread ({},{})", + &follower_conf.node.p2p_bind, &follower_conf.node.rpc_bind + ); + debug!( + "Booting follower-thread: neighbors = {:?}", + &follower_conf.node.bootstrap_node + ); + + // spawn a follower thread + let follower_thread = thread::Builder::new() + .name("follower-thread".into()) + .spawn(move || follower_run_loop.start(None, 0)) + .unwrap(); + + debug!("Booted follower-thread"); + + let miner_info = get_chain_info_result(&naka_conf).unwrap(); + + wait_for(300, || { + let Ok(info) = get_chain_info_result(&follower_conf) else { + sleep_ms(1000); + return Ok(false); + }; + debug!( 
+ "Miner tip is {}/{}; follower tip is {}/{}", + &miner_info.stacks_tip_consensus_hash, + &miner_info.stacks_tip, + &info.stacks_tip_consensus_hash, + &info.stacks_tip + ); + Ok(miner_info.stacks_tip == info.stacks_tip + && miner_info.stacks_tip_consensus_hash == info.stacks_tip_consensus_hash) + }) + .unwrap(); + + coord_channel + .lock() + .expect("Mutex poisoned") + .stop_chains_coordinator(); + + follower_coord_channel + .lock() + .expect("Mutex poisoned") + .stop_chains_coordinator(); + follower_run_loop_stopper.store(false, Ordering::SeqCst); + + follower_thread.join().unwrap(); +} diff --git a/testnet/stacks-node/src/tests/neon_integrations.rs b/testnet/stacks-node/src/tests/neon_integrations.rs index 9be1514ea6..9be9658f89 100644 --- a/testnet/stacks-node/src/tests/neon_integrations.rs +++ b/testnet/stacks-node/src/tests/neon_integrations.rs @@ -53,6 +53,7 @@ use stacks::net::api::getaccount::AccountEntryResponse; use stacks::net::api::getcontractsrc::ContractSrcResponse; use stacks::net::api::getinfo::RPCPeerInfoData; use stacks::net::api::getpoxinfo::RPCPoxInfoData; +use stacks::net::api::getsortition::SortitionInfo; use stacks::net::api::gettransaction_unconfirmed::UnconfirmedTransactionResponse; use stacks::net::api::postblock::StacksBlockAcceptedData; use stacks::net::api::postfeerate::RPCFeeEstimateResponse; @@ -1364,7 +1365,7 @@ pub fn get_account_result( let client = reqwest::blocking::Client::new(); let path = format!("{http_origin}/v2/accounts/{account}?proof=0"); let res = client.get(&path).send()?.json::()?; - info!("Account response: {res:#?}"); + debug!("Account response: {res:#?}"); Ok(Account { balance: u128::from_str_radix(&res.balance[2..], 16).unwrap(), locked: u128::from_str_radix(&res.locked[2..], 16).unwrap(), @@ -1376,6 +1377,22 @@ pub fn get_account(http_origin: &str, account: &F) -> Acco get_account_result(http_origin, account).unwrap() } +pub fn get_sortition_info(conf: &Config) -> SortitionInfo { + let client = 
reqwest::blocking::Client::new(); + let http_origin = format!("http://{}", &conf.node.rpc_bind); + let path = format!("{http_origin}/v3/sortitions"); + let mut resp: Vec<_> = client.get(&path).send().unwrap().json().unwrap(); + resp.pop().unwrap() +} + +pub fn get_sortition_info_ch(conf: &Config, ch: &ConsensusHash) -> SortitionInfo { + let client = reqwest::blocking::Client::new(); + let http_origin = format!("http://{}", &conf.node.rpc_bind); + let path = format!("{http_origin}/v3/sortitions/consensus/{ch}"); + let mut resp: Vec<_> = client.get(&path).send().unwrap().json().unwrap(); + resp.pop().unwrap() +} + pub fn get_neighbors(conf: &Config) -> Option { let client = reqwest::blocking::Client::new(); let http_origin = format!("http://{}", &conf.node.rpc_bind); diff --git a/testnet/stacks-node/src/tests/signer/mod.rs b/testnet/stacks-node/src/tests/signer/mod.rs index ed4560c70a..7839126533 100644 --- a/testnet/stacks-node/src/tests/signer/mod.rs +++ b/testnet/stacks-node/src/tests/signer/mod.rs @@ -89,10 +89,12 @@ pub struct RunningNodes { pub vrfs_submitted: RunLoopCounter, pub commits_submitted: RunLoopCounter, pub blocks_processed: RunLoopCounter, + pub sortitions_processed: RunLoopCounter, pub nakamoto_blocks_proposed: RunLoopCounter, pub nakamoto_blocks_mined: RunLoopCounter, pub nakamoto_blocks_rejected: RunLoopCounter, pub nakamoto_blocks_signer_pushed: RunLoopCounter, + pub nakamoto_miner_directives: Arc, pub nakamoto_test_skip_commit_op: TestFlag, pub coord_channel: Arc>, pub conf: NeonConfig, @@ -127,7 +129,7 @@ impl + Send + 'static, T: SignerEventTrait + 'static> SignerTest( + pub fn new_with_config_modifications( num_signers: usize, initial_balances: Vec<(StacksAddress, u64)>, mut signer_config_modifier: F, @@ -378,6 +380,7 @@ impl + Send + 'static, T: SignerEventTrait + 'static> SignerTest( let run_loop_stopper = run_loop.get_termination_switch(); let Counters { blocks_processed, + sortitions_processed, naka_submitted_vrfs: vrfs_submitted, 
naka_submitted_commits: commits_submitted, naka_proposed_blocks: naka_blocks_proposed, naka_mined_blocks: naka_blocks_mined, naka_rejected_blocks: naka_blocks_rejected, + naka_miner_directives, naka_skip_commit_op: nakamoto_test_skip_commit_op, naka_signer_pushed_blocks, .. @@ -937,11 +942,13 @@ fn setup_stx_btc_node( vrfs_submitted, commits_submitted, blocks_processed, + sortitions_processed, nakamoto_blocks_proposed: naka_blocks_proposed, nakamoto_blocks_mined: naka_blocks_mined, nakamoto_blocks_rejected: naka_blocks_rejected, nakamoto_blocks_signer_pushed: naka_signer_pushed_blocks, nakamoto_test_skip_commit_op, + nakamoto_miner_directives: naka_miner_directives.0, coord_channel, conf: naka_conf, } diff --git a/testnet/stacks-node/src/tests/signer/v0.rs b/testnet/stacks-node/src/tests/signer/v0.rs index 005a492757..cebd773ecf 100644 --- a/testnet/stacks-node/src/tests/signer/v0.rs +++ b/testnet/stacks-node/src/tests/signer/v0.rs @@ -84,8 +84,8 @@ use crate::tests::nakamoto_integrations::{ POX_4_DEFAULT_STACKER_BALANCE, POX_4_DEFAULT_STACKER_STX_AMT, }; use crate::tests::neon_integrations::{ - get_account, get_chain_info, get_chain_info_opt, next_block_and_wait, - run_until_burnchain_height, submit_tx, submit_tx_fallible, test_observer, + get_account, get_chain_info, get_chain_info_opt, get_sortition_info, get_sortition_info_ch, + next_block_and_wait, run_until_burnchain_height, submit_tx, submit_tx_fallible, test_observer, }; use crate::tests::{ self, gen_random_port, make_contract_call, make_contract_publish, make_stacks_transfer, @@ -586,9 +586,7 @@ fn miner_gather_signatures() { // Disable p2p broadcast of the nakamoto blocks, so that we rely // on the signer's using StackerDB to get pushed blocks - *nakamoto_node::miner::TEST_SKIP_P2P_BROADCAST - .lock() - .unwrap() = Some(true); + nakamoto_node::miner::TEST_SKIP_P2P_BROADCAST.set(true); info!("------------------------- Test Setup -------------------------"); let num_signers = 5; @@ -1020,7 +1018,8 @@ fn 
forked_tenure_testing( // For the next tenure, submit the commit op but do not allow any stacks blocks to be broadcasted TEST_BROADCAST_STALL.set(true); - TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap().replace(true); + TEST_BLOCK_ANNOUNCE_STALL.set(true); + let blocks_before = mined_blocks.load(Ordering::SeqCst); let commits_before = commits_submitted.load(Ordering::SeqCst); @@ -1097,7 +1096,7 @@ fn forked_tenure_testing( if !expect_tenure_c { // allow B to process, so it'll be distinct from C - TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap().replace(false); + TEST_BLOCK_ANNOUNCE_STALL.set(false); sleep_ms(1000); } @@ -1123,7 +1122,7 @@ fn forked_tenure_testing( let commits_count = commits_submitted.load(Ordering::SeqCst); if commits_count > commits_before { // now allow block B to process if it hasn't already. - TEST_BLOCK_ANNOUNCE_STALL.lock().unwrap().replace(false); + TEST_BLOCK_ANNOUNCE_STALL.set(false); } let rejected_count = rejected_blocks.load(Ordering::SeqCst); let (blocks_count, rbf_count, has_reject_count) = if expect_tenure_c { @@ -1360,6 +1359,8 @@ fn bitcoind_forking_test() { info!("Wait for block off of shallow fork"); + TEST_MINE_STALL.set(true); + // we need to mine some blocks to get back to being considered a frequent miner for i in 0..3 { let current_burn_height = get_chain_info(&signer_test.running_nodes.conf).burn_block_height; @@ -1403,8 +1404,10 @@ fn bitcoind_forking_test() { let post_fork_1_nonce = get_account(&http_origin, &miner_address).nonce; + // We should have forked 1 block (-2 nonces) assert_eq!(post_fork_1_nonce, pre_fork_1_nonce - 2); + TEST_MINE_STALL.set(false); for i in 0..5 { info!("Mining post-fork tenure {} of 5", i + 1); signer_test.mine_nakamoto_block(Duration::from_secs(30), true); @@ -1437,6 +1440,7 @@ fn bitcoind_forking_test() { info!("Wait for block off of deep fork"); // we need to mine some blocks to get back to being considered a frequent miner + TEST_MINE_STALL.set(true); for i in 0..3 { let current_burn_height = 
get_chain_info(&signer_test.running_nodes.conf).burn_block_height; info!( @@ -1481,6 +1485,8 @@ fn bitcoind_forking_test() { assert_eq!(post_fork_2_nonce, pre_fork_2_nonce - 4 * 2); + TEST_MINE_STALL.set(false); + for i in 0..5 { info!("Mining post-fork tenure {} of 5", i + 1); signer_test.mine_nakamoto_block(Duration::from_secs(30), true); @@ -4455,9 +4461,7 @@ fn duplicate_signers() { // Disable p2p broadcast of the nakamoto blocks, so that we rely // on the signer's using StackerDB to get pushed blocks - *nakamoto_node::miner::TEST_SKIP_P2P_BROADCAST - .lock() - .unwrap() = Some(true); + nakamoto_node::miner::TEST_SKIP_P2P_BROADCAST.set(true); info!("------------------------- Test Setup -------------------------"); let num_signers = 5; @@ -6481,6 +6485,9 @@ fn continue_after_fast_block_no_sortition() { let node_2_rpc = gen_random_port(); let node_2_p2p = gen_random_port(); + debug!("Node 1 bound at (p2p={}, rpc={})", node_1_p2p, node_1_rpc); + debug!("Node 2 bound at (p2p={}, rpc={})", node_2_p2p, node_2_rpc); + let localhost = "127.0.0.1"; let node_1_rpc_bind = format!("{localhost}:{node_1_rpc}"); let node_2_rpc_bind = format!("{localhost}:{node_2_rpc}"); @@ -6813,7 +6820,11 @@ fn continue_after_fast_block_no_sortition() { // Allow signers to respond to proposals again TEST_REJECT_ALL_BLOCK_PROPOSAL.set(Vec::new()); - info!("------------------------- Wait for Miner B's Block N -------------------------"); + info!("------------------------- Wait for Miner B's Block N -------------------------"; + "blocks_processed_before_2" => %blocks_processed_before_2, + "stacks_height_before" => %stacks_height_before, + "nmb_old_blocks" => %nmb_old_blocks); + // wait for the new block to be processed wait_for(30, || { let stacks_height = signer_test @@ -6821,6 +6832,15 @@ fn continue_after_fast_block_no_sortition() { .get_peer_info() .expect("Failed to get peer info") .stacks_tip_height; + + let blocks_mined1_val = blocks_mined1.load(Ordering::SeqCst); + let blocks_mined2_val 
= blocks_mined2.load(Ordering::SeqCst); + info!("Waiting for Miner B's Block N"; + "blocks_mined1_val" => %blocks_mined1_val, + "blocks_mined2_val" => %blocks_mined2_val, + "stacks_height" => %stacks_height, + "observed_blocks" => %test_observer::get_blocks().len()); + Ok( blocks_mined2.load(Ordering::SeqCst) > blocks_processed_before_2 && stacks_height > stacks_height_before @@ -6855,7 +6875,7 @@ fn continue_after_fast_block_no_sortition() { ); submit_tx(&http_origin, &transfer_tx); - // wait for the new block to be processed + // wait for the tenure-extend block to be processed wait_for(30, || { let stacks_height = signer_test .stacks_client @@ -6870,6 +6890,40 @@ fn continue_after_fast_block_no_sortition() { }) .expect("Timed out waiting for block to be mined and processed"); + verify_last_block_contains_tenure_change_tx(TenureChangeCause::Extended); + + let nmb_old_blocks = test_observer::get_blocks().len(); + let blocks_processed_before_2 = blocks_mined2.load(Ordering::SeqCst); + let stacks_height_before = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + + // wait for the new block with the STX transfer to be processed + wait_for(30, || { + let stacks_height = signer_test + .stacks_client + .get_peer_info() + .expect("Failed to get peer info") + .stacks_tip_height; + + let blocks_mined1_val = blocks_mined1.load(Ordering::SeqCst); + let blocks_mined2_val = blocks_mined2.load(Ordering::SeqCst); + info!("Waiting for Miner B's Block N"; + "blocks_mined1_val" => %blocks_mined1_val, + "blocks_mined2_val" => %blocks_mined2_val, + "stacks_height" => %stacks_height, + "observed_blocks" => %test_observer::get_blocks().len()); + + Ok( + blocks_mined2.load(Ordering::SeqCst) > blocks_processed_before_2 + && stacks_height > stacks_height_before + && test_observer::get_blocks().len() > nmb_old_blocks, + ) + }) + .expect("Timed out waiting for block to be mined and processed"); + info!("------------------------- 
Verify Miner B's Block N+1 -------------------------"); verify_last_block_contains_transfer_tx(); @@ -6934,7 +6988,7 @@ fn continue_after_fast_block_no_sortition() { .expect("Failed to get peer info"); assert_eq!(get_burn_height(), starting_burn_height + btc_blocks_mined); - assert_eq!(peer_info.stacks_tip_height, starting_peer_height + 5); + assert_eq!(peer_info.stacks_tip_height, starting_peer_height + 6); info!("------------------------- Shutdown -------------------------"); rl2_coord_channels @@ -8079,10 +8133,16 @@ fn new_tenure_while_validating_previous_scenario() { #[test] #[ignore] +#[should_panic] /// Test that a miner will extend its tenure after the succeeding miner fails to mine a block. /// - Miner 1 wins a tenure and mines normally /// - Miner 2 wins a tenure but fails to mine a block /// - Miner 1 extends its tenure +/// +/// As of today, this test will panic because Miner 1 will not issue a TenureExtend due to Miner +/// 2's preceding block-commit being seemingly-valid. This test verifies that this panic does +/// indeed occur, and will be subsequently modified once the miner code is updated so that miner 1 +/// can deduce that miner 2 is likely offline. fn tenure_extend_after_failed_miner() { if env::var("BITCOIND_TEST") != Ok("1".into()) { return; } @@ -8370,7 +8430,8 @@ fn tenure_extend_after_failed_miner() { info!("------------------------- Miner 1 Extends Tenure A -------------------------"); - // Re-enable block mining + // Re-enable block mining, for both miners. + // Since miner B has been offline, it won't be able to mine. 
TEST_MINE_STALL.set(false); // wait for a tenure extend block from miner 1 to be processed @@ -8426,38 +8487,6 @@ fn tenure_extend_after_failed_miner() { }) .expect("Timed out waiting for block to be mined and processed"); - // Re-enable block commits for miner 2 - let rl2_commits_before = rl2_commits.load(Ordering::SeqCst); - rl2_skip_commit_op.set(true); - - // Wait for block commit from miner 2 - wait_for(30, || { - Ok(rl2_commits.load(Ordering::SeqCst) > rl2_commits_before) - }) - .expect("Timed out waiting for block commit from miner 2"); - - info!("------------------------- Miner 2 Mines the Next Tenure -------------------------"); - - let stacks_height_before = signer_test - .stacks_client - .get_peer_info() - .expect("Failed to get peer info") - .stacks_tip_height; - - next_block_and( - &mut signer_test.running_nodes.btc_regtest_controller, - 60, - || { - let stacks_height = signer_test - .stacks_client - .get_peer_info() - .expect("Failed to get peer info") - .stacks_tip_height; - Ok(stacks_height > stacks_height_before) - }, - ) - .expect("Timed out waiting for final block to be mined and processed"); - info!("------------------------- Shutdown -------------------------"); rl2_coord_channels .lock() @@ -8655,6 +8684,7 @@ fn tenure_extend_after_bad_commit() { }; info!("------------------------- Pause Miner 1's Block Commit -------------------------"); + // Make sure miner 1 doesn't submit any further block commits for the next tenure BEFORE mining the bitcoin block rl1_skip_commit_op.set(true); @@ -11180,7 +11210,7 @@ fn injected_signatures_are_ignored_across_boundaries() { /// The signers reject block N'. /// The miner proposes block N+1. /// The signers accept block N+1. 
-// +/// /// Test Assertion: /// Stacks tip advances to N+1 fn rejected_blocks_count_towards_miner_validity() { @@ -11347,6 +11377,482 @@ fn rejected_blocks_count_towards_miner_validity() { signer_test.shutdown(); } +#[test] +#[ignore] +fn fast_sortition() { + if env::var("BITCOIND_TEST") != Ok("1".into()) { + return; + } + + info!("------------------------- Test Setup -------------------------"); + let num_signers = 5; + let sender_sk = Secp256k1PrivateKey::new(); + let sender_addr = tests::to_addr(&sender_sk); + + let mut sender_nonce = 0; + let send_amt = 100; + let send_fee = 400; + let num_transfers = 3; + let recipient = PrincipalData::from(StacksAddress::burn_address(false)); + let mut signer_test: SignerTest = SignerTest::new( + num_signers, + vec![(sender_addr, num_transfers * (send_amt + send_fee))], + ); + + let http_origin = format!("http://{}", &signer_test.running_nodes.conf.node.rpc_bind); + + signer_test.boot_to_epoch_3(); + + info!("------------------------- Mine a Block -------------------------"); + let transfer_tx = make_stacks_transfer( + &sender_sk, + sender_nonce, + send_fee, + signer_test.running_nodes.conf.burnchain.chain_id, + &recipient, + send_amt, + ); + submit_tx(&http_origin, &transfer_tx); + sender_nonce += 1; + + wait_for(60, || { + Ok(get_account(&http_origin, &sender_addr).nonce == sender_nonce) + }) + .expect("Timed out waiting for call tx to be mined"); + + info!("------------------------- Cause a missed sortition -------------------------"); + + signer_test + .running_nodes + .btc_regtest_controller + .build_next_block(1); + next_block_and_process_new_stacks_block( + &mut signer_test.running_nodes.btc_regtest_controller, + 60, + &signer_test.running_nodes.coord_channel, + ) + .expect("Failed to mine a block"); + + info!("------------------------- Mine a Block -------------------------"); + let transfer_tx = make_stacks_transfer( + &sender_sk, + sender_nonce, + send_fee, + signer_test.running_nodes.conf.burnchain.chain_id, + 
&recipient, + send_amt, + ); + submit_tx(&http_origin, &transfer_tx); + sender_nonce += 1; + + wait_for(60, || { + Ok(get_account(&http_origin, &sender_addr).nonce == sender_nonce) + }) + .expect("Timed out waiting for call tx to be mined"); + + info!("------------------------- Shutdown -------------------------"); + signer_test.shutdown(); +} + +#[test] +#[ignore] +/// This test spins up two nakamoto nodes, both configured to mine. +/// After Nakamoto blocks are mined, it waits for a normal tenure, then issues +/// two bitcoin blocks in quick succession -- the first will contain block commits, +/// and the second "flash block" will contain no block commits. +/// The test checks if the winner of the first block is different than the previous tenure. +/// If so, it performs the actual test: asserting that the miner wakes up and produces valid blocks. +/// This test uses the burn-block-height to ensure consistent calculation of the burn view between +/// the miner thread and the block processor +fn multiple_miners_empty_sortition() { + if env::var("BITCOIND_TEST") != Ok("1".into()) { + return; + } + let num_signers = 5; + let sender_sk = Secp256k1PrivateKey::new(); + let sender_addr = tests::to_addr(&sender_sk); + let send_fee = 180; + + let btc_miner_1_seed = vec![1, 1, 1, 1]; + let btc_miner_2_seed = vec![2, 2, 2, 2]; + let btc_miner_1_pk = Keychain::default(btc_miner_1_seed.clone()).get_pub_key(); + let btc_miner_2_pk = Keychain::default(btc_miner_2_seed.clone()).get_pub_key(); + + let node_1_rpc = gen_random_port(); + let node_1_p2p = gen_random_port(); + let node_2_rpc = gen_random_port(); + let node_2_p2p = gen_random_port(); + + let localhost = "127.0.0.1"; + let node_1_rpc_bind = format!("{localhost}:{node_1_rpc}"); + let node_2_rpc_bind = format!("{localhost}:{node_2_rpc}"); + let mut node_2_listeners = Vec::new(); + + let max_nakamoto_tenures = 30; + // partition the signer set so that ~half are listening and using node 1 for RPC and events, + // and the 
rest are using node 2 + + let mut signer_test: SignerTest = SignerTest::new_with_config_modifications( + num_signers, + vec![(sender_addr, send_fee * 2 * 60 + 1000)], + |signer_config| { + let node_host = if signer_config.endpoint.port() % 2 == 0 { + &node_1_rpc_bind + } else { + &node_2_rpc_bind + }; + signer_config.node_host = node_host.to_string(); + }, + |config| { + config.node.rpc_bind = format!("{localhost}:{node_1_rpc}"); + config.node.p2p_bind = format!("{localhost}:{node_1_p2p}"); + config.node.data_url = format!("http://{localhost}:{node_1_rpc}"); + config.node.p2p_address = format!("{localhost}:{node_1_p2p}"); + config.miner.wait_on_interim_blocks = Duration::from_secs(5); + config.node.pox_sync_sample_secs = 30; + config.burnchain.pox_reward_length = Some(max_nakamoto_tenures); + + config.node.seed = btc_miner_1_seed.clone(); + config.node.local_peer_seed = btc_miner_1_seed.clone(); + config.burnchain.local_mining_public_key = Some(btc_miner_1_pk.to_hex()); + config.miner.mining_key = Some(Secp256k1PrivateKey::from_seed(&[1])); + + config.events_observers.retain(|listener| { + let Ok(addr) = std::net::SocketAddr::from_str(&listener.endpoint) else { + warn!( + "Cannot parse {} to a socket, assuming it isn't a signer-listener binding", + listener.endpoint + ); + return true; + }; + if addr.port() % 2 == 0 || addr.port() == test_observer::EVENT_OBSERVER_PORT { + return true; + } + node_2_listeners.push(listener.clone()); + false + }) + }, + Some(vec![btc_miner_1_pk, btc_miner_2_pk]), + None, + ); + let conf = signer_test.running_nodes.conf.clone(); + let mut conf_node_2 = conf.clone(); + conf_node_2.node.rpc_bind = format!("{localhost}:{node_2_rpc}"); + conf_node_2.node.p2p_bind = format!("{localhost}:{node_2_p2p}"); + conf_node_2.node.data_url = format!("http://{localhost}:{node_2_rpc}"); + conf_node_2.node.p2p_address = format!("{localhost}:{node_2_p2p}"); + conf_node_2.node.seed = btc_miner_2_seed.clone(); + 
conf_node_2.burnchain.local_mining_public_key = Some(btc_miner_2_pk.to_hex()); + conf_node_2.node.local_peer_seed = btc_miner_2_seed.clone(); + conf_node_2.miner.mining_key = Some(Secp256k1PrivateKey::from_seed(&[2])); + conf_node_2.node.miner = true; + conf_node_2.events_observers.clear(); + conf_node_2.events_observers.extend(node_2_listeners); + assert!(!conf_node_2.events_observers.is_empty()); + + let node_1_sk = Secp256k1PrivateKey::from_seed(&conf.node.local_peer_seed); + let node_1_pk = StacksPublicKey::from_private(&node_1_sk); + + conf_node_2.node.working_dir = format!("{}-1", conf_node_2.node.working_dir); + + conf_node_2.node.set_bootstrap_nodes( + format!("{}@{}", &node_1_pk.to_hex(), conf.node.p2p_bind), + conf.burnchain.chain_id, + conf.burnchain.peer_version, + ); + + let mut run_loop_2 = boot_nakamoto::BootRunLoop::new(conf_node_2.clone()).unwrap(); + let run_loop_stopper_2 = run_loop_2.get_termination_switch(); + let rl2_coord_channels = run_loop_2.coordinator_channels(); + let Counters { + naka_submitted_commits: rl2_commits, + .. 
+ } = run_loop_2.counters(); + let run_loop_2_thread = thread::Builder::new() + .name("run_loop_2".into()) + .spawn(move || run_loop_2.start(None, 0)) + .unwrap(); + + signer_test.boot_to_epoch_3(); + + wait_for(120, || { + let Some(node_1_info) = get_chain_info_opt(&conf) else { + return Ok(false); + }; + let Some(node_2_info) = get_chain_info_opt(&conf_node_2) else { + return Ok(false); + }; + Ok(node_1_info.stacks_tip_height == node_2_info.stacks_tip_height) + }) + .expect("Timed out waiting for boostrapped node to catch up to the miner"); + + info!("------------------------- Reached Epoch 3.0 -------------------------"); + + let burn_height_contract = " + (define-data-var local-burn-block-ht uint u0) + (define-public (run-update) + (ok (var-set local-burn-block-ht burn-block-height))) + "; + + let contract_tx = make_contract_publish( + &sender_sk, + 0, + 1000, + conf.burnchain.chain_id, + "burn-height-local", + burn_height_contract, + ); + submit_tx(&conf.node.data_url, &contract_tx); + + let rl1_coord_channels = signer_test.running_nodes.coord_channel.clone(); + let rl1_commits = signer_test.running_nodes.commits_submitted.clone(); + + let last_sender_nonce = loop { + // Mine 1 nakamoto tenures + info!("Mining tenure..."); + + signer_test.mine_block_wait_on_processing( + &[&rl1_coord_channels, &rl2_coord_channels], + &[&rl1_commits, &rl2_commits], + Duration::from_secs(30), + ); + + // mine the interim blocks + for _ in 0..2 { + let sender_nonce = get_account(&conf.node.data_url, &sender_addr).nonce; + // check if the burn contract is already produced, if not wait for it to be included in + // an interim block + if sender_nonce >= 1 { + let contract_call_tx = make_contract_call( + &sender_sk, + sender_nonce, + send_fee, + conf.burnchain.chain_id, + &sender_addr, + "burn-height-local", + "run-update", + &[], + ); + submit_tx(&conf.node.data_url, &contract_call_tx); + } + + // make sure the sender's tx gets included (whether it was the contract publish or call) 
+ wait_for(60, || { + let next_sender_nonce = get_account(&conf.node.data_url, &sender_addr).nonce; + Ok(next_sender_nonce > sender_nonce) + }) + .unwrap(); + } + + let last_active_sortition = get_sortition_info(&conf); + assert!(last_active_sortition.was_sortition); + + // lets mine a btc flash block + let rl2_commits_before = rl2_commits.load(Ordering::SeqCst); + let rl1_commits_before = rl1_commits.load(Ordering::SeqCst); + signer_test + .running_nodes + .btc_regtest_controller + .build_next_block(2); + + wait_for(60, || { + Ok(rl2_commits.load(Ordering::SeqCst) > rl2_commits_before + && rl1_commits.load(Ordering::SeqCst) > rl1_commits_before) + }) + .unwrap(); + + let cur_empty_sortition = get_sortition_info(&conf); + assert!(!cur_empty_sortition.was_sortition); + let inactive_sortition = get_sortition_info_ch( + &conf, + cur_empty_sortition.last_sortition_ch.as_ref().unwrap(), + ); + assert!(inactive_sortition.was_sortition); + assert_eq!( + inactive_sortition.burn_block_height, + last_active_sortition.burn_block_height + 1 + ); + + info!("==================== Mined a flash block ===================="); + info!("Flash block sortition info"; + "last_active_winner" => ?last_active_sortition.miner_pk_hash160, + "last_winner" => ?inactive_sortition.miner_pk_hash160, + "last_active_ch" => %last_active_sortition.consensus_hash, + "last_winner_ch" => %inactive_sortition.consensus_hash, + "cur_empty_sortition" => %cur_empty_sortition.consensus_hash, + ); + + if last_active_sortition.miner_pk_hash160 != inactive_sortition.miner_pk_hash160 { + info!( + "==================== Mined a flash block with changed miners ====================" + ); + break get_account(&conf.node.data_url, &sender_addr).nonce; + } + }; + + // after the flash block, make sure we get block processing without a new bitcoin block + // being mined. 
+ + for _ in 0..2 { + let sender_nonce = get_account(&conf.node.data_url, &sender_addr).nonce; + let contract_call_tx = make_contract_call( + &sender_sk, + sender_nonce, + send_fee, + conf.burnchain.chain_id, + &sender_addr, + "burn-height-local", + "run-update", + &[], + ); + submit_tx(&conf.node.data_url, &contract_call_tx); + + wait_for(60, || { + let next_sender_nonce = get_account(&conf.node.data_url, &sender_addr).nonce; + Ok(next_sender_nonce > sender_nonce) + }) + .unwrap(); + } + + assert_eq!( + get_account(&conf.node.data_url, &sender_addr).nonce, + last_sender_nonce + 2, + "The last two transactions after the flash block must be included in a block" + ); + + rl2_coord_channels + .lock() + .expect("Mutex poisoned") + .stop_chains_coordinator(); + run_loop_stopper_2.store(false, Ordering::SeqCst); + run_loop_2_thread.join().unwrap(); + signer_test.shutdown(); +} + +#[test] +#[ignore] +/// This test spins up a single nakamoto node configured to mine. +/// After Nakamoto blocks are mined, it waits for a normal tenure, then issues +/// two bitcoin blocks in quick succession -- the first will contain block commits, +/// and the second "flash block" will contain no block commits. +/// The test then tries to continue producing a normal tenure: issuing a bitcoin block +/// with a sortition in it. +/// The test does 3 rounds of this to make sure that the network continues producing blocks throughout. 
+fn single_miner_empty_sortition() { + if env::var("BITCOIND_TEST") != Ok("1".into()) { + return; + } + let num_signers = 5; + let sender_sk = Secp256k1PrivateKey::new(); + let sender_addr = tests::to_addr(&sender_sk); + let send_fee = 180; + + // partition the signer set so that ~half are listening and using node 1 for RPC and events, + // and the rest are using node 2 + + let mut signer_test: SignerTest = + SignerTest::new(num_signers, vec![(sender_addr, send_fee * 2 * 60 + 1000)]); + let conf = signer_test.running_nodes.conf.clone(); + + signer_test.boot_to_epoch_3(); + + info!("------------------------- Reached Epoch 3.0 -------------------------"); + + let burn_height_contract = " + (define-data-var local-burn-block-ht uint u0) + (define-public (run-update) + (ok (var-set local-burn-block-ht burn-block-height))) + "; + + let contract_tx = make_contract_publish( + &sender_sk, + 0, + 1000, + conf.burnchain.chain_id, + "burn-height-local", + burn_height_contract, + ); + submit_tx(&conf.node.data_url, &contract_tx); + + let rl1_coord_channels = signer_test.running_nodes.coord_channel.clone(); + let rl1_commits = signer_test.running_nodes.commits_submitted.clone(); + + for _i in 0..3 { + // Mine 1 nakamoto tenures + info!("Mining tenure..."); + + signer_test.mine_block_wait_on_processing( + &[&rl1_coord_channels], + &[&rl1_commits], + Duration::from_secs(30), + ); + + // mine the interim blocks + for _ in 0..2 { + let sender_nonce = get_account(&conf.node.data_url, &sender_addr).nonce; + // check if the burn contract is already produced, if not wait for it to be included in + // an interim block + if sender_nonce >= 1 { + let contract_call_tx = make_contract_call( + &sender_sk, + sender_nonce, + send_fee, + conf.burnchain.chain_id, + &sender_addr, + "burn-height-local", + "run-update", + &[], + ); + submit_tx(&conf.node.data_url, &contract_call_tx); + } + + // make sure the sender's tx gets included (whether it was the contract publish or call) + wait_for(60, || { 
+ let next_sender_nonce = get_account(&conf.node.data_url, &sender_addr).nonce; + Ok(next_sender_nonce > sender_nonce) + }) + .unwrap(); + } + + let last_active_sortition = get_sortition_info(&conf); + assert!(last_active_sortition.was_sortition); + + // lets mine a btc flash block + let rl1_commits_before = rl1_commits.load(Ordering::SeqCst); + signer_test + .running_nodes + .btc_regtest_controller + .build_next_block(2); + + wait_for(60, || { + Ok(rl1_commits.load(Ordering::SeqCst) > rl1_commits_before) + }) + .unwrap(); + + let cur_empty_sortition = get_sortition_info(&conf); + assert!(!cur_empty_sortition.was_sortition); + let inactive_sortition = get_sortition_info_ch( + &conf, + cur_empty_sortition.last_sortition_ch.as_ref().unwrap(), + ); + assert!(inactive_sortition.was_sortition); + assert_eq!( + inactive_sortition.burn_block_height, + last_active_sortition.burn_block_height + 1 + ); + + info!("==================== Mined a flash block ===================="); + info!("Flash block sortition info"; + "last_active_winner" => ?last_active_sortition.miner_pk_hash160, + "last_winner" => ?inactive_sortition.miner_pk_hash160, + "last_active_ch" => %last_active_sortition.consensus_hash, + "last_winner_ch" => %inactive_sortition.consensus_hash, + "cur_empty_sortition" => %cur_empty_sortition.consensus_hash, + ); + } + signer_test.shutdown(); +} + #[test] #[ignore] /// Test that signers mark a miner malicious if it doesn't propose any blocks before the block proposal timeout @@ -11363,7 +11869,7 @@ fn rejected_blocks_count_towards_miner_validity() { /// Miner propose a block N. /// Signers reject the block and mark the miner as malicious. /// -// +/// /// Test Assertion: /// Stacks tip does not advance to block N. fn block_proposal_timeout() {