
Commit

Merge pull request stacks-network#4345 from stacks-network/hotfix/stackerdb-stale-view

Hotfix/stackerdb stale view
wileyj authored Feb 6, 2024
2 parents c659fca + e97b244 commit 5aaf813
Showing 9 changed files with 299 additions and 34 deletions.
32 changes: 17 additions & 15 deletions stackslib/src/chainstate/burn/db/sortdb.rs
@@ -3883,34 +3883,21 @@ impl SortitionDB {
.unwrap_or(&burnchain.first_block_hash)
.clone();

let rc = burnchain
.block_height_to_reward_cycle(chain_tip.block_height)
.expect("FATAL: block height does not have a reward cycle");

let rc_height = burnchain.reward_cycle_to_block_height(rc);
let rc_consensus_hash = SortitionDB::get_ancestor_snapshot(
conn,
cmp::min(chain_tip.block_height, rc_height),
&chain_tip.sortition_id,
)?
.map(|sn| sn.consensus_hash)
.ok_or(db_error::NotFoundError)?;

test_debug!(
"Chain view: {},{}-{},{},{}",
chain_tip.block_height,
chain_tip.burn_header_hash,
stable_block_height,
&burn_stable_block_hash,
&rc_consensus_hash,
&chain_tip.canonical_stacks_tip_consensus_hash,
);
Ok(BurnchainView {
burn_block_height: chain_tip.block_height,
burn_block_hash: chain_tip.burn_header_hash,
burn_stable_block_height: stable_block_height,
burn_stable_block_hash: burn_stable_block_hash,
last_burn_block_hashes: last_burn_block_hashes,
rc_consensus_hash,
rc_consensus_hash: chain_tip.canonical_stacks_tip_consensus_hash,
})
}
}
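Taken together, the changes in this hunk mean that `BurnchainView::rc_consensus_hash` is now the consensus hash of the node's canonical Stacks tip, rather than the consensus hash of the sortition that opened the current reward cycle. A minimal sketch of what peers effectively compare under the new definition (the `views_agree` helper is hypothetical and not part of this commit; import paths are assumed from the surrounding codebase):

```rust
// Illustrative only; import paths are assumed.
use stacks_common::types::chainstate::ConsensusHash;

use crate::burnchains::BurnchainView;

/// Hypothetical helper (not in this commit): peers treat their StackerDB views as
/// consistent when their advertised rc_consensus_hash values match. After this hunk
/// that means "same canonical Stacks tip", which can diverge briefly on every
/// burnchain block instead of only at reward-cycle boundaries.
fn views_agree(local: &BurnchainView, remote_rc_consensus_hash: &ConsensusHash) -> bool {
    local.rc_consensus_hash == *remote_rc_consensus_hash
}
```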
@@ -4099,6 +4086,21 @@ impl SortitionDB {
Ok((consensus_hash, stacks_block_hash))
}

#[cfg(test)]
pub fn set_canonical_stacks_chain_tip(
conn: &Connection,
ch: &ConsensusHash,
bhh: &BlockHeaderHash,
height: u64,
) -> Result<(), db_error> {
let tip = SortitionDB::get_canonical_burn_chain_tip(conn)?;
let args: &[&dyn ToSql] = &[ch, bhh, &u64_to_sql(height)?, &tip.sortition_id];
conn.execute("UPDATE snapshots SET canonical_stacks_tip_consensus_hash = ?1, canonical_stacks_tip_hash = ?2, canonical_stacks_tip_height = ?3
WHERE sortition_id = ?4", args)
.map_err(db_error::SqliteError)?;
Ok(())
}
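The new `#[cfg(test)]` helper above overwrites the canonical Stacks tip recorded at the current sortition tip, which makes it easy for tests to simulate two nodes that disagree on the Stacks tip (and therefore NACK each other with a stale view). A hedged usage sketch; the test scaffolding that produces `sortdb` and the replacement tip values is assumed:

```rust
// Sketch: inside a test, force a different canonical Stacks tip so that this node's
// rc_consensus_hash no longer matches its peers'. `sortdb`, `other_ch`, `other_bhh`,
// and `other_height` are assumed to be set up by the enclosing test.
SortitionDB::set_canonical_stacks_chain_tip(sortdb.conn(), &other_ch, &other_bhh, other_height)
    .expect("FATAL: failed to set canonical Stacks chain tip");
```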

/// Get the maximum arrival index for any known snapshot.
fn get_max_arrival_index(conn: &Connection) -> Result<u64, db_error> {
match conn
24 changes: 19 additions & 5 deletions stackslib/src/net/api/poststackerdbchunk.rs
@@ -116,20 +116,23 @@ impl HttpRequest for RPCPostStackerDBChunkRequestHandler {
pub enum StackerDBErrorCodes {
DataAlreadyExists,
NoSuchSlot,
BadSigner,
}

impl StackerDBErrorCodes {
pub fn code(&self) -> u32 {
match self {
Self::DataAlreadyExists => 0,
Self::NoSuchSlot => 1,
Self::BadSigner => 2,
}
}

pub fn reason(&self) -> &'static str {
match self {
Self::DataAlreadyExists => "Data for this slot and version already exist",
Self::NoSuchSlot => "No such StackerDB slot",
Self::BadSigner => "Signature does not match slot signer",
}
}
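The new `BadSigner` variant lets the HTTP handler distinguish "your chunk's signature doesn't match the slot's signer" from "your version is stale". A small sketch of how the error payload is assembled, mirroring the handler change further down (the exact JSON shape produced by `into_json()` is defined outside this excerpt):

```rust
#[test]
fn stackerdb_bad_signer_error_payload() {
    // Sketch: pick the new error variant and serialize it the way the handler below does.
    let code = StackerDBErrorCodes::BadSigner;
    assert_eq!(code.code(), 2);
    assert_eq!(code.reason(), "Signature does not match slot signer");

    // The precise JSON shape comes from into_json(), which is not shown in this diff.
    let _payload = serde_json::to_string(&code.into_json())
        .unwrap_or("(unable to encode JSON)".to_string());
}
```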

@@ -183,11 +186,18 @@ impl RPCRequestHandler for RPCPostStackerDBChunkRequestHandler {
&HttpNotFound::new("StackerDB not found".to_string()),
));
}
if let Err(_e) = tx.try_replace_chunk(
if let Err(e) = tx.try_replace_chunk(
&contract_identifier,
&stackerdb_chunk.get_slot_metadata(),
&stackerdb_chunk.data,
) {
test_debug!(
"Failed to replace chunk {}.{} in {}: {:?}",
stackerdb_chunk.slot_id,
stackerdb_chunk.slot_version,
&contract_identifier,
&e
);
let slot_metadata_opt =
match tx.get_slot_metadata(&contract_identifier, stackerdb_chunk.slot_id) {
Ok(slot_opt) => slot_opt,
@@ -209,11 +219,15 @@ impl RPCRequestHandler for RPCPostStackerDBChunkRequestHandler {

let (reason, slot_metadata_opt) = if let Some(slot_metadata) = slot_metadata_opt
{
let code = if let NetError::BadSlotSigner(..) = e {
StackerDBErrorCodes::BadSigner
} else {
StackerDBErrorCodes::DataAlreadyExists
};

(
serde_json::to_string(
&StackerDBErrorCodes::DataAlreadyExists.into_json(),
)
.unwrap_or("(unable to encode JSON)".to_string()),
serde_json::to_string(&code.into_json())
.unwrap_or("(unable to encode JSON)".to_string()),
Some(slot_metadata),
)
} else {
11 changes: 7 additions & 4 deletions stackslib/src/net/chat.rs
@@ -1339,8 +1339,8 @@ impl ConversationP2P {
self.update_from_stacker_db_handshake_data(stackerdb_accept);
} else {
// remote peer's burnchain view has diverged, so assume no longer replicating (we
// can't talk to it anyway). This can happen once per reward cycle for a few
// minutes as nodes begin the next reward cycle, but it's harmless -- at worst, it
// can't talk to it anyway). This can happen once per burnchain block for a few
// seconds as nodes begin processing the next Stacks blocks, but it's harmless -- at worst, it
// just means that no stacker DB replication happens between this peer and
// localhost during this time.
self.clear_stacker_db_handshake_data();
@@ -1779,13 +1779,16 @@ impl ConversationP2P {
let local_peer = network.get_local_peer();
let burnchain_view = network.get_chain_view();

// remote peer's Stacks chain tip is different from ours, meaning it might have a different
// stackerdb configuration view (and we won't be able to authenticate their chunks, and
// vice versa)
if burnchain_view.rc_consensus_hash != getchunkinv.rc_consensus_hash {
debug!(
"{:?}: NACK StackerDBGetChunkInv; {} != {}",
local_peer, &burnchain_view.rc_consensus_hash, &getchunkinv.rc_consensus_hash
);
return Ok(StacksMessageType::Nack(NackData::new(
NackErrorCodes::InvalidPoxFork,
NackErrorCodes::StaleView,
)));
}

@@ -1827,7 +1830,7 @@ impl ConversationP2P {
local_peer, &burnchain_view.rc_consensus_hash, &getchunk.rc_consensus_hash
);
return Ok(StacksMessageType::Nack(NackData::new(
NackErrorCodes::InvalidPoxFork,
NackErrorCodes::StaleView,
)));
}

11 changes: 7 additions & 4 deletions stackslib/src/net/mod.rs
@@ -975,6 +975,7 @@ pub mod NackErrorCodes {
pub const InvalidMessage: u32 = 5;
pub const NoSuchDB: u32 = 6;
pub const StaleVersion: u32 = 7;
pub const StaleView: u32 = 8;
}
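`StaleView` is the only one of these codes that signals a transient, recoverable condition: the peer is healthy but currently on a different canonical Stacks tip. A hedged sketch of how a requester can branch on it (a hypothetical helper; the actual handling added by this commit lives in `stackerdb/sync.rs` further down):

```rust
/// Hypothetical helper, not part of this commit: should a NACKing StackerDB replica
/// be remembered as "stale" (retry once views converge) rather than dropped outright?
fn nack_marks_neighbor_stale(nack: &NackData) -> bool {
    nack.error_code == NackErrorCodes::StaleView
}
```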

#[derive(Debug, Clone, PartialEq)]
@@ -997,7 +998,9 @@ pub struct NatPunchData {
/// Inform the remote peer of (a page of) the list of stacker DB contracts this node supports
#[derive(Debug, Clone, PartialEq)]
pub struct StackerDBHandshakeData {
/// current reward cycle ID
/// current reward cycle consensus hash (i.e. the consensus hash of the Stacks tip in the
/// current reward cycle, which commits to both the Stacks block tip and the underlying PoX
/// history).
pub rc_consensus_hash: ConsensusHash,
/// list of smart contracts that we index.
/// there can be as many as 256 entries.
@@ -1009,7 +1012,7 @@
pub struct StackerDBGetChunkInvData {
/// smart contract being used to determine chunk quantity and order
pub contract_id: QualifiedContractIdentifier,
/// consensus hash of the sortition that started this reward cycle
/// consensus hash of the Stacks chain tip in this reward cycle
pub rc_consensus_hash: ConsensusHash,
}

@@ -1028,7 +1031,7 @@ pub struct StackerDBChunkInvData {
pub struct StackerDBGetChunkData {
/// smart contract being used to determine slot quantity and order
pub contract_id: QualifiedContractIdentifier,
/// consensus hash of the sortition that started this reward cycle
/// consensus hash of the Stacks chain tip in this reward cycle
pub rc_consensus_hash: ConsensusHash,
/// slot ID
pub slot_id: u32,
@@ -1041,7 +1044,7 @@ pub struct StackerDBGetChunkData {
pub struct StackerDBPushChunkData {
/// smart contract being used to determine chunk quantity and order
pub contract_id: QualifiedContractIdentifier,
/// consensus hash of the sortition that started this reward cycle
/// consensus hash of the Stacks chain tip in this reward cycle
pub rc_consensus_hash: ConsensusHash,
/// the pushed chunk
pub chunk_data: StackerDBChunkData,
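Every StackerDB wire message above now documents `rc_consensus_hash` as the consensus hash of the sender's canonical Stacks tip, which the receiver compares against its own view before answering (see the `chat.rs` hunks earlier in this diff). A hedged construction sketch using the fully-shown `StackerDBGetChunkInvData` struct; how the contract identifier and the current `BurnchainView` are obtained is assumed:

```rust
/// Hypothetical helper (not in this commit): stamp an outbound chunk-inventory request
/// with our current Stacks-tip view so the remote peer can tell whether we share its
/// StackerDB configuration view. Types are those defined in this module.
fn make_getchunkinv(
    contract_id: QualifiedContractIdentifier,
    view: &BurnchainView,
) -> StackerDBGetChunkInvData {
    StackerDBGetChunkInvData {
        contract_id,
        rc_consensus_hash: view.rc_consensus_hash.clone(),
    }
}
```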
19 changes: 17 additions & 2 deletions stackslib/src/net/p2p.rs
@@ -5224,7 +5224,12 @@ impl PeerNetwork {
// update burnchain snapshot if we need to (careful -- it's expensive)
let sn = SortitionDB::get_canonical_burn_chain_tip(&sortdb.conn())?;
let mut ret: HashMap<NeighborKey, Vec<StacksMessage>> = HashMap::new();
if sn.block_height != self.chain_view.burn_block_height {
let mut need_stackerdb_refresh = sn.canonical_stacks_tip_consensus_hash
!= self.burnchain_tip.canonical_stacks_tip_consensus_hash;

if sn.block_height != self.chain_view.burn_block_height
|| self.num_state_machine_passes == 0
{
debug!(
"{:?}: load chain view for burn block {}",
&self.local_peer, sn.block_height
@@ -5303,7 +5308,17 @@ impl PeerNetwork {
.get_last_selected_anchor_block_txid()?
.unwrap_or(Txid([0x00; 32]));

// refresh stackerdb configs
test_debug!(
"{:?}: chain view is {:?}",
&self.get_local_peer(),
&self.chain_view
);
need_stackerdb_refresh = true;
}

if need_stackerdb_refresh {
// refresh stackerdb configs -- canonical stacks tip has changed
debug!("{:?}: Refresh all stackerdbs", &self.get_local_peer());
let mut new_stackerdb_configs = HashMap::new();
let stacker_db_configs = mem::replace(&mut self.stacker_db_configs, HashMap::new());
for (stackerdb_contract_id, stackerdb_config) in stacker_db_configs.into_iter() {
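The net effect of the `p2p.rs` hunks above is that StackerDB configs are reloaded not only when the burnchain tip advances, but whenever the canonical Stacks tip consensus hash changes (or on the very first state-machine pass). A condensed sketch of the effective condition, written as a hypothetical standalone helper rather than the inline form used in the diff:

```rust
use stacks_common::types::chainstate::ConsensusHash; // path assumed

/// Hypothetical condensation of the refresh logic above: true when StackerDB configs
/// should be rebuilt for the new chain view.
fn need_stackerdb_refresh(
    sortition_stacks_tip_ch: &ConsensusHash,
    cached_stacks_tip_ch: &ConsensusHash,
    sortition_burn_height: u64,
    chain_view_burn_height: u64,
    num_state_machine_passes: u64,
) -> bool {
    sortition_stacks_tip_ch != cached_stacks_tip_ch
        || sortition_burn_height != chain_view_burn_height
        || num_state_machine_passes == 0
}
```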
5 changes: 5 additions & 0 deletions stackslib/src/net/stackerdb/mod.rs
@@ -151,6 +151,8 @@ pub struct StackerDBSyncResult {
dead: HashSet<NeighborKey>,
/// neighbors that misbehaved while syncing
broken: HashSet<NeighborKey>,
/// neighbors that have stale views, but are otherwise online
pub(crate) stale: HashSet<NeighborAddress>,
}

/// Settings for the Stacker DB
@@ -262,6 +264,8 @@ pub struct StackerDBSync<NC: NeighborComms> {
/// whether or not we should immediately re-fetch chunks because we learned about new chunks
/// from our peers when they replied to our chunk-pushes with new inventory state
need_resync: bool,
/// Track stale neighbors
pub(crate) stale_neighbors: HashSet<NeighborAddress>,
}

impl StackerDBSyncResult {
@@ -274,6 +278,7 @@ impl StackerDBSyncResult {
chunks_to_store: vec![chunk.chunk_data],
dead: HashSet::new(),
broken: HashSet::new(),
stale: HashSet::new(),
}
}
}
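The new `stale` set surfaces, to the caller of a sync pass, which replicas answered with `StaleView` NACKs, as opposed to peers that died or misbehaved. How the caller consumes it is outside this excerpt; the following is a purely hypothetical sketch of one consumer, with names and policy that are assumptions rather than part of the commit:

```rust
use std::collections::HashSet;

/// Hypothetical consumer: stale peers are healthy, so don't ban them; just queue them
/// for another attempt once the canonical Stacks tips converge again.
fn note_stale_replicas(
    result: &StackerDBSyncResult,
    retry_later: &mut HashSet<NeighborAddress>,
) {
    for naddr in result.stale.iter() {
        retry_later.insert(naddr.clone());
    }
}
```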
25 changes: 22 additions & 3 deletions stackslib/src/net/stackerdb/sync.rs
@@ -33,9 +33,9 @@ use crate::net::stackerdb::{
StackerDBConfig, StackerDBSync, StackerDBSyncResult, StackerDBSyncState, StackerDBs,
};
use crate::net::{
Error as net_error, NackData, Neighbor, NeighborAddress, NeighborKey, StackerDBChunkData,
StackerDBChunkInvData, StackerDBGetChunkData, StackerDBGetChunkInvData, StackerDBPushChunkData,
StacksMessageType,
Error as net_error, NackData, NackErrorCodes, Neighbor, NeighborAddress, NeighborKey,
StackerDBChunkData, StackerDBChunkInvData, StackerDBGetChunkData, StackerDBGetChunkInvData,
StackerDBPushChunkData, StacksMessageType,
};

const MAX_CHUNKS_IN_FLIGHT: usize = 6;
@@ -72,6 +72,7 @@ impl<NC: NeighborComms> StackerDBSync<NC> {
total_pushed: 0,
last_run_ts: 0,
need_resync: false,
stale_neighbors: HashSet::new(),
};
dbsync.reset(None, config);
dbsync
@@ -178,6 +179,7 @@ impl<NC: NeighborComms> StackerDBSync<NC> {
chunks_to_store: chunks,
dead: self.comms.take_dead_neighbors(),
broken: self.comms.take_broken_neighbors(),
stale: std::mem::replace(&mut self.stale_neighbors, HashSet::new()),
};

// keep all connected replicas, and replenish from config hints and the DB as needed
@@ -677,6 +679,7 @@ impl<NC: NeighborComms> StackerDBSync<NC> {
&network.get_chain_view().rc_consensus_hash,
&db_data.rc_consensus_hash
);
self.connected_replicas.remove(&naddr);
continue;
}
db_data
@@ -688,6 +691,10 @@ impl<NC: NeighborComms> StackerDBSync<NC> {
&naddr,
data.error_code
);
self.connected_replicas.remove(&naddr);
if data.error_code == NackErrorCodes::StaleView {
self.stale_neighbors.insert(naddr);
}
continue;
}
x => {
@@ -800,10 +807,15 @@ impl<NC: NeighborComms> StackerDBSync<NC> {
&naddr,
data.error_code
);
self.connected_replicas.remove(&naddr);
if data.error_code == NackErrorCodes::StaleView {
self.stale_neighbors.insert(naddr);
}
continue;
}
x => {
info!("Received unexpected message {:?}", &x);
self.connected_replicas.remove(&naddr);
continue;
}
};
@@ -929,10 +941,14 @@ impl<NC: NeighborComms> StackerDBSync<NC> {
data.error_code
);
self.connected_replicas.remove(&naddr);
if data.error_code == NackErrorCodes::StaleView {
self.stale_neighbors.insert(naddr);
}
continue;
}
x => {
info!("Received unexpected message {:?}", &x);
self.connected_replicas.remove(&naddr);
continue;
}
};
@@ -1072,6 +1088,9 @@ impl<NC: NeighborComms> StackerDBSync<NC> {
data.error_code
);
self.connected_replicas.remove(&naddr);
if data.error_code == NackErrorCodes::StaleView {
self.stale_neighbors.insert(naddr);
}
continue;
}
x => {
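Each of the NACK-handling arms above now repeats the same three lines: drop the replica from `connected_replicas`, and additionally record it in `stale_neighbors` when the error code is `StaleView`. A hedged sketch of how that repetition could be factored out (this helper is hypothetical; the commit inlines the logic at each site):

```rust
impl<NC: NeighborComms> StackerDBSync<NC> {
    /// Hypothetical helper, not part of this commit: shared reaction to a NACKed
    /// StackerDB sync request.
    fn handle_nack(&mut self, naddr: NeighborAddress, data: &NackData) {
        // A NACKing peer is not a usable replica for the rest of this pass.
        self.connected_replicas.remove(&naddr);
        // StaleView only means its canonical Stacks tip differs from ours right now;
        // remember it so the caller can retry after views converge.
        if data.error_code == NackErrorCodes::StaleView {
            self.stale_neighbors.insert(naddr);
        }
    }
}
```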
(The remaining 2 of the 9 changed files did not load in this view.)
