From a19e019f8f6924bfbc1f0e9d3b0ef7d12b4713d3 Mon Sep 17 00:00:00 2001 From: David Estes Date: Tue, 18 Jun 2024 16:01:45 -0600 Subject: [PATCH 01/15] fix: correct/better IOD - is now correct and handles long streams out of order successfully in all cases (afaict). Should be more performant as it only does work when it should be necessary. - includes test of long streams --- service/src/event/migration.rs | 6 +- service/src/event/mod.rs | 1 + service/src/event/order_events.rs | 100 +++ service/src/event/ordering_task.rs | 1008 +++++++++++++--------------- service/src/event/service.rs | 430 +++++------- service/src/tests/mod.rs | 87 +-- service/src/tests/ordering.rs | 151 ++++- store/Cargo.toml | 6 +- store/benches/sqlite_store.rs | 2 +- store/src/sql/access/event.rs | 39 +- store/src/sql/entities/event.rs | 64 +- store/src/sql/query.rs | 11 +- store/src/sql/test.rs | 3 +- 13 files changed, 1005 insertions(+), 903 deletions(-) create mode 100644 service/src/event/order_events.rs diff --git a/service/src/event/migration.rs b/service/src/event/migration.rs index 0172ef9bd..4b85342ef 100644 --- a/service/src/event/migration.rs +++ b/service/src/event/migration.rs @@ -327,11 +327,7 @@ impl EventBuilder { .build(); Ok(EventInsertable::try_new( event_id, - EventInsertableBody { - cid: self.event_cid, - deliverable: false, - blocks: self.blocks, - }, + EventInsertableBody::new(self.event_cid, self.blocks, false), )?) 
} } diff --git a/service/src/event/mod.rs b/service/src/event/mod.rs index fdf734ee7..5fb6b01b9 100644 --- a/service/src/event/mod.rs +++ b/service/src/event/mod.rs @@ -1,4 +1,5 @@ mod migration; +mod order_events; mod ordering_task; mod service; mod store; diff --git a/service/src/event/order_events.rs b/service/src/event/order_events.rs new file mode 100644 index 000000000..88111ea0f --- /dev/null +++ b/service/src/event/order_events.rs @@ -0,0 +1,100 @@ +use std::collections::HashSet; + +use ceramic_store::{CeramicOneEvent, EventInsertable, SqlitePool}; +use cid::Cid; + +use crate::Result; + +use super::service::EventHeader; + +pub(crate) struct OrderEvents { + pub(crate) deliverable: Vec<(EventInsertable, EventHeader)>, + pub(crate) missing_history: Vec<(EventInsertable, EventHeader)>, +} + +impl OrderEvents { + /// Groups the events into lists of those with a delivered prev and those without. This can be used to return an error if the event is required to have history. + /// The events will be marked as deliverable so that they can be passed directly to the store to be persisted. + /// Will look up the prev from the database if needed to check if it's deliverable (could possibly change this for recon and allow the ordering task to handle it?) 
+ pub async fn try_new( + pool: &SqlitePool, + mut candidate_events: Vec<(EventInsertable, EventHeader)>, + ) -> Result { + let new_cids: HashSet = + HashSet::from_iter(candidate_events.iter().map(|(e, _)| e.cid())); + let mut deliverable = Vec::with_capacity(candidate_events.len()); + candidate_events.retain(|(e, h)| { + if e.deliverable() { + deliverable.push((e.clone(), h.clone())); + false + } else { + true + } + }); + if candidate_events.is_empty() { + return Ok(OrderEvents { + deliverable, + missing_history: Vec::new(), + }); + } + + let mut prevs_in_memory = Vec::with_capacity(candidate_events.len()); + let mut missing_history = Vec::with_capacity(candidate_events.len()); + + while let Some((mut event, header)) = candidate_events.pop() { + match header.prev() { + None => { + unreachable!("Init events should have been filtered out since they're always deliverable"); + } + Some(prev) => { + if new_cids.contains(&prev) { + prevs_in_memory.push((event, header)); + continue; + } else { + let (_exists, prev_deliverable) = + CeramicOneEvent::deliverable_by_cid(pool, &prev).await?; + if prev_deliverable { + event.body.set_deliverable(true); + deliverable.push((event, header)); + } else { + // technically, we may have the "rosetta stone" event in memory that could unlock this chain, if we loaded everything and recursed, + // but the immediate prev is not in this set and has not been delivered to the client yet, so they shouldn't have known how to + // construct this event so we'll consider this missing history. This can be used to return an error if the event is required to have history. 
+ missing_history.push((event, header)); + } + } + } + } + } + + // We add the events to the deliverable list until nothing changes + while let Some((mut event, header)) = prevs_in_memory.pop() { + let mut made_changes = false; + match header.prev() { + None => { + unreachable!("Init events should have been filtered out of the in memory set"); + } + Some(prev) => { + // a hashset would be better loopkup but we're not going to have that many events so hashing + // for a handful of lookups and then convert back to a vec probably isn't worth it. + if deliverable.iter().any(|(e, _)| e.cid() == prev) { + event.body.set_deliverable(true); + deliverable.push((event, header)); + made_changes = true; + } else { + prevs_in_memory.push((event, header)); + } + } + } + if !made_changes { + missing_history.extend(prevs_in_memory); + break; + } + } + + Ok(OrderEvents { + deliverable, + missing_history, + }) + } +} diff --git a/service/src/event/ordering_task.rs b/service/src/event/ordering_task.rs index 9956e44f9..d3ce40f72 100644 --- a/service/src/event/ordering_task.rs +++ b/service/src/event/ordering_task.rs @@ -1,73 +1,33 @@ -use std::collections::{HashMap, HashSet, VecDeque}; +use std::collections::{HashMap, VecDeque}; -use anyhow::anyhow; +use ceramic_core::EventId; use ceramic_store::{CeramicOneEvent, SqlitePool}; use cid::Cid; use tracing::{debug, error, info, trace, warn}; use crate::{CeramicEventService, Error, Result}; +use super::service::{EventHeader, InsertableBodyWithHeader}; + /// How many events to select at once to see if they've become deliverable when we have downtime /// Used at startup and occassionally in case we ever dropped something /// We keep the number small for now as we may need to traverse many prevs for each one of these and load them into memory. -const DELIVERABLE_EVENTS_BATCH_SIZE: usize = 1000; +const DELIVERABLE_EVENTS_BATCH_SIZE: u32 = 1000; + /// How many batches of undelivered events are we willing to process on start up? 
/// To avoid an infinite loop. It's going to take a long time to process `DELIVERABLE_EVENTS_BATCH_SIZE * MAX_ITERATIONS` events const MAX_ITERATIONS: usize = 100_000_000; -/// How often should we try to process all undelivered events in case we missed something -const CHECK_ALL_INTERVAL_SECONDS: u64 = 60 * 10; // 10 minutes - -type InitCid = cid::Cid; -type PrevCid = cid::Cid; -type EventCid = cid::Cid; - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct DeliveredEvent { - pub(crate) cid: Cid, - pub(crate) init_cid: InitCid, -} - -impl DeliveredEvent { - pub fn new(cid: Cid, init_cid: InitCid) -> Self { - Self { cid, init_cid } - } -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub(crate) struct DeliverableMetadata { - pub(crate) init_cid: InitCid, - pub(crate) prev: PrevCid, -} - -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct DeliverableEvent { - pub(crate) cid: EventCid, - pub(crate) meta: DeliverableMetadata, - attempts: usize, - last_attempt: std::time::Instant, - started: std::time::Instant, - expires: Option, -} - -impl DeliverableEvent { - pub fn new(cid: Cid, meta: DeliverableMetadata, expires: Option) -> Self { - Self { - cid, - meta, - attempts: 0, - last_attempt: std::time::Instant::now(), - started: std::time::Instant::now(), - expires, - } - } -} +type StreamCid = Cid; +type EventCid = Cid; +type PrevCid = Cid; #[derive(Debug)] pub struct DeliverableTask { pub(crate) _handle: tokio::task::JoinHandle<()>, - pub(crate) tx: tokio::sync::mpsc::Sender, - pub(crate) tx_new: tokio::sync::mpsc::Sender, + /// Currently events discovered over recon that are out of order and need to be marked ready (deliverable) + /// when their prev chain is discovered and complete (i.e. my prev is deliverable then I am deliverable). 
+ pub(crate) tx_inserted: tokio::sync::mpsc::Sender, } #[derive(Debug)] @@ -75,30 +35,33 @@ pub struct OrderingTask {} impl OrderingTask { pub async fn run(pool: SqlitePool, q_depth: usize, load_delivered: bool) -> DeliverableTask { - let (tx, rx) = tokio::sync::mpsc::channel::(q_depth); - let (tx_new, rx_new) = tokio::sync::mpsc::channel::(q_depth); + let (tx_inserted, rx_inserted) = + tokio::sync::mpsc::channel::(q_depth); let handle = - tokio::spawn(async move { Self::run_loop(pool, load_delivered, rx, rx_new).await }); + tokio::spawn(async move { Self::run_loop(pool, load_delivered, rx_inserted).await }); DeliverableTask { _handle: handle, - tx, - tx_new, + tx_inserted, } } async fn run_loop( pool: SqlitePool, load_undelivered: bool, - mut rx: tokio::sync::mpsc::Receiver, - mut rx_new: tokio::sync::mpsc::Receiver, + mut rx_inserted: tokio::sync::mpsc::Receiver, ) { // before starting, make sure we've updated any events in the database we missed + // this could take a long time. possibly we want to put it in another task so we can start processing events immediately let mut state = OrderingState::new(); if load_undelivered && state - .process_all_undelivered_events(&pool, MAX_ITERATIONS) + .process_all_undelivered_events( + &pool, + MAX_ITERATIONS, + DELIVERABLE_EVENTS_BATCH_SIZE, + ) .await .map_err(Self::log_error) .is_err() @@ -106,53 +69,37 @@ impl OrderingTask { return; } - let mut last_processed = std::time::Instant::now(); loop { - let mut modified: Option> = None; - let mut need_prev_buf = Vec::with_capacity(100); - let mut newly_added_buf = Vec::with_capacity(100); - - tokio::select! 
{ - incoming = rx.recv_many(&mut need_prev_buf, 100) => { - if incoming > 0 { - modified = Some(state.add_incoming_batch(need_prev_buf)); - } - } - new = rx_new.recv_many(&mut newly_added_buf, 100) => { - if new > 0 { - modified = Some(newly_added_buf.into_iter().map(|ev| ev.init_cid).collect::>()); - } - } - else => { - info!(stream_count=%state.pending_by_stream.len(), "Server dropped the ordering task. Processing once more before exiting..."); - let _ = state - .process_events(&pool, None) - .await - .map_err(Self::log_error); + let mut recon_events = Vec::with_capacity(100); + // consider trying to recv in a loop until X or 10ms whatever comes first and then process + // the more events we get in memory, the fewer queries we need to run. + if rx_inserted.recv_many(&mut recon_events, 100).await > 0 { + trace!(?recon_events, "new events discovered!"); + state.add_inserted_events(recon_events); + + if state + .process_streams(&pool) + .await + .map_err(Self::log_error) + .is_err() + { return; } - }; - // Given the math on OrderingState and the generally low number of updates to streams, we are going - // to ignore pruning until there's more of an indication that it's necessary. Just log some stats. - if last_processed.elapsed().as_secs() > CHECK_ALL_INTERVAL_SECONDS { - let stream_count = state.pending_by_stream.len(); - if stream_count > 1000 { - info!(%stream_count, "Over 1000 pending streams without recent updates."); - } else { - debug!(%stream_count, "Fewer than 1000 streams pending without recent updates."); - } - } + } else if rx_inserted.is_closed() { + debug!( + "Server dropped the delivered events channel. Attempting to processing streams in memory once more before exiting." 
+ ); - if modified.is_some() - && state - .process_events(&pool, modified) + if state + .process_streams(&pool) .await .map_err(Self::log_error) .is_err() - { - return; + { + return; + } + break; } - last_processed = std::time::Instant::now(); } } @@ -175,188 +122,310 @@ impl OrderingTask { } } -#[derive(Debug)] -/// Rough size estimate: -/// pending_by_stream: 96 * stream_cnt + 540 * event_cnt -/// ready_events: 96 * ready_event_cnt -/// so for stream_cnt = 1000, event_cnt = 2, ready_event_cnt = 1000 -/// we get about 1 MB of memory used. -pub struct OrderingState { - /// Map of undelivered events by init CID (i.e. the stream CID). - pending_by_stream: HashMap, - /// Queue of events that can be marked ready to deliver. - /// Can be added as long as their prev is stored or in this list ahead of them. - ready_events: VecDeque, +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +enum StreamEvent { + InitEvent(EventCid), + /// An event that is known to be deliverable from the database + KnownDeliverable(StreamEventMetadata), + /// An event that needs more history to be deliverable + Undelivered(StreamEventMetadata), +} + +impl StreamEvent { + /// Builds a stream event from the database if it exists. + async fn load_by_cid(pool: &SqlitePool, cid: EventCid) -> Result> { + // TODO: one query + let (exists, deliverable) = CeramicOneEvent::deliverable_by_cid(pool, &cid).await?; + if exists { + let parsed_body = + if let Some(body) = CeramicEventService::load_by_cid(pool, cid).await? { + body + } else { + warn!(%cid, "No event body found for event that should exist"); + return Ok(None); + }; + + let known_prev = match &parsed_body.header { + EventHeader::Init { cid, .. } => { + if !deliverable { + warn!(%cid,"Found init event in database that wasn't previously marked as deliverable. 
Updating now..."); + let mut tx = pool.begin_tx().await?; + CeramicOneEvent::mark_ready_to_deliver(&mut tx, cid).await?; + tx.commit().await?; + } + StreamEvent::InitEvent(*cid) + } + EventHeader::Data { prev, .. } | EventHeader::Time { prev, .. } => { + if deliverable { + trace!(%cid, "Found deliverable event in database"); + StreamEvent::KnownDeliverable(StreamEventMetadata::new(cid, *prev)) + } else { + trace!(%cid, "Found undelivered event in database"); + StreamEvent::Undelivered(StreamEventMetadata::new(cid, *prev)) + } + } + }; + Ok(Some(known_prev)) + } else { + trace!(%cid, "Missing event in database"); + Ok(None) + } + } +} + +impl From for StreamEvent { + fn from(ev: InsertableBodyWithHeader) -> Self { + match ev.header { + EventHeader::Init { cid, .. } => StreamEvent::InitEvent(cid), + EventHeader::Data { cid, prev, .. } | EventHeader::Time { cid, prev, .. } => { + let meta = StreamEventMetadata::new(cid, prev); + if ev.body.deliverable() { + StreamEvent::KnownDeliverable(meta) + } else { + StreamEvent::Undelivered(meta) + } + } + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +struct StreamEventMetadata { + cid: EventCid, + prev: PrevCid, +} + +impl StreamEventMetadata { + fn new(cid: EventCid, prev: PrevCid) -> Self { + Self { cid, prev } + } } #[derive(Debug, Clone, Default)] /// ~540 bytes per event in this struct pub(crate) struct StreamEvents { + /// Map of `event.prev` to `event.cid` to find the previous event easily. prev_map: HashMap, - cid_map: HashMap, + /// Map of `event.cid` to `metadata` for quick lookup of the event metadata. + cid_map: HashMap, + /// whether we should process this stream because new events have been added + skip_processing: bool, + /// The newly discovered events that are deliverable and should be processed. 
+ new_deliverable: VecDeque, } -impl FromIterator for StreamEvents { - fn from_iter>(iter: T) -> Self { - let mut stream = Self::new(); - for item in iter { - stream.add_event(item); - } - stream +impl StreamEvents { + fn new(event: StreamEvent) -> Self { + let mut new = Self::default(); + new.add_event(event); + new } -} -impl StreamEvents { - pub fn new() -> Self { - Self::default() + // we'll be processed if something in memory depends on this event + fn update_should_process_for_new_delivered(&mut self, new_cid: &EventCid) { + // don't reset a true flag to false + if self.skip_processing { + self.skip_processing = !self.prev_map.contains_key(new_cid); + } } - /// returns Some(Stream Init CID) if this is a new event, else None. - pub fn add_event(&mut self, event: DeliverableEvent) -> Option { - let res = if self.prev_map.insert(event.meta.prev, event.cid).is_none() { - Some(event.meta.init_cid) - } else { - None + /// returns true if this is a new event. + fn add_event(&mut self, event: StreamEvent) -> bool { + let cid = match &event { + StreamEvent::InitEvent(cid) => { + self.update_should_process_for_new_delivered(cid); + *cid + } + StreamEvent::KnownDeliverable(meta) => { + self.prev_map.insert(meta.prev, meta.cid); + self.update_should_process_for_new_delivered(&meta.cid); + meta.cid + } + StreamEvent::Undelivered(meta) => { + self.prev_map.insert(meta.prev, meta.cid); + if self.skip_processing { + // we depend on something in memory + self.skip_processing = !self.prev_map.contains_key(&meta.prev) + } + meta.cid + } }; - self.cid_map.insert(event.cid, event); - res - } - pub fn is_empty(&self) -> bool { - // these should always match - self.prev_map.is_empty() && self.cid_map.is_empty() + self.cid_map.insert(cid, event).is_none() } - fn remove_by_event_cid(&mut self, cid: &Cid) -> Option { - if let Some(cid) = self.cid_map.remove(cid) { - self.prev_map.remove(&cid.meta.prev); + fn remove_by_prev_cid(&mut self, prev: &Cid) -> Option { + if let Some(cid) = 
self.prev_map.remove(prev) { + self.cid_map.remove(&cid); Some(cid) } else { None } } - fn remove_by_prev_cid(&mut self, cid: &Cid) -> Option { - if let Some(cid) = self.prev_map.remove(cid) { - self.cid_map.remove(&cid); - Some(cid) - } else { - None + /// Called when we've persisted the deliverable events to the database and can clean up our state. + /// Returns true if we should be retained for future processing (i.e we have more we need to discover) + /// and false if we can be dropped from memory. + fn completed_processing(&mut self) -> bool { + self.skip_processing = true; + + for cid in self.new_deliverable.iter() { + if let Some(ev) = self.cid_map.get_mut(cid) { + match ev { + StreamEvent::InitEvent(_) => {} + StreamEvent::KnownDeliverable(_) => { + warn!( + ?ev, + "Found event in deliverable queue that was already marked as deliverable." + ) + } + StreamEvent::Undelivered(meta) => { + // we're delivered now + *ev = StreamEvent::KnownDeliverable(meta.clone()); + } + } + } } + self.new_deliverable.clear(); + self.cid_map + .iter() + .any(|(_, ev)| matches!(ev, StreamEvent::Undelivered(_))) + } + + async fn order_events(&mut self, pool: &SqlitePool) -> Result<()> { + // We collect everything we can into memory and then order things. + // If our prev is the init event or already been delivered, we can mark ourselves as deliverable. + // If our prev wasn't deliverable yet, we track it and repeat (i.e. 
follow its prev if we don't have it) + + let mut deliverable_queue = VecDeque::new(); + let mut undelivered = + VecDeque::from_iter(self.cid_map.iter().filter_map(|(cid, ev)| match ev { + StreamEvent::Undelivered(meta) => { + assert_eq!(meta.cid, *cid); + Some((meta.cid, meta.prev)) + } + _ => None, + })); + + while let Some((cid, prev)) = undelivered.pop_front() { + if let Some(prev_event) = self.cid_map.get(&prev) { + match prev_event { + StreamEvent::InitEvent(_) | StreamEvent::KnownDeliverable(_) => { + trace!( + %prev, + %cid, + "Found event whose prev is already in memory and IS deliverable!" + ); + deliverable_queue.push_back(cid) + } + StreamEvent::Undelivered(_) => { + trace!( + %prev, + %cid, + "Found event whose prev is already in memory but NOT deliverable." + ); + // nothing to do until it arrives on the channel + } + } + + continue; + } + + let prev_event = StreamEvent::load_by_cid(pool, prev).await?; + if let Some(known_prev) = prev_event { + match &known_prev { + StreamEvent::InitEvent(_) | StreamEvent::KnownDeliverable(_) => { + deliverable_queue.push_back(cid); + } + StreamEvent::Undelivered(undelivered_ev) => { + // we'll try to follow this back to something deliverable + undelivered.push_back((undelivered_ev.cid, undelivered_ev.prev)); + } + } + self.add_event(known_prev); + } else { + trace!("Found event that depends on another event we haven't discovered yet"); + } + } + + let mut newly_ready = deliverable_queue.clone(); + while let Some(cid) = newly_ready.pop_front() { + if let Some(now_ready_ev) = self.remove_by_prev_cid(&cid) { + if let Some(ev) = self.cid_map.get(&now_ready_ev) { + match ev { + StreamEvent::InitEvent(_) | StreamEvent::KnownDeliverable(_) => { + warn!(?ev, "should not have found a deliverable event when we expected only undelivered events!"); + } + StreamEvent::Undelivered(_) => { + newly_ready.push_back(now_ready_ev); + } + } + } + deliverable_queue.push_back(now_ready_ev); + newly_ready.push_back(now_ready_ev); + } + } + 
self.new_deliverable = deliverable_queue; + debug!(count=%self.new_deliverable.len(), "deliverable events discovered"); + Ok(()) } } +#[derive(Debug)] +pub struct OrderingState { + pending_by_stream: HashMap, + deliverable: VecDeque, +} + impl OrderingState { - pub fn new() -> Self { + fn new() -> Self { Self { pending_by_stream: HashMap::new(), - ready_events: VecDeque::new(), + deliverable: VecDeque::new(), } } - /// This will review all the events for any streams known to have undelivered events and see if any of them are now deliverable. - /// If `streams_to_process` is None, all streams will be processed, otherwise only the streams in the set will be processed. - /// Processing all streams could take a long time and not necessarily do anything productive (if we're missing a key event, we're still blocked). - /// However, passing a value for `streams_to_process` when we know something has changed is likely to have positive results and be much faster. - pub(crate) async fn process_events( - &mut self, - pool: &SqlitePool, - streams_to_process: Option>, - ) -> Result<()> { - self.persist_ready_events(pool).await?; - for (cid, stream_events) in self.pending_by_stream.iter_mut() { - if streams_to_process - .as_ref() - .map_or(false, |to_do| !to_do.contains(cid)) - { - continue; - } - let deliverable = Self::discover_deliverable_events(pool, stream_events).await?; - if !deliverable.is_empty() { - self.ready_events.extend(deliverable) - } - } - if !self.ready_events.is_empty() { - self.persist_ready_events(pool).await?; + /// Add a stream to the list of streams to process. + /// We ignore delivered events for streams we're not tracking as we can look them up later if we need them. + /// We will get lots of init events we can ignore unless we need them, otherwise they'll be stuck in memory for a long time. 
+ fn add_inserted_events(&mut self, events: Vec) { + for ev in events { + let stream_cid = ev.header.stream_cid(); + let event = ev.into(); + self.add_stream_event(stream_cid, event); } - - Ok(()) } - /// Removes deliverable events from the `prev_map` and returns them. This means prev is already delivered or in the - /// list to be marked as delivered. The input is expected to be a list of CIDs for a given stream that are waiting - /// to be processed. It will still work if it's intermixed for multiple streams, but it's not the most efficient way to use it. - /// The returned CIDs in the VeqDeque are for events that are expected to be updated FIFO i.e. vec.pop_front() - /// - /// This breaks with multi-prev as we expect a single prev for each event. The input map is expected to contain the - /// (prev <- event) relationship (that is, the value is the event that depends on the key). - pub(crate) async fn discover_deliverable_events( - pool: &SqlitePool, - stream_map: &mut StreamEvents, - ) -> Result> { - if stream_map.is_empty() { - return Ok(VecDeque::new()); + fn add_stream_event(&mut self, stream_cid: StreamCid, event: StreamEvent) { + if let Some(stream) = self.pending_by_stream.get_mut(&stream_cid) { + stream.add_event(event); + } else if matches!(event, StreamEvent::Undelivered(_)) { + let stream = StreamEvents::new(event); + self.pending_by_stream.insert(stream_cid, stream); } + } - let mut deliverable = VecDeque::new(); - let prev_map_cln = stream_map.prev_map.clone(); - for (prev, ev_cid) in prev_map_cln { - if stream_map.cid_map.contains_key(&prev) { - trace!( - ?prev, - cid=?ev_cid, - "Found event that depends on another event in memory" - ); - // we have it in memory so we need to order it related to others to insert correctly - // although it may not be possible if the chain just goes back to some unknown event - // once we find the first event that's deliverable, we can go back through and find the rest + /// Process every stream we know about that has 
undelivered events that should be "unlocked" now. This could be adjusted to commit things in batches, + /// but for now it assumes it can process all the streams and events in one go. It should be idempotent, so if it fails, it can be retried. + async fn process_streams(&mut self, pool: &SqlitePool) -> Result<()> { + for (_stream_cid, stream_events) in self.pending_by_stream.iter_mut() { + if stream_events.skip_processing { continue; - } else { - let (exists, delivered) = CeramicOneEvent::deliverable_by_cid(pool, &prev).await?; - if delivered { - trace!(deliverable=?ev_cid, "Found delivered prev in database. Adding to ready list"); - deliverable.push_back(ev_cid); - stream_map.remove_by_event_cid(&ev_cid); - } else if exists { - trace!("Found undelivered prev in database. Building data to check for deliverable."); - // if it's not in memory, we need to read it from the db and parse it for the prev value to add it to our set - let data = CeramicOneEvent::value_by_cid(pool, &prev) - .await? - .ok_or_else(|| { - Error::new_app(anyhow!( - "Missing data for event that exists should be impossible" - )) - })?; - let (insertable_body, maybe_prev) = - CeramicEventService::parse_event_carfile(prev, &data).await?; - - if let Some(prev) = maybe_prev { - let event = DeliverableEvent::new(insertable_body.cid, prev, None); - trace!(cid=%event.cid, "Adding event discovered in database to stream pending list"); - stream_map.add_event(event); - } else { - warn!(event_cid=%insertable_body.cid,"Found undelivered event with no prev while processing pending. Should not happen."); - deliverable.push_back(insertable_body.cid); - stream_map.remove_by_event_cid(&ev_cid); - } - } else { - trace!( - ?ev_cid, - "Found event that depends on unknown event. Will check later." 
- ); - } } + stream_events.order_events(pool).await?; + self.deliverable + .extend(stream_events.new_deliverable.iter()); } - let mut newly_ready = deliverable.clone(); - while let Some(cid) = newly_ready.pop_front() { - if let Some(now_ready_ev) = stream_map.remove_by_prev_cid(&cid) { - deliverable.push_back(now_ready_ev); - newly_ready.push_back(now_ready_ev); - } - } - debug!(?deliverable, "deliverable events discovered"); - Ok(deliverable) + self.persist_ready_events(pool).await?; + // keep things that still have missing history but don't process them again until we get something new + self.pending_by_stream + .retain(|_, stream_events| stream_events.completed_processing()); + + debug!(remaining_streams=%self.pending_by_stream.len(), "Finished processing streams"); + trace!(stream_state=?self, "Finished processing streams"); + + Ok(()) } /// Process all undelivered events in the database. This is a blocking operation that could take a long time. @@ -365,112 +434,96 @@ impl OrderingState { &mut self, pool: &SqlitePool, max_iterations: usize, - ) -> Result<()> { - let mut cnt = 0; - let mut offset: usize = 0; - while cnt < max_iterations { - cnt += 1; - let (new, found) = self - .add_undelivered_batch(pool, offset, DELIVERABLE_EVENTS_BATCH_SIZE) - .await?; - if new == 0 { + batch_size: u32, + ) -> Result { + let mut iter_cnt = 0; + let mut event_cnt = 0; + let mut highwater = 0; + while iter_cnt < max_iterations { + iter_cnt += 1; + let (undelivered, new_hw) = + CeramicOneEvent::undelivered_with_values(pool, batch_size.into(), highwater) + .await?; + highwater = new_hw; + if undelivered.is_empty() { break; } else { // We can start processing and we'll follow the stream history if we have it. In that case, we either arrive // at the beginning and mark them all delivered, or we find a gap and stop processing and leave them in memory. 
// In this case, we won't discover them until we start running recon with a peer, so maybe we should drop them // or otherwise mark them ignored somehow. - self.process_events(pool, None).await?; - if new < DELIVERABLE_EVENTS_BATCH_SIZE { + let found_all = undelivered.len() < batch_size as usize; + event_cnt += self + .process_undelivered_events_batch(pool, undelivered) + .await?; + if found_all { break; } - offset = offset.saturating_add(found); - } - if cnt >= max_iterations { - warn!(batch_size=DELIVERABLE_EVENTS_BATCH_SIZE, iterations=%max_iterations, "Exceeded max iterations for finding undelivered events!"); - break; } } - if self.ready_events.is_empty() { - Ok(()) - } else { - self.persist_ready_events(pool).await?; - Ok(()) + if iter_cnt > max_iterations { + info!(%batch_size, iterations=%iter_cnt, "Exceeded max iterations for finding undelivered events!"); } + + Ok(event_cnt) } - /// Add a batch of events from the database to the pending list to be processed. - /// Returns the (#events new events found , #events returned by query) - async fn add_undelivered_batch( + async fn process_undelivered_events_batch( &mut self, pool: &SqlitePool, - offset: usize, - limit: usize, - ) -> Result<(usize, usize)> { - let undelivered = CeramicOneEvent::undelivered_with_values(pool, offset, limit).await?; - trace!(count=%undelivered.len(), "Found undelivered events to process"); - if undelivered.is_empty() { - return Ok((0, 0)); - } - let found = undelivered.len(); - let mut new = 0; - for (key, data) in undelivered { - let event_cid = key.cid().ok_or_else(|| { - Error::new_invalid_arg(anyhow::anyhow!("EventID is missing a CID: {}", key)) + event_data: Vec<(EventId, Vec)>, + ) -> Result { + trace!(cnt=%event_data.len(), "Processing undelivered events batch"); + let mut to_store_asap = Vec::new(); + let mut event_cnt = 0; + for (event_id, carfile) in event_data { + let event_cid = event_id.cid().ok_or_else(|| { + Error::new_invalid_arg(anyhow::anyhow!("EventID is missing a 
CID: {}", event_id)) })?; - let (insertable_body, maybe_prev) = - CeramicEventService::parse_event_carfile(event_cid, &data).await?; - if let Some(prev) = maybe_prev { - let event = DeliverableEvent::new(insertable_body.cid, prev, None); - if self.track_pending(event).is_some() { - new += 1; + + let loaded = CeramicEventService::parse_event_carfile_cid(event_cid, &carfile).await?; + + let event = match &loaded.header { + EventHeader::Init { cid, .. } => { + warn!(%cid,"Found init event in database that wasn't previously marked as deliverable. Updating now..."); + to_store_asap.push(*cid); + StreamEvent::InitEvent(*cid) } - } else { - // safe to ignore in tests, shows up because when we mark init events as undelivered even though they don't have a prev - info!(event_cid=%insertable_body.cid, "Found undelivered event with no prev while processing undelivered. Should not happen. Likely means events were dropped before."); - self.ready_events.push_back(insertable_body.cid); - new += 1; // we treat this as new since it might unlock something else but it's not actually going in our queue is it's a bit odd - } + EventHeader::Data { cid, prev, .. } | EventHeader::Time { cid, prev, .. 
} => { + StreamEvent::Undelivered(StreamEventMetadata::new(*cid, *prev)) + } + }; + + event_cnt += 1; + self.add_stream_event(loaded.header.stream_cid(), event); } - trace!(%new, %found, "Adding undelivered events to pending set"); - Ok((new, found)) - } - fn add_incoming_batch(&mut self, events: Vec) -> HashSet { - let mut updated_streams = HashSet::with_capacity(events.len()); - for event in events { - if let Some(updated_stream) = self.track_pending(event) { - updated_streams.insert(updated_stream); + if !to_store_asap.is_empty() { + info!("storing init events that were somehow missed previously"); + let mut tx = pool.begin_tx().await?; + for cid in to_store_asap { + CeramicOneEvent::mark_ready_to_deliver(&mut tx, &cid).await?; } + tx.commit().await?; } - updated_streams - } + self.process_streams(pool).await?; - /// returns the init event CID (stream CID) if this is a new event - fn track_pending(&mut self, event: DeliverableEvent) -> Option { - self.pending_by_stream - .entry(event.meta.init_cid) - .or_default() - .add_event(event) + Ok(event_cnt) } - /// Modify all the events that are ready to be marked as delivered. - /// We should improve the error handling and likely add some batching if the number of ready events is very high. /// We copy the events up front to avoid losing any events if the task is cancelled. async fn persist_ready_events(&mut self, pool: &SqlitePool) -> Result<()> { - if !self.ready_events.is_empty() { - let mut to_process = self.ready_events.clone(); // to avoid cancel loss - tracing::debug!(count=%self.ready_events.len(), "Marking events as ready to deliver"); + if !self.deliverable.is_empty() { + tracing::debug!(count=%self.deliverable.len(), "Marking events as ready to deliver"); let mut tx = pool.begin_tx().await?; - - // We process the ready events as a FIFO queue so they are marked delivered before events - // that were added after and depend on them. 
- while let Some(cid) = to_process.pop_front() { - CeramicOneEvent::mark_ready_to_deliver(&mut tx, &cid).await?; + // We process the ready events as a FIFO queue so they are marked delivered before events that were added after and depend on them. + // Could use `pop_front` but we want to make sure we commit and then clear everything at once. + for cid in &self.deliverable { + CeramicOneEvent::mark_ready_to_deliver(&mut tx, cid).await?; } tx.commit().await?; - self.ready_events.clear(); // safe to clear since we are past any await points and hold exclusive access + self.deliverable.clear(); } Ok(()) } @@ -479,289 +532,166 @@ impl OrderingState { #[cfg(test)] mod test { use ceramic_store::EventInsertable; - use multihash_codetable::{Code, MultihashDigest}; - use recon::ReconItem; use test_log::test; - use crate::tests::{build_event, check_deliverable, random_block, TestEventInfo}; + use crate::tests::get_n_events; use super::*; - /// these events are init events so they should have been delivered - /// need to build with data events that have the prev stored already - async fn build_insertable_undelivered() -> EventInsertable { - let TestEventInfo { - event_id: id, car, .. 
- } = build_event().await; - let cid = id.cid().unwrap(); + async fn get_n_insertable_events(n: usize) -> Vec { + let mut res = Vec::with_capacity(n); + let events = get_n_events(n).await; + for event in events { + let (event, _) = + CeramicEventService::parse_event_carfile_order_key(event.0.to_owned(), &event.1) + .await + .unwrap(); + res.push(event); + } + res + } - let (body, _meta) = CeramicEventService::parse_event_carfile(cid, &car) + #[test(tokio::test)] + async fn test_undelivered_batch_empty() { + let pool = SqlitePool::connect_in_memory().await.unwrap(); + let processed = OrderingState::new() + .process_all_undelivered_events(&pool, 10, 100) .await .unwrap(); - assert!(!body.deliverable); - EventInsertable::try_new(id, body).unwrap() + assert_eq!(0, processed); } - fn assert_stream_map_elems(map: &StreamEvents, size: usize) { - assert_eq!(size, map.cid_map.len(), "{:?}", map); - assert_eq!(size, map.prev_map.len(), "{:?}", map); - } + async fn insert_10_with_9_undelivered(pool: &SqlitePool) { + let insertable = get_n_insertable_events(10).await; + let init = insertable.first().unwrap().to_owned(); + let undelivered = insertable.into_iter().skip(1).collect::>(); - fn build_linked_events( - number: usize, - stream_cid: Cid, - first_prev: Cid, - ) -> Vec { - let mut events = Vec::with_capacity(number); - - let first_cid = random_block().cid; - events.push(DeliverableEvent::new( - first_cid, - DeliverableMetadata { - init_cid: stream_cid, - prev: first_prev, - }, - None, - )); - - for i in 1..number { - let random = random_block(); - let ev = DeliverableEvent::new( - random.cid, - DeliverableMetadata { - init_cid: stream_cid, - prev: events[i - 1].cid, - }, - None, - ); - events.push(ev); - } + let new = CeramicOneEvent::insert_many(pool, &undelivered[..]) + .await + .unwrap(); - events + assert_eq!(9, new.inserted.len()); + assert_eq!(0, new.inserted.iter().filter(|e| e.deliverable).count()); + + let new = CeramicOneEvent::insert_many(pool, 
&[init]).await.unwrap(); + assert_eq!(1, new.inserted.len()); + assert_eq!(1, new.inserted.iter().filter(|e| e.deliverable).count()); } #[test(tokio::test)] - async fn test_none_deliverable_without_first() { - // they events all point to the one before but A has never been delivered so we can't do anything - let stream_cid = Cid::new_v1(0x71, Code::Sha2_256.digest(b"arbitrary")); - let missing = Cid::new_v1(0x71, Code::Sha2_256.digest(b"missing")); - let events = build_linked_events(4, stream_cid, missing); - let mut prev_map = StreamEvents::from_iter(events); - + async fn test_undelivered_batch_offset() { let pool = SqlitePool::connect_in_memory().await.unwrap(); - let deliverable = super::OrderingState::discover_deliverable_events(&pool, &mut prev_map) + insert_10_with_9_undelivered(&pool).await; + let (_, events) = CeramicOneEvent::new_events_since_value(&pool, 0, 100) .await .unwrap(); + assert_eq!(1, events.len()); - assert_eq!(0, deliverable.len()); - } - - #[test(tokio::test)] - async fn test_all_deliverable_one_stream() { - let TestEventInfo { - event_id: one_id, - car: one_car, - .. 
- } = build_event().await; - let one_cid = one_id.cid().unwrap(); - let store = CeramicEventService::new(SqlitePool::connect_in_memory().await.unwrap()) + let processed = OrderingState::new() + .process_all_undelivered_events(&pool, 1, 5) .await .unwrap(); - recon::Store::insert(&store, &ReconItem::new(&one_id, &one_car)) + assert_eq!(5, processed); + let (_, events) = CeramicOneEvent::new_events_since_value(&pool, 0, 100) .await .unwrap(); - - check_deliverable(&store.pool, &one_cid, true).await; - - let stream_cid = Cid::new_v1(0x71, Code::Sha2_256.digest(b"arbitrary")); - - let events = build_linked_events(4, stream_cid, one_cid); - let expected = VecDeque::from_iter(events.iter().map(|ev| ev.cid)); - let mut prev_map = StreamEvents::from_iter(events); - - assert_stream_map_elems(&prev_map, 4); - let deliverable = - super::OrderingState::discover_deliverable_events(&store.pool, &mut prev_map) - .await - .unwrap(); - - assert_eq!(4, deliverable.len()); - assert_eq!(expected, deliverable); - assert_stream_map_elems(&prev_map, 0); - } - - #[test(tokio::test)] - async fn test_some_deliverable_one_stream() { - let TestEventInfo { - event_id: one_id, - car: one_car, - .. 
- } = build_event().await; - let one_cid = one_id.cid().unwrap(); - let store = CeramicEventService::new(SqlitePool::connect_in_memory().await.unwrap()) + assert_eq!(6, events.len()); + // the last 5 are processed and we have 10 delivered + let processed = OrderingState::new() + .process_all_undelivered_events(&pool, 1, 5) .await .unwrap(); - recon::Store::insert(&store, &ReconItem::new(&one_id, &one_car)) + assert_eq!(4, processed); + let (_, events) = CeramicOneEvent::new_events_since_value(&pool, 0, 100) .await .unwrap(); + assert_eq!(10, events.len()); - check_deliverable(&store.pool, &one_cid, true).await; - - let stream_cid = Cid::new_v1(0x71, Code::Sha2_256.digest(b"arbitrary")); - let missing = Cid::new_v1(0x71, Code::Sha2_256.digest(b"missing")); - - let mut deliverable_events = build_linked_events(6, stream_cid, one_cid); - let stuck_events = build_linked_events(8, stream_cid, missing); - let expected = VecDeque::from_iter(deliverable_events.iter().map(|ev| ev.cid)); - deliverable_events.extend(stuck_events); - let mut prev_map = StreamEvents::from_iter(deliverable_events); - - assert_stream_map_elems(&prev_map, 14); - let deliverable = - super::OrderingState::discover_deliverable_events(&store.pool, &mut prev_map) - .await - .unwrap(); - - assert_eq!(6, deliverable.len()); - assert_eq!(expected, deliverable); - assert_stream_map_elems(&prev_map, 8); - } - - #[test(tokio::test)] - // expected to be per stream but all events are combined for the history required version currently so - // this needs to work as well - async fn test_all_deliverable_multiple_streams() { - let TestEventInfo { - event_id: one_id, - car: one_car, - .. - } = build_event().await; - let TestEventInfo { - event_id: two_id, - car: two_car, - .. 
- } = build_event().await; - let one_cid = one_id.cid().unwrap(); - let two_cid = two_id.cid().unwrap(); - let store = CeramicEventService::new(SqlitePool::connect_in_memory().await.unwrap()) + // nothing left + let processed = OrderingState::new() + .process_all_undelivered_events(&pool, 1, 100) .await .unwrap(); - recon::Store::insert_many( - &store, - &[ - ReconItem::new(&one_id, &one_car), - ReconItem::new(&two_id, &two_car), - ], - ) - .await - .unwrap(); - - check_deliverable(&store.pool, &one_cid, true).await; - check_deliverable(&store.pool, &two_cid, true).await; - - let stream_cid = Cid::new_v1(0x71, Code::Sha2_256.digest(b"arbitrary-one")); - let stream_cid_2 = Cid::new_v1(0x71, Code::Sha2_256.digest(b"arbitrary-two")); - - let mut events_a = build_linked_events(4, stream_cid, one_cid); - let mut events_b = build_linked_events(10, stream_cid_2, two_cid); - let expected_a = VecDeque::from_iter(events_a.iter().map(|ev| ev.cid)); - let expected_b = VecDeque::from_iter(events_b.iter().map(|ev| ev.cid)); - // we expect the events to be in the prev chain order, but they can be intervleaved across streams - // we reverse the items in the input to proov this (it's a hashmap internally so there is no order, but still) - events_a.reverse(); - events_b.reverse(); - events_a.extend(events_b); - assert_eq!(14, events_a.len()); - let mut prev_map = StreamEvents::from_iter(events_a); - - assert_stream_map_elems(&prev_map, 14); - let deliverable = - super::OrderingState::discover_deliverable_events(&store.pool, &mut prev_map) - .await - .unwrap(); - - assert_eq!(14, deliverable.len()); - assert_eq!(0, prev_map.cid_map.len(), "{:?}", prev_map); - assert_eq!(0, prev_map.prev_map.len(), "{:?}", prev_map); - - let mut split_a = VecDeque::new(); - let mut split_b = VecDeque::new(); - for cid in deliverable { - if expected_a.contains(&cid) { - split_a.push_back(cid); - } else if expected_b.contains(&cid) { - split_b.push_back(cid); - } else { - panic!("Unexpected CID in 
deliverable list: {:?}", cid); - } - } - assert_eq!(expected_a, split_a); - assert_eq!(expected_b, split_b); + assert_eq!(0, processed); } #[test(tokio::test)] - async fn test_undelivered_batch_empty() { + async fn test_undelivered_batch_iterations_ends_early() { let pool = SqlitePool::connect_in_memory().await.unwrap(); - let (new, found) = OrderingState::new() - .add_undelivered_batch(&pool, 0, 10) + // create 5 streams with 9 undelivered events each + insert_10_with_9_undelivered(&pool).await; + insert_10_with_9_undelivered(&pool).await; + insert_10_with_9_undelivered(&pool).await; + insert_10_with_9_undelivered(&pool).await; + insert_10_with_9_undelivered(&pool).await; + + let (_hw, event) = CeramicOneEvent::new_events_since_value(&pool, 0, 1000) + .await + .unwrap(); + assert_eq!(5, event.len()); + let mut state = OrderingState::new(); + state + .process_all_undelivered_events(&pool, 4, 10) .await .unwrap(); - assert_eq!(0, new); - assert_eq!(0, found); + + let (_hw, event) = CeramicOneEvent::new_events_since_value(&pool, 0, 1000) + .await + .unwrap(); + assert_eq!(45, event.len()); } #[test(tokio::test)] - async fn test_undelivered_batch_offset() { + async fn test_undelivered_batch_iterations_ends_when_all_found() { let pool = SqlitePool::connect_in_memory().await.unwrap(); - let insertable = build_insertable_undelivered().await; + // create 5 streams with 9 undelivered events each + insert_10_with_9_undelivered(&pool).await; + insert_10_with_9_undelivered(&pool).await; + insert_10_with_9_undelivered(&pool).await; + insert_10_with_9_undelivered(&pool).await; + insert_10_with_9_undelivered(&pool).await; - let _new = CeramicOneEvent::insert_many(&pool, &[insertable]) + let (_hw, event) = CeramicOneEvent::new_events_since_value(&pool, 0, 1000) .await .unwrap(); - + assert_eq!(5, event.len()); let mut state = OrderingState::new(); - let (new, found) = state.add_undelivered_batch(&pool, 0, 10).await.unwrap(); - assert_eq!(1, found); - assert_eq!(1, new); - let 
(new, found) = state.add_undelivered_batch(&pool, 10, 10).await.unwrap(); - assert_eq!(0, new); - assert_eq!(0, found); - state.persist_ready_events(&pool).await.unwrap(); - let (new, found) = state.add_undelivered_batch(&pool, 0, 10).await.unwrap(); - assert_eq!(0, new); - assert_eq!(0, found); + state + .process_all_undelivered_events(&pool, 100_000_000, 5) + .await + .unwrap(); + + let (_hw, event) = CeramicOneEvent::new_events_since_value(&pool, 0, 1000) + .await + .unwrap(); + assert_eq!(50, event.len()); } #[test(tokio::test)] - async fn test_undelivered_batch_all() { + async fn test_process_all_undelivered_one_batch() { let pool = SqlitePool::connect_in_memory().await.unwrap(); - let mut undelivered = Vec::with_capacity(10); - for _ in 0..10 { - let insertable = build_insertable_undelivered().await; - undelivered.push(insertable); - } + // create 5 streams with 9 undelivered events each + insert_10_with_9_undelivered(&pool).await; + insert_10_with_9_undelivered(&pool).await; + insert_10_with_9_undelivered(&pool).await; + insert_10_with_9_undelivered(&pool).await; + insert_10_with_9_undelivered(&pool).await; - let (hw, event) = CeramicOneEvent::new_events_since_value(&pool, 0, 1000) - .await - .unwrap(); - assert_eq!(0, hw); - assert!(event.is_empty()); - - let _new = CeramicOneEvent::insert_many(&pool, &undelivered[..]) + let (_hw, event) = CeramicOneEvent::new_events_since_value(&pool, 0, 1000) .await .unwrap(); - + assert_eq!(5, event.len()); let mut state = OrderingState::new(); state - .process_all_undelivered_events(&pool, 1) + .process_all_undelivered_events(&pool, 1, 100) .await .unwrap(); let (_hw, event) = CeramicOneEvent::new_events_since_value(&pool, 0, 1000) .await .unwrap(); - assert_eq!(event.len(), 10); + assert_eq!(50, event.len()); } } diff --git a/service/src/event/service.rs b/service/src/event/service.rs index aa18e3fc5..079d5dedf 100644 --- a/service/src/event/service.rs +++ b/service/src/event/service.rs @@ -1,5 +1,3 @@ -use 
std::collections::{HashMap, HashSet}; - use async_trait::async_trait; use ceramic_core::{EventId, Network}; use ceramic_event::unvalidated; @@ -7,15 +5,12 @@ use ceramic_store::{CeramicOneEvent, EventInsertable, EventInsertableBody, Sqlit use cid::Cid; use futures::Stream; use ipld_core::ipld::Ipld; -use recon::ReconItem; use tracing::{trace, warn}; use super::{ migration::Migrator, - ordering_task::{ - DeliverableEvent, DeliverableMetadata, DeliverableTask, DeliveredEvent, OrderingState, - OrderingTask, StreamEvents, - }, + order_events::OrderEvents, + ordering_task::{DeliverableTask, OrderingTask}, }; use crate::{Error, Result}; @@ -56,7 +51,7 @@ impl CeramicEventService { } /// Skip loading all undelivered events from the database on startup (for testing) - #[allow(dead_code)] // used in tests + #[cfg(test)] pub(crate) async fn new_without_undelivered(pool: SqlitePool) -> Result { CeramicOneEvent::init_delivered_order(&pool).await?; @@ -100,17 +95,33 @@ impl CeramicEventService { .await?; Ok(()) } - /// This function is used to parse the event from the carfile and return the insertable event and the previous cid pointer. /// Probably belongs in the event crate. 
- pub(crate) async fn parse_event_carfile( - event_cid: cid::Cid, + pub(crate) async fn parse_event_carfile_order_key( + event_id: EventId, + carfile: &[u8], + ) -> Result<(EventInsertable, EventHeader)> { + let mut insertable = EventInsertable::try_from_carfile(event_id, carfile).await?; + + let header = Self::parse_event_body(&mut insertable.body).await?; + Ok((insertable, header)) + } + + pub(crate) async fn parse_event_carfile_cid( + cid: ceramic_core::Cid, carfile: &[u8], - ) -> Result<(EventInsertableBody, Option)> { - let insertable = EventInsertableBody::try_from_carfile(event_cid, carfile).await?; - let ev_block = insertable.block_for_cid(&insertable.cid)?; + ) -> Result { + let mut body = EventInsertableBody::try_from_carfile(cid, carfile).await?; - trace!(count=%insertable.blocks.len(), cid=%event_cid, "parsing event blocks"); + let header = Self::parse_event_body(&mut body).await?; + Ok(InsertableBodyWithHeader { body, header }) + } + + pub(crate) async fn parse_event_body(body: &mut EventInsertableBody) -> Result { + let cid = body.cid(); // purely for convenience writing out the match + let ev_block = body.block_for_cid(&cid)?; + + trace!(count=%body.blocks().len(), %cid, "parsing event blocks"); let event_ipld: unvalidated::RawEvent = serde_ipld_dagcbor::from_slice(&ev_block.bytes).map_err(|e| { Error::new_invalid_arg( @@ -118,17 +129,26 @@ impl CeramicEventService { ) })?; - let maybe_init_prev = match event_ipld { - unvalidated::RawEvent::Time(t) => Some((t.id(), t.prev())), + let (deliverable, header) = match event_ipld { + unvalidated::RawEvent::Time(t) => ( + false, + EventHeader::Time { + cid, + stream_cid: t.id(), + prev: t.prev(), + }, + ), unvalidated::RawEvent::Signed(signed) => { let link = signed.link().ok_or_else(|| { Error::new_invalid_arg(anyhow::anyhow!("event should have a link")) })?; - let link = insertable.block_for_cid(&link).map_err(|e| { - Error::new_invalid_arg( - anyhow::anyhow!(e).context("prev CID missing from carfile"), - 
) - })?; + let link = body + .blocks() + .iter() + .find(|b| b.cid() == link) + .ok_or_else(|| { + Error::new_invalid_arg(anyhow::anyhow!("prev CID missing from carfile")) + })?; let payload: unvalidated::Payload = serde_ipld_dagcbor::from_slice(&link.bytes).map_err(|e| { Error::new_invalid_arg( @@ -137,17 +157,21 @@ impl CeramicEventService { })?; match payload { - unvalidated::Payload::Data(d) => Some((*d.id(), *d.prev())), - unvalidated::Payload::Init(_init) => None, + unvalidated::Payload::Data(d) => ( + false, + EventHeader::Data { + cid, + stream_cid: *d.id(), + prev: *d.prev(), + }, + ), + unvalidated::Payload::Init(_init) => (true, EventHeader::Init { cid }), } } - unvalidated::RawEvent::Unsigned(_init) => None, + unvalidated::RawEvent::Unsigned(_init) => (true, EventHeader::Init { cid }), }; - let meta = maybe_init_prev.map(|(cid, prev)| DeliverableMetadata { - init_cid: cid, - prev, - }); - Ok((insertable, meta)) + body.set_deliverable(deliverable); + Ok(header) } #[tracing::instrument(skip(self, items), level = tracing::Level::DEBUG, fields(items = items.len()))] @@ -159,13 +183,7 @@ impl CeramicEventService { &self, items: &[recon::ReconItem<'a, EventId>], ) -> Result { - if items.is_empty() { - return Ok(InsertResult::default()); - } - - let ordering = - InsertEventOrdering::discover_deliverable_local_history(items, &self.pool).await?; - self.process_events(ordering).await + self.insert_events(items, true).await } #[tracing::instrument(skip(self, items), level = tracing::Level::DEBUG, fields(items = items.len()))] @@ -177,14 +195,9 @@ impl CeramicEventService { &self, items: &[recon::ReconItem<'a, EventId>], ) -> Result { - if items.is_empty() { - return Ok(recon::InsertResult::default()); - } - - let ordering = InsertEventOrdering::discover_deliverable_remote_history(items).await?; - let res = self.process_events(ordering).await?; - // we need to put things back in the right order that the recon trait expects, even though we don't really care about 
the result + let res = self.insert_events(items, false).await?; let mut keys = vec![false; items.len()]; + // we need to put things back in the right order that the recon trait expects, even though we don't really care about the result for (i, item) in items.iter().enumerate() { let new_key = res .store_result @@ -197,225 +210,108 @@ impl CeramicEventService { Ok(recon::InsertResult::new(keys)) } - async fn process_events(&self, ordering: InsertEventOrdering) -> Result { - let res = CeramicOneEvent::insert_many(&self.pool, &ordering.insert_now[..]).await?; - - for ev in ordering.background_task_deliverable { - trace!(cid=%ev.0, prev=%ev.1.prev, init=%ev.1.init_cid, "sending to delivery task"); - if let Err(e) = self - .delivery_task - .tx - .try_send(DeliverableEvent::new(ev.0, ev.1, None)) - { - match e { - tokio::sync::mpsc::error::TrySendError::Full(e) => { - // we should only be doing this during recon, in which case we can rediscover events. - // the delivery task will start picking up these events once it's drained since they are stored in the db - warn!(cid=%e.cid, meta=?e.meta, limit=%PENDING_EVENTS_CHANNEL_DEPTH, "Delivery task full. Dropping event and will not be able to mark deliverable until queue drains"); - } - tokio::sync::mpsc::error::TrySendError::Closed(_) => { - warn!("Delivery task closed. shutting down"); - return Err(Error::new_fatal(anyhow::anyhow!("Delivery task closed"))); - } - } - } - } - for new in ordering.notify_task_new { - if let Err(e) = self.delivery_task.tx_new.try_send(new) { - match e { - tokio::sync::mpsc::error::TrySendError::Full(ev) => { - // we should only be doing this during recon, in which case we can rediscover events. - // the delivery task will start picking up these events once it's drained since they are stored in the db - warn!(attempt=?ev, limit=%PENDING_EVENTS_CHANNEL_DEPTH, "Notify new task full"); - } - tokio::sync::mpsc::error::TrySendError::Closed(_) => { - warn!("Delivery task closed. 
shutting down"); - return Err(Error::new_fatal(anyhow::anyhow!("Delivery task closed"))); - } - } - } + async fn insert_events<'a>( + &self, + items: &[recon::ReconItem<'a, EventId>], + history_required: bool, + ) -> Result { + if items.is_empty() { + return Ok(InsertResult::default()); } - Ok(InsertResult { - store_result: res, - missing_history: vec![], - }) - } -} - -struct InsertEventOrdering { - insert_now: Vec, - notify_task_new: Vec, - background_task_deliverable: HashMap, -} -impl InsertEventOrdering { - /// This will mark events as deliverable if their prev exists locally. Otherwise they will be - /// sorted into the bucket for the background task to process. - pub(crate) async fn discover_deliverable_remote_history<'a>( - items: &[ReconItem<'a, EventId>], - ) -> Result { - let mut result = Self { - insert_now: Vec::with_capacity(items.len()), - notify_task_new: Vec::with_capacity(items.len()), - background_task_deliverable: HashMap::new(), - }; + let mut to_insert = Vec::with_capacity(items.len()); - for item in items { - let (insertable, maybe_prev) = Self::parse_item(item).await?; - if let Some(meta) = maybe_prev { - result.mark_event_deliverable_later(insertable, meta); - } else { - let init_cid = insertable.body.cid; - result.mark_event_deliverable_now(insertable, init_cid); - } + for event in items { + let insertable = + Self::parse_event_carfile_order_key(event.key.to_owned(), event.value).await?; + to_insert.push(insertable); } - Ok(result) - } - - /// This will error if any of the events doesn't have its prev on the local node (in the database/memory or in this batch). 
- pub(crate) async fn discover_deliverable_local_history<'a>( - items: &[ReconItem<'a, EventId>], - pool: &SqlitePool, - ) -> Result { - let mut result = Self { - insert_now: Vec::with_capacity(items.len()), - notify_task_new: Vec::with_capacity(items.len()), - background_task_deliverable: HashMap::new(), - }; - - let mut insert_after_history_check: Vec<(DeliverableMetadata, EventInsertable)> = - Vec::with_capacity(items.len()); - - for item in items { - let (insertable, maybe_prev) = Self::parse_item(item).await?; - if let Some(meta) = maybe_prev { - insert_after_history_check.push((meta, insertable)); - } else { - let init_cid = insertable.body.cid; - result.mark_event_deliverable_now(insertable, init_cid); - } - } + let ordered = OrderEvents::try_new(&self.pool, to_insert).await?; - trace!(local_events_checking=%insert_after_history_check.len(), "checking local history"); - result - .verify_history_inline(pool, insert_after_history_check) - .await?; - Ok(result) - } + let missing_history = ordered + .missing_history + .iter() + .map(|(e, _)| e.order_key.clone()) + .collect(); - async fn parse_item<'a>( - item: &ReconItem<'a, EventId>, - ) -> Result<(EventInsertable, Option)> { - let cid = item.key.cid().ok_or_else(|| { - Error::new_invalid_arg(anyhow::anyhow!("EventID is missing a CID: {}", item.key)) - })?; - // we want to end a conversation if any of the events aren't ceramic events and not store them - // this includes making sure the key matched the body cid - let (insertable_body, maybe_prev) = - CeramicEventService::parse_event_carfile(cid, item.value).await?; - let insertable = EventInsertable::try_new(item.key.to_owned(), insertable_body)?; - Ok((insertable, maybe_prev)) - } + let to_insert_with_header = if history_required { + ordered.deliverable + } else { + ordered + .deliverable + .into_iter() + .chain(ordered.missing_history) + .collect() + }; - fn mark_event_deliverable_later( - &mut self, - insertable: EventInsertable, - meta: DeliverableMetadata, 
- ) { - self.background_task_deliverable - .insert(insertable.body.cid, meta); - self.insert_now.push(insertable); - } + let to_insert = to_insert_with_header + .iter() + .map(|(e, _)| e.clone()) + .collect::>(); - fn mark_event_deliverable_now(&mut self, mut ev: EventInsertable, init_cid: Cid) { - ev.set_deliverable(true); - self.notify_task_new - .push(DeliveredEvent::new(ev.body.cid, init_cid)); - self.insert_now.push(ev); - } + // need to make a better interface around events. needing to know about the stream in some places but + // not in the store makes it inconvenient to map back and forth + let res = CeramicOneEvent::insert_many(&self.pool, &to_insert[..]).await?; - async fn verify_history_inline( - &mut self, - pool: &SqlitePool, - to_check: Vec<(DeliverableMetadata, EventInsertable)>, - ) -> Result<()> { - if to_check.is_empty() { - return Ok(()); - } + // api writes shouldn't have any missed pieces that need ordering so we don't send those + if !history_required { + let to_send = res + .inserted + .iter() + .filter(|i| i.new_key) + .collect::>(); - let incoming_deliverable_cids: HashSet = self - .insert_now - .iter() - .filter_map(|e| { - if e.body.deliverable { - Some(e.body.cid) + for ev in to_send { + if let Some((ev, header)) = to_insert_with_header + .iter() + .find(|(i, _)| i.order_key == ev.order_key) + { + let new = InsertableBodyWithHeader { + body: ev.body.clone(), + header: header.to_owned(), + }; + trace!(event=?ev, "sending delivered to ordering task"); + if let Err(e) = self.delivery_task.tx_inserted.try_send(new) { + match e { + tokio::sync::mpsc::error::TrySendError::Full(e) => { + warn!(event=?e, limit=%PENDING_EVENTS_CHANNEL_DEPTH, "Delivery task full. Dropping event and will not be able to mark deliverable new stream event arrives or process is restarted"); + } + tokio::sync::mpsc::error::TrySendError::Closed(_) => { + warn!("Delivery task closed. 
shutting down"); + return Err(Error::new_fatal(anyhow::anyhow!( + "Delivery task closed" + ))); + } + } + } } else { - None + tracing::error!(event_id=%ev.order_key, "Missing header for inserted event should be unreachable!"); + debug_assert!(false); // panic in debug mode + continue; } - }) - .collect(); - - // ideally, this map would be per stream, but we are just processing all of them together for now - let mut to_check_map = StreamEvents::new(); - - let required_to_find = to_check.len(); - let mut found_in_batch = 0; - let mut insert_if_greenlit = HashMap::with_capacity(required_to_find); - - for (meta, ev) in to_check { - if incoming_deliverable_cids.contains(&meta.prev) { - trace!(new=%ev.body.cid, prev=%meta.prev, "prev event being added in same batch"); - found_in_batch += 1; - self.mark_event_deliverable_now(ev, meta.init_cid); - } else { - trace!(new=%ev.body.cid, prev=%meta.prev, "will check for prev event in db"); - - let _new = to_check_map.add_event(DeliverableEvent::new( - ev.body.cid, - meta.to_owned(), - None, - )); - insert_if_greenlit.insert(ev.body.cid, (meta, ev)); } } - if to_check_map.is_empty() { - return Ok(()); - } - - let deliverable = - OrderingState::discover_deliverable_events(pool, &mut to_check_map).await?; - if deliverable.len() != required_to_find - found_in_batch { - let missing = insert_if_greenlit - .values() - .filter_map(|(_, ev)| { - if !deliverable.contains(&ev.body.cid) { - Some(ev.body.cid) - } else { - None - } - }) - .collect::>(); - - tracing::info!(?missing, ?deliverable, "Missing required `prev` event CIDs"); + Ok(InsertResult { + store_result: res, + missing_history, + }) + } - Err(Error::new_invalid_arg(anyhow::anyhow!( - "Missing required `prev` event CIDs: {:?}", - missing - ))) + pub(crate) async fn load_by_cid( + pool: &SqlitePool, + cid: ceramic_core::Cid, + ) -> Result> { + let data = if let Some(ev) = CeramicOneEvent::value_by_cid(pool, &cid).await? 
{ + ev } else { - // we need to use the deliverable list's order because we might depend on something in the same batch, and that function will - // ensure we have a queue in the correct order. So we follow the order and use our insert_if_greenlit map to get the details. - for cid in deliverable { - if let Some((meta, insertable)) = insert_if_greenlit.remove(&cid) { - self.mark_event_deliverable_now(insertable, meta.init_cid); - } else { - warn!(%cid, "Didn't find event to insert in memory when it was expected"); - } - } - Ok(()) - } + return Ok(None); + }; + + let mut body = EventInsertableBody::try_from_carfile(cid, &data).await?; + let header = Self::parse_event_body(&mut body).await?; + Ok(Some(InsertableBodyWithHeader { body, header })) } } @@ -441,3 +337,47 @@ impl From for Vec { api_res } } + +#[derive(Debug, Clone)] +pub(crate) struct InsertableBodyWithHeader { + pub(crate) body: EventInsertableBody, + pub(crate) header: EventHeader, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +/// An event header wrapper for use in the store crate. +/// TODO: replace this with something from the event crate +pub(crate) enum EventHeader { + Init { + cid: ceramic_core::Cid, + }, + Data { + cid: ceramic_core::Cid, + stream_cid: ceramic_core::Cid, + prev: ceramic_core::Cid, + }, + Time { + cid: ceramic_core::Cid, + stream_cid: ceramic_core::Cid, + prev: ceramic_core::Cid, + }, +} + +impl EventHeader { + /// Returns the stream CID of the event + pub(crate) fn stream_cid(&self) -> ceramic_core::Cid { + match self { + EventHeader::Init { cid, .. } => *cid, + EventHeader::Data { stream_cid, .. } | EventHeader::Time { stream_cid, .. } => { + *stream_cid + } + } + } + + pub(crate) fn prev(&self) -> Option { + match self { + EventHeader::Init { .. } => None, + EventHeader::Data { prev, .. } | EventHeader::Time { prev, .. 
} => Some(*prev), + } + } +} diff --git a/service/src/tests/mod.rs b/service/src/tests/mod.rs index c052c0958..3a7b7b333 100644 --- a/service/src/tests/mod.rs +++ b/service/src/tests/mod.rs @@ -115,16 +115,6 @@ fn gen_rand_bytes() -> [u8; SIZE] { arr } -pub(crate) fn random_block() -> Block { - let mut data = [0u8; 1024]; - rand::Rng::fill(&mut ::rand::thread_rng(), &mut data); - let hash = Code::Sha2_256.digest(&data); - Block { - cid: Cid::new_v1(0x00, hash), - data: data.to_vec().into(), - } -} - pub(crate) async fn check_deliverable( pool: &ceramic_store::SqlitePool, cid: &Cid, @@ -173,28 +163,13 @@ async fn data_event( signed::Event::from_payload(unvalidated::Payload::Data(commit), signer.to_owned()).unwrap() } -async fn get_events_with_model(model: &StreamId) -> [(EventId, Vec); 3] { +// returns init + N events +async fn get_init_plus_n_events_with_model( + model: &StreamId, + number: usize, +) -> Vec<(EventId, Vec)> { let signer = Box::new(signer().await); - let data = gen_rand_bytes::<50>(); - let data2 = gen_rand_bytes::<50>(); - - let data = ipld!({ - "radius": 1, - "red": 2, - "green": 3, - "blue": 4, - "raw": data.as_slice(), - }); - - let data2 = ipld!({ - "radius": 1, - "red": 2, - "green": 3, - "blue": 4, - "raw": data2.as_slice(), - }); - let init = init_event(model, &signer).await; let init_cid = init.envelope_cid(); let (event_id, car) = ( @@ -203,33 +178,45 @@ async fn get_events_with_model(model: &StreamId) -> [(EventId, Vec); 3] { ); let init_cid = event_id.cid().unwrap(); - let data = data_event(init_cid, init_cid, data, &signer).await; - let cid = data.envelope_cid(); - let (data_id, data_car) = ( - build_event_id(&data.envelope_cid(), &init_cid, model), - data.encode_car().await.unwrap(), - ); - let data2 = data_event(init_cid, cid, data2, &signer).await; - let (data_id_2, data_car_2) = ( - build_event_id(&data2.envelope_cid(), &init_cid, model), - data2.encode_car().await.unwrap(), - ); - [ - (event_id, car), - (data_id, data_car), - 
(data_id_2, data_car_2), - ] + let mut events = Vec::with_capacity(number); + events.push((event_id, car)); + let mut prev = init_cid; + for _ in 0..number { + let data = gen_rand_bytes::<50>(); + let data = ipld!({ + "radius": 1, + "red": 2, + "green": 3, + "blue": 4, + "raw": data.as_slice(), + }); + + let data = data_event(init_cid, prev, data, &signer).await; + let (data_id, data_car) = ( + build_event_id(&data.envelope_cid(), &init_cid, model), + data.encode_car().await.unwrap(), + ); + prev = data_id.cid().unwrap(); + events.push((data_id, data_car)); + } + events } -pub(crate) async fn get_events_return_model() -> (StreamId, [(EventId, Vec); 3]) { +pub(crate) async fn get_events_return_model() -> (StreamId, Vec<(EventId, Vec)>) { let model = StreamId::document(random_cid()); - let events = get_events_with_model(&model).await; + let events = get_init_plus_n_events_with_model(&model, 3).await; (model, events) } // builds init -> data -> data that are a stream (will be a different stream each call) -pub(crate) async fn get_events() -> [(EventId, Vec); 3] { +pub(crate) async fn get_events() -> Vec<(EventId, Vec)> { let model = StreamId::document(random_cid()); - get_events_with_model(&model).await + get_init_plus_n_events_with_model(&model, 3).await +} + +// Get N events with the same model (init + N-1 data events) +pub(crate) async fn get_n_events(number: usize) -> Vec<(EventId, Vec)> { + let model = &StreamId::document(random_cid()); + get_init_plus_n_events_with_model(model, number - 1).await } diff --git a/service/src/tests/ordering.rs b/service/src/tests/ordering.rs index 976e9b1cd..1cad39fc9 100644 --- a/service/src/tests/ordering.rs +++ b/service/src/tests/ordering.rs @@ -1,5 +1,9 @@ +use std::collections::HashMap; + use ceramic_api::EventStore; use ceramic_core::EventId; +use rand::seq::SliceRandom; +use rand::thread_rng; use recon::ReconItem; use test_log::test; @@ -12,12 +16,14 @@ async fn setup_service() -> CeramicEventService { let conn = 
ceramic_store::SqlitePool::connect_in_memory() .await .unwrap(); + CeramicEventService::new_without_undelivered(conn) .await .unwrap() } async fn add_and_assert_new_recon_event(store: &CeramicEventService, item: ReconItem<'_, EventId>) { + tracing::trace!("inserted event: {}", item.key.cid().unwrap()); let new = store .insert_events_from_carfiles_recon(&[item]) .await @@ -45,30 +51,17 @@ async fn test_init_event_delivered() { } #[test(tokio::test)] -async fn test_missing_prev_error_history_required() { +async fn test_missing_prev_history_required_not_inserted() { let store = setup_service().await; let events = get_events().await; let data = &events[1]; let new = store .insert_events_from_carfiles_local_api(&[ReconItem::new(&data.0, &data.1)]) - .await; - match new { - Ok(v) => panic!("should have errored: {:?}", v), - Err(e) => { - match e { - crate::Error::InvalidArgument { error } => { - // yes fragile, but we want to make sure it's not a parsing error or something unexpected - assert!(error - .to_string() - .contains("Missing required `prev` event CIDs")); - } - e => { - panic!("unexpected error: {:?}", e); - } - }; - } - }; + .await + .unwrap(); + assert!(new.store_result.inserted.is_empty()); + assert_eq!(1, new.missing_history.len()); } #[test(tokio::test)] @@ -150,7 +143,7 @@ async fn test_missing_prev_pending_recon() { check_deliverable(&store.pool, &data.0.cid().unwrap(), true).await; // This happens out of band, so give it a moment to make sure everything is updated - tokio::time::sleep(std::time::Duration::from_millis(100)).await; + tokio::time::sleep(std::time::Duration::from_secs(1)).await; let (_, delivered) = store .events_since_highwater_mark(0, i64::MAX) @@ -178,9 +171,8 @@ async fn missing_prev_pending_recon_should_deliver_without_stream_update() { // now we add the second event, it should quickly become deliverable let data = &events[1]; add_and_assert_new_recon_event(&store, ReconItem::new(&data.0, &data.1)).await; - 
check_deliverable(&store.pool, &data.0.cid().unwrap(), false).await; // This happens out of band, so give it a moment to make sure everything is updated - tokio::time::sleep(std::time::Duration::from_millis(100)).await; + tokio::time::sleep(std::time::Duration::from_secs(1)).await; let (_, delivered) = store .events_since_highwater_mark(0, i64::MAX) @@ -234,7 +226,7 @@ async fn multiple_streams_missing_prev_recon_should_deliver_without_stream_updat // this _could_ be deliverable immediately if we checked but for now we just send to the other task, // so `check_deliverable` could return true or false depending on timing (but probably false). // as this is an implementation detail and we'd prefer true, we just use HW ordering to make sure it's been delivered - tokio::time::sleep(std::time::Duration::from_millis(100)).await; + tokio::time::sleep(std::time::Duration::from_secs(1)).await; let (_, delivered) = store .events_since_highwater_mark(0, i64::MAX) .await @@ -266,3 +258,118 @@ async fn multiple_streams_missing_prev_recon_should_deliver_without_stream_updat ]; assert_eq!(expected, delivered); } + +async fn validate_all_delivered(store: &CeramicEventService, expected_delivered: usize) { + loop { + let (_, delivered) = store + .events_since_highwater_mark(0, i64::MAX) + .await + .unwrap(); + let total = delivered.len(); + if total < expected_delivered { + tracing::trace!( + "found {} delivered, waiting for {}", + total, + expected_delivered + ); + tokio::time::sleep(std::time::Duration::from_millis(500)).await; + } else { + break; + } + } +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn recon_lots_of_streams() { + // adds `per_stream` events to `num_streams` streams, mixes up the event order for each stream, inserts half + // the events for each stream before mixing up the stream order and inserting the rest + // took like a minute on my machine to run 100 streams with 1000 events each, mostly inserting :( since ordering only gets 5 
seconds + let per_stream = 100; + let num_streams = 10; + let store = setup_service().await; + let expected = per_stream * num_streams; + let mut streams = Vec::new(); + let mut all_cids = Vec::new(); + let mut expected_stream_order = Vec::new(); + let mut cid_to_stream_map = HashMap::new(); + + for i in 0..num_streams { + let mut events = crate::tests::get_n_events(per_stream).await; + let cids = events + .iter() + .map(|e| e.0.cid().unwrap()) + .collect::>(); + cids.iter().for_each(|cid| { + cid_to_stream_map.insert(*cid, i); + }); + expected_stream_order.push(cids.clone()); + all_cids.extend(cids); + assert_eq!(per_stream, events.len()); + events.shuffle(&mut thread_rng()); + streams.push(events); + } + let mut total_added = 0; + + assert_eq!(expected, all_cids.len()); + + tracing::debug!(?all_cids, "starting test"); + for stream in streams.iter_mut() { + while let Some(event) = stream.pop() { + if stream.len() > per_stream / 2 { + total_added += 1; + add_and_assert_new_recon_event(&store, ReconItem::new(&event.0, &event.1)).await; + } else { + total_added += 1; + add_and_assert_new_recon_event(&store, ReconItem::new(&event.0, &event.1)).await; + break; + } + } + } + streams.shuffle(&mut thread_rng()); + for stream in streams.iter_mut() { + while let Some(event) = stream.pop() { + total_added += 1; + add_and_assert_new_recon_event(&store, ReconItem::new(&event.0, &event.1)).await; + } + } + // first just make sure they were all inserted (not delivered yet) + for (i, cid) in all_cids.iter().enumerate() { + let (exists, _delivered) = + ceramic_store::CeramicOneEvent::deliverable_by_cid(&store.pool, cid) + .await + .unwrap(); + assert!(exists, "idx: {}. 
missing cid: {}", i, cid); + } + + assert_eq!(expected, total_added); + tokio::time::timeout( + std::time::Duration::from_secs(5), + validate_all_delivered(&store, expected), + ) + .await + .unwrap(); + + let (_, delivered) = store + .events_since_highwater_mark(0, i64::MAX) + .await + .unwrap(); + + assert_eq!(expected, delivered.len()); + let mut streams_at_the_end = Vec::new(); + for _ in 0..num_streams { + streams_at_the_end.push(Vec::with_capacity(per_stream)); + } + for cid in delivered { + let stream = cid_to_stream_map.get(&cid).unwrap(); + let stream = streams_at_the_end.get_mut(*stream).unwrap(); + stream.push(cid); + } + // now we check that all the events are deliverable + for cid in all_cids.iter() { + check_deliverable(&store.pool, cid, true).await; + } + // and make sure the events were delivered for each stream streams in the same order as they were at the start + for (i, stream) in expected_stream_order.iter().enumerate() { + assert_eq!(*stream, streams_at_the_end[i]); + } +} diff --git a/store/Cargo.toml b/store/Cargo.toml index 3fa78aefb..d9482a1a4 100644 --- a/store/Cargo.toml +++ b/store/Cargo.toml @@ -13,19 +13,21 @@ anyhow.workspace = true async-trait.workspace = true ceramic-api.workspace = true ceramic-core.workspace = true +ceramic-event.workspace = true ceramic-metrics.workspace = true cid.workspace = true futures.workspace = true hex.workspace = true +ipld-core.workspace = true iroh-bitswap.workspace = true iroh-car.workspace = true itertools = "0.12.0" -multihash.workspace = true multihash-codetable.workspace = true +multihash.workspace = true prometheus-client.workspace = true -thiserror.workspace = true recon.workspace = true sqlx.workspace = true +thiserror.workspace = true tokio.workspace = true [dev-dependencies] diff --git a/store/benches/sqlite_store.rs b/store/benches/sqlite_store.rs index b028df40e..31dda9ee8 100644 --- a/store/benches/sqlite_store.rs +++ b/store/benches/sqlite_store.rs @@ -75,7 +75,7 @@ async fn 
model_setup(tpe: ModelType, cnt: usize) -> ModelSetup { let body = EventInsertableBody::try_from_carfile(init.0.cid().unwrap(), &init.1) .await .unwrap(); - events.push(EventInsertable::try_new(init.0, body).unwrap()); + events.push(EventInsertable::try_from_carfile(init.0, body).unwrap()); } let pool = SqlitePool::connect_in_memory().await.unwrap(); diff --git a/store/src/sql/access/event.rs b/store/src/sql/access/event.rs index eaa0a9e33..2e9cbb622 100644 --- a/store/src/sql/access/event.rs +++ b/store/src/sql/access/event.rs @@ -175,7 +175,7 @@ impl CeramicOneEvent { item.deliverable(), )); if new_key { - for block in item.blocks().iter() { + for block in item.body.blocks().iter() { CeramicOneBlock::insert(&mut tx, block.multihash.inner(), &block.bytes).await?; CeramicOneEventBlock::insert(&mut tx, block).await?; } @@ -191,21 +191,40 @@ impl CeramicOneEvent { Ok(res) } - /// Find events that haven't been delivered to the client and may be ready + /// Find events that haven't been delivered to the client and may be ready. + /// Returns the events and their values, and the highwater mark of the last event. + /// The highwater mark can be used on the next call to get the next batch of events and will be 0 when done. 
pub async fn undelivered_with_values( pool: &SqlitePool, - offset: usize, - limit: usize, - ) -> Result)>> { - let all_blocks: Vec = + limit: i64, + highwater_mark: i64, + ) -> Result<(Vec<(EventId, Vec)>, i64)> { + struct UndeliveredEventBlockRow { + block: ReconEventBlockRaw, + row_id: i64, + } + + use sqlx::Row as _; + + impl sqlx::FromRow<'_, sqlx::sqlite::SqliteRow> for UndeliveredEventBlockRow { + fn from_row(row: &sqlx::sqlite::SqliteRow) -> std::result::Result { + let row_id = row.try_get("rowid")?; + let block = ReconEventBlockRaw::from_row(row)?; + Ok(Self { block, row_id }) + } + } + + let all_blocks: Vec = sqlx::query_as(EventQuery::undelivered_with_values()) - .bind(limit as i64) - .bind(offset as i64) + .bind(limit) + .bind(highwater_mark) .fetch_all(pool.reader()) .await?; - let values = ReconEventBlockRaw::into_carfiles(all_blocks).await?; - Ok(values) + let max_highwater = all_blocks.iter().map(|row| row.row_id).max().unwrap_or(0); // if there's nothing in the list we just return 0 + let blocks = all_blocks.into_iter().map(|b| b.block).collect(); + let values = ReconEventBlockRaw::into_carfiles(blocks).await?; + Ok((values, max_highwater)) } /// Calculate the hash of a range of events diff --git a/store/src/sql/entities/event.rs b/store/src/sql/entities/event.rs index 363ee7ae6..238d6c7df 100644 --- a/store/src/sql/entities/event.rs +++ b/store/src/sql/entities/event.rs @@ -49,14 +49,22 @@ pub struct EventInsertable { } impl EventInsertable { - /// Try to build the EventInsertable struct. Will error if the key and body don't match. + /// Try to build the EventInsertable struct from a carfile. 
+ pub async fn try_from_carfile(order_key: EventId, body: &[u8]) -> Result { + let cid = order_key.cid().ok_or_else(|| { + Error::new_invalid_arg(anyhow::anyhow!("EventID is missing a CID: {}", order_key)) + })?; + let body = EventInsertableBody::try_from_carfile(cid, body).await?; + Ok(Self { order_key, body }) + } + + /// Build the EventInsertable struct from an EventID and EventInsertableBody. + /// Will error if the CID in the EventID doesn't match the CID in the EventInsertableBody. pub fn try_new(order_key: EventId, body: EventInsertableBody) -> Result { - if order_key.cid().as_ref() != Some(&body.cid) { - return Err(Error::new_app(anyhow!( - "Event ID and body CID do not match: {:?} != {:?}", - order_key.cid(), - body.cid - )))?; + if order_key.cid() != Some(body.cid()) { + return Err(Error::new_invalid_arg(anyhow!( + "EventID CID does not match the body CID" + ))); } Ok(Self { order_key, body }) } @@ -68,18 +76,7 @@ impl EventInsertable { /// Whether this event is deliverable currently pub fn deliverable(&self) -> bool { - self.body.deliverable - } - - /// Whether this event is deliverable currently - pub fn blocks(&self) -> &Vec { - &self.body.blocks - } - - /// Mark the event as deliverable. - /// This will be used when inserting the event to make sure the field is updated accordingly. - pub fn set_deliverable(&mut self, deliverable: bool) { - self.body.deliverable = deliverable; + self.body.deliverable() } } @@ -87,12 +84,12 @@ impl EventInsertable { /// The type we use to insert events into the database pub struct EventInsertableBody { /// The event CID i.e. the root CID from the car file - pub cid: Cid, - /// Whether this event is deliverable to clients or is waiting for more data - pub deliverable: bool, + cid: Cid, + /// Whether the event is deliverable i.e. it's prev has been delivered and the chain is continuous to an init event + deliverable: bool, /// The blocks of the event // could use a map but there aren't that many blocks per event (right?) 
- pub blocks: Vec, + blocks: Vec, } impl EventInsertableBody { @@ -105,6 +102,27 @@ impl EventInsertableBody { } } + /// Get the CID of the event + pub fn cid(&self) -> Cid { + self.cid + } + + /// Whether this event is deliverable currently + pub fn blocks(&self) -> &Vec { + &self.blocks + } + + /// Whether this event is deliverable currently + pub fn deliverable(&self) -> bool { + self.deliverable + } + + /// Mark the event as deliverable. + /// This will be used when inserting the event to make sure the field is updated accordingly. + pub fn set_deliverable(&mut self, deliverable: bool) { + self.deliverable = deliverable; + } + /// Find a block from the carfile for a given CID if it's included pub fn block_for_cid_opt(&self, cid: &Cid) -> Option<&EventBlockRaw> { self.blocks diff --git a/store/src/sql/query.rs b/store/src/sql/query.rs index fc652dfcf..2969a3244 100644 --- a/store/src/sql/query.rs +++ b/store/src/sql/query.rs @@ -81,20 +81,21 @@ impl EventQuery { /// Find event CIDs that have not yet been delivered to the client /// Useful after a restart, or if the task managing delivery has availability to try old events + /// Requires binding two parameters: + /// $1: limit (i64) + /// $2: rowid (i64) pub fn undelivered_with_values() -> &'static str { r#"SELECT - key.order_key, key.event_cid, eb.codec, eb.root, eb.idx, b.multihash, b.bytes + key.order_key, key.event_cid, eb.codec, eb.root, eb.idx, b.multihash, b.bytes, key.rowid FROM ( SELECT - e.cid as event_cid, e.order_key + e.cid as event_cid, e.order_key, e.rowid FROM ceramic_one_event e WHERE EXISTS (SELECT 1 FROM ceramic_one_event_block where event_cid = e.cid) - AND e.delivered IS NULL + AND e.delivered IS NULL and e.rowid > $2 LIMIT $1 - OFFSET - $2 ) key JOIN ceramic_one_event_block eb ON key.event_cid = eb.event_cid diff --git a/store/src/sql/test.rs b/store/src/sql/test.rs index c8beaeaf1..646f0c9b7 100644 --- a/store/src/sql/test.rs +++ b/store/src/sql/test.rs @@ -123,10 +123,11 @@ async fn 
range_query() {

 #[test(tokio::test)]
 async fn undelivered_with_values() {
     let pool = SqlitePool::connect_in_memory().await.unwrap();
-    let res = CeramicOneEvent::undelivered_with_values(&pool, 0, 10000)
+    let (res, hw) = CeramicOneEvent::undelivered_with_values(&pool, 0, 10000)
         .await
         .unwrap();
     assert_eq!(res.len(), 0);
+    assert_eq!(hw, 0);
 }
 
 #[test(tokio::test)]

From 39255715593edb038c64f3b1a75a0470343111fd Mon Sep 17 00:00:00 2001
From: David Estes
Date: Wed, 19 Jun 2024 15:55:38 -0600
Subject: [PATCH 02/15] fix: change ordering events function

- use map of {cid: deliverable} to reduce the number of entries we have to review at the end
- use a VecDeque instead of a Vec to fix a bug where it would just process the same event and exit rather than find the in memory history
- need to write tests to exercise this. it would have rejected an api write that had a prev in one batch while recon events would be sorted out. would be hard to encounter but merits tests, particularly because it's hard to encounter.
--- service/src/event/order_events.rs | 43 +++++++++++++++++++------------ 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/service/src/event/order_events.rs b/service/src/event/order_events.rs index 88111ea0f..a13719318 100644 --- a/service/src/event/order_events.rs +++ b/service/src/event/order_events.rs @@ -1,4 +1,4 @@ -use std::collections::HashSet; +use std::collections::{HashMap, VecDeque}; use ceramic_store::{CeramicOneEvent, EventInsertable, SqlitePool}; use cid::Cid; @@ -20,8 +20,11 @@ impl OrderEvents { pool: &SqlitePool, mut candidate_events: Vec<(EventInsertable, EventHeader)>, ) -> Result { - let new_cids: HashSet = - HashSet::from_iter(candidate_events.iter().map(|(e, _)| e.cid())); + let mut new_cids: HashMap = HashMap::from_iter( + candidate_events + .iter() + .map(|(e, _)| (e.cid(), e.body.deliverable())), + ); let mut deliverable = Vec::with_capacity(candidate_events.len()); candidate_events.retain(|(e, h)| { if e.deliverable() { @@ -38,7 +41,7 @@ impl OrderEvents { }); } - let mut prevs_in_memory = Vec::with_capacity(candidate_events.len()); + let mut undelivered_prevs_in_memory = VecDeque::with_capacity(candidate_events.len()); let mut missing_history = Vec::with_capacity(candidate_events.len()); while let Some((mut event, header)) = candidate_events.pop() { @@ -47,19 +50,22 @@ impl OrderEvents { unreachable!("Init events should have been filtered out since they're always deliverable"); } Some(prev) => { - if new_cids.contains(&prev) { - prevs_in_memory.push((event, header)); - continue; + if let Some(in_mem) = new_cids.get(&prev) { + if *in_mem { + event.body.set_deliverable(true); + *new_cids.get_mut(&event.cid()).expect("CID must exist") = true; + deliverable.push((event, header)); + } else { + undelivered_prevs_in_memory.push_back((event, header)); + } } else { let (_exists, prev_deliverable) = CeramicOneEvent::deliverable_by_cid(pool, &prev).await?; if prev_deliverable { event.body.set_deliverable(true); + 
*new_cids.get_mut(&event.cid()).expect("CID must exist") = true; deliverable.push((event, header)); } else { - // technically, we may have the "rosetta stone" event in memory that could unlock this chain, if we loaded everything and recursed, - // but the immediate prev is not in this set and has not been delivered to the client yet, so they shouldn't have known how to - // construct this event so we'll consider this missing history. This can be used to return an error if the event is required to have history. missing_history.push((event, header)); } } @@ -67,27 +73,30 @@ impl OrderEvents { } } - // We add the events to the deliverable list until nothing changes - while let Some((mut event, header)) = prevs_in_memory.pop() { + // We continually loop through the set adding events to the deliverable list until nothing changes. + // If our prev is in this list, we won't find it until it's added to the deliverable set. This means + // we may loop through multiple times putting things back in the queue, but it should be a short list + // and it will shrink every time we move something to the deliverable set, so it should be acceptable. + // We can't quite get rid of this loop because we may have discovered the prev from the database in the previous pass. + while let Some((mut event, header)) = undelivered_prevs_in_memory.pop_front() { let mut made_changes = false; match header.prev() { None => { unreachable!("Init events should have been filtered out of the in memory set"); } Some(prev) => { - // a hashset would be better loopkup but we're not going to have that many events so hashing - // for a handful of lookups and then convert back to a vec probably isn't worth it. 
- if deliverable.iter().any(|(e, _)| e.cid() == prev) { + if new_cids.get(&prev).map_or(false, |v| *v) { + *new_cids.get_mut(&event.cid()).expect("CID must exist") = true; event.body.set_deliverable(true); deliverable.push((event, header)); made_changes = true; } else { - prevs_in_memory.push((event, header)); + undelivered_prevs_in_memory.push_back((event, header)); } } } if !made_changes { - missing_history.extend(prevs_in_memory); + missing_history.extend(undelivered_prevs_in_memory); break; } } From 144cbcc2ba45d9ce85bf820368abb92685941d3c Mon Sep 17 00:00:00 2001 From: David Estes Date: Wed, 19 Jun 2024 16:23:46 -0600 Subject: [PATCH 03/15] refactor: rename structs with Meta instead of Header --- service/src/event/order_events.rs | 8 +++---- service/src/event/ordering_task.rs | 26 ++++++++++---------- service/src/event/service.rs | 38 +++++++++++++++--------------- 3 files changed, 36 insertions(+), 36 deletions(-) diff --git a/service/src/event/order_events.rs b/service/src/event/order_events.rs index a13719318..2a1c837fb 100644 --- a/service/src/event/order_events.rs +++ b/service/src/event/order_events.rs @@ -5,11 +5,11 @@ use cid::Cid; use crate::Result; -use super::service::EventHeader; +use super::service::EventMetadata; pub(crate) struct OrderEvents { - pub(crate) deliverable: Vec<(EventInsertable, EventHeader)>, - pub(crate) missing_history: Vec<(EventInsertable, EventHeader)>, + pub(crate) deliverable: Vec<(EventInsertable, EventMetadata)>, + pub(crate) missing_history: Vec<(EventInsertable, EventMetadata)>, } impl OrderEvents { @@ -18,7 +18,7 @@ impl OrderEvents { /// Will look up the prev from the database if needed to check if it's deliverable (could possibly change this for recon and allow the ordering task to handle it?) 
pub async fn try_new( pool: &SqlitePool, - mut candidate_events: Vec<(EventInsertable, EventHeader)>, + mut candidate_events: Vec<(EventInsertable, EventMetadata)>, ) -> Result { let mut new_cids: HashMap = HashMap::from_iter( candidate_events diff --git a/service/src/event/ordering_task.rs b/service/src/event/ordering_task.rs index d3ce40f72..05cd0c0f7 100644 --- a/service/src/event/ordering_task.rs +++ b/service/src/event/ordering_task.rs @@ -7,7 +7,7 @@ use tracing::{debug, error, info, trace, warn}; use crate::{CeramicEventService, Error, Result}; -use super::service::{EventHeader, InsertableBodyWithHeader}; +use super::service::{EventMetadata, InsertableBodyWithMeta}; /// How many events to select at once to see if they've become deliverable when we have downtime /// Used at startup and occassionally in case we ever dropped something @@ -27,7 +27,7 @@ pub struct DeliverableTask { pub(crate) _handle: tokio::task::JoinHandle<()>, /// Currently events discovered over recon that are out of order and need to be marked ready (deliverable) /// when their prev chain is discovered and complete (i.e. my prev is deliverable then I am deliverable). - pub(crate) tx_inserted: tokio::sync::mpsc::Sender, + pub(crate) tx_inserted: tokio::sync::mpsc::Sender, } #[derive(Debug)] @@ -36,7 +36,7 @@ pub struct OrderingTask {} impl OrderingTask { pub async fn run(pool: SqlitePool, q_depth: usize, load_delivered: bool) -> DeliverableTask { let (tx_inserted, rx_inserted) = - tokio::sync::mpsc::channel::(q_depth); + tokio::sync::mpsc::channel::(q_depth); let handle = tokio::spawn(async move { Self::run_loop(pool, load_delivered, rx_inserted).await }); @@ -50,7 +50,7 @@ impl OrderingTask { async fn run_loop( pool: SqlitePool, load_undelivered: bool, - mut rx_inserted: tokio::sync::mpsc::Receiver, + mut rx_inserted: tokio::sync::mpsc::Receiver, ) { // before starting, make sure we've updated any events in the database we missed // this could take a long time. 
possibly we want to put it in another task so we can start processing events immediately @@ -146,7 +146,7 @@ impl StreamEvent { }; let known_prev = match &parsed_body.header { - EventHeader::Init { cid, .. } => { + EventMetadata::Init { cid, .. } => { if !deliverable { warn!(%cid,"Found init event in database that wasn't previously marked as deliverable. Updating now..."); let mut tx = pool.begin_tx().await?; @@ -155,7 +155,7 @@ impl StreamEvent { } StreamEvent::InitEvent(*cid) } - EventHeader::Data { prev, .. } | EventHeader::Time { prev, .. } => { + EventMetadata::Data { prev, .. } | EventMetadata::Time { prev, .. } => { if deliverable { trace!(%cid, "Found deliverable event in database"); StreamEvent::KnownDeliverable(StreamEventMetadata::new(cid, *prev)) @@ -173,11 +173,11 @@ impl StreamEvent { } } -impl From for StreamEvent { - fn from(ev: InsertableBodyWithHeader) -> Self { +impl From for StreamEvent { + fn from(ev: InsertableBodyWithMeta) -> Self { match ev.header { - EventHeader::Init { cid, .. } => StreamEvent::InitEvent(cid), - EventHeader::Data { cid, prev, .. } | EventHeader::Time { cid, prev, .. } => { + EventMetadata::Init { cid, .. } => StreamEvent::InitEvent(cid), + EventMetadata::Data { cid, prev, .. } | EventMetadata::Time { cid, prev, .. } => { let meta = StreamEventMetadata::new(cid, prev); if ev.body.deliverable() { StreamEvent::KnownDeliverable(meta) @@ -388,7 +388,7 @@ impl OrderingState { /// Add a stream to the list of streams to process. /// We ignore delivered events for streams we're not tracking as we can look them up later if we need them. /// We will get lots of init events we can ignore unless we need them, otherwise they'll be stuck in memory for a long time. 
- fn add_inserted_events(&mut self, events: Vec) { + fn add_inserted_events(&mut self, events: Vec) { for ev in events { let stream_cid = ev.header.stream_cid(); let event = ev.into(); @@ -484,12 +484,12 @@ impl OrderingState { let loaded = CeramicEventService::parse_event_carfile_cid(event_cid, &carfile).await?; let event = match &loaded.header { - EventHeader::Init { cid, .. } => { + EventMetadata::Init { cid, .. } => { warn!(%cid,"Found init event in database that wasn't previously marked as deliverable. Updating now..."); to_store_asap.push(*cid); StreamEvent::InitEvent(*cid) } - EventHeader::Data { cid, prev, .. } | EventHeader::Time { cid, prev, .. } => { + EventMetadata::Data { cid, prev, .. } | EventMetadata::Time { cid, prev, .. } => { StreamEvent::Undelivered(StreamEventMetadata::new(*cid, *prev)) } }; diff --git a/service/src/event/service.rs b/service/src/event/service.rs index 079d5dedf..ef5b8240e 100644 --- a/service/src/event/service.rs +++ b/service/src/event/service.rs @@ -100,7 +100,7 @@ impl CeramicEventService { pub(crate) async fn parse_event_carfile_order_key( event_id: EventId, carfile: &[u8], - ) -> Result<(EventInsertable, EventHeader)> { + ) -> Result<(EventInsertable, EventMetadata)> { let mut insertable = EventInsertable::try_from_carfile(event_id, carfile).await?; let header = Self::parse_event_body(&mut insertable.body).await?; @@ -110,14 +110,14 @@ impl CeramicEventService { pub(crate) async fn parse_event_carfile_cid( cid: ceramic_core::Cid, carfile: &[u8], - ) -> Result { + ) -> Result { let mut body = EventInsertableBody::try_from_carfile(cid, carfile).await?; let header = Self::parse_event_body(&mut body).await?; - Ok(InsertableBodyWithHeader { body, header }) + Ok(InsertableBodyWithMeta { body, header }) } - pub(crate) async fn parse_event_body(body: &mut EventInsertableBody) -> Result { + pub(crate) async fn parse_event_body(body: &mut EventInsertableBody) -> Result { let cid = body.cid(); // purely for convenience writing out 
the match let ev_block = body.block_for_cid(&cid)?; @@ -132,7 +132,7 @@ impl CeramicEventService { let (deliverable, header) = match event_ipld { unvalidated::RawEvent::Time(t) => ( false, - EventHeader::Time { + EventMetadata::Time { cid, stream_cid: t.id(), prev: t.prev(), @@ -159,16 +159,16 @@ impl CeramicEventService { match payload { unvalidated::Payload::Data(d) => ( false, - EventHeader::Data { + EventMetadata::Data { cid, stream_cid: *d.id(), prev: *d.prev(), }, ), - unvalidated::Payload::Init(_init) => (true, EventHeader::Init { cid }), + unvalidated::Payload::Init(_init) => (true, EventMetadata::Init { cid }), } } - unvalidated::RawEvent::Unsigned(_init) => (true, EventHeader::Init { cid }), + unvalidated::RawEvent::Unsigned(_init) => (true, EventMetadata::Init { cid }), }; body.set_deliverable(deliverable); Ok(header) @@ -267,7 +267,7 @@ impl CeramicEventService { .iter() .find(|(i, _)| i.order_key == ev.order_key) { - let new = InsertableBodyWithHeader { + let new = InsertableBodyWithMeta { body: ev.body.clone(), header: header.to_owned(), }; @@ -302,7 +302,7 @@ impl CeramicEventService { pub(crate) async fn load_by_cid( pool: &SqlitePool, cid: ceramic_core::Cid, - ) -> Result> { + ) -> Result> { let data = if let Some(ev) = CeramicOneEvent::value_by_cid(pool, &cid).await? { ev } else { @@ -311,7 +311,7 @@ impl CeramicEventService { let mut body = EventInsertableBody::try_from_carfile(cid, &data).await?; let header = Self::parse_event_body(&mut body).await?; - Ok(Some(InsertableBodyWithHeader { body, header })) + Ok(Some(InsertableBodyWithMeta { body, header })) } } @@ -339,15 +339,15 @@ impl From for Vec { } #[derive(Debug, Clone)] -pub(crate) struct InsertableBodyWithHeader { +pub(crate) struct InsertableBodyWithMeta { pub(crate) body: EventInsertableBody, - pub(crate) header: EventHeader, + pub(crate) header: EventMetadata, } #[derive(Debug, Clone, PartialEq, Eq)] /// An event header wrapper for use in the store crate. 
/// TODO: replace this with something from the event crate -pub(crate) enum EventHeader { +pub(crate) enum EventMetadata { Init { cid: ceramic_core::Cid, }, @@ -363,12 +363,12 @@ pub(crate) enum EventHeader { }, } -impl EventHeader { +impl EventMetadata { /// Returns the stream CID of the event pub(crate) fn stream_cid(&self) -> ceramic_core::Cid { match self { - EventHeader::Init { cid, .. } => *cid, - EventHeader::Data { stream_cid, .. } | EventHeader::Time { stream_cid, .. } => { + EventMetadata::Init { cid, .. } => *cid, + EventMetadata::Data { stream_cid, .. } | EventMetadata::Time { stream_cid, .. } => { *stream_cid } } @@ -376,8 +376,8 @@ impl EventHeader { pub(crate) fn prev(&self) -> Option { match self { - EventHeader::Init { .. } => None, - EventHeader::Data { prev, .. } | EventHeader::Time { prev, .. } => Some(*prev), + EventMetadata::Init { .. } => None, + EventMetadata::Data { prev, .. } | EventMetadata::Time { prev, .. } => Some(*prev), } } } From bb7e96b536b925c8b6d936cf085858bbd0929192 Mon Sep 17 00:00:00 2001 From: David Estes Date: Wed, 19 Jun 2024 16:42:51 -0600 Subject: [PATCH 04/15] fix: fixes and changes based on review comments - process all undelivered events at server startup so we can fail the process not just the task - adjust loop to while condition - update some comments/function names - fail loudly (i.e. panic) on anything expected to be unreachable (next up: refactor to make those states unrepresentable) --- service/src/event/order_events.rs | 2 +- service/src/event/ordering_task.rs | 196 +++++++++++------------------ service/src/event/service.rs | 24 +++- 3 files changed, 91 insertions(+), 131 deletions(-) diff --git a/service/src/event/order_events.rs b/service/src/event/order_events.rs index 2a1c837fb..17b17a619 100644 --- a/service/src/event/order_events.rs +++ b/service/src/event/order_events.rs @@ -77,7 +77,7 @@ impl OrderEvents { // If our prev is in this list, we won't find it until it's added to the deliverable set. 
This means // we may loop through multiple times putting things back in the queue, but it should be a short list // and it will shrink every time we move something to the deliverable set, so it should be acceptable. - // We can't quite get rid of this loop because we may have discovered the prev from the database in the previous pass. + // We can't quite get rid of this loop because we may have discovered our prev's prev from the database in the previous pass. while let Some((mut event, header)) = undelivered_prevs_in_memory.pop_front() { let mut made_changes = false; match header.prev() { diff --git a/service/src/event/ordering_task.rs b/service/src/event/ordering_task.rs index 05cd0c0f7..ed3b7f3a4 100644 --- a/service/src/event/ordering_task.rs +++ b/service/src/event/ordering_task.rs @@ -9,15 +9,6 @@ use crate::{CeramicEventService, Error, Result}; use super::service::{EventMetadata, InsertableBodyWithMeta}; -/// How many events to select at once to see if they've become deliverable when we have downtime -/// Used at startup and occassionally in case we ever dropped something -/// We keep the number small for now as we may need to traverse many prevs for each one of these and load them into memory. -const DELIVERABLE_EVENTS_BATCH_SIZE: u32 = 1000; - -/// How many batches of undelivered events are we willing to process on start up? -/// To avoid an infinite loop. It's going to take a long time to process `DELIVERABLE_EVENTS_BATCH_SIZE * MAX_ITERATIONS` events -const MAX_ITERATIONS: usize = 100_000_000; - type StreamCid = Cid; type EventCid = Cid; type PrevCid = Cid; @@ -34,12 +25,22 @@ pub struct DeliverableTask { pub struct OrderingTask {} impl OrderingTask { - pub async fn run(pool: SqlitePool, q_depth: usize, load_delivered: bool) -> DeliverableTask { + /// Discover all undelivered events in the database and mark them deliverable if possible. + /// Returns the number of events marked deliverable. 
+ pub async fn process_all_undelivered_events( + pool: &SqlitePool, + max_iterations: usize, + batch_size: u32, + ) -> Result { + OrderingState::process_all_undelivered_events(pool, max_iterations, batch_size).await + } + + /// Spawn a task to run the ordering task background process in a loop + pub async fn run(pool: SqlitePool, q_depth: usize) -> DeliverableTask { let (tx_inserted, rx_inserted) = tokio::sync::mpsc::channel::(q_depth); - let handle = - tokio::spawn(async move { Self::run_loop(pool, load_delivered, rx_inserted).await }); + let handle = tokio::spawn(async move { Self::run_loop(pool, rx_inserted).await }); DeliverableTask { _handle: handle, @@ -49,27 +50,11 @@ impl OrderingTask { async fn run_loop( pool: SqlitePool, - load_undelivered: bool, mut rx_inserted: tokio::sync::mpsc::Receiver, ) { - // before starting, make sure we've updated any events in the database we missed - // this could take a long time. possibly we want to put it in another task so we can start processing events immediately let mut state = OrderingState::new(); - if load_undelivered - && state - .process_all_undelivered_events( - &pool, - MAX_ITERATIONS, - DELIVERABLE_EVENTS_BATCH_SIZE, - ) - .await - .map_err(Self::log_error) - .is_err() - { - return; - } - loop { + while !rx_inserted.is_closed() { let mut recon_events = Vec::with_capacity(100); // consider trying to recv in a loop until X or 10ms whatever comes first and then process // the more events we get in memory, the fewer queries we need to run. @@ -85,22 +70,10 @@ impl OrderingTask { { return; } - } else if rx_inserted.is_closed() { - debug!( - "Server dropped the delivered events channel. Attempting to processing streams in memory once more before exiting." 
- ); - - if state - .process_streams(&pool) - .await - .map_err(Self::log_error) - .is_err() - { - return; - } - break; } } + + let _ = state.process_streams(&pool).await.map_err(Self::log_error); } /// Log an error and return a result that can be used to stop the task if it was fatal @@ -134,7 +107,7 @@ enum StreamEvent { impl StreamEvent { /// Builds a stream event from the database if it exists. async fn load_by_cid(pool: &SqlitePool, cid: EventCid) -> Result> { - // TODO: one query + // TODO: Condense the multiple DB queries happening here into a single query let (exists, deliverable) = CeramicOneEvent::deliverable_by_cid(pool, &cid).await?; if exists { let parsed_body = @@ -147,12 +120,11 @@ impl StreamEvent { let known_prev = match &parsed_body.header { EventMetadata::Init { cid, .. } => { - if !deliverable { - warn!(%cid,"Found init event in database that wasn't previously marked as deliverable. Updating now..."); - let mut tx = pool.begin_tx().await?; - CeramicOneEvent::mark_ready_to_deliver(&mut tx, cid).await?; - tx.commit().await?; - } + assert!( + deliverable, + "Init event must always be deliverable. Found undelivered CID: {}", + cid + ); StreamEvent::InitEvent(*cid) } EventMetadata::Data { prev, .. } | EventMetadata::Time { prev, .. } => { @@ -209,7 +181,7 @@ pub(crate) struct StreamEvents { /// Map of `event.cid` to `metadata` for quick lookup of the event metadata. cid_map: HashMap, /// whether we should process this stream because new events have been added - skip_processing: bool, + should_process: bool, /// The newly discovered events that are deliverable and should be processed. 
new_deliverable: VecDeque, } @@ -223,9 +195,9 @@ impl StreamEvents { // we'll be processed if something in memory depends on this event fn update_should_process_for_new_delivered(&mut self, new_cid: &EventCid) { - // don't reset a true flag to false - if self.skip_processing { - self.skip_processing = !self.prev_map.contains_key(new_cid); + // don't reset the flag if we're already supposed to be processed + if !self.should_process { + self.should_process = self.prev_map.contains_key(new_cid); } } @@ -243,9 +215,9 @@ impl StreamEvents { } StreamEvent::Undelivered(meta) => { self.prev_map.insert(meta.prev, meta.cid); - if self.skip_processing { + if !self.should_process { // we depend on something in memory - self.skip_processing = !self.prev_map.contains_key(&meta.prev) + self.should_process = self.prev_map.contains_key(&meta.prev) } meta.cid } @@ -264,20 +236,17 @@ impl StreamEvents { } /// Called when we've persisted the deliverable events to the database and can clean up our state. - /// Returns true if we should be retained for future processing (i.e we have more we need to discover) - /// and false if we can be dropped from memory. - fn completed_processing(&mut self) -> bool { - self.skip_processing = true; + /// Returns `true` if we're finished processing and can be dropped from memory. + /// Returns `false` if we have more work to do and should be retained for future processing (i.e we have more we need to discover) + fn processing_completed(&mut self) -> bool { + self.should_process = false; for cid in self.new_deliverable.iter() { if let Some(ev) = self.cid_map.get_mut(cid) { match ev { - StreamEvent::InitEvent(_) => {} - StreamEvent::KnownDeliverable(_) => { - warn!( - ?ev, - "Found event in deliverable queue that was already marked as deliverable." - ) + StreamEvent::InitEvent(cid) + | StreamEvent::KnownDeliverable(StreamEventMetadata { cid, .. 
}) => { + unreachable!("should not have found a delivered event in our deliverable queue CID={}", cid); } StreamEvent::Undelivered(meta) => { // we're delivered now @@ -287,7 +256,8 @@ impl StreamEvents { } } self.new_deliverable.clear(); - self.cid_map + !self + .cid_map .iter() .any(|(_, ev)| matches!(ev, StreamEvent::Undelivered(_))) } @@ -327,24 +297,22 @@ impl StreamEvents { // nothing to do until it arrives on the channel } } - - continue; - } - - let prev_event = StreamEvent::load_by_cid(pool, prev).await?; - if let Some(known_prev) = prev_event { - match &known_prev { - StreamEvent::InitEvent(_) | StreamEvent::KnownDeliverable(_) => { - deliverable_queue.push_back(cid); - } - StreamEvent::Undelivered(undelivered_ev) => { - // we'll try to follow this back to something deliverable - undelivered.push_back((undelivered_ev.cid, undelivered_ev.prev)); + } else { + let prev_event = StreamEvent::load_by_cid(pool, prev).await?; + if let Some(known_prev) = prev_event { + match &known_prev { + StreamEvent::InitEvent(_) | StreamEvent::KnownDeliverable(_) => { + deliverable_queue.push_back(cid); + } + StreamEvent::Undelivered(undelivered_ev) => { + // add this to the queue we're processing and try to follow it back to something deliverable + undelivered.push_back((undelivered_ev.cid, undelivered_ev.prev)); + } } + self.add_event(known_prev); + } else { + trace!("Found event that depends on another event we haven't discovered yet"); } - self.add_event(known_prev); - } else { - trace!("Found event that depends on another event we haven't discovered yet"); } } @@ -354,7 +322,8 @@ impl StreamEvents { if let Some(ev) = self.cid_map.get(&now_ready_ev) { match ev { StreamEvent::InitEvent(_) | StreamEvent::KnownDeliverable(_) => { - warn!(?ev, "should not have found a deliverable event when we expected only undelivered events!"); + // this would result in a no-op as we to re-update it, but we'll fail loudly so tests crash if it ever happens and we can avoid it ever happening + 
unreachable!("should not have found a deliverable event when we expected only undelivered events! CID={}", now_ready_ev); } StreamEvent::Undelivered(_) => { newly_ready.push_back(now_ready_ev); @@ -409,7 +378,7 @@ impl OrderingState { /// but for now it assumes it can process all the streams and events in one go. It should be idempotent, so if it fails, it can be retried. async fn process_streams(&mut self, pool: &SqlitePool) -> Result<()> { for (_stream_cid, stream_events) in self.pending_by_stream.iter_mut() { - if stream_events.skip_processing { + if !stream_events.should_process { continue; } stream_events.order_events(pool).await?; @@ -420,7 +389,7 @@ impl OrderingState { self.persist_ready_events(pool).await?; // keep things that still have missing history but don't process them again until we get something new self.pending_by_stream - .retain(|_, stream_events| stream_events.completed_processing()); + .retain(|_, stream_events| !stream_events.processing_completed()); debug!(remaining_streams=%self.pending_by_stream.len(), "Finished processing streams"); trace!(stream_state=?self, "Finished processing streams"); @@ -431,11 +400,11 @@ impl OrderingState { /// Process all undelivered events in the database. This is a blocking operation that could take a long time. /// It is intended to be run at startup but could be used on an interval or after some errors to recover. 
pub(crate) async fn process_all_undelivered_events( - &mut self, pool: &SqlitePool, max_iterations: usize, batch_size: u32, ) -> Result { + let mut state = Self::new(); let mut iter_cnt = 0; let mut event_cnt = 0; let mut highwater = 0; @@ -445,20 +414,19 @@ impl OrderingState { CeramicOneEvent::undelivered_with_values(pool, batch_size.into(), highwater) .await?; highwater = new_hw; - if undelivered.is_empty() { - break; - } else { + let found_something = !undelivered.is_empty(); + let found_everything = undelivered.len() < batch_size as usize; + if found_something { // We can start processing and we'll follow the stream history if we have it. In that case, we either arrive // at the beginning and mark them all delivered, or we find a gap and stop processing and leave them in memory. // In this case, we won't discover them until we start running recon with a peer, so maybe we should drop them // or otherwise mark them ignored somehow. - let found_all = undelivered.len() < batch_size as usize; - event_cnt += self + event_cnt += state .process_undelivered_events_batch(pool, undelivered) .await?; - if found_all { - break; - } + } + if !found_something || found_everything { + break; } } if iter_cnt > max_iterations { @@ -474,7 +442,6 @@ impl OrderingState { event_data: Vec<(EventId, Vec)>, ) -> Result { trace!(cnt=%event_data.len(), "Processing undelivered events batch"); - let mut to_store_asap = Vec::new(); let mut event_cnt = 0; for (event_id, carfile) in event_data { let event_cid = event_id.cid().ok_or_else(|| { @@ -485,9 +452,7 @@ impl OrderingState { let event = match &loaded.header { EventMetadata::Init { cid, .. } => { - warn!(%cid,"Found init event in database that wasn't previously marked as deliverable. Updating now..."); - to_store_asap.push(*cid); - StreamEvent::InitEvent(*cid) + unreachable!("Init events should not be undelivered. CID={}", cid); } EventMetadata::Data { cid, prev, .. } | EventMetadata::Time { cid, prev, .. 
} => { StreamEvent::Undelivered(StreamEventMetadata::new(*cid, *prev)) @@ -497,15 +462,6 @@ impl OrderingState { event_cnt += 1; self.add_stream_event(loaded.header.stream_cid(), event); } - - if !to_store_asap.is_empty() { - info!("storing init events that were somehow missed previously"); - let mut tx = pool.begin_tx().await?; - for cid in to_store_asap { - CeramicOneEvent::mark_ready_to_deliver(&mut tx, &cid).await?; - } - tx.commit().await?; - } self.process_streams(pool).await?; Ok(event_cnt) @@ -554,8 +510,7 @@ mod test { #[test(tokio::test)] async fn test_undelivered_batch_empty() { let pool = SqlitePool::connect_in_memory().await.unwrap(); - let processed = OrderingState::new() - .process_all_undelivered_events(&pool, 10, 100) + let processed = OrderingState::process_all_undelivered_events(&pool, 10, 100) .await .unwrap(); assert_eq!(0, processed); @@ -588,8 +543,7 @@ mod test { .unwrap(); assert_eq!(1, events.len()); - let processed = OrderingState::new() - .process_all_undelivered_events(&pool, 1, 5) + let processed = OrderingState::process_all_undelivered_events(&pool, 1, 5) .await .unwrap(); assert_eq!(5, processed); @@ -598,8 +552,7 @@ mod test { .unwrap(); assert_eq!(6, events.len()); // the last 5 are processed and we have 10 delivered - let processed = OrderingState::new() - .process_all_undelivered_events(&pool, 1, 5) + let processed = OrderingState::process_all_undelivered_events(&pool, 1, 5) .await .unwrap(); assert_eq!(4, processed); @@ -609,8 +562,7 @@ mod test { assert_eq!(10, events.len()); // nothing left - let processed = OrderingState::new() - .process_all_undelivered_events(&pool, 1, 100) + let processed = OrderingState::process_all_undelivered_events(&pool, 1, 100) .await .unwrap(); @@ -631,9 +583,7 @@ mod test { .await .unwrap(); assert_eq!(5, event.len()); - let mut state = OrderingState::new(); - state - .process_all_undelivered_events(&pool, 4, 10) + let _res = OrderingState::process_all_undelivered_events(&pool, 4, 10) .await 
.unwrap(); @@ -657,9 +607,7 @@ mod test { .await .unwrap(); assert_eq!(5, event.len()); - let mut state = OrderingState::new(); - state - .process_all_undelivered_events(&pool, 100_000_000, 5) + let _res = OrderingState::process_all_undelivered_events(&pool, 100_000_000, 5) .await .unwrap(); @@ -683,9 +631,7 @@ mod test { .await .unwrap(); assert_eq!(5, event.len()); - let mut state = OrderingState::new(); - state - .process_all_undelivered_events(&pool, 1, 100) + let _res = OrderingState::process_all_undelivered_events(&pool, 1, 100) .await .unwrap(); diff --git a/service/src/event/service.rs b/service/src/event/service.rs index ef5b8240e..f86a66b85 100644 --- a/service/src/event/service.rs +++ b/service/src/event/service.rs @@ -15,8 +15,17 @@ use super::{ use crate::{Error, Result}; +/// How many events to select at once to see if they've become deliverable when we have downtime +/// Used at startup and occassionally in case we ever dropped something +/// We keep the number small for now as we may need to traverse many prevs for each one of these and load them into memory. +const DELIVERABLE_EVENTS_BATCH_SIZE: u32 = 1000; + +/// How many batches of undelivered events are we willing to process on start up? +/// To avoid an infinite loop. It's going to take a long time to process `DELIVERABLE_EVENTS_BATCH_SIZE * MAX_ITERATIONS` events +const MAX_ITERATIONS: usize = 100_000_000; + /// The max number of events we can have pending for delivery in the channel before we start dropping them. -pub(crate) const PENDING_EVENTS_CHANNEL_DEPTH: usize = 10_000; +const PENDING_EVENTS_CHANNEL_DEPTH: usize = 10_000; #[derive(Debug)] /// A database store that verifies the bytes it stores are valid Ceramic events. 
@@ -41,8 +50,14 @@ impl CeramicEventService { pub async fn new(pool: SqlitePool) -> Result { CeramicOneEvent::init_delivered_order(&pool).await?; - let delivery_task = - OrderingTask::run(pool.clone(), PENDING_EVENTS_CHANNEL_DEPTH, true).await; + let _updated = OrderingTask::process_all_undelivered_events( + &pool, + MAX_ITERATIONS, + DELIVERABLE_EVENTS_BATCH_SIZE, + ) + .await?; + + let delivery_task = OrderingTask::run(pool.clone(), PENDING_EVENTS_CHANNEL_DEPTH).await; Ok(Self { pool, @@ -55,8 +70,7 @@ impl CeramicEventService { pub(crate) async fn new_without_undelivered(pool: SqlitePool) -> Result { CeramicOneEvent::init_delivered_order(&pool).await?; - let delivery_task = - OrderingTask::run(pool.clone(), PENDING_EVENTS_CHANNEL_DEPTH, false).await; + let delivery_task = OrderingTask::run(pool.clone(), PENDING_EVENTS_CHANNEL_DEPTH).await; Ok(Self { pool, From 194942de3cd59c42dd17741a77f53cb561309cda Mon Sep 17 00:00:00 2001 From: David Estes Date: Wed, 19 Jun 2024 18:21:29 -0600 Subject: [PATCH 05/15] chore: add test logging --- service/src/tests/ordering.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/service/src/tests/ordering.rs b/service/src/tests/ordering.rs index 1cad39fc9..4ac58c85a 100644 --- a/service/src/tests/ordering.rs +++ b/service/src/tests/ordering.rs @@ -184,7 +184,7 @@ async fn missing_prev_pending_recon_should_deliver_without_stream_update() { assert_eq!(expected, delivered); } -#[test(tokio::test)] +#[test(test(tokio::test))] async fn multiple_streams_missing_prev_recon_should_deliver_without_stream_update() { let store = setup_service().await; let stream_1 = get_events().await; @@ -279,7 +279,7 @@ async fn validate_all_delivered(store: &CeramicEventService, expected_delivered: } } -#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +#[test(tokio::test(flavor = "multi_thread", worker_threads = 2))] async fn recon_lots_of_streams() { // adds `per_stream` events to `num_streams` streams, mixes up the event 
order for each stream, inserts half // the events for each stream before mixing up the stream order and inserting the rest From d5ef53a9f41127707bca3bcd6ec20390d44e527c Mon Sep 17 00:00:00 2001 From: David Estes Date: Thu, 20 Jun 2024 00:16:49 -0600 Subject: [PATCH 06/15] refactor: modify IOD task to hopefully be more readable - modified the approach to keep everything in the maps until we're done and change things in place when it's identified as deliverable. this has the advantage of better cancel safety and more consistency about what is in the maps. - also renamed some things from header->metadata --- service/src/event/ordering_task.rs | 229 +++++++++++++++++------------ service/src/event/service.rs | 22 ++- 2 files changed, 146 insertions(+), 105 deletions(-) diff --git a/service/src/event/ordering_task.rs b/service/src/event/ordering_task.rs index ed3b7f3a4..34df345e1 100644 --- a/service/src/event/ordering_task.rs +++ b/service/src/event/ordering_task.rs @@ -105,6 +105,13 @@ enum StreamEvent { } impl StreamEvent { + fn is_deliverable(&self) -> bool { + match self { + StreamEvent::InitEvent(_) | StreamEvent::KnownDeliverable(_) => true, + StreamEvent::Undelivered(_) => false, + } + } + /// Builds a stream event from the database if it exists. async fn load_by_cid(pool: &SqlitePool, cid: EventCid) -> Result> { // TODO: Condense the multiple DB queries happening here into a single query @@ -118,7 +125,7 @@ impl StreamEvent { return Ok(None); }; - let known_prev = match &parsed_body.header { + let known_prev = match &parsed_body.metadata { EventMetadata::Init { cid, .. } => { assert!( deliverable, @@ -147,7 +154,7 @@ impl StreamEvent { impl From for StreamEvent { fn from(ev: InsertableBodyWithMeta) -> Self { - match ev.header { + match ev.metadata { EventMetadata::Init { cid, .. } => StreamEvent::InitEvent(cid), EventMetadata::Data { cid, prev, .. } | EventMetadata::Time { cid, prev, .. 
} => { let meta = StreamEventMetadata::new(cid, prev); @@ -173,10 +180,11 @@ impl StreamEventMetadata { } } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone)] /// ~540 bytes per event in this struct pub(crate) struct StreamEvents { - /// Map of `event.prev` to `event.cid` to find the previous event easily. + /// Map of `event.prev` to `event.cid` to determine which event depended on a newly + /// discovered deliverable event. prev_map: HashMap, /// Map of `event.cid` to `metadata` for quick lookup of the event metadata. cid_map: HashMap, @@ -186,6 +194,19 @@ pub(crate) struct StreamEvents { new_deliverable: VecDeque, } +impl Default for StreamEvents { + fn default() -> Self { + Self { + prev_map: HashMap::default(), + cid_map: HashMap::default(), + // default to true so we try to follow the event history on the the first batch loading + // will also avoid any possible race conditions if we somehow get things out of order on the channel + should_process: true, + new_deliverable: VecDeque::new(), + } + } +} + impl StreamEvents { fn new(event: StreamEvent) -> Self { let mut new = Self::default(); @@ -193,14 +214,6 @@ impl StreamEvents { new } - // we'll be processed if something in memory depends on this event - fn update_should_process_for_new_delivered(&mut self, new_cid: &EventCid) { - // don't reset the flag if we're already supposed to be processed - if !self.should_process { - self.should_process = self.prev_map.contains_key(new_cid); - } - } - /// returns true if this is a new event. 
fn add_event(&mut self, event: StreamEvent) -> bool { let cid = match &event { @@ -217,7 +230,7 @@ impl StreamEvents { self.prev_map.insert(meta.prev, meta.cid); if !self.should_process { // we depend on something in memory - self.should_process = self.prev_map.contains_key(&meta.prev) + self.should_process = self.prev_map.contains_key(&meta.prev); } meta.cid } @@ -226,35 +239,19 @@ impl StreamEvents { self.cid_map.insert(cid, event).is_none() } - fn remove_by_prev_cid(&mut self, prev: &Cid) -> Option { - if let Some(cid) = self.prev_map.remove(prev) { - self.cid_map.remove(&cid); - Some(cid) - } else { - None + /// we'll be processed if something in memory depends on this event + fn update_should_process_for_new_delivered(&mut self, new_cid: &EventCid) { + // don't reset the flag if we're already supposed to be processed + if !self.should_process { + self.should_process = self.prev_map.contains_key(new_cid); } } /// Called when we've persisted the deliverable events to the database and can clean up our state. /// Returns `true` if we're finished processing and can be dropped from memory. - /// Returns `false` if we have more work to do and should be retained for future processing (i.e we have more we need to discover) + /// Returns `false` if we have more work to do and should be retained for future processing fn processing_completed(&mut self) -> bool { self.should_process = false; - - for cid in self.new_deliverable.iter() { - if let Some(ev) = self.cid_map.get_mut(cid) { - match ev { - StreamEvent::InitEvent(cid) - | StreamEvent::KnownDeliverable(StreamEventMetadata { cid, .. 
}) => { - unreachable!("should not have found a delivered event in our deliverable queue CID={}", cid); - } - StreamEvent::Undelivered(meta) => { - // we're delivered now - *ev = StreamEvent::KnownDeliverable(meta.clone()); - } - } - } - } self.new_deliverable.clear(); !self .cid_map @@ -262,79 +259,118 @@ impl StreamEvents { .any(|(_, ev)| matches!(ev, StreamEvent::Undelivered(_))) } + /// When we discover the prev event is deliverable, we can mark ourselves as deliverable. + /// This adds us to the queue to insert in the database and updates our state to deliverable. + fn discovered_deliverable_prev(&mut self, cid: EventCid) { + self.new_deliverable.push_back(cid); + let event = self + .cid_map + .get_mut(&cid) + .expect("Must have event in cid_map"); + match event { + StreamEvent::InitEvent(cid) + | StreamEvent::KnownDeliverable(StreamEventMetadata { cid, .. }) => { + unreachable!( + "should not have found a deliverable event in our undelivered queue: {}", + cid, + ) + } + StreamEvent::Undelivered(meta) => { + // we're delivered now + *self.cid_map.get_mut(&cid).unwrap() = StreamEvent::KnownDeliverable(meta.clone()); + } + } + } + async fn order_events(&mut self, pool: &SqlitePool) -> Result<()> { // We collect everything we can into memory and then order things. - // If our prev is the init event or already been delivered, we can mark ourselves as deliverable. - // If our prev wasn't deliverable yet, we track it and repeat (i.e. follow its prev if we don't have it) - - let mut deliverable_queue = VecDeque::new(); - let mut undelivered = + // If our prev is deliverable then we can mark ourselves as deliverable. If our prev wasn't deliverable yet, + // we track it and repeat (i.e. add it to our state and the set we're iterating to attempt to load its prev). + // We mutate out state as we go adding things to the queue and changing their known deliverability so that + // if we get canceled while querying the database, we can pick up where we left off. 
Our queue will still + // have all the events in the order they need to be inserted, and the cid_map state will reflect their deliverability. + let mut undelivered_q = VecDeque::from_iter(self.cid_map.iter().filter_map(|(cid, ev)| match ev { StreamEvent::Undelivered(meta) => { - assert_eq!(meta.cid, *cid); - Some((meta.cid, meta.prev)) + debug_assert_eq!(meta.cid, *cid); + Some(meta.clone()) } _ => None, })); - while let Some((cid, prev)) = undelivered.pop_front() { - if let Some(prev_event) = self.cid_map.get(&prev) { - match prev_event { - StreamEvent::InitEvent(_) | StreamEvent::KnownDeliverable(_) => { - trace!( - %prev, - %cid, - "Found event whose prev is already in memory and IS deliverable!" - ); - deliverable_queue.push_back(cid) + debug!(count=%undelivered_q.len(), "undelivered events to process"); + + while let Some(StreamEventMetadata { + cid: undelivered_cid, + prev: desired_prev, + }) = undelivered_q.pop_front() + { + if let Some(known_prev) = self.cid_map.get(&desired_prev) { + if known_prev.is_deliverable() { + trace!( + %undelivered_cid, + %desired_prev, + "Found event whose prev is already in memory and IS deliverable!" + ); + self.discovered_deliverable_prev(undelivered_cid); + } else { + trace!( + %undelivered_cid, + %desired_prev, + "Found event whose prev is already in memory but NOT deliverable." + ); + // nothing to do until it arrives on the channel + } + } else if let Some(discovered_prev) = + StreamEvent::load_by_cid(pool, desired_prev).await? + { + match &discovered_prev { + // we found our prev in the database and it's deliverable, so we're deliverable now + StreamEvent::InitEvent(cid) + | StreamEvent::KnownDeliverable(StreamEventMetadata { cid, .. }) => { + trace!(prev=%cid, cid=%undelivered_cid, "Found deliverable prev event in database"); + self.discovered_deliverable_prev(undelivered_cid); } - StreamEvent::Undelivered(_) => { - trace!( - %prev, - %cid, - "Found event whose prev is already in memory but NOT deliverable." 
- ); - // nothing to do until it arrives on the channel + // it's not deliverable yet so we add track it and append it to the queue we're iterating to search for its prev. + // if we find something deliverable, it will end up marking this new event and then the original event deliverable + // in the final loop at the end. + StreamEvent::Undelivered(prev_meta) => { + undelivered_q.push_back(StreamEventMetadata { + cid: prev_meta.cid, + prev: prev_meta.prev, + }); + self.add_event(discovered_prev); } } } else { - let prev_event = StreamEvent::load_by_cid(pool, prev).await?; - if let Some(known_prev) = prev_event { - match &known_prev { - StreamEvent::InitEvent(_) | StreamEvent::KnownDeliverable(_) => { - deliverable_queue.push_back(cid); - } - StreamEvent::Undelivered(undelivered_ev) => { - // add this to the queue we're processing and try to follow it back to something deliverable - undelivered.push_back((undelivered_ev.cid, undelivered_ev.prev)); - } - } - self.add_event(known_prev); - } else { - trace!("Found event that depends on another event we haven't discovered yet"); - } + trace!("Found event that depends on another event we haven't discovered yet"); } } - let mut newly_ready = deliverable_queue.clone(); + let mut newly_ready = self.new_deliverable.clone(); while let Some(cid) = newly_ready.pop_front() { - if let Some(now_ready_ev) = self.remove_by_prev_cid(&cid) { - if let Some(ev) = self.cid_map.get(&now_ready_ev) { - match ev { - StreamEvent::InitEvent(_) | StreamEvent::KnownDeliverable(_) => { - // this would result in a no-op as we to re-update it, but we'll fail loudly so tests crash if it ever happens and we can avoid it ever happening - unreachable!("should not have found a deliverable event when we expected only undelivered events! 
CID={}", now_ready_ev); - } - StreamEvent::Undelivered(_) => { - newly_ready.push_back(now_ready_ev); - } + if let Some(now_ready) = self.prev_map.get(&cid) { + let ev = self + .cid_map + .get(now_ready) + .expect("must have value in cid_map if it's in prev_map") + .to_owned(); + match ev { + StreamEvent::InitEvent(cid) => { + unreachable!("should not have found an undelivered init event and added it to our delivery queue {}", cid); + } + StreamEvent::KnownDeliverable(_) => { + // This is fine as we could have already discovered and added ourself to the queue above. + // We get marked as KnownDeliverable in that case and we don't have anything more to do. + } + StreamEvent::Undelivered(meta) => { + // This might have unlocked something else, so we add it to the back of the queue to check. + newly_ready.push_back(meta.cid); + self.discovered_deliverable_prev(meta.cid); } } - deliverable_queue.push_back(now_ready_ev); - newly_ready.push_back(now_ready_ev); } } - self.new_deliverable = deliverable_queue; debug!(count=%self.new_deliverable.len(), "deliverable events discovered"); Ok(()) } @@ -359,7 +395,7 @@ impl OrderingState { /// We will get lots of init events we can ignore unless we need them, otherwise they'll be stuck in memory for a long time. fn add_inserted_events(&mut self, events: Vec) { for ev in events { - let stream_cid = ev.header.stream_cid(); + let stream_cid = ev.metadata.stream_cid(); let event = ev.into(); self.add_stream_event(stream_cid, event); } @@ -378,12 +414,11 @@ impl OrderingState { /// but for now it assumes it can process all the streams and events in one go. It should be idempotent, so if it fails, it can be retried. 
async fn process_streams(&mut self, pool: &SqlitePool) -> Result<()> { for (_stream_cid, stream_events) in self.pending_by_stream.iter_mut() { - if !stream_events.should_process { - continue; + if stream_events.should_process { + stream_events.order_events(pool).await?; + self.deliverable + .extend(stream_events.new_deliverable.iter()); } - stream_events.order_events(pool).await?; - self.deliverable - .extend(stream_events.new_deliverable.iter()); } self.persist_ready_events(pool).await?; @@ -420,7 +455,7 @@ impl OrderingState { // We can start processing and we'll follow the stream history if we have it. In that case, we either arrive // at the beginning and mark them all delivered, or we find a gap and stop processing and leave them in memory. // In this case, we won't discover them until we start running recon with a peer, so maybe we should drop them - // or otherwise mark them ignored somehow. + // or otherwise mark them ignored somehow. When this function ends, we do drop everything so for now it's probably okay. event_cnt += state .process_undelivered_events_batch(pool, undelivered) .await?; @@ -450,7 +485,7 @@ impl OrderingState { let loaded = CeramicEventService::parse_event_carfile_cid(event_cid, &carfile).await?; - let event = match &loaded.header { + let event = match &loaded.metadata { EventMetadata::Init { cid, .. } => { unreachable!("Init events should not be undelivered. 
CID={}", cid); } @@ -460,7 +495,7 @@ impl OrderingState { }; event_cnt += 1; - self.add_stream_event(loaded.header.stream_cid(), event); + self.add_stream_event(loaded.metadata.stream_cid(), event); } self.process_streams(pool).await?; diff --git a/service/src/event/service.rs b/service/src/event/service.rs index f86a66b85..9917d8756 100644 --- a/service/src/event/service.rs +++ b/service/src/event/service.rs @@ -128,7 +128,10 @@ impl CeramicEventService { let mut body = EventInsertableBody::try_from_carfile(cid, carfile).await?; let header = Self::parse_event_body(&mut body).await?; - Ok(InsertableBodyWithMeta { body, header }) + Ok(InsertableBodyWithMeta { + body, + metadata: header, + }) } pub(crate) async fn parse_event_body(body: &mut EventInsertableBody) -> Result { @@ -249,7 +252,7 @@ impl CeramicEventService { .map(|(e, _)| e.order_key.clone()) .collect(); - let to_insert_with_header = if history_required { + let to_insert_with_metadata = if history_required { ordered.deliverable } else { ordered @@ -259,7 +262,7 @@ impl CeramicEventService { .collect() }; - let to_insert = to_insert_with_header + let to_insert = to_insert_with_metadata .iter() .map(|(e, _)| e.clone()) .collect::>(); @@ -277,15 +280,15 @@ impl CeramicEventService { .collect::>(); for ev in to_send { - if let Some((ev, header)) = to_insert_with_header + if let Some((ev, metadata)) = to_insert_with_metadata .iter() .find(|(i, _)| i.order_key == ev.order_key) { let new = InsertableBodyWithMeta { body: ev.body.clone(), - header: header.to_owned(), + metadata: metadata.to_owned(), }; - trace!(event=?ev, "sending delivered to ordering task"); + trace!(event_cid=%ev.cid(), deliverable=%ev.deliverable(), "sending delivered to ordering task"); if let Err(e) = self.delivery_task.tx_inserted.try_send(new) { match e { tokio::sync::mpsc::error::TrySendError::Full(e) => { @@ -325,7 +328,10 @@ impl CeramicEventService { let mut body = EventInsertableBody::try_from_carfile(cid, &data).await?; let header = 
Self::parse_event_body(&mut body).await?; - Ok(Some(InsertableBodyWithMeta { body, header })) + Ok(Some(InsertableBodyWithMeta { + body, + metadata: header, + })) } } @@ -355,7 +361,7 @@ impl From for Vec { #[derive(Debug, Clone)] pub(crate) struct InsertableBodyWithMeta { pub(crate) body: EventInsertableBody, - pub(crate) header: EventMetadata, + pub(crate) metadata: EventMetadata, } #[derive(Debug, Clone, PartialEq, Eq)] From 44fc70e8e94dc9ba41ded08e5304f3b01ace6793 Mon Sep 17 00:00:00 2001 From: David Estes Date: Thu, 20 Jun 2024 00:58:11 -0600 Subject: [PATCH 07/15] fix: clean up docs/comments --- service/src/event/ordering_task.rs | 32 +++++++++++++++++++----------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/service/src/event/ordering_task.rs b/service/src/event/ordering_task.rs index 34df345e1..ded1cc898 100644 --- a/service/src/event/ordering_task.rs +++ b/service/src/event/ordering_task.rs @@ -16,7 +16,7 @@ type PrevCid = Cid; #[derive(Debug)] pub struct DeliverableTask { pub(crate) _handle: tokio::task::JoinHandle<()>, - /// Currently events discovered over recon that are out of order and need to be marked ready (deliverable) + /// Currently only receives events discovered over recon that are out of order and need to be marked ready (deliverable) /// when their prev chain is discovered and complete (i.e. my prev is deliverable then I am deliverable). pub(crate) tx_inserted: tokio::sync::mpsc::Sender, } @@ -181,7 +181,7 @@ impl StreamEventMetadata { } #[derive(Debug, Clone)] -/// ~540 bytes per event in this struct +/// ~500 bytes per event in this struct pub(crate) struct StreamEvents { /// Map of `event.prev` to `event.cid` to determine which event depended on a newly /// discovered deliverable event. 
@@ -276,7 +276,7 @@ impl StreamEvents { ) } StreamEvent::Undelivered(meta) => { - // we're delivered now + // we're deliverable now *self.cid_map.get_mut(&cid).unwrap() = StreamEvent::KnownDeliverable(meta.clone()); } } @@ -326,14 +326,11 @@ impl StreamEvents { { match &discovered_prev { // we found our prev in the database and it's deliverable, so we're deliverable now - StreamEvent::InitEvent(cid) - | StreamEvent::KnownDeliverable(StreamEventMetadata { cid, .. }) => { - trace!(prev=%cid, cid=%undelivered_cid, "Found deliverable prev event in database"); + StreamEvent::InitEvent(_) | StreamEvent::KnownDeliverable(_) => { self.discovered_deliverable_prev(undelivered_cid); } // it's not deliverable yet so we add track it and append it to the queue we're iterating to search for its prev. - // if we find something deliverable, it will end up marking this new event and then the original event deliverable - // in the final loop at the end. + // if we follow this chain to something deliverable in this loop, the values we have in memory will be updated in the final loop at the end. StreamEvent::Undelivered(prev_meta) => { undelivered_q.push_back(StreamEventMetadata { cid: prev_meta.cid, @@ -364,7 +361,8 @@ impl StreamEvents { // We get marked as KnownDeliverable in that case and we don't have anything more to do. } StreamEvent::Undelivered(meta) => { - // This might have unlocked something else, so we add it to the back of the queue to check. + // Discovering this event's prev (and therefore this event) as deliverable might have unlocked + // something else, so we add it to the back of the queue to check. newly_ready.push_back(meta.cid); self.discovered_deliverable_prev(meta.cid); } @@ -391,8 +389,6 @@ impl OrderingState { } /// Add a stream to the list of streams to process. - /// We ignore delivered events for streams we're not tracking as we can look them up later if we need them. 
- /// We will get lots of init events we can ignore unless we need them, otherwise they'll be stuck in memory for a long time. fn add_inserted_events(&mut self, events: Vec) { for ev in events { let stream_cid = ev.metadata.stream_cid(); @@ -401,6 +397,9 @@ impl OrderingState { } } + /// Add an event to the list of events to process. Only creates a new stream to track if it's an undelivered event. + /// We ignore delivered events for streams we're not tracking as we can look them up later if we need them. + /// As we get lots of init events, we don't want them to be stuck in memory unless we have a reason to track them. fn add_stream_event(&mut self, stream_cid: StreamCid, event: StreamEvent) { if let Some(stream) = self.pending_by_stream.get_mut(&stream_cid) { stream.add_event(event); @@ -421,7 +420,16 @@ impl OrderingState { } } - self.persist_ready_events(pool).await?; + match self.persist_ready_events(pool).await { + Ok(_) => {} + Err(err) => { + // Clear the queue as we'll rediscover it on the next run, rather than try to double update everything. + // We will no-op the updates so it doesn't really hurt but it's unnecessary. + // The StreamEvents in our pending_by_stream map all have their state updated in memory so we can pick up where we left off. 
+ self.deliverable.clear(); + return Err(err); + } + } // keep things that still have missing history but don't process them again until we get something new self.pending_by_stream .retain(|_, stream_events| !stream_events.processing_completed()); From 81df3bc561ab0931061447000c43b520debfdde3 Mon Sep 17 00:00:00 2001 From: David Estes Date: Thu, 20 Jun 2024 12:52:50 -0600 Subject: [PATCH 08/15] refactor: use event crate carfile parsing also changed struct sent to ordering task to avoid sending body we don't need and the ordering task correspondingly --- service/src/event/order_events.rs | 4 +- service/src/event/ordering_task.rs | 83 +++++++------ service/src/event/service.rs | 179 +++++++++++------------------ 3 files changed, 114 insertions(+), 152 deletions(-) diff --git a/service/src/event/order_events.rs b/service/src/event/order_events.rs index 17b17a619..1bfd9cad8 100644 --- a/service/src/event/order_events.rs +++ b/service/src/event/order_events.rs @@ -50,8 +50,8 @@ impl OrderEvents { unreachable!("Init events should have been filtered out since they're always deliverable"); } Some(prev) => { - if let Some(in_mem) = new_cids.get(&prev) { - if *in_mem { + if let Some(in_mem_is_deliverable) = new_cids.get(&prev) { + if *in_mem_is_deliverable { event.body.set_deliverable(true); *new_cids.get_mut(&event.cid()).expect("CID must exist") = true; deliverable.push((event, header)); diff --git a/service/src/event/ordering_task.rs b/service/src/event/ordering_task.rs index ded1cc898..5934fa6a5 100644 --- a/service/src/event/ordering_task.rs +++ b/service/src/event/ordering_task.rs @@ -1,13 +1,16 @@ use std::collections::{HashMap, VecDeque}; +use anyhow::anyhow; use ceramic_core::EventId; +use ceramic_event::unvalidated; use ceramic_store::{CeramicOneEvent, SqlitePool}; use cid::Cid; +use ipld_core::ipld::Ipld; use tracing::{debug, error, info, trace, warn}; -use crate::{CeramicEventService, Error, Result}; +use crate::{Error, Result}; -use 
super::service::{EventMetadata, InsertableBodyWithMeta}; +use super::service::{DiscoveredEvent, EventMetadata}; type StreamCid = Cid; type EventCid = Cid; @@ -18,7 +21,7 @@ pub struct DeliverableTask { pub(crate) _handle: tokio::task::JoinHandle<()>, /// Currently only receives events discovered over recon that are out of order and need to be marked ready (deliverable) /// when their prev chain is discovered and complete (i.e. my prev is deliverable then I am deliverable). - pub(crate) tx_inserted: tokio::sync::mpsc::Sender, + pub(crate) tx_inserted: tokio::sync::mpsc::Sender, } #[derive(Debug)] @@ -37,8 +40,7 @@ impl OrderingTask { /// Spawn a task to run the ordering task background process in a loop pub async fn run(pool: SqlitePool, q_depth: usize) -> DeliverableTask { - let (tx_inserted, rx_inserted) = - tokio::sync::mpsc::channel::(q_depth); + let (tx_inserted, rx_inserted) = tokio::sync::mpsc::channel::(q_depth); let handle = tokio::spawn(async move { Self::run_loop(pool, rx_inserted).await }); @@ -50,7 +52,7 @@ impl OrderingTask { async fn run_loop( pool: SqlitePool, - mut rx_inserted: tokio::sync::mpsc::Receiver, + mut rx_inserted: tokio::sync::mpsc::Receiver, ) { let mut state = OrderingState::new(); @@ -117,22 +119,28 @@ impl StreamEvent { // TODO: Condense the multiple DB queries happening here into a single query let (exists, deliverable) = CeramicOneEvent::deliverable_by_cid(pool, &cid).await?; if exists { - let parsed_body = - if let Some(body) = CeramicEventService::load_by_cid(pool, cid).await? { - body - } else { - warn!(%cid, "No event body found for event that should exist"); - return Ok(None); - }; + let data = CeramicOneEvent::value_by_cid(pool, &cid) + .await? 
+ .ok_or_else(|| { + Error::new_app(anyhow!( + "Missing event data for event that must exist: CID={}", + cid + )) + })?; + let (_cid, parsed) = unvalidated::Event::::decode_car(data.as_slice(), false) + .await + .map_err(Error::new_app)?; + + let metadata = EventMetadata::from(parsed); - let known_prev = match &parsed_body.metadata { - EventMetadata::Init { cid, .. } => { + let known_prev = match &metadata { + EventMetadata::Init => { assert!( deliverable, "Init event must always be deliverable. Found undelivered CID: {}", cid ); - StreamEvent::InitEvent(*cid) + StreamEvent::InitEvent(cid) } EventMetadata::Data { prev, .. } | EventMetadata::Time { prev, .. } => { if deliverable { @@ -152,13 +160,13 @@ impl StreamEvent { } } -impl From for StreamEvent { - fn from(ev: InsertableBodyWithMeta) -> Self { +impl From for StreamEvent { + fn from(ev: DiscoveredEvent) -> Self { match ev.metadata { - EventMetadata::Init { cid, .. } => StreamEvent::InitEvent(cid), - EventMetadata::Data { cid, prev, .. } | EventMetadata::Time { cid, prev, .. } => { - let meta = StreamEventMetadata::new(cid, prev); - if ev.body.deliverable() { + EventMetadata::Init => StreamEvent::InitEvent(ev.cid), + EventMetadata::Data { prev, .. } | EventMetadata::Time { prev, .. } => { + let meta = StreamEventMetadata::new(ev.cid, prev); + if ev.known_deliverable { StreamEvent::KnownDeliverable(meta) } else { StreamEvent::Undelivered(meta) @@ -389,9 +397,9 @@ impl OrderingState { } /// Add a stream to the list of streams to process. 
- fn add_inserted_events(&mut self, events: Vec) { + fn add_inserted_events(&mut self, events: Vec) { for ev in events { - let stream_cid = ev.metadata.stream_cid(); + let stream_cid = ev.stream_cid(); let event = ev.into(); self.add_stream_event(stream_cid, event); } @@ -486,24 +494,27 @@ impl OrderingState { ) -> Result { trace!(cnt=%event_data.len(), "Processing undelivered events batch"); let mut event_cnt = 0; - for (event_id, carfile) in event_data { - let event_cid = event_id.cid().ok_or_else(|| { - Error::new_invalid_arg(anyhow::anyhow!("EventID is missing a CID: {}", event_id)) - })?; + for (_event_id, carfile) in event_data { + let (cid, parsed_event) = + unvalidated::Event::::decode_car(carfile.as_slice(), false) + .await + .map_err(Error::new_app)?; - let loaded = CeramicEventService::parse_event_carfile_cid(event_cid, &carfile).await?; + let metadata = EventMetadata::from(parsed_event); - let event = match &loaded.metadata { - EventMetadata::Init { cid, .. } => { + let (stream_cid, loaded) = match &metadata { + EventMetadata::Init => { unreachable!("Init events should not be undelivered. CID={}", cid); } - EventMetadata::Data { cid, prev, .. } | EventMetadata::Time { cid, prev, .. 
} => { - StreamEvent::Undelivered(StreamEventMetadata::new(*cid, *prev)) - } + EventMetadata::Data { stream_cid, prev } + | EventMetadata::Time { stream_cid, prev } => ( + stream_cid, + StreamEvent::Undelivered(StreamEventMetadata::new(cid, *prev)), + ), }; event_cnt += 1; - self.add_stream_event(loaded.metadata.stream_cid(), event); + self.add_stream_event(*stream_cid, loaded); } self.process_streams(pool).await?; @@ -542,7 +553,7 @@ mod test { let events = get_n_events(n).await; for event in events { let (event, _) = - CeramicEventService::parse_event_carfile_order_key(event.0.to_owned(), &event.1) + CeramicEventService::validate_discovered_event(event.0.to_owned(), &event.1) .await .unwrap(); res.push(event); diff --git a/service/src/event/service.rs b/service/src/event/service.rs index 9917d8756..554c41271 100644 --- a/service/src/event/service.rs +++ b/service/src/event/service.rs @@ -109,86 +109,35 @@ impl CeramicEventService { .await?; Ok(()) } - /// This function is used to parse the event from the carfile and return the insertable event and the previous cid pointer. - /// Probably belongs in the event crate. - pub(crate) async fn parse_event_carfile_order_key( - event_id: EventId, + + /// Currently only verifies that the event parses into a valid ceramic event, determining whether it's + /// immediately deliverable because it's an init event or it needs review. + /// In the future, we will need to do more event validation (verify all EventID pieces, hashes, signatures, etc). + pub(crate) async fn validate_discovered_event( + event_id: ceramic_core::EventId, carfile: &[u8], ) -> Result<(EventInsertable, EventMetadata)> { - let mut insertable = EventInsertable::try_from_carfile(event_id, carfile).await?; + let event_cid = event_id.cid().ok_or_else(|| { + Error::new_app(anyhow::anyhow!("EventId missing CID. 
EventID={}", event_id)) + })?; - let header = Self::parse_event_body(&mut insertable.body).await?; - Ok((insertable, header)) - } + let (cid, parsed_event) = unvalidated::Event::::decode_car(carfile, false) + .await + .map_err(Error::new_app)?; + + if event_cid != cid { + return Err(Error::new_app(anyhow::anyhow!( + "EventId CID ({}) does not match the body CID ({})", + event_cid, + cid + ))); + } - pub(crate) async fn parse_event_carfile_cid( - cid: ceramic_core::Cid, - carfile: &[u8], - ) -> Result { + let metadata = EventMetadata::from(parsed_event); let mut body = EventInsertableBody::try_from_carfile(cid, carfile).await?; + body.set_deliverable(matches!(metadata, EventMetadata::Init { .. })); - let header = Self::parse_event_body(&mut body).await?; - Ok(InsertableBodyWithMeta { - body, - metadata: header, - }) - } - - pub(crate) async fn parse_event_body(body: &mut EventInsertableBody) -> Result { - let cid = body.cid(); // purely for convenience writing out the match - let ev_block = body.block_for_cid(&cid)?; - - trace!(count=%body.blocks().len(), %cid, "parsing event blocks"); - let event_ipld: unvalidated::RawEvent = - serde_ipld_dagcbor::from_slice(&ev_block.bytes).map_err(|e| { - Error::new_invalid_arg( - anyhow::anyhow!(e).context("event block is not valid event format"), - ) - })?; - - let (deliverable, header) = match event_ipld { - unvalidated::RawEvent::Time(t) => ( - false, - EventMetadata::Time { - cid, - stream_cid: t.id(), - prev: t.prev(), - }, - ), - unvalidated::RawEvent::Signed(signed) => { - let link = signed.link().ok_or_else(|| { - Error::new_invalid_arg(anyhow::anyhow!("event should have a link")) - })?; - let link = body - .blocks() - .iter() - .find(|b| b.cid() == link) - .ok_or_else(|| { - Error::new_invalid_arg(anyhow::anyhow!("prev CID missing from carfile")) - })?; - let payload: unvalidated::Payload = - serde_ipld_dagcbor::from_slice(&link.bytes).map_err(|e| { - Error::new_invalid_arg( - anyhow::anyhow!(e).context("Failed to 
follow event link"), - ) - })?; - - match payload { - unvalidated::Payload::Data(d) => ( - false, - EventMetadata::Data { - cid, - stream_cid: *d.id(), - prev: *d.prev(), - }, - ), - unvalidated::Payload::Init(_init) => (true, EventMetadata::Init { cid }), - } - } - unvalidated::RawEvent::Unsigned(_init) => (true, EventMetadata::Init { cid }), - }; - body.set_deliverable(deliverable); - Ok(header) + Ok((EventInsertable::try_new(event_id, body)?, metadata)) } #[tracing::instrument(skip(self, items), level = tracing::Level::DEBUG, fields(items = items.len()))] @@ -240,7 +189,7 @@ impl CeramicEventService { for event in items { let insertable = - Self::parse_event_carfile_order_key(event.key.to_owned(), event.value).await?; + Self::validate_discovered_event(event.key.to_owned(), event.value).await?; to_insert.push(insertable); } @@ -284,12 +233,13 @@ impl CeramicEventService { .iter() .find(|(i, _)| i.order_key == ev.order_key) { - let new = InsertableBodyWithMeta { - body: ev.body.clone(), + let discovered = DiscoveredEvent { + cid: ev.cid(), + known_deliverable: ev.deliverable(), metadata: metadata.to_owned(), }; - trace!(event_cid=%ev.cid(), deliverable=%ev.deliverable(), "sending delivered to ordering task"); - if let Err(e) = self.delivery_task.tx_inserted.try_send(new) { + trace!(?discovered, "sending delivered to ordering task"); + if let Err(e) = self.delivery_task.tx_inserted.try_send(discovered) { match e { tokio::sync::mpsc::error::TrySendError::Full(e) => { warn!(event=?e, limit=%PENDING_EVENTS_CHANNEL_DEPTH, "Delivery task full. Dropping event and will not be able to mark deliverable new stream event arrives or process is restarted"); @@ -315,24 +265,6 @@ impl CeramicEventService { missing_history, }) } - - pub(crate) async fn load_by_cid( - pool: &SqlitePool, - cid: ceramic_core::Cid, - ) -> Result> { - let data = if let Some(ev) = CeramicOneEvent::value_by_cid(pool, &cid).await? 
{ - ev - } else { - return Ok(None); - }; - - let mut body = EventInsertableBody::try_from_carfile(cid, &data).await?; - let header = Self::parse_event_body(&mut body).await?; - Ok(Some(InsertableBodyWithMeta { - body, - metadata: header, - })) - } } #[derive(Debug, PartialEq, Eq, Default)] @@ -358,42 +290,61 @@ impl From for Vec { } } -#[derive(Debug, Clone)] -pub(crate) struct InsertableBodyWithMeta { - pub(crate) body: EventInsertableBody, - pub(crate) metadata: EventMetadata, +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct DiscoveredEvent { + pub cid: ceramic_core::Cid, + pub known_deliverable: bool, + pub metadata: EventMetadata, +} + +impl DiscoveredEvent { + pub(crate) fn stream_cid(&self) -> ceramic_core::Cid { + match self.metadata { + EventMetadata::Init => self.cid, + EventMetadata::Data { stream_cid, .. } | EventMetadata::Time { stream_cid, .. } => { + stream_cid + } + } + } } #[derive(Debug, Clone, PartialEq, Eq)] /// An event header wrapper for use in the store crate. /// TODO: replace this with something from the event crate pub(crate) enum EventMetadata { - Init { - cid: ceramic_core::Cid, - }, + /// The init CID and stream CID are the same + Init, Data { - cid: ceramic_core::Cid, stream_cid: ceramic_core::Cid, prev: ceramic_core::Cid, }, Time { - cid: ceramic_core::Cid, stream_cid: ceramic_core::Cid, prev: ceramic_core::Cid, }, } -impl EventMetadata { - /// Returns the stream CID of the event - pub(crate) fn stream_cid(&self) -> ceramic_core::Cid { - match self { - EventMetadata::Init { cid, .. } => *cid, - EventMetadata::Data { stream_cid, .. } | EventMetadata::Time { stream_cid, .. 
} => { - *stream_cid - } +impl From> for EventMetadata { + fn from(value: unvalidated::Event) -> Self { + match value { + unvalidated::Event::Time(t) => EventMetadata::Time { + stream_cid: t.id(), + prev: t.prev(), + }, + + unvalidated::Event::Signed(signed) => match signed.payload() { + unvalidated::Payload::Data(d) => EventMetadata::Data { + stream_cid: *d.id(), + prev: *d.prev(), + }, + unvalidated::Payload::Init(_init) => EventMetadata::Init, + }, + unvalidated::Event::Unsigned(_init) => EventMetadata::Init, } } +} +impl EventMetadata { pub(crate) fn prev(&self) -> Option { match self { EventMetadata::Init { .. } => None, From e4f5a6e8d008afce02ebb609244a07dd6d700ae5 Mon Sep 17 00:00:00 2001 From: David Estes Date: Thu, 20 Jun 2024 13:15:05 -0600 Subject: [PATCH 09/15] fix: optimization to do less work the check should have been cid_map before too --- service/src/event/ordering_task.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/service/src/event/ordering_task.rs b/service/src/event/ordering_task.rs index 5934fa6a5..08c0af3d4 100644 --- a/service/src/event/ordering_task.rs +++ b/service/src/event/ordering_task.rs @@ -238,7 +238,16 @@ impl StreamEvents { self.prev_map.insert(meta.prev, meta.cid); if !self.should_process { // we depend on something in memory - self.should_process = self.prev_map.contains_key(&meta.prev); + if let Some(in_memory_prev) = self.cid_map.get(&meta.prev) { + match in_memory_prev { + StreamEvent::InitEvent(_) | StreamEvent::KnownDeliverable(_) => { + self.should_process = true; + } + StreamEvent::Undelivered(_) => { + // nothing to do until it's prev arrives + } + } + } } meta.cid } @@ -544,7 +553,7 @@ mod test { use ceramic_store::EventInsertable; use test_log::test; - use crate::tests::get_n_events; + use crate::{tests::get_n_events, CeramicEventService}; use super::*; From eb4d4892826f686978267b1e60b775637527ab97 Mon Sep 17 00:00:00 2001 From: David Estes Date: Mon, 24 Jun 2024 09:51:44 -0600 
Subject: [PATCH 10/15] fix: fix initial (shallow) ordering code and write tests --- service/src/event/order_events.rs | 213 +++++++++++++++++++++++++++++- 1 file changed, 207 insertions(+), 6 deletions(-) diff --git a/service/src/event/order_events.rs b/service/src/event/order_events.rs index 1bfd9cad8..79acef087 100644 --- a/service/src/event/order_events.rs +++ b/service/src/event/order_events.rs @@ -1,7 +1,7 @@ use std::collections::{HashMap, VecDeque}; +use ceramic_core::Cid; use ceramic_store::{CeramicOneEvent, EventInsertable, SqlitePool}; -use cid::Cid; use crate::Result; @@ -15,7 +15,15 @@ pub(crate) struct OrderEvents { impl OrderEvents { /// Groups the events into lists of those with a delivered prev and those without. This can be used to return an error if the event is required to have history. /// The events will be marked as deliverable so that they can be passed directly to the store to be persisted. - /// Will look up the prev from the database if needed to check if it's deliverable (could possibly change this for recon and allow the ordering task to handle it?) + /// + /// The job of this function is different than the `ordering_task` module. That module recurses indefinitely attempting to build history. This + /// will only traverse a single prev outside of the initial set of events (that is, to the database). This is important because we don't want to + /// allow API users to write an event they haven't yet seen the prev for, and for recon we can allow the other task to sort it out. + /// + /// The `missing_history` set means that the prev is not deliverable or in the `candidate_events` vec with a deliverable prev. For example, + /// with new events [C, D] (in any order), if D.prev = C, C.prev = B, then C and D are deliverable if B is in the database as deliverable. 
+ /// Given the same situation, where B is not yet deliverable, but B.prev = A and A is deliverable (shouldn't happen, but as an example), we + /// *could* mark B deliverable and then C and D, but we DO NOT want to do this here to prevent API users from writing events that they haven't seen. pub async fn try_new( pool: &SqlitePool, mut candidate_events: Vec<(EventInsertable, EventMetadata)>, @@ -23,7 +31,7 @@ impl OrderEvents { let mut new_cids: HashMap = HashMap::from_iter( candidate_events .iter() - .map(|(e, _)| (e.cid(), e.body.deliverable())), + .map(|(e, _)| (e.cid(), e.deliverable())), ); let mut deliverable = Vec::with_capacity(candidate_events.len()); candidate_events.retain(|(e, h)| { @@ -78,8 +86,10 @@ impl OrderEvents { // we may loop through multiple times putting things back in the queue, but it should be a short list // and it will shrink every time we move something to the deliverable set, so it should be acceptable. // We can't quite get rid of this loop because we may have discovered our prev's prev from the database in the previous pass. + let max_iterations = undelivered_prevs_in_memory.len(); + let mut iteration = 0; while let Some((mut event, header)) = undelivered_prevs_in_memory.pop_front() { - let mut made_changes = false; + iteration += 1; match header.prev() { None => { unreachable!("Init events should have been filtered out of the in memory set"); @@ -89,13 +99,14 @@ impl OrderEvents { *new_cids.get_mut(&event.cid()).expect("CID must exist") = true; event.body.set_deliverable(true); deliverable.push((event, header)); - made_changes = true; + // reset the iteration count since we made changes. 
once it doesn't change for a loop through the queue we're done + iteration = 0; } else { undelivered_prevs_in_memory.push_back((event, header)); } } } - if !made_changes { + if iteration >= max_iterations { missing_history.extend(undelivered_prevs_in_memory); break; } @@ -107,3 +118,193 @@ impl OrderEvents { }) } } + +#[cfg(test)] +mod test { + use ceramic_core::EventId; + use rand::seq::SliceRandom; + use rand::thread_rng; + use test_log::test; + + use super::*; + + use crate::{tests::get_n_events, CeramicEventService}; + + async fn get_2_streams() -> ( + Vec<(EventId, Vec)>, + Vec<(EventId, Vec)>, + Vec<(EventInsertable, EventMetadata)>, + ) { + let stream_2 = get_n_events(10).await; + let stream_1 = get_n_events(10).await; + let mut to_insert = Vec::with_capacity(10); + for event in stream_1.iter().chain(stream_2.iter()) { + let insertable = CeramicEventService::validate_discovered_event( + event.0.to_owned(), + event.1.as_slice(), + ) + .await + .unwrap(); + to_insert.push(insertable); + } + (stream_1, stream_2, to_insert) + } + + #[test(tokio::test)] + async fn out_of_order_streams_valid() { + let pool = SqlitePool::connect_in_memory().await.unwrap(); + + let (stream_1, stream_2, mut to_insert) = get_2_streams().await; + to_insert.shuffle(&mut thread_rng()); + + let ordered = OrderEvents::try_new(&pool, to_insert).await.unwrap(); + assert!( + ordered.missing_history.is_empty(), + "Missing history: len={} {:?}", + ordered.missing_history.len(), + ordered.missing_history + ); + let mut after_1 = Vec::with_capacity(10); + let mut after_2 = Vec::with_capacity(10); + for (event, _) in ordered.deliverable { + assert!(event.deliverable()); + if stream_1.iter().any(|e| e.0 == event.order_key) { + after_1.push(event.order_key.clone()); + } else { + after_2.push(event.order_key.clone()); + } + } + + assert_eq!( + stream_1.into_iter().map(|e| e.0).collect::>(), + after_1 + ); + assert_eq!( + stream_2.into_iter().map(|e| e.0).collect::>(), + after_2 + ); + } + + 
#[test(tokio::test)] + async fn missing_history_in_memory() { + let pool = SqlitePool::connect_in_memory().await.unwrap(); + + let (stream_1, stream_2, mut to_insert) = get_2_streams().await; + // if event 2 is missing from stream_1, we will sort stream_2 but stream_1 will be "missing history" after the init event + to_insert.remove(1); + to_insert.shuffle(&mut thread_rng()); + + let ordered = OrderEvents::try_new(&pool, to_insert).await.unwrap(); + assert_eq!( + 8, + ordered.missing_history.len(), + "Missing history: {:?}", + ordered.missing_history + ); + let mut after_1 = Vec::with_capacity(10); + let mut after_2 = Vec::with_capacity(10); + for (event, _) in ordered.deliverable { + assert!(event.deliverable()); + if stream_1.iter().any(|e| e.0 == event.order_key) { + after_1.push(event.order_key.clone()); + } else { + after_2.push(event.order_key.clone()); + } + } + + assert_eq!(vec![stream_1[0].0.clone()], after_1); + assert_eq!( + stream_2.into_iter().map(|e| e.0).collect::>(), + after_2 + ); + } + + #[test(tokio::test)] + async fn missing_history_not_recursed() { + // this test validates that even though it's possible to build the history as deliverable, we don't do it here + // so that an API write that had never seen event 2, would not able to write event 3 or after + // the recon ordering task would sort this and mark all deliverable + let pool = SqlitePool::connect_in_memory().await.unwrap(); + + let stream_1 = get_n_events(10).await; + let mut insertable = Vec::with_capacity(10); + for event in stream_1.iter() { + let new = CeramicEventService::validate_discovered_event( + event.0.to_owned(), + event.1.as_slice(), + ) + .await + .unwrap(); + insertable.push(new); + } + let to_insert = insertable + .iter() + .take(3) + .map(|(i, _)| i.clone()) + .collect::>(); + let mut remaining = insertable.into_iter().skip(3).collect::>(); + CeramicOneEvent::insert_many(&pool, &to_insert[..]) + .await + .unwrap(); + + remaining.shuffle(&mut thread_rng()); + + let 
ordered = OrderEvents::try_new(&pool, remaining).await.unwrap(); + assert_eq!( + 7, + ordered.missing_history.len(), + "Missing history: {:?}", + ordered.missing_history + ); + } + + #[test(tokio::test)] + async fn database_deliverable_is_valid() { + // this test validates we can order in memory events with each other if one of them has a prev + // in the database that is deliverable, in which case the entire chain is deliverable + let pool = SqlitePool::connect_in_memory().await.unwrap(); + + let stream_1 = get_n_events(10).await; + let mut insertable = Vec::with_capacity(10); + for event in stream_1.iter() { + let new = CeramicEventService::validate_discovered_event( + event.0.to_owned(), + event.1.as_slice(), + ) + .await + .unwrap(); + insertable.push(new); + } + let to_insert = insertable + .iter_mut() + .take(3) + .map(|(i, _)| { + i.body.set_deliverable(true); + i.clone() + }) + .collect::>(); + let mut remaining = insertable.into_iter().skip(3).collect::>(); + CeramicOneEvent::insert_many(&pool, &to_insert[..]) + .await + .unwrap(); + + let expected = remaining + .iter() + .map(|(i, _)| i.order_key.clone()) + .collect::>(); + remaining.shuffle(&mut thread_rng()); + + let ordered = OrderEvents::try_new(&pool, remaining).await.unwrap(); + assert!( + ordered.missing_history.is_empty(), + "Missing history: {:?}", + ordered.missing_history + ); + let after = ordered + .deliverable + .iter() + .map(|(e, _)| e.order_key.clone()) + .collect::>(); + assert_eq!(expected, after); + } +} From e14b97293f893383e7a99c92ca369ef376ec14b4 Mon Sep 17 00:00:00 2001 From: David Estes Date: Mon, 24 Jun 2024 10:48:12 -0600 Subject: [PATCH 11/15] fix: only panic on internal states in release mode we should never discover undelivered init events while processing the start up batch, but we can correct them so we aren't going to crash --- service/src/event/ordering_task.rs | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git 
a/service/src/event/ordering_task.rs b/service/src/event/ordering_task.rs index 08c0af3d4..71908c103 100644 --- a/service/src/event/ordering_task.rs +++ b/service/src/event/ordering_task.rs @@ -503,6 +503,7 @@ impl OrderingState { ) -> Result { trace!(cnt=%event_data.len(), "Processing undelivered events batch"); let mut event_cnt = 0; + let mut discovered_inits = Vec::new(); for (_event_id, carfile) in event_data { let (cid, parsed_event) = unvalidated::Event::::decode_car(carfile.as_slice(), false) @@ -513,7 +514,8 @@ impl OrderingState { let (stream_cid, loaded) = match &metadata { EventMetadata::Init => { - unreachable!("Init events should not be undelivered. CID={}", cid); + discovered_inits.push(cid); + continue; } EventMetadata::Data { stream_cid, prev } | EventMetadata::Time { stream_cid, prev } => ( @@ -525,6 +527,22 @@ impl OrderingState { event_cnt += 1; self.add_stream_event(*stream_cid, loaded); } + // while undelivered init events should be unreachable, we can fix the state if it happens so we won't panic in release mode + // and simply correct things in the database. We could make this fatal in the future, but for now it's just a warning to + // make sure we aren't overlooking something with the migration from ipfs or the previous state of the database. + if !discovered_inits.is_empty() { + warn!( + count=%discovered_inits.len(), + cids=?discovered_inits, + "Found init events in undelivered batch. 
This should never happen.", + ); + debug_assert!(false); + let mut tx = pool.begin_tx().await?; + for cid in discovered_inits { + CeramicOneEvent::mark_ready_to_deliver(&mut tx, &cid).await?; + } + tx.commit().await?; + } self.process_streams(pool).await?; Ok(event_cnt) From 81f098fc7499552c34164843c412548cec3a8675 Mon Sep 17 00:00:00 2001 From: David Estes Date: Mon, 24 Jun 2024 14:41:42 -0600 Subject: [PATCH 12/15] chore: improve logging for start up processing of undelivered events Don't want the process to appear to be hanging without any indication of what is going on if there is a large backlog of undelivered events --- service/src/event/ordering_task.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/service/src/event/ordering_task.rs b/service/src/event/ordering_task.rs index 71908c103..f43d381f2 100644 --- a/service/src/event/ordering_task.rs +++ b/service/src/event/ordering_task.rs @@ -16,6 +16,8 @@ type StreamCid = Cid; type EventCid = Cid; type PrevCid = Cid; +const LOG_EVERY_N_ENTRIES: usize = 10_000; + #[derive(Debug)] pub struct DeliverableTask { pub(crate) _handle: tokio::task::JoinHandle<()>, @@ -464,6 +466,7 @@ impl OrderingState { max_iterations: usize, batch_size: u32, ) -> Result { + info!("Attempting to process all undelivered events. This could take some time."); let mut state = Self::new(); let mut iter_cnt = 0; let mut event_cnt = 0; @@ -477,20 +480,25 @@ impl OrderingState { let found_something = !undelivered.is_empty(); let found_everything = undelivered.len() < batch_size as usize; if found_something { + debug!(new_batch_count=%undelivered.len(), "Found undelivered events in the database to process."); // We can start processing and we'll follow the stream history if we have it. In that case, we either arrive // at the beginning and mark them all delivered, or we find a gap and stop processing and leave them in memory. 
// In this case, we won't discover them until we start running recon with a peer, so maybe we should drop them // or otherwise mark them ignored somehow. When this function ends, we do drop everything so for now it's probably okay. - event_cnt += state + let number_processed = state .process_undelivered_events_batch(pool, undelivered) .await?; + event_cnt += number_processed; + if event_cnt % LOG_EVERY_N_ENTRIES < number_processed { + info!(count=%event_cnt, "Processed undelivered events"); + } } if !found_something || found_everything { break; } } if iter_cnt > max_iterations { - info!(%batch_size, iterations=%iter_cnt, "Exceeded max iterations for finding undelivered events!"); + warn!(%batch_size, iterations=%iter_cnt, "Exceeded max iterations for finding undelivered events!"); } Ok(event_cnt) From 4ed672135fb5f38c83a593e74ddd9714926d6ee2 Mon Sep 17 00:00:00 2001 From: David Estes Date: Mon, 24 Jun 2024 16:40:50 -0600 Subject: [PATCH 13/15] chore: address PR review feedback use `send` instead of `try_send` to enforce backpressure, clean up comments/docs --- service/src/event/ordering_task.rs | 13 ++++-- service/src/event/service.rs | 71 ++++++++++++++---------------- 2 files changed, 42 insertions(+), 42 deletions(-) diff --git a/service/src/event/ordering_task.rs b/service/src/event/ordering_task.rs index f43d381f2..f4e398313 100644 --- a/service/src/event/ordering_task.rs +++ b/service/src/event/ordering_task.rs @@ -21,8 +21,7 @@ const LOG_EVERY_N_ENTRIES: usize = 10_000; #[derive(Debug)] pub struct DeliverableTask { pub(crate) _handle: tokio::task::JoinHandle<()>, - /// Currently only receives events discovered over recon that are out of order and need to be marked ready (deliverable) - /// when their prev chain is discovered and complete (i.e. my prev is deliverable then I am deliverable). + /// Currently receives events discovered over recon. 
pub(crate) tx_inserted: tokio::sync::mpsc::Sender, } @@ -246,7 +245,7 @@ impl StreamEvents { self.should_process = true; } StreamEvent::Undelivered(_) => { - // nothing to do until it's prev arrives + // nothing to do until its prev arrives } } } @@ -271,6 +270,11 @@ impl StreamEvents { /// Returns `false` if we have more work to do and should be retained for future processing fn processing_completed(&mut self) -> bool { self.should_process = false; + // It's worth considering whether we should prune the `cid_map` and `prev_map` with everything from the `new_deliverable` set to avoid + // memory leaks due to never finding events. This requires setting `self.should_process` to true for all undelivered events (or making a db query), + // as we will no longer have everything we've discovered in memory and need to go to the db to figure out what to do. As streams are generally + // short, I'm opting to not make this change now. We could consider something like an expiration time as well, but generally we don't want + // to be dropping things from memory that we might need to process in the future unless we have to. self.new_deliverable.clear(); !self .cid_map @@ -407,7 +411,8 @@ impl OrderingState { } } - /// Add a stream to the list of streams to process. + /// Update the list of streams to process with new events. + /// Relies on `add_stream_event` to handle updating the internal state. fn add_inserted_events(&mut self, events: Vec) { for ev in events { let stream_cid = ev.stream_cid(); diff --git a/service/src/event/service.rs b/service/src/event/service.rs index 554c41271..61f3fa2ce 100644 --- a/service/src/event/service.rs +++ b/service/src/event/service.rs @@ -25,7 +25,8 @@ const DELIVERABLE_EVENTS_BATCH_SIZE: u32 = 1000; const MAX_ITERATIONS: usize = 100_000_000; /// The max number of events we can have pending for delivery in the channel before we start dropping them. 
-const PENDING_EVENTS_CHANNEL_DEPTH: usize = 10_000; +/// This is currently 304 bytes per event, so this is 3 MB of data +const PENDING_EVENTS_CHANNEL_DEPTH: usize = 1_000_000; #[derive(Debug)] /// A database store that verifies the bytes it stores are valid Ceramic events. @@ -216,47 +217,41 @@ impl CeramicEventService { .map(|(e, _)| e.clone()) .collect::>(); - // need to make a better interface around events. needing to know about the stream in some places but - // not in the store makes it inconvenient to map back and forth let res = CeramicOneEvent::insert_many(&self.pool, &to_insert[..]).await?; - // api writes shouldn't have any missed pieces that need ordering so we don't send those - if !history_required { - let to_send = res - .inserted + // api writes shouldn't have any missed pieces that need ordering so we don't send those and return early + if history_required { + return Ok(InsertResult { + store_result: res, + missing_history, + }); + } + + let to_send = res + .inserted + .iter() + .filter(|i| i.new_key) + .collect::>(); + + for ev in to_send { + if let Some((ev, metadata)) = to_insert_with_metadata .iter() - .filter(|i| i.new_key) - .collect::>(); - - for ev in to_send { - if let Some((ev, metadata)) = to_insert_with_metadata - .iter() - .find(|(i, _)| i.order_key == ev.order_key) - { - let discovered = DiscoveredEvent { - cid: ev.cid(), - known_deliverable: ev.deliverable(), - metadata: metadata.to_owned(), - }; - trace!(?discovered, "sending delivered to ordering task"); - if let Err(e) = self.delivery_task.tx_inserted.try_send(discovered) { - match e { - tokio::sync::mpsc::error::TrySendError::Full(e) => { - warn!(event=?e, limit=%PENDING_EVENTS_CHANNEL_DEPTH, "Delivery task full. Dropping event and will not be able to mark deliverable new stream event arrives or process is restarted"); - } - tokio::sync::mpsc::error::TrySendError::Closed(_) => { - warn!("Delivery task closed. 
shutting down"); - return Err(Error::new_fatal(anyhow::anyhow!( - "Delivery task closed" - ))); - } - } - } - } else { - tracing::error!(event_id=%ev.order_key, "Missing header for inserted event should be unreachable!"); - debug_assert!(false); // panic in debug mode - continue; + .find(|(i, _)| i.order_key == ev.order_key) + { + let discovered = DiscoveredEvent { + cid: ev.cid(), + known_deliverable: ev.deliverable(), + metadata: metadata.to_owned(), + }; + trace!(?discovered, "sending delivered to ordering task"); + if let Err(_e) = self.delivery_task.tx_inserted.send(discovered).await { + warn!("Delivery task closed. shutting down"); + return Err(Error::new_fatal(anyhow::anyhow!("Delivery task closed"))); } + } else { + tracing::error!(event_id=%ev.order_key, "Missing header for inserted event should be unreachable!"); + debug_assert!(false); // panic in debug mode + continue; } } From 1c0ca5fa35121af4f8dd773dbf5f91763ca3a4ec Mon Sep 17 00:00:00 2001 From: David Estes Date: Tue, 25 Jun 2024 16:13:52 -0600 Subject: [PATCH 14/15] fix: prune data to avoid unbounded growth of delivered events if we're not done, we keep one event in memory until next time to optimize for in order streams. we also try to process on all undelivered events for a stream we're tracking as things fork and we don't know what happened while we were in the queue --- service/src/event/ordering_task.rs | 62 +++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/service/src/event/ordering_task.rs b/service/src/event/ordering_task.rs index f4e398313..0e3051743 100644 --- a/service/src/event/ordering_task.rs +++ b/service/src/event/ordering_task.rs @@ -199,6 +199,9 @@ pub(crate) struct StreamEvents { cid_map: HashMap, /// whether we should process this stream because new events have been added should_process: bool, + /// The last event in our deliverable queue that we kept in memory to try to + /// avoid looking it back up as soon as we get the next event. 
Need to remove next time. + last_deliverable: Option, /// The newly discovered events that are deliverable and should be processed. new_deliverable: VecDeque, } @@ -211,6 +214,7 @@ impl Default for StreamEvents { // default to true so we try to follow the event history on the the first batch loading // will also avoid any possible race conditions if we somehow get things out of order on the channel should_process: true, + last_deliverable: None, new_deliverable: VecDeque::new(), } } @@ -238,17 +242,11 @@ impl StreamEvents { StreamEvent::Undelivered(meta) => { self.prev_map.insert(meta.prev, meta.cid); if !self.should_process { - // we depend on something in memory - if let Some(in_memory_prev) = self.cid_map.get(&meta.prev) { - match in_memory_prev { - StreamEvent::InitEvent(_) | StreamEvent::KnownDeliverable(_) => { - self.should_process = true; - } - StreamEvent::Undelivered(_) => { - // nothing to do until its prev arrives - } - } - } + // as we could sit in the channel for a while and streams can fork, we can't rely + // on our in memory state to know what happened between enqueue and dequeque + // and need to try to process. We could add an LRU cache in the future to + // avoid looking up that were processed while were were enqueued. + self.should_process = true; } meta.cid } @@ -269,17 +267,43 @@ impl StreamEvents { /// Returns `true` if we're finished processing and can be dropped from memory. /// Returns `false` if we have more work to do and should be retained for future processing fn processing_completed(&mut self) -> bool { - self.should_process = false; - // It's worth considering whether we should prune the `cid_map` and `prev_map` with everything from the `new_deliverable` set to avoid - // memory leaks due to never finding events. This requires setting `self.should_process` to true for all undelivered events (or making a db query), - // as we will no longer have everything we've discovered in memory and need to go to the db to figure out what to do. 
As streams are generally - // short, I'm opting to not make this change now. We could consider something like an expiration time as well, but generally we don't want - // to be dropping things from memory that we might need to process in the future unless we have to. - self.new_deliverable.clear(); - !self + // if we're done, we don't need to bother cleaning up since we get dropped + if !self .cid_map .iter() .any(|(_, ev)| matches!(ev, StreamEvent::Undelivered(_))) + { + true + } else { + self.should_process = false; + + if let Some(prune_now) = self.last_deliverable.take() { + self.new_deliverable.push_front(prune_now); + } + self.last_deliverable = self.new_deliverable.back().cloned(); + for cid in self + .new_deliverable + .iter() + .take(self.new_deliverable.len().saturating_sub(1)) + { + if let Some(ev) = self.cid_map.remove(cid) { + match ev { + StreamEvent::InitEvent(_) => { + warn!(%cid, "should not have init event in our delivery queue when processing_completed"); + debug_assert!(false); // panic in debug mode + } + StreamEvent::KnownDeliverable(meta) => { + self.prev_map.remove(&meta.prev); + } + StreamEvent::Undelivered(_) => { + unreachable!("should not have undelivered event in our delivery queue") + } + } + } + } + self.new_deliverable.clear(); + false + } } /// When we discover the prev event is deliverable, we can mark ourselves as deliverable. 
From e933b961de056e457a8efa4a102944dc8d666b2d Mon Sep 17 00:00:00 2001
From: David Estes <5317198+dav1do@users.noreply.github.com>
Date: Wed, 26 Jun 2024 11:02:55 -0600
Subject: [PATCH 15/15] Update ordering_task.rs

Co-authored-by: Spencer T Brody

---
 service/src/event/ordering_task.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/service/src/event/ordering_task.rs b/service/src/event/ordering_task.rs
index 0e3051743..7afe35441 100644
--- a/service/src/event/ordering_task.rs
+++ b/service/src/event/ordering_task.rs
@@ -245,7 +245,7 @@ impl StreamEvents {
                     // as we could sit in the channel for a while and streams can fork, we can't rely
                     // on our in memory state to know what happened between enqueue and dequeque
                     // and need to try to process. We could add an LRU cache in the future to
-                    // avoid looking up that were processed while were were enqueued.
+                    // avoid looking up events that were processed while we were enqueued.
                     self.should_process = true;
                 }
                 meta.cid