From 8c45f840a05d4c243e0c7970dda844516dc6c665 Mon Sep 17 00:00:00 2001
From: Ivan Enderlin
Date: Wed, 20 Nov 2024 16:06:16 +0100
Subject: [PATCH] feat(common): Implement `RelationalLinkedChunk`.

A `RelationalLinkedChunk` is like a `LinkedChunk` but with a relational
layout, similar to what we would have in a database.

This is used by memory stores. The idea is to have a data layout that is
similar for memory stores and for relational database stores, to
represent a `LinkedChunk`.

This type is also designed to receive `Update`. Applying `Update`s
directly on a `LinkedChunk` is not ideal and particularly not trivial as
the `Update`s do _not_ match the internal data layout of the
`LinkedChunk`, they have been designed for storages, like a relational
database for example.

This type is not as performant as `LinkedChunk` (in terms of memory
layout, CPU caches etc.). It is only designed to be used in memory
stores, which are mostly used for test purposes or light usage of the
SDK.
---
 .../matrix-sdk-common/src/linked_chunk/mod.rs |  15 +-
 .../src/linked_chunk/relational.rs            | 447 ++++++++++++++++++
 2 files changed, 460 insertions(+), 2 deletions(-)
 create mode 100644 crates/matrix-sdk-common/src/linked_chunk/relational.rs

diff --git a/crates/matrix-sdk-common/src/linked_chunk/mod.rs b/crates/matrix-sdk-common/src/linked_chunk/mod.rs
index eeba29be302..a250cbab4d6 100644
--- a/crates/matrix-sdk-common/src/linked_chunk/mod.rs
+++ b/crates/matrix-sdk-common/src/linked_chunk/mod.rs
@@ -93,6 +93,7 @@ macro_rules! assert_items_eq {
 }
 
 mod as_vector;
+pub mod relational;
 mod updates;
 
 use std::{
@@ -933,7 +934,7 @@ impl ChunkIdentifierGenerator {
 /// Learn more with [`ChunkIdentifierGenerator`].
 #[derive(Copy, Clone, Debug, PartialEq)]
 #[repr(transparent)]
-pub struct ChunkIdentifier(u64);
+pub struct ChunkIdentifier(pub(super) u64);
 
 impl PartialEq<u64> for ChunkIdentifier {
     fn eq(&self, other: &u64) -> bool {
@@ -945,7 +946,7 @@ impl PartialEq<u64> for ChunkIdentifier {
 ///
 /// It's a pair of a chunk position and an item index.
 #[derive(Copy, Clone, Debug, PartialEq)]
-pub struct Position(ChunkIdentifier, usize);
+pub struct Position(pub(super) ChunkIdentifier, pub(super) usize);
 
 impl Position {
     /// Get the chunk identifier of the item.
@@ -966,6 +967,16 @@ impl Position {
     pub fn decrement_index(&mut self) {
         self.1 = self.1.checked_sub(1).expect("Cannot decrement the index because it's already 0");
     }
+
+    /// Increment the index part (see [`Self::index`]), i.e. add 1.
+    ///
+    /// # Panics
+    ///
+    /// This method will panic if it will overflow, i.e. if the index is already
+    /// `usize::MAX`.
+    pub fn increment_index(&mut self) {
+        self.1 = self.1.checked_add(1).expect("Cannot increment the index because it's too large");
+    }
 }
 
 /// An iterator over a [`LinkedChunk`] that traverses the chunk in backward
diff --git a/crates/matrix-sdk-common/src/linked_chunk/relational.rs b/crates/matrix-sdk-common/src/linked_chunk/relational.rs
new file mode 100644
index 00000000000..9e6f5a6ddfe
--- /dev/null
+++ b/crates/matrix-sdk-common/src/linked_chunk/relational.rs
@@ -0,0 +1,447 @@
+// Copyright 2024 The Matrix.org Foundation C.I.C.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Implementation for a _relational linked chunk_, see
+//! [`RelationalLinkedChunk`].
+
+use crate::linked_chunk::{ChunkIdentifier, Position, Update};
+
+/// A row of the [`RelationalLinkedChunk::chunks`].
+#[derive(Debug, PartialEq)]
+struct ChunkRow {
+    previous_chunk: Option<ChunkIdentifier>,
+    chunk: ChunkIdentifier,
+    next_chunk: Option<ChunkIdentifier>,
+}
+
+/// A row of the [`RelationalLinkedChunk::items`].
+#[derive(Debug, PartialEq)]
+struct ItemRow<Item, Gap> {
+    position: Position,
+    item: Either<Item, Gap>,
+}
+
+/// Kind of item.
+#[derive(Debug, PartialEq)]
+enum Either<Item, Gap> {
+    /// The content is an item.
+    Item(Item),
+
+    /// The content is a gap.
+    Gap(Gap),
+}
+
+/// A [`LinkedChunk`] but with a relational layout, similar to what we
+/// would have in a database.
+///
+/// This is used by memory stores. The idea is to have a data layout that is
+/// similar for memory stores and for relational database stores, to represent a
+/// [`LinkedChunk`].
+///
+/// This type is also designed to receive [`Update`]. Applying `Update`s
+/// directly on a [`LinkedChunk`] is not ideal and particularly not trivial as
+/// the `Update`s do _not_ match the internal data layout of the `LinkedChunk`,
+/// they have been designed for storages, like a relational database for example.
+///
+/// This type is not as performant as [`LinkedChunk`] (in terms of memory
+/// layout, CPU caches etc.). It is only designed to be used in memory stores,
+/// which are mostly used for test purposes or light usage of the SDK.
+///
+/// [`LinkedChunk`]: super::LinkedChunk
+#[derive(Debug)]
+pub struct RelationalLinkedChunk<Item, Gap> {
+    /// Chunks.
+    chunks: Vec<ChunkRow>,
+
+    /// Items.
+    items: Vec<ItemRow<Item, Gap>>,
+}
+
+impl<Item, Gap> RelationalLinkedChunk<Item, Gap> {
+    /// Create a new relational linked chunk.
+    pub fn new() -> Self {
+        Self { chunks: Vec::new(), items: Vec::new() }
+    }
+
+    /// Apply [`Update`]s, the only way to write data inside this relational linked
+    /// chunk. Panics if a linked or removed chunk, or a removed item, is unknown.
+    pub fn apply_updates(&mut self, updates: &[Update<Item, Gap>])
+    where
+        Item: Clone,
+        Gap: Clone,
+    {
+        for update in updates {
+            match update {
+                Update::NewItemsChunk { previous, new, next } => {
+                    insert_chunk(&mut self.chunks, previous, new, next);
+                }
+
+                Update::NewGapChunk { previous, new, next, gap } => {
+                    insert_chunk(&mut self.chunks, previous, new, next);
+                    self.items.push(ItemRow {
+                        position: Position(*new, 0),
+                        item: Either::Gap(gap.clone()),
+                    });
+                }
+
+                Update::RemoveChunk(chunk_identifier) => {
+                    remove_chunk(&mut self.chunks, chunk_identifier);
+
+                    let indices_to_remove = self
+                        .items
+                        .iter()
+                        .enumerate()
+                        .filter_map(|(nth, ItemRow { position, .. })| {
+                            (position.chunk_identifier() == *chunk_identifier).then_some(nth)
+                        })
+                        .collect::<Vec<_>>();
+
+                    for index_to_remove in indices_to_remove.into_iter().rev() {
+                        self.items.remove(index_to_remove);
+                    }
+                }
+
+                Update::PushItems { at, items } => {
+                    let mut at = at.clone();
+
+                    for item in items {
+                        self.items.push(ItemRow {
+                            position: at.clone(),
+                            item: Either::Item(item.clone()),
+                        });
+                        at.increment_index();
+                    }
+                }
+
+                Update::RemoveItem { at } => {
+                    let mut entry_to_remove = None;
+
+                    for (nth, ItemRow { position, .. }) in self.items.iter_mut().enumerate() {
+                        // Find the item to remove.
+                        if position == at {
+                            debug_assert!(entry_to_remove.is_none(), "Found the same entry twice");
+
+                            entry_to_remove = Some(nth);
+                        }
+
+                        // Update all items that come _after_ `at` to shift their index.
+                        if position.chunk_identifier() == at.chunk_identifier()
+                            && position.index() > at.index()
+                        {
+                            position.decrement_index();
+                        }
+                    }
+
+                    self.items.remove(entry_to_remove.expect("Remove an unknown item"));
+                }
+
+                Update::DetachLastItems { at } => {
+                    let indices_to_remove = self
+                        .items
+                        .iter()
+                        .enumerate()
+                        .filter_map(|(nth, ItemRow { position, .. })| {
+                            (position.chunk_identifier() == at.chunk_identifier()
+                                && position.index() >= at.index())
+                            .then_some(nth)
+                        })
+                        .collect::<Vec<_>>();
+
+                    for index_to_remove in indices_to_remove.into_iter().rev() {
+                        self.items.remove(index_to_remove);
+                    }
+                }
+
+                Update::StartReattachItems | Update::EndReattachItems => { /* nothing */ }
+            }
+        }
+
+        fn insert_chunk(
+            chunks: &mut Vec<ChunkRow>,
+            previous: &Option<ChunkIdentifier>,
+            new: &ChunkIdentifier,
+            next: &Option<ChunkIdentifier>,
+        ) {
+            // Find the previous chunk, and update its next chunk.
+            if let Some(previous) = previous {
+                let entry_for_previous_chunk = chunks
+                    .iter_mut()
+                    .find(|ChunkRow { chunk, .. }| chunk == previous)
+                    .expect("Previous chunk should be present");
+
+                // Link the previous chunk to the new chunk.
+                entry_for_previous_chunk.next_chunk = Some(*new);
+            }
+
+            // Find the next chunk, and update its previous chunk.
+            if let Some(next) = next {
+                let entry_for_next_chunk = chunks
+                    .iter_mut()
+                    .find(|ChunkRow { chunk, .. }| chunk == next)
+                    .expect("Next chunk should be present");
+
+                // Link the next chunk back to the new chunk.
+                entry_for_next_chunk.previous_chunk = Some(*new);
+            }
+
+            // Insert the chunk.
+            chunks.push(ChunkRow { previous_chunk: *previous, chunk: *new, next_chunk: *next });
+        }
+
+        fn remove_chunk(chunks: &mut Vec<ChunkRow>, chunk_to_remove: &ChunkIdentifier) {
+            let entry_nth_to_remove = chunks
+                .iter()
+                .enumerate()
+                .find_map(|(nth, ChunkRow { chunk, .. })| (chunk == chunk_to_remove).then_some(nth))
+                .expect("Remove an unknown chunk");
+
+            let ChunkRow { previous_chunk: previous, next_chunk: next, .. } =
+                chunks.remove(entry_nth_to_remove);
+
+            // Find the previous chunk, and update its next chunk.
+            if let Some(previous) = previous {
+                let entry_for_previous_chunk = chunks
+                    .iter_mut()
+                    .find(|ChunkRow { chunk, .. }| *chunk == previous)
+                    .expect("Previous chunk should be present");
+
+                // Skip over the removed chunk: link directly to what followed it.
+                entry_for_previous_chunk.next_chunk = next;
+            }
+
+            // Find the next chunk, and update its previous chunk.
+            if let Some(next) = next {
+                let entry_for_next_chunk = chunks
+                    .iter_mut()
+                    .find(|ChunkRow { chunk, .. }| *chunk == next)
+                    .expect("Next chunk should be present");
+
+                // Skip over the removed chunk: link directly to what preceded it.
+                entry_for_next_chunk.previous_chunk = previous;
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{ChunkIdentifier as CId, *};
+
+    #[test]
+    fn test_new_items_chunk() {
+        let mut relational_linked_chunk = RelationalLinkedChunk::<char, ()>::new();
+
+        relational_linked_chunk.apply_updates(&[
+            // 0
+            Update::NewItemsChunk { previous: None, new: CId(0), next: None },
+            // 1 after 0
+            Update::NewItemsChunk { previous: Some(CId(0)), new: CId(1), next: None },
+            // 2 before 0
+            Update::NewItemsChunk { previous: None, new: CId(2), next: Some(CId(0)) },
+            // 3 between 2 and 0
+            Update::NewItemsChunk { previous: Some(CId(2)), new: CId(3), next: Some(CId(0)) },
+        ]);
+
+        // Chunks are correctly linked.
+        assert_eq!(
+            relational_linked_chunk.chunks,
+            &[
+                ChunkRow { previous_chunk: Some(CId(3)), chunk: CId(0), next_chunk: Some(CId(1)) },
+                ChunkRow { previous_chunk: Some(CId(0)), chunk: CId(1), next_chunk: None },
+                ChunkRow { previous_chunk: None, chunk: CId(2), next_chunk: Some(CId(3)) },
+                ChunkRow { previous_chunk: Some(CId(2)), chunk: CId(3), next_chunk: Some(CId(0)) },
+            ],
+        );
+        // Items have not been modified.
+        assert!(relational_linked_chunk.items.is_empty());
+    }
+
+    #[test]
+    fn test_new_gap_chunk() {
+        let mut relational_linked_chunk = RelationalLinkedChunk::<char, ()>::new();
+
+        relational_linked_chunk.apply_updates(&[
+            // 0
+            Update::NewItemsChunk { previous: None, new: CId(0), next: None },
+            // 1 after 0
+            Update::NewGapChunk { previous: Some(CId(0)), new: CId(1), next: None, gap: () },
+            // 2 after 1
+            Update::NewItemsChunk { previous: Some(CId(1)), new: CId(2), next: None },
+        ]);
+
+        // Chunks are correctly linked.
+        assert_eq!(
+            relational_linked_chunk.chunks,
+            &[
+                ChunkRow { previous_chunk: None, chunk: CId(0), next_chunk: Some(CId(1)) },
+                ChunkRow { previous_chunk: Some(CId(0)), chunk: CId(1), next_chunk: Some(CId(2)) },
+                ChunkRow { previous_chunk: Some(CId(1)), chunk: CId(2), next_chunk: None },
+            ],
+        );
+        // Items contain the gap.
+        assert_eq!(
+            relational_linked_chunk.items,
+            &[ItemRow { position: Position(CId(1), 0), item: Either::Gap(()) }],
+        );
+    }
+
+    #[test]
+    fn test_remove_chunk() {
+        let mut relational_linked_chunk = RelationalLinkedChunk::<char, ()>::new();
+
+        relational_linked_chunk.apply_updates(&[
+            // 0
+            Update::NewItemsChunk { previous: None, new: CId(0), next: None },
+            // 1 after 0
+            Update::NewGapChunk { previous: Some(CId(0)), new: CId(1), next: None, gap: () },
+            // 2 after 1
+            Update::NewItemsChunk { previous: Some(CId(1)), new: CId(2), next: None },
+            // remove 1
+            Update::RemoveChunk(CId(1)),
+        ]);
+
+        // Chunks are correctly linked.
+        assert_eq!(
+            relational_linked_chunk.chunks,
+            &[
+                ChunkRow { previous_chunk: None, chunk: CId(0), next_chunk: Some(CId(2)) },
+                ChunkRow { previous_chunk: Some(CId(0)), chunk: CId(2), next_chunk: None },
+            ],
+        );
+        // Items no longer contain the gap.
+        assert!(relational_linked_chunk.items.is_empty());
+    }
+
+    #[test]
+    fn test_push_items() {
+        let mut relational_linked_chunk = RelationalLinkedChunk::<char, ()>::new();
+
+        relational_linked_chunk.apply_updates(&[
+            // new chunk (this is not mandatory for this test, but let's try to be realistic)
+            Update::NewItemsChunk { previous: None, new: CId(0), next: None },
+            // new items on 0
+            Update::PushItems { at: Position(CId(0), 0), items: vec!['a', 'b', 'c'] },
+            // new chunk (to test new items are pushed in the correct chunk)
+            Update::NewItemsChunk { previous: Some(CId(0)), new: CId(1), next: None },
+            // new items on 1
+            Update::PushItems { at: Position(CId(1), 0), items: vec!['x', 'y', 'z'] },
+            // new items on 0 again
+            Update::PushItems { at: Position(CId(0), 3), items: vec!['d', 'e'] },
+        ]);
+
+        // Chunks are correctly linked.
+        assert_eq!(
+            relational_linked_chunk.chunks,
+            &[
+                ChunkRow { previous_chunk: None, chunk: CId(0), next_chunk: Some(CId(1)) },
+                ChunkRow { previous_chunk: Some(CId(0)), chunk: CId(1), next_chunk: None },
+            ],
+        );
+        // Items contain the pushed items.
+        assert_eq!(
+            relational_linked_chunk.items,
+            &[
+                ItemRow { position: Position(CId(0), 0), item: Either::Item('a') },
+                ItemRow { position: Position(CId(0), 1), item: Either::Item('b') },
+                ItemRow { position: Position(CId(0), 2), item: Either::Item('c') },
+                ItemRow { position: Position(CId(1), 0), item: Either::Item('x') },
+                ItemRow { position: Position(CId(1), 1), item: Either::Item('y') },
+                ItemRow { position: Position(CId(1), 2), item: Either::Item('z') },
+                ItemRow { position: Position(CId(0), 3), item: Either::Item('d') },
+                ItemRow { position: Position(CId(0), 4), item: Either::Item('e') },
+            ],
+        );
+    }
+
+    #[test]
+    fn test_remove_item() {
+        let mut relational_linked_chunk = RelationalLinkedChunk::<char, ()>::new();
+
+        relational_linked_chunk.apply_updates(&[
+            // new chunk (this is not mandatory for this test, but let's try to be realistic)
+            Update::NewItemsChunk { previous: None, new: CId(0), next: None },
+            // new items on 0
+            Update::PushItems { at: Position(CId(0), 0), items: vec!['a', 'b', 'c', 'd', 'e'] },
+            // remove an item: 'a'
+            Update::RemoveItem { at: Position(CId(0), 0) },
+            // remove an item: 'd'
+            Update::RemoveItem { at: Position(CId(0), 2) },
+        ]);
+
+        // Chunks are correctly linked.
+        assert_eq!(
+            relational_linked_chunk.chunks,
+            &[ChunkRow { previous_chunk: None, chunk: CId(0), next_chunk: None }],
+        );
+        // Items no longer contain the removed items, and indices have been shifted.
+        assert_eq!(
+            relational_linked_chunk.items,
+            &[
+                ItemRow { position: Position(CId(0), 0), item: Either::Item('b') },
+                ItemRow { position: Position(CId(0), 1), item: Either::Item('c') },
+                ItemRow { position: Position(CId(0), 2), item: Either::Item('e') },
+            ],
+        );
+    }
+
+    #[test]
+    fn test_detach_last_items() {
+        let mut relational_linked_chunk = RelationalLinkedChunk::<char, ()>::new();
+
+        relational_linked_chunk.apply_updates(&[
+            // new chunk
+            Update::NewItemsChunk { previous: None, new: CId(0), next: None },
+            // new chunk
+            Update::NewItemsChunk { previous: Some(CId(0)), new: CId(1), next: None },
+            // new items on 0
+            Update::PushItems { at: Position(CId(0), 0), items: vec!['a', 'b', 'c', 'd', 'e'] },
+            // new items on 1
+            Update::PushItems { at: Position(CId(1), 0), items: vec!['x', 'y', 'z'] },
+            // detach last items on 0
+            Update::DetachLastItems { at: Position(CId(0), 2) },
+        ]);
+
+        // Chunks are correctly linked.
+        assert_eq!(
+            relational_linked_chunk.chunks,
+            &[
+                ChunkRow { previous_chunk: None, chunk: CId(0), next_chunk: Some(CId(1)) },
+                ChunkRow { previous_chunk: Some(CId(0)), chunk: CId(1), next_chunk: None },
+            ],
+        );
+        // Items no longer contain the detached items; chunk 1 is untouched.
+        assert_eq!(
+            relational_linked_chunk.items,
+            &[
+                ItemRow { position: Position(CId(0), 0), item: Either::Item('a') },
+                ItemRow { position: Position(CId(0), 1), item: Either::Item('b') },
+                ItemRow { position: Position(CId(1), 0), item: Either::Item('x') },
+                ItemRow { position: Position(CId(1), 1), item: Either::Item('y') },
+                ItemRow { position: Position(CId(1), 2), item: Either::Item('z') },
+            ],
+        );
+    }
+
+    #[test]
+    fn test_start_and_end_reattach_items() {
+        let mut relational_linked_chunk = RelationalLinkedChunk::<char, ()>::new();
+
+        relational_linked_chunk
+            .apply_updates(&[Update::StartReattachItems, Update::EndReattachItems]);
+
+        // Nothing happened.
+        assert!(relational_linked_chunk.chunks.is_empty());
+        assert!(relational_linked_chunk.items.is_empty());
+    }
+}