From 8d49a908fe95181df94512bc8066c7588a9c76a3 Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Thu, 7 Nov 2024 12:21:28 +0100 Subject: [PATCH] fix(event cache): give looser parameters for the deduplicator's bloom filters The previous values would lead to super large memory allocations, as observed with `valgrind --tool=massive` on the tiny test added in this commit: - for 400 rooms each having 100 events, this led to 540MB of allocations. - for 1000 rooms each having 100 events, this led to 1.5GB of allocations. This is not acceptable for any kind of devices, especially for mobile devices which may be more constrained on memory. The bloom filter is an optimisation to avoid going through events in the room's event list, so it shouldn't cause a big toll like that; instead, we can reduce the parameters values given when creating the filters. With the given parameters, 1000 rooms each having 100 events leads to 1.2MB of allocations. --- .../src/event_cache/deduplicator.rs | 44 +++++++++++++++++-- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/crates/matrix-sdk/src/event_cache/deduplicator.rs b/crates/matrix-sdk/src/event_cache/deduplicator.rs index b87dbd01d42..57d4126afef 100644 --- a/crates/matrix-sdk/src/event_cache/deduplicator.rs +++ b/crates/matrix-sdk/src/event_cache/deduplicator.rs @@ -41,8 +41,10 @@ impl fmt::Debug for Deduplicator { } impl Deduplicator { - const APPROXIMATED_MAXIMUM_NUMBER_OF_EVENTS: usize = 800_000; - const DESIRED_FALSE_POSITIVE_RATE: f64 = 0.001; + // Note: don't use too high numbers here, or the amount of allocated memory will + // explode. See https://github.com/matrix-org/matrix-rust-sdk/pull/4231 for details. + const APPROXIMATED_MAXIMUM_NUMBER_OF_EVENTS: usize = 1_000; + const DESIRED_FALSE_POSITIVE_RATE: f64 = 0.01; /// Create a new `Deduplicator`. pub fn new() -> Self { @@ -138,7 +140,7 @@ pub enum Decoration { #[cfg(test)] mod tests { - use assert_matches2::assert_let; + use assert_matches2::{assert_let, assert_matches}; use matrix_sdk_base::deserialized_responses::SyncTimelineEvent; use ruma::{owned_event_id, user_id, EventId}; @@ -267,4 +269,40 @@ mod tests { assert!(events.next().is_none()); } } + + #[test] + fn test_bloom_filter_growth() { + // This test was used as a testbed to observe, using `valgrind --tool=massive`, + // the total memory allocated by the deduplicator. We keep it checked in + // to revive this experiment in the future, if needs be. + + let num_rooms = if let Ok(num_rooms) = std::env::var("ROOMS") { + num_rooms.parse().unwrap() + } else { + 10 + }; + + let num_events = if let Ok(num_events) = std::env::var("EVENTS") { + num_events.parse().unwrap() + } else { + 100 + }; + + let mut dedups = Vec::with_capacity(num_rooms); + + for _ in 0..num_rooms { + let dedup = Deduplicator::new(); + let existing_events = RoomEvents::new(); + + for i in 0..num_events { + let event = sync_timeline_event(&EventId::parse(format!("$event{i}")).unwrap()); + let mut it = dedup.scan_and_learn([event].into_iter(), &existing_events); + + assert_matches!(it.next(), Some(Decoration::Unique(..))); + assert_matches!(it.next(), None); + } + + dedups.push(dedup); + } + } }