Skip to content

Commit

Permalink
Speed up fetching partial-state rooms on sliding sync (#17666)
Browse files Browse the repository at this point in the history
Instead of having a large cache of `room_id -> bool` about whether a
room is partially stated, replace it with a "fetch rooms the user is in
which are partially-stated" query. This is a lot faster as the set of
partially stated rooms at any point across the whole server is small,
and so such a query is fast.

The main issue with the bulk cache lookup is the CPU time spent looking
up all the rooms in the cache.
  • Loading branch information
erikjohnston authored Sep 6, 2024
1 parent d5accec commit 786de85
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 20 deletions.
1 change: 1 addition & 0 deletions changelog.d/17666.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Small performance improvement in speeding up sliding sync.
28 changes: 8 additions & 20 deletions synapse/handlers/sliding_sync/room_lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,11 +333,7 @@ async def _compute_interested_rooms_new_tables(

# Find which rooms are partially stated and may need to be filtered out
# depending on the `required_state` requested (see below).
partial_state_room_map = (
await self.store.is_partial_state_room_batched(
filtered_sync_room_map.keys()
)
)
partial_state_rooms = await self.store.get_partial_rooms()

# Since creating the `RoomSyncConfig` takes some work, let's just do it
# once.
Expand All @@ -349,7 +345,7 @@ async def _compute_interested_rooms_new_tables(
filtered_sync_room_map = {
room_id: room
for room_id, room in filtered_sync_room_map.items()
if not partial_state_room_map.get(room_id)
if room_id not in partial_state_rooms
}

all_rooms.update(filtered_sync_room_map)
Expand Down Expand Up @@ -409,9 +405,7 @@ async def _compute_interested_rooms_new_tables(
with start_active_span("assemble_room_subscriptions"):
# Find which rooms are partially stated and may need to be filtered out
# depending on the `required_state` requested (see below).
partial_state_room_map = await self.store.is_partial_state_room_batched(
sync_config.room_subscriptions.keys()
)
partial_state_rooms = await self.store.get_partial_rooms()

for (
room_id,
Expand All @@ -431,7 +425,7 @@ async def _compute_interested_rooms_new_tables(
# Exclude partially-stated rooms if we must wait for the room to be
# fully-stated
if room_sync_config.must_await_full_state(self.is_mine_id):
if partial_state_room_map.get(room_id):
if room_id in partial_state_rooms:
continue

all_rooms.add(room_id)
Expand Down Expand Up @@ -514,11 +508,7 @@ async def _compute_interested_rooms_fallback(

# Find which rooms are partially stated and may need to be filtered out
# depending on the `required_state` requested (see below).
partial_state_room_map = (
await self.store.is_partial_state_room_batched(
filtered_sync_room_map.keys()
)
)
partial_state_rooms = await self.store.get_partial_rooms()

# Since creating the `RoomSyncConfig` takes some work, let's just do it
# once.
Expand All @@ -530,7 +520,7 @@ async def _compute_interested_rooms_fallback(
filtered_sync_room_map = {
room_id: room
for room_id, room in filtered_sync_room_map.items()
if not partial_state_room_map.get(room_id)
if room_id not in partial_state_rooms
}

all_rooms.update(filtered_sync_room_map)
Expand Down Expand Up @@ -590,9 +580,7 @@ async def _compute_interested_rooms_fallback(
with start_active_span("assemble_room_subscriptions"):
# Find which rooms are partially stated and may need to be filtered out
# depending on the `required_state` requested (see below).
partial_state_room_map = await self.store.is_partial_state_room_batched(
sync_config.room_subscriptions.keys()
)
partial_state_rooms = await self.store.get_partial_rooms()

for (
room_id,
Expand Down Expand Up @@ -624,7 +612,7 @@ async def _compute_interested_rooms_fallback(
# Exclude partially-stated rooms if we must wait for the room to be
# fully-stated
if room_sync_config.must_await_full_state(self.is_mine_id):
if partial_state_room_map.get(room_id):
if room_id in partial_state_rooms:
continue

all_rooms.add(room_id)
Expand Down
26 changes: 26 additions & 0 deletions synapse/storage/databases/main/room.py
Original file line number Diff line number Diff line change
Expand Up @@ -1382,6 +1382,30 @@ async def is_partial_state_room_batched(
partial_state_rooms = {row[0] for row in rows}
return {room_id: room_id in partial_state_rooms for room_id in room_ids}

@cached(max_entries=10000, iterable=True)
async def get_partial_rooms(self) -> AbstractSet[str]:
    """Get the IDs of all "partial-state" rooms known to this server.

    Note that the result is NOT scoped to a particular user: the method
    takes no user argument and the query has no `WHERE` clause, so every
    room ID currently listed in the `partial_state_rooms` table is
    returned. Callers interested in a subset of rooms should test
    membership against the returned set.

    This is fast as the set of partially stated rooms at any point across
    the whole server is small, and so such a query is fast. This is also
    faster than looking up whether a set of room ID's are partially stated
    via `is_partial_state_room_batched(...)` because of the sheer amount of
    CPU time looking all the rooms up in the cache.

    Returns:
        The set of room IDs that are currently partially stated.
    """

    def _get_partial_rooms_txn(
        txn: LoggingTransaction,
    ) -> AbstractSet[str]:
        # Deliberately unfiltered: we want every partially-stated room.
        sql = """
            SELECT room_id FROM partial_state_rooms
        """
        txn.execute(sql)
        # Each row is a 1-tuple; unpack it to collect the bare room IDs.
        return {room_id for (room_id,) in txn}

    # The interaction description matches the method name, since the query
    # is server-wide rather than per-user.
    return await self.db_pool.runInteraction(
        "get_partial_rooms", _get_partial_rooms_txn
    )

async def get_join_event_id_and_device_lists_stream_id_for_partial_state(
self, room_id: str
) -> Tuple[str, int]:
Expand Down Expand Up @@ -2341,6 +2365,7 @@ def _store_partial_state_room_txn(
self._invalidate_cache_and_stream(
txn, self._get_partial_state_servers_at_join, (room_id,)
)
self._invalidate_all_cache_and_stream(txn, self.get_partial_rooms)

async def write_partial_state_rooms_join_event_id(
self,
Expand Down Expand Up @@ -2562,6 +2587,7 @@ def _clear_partial_state_room_txn(
self._invalidate_cache_and_stream(
txn, self._get_partial_state_servers_at_join, (room_id,)
)
self._invalidate_all_cache_and_stream(txn, self.get_partial_rooms)

DatabasePool.simple_insert_txn(
txn,
Expand Down

0 comments on commit 786de85

Please sign in to comment.