Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Ensure stream position never goes backwards, and add test
Browse files Browse the repository at this point in the history
  • Loading branch information
erikjohnston committed Oct 20, 2023
1 parent 56e7fb3 commit fddfade
Show file tree
Hide file tree
Showing 3 changed files with 213 additions and 14 deletions.
43 changes: 29 additions & 14 deletions synapse/storage/util/id_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,11 @@ def __init__(
# The maximum stream ID that we have seen allocated across any writer.
self._max_seen_allocated_stream_id = 1

# The maximum position of the local instance. This can be higher than
# the corresponding position in `current_positions` table when there are
# no active writes in progress.
self._max_position_of_local_instance = self._max_seen_allocated_stream_id

self._sequence_gen = PostgresSequenceGenerator(sequence_name)

# We check that the table and sequence haven't diverged.
Expand All @@ -439,6 +444,16 @@ def __init__(
self._current_positions.values(), default=1
)

# For the case where `stream_positions` is not up to date,
# `_persisted_upto_position` may be higher.
self._max_seen_allocated_stream_id = max(
self._max_seen_allocated_stream_id, self._persisted_upto_position
)

# Bump our local maximum position now that we've loaded things from the
# DB.
self._max_position_of_local_instance = self._max_seen_allocated_stream_id

if not writers:
# If there have been no explicit writers given then any instance can
# write to the stream. In which case, let's pre-seed our own
Expand Down Expand Up @@ -708,6 +723,7 @@ def _mark_id_as_finished(self, next_id: int) -> None:
if new_cur:
curr = self._current_positions.get(self._instance_name, 0)
self._current_positions[self._instance_name] = max(curr, new_cur)
self._max_position_of_local_instance = max(curr, new_cur)

self._add_persisted_position(next_id)

Expand All @@ -722,6 +738,9 @@ def get_current_token_for_writer(self, instance_name: str) -> int:
# persisted up to position. This stops Synapse from doing a full table
# scan when a new writer announces itself over replication.
with self._lock:
if self._instance_name == instance_name:
return self._return_factor * self._max_position_of_local_instance

pos = self._current_positions.get(
instance_name, self._persisted_upto_position
)
Expand All @@ -731,20 +750,6 @@ def get_current_token_for_writer(self, instance_name: str) -> int:
# possible.
pos = max(pos, self._persisted_upto_position)

if (
self._instance_name == instance_name
and not self._in_flight_fetches
and not self._unfinished_ids
):
# For our own instance when there's nothing in flight, it's safe
# to advance to the maximum persisted position we've seen (as we
# know that any new tokens we request will be greater).
max_pos_of_all_writers = max(
self._current_positions.values(),
default=self._persisted_upto_position,
)
pos = max(pos, max_pos_of_all_writers)

return self._return_factor * pos

def get_minimal_local_current_token(self) -> int:
Expand Down Expand Up @@ -821,6 +826,16 @@ def _add_persisted_position(self, new_id: int) -> None:

self._persisted_upto_position = max(min_curr, self._persisted_upto_position)

# Advance our local max position.
self._max_position_of_local_instance = max(
self._max_position_of_local_instance, self._persisted_upto_position
)

if not self._unfinished_ids and not self._in_flight_fetches:
# If we don't have anything in flight, it's safe to advance to the
# max seen stream ID.
self._max_position_of_local_instance = self._max_seen_allocated_stream_id

# We now iterate through the seen positions, discarding those that are
# less than the current min positions, and incrementing the min position
# if its exactly one greater.
Expand Down
51 changes: 51 additions & 0 deletions tests/storage/test_id_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,57 @@ def test_minimal_local_token(self) -> None:
self.assertEqual(second_id_gen.get_positions(), {"first": 3, "second": 7})
self.assertEqual(second_id_gen.get_minimal_local_current_token(), 7)

def test_current_token_gap(self) -> None:
    """Test that getting the current token for a writer returns the maximal
    token when there are no writes.
    """
    # Seed the stream with rows from two writers. Presumably this allocates
    # stream IDs 1..7 (3 for "first", 4 for "second") — confirm against
    # `_insert_rows`.
    self._insert_rows("first", 3)
    self._insert_rows("second", 4)

    first_id_gen = self._create_id_generator(
        "first", writers=["first", "second", "third"]
    )
    second_id_gen = self._create_id_generator(
        "second", writers=["first", "second", "third"]
    )

    # With nothing in flight, every writer's token is the max allocated
    # stream ID (7).
    self.assertEqual(second_id_gen.get_current_token_for_writer("first"), 7)
    self.assertEqual(second_id_gen.get_current_token_for_writer("second"), 7)
    self.assertEqual(second_id_gen.get_current_token(), 7)

    # Check that the first ID gen advancing causes the second ID gen to
    # advance (as it has nothing in flight).

    async def _get_next_async() -> None:
        # Allocate two stream IDs on "first" and immediately finish them.
        async with first_id_gen.get_next_mult(2):
            pass

    self.get_success(_get_next_async())
    # Replicate "first"'s new position (IDs 8 and 9 were allocated) to the
    # second generator.
    second_id_gen.advance("first", 9)

    # "second" has no in-flight writes, so its own token may safely jump
    # forward to the max seen position (9). The overall current token stays
    # at the persisted-up-to position (7).
    self.assertEqual(second_id_gen.get_current_token_for_writer("first"), 9)
    self.assertEqual(second_id_gen.get_current_token_for_writer("second"), 9)
    self.assertEqual(second_id_gen.get_current_token(), 7)

    # Check that the first ID gen advancing doesn't advance the second ID
    # gen when it has stuff in flight.
    self.get_success(_get_next_async())

    # Start (but deliberately don't finish) a write on "second", so it now
    # has an unfinished ID in flight.
    ctxmgr = second_id_gen.get_next()
    self.get_success(ctxmgr.__aenter__())

    second_id_gen.advance("first", 11)

    # While "second" has a write in flight its token must not jump forward,
    # even though "first" has advanced past it.
    self.assertEqual(second_id_gen.get_current_token_for_writer("first"), 11)
    self.assertEqual(second_id_gen.get_current_token_for_writer("second"), 9)
    self.assertEqual(second_id_gen.get_current_token(), 7)

    self.get_success(ctxmgr.__aexit__(None, None, None))

    # Once the in-flight write completes, "second" advances to the stream ID
    # it was allocated (12).
    self.assertEqual(second_id_gen.get_current_token_for_writer("first"), 11)
    self.assertEqual(second_id_gen.get_current_token_for_writer("second"), 12)
    self.assertEqual(second_id_gen.get_current_token(), 7)


class BackwardsMultiWriterIdGeneratorTestCase(HomeserverTestCase):
"""Tests MultiWriterIdGenerator that produce *negative* stream IDs."""
Expand Down
133 changes: 133 additions & 0 deletions tests/test_late_messages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import json

from parameterized import parameterized

from synapse.rest import admin
from synapse.rest.client import login, room, sync
from synapse.types import JsonDict

from tests import unittest
from tests.test_utils.event_injection import create_event


class TestLateArrivingMessages(unittest.HomeserverTestCase):
    """Tests that events persisted long after their `origin_server_ts` are
    annotated with `io.element.late_event` unsigned metadata when served to
    clients over `/sync` and `/messages`.
    """

    servlets = [
        admin.register_servlets_for_client_rest_resource,
        room.register_servlets,
        login.register_servlets,
        sync.register_servlets,
    ]

    @parameterized.expand([(1,), (5,), (10,), (20,)])
    def test_late_messages_have_received_ts(self, sync_timeline_limit: int) -> None:
        """Create a chain of events, persist a subset of them well after they
        were created, and check that every late event returned by `/sync` and
        by backwards pagination carries `io.element.late_event` metadata.

        Args:
            sync_timeline_limit: the timeline `limit` used in the sync filter,
                varied to exercise different timeline/pagination splits.
        """
        user1 = self.register_user("user1", "pass")
        tok1 = self.login("user1", "pass")

        room_id = self.helper.create_room_as(user1, is_public=True, tok=tok1)

        self.helper.send(room_id, "Hi!", tok=tok1)

        # Build a linear chain of 100 message events without persisting them
        # yet: each event's `prev_event_ids` points at the previous one, and
        # depth increases monotonically along the chain.
        prev_event_ids = None
        event_and_contexts = []
        depth = None
        for i in range(100):
            event, unpersisted_context = self.get_success(
                create_event(
                    self.hs,
                    prev_event_ids=prev_event_ids,
                    room_id=room_id,
                    sender=user1,
                    type="m.room.message",
                    content={"body": f"late message {i}", "msgtype": "m.text"},
                    depth=depth,
                )
            )
            if not prev_event_ids:
                # First event in the chain: start from whatever depth it was
                # assigned, then bump by one for each subsequent event.
                depth = event.depth
            depth += 1
            context = self.get_success(unpersisted_context.persist(event))

            event_and_contexts.append((event, context))
            prev_event_ids = [event.event_id]

        # Advance the clock so that persistence happens well after the
        # events' origin_server_ts, making them "late".
        self.reactor.advance(60 * 60)

        # Persist the newest five events as live (non-backfilled) events,
        # recording a received timestamp for each first.
        for e, _ in event_and_contexts[-5:]:
            self.get_success(self.hs.get_datastores().main.received_event(e))

        self.get_success(
            self.hs.get_storage_controllers().persistence.persist_events(
                event_and_contexts[-5:], backfilled=False
            )
        )

        self.reactor.advance(60 * 60)

        # Persist the 45 events before those as backfilled events.
        for e, _ in event_and_contexts[-50:-5]:
            self.get_success(self.hs.get_datastores().main.received_event(e))

        self.get_success(
            self.hs.get_storage_controllers().persistence.persist_events(
                event_and_contexts[-50:-5], backfilled=True
            )
        )

        sync_filter = json.dumps({"room": {"timeline": {"limit": sync_timeline_limit}}})

        channel = self.make_request(
            "GET", f"/sync?filter={sync_filter}", access_token=tok1
        )
        self.assertEqual(channel.code, 200)

        room_result = channel.json_body["rooms"]["join"][room_id]

        group_id = None

        def check_is_late_event(event_json: JsonDict) -> None:
            """Assert that `event_json` is one of our late messages and
            carries late-event metadata: a `received_ts` later than its
            `origin_server_ts`, and a `group_id` shared with all other late
            events seen so far."""
            nonlocal group_id

            self.assertSubstring("late message", event_json["content"]["body"])
            self.assertIn("io.element.late_event", event_json["unsigned"])
            late_metadata = event_json["unsigned"]["io.element.late_event"]
            self.assertIn("received_ts", late_metadata)

            # NOTE(review): `received_ts`/`origin_server_ts` look like they
            # are in milliseconds while `60 * 60` is an hour in seconds, so
            # this bound is much weaker than "one hour apart" — confirm the
            # units and tighten if intended.
            self.assertGreater(
                late_metadata["received_ts"] - event_json["origin_server_ts"], 60 * 60
            )

            self.assertIn("group_id", late_metadata)
            if group_id:
                self.assertEqual(late_metadata["group_id"], group_id)
            else:
                group_id = late_metadata["group_id"]

        # Everything in the sync timeline should be a late event.
        for event_json in room_result["timeline"]["events"]:
            check_is_late_event(event_json)

        prev_batch = room_result["timeline"]["prev_batch"]

        # Paginate backwards from the sync token: this page should also be
        # made up of late events.
        channel = self.make_request(
            "GET",
            f"/rooms/{room_id}/messages?from={prev_batch}&dir=b",
            access_token=tok1,
        )
        self.assertEqual(channel.code, 200)

        events = channel.json_body["chunk"]
        next_batch = channel.json_body["end"]

        for event_json in events:
            check_is_late_event(event_json)

        # ... and so should the page after that. (The `end` token of this
        # response is not used again, so it is not captured.)
        channel = self.make_request(
            "GET",
            f"/rooms/{room_id}/messages?from={next_batch}&dir=b",
            access_token=tok1,
        )
        self.assertEqual(channel.code, 200)

        events = channel.json_body["chunk"]

        for event_json in events:
            check_is_late_event(event_json)

0 comments on commit fddfade

Please sign in to comment.