Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Commit

Permalink
Ensure stream position never goes backwards, and add test
Browse files Browse the repository at this point in the history
  • Loading branch information
erikjohnston committed Oct 20, 2023
1 parent 56e7fb3 commit fddfade
Show file tree
Hide file tree
Showing 3 changed files with 213 additions and 14 deletions.
43 changes: 29 additions & 14 deletions synapse/storage/util/id_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,11 @@ def __init__(
# The maximum stream ID that we have seen allocated across any writer.
self._max_seen_allocated_stream_id = 1

# The maximum position of the local instance. This can be higher than
# the corresponding position in `current_positions` table when there are
# no active writes in progress.
self._max_position_of_local_instance = self._max_seen_allocated_stream_id

self._sequence_gen = PostgresSequenceGenerator(sequence_name)

# We check that the table and sequence haven't diverged.
Expand All @@ -439,6 +444,16 @@ def __init__(
self._current_positions.values(), default=1
)

# For the case where `stream_positions` is not up to date,
# `_persisted_upto_position` may be higher.
self._max_seen_allocated_stream_id = max(
self._max_seen_allocated_stream_id, self._persisted_upto_position
)

# Bump our local maximum position now that we've loaded things from the
# DB.
self._max_position_of_local_instance = self._max_seen_allocated_stream_id

if not writers:
# If there have been no explicit writers given then any instance can
# write to the stream. In which case, let's pre-seed our own
Expand Down Expand Up @@ -708,6 +723,7 @@ def _mark_id_as_finished(self, next_id: int) -> None:
if new_cur:
curr = self._current_positions.get(self._instance_name, 0)
self._current_positions[self._instance_name] = max(curr, new_cur)
self._max_position_of_local_instance = max(curr, new_cur)

self._add_persisted_position(next_id)

Expand All @@ -722,6 +738,9 @@ def get_current_token_for_writer(self, instance_name: str) -> int:
# persisted up to position. This stops Synapse from doing a full table
# scan when a new writer announces itself over replication.
with self._lock:
if self._instance_name == instance_name:
return self._return_factor * self._max_position_of_local_instance

pos = self._current_positions.get(
instance_name, self._persisted_upto_position
)
Expand All @@ -731,20 +750,6 @@ def get_current_token_for_writer(self, instance_name: str) -> int:
# possible.
pos = max(pos, self._persisted_upto_position)

if (
self._instance_name == instance_name
and not self._in_flight_fetches
and not self._unfinished_ids
):
# For our own instance when there's nothing in flight, it's safe
# to advance to the maximum persisted position we've seen (as we
# know that any new tokens we request will be greater).
max_pos_of_all_writers = max(
self._current_positions.values(),
default=self._persisted_upto_position,
)
pos = max(pos, max_pos_of_all_writers)

return self._return_factor * pos

def get_minimal_local_current_token(self) -> int:
Expand Down Expand Up @@ -821,6 +826,16 @@ def _add_persisted_position(self, new_id: int) -> None:

self._persisted_upto_position = max(min_curr, self._persisted_upto_position)

# Advance our local max position.
self._max_position_of_local_instance = max(
self._max_position_of_local_instance, self._persisted_upto_position
)

if not self._unfinished_ids and not self._in_flight_fetches:
# If we don't have anything in flight, it's safe to advance to the
# max seen stream ID.
self._max_position_of_local_instance = self._max_seen_allocated_stream_id

# We now iterate through the seen positions, discarding those that are
# less than the current min positions, and incrementing the min position
# if its exactly one greater.
Expand Down
51 changes: 51 additions & 0 deletions tests/storage/test_id_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,57 @@ def test_minimal_local_token(self) -> None:
self.assertEqual(second_id_gen.get_positions(), {"first": 3, "second": 7})
self.assertEqual(second_id_gen.get_minimal_local_current_token(), 7)

def test_current_token_gap(self) -> None:
    """Test that getting the current token for a writer returns the maximal
    token when there are no writes.
    """
    # Seed the stream with rows from two writers. Presumably this allocates
    # stream IDs 1..7 (3 for "first", 4 for "second") — confirm against
    # `_insert_rows`.
    self._insert_rows("first", 3)
    self._insert_rows("second", 4)

    first_id_gen = self._create_id_generator(
        "first", writers=["first", "second", "third"]
    )
    second_id_gen = self._create_id_generator(
        "second", writers=["first", "second", "third"]
    )

    # With nothing in flight, every writer's token is the max allocated
    # stream ID (7).
    self.assertEqual(second_id_gen.get_current_token_for_writer("first"), 7)
    self.assertEqual(second_id_gen.get_current_token_for_writer("second"), 7)
    self.assertEqual(second_id_gen.get_current_token(), 7)

    # Check that the first ID gen advancing causes the second ID gen to
    # advance (as it has nothing in flight).

    async def _get_next_async() -> None:
        # Allocate two stream IDs on "first" and immediately finish them.
        async with first_id_gen.get_next_mult(2):
            pass

    self.get_success(_get_next_async())
    # Replicate "first"'s new position (IDs 8 and 9 were allocated) to the
    # second generator.
    second_id_gen.advance("first", 9)

    # "second" has no in-flight writes, so its own token may safely jump
    # forward to the max seen position (9). The overall current token stays
    # at the persisted-up-to position (7).
    self.assertEqual(second_id_gen.get_current_token_for_writer("first"), 9)
    self.assertEqual(second_id_gen.get_current_token_for_writer("second"), 9)
    self.assertEqual(second_id_gen.get_current_token(), 7)

    # Check that the first ID gen advancing doesn't advance the second ID
    # gen when it has stuff in flight.
    self.get_success(_get_next_async())

    # Start (but deliberately don't finish) a write on "second", so it now
    # has an unfinished ID in flight.
    ctxmgr = second_id_gen.get_next()
    self.get_success(ctxmgr.__aenter__())

    second_id_gen.advance("first", 11)

    # While "second" has a write in flight its token must not jump forward,
    # even though "first" has advanced past it.
    self.assertEqual(second_id_gen.get_current_token_for_writer("first"), 11)
    self.assertEqual(second_id_gen.get_current_token_for_writer("second"), 9)
    self.assertEqual(second_id_gen.get_current_token(), 7)

    self.get_success(ctxmgr.__aexit__(None, None, None))

    # Once the in-flight write completes, "second" advances to the stream ID
    # it was allocated (12).
    self.assertEqual(second_id_gen.get_current_token_for_writer("first"), 11)
    self.assertEqual(second_id_gen.get_current_token_for_writer("second"), 12)
    self.assertEqual(second_id_gen.get_current_token(), 7)


class BackwardsMultiWriterIdGeneratorTestCase(HomeserverTestCase):
"""Tests MultiWriterIdGenerator that produce *negative* stream IDs."""
Expand Down
133 changes: 133 additions & 0 deletions tests/test_late_messages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import json

from parameterized import parameterized

from synapse.rest import admin
from synapse.rest.client import login, room, sync
from synapse.types import JsonDict

from tests import unittest
from tests.test_utils.event_injection import create_event


class TestLateArrivingMessages(unittest.HomeserverTestCase):
    """Tests that events persisted long after their `origin_server_ts` are
    annotated with `io.element.late_event` unsigned metadata when served to
    clients over `/sync` and `/messages`.
    """

    servlets = [
        admin.register_servlets_for_client_rest_resource,
        room.register_servlets,
        login.register_servlets,
        sync.register_servlets,
    ]

    @parameterized.expand([(1,), (5,), (10,), (20,)])
    def test_late_messages_have_received_ts(self, sync_timeline_limit: int) -> None:
        """Create a chain of events, persist a subset of them well after they
        were created, and check that every late event returned by `/sync` and
        by backwards pagination carries `io.element.late_event` metadata.

        Args:
            sync_timeline_limit: the timeline `limit` used in the sync filter,
                varied to exercise different timeline/pagination splits.
        """
        user1 = self.register_user("user1", "pass")
        tok1 = self.login("user1", "pass")

        room_id = self.helper.create_room_as(user1, is_public=True, tok=tok1)

        self.helper.send(room_id, "Hi!", tok=tok1)

        # Build a linear chain of 100 message events without persisting them
        # yet: each event's `prev_event_ids` points at the previous one, and
        # depth increases monotonically along the chain.
        prev_event_ids = None
        event_and_contexts = []
        depth = None
        for i in range(100):
            event, unpersisted_context = self.get_success(
                create_event(
                    self.hs,
                    prev_event_ids=prev_event_ids,
                    room_id=room_id,
                    sender=user1,
                    type="m.room.message",
                    content={"body": f"late message {i}", "msgtype": "m.text"},
                    depth=depth,
                )
            )
            if not prev_event_ids:
                # First event in the chain: start from whatever depth it was
                # assigned, then bump by one for each subsequent event.
                depth = event.depth
            depth += 1
            context = self.get_success(unpersisted_context.persist(event))

            event_and_contexts.append((event, context))
            prev_event_ids = [event.event_id]

        # Advance the clock so that persistence happens well after the
        # events' origin_server_ts, making them "late".
        self.reactor.advance(60 * 60)

        # Persist the newest five events as live (non-backfilled) events,
        # recording a received timestamp for each first.
        for e, _ in event_and_contexts[-5:]:
            self.get_success(self.hs.get_datastores().main.received_event(e))

        self.get_success(
            self.hs.get_storage_controllers().persistence.persist_events(
                event_and_contexts[-5:], backfilled=False
            )
        )

        self.reactor.advance(60 * 60)

        # Persist the 45 events before those as backfilled events.
        for e, _ in event_and_contexts[-50:-5]:
            self.get_success(self.hs.get_datastores().main.received_event(e))

        self.get_success(
            self.hs.get_storage_controllers().persistence.persist_events(
                event_and_contexts[-50:-5], backfilled=True
            )
        )

        sync_filter = json.dumps({"room": {"timeline": {"limit": sync_timeline_limit}}})

        channel = self.make_request(
            "GET", f"/sync?filter={sync_filter}", access_token=tok1
        )
        self.assertEqual(channel.code, 200)

        room_result = channel.json_body["rooms"]["join"][room_id]

        group_id = None

        def check_is_late_event(event_json: JsonDict) -> None:
            """Assert that `event_json` is one of our late messages and
            carries late-event metadata: a `received_ts` later than its
            `origin_server_ts`, and a `group_id` shared with all other late
            events seen so far."""
            nonlocal group_id

            self.assertSubstring("late message", event_json["content"]["body"])
            self.assertIn("io.element.late_event", event_json["unsigned"])
            late_metadata = event_json["unsigned"]["io.element.late_event"]
            self.assertIn("received_ts", late_metadata)

            # NOTE(review): `received_ts`/`origin_server_ts` look like they
            # are in milliseconds while `60 * 60` is an hour in seconds, so
            # this bound is much weaker than "one hour apart" — confirm the
            # units and tighten if intended.
            self.assertGreater(
                late_metadata["received_ts"] - event_json["origin_server_ts"], 60 * 60
            )

            self.assertIn("group_id", late_metadata)
            if group_id:
                self.assertEqual(late_metadata["group_id"], group_id)
            else:
                group_id = late_metadata["group_id"]

        # Everything in the sync timeline should be a late event.
        for event_json in room_result["timeline"]["events"]:
            check_is_late_event(event_json)

        prev_batch = room_result["timeline"]["prev_batch"]

        # Paginate backwards from the sync token: this page should also be
        # made up of late events.
        channel = self.make_request(
            "GET",
            f"/rooms/{room_id}/messages?from={prev_batch}&dir=b",
            access_token=tok1,
        )
        self.assertEqual(channel.code, 200)

        events = channel.json_body["chunk"]
        next_batch = channel.json_body["end"]

        for event_json in events:
            check_is_late_event(event_json)

        # ... and so should the page after that. (The `end` token of this
        # response is not used again, so it is not captured.)
        channel = self.make_request(
            "GET",
            f"/rooms/{room_id}/messages?from={next_batch}&dir=b",
            access_token=tok1,
        )
        self.assertEqual(channel.code, 200)

        events = channel.json_body["chunk"]

        for event_json in events:
            check_is_late_event(event_json)

0 comments on commit fddfade

Please sign in to comment.