diff --git a/changelog.d/18000.bugfix b/changelog.d/18000.bugfix new file mode 100644 index 00000000000..a8f1545bf5b --- /dev/null +++ b/changelog.d/18000.bugfix @@ -0,0 +1 @@ +Add rate limit `rc_presence.per_user`. This prevents load from excessive presence updates sent by clients via sync api. Also rate limit `/_matrix/client/v3/presence` as per the spec. Contributed by @rda0. diff --git a/docker/complement/conf/workers-shared-extra.yaml.j2 b/docker/complement/conf/workers-shared-extra.yaml.j2 index c5228af72d0..a3c4a1e8350 100644 --- a/docker/complement/conf/workers-shared-extra.yaml.j2 +++ b/docker/complement/conf/workers-shared-extra.yaml.j2 @@ -86,6 +86,11 @@ rc_invites: per_second: 1000 burst_count: 1000 +rc_presence: + per_user: + per_second: 9999 + burst_count: 9999 + federation_rr_transactions_per_room_per_second: 9999 allow_device_name_lookup_over_federation: true diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 7a48d76bbb1..9d1319a232e 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -1866,6 +1866,27 @@ rc_federation: concurrent: 5 ``` --- +### `rc_presence` + +This option sets ratelimiting for presence. + +The `rc_presence.per_user` option sets rate limits on how often a specific +users' presence updates are evaluated. Ratelimited presence updates sent via sync are +ignored, and no error is returned to the client. +This option also sets the rate limit for the +[`PUT /_matrix/client/v3/presence/{userId}/status`](https://spec.matrix.org/latest/client-server-api/#put_matrixclientv3presenceuseridstatus) +endpoint. + +`per_user` defaults to `per_second: 0.1`, `burst_count: 1`. + +Example configuration: +```yaml +rc_presence: + per_user: + per_second: 0.05 + burst_count: 0.5 +``` +--- ### `federation_rr_transactions_per_room_per_second` Sets outgoing federation transaction frequency for sending read-receipts, diff --git a/synapse/api/ratelimiting.py b/synapse/api/ratelimiting.py index b80630c5d35..229329a5aef 100644 --- a/synapse/api/ratelimiting.py +++ b/synapse/api/ratelimiting.py @@ -275,6 +275,7 @@ async def ratelimit( update: bool = True, n_actions: int = 1, _time_now_s: Optional[float] = None, + pause: Optional[float] = 0.5, ) -> None: """Checks if an action can be performed. If not, raises a LimitExceededError @@ -298,6 +299,8 @@ async def ratelimit( at all. _time_now_s: The current time. Optional, defaults to the current time according to self.clock. Only used by tests. + pause: Time in seconds to pause when an action is being limited. Defaults to 0.5 + to stop clients from "tight-looping" on retrying their request. Raises: LimitExceededError: If an action could not be performed, along with the time in @@ -316,9 +319,8 @@ async def ratelimit( ) if not allowed: - # We pause for a bit here to stop clients from "tight-looping" on - # retrying their request. - await self.clock.sleep(0.5) + if pause: + await self.clock.sleep(pause) raise LimitExceededError( limiter_name=self._limiter_name, diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py index 3fa33f5373f..06af4da3c55 100644 --- a/synapse/config/ratelimiting.py +++ b/synapse/config/ratelimiting.py @@ -228,3 +228,9 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: config.get("remote_media_download_burst_count", "500M") ), ) + + self.rc_presence_per_user = RatelimitSettings.parse( + config, + "rc_presence.per_user", + defaults={"per_second": 0.1, "burst_count": 1}, + ) diff --git a/synapse/rest/client/presence.py b/synapse/rest/client/presence.py index ecc52956e48..104d54cd890 100644 --- a/synapse/rest/client/presence.py +++ b/synapse/rest/client/presence.py @@ -24,7 +24,8 @@ import logging from typing import TYPE_CHECKING, Tuple -from synapse.api.errors import AuthError, SynapseError +from synapse.api.errors import AuthError, Codes, LimitExceededError, SynapseError +from synapse.api.ratelimiting import Ratelimiter from synapse.handlers.presence import format_user_presence_state from synapse.http.server import HttpServer from synapse.http.servlet import RestServlet, parse_json_object_from_request @@ -48,6 +49,14 @@ def __init__(self, hs: "HomeServer"): self.presence_handler = hs.get_presence_handler() self.clock = hs.get_clock() self.auth = hs.get_auth() + self.store = hs.get_datastores().main + + # Ratelimiter for presence updates, keyed by requester. + self._presence_per_user_limiter = Ratelimiter( + store=self.store, + clock=self.clock, + cfg=hs.config.ratelimiting.rc_presence_per_user, + ) async def on_GET( self, request: SynapseRequest, user_id: str @@ -82,6 +91,17 @@ async def on_PUT( if requester.user != user: raise AuthError(403, "Can only set your own presence state") + # ignore the presence update if the ratelimit is exceeded + try: + await self._presence_per_user_limiter.ratelimit(requester) + except LimitExceededError as e: + logger.debug("User presence ratelimit exceeded; ignoring it.") + return 429, { + "errcode": Codes.LIMIT_EXCEEDED, + "error": "Too many requests", + "retry_after_ms": e.retry_after_ms, + } + state = {} content = parse_json_object_from_request(request) diff --git a/synapse/rest/client/sync.py b/synapse/rest/client/sync.py index f4ef84a038d..4fb9c0c8e7a 100644 --- a/synapse/rest/client/sync.py +++ b/synapse/rest/client/sync.py @@ -24,9 +24,10 @@ from typing import TYPE_CHECKING, Any, Dict, List, Mapping, Optional, Tuple, Union from synapse.api.constants import AccountDataTypes, EduTypes, Membership, PresenceState -from synapse.api.errors import Codes, StoreError, SynapseError +from synapse.api.errors import Codes, LimitExceededError, StoreError, SynapseError from synapse.api.filtering import FilterCollection from synapse.api.presence import UserPresenceState +from synapse.api.ratelimiting import Ratelimiter from synapse.events.utils import ( SerializeEventConfig, format_event_for_client_v2_without_room_id, @@ -126,6 +127,13 @@ def __init__(self, hs: "HomeServer"): cache_name="sync_valid_filter", ) + # Ratelimiter for presence updates, keyed by requester. + self._presence_per_user_limiter = Ratelimiter( + store=self.store, + clock=self.clock, + cfg=hs.config.ratelimiting.rc_presence_per_user, + ) + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: # This will always be set by the time Twisted calls us. assert request.args is not None @@ -239,7 +247,14 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: # send any outstanding server notices to the user. await self._server_notices_sender.on_user_syncing(user.to_string()) - affect_presence = set_presence != PresenceState.OFFLINE + # ignore the presence update if the ratelimit is exceeded but do not pause the request + try: + await self._presence_per_user_limiter.ratelimit(requester, pause=0.0) + except LimitExceededError: + affect_presence = False + logger.debug("User set_presence ratelimit exceeded; ignoring it.") + else: + affect_presence = set_presence != PresenceState.OFFLINE context = await self.presence_handler.user_syncing( user.to_string(), diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py index 598d6c13cdb..4cf048f0dfb 100644 --- a/tests/handlers/test_presence.py +++ b/tests/handlers/test_presence.py @@ -45,7 +45,7 @@ handle_update, ) from synapse.rest import admin -from synapse.rest.client import room +from synapse.rest.client import login, room, sync from synapse.server import HomeServer from synapse.storage.database import LoggingDatabaseConnection from synapse.types import JsonDict, UserID, get_domain_from_id @@ -53,10 +53,15 @@ from tests import unittest from tests.replication._base import BaseMultiWorkerStreamTestCase +from tests.unittest import override_config class PresenceUpdateTestCase(unittest.HomeserverTestCase): - servlets = [admin.register_servlets] + servlets = [ + admin.register_servlets, + login.register_servlets, + sync.register_servlets, + ] def prepare( self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer @@ -425,6 +430,102 @@ def test_override(self, initial_state: str, final_state: str) -> None: wheel_timer.insert.assert_not_called() + # `rc_presence` is set very high during unit tests to avoid ratelimiting + # subtly impacting unrelated tests. We set the ratelimiting back to a + # reasonable value for the tests specific to presence ratelimiting. + @override_config( + {"rc_presence": {"per_user": {"per_second": 0.1, "burst_count": 1}}} + ) + def test_over_ratelimit_offline_to_online_to_unavailable(self) -> None: + """ + Send a presence update, check that it went through, immediately send another one and + check that it was ignored. + """ + self._test_ratelimit_offline_to_online_to_unavailable(ratelimited=True) + + @override_config( + {"rc_presence": {"per_user": {"per_second": 0.1, "burst_count": 1}}} + ) + def test_within_ratelimit_offline_to_online_to_unavailable(self) -> None: + """ + Send a presence update, check that it went through, advancing time a sufficient amount, + send another presence update and check that it also worked. + """ + self._test_ratelimit_offline_to_online_to_unavailable(ratelimited=False) + + @override_config( + {"rc_presence": {"per_user": {"per_second": 0.1, "burst_count": 1}}} + ) + def _test_ratelimit_offline_to_online_to_unavailable( + self, ratelimited: bool + ) -> None: + """Test rate limit for presence updates sent with sync requests. + + Args: + ratelimited: Test rate limited case. + """ + wheel_timer = Mock() + user_id = "@user:pass" + now = 5000000 + sync_url = "/sync?access_token=%s&set_presence=%s" + + # Register the user who syncs presence + user_id = self.register_user("user", "pass") + access_token = self.login("user", "pass") + + # Get the handler (which kicks off a bunch of timers). + presence_handler = self.hs.get_presence_handler() + + # Ensure the user is initially offline. + prev_state = UserPresenceState.default(user_id) + new_state = prev_state.copy_and_replace( + state=PresenceState.OFFLINE, last_active_ts=now + ) + + state, persist_and_notify, federation_ping = handle_update( + prev_state, + new_state, + is_mine=True, + wheel_timer=wheel_timer, + now=now, + persist=False, + ) + + # Check that the user is offline. + state = self.get_success( + presence_handler.get_state(UserID.from_string(user_id)) + ) + self.assertEqual(state.state, PresenceState.OFFLINE) + + # Send sync request with set_presence=online. + channel = self.make_request("GET", sync_url % (access_token, "online")) + self.assertEqual(200, channel.code) + + # Assert the user is now online. + state = self.get_success( + presence_handler.get_state(UserID.from_string(user_id)) + ) + self.assertEqual(state.state, PresenceState.ONLINE) + + if not ratelimited: + # Advance time a sufficient amount to avoid rate limiting. + self.reactor.advance(30) + + # Send another sync request with set_presence=unavailable. + channel = self.make_request("GET", sync_url % (access_token, "unavailable")) + self.assertEqual(200, channel.code) + + state = self.get_success( + presence_handler.get_state(UserID.from_string(user_id)) + ) + + if ratelimited: + # Assert the user is still online and presence update was ignored. + self.assertEqual(state.state, PresenceState.ONLINE) + else: + # Assert the user is now unavailable. + self.assertEqual(state.state, PresenceState.UNAVAILABLE) + class PresenceTimeoutTestCase(unittest.TestCase): """Tests different timers and that the timer does not change `status_msg` of user.""" diff --git a/tests/rest/client/test_presence.py b/tests/rest/client/test_presence.py index 5ced8319e19..6b9c70974a7 100644 --- a/tests/rest/client/test_presence.py +++ b/tests/rest/client/test_presence.py @@ -29,6 +29,7 @@ from synapse.util import Clock from tests import unittest +from tests.unittest import override_config class PresenceTestCase(unittest.HomeserverTestCase): @@ -95,3 +96,54 @@ def test_put_presence_untracked(self) -> None: self.assertEqual(channel.code, HTTPStatus.OK) self.assertEqual(self.presence_handler.set_state.call_count, 0) + + @override_config( + {"rc_presence": {"per_user": {"per_second": 0.1, "burst_count": 1}}} + ) + def test_put_presence_over_ratelimit(self) -> None: + """ + Multiple PUTs to the status endpoint without sufficient delay will be rate limited. + """ + self.hs.config.server.presence_enabled = True + + body = {"presence": "here", "status_msg": "beep boop"} + channel = self.make_request( + "PUT", "/presence/%s/status" % (self.user_id,), body + ) + + self.assertEqual(channel.code, HTTPStatus.OK) + + body = {"presence": "here", "status_msg": "beep boop"} + channel = self.make_request( + "PUT", "/presence/%s/status" % (self.user_id,), body + ) + + self.assertEqual(channel.code, HTTPStatus.TOO_MANY_REQUESTS) + self.assertEqual(self.presence_handler.set_state.call_count, 1) + + @override_config( + {"rc_presence": {"per_user": {"per_second": 0.1, "burst_count": 1}}} + ) + def test_put_presence_within_ratelimit(self) -> None: + """ + Multiple PUTs to the status endpoint with sufficient delay should all call set_state. + """ + self.hs.config.server.presence_enabled = True + + body = {"presence": "here", "status_msg": "beep boop"} + channel = self.make_request( + "PUT", "/presence/%s/status" % (self.user_id,), body + ) + + self.assertEqual(channel.code, HTTPStatus.OK) + + # Advance time a sufficient amount to avoid rate limiting. + self.reactor.advance(30) + + body = {"presence": "here", "status_msg": "beep boop"} + channel = self.make_request( + "PUT", "/presence/%s/status" % (self.user_id,), body + ) + + self.assertEqual(channel.code, HTTPStatus.OK) + self.assertEqual(self.presence_handler.set_state.call_count, 2) diff --git a/tests/utils.py b/tests/utils.py index 9fd26ef348e..3f4a7bb5600 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -200,6 +200,7 @@ def default_config( "per_user": {"per_second": 10000, "burst_count": 10000}, }, "rc_3pid_validation": {"per_second": 10000, "burst_count": 10000}, + "rc_presence": {"per_user": {"per_second": 10000, "burst_count": 10000}}, "saml2_enabled": False, "public_baseurl": None, "default_identity_server": None,