diff --git a/PendingReleaseNotes b/PendingReleaseNotes index 0185d6e54eaf..e2b678bafdd3 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -199,6 +199,11 @@ CephFS: Disallow delegating preallocated inode ranges to clients. Config default json format produces a rather massive output in large clusters and isn't scalable. So we have removed the 'network_ping_times' section from the output. Details in the tracker: https://tracker.ceph.com/issues/57460 +* mgr/REST: The REST manager module will trim requests based on the 'max_requests' option. + Without this feature, and in the absence of manual deletion of old requests, + the accumulation of requests in the array can lead to Out Of Memory (OOM) issues, + resulting in the Manager crashing. + * CephFS: The `subvolume snapshot clone` command now depends on the config option `snapshot_clone_no_wait` which is used to reject the clone operation when all the cloner threads are busy. This config option is enabled by default which means diff --git a/doc/mgr/restful.rst b/doc/mgr/restful.rst index d684399fcd83..c36f2cd0dc5f 100644 --- a/doc/mgr/restful.rst +++ b/doc/mgr/restful.rst @@ -77,6 +77,19 @@ If the port is not configured, *restful* will bind to port ``8003``. If the address it not configured, the *restful* will bind to ``::``, which corresponds to all available IPv4 and IPv6 addresses. +Configuring max_requests +--------------------------- + +The maximum number of requests kept in memory can be configured via a central configuration +option:: + + ceph config set mgr mgr/restful/$name/max_requests $NUM + +where ``$name`` is the ID of the ceph-mgr daemon (usually the hostname). + +.. mgr_module:: restful +.. confval:: max_requests + .. 
_creating-an-api-user: Creating an API User diff --git a/src/pybind/mgr/restful/module.py b/src/pybind/mgr/restful/module.py index cb8391ecd08d..ad76473afd06 100644 --- a/src/pybind/mgr/restful/module.py +++ b/src/pybind/mgr/restful/module.py @@ -12,6 +12,7 @@ import traceback import socket import fcntl +from typing import cast from . import common from . import context @@ -23,7 +24,7 @@ from werkzeug.serving import make_server, make_ssl_devcert from .hooks import ErrorHook -from mgr_module import MgrModule, CommandResult, NotifyType +from mgr_module import MgrModule, CommandResult, NotifyType, Option from mgr_util import build_url @@ -193,10 +194,18 @@ def __json__(self): class Module(MgrModule): MODULE_OPTIONS = [ - {'name': 'server_addr'}, - {'name': 'server_port'}, - {'name': 'key_file'}, - {'name': 'enable_auth', 'type': 'bool', 'default': True}, + Option(name='server_addr'), + Option(name='server_port'), + Option(name='key_file'), + Option(name='enable_auth', + type='bool', + default=True), + Option(name='max_requests', + type='int', + default=500, + desc='Maximum number of requests to keep in memory. ' + ' When new request comes in, the oldest request will be removed if the number of requests exceeds the max request number.' 
+ 'if un-finished request is removed, error message will be logged in the ceph-mgr log.'), ] COMMANDS = [ @@ -243,6 +252,7 @@ def __init__(self, *args, **kwargs): self.stop_server = False self.serve_event = threading.Event() + self.max_requests = cast(int, self.get_localized_module_option('max_requests', 500)) def serve(self): @@ -599,6 +609,16 @@ def submit_request(self, _request, **kwargs): with self.requests_lock: request = CommandsRequest(_request) self.requests.append(request) + if len(self.requests) > self.max_requests: + req_to_trim = 0 + for i, req in enumerate(self.requests): + if req.is_finished(): + self.log.error("Trimmed one finished request due to exceeded maximum requests limit") + req_to_trim = i + break + else: + self.log.error("Trimmed the oldest unfinished request due to exceeded maximum requests limit") + self.requests.pop(req_to_trim) if kwargs.get('wait', 0): while not request.is_finished(): time.sleep(0.001)