diff --git a/PendingReleaseNotes b/PendingReleaseNotes
index 0185d6e54eaf..e2b678bafdd3 100644
--- a/PendingReleaseNotes
+++ b/PendingReleaseNotes
@@ -199,6 +199,11 @@ CephFS: Disallow delegating preallocated inode ranges to clients. Config
   default json format produces a rather massive output in large clusters and
   isn't scalable. So we have removed the 'network_ping_times' section from
   the output. Details in the tracker: https://tracker.ceph.com/issues/57460
+* mgr/REST: The REST (restful) manager module now trims its in-memory list of
+  requests once it grows beyond the 'max_requests' option (default: 500).
+  Without this trimming, and in the absence of manual deletion of old requests,
+  the list can grow until the Manager runs out of memory (OOM) and crashes.
+
 * CephFS: The `subvolume snapshot clone` command now depends on the config option
   `snapshot_clone_no_wait` which is used to reject the clone operation when
   all the cloner threads are busy. This config option is enabled by default which means 
diff --git a/doc/mgr/restful.rst b/doc/mgr/restful.rst
index d684399fcd83..c36f2cd0dc5f 100644
--- a/doc/mgr/restful.rst
+++ b/doc/mgr/restful.rst
@@ -77,6 +77,19 @@ If the port is not configured, *restful* will bind to port ``8003``.
 If the address it not configured, the *restful* will bind to ``::``,
 which corresponds to all available IPv4 and IPv6 addresses.
 
+Configuring max_requests
+------------------------
+
+The maximum number of requests kept in memory can be configured via a
+central configuration option::
+
+  ceph config set mgr mgr/restful/$name/max_requests $NUM
+
+where ``$name`` is the ID of the ceph-mgr daemon (usually the hostname).
+
+.. mgr_module:: restful
+.. confval:: max_requests
+
 .. _creating-an-api-user:
 
 Creating an API User
diff --git a/src/pybind/mgr/restful/module.py b/src/pybind/mgr/restful/module.py
index cb8391ecd08d..ad76473afd06 100644
--- a/src/pybind/mgr/restful/module.py
+++ b/src/pybind/mgr/restful/module.py
@@ -12,6 +12,7 @@
 import traceback
 import socket
 import fcntl
+from typing import cast
 
 from . import common
 from . import context
@@ -23,7 +24,7 @@
 from werkzeug.serving import make_server, make_ssl_devcert
 
 from .hooks import ErrorHook
-from mgr_module import MgrModule, CommandResult, NotifyType
+from mgr_module import MgrModule, CommandResult, NotifyType, Option
 from mgr_util import build_url
 
 
@@ -193,10 +194,18 @@ def __json__(self):
 
 class Module(MgrModule):
     MODULE_OPTIONS = [
-        {'name': 'server_addr'},
-        {'name': 'server_port'},
-        {'name': 'key_file'},
-        {'name': 'enable_auth', 'type': 'bool', 'default': True},
+        Option(name='server_addr'),
+        Option(name='server_port'),
+        Option(name='key_file'),
+        Option(name='enable_auth',
+               type='bool',
+               default=True),
+        Option(name='max_requests',
+               type='int',
+               default=500,
+               desc='Maximum number of requests to keep in memory. '
+                    'When a new request comes in and this limit is exceeded, the oldest finished request is removed. '
+                    'If no request has finished, the oldest unfinished request is removed and an error message is logged in the ceph-mgr log.'),
     ]
 
     COMMANDS = [
@@ -243,6 +252,7 @@ def __init__(self, *args, **kwargs):
 
         self.stop_server = False
         self.serve_event = threading.Event()
+        self.max_requests = cast(int, self.get_localized_module_option('max_requests', 500))
 
 
     def serve(self):
@@ -599,6 +609,16 @@ def submit_request(self, _request, **kwargs):
         with self.requests_lock:
             request = CommandsRequest(_request)
             self.requests.append(request)
+            if len(self.requests) > self.max_requests:
+                # Evict the oldest finished request, else the oldest one (index 0).
+                for req_to_trim, req in enumerate(self.requests):
+                    if req.is_finished():
+                        self.log.error("Trimmed one finished request due to exceeded maximum requests limit")
+                        break
+                else:  # for/else: no finished request found, so log only once
+                    req_to_trim = 0
+                    self.log.error("Trimmed the oldest unfinished request due to exceeded maximum requests limit")
+                self.requests.pop(req_to_trim)
         if kwargs.get('wait', 0):
             while not request.is_finished():
                 time.sleep(0.001)