diff --git a/docs/examples/cassava/model/requirements.txt b/docs/examples/cassava/model/requirements.txt index 90ee3a919..c7015344e 100644 --- a/docs/examples/cassava/model/requirements.txt +++ b/docs/examples/cassava/model/requirements.txt @@ -1,2 +1,2 @@ -tensorflow==2.12.0 +tensorflow==2.12.1 tensorflow-hub==0.13.0 diff --git a/docs/examples/cassava/requirements.txt b/docs/examples/cassava/requirements.txt index ea98fb42b..a27eb2ede 100644 --- a/docs/examples/cassava/requirements.txt +++ b/docs/examples/cassava/requirements.txt @@ -1,3 +1,3 @@ mlserver==1.3.2 -tensorflow==2.12.0 +tensorflow==2.12.1 tensorflow-hub==0.13.0 diff --git a/docs/examples/streaming/README.ipynb b/docs/examples/streaming/README.ipynb index 025246237..13755c6d9 100644 --- a/docs/examples/streaming/README.ipynb +++ b/docs/examples/streaming/README.ipynb @@ -42,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -121,7 +121,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -138,8 +138,7 @@ "{\n", " \"debug\": false,\n", " \"parallel_workers\": 0,\n", - " \"gzip_enabled\": false,\n", - " \"metrics_endpoint\": null\n", + " \"gzip_enabled\": false\n", "}\n" ] }, @@ -150,8 +149,7 @@ "Note the currently there are three main limitations of the streaming support in MLServer:\n", "\n", "- distributed workers are not supported (i.e., the `parallel_workers` setting should be set to `0`)\n", - "- `gzip` middleware is not supported for REST (i.e., `gzip_enabled` setting should be set to `false`)\n", - "- metrics endpoint is not available (i.e. 
`metrics_endpoint` is also disabled for streaming for gRPC)" + "- `gzip` middleware is not supported for REST (i.e., `gzip_enabled` setting should be set to `false`)" ] }, { @@ -163,7 +161,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -227,14 +225,14 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Writing generate-request.json\n" + "Overwriting generate-request.json\n" ] } ], @@ -272,9 +270,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['What']\n", + "[' is']\n", + "[' the']\n", + "[' capital']\n", + "[' of']\n", + "[' France?']\n" + ] + } + ], "source": [ "import httpx\n", "from httpx_sse import connect_sse\n", @@ -301,9 +312,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['What']\n", + "[' is']\n", + "[' the']\n", + "[' capital']\n", + "[' of']\n", + "[' France?']\n" + ] + } + ], "source": [ "import grpc\n", "import mlserver.types as types\n", @@ -315,7 +339,7 @@ "inference_request = types.InferenceRequest.parse_file(\"./generate-request.json\")\n", "\n", "# need to convert from string to bytes for grpc\n", - "inference_request.inputs[0] = StringCodec.encode_input(\"prompt\", inference_request.inputs[0].data.__root__)\n", + "inference_request.inputs[0] = StringCodec.encode_input(\"prompt\", inference_request.inputs[0].data.root)\n", "inference_request_g = converters.ModelInferRequestConverter.from_types(\n", " inference_request, model_name=\"text-model\", model_version=None\n", ")\n", @@ -338,11 +362,6 @@ "source": [ "Note that for gRPC, the request is transformed into an async generator 
which is then passed to the `ModelStreamInfer` method. The response is also an async generator which can be iterated over to get the response." ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] } ], "metadata": { @@ -361,7 +380,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.14" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/docs/examples/streaming/README.md b/docs/examples/streaming/README.md index 7acdf2090..d91aa7492 100644 --- a/docs/examples/streaming/README.md +++ b/docs/examples/streaming/README.md @@ -78,8 +78,7 @@ The next step will be to create 2 configuration files: { "debug": false, "parallel_workers": 0, - "gzip_enabled": false, - "metrics_endpoint": null + "gzip_enabled": false } ``` @@ -88,7 +87,6 @@ Note the currently there are three main limitations of the streaming support in - distributed workers are not supported (i.e., the `parallel_workers` setting should be set to `0`) - `gzip` middleware is not supported for REST (i.e., `gzip_enabled` setting should be set to `false`) -- metrics endpoint is not available (i.e. `metrics_endpoint` is also disabled for streaming for gRPC) #### model-settings.json @@ -195,7 +193,7 @@ import mlserver.grpc.dataplane_pb2_grpc as dataplane inference_request = types.InferenceRequest.parse_file("./generate-request.json") # need to convert from string to bytes for grpc -inference_request.inputs[0] = StringCodec.encode_input("prompt", inference_request.inputs[0].data.__root__) +inference_request.inputs[0] = StringCodec.encode_input("prompt", inference_request.inputs[0].data.root) inference_request_g = converters.ModelInferRequestConverter.from_types( inference_request, model_name="text-model", model_version=None ) @@ -213,5 +211,3 @@ async with grpc.aio.insecure_channel("localhost:8081") as grpc_channel: ``` Note that for gRPC, the request is transformed into an async generator which is then passed to the `ModelStreamInfer` method. 
The response is also an async generator which can be iterated over to get the response. - - diff --git a/docs/examples/streaming/settings.json b/docs/examples/streaming/settings.json index ec853b3ba..3a95c2882 100644 --- a/docs/examples/streaming/settings.json +++ b/docs/examples/streaming/settings.json @@ -2,6 +2,5 @@ { "debug": false, "parallel_workers": 0, - "gzip_enabled": false, - "metrics_endpoint": null + "gzip_enabled": false } diff --git a/docs/examples/streaming/text_model.py b/docs/examples/streaming/text_model.py index 4475b3c92..d851f3bb9 100644 --- a/docs/examples/streaming/text_model.py +++ b/docs/examples/streaming/text_model.py @@ -7,19 +7,6 @@ class TextModel(MLModel): - async def predict(self, payload: InferenceRequest) -> InferenceResponse: - text = StringCodec.decode_input(payload.inputs[0])[0] - return InferenceResponse( - model_name=self._settings.name, - outputs=[ - StringCodec.encode_output( - name="output", - payload=[text], - use_bytes=True, - ), - ], - ) - async def predict_stream( self, payloads: AsyncIterator[InferenceRequest] ) -> AsyncIterator[InferenceResponse]: diff --git a/docs/user-guide/streaming.md b/docs/user-guide/streaming.md index 41dec0b03..a576e6a3e 100644 --- a/docs/user-guide/streaming.md +++ b/docs/user-guide/streaming.md @@ -32,4 +32,3 @@ There are three main limitations of the streaming support in MLServer: - the `parallel_workers` setting should be set to `0` to disable distributed workers (to be addressed in future releases) - for REST, the `gzip_enabled` setting should be set to `false` to disable GZIP compression, as streaming is not compatible with GZIP compression (see issue [here]( https://github.com/encode/starlette/issues/20#issuecomment-704106436)) -- `metrics_endpoint` is also disabled for streaming for gRPC (to be addressed in future releases) \ No newline at end of file diff --git a/mlserver/grpc/interceptors.py b/mlserver/grpc/interceptors.py index c1d6e2df8..99de6d500 100644 --- 
a/mlserver/grpc/interceptors.py +++ b/mlserver/grpc/interceptors.py @@ -1,9 +1,12 @@ -from typing import Awaitable, Callable, Tuple +from typing import Awaitable, AsyncIterator, Callable, Tuple, Optional from functools import partial from timeit import default_timer +from mlserver.grpc import dataplane_pb2 as pb from grpc.aio import ServerInterceptor, ServicerContext from grpc import HandlerCallDetails, RpcMethodHandler, RpcError, StatusCode + +from prometheus_client import Counter from py_grpc_prometheus.prometheus_server_interceptor import ( grpc_utils, PromServerInterceptor as _PromServerInterceptor, @@ -50,43 +53,10 @@ async def intercept_service( metrics_wrapper = partial(self._metrics_wrapper, method_call) return self._interceptor._wrap_rpc_behavior(handler, metrics_wrapper) - def _compute_status_code(self, servicer_context: ServicerContext) -> StatusCode: - """ - This method is mostly copied from `py-grpc-prometheus`, with a couple - minor changes to avoid using private APIs from ServicerContext which - don't exist anymore in `grpc.aio`. - To see the original implementation, please check: - - https://github.com/lchenn/py-grpc-prometheus/blob/eb9dee1f0a4e57cef220193ee48021dc9a9f3d82/py_grpc_prometheus/prometheus_server_interceptor.py#L127-L134 - """ - # Backwards compatibility for non-aio. - # TODO: It's not clear yet how to check whether the context has been - # cancelled with aio. 
- if hasattr(servicer_context, "_state"): - if servicer_context._state.client == "cancelled": - return StatusCode.CANCELLED - - if not hasattr(servicer_context, "code"): - return StatusCode.OK - - code = servicer_context.code() - if code is None: - return StatusCode.OK - - # NOTE: With gRPC AIO, the `code` can be a plain integer that needs to - # be converted to an actual `StatusCode` entry - if isinstance(code, int): - if code not in self._status_codes: - return StatusCode.UNKNOWN - - return self._status_codes[code] - - return code - def _metrics_wrapper( self, method_call: Tuple[str, str, str], - old_handler: RpcMethodHandler, + behavior: RpcMethodHandler, request_streaming: bool, response_streaming: bool, ): @@ -99,60 +69,111 @@ def _metrics_wrapper( """ grpc_service_name, grpc_method_name, _ = method_call - async def _new_handler(request_or_iterator, servicer_context: ServicerContext): - response_or_iterator = None + async def new_behavior( + request: pb.ModelMetadataRequest, servicer_context: ServicerContext + ) -> Optional[pb.ModelMetadataRequest]: + response = None try: start = default_timer() grpc_type = grpc_utils.get_method_type( request_streaming, response_streaming ) + try: - if request_streaming: - request_or_iterator = grpc_utils.wrap_iterator_inc_counter( - request_or_iterator, - self._interceptor._metrics[ - "grpc_server_stream_msg_received" - ], - grpc_type, - grpc_service_name, - grpc_method_name, + self._interceptor._metrics["grpc_server_started_counter"].labels( + grpc_type=grpc_type, + grpc_service=grpc_service_name, + grpc_method=grpc_method_name, + ).inc() + + # Invoke the original rpc behavior. + # NOTE: This is the main change required with respect to + # the original implementation in `py-grpc-prometheus`. 
+ response = await behavior(request, servicer_context) + self._interceptor.increase_grpc_server_handled_total_counter( + grpc_type, + grpc_service_name, + grpc_method_name, + self._compute_status_code(servicer_context).name, + ) + return response + + except RpcError as e: + self._interceptor.increase_grpc_server_handled_total_counter( + grpc_type, + grpc_service_name, + grpc_method_name, + self._interceptor._compute_error_code(e).name, + ) + raise e + + finally: + if self._interceptor._legacy: + self._interceptor._metrics[ + "legacy_grpc_server_handled_latency_seconds" + ].labels( + grpc_type=grpc_type, + grpc_service=grpc_service_name, + grpc_method=grpc_method_name, + ).observe( + max(default_timer() - start, 0) ) - else: + elif self._interceptor._enable_handling_time_histogram: self._interceptor._metrics[ - "grpc_server_started_counter" + "grpc_server_handled_histogram" ].labels( grpc_type=grpc_type, grpc_service=grpc_service_name, grpc_method=grpc_method_name, - ).inc() + ).observe( + max(default_timer() - start, 0) + ) + except Exception as e: # pylint: disable=broad-except + # Allow user to skip the exceptions in order to maintain + # the basic functionality in the server + # The logging function in exception can be toggled with log_exceptions + # in order to suppress the noise in logging + if self._interceptor._skip_exceptions: + if self._interceptor._log_exceptions: + logger.error(e) - # Invoke the original rpc behavior. - # NOTE: This is the main change required with respect to - # the original implementation in `py-grpc-prometheus`. 
- response_or_iterator = await old_handler( - request_or_iterator, servicer_context + if response is None: + return response + + return await behavior(request, servicer_context) + raise e + + async def new_behavior_stream( + request_async_iterator: AsyncIterator[pb.ModelInferRequest], + servicer_context: ServicerContext, + ) -> AsyncIterator[pb.ModelInferRequest]: + response_async_iterator = None + try: + grpc_type = grpc_utils.get_method_type( + request_streaming, response_streaming + ) + try: + request_async_iterator = wrap_async_iterator_inc_counter( + request_async_iterator, + self._interceptor._metrics["grpc_server_stream_msg_received"], + grpc_type, + grpc_service_name, + grpc_method_name, ) - if response_streaming: - sent_metric = self._interceptor._metrics[ - "grpc_server_stream_msg_sent" - ] - response_or_iterator = grpc_utils.wrap_iterator_inc_counter( - response_or_iterator, - sent_metric, - grpc_type, - grpc_service_name, - grpc_method_name, - ) + # wrap the original behavior with the metrics + response_async_iterator = wrap_async_iterator_inc_counter( + behavior(request_async_iterator, servicer_context), + self._interceptor._metrics["grpc_server_stream_msg_sent"], + grpc_type, + grpc_service_name, + grpc_method_name, + ) + + # invoke the original rpc behavior + async for item in response_async_iterator: + yield item - else: - self._interceptor.increase_grpc_server_handled_total_counter( - grpc_type, - grpc_service_name, - grpc_method_name, - self._compute_status_code(servicer_context).name, - ) - return response_or_iterator except RpcError as e: self._interceptor.increase_grpc_server_handled_total_counter( grpc_type, @@ -162,28 +183,6 @@ async def _new_handler(request_or_iterator, servicer_context: ServicerContext): ) raise e - finally: - if not response_streaming: - if self._interceptor._legacy: - self._interceptor._metrics[ - "legacy_grpc_server_handled_latency_seconds" - ].labels( - grpc_type=grpc_type, - grpc_service=grpc_service_name, - 
grpc_method=grpc_method_name, - ).observe( - max(default_timer() - start, 0) - ) - elif self._interceptor._enable_handling_time_histogram: - self._interceptor._metrics[ - "grpc_server_handled_histogram" - ].labels( - grpc_type=grpc_type, - grpc_service=grpc_service_name, - grpc_method=grpc_method_name, - ).observe( - max(default_timer() - start, 0) - ) except Exception as e: # pylint: disable=broad-except # Allow user to skip the exceptions in order to maintain # the basic functionality in the server @@ -192,9 +191,66 @@ async def _new_handler(request_or_iterator, servicer_context: ServicerContext): if self._interceptor._skip_exceptions: if self._interceptor._log_exceptions: logger.error(e) - if response_or_iterator is None: - return response_or_iterator - return old_handler(request_or_iterator, servicer_context) + + if response_async_iterator is not None: + async for item in behavior( + request_async_iterator, servicer_context + ): + yield item raise e - return _new_handler + if request_streaming and response_streaming: + return new_behavior_stream + + return new_behavior + + def _compute_status_code(self, servicer_context: ServicerContext) -> StatusCode: + """ + This method is mostly copied from `py-grpc-prometheus`, with a couple + minor changes to avoid using private APIs from ServicerContext which + don't exist anymore in `grpc.aio`. + To see the original implementation, please check: + + https://github.com/lchenn/py-grpc-prometheus/blob/eb9dee1f0a4e57cef220193ee48021dc9a9f3d82/py_grpc_prometheus/prometheus_server_interceptor.py#L127-L134 + """ + # Backwards compatibility for non-aio. + # TODO: It's not clear yet how to check whether the context has been + # cancelled with aio. 
+ if hasattr(servicer_context, "_state"): + if servicer_context._state.client == "cancelled": + return StatusCode.CANCELLED + + if not hasattr(servicer_context, "code"): + return StatusCode.OK + + code = servicer_context.code() + if code is None: + return StatusCode.OK + + # NOTE: With gRPC AIO, the `code` can be a plain integer that needs to + # be converted to an actual `StatusCode` entry + if isinstance(code, int): + if code not in self._status_codes: + return StatusCode.UNKNOWN + + return self._status_codes[code] + + return code + + +async def wrap_async_iterator_inc_counter( + iterator: AsyncIterator[pb.ModelInferRequest], + counter: Counter, + grpc_type: str, + grpc_service_name: str, + grpc_method_name: str, +) -> AsyncIterator[pb.ModelInferRequest]: + """Wraps an async iterator and collect metrics.""" + + async for item in iterator: + counter.labels( + grpc_type=grpc_type, + grpc_service=grpc_service_name, + grpc_method=grpc_method_name, + ).inc() + yield item diff --git a/mlserver/grpc/server.py b/mlserver/grpc/server.py index cc5a84529..2d8fef3e0 100644 --- a/mlserver/grpc/server.py +++ b/mlserver/grpc/server.py @@ -38,14 +38,14 @@ def _create_server(self): self._model_repository_handlers ) - interceptors = [] + self._interceptors = [] if self._settings.debug: # If debug, enable access logs - interceptors = [LoggingInterceptor()] + self._interceptors = [LoggingInterceptor()] if self._settings.metrics_endpoint: - interceptors.append( + self._interceptors.append( PromServerInterceptor(enable_handling_time_histogram=True) ) @@ -62,7 +62,7 @@ def _create_server(self): ) ) - interceptors.append( + self._interceptors.append( aio_server_interceptor( tracer_provider=tracer_provider, filter_=excluded_urls ) @@ -70,7 +70,7 @@ def _create_server(self): self._server = aio.server( ThreadPoolExecutor(max_workers=DefaultGrpcWorkers), - interceptors=tuple(interceptors), + interceptors=tuple(self._interceptors), options=self._get_options(), ) diff --git a/poetry.lock 
b/poetry.lock index 06adca960..e1803d22e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -750,13 +750,13 @@ widget = ["ipython", "ipywidgets (>=7.0,<9.0)", "traitlets"] [[package]] name = "certifi" -version = "2024.6.2" +version = "2024.7.4" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" files = [ - {file = "certifi-2024.6.2-py3-none-any.whl", hash = "sha256:ddc6c8ce995e6987e7faf5e3f1b02b302836a0e5d98ece18392cb1a36c72ad56"}, - {file = "certifi-2024.6.2.tar.gz", hash = "sha256:3cd43f1c6fa7dedc5899d69d3ad0398fd018ad1a17fba83ddaf78aa46c747516"}, + {file = "certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"}, + {file = "certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b"}, ] [[package]] @@ -2449,13 +2449,13 @@ socks = ["socksio (==1.*)"] [[package]] name = "httpx-sse" -version = "0.3.1" +version = "0.4.0" description = "Consume Server-Sent Event (SSE) messages with HTTPX." 
optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "httpx-sse-0.3.1.tar.gz", hash = "sha256:3bb3289b2867f50cbdb2fee3eeeefecb1e86653122e164faac0023f1ffc88aea"}, - {file = "httpx_sse-0.3.1-py3-none-any.whl", hash = "sha256:7376dd88732892f9b6b549ac0ad05a8e2341172fe7dcf9f8f9c8050934297316"}, + {file = "httpx-sse-0.4.0.tar.gz", hash = "sha256:1e81a3a3070ce322add1d3529ed42eb5f70817f45ed6ec915ab753f961139721"}, + {file = "httpx_sse-0.4.0-py3-none-any.whl", hash = "sha256:f329af6eae57eaa2bdfd962b42524764af68075ea87370a2de920af5341e318f"}, ] [[package]] @@ -2887,6 +2887,7 @@ description = "Clang Python Bindings, mirrored from the official LLVM repo: http optional = false python-versions = "*" files = [ + {file = "libclang-18.1.1-1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:0b2e143f0fac830156feb56f9231ff8338c20aecfe72b4ffe96f19e5a1dbb69a"}, {file = "libclang-18.1.1-py2.py3-none-macosx_10_9_x86_64.whl", hash = "sha256:6f14c3f194704e5d09769108f03185fce7acaf1d1ae4bbb2f30a72c2400cb7c5"}, {file = "libclang-18.1.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:83ce5045d101b669ac38e6da8e58765f12da2d3aafb3b9b98d88b286a60964d8"}, {file = "libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl", hash = "sha256:c533091d8a3bbf7460a00cb6c1a71da93bffe148f172c7d03b1c31fbf8aa2a0b"}, @@ -4214,6 +4215,7 @@ description = "Nvidia JIT LTO Library" optional = false python-versions = ">=3" files = [ + {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_aarch64.whl", hash = "sha256:004186d5ea6a57758fd6d57052a123c73a4815adf365eb8dd6a85c9eaa7535ff"}, {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_x86_64.whl", hash = "sha256:d9714f27c1d0f0895cd8915c07a87a1d0029a0aa36acaf9156952ec2a8a12189"}, {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-win_amd64.whl", hash = "sha256:c3401dc8543b52d3a8158007a0c1ab4e9c768fcbd24153a48c86972102197ddd"}, ] @@ -5327,22 +5329,22 @@ diagrams = ["jinja2", "railroad-diagrams"] [[package]] name = 
"pyproject-api" -version = "1.6.1" +version = "1.7.1" description = "API to interact with the python pyproject.toml based projects" optional = false python-versions = ">=3.8" files = [ - {file = "pyproject_api-1.6.1-py3-none-any.whl", hash = "sha256:4c0116d60476b0786c88692cf4e325a9814965e2469c5998b830bba16b183675"}, - {file = "pyproject_api-1.6.1.tar.gz", hash = "sha256:1817dc018adc0d1ff9ca1ed8c60e1623d5aaca40814b953af14a9cf9a5cae538"}, + {file = "pyproject_api-1.7.1-py3-none-any.whl", hash = "sha256:2dc1654062c2b27733d8fd4cdda672b22fe8741ef1dde8e3a998a9547b071eeb"}, + {file = "pyproject_api-1.7.1.tar.gz", hash = "sha256:7ebc6cd10710f89f4cf2a2731710a98abce37ebff19427116ff2174c9236a827"}, ] [package.dependencies] -packaging = ">=23.1" +packaging = ">=24.1" tomli = {version = ">=2.0.1", markers = "python_version < \"3.11\""} [package.extras] -docs = ["furo (>=2023.8.19)", "sphinx (<7.2)", "sphinx-autodoc-typehints (>=1.24)"] -testing = ["covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)", "setuptools (>=68.1.2)", "wheel (>=0.41.2)"] +docs = ["furo (>=2024.5.6)", "sphinx-autodoc-typehints (>=2.2.1)"] +testing = ["covdefaults (>=2.3)", "pytest (>=8.2.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "setuptools (>=70.1)"] [[package]] name = "pyreadline3" @@ -5444,18 +5446,18 @@ dev = ["pre-commit", "pytest-asyncio", "tox"] [[package]] name = "pytest-xdist" -version = "3.5.0" +version = "3.6.1" description = "pytest xdist plugin for distributed testing, most importantly across multiple CPUs" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "pytest-xdist-3.5.0.tar.gz", hash = "sha256:cbb36f3d67e0c478baa57fa4edc8843887e0f6cfc42d677530a36d7472b32d8a"}, - {file = "pytest_xdist-3.5.0-py3-none-any.whl", hash = "sha256:d075629c7e00b611df89f490a5063944bee7a4362a5ff11c7cc7824a03dfce24"}, + {file = "pytest_xdist-3.6.1-py3-none-any.whl", hash = 
"sha256:9ed4adfb68a016610848639bb7e02c9352d5d9f03d04809919e2dafc3be4cca7"}, + {file = "pytest_xdist-3.6.1.tar.gz", hash = "sha256:ead156a4db231eec769737f57668ef58a2084a34b2e55c4a8fa20d861107300d"}, ] [package.dependencies] -execnet = ">=1.1" -pytest = ">=6.2.0" +execnet = ">=2.1" +pytest = ">=7.0.0" [package.extras] psutil = ["psutil (>=3.0)"] @@ -7575,30 +7577,30 @@ scipy = ["scipy"] [[package]] name = "tox" -version = "4.14.2" +version = "4.16.0" description = "tox is a generic virtualenv management and test command line tool" optional = false python-versions = ">=3.8" files = [ - {file = "tox-4.14.2-py3-none-any.whl", hash = "sha256:2900c4eb7b716af4a928a7fdc2ed248ad6575294ed7cfae2ea41203937422847"}, - {file = "tox-4.14.2.tar.gz", hash = "sha256:0defb44f6dafd911b61788325741cc6b2e12ea71f987ac025ad4d649f1f1a104"}, + {file = "tox-4.16.0-py3-none-any.whl", hash = "sha256:61e101061b977b46cf00093d4319438055290ad0009f84497a07bf2d2d7a06d0"}, + {file = "tox-4.16.0.tar.gz", hash = "sha256:43499656f9949edb681c0f907f86fbfee98677af9919d8b11ae5ad77cb800748"}, ] [package.dependencies] -cachetools = ">=5.3.2" +cachetools = ">=5.3.3" chardet = ">=5.2" colorama = ">=0.4.6" -filelock = ">=3.13.1" -packaging = ">=23.2" -platformdirs = ">=4.1" -pluggy = ">=1.3" -pyproject-api = ">=1.6.1" +filelock = ">=3.15.4" +packaging = ">=24.1" +platformdirs = ">=4.2.2" +pluggy = ">=1.5" +pyproject-api = ">=1.7.1" tomli = {version = ">=2.0.1", markers = "python_version < \"3.11\""} -virtualenv = ">=20.25" +virtualenv = ">=20.26.3" [package.extras] -docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-argparse-cli (>=1.11.1)", "sphinx-autodoc-typehints (>=1.25.2)", "sphinx-copybutton (>=0.5.2)", "sphinx-inline-tabs (>=2023.4.21)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.11)"] -testing = ["build[virtualenv] (>=1.0.3)", "covdefaults (>=2.3)", "detect-test-pollution (>=1.2)", "devpi-process (>=1)", "diff-cover (>=8.0.2)", "distlib (>=0.3.8)", "flaky (>=3.7)", "hatch-vcs (>=0.4)", 
"hatchling (>=1.21)", "psutil (>=5.9.7)", "pytest (>=7.4.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-xdist (>=3.5)", "re-assert (>=1.1)", "time-machine (>=2.13)", "wheel (>=0.42)"] +docs = ["furo (>=2024.5.6)", "sphinx (>=7.3.7)", "sphinx-argparse-cli (>=1.16)", "sphinx-autodoc-typehints (>=2.2.2)", "sphinx-copybutton (>=0.5.2)", "sphinx-inline-tabs (>=2023.4.21)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.11)"] +testing = ["build[virtualenv] (>=1.2.1)", "covdefaults (>=2.3)", "detect-test-pollution (>=1.2)", "devpi-process (>=1)", "diff-cover (>=9.1)", "distlib (>=0.3.8)", "flaky (>=3.8.1)", "hatch-vcs (>=0.4)", "hatchling (>=1.25)", "psutil (>=6)", "pytest (>=8.2.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "pytest-xdist (>=3.6.1)", "re-assert (>=1.1)", "setuptools (>=70.2)", "time-machine (>=2.14.2)", "wheel (>=0.43)"] [[package]] name = "tqdm" @@ -7915,13 +7917,13 @@ test = ["Cython (>=0.29.36,<0.30.0)", "aiohttp (==3.9.0b0)", "aiohttp (>=3.8.1)" [[package]] name = "virtualenv" -version = "20.26.2" +version = "20.26.3" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.7" files = [ - {file = "virtualenv-20.26.2-py3-none-any.whl", hash = "sha256:a624db5e94f01ad993d476b9ee5346fdf7b9de43ccaee0e0197012dc838a0e9b"}, - {file = "virtualenv-20.26.2.tar.gz", hash = "sha256:82bf0f4eebbb78d36ddaee0283d43fe5736b53880b8a8cdcd37390a07ac3741c"}, + {file = "virtualenv-20.26.3-py3-none-any.whl", hash = "sha256:8cc4a31139e796e9a7de2cd5cf2489de1217193116a8fd42328f1bd65f434589"}, + {file = "virtualenv-20.26.3.tar.gz", hash = "sha256:4c43a2a236279d9ea36a0d76f98d84bd6ca94ac4e0f4a3b9d46d05e10fea542a"}, ] [package.dependencies] @@ -8614,4 +8616,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.12" -content-hash = "af1d6381eeefdabbb722c775f109b3dac402ee827851be7ac2e24b630af55faf" +content-hash = 
"5c6a31d9433f4aa90865d8533c8d2e3dd05267ee46fa6cc839a94725fb5bb3e9" diff --git a/pyproject.toml b/pyproject.toml index 6c185e9bb..86837df21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,7 +76,7 @@ opentelemetry-instrumentation-grpc = ">=0.43b0" opentelemetry-exporter-otlp-proto-grpc = "^1.22.0" [tool.poetry.group.test.dependencies] -tox = "4.14.2" +tox = "4.16.0" [tool.poetry.group.dev.dependencies] datamodel-code-generator = "0.25.5" @@ -86,7 +86,7 @@ pytest-asyncio = "0.21.1" pytest-mock = "3.12.0" pytest-cases = "3.8.5" pytest-lazy-fixture = "^0.6.3" -tox = "4.14.2" +tox = "4.16.0" docker = "7.1.0" aiohttp = "3.9.4" aiohttp-retry = "2.8.3" @@ -106,9 +106,9 @@ types-aiofiles = "23.2.0.20240106" types-requests = "2.31.0.20240311" black = "24.4.0" pip-licenses = "4.4.0" -pytest-xdist = "3.5.0" +pytest-xdist = "3.6.1" filelock = "^3.13.1" -httpx-sse = "^0.3.1" +httpx-sse = ">=0.3.1,<0.5.0" [tool.poetry.group.docker.dependencies] protobuf = "3.20.3" diff --git a/runtimes/huggingface/poetry.lock b/runtimes/huggingface/poetry.lock index 7189ef66c..f6f501fa4 100644 --- a/runtimes/huggingface/poetry.lock +++ b/runtimes/huggingface/poetry.lock @@ -3498,12 +3498,12 @@ files = [ [[package]] name = "tensorboard" -version = "2.16.2" +version = "2.17.0" description = "TensorBoard lets you watch Tensors Flow" optional = false python-versions = ">=3.9" files = [ - {file = "tensorboard-2.16.2-py3-none-any.whl", hash = "sha256:9f2b4e7dad86667615c0e5cd072f1ea8403fc032a299f0072d6f74855775cc45"}, + {file = "tensorboard-2.17.0-py3-none-any.whl", hash = "sha256:859a499a9b1fb68a058858964486627100b71fcb21646861c61d31846a6478fb"}, ] [package.dependencies] @@ -3511,7 +3511,7 @@ absl-py = ">=0.4" grpcio = ">=1.48.2" markdown = ">=2.6.8" numpy = ">=1.12.0" -protobuf = ">=3.19.6,<4.24.0 || >4.24.0" +protobuf = ">=3.19.6,<4.24.0 || >4.24.0,<5.0.0" setuptools = ">=41.0.0" six = ">1.9" tensorboard-data-server = ">=0.7.0,<0.8.0" @@ -3531,44 +3531,40 @@ files = [ [[package]] name = 
"tensorflow" -version = "2.16.2" +version = "2.17.0" description = "TensorFlow is an open source machine learning framework for everyone." optional = false python-versions = ">=3.9" files = [ - {file = "tensorflow-2.16.2-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:546dc68d0740fb4b75593a6bfa308da9526fe31f65c2181d48c8551c4a0ad02f"}, - {file = "tensorflow-2.16.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:72c84f0e0f8ad0e7cb7b4b3fe9d1c899e6cbebc51c0e64df42a2a32a904aacd7"}, - {file = "tensorflow-2.16.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a0aee52797cd58870e3bb9c2b4bc0fc2a57eae29a334282bcc08943ca582718"}, - {file = "tensorflow-2.16.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ed24662a3625b2eaa89a02ea177aadad840d6eb91445091fe1f7ad5fa528db3"}, - {file = "tensorflow-2.16.2-cp310-cp310-win_amd64.whl", hash = "sha256:e340de5abf4d7dc1d8a5782559aa41757f8a84aeb2d4c490c0fa538a7521fae6"}, - {file = "tensorflow-2.16.2-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:ec06570d57bfa0e2be804405e3cdc2960e94887e7619ffb6bc053e9775b695aa"}, - {file = "tensorflow-2.16.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:2c8a0e79395639b762e62002db99b2f6cc608f744312c9940899c1128f325331"}, - {file = "tensorflow-2.16.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8728b12bc86941d90d0a927c40d4b21f8820964a80439a7c45f850eb37d57067"}, - {file = "tensorflow-2.16.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d8798dea8e2281b4a0b569d9c00e7949c0090509be363da271e1ef21828bffae"}, - {file = "tensorflow-2.16.2-cp311-cp311-win_amd64.whl", hash = "sha256:1da04e39834cdba509b4dd5ac5c71c3a1d1ffe6bc03e6970e65791b9a4071340"}, - {file = "tensorflow-2.16.2-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:912b8cd1f88fd1ef32b8db54f0193ad0a3f057691324436ba82c5f74a63a17dd"}, - {file = "tensorflow-2.16.2-cp312-cp312-macosx_12_0_arm64.whl", hash = 
"sha256:917366179b596d0dae13e194a26965229b09fef946e4a5892a47fa9b4f7e4ba1"}, - {file = "tensorflow-2.16.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7df529f8db271d3def80538aa7fcd6f5abe306f7b01cb5b580138df68afb499"}, - {file = "tensorflow-2.16.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5badc6744672a3181c012b6ab2815975be34d0573db3b561383634acc0d46a55"}, - {file = "tensorflow-2.16.2-cp312-cp312-win_amd64.whl", hash = "sha256:505df82fde3b9c6a2a78bf679efb4d0a2e84f4f925202130477ca519ae1514e4"}, - {file = "tensorflow-2.16.2-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:2528a162e879b40d81db3568c08256718cec4a0356580badbd362cd8af02a41b"}, - {file = "tensorflow-2.16.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:4c94106b73ecd044b7772e4338f8aa65a43ef2e290fe3fc27cc094138f50a341"}, - {file = "tensorflow-2.16.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec5c57e6828b074ddb460aa69fbaa2cd502c6080a4e200e0163f2a2c9e20acfc"}, - {file = "tensorflow-2.16.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b085fc4b296e0daf2e8a8b71bf433acba0ba30d6c30f3d07ad05f10477c7762c"}, - {file = "tensorflow-2.16.2-cp39-cp39-win_amd64.whl", hash = "sha256:5d5951e91435909d6023f8c5afcfde9cee946a65ed03020fc8b87e627c04c6d1"}, + {file = "tensorflow-2.17.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:515fe5ae8a9bc50312575412b08515f3ca66514c155078e0707bdffbea75d783"}, + {file = "tensorflow-2.17.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b36683ac28af20abc3a548c72bf4537b00df1b1f3dd39d59df3873fefaf26f15"}, + {file = "tensorflow-2.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:147c93ded4cb7e500a65d3c26d74744ff41660db7a8afe2b00d1d08bf329b4ec"}, + {file = "tensorflow-2.17.0-cp310-cp310-win_amd64.whl", hash = "sha256:e46090587f69e33637d17d7c3d94a790cac7d4bc5ff5ecbf3e71fdc6982fe96e"}, + {file = 
"tensorflow-2.17.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:e8d26d6c24ccfb139db1306599257ca8f5cfe254ef2d023bfb667f374a17a64d"}, + {file = "tensorflow-2.17.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca82f98ea38fa6c9e08ccc69eb6c2fab5b35b30a8999115b8b63b6f02fc69d9d"}, + {file = "tensorflow-2.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8339777b1b5ebd8ffadaa8196f786e65fbb081a371d8e87b52f24563392d8552"}, + {file = "tensorflow-2.17.0-cp311-cp311-win_amd64.whl", hash = "sha256:ef615c133cf4d592a073feda634ccbeb521a554be57de74f8c318d38febbeab5"}, + {file = "tensorflow-2.17.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:ee18b4fcd627c5e872eabb25092af6c808b6ec77948662c88fc5c89a60eb0211"}, + {file = "tensorflow-2.17.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:72adfef0ee39dd641627906fd7b244fcf21bdd8a87216a998ed74d9c74653aff"}, + {file = "tensorflow-2.17.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ad7bfea6afb4ded3928ca5b24df9fda876cea4904c103a5163fcc0c3483e7a4"}, + {file = "tensorflow-2.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:278bc80642d799adf08dc4e04f291aab603bba7457d50c1f9bc191ebbca83f43"}, + {file = "tensorflow-2.17.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:97f89e95d68b4b46e1072243b9f315c3b340e27cc07b1e1988e2ca97ad844305"}, + {file = "tensorflow-2.17.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dde37cff74ed22b8fa2eea944805b001ae38e96adc989666422bdea34f4e2d47"}, + {file = "tensorflow-2.17.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ae8e6746deb2ec807b902ba26d62fcffb6a6b53555a1a5906ec00416c5e4175"}, + {file = "tensorflow-2.17.0-cp39-cp39-win_amd64.whl", hash = "sha256:8f80d11ad3766570deb6ff47d2bed2d166f51399ca08205e38ef024345571d6f"}, ] [package.dependencies] absl-py = ">=1.0.0" astunparse = ">=1.6.0" -flatbuffers = ">=23.5.26" +flatbuffers = 
">=24.3.25" gast = ">=0.2.1,<0.5.0 || >0.5.0,<0.5.1 || >0.5.1,<0.5.2 || >0.5.2" google-pasta = ">=0.1.1" grpcio = ">=1.24.3,<2.0" h5py = ">=3.10.0" -keras = ">=3.0.0" +keras = ">=3.2.0" libclang = ">=13.0.0" -ml-dtypes = ">=0.3.1,<0.4.0" +ml-dtypes = ">=0.3.1,<0.5.0" numpy = {version = ">=1.23.5,<2.0.0", markers = "python_version <= \"3.11\""} opt-einsum = ">=2.3.2" packaging = "*" @@ -3576,7 +3572,7 @@ protobuf = ">=3.20.3,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4 requests = ">=2.21.0,<3" setuptools = "*" six = ">=1.12.0" -tensorboard = ">=2.16,<2.17" +tensorboard = ">=2.17,<2.18" tensorflow-io-gcs-filesystem = {version = ">=0.23.1", markers = "python_version < \"3.12\""} termcolor = ">=1.1.0" typing-extensions = ">=3.6.6" @@ -4540,4 +4536,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.12" -content-hash = "8df10acd6677b595d7338a9d52feeeb57fbd6476b1fbf5bf5116e5eef70306e9" +content-hash = "387768fda1c449abaa4b1e0bc334f856ee9ba22196cb54499f1c3f8da22b3fba" diff --git a/runtimes/huggingface/pyproject.toml b/runtimes/huggingface/pyproject.toml index 914bda6b4..5394fdfb4 100644 --- a/runtimes/huggingface/pyproject.toml +++ b/runtimes/huggingface/pyproject.toml @@ -24,7 +24,7 @@ torch = [ {markers = "sys_platform != 'darwin' or platform_machine != 'x86_64'", version = "<2.4.0"} ] transformers = ">=4.30,<5.0" -tensorflow = ">=2.12,<2.17" +tensorflow = ">=2.12,<2.18" [build-system] requires = ["poetry-core"] diff --git a/runtimes/lightgbm/poetry.lock b/runtimes/lightgbm/poetry.lock index 41b0ff2ba..cadf0acf4 100644 --- a/runtimes/lightgbm/poetry.lock +++ b/runtimes/lightgbm/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. 
[[package]] name = "aiofiles" @@ -946,17 +946,17 @@ testing = ["jaraco.test (>=5.4)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "p [[package]] name = "lightgbm" -version = "4.4.0" +version = "4.5.0" description = "LightGBM Python Package" optional = false python-versions = ">=3.7" files = [ - {file = "lightgbm-4.4.0-py3-none-macosx_10_15_x86_64.macosx_11_6_x86_64.macosx_12_5_x86_64.whl", hash = "sha256:f51f17a10ef9b4669b9c95a2297213b57debbc9deadfe5c1489a7f3c9e2617c5"}, - {file = "lightgbm-4.4.0-py3-none-macosx_14_0_arm64.whl", hash = "sha256:d96b06c85f0840da95bbbf31a095b207186bb0e584cee0fe2f2e7f24fb07c70f"}, - {file = "lightgbm-4.4.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:a04875e4c0ffda7c67a0ab5bd8892f154a491833f4f5b39c4acf5b3add099699"}, - {file = "lightgbm-4.4.0-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:8700b41f637717d36763a282d280b8d4722a87103030b7f0f373b96da0225022"}, - {file = "lightgbm-4.4.0-py3-none-win_amd64.whl", hash = "sha256:460dd78586dccfc0ed756571690fcfcd3d61770ed7972746c655c3b11cce8a93"}, - {file = "lightgbm-4.4.0.tar.gz", hash = "sha256:9e8a7640911481134e60987d5d1e1cd157f430c3b4b38de8d36fc55c302bc299"}, + {file = "lightgbm-4.5.0-py3-none-macosx_10_15_x86_64.whl", hash = "sha256:2212e2166af6379bc005e6f7041dd2dcba3750238eccbc55d09d3c0717c51187"}, + {file = "lightgbm-4.5.0-py3-none-macosx_12_0_arm64.whl", hash = "sha256:1301aa853e1fe4bf318539aa132f373862b04aa537af502508711ce03dffff09"}, + {file = "lightgbm-4.5.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7f0a3dded769d83560845f2c3fe1966630ec1ca527c380d9d48d9b35579a796e"}, + {file = "lightgbm-4.5.0-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:960a0e7c077de0ca3053f1325d3edfc92ea815acf5176adcacdea0f635aeef9b"}, + {file = "lightgbm-4.5.0-py3-none-win_amd64.whl", hash = "sha256:7ccb73ee9fb74fbbf89ad24c57a6edad505aa8f2165d02b999a082dbbbb0ee57"}, + {file = "lightgbm-4.5.0.tar.gz", hash = "sha256:e1cd7baf0318d4e308a26575a63a4635f08df866ad3622a9d8e3d71d9637a1ba"}, ] 
[package.dependencies] diff --git a/runtimes/mlflow/poetry.lock b/runtimes/mlflow/poetry.lock index 2b07379c9..640f367d0 100644 --- a/runtimes/mlflow/poetry.lock +++ b/runtimes/mlflow/poetry.lock @@ -687,6 +687,25 @@ files = [ docs = ["ipython", "matplotlib", "numpydoc", "sphinx"] tests = ["pytest", "pytest-cov", "pytest-xdist"] +[[package]] +name = "databricks-sdk" +version = "0.29.0" +description = "Databricks SDK for Python (Beta)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "databricks-sdk-0.29.0.tar.gz", hash = "sha256:23016df608bb025548582d378f94af2ea312c0d77250ac14aa57d1f863efe88c"}, + {file = "databricks_sdk-0.29.0-py3-none-any.whl", hash = "sha256:3e08578f4128f759a6a9bba2c836ec32a4cff37fb594530209ab92f2534985bd"}, +] + +[package.dependencies] +google-auth = ">=2.0,<3.0" +requests = ">=2.28.1,<3" + +[package.extras] +dev = ["autoflake", "databricks-connect", "ipython", "ipywidgets", "isort", "pycodestyle", "pyfakefs", "pytest", "pytest-cov", "pytest-mock", "pytest-rerunfailures", "pytest-xdist", "requests-mock", "wheel", "yapf"] +notebook = ["ipython (>=8,<9)", "ipywidgets (>=8,<9)"] + [[package]] name = "deprecated" version = "1.2.14" @@ -1193,6 +1212,29 @@ gitdb = ">=4.0.1,<5" doc = ["sphinx (==4.3.2)", "sphinx-autodoc-typehints", "sphinx-rtd-theme", "sphinxcontrib-applehelp (>=1.0.2,<=1.0.4)", "sphinxcontrib-devhelp (==1.0.2)", "sphinxcontrib-htmlhelp (>=2.0.0,<=2.0.1)", "sphinxcontrib-qthelp (==1.0.3)", "sphinxcontrib-serializinghtml (==1.1.5)"] test = ["coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit", "pytest (>=7.3.1)", "pytest-cov", "pytest-instafail", "pytest-mock", "pytest-sugar", "typing-extensions"] +[[package]] +name = "google-auth" +version = "2.32.0" +description = "Google Authentication Library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "google_auth-2.32.0-py2.py3-none-any.whl", hash = "sha256:53326ea2ebec768070a94bee4e1b9194c9646ea0c2bd72422785bd0f9abfad7b"}, + {file = 
"google_auth-2.32.0.tar.gz", hash = "sha256:49315be72c55a6a37d62819e3573f6b416aca00721f7e3e31a008d928bf64022"}, +] + +[package.dependencies] +cachetools = ">=2.0.0,<6.0" +pyasn1-modules = ">=0.2.1" +rsa = ">=3.1.4,<5" + +[package.extras] +aiohttp = ["aiohttp (>=3.6.2,<4.0.0.dev0)", "requests (>=2.20.0,<3.0.0.dev0)"] +enterprise-cert = ["cryptography (==36.0.2)", "pyopenssl (==22.0.0)"] +pyopenssl = ["cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] +reauth = ["pyu2f (>=0.1.5)"] +requests = ["requests (>=2.20.0,<3.0.0.dev0)"] + [[package]] name = "googleapis-common-protos" version = "1.63.1" @@ -1798,48 +1840,35 @@ dev = ["meson-python (>=0.13.1)", "numpy (>=1.25)", "pybind11 (>=2.6)", "setupto [[package]] name = "mlflow" -version = "2.14.1" +version = "2.15.0" description = "MLflow is an open source platform for the complete machine learning lifecycle" optional = false python-versions = ">=3.8" files = [ - {file = "mlflow-2.14.1-py3-none-any.whl", hash = "sha256:0bda38f94468d69a169415b1d751078f1083ddb44646f50854d387bcb6d0b9ec"}, - {file = "mlflow-2.14.1.tar.gz", hash = "sha256:1258ade134b9fbb15b1b0f22b0e9f98b5ff6f5f57575c10adfb255b9331fc587"}, + {file = "mlflow-2.15.0-py3-none-any.whl", hash = "sha256:1e2623fbfe7eb007efabef56bd9d31530de9407a1e4eda4cc6e61c6a00d3084b"}, + {file = "mlflow-2.15.0.tar.gz", hash = "sha256:ad1dcfd73b5bc8167d698fb11b33f3cbb204276d1e3e80294aba15748a555b29"}, ] [package.dependencies] alembic = "<1.10.0 || >1.10.0,<2" -cachetools = ">=5.0.0,<6" -click = ">=7.0,<9" -cloudpickle = "<4" docker = ">=4.0.0,<8" -entrypoints = "<1" Flask = "<4" -gitpython = ">=3.1.9,<4" graphene = "<4" gunicorn = {version = "<23", markers = "platform_system != \"Windows\""} -importlib-metadata = ">=3.7.0,<4.7.0 || >4.7.0,<8" Jinja2 = [ {version = ">=2.11,<4", markers = "platform_system != \"Windows\""}, {version = ">=3.0,<4", markers = "platform_system == \"Windows\""}, ] markdown = ">=3.3,<4" matplotlib = "<4" +mlflow-skinny = "2.15.0" numpy = "<2" 
-opentelemetry-api = ">=1.9.0,<3" -opentelemetry-sdk = ">=1.9.0,<3" -packaging = "<25" pandas = "<3" -protobuf = ">=3.12.0,<5" pyarrow = ">=4.0.0,<16" -pytz = "<2025" -pyyaml = ">=5.1,<7" querystring-parser = "<2" -requests = ">=2.17.3,<3" scikit-learn = "<2" scipy = "<2" sqlalchemy = ">=1.4.0,<3" -sqlparse = ">=0.4.0,<1" waitress = {version = "<4", markers = "platform_system == \"Windows\""} [package.extras] @@ -1849,7 +1878,46 @@ extras = ["azureml-core (>=1.2.0)", "boto3", "botocore", "google-cloud-storage ( gateway = ["aiohttp (<4)", "boto3 (>=1.28.56,<2)", "fastapi (<1)", "pydantic (>=1.0,<3)", "slowapi (>=0.1.9,<1)", "tiktoken (<1)", "uvicorn[standard] (<1)", "watchfiles (<1)"] genai = ["aiohttp (<4)", "boto3 (>=1.28.56,<2)", "fastapi (<1)", "pydantic (>=1.0,<3)", "slowapi (>=0.1.9,<1)", "tiktoken (<1)", "uvicorn[standard] (<1)", "watchfiles (<1)"] jfrog = ["mlflow-jfrog-plugin"] -langchain = ["langchain (>=0.1.0,<=0.2.3)"] +langchain = ["langchain (>=0.1.0,<=0.2.11)"] +sqlserver = ["mlflow-dbstore"] +xethub = ["mlflow-xethub"] + +[[package]] +name = "mlflow-skinny" +version = "2.15.0" +description = "MLflow is an open source platform for the complete machine learning lifecycle" +optional = false +python-versions = ">=3.8" +files = [ + {file = "mlflow_skinny-2.15.0-py3-none-any.whl", hash = "sha256:5dfd648c38ab6b7412bc30706e03f8bfac92aef5428269f640ded8d01baa93dd"}, + {file = "mlflow_skinny-2.15.0.tar.gz", hash = "sha256:6e7b36023af9fb6e0ed506642c6497a4c9921df3e1595b7ac57aca4688441309"}, +] + +[package.dependencies] +cachetools = ">=5.0.0,<6" +click = ">=7.0,<9" +cloudpickle = "<4" +databricks-sdk = ">=0.20.0,<1" +entrypoints = "<1" +gitpython = ">=3.1.9,<4" +importlib-metadata = ">=3.7.0,<4.7.0 || >4.7.0,<8" +opentelemetry-api = ">=1.9.0,<3" +opentelemetry-sdk = ">=1.9.0,<3" +packaging = "<25" +protobuf = ">=3.12.0,<6" +pytz = "<2025" +pyyaml = ">=5.1,<7" +requests = ">=2.17.3,<3" +sqlparse = ">=0.4.0,<1" + +[package.extras] +aliyun-oss = 
["aliyunstoreplugin"] +databricks = ["azure-storage-file-datalake (>12)", "boto3 (>1)", "botocore", "google-cloud-storage (>=1.30.0)"] +extras = ["azureml-core (>=1.2.0)", "boto3", "botocore", "google-cloud-storage (>=1.30.0)", "kubernetes", "mlserver (>=1.2.0,!=1.3.1,<1.4.0)", "mlserver-mlflow (>=1.2.0,!=1.3.1,<1.4.0)", "prometheus-flask-exporter", "pyarrow", "pysftp", "requests-auth-aws-sigv4", "virtualenv"] +gateway = ["aiohttp (<4)", "boto3 (>=1.28.56,<2)", "fastapi (<1)", "pydantic (>=1.0,<3)", "slowapi (>=0.1.9,<1)", "tiktoken (<1)", "uvicorn[standard] (<1)", "watchfiles (<1)"] +genai = ["aiohttp (<4)", "boto3 (>=1.28.56,<2)", "fastapi (<1)", "pydantic (>=1.0,<3)", "slowapi (>=0.1.9,<1)", "tiktoken (<1)", "uvicorn[standard] (<1)", "watchfiles (<1)"] +jfrog = ["mlflow-jfrog-plugin"] +langchain = ["langchain (>=0.1.0,<=0.2.11)"] sqlserver = ["mlflow-dbstore"] xethub = ["mlflow-xethub"] @@ -2198,6 +2266,7 @@ description = "Nvidia JIT LTO Library" optional = false python-versions = ">=3" files = [ + {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_aarch64.whl", hash = "sha256:004186d5ea6a57758fd6d57052a123c73a4815adf365eb8dd6a85c9eaa7535ff"}, {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_x86_64.whl", hash = "sha256:d9714f27c1d0f0895cd8915c07a87a1d0029a0aa36acaf9156952ec2a8a12189"}, {file = "nvidia_nvjitlink_cu12-12.5.40-py3-none-win_amd64.whl", hash = "sha256:c3401dc8543b52d3a8158007a0c1ab4e9c768fcbd24153a48c86972102197ddd"}, ] @@ -2717,6 +2786,31 @@ files = [ [package.dependencies] numpy = ">=1.16.6,<2" +[[package]] +name = "pyasn1" +version = "0.6.0" +description = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyasn1-0.6.0-py2.py3-none-any.whl", hash = "sha256:cca4bb0f2df5504f02f6f8a775b6e416ff9b0b3b16f7ee80b5a3153d9b804473"}, + {file = "pyasn1-0.6.0.tar.gz", hash = 
"sha256:3a35ab2c4b5ef98e17dfdec8ab074046fbda76e281c5a706ccd82328cfc8f64c"}, +] + +[[package]] +name = "pyasn1-modules" +version = "0.4.0" +description = "A collection of ASN.1-based protocols modules" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pyasn1_modules-0.4.0-py3-none-any.whl", hash = "sha256:be04f15b66c206eed667e0bb5ab27e2b1855ea54a842e5037738099e8ca4ae0b"}, + {file = "pyasn1_modules-0.4.0.tar.gz", hash = "sha256:831dbcea1b177b28c9baddf4c6d1013c24c3accd14a1873fffaa6a2e905f17b6"}, +] + +[package.dependencies] +pyasn1 = ">=0.4.6,<0.7.0" + [[package]] name = "pycparser" version = "2.22" @@ -2997,13 +3091,13 @@ files = [ [[package]] name = "pytorch-lightning" -version = "2.3.1" +version = "2.3.3" description = "PyTorch Lightning is the lightweight PyTorch wrapper for ML researchers. Scale your models. Write less boilerplate." optional = false python-versions = ">=3.8" files = [ - {file = "pytorch-lightning-2.3.1.tar.gz", hash = "sha256:8cfcc585fdf6570add34cf2cf4cabc3984d0e2d8b4cee52226485b61d3b8366d"}, - {file = "pytorch_lightning-2.3.1-py3-none-any.whl", hash = "sha256:19a6aa7f800f721240e46aa80e3f9ae0b5a13baf65f04028b2e8f231e20e6426"}, + {file = "pytorch-lightning-2.3.3.tar.gz", hash = "sha256:5f974015425af6873b5689246c5495ca12686b446751479273c154b73aeea843"}, + {file = "pytorch_lightning-2.3.3-py3-none-any.whl", hash = "sha256:4365e3f2874e223e63cb42628d24c88c2bdc8d1794453cac38c0619b31115fba"}, ] [package.dependencies] @@ -3155,6 +3249,20 @@ urllib3 = ">=1.21.1,<3" socks = ["PySocks (>=1.5.6,!=1.5.7)"] use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] +[[package]] +name = "rsa" +version = "4.9" +description = "Pure-Python RSA implementation" +optional = false +python-versions = ">=3.6,<4" +files = [ + {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"}, + {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"}, +] + 
+[package.dependencies] +pyasn1 = ">=0.1.3" + [[package]] name = "scikit-learn" version = "1.5.0" @@ -3510,13 +3618,12 @@ optree = ["optree (>=0.9.1)"] [[package]] name = "torchmetrics" -version = "1.4.0.post0" +version = "1.4.1" description = "PyTorch native Metrics" optional = false python-versions = ">=3.8" files = [ - {file = "torchmetrics-1.4.0.post0-py3-none-any.whl", hash = "sha256:ab234216598e3fbd8d62ee4541a0e74e7e8fc935d099683af5b8da50f745b3c8"}, - {file = "torchmetrics-1.4.0.post0.tar.gz", hash = "sha256:ab9bcfe80e65dbabbddb6cecd9be21f1f1d5207bb74051ef95260740f2762358"}, + {file = "torchmetrics-1.4.1-py3-none-any.whl", hash = "sha256:c2e7cd56dd8bdc60ae63d712f3bdce649f23bd174d9180bdd0b746e0230b865a"}, ] [package.dependencies] @@ -3526,15 +3633,14 @@ packaging = ">17.1" torch = ">=1.10.0" [package.extras] -all = ["SciencePlots (>=2.0.0)", "ipadic (>=1.0.0)", "matplotlib (>=3.3.0)", "mecab-python3 (>=1.0.6)", "mypy (==1.9.0)", "nltk (>=3.6)", "piq (<=0.8.0)", "pretty-errors (>=1.2.0)", "pycocotools (>2.0.0)", "pystoi (>=0.3.0)", "regex (>=2021.9.24)", "scipy (>1.0.0)", "sentencepiece (>=0.2.0)", "torch (==2.3.0)", "torch-fidelity (<=0.4.0)", "torchaudio (>=0.10.0)", "torchvision (>=0.8)", "tqdm (>=4.41.0)", "transformers (>4.4.0)", "transformers (>=4.10.0)", "types-PyYAML", "types-emoji", "types-protobuf", "types-requests", "types-setuptools", "types-six", "types-tabulate"] -audio = ["pystoi (>=0.3.0)", "torchaudio (>=0.10.0)"] -debug = ["pretty-errors (>=1.2.0)"] +all = ["SciencePlots (>=2.0.0)", "gammatone (>1.0.0)", "ipadic (>=1.0.0)", "matplotlib (>=3.3.0)", "mecab-python3 (>=1.0.6)", "mypy (==1.11.0)", "nltk (>=3.6)", "pesq (>=0.0.4)", "piq (<=0.8.0)", "pycocotools (>2.0.0)", "pystoi (>=0.3.0)", "regex (>=2021.9.24)", "scipy (>1.0.0)", "sentencepiece (>=0.2.0)", "torch (==2.3.1)", "torch-fidelity (<=0.4.0)", "torchaudio (>=0.10.0)", "torchvision (>=0.8)", "tqdm (>=4.41.0)", "transformers (>4.4.0)", "transformers (>=4.42.3)", "types-PyYAML", 
"types-emoji", "types-protobuf", "types-requests", "types-setuptools", "types-six", "types-tabulate"] +audio = ["gammatone (>1.0.0)", "pesq (>=0.0.4)", "pystoi (>=0.3.0)", "torchaudio (>=0.10.0)"] detection = ["pycocotools (>2.0.0)", "torchvision (>=0.8)"] -dev = ["SciencePlots (>=2.0.0)", "bert-score (==0.3.13)", "dython (<=0.7.5)", "fairlearn", "fast-bss-eval (>=0.1.0)", "faster-coco-eval (>=1.3.3)", "huggingface-hub (<0.23)", "ipadic (>=1.0.0)", "jiwer (>=2.3.0)", "kornia (>=0.6.7)", "lpips (<=0.1.4)", "matplotlib (>=3.3.0)", "mecab-ko (>=1.0.0)", "mecab-ko-dic (>=1.0.0)", "mecab-python3 (>=1.0.6)", "mir-eval (>=0.6)", "monai (==1.3.0)", "mypy (==1.9.0)", "netcal (>1.0.0)", "nltk (>=3.6)", "numpy (<1.27.0)", "pandas (>1.0.0)", "pandas (>=1.4.0)", "piq (<=0.8.0)", "pretty-errors (>=1.2.0)", "pycocotools (>2.0.0)", "pystoi (>=0.3.0)", "pytorch-msssim (==1.0.0)", "regex (>=2021.9.24)", "rouge-score (>0.1.0)", "sacrebleu (>=2.3.0)", "scikit-image (>=0.19.0)", "scipy (>1.0.0)", "sentencepiece (>=0.2.0)", "sewar (>=0.4.4)", "statsmodels (>0.13.5)", "torch (==2.3.0)", "torch-complex (<=0.4.3)", "torch-fidelity (<=0.4.0)", "torchaudio (>=0.10.0)", "torchvision (>=0.8)", "tqdm (>=4.41.0)", "transformers (>4.4.0)", "transformers (>=4.10.0)", "types-PyYAML", "types-emoji", "types-protobuf", "types-requests", "types-setuptools", "types-six", "types-tabulate"] +dev = ["SciencePlots (>=2.0.0)", "bert-score (==0.3.13)", "dython (<=0.7.6)", "fairlearn", "fast-bss-eval (>=0.1.0)", "faster-coco-eval (>=1.3.3)", "gammatone (>1.0.0)", "huggingface-hub (<0.25)", "ipadic (>=1.0.0)", "jiwer (>=2.3.0)", "kornia (>=0.6.7)", "lpips (<=0.1.4)", "matplotlib (>=3.3.0)", "mecab-ko (>=1.0.0)", "mecab-ko-dic (>=1.0.0)", "mecab-python3 (>=1.0.6)", "mir-eval (>=0.6)", "monai (==1.3.2)", "mypy (==1.11.0)", "netcal (>1.0.0)", "nltk (>=3.6)", "numpy (<2.1.0)", "pandas (>1.0.0)", "pandas (>=1.4.0)", "pesq (>=0.0.4)", "piq (<=0.8.0)", "pycocotools (>2.0.0)", "pystoi (>=0.3.0)", "pytorch-msssim 
(==1.0.0)", "regex (>=2021.9.24)", "rouge-score (>0.1.0)", "sacrebleu (>=2.3.0)", "scikit-image (>=0.19.0)", "scipy (>1.0.0)", "sentencepiece (>=0.2.0)", "sewar (>=0.4.4)", "statsmodels (>0.13.5)", "torch (==2.3.1)", "torch-complex (<0.5.0)", "torch-fidelity (<=0.4.0)", "torchaudio (>=0.10.0)", "torchvision (>=0.8)", "tqdm (>=4.41.0)", "transformers (>4.4.0)", "transformers (>=4.42.3)", "types-PyYAML", "types-emoji", "types-protobuf", "types-requests", "types-setuptools", "types-six", "types-tabulate"] image = ["scipy (>1.0.0)", "torch-fidelity (<=0.4.0)", "torchvision (>=0.8)"] -multimodal = ["piq (<=0.8.0)", "transformers (>=4.10.0)"] +multimodal = ["piq (<=0.8.0)", "transformers (>=4.42.3)"] text = ["ipadic (>=1.0.0)", "mecab-python3 (>=1.0.6)", "nltk (>=3.6)", "regex (>=2021.9.24)", "sentencepiece (>=0.2.0)", "tqdm (>=4.41.0)", "transformers (>4.4.0)"] -typing = ["mypy (==1.9.0)", "torch (==2.3.0)", "types-PyYAML", "types-emoji", "types-protobuf", "types-requests", "types-setuptools", "types-six", "types-tabulate"] +typing = ["mypy (==1.11.0)", "torch (==2.3.1)", "types-PyYAML", "types-emoji", "types-protobuf", "types-requests", "types-setuptools", "types-six", "types-tabulate"] visual = ["SciencePlots (>=2.0.0)", "matplotlib (>=3.3.0)"] [[package]] @@ -4052,4 +4158,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.12" -content-hash = "afbe36011cdd4a04aa6514e08ff568c409f37d7014290ad27957f9e39f92d45a" +content-hash = "95ee37d59b75ef1172762c987156e6e5865b9fff7dbf64b3f5636dbe046a61f5" diff --git a/runtimes/mlflow/pyproject.toml b/runtimes/mlflow/pyproject.toml index d637fd6bd..3bcaa032d 100644 --- a/runtimes/mlflow/pyproject.toml +++ b/runtimes/mlflow/pyproject.toml @@ -15,10 +15,10 @@ mlflow = "*" [tool.poetry.group.dev.dependencies] mlserver = {path = "../..", develop = true} torch = "2.2.1" -pytorch-lightning = "2.3.1" -torchmetrics = "1.4.0.post0" +pytorch-lightning = "2.3.3" 
+torchmetrics = "1.4.1" torchvision = "0.17.1" -mlflow = "2.14.1" +mlflow = "2.15.0" [build-system] requires = ["poetry-core"] diff --git a/runtimes/xgboost/poetry.lock b/runtimes/xgboost/poetry.lock index 588879db9..a95fbc54b 100644 --- a/runtimes/xgboost/poetry.lock +++ b/runtimes/xgboost/poetry.lock @@ -2170,19 +2170,19 @@ files = [ [[package]] name = "xgboost" -version = "2.1.0" +version = "2.1.1" description = "XGBoost Python Package" optional = false python-versions = ">=3.8" files = [ - {file = "xgboost-2.1.0-py3-none-macosx_10_15_x86_64.macosx_11_0_x86_64.macosx_12_0_x86_64.whl", hash = "sha256:19d145eb847b070c32342b1bf2d7331c102783e07a484f8b13b7d759d707c6b0"}, - {file = "xgboost-2.1.0-py3-none-macosx_12_0_arm64.whl", hash = "sha256:840a0c6e2119d8c8f260a5dace996ea064a267f62b301a25d7d452488a7ac860"}, - {file = "xgboost-2.1.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:73673c9bb85927db7fe2e3aed6df6d35dba708cfd6767cc63d4ea11dda2dede5"}, - {file = "xgboost-2.1.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:43b16205689249d7509daf7a6ab00ad0e6c570b3a9c263cb32b26e39d9477bb3"}, - {file = "xgboost-2.1.0-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:cedc2e386e686795735448fd4597533acacc5ba6fb47dd910c204c468b80bb96"}, - {file = "xgboost-2.1.0-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:b2a456eb0f3d3e8fd8ab37e44ac288292bf8ea8744c294be9fd88713d27af810"}, - {file = "xgboost-2.1.0-py3-none-win_amd64.whl", hash = "sha256:74904b91c42524a6c32147fe5718569e78fb65911ff4499b053f81d0964514d4"}, - {file = "xgboost-2.1.0.tar.gz", hash = "sha256:7144980923e76ce741c7b03a14d3bd7514db6de5c7cabe96ba95b229d274f5ca"}, + {file = "xgboost-2.1.1-py3-none-macosx_10_15_x86_64.macosx_11_0_x86_64.macosx_12_0_x86_64.whl", hash = "sha256:4163ab55118628f605cfccf950e2d667150640f6fc746bb5a173bddfd935950f"}, + {file = "xgboost-2.1.1-py3-none-macosx_12_0_arm64.whl", hash = "sha256:40d1f647022f497c1b0f69073765baf50ff5802ca77c6bb1aca55a6bc65df00d"}, + {file = 
"xgboost-2.1.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:4c534818aa08ab327ac2239ef211ef78db65a8573d069bc9898f824830fa2308"}, + {file = "xgboost-2.1.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:deef471e8d353afa99e5cc0e2af7d99ace7013f40684fcf3eed9124de033265d"}, + {file = "xgboost-2.1.1-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:8f3246a6d839dceb4553d3e5ea64ed718f9c692f072ee8275eeb895b58e283e6"}, + {file = "xgboost-2.1.1-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:6475ca35dede1f87d1dc485b362caba08f69f6020f4440e97b167676a533850e"}, + {file = "xgboost-2.1.1-py3-none-win_amd64.whl", hash = "sha256:fcf8413f3c621e97fdaaa45abb7ae808319c88eff5447328eff14c419c7c6ae0"}, + {file = "xgboost-2.1.1.tar.gz", hash = "sha256:4b1729837f9f1ba88a32ef1be3f8efb860fee6454a68719b196dc88032c23d97"}, ] [package.dependencies] @@ -2194,7 +2194,7 @@ scipy = "*" [package.extras] dask = ["dask", "distributed", "pandas"] datatable = ["datatable"] -pandas = ["pandas"] +pandas = ["pandas (>=1.2)"] plotting = ["graphviz", "matplotlib"] pyspark = ["cloudpickle", "pyspark", "scikit-learn"] scikit-learn = ["scikit-learn"] diff --git a/tests/grpc/test_interceptor.py b/tests/grpc/test_interceptor.py new file mode 100644 index 000000000..ca3a7f007 --- /dev/null +++ b/tests/grpc/test_interceptor.py @@ -0,0 +1,129 @@ +import pytest +from pytest_lazyfixture import lazy_fixture + +from typing import AsyncGenerator + +from grpc import StatusCode +from mlserver.grpc.interceptors import PromServerInterceptor +from mlserver.codecs import StringCodec +from mlserver.grpc import converters +from mlserver.grpc.server import GRPCServer +from mlserver.grpc.dataplane_pb2_grpc import GRPCInferenceServiceStub +from mlserver.grpc import dataplane_pb2 as pb + + +@pytest.mark.parametrize("sum_model", [lazy_fixture("text_model")]) +@pytest.mark.parametrize("sum_model_settings", [lazy_fixture("text_model_settings")]) +async def test_prometheus_unary_unary( + grpc_server: GRPCServer, + 
inference_service_stub: AsyncGenerator[GRPCInferenceServiceStub, None], + model_generate_request: pb.ModelInferRequest, +): + # send 10 requests + num_requests = 10 + for _ in range(num_requests): + _ = await inference_service_stub.ModelInfer(model_generate_request) + + grpc_type = "UNARY" + grpc_service_name = "inference.GRPCInferenceService" + grpc_method_name = "ModelInfer" + prom_interceptor = [ + interceptor + for interceptor in grpc_server._interceptors + if isinstance(interceptor, PromServerInterceptor) + ][0] + + # get the number of requests intercepted + counted_requests = ( + prom_interceptor._interceptor._metrics["grpc_server_started_counter"] + .labels( + grpc_type, + grpc_service_name, + grpc_method_name, + ) + ._value.get() + ) + + # get the number of ok responses intercepted + counted_responses = ( + prom_interceptor._interceptor._grpc_server_handled_total_counter.labels( + grpc_type, + grpc_service_name, + grpc_method_name, + StatusCode.OK.name, + )._value.get() + ) + + assert int(counted_requests) == num_requests + assert int(counted_requests) == int(counted_responses) + + +@pytest.mark.parametrize("settings", [lazy_fixture("settings_stream")]) +@pytest.mark.parametrize("sum_model", [lazy_fixture("text_stream_model")]) +@pytest.mark.parametrize("model_name", ["text-stream-model"]) +@pytest.mark.parametrize( + "sum_model_settings", [lazy_fixture("text_stream_model_settings")] +) +async def test_prometheus_stream_stream( + grpc_server: GRPCServer, + inference_service_stub: AsyncGenerator[GRPCInferenceServiceStub, None], + model_generate_request: pb.ModelInferRequest, + model_name: str, +): + model_generate_request.model_name = model_name + + async def get_stream_request(request): + yield request + + # send 10 requests + num_requests = 10 + for _ in range(num_requests): + _ = [ + _ + async for _ in inference_service_stub.ModelStreamInfer( + get_stream_request(model_generate_request) + ) + ] + + grpc_type = "BIDI_STREAMING" + grpc_service_name = 
"inference.GRPCInferenceService" + grpc_method_name = "ModelStreamInfer" + prom_interceptor = [ + interceptor + for interceptor in grpc_server._interceptors + if isinstance(interceptor, PromServerInterceptor) + ][0] + + # get the number of requests intercepted + counted_requests = ( + prom_interceptor._interceptor._metrics["grpc_server_stream_msg_received"] + .labels( + grpc_type, + grpc_service_name, + grpc_method_name, + ) + ._value.get() + ) + + # get the number of ok responses intercepted + counted_responses = ( + prom_interceptor._interceptor._metrics["grpc_server_stream_msg_sent"] + .labels( + grpc_type, + grpc_service_name, + grpc_method_name, + ) + ._value.get() + ) + + inference_request_g = converters.ModelInferRequestConverter.to_types( + model_generate_request + ) + + # we count the number of words because + # each word is gonna be streamed back + request_text = StringCodec.decode_input(inference_request_g.inputs[0])[0] + num_words = len(request_text.split()) + + assert int(counted_requests) == num_requests + assert int(counted_requests) * num_words == int(counted_responses) diff --git a/tests/testdata/settings-stream.json b/tests/testdata/settings-stream.json index 6727d5b59..809cb452b 100644 --- a/tests/testdata/settings-stream.json +++ b/tests/testdata/settings-stream.json @@ -3,7 +3,6 @@ "host": "127.0.0.1", "parallel_workers": 0, "gzip_enabled": false, - "metrics_endpoint": null, "cors_settings": { "allow_origins": ["*"] } diff --git a/tox.ini b/tox.ini index e4a6e37be..34a6eea5c 100644 --- a/tox.ini +++ b/tox.ini @@ -22,12 +22,19 @@ commands = python -m pytest {posargs} -n auto \ {toxinidir}/tests \ --ignore={toxinidir}/tests/kafka \ - --ignore={toxinidir}/tests/parallel + --ignore={toxinidir}/tests/parallel \ + --ignore={toxinidir}/tests/grpc # kafka and parallel tests are failing for macos when running in parallel - # with the entire test suite. So, we run them separately. + # with the entire test suite. 
grpc tests are also flaky when running + in parallel. So, we run them separately. python -m pytest {posargs} \ {toxinidir}/tests/kafka \ - {toxinidir}/tests/parallel + {toxinidir}/tests/parallel \ + {toxinidir}/tests/grpc + # We run the grpc interceptor test separately because other tests will + # interfere with the metrics counter when running in parallel. + python -m pytest {posargs} \ + {toxinidir}/tests/grpc/test_interceptor.py set_env = GITHUB_SERVER_URL = {env:GITHUB_SERVER_URL:https\://github.com} GITHUB_REPOSITORY = {env:GITHUB_REPOSITORY:SeldonIO/MLServer}