From f00c42601158d86bc951088eecb2f7fb92e07ea1 Mon Sep 17 00:00:00 2001 From: Bill Scherer <36514047+billschereriii@users.noreply.github.com> Date: Fri, 6 Oct 2023 14:21:36 -0500 Subject: [PATCH] Add support for MINBATCHTIMEOUT (#387) Add support for MINBATCHTIMEOUT [ committed by @billschereriii ] [ reviewed by @ashao @al-rigazzi] --- doc/changelog.rst | 5 +++++ smartsim/_core/entrypoints/colocated.py | 6 +++--- smartsim/_core/utils/redis.py | 2 ++ smartsim/entity/ensemble.py | 4 ++++ smartsim/entity/model.py | 4 ++++ 5 files changed, 18 insertions(+), 3 deletions(-) diff --git a/doc/changelog.rst b/doc/changelog.rst index e64f982f1..3d45a9f42 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -19,16 +19,21 @@ To be released at some future point in time Description +- Added support for MINBATCHTIMEOUT in model execution - Remove support for RedisAI 1.2.5, use RedisAI 1.2.7 commit Detailed Notes +- Added support for MINBATCHTIMEOUT in model execution, which caps the delay + waiting for a minimum number of model execution operations to accumulate + before executing them as a batch (PR387_) - RedisAI 1.2.5 is not supported anymore. The only RedisAI version is now 1.2.7. Since the officially released RedisAI 1.2.7 has a bug which breaks the build process on Mac OSX, it was decided to use commit 634916c_ from RedisAI's GitHub repository, where such bug has been fixed. This applies to all operating systems. (PR383_) + .. _PR387: https://github.com/CrayLabs/SmartSim/pull/387 .. _PR383: https://github.com/CrayLabs/SmartSim/pull/383 .. 
_634916c: https://github.com/RedisAI/RedisAI/commit/634916c722e718cc6ea3fad46e63f7d798f9adc2 diff --git a/smartsim/_core/entrypoints/colocated.py b/smartsim/_core/entrypoints/colocated.py index 81b1f35bb..45baf35cd 100644 --- a/smartsim/_core/entrypoints/colocated.py +++ b/smartsim/_core/entrypoints/colocated.py @@ -78,12 +78,10 @@ def launch_db_model(client: Client, db_model: t.List[str]) -> str: parser.add_argument("--devices_per_node", type=int) parser.add_argument("--batch_size", type=int, default=0) parser.add_argument("--min_batch_size", type=int, default=0) + parser.add_argument("--min_batch_timeout", type=int, default=0) parser.add_argument("--tag", type=str, default="") parser.add_argument("--inputs", nargs="+", default=None) parser.add_argument("--outputs", nargs="+", default=None) - - # Unused if we use SmartRedis - parser.add_argument("--min_batch_timeout", type=int, default=None) args = parser.parse_args(db_model) inputs = None @@ -106,6 +104,7 @@ def launch_db_model(client: Client, db_model: t.List[str]) -> str: num_gpus=args.devices_per_node, batch_size=args.batch_size, min_batch_size=args.min_batch_size, + min_batch_timeout=args.min_batch_timeout, tag=args.tag, inputs=inputs, outputs=outputs, @@ -118,6 +117,7 @@ def launch_db_model(client: Client, db_model: t.List[str]) -> str: device=args.device, batch_size=args.batch_size, min_batch_size=args.min_batch_size, + min_batch_timeout=args.min_batch_timeout, tag=args.tag, inputs=inputs, outputs=outputs, diff --git a/smartsim/_core/utils/redis.py b/smartsim/_core/utils/redis.py index 51f531e21..6570ab28b 100644 --- a/smartsim/_core/utils/redis.py +++ b/smartsim/_core/utils/redis.py @@ -172,6 +172,7 @@ def set_ml_model(db_model: DBModel, client: Client) -> None: device=device, batch_size=db_model.batch_size, min_batch_size=db_model.min_batch_size, + min_batch_timeout=db_model.min_batch_timeout, tag=db_model.tag, inputs=db_model.inputs, outputs=db_model.outputs, @@ -184,6 +185,7 @@ def 
set_ml_model(db_model: DBModel, client: Client) -> None: device=device, batch_size=db_model.batch_size, min_batch_size=db_model.min_batch_size, + min_batch_timeout=db_model.min_batch_timeout, tag=db_model.tag, inputs=db_model.inputs, outputs=db_model.outputs, diff --git a/smartsim/entity/ensemble.py b/smartsim/entity/ensemble.py index c3b034247..2781234e3 100644 --- a/smartsim/entity/ensemble.py +++ b/smartsim/entity/ensemble.py @@ -364,6 +364,7 @@ def add_ml_model( devices_per_node: int = 1, batch_size: int = 0, min_batch_size: int = 0, + min_batch_timeout: int = 0, tag: str = "", inputs: t.Optional[t.List[str]] = None, outputs: t.Optional[t.List[str]] = None, @@ -391,6 +392,8 @@ def add_ml_model( :type batch_size: int, optional :param min_batch_size: minimum batch size for model execution, defaults to 0 :type min_batch_size: int, optional + :param min_batch_timeout: time to wait for minimum batch size, defaults to 0 + :type min_batch_timeout: int, optional :param tag: additional tag for model information, defaults to "" :type tag: str, optional :param inputs: model inputs (TF only), defaults to None @@ -407,6 +410,7 @@ def add_ml_model( devices_per_node=devices_per_node, batch_size=batch_size, min_batch_size=min_batch_size, + min_batch_timeout=min_batch_timeout, tag=tag, inputs=inputs, outputs=outputs, diff --git a/smartsim/entity/model.py b/smartsim/entity/model.py index 35690c526..4f1d7d1e8 100644 --- a/smartsim/entity/model.py +++ b/smartsim/entity/model.py @@ -451,6 +451,7 @@ def add_ml_model( devices_per_node: int = 1, batch_size: int = 0, min_batch_size: int = 0, + min_batch_timeout: int = 0, tag: str = "", inputs: t.Optional[t.List[str]] = None, outputs: t.Optional[t.List[str]] = None, @@ -482,6 +483,8 @@ def add_ml_model( :type batch_size: int, optional :param min_batch_size: minimum batch size for model execution, defaults to 0 :type min_batch_size: int, optional + :param min_batch_timeout: time to wait for minimum batch size, defaults to 0 + :type 
min_batch_timeout: int, optional :param tag: additional tag for model information, defaults to "" :type tag: str, optional :param inputs: model inputs (TF only), defaults to None @@ -498,6 +501,7 @@ def add_ml_model( devices_per_node=devices_per_node, batch_size=batch_size, min_batch_size=min_batch_size, + min_batch_timeout=min_batch_timeout, tag=tag, inputs=inputs, outputs=outputs,