diff --git a/_modules/ts/context.html b/_modules/ts/context.html index 5be53851a2..bdf7b7b82f 100644 --- a/_modules/ts/context.html +++ b/_modules/ts/context.html @@ -405,8 +405,7 @@

Source code for ts.context

         self._limit_max_image_pixels = True
         self.metrics = metrics
         self.model_yaml_config = model_yaml_config
-        # add client socket variable cl_socket to be used for send_intermediate_predict_response
-        self.cl_socket = None
+        self.stopping_criteria = None
 
     @property
     def system_properties(self):
diff --git a/_modules/ts/model_loader.html b/_modules/ts/model_loader.html
index 078888a371..01817b9a8b 100644
--- a/_modules/ts/model_loader.html
+++ b/_modules/ts/model_loader.html
@@ -439,7 +439,7 @@ 

Source code for ts.model_loader

         batch_size: Optional[int] = None,
         envelope: Optional[str] = None,
         limit_max_image_pixels: Optional[bool] = True,
-        metrics_cache: MetricsCacheYamlImpl = None,
+        metrics_cache: Optional[MetricsCacheYamlImpl] = None,
     ) -> Service:
         """
         Load TorchServe 1.0 model from file.
diff --git a/_modules/ts/model_service_worker.html b/_modules/ts/model_service_worker.html
index eba53e688c..98a97b626e 100644
--- a/_modules/ts/model_service_worker.html
+++ b/_modules/ts/model_service_worker.html
@@ -376,6 +376,7 @@ 

Source code for ts.model_service_worker

 import platform
 import socket
 import sys
+from typing import Optional
 
 from ts.arg_parser import ArgParser
 from ts.metrics.metric_cache_yaml_impl import MetricsCacheYamlImpl
@@ -385,8 +386,7 @@ 

Source code for ts.model_service_worker

 MAX_FAILURE_THRESHOLD = 5
 SOCKET_ACCEPT_TIMEOUT = 30.0
 DEBUG = False
-BENCHMARK = os.getenv("TS_BENCHMARK")
-BENCHMARK = BENCHMARK in ["True", "true", "TRUE"]
+BENCHMARK = os.getenv("TS_BENCHMARK") in ["True", "true", "TRUE"]
 LOCAL_RANK = int(os.getenv("LOCAL_RANK", 0))
 WORLD_SIZE = int(os.getenv("WORLD_SIZE", 0))
 WORLD_RANK = int(os.getenv("RANK", 0))
@@ -400,11 +400,11 @@ 

Source code for ts.model_service_worker

 
     def __init__(
         self,
-        s_type=None,
-        s_name=None,
-        host_addr=None,
-        port_num=None,
-        metrics_config=None,
+        s_type: Optional[str] = None,
+        s_name: Optional[str] = None,
+        host_addr: Optional[str] = None,
+        port_num: Optional[int] = None,
+        metrics_config: Optional[str] = None,
     ):
         self.sock_type = s_type
 
@@ -544,8 +544,13 @@ 

Source code for ts.model_service_worker

             if BENCHMARK:
                 pr.enable()
             if cmd == b"I":
-                resp = service.predict(msg)
-                cl_socket.sendall(resp)
+                if service is not None:
+                    resp = service.predict(msg)
+                    cl_socket.sendall(resp)
+                else:
+                    raise RuntimeError(
+                        "Received command: {}, but service is not loaded".format(cmd)
+                    )
             elif cmd == b"L":
                 service, result, code = self.load_model(msg)
                 resp = bytearray()
@@ -593,8 +598,8 @@ 

Source code for ts.model_service_worker

     while ts_path in sys.path:
         sys.path.remove(ts_path)
 
-    sock_type = None
-    socket_name = None
+    sock_type: Optional[str] = None
+    socket_name: Optional[str] = None
 
     # noinspection PyBroadException
     try:
@@ -628,7 +633,11 @@ 

Source code for ts.model_service_worker

     except Exception:  # pylint: disable=broad-except
         logging.error("Backend worker process died.", exc_info=True)
     finally:
-        if sock_type == "unix" and os.path.exists(socket_name):
+        if (
+            sock_type == "unix"
+            and socket_name is not None
+            and os.path.exists(socket_name)
+        ):
             os.remove(socket_name)
 
     sys.exit(1)
diff --git a/_modules/ts/protocol/otf_message_handler.html b/_modules/ts/protocol/otf_message_handler.html
index 6a5841f6c2..632a9fd82f 100644
--- a/_modules/ts/protocol/otf_message_handler.html
+++ b/_modules/ts/protocol/otf_message_handler.html
@@ -367,6 +367,7 @@ 

Source code for ts.protocol.otf_message_handler

< """ OTF Codec """ + import io import json import logging @@ -441,15 +442,26 @@

Source code for ts.protocol.otf_message_handler

< msg += struct.pack("!i", len(req_id)) msg += req_id - # Encoding Content-Type if context is None: + # Encoding Content-Type msg += struct.pack("!i", 0) # content_type + + # Encoding the per prediction HTTP response code + # status code and reason phrase set to none + msg += struct.pack("!i", code) + msg += struct.pack("!i", 0) # No code phrase is returned + # Response headers none + msg += struct.pack("!i", 0) else: if ts_stream_next is True: context.set_response_header(idx, "ts_stream_next", "true") - else: - if "true" == context.get_response_headers(idx).get("ts_stream_next"): - context.set_response_header(idx, "ts_stream_next", "false") + elif context.stopping_criteria: + ts_stream_next = ( + "false" if context.stopping_criteria[idx](ret[idx]) else "true" + ) + context.set_response_header(idx, "ts_stream_next", ts_stream_next) + elif "true" == context.get_response_headers(idx).get("ts_stream_next"): + context.set_response_header(idx, "ts_stream_next", "false") content_type = context.get_response_content_type(idx) if content_type is None or len(content_type) == 0: @@ -458,14 +470,6 @@

Source code for ts.protocol.otf_message_handler

< msg += struct.pack("!i", len(content_type)) msg += content_type.encode("utf-8") - # Encoding the per prediction HTTP response code - if context is None: - # status code and reason phrase set to none - msg += struct.pack("!i", code) - msg += struct.pack("!i", 0) # No code phrase is returned - # Response headers none - msg += struct.pack("!i", 0) - else: sc, phrase = context.get_response_status(idx) http_code = sc if sc is not None else 200 http_phrase = phrase if phrase is not None else "" diff --git a/_modules/ts/torch_handler/base_handler.html b/_modules/ts/torch_handler/base_handler.html index 76e0ad9e44..1bc1ba1f85 100644 --- a/_modules/ts/torch_handler/base_handler.html +++ b/_modules/ts/torch_handler/base_handler.html @@ -564,7 +564,7 @@

Source code for ts.torch_handler.base_handler

backend=pt2_backend, ) logger.info(f"Compiled model with backend {pt2_backend}") - except e: + except Exception as e: logger.warning( f"Compiling model model with backend {pt2_backend} has failed \n Proceeding without compilation" ) @@ -599,7 +599,7 @@

Source code for ts.torch_handler.base_handler

Loads the pickle file from the given model path. Args: - model_dir (str): Points to the location of the model artefacts. + model_dir (str): Points to the location of the model artifacts. model_file (.py): the file which contains the model class. model_pt_path (str): points to the location of the model pickle file. @@ -689,7 +689,7 @@

Source code for ts.torch_handler.base_handler

Args: data (list): The input data that needs to be made a prediction request on. context (Context): It is a JSON Object containing information pertaining to - the model artefacts parameters. + the model artifacts parameters. Returns: list : Returns a list of dictionary with the predicted response. diff --git a/_modules/ts/torch_handler/unit_tests/test_utils/mock_context.html b/_modules/ts/torch_handler/unit_tests/test_utils/mock_context.html index 9a6cbd769a..39f29ea4bf 100644 --- a/_modules/ts/torch_handler/unit_tests/test_utils/mock_context.html +++ b/_modules/ts/torch_handler/unit_tests/test_utils/mock_context.html @@ -368,11 +368,13 @@

Source code for ts.torch_handler.unit_tests.test_utils.mock_context

Mocks for adding model context without loading all of Torchserve """ +import os import uuid import torch from ts.metrics.metrics_store import MetricsStore +from ts.utils.util import get_yaml_config
[docs]class MockContext: diff --git a/api/ts.html b/api/ts.html index 86d4147746..f3f25b6dd7 100644 --- a/api/ts.html +++ b/api/ts.html @@ -714,7 +714,7 @@

Submodules
-class ts.model_service_worker.TorchModelServiceWorker(s_type=None, s_name=None, host_addr=None, port_num=None, metrics_config=None)[source]
+class ts.model_service_worker.TorchModelServiceWorker(s_type: Optional[str] = None, s_name: Optional[str] = None, host_addr: Optional[str] = None, port_num: Optional[int] = None, metrics_config: Optional[str] = None)[source]

Bases: object

Backend worker to handle Model Server’s python service code

diff --git a/api/ts.torch_handler.html b/api/ts.torch_handler.html index d1908b809a..cfc2477907 100644 --- a/api/ts.torch_handler.html +++ b/api/ts.torch_handler.html @@ -476,7 +476,7 @@

Submodules
  • data (list) – The input data that needs to be made a prediction request on.

  • context (Context) – It is a JSON Object containing information pertaining to -the model artefacts parameters.

  • +the model artifacts parameters.

Returns:
diff --git a/searchindex.js b/searchindex.js index 340b1f691a..2d61d1eaab 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["FAQs", "README", "Troubleshooting", "api/dev_api", "api/modules", "api/run_circleci_tests", "api/setup", "api/test", "api/torchserve_sanity", "api/ts", "api/ts.metrics", "api/ts.model_service", "api/ts.protocol", "api/ts.torch_handler", "api/ts.torch_handler.request_envelope", "api/ts.torch_handler.unit_tests", "api/ts.torch_handler.unit_tests.models", "api/ts.torch_handler.unit_tests.test_utils", "api/ts.utils", "api/ts_scripts", "apis", "batch_inference_with_ts", "code_coverage", "configuration", "contents", "custom_service", "default_handlers", "getting_started", "github_actions", "grpc_api", "index", "inference_api", "internals", "large_model_inference", "logging", "management_api", "metrics", "metrics_api", "model_loading", "model_zoo", "nvidia_mps", "performance_checklist", "performance_guide", "request_envelopes", "rest_api", "server", "snapshot", "sphinx/requirements", "torchserve_on_win_native", "torchserve_on_wsl", "use_cases", "workflow_inference_api", "workflow_management_api", "workflows"], "filenames": ["FAQs.md", "README.md", "Troubleshooting.md", "api/dev_api.rst", "api/modules.rst", "api/run_circleci_tests.rst", "api/setup.rst", "api/test.rst", "api/torchserve_sanity.rst", "api/ts.rst", "api/ts.metrics.rst", "api/ts.model_service.rst", "api/ts.protocol.rst", "api/ts.torch_handler.rst", "api/ts.torch_handler.request_envelope.rst", "api/ts.torch_handler.unit_tests.rst", "api/ts.torch_handler.unit_tests.models.rst", "api/ts.torch_handler.unit_tests.test_utils.rst", "api/ts.utils.rst", "api/ts_scripts.rst", "apis.rst", "batch_inference_with_ts.md", "code_coverage.md", "configuration.md", "contents.rst", "custom_service.md", "default_handlers.md", "getting_started.md", "github_actions.md", "grpc_api.md", "index.rst", "inference_api.md", "internals.md", "large_model_inference.md", "logging.md", "management_api.md", "metrics.md", "metrics_api.md", "model_loading.md", "model_zoo.md", "nvidia_mps.md", "performance_checklist.md", "performance_guide.md", "request_envelopes.md", "rest_api.md", "server.md", "snapshot.md", "sphinx/requirements.txt", "torchserve_on_win_native.md", "torchserve_on_wsl.md", "use_cases.md", "workflow_inference_api.md", "workflow_management_api.md", "workflows.md"], "titles": ["FAQ\u2019S", "TorchServe", "Troubleshooting Guide", "<no title>", "serve", "run_circleci_tests module", "setup module", "test package", "torchserve_sanity module", "ts package", "ts.metrics package", "ts.model_service package", "ts.protocol package", "ts.torch_handler package", "ts.torch_handler.request_envelope package", "ts.torch_handler.unit_tests package", "ts.torch_handler.unit_tests.models package", "ts.torch_handler.unit_tests.test_utils package", "ts.utils package", "ts_scripts package", "<no title>", "Batch Inference with TorchServe", "Code Coverage", "Advanced configuration", "<no title>", "Custom Service", "TorchServe default inference handlers", "Getting started", "GitHub Actions for TorchServe", "TorchServe gRPC API", "TorchServe", "Inference API", "TorchServe internals", "Serving large models with Torchserve", "Logging in Torchserve", "Management API", "TorchServe Metrics", "Metrics API", "How to load a model in TorchServe", "Model Zoo", "Running TorchServe with NVIDIA MPS", "Model Inference Optimization Checklist", "Performance Guide", "Request Envelopes", "TorchServe REST API", "Running TorchServe", "TorchServe model snapshot", "cf. https://github.com/ryanfox/sphinx-markdown-tables/issues/36", "TorchServe on Windows", "TorchServe on Windows Subsystem for Linux (WSL)", "Torchserve Use Cases", "Workflow Inference API", "Management API", "TorchServe Workflows"], "terms": {"content": [0, 2, 3, 4, 23, 31, 35, 42], "thi": [0, 1, 2, 9, 10, 13, 14, 16, 18, 22, 23, 26, 27, 28, 29, 31, 32, 33, 34, 35, 37, 39, 40, 41, 42, 46, 50, 52, 53], "document": [0, 27, 29, 32, 33, 34, 35, 40, 50], "relev": [0, 2, 9, 32, 53], "readm": [0, 9, 21], "compliant": [0, 44], "openapi": [0, 31, 35, 44], "3": [0, 21, 23, 27, 28, 29, 31, 33, 34, 35, 36, 37, 39, 40, 44, 47, 53], "0": [0, 9, 10, 13, 14, 17, 21, 23, 25, 26, 27, 31, 33, 34, 35, 36, 37, 39, 42, 44, 47, 48, 50], "your": [0, 1, 2, 21, 23, 25, 26, 27, 30, 33, 34, 35, 38, 40, 41, 42, 43, 45, 48, 50], "case": [0, 2, 10, 15, 23, 24, 25, 27, 29, 30, 31, 33, 36, 40, 41, 42, 45, 53], "you": [0, 1, 2, 21, 23, 25, 26, 27, 31, 33, 34, 35, 36, 37, 38, 41, 42, 43, 44, 45, 46, 48, 50, 52], "abl": [0, 25, 33, 36, 42, 48, 50], "mechan": [0, 23], "standalon": [0, 23], "refer": [0, 2, 21, 25, 27, 29, 32, 33, 36, 37, 39, 40, 45, 48, 50, 53], "cloud": [0, 23, 32, 43], "cloudform": 0, "main": [0, 21, 27, 29, 32, 42, 48], "purpos": [0, 34, 36], "serv": [0, 1, 3, 13, 14, 20, 21, 22, 23, 24, 25, 29, 30, 31, 34, 36, 37, 39, 40, 42, 43, 48, 49, 51, 53], "via": [0, 1, 23, 25, 31, 36, 42, 45], "http": [0, 9, 14, 21, 23, 27, 28, 29, 31, 32, 34, 35, 36, 37, 45, 48, 49, 50, 51, 52], "netti": [0, 23], "engin": [0, 25, 35, 41, 50], "issu": [0, 41], "581": 0, "569": 0, "variou": [0, 13, 30, 38, 42], "provid": [0, 2, 9, 13, 21, 23, 25, 26, 29, 32, 33, 35, 36, 40, 41, 42, 43, 45, 52, 53], "out": [0, 14, 21, 23, 25, 26, 27, 35, 41, 45], "box": [0, 1, 13, 21, 26], "checkout": [0, 28], "zoo": [0, 1, 24, 29], "list": [0, 1, 9, 10, 11, 13, 14, 18, 20, 23, 24, 25, 26, 29, 31, 32, 33, 36, 39, 43, 53], "all": [0, 9, 10, 11, 13, 14, 16, 17, 18, 21, 22, 23, 25, 26, 27, 30, 32, 33, 34, 35, 36, 41, 42, 45, 48, 50, 52, 53], "also": [0, 2, 10, 13, 14, 23, 25, 26, 27, 29, 32, 33, 34, 36, 42, 45, 48, 50], "check": [0, 1, 2, 20, 24, 26, 27, 29, 33, 36, 41, 50], "exampl": [0, 2, 15, 20, 21, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 39, 40, 41, 42, 43, 45, 50, 53], "folder": [0, 10, 23, 27, 32, 35, 45, 52], "No": [0, 23, 26, 38], "As": [0, 25, 26, 27, 34, 40, 41], "now": [0, 21, 23, 27, 45, 50], "onli": [0, 10, 16, 23, 25, 26, 27, 29, 31, 33, 35, 36, 37, 40, 41, 42, 43, 44, 45, 48, 51, 52, 53], "deriv": [0, 14, 25, 26], "howev": [0, 23, 37, 48, 50], "pytorch": [0, 1, 2, 13, 21, 23, 25, 27, 29, 30, 31, 34, 35, 38, 39, 41, 45, 47, 48, 49, 51, 53], "It": [0, 11, 13, 14, 22, 25, 26, 31, 33, 35, 36, 37, 40, 41, 42, 45, 50, 52, 53], "ha": [0, 23, 25, 32, 33, 35, 36, 38, 40, 42, 43, 45, 48, 50, 52], "new": [0, 25, 26, 28, 29, 31, 35, 36, 43, 50], "featur": [0, 21, 31, 33, 36, 46], "snapshot": [0, 1, 24, 45, 48, 53], "version": [0, 2, 3, 4, 20, 21, 22, 23, 24, 25, 27, 28, 29, 31, 32, 33, 34, 36, 41, 45, 46, 50, 53], "By": [0, 2, 23, 25, 29, 33, 37, 44, 48], "utf": [0, 23, 31, 33], "8": [0, 21, 23, 27, 28, 31, 33, 34, 36, 37, 39, 40, 52], "encod": [0, 9, 10, 31, 32, 42], "string": [0, 1, 9, 11, 13, 23, 26, 36, 53], "If": [0, 2, 21, 23, 25, 27, 31, 32, 33, 34, 35, 36, 41, 42, 45, 46, 48, 50, 52], "convert": [0, 13, 14, 23, 42], "byte": [0, 2, 23, 51, 53], "need": [0, 13, 16, 21, 22, 25, 32, 33, 35, 36, 40, 42, 43, 48, 53], "codec": [0, 12], "specifi": [0, 2, 13, 14, 23, 25, 27, 28, 33, 35, 38, 45, 46, 48, 52], "github": [0, 21, 27, 29, 32, 42, 48, 49, 50], "com": [0, 21, 23, 27, 29, 31, 32, 36, 37, 48, 49, 50, 51], "blob": [0, 32, 50], "master": [0, 21, 27, 28, 31, 32, 49, 50, 51], "nmt": [0, 50], "_": [0, 2, 13, 21, 23, 25, 26, 29, 31, 32, 33, 34, 35, 36, 37, 41, 43, 45, 47, 48, 50, 52], "transform": [0, 21, 25, 33, 36, 40, 41, 42], "py": [0, 2, 13, 17, 22, 25, 26, 27, 28, 29, 32, 33, 40, 42, 43, 48, 49, 50], "guid": [0, 24, 27, 30, 49], "heavili": 0, "influenc": [0, 41], "launcher": [0, 42], "core": [0, 41, 42], "pin": [0, 41, 42], "we": [0, 2, 21, 26, 27, 33, 34, 36, 40, 42, 45, 50], "recommend": [0, 26, 29, 31, 33], "cpu_launcher_en": [0, 42], "true": [0, 2, 9, 10, 21, 23, 25, 31, 33, 34, 35, 42, 50], "cpu_launcher_arg": [0, 42], "use_logical_cor": [0, 42], "more": [0, 2, 15, 21, 23, 25, 26, 27, 31, 33, 35, 37, 39, 40, 41, 45, 50, 53], "background": [0, 23], "found": [0, 37], "blog": [0, 41, 42], "post": [0, 2, 11, 13, 21, 23, 25, 31, 32, 33, 34, 35, 41, 42, 48, 50, 51, 52], "configur": [0, 1, 2, 9, 24, 29, 31, 34, 35, 36, 37, 42, 44, 45, 46, 50, 51, 52], "ye": [0, 38], "environ": [0, 2, 22, 28, 33, 35, 36, 40, 42, 46, 48, 49, 50], "variabl": [0, 2, 33, 35, 36, 42, 46, 48], "detail": [0, 2, 23, 25, 26, 29, 31, 33, 35, 36, 37, 39, 40, 48, 50, 52, 53], "requir": [0, 21, 23, 25, 27, 32, 33, 34, 36, 38, 41, 42, 43, 45, 50, 53], "txt": [0, 23, 32, 33, 39, 40, 50], "while": [0, 2, 16, 23, 32, 40, 41, 42, 45, 46, 50], "r": [0, 2, 33], "flag": [0, 2, 21, 23, 36, 46, 50], "extra": [0, 2, 23, 25, 27, 33, 42, 48, 50], "helm": [0, 1], "chart": [0, 1], "node": [0, 22, 23, 33, 48, 50, 51, 53], "ec2": [0, 35], "cluster": [0, 32], "There": [0, 2, 23, 34, 35, 36, 38, 42, 45], "format": [0, 1, 9, 13, 14, 23, 25, 31, 33, 35, 37, 41, 42, 43, 45], "templat": 0, "here": [0, 10, 14, 21, 23, 25, 26, 27, 32, 33, 35, 36, 37, 42, 45, 48, 50], "type": [0, 2, 9, 10, 11, 13, 14, 23, 25, 29, 31, 33, 35, 37, 38, 39, 40, 41, 45, 53], "behind": [0, 42, 43], "elast": 0, "loadbalanc": 0, "preserv": [0, 46], "runtim": [0, 9, 13, 21, 25, 29, 35, 42, 46, 52], "across": [0, 13, 43, 46], "session": [0, 46], "instanc": [0, 16, 25, 27, 33, 35, 46], "experienc": [0, 46], "either": [0, 2, 23, 25, 32, 33, 36, 42, 46], "plan": [0, 25, 27, 46, 48, 50], "unplan": [0, 46], "servic": [0, 1, 2, 3, 4, 11, 13, 18, 21, 23, 29, 32, 33, 34, 35, 36, 40, 42, 44, 46], "stop": [0, 31, 32, 45, 46], "its": [0, 2, 25, 33, 40, 42, 46], "upon": [0, 46], "restart": [0, 36, 46, 48], "These": [0, 2, 21, 23, 32, 34, 35, 41, 42, 43, 45, 50, 53], "save": [0, 29, 32, 34, 46, 50], "util": [0, 3, 4, 9, 13, 21, 24, 25, 32, 33, 36, 40, 41, 42, 45, 48, 53], "script": [0, 2, 10, 25, 28, 33, 35, 39, 42, 48], "hardwar": [0, 33, 40, 41, 42], "gpu": [0, 1, 9, 10, 11, 21, 22, 26, 27, 28, 32, 33, 35, 36, 40, 41, 42, 49], "compat": [0, 11, 13, 32, 41, 42], "A": [0, 1, 9, 11, 13, 14, 23, 25, 31, 32, 35, 42, 43, 45, 46, 48, 50, 53], "could": [0, 29, 31, 32, 33, 34, 40, 41], "cuda": [0, 22, 23, 25, 27, 28, 33, 40, 42, 49], "well": [0, 23, 34, 36, 40, 41, 43, 50], "build_imag": [0, 21], "sh": [0, 21, 35], "appropri": [0, 40], "option": [0, 9, 25, 27, 31, 33, 35, 36, 38, 41, 45, 50, 52], "help": [0, 33, 37, 40, 41, 42, 45, 48, 50], "To": [0, 2, 21, 23, 25, 26, 27, 29, 31, 33, 34, 35, 36, 37, 39, 40, 42, 43, 44, 45, 51, 52], "command": [0, 9, 21, 27, 28, 29, 31, 33, 35, 37, 40, 50], "b": [0, 1, 9, 36, 43, 45, 50], "branch_nam": 0, "commit_id": 0, "tag": [0, 13], "t": [0, 13, 21, 23, 26, 27, 31, 32, 34, 35, 50, 51], "tagnam": 0, "latest": [0, 21, 28, 41, 50], "The": [0, 2, 9, 13, 14, 21, 22, 23, 25, 27, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 48, 50, 51, 52, 53], "instal": [0, 1, 21, 23, 28, 29, 30, 32, 33, 37, 50], "where": [0, 13, 23, 28, 29, 31, 32, 33, 36, 40, 41, 42, 45, 46], "pypi": [0, 32, 48], "distribut": [0, 25, 28, 33], "look": [0, 21, 33, 35, 36, 41, 42, 43, 45], "accord": [0, 40], "doc": [0, 21, 27, 31, 32, 33, 36, 37, 50, 51], "overrid": [0, 10, 13, 23, 25, 35, 36, 45, 53], "store": [0, 9, 21, 23, 29, 32, 36, 45, 46, 48, 50], "load": [0, 1, 9, 11, 12, 13, 15, 17, 18, 25, 29, 31, 32, 34, 35, 36, 40, 42, 45, 50], "mandatori": [0, 45], "argument": [0, 9, 14, 23, 25, 36], "dure": [0, 9, 23, 25, 27, 33, 35], "start": [0, 1, 9, 10, 21, 23, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 40, 41, 44, 45, 46, 48, 50], "defin": [0, 9, 10, 11, 13, 16, 18, 23, 25, 27, 28, 31, 33, 34, 36, 42, 45, 53], "overridden": [0, 16, 23], "line": [0, 9, 36, 48], "manag": [0, 1, 20, 23, 24, 25, 27, 29, 32, 43, 44, 46, 48, 50, 53], "decid": 0, "which": [0, 2, 10, 11, 21, 23, 25, 27, 28, 31, 32, 33, 36, 40, 42, 43, 45, 46, 48, 50, 53], "relationship": 0, "w": [0, 1, 13, 26, 34, 37], "ie": [0, 25, 33], "tool": [0, 1, 25, 27, 29, 30, 32, 33, 40, 41, 50], "postman": [0, 50], "insomnia": 0, "even": [0, 41], "find": [0, 18, 25, 32, 33, 35, 40, 41, 42], "plugin": [0, 32, 50], "sdk": [0, 32], "data": [0, 1, 9, 11, 13, 14, 25, 27, 29, 31, 32, 33, 35, 41, 42, 43, 50, 53], "valu": [0, 2, 9, 10, 18, 23, 35, 36, 40, 42, 45, 48, 52, 53], "pair": [0, 10, 36, 45], "object": [0, 1, 9, 10, 11, 13, 14, 17, 23, 25, 26, 27, 32, 35, 39, 45, 50, 51, 53], "would": [0, 13, 23, 27, 28, 33, 36, 41, 45, 53], "modifi": [0, 10, 33, 42, 45], "postprocess": [0, 11, 13, 25, 33, 34, 35, 52, 53], "extend": [0, 11, 32], "just": [0, 27, 32, 41], "method": [0, 9, 13, 14, 18, 21, 25, 36, 38, 41, 42], "code": [0, 9, 11, 12, 21, 23, 24, 31, 32, 33, 34, 35, 36, 42, 43, 44, 45, 48], "zero": 0, "builtin": 0, "huggingfac": [0, 1, 25, 33, 36, 40], "zip": [0, 2, 23, 25, 45, 48], "consist": [0, 21, 22, 40, 46, 53], "artifact": [0, 1, 13, 25, 32, 35, 38], "extens": [0, 22, 41, 45, 50], "cmd": [0, 2, 23], "torch": [0, 2, 13, 25, 33, 35, 37, 42, 43, 47, 48, 49, 50, 53], "step": [0, 2, 25, 26, 27, 35, 41, 48, 50], "given": [0, 2, 9, 18, 35, 36, 37, 48, 50, 53], "current": [0, 9, 22, 23, 27, 29, 34, 35, 36, 43, 45, 46, 52], "allow": [0, 2, 25, 27, 29, 31, 33, 35, 40, 42, 43, 50, 52], "suppli": [0, 23, 25, 46, 52, 53], "one": [0, 16, 23, 26, 28, 29, 31, 33, 35, 36, 40, 43, 45], "number": [0, 1, 2, 13, 21, 23, 25, 26, 27, 29, 31, 32, 33, 35, 36, 40, 42, 48, 50, 52, 53], "model_dir": [0, 9, 11, 15, 17, 25, 33], "locat": [0, 2, 21, 23, 25, 34, 35, 36, 45, 52], "access": [0, 2, 31, 33, 35, 36, 37, 40, 42, 44, 50, 51, 52], "through": [0, 9, 13, 23, 36, 41, 42, 53], "context": [0, 3, 4, 11, 12, 13, 14, 15, 17, 23, 24, 25, 29, 31, 32, 33, 35, 36, 40, 42, 53], "entri": [0, 9, 13, 23, 29, 33, 35, 45, 50], "point": [0, 9, 10, 13, 27, 29, 33, 35, 37, 40, 45], "snippet": [0, 25], "system_properti": [0, 9, 25], "get": [0, 1, 11, 13, 14, 18, 21, 22, 23, 25, 29, 31, 32, 33, 35, 40, 42, 45, 49, 51, 52], "cli": [0, 2, 32, 41], "633": 0, "both": [0, 1, 21, 23, 27, 29, 32, 35, 36, 40, 42, 43, 44, 50], "v2": [0, 14, 35], "signatur": [0, 11, 25, 36, 43, 45], "note": [0, 22, 23, 25, 26, 27, 29, 35, 36, 37, 40, 41, 42, 46, 48, 50], "For": [0, 21, 22, 23, 25, 26, 29, 31, 33, 34, 35, 36, 37, 40, 41, 42, 43, 45, 46, 50], "replac": [0, 14, 36, 42], "charact": [0, 13], "e": [0, 23, 31, 33, 34, 35, 36, 41, 45, 47, 48, 50, 53], "26": 0, "669": 0, "local": [0, 21, 22, 23, 32, 35, 43, 45, 48, 49, 50, 52, 53], "publicli": [0, 2], "uri": [0, 35, 52, 53], "work": [0, 21, 23, 25, 30, 35, 36, 38, 40, 41, 42, 48, 50], "veri": [0, 33], "same": [0, 23, 25, 29, 33, 35, 36, 40, 41, 43, 50], "made": [0, 13, 34, 35, 36, 45, 50], "public": [0, 50, 53], "consol": 0, "instead": [0, 16, 36], "few": [0, 42, 48, 53], "reason": [0, 23, 42], "overhead": 0, "someth": 0, "dramat": [0, 42], "larger": [0, 31, 41, 42], "launch": [0, 21, 53], "control": [0, 27, 32, 40, 45], "dep": [0, 2], "per": [0, 2, 13, 33, 36, 40, 42, 43, 50], "intend": 0, "develop": [0, 1, 27, 50], "should": [0, 13, 16, 21, 23, 25, 27, 28, 36, 40, 41, 42, 43, 48, 50], "pre": [0, 1, 11, 13, 25, 32, 33, 35, 39, 41, 42, 50], "compress": 0, "decompress": 0, "becaus": [0, 21, 35, 36], "histor": 0, "came": 0, "involv": [0, 40, 41], "unload": 0, "ton": 0, "bucket": [0, 35, 41], "But": [0, 40], "user": [0, 13, 18, 23, 25, 29, 31, 32, 33, 35, 36, 42, 45, 46, 48, 50, 53], "smaller": [0, 40], "choos": [0, 35], "good": [0, 46], "bet": 0, "perform": [1, 16, 23, 24, 27, 30, 33, 34, 40, 41], "flexibl": [1, 11, 30, 33], "easi": [1, 27, 30, 45], "us": [1, 9, 10, 11, 13, 14, 16, 18, 23, 24, 25, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 52, 53], "eager": [1, 13, 18, 25, 38, 39], "mode": [1, 13, 25, 36, 37, 39, 40], "torchscript": [1, 13, 38, 39, 42, 50], "model": [1, 3, 9, 10, 11, 12, 13, 14, 15, 17, 18, 20, 24, 26, 29, 30, 31, 32, 34, 36, 37, 40, 42, 43, 47, 48, 49, 51, 52], "quick": [1, 30, 42], "server": [1, 9, 10, 20, 21, 22, 23, 24, 25, 27, 31, 32, 34, 42, 43, 45, 46, 48, 49, 50, 51], "usag": [1, 10, 25, 32, 36, 42, 45, 50], "tutori": [1, 33], "archiv": [1, 23, 26, 32, 33, 35, 36, 38, 39, 42, 45, 47, 48, 49, 50, 52, 53], "show": [1, 11, 21, 27, 35, 38, 40, 41, 45], "how": [1, 21, 23, 25, 27, 30, 32, 34, 36, 40, 41, 42, 45, 46], "packag": [1, 3, 4, 21, 24, 25, 27, 32, 33, 39, 42, 48, 50, 53], "file": [1, 9, 13, 18, 21, 22, 25, 26, 27, 28, 29, 31, 34, 35, 37, 38, 42, 43, 46, 48, 50, 52], "procedur": [1, 50], "explain": [1, 25, 30, 31, 33, 34], "rest": [1, 20, 23, 24, 31, 33, 45, 50, 51, 53], "api": [1, 9, 11, 25, 32, 33, 38, 45, 46, 48, 50, 53], "specif": [1, 13, 14, 29, 31, 32, 33, 35, 40, 42, 44, 45, 50, 52], "endpoint": [1, 9, 13, 27, 31, 32, 36, 37, 45], "grpc": [1, 20, 24, 31, 35], "support": [1, 2, 10, 11, 13, 22, 23, 26, 27, 29, 31, 32, 33, 35, 36, 42, 43, 45, 46, 49, 51, 52, 53], "infer": [1, 9, 11, 12, 13, 20, 23, 24, 25, 27, 29, 30, 32, 34, 35, 36, 39, 42, 44, 45, 48, 50], "call": [1, 14, 16, 23, 25, 27, 29, 31, 32, 33, 34, 35, 36, 38, 42, 43, 44, 45, 46, 50, 51, 52], "health": [1, 11, 20, 24, 29, 33], "deploi": [1, 2, 30, 32, 40, 41, 43], "scale": [1, 2, 20, 24, 25, 27, 30, 33, 50, 52, 53], "log": [1, 2, 23, 24, 32, 37, 41, 46], "metric": [1, 3, 4, 9, 20, 24, 30, 32, 34, 35, 44, 48], "prometheu": [1, 20, 24, 36], "grafana": [1, 20, 24], "dashboard": [1, 37], "captum": [1, 13, 14, 31, 39, 47], "explan": [1, 13, 14, 20, 24, 30, 43], "built": [1, 21, 23, 42, 45], "text": [1, 2, 13, 23, 25, 26, 39], "imag": [1, 9, 13, 21, 23, 25, 26, 27, 31, 33, 36, 39, 42, 45, 50, 51], "batch": [1, 9, 10, 13, 14, 15, 23, 24, 25, 26, 30, 32, 33, 35, 36, 40, 41, 42, 53], "creat": [1, 2, 9, 10, 12, 21, 23, 27, 32, 33, 35, 37, 38, 40, 42, 50], "workflow": [1, 20, 23, 24, 27, 28, 29, 32, 33, 36, 39, 44, 45, 46, 47], "compos": [1, 13], "python": [1, 9, 10, 14, 20, 21, 24, 28, 33, 35, 43, 48, 49, 50, 53], "function": [1, 2, 10, 11, 13, 14, 16, 18, 21, 25, 31, 32, 35, 42, 53], "sequenti": 1, "parallel": [1, 33, 40], "pipelin": [1, 33, 41], "classifi": [1, 25, 26, 36, 39, 50], "take": [1, 9, 13, 16, 23, 25, 27, 32, 33, 35, 40, 42, 45], "an": [1, 2, 10, 11, 13, 15, 18, 21, 23, 27, 29, 31, 32, 33, 35, 36, 38, 40, 41, 42, 45, 48, 50], "return": [1, 9, 10, 11, 12, 13, 14, 15, 18, 27, 29, 31, 33, 35, 36, 37, 40, 45, 51, 52], "name": [1, 2, 9, 10, 13, 14, 18, 21, 23, 25, 26, 27, 28, 31, 32, 33, 34, 35, 36, 37, 42, 43, 45, 48, 50, 52, 53], "input": [1, 11, 13, 14, 16, 25, 26, 31, 33, 35, 39, 40, 41, 42, 43, 53], "classif": [1, 13, 25, 26, 39, 40], "base": [1, 3, 9, 10, 11, 13, 16, 17, 18, 23, 24, 25, 33, 37, 41, 42, 45, 50, 53], "vocabulari": [1, 13], "detector": [1, 25, 26, 50], "detect": [1, 13, 26, 39, 43, 45, 48, 50], "class": [1, 9, 10, 11, 13, 14, 15, 16, 17, 18, 26, 31, 33, 36, 43], "bound": [1, 13, 26], "respect": [1, 13, 26], "segment": [1, 13, 25, 26, 39, 50], "output": [1, 11, 13, 14, 23, 25, 26, 27, 31, 34, 35, 42, 53], "shape": [1, 13, 14, 26], "cl": [1, 26], "h": [1, 13, 26, 31, 45], "height": [1, 13, 26], "width": [1, 13, 26], "languag": [1, 43], "sentenc": 1, "can": [1, 15, 21, 23, 25, 26, 27, 29, 31, 32, 33, 35, 36, 37, 40, 41, 42, 44, 45, 46, 48, 50, 52, 53], "sequenc": [1, 29, 31, 33, 39, 40, 41], "token": [1, 13, 29, 31, 33, 39, 41], "q": 1, "answer": [1, 48], "multi": [1, 32, 40, 42, 43], "modal": 1, "framework": [1, 11, 21, 32, 42, 43], "build": [1, 21, 32, 42, 48, 50], "combin": [1, 36], "audio": [1, 42], "video": [1, 42], "dual": 1, "translat": [1, 14, 43], "train": [1, 16, 26, 27, 39, 41, 45, 50], "readi": [1, 21, 25, 27, 29, 31, 33, 35, 39, 43, 50], "mani": [1, 32, 40, 42, 43, 45, 46], "intern": [1, 11, 25, 33, 36], "integr": [1, 33, 42], "usecas": [1, 26], "describ": [1, 13, 20, 23, 24, 36, 41, 53], "test": [1, 3, 4, 15, 16, 21, 22, 24, 26, 27, 28, 32, 35, 36, 43, 48], "regress": [1, 28, 33], "befor": [1, 21, 23, 25, 26, 31, 35, 40, 41], "ship": 1, "them": [1, 10, 16, 32, 35, 36, 40, 41, 45, 50], "product": [1, 30, 32, 42, 45, 50], "custom": [1, 9, 10, 13, 21, 24, 32, 33, 35, 37, 38, 42], "encrypt": [1, 20, 24], "s3": [1, 23, 35], "side": [1, 20, 24, 31, 35, 44], "km": [1, 35, 36], "serial": [1, 25, 27, 38, 40, 42, 50], "aw": [1, 23, 30, 35, 40], "dynamo": 1, "db": 1, "benchmark": [1, 27, 32], "profil": [1, 48], "jmeter": 1, "apach": 1, "bench": 1, "itself": [1, 40], "kubernet": [1, 31, 32], "demonstr": [1, 25, 50], "deploy": [1, 33, 40, 43, 50], "azur": 1, "googl": [1, 29, 32, 33, 43], "mlflow": 1, "kubeflow": 1, "vertex": [1, 30], "ai": [1, 30, 32, 33, 43], "nvidia": [1, 24, 41, 42, 48, 49, 50], "mp": [1, 24, 42], "optim": [1, 21, 30, 33], "worker": [1, 9, 10, 20, 21, 24, 25, 27, 29, 31, 32, 33, 34, 36, 40, 42, 46, 50, 53], "singl": [1, 10, 11, 40, 45], "section": [2, 21, 27, 33, 36, 40, 53], "common": [2, 28, 42], "face": [2, 21], "correspond": [2, 27, 36, 43, 50], "usual": [2, 33, 48], "some": [2, 9, 25, 26, 27, 28, 34, 35, 36, 40, 41, 42, 48, 50], "other": [2, 25, 32, 33, 35, 36, 40, 41, 50, 52, 53], "applic": [2, 23, 25, 29, 31, 35, 36, 41, 42, 50], "verifi": [2, 21, 33], "ss": 2, "ntl": 2, "grep": 2, "two": [2, 21, 23, 25, 34, 35, 36, 38, 40, 44, 50], "wai": [2, 23, 25, 33, 34, 38, 45, 46], "kill": 2, "run": [2, 9, 11, 13, 16, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 41, 42, 46, 48, 50], "differ": [2, 10, 11, 23, 25, 29, 32, 33, 35, 36, 40, 41, 42, 43, 45, 50], "than": [2, 23, 31, 36, 41], "md": [2, 22, 50, 53], "542": 2, "occur": 2, "17": [2, 28, 48, 49], "older": 2, "default": [2, 10, 13, 14, 20, 23, 24, 29, 31, 32, 33, 34, 37, 40, 42, 44, 45, 46, 50, 51, 52, 53], "max": [2, 21, 23, 33, 34, 42, 53], "size": [2, 9, 10, 13, 21, 23, 25, 26, 32, 33, 34, 35, 39, 40, 41, 42, 53], "respons": [2, 12, 13, 14, 23, 25, 29, 31, 32, 35, 36, 50, 51], "roughli": 2, "6": [2, 26, 28, 33, 36, 47], "5": [2, 13, 14, 26, 30, 33, 34, 35, 36, 37, 47], "mb": [2, 10, 34, 36, 39], "henc": [2, 27], "ani": [2, 25, 27, 29, 31, 32, 33, 34, 35, 36, 50], "greater": [2, 36], "5mb": 2, "cannot": 2, "upload": [2, 28], "updat": [2, 10, 21, 26, 27, 30, 33, 35, 41, 50, 53], "max_request_s": [2, 23], "max_response_s": [2, 23], "properti": [2, 9, 25, 32, 36, 37, 42, 43, 45, 48, 50], "cat": [2, 21, 31], "model_stor": [2, 21, 23, 27, 35, 36, 40, 45, 50], "ts": [2, 3, 4, 21, 23, 24, 25, 26, 29, 31, 32, 33, 34, 35, 36, 43, 45, 46, 50], "path": [2, 9, 21, 22, 23, 25, 27, 34, 35, 36, 38, 42, 45, 48, 49, 50, 52], "set": [2, 9, 14, 20, 21, 23, 24, 29, 31, 33, 36, 37, 40, 42, 43, 45, 46, 51, 52], "335": 2, "enabl": [2, 21, 25, 33, 36, 37, 40, 42, 44, 50], "nc": [2, 27, 36, 45, 50], "383": 2, "512": [2, 33, 41], "last": [2, 29, 31, 33, 46], "restor": [2, 46], "state": [2, 31, 32, 45, 46], "thrown": 2, "inconsist": 2, "compar": [2, 40, 42], "remov": [2, 13, 48], "log_loc": [2, 34, 46], "system": [2, 10, 11, 23, 25, 29, 32, 35, 36, 42, 43, 46, 48, 50, 52], "export": [2, 9, 23, 25, 27, 35, 42, 49], "desir": [2, 25, 36], "extract": 2, "654": 2, "give": [2, 31], "clear": 2, "messag": [2, 9, 11, 12, 18, 31, 34, 45], "try": [2, 13, 35, 41, 42, 45], "conflict": 2, "exist": [2, 10, 23, 26, 29, 31, 32, 33, 36], "500": [2, 18, 31, 36], "wa": [2, 21, 31, 32, 34, 36, 41], "download": [2, 27, 33, 35, 37, 39, 48, 52], "whether": [2, 35], "url": [2, 9, 21, 23, 35, 40, 45, 50, 52, 53], "In": [2, 21, 23, 25, 27, 33, 34, 36, 40, 41, 42, 45, 50], "spawn": [2, 42], "up": [2, 11, 21, 23, 25, 27, 29, 32, 33, 35, 40, 41, 42, 45, 50, 52], "increas": [2, 25, 35, 40, 41, 42, 50], "curl": [2, 20, 21, 24, 27, 28, 35, 37, 48, 50, 52], "x": [2, 21, 23, 28, 31, 35, 50, 52], "localhost": [2, 21, 23, 31, 35, 37, 44, 50, 51, 52], "model_nam": [2, 9, 10, 11, 14, 15, 17, 25, 31, 33, 35, 36, 37, 45, 50, 53], "like": [2, 10, 13, 14, 23, 27, 30, 31, 32, 33, 35, 36, 41, 42, 45], "egg": [2, 47], "json": [2, 3, 9, 10, 13, 18, 22, 23, 24, 27, 31, 32, 33, 35, 36, 40, 42, 43, 50, 51, 52, 53], "etc": [2, 25, 32, 41, 50], "mai": [2, 23, 25, 48], "write": [2, 23, 26, 43], "handler": [2, 9, 13, 14, 15, 23, 24, 29, 31, 32, 33, 35, 38, 42, 43, 50], "566": 2, "waveglow": [2, 25], "speech": [2, 25], "synthes": [2, 25], "creation": [2, 35], "mostli": [2, 42], "initi": [2, 11, 13, 17, 23, 25, 33, 35, 36, 38, 42, 50], "due": [2, 40], "erron": 2, "s": [2, 9, 10, 11, 18, 23, 24, 25, 26, 27, 30, 31, 33, 35, 37, 40, 41, 42, 45, 46, 50, 52, 53], "observ": 2, "miss": [2, 13, 25], "modul": [2, 3, 4, 24, 33, 48], "667": 2, "537": 2, "subpackag": [3, 4, 24], "submodul": [3, 4, 24], "dimens": [3, 4, 9, 24, 42], "metric_collector": [3, 4, 9, 24], "metric_encod": [3, 4, 9, 24], "metrics_stor": [3, 4, 9, 24], "process_memory_metr": [3, 4, 9, 24], "system_metr": [3, 4, 9, 24], "unit": [3, 4, 9, 15, 24, 26, 35, 36, 37], "model_servic": [3, 4, 9, 24], "protocol": [3, 4, 9, 14, 23, 24, 29, 31, 33, 35, 52], "otf_message_handl": [3, 4, 9, 24, 29, 31, 33], "torch_handl": [3, 4, 9, 24, 25, 26, 33], "request_envelop": [3, 9, 13, 24], "bodi": [3, 9, 13, 24, 25, 43], "kserv": [3, 9, 13, 20, 24, 35, 43], "kservev2": [3, 9, 13, 24], "unit_test": [3, 9, 13, 22, 24], "base_model": [3, 13, 15, 24], "test_util": [3, 13, 15, 24, 31, 33], "mock_context": [3, 13, 15, 24], "test_base_handl": [3, 9, 13, 24], "test_envelop": [3, 9, 13, 24], "test_image_classifi": [3, 9, 13, 24], "test_image_segment": [3, 9, 13, 24], "test_mnist_kf": [3, 9, 13, 24], "test_object_detector": [3, 9, 13, 24], "base_handl": [3, 4, 9, 24, 25, 42], "contract": [3, 4, 9, 24], "densenet_handl": [3, 4, 9, 24], "image_classifi": [3, 4, 9, 21, 24, 25, 27, 29, 31], "image_segment": [3, 4, 9, 24], "object_detector": [3, 4, 9, 24], "text_classifi": [3, 4, 9, 21, 24], "text_handl": [3, 4, 9, 24], "vision_handl": [3, 4, 9, 24], "timeit_decor": [3, 4, 9, 24], "arg_pars": [3, 4, 24], "model_load": [3, 4, 24], "model_serv": [3, 4, 24], "model_service_work": [3, 4, 24], "run_circleci_test": [3, 4, 24], "setup": [3, 4, 23, 24, 25, 28, 32, 33, 35, 37, 50], "regression_test": [3, 4, 24], "torchserve_san": [3, 4, 22, 24], "ts_script": [3, 4, 22, 24, 27, 28, 29, 32, 48, 49], "api_util": [3, 4, 24], "backend_util": [3, 4, 24], "frontend_util": [3, 4, 24], "install_depend": [3, 4, 22, 24, 27, 28, 48], "install_from_src": [3, 4, 24, 48, 49], "marsgen": [3, 4, 24], "modelarchiver_util": [3, 4, 24], "print_env_info": [3, 4, 24], "regression_util": [3, 4, 24], "sanity_util": [3, 4, 24], "shell_util": [3, 4, 24], "torchserve_grpc_cli": [3, 4, 24, 27, 29], "tsutil": [3, 4, 24], "validate_model_on_gpu": [3, 4, 24], "workflow_archiver_util": [3, 4, 24], "pars": [9, 13, 18, 52], "torchserv": [9, 13, 14, 17, 18, 20, 22, 24, 25, 31, 35, 37, 39, 43, 47, 51, 52], "argpars": 9, "sourc": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 23, 27, 33], "parser": [9, 32], "todo": 9, "add": [9, 10, 26, 29, 32, 33, 34, 35, 43, 48, 50], "static": [9, 23, 41], "extract_arg": 9, "arg": [9, 13, 16, 25, 32, 33, 35], "none": [9, 10, 11, 12, 13, 17, 18, 23, 25, 31, 33, 35, 36], "model_service_worker_arg": 9, "backend": [9, 18, 21, 25, 27, 29, 30, 31, 33, 34, 35, 37], "socket": [9, 12, 32, 42], "ts_parser": 9, "incom": [9, 21, 25, 32], "request": [9, 11, 13, 14, 21, 23, 24, 27, 28, 29, 31, 32, 33, 34, 35, 36, 37, 39, 40, 45, 50, 53], "manifest": [9, 11, 25, 32, 35, 42, 45, 52], "batch_siz": [9, 21, 35, 42, 53], "mms_version": 9, "limit_max_image_pixel": [9, 23], "model_yaml_config": [9, 23, 33], "inform": [9, 13, 21, 23, 25, 27, 32, 33, 34, 35, 36, 37, 41, 42], "fix": [9, 41, 43], "time": [9, 10, 18, 21, 23, 25, 27, 31, 32, 33, 34, 35, 45, 46, 53], "get_all_request_head": 9, "idx": [9, 10, 17, 36], "int": [9, 10, 13, 36, 53], "dict": [9, 11, 13, 25, 32, 35, 36, 53], "str": [9, 10, 13, 18, 25, 31, 33, 36], "get_request_head": [9, 17, 35], "kei": [9, 10, 14, 23, 35, 36, 43, 50], "get_request_id": 9, "get_response_content_typ": 9, "get_response_head": 9, "get_response_statu": 9, "tupl": 9, "request_processor": 9, "set_all_response_statu": 9, "200": [9, 29, 31, 33, 34, 35, 36], "phrase": 9, "statu": [9, 21, 29, 31, 33, 35, 36, 52], "individu": 9, "param": [9, 10, 13, 18, 25, 27, 50, 53], "set_response_content_typ": 9, "set_response_head": 9, "set_response_statu": 9, "index": [9, 10, 26, 36, 50], "sent": [9, 11, 23, 25, 29, 31, 33], "handl": [9, 11, 13, 14, 21, 23, 26, 29, 31, 32, 33, 35, 42, 45], "requestprocessor": 9, "request_head": 9, "processor": [9, 23], "add_response_properti": 9, "get_request_properti": 9, "get_response_status_cod": 9, "get_response_status_phras": 9, "report_statu": 9, "reason_phras": 9, "loader": [9, 32, 42], "modelload": 9, "abstract": [9, 11, 14, 25], "gpu_id": [9, 17, 25], "envelop": [9, 14, 24, 32], "bool": [9, 16, 18], "from": [9, 11, 12, 13, 14, 21, 22, 23, 26, 29, 30, 31, 32, 33, 34, 35, 37, 38, 40, 41, 42, 43, 44, 45, 46, 50, 51, 52], "paramet": [9, 10, 11, 12, 13, 14, 18, 25, 33, 34, 35, 36, 45, 46, 50, 52, 53], "modelloaderfactori": 9, "get_model_load": 9, "tsmodelload": 9, "1": [9, 14, 21, 23, 25, 26, 27, 31, 34, 35, 36, 37, 39, 40, 42, 45, 50, 52, 53], "metrics_cach": 9, "metricscacheyamlimpl": 9, "load_properti": 9, "file_path": 9, "read": [9, 21, 23, 25], "map": [9, 10, 13, 18, 26, 33, 53], "modelservicework": 9, "mm": [9, 11], "front": [9, 10], "end": [9, 10, 36, 38, 41], "commun": [9, 39], "binari": [9, 32], "torchmodelservicework": 9, "s_type": 9, "s_name": 9, "host_addr": 9, "port_num": 9, "metrics_config": [9, 36], "handle_connect": 9, "cl_socket": 9, "connect": [9, 23, 32, 35, 40], "load_model": [9, 21, 23], "load_model_request": 9, "expect": [9, 14, 21, 23, 26, 31, 40, 42, 43, 50], "modelpath": 9, "modelnam": [9, 21, 23, 35, 36, 37, 52], "cpu": [9, 10, 22, 25, 27, 28, 30, 36, 39, 41, 42], "els": [9, 13, 25, 35, 36], "wrapper": [9, 11], "unwrapp": 9, "batchsiz": [9, 21, 23, 29, 33, 35, 52], "limitmaximagepixel": 9, "limit": [9, 33, 35, 40, 42, 49, 50, 52], "pillow": [9, 41], "max_image_pixel": 9, "run_serv": 9, "process": [9, 10, 11, 13, 21, 23, 25, 27, 30, 32, 33, 35, 38, 40, 41, 42], "listen": [9, 27, 29, 31, 32, 35, 37, 44, 51, 52], "customservic": 9, "definit": [9, 13, 18, 36], "entry_point": 9, "predict": [9, 13, 14, 20, 21, 23, 24, 26, 29, 33, 34, 35, 36, 45, 50, 53], "request_input": 9, "retrieve_data_for_infer": 9, "requestid": 9, "111": [9, 14], "222": 9, "3333": 9, "contenttyp": 9, "val1": 9, "set_cl_socket": 9, "emit_metr": [9, 36], "emit": [9, 36], "dictionari": [9, 10, 13, 14, 18, 25, 35], "metric_nam": [9, 36], "when": [9, 10, 13, 21, 23, 25, 27, 28, 29, 30, 31, 33, 34, 36, 37, 40, 41, 42, 43, 44, 46], "doe": [9, 13, 29, 33, 36, 38, 42, 46], "follow": [9, 13, 14, 21, 23, 25, 26, 27, 28, 29, 31, 33, 34, 35, 36, 40, 42, 46, 48, 50, 51, 52, 53], "c": [9, 23, 27, 30, 32, 34, 40, 44, 45, 48], "expos": [9, 23, 42], "standard": [9, 25, 36, 43], "ping": [9, 11, 21, 29, 31, 33], "descript": [9, 20, 24, 26, 33, 45, 53], "d": [9, 13, 34, 35, 40, 43, 45], "wait": [9, 21, 33, 35, 40, 53], "to_dict": 10, "request_id": [10, 29, 31, 33, 36], "metric_method": 10, "gener": [10, 21, 22, 23, 25, 27, 29, 31, 33, 35, 39, 40, 42, 44, 46], "print": [10, 34, 35], "stdout": [10, 27, 34], "reset": [10, 35], "order": [10, 23, 36, 40, 41, 43, 50], "float": [10, 36], "dump": [10, 34], "metricencod": 10, "skipkei": 10, "fals": [10, 12, 21, 23, 25, 28, 31, 33, 35, 37], "ensure_ascii": 10, "check_circular": 10, "allow_nan": 10, "sort_kei": 10, "indent": 10, "separ": [10, 23, 26, 36, 40], "jsonencod": 10, "obj": 10, "collect": [10, 23, 30, 34, 35, 36, 37, 41, 42, 45], "metricsstor": 10, "deprec": 10, "And": [10, 27, 32, 35, 42], "keep": [10, 32, 35], "add_count": [10, 36], "counter": [10, 37], "increment": [10, 36], "add_error": 10, "error": [10, 13, 23, 34], "add_metr": [10, 36], "add_perc": [10, 36], "percentag": 10, "add_siz": [10, 36], "kb": [10, 36, 39], "gb": [10, 36], "add_tim": [10, 35, 36], "ms": [10, 21, 34, 35, 36, 37, 53], "latenc": [10, 29, 31, 36, 40, 41, 42], "accept": [10, 23, 25, 33, 35, 36], "memori": [10, 21, 23, 33, 36, 40, 42], "pass": [10, 16, 17, 21, 23, 25, 33, 35, 38, 42, 43, 46, 52, 53], "pid": [10, 21, 32, 35], "gpuid": [10, 21], "check_process_mem_usag": 10, "stdin": 10, "mem_util": 10, "get_cpu_usag": 10, "psutil": [10, 32, 49], "collect_al": 10, "mod": 10, "num_of_gpu": 10, "cpu_util": 10, "disk_avail": 10, "disk_us": 10, "disk_util": 10, "gpu_util": 10, "memory_avail": 10, "memory_us": 10, "memory_util": 10, "element": 10, "modelservic": 11, "wrap": [11, 45], "preprocess": [11, 13, 25, 33, 34, 35, 43, 52, 53], "manner": 11, "easili": [11, 44], "backward": [11, 32, 42], "raw": [11, 13, 14, 21, 25, 27, 31, 49, 51], "back": [11, 25, 33], "client": [11, 20, 24, 31, 32, 33, 35, 40, 44], "healthi": [11, 21, 31], "singlenodeservic": 11, "singlenodemodel": 11, "otf": 12, "create_load_model_respons": 12, "create_predict_respons": 12, "ret": 12, "req_id_map": 12, "ts_stream_next": 12, "encode_response_head": 12, "resp_hdr_map": 12, "retrieve_msg": 12, "conn": 12, "retriev": [12, 36], "channel": [12, 23], "send_intermediate_predict_respons": [12, 29, 31, 33], "state_dict": 13, "basehandl": [13, 15, 16, 18, 26, 35, 36, 38, 42], "abc": [13, 14, 33, 36], "describe_handl": [13, 35], "explain_handl": [13, 25, 35], "data_preprocess": [13, 25, 35], "raw_data": [13, 25], "tensor": [13, 25, 33, 41, 53], "unprocess": [13, 25], "target": [13, 21, 25, 37, 41, 50], "outcom": [13, 35, 50], "contain": [13, 23, 25, 26, 33, 35, 38, 43, 45, 50], "pertain": [13, 34, 35], "artefact": 13, "kwarg": [13, 16, 25], "pt": [13, 16, 17, 25, 38, 50], "first": [13, 21, 23, 25, 27, 28, 30, 40, 41, 46], "rais": [13, 25], "runtimeerror": [13, 25], "setup_ort_sess": 13, "model_pt_path": [13, 25, 42], "map_loc": 13, "densenethandl": 13, "match": [13, 25, 36], "list_classes_from_modul": [13, 18], "parent_class": [13, 18], "imageclassifi": [13, 15, 25], "visionhandl": 13, "get_max_result_class": 13, "image_process": 13, "resiz": 13, "256": 13, "interpol": 13, "bilinear": 13, "max_siz": 13, "antialia": 13, "warn": [13, 34], "centercrop": 13, "224": 13, "totensor": 13, "normal": 13, "mean": [13, 32, 42], "485": 13, "456": 13, "406": 13, "std": 13, "229": 13, "225": 13, "set_max_result_class": 13, "topk": 13, "imagesegment": [13, 15], "n": [13, 22, 23, 26, 29, 31, 33, 34, 35, 48], "k": [13, 21, 50], "objectdetector": [13, 15], "threshold": 13, "NOT": 13, "textclassifi": 13, "texthandl": 13, "get_insight": [13, 25], "text_preprocess": 13, "calcul": [13, 25, 33], "insight": [13, 25, 42], "word": 13, "import": [13, 25, 29, 31, 33, 35, 36, 42], "form": [13, 14, 23, 36, 42], "whose": [13, 25, 39], "ngram": 13, "2": [13, 14, 21, 23, 25, 26, 28, 30, 31, 33, 35, 36, 41, 50, 52, 53], "come": [13, 23, 32, 41, 42, 45], "output_explain": [13, 25], "hit": 13, "basic": [13, 15, 25, 33], "cleanup": 13, "oper": [13, 21, 36, 40, 41, 42], "html": [13, 22], "lowercas": 13, "expand": 13, "i": [13, 22, 23, 29, 30, 31, 33, 36, 38, 41, 48, 50], "don": [13, 32], "do": [13, 22, 23, 25, 26, 33, 34, 36, 38, 42, 50], "accent": 13, "punctuat": 13, "source_vocab": 13, "after": [13, 27, 28, 34, 35, 36, 41, 42, 45, 46, 48], "perfom": 13, "get_source_vocab_path": 13, "ctx": [13, 33, 36], "get_word_token": 13, "input_token": 13, "construct": 13, "necessari": [13, 27, 33], "summarize_attribut": 13, "attribut": [13, 25, 36, 42], "summaris": 13, "multipl": [13, 23, 31, 33, 38, 40, 42, 50], "vision": [13, 23, 41, 50], "tensor_data": 13, "requestenvelop": 14, "reformat": 14, "orchestr": [14, 43], "seldon": [14, 32, 43], "flat": [14, 43], "item": [14, 35, 41, 48, 52], "vice": 14, "versa": 14, "baseenvelop": 14, "handle_fn": [14, 15], "interfac": [14, 23, 48], "format_output": 14, "ar": [14, 21, 22, 23, 25, 27, 28, 29, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 48, 50, 53], "parse_input": 14, "grab": 14, "bodyenvelop": 14, "structur": [14, 43, 53], "outlin": 14, "www": [14, 23, 33], "tensorflow": 14, "org": [14, 21, 23, 27, 31, 32, 33, 34, 35, 48], "tfx": 14, "api_rest": 14, "jsonenvelop": 14, "implement": [14, 25, 35, 41, 42, 53], "captur": [14, 34, 53], "insid": [14, 40], "kserveenvelop": 14, "readabl": 14, "kservev2envelop": 14, "fserv": 14, "id": [14, 18, 21, 23, 25, 35, 36, 50], "f0222600": 14, "353f": 14, "47df": 14, "8d9d": 14, "c96d96fa894": 14, "bert": [14, 25, 39, 40, 41], "model_vers": [14, 35, 36, 37], "datatyp": 14, "int64": 14, "37": 14, "66": 14, "108": [14, 36], "109": 14, "ensur": [15, 25, 36, 41, 48], "execut": [15, 18, 22, 23, 27, 28, 37, 40, 42, 48, 50, 53], "base_model_context": 15, "test_batch_handl": 15, "test_inference_with_profiler_works_with_custom_initialize_method": 15, "test_single_handl": 15, "test_binari": 15, "test_bodi": 15, "test_json": 15, "test_json_batch": 15, "test_json_double_batch": 15, "complex": 15, "make": [15, 21, 23, 25, 26, 27, 31, 32, 33, 35, 36, 40, 41, 42, 43, 45, 48, 50, 51], "sure": [15, 21, 25, 26, 27, 33, 35, 40, 41, 43, 48, 50], "mux": 15, "sever": [15, 32], "demux": 15, "result": [15, 25, 29, 31, 33, 40, 42, 51], "image_byt": 15, "tmp_path_factori": 15, "test_handl": 15, "test_handle_explain": 15, "simpl": [16, 27, 41], "feed": [16, 41, 53], "forward": [16, 25], "argmaxmodel": 16, "comput": [16, 27, 40], "everi": [16, 28, 36, 53], "subclass": 16, "although": 16, "recip": 16, "within": [16, 31, 35, 36, 45], "afterward": 16, "sinc": [16, 36, 41], "former": 16, "care": [16, 32], "regist": [16, 20, 21, 23, 24, 27, 29, 38, 42, 45, 50, 53], "hook": 16, "latter": 16, "silent": 16, "ignor": [16, 36, 46], "save_pt_fil": 16, "filepath": 16, "mock": 17, "ad": [17, 30, 32, 33, 36, 40, 42], "without": [17, 33, 35, 40, 45, 46], "mockcontext": 17, "model_pt_fil": 17, "tmp": [17, 50], "model_fil": 17, "mnist": [17, 25, 31, 39, 45, 50], "model_yaml_config_fil": 17, "replic": 17, "exp": 17, "timeit": 18, "decor": 18, "func": 18, "pt2backend": 18, "enum": [18, 32], "enumer": 18, "aot_cudagraph": 18, "aot_eag": 18, "aot_nvfus": 18, "fx2trt": 18, "inductor": 18, "ipex": [18, 42], "nvfuser": 18, "ofi": 18, "onnxrt": 18, "torchxla_trace_onc": 18, "except": [18, 21, 36, 40], "predictionexcept": [18, 25], "error_cod": 18, "check_valid_pt2_backend": 18, "get_yaml_config": 18, "yaml_file_path": 18, "load_label_map": 18, "mapping_file_path": 18, "friendli": [18, 26], "map_class_to_label": 18, "prob": 18, "lbl_class": 18, "probabl": [18, 26, 31], "stream": [20, 24, 31], "unregist": [20, 23, 24, 29, 53], "aggreg": [21, 32, 35, 36, 40], "send": [21, 23, 25, 27, 29, 31, 33], "ml": [21, 40], "dl": 21, "onc": [21, 29, 31, 33, 37, 42, 50], "design": [21, 23, 32], "nativ": [21, 23, 42], "host": [21, 23, 27, 28, 32, 33, 34, 36, 37, 44, 45, 50], "resourc": [21, 27, 29, 33, 35, 36, 40, 41, 48, 52], "most": [21, 23, 25, 31, 32, 34, 41, 42], "turn": 21, "reduc": [21, 41], "expens": [21, 41], "jump": 21, "what": [21, 30, 33, 42, 45], "max_batch_delai": [21, 35], "know": [21, 27, 41, 45], "maximum": [21, 23, 31, 35, 40, 52, 53], "fill": [21, 40], "each": [21, 23, 26, 29, 31, 33, 40, 42, 46, 53], "full": [21, 29, 31, 33, 35], "see": [21, 23, 25, 26, 27, 31, 33, 34, 35, 36, 37, 40, 42, 44, 45, 51, 52], "hug": 21, "4": [21, 23, 33, 35, 36, 37, 39, 47, 52, 53], "interest": [21, 42, 45], "delai": [21, 23, 34, 35, 53], "receiv": [21, 23, 29, 31, 33, 34, 36, 40, 53], "doesn": [21, 23, 35], "timer": 21, "ever": 21, "were": [21, 36, 39, 45], "let": [21, 23, 45], "mar": [21, 23, 25, 27, 31, 32, 33, 35, 36, 38, 39, 45, 50, 52, 53], "50": [21, 53], "millisecond": [21, 35, 36, 37, 53], "8081": [21, 23, 35, 44, 50, 52], "milli": 21, "second": [21, 23, 28, 35, 40, 42, 50], "defaultvers": [21, 23], "marnam": [21, 23], "minwork": [21, 23, 31, 33, 35, 52], "maxwork": [21, 23, 33, 35, 52], "maxbatchdelai": [21, 23, 33, 35, 52], "responsetimeout": [21, 23, 33], "120": [21, 23, 33, 35], "associ": [21, 36], "relat": [21, 27, 33, 50], "frontend": [21, 23, 27, 29, 31, 33, 48], "tri": [21, 38], "bring": [21, 30], "inferenc": [21, 32], "thing": [21, 42, 45], "includ": [21, 26, 36, 42, 43, 50, 53], "port": [21, 29, 31, 32, 35, 37, 44, 45, 48, 51, 52], "8080": [21, 23, 27, 31, 35, 44, 50, 51], "inference_address": [21, 23], "127": [21, 23, 27, 31, 34, 37, 50], "management_address": [21, 23], "have": [21, 23, 25, 27, 29, 31, 33, 35, 36, 37, 40, 41, 42, 45, 48, 50, 53], "go": [21, 30, 41, 43], "10m": 21, "mar_fil": [21, 31, 33, 35], "batch_v2": 21, "10": [21, 23, 34, 35, 36, 48, 49], "initial_work": [21, 35, 50], "properli": [21, 35], "modelvers": [21, 35], "modelurl": [21, 35], "loadedatstartup": [21, 35], "9000": [21, 34, 35, 37], "starttim": [21, 35, 36], "2021": 21, "06": 21, "14t23": 21, "18": [21, 31, 34, 40, 45], "21": [21, 36, 37], "793z": 21, "memoryusag": [21, 35], "1726554112": 21, "19946": 21, "gpuusag": [21, 35], "mib": 21, "678": 21, "ljo": 21, "kitten": [21, 27, 29, 39, 50], "jpg": [21, 27, 29, 31, 39, 50, 51], "tiger_cat": [21, 27], "5848360657691956": 21, "tabbi": [21, 27, 31], "3782736361026764": 21, "egyptian_cat": [21, 27], "03441936895251274": 21, "lynx": [21, 27], "0005633446853607893": 21, "quilt": 21, "0002698268508538604": 21, "about": [21, 23, 25, 27, 41, 42, 45], "pleas": [21, 26, 35, 39, 40, 42, 50], "5000": [21, 52, 53], "Then": [21, 27, 33, 34], "14t22": 21, "44": [21, 35], "36": 21, "742z": 21, "19116": 21, "similar": [21, 33, 35], "previou": [21, 34, 35, 36, 40, 52], "being": [21, 33, 36, 41, 50], "entrypoint": 21, "referenc": [21, 36], "metrics_address": [21, 23], "8082": [21, 23, 37], "number_of_netty_thread": [21, 23], "32": [21, 40], "job_queue_s": [21, 23], "1000": [21, 35, 36], "home": [21, 22, 23, 27, 40, 48, 49, 50], "100": [21, 23, 33, 34, 35, 52], "g": [21, 22, 23, 31, 33, 35, 41, 48, 50, 53], "cv": 21, "cu102": [21, 22, 27], "rm": [21, 50], "p": [21, 22, 50], "v": [21, 35, 45, 50], "ubuntu": [21, 28, 35, 40], "alreadi": [22, 27, 36, 41, 45, 53], "dev": [22, 28, 32, 48], "cu121": [22, 27], "cu118": [22, 27], "cu117": [22, 27, 28], "cu116": [22, 27, 28], "cu113": [22, 27], "cu111": [22, 27], "cu101": [22, 27], "cu92": [22, 27], "gradlew": [22, 32], "clean": [22, 23, 42], "checkstyl": 22, "findbug": 22, "pmd": 22, "ut": 22, "report": [22, 27, 42], "m": [22, 27, 29, 30, 34], "cov": 22, "htmlcov": 22, "pylint": 22, "rn": 22, "rcfile": 22, "pylintrc": 22, "cd": [22, 29, 48, 49], "htmlcov_ut": 22, "model_archiv": 22, "pip": [22, 27, 29, 33, 48, 49], "htmlcov_it": 22, "integ_test": 22, "abov": [22, 23, 25, 27, 34, 36, 39, 40, 48, 50, 53], "excut": 22, "npm": [22, 48], "linux": [22, 24, 28], "sudo": [22, 37, 40, 49], "apt": [22, 49], "y": 22, "nodej": [22, 48], "mac": 22, "brew": 22, "broken": 22, "directori": [22, 23, 25, 27, 45, 46, 50], "recurs": 22, "config": [22, 31, 32, 36, 37, 38, 40, 42, 43, 45, 46, 48, 50], "link_check_config": 22, "done": [22, 25, 36, 42], "suffici": 23, "want": [23, 25, 27, 34, 35, 36, 38, 40, 45, 50], "topic": [23, 30, 45], "avail": [23, 26, 27, 31, 33, 35, 36, 42, 45, 46, 48, 50], "three": 23, "prioriti": 23, "thei": [23, 33, 36, 45], "chang": [23, 25, 27, 31, 35, 36, 37, 46, 48, 51, 52, 53], "behavior": [23, 25, 32, 33, 45], "java": [23, 28, 44, 48], "pythonpath": [23, 35], "higher": [23, 40, 42], "ts_config_fil": 23, "log4j2": [23, 34, 36, 45], "xml": [23, 34, 36, 45], "foreground": 23, "disabl": [23, 34, 37, 45], "footprint": [23, 40], "vmarg": [23, 34], "adjust": [23, 29, 35], "fit": [23, 32, 33], "valid": 23, "present": [23, 25, 35, 36, 45, 48, 52], "model1": [23, 53], "model2": [23, 53], "disk": [23, 36, 41], "pathnam": 23, "authent": 23, "avoid": [23, 27, 42], "unauthor": 23, "bind": 23, "ip": 23, "8443": [23, 50], "privat": [23, 35], "network": [23, 25, 41], "172": 23, "16": [23, 34, 40], "7070": [23, 29], "7071": [23, 29], "grpc_inference_port": 23, "grpc_management_port": 23, "coupl": [23, 36], "443": 23, "whatev": 23, "traffic": 23, "must": [23, 25, 27, 35, 42, 52], "certif": 23, "keystor": 23, "password": 23, "pkcs12": 23, "pkcs8": 23, "openssl": 23, "x509": 23, "chain": 23, "keytool": 23, "storepass": 23, "own": [23, 25, 35, 36, 40, 42], "genkei": 23, "keyalg": 23, "rsa": 23, "alia": [23, 48], "p12": 23, "changeit": 23, "storetyp": 23, "3600": 23, "keysiz": 23, "2048": 23, "dname": 23, "cn": 23, "my_t": 23, "ou": 23, "o": [23, 27, 31, 41, 49, 51], "l": 23, "palo": 23, "alto": 23, "st": 23, "california": 23, "8444": [23, 50], "8445": [23, 50], "keystore_pass": 23, "keystore_typ": 23, "self": [23, 25, 28, 33, 35, 36, 38, 42], "sign": 23, "cert": 23, "req": 23, "dai": [23, 28], "365": [23, 37], "newkei": 23, "keyout": 23, "mykei": 23, "mycert": 23, "pem": 23, "private_key_fil": 23, "certificate_fil": 23, "addit": [23, 33, 35, 36, 40, 41, 42, 43, 45], "header": [23, 31, 33], "tell": [23, 31, 34, 42, 45], "browser": [23, 37], "web": [23, 44, 45], "domain": 23, "permiss": 23, "select": [23, 25, 40], "cors_allowed_origin": 23, "yourdomain": 23, "preflight": 23, "cors_allowed_method": 23, "put": [23, 25, 32, 33, 35, 50], "cors_allowed_head": 23, "xx": 23, "maxdirectmemorys": 23, "affect": [23, 41], "prefer_direct_buff": 23, "depend": [23, 27, 28, 29, 32, 33, 36, 40, 42, 48, 49], "part": [23, 25, 30, 32, 35, 52], "seamless": [23, 25], "install_py_dep_per_model": [23, 50], "tar": 23, "gz": [23, 34], "might": [23, 27, 34, 35, 41, 45], "sensit": 23, "credenti": [23, 35], "arbitrari": 23, "secur": 23, "risk": 23, "blacklist_env_var": 23, "regular": [23, 36, 42], "express": 23, "filter": 23, "number_of_gpu": [23, 42, 50], "devic": [23, 25, 26, 33, 40, 50], "pci": 23, "bu": 23, "enable_metrics_api": [23, 37], "parametername1": 23, "parametervalue1": 23, "parametername2": 23, "parametervalue2": 23, "parameternamen": 23, "parametervaluen": 23, "minimum": [23, 35, 53], "msec": 23, "timeout": [23, 32, 33, 35, 53], "sec": 23, "over": [23, 29, 31, 33, 40, 41, 50, 53], "default_response_timeout": 23, "noop": [23, 35], "vgg16": [23, 39, 45], "yaml": [23, 31, 38, 42, 52, 53], "embed": [23, 25], "distinct": 23, "determin": [23, 32, 34, 40, 42, 50], "final": [23, 36, 40], "lowest": 23, "highest": [23, 31, 41], "fulli": [23, 40], "pippi": 23, "rpc": [23, 29, 33], "alloc": [23, 27, 40], "deviceid": [23, 33, 36], "round": [23, 25, 33, 35, 36], "robin": [23, 25, 33], "strategi": [23, 28], "assign": [23, 32, 33, 35], "otherwis": [23, 26, 33], "tune": [23, 30, 42], "impact": [23, 41, 42], "scalabl": 23, "throughput": [23, 27, 33, 34, 40, 41, 42], "enable_envvars_config": 23, "thread": [23, 32, 33, 36, 42], "child": 23, "eventloopgroup": 23, "group": 23, "eventloop": [23, 32], "event": 23, "logic": [23, 33, 42, 45], "netty_client_thread": 23, "workerthread": [23, 32], "default_workers_per_model": 23, "job": [23, 28, 30, 32, 36], "queue": [23, 33, 36, 40], "async_log": [23, 34], "asynchron": [23, 35], "deem": [23, 35], "unrespons": [23, 35], "reboot": [23, 35], "unregister_model_timeout": 23, "decode_input_request": 23, "decod": [23, 31, 33, 35, 41], "known": [23, 25, 41, 46, 49], "bytearrai": 23, "convers": [23, 42], "initial_worker_port": 23, "auto": [23, 33], "model_server_hom": 23, "6553500": 23, "pil": 23, "pixel": 23, "larg": [23, 24, 30, 38, 41], "payload": 23, "allowed_url": 23, "comma": 23, "regex": 23, "amazonaw": 23, "use_env_allowed_url": 23, "workflow_stor": [23, 36, 45, 52], "disable_system_metr": 23, "ts_": 23, "property_nam": 23, "ts_inference_address": 23, "troubleshoot": [24, 30], "coverag": 24, "advanc": [24, 33], "window": [24, 31, 46], "subsystem": 24, "wsl": 24, "faq": [24, 30], "invok": [25, 36, 45, 50], "Is": [25, 38], "dir": [25, 33, 50], "shown": [25, 33, 36, 40, 41, 42], "ll": [25, 27, 42], "act": 25, "def": [25, 29, 31, 33, 35, 36, 53], "entry_point_function_nam": 25, "sampl": [25, 27, 36, 39], "jit": [25, 32, 42], "similarli": 25, "global": [25, 37, 53], "is_avail": 25, "serialized_fil": 25, "serializedfil": 25, "os": [25, 28, 35], "join": [25, 31, 33], "isfil": 25, "engag": 25, "ask": [25, 30, 36], "startup": [25, 32, 46], "down": [25, 46], "against": [25, 34], "typic": 25, "modelhandl": [25, 33], "__init__": [25, 33], "_context": 25, "prediciton": 25, "pred_out": 25, "unexpect": 25, "513": 25, "nonetheless": 25, "below": [25, 26, 27, 33, 36, 37, 38, 40], "init": [25, 33], "pattern": [25, 34], "maintain": [25, 35], "model_handl": 25, "preprocessed_data": 25, "model_input": 25, "ndarrai": 25, "model_output": 25, "inference_output": 25, "postprocess_output": 25, "achiev": [25, 31, 33, 41], "place": [25, 50], "written": [25, 43], "hi": 25, "algorithm": [25, 32, 33], "lig": 25, "layerintegratedgradi": 25, "captum_sequence_forward": 25, "_is_explain": [25, 35], "so": [25, 27, 33, 36, 48], "neccessari": 25, "logger": [25, 35], "info": [25, 34, 35], "row": 25, "isinst": 25, "statement": 25, "default_handler_nam": 25, "defaulthandlerclass": 25, "customimageclassifi": 25, "procsess": 25, "goe": 25, "digit": [25, 39, 45], "identifi": 25, "model_version_numb": 25, "path_to_model_architecture_fil": 25, "path_to_state_dict_fil": 25, "comma_seperarted_additional_fil": 25, "python3": 25, "skip": 25, "waveglow_synthes": 25, "waveglow_model": 25, "nvidia_waveglowpyt_fp32_20190306": 25, "pth": [25, 27, 50], "waveglow_handl": 25, "tacotron": 25, "nvidia_tacotron2pyt_fp32_20190306": 25, "vcpu": [25, 27], "fashion": [25, 40], "consum": 26, "imagenet": [26, 39], "dataset": [26, 39], "rgb": 26, "top": [26, 43], "torchvis": [26, 41, 47, 49], "ag": 26, "comprehens": 26, "page": [26, 33, 35, 39, 42, 45, 52], "automat": [26, 27, 29, 33, 43, 50], "numer": 26, "simpli": [26, 33, 43], "welcom": 26, "isn": 26, "cover": [26, 34, 42, 45], "model_packag": 26, "alwai": [26, 41, 43], "saniti": 26, "submit": [26, 32, 39], "conda": [27, 32], "12": 27, "9": [27, 39], "complet": [27, 29, 31, 33, 34, 35, 41, 50], "clone": [27, 29, 48, 49], "repositori": 27, "git": [27, 29, 47, 48, 49], "parent": 27, "root": 27, "my_path": 27, "mkdir": [27, 29, 50], "wget": [27, 49], "densenet161": [27, 29, 39], "8d451a50": 27, "repo": [27, 29], "densenet_161": 27, "index_to_nam": [27, 50], "equal": [27, 31, 33, 36], "power": 27, "lot": [27, 45], "autosc": 27, "consider": 27, "minim": [27, 33, 37], "move": [27, 33, 50], "later": 27, "finer": 27, "grain": 27, "particular": 27, "u": [27, 29], "grpcio": [27, 29], "protobuf": [27, 29, 33], "proto": [27, 29], "grpc_tool": [27, 29], "protoc": [27, 29], "proto_path": [27, 29], "src": [27, 29, 32, 34, 36, 48], "python_out": [27, 29], "grpc_python_out": [27, 29], "cute": 27, "githubusercont": [27, 31, 49, 51], "kitten_smal": [27, 31, 39, 51], "46933549642562866": 27, "4633878469467163": 27, "06456148624420166": 27, "0012828214094042778": 27, "plastic_bag": 27, "00023323034110944718": 27, "interact": [27, 32], "ve": [27, 42], "seen": 27, "deep": [27, 33, 42, 45], "learn": [27, 30, 33, 42, 45], "registr": [27, 32, 35, 50, 52, 53], "record": 27, "high": [27, 29, 31, 33, 34, 41, 42, 45], "level": [27, 34, 36, 37, 40, 41, 42, 45, 53], "percentil": 27, "precis": 27, "visual": [27, 48], "debugg": 27, "under": [28, 35, 42, 43, 48], "trigger": 28, "manual": [28, 36], "workflow_dispatch": 28, "push": 28, "branch": 28, "pull": [28, 32, 39], "pull_request": 28, "nightli": [28, 32], "15am": 28, "schedul": [28, 40, 42], "cron": 28, "15": [28, 34, 37, 47], "02": [28, 35], "everyth": 28, "align": 28, "machin": [28, 30], "20": [28, 35, 37, 41], "04": 28, "ci": [28, 32], "term": [28, 40], "matrix": 28, "maco": [28, 35], "fail": [28, 48, 53], "fast": [28, 42], "indic": [28, 50], "One": [28, 36], "11": [28, 40], "7": [28, 33, 39], "v3": 28, "architectur": [28, 30], "x84": 28, "zulu": 28, "shell": 28, "codecov": 28, "io": [28, 41], "chmod": 28, "streampredict": [29, 33], "registermodel": [29, 35], "unregistermodel": [29, 35], "free": [29, 35, 42, 52], "scalework": [29, 35], "dynam": [29, 30, 32, 33, 35, 36, 41], "better": [29, 33, 35, 40, 42], "listmodel": [29, 35], "queri": [29, 35, 37, 52], "describemodel": [29, 35], "setdefault": [29, 35], "stub": 29, "intermedi": [29, 31, 33], "llm": [29, 31, 33], "until": [29, 31, 33, 35], "forc": [29, 33], "inferenceapisservic": [29, 31, 33], "empti": [29, 33], "torchservehealthrespons": [29, 33], "predictionsrequest": [29, 33], "predictionrespons": [29, 33], "style": [29, 31, 33], "rang": [29, 31, 33, 36], "intermediate_respons": [29, 31, 33], "success": [29, 31, 33, 34, 46], "hello": [29, 31, 33], "world": [29, 31, 33], "anim": [30, 42], "draw": [30, 42], "studi": [30, 42], "walmart": 30, "search": 30, "grok": 30, "intel": [30, 41], "principl": [30, 41], "amazon": [30, 36, 37], "inferentia": 30, "sagemak": [30, 32], "children": 30, "life": 30, "evolut": 30, "cresta": 30, "migrat": [30, 36], "quantit": 30, "comparison": 30, "platform": [30, 32, 43], "indepth": 30, "why": [30, 45], "best": [30, 42], "practic": 30, "improv": [30, 40, 41, 42], "perfrom": 30, "view": [30, 31, 35, 37, 45], "torcherv": 30, "frequent": 30, "question": 30, "swagger": [31, 35, 44], "codegen": [31, 35, 44], "modelserv": [31, 35], "maxretrytimeoutinsec": 31, "5min": 31, "recov": 31, "dead": 31, "activ": [31, 33, 39, 40, 41, 46], "unhealthi": 31, "less": 31, "resnet": [31, 34, 39, 45], "f": [31, 50], "squeezenet1_1": [31, 35], "dog": [31, 39, 45], "re": [31, 42, 43, 45, 48, 53], "open": [31, 32, 33, 42], "rb": 31, "n02123045": 31, "42514491081237793": 31, "chunk": 31, "test_echo_stream_infer": [31, 33], "start_torchserv": [31, 33], "no_config_snapshot": [31, 33], "gen_mar": [31, 33], "register_model": [31, 33], "echo_stream": [31, 33], "tf_inference_api": [31, 33], "foo": [31, 33], "assert": [31, 33], "transfer": [31, 33], "iter_cont": [31, 33], "chunk_siz": [31, 33], "append": [31, 33, 48], "unregister_model": [31, 33], "test_data": 31, "png": [31, 39], "004570948731989492": 31, "006216969640322402": 31, "008197565423679522": 31, "009563574612830427": 31, "008999274832810742": 31, "009673474804303854": 31, "007599905146155397": 31, "v1": [31, 35], "kf_request_json": 31, "spec": 31, "grade": 32, "track": 32, "those": [32, 36, 42, 46], "weight": [32, 38, 42, 45], "compon": [32, 41, 48], "portion": 32, "lifecycl": 32, "actual": [32, 45], "script_modul": 32, "eager_mode_model": 32, "along": [32, 33, 43, 45, 50], "storag": 32, "authz": 32, "authn": 32, "drop": [32, 42], "loadabl": 32, "docker": [32, 42, 50], "dockerfil": 32, "experiment": 32, "project": [32, 33], "guarante": 32, "k8": 32, "serving_sdk": 32, "potenti": [32, 41], "stuff": 32, "termin": [32, 34, 35, 48], "easier": [32, 42], "kfserv": 32, "startserv": 32, "8903ca1fb059eab3c1e8eccdee1376d4ff52fb67": 32, "wlm": [32, 34], "workerstatelisten": 32, "close": 32, "workerst": 32, "workloadmanag": 32, "concurr": [32, 40, 41, 42], "hashmap": 32, "backendgroup": 32, "threadpool": 32, "executor": 32, "pool": 32, "task": 32, "batchaggreg": 32, "modelmanag": 32, "apiutil": 32, "configmanag": [32, 48], "split": [33, 53], "partit": 33, "speed": [33, 41, 42], "torchrun": 33, "capabl": 33, "visibl": 33, "suppos": 33, "eight": [33, 40], "nproc": 33, "worker1": 33, "worker2": 33, "illustr": 33, "stage": 33, "microbatch": 33, "inherit": [33, 35, 38], "our": [33, 40], "custom_handl": 33, "base_pippy_handl": 33, "basepippyhandl": 33, "handler_util": 33, "pt_pippi": 33, "initialize_rpc_work": 33, "get_pipline_driv": 33, "super": [33, 34, 45], "local_rank": 33, "device_count": 33, "world_siz": 33, "devicetyp": 33, "paralleltyp": 33, "pp": 33, "tp": 33, "pptp": 33, "rout": 33, "rank0": 33, "rank": 33, "fontend": 33, "wish": [33, 46], "input_nam": 33, "input_id": 33, "fx": 33, "trace": 33, "model_typ": 33, "hf": 33, "wise": 33, "leav": 33, "blank": 33, "rpc_timeout": 33, "1800": 33, "num_worker_thread": 33, "max_length": 33, "80": 33, "length": [33, 35, 41], "bloom": 33, "pippy_handl": 33, "model_checkpoints_path": 33, "tgz": 33, "progress": 33, "soon": 33, "microsoft": [33, 48], "get_ds_engin": 33, "base_deepspeed_handl": 33, "basedeepspeedhandl": 33, "ds_engin": 33, "ds": 33, "filenam": [33, 34], "dtype": 33, "float16": 33, "replace_with_kernel_inject": 33, "tensor_parallel": 33, "tp_size": 33, "method1": 33, "method2": 33, "ds_build_op": 33, "deepspeed_handl": 33, "advantag": 33, "further": [33, 34, 36, 37], "maxim": 33, "appli": [33, 42], "low_cpu_mem_usag": 33, "librari": [33, 42], "checkpoint": 33, "pretrain": [33, 50], "hub": [33, 50], "cach": [33, 36], "caus": 33, "omp_number_thread": 33, "immedi": [33, 35], "incur": 33, "503": 33, "chatgpt": 33, "effect": [33, 41], "action": 33, "reject": 33, "capac": 33, "busi": 33, "jobqueues": 33, "usejobticket": 33, "understand": [34, 41, 43, 50], "layout": 34, "Be": 34, "familiar": 34, "log4j": [34, 45], "rollingfil": 34, "access_log": 34, "env": [34, 48], "filepattern": 34, "dd": 34, "mmm": 34, "patternlayout": [34, 36], "iso8601": 34, "polici": 34, "sizebasedtriggeringpolici": 34, "timebasedtriggeringpolici": 34, "defaultrolloverstrategi": 34, "2018": [34, 35], "13": [34, 37], "56": 34, "976": 34, "backendwork": 34, "64003": 34, "118": 34, "remot": [34, 42, 44, 45], "took": 34, "ts_log": 34, "5p": 34, "debug": 34, "stderr": 34, "14": 34, "46": [34, 36], "51": 34, "656": 34, "workerlifecycl": 34, "nnvm": 34, "legacy_json_util": 34, "cc": 34, "209": 34, "symbol": 34, "v0": [34, 35, 36], "attempt": [34, 53], "upgrad": 34, "657": 34, "217": [34, 39], "successfulli": [34, 50, 52], "59": 34, "926": 34, "60": 34, "117": 34, "31": 34, "52": 34, "dlog4j": 34, "configurationfil": 34, "altern": [34, 35], "lightweight": 34, "consid": [34, 36, 40], "recent": 34, "lost": 34, "unexpectedli": 34, "decreas": [35, 40], "managementapisservic": 35, "subfold": [35, 52], "internet": [35, 52], "module_nam": 35, "method_nam": 35, "least": 35, "synchron": [35, 50], "acknowledg": 35, "onlin": 35, "response_timeout": 35, "respond": 35, "period": 35, "squeezenet_v1": [35, 45], "sse": 35, "secret": 35, "region": 35, "squeezenet1": 35, "account": [35, 36], "iam": 35, "role": 35, "awss3readonlyaccess": 35, "s3_sse_km": 35, "forgot": 35, "between": [35, 40], "202": 35, "4dc54158": 35, "c6de": 35, "42aa": 35, "b5dd": 35, "ebcb5f721043": 35, "47": 35, "aliv": 35, "been": [35, 36, 41, 45, 48, 50, 52], "ok": 35, "ecd2e502": 35, "382f": 35, "4c3b": 35, "b425": 35, "519fbf6d3b85": 35, "89": 35, "min_work": [35, 50], "max_work": 35, "pend": 35, "exceed": 35, "infinit": 35, "flavor": 35, "42adc58": 35, "6956": 35, "4198": 35, "ad07": 35, "db6c620c4c1e": 35, "b72b1ea0": 35, "81c6": 35, "4cce": 35, "92c4": 35, "530d3cfe5d4a": 35, "63": 35, "3997ccd4": 35, "ae44": 35, "4570": 35, "b249": 35, "e361b08d3d47": 35, "77": 35, "02t13": 35, "53": 35, "034z": 35, "89247744": 35, "jobqueuestatu": 35, "remainingcapac": 35, "pendingrequest": 35, "noop_2": 35, "metadata": [35, 45], "output_describ": 35, "_is_describ": 35, "start_tim": [35, 36], "is_profiler_en": 35, "enable_torch_profil": [35, 42], "_infer_with_profil": [35, 42], "stop_tim": [35, 36], "handlertim": [35, 36, 37], "customizedmetadata": 35, "9010": 35, "2022": 35, "08t11": 35, "03": 35, "974z": 35, "98972": 35, "data1": 35, "data2": 35, "delet": [35, 36, 52], "next_page_token": [35, 52], "next": [35, 40, 48, 50, 52], "pagin": [35, 52], "nextpagetoken": [35, 52], "noop_v0": 35, "broadli": 36, "interv": 36, "minut": 36, "metrics_mod": [36, 37], "ts_metrics_mod": 36, "agent": 36, "log_directori": 36, "ts_metric": 36, "model_metr": 36, "terminolog": 36, "alias": 36, "nameofcountermetr": 36, "gaug": [36, 37], "nameofgaugemetr": 36, "histogram": 36, "nameofhistogrammetr": 36, "inferencetimeinm": 36, "numberofmetr": 36, "count": [36, 37], "gaugemodelmetricnameexampl": 36, "histogrammodelmetricnameexampl": 36, "central": 36, "inventori": 36, "altogeth": 36, "unless": 36, "toward": 36, "percent": [36, 37], "whenev": 36, "metricscach": 36, "anyth": 36, "abil": 36, "primari": 36, "OR": [36, 50], "my_tc": 36, "bertseqclassif": [36, 40], "chosen": 36, "semant": 36, "requests2xx": [36, 37], "hostnam": [36, 37], "total": 36, "300": 36, "requests4xx": [36, 37], "400": 36, "requests5xx": [36, 37], "ts_inference_requests_tot": [36, 37], "ts_inference_latency_microsecond": [36, 37], "microsecond": [36, 37], "ts_queue_latency_microsecond": [36, 37], "queuetim": [36, 37], "spent": 36, "workerthreadtim": [36, 37], "exclud": 36, "workerloadtim": [36, 37], "workernam": [36, 37], "taken": [36, 46, 50], "cpuutil": [36, 37], "memoryus": [36, 37], "megabyt": [36, 37], "memoryavail": [36, 37], "memoryutil": [36, 37], "diskusag": [36, 37], "gigabyt": [36, 37], "diskutil": [36, 37], "diskavail": [36, 37], "gpumemoryutil": [36, 37], "gpumemoryus": [36, 37], "gpuutil": [36, 37], "predictiontim": [36, 37], "introduc": 36, "restrict": [36, 50], "metrictyp": 36, "statsd": 36, "my_machine_nam": 36, "timestamp": [36, 46], "1682098185": 36, "318": 36, "0416717529297": 36, "jsonpatternlayout": 36, "metricnam": 36, "15547180175781": 36, "124": 36, "13163757324219": 36, "qlog": 36, "qloglayout": 36, "qlogsetupmodeldepend": 36, "1646686978": 36, "program": [36, 48], "mxnetmodelserv": 36, "5790": 36, "98046875": 36, "eoe": 36, "147dda19895c": 36, "ant": [36, 37], "confus": [36, 37], "fetch": [36, 37], "posit": 36, "metric_typ": 36, "genericmetr": 36, "dimension_nam": 36, "name1": 36, "name2": [36, 45], "countermetr": 36, "criteria": 36, "add_": 36, "explicitli": [36, 42], "name_of_model": 36, "dim1": 36, "dim2": 36, "some_nam": 36, "some_valu": 36, "dimn": 36, "name_n": 36, "value_n": 36, "add_metric_to_cach": 36, "cachingmetr": 36, "newli": 36, "add_or_upd": 36, "dimension_valu": 36, "distanc": 36, "dim3": 36, "assum": [36, 43, 50], "distanceinkm": 36, "inferencetim": 36, "end_tim": 36, "sizeofimag": 36, "img_siz": 36, "utilization_perc": 36, "__str__": 36, "get_metr": 36, "gaugemetricnam": 36, "examplecustomhandl": 36, "metric_type_enum": 36, "customhandlerexampl": 36, "sleep": 36, "sake": 36, "handlerseparatecount": 36, "handlercount": 36, "78": 36, "anoth": [36, 50], "had": 36, "histogram_example_metr": 36, "idea": 36, "42": [36, 39], "approach": [36, 41, 42], "metric1": 36, "value1": 36, "value2": 36, "prior": 36, "suitabl": 36, "unlik": 36, "88665a372f4b": 37, "054508209228516": 37, "resnet18": [37, 39], "83": 37, "resnet18_1": 37, "4593": 37, "9001": 37, "4592": 37, "5829": 37, "7421875": 37, "82": 37, "93": 37, "290371": 37, "129": 37, "8245": 37, "62109375": 37, "325": 37, "05113983154297": 37, "64": 37, "globoff": 37, "instruct": [37, 42, 48, 50], "yml": 37, "scrape_interv": 37, "evaluation_interv": 37, "scrape_config": 37, "job_nam": 37, "static_config": 37, "9090": 37, "navig": 37, "graph": 37, "3000": 37, "systemctl": 37, "daemon": [37, 40], "reload": 37, "flowchart": 38, "simplifi": [38, 43], "td": 38, "id1": 38, "id13": 38, "id2": 38, "id3": 38, "id4": 38, "id5": 38, "onnx": [38, 42], "id6": 38, "tensorrt": [38, 41, 42], "id7": 38, "id8": 38, "id9": 38, "id10": 38, "id11": 38, "id21": 38, "id20": 38, "id15": 38, "id16": 38, "id14": 38, "id22": 38, "archiev": 38, "faster": [38, 39, 41], "id17": 38, "propos": 39, "inclus": 39, "special": 39, "thank": 39, "alexnet": 39, "216": 39, "106": 39, "41": 39, "489": 39, "squeezenet": [39, 45], "1_1": 39, "152": 39, "214": 39, "rcnn": 39, "coco": 39, "148": 39, "person": 39, "mask": 39, "158": 39, "ag_new": 39, "169": 39, "sample_text": 39, "fcn": 39, "101": 39, "193": 39, "deeplabv3": 39, "384": 39, "386": 39, "105": 39, "215": 39, "mmf": 39, "recognit": [39, 45], "charad": 39, "549": 39, "372cc": 39, "mp4": 39, "mgpu": 39, "sample_text_captum_input": [39, 40], "breed": 39, "war": [39, 52, 53], "spin": 40, "thu": [40, 41], "isol": 40, "kernel": [40, 42], "drawback": [40, 42], "occupi": [40, 42], "scenario": [40, 42], "offer": [40, 41, 43], "share": [40, 50], "leverag": [40, 42], "benefit": [40, 41], "smi": [40, 41, 48, 50], "exclus": 40, "shutdown": [40, 46], "echo": [40, 49], "quit": 40, "48": 40, "volta": 40, "lead": 40, "failur": [40, 53], "decis": 40, "repres": 40, "workload": [40, 42], "primarili": 40, "investig": 40, "evolv": 40, "g4dn": 40, "4xlarg": 40, "2xlarg": 40, "concentr": 40, "measur": 40, "ab": 40, "still": 40, "left": 40, "five": 40, "median": 40, "overwrit": [40, 42], "accordingli": 40, "huggingface_transform": 40, "10000": [40, 53], "600": 40, "seq_classification_artifact": 40, "skew": [40, 43], "therefor": 40, "neglect": 40, "figur": 40, "four": 40, "steadi": 40, "wors": 40, "interpret": 40, "experi": 40, "bigger": [40, 42], "With": [40, 50], "diminish": 40, "25": 40, "saw": 40, "mix": 40, "certain": 40, "suggest": [40, 41], "highli": [40, 42], "situat": 40, "sole": 40, "focus": [40, 45], "pack": [40, 45], "diagnos": 41, "nlp": 41, "pad": 41, "too": 41, "driver": 41, "releas": 41, "oftentim": 41, "bug": 41, "overal": 41, "htop": 41, "obviou": 41, "biggest": 41, "bottleneck": [41, 42], "beyond": 41, "much": [41, 45], "quantifi": 41, "mitig": 41, "slow": 41, "scope": 41, "techniqu": 41, "async": 41, "hide": 41, "cost": [41, 42], "constant": 41, "unnecessarili": 41, "magnitud": 41, "short": [41, 45], "jpeg": 41, "often": 41, "libjpeg": 41, "turbo": 41, "simd": 41, "dali": [41, 42], "old": 41, "k80": 41, "factor": 41, "low": [41, 42], "hang": 41, "fruit": 41, "evalu": 41, "address": 41, "fp16": 41, "doubl": 41, "newer": 41, "neglig": 41, "accuraci": 41, "degrad": 41, "technic": 41, "quantiz": [41, 42], "seldom": 41, "suffer": 41, "loss": 41, "explor": 41, "articl": [41, 42], "neural": 41, "int8": 41, "awar": 41, "compressor": 41, "sophist": 41, "worth": 41, "signific": 41, "right": [41, 48], "balanc": 41, "smart": 41, "meet": 41, "sla": 41, "onnxruntim": 41, "lightseq": 41, "ctranslat": 41, "fusion": [41, 42], "distil": [41, 42], "gain": 41, "minilm": 41, "99": 41, "origin": 41, "2x": [41, 42], "sort": 41, "unnecessari": [41, 42], "exhaust": 41, "squeez": 41, "checklist": 42, "outsid": 42, "trick": 42, "prune": 42, "encourag": 42, "hard": 42, "easiest": 42, "ort": 42, "fastertransform": 42, "favorit": 42, "feel": 42, "pr": 42, "acceler": 42, "bit": 42, "At": [42, 48], "ort_sess": 42, "inferencesess": 42, "sess_opt": 42, "treat": 42, "vs": 42, "nn": 42, "transformerencod": 42, "author": 42, "bettertransform": 42, "exce": 42, "speedup": 42, "vari": 42, "batch_delai": 42, "lower": 42, "heavi": 42, "On": 42, "significantli": 42, "theori": 42, "discuss": [42, 45], "quickli": 42, "summar": 42, "hyperthread": 42, "affin": 42, "physic": 42, "numa": 42, "cross": 42, "ex": [42, 48], "contrari": 42, "clearli": 42, "sub": 42, "valuetoset": 42, "uniqu": 42, "around": 42, "portabl": 42, "iter": 42, "popular": 42, "block": 42, "helper": 42, "p50": 42, "p90": 42, "p99": 42, "visit": 42, "link": [42, 45, 50], "app": [42, 45, 48], "fine": [42, 48], "meta": 42, "scientist": 43, "possibl": [43, 50], "analyt": 43, "though": 43, "underli": 43, "plain": 43, "crucial": 43, "service_envelop": 43, "scala": 44, "javascript": 44, "snap": 45, "pictur": 45, "scene": 45, "identif": 45, "intak": 45, "littl": 45, "weed": 45, "dive": 45, "kind": [45, 50], "themselv": 45, "long": 45, "stori": 45, "ts_config": 45, "model_path1": 45, "model_path2": 45, "log_config": 45, "exit": 45, "model_loc": 45, "talk": 45, "sai": 45, "fanci": 45, "net": 45, "hotdog": 45, "sens": 45, "hot": 45, "model_location2": 45, "compris": [46, 53], "cfg": 46, "shut": 46, "intention": 46, "resili": 46, "prevent": 46, "katex": 47, "recommonmark": 47, "sphinxcontrib": 47, "pyyaml": 47, "torchtext": [47, 49], "pytest": 47, "theme": 47, "certifi": 48, "2019": [48, 49], "admin": 48, "anaconda": 48, "powershel": 48, "openjdk17": 48, "unzip": 48, "edit": 48, "jdk": [48, 49], "3_window": 48, "x64_bin": 48, "gui": [48, 50], "bin": [48, 49], "prompt": 48, "wheel": 48, "prebuilt": 48, "click": 48, "whl": 48, "java_hom": 48, "redistribut": 48, "studio": 48, "2015": 48, "2017": 48, "your_install_dr": 48, "corpor": 48, "nvsmi": 48, "prod": 48, "setup_wsl_ubuntu": 49, "bash": 49, "bashrc": 49, "openjdk": 49, "sentencepiec": 49, "conveni": 50, "prerequisit": 50, "chrome": 50, "default_handl": 50, "your_model_nam": 50, "your_model_fil": 50, "mv": 50, "directli": 50, "copi": 50, "volum": 50, "your_model_store_dir": 50, "kept": 50, "min": [50, 53], "your_input_fil": 50, "demostr": 50, "public_url": [50, 52], "ssl": 50, "your_path": 50, "8433": 50, "local_dir": 50, "your_docker_imag": 50, "s3_path": 50, "notic": 50, "proerti": 50, "apart": 50, "lib": 50, "your_custom_handler_py_fil": 50, "fairseq": 50, "your_requirements_txt": 50, "wfpredict": 51, "workflow_nam": [51, 52], "myworkflow": [51, 52], "leaf": 51, "dag": [51, 52], "workflownam": 52, "workflowurl": 52, "workflowdag": 52, "m1": [52, 53], "myworkflow1": 52, "myworkflow2": 52, "ensembl": 53, "flow": 53, "m2": 53, "m3": 53, "retri": 53, "model3": 53, "m4": 53, "model4": 53, "pre_process": 53, "syntax": 53, "ridden": 53, "dataflow": 53, "eg": 53, "function1": 53, "function2": 53, "aggregate_func": 53, "workflow_inference_api": 53, "workflow_management_api": 53, "serializ": 53, "arrai": 53}, "objects": {"": [[7, 0, 0, "-", "test"], [9, 0, 0, "-", "ts"]], "ts": [[9, 0, 0, "-", "arg_parser"], [9, 0, 0, "-", "context"], [10, 0, 0, "-", "metrics"], [9, 0, 0, "-", "model_loader"], [9, 0, 0, "-", "model_server"], [11, 0, 0, "-", "model_service"], [9, 0, 0, "-", "model_service_worker"], [12, 0, 0, "-", "protocol"], [9, 0, 0, "-", "service"], [13, 0, 0, "-", "torch_handler"], [18, 0, 0, "-", "utils"], [9, 0, 0, "-", "version"]], "ts.arg_parser": [[9, 1, 1, "", "ArgParser"]], "ts.arg_parser.ArgParser": [[9, 2, 1, "", "extract_args"], [9, 2, 1, "", "model_service_worker_args"], [9, 2, 1, "", "ts_parser"]], "ts.context": [[9, 1, 1, "", "Context"], [9, 1, 1, "", "RequestProcessor"]], "ts.context.Context": [[9, 2, 1, "", "get_all_request_header"], [9, 2, 1, "", "get_request_header"], [9, 2, 1, "", "get_request_id"], [9, 2, 1, "", "get_response_content_type"], [9, 2, 1, "", "get_response_headers"], [9, 2, 1, "", "get_response_status"], [9, 3, 1, "", "metrics"], [9, 3, 1, "", "request_processor"], [9, 2, 1, "", "set_all_response_status"], [9, 2, 1, "", "set_response_content_type"], [9, 2, 1, "", "set_response_header"], [9, 2, 1, "", "set_response_status"], [9, 3, 1, "", "system_properties"]], "ts.context.RequestProcessor": [[9, 2, 1, "", "add_response_property"], [9, 2, 1, "", "get_request_properties"], [9, 2, 1, "", "get_request_property"], [9, 2, 1, "", "get_response_header"], [9, 2, 1, "", "get_response_headers"], [9, 2, 1, "", "get_response_status_code"], [9, 2, 1, "", "get_response_status_phrase"], [9, 2, 1, "", "report_status"]], "ts.metrics": [[10, 0, 0, "-", "dimension"], [10, 0, 0, "-", "metric"], [10, 0, 0, "-", "metric_collector"], [10, 0, 0, "-", "metric_encoder"], [10, 0, 0, "-", "metrics_store"], [10, 0, 0, "-", "process_memory_metric"], [10, 0, 0, "-", "system_metrics"], [10, 0, 0, "-", "unit"]], "ts.metrics.dimension": [[10, 1, 1, "", "Dimension"]], "ts.metrics.dimension.Dimension": [[10, 2, 1, "", "to_dict"]], "ts.metrics.metric": [[10, 1, 1, "", "Metric"]], "ts.metrics.metric.Metric": [[10, 2, 1, "", "reset"], [10, 2, 1, "", "to_dict"], [10, 2, 1, "", "update"]], "ts.metrics.metric_encoder": [[10, 1, 1, "", "MetricEncoder"]], "ts.metrics.metric_encoder.MetricEncoder": [[10, 2, 1, "", "default"]], "ts.metrics.metrics_store": [[10, 1, 1, "", "MetricsStore"]], "ts.metrics.metrics_store.MetricsStore": [[10, 2, 1, "", "add_counter"], [10, 2, 1, "", "add_error"], [10, 2, 1, "", "add_metric"], [10, 2, 1, "", "add_percent"], [10, 2, 1, "", "add_size"], [10, 2, 1, "", "add_time"]], "ts.metrics.process_memory_metric": [[10, 4, 1, "", "check_process_mem_usage"], [10, 4, 1, "", "get_cpu_usage"]], "ts.metrics.system_metrics": [[10, 4, 1, "", "collect_all"], [10, 4, 1, "", "cpu_utilization"], [10, 4, 1, "", "disk_available"], [10, 4, 1, "", "disk_used"], [10, 4, 1, "", "disk_utilization"], [10, 4, 1, "", "gpu_utilization"], [10, 4, 1, "", "memory_available"], [10, 4, 1, "", "memory_used"], [10, 4, 1, "", "memory_utilization"]], "ts.metrics.unit": [[10, 1, 1, "", "Units"]], "ts.model_loader": [[9, 1, 1, "", "ModelLoader"], [9, 1, 1, "", "ModelLoaderFactory"], [9, 1, 1, "", "TsModelLoader"]], "ts.model_loader.ModelLoader": [[9, 2, 1, "", "load"]], "ts.model_loader.ModelLoaderFactory": [[9, 2, 1, "", "get_model_loader"]], "ts.model_loader.TsModelLoader": [[9, 2, 1, "", "load"]], "ts.model_server": [[9, 4, 1, "", "load_properties"], [9, 4, 1, "", "start"]], "ts.model_service": [[11, 0, 0, "-", "model_service"]], "ts.model_service.model_service": [[11, 1, 1, "", "ModelService"], [11, 1, 1, "", "SingleNodeService"]], "ts.model_service.model_service.ModelService": [[11, 2, 1, "", "handle"], [11, 2, 1, "", "inference"], [11, 2, 1, "", "initialize"], [11, 2, 1, "", "ping"], [11, 2, 1, "", "signature"]], "ts.model_service.model_service.SingleNodeService": [[11, 2, 1, "", "inference"]], "ts.model_service_worker": [[9, 1, 1, "", "TorchModelServiceWorker"]], "ts.model_service_worker.TorchModelServiceWorker": [[9, 2, 1, "", "handle_connection"], [9, 2, 1, "", "load_model"], [9, 2, 1, "", "run_server"]], "ts.protocol": [[12, 0, 0, "-", "otf_message_handler"]], "ts.protocol.otf_message_handler": [[12, 4, 1, "", "create_load_model_response"], [12, 4, 1, "", "create_predict_response"], [12, 4, 1, "", "encode_response_headers"], [12, 4, 1, "", "retrieve_msg"], [12, 4, 1, "", "send_intermediate_predict_response"]], "ts.service": [[9, 1, 1, "", "Service"], [9, 4, 1, "", "emit_metrics"]], "ts.service.Service": [[9, 3, 1, "", "context"], [9, 2, 1, "", "predict"], [9, 2, 1, "", "retrieve_data_for_inference"], [9, 2, 1, "", "set_cl_socket"]], "ts.torch_handler": [[13, 0, 0, "-", "base_handler"], [13, 0, 0, "-", "contractions"], [13, 0, 0, "-", "densenet_handler"], [13, 0, 0, "-", "image_classifier"], [13, 0, 0, "-", "image_segmenter"], [13, 0, 0, "-", "object_detector"], [14, 0, 0, "-", "request_envelope"], [13, 0, 0, "-", "text_classifier"], [13, 0, 0, "-", "text_handler"], [15, 0, 0, "-", "unit_tests"], [13, 0, 0, "-", "vision_handler"]], "ts.torch_handler.base_handler": [[13, 1, 1, "", "BaseHandler"], [13, 4, 1, "", "setup_ort_session"]], "ts.torch_handler.base_handler.BaseHandler": [[13, 2, 1, "", "describe_handle"], [13, 2, 1, "", "explain_handle"], [13, 2, 1, "", "handle"], [13, 2, 1, "", "inference"], [13, 2, 1, "", "initialize"], [13, 2, 1, "", "postprocess"], [13, 2, 1, "", "preprocess"]], "ts.torch_handler.densenet_handler": [[13, 1, 1, "", "DenseNetHandler"], [13, 4, 1, "", "list_classes_from_module"]], "ts.torch_handler.densenet_handler.DenseNetHandler": [[13, 2, 1, "", "handle"], [13, 2, 1, "", "inference"], [13, 2, 1, "", "initialize"]], "ts.torch_handler.image_classifier": [[13, 1, 1, "", "ImageClassifier"]], "ts.torch_handler.image_classifier.ImageClassifier": [[13, 2, 1, "", "get_max_result_classes"], [13, 5, 1, "", "image_processing"], [13, 2, 1, "", "postprocess"], [13, 2, 1, "", "set_max_result_classes"], [13, 5, 1, "", "topk"]], "ts.torch_handler.image_segmenter": [[13, 1, 1, "", "ImageSegmenter"]], "ts.torch_handler.image_segmenter.ImageSegmenter": [[13, 5, 1, "", "image_processing"], [13, 2, 1, "", "postprocess"]], "ts.torch_handler.object_detector": [[13, 1, 1, "", "ObjectDetector"]], "ts.torch_handler.object_detector.ObjectDetector": [[13, 5, 1, "", "image_processing"], [13, 2, 1, "", "initialize"], [13, 2, 1, "", "postprocess"], [13, 5, 1, "", "threshold"]], "ts.torch_handler.request_envelope": [[14, 0, 0, "-", "base"], [14, 0, 0, "-", "body"], [14, 0, 0, "-", "json"], [14, 0, 0, "-", "kserve"], [14, 0, 0, "-", "kservev2"]], "ts.torch_handler.request_envelope.base": [[14, 1, 1, "", "BaseEnvelope"]], "ts.torch_handler.request_envelope.base.BaseEnvelope": [[14, 2, 1, "", "format_output"], [14, 2, 1, "", "handle"], [14, 2, 1, "", "parse_input"]], "ts.torch_handler.request_envelope.body": [[14, 1, 1, "", "BodyEnvelope"]], "ts.torch_handler.request_envelope.body.BodyEnvelope": [[14, 2, 1, "", "format_output"], [14, 2, 1, "", "parse_input"]], "ts.torch_handler.request_envelope.json": [[14, 1, 1, "", "JSONEnvelope"]], "ts.torch_handler.request_envelope.json.JSONEnvelope": [[14, 2, 1, "", "format_output"], [14, 2, 1, "", "parse_input"]], "ts.torch_handler.request_envelope.kserve": [[14, 1, 1, "", "KServeEnvelope"]], "ts.torch_handler.request_envelope.kserve.KServeEnvelope": [[14, 2, 1, "", "format_output"], [14, 2, 1, "", "parse_input"]], "ts.torch_handler.request_envelope.kservev2": [[14, 1, 1, "", "KServev2Envelope"]], "ts.torch_handler.request_envelope.kservev2.KServev2Envelope": [[14, 2, 1, "", "format_output"], [14, 2, 1, "", "parse_input"]], "ts.torch_handler.text_classifier": [[13, 1, 1, "", "TextClassifier"]], "ts.torch_handler.text_classifier.TextClassifier": [[13, 2, 1, "", "get_insights"], [13, 2, 1, "", "inference"], [13, 5, 1, "", "ngrams"], [13, 2, 1, "", "postprocess"], [13, 2, 1, "", "preprocess"]], "ts.torch_handler.text_handler": [[13, 1, 1, "", "TextHandler"]], "ts.torch_handler.text_handler.TextHandler": [[13, 2, 1, "", "get_source_vocab_path"], [13, 2, 1, "", "get_word_token"], [13, 2, 1, "", "initialize"], [13, 2, 1, "", "summarize_attributions"]], "ts.torch_handler.unit_tests": [[16, 0, 0, "-", "models"], [15, 0, 0, "-", "test_base_handler"], [15, 0, 0, "-", "test_envelopes"], [15, 0, 0, "-", "test_image_classifier"], [15, 0, 0, "-", "test_image_segmenter"], [15, 0, 0, "-", "test_object_detector"], [17, 0, 0, "-", "test_utils"]], "ts.torch_handler.unit_tests.models": [[16, 0, 0, "-", "base_model"]], "ts.torch_handler.unit_tests.models.base_model": [[16, 1, 1, "", "ArgmaxModel"], [16, 4, 1, "", "save_pt_file"]], "ts.torch_handler.unit_tests.models.base_model.ArgmaxModel": [[16, 2, 1, "", "forward"], [16, 5, 1, "", "training"]], "ts.torch_handler.unit_tests.test_base_handler": [[15, 4, 1, "", "handler"], [15, 4, 1, "", "test_batch_handle"], [15, 4, 1, "", "test_inference_with_profiler_works_with_custom_initialize_method"], [15, 4, 1, "", "test_single_handle"]], "ts.torch_handler.unit_tests.test_envelopes": [[15, 4, 1, "", "handle_fn"], [15, 4, 1, "", "test_binary"], [15, 4, 1, "", "test_body"], [15, 4, 1, "", "test_json"], [15, 4, 1, "", "test_json_batch"], [15, 4, 1, "", "test_json_double_batch"]], "ts.torch_handler.unit_tests.test_image_classifier": [[15, 4, 1, "", "context"], [15, 4, 1, "", "handler"], [15, 4, 1, "", "image_bytes"], [15, 4, 1, "", "model_dir"], [15, 4, 1, "", "model_name"], [15, 4, 1, "", "test_handle"], [15, 4, 1, "", "test_handle_explain"]], "ts.torch_handler.unit_tests.test_image_segmenter": [[15, 4, 1, "", "context"], [15, 4, 1, "", "handler"], [15, 4, 1, "", "image_bytes"], [15, 4, 1, "", "model_dir"], [15, 4, 1, "", "model_name"], [15, 4, 1, "", "test_handle"]], "ts.torch_handler.unit_tests.test_object_detector": [[15, 4, 1, "", "context"], [15, 4, 1, "", "handler"], [15, 4, 1, "", "image_bytes"], [15, 4, 1, "", "model_dir"], [15, 4, 1, "", "model_name"], [15, 4, 1, "", "test_handle"]], "ts.torch_handler.unit_tests.test_utils": [[17, 0, 0, "-", "mock_context"]], "ts.torch_handler.unit_tests.test_utils.mock_context": [[17, 1, 1, "", "MockContext"]], "ts.torch_handler.unit_tests.test_utils.mock_context.MockContext": [[17, 2, 1, "", "get_request_header"]], "ts.torch_handler.vision_handler": [[13, 1, 1, "", "VisionHandler"]], "ts.torch_handler.vision_handler.VisionHandler": [[13, 2, 1, "", "get_insights"], [13, 2, 1, "", "initialize"], [13, 2, 1, "", "preprocess"]], "ts.utils": [[18, 0, 0, "-", "timeit_decorator"], [18, 0, 0, "-", "util"]], "ts.utils.timeit_decorator": [[18, 4, 1, "", "timeit"]], "ts.utils.util": [[18, 1, 1, "", "PT2Backend"], [18, 6, 1, "", "PredictionException"], [18, 4, 1, "", "check_valid_pt2_backend"], [18, 4, 1, "", "get_yaml_config"], [18, 4, 1, "", "list_classes_from_module"], [18, 4, 1, "", "load_label_mapping"], [18, 4, 1, "", "map_class_to_label"]], "ts.utils.util.PT2Backend": [[18, 5, 1, "", "AOT_CUDAGRAPHS"], [18, 5, 1, "", "AOT_EAGER"], [18, 5, 1, "", "AOT_NVFUSER"], [18, 5, 1, "", "EAGER"], [18, 5, 1, "", "FX2TRT"], [18, 5, 1, "", "INDUCTOR"], [18, 5, 1, "", "IPEX"], [18, 5, 1, "", "NVFUSER"], [18, 5, 1, "", "OFI"], [18, 5, 1, "", "ONNXRT"], [18, 5, 1, "", "TORCHXLA_TRACE_ONCE"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:property", "4": "py:function", "5": "py:attribute", "6": "py:exception"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "property", "Python property"], "4": ["py", "function", "Python function"], "5": ["py", "attribute", "Python attribute"], "6": ["py", "exception", "Python exception"]}, "titleterms": {"faq": 0, "s": [0, 21, 36], "gener": [0, 34, 36, 41], "doe": 0, "torchserv": [0, 1, 2, 21, 23, 26, 27, 28, 29, 30, 32, 33, 34, 36, 38, 40, 42, 44, 45, 46, 48, 49, 50, 53], "api": [0, 2, 3, 20, 21, 23, 24, 27, 29, 31, 35, 36, 37, 44, 51, 52], "follow": [0, 22], "some": 0, "rest": [0, 27, 44], "standard": 0, "how": [0, 2, 33, 38], "us": [0, 2, 21, 27, 33, 48, 50], "product": [0, 48], "what": [0, 32], "differ": 0, "between": 0, "python": [0, 2, 22, 23, 25, 27, 29, 32], "web": 0, "app": 0, "framework": 0, "like": 0, "flask": 0, "django": 0, "ar": [0, 2], "ani": 0, "sampl": 0, "model": [0, 2, 16, 21, 22, 23, 25, 27, 33, 35, 38, 39, 41, 45, 46, 50, 53], "avail": 0, "support": [0, 21], "other": [0, 23], "base": [0, 14, 27, 36], "program": 0, "languag": 0, "than": 0, "benefit": 0, "have": [0, 2], "over": 0, "aw": 0, "multi": 0, "server": [0, 29, 33, 37], "decod": 0, "intern": [0, 32], "infer": [0, 2, 21, 26, 31, 33, 41, 51], "respons": [0, 33], "client": [0, 27, 29], "side": [0, 29, 33], "perform": [0, 42], "do": [0, 32], "i": [0, 2], "improv": 0, "cpu": 0, "deploy": [0, 2], "config": [0, 2, 21, 23, 33, 34], "can": [0, 2], "run": [0, 22, 40, 45], "port": [0, 2, 23], "default": [0, 1, 21, 25, 26, 35, 36], "8080": [0, 2], "8081": [0, 2], "resolv": [0, 2], "specif": [0, 2, 23, 25, 53], "depend": [0, 2, 22, 25, 50], "deploi": [0, 50], "kubernet": 0, "elb": 0, "asg": 0, "backup": 0, "restor": 0, "state": 0, "build": [0, 22], "imag": 0, "from": [0, 25, 27, 36, 48, 49], "sourc": [0, 48, 49], "branch": [0, 22], "commit": 0, "id": 0, "creat": [0, 25, 28, 36], "dockerfil": 0, "dev": 0, "order": 0, "properti": [0, 21, 23, 34, 53], "path": 0, "model_stor": 0, "load_model": 0, "curl": [0, 31, 51], "make": 0, "request": [0, 2, 25, 43], "add": [0, 2, 36], "custom": [0, 2, 23, 25, 34, 36, 45, 50], "an": [0, 25], "exist": 0, "pass": 0, "multipl": [0, 25, 45], "call": 0, "my": [0, 2], "handler": [0, 1, 21, 25, 26, 27, 36, 53], "return": [0, 25], "output": 0, "enhanc": 0, "alwai": 0, "write": [0, 25], "ones": 0, "Is": 0, "possibl": 0, "hug": [0, 33], "face": [0, 33], "archiv": [0, 2, 21, 22, 25, 27], "mar": [0, 2], "file": [0, 2, 23, 32, 33, 36, 45, 53], "docker": [0, 21], "contain": [0, 21], "serial": 0, "singl": 0, "download": 0, "regist": [0, 2, 35, 52], "s3": 0, "presign": 0, "v4": 0, "url": 0, "host": 0, "set": [0, 35], "batch": [0, 21], "size": [0, 36], "sagemak": 0, "kei": 0, "paramet": [0, 23], "tune": [0, 33], "why": 0, "initi": 0, "so": 0, "slow": 0, "basic": 1, "featur": [1, 2, 26, 45], "exampl": [1, 23, 29, 31, 36, 51], "advanc": [1, 23, 25, 45], "troubleshoot": [2, 48], "guid": [2, 36, 42], "issu": [2, 47, 53], "fail": 2, "bind": 2, "address": [2, 23], "http": [2, 33, 47], "127": 2, "0": [2, 49], "1": [2, 33], "alreadi": 2, "java": [2, 32], "lang": 2, "nosuchmethoderror": 2, "when": 2, "start": [2, 25, 27, 36], "473": 2, "unabl": 2, "send": 2, "big": 2, "snapshot": [2, 46], "relat": [2, 53], "disabl": 2, "stop": [2, 27], "after": 2, "restart": 2, "invalidsnapshotexcept": 2, "except": 2, "where": 2, "store": [2, 27], "chang": 2, "temp": 2, "directori": [2, 32], "conflictstatusexcept": 2, "error": [2, 25], "code": [2, 22, 25, 27], "409": 2, "downloadmodelexcept": 2, "400": 2, "modelnotfoundexcept": 2, "404": 2, "serviceunavailableexcept": 2, "503": 2, "ad": 2, "requir": 2, "txt": 2, "packag": [2, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 22, 23], "list": [2, 35, 52], "get": [2, 27, 36], "instal": [2, 22, 25, 27, 48, 49], "backend": [2, 22, 23, 32, 36], "worker": [2, 23, 35], "monitor": 2, "thread": 2, "interrupt": 2, "process": 2, "di": 2, "develop": [3, 24, 48], "serv": [4, 27, 32, 33, 35, 45, 50], "run_circleci_test": 5, "modul": [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 25], "setup": [6, 21, 49], "test": [7, 50], "submodul": [7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], "regression_test": 7, "content": [7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 24, 25, 36, 45, 48, 49], "torchserve_san": 8, "ts": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 22], "subpackag": [9, 13, 15], "arg_pars": 9, "context": 9, "model_load": 9, "model_serv": 9, "model_service_work": 9, "servic": [9, 20, 24, 25, 45], "version": [9, 35], "metric": [10, 23, 36, 37, 45], "dimens": [10, 36], "metric_collector": 10, "metric_encod": 10, "metrics_stor": 10, "process_memory_metr": 10, "system_metr": 10, "unit": 10, "model_servic": 11, "protocol": 12, "otf_message_handl": 12, "torch_handl": [13, 14, 15, 16, 17], "base_handl": 13, "contract": 13, "densenet_handl": 13, "image_classifi": [13, 26], "image_segment": [13, 26], "object_detector": [13, 26], "text_classifi": [13, 26], "text_handl": 13, "vision_handl": 13, "request_envelop": 14, "bodi": 14, "json": [14, 26], "kserv": [14, 31], "kservev2": 14, "unit_test": [15, 16, 17], "test_base_handl": 15, "test_envelop": 15, "test_image_classifi": 15, "test_image_segment": 15, "test_mnist_kf": 15, "test_object_detector": 15, "base_model": 16, "test_util": 17, "mock_context": 17, "util": [18, 19], "timeit_decor": 18, "ts_script": 19, "api_util": 19, "backend_util": 19, "frontend_util": 19, "install_depend": 19, "install_from_src": 19, "marsgen": 19, "modelarchiver_util": 19, "print_env_info": 19, "regression_util": 19, "sanity_util": 19, "shell_util": 19, "torchserve_grpc_cli": 19, "tsutil": 19, "validate_model_on_gpu": 19, "workflow_archiver_util": 19, "thi": [21, 25, 36, 45, 48, 49], "document": [21, 25, 36, 45, 48, 49], "introduct": [21, 36], "prerequisit": [21, 34, 48], "resnet": 21, "152": 21, "configur": [21, 23], "demo": 21, "torch": [21, 27], "manag": [21, 35, 52], "through": [21, 27], "coverag": 22, "To": 22, "check": [22, 31], "stabil": 22, "saniti": 22, "suit": 22, "frontend": [22, 32, 36], "command": [22, 23, 45, 48], "pytest": 22, "lint": 22, "IT": 22, "markdown": [22, 47], "link": 22, "checker": 22, "environ": 23, "variabl": 23, "line": [23, 45], "jvm": 23, "option": 23, "load": [23, 33, 38], "startup": 23, "listen": 23, "grpc": [23, 27, 29, 33], "enabl": [23, 34], "ssl": 23, "cross": 23, "origin": 23, "resourc": [23, 42], "share": 23, "cor": 23, "prefer": 23, "direct": 23, "buffer": 23, "allow": 23, "restrict": 23, "access": [23, 34], "limit": 23, "gpu": [23, 25, 50], "usag": [23, 43, 48], "nvidia": [23, 40], "control": 23, "visibl": 23, "basehandl": 25, "level": 25, "entri": 25, "point": 25, "class": 25, "scratch": 25, "predict": [25, 27, 31, 51], "explan": [25, 31], "captum": 25, "extend": 25, "handl": 25, "execut": 25, "common": 26, "index_to_nam": 26, "contribut": [26, 27, 43], "For": [27, 48], "debian": 27, "system": [27, 41], "maco": 27, "window": [27, 48, 49], "inspect": 27, "log": [27, 34, 36, 45], "debug": 27, "github": [28, 47], "action": 28, "step": 28, "stream": [29, 33], "descript": [31, 35], "health": 31, "architectur": 32, "terminolog": 32, "pytorch": [32, 33, 42, 50], "thei": 32, "import": 32, "core": 32, "engin": 32, "larg": 33, "work": 33, "pippi": 33, "nativ": 33, "solut": 33, "deepspe": 33, "mii": 33, "acceler": 33, "tip": 33, "reduc": 33, "latenc": 33, "yaml": [33, 36], "sensit": 33, "applic": 33, "job": 33, "ticket": 33, "via": 33, "chunk": 33, "encod": 33, "type": [34, 36], "modifi": 34, "behavior": 34, "provid": 34, "asynchron": 34, "encrypt": 35, "scale": 35, "describ": [35, 52], "unregist": [35, 52], "enum": 36, "format": 36, "specifi": 36, "updat": 36, "pars": 36, "object": 36, "function": 36, "without": 36, "time": 36, "percentag": 36, "counter": 36, "backward": 36, "compat": 36, "warn": 36, "upgrad": 36, "prometheu": 37, "grafana": 37, "zoo": [39, 50], "mp": 40, "benchmark": [40, 42], "g4": 40, "instanc": 40, "p3": 40, "summari": 40, "optim": [41, 42], "checklist": 41, "profil": 42, "more": 42, "envelop": 43, "overview": 45, "technic": 45, "detail": 45, "interfac": 45, "argument": 45, "cf": 47, "com": 47, "ryanfox": 47, "sphinx": 47, "tabl": 47, "36": 47, "binari": [48, 49], "below": 48, "purpos": 48, "subsystem": 49, "linux": 49, "wsl": 49, "ubuntu": 49, "18": 49, "4": 49, "case": 50, "eager": 50, "mode": 50, "script": 50, "readymad": 50, "secur": 50, "third": 50, "parti": 50, "ab": 50, "workflow": [51, 52, 53], "dag": 53, "sequenti": 53, "parallel": 53, "doc": 53, "known": 53}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 56}}) \ No newline at end of file +Search.setIndex({"docnames": ["FAQs", "README", "Troubleshooting", "api/dev_api", "api/modules", "api/run_circleci_tests", "api/setup", "api/test", "api/torchserve_sanity", "api/ts", "api/ts.metrics", "api/ts.model_service", "api/ts.protocol", "api/ts.torch_handler", "api/ts.torch_handler.request_envelope", "api/ts.torch_handler.unit_tests", "api/ts.torch_handler.unit_tests.models", "api/ts.torch_handler.unit_tests.test_utils", "api/ts.utils", "api/ts_scripts", "apis", "batch_inference_with_ts", "code_coverage", "configuration", "contents", "custom_service", "default_handlers", "getting_started", "github_actions", "grpc_api", "index", "inference_api", "internals", "large_model_inference", "logging", "management_api", "metrics", "metrics_api", "model_loading", "model_zoo", "nvidia_mps", "performance_checklist", "performance_guide", "request_envelopes", "rest_api", "server", "snapshot", "sphinx/requirements", "torchserve_on_win_native", "torchserve_on_wsl", "use_cases", "workflow_inference_api", "workflow_management_api", "workflows"], "filenames": ["FAQs.md", "README.md", "Troubleshooting.md", "api/dev_api.rst", "api/modules.rst", "api/run_circleci_tests.rst", "api/setup.rst", "api/test.rst", "api/torchserve_sanity.rst", "api/ts.rst", "api/ts.metrics.rst", "api/ts.model_service.rst", "api/ts.protocol.rst", "api/ts.torch_handler.rst", "api/ts.torch_handler.request_envelope.rst", "api/ts.torch_handler.unit_tests.rst", "api/ts.torch_handler.unit_tests.models.rst", "api/ts.torch_handler.unit_tests.test_utils.rst", "api/ts.utils.rst", "api/ts_scripts.rst", "apis.rst", "batch_inference_with_ts.md", "code_coverage.md", "configuration.md", "contents.rst", "custom_service.md", "default_handlers.md", "getting_started.md", "github_actions.md", "grpc_api.md", "index.rst", "inference_api.md", "internals.md", "large_model_inference.md", "logging.md", "management_api.md", "metrics.md", "metrics_api.md", "model_loading.md", "model_zoo.md", "nvidia_mps.md", "performance_checklist.md", "performance_guide.md", "request_envelopes.md", "rest_api.md", "server.md", "snapshot.md", "sphinx/requirements.txt", "torchserve_on_win_native.md", "torchserve_on_wsl.md", "use_cases.md", "workflow_inference_api.md", "workflow_management_api.md", "workflows.md"], "titles": ["FAQ\u2019S", "TorchServe", "Troubleshooting Guide", "<no title>", "serve", "run_circleci_tests module", "setup module", "test package", "torchserve_sanity module", "ts package", "ts.metrics package", "ts.model_service package", "ts.protocol package", "ts.torch_handler package", "ts.torch_handler.request_envelope package", "ts.torch_handler.unit_tests package", "ts.torch_handler.unit_tests.models package", "ts.torch_handler.unit_tests.test_utils package", "ts.utils package", "ts_scripts package", "<no title>", "Batch Inference with TorchServe", "Code Coverage", "Advanced configuration", "<no title>", "Custom Service", "TorchServe default inference handlers", "Getting started", "GitHub Actions for TorchServe", "TorchServe gRPC API", "TorchServe", "Inference API", "TorchServe internals", "Serving large models with Torchserve", "Logging in Torchserve", "Management API", "TorchServe Metrics", "Metrics API", "How to load a model in TorchServe", "Model Zoo", "Running TorchServe with NVIDIA MPS", "Model Inference Optimization Checklist", "Performance Guide", "Request Envelopes", "TorchServe REST API", "Running TorchServe", "TorchServe model snapshot", "cf. https://github.com/ryanfox/sphinx-markdown-tables/issues/36", "TorchServe on Windows", "TorchServe on Windows Subsystem for Linux (WSL)", "Torchserve Use Cases", "Workflow Inference API", "Management API", "TorchServe Workflows"], "terms": {"content": [0, 2, 3, 4, 23, 31, 35, 42], "thi": [0, 1, 2, 9, 10, 13, 14, 16, 18, 22, 23, 26, 27, 28, 29, 31, 32, 33, 34, 35, 37, 39, 40, 41, 42, 46, 50, 52, 53], "document": [0, 27, 29, 32, 33, 34, 35, 40, 50], "relev": [0, 2, 9, 32, 53], "readm": [0, 9, 21], "compliant": [0, 44], "openapi": [0, 31, 35, 44], "3": [0, 21, 23, 27, 28, 29, 31, 33, 34, 35, 36, 37, 39, 40, 44, 47, 53], "0": [0, 9, 10, 13, 14, 17, 21, 23, 25, 26, 27, 31, 33, 34, 35, 36, 37, 39, 42, 44, 47, 48, 50], "your": [0, 1, 2, 21, 23, 25, 26, 27, 30, 33, 34, 35, 38, 40, 41, 42, 43, 45, 48, 50], "case": [0, 2, 10, 15, 23, 24, 25, 27, 29, 30, 31, 33, 36, 40, 41, 42, 45, 53], "you": [0, 1, 2, 21, 23, 25, 26, 27, 31, 33, 34, 35, 36, 37, 38, 41, 42, 43, 44, 45, 46, 48, 50, 52], "abl": [0, 25, 33, 36, 42, 48, 50], "mechan": [0, 23], "standalon": [0, 23], "refer": [0, 2, 21, 25, 27, 29, 32, 33, 36, 37, 39, 40, 45, 48, 50, 53], "cloud": [0, 23, 32, 43], "cloudform": 0, "main": [0, 21, 27, 29, 32, 42, 48], "purpos": [0, 34, 36], "serv": [0, 1, 3, 13, 14, 20, 21, 22, 23, 24, 25, 29, 30, 31, 34, 36, 37, 39, 40, 42, 43, 48, 49, 51, 53], "via": [0, 1, 23, 25, 31, 36, 42, 45], "http": [0, 9, 14, 21, 23, 27, 28, 29, 31, 32, 34, 35, 36, 37, 45, 48, 49, 50, 51, 52], "netti": [0, 23], "engin": [0, 25, 35, 41, 50], "issu": [0, 41], "581": 0, "569": 0, "variou": [0, 13, 30, 38, 42], "provid": [0, 2, 9, 13, 21, 23, 25, 26, 29, 32, 33, 35, 36, 40, 41, 42, 43, 45, 52, 53], "out": [0, 14, 21, 23, 25, 26, 27, 35, 41, 45], "box": [0, 1, 13, 21, 26], "checkout": [0, 28], "zoo": [0, 1, 24, 29], "list": [0, 1, 9, 10, 11, 13, 14, 18, 20, 23, 24, 25, 26, 29, 31, 32, 33, 36, 39, 43, 53], "all": [0, 9, 10, 11, 13, 14, 16, 17, 18, 21, 22, 23, 25, 26, 27, 30, 32, 33, 34, 35, 36, 41, 42, 45, 48, 50, 52, 53], "also": [0, 2, 10, 13, 14, 23, 25, 26, 27, 29, 32, 33, 34, 36, 42, 45, 48, 50], "check": [0, 1, 2, 20, 24, 26, 27, 29, 33, 36, 41, 50], "exampl": [0, 2, 15, 20, 21, 24, 25, 26, 27, 28, 30, 32, 33, 34, 35, 39, 40, 41, 42, 43, 45, 50, 53], "folder": [0, 10, 23, 27, 32, 35, 45, 52], "No": [0, 23, 26, 38], "As": [0, 25, 26, 27, 34, 40, 41], "now": [0, 21, 23, 27, 45, 50], "onli": [0, 10, 16, 23, 25, 26, 27, 29, 31, 33, 35, 36, 37, 40, 41, 42, 43, 44, 45, 48, 51, 52, 53], "deriv": [0, 14, 25, 26], "howev": [0, 23, 37, 48, 50], "pytorch": [0, 1, 2, 13, 21, 23, 25, 27, 29, 30, 31, 34, 35, 38, 39, 41, 45, 47, 48, 49, 51, 53], "It": [0, 11, 13, 14, 22, 25, 26, 31, 33, 35, 36, 37, 40, 41, 42, 45, 50, 52, 53], "ha": [0, 23, 25, 32, 33, 35, 36, 38, 40, 42, 43, 45, 48, 50, 52], "new": [0, 25, 26, 28, 29, 31, 35, 36, 43, 50], "featur": [0, 21, 31, 33, 36, 46], "snapshot": [0, 1, 24, 45, 48, 53], "version": [0, 2, 3, 4, 20, 21, 22, 23, 24, 25, 27, 28, 29, 31, 32, 33, 34, 36, 41, 45, 46, 50, 53], "By": [0, 2, 23, 25, 29, 33, 37, 44, 48], "utf": [0, 23, 31, 33], "8": [0, 21, 23, 27, 28, 31, 33, 34, 36, 37, 39, 40, 52], "encod": [0, 9, 10, 31, 32, 42], "string": [0, 1, 9, 11, 13, 23, 26, 36, 53], "If": [0, 2, 21, 23, 25, 27, 31, 32, 33, 34, 35, 36, 41, 42, 45, 46, 48, 50, 52], "convert": [0, 13, 14, 23, 42], "byte": [0, 2, 23, 51, 53], "need": [0, 13, 16, 21, 22, 25, 32, 33, 35, 36, 40, 42, 43, 48, 53], "codec": [0, 12], "specifi": [0, 2, 13, 14, 23, 25, 27, 28, 33, 35, 38, 45, 46, 48, 52], "github": [0, 21, 27, 29, 32, 42, 48, 49, 50], "com": [0, 21, 23, 27, 29, 31, 32, 36, 37, 48, 49, 50, 51], "blob": [0, 32, 50], "master": [0, 21, 27, 28, 31, 32, 49, 50, 51], "nmt": [0, 50], "_": [0, 2, 13, 21, 23, 25, 26, 29, 31, 32, 33, 34, 35, 36, 37, 41, 43, 45, 47, 48, 50, 52], "transform": [0, 21, 25, 33, 36, 40, 41, 42], "py": [0, 2, 13, 17, 22, 25, 26, 27, 28, 29, 32, 33, 40, 42, 43, 48, 49, 50], "guid": [0, 24, 27, 30, 49], "heavili": 0, "influenc": [0, 41], "launcher": [0, 42], "core": [0, 41, 42], "pin": [0, 41, 42], "we": [0, 2, 21, 26, 27, 33, 34, 36, 40, 42, 45, 50], "recommend": [0, 26, 29, 31, 33], "cpu_launcher_en": [0, 42], "true": [0, 2, 9, 10, 21, 23, 25, 31, 33, 34, 35, 42, 50], "cpu_launcher_arg": [0, 42], "use_logical_cor": [0, 42], "more": [0, 2, 15, 21, 23, 25, 26, 27, 31, 33, 35, 37, 39, 40, 41, 45, 50, 53], "background": [0, 23], "found": [0, 37], "blog": [0, 41, 42], "post": [0, 2, 11, 13, 21, 23, 25, 31, 32, 33, 34, 35, 41, 42, 48, 50, 51, 52], "configur": [0, 1, 2, 9, 24, 29, 31, 34, 35, 36, 37, 42, 44, 45, 46, 50, 51, 52], "ye": [0, 38], "environ": [0, 2, 22, 28, 33, 35, 36, 40, 42, 46, 48, 49, 50], "variabl": [0, 2, 33, 35, 36, 42, 46, 48], "detail": [0, 2, 23, 25, 26, 29, 31, 33, 35, 36, 37, 39, 40, 48, 50, 52, 53], "requir": [0, 21, 23, 25, 27, 32, 33, 34, 36, 38, 41, 42, 43, 45, 50, 53], "txt": [0, 23, 32, 33, 39, 40, 50], "while": [0, 2, 16, 23, 32, 40, 41, 42, 45, 46, 50], "r": [0, 2, 33], "flag": [0, 2, 21, 23, 36, 46, 50], "extra": [0, 2, 23, 25, 27, 33, 42, 48, 50], "helm": [0, 1], "chart": [0, 1], "node": [0, 22, 23, 33, 48, 50, 51, 53], "ec2": [0, 35], "cluster": [0, 32], "There": [0, 2, 23, 34, 35, 36, 38, 42, 45], "format": [0, 1, 9, 13, 14, 23, 25, 31, 33, 35, 37, 41, 42, 43, 45], "templat": 0, "here": [0, 10, 14, 21, 23, 25, 26, 27, 32, 33, 35, 36, 37, 42, 45, 48, 50], "type": [0, 2, 9, 10, 11, 13, 14, 23, 25, 29, 31, 33, 35, 37, 38, 39, 40, 41, 45, 53], "behind": [0, 42, 43], "elast": 0, "loadbalanc": 0, "preserv": [0, 46], "runtim": [0, 9, 13, 21, 25, 29, 35, 42, 46, 52], "across": [0, 13, 43, 46], "session": [0, 46], "instanc": [0, 16, 25, 27, 33, 35, 46], "experienc": [0, 46], "either": [0, 2, 23, 25, 32, 33, 36, 42, 46], "plan": [0, 25, 27, 46, 48, 50], "unplan": [0, 46], "servic": [0, 1, 2, 3, 4, 11, 13, 18, 21, 23, 29, 32, 33, 34, 35, 36, 40, 42, 44, 46], "stop": [0, 31, 32, 45, 46], "its": [0, 2, 25, 33, 40, 42, 46], "upon": [0, 46], "restart": [0, 36, 46, 48], "These": [0, 2, 21, 23, 32, 34, 35, 41, 42, 43, 45, 50, 53], "save": [0, 29, 32, 34, 46, 50], "util": [0, 3, 4, 9, 13, 21, 24, 25, 32, 33, 36, 40, 41, 42, 45, 48, 53], "script": [0, 2, 10, 25, 28, 33, 35, 39, 42, 48], "hardwar": [0, 33, 40, 41, 42], "gpu": [0, 1, 9, 10, 11, 21, 22, 26, 27, 28, 32, 33, 35, 36, 40, 41, 42, 49], "compat": [0, 11, 13, 32, 41, 42], "A": [0, 1, 9, 11, 13, 14, 23, 25, 31, 32, 35, 42, 43, 45, 46, 48, 50, 53], "could": [0, 29, 31, 32, 33, 34, 40, 41], "cuda": [0, 22, 23, 25, 27, 28, 33, 40, 42, 49], "well": [0, 23, 34, 36, 40, 41, 43, 50], "build_imag": [0, 21], "sh": [0, 21, 35], "appropri": [0, 40], "option": [0, 9, 25, 27, 31, 33, 35, 36, 38, 41, 45, 50, 52], "help": [0, 33, 37, 40, 41, 42, 45, 48, 50], "To": [0, 2, 21, 23, 25, 26, 27, 29, 31, 33, 34, 35, 36, 37, 39, 40, 42, 43, 44, 45, 51, 52], "command": [0, 9, 21, 27, 28, 29, 31, 33, 35, 37, 40, 50], "b": [0, 1, 9, 36, 43, 45, 50], "branch_nam": 0, "commit_id": 0, "tag": [0, 13], "t": [0, 13, 21, 23, 26, 27, 31, 32, 34, 35, 50, 51], "tagnam": 0, "latest": [0, 21, 28, 41, 50], "The": [0, 2, 9, 13, 14, 21, 22, 23, 25, 27, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 48, 50, 51, 52, 53], "instal": [0, 1, 21, 23, 28, 29, 30, 32, 33, 37, 50], "where": [0, 13, 23, 28, 29, 31, 32, 33, 36, 40, 41, 42, 45, 46], "pypi": [0, 32, 48], "distribut": [0, 25, 28, 33], "look": [0, 21, 33, 35, 36, 41, 42, 43, 45], "accord": [0, 40], "doc": [0, 21, 27, 31, 32, 33, 36, 37, 50, 51], "overrid": [0, 10, 13, 23, 25, 35, 36, 45, 53], "store": [0, 9, 21, 23, 29, 32, 36, 45, 46, 48, 50], "load": [0, 1, 9, 11, 12, 13, 15, 17, 18, 25, 29, 31, 32, 34, 35, 36, 40, 42, 45, 50], "mandatori": [0, 45], "argument": [0, 9, 14, 23, 25, 36], "dure": [0, 9, 23, 25, 27, 33, 35], "start": [0, 1, 9, 10, 21, 23, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 40, 41, 44, 45, 46, 48, 50], "defin": [0, 9, 10, 11, 13, 16, 18, 23, 25, 27, 28, 31, 33, 34, 36, 42, 45, 53], "overridden": [0, 16, 23], "line": [0, 9, 36, 48], "manag": [0, 1, 20, 23, 24, 25, 27, 29, 32, 43, 44, 46, 48, 50, 53], "decid": 0, "which": [0, 2, 10, 11, 21, 23, 25, 27, 28, 31, 32, 33, 36, 40, 42, 43, 45, 46, 48, 50, 53], "relationship": 0, "w": [0, 1, 13, 26, 34, 37], "ie": [0, 25, 33], "tool": [0, 1, 25, 27, 29, 30, 32, 33, 40, 41, 50], "postman": [0, 50], "insomnia": 0, "even": [0, 41], "find": [0, 18, 25, 32, 33, 35, 40, 41, 42], "plugin": [0, 32, 50], "sdk": [0, 32], "data": [0, 1, 9, 11, 13, 14, 25, 27, 29, 31, 32, 33, 35, 41, 42, 43, 50, 53], "valu": [0, 2, 9, 10, 18, 23, 35, 36, 40, 42, 45, 48, 52, 53], "pair": [0, 10, 36, 45], "object": [0, 1, 9, 10, 11, 13, 14, 17, 23, 25, 26, 27, 32, 35, 39, 45, 50, 51, 53], "would": [0, 13, 23, 27, 28, 33, 36, 41, 45, 53], "modifi": [0, 10, 33, 42, 45], "postprocess": [0, 11, 13, 25, 33, 34, 35, 52, 53], "extend": [0, 11, 32], "just": [0, 27, 32, 41], "method": [0, 9, 13, 14, 18, 21, 25, 36, 38, 41, 42], "code": [0, 9, 11, 12, 21, 23, 24, 31, 32, 33, 34, 35, 36, 42, 43, 44, 45, 48], "zero": 0, "builtin": 0, "huggingfac": [0, 1, 25, 33, 36, 40], "zip": [0, 2, 23, 25, 45, 48], "consist": [0, 21, 22, 40, 46, 53], "artifact": [0, 1, 13, 25, 32, 35, 38], "extens": [0, 22, 41, 45, 50], "cmd": [0, 2, 23], "torch": [0, 2, 13, 25, 33, 35, 37, 42, 43, 47, 48, 49, 50, 53], "step": [0, 2, 25, 26, 27, 35, 41, 48, 50], "given": [0, 2, 9, 18, 35, 36, 37, 48, 50, 53], "current": [0, 9, 22, 23, 27, 29, 34, 35, 36, 43, 45, 46, 52], "allow": [0, 2, 25, 27, 29, 31, 33, 35, 40, 42, 43, 50, 52], "suppli": [0, 23, 25, 46, 52, 53], "one": [0, 16, 23, 26, 28, 29, 31, 33, 35, 36, 40, 43, 45], "number": [0, 1, 2, 13, 21, 23, 25, 26, 27, 29, 31, 32, 33, 35, 36, 40, 42, 48, 50, 52, 53], "model_dir": [0, 9, 11, 15, 17, 25, 33], "locat": [0, 2, 21, 23, 25, 34, 35, 36, 45, 52], "access": [0, 2, 31, 33, 35, 36, 37, 40, 42, 44, 50, 51, 52], "through": [0, 9, 13, 23, 36, 41, 42, 53], "context": [0, 3, 4, 11, 12, 13, 14, 15, 17, 23, 24, 25, 29, 31, 32, 33, 35, 36, 40, 42, 53], "entri": [0, 9, 13, 23, 29, 33, 35, 45, 50], "point": [0, 9, 10, 13, 27, 29, 33, 35, 37, 40, 45], "snippet": [0, 25], "system_properti": [0, 9, 25], "get": [0, 1, 11, 13, 14, 18, 21, 22, 23, 25, 29, 31, 32, 33, 35, 40, 42, 45, 49, 51, 52], "cli": [0, 2, 32, 41], "633": 0, "both": [0, 1, 21, 23, 27, 29, 32, 35, 36, 40, 42, 43, 44, 50], "v2": [0, 14, 35], "signatur": [0, 11, 25, 36, 43, 45], "note": [0, 22, 23, 25, 26, 27, 29, 35, 36, 37, 40, 41, 42, 46, 48, 50], "For": [0, 21, 22, 23, 25, 26, 29, 31, 33, 34, 35, 36, 37, 40, 41, 42, 43, 45, 46, 50], "replac": [0, 14, 36, 42], "charact": [0, 13], "e": [0, 23, 31, 33, 34, 35, 36, 41, 45, 47, 48, 50, 53], "26": 0, "669": 0, "local": [0, 21, 22, 23, 32, 35, 43, 45, 48, 49, 50, 52, 53], "publicli": [0, 2], "uri": [0, 35, 52, 53], "work": [0, 21, 23, 25, 30, 35, 36, 38, 40, 41, 42, 48, 50], "veri": [0, 33], "same": [0, 23, 25, 29, 33, 35, 36, 40, 41, 43, 50], "made": [0, 13, 34, 35, 36, 45, 50], "public": [0, 50, 53], "consol": 0, "instead": [0, 16, 36], "few": [0, 42, 48, 53], "reason": [0, 23, 42], "overhead": 0, "someth": 0, "dramat": [0, 42], "larger": [0, 31, 41, 42], "launch": [0, 21, 53], "control": [0, 27, 32, 40, 45], "dep": [0, 2], "per": [0, 2, 13, 33, 36, 40, 42, 43, 50], "intend": 0, "develop": [0, 1, 27, 50], "should": [0, 13, 16, 21, 23, 25, 27, 28, 36, 40, 41, 42, 43, 48, 50], "pre": [0, 1, 11, 13, 25, 32, 33, 35, 39, 41, 42, 50], "compress": 0, "decompress": 0, "becaus": [0, 21, 35, 36], "histor": 0, "came": 0, "involv": [0, 40, 41], "unload": 0, "ton": 0, "bucket": [0, 35, 41], "But": [0, 40], "user": [0, 13, 18, 23, 25, 29, 31, 32, 33, 35, 36, 42, 45, 46, 48, 50, 53], "smaller": [0, 40], "choos": [0, 35], "good": [0, 46], "bet": 0, "perform": [1, 16, 23, 24, 27, 30, 33, 34, 40, 41], "flexibl": [1, 11, 30, 33], "easi": [1, 27, 30, 45], "us": [1, 9, 10, 11, 13, 14, 16, 18, 23, 24, 25, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 52, 53], "eager": [1, 13, 18, 25, 38, 39], "mode": [1, 13, 25, 36, 37, 39, 40], "torchscript": [1, 13, 38, 39, 42, 50], "model": [1, 3, 9, 10, 11, 12, 13, 14, 15, 17, 18, 20, 24, 26, 29, 30, 31, 32, 34, 36, 37, 40, 42, 43, 47, 48, 49, 51, 52], "quick": [1, 30, 42], "server": [1, 9, 10, 20, 21, 22, 23, 24, 25, 27, 31, 32, 34, 42, 43, 45, 46, 48, 49, 50, 51], "usag": [1, 10, 25, 32, 36, 42, 45, 50], "tutori": [1, 33], "archiv": [1, 23, 26, 32, 33, 35, 36, 38, 39, 42, 45, 47, 48, 49, 50, 52, 53], "show": [1, 11, 21, 27, 35, 38, 40, 41, 45], "how": [1, 21, 23, 25, 27, 30, 32, 34, 36, 40, 41, 42, 45, 46], "packag": [1, 3, 4, 21, 24, 25, 27, 32, 33, 39, 42, 48, 50, 53], "file": [1, 9, 13, 18, 21, 22, 25, 26, 27, 28, 29, 31, 34, 35, 37, 38, 42, 43, 46, 48, 50, 52], "procedur": [1, 50], "explain": [1, 25, 30, 31, 33, 34], "rest": [1, 20, 23, 24, 31, 33, 45, 50, 51, 53], "api": [1, 9, 11, 25, 32, 33, 38, 45, 46, 48, 50, 53], "specif": [1, 13, 14, 29, 31, 32, 33, 35, 40, 42, 44, 45, 50, 52], "endpoint": [1, 9, 13, 27, 31, 32, 36, 37, 45], "grpc": [1, 20, 24, 31, 35], "support": [1, 2, 10, 11, 13, 22, 23, 26, 27, 29, 31, 32, 33, 35, 36, 42, 43, 45, 46, 49, 51, 52, 53], "infer": [1, 9, 11, 12, 13, 20, 23, 24, 25, 27, 29, 30, 32, 34, 35, 36, 39, 42, 44, 45, 48, 50], "call": [1, 14, 16, 23, 25, 27, 29, 31, 32, 33, 34, 35, 36, 38, 42, 43, 44, 45, 46, 50, 51, 52], "health": [1, 11, 20, 24, 29, 33], "deploi": [1, 2, 30, 32, 40, 41, 43], "scale": [1, 2, 20, 24, 25, 27, 30, 33, 50, 52, 53], "log": [1, 2, 23, 24, 32, 37, 41, 46], "metric": [1, 3, 4, 9, 20, 24, 30, 32, 34, 35, 44, 48], "prometheu": [1, 20, 24, 36], "grafana": [1, 20, 24], "dashboard": [1, 37], "captum": [1, 13, 14, 31, 39, 47], "explan": [1, 13, 14, 20, 24, 30, 43], "built": [1, 21, 23, 42, 45], "text": [1, 2, 13, 23, 25, 26, 39], "imag": [1, 9, 13, 21, 23, 25, 26, 27, 31, 33, 36, 39, 42, 45, 50, 51], "batch": [1, 9, 10, 13, 14, 15, 23, 24, 25, 26, 30, 32, 33, 35, 36, 40, 41, 42, 53], "creat": [1, 2, 9, 10, 12, 21, 23, 27, 32, 33, 35, 37, 38, 40, 42, 50], "workflow": [1, 20, 23, 24, 27, 28, 29, 32, 33, 36, 39, 44, 45, 46, 47], "compos": [1, 13], "python": [1, 9, 10, 14, 20, 21, 24, 28, 33, 35, 43, 48, 49, 50, 53], "function": [1, 2, 10, 11, 13, 14, 16, 18, 21, 25, 31, 32, 35, 42, 53], "sequenti": 1, "parallel": [1, 33, 40], "pipelin": [1, 33, 41], "classifi": [1, 25, 26, 36, 39, 50], "take": [1, 9, 13, 16, 23, 25, 27, 32, 33, 35, 40, 42, 45], "an": [1, 2, 10, 11, 13, 15, 18, 21, 23, 27, 29, 31, 32, 33, 35, 36, 38, 40, 41, 42, 45, 48, 50], "return": [1, 9, 10, 11, 12, 13, 14, 15, 18, 27, 29, 31, 33, 35, 36, 37, 40, 45, 51, 52], "name": [1, 2, 9, 10, 13, 14, 18, 21, 23, 25, 26, 27, 28, 31, 32, 33, 34, 35, 36, 37, 42, 43, 45, 48, 50, 52, 53], "input": [1, 11, 13, 14, 16, 25, 26, 31, 33, 35, 39, 40, 41, 42, 43, 53], "classif": [1, 13, 25, 26, 39, 40], "base": [1, 3, 9, 10, 11, 13, 16, 17, 18, 23, 24, 25, 33, 37, 41, 42, 45, 50, 53], "vocabulari": [1, 13], "detector": [1, 25, 26, 50], "detect": [1, 13, 26, 39, 43, 45, 48, 50], "class": [1, 9, 10, 11, 13, 14, 15, 16, 17, 18, 26, 31, 33, 36, 43], "bound": [1, 13, 26], "respect": [1, 13, 26], "segment": [1, 13, 25, 26, 39, 50], "output": [1, 11, 13, 14, 23, 25, 26, 27, 31, 34, 35, 42, 53], "shape": [1, 13, 14, 26], "cl": [1, 26], "h": [1, 13, 26, 31, 45], "height": [1, 13, 26], "width": [1, 13, 26], "languag": [1, 43], "sentenc": 1, "can": [1, 15, 21, 23, 25, 26, 27, 29, 31, 32, 33, 35, 36, 37, 40, 41, 42, 44, 45, 46, 48, 50, 52, 53], "sequenc": [1, 29, 31, 33, 39, 40, 41], "token": [1, 13, 29, 31, 33, 39, 41], "q": 1, "answer": [1, 48], "multi": [1, 32, 40, 42, 43], "modal": 1, "framework": [1, 11, 21, 32, 42, 43], "build": [1, 21, 32, 42, 48, 50], "combin": [1, 36], "audio": [1, 42], "video": [1, 42], "dual": 1, "translat": [1, 14, 43], "train": [1, 16, 26, 27, 39, 41, 45, 50], "readi": [1, 21, 25, 27, 29, 31, 33, 35, 39, 43, 50], "mani": [1, 32, 40, 42, 43, 45, 46], "intern": [1, 11, 25, 33, 36], "integr": [1, 33, 42], "usecas": [1, 26], "describ": [1, 13, 20, 23, 24, 36, 41, 53], "test": [1, 3, 4, 15, 16, 21, 22, 24, 26, 27, 28, 32, 35, 36, 43, 48], "regress": [1, 28, 33], "befor": [1, 21, 23, 25, 26, 31, 35, 40, 41], "ship": 1, "them": [1, 10, 16, 32, 35, 36, 40, 41, 45, 50], "product": [1, 30, 32, 42, 45, 50], "custom": [1, 9, 10, 13, 21, 24, 32, 33, 35, 37, 38, 42], "encrypt": [1, 20, 24], "s3": [1, 23, 35], "side": [1, 20, 24, 31, 35, 44], "km": [1, 35, 36], "serial": [1, 25, 27, 38, 40, 42, 50], "aw": [1, 23, 30, 35, 40], "dynamo": 1, "db": 1, "benchmark": [1, 27, 32], "profil": [1, 48], "jmeter": 1, "apach": 1, "bench": 1, "itself": [1, 40], "kubernet": [1, 31, 32], "demonstr": [1, 25, 50], "deploy": [1, 33, 40, 43, 50], "azur": 1, "googl": [1, 29, 32, 33, 43], "mlflow": 1, "kubeflow": 1, "vertex": [1, 30], "ai": [1, 30, 32, 33, 43], "nvidia": [1, 24, 41, 42, 48, 49, 50], "mp": [1, 24, 42], "optim": [1, 21, 30, 33], "worker": [1, 9, 10, 20, 21, 24, 25, 27, 29, 31, 32, 33, 34, 36, 40, 42, 46, 50, 53], "singl": [1, 10, 11, 40, 45], "section": [2, 21, 27, 33, 36, 40, 53], "common": [2, 28, 42], "face": [2, 21], "correspond": [2, 27, 36, 43, 50], "usual": [2, 33, 48], "some": [2, 9, 25, 26, 27, 28, 34, 35, 36, 40, 41, 42, 48, 50], "other": [2, 25, 32, 33, 35, 36, 40, 41, 50, 52, 53], "applic": [2, 23, 25, 29, 31, 35, 36, 41, 42, 50], "verifi": [2, 21, 33], "ss": 2, "ntl": 2, "grep": 2, "two": [2, 21, 23, 25, 34, 35, 36, 38, 40, 44, 50], "wai": [2, 23, 25, 33, 34, 38, 45, 46], "kill": 2, "run": [2, 9, 11, 13, 16, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 41, 42, 46, 48, 50], "differ": [2, 10, 11, 23, 25, 29, 32, 33, 35, 36, 40, 41, 42, 43, 45, 50], "than": [2, 23, 31, 36, 41], "md": [2, 22, 50, 53], "542": 2, "occur": 2, "17": [2, 28, 48, 49], "older": 2, "default": [2, 10, 13, 14, 20, 23, 24, 29, 31, 32, 33, 34, 37, 40, 42, 44, 45, 46, 50, 51, 52, 53], "max": [2, 21, 23, 33, 34, 42, 53], "size": [2, 9, 10, 13, 21, 23, 25, 26, 32, 33, 34, 35, 39, 40, 41, 42, 53], "respons": [2, 12, 13, 14, 23, 25, 29, 31, 32, 35, 36, 50, 51], "roughli": 2, "6": [2, 26, 28, 33, 36, 47], "5": [2, 13, 14, 26, 30, 33, 34, 35, 36, 37, 47], "mb": [2, 10, 34, 36, 39], "henc": [2, 27], "ani": [2, 25, 27, 29, 31, 32, 33, 34, 35, 36, 50], "greater": [2, 36], "5mb": 2, "cannot": 2, "upload": [2, 28], "updat": [2, 10, 21, 26, 27, 30, 33, 35, 41, 50, 53], "max_request_s": [2, 23], "max_response_s": [2, 23], "properti": [2, 9, 25, 32, 36, 37, 42, 43, 45, 48, 50], "cat": [2, 21, 31], "model_stor": [2, 21, 23, 27, 35, 36, 40, 45, 50], "ts": [2, 3, 4, 21, 23, 24, 25, 26, 29, 31, 32, 33, 34, 35, 36, 43, 45, 46, 50], "path": [2, 9, 21, 22, 23, 25, 27, 34, 35, 36, 38, 42, 45, 48, 49, 50, 52], "set": [2, 9, 14, 20, 21, 23, 24, 29, 31, 33, 36, 37, 40, 42, 43, 45, 46, 51, 52], "335": 2, "enabl": [2, 21, 25, 33, 36, 37, 40, 42, 44, 50], "nc": [2, 27, 36, 45, 50], "383": 2, "512": [2, 33, 41], "last": [2, 29, 31, 33, 46], "restor": [2, 46], "state": [2, 31, 32, 45, 46], "thrown": 2, "inconsist": 2, "compar": [2, 40, 42], "remov": [2, 13, 48], "log_loc": [2, 34, 46], "system": [2, 10, 11, 23, 25, 29, 32, 35, 36, 42, 43, 46, 48, 50, 52], "export": [2, 9, 23, 25, 27, 35, 42, 49], "desir": [2, 25, 36], "extract": 2, "654": 2, "give": [2, 31], "clear": 2, "messag": [2, 9, 11, 12, 18, 31, 34, 45], "try": [2, 13, 35, 41, 42, 45], "conflict": 2, "exist": [2, 10, 23, 26, 29, 31, 32, 33, 36], "500": [2, 18, 31, 36], "wa": [2, 21, 31, 32, 34, 36, 41], "download": [2, 27, 33, 35, 37, 39, 48, 52], "whether": [2, 35], "url": [2, 9, 21, 23, 35, 40, 45, 50, 52, 53], "In": [2, 21, 23, 25, 27, 33, 34, 36, 40, 41, 42, 45, 50], "spawn": [2, 42], "up": [2, 11, 21, 23, 25, 27, 29, 32, 33, 35, 40, 41, 42, 45, 50, 52], "increas": [2, 25, 35, 40, 41, 42, 50], "curl": [2, 20, 21, 24, 27, 28, 35, 37, 48, 50, 52], "x": [2, 21, 23, 28, 31, 35, 50, 52], "localhost": [2, 21, 23, 31, 35, 37, 44, 50, 51, 52], "model_nam": [2, 9, 10, 11, 14, 15, 17, 25, 31, 33, 35, 36, 37, 45, 50, 53], "like": [2, 10, 13, 14, 23, 27, 30, 31, 32, 33, 35, 36, 41, 42, 45], "egg": [2, 47], "json": [2, 3, 9, 10, 13, 18, 22, 23, 24, 27, 31, 32, 33, 35, 36, 40, 42, 43, 50, 51, 52, 53], "etc": [2, 25, 32, 41, 50], "mai": [2, 23, 25, 48], "write": [2, 23, 26, 43], "handler": [2, 9, 13, 14, 15, 23, 24, 29, 31, 32, 33, 35, 38, 42, 43, 50], "566": 2, "waveglow": [2, 25], "speech": [2, 25], "synthes": [2, 25], "creation": [2, 35], "mostli": [2, 42], "initi": [2, 11, 13, 17, 23, 25, 33, 35, 36, 38, 42, 50], "due": [2, 40], "erron": 2, "s": [2, 9, 10, 11, 18, 23, 24, 25, 26, 27, 30, 31, 33, 35, 37, 40, 41, 42, 45, 46, 50, 52, 53], "observ": 2, "miss": [2, 13, 25], "modul": [2, 3, 4, 24, 33, 48], "667": 2, "537": 2, "subpackag": [3, 4, 24], "submodul": [3, 4, 24], "dimens": [3, 4, 9, 24, 42], "metric_collector": [3, 4, 9, 24], "metric_encod": [3, 4, 9, 24], "metrics_stor": [3, 4, 9, 24], "process_memory_metr": [3, 4, 9, 24], "system_metr": [3, 4, 9, 24], "unit": [3, 4, 9, 15, 24, 26, 35, 36, 37], "model_servic": [3, 4, 9, 24], "protocol": [3, 4, 9, 14, 23, 24, 29, 31, 33, 35, 52], "otf_message_handl": [3, 4, 9, 24, 29, 31, 33], "torch_handl": [3, 4, 9, 24, 25, 26, 33], "request_envelop": [3, 9, 13, 24], "bodi": [3, 9, 13, 24, 25, 43], "kserv": [3, 9, 13, 20, 24, 35, 43], "kservev2": [3, 9, 13, 24], "unit_test": [3, 9, 13, 22, 24], "base_model": [3, 13, 15, 24], "test_util": [3, 13, 15, 24, 31, 33], "mock_context": [3, 13, 15, 24], "test_base_handl": [3, 9, 13, 24], "test_envelop": [3, 9, 13, 24], "test_image_classifi": [3, 9, 13, 24], "test_image_segment": [3, 9, 13, 24], "test_mnist_kf": [3, 9, 13, 24], "test_object_detector": [3, 9, 13, 24], "base_handl": [3, 4, 9, 24, 25, 42], "contract": [3, 4, 9, 24], "densenet_handl": [3, 4, 9, 24], "image_classifi": [3, 4, 9, 21, 24, 25, 27, 29, 31], "image_segment": [3, 4, 9, 24], "object_detector": [3, 4, 9, 24], "text_classifi": [3, 4, 9, 21, 24], "text_handl": [3, 4, 9, 24], "vision_handl": [3, 4, 9, 24], "timeit_decor": [3, 4, 9, 24], "arg_pars": [3, 4, 24], "model_load": [3, 4, 24], "model_serv": [3, 4, 24], "model_service_work": [3, 4, 24], "run_circleci_test": [3, 4, 24], "setup": [3, 4, 23, 24, 25, 28, 32, 33, 35, 37, 50], "regression_test": [3, 4, 24], "torchserve_san": [3, 4, 22, 24], "ts_script": [3, 4, 22, 24, 27, 28, 29, 32, 48, 49], "api_util": [3, 4, 24], "backend_util": [3, 4, 24], "frontend_util": [3, 4, 24], "install_depend": [3, 4, 22, 24, 27, 28, 48], "install_from_src": [3, 4, 24, 48, 49], "marsgen": [3, 4, 24], "modelarchiver_util": [3, 4, 24], "print_env_info": [3, 4, 24], "regression_util": [3, 4, 24], "sanity_util": [3, 4, 24], "shell_util": [3, 4, 24], "torchserve_grpc_cli": [3, 4, 24, 27, 29], "tsutil": [3, 4, 24], "validate_model_on_gpu": [3, 4, 24], "workflow_archiver_util": [3, 4, 24], "pars": [9, 13, 18, 52], "torchserv": [9, 13, 14, 17, 18, 20, 22, 24, 25, 31, 35, 37, 39, 43, 47, 51, 52], "argpars": 9, "sourc": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 23, 27, 33], "parser": [9, 32], "todo": 9, "add": [9, 10, 26, 29, 32, 33, 34, 35, 43, 48, 50], "static": [9, 23, 41], "extract_arg": 9, "arg": [9, 13, 16, 25, 32, 33, 35], "none": [9, 10, 11, 12, 13, 17, 18, 23, 25, 31, 33, 35, 36], "model_service_worker_arg": 9, "backend": [9, 18, 21, 25, 27, 29, 30, 31, 33, 34, 35, 37], "socket": [9, 12, 32, 42], "ts_parser": 9, "incom": [9, 21, 25, 32], "request": [9, 11, 13, 14, 21, 23, 24, 27, 28, 29, 31, 32, 33, 34, 35, 36, 37, 39, 40, 45, 50, 53], "manifest": [9, 11, 25, 32, 35, 42, 45, 52], "batch_siz": [9, 21, 35, 42, 53], "mms_version": 9, "limit_max_image_pixel": [9, 23], "model_yaml_config": [9, 23, 33], "inform": [9, 13, 21, 23, 25, 27, 32, 33, 34, 35, 36, 37, 41, 42], "fix": [9, 41, 43], "time": [9, 10, 18, 21, 23, 25, 27, 31, 32, 33, 34, 35, 45, 46, 53], "get_all_request_head": 9, "idx": [9, 10, 17, 36], "int": [9, 10, 13, 36, 53], "dict": [9, 11, 13, 25, 32, 35, 36, 53], "str": [9, 10, 13, 18, 25, 31, 33, 36], "get_request_head": [9, 17, 35], "kei": [9, 10, 14, 23, 35, 36, 43, 50], "get_request_id": 9, "get_response_content_typ": 9, "get_response_head": 9, "get_response_statu": 9, "tupl": 9, "request_processor": 9, "set_all_response_statu": 9, "200": [9, 29, 31, 33, 34, 35, 36], "phrase": 9, "statu": [9, 21, 29, 31, 33, 35, 36, 52], "individu": 9, "param": [9, 10, 13, 18, 25, 27, 50, 53], "set_response_content_typ": 9, "set_response_head": 9, "set_response_statu": 9, "index": [9, 10, 26, 36, 50], "sent": [9, 11, 23, 25, 29, 31, 33], "handl": [9, 11, 13, 14, 21, 23, 26, 29, 31, 32, 33, 35, 42, 45], "requestprocessor": 9, "request_head": 9, "processor": [9, 23], "add_response_properti": 9, "get_request_properti": 9, "get_response_status_cod": 9, "get_response_status_phras": 9, "report_statu": 9, "reason_phras": 9, "loader": [9, 32, 42], "modelload": 9, "abstract": [9, 11, 14, 25], "gpu_id": [9, 17, 25], "envelop": [9, 14, 24, 32], "bool": [9, 16, 18], "from": [9, 11, 12, 13, 14, 21, 22, 23, 26, 29, 30, 31, 32, 33, 34, 35, 37, 38, 40, 41, 42, 43, 44, 45, 46, 50, 51, 52], "paramet": [9, 10, 11, 12, 13, 14, 18, 25, 33, 34, 35, 36, 45, 46, 50, 52, 53], "modelloaderfactori": 9, "get_model_load": 9, "tsmodelload": 9, "1": [9, 14, 21, 23, 25, 26, 27, 31, 34, 35, 36, 37, 39, 40, 42, 45, 50, 52, 53], "metrics_cach": 9, "metricscacheyamlimpl": 9, "load_properti": 9, "file_path": 9, "read": [9, 21, 23, 25], "map": [9, 10, 13, 18, 26, 33, 53], "modelservicework": 9, "mm": [9, 11], "front": [9, 10], "end": [9, 10, 36, 38, 41], "commun": [9, 39], "binari": [9, 32], "torchmodelservicework": 9, "s_type": 9, "s_name": 9, "host_addr": 9, "port_num": 9, "metrics_config": [9, 36], "handle_connect": 9, "cl_socket": 9, "connect": [9, 23, 32, 35, 40], "load_model": [9, 21, 23], "load_model_request": 9, "expect": [9, 14, 21, 23, 26, 31, 40, 42, 43, 50], "modelpath": 9, "modelnam": [9, 21, 23, 35, 36, 37, 52], "cpu": [9, 10, 22, 25, 27, 28, 30, 36, 39, 41, 42], "els": [9, 13, 25, 35, 36], "wrapper": [9, 11], "unwrapp": 9, "batchsiz": [9, 21, 23, 29, 33, 35, 52], "limitmaximagepixel": 9, "limit": [9, 33, 35, 40, 42, 49, 50, 52], "pillow": [9, 41], "max_image_pixel": 9, "run_serv": 9, "process": [9, 10, 11, 13, 21, 23, 25, 27, 30, 32, 33, 35, 38, 40, 41, 42], "listen": [9, 27, 29, 31, 32, 35, 37, 44, 51, 52], "customservic": 9, "definit": [9, 13, 18, 36], "entry_point": 9, "predict": [9, 13, 14, 20, 21, 23, 24, 26, 29, 33, 34, 35, 36, 45, 50, 53], "request_input": 9, "retrieve_data_for_infer": 9, "requestid": 9, "111": [9, 14], "222": 9, "3333": 9, "contenttyp": 9, "val1": 9, "set_cl_socket": 9, "emit_metr": [9, 36], "emit": [9, 36], "dictionari": [9, 10, 13, 14, 18, 25, 35], "metric_nam": [9, 36], "when": [9, 10, 13, 21, 23, 25, 27, 28, 29, 30, 31, 33, 34, 36, 37, 40, 41, 42, 43, 44, 46], "doe": [9, 13, 29, 33, 36, 38, 42, 46], "follow": [9, 13, 14, 21, 23, 25, 26, 27, 28, 29, 31, 33, 34, 35, 36, 40, 42, 46, 48, 50, 51, 52, 53], "c": [9, 23, 27, 30, 32, 34, 40, 44, 45, 48], "expos": [9, 23, 42], "standard": [9, 25, 36, 43], "ping": [9, 11, 21, 29, 31, 33], "descript": [9, 20, 24, 26, 33, 45, 53], "d": [9, 13, 34, 35, 40, 43, 45], "wait": [9, 21, 33, 35, 40, 53], "to_dict": 10, "request_id": [10, 29, 31, 33, 36], "metric_method": 10, "gener": [10, 21, 22, 23, 25, 27, 29, 31, 33, 35, 39, 40, 42, 44, 46], "print": [10, 34, 35], "stdout": [10, 27, 34], "reset": [10, 35], "order": [10, 23, 36, 40, 41, 43, 50], "float": [10, 36], "dump": [10, 34], "metricencod": 10, "skipkei": 10, "fals": [10, 12, 21, 23, 25, 28, 31, 33, 35, 37], "ensure_ascii": 10, "check_circular": 10, "allow_nan": 10, "sort_kei": 10, "indent": 10, "separ": [10, 23, 26, 36, 40], "jsonencod": 10, "obj": 10, "collect": [10, 23, 30, 34, 35, 36, 37, 41, 42, 45], "metricsstor": 10, "deprec": 10, "And": [10, 27, 32, 35, 42], "keep": [10, 32, 35], "add_count": [10, 36], "counter": [10, 37], "increment": [10, 36], "add_error": 10, "error": [10, 13, 23, 34], "add_metr": [10, 36], "add_perc": [10, 36], "percentag": 10, "add_siz": [10, 36], "kb": [10, 36, 39], "gb": [10, 36], "add_tim": [10, 35, 36], "ms": [10, 21, 34, 35, 36, 37, 53], "latenc": [10, 29, 31, 36, 40, 41, 42], "accept": [10, 23, 25, 33, 35, 36], "memori": [10, 21, 23, 33, 36, 40, 42], "pass": [10, 16, 17, 21, 23, 25, 33, 35, 38, 42, 43, 46, 52, 53], "pid": [10, 21, 32, 35], "gpuid": [10, 21], "check_process_mem_usag": 10, "stdin": 10, "mem_util": 10, "get_cpu_usag": 10, "psutil": [10, 32, 49], "collect_al": 10, "mod": 10, "num_of_gpu": 10, "cpu_util": 10, "disk_avail": 10, "disk_us": 10, "disk_util": 10, "gpu_util": 10, "memory_avail": 10, "memory_us": 10, "memory_util": 10, "element": 10, "modelservic": 11, "wrap": [11, 45], "preprocess": [11, 13, 25, 33, 34, 35, 43, 52, 53], "manner": 11, "easili": [11, 44], "backward": [11, 32, 42], "raw": [11, 13, 14, 21, 25, 27, 31, 49, 51], "back": [11, 25, 33], "client": [11, 20, 24, 31, 32, 33, 35, 40, 44], "healthi": [11, 21, 31], "singlenodeservic": 11, "singlenodemodel": 11, "otf": 12, "create_load_model_respons": 12, "create_predict_respons": 12, "ret": 12, "req_id_map": 12, "ts_stream_next": 12, "encode_response_head": 12, "resp_hdr_map": 12, "retrieve_msg": 12, "conn": 12, "retriev": [12, 36], "channel": [12, 23], "send_intermediate_predict_respons": [12, 29, 31, 33], "state_dict": 13, "basehandl": [13, 15, 16, 18, 26, 35, 36, 38, 42], "abc": [13, 14, 33, 36], "describe_handl": [13, 35], "explain_handl": [13, 25, 35], "data_preprocess": [13, 25, 35], "raw_data": [13, 25], "tensor": [13, 25, 33, 41, 53], "unprocess": [13, 25], "target": [13, 21, 25, 37, 41, 50], "outcom": [13, 35, 50], "contain": [13, 23, 25, 26, 33, 35, 38, 43, 45, 50], "pertain": [13, 34, 35], "kwarg": [13, 16, 25], "pt": [13, 16, 17, 25, 38, 50], "first": [13, 21, 23, 25, 27, 28, 30, 40, 41, 46], "rais": [13, 25], "runtimeerror": [13, 25], "setup_ort_sess": 13, "model_pt_path": [13, 25, 42], "map_loc": 13, "densenethandl": 13, "match": [13, 25, 36], "list_classes_from_modul": [13, 18], "parent_class": [13, 18], "imageclassifi": [13, 15, 25], "visionhandl": 13, "get_max_result_class": 13, "image_process": 13, "resiz": 13, "256": 13, "interpol": 13, "bilinear": 13, "max_siz": 13, "antialia": 13, "warn": [13, 34], "centercrop": 13, "224": 13, "totensor": 13, "normal": 13, "mean": [13, 32, 42], "485": 13, "456": 13, "406": 13, "std": 13, "229": 13, "225": 13, "set_max_result_class": 13, "topk": 13, "imagesegment": [13, 15], "n": [13, 22, 23, 26, 29, 31, 33, 34, 35, 48], "k": [13, 21, 50], "objectdetector": [13, 15], "threshold": 13, "NOT": 13, "textclassifi": 13, "texthandl": 13, "get_insight": [13, 25], "text_preprocess": 13, "calcul": [13, 25, 33], "insight": [13, 25, 42], "word": 13, "import": [13, 25, 29, 31, 33, 35, 36, 42], "form": [13, 14, 23, 36, 42], "whose": [13, 25, 39], "ngram": 13, "2": [13, 14, 21, 23, 25, 26, 28, 30, 31, 33, 35, 36, 41, 50, 52, 53], "come": [13, 23, 32, 41, 42, 45], "output_explain": [13, 25], "hit": 13, "basic": [13, 15, 25, 33], "cleanup": 13, "oper": [13, 21, 36, 40, 41, 42], "html": [13, 22], "lowercas": 13, "expand": 13, "i": [13, 22, 23, 29, 30, 31, 33, 36, 38, 41, 48, 50], "don": [13, 32], "do": [13, 22, 23, 25, 26, 33, 34, 36, 38, 42, 50], "accent": 13, "punctuat": 13, "source_vocab": 13, "after": [13, 27, 28, 34, 35, 36, 41, 42, 45, 46, 48], "perfom": 13, "get_source_vocab_path": 13, "ctx": [13, 33, 36], "get_word_token": 13, "input_token": 13, "construct": 13, "necessari": [13, 27, 33], "summarize_attribut": 13, "attribut": [13, 25, 36, 42], "summaris": 13, "multipl": [13, 23, 31, 33, 38, 40, 42, 50], "vision": [13, 23, 41, 50], "tensor_data": 13, "requestenvelop": 14, "reformat": 14, "orchestr": [14, 43], "seldon": [14, 32, 43], "flat": [14, 43], "item": [14, 35, 41, 48, 52], "vice": 14, "versa": 14, "baseenvelop": 14, "handle_fn": [14, 15], "interfac": [14, 23, 48], "format_output": 14, "ar": [14, 21, 22, 23, 25, 27, 28, 29, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 46, 48, 50, 53], "parse_input": 14, "grab": 14, "bodyenvelop": 14, "structur": [14, 43, 53], "outlin": 14, "www": [14, 23, 33], "tensorflow": 14, "org": [14, 21, 23, 27, 31, 32, 33, 34, 35, 48], "tfx": 14, "api_rest": 14, "jsonenvelop": 14, "implement": [14, 25, 35, 41, 42, 53], "captur": [14, 34, 53], "insid": [14, 40], "kserveenvelop": 14, "readabl": 14, "kservev2envelop": 14, "fserv": 14, "id": [14, 18, 21, 23, 25, 35, 36, 50], "f0222600": 14, "353f": 14, "47df": 14, "8d9d": 14, "c96d96fa894": 14, "bert": [14, 25, 39, 40, 41], "model_vers": [14, 35, 36, 37], "datatyp": 14, "int64": 14, "37": 14, "66": 14, "108": [14, 36], "109": 14, "ensur": [15, 25, 36, 41, 48], "execut": [15, 18, 22, 23, 27, 28, 37, 40, 42, 48, 50, 53], "base_model_context": 15, "test_batch_handl": 15, "test_inference_with_profiler_works_with_custom_initialize_method": 15, "test_single_handl": 15, "test_binari": 15, "test_bodi": 15, "test_json": 15, "test_json_batch": 15, "test_json_double_batch": 15, "complex": 15, "make": [15, 21, 23, 25, 26, 27, 31, 32, 33, 35, 36, 40, 41, 42, 43, 45, 48, 50, 51], "sure": [15, 21, 25, 26, 27, 33, 35, 40, 41, 43, 48, 50], "mux": 15, "sever": [15, 32], "demux": 15, "result": [15, 25, 29, 31, 33, 40, 42, 51], "image_byt": 15, "tmp_path_factori": 15, "test_handl": 15, "test_handle_explain": 15, "simpl": [16, 27, 41], "feed": [16, 41, 53], "forward": [16, 25], "argmaxmodel": 16, "comput": [16, 27, 40], "everi": [16, 28, 36, 53], "subclass": 16, "although": 16, "recip": 16, "within": [16, 31, 35, 36, 45], "afterward": 16, "sinc": [16, 36, 41], "former": 16, "care": [16, 32], "regist": [16, 20, 21, 23, 24, 27, 29, 38, 42, 45, 50, 53], "hook": 16, "latter": 16, "silent": 16, "ignor": [16, 36, 46], "save_pt_fil": 16, "filepath": 16, "mock": 17, "ad": [17, 30, 32, 33, 36, 40, 42], "without": [17, 33, 35, 40, 45, 46], "mockcontext": 17, "model_pt_fil": 17, "tmp": [17, 50], "model_fil": 17, "mnist": [17, 25, 31, 39, 45, 50], "model_yaml_config_fil": 17, "replic": 17, "exp": 17, "timeit": 18, "decor": 18, "func": 18, "pt2backend": 18, "enum": [18, 32], "enumer": 18, "aot_cudagraph": 18, "aot_eag": 18, "aot_nvfus": 18, "fx2trt": 18, "inductor": 18, "ipex": [18, 42], "nvfuser": 18, "ofi": 18, "onnxrt": 18, "torchxla_trace_onc": 18, "except": [18, 21, 36, 40], "predictionexcept": [18, 25], "error_cod": 18, "check_valid_pt2_backend": 18, "get_yaml_config": 18, "yaml_file_path": 18, "load_label_map": 18, "mapping_file_path": 18, "friendli": [18, 26], "map_class_to_label": 18, "prob": 18, "lbl_class": 18, "probabl": [18, 26, 31], "stream": [20, 24, 31], "unregist": [20, 23, 24, 29, 53], "aggreg": [21, 32, 35, 36, 40], "send": [21, 23, 25, 27, 29, 31, 33], "ml": [21, 40], "dl": 21, "onc": [21, 29, 31, 33, 37, 42, 50], "design": [21, 23, 32], "nativ": [21, 23, 42], "host": [21, 23, 27, 28, 32, 33, 34, 36, 37, 44, 45, 50], "resourc": [21, 27, 29, 33, 35, 36, 40, 41, 48, 52], "most": [21, 23, 25, 31, 32, 34, 41, 42], "turn": 21, "reduc": [21, 41], "expens": [21, 41], "jump": 21, "what": [21, 30, 33, 42, 45], "max_batch_delai": [21, 35], "know": [21, 27, 41, 45], "maximum": [21, 23, 31, 35, 40, 52, 53], "fill": [21, 40], "each": [21, 23, 26, 29, 31, 33, 40, 42, 46, 53], "full": [21, 29, 31, 33, 35], "see": [21, 23, 25, 26, 27, 31, 33, 34, 35, 36, 37, 40, 42, 44, 45, 51, 52], "hug": 21, "4": [21, 23, 33, 35, 36, 37, 39, 47, 52, 53], "interest": [21, 42, 45], "delai": [21, 23, 34, 35, 53], "receiv": [21, 23, 29, 31, 33, 34, 36, 40, 53], "doesn": [21, 23, 35], "timer": 21, "ever": 21, "were": [21, 36, 39, 45], "let": [21, 23, 45], "mar": [21, 23, 25, 27, 31, 32, 33, 35, 36, 38, 39, 45, 50, 52, 53], "50": [21, 53], "millisecond": [21, 35, 36, 37, 53], "8081": [21, 23, 35, 44, 50, 52], "milli": 21, "second": [21, 23, 28, 35, 40, 42, 50], "defaultvers": [21, 23], "marnam": [21, 23], "minwork": [21, 23, 31, 33, 35, 52], "maxwork": [21, 23, 33, 35, 52], "maxbatchdelai": [21, 23, 33, 35, 52], "responsetimeout": [21, 23, 33], "120": [21, 23, 33, 35], "associ": [21, 36], "relat": [21, 27, 33, 50], "frontend": [21, 23, 27, 29, 31, 33, 48], "tri": [21, 38], "bring": [21, 30], "inferenc": [21, 32], "thing": [21, 42, 45], "includ": [21, 26, 36, 42, 43, 50, 53], "port": [21, 29, 31, 32, 35, 37, 44, 45, 48, 51, 52], "8080": [21, 23, 27, 31, 35, 44, 50, 51], "inference_address": [21, 23], "127": [21, 23, 27, 31, 34, 37, 50], "management_address": [21, 23], "have": [21, 23, 25, 27, 29, 31, 33, 35, 36, 37, 40, 41, 42, 45, 48, 50, 53], "go": [21, 30, 41, 43], "10m": 21, "mar_fil": [21, 31, 33, 35], "batch_v2": 21, "10": [21, 23, 34, 35, 36, 48, 49], "initial_work": [21, 35, 50], "properli": [21, 35], "modelvers": [21, 35], "modelurl": [21, 35], "loadedatstartup": [21, 35], "9000": [21, 34, 35, 37], "starttim": [21, 35, 36], "2021": 21, "06": 21, "14t23": 21, "18": [21, 31, 34, 40, 45], "21": [21, 36, 37], "793z": 21, "memoryusag": [21, 35], "1726554112": 21, "19946": 21, "gpuusag": [21, 35], "mib": 21, "678": 21, "ljo": 21, "kitten": [21, 27, 29, 39, 50], "jpg": [21, 27, 29, 31, 39, 50, 51], "tiger_cat": [21, 27], "5848360657691956": 21, "tabbi": [21, 27, 31], "3782736361026764": 21, "egyptian_cat": [21, 27], "03441936895251274": 21, "lynx": [21, 27], "0005633446853607893": 21, "quilt": 21, "0002698268508538604": 21, "about": [21, 23, 25, 27, 41, 42, 45], "pleas": [21, 26, 35, 39, 40, 42, 50], "5000": [21, 52, 53], "Then": [21, 27, 33, 34], "14t22": 21, "44": [21, 35], "36": 21, "742z": 21, "19116": 21, "similar": [21, 33, 35], "previou": [21, 34, 35, 36, 40, 52], "being": [21, 33, 36, 41, 50], "entrypoint": 21, "referenc": [21, 36], "metrics_address": [21, 23], "8082": [21, 23, 37], "number_of_netty_thread": [21, 23], "32": [21, 40], "job_queue_s": [21, 23], "1000": [21, 35, 36], "home": [21, 22, 23, 27, 40, 48, 49, 50], "100": [21, 23, 33, 34, 35, 52], "g": [21, 22, 23, 31, 33, 35, 41, 48, 50, 53], "cv": 21, "cu102": [21, 22, 27], "rm": [21, 50], "p": [21, 22, 50], "v": [21, 35, 45, 50], "ubuntu": [21, 28, 35, 40], "alreadi": [22, 27, 36, 41, 45, 53], "dev": [22, 28, 32, 48], "cu121": [22, 27], "cu118": [22, 27], "cu117": [22, 27, 28], "cu116": [22, 27, 28], "cu113": [22, 27], "cu111": [22, 27], "cu101": [22, 27], "cu92": [22, 27], "gradlew": [22, 32], "clean": [22, 23, 42], "checkstyl": 22, "findbug": 22, "pmd": 22, "ut": 22, "report": [22, 27, 42], "m": [22, 27, 29, 30, 34], "cov": 22, "htmlcov": 22, "pylint": 22, "rn": 22, "rcfile": 22, "pylintrc": 22, "cd": [22, 29, 48, 49], "htmlcov_ut": 22, "model_archiv": 22, "pip": [22, 27, 29, 33, 48, 49], "htmlcov_it": 22, "integ_test": 22, "abov": [22, 23, 25, 27, 34, 36, 39, 40, 48, 50, 53], "excut": 22, "npm": [22, 48], "linux": [22, 24, 28], "sudo": [22, 37, 40, 49], "apt": [22, 49], "y": 22, "nodej": [22, 48], "mac": 22, "brew": 22, "broken": 22, "directori": [22, 23, 25, 27, 45, 46, 50], "recurs": 22, "config": [22, 31, 32, 36, 37, 38, 40, 42, 43, 45, 46, 48, 50], "link_check_config": 22, "done": [22, 25, 36, 42], "suffici": 23, "want": [23, 25, 27, 34, 35, 36, 38, 40, 45, 50], "topic": [23, 30, 45], "avail": [23, 26, 27, 31, 33, 35, 36, 42, 45, 46, 48, 50], "three": 23, "prioriti": 23, "thei": [23, 33, 36, 45], "chang": [23, 25, 27, 31, 35, 36, 37, 46, 48, 51, 52, 53], "behavior": [23, 25, 32, 33, 45], "java": [23, 28, 44, 48], "pythonpath": [23, 35], "higher": [23, 40, 42], "ts_config_fil": 23, "log4j2": [23, 34, 36, 45], "xml": [23, 34, 36, 45], "foreground": 23, "disabl": [23, 34, 37, 45], "footprint": [23, 40], "vmarg": [23, 34], "adjust": [23, 29, 35], "fit": [23, 32, 33], "valid": 23, "present": [23, 25, 35, 36, 45, 48, 52], "model1": [23, 53], "model2": [23, 53], "disk": [23, 36, 41], "pathnam": 23, "authent": 23, "avoid": [23, 27, 42], "unauthor": 23, "bind": 23, "ip": 23, "8443": [23, 50], "privat": [23, 35], "network": [23, 25, 41], "172": 23, "16": [23, 34, 40], "7070": [23, 29], "7071": [23, 29], "grpc_inference_port": 23, "grpc_management_port": 23, "coupl": [23, 36], "443": 23, "whatev": 23, "traffic": 23, "must": [23, 25, 27, 35, 42, 52], "certif": 23, "keystor": 23, "password": 23, "pkcs12": 23, "pkcs8": 23, "openssl": 23, "x509": 23, "chain": 23, "keytool": 23, "storepass": 23, "own": [23, 25, 35, 36, 40, 42], "genkei": 23, "keyalg": 23, "rsa": 23, "alia": [23, 48], "p12": 23, "changeit": 23, "storetyp": 23, "3600": 23, "keysiz": 23, "2048": 23, "dname": 23, "cn": 23, "my_t": 23, "ou": 23, "o": [23, 27, 31, 41, 49, 51], "l": 23, "palo": 23, "alto": 23, "st": 23, "california": 23, "8444": [23, 50], "8445": [23, 50], "keystore_pass": 23, "keystore_typ": 23, "self": [23, 25, 28, 33, 35, 36, 38, 42], "sign": 23, "cert": 23, "req": 23, "dai": [23, 28], "365": [23, 37], "newkei": 23, "keyout": 23, "mykei": 23, "mycert": 23, "pem": 23, "private_key_fil": 23, "certificate_fil": 23, "addit": [23, 33, 35, 36, 40, 41, 42, 43, 45], "header": [23, 31, 33], "tell": [23, 31, 34, 42, 45], "browser": [23, 37], "web": [23, 44, 45], "domain": 23, "permiss": 23, "select": [23, 25, 40], "cors_allowed_origin": 23, "yourdomain": 23, "preflight": 23, "cors_allowed_method": 23, "put": [23, 25, 32, 33, 35, 50], "cors_allowed_head": 23, "xx": 23, "maxdirectmemorys": 23, "affect": [23, 41], "prefer_direct_buff": 23, "depend": [23, 27, 28, 29, 32, 33, 36, 40, 42, 48, 49], "part": [23, 25, 30, 32, 35, 52], "seamless": [23, 25], "install_py_dep_per_model": [23, 50], "tar": 23, "gz": [23, 34], "might": [23, 27, 34, 35, 41, 45], "sensit": 23, "credenti": [23, 35], "arbitrari": 23, "secur": 23, "risk": 23, "blacklist_env_var": 23, "regular": [23, 36, 42], "express": 23, "filter": 23, "number_of_gpu": [23, 42, 50], "devic": [23, 25, 26, 33, 40, 50], "pci": 23, "bu": 23, "enable_metrics_api": [23, 37], "parametername1": 23, "parametervalue1": 23, "parametername2": 23, "parametervalue2": 23, "parameternamen": 23, "parametervaluen": 23, "minimum": [23, 35, 53], "msec": 23, "timeout": [23, 32, 33, 35, 53], "sec": 23, "over": [23, 29, 31, 33, 40, 41, 50, 53], "default_response_timeout": 23, "noop": [23, 35], "vgg16": [23, 39, 45], "yaml": [23, 31, 38, 42, 52, 53], "embed": [23, 25], "distinct": 23, "determin": [23, 32, 34, 40, 42, 50], "final": [23, 36, 40], "lowest": 23, "highest": [23, 31, 41], "fulli": [23, 40], "pippi": 23, "rpc": [23, 29, 33], "alloc": [23, 27, 40], "deviceid": [23, 33, 36], "round": [23, 25, 33, 35, 36], "robin": [23, 25, 33], "strategi": [23, 28], "assign": [23, 32, 33, 35], "otherwis": [23, 26, 33], "tune": [23, 30, 42], "impact": [23, 41, 42], "scalabl": 23, "throughput": [23, 27, 33, 34, 40, 41, 42], "enable_envvars_config": 23, "thread": [23, 32, 33, 36, 42], "child": 23, "eventloopgroup": 23, "group": 23, "eventloop": [23, 32], "event": 23, "logic": [23, 33, 42, 45], "netty_client_thread": 23, "workerthread": [23, 32], "default_workers_per_model": 23, "job": [23, 28, 30, 32, 36], "queue": [23, 33, 36, 40], "async_log": [23, 34], "asynchron": [23, 35], "deem": [23, 35], "unrespons": [23, 35], "reboot": [23, 35], "unregister_model_timeout": 23, "decode_input_request": 23, "decod": [23, 31, 33, 35, 41], "known": [23, 25, 41, 46, 49], "bytearrai": 23, "convers": [23, 42], "initial_worker_port": 23, "auto": [23, 33], "model_server_hom": 23, "6553500": 23, "pil": 23, "pixel": 23, "larg": [23, 24, 30, 38, 41], "payload": 23, "allowed_url": 23, "comma": 23, "regex": 23, "amazonaw": 23, "use_env_allowed_url": 23, "workflow_stor": [23, 36, 45, 52], "disable_system_metr": 23, "ts_": 23, "property_nam": 23, "ts_inference_address": 23, "troubleshoot": [24, 30], "coverag": 24, "advanc": [24, 33], "window": [24, 31, 46], "subsystem": 24, "wsl": 24, "faq": [24, 30], "invok": [25, 36, 45, 50], "Is": [25, 38], "dir": [25, 33, 50], "shown": [25, 33, 36, 40, 41, 42], "ll": [25, 27, 42], "act": 25, "def": [25, 29, 31, 33, 35, 36, 53], "entry_point_function_nam": 25, "sampl": [25, 27, 36, 39], "jit": [25, 32, 42], "similarli": 25, "global": [25, 37, 53], "is_avail": 25, "serialized_fil": 25, "serializedfil": 25, "os": [25, 28, 35], "join": [25, 31, 33], "isfil": 25, "engag": 25, "ask": [25, 30, 36], "startup": [25, 32, 46], "down": [25, 46], "against": [25, 34], "typic": 25, "modelhandl": [25, 33], "__init__": [25, 33], "_context": 25, "prediciton": 25, "pred_out": 25, "unexpect": 25, "513": 25, "nonetheless": 25, "below": [25, 26, 27, 33, 36, 37, 38, 40], "init": [25, 33], "pattern": [25, 34], "maintain": [25, 35], "model_handl": 25, "preprocessed_data": 25, "model_input": 25, "ndarrai": 25, "model_output": 25, "inference_output": 25, "postprocess_output": 25, "achiev": [25, 31, 33, 41], "place": [25, 50], "written": [25, 43], "hi": 25, "algorithm": [25, 32, 33], "lig": 25, "layerintegratedgradi": 25, "captum_sequence_forward": 25, "_is_explain": [25, 35], "so": [25, 27, 33, 36, 48], "neccessari": 25, "logger": [25, 35], "info": [25, 34, 35], "row": 25, "isinst": 25, "statement": 25, "default_handler_nam": 25, "defaulthandlerclass": 25, "customimageclassifi": 25, "procsess": 25, "goe": 25, "digit": [25, 39, 45], "identifi": 25, "model_version_numb": 25, "path_to_model_architecture_fil": 25, "path_to_state_dict_fil": 25, "comma_seperarted_additional_fil": 25, "python3": 25, "skip": 25, "waveglow_synthes": 25, "waveglow_model": 25, "nvidia_waveglowpyt_fp32_20190306": 25, "pth": [25, 27, 50], "waveglow_handl": 25, "tacotron": 25, "nvidia_tacotron2pyt_fp32_20190306": 25, "vcpu": [25, 27], "fashion": [25, 40], "consum": 26, "imagenet": [26, 39], "dataset": [26, 39], "rgb": 26, "top": [26, 43], "torchvis": [26, 41, 47, 49], "ag": 26, "comprehens": 26, "page": [26, 33, 35, 39, 42, 45, 52], "automat": [26, 27, 29, 33, 43, 50], "numer": 26, "simpli": [26, 33, 43], "welcom": 26, "isn": 26, "cover": [26, 34, 42, 45], "model_packag": 26, "alwai": [26, 41, 43], "saniti": 26, "submit": [26, 32, 39], "conda": [27, 32], "12": 27, "9": [27, 39], "complet": [27, 29, 31, 33, 34, 35, 41, 50], "clone": [27, 29, 48, 49], "repositori": 27, "git": [27, 29, 47, 48, 49], "parent": 27, "root": 27, "my_path": 27, "mkdir": [27, 29, 50], "wget": [27, 49], "densenet161": [27, 29, 39], "8d451a50": 27, "repo": [27, 29], "densenet_161": 27, "index_to_nam": [27, 50], "equal": [27, 31, 33, 36], "power": 27, "lot": [27, 45], "autosc": 27, "consider": 27, "minim": [27, 33, 37], "move": [27, 33, 50], "later": 27, "finer": 27, "grain": 27, "particular": 27, "u": [27, 29], "grpcio": [27, 29], "protobuf": [27, 29, 33], "proto": [27, 29], "grpc_tool": [27, 29], "protoc": [27, 29], "proto_path": [27, 29], "src": [27, 29, 32, 34, 36, 48], "python_out": [27, 29], "grpc_python_out": [27, 29], "cute": 27, "githubusercont": [27, 31, 49, 51], "kitten_smal": [27, 31, 39, 51], "46933549642562866": 27, "4633878469467163": 27, "06456148624420166": 27, "0012828214094042778": 27, "plastic_bag": 27, "00023323034110944718": 27, "interact": [27, 32], "ve": [27, 42], "seen": 27, "deep": [27, 33, 42, 45], "learn": [27, 30, 33, 42, 45], "registr": [27, 32, 35, 50, 52, 53], "record": 27, "high": [27, 29, 31, 33, 34, 41, 42, 45], "level": [27, 34, 36, 37, 40, 41, 42, 45, 53], "percentil": 27, "precis": 27, "visual": [27, 48], "debugg": 27, "under": [28, 35, 42, 43, 48], "trigger": 28, "manual": [28, 36], "workflow_dispatch": 28, "push": 28, "branch": 28, "pull": [28, 32, 39], "pull_request": 28, "nightli": [28, 32], "15am": 28, "schedul": [28, 40, 42], "cron": 28, "15": [28, 34, 37, 47], "02": [28, 35], "everyth": 28, "align": 28, "machin": [28, 30], "20": [28, 35, 37, 41], "04": 28, "ci": [28, 32], "term": [28, 40], "matrix": 28, "maco": [28, 35], "fail": [28, 48, 53], "fast": [28, 42], "indic": [28, 50], "One": [28, 36], "11": [28, 40], "7": [28, 33, 39], "v3": 28, "architectur": [28, 30], "x84": 28, "zulu": 28, "shell": 28, "codecov": 28, "io": [28, 41], "chmod": 28, "streampredict": [29, 33], "registermodel": [29, 35], "unregistermodel": [29, 35], "free": [29, 35, 42, 52], "scalework": [29, 35], "dynam": [29, 30, 32, 33, 35, 36, 41], "better": [29, 33, 35, 40, 42], "listmodel": [29, 35], "queri": [29, 35, 37, 52], "describemodel": [29, 35], "setdefault": [29, 35], "stub": 29, "intermedi": [29, 31, 33], "llm": [29, 31, 33], "until": [29, 31, 33, 35], "forc": [29, 33], "inferenceapisservic": [29, 31, 33], "empti": [29, 33], "torchservehealthrespons": [29, 33], "predictionsrequest": [29, 33], "predictionrespons": [29, 33], "style": [29, 31, 33], "rang": [29, 31, 33, 36], "intermediate_respons": [29, 31, 33], "success": [29, 31, 33, 34, 46], "hello": [29, 31, 33], "world": [29, 31, 33], "anim": [30, 42], "draw": [30, 42], "studi": [30, 42], "walmart": 30, "search": 30, "grok": 30, "intel": [30, 41], "principl": [30, 41], "amazon": [30, 36, 37], "inferentia": 30, "sagemak": [30, 32], "children": 30, "life": 30, "evolut": 30, "cresta": 30, "migrat": [30, 36], "quantit": 30, "comparison": 30, "platform": [30, 32, 43], "indepth": 30, "why": [30, 45], "best": [30, 42], "practic": 30, "improv": [30, 40, 41, 42], "perfrom": 30, "view": [30, 31, 35, 37, 45], "torcherv": 30, "frequent": 30, "question": 30, "swagger": [31, 35, 44], "codegen": [31, 35, 44], "modelserv": [31, 35], "maxretrytimeoutinsec": 31, "5min": 31, "recov": 31, "dead": 31, "activ": [31, 33, 39, 40, 41, 46], "unhealthi": 31, "less": 31, "resnet": [31, 34, 39, 45], "f": [31, 50], "squeezenet1_1": [31, 35], "dog": [31, 39, 45], "re": [31, 42, 43, 45, 48, 53], "open": [31, 32, 33, 42], "rb": 31, "n02123045": 31, "42514491081237793": 31, "chunk": 31, "test_echo_stream_infer": [31, 33], "start_torchserv": [31, 33], "no_config_snapshot": [31, 33], "gen_mar": [31, 33], "register_model": [31, 33], "echo_stream": [31, 33], "tf_inference_api": [31, 33], "foo": [31, 33], "assert": [31, 33], "transfer": [31, 33], "iter_cont": [31, 33], "chunk_siz": [31, 33], "append": [31, 33, 48], "unregister_model": [31, 33], "test_data": 31, "png": [31, 39], "004570948731989492": 31, "006216969640322402": 31, "008197565423679522": 31, "009563574612830427": 31, "008999274832810742": 31, "009673474804303854": 31, "007599905146155397": 31, "v1": [31, 35], "kf_request_json": 31, "spec": 31, "grade": 32, "track": 32, "those": [32, 36, 42, 46], "weight": [32, 38, 42, 45], "compon": [32, 41, 48], "portion": 32, "lifecycl": 32, "actual": [32, 45], "script_modul": 32, "eager_mode_model": 32, "along": [32, 33, 43, 45, 50], "storag": 32, "authz": 32, "authn": 32, "drop": [32, 42], "loadabl": 32, "docker": [32, 42, 50], "dockerfil": 32, "experiment": 32, "project": [32, 33], "guarante": 32, "k8": 32, "serving_sdk": 32, "potenti": [32, 41], "stuff": 32, "termin": [32, 34, 35, 48], "easier": [32, 42], "kfserv": 32, "startserv": 32, "8903ca1fb059eab3c1e8eccdee1376d4ff52fb67": 32, "wlm": [32, 34], "workerstatelisten": 32, "close": 32, "workerst": 32, "workloadmanag": 32, "concurr": [32, 40, 41, 42], "hashmap": 32, "backendgroup": 32, "threadpool": 32, "executor": 32, "pool": 32, "task": 32, "batchaggreg": 32, "modelmanag": 32, "apiutil": 32, "configmanag": [32, 48], "split": [33, 53], "partit": 33, "speed": [33, 41, 42], "torchrun": 33, "capabl": 33, "visibl": 33, "suppos": 33, "eight": [33, 40], "nproc": 33, "worker1": 33, "worker2": 33, "illustr": 33, "stage": 33, "microbatch": 33, "inherit": [33, 35, 38], "our": [33, 40], "custom_handl": 33, "base_pippy_handl": 33, "basepippyhandl": 33, "handler_util": 33, "pt_pippi": 33, "initialize_rpc_work": 33, "get_pipline_driv": 33, "super": [33, 34, 45], "local_rank": 33, "device_count": 33, "world_siz": 33, "devicetyp": 33, "paralleltyp": 33, "pp": 33, "tp": 33, "pptp": 33, "rout": 33, "rank0": 33, "rank": 33, "fontend": 33, "wish": [33, 46], "input_nam": 33, "input_id": 33, "fx": 33, "trace": 33, "model_typ": 33, "hf": 33, "wise": 33, "leav": 33, "blank": 33, "rpc_timeout": 33, "1800": 33, "num_worker_thread": 33, "max_length": 33, "80": 33, "length": [33, 35, 41], "bloom": 33, "pippy_handl": 33, "model_checkpoints_path": 33, "tgz": 33, "progress": 33, "soon": 33, "microsoft": [33, 48], "get_ds_engin": 33, "base_deepspeed_handl": 33, "basedeepspeedhandl": 33, "ds_engin": 33, "ds": 33, "filenam": [33, 34], "dtype": 33, "float16": 33, "replace_with_kernel_inject": 33, "tensor_parallel": 33, "tp_size": 33, "method1": 33, "method2": 33, "ds_build_op": 33, "deepspeed_handl": 33, "advantag": 33, "further": [33, 34, 36, 37], "maxim": 33, "appli": [33, 42], "low_cpu_mem_usag": 33, "librari": [33, 42], "checkpoint": 33, "pretrain": [33, 50], "hub": [33, 50], "cach": [33, 36], "caus": 33, "omp_number_thread": 33, "immedi": [33, 35], "incur": 33, "503": 33, "chatgpt": 33, "effect": [33, 41], "action": 33, "reject": 33, "capac": 33, "busi": 33, "jobqueues": 33, "usejobticket": 33, "understand": [34, 41, 43, 50], "layout": 34, "Be": 34, "familiar": 34, "log4j": [34, 45], "rollingfil": 34, "access_log": 34, "env": [34, 48], "filepattern": 34, "dd": 34, "mmm": 34, "patternlayout": [34, 36], "iso8601": 34, "polici": 34, "sizebasedtriggeringpolici": 34, "timebasedtriggeringpolici": 34, "defaultrolloverstrategi": 34, "2018": [34, 35], "13": [34, 37], "56": 34, "976": 34, "backendwork": 34, "64003": 34, "118": 34, "remot": [34, 42, 44, 45], "took": 34, "ts_log": 34, "5p": 34, "debug": 34, "stderr": 34, "14": 34, "46": [34, 36], "51": 34, "656": 34, "workerlifecycl": 34, "nnvm": 34, "legacy_json_util": 34, "cc": 34, "209": 34, "symbol": 34, "v0": [34, 35, 36], "attempt": [34, 53], "upgrad": 34, "657": 34, "217": [34, 39], "successfulli": [34, 50, 52], "59": 34, "926": 34, "60": 34, "117": 34, "31": 34, "52": 34, "dlog4j": 34, "configurationfil": 34, "altern": [34, 35], "lightweight": 34, "consid": [34, 36, 40], "recent": 34, "lost": 34, "unexpectedli": 34, "decreas": [35, 40], "managementapisservic": 35, "subfold": [35, 52], "internet": [35, 52], "module_nam": 35, "method_nam": 35, "least": 35, "synchron": [35, 50], "acknowledg": 35, "onlin": 35, "response_timeout": 35, "respond": 35, "period": 35, "squeezenet_v1": [35, 45], "sse": 35, "secret": 35, "region": 35, "squeezenet1": 35, "account": [35, 36], "iam": 35, "role": 35, "awss3readonlyaccess": 35, "s3_sse_km": 35, "forgot": 35, "between": [35, 40], "202": 35, "4dc54158": 35, "c6de": 35, "42aa": 35, "b5dd": 35, "ebcb5f721043": 35, "47": 35, "aliv": 35, "been": [35, 36, 41, 45, 48, 50, 52], "ok": 35, "ecd2e502": 35, "382f": 35, "4c3b": 35, "b425": 35, "519fbf6d3b85": 35, "89": 35, "min_work": [35, 50], "max_work": 35, "pend": 35, "exceed": 35, "infinit": 35, "flavor": 35, "42adc58": 35, "6956": 35, "4198": 35, "ad07": 35, "db6c620c4c1e": 35, "b72b1ea0": 35, "81c6": 35, "4cce": 35, "92c4": 35, "530d3cfe5d4a": 35, "63": 35, "3997ccd4": 35, "ae44": 35, "4570": 35, "b249": 35, "e361b08d3d47": 35, "77": 35, "02t13": 35, "53": 35, "034z": 35, "89247744": 35, "jobqueuestatu": 35, "remainingcapac": 35, "pendingrequest": 35, "noop_2": 35, "metadata": [35, 45], "output_describ": 35, "_is_describ": 35, "start_tim": [35, 36], "is_profiler_en": 35, "enable_torch_profil": [35, 42], "_infer_with_profil": [35, 42], "stop_tim": [35, 36], "handlertim": [35, 36, 37], "customizedmetadata": 35, "9010": 35, "2022": 35, "08t11": 35, "03": 35, "974z": 35, "98972": 35, "data1": 35, "data2": 35, "delet": [35, 36, 52], "next_page_token": [35, 52], "next": [35, 40, 48, 50, 52], "pagin": [35, 52], "nextpagetoken": [35, 52], "noop_v0": 35, "broadli": 36, "interv": 36, "minut": 36, "metrics_mod": [36, 37], "ts_metrics_mod": 36, "agent": 36, "log_directori": 36, "ts_metric": 36, "model_metr": 36, "terminolog": 36, "alias": 36, "nameofcountermetr": 36, "gaug": [36, 37], "nameofgaugemetr": 36, "histogram": 36, "nameofhistogrammetr": 36, "inferencetimeinm": 36, "numberofmetr": 36, "count": [36, 37], "gaugemodelmetricnameexampl": 36, "histogrammodelmetricnameexampl": 36, "central": 36, "inventori": 36, "altogeth": 36, "unless": 36, "toward": 36, "percent": [36, 37], "whenev": 36, "metricscach": 36, "anyth": 36, "abil": 36, "primari": 36, "OR": [36, 50], "my_tc": 36, "bertseqclassif": [36, 40], "chosen": 36, "semant": 36, "requests2xx": [36, 37], "hostnam": [36, 37], "total": 36, "300": 36, "requests4xx": [36, 37], "400": 36, "requests5xx": [36, 37], "ts_inference_requests_tot": [36, 37], "ts_inference_latency_microsecond": [36, 37], "microsecond": [36, 37], "ts_queue_latency_microsecond": [36, 37], "queuetim": [36, 37], "spent": 36, "workerthreadtim": [36, 37], "exclud": 36, "workerloadtim": [36, 37], "workernam": [36, 37], "taken": [36, 46, 50], "cpuutil": [36, 37], "memoryus": [36, 37], "megabyt": [36, 37], "memoryavail": [36, 37], "memoryutil": [36, 37], "diskusag": [36, 37], "gigabyt": [36, 37], "diskutil": [36, 37], "diskavail": [36, 37], "gpumemoryutil": [36, 37], "gpumemoryus": [36, 37], "gpuutil": [36, 37], "predictiontim": [36, 37], "introduc": 36, "restrict": [36, 50], "metrictyp": 36, "statsd": 36, "my_machine_nam": 36, "timestamp": [36, 46], "1682098185": 36, "318": 36, "0416717529297": 36, "jsonpatternlayout": 36, "metricnam": 36, "15547180175781": 36, "124": 36, "13163757324219": 36, "qlog": 36, "qloglayout": 36, "qlogsetupmodeldepend": 36, "1646686978": 36, "program": [36, 48], "mxnetmodelserv": 36, "5790": 36, "98046875": 36, "eoe": 36, "147dda19895c": 36, "ant": [36, 37], "confus": [36, 37], "fetch": [36, 37], "posit": 36, "metric_typ": 36, "genericmetr": 36, "dimension_nam": 36, "name1": 36, "name2": [36, 45], "countermetr": 36, "criteria": 36, "add_": 36, "explicitli": [36, 42], "name_of_model": 36, "dim1": 36, "dim2": 36, "some_nam": 36, "some_valu": 36, "dimn": 36, "name_n": 36, "value_n": 36, "add_metric_to_cach": 36, "cachingmetr": 36, "newli": 36, "add_or_upd": 36, "dimension_valu": 36, "distanc": 36, "dim3": 36, "assum": [36, 43, 50], "distanceinkm": 36, "inferencetim": 36, "end_tim": 36, "sizeofimag": 36, "img_siz": 36, "utilization_perc": 36, "__str__": 36, "get_metr": 36, "gaugemetricnam": 36, "examplecustomhandl": 36, "metric_type_enum": 36, "customhandlerexampl": 36, "sleep": 36, "sake": 36, "handlerseparatecount": 36, "handlercount": 36, "78": 36, "anoth": [36, 50], "had": 36, "histogram_example_metr": 36, "idea": 36, "42": [36, 39], "approach": [36, 41, 42], "metric1": 36, "value1": 36, "value2": 36, "prior": 36, "suitabl": 36, "unlik": 36, "88665a372f4b": 37, "054508209228516": 37, "resnet18": [37, 39], "83": 37, "resnet18_1": 37, "4593": 37, "9001": 37, "4592": 37, "5829": 37, "7421875": 37, "82": 37, "93": 37, "290371": 37, "129": 37, "8245": 37, "62109375": 37, "325": 37, "05113983154297": 37, "64": 37, "globoff": 37, "instruct": [37, 42, 48, 50], "yml": 37, "scrape_interv": 37, "evaluation_interv": 37, "scrape_config": 37, "job_nam": 37, "static_config": 37, "9090": 37, "navig": 37, "graph": 37, "3000": 37, "systemctl": 37, "daemon": [37, 40], "reload": 37, "flowchart": 38, "simplifi": [38, 43], "td": 38, "id1": 38, "id13": 38, "id2": 38, "id3": 38, "id4": 38, "id5": 38, "onnx": [38, 42], "id6": 38, "tensorrt": [38, 41, 42], "id7": 38, "id8": 38, "id9": 38, "id10": 38, "id11": 38, "id21": 38, "id20": 38, "id15": 38, "id16": 38, "id14": 38, "id22": 38, "archiev": 38, "faster": [38, 39, 41], "id17": 38, "propos": 39, "inclus": 39, "special": 39, "thank": 39, "alexnet": 39, "216": 39, "106": 39, "41": 39, "489": 39, "squeezenet": [39, 45], "1_1": 39, "152": 39, "214": 39, "rcnn": 39, "coco": 39, "148": 39, "person": 39, "mask": 39, "158": 39, "ag_new": 39, "169": 39, "sample_text": 39, "fcn": 39, "101": 39, "193": 39, "deeplabv3": 39, "384": 39, "386": 39, "105": 39, "215": 39, "mmf": 39, "recognit": [39, 45], "charad": 39, "549": 39, "372cc": 39, "mp4": 39, "mgpu": 39, "sample_text_captum_input": [39, 40], "breed": 39, "war": [39, 52, 53], "spin": 40, "thu": [40, 41], "isol": 40, "kernel": [40, 42], "drawback": [40, 42], "occupi": [40, 42], "scenario": [40, 42], "offer": [40, 41, 43], "share": [40, 50], "leverag": [40, 42], "benefit": [40, 41], "smi": [40, 41, 48, 50], "exclus": 40, "shutdown": [40, 46], "echo": [40, 49], "quit": 40, "48": 40, "volta": 40, "lead": 40, "failur": [40, 53], "decis": 40, "repres": 40, "workload": [40, 42], "primarili": 40, "investig": 40, "evolv": 40, "g4dn": 40, "4xlarg": 40, "2xlarg": 40, "concentr": 40, "measur": 40, "ab": 40, "still": 40, "left": 40, "five": 40, "median": 40, "overwrit": [40, 42], "accordingli": 40, "huggingface_transform": 40, "10000": [40, 53], "600": 40, "seq_classification_artifact": 40, "skew": [40, 43], "therefor": 40, "neglect": 40, "figur": 40, "four": 40, "steadi": 40, "wors": 40, "interpret": 40, "experi": 40, "bigger": [40, 42], "With": [40, 50], "diminish": 40, "25": 40, "saw": 40, "mix": 40, "certain": 40, "suggest": [40, 41], "highli": [40, 42], "situat": 40, "sole": 40, "focus": [40, 45], "pack": [40, 45], "diagnos": 41, "nlp": 41, "pad": 41, "too": 41, "driver": 41, "releas": 41, "oftentim": 41, "bug": 41, "overal": 41, "htop": 41, "obviou": 41, "biggest": 41, "bottleneck": [41, 42], "beyond": 41, "much": [41, 45], "quantifi": 41, "mitig": 41, "slow": 41, "scope": 41, "techniqu": 41, "async": 41, "hide": 41, "cost": [41, 42], "constant": 41, "unnecessarili": 41, "magnitud": 41, "short": [41, 45], "jpeg": 41, "often": 41, "libjpeg": 41, "turbo": 41, "simd": 41, "dali": [41, 42], "old": 41, "k80": 41, "factor": 41, "low": [41, 42], "hang": 41, "fruit": 41, "evalu": 41, "address": 41, "fp16": 41, "doubl": 41, "newer": 41, "neglig": 41, "accuraci": 41, "degrad": 41, "technic": 41, "quantiz": [41, 42], "seldom": 41, "suffer": 41, "loss": 41, "explor": 41, "articl": [41, 42], "neural": 41, "int8": 41, "awar": 41, "compressor": 41, "sophist": 41, "worth": 41, "signific": 41, "right": [41, 48], "balanc": 41, "smart": 41, "meet": 41, "sla": 41, "onnxruntim": 41, "lightseq": 41, "ctranslat": 41, "fusion": [41, 42], "distil": [41, 42], "gain": 41, "minilm": 41, "99": 41, "origin": 41, "2x": [41, 42], "sort": 41, "unnecessari": [41, 42], "exhaust": 41, "squeez": 41, "checklist": 42, "outsid": 42, "trick": 42, "prune": 42, "encourag": 42, "hard": 42, "easiest": 42, "ort": 42, "fastertransform": 42, "favorit": 42, "feel": 42, "pr": 42, "acceler": 42, "bit": 42, "At": [42, 48], "ort_sess": 42, "inferencesess": 42, "sess_opt": 42, "treat": 42, "vs": 42, "nn": 42, "transformerencod": 42, "author": 42, "bettertransform": 42, "exce": 42, "speedup": 42, "vari": 42, "batch_delai": 42, "lower": 42, "heavi": 42, "On": 42, "significantli": 42, "theori": 42, "discuss": [42, 45], "quickli": 42, "summar": 42, "hyperthread": 42, "affin": 42, "physic": 42, "numa": 42, "cross": 42, "ex": [42, 48], "contrari": 42, "clearli": 42, "sub": 42, "valuetoset": 42, "uniqu": 42, "around": 42, "portabl": 42, "iter": 42, "popular": 42, "block": 42, "helper": 42, "p50": 42, "p90": 42, "p99": 42, "visit": 42, "link": [42, 45, 50], "app": [42, 45, 48], "fine": [42, 48], "meta": 42, "scientist": 43, "possibl": [43, 50], "analyt": 43, "though": 43, "underli": 43, "plain": 43, "crucial": 43, "service_envelop": 43, "scala": 44, "javascript": 44, "snap": 45, "pictur": 45, "scene": 45, "identif": 45, "intak": 45, "littl": 45, "weed": 45, "dive": 45, "kind": [45, 50], "themselv": 45, "long": 45, "stori": 45, "ts_config": 45, "model_path1": 45, "model_path2": 45, "log_config": 45, "exit": 45, "model_loc": 45, "talk": 45, "sai": 45, "fanci": 45, "net": 45, "hotdog": 45, "sens": 45, "hot": 45, "model_location2": 45, "compris": [46, 53], "cfg": 46, "shut": 46, "intention": 46, "resili": 46, "prevent": 46, "katex": 47, "recommonmark": 47, "sphinxcontrib": 47, "pyyaml": 47, "torchtext": [47, 49], "pytest": 47, "theme": 47, "certifi": 48, "2019": [48, 49], "admin": 48, "anaconda": 48, "powershel": 48, "openjdk17": 48, "unzip": 48, "edit": 48, "jdk": [48, 49], "3_window": 48, "x64_bin": 48, "gui": [48, 50], "bin": [48, 49], "prompt": 48, "wheel": 48, "prebuilt": 48, "click": 48, "whl": 48, "java_hom": 48, "redistribut": 48, "studio": 48, "2015": 48, "2017": 48, "your_install_dr": 48, "corpor": 48, "nvsmi": 48, "prod": 48, "setup_wsl_ubuntu": 49, "bash": 49, "bashrc": 49, "openjdk": 49, "sentencepiec": 49, "conveni": 50, "prerequisit": 50, "chrome": 50, "default_handl": 50, "your_model_nam": 50, "your_model_fil": 50, "mv": 50, "directli": 50, "copi": 50, "volum": 50, "your_model_store_dir": 50, "kept": 50, "min": [50, 53], "your_input_fil": 50, "demostr": 50, "public_url": [50, 52], "ssl": 50, "your_path": 50, "8433": 50, "local_dir": 50, "your_docker_imag": 50, "s3_path": 50, "notic": 50, "proerti": 50, "apart": 50, "lib": 50, "your_custom_handler_py_fil": 50, "fairseq": 50, "your_requirements_txt": 50, "wfpredict": 51, "workflow_nam": [51, 52], "myworkflow": [51, 52], "leaf": 51, "dag": [51, 52], "workflownam": 52, "workflowurl": 52, "workflowdag": 52, "m1": [52, 53], "myworkflow1": 52, "myworkflow2": 52, "ensembl": 53, "flow": 53, "m2": 53, "m3": 53, "retri": 53, "model3": 53, "m4": 53, "model4": 53, "pre_process": 53, "syntax": 53, "ridden": 53, "dataflow": 53, "eg": 53, "function1": 53, "function2": 53, "aggregate_func": 53, "workflow_inference_api": 53, "workflow_management_api": 53, "serializ": 53, "arrai": 53}, "objects": {"": [[7, 0, 0, "-", "test"], [9, 0, 0, "-", "ts"]], "ts": [[9, 0, 0, "-", "arg_parser"], [9, 0, 0, "-", "context"], [10, 0, 0, "-", "metrics"], [9, 0, 0, "-", "model_loader"], [9, 0, 0, "-", "model_server"], [11, 0, 0, "-", "model_service"], [9, 0, 0, "-", "model_service_worker"], [12, 0, 0, "-", "protocol"], [9, 0, 0, "-", "service"], [13, 0, 0, "-", "torch_handler"], [18, 0, 0, "-", "utils"], [9, 0, 0, "-", "version"]], "ts.arg_parser": [[9, 1, 1, "", "ArgParser"]], "ts.arg_parser.ArgParser": [[9, 2, 1, "", "extract_args"], [9, 2, 1, "", "model_service_worker_args"], [9, 2, 1, "", "ts_parser"]], "ts.context": [[9, 1, 1, "", "Context"], [9, 1, 1, "", "RequestProcessor"]], "ts.context.Context": [[9, 2, 1, "", "get_all_request_header"], [9, 2, 1, "", "get_request_header"], [9, 2, 1, "", "get_request_id"], [9, 2, 1, "", "get_response_content_type"], [9, 2, 1, "", "get_response_headers"], [9, 2, 1, "", "get_response_status"], [9, 3, 1, "", "metrics"], [9, 3, 1, "", "request_processor"], [9, 2, 1, "", "set_all_response_status"], [9, 2, 1, "", "set_response_content_type"], [9, 2, 1, "", "set_response_header"], [9, 2, 1, "", "set_response_status"], [9, 3, 1, "", "system_properties"]], "ts.context.RequestProcessor": [[9, 2, 1, "", "add_response_property"], [9, 2, 1, "", "get_request_properties"], [9, 2, 1, "", "get_request_property"], [9, 2, 1, "", "get_response_header"], [9, 2, 1, "", "get_response_headers"], [9, 2, 1, "", "get_response_status_code"], [9, 2, 1, "", "get_response_status_phrase"], [9, 2, 1, "", "report_status"]], "ts.metrics": [[10, 0, 0, "-", "dimension"], [10, 0, 0, "-", "metric"], [10, 0, 0, "-", "metric_collector"], [10, 0, 0, "-", "metric_encoder"], [10, 0, 0, "-", "metrics_store"], [10, 0, 0, "-", "process_memory_metric"], [10, 0, 0, "-", "system_metrics"], [10, 0, 0, "-", "unit"]], "ts.metrics.dimension": [[10, 1, 1, "", "Dimension"]], "ts.metrics.dimension.Dimension": [[10, 2, 1, "", "to_dict"]], "ts.metrics.metric": [[10, 1, 1, "", "Metric"]], "ts.metrics.metric.Metric": [[10, 2, 1, "", "reset"], [10, 2, 1, "", "to_dict"], [10, 2, 1, "", "update"]], "ts.metrics.metric_encoder": [[10, 1, 1, "", "MetricEncoder"]], "ts.metrics.metric_encoder.MetricEncoder": [[10, 2, 1, "", "default"]], "ts.metrics.metrics_store": [[10, 1, 1, "", "MetricsStore"]], "ts.metrics.metrics_store.MetricsStore": [[10, 2, 1, "", "add_counter"], [10, 2, 1, "", "add_error"], [10, 2, 1, "", "add_metric"], [10, 2, 1, "", "add_percent"], [10, 2, 1, "", "add_size"], [10, 2, 1, "", "add_time"]], "ts.metrics.process_memory_metric": [[10, 4, 1, "", "check_process_mem_usage"], [10, 4, 1, "", "get_cpu_usage"]], "ts.metrics.system_metrics": [[10, 4, 1, "", "collect_all"], [10, 4, 1, "", "cpu_utilization"], [10, 4, 1, "", "disk_available"], [10, 4, 1, "", "disk_used"], [10, 4, 1, "", "disk_utilization"], [10, 4, 1, "", "gpu_utilization"], [10, 4, 1, "", "memory_available"], [10, 4, 1, "", "memory_used"], [10, 4, 1, "", "memory_utilization"]], "ts.metrics.unit": [[10, 1, 1, "", "Units"]], "ts.model_loader": [[9, 1, 1, "", "ModelLoader"], [9, 1, 1, "", "ModelLoaderFactory"], [9, 1, 1, "", "TsModelLoader"]], "ts.model_loader.ModelLoader": [[9, 2, 1, "", "load"]], "ts.model_loader.ModelLoaderFactory": [[9, 2, 1, "", "get_model_loader"]], "ts.model_loader.TsModelLoader": [[9, 2, 1, "", "load"]], "ts.model_server": [[9, 4, 1, "", "load_properties"], [9, 4, 1, "", "start"]], "ts.model_service": [[11, 0, 0, "-", "model_service"]], "ts.model_service.model_service": [[11, 1, 1, "", "ModelService"], [11, 1, 1, "", "SingleNodeService"]], "ts.model_service.model_service.ModelService": [[11, 2, 1, "", "handle"], [11, 2, 1, "", "inference"], [11, 2, 1, "", "initialize"], [11, 2, 1, "", "ping"], [11, 2, 1, "", "signature"]], "ts.model_service.model_service.SingleNodeService": [[11, 2, 1, "", "inference"]], "ts.model_service_worker": [[9, 1, 1, "", "TorchModelServiceWorker"]], "ts.model_service_worker.TorchModelServiceWorker": [[9, 2, 1, "", "handle_connection"], [9, 2, 1, "", "load_model"], [9, 2, 1, "", "run_server"]], "ts.protocol": [[12, 0, 0, "-", "otf_message_handler"]], "ts.protocol.otf_message_handler": [[12, 4, 1, "", "create_load_model_response"], [12, 4, 1, "", "create_predict_response"], [12, 4, 1, "", "encode_response_headers"], [12, 4, 1, "", "retrieve_msg"], [12, 4, 1, "", "send_intermediate_predict_response"]], "ts.service": [[9, 1, 1, "", "Service"], [9, 4, 1, "", "emit_metrics"]], "ts.service.Service": [[9, 3, 1, "", "context"], [9, 2, 1, "", "predict"], [9, 2, 1, "", "retrieve_data_for_inference"], [9, 2, 1, "", "set_cl_socket"]], "ts.torch_handler": [[13, 0, 0, "-", "base_handler"], [13, 0, 0, "-", "contractions"], [13, 0, 0, "-", "densenet_handler"], [13, 0, 0, "-", "image_classifier"], [13, 0, 0, "-", "image_segmenter"], [13, 0, 0, "-", "object_detector"], [14, 0, 0, "-", "request_envelope"], [13, 0, 0, "-", "text_classifier"], [13, 0, 0, "-", "text_handler"], [15, 0, 0, "-", "unit_tests"], [13, 0, 0, "-", "vision_handler"]], "ts.torch_handler.base_handler": [[13, 1, 1, "", "BaseHandler"], [13, 4, 1, "", "setup_ort_session"]], "ts.torch_handler.base_handler.BaseHandler": [[13, 2, 1, "", "describe_handle"], [13, 2, 1, "", "explain_handle"], [13, 2, 1, "", "handle"], [13, 2, 1, "", "inference"], [13, 2, 1, "", "initialize"], [13, 2, 1, "", "postprocess"], [13, 2, 1, "", "preprocess"]], "ts.torch_handler.densenet_handler": [[13, 1, 1, "", "DenseNetHandler"], [13, 4, 1, "", "list_classes_from_module"]], "ts.torch_handler.densenet_handler.DenseNetHandler": [[13, 2, 1, "", "handle"], [13, 2, 1, "", "inference"], [13, 2, 1, "", "initialize"]], "ts.torch_handler.image_classifier": [[13, 1, 1, "", "ImageClassifier"]], "ts.torch_handler.image_classifier.ImageClassifier": [[13, 2, 1, "", "get_max_result_classes"], [13, 5, 1, "", "image_processing"], [13, 2, 1, "", "postprocess"], [13, 2, 1, "", "set_max_result_classes"], [13, 5, 1, "", "topk"]], "ts.torch_handler.image_segmenter": [[13, 1, 1, "", "ImageSegmenter"]], "ts.torch_handler.image_segmenter.ImageSegmenter": [[13, 5, 1, "", "image_processing"], [13, 2, 1, "", "postprocess"]], "ts.torch_handler.object_detector": [[13, 1, 1, "", "ObjectDetector"]], "ts.torch_handler.object_detector.ObjectDetector": [[13, 5, 1, "", "image_processing"], [13, 2, 1, "", "initialize"], [13, 2, 1, "", "postprocess"], [13, 5, 1, "", "threshold"]], "ts.torch_handler.request_envelope": [[14, 0, 0, "-", "base"], [14, 0, 0, "-", "body"], [14, 0, 0, "-", "json"], [14, 0, 0, "-", "kserve"], [14, 0, 0, "-", "kservev2"]], "ts.torch_handler.request_envelope.base": [[14, 1, 1, "", "BaseEnvelope"]], "ts.torch_handler.request_envelope.base.BaseEnvelope": [[14, 2, 1, "", "format_output"], [14, 2, 1, "", "handle"], [14, 2, 1, "", "parse_input"]], "ts.torch_handler.request_envelope.body": [[14, 1, 1, "", "BodyEnvelope"]], "ts.torch_handler.request_envelope.body.BodyEnvelope": [[14, 2, 1, "", "format_output"], [14, 2, 1, "", "parse_input"]], "ts.torch_handler.request_envelope.json": [[14, 1, 1, "", "JSONEnvelope"]], "ts.torch_handler.request_envelope.json.JSONEnvelope": [[14, 2, 1, "", "format_output"], [14, 2, 1, "", "parse_input"]], "ts.torch_handler.request_envelope.kserve": [[14, 1, 1, "", "KServeEnvelope"]], "ts.torch_handler.request_envelope.kserve.KServeEnvelope": [[14, 2, 1, "", "format_output"], [14, 2, 1, "", "parse_input"]], "ts.torch_handler.request_envelope.kservev2": [[14, 1, 1, "", "KServev2Envelope"]], "ts.torch_handler.request_envelope.kservev2.KServev2Envelope": [[14, 2, 1, "", "format_output"], [14, 2, 1, "", "parse_input"]], "ts.torch_handler.text_classifier": [[13, 1, 1, "", "TextClassifier"]], "ts.torch_handler.text_classifier.TextClassifier": [[13, 2, 1, "", "get_insights"], [13, 2, 1, "", "inference"], [13, 5, 1, "", "ngrams"], [13, 2, 1, "", "postprocess"], [13, 2, 1, "", "preprocess"]], "ts.torch_handler.text_handler": [[13, 1, 1, "", "TextHandler"]], "ts.torch_handler.text_handler.TextHandler": [[13, 2, 1, "", "get_source_vocab_path"], [13, 2, 1, "", "get_word_token"], [13, 2, 1, "", "initialize"], [13, 2, 1, "", "summarize_attributions"]], "ts.torch_handler.unit_tests": [[16, 0, 0, "-", "models"], [15, 0, 0, "-", "test_base_handler"], [15, 0, 0, "-", "test_envelopes"], [15, 0, 0, "-", "test_image_classifier"], [15, 0, 0, "-", "test_image_segmenter"], [15, 0, 0, "-", "test_object_detector"], [17, 0, 0, "-", "test_utils"]], "ts.torch_handler.unit_tests.models": [[16, 0, 0, "-", "base_model"]], "ts.torch_handler.unit_tests.models.base_model": [[16, 1, 1, "", "ArgmaxModel"], [16, 4, 1, "", "save_pt_file"]], "ts.torch_handler.unit_tests.models.base_model.ArgmaxModel": [[16, 2, 1, "", "forward"], [16, 5, 1, "", "training"]], "ts.torch_handler.unit_tests.test_base_handler": [[15, 4, 1, "", "handler"], [15, 4, 1, "", "test_batch_handle"], [15, 4, 1, "", "test_inference_with_profiler_works_with_custom_initialize_method"], [15, 4, 1, "", "test_single_handle"]], "ts.torch_handler.unit_tests.test_envelopes": [[15, 4, 1, "", "handle_fn"], [15, 4, 1, "", "test_binary"], [15, 4, 1, "", "test_body"], [15, 4, 1, "", "test_json"], [15, 4, 1, "", "test_json_batch"], [15, 4, 1, "", "test_json_double_batch"]], "ts.torch_handler.unit_tests.test_image_classifier": [[15, 4, 1, "", "context"], [15, 4, 1, "", "handler"], [15, 4, 1, "", "image_bytes"], [15, 4, 1, "", "model_dir"], [15, 4, 1, "", "model_name"], [15, 4, 1, "", "test_handle"], [15, 4, 1, "", "test_handle_explain"]], "ts.torch_handler.unit_tests.test_image_segmenter": [[15, 4, 1, "", "context"], [15, 4, 1, "", "handler"], [15, 4, 1, "", "image_bytes"], [15, 4, 1, "", "model_dir"], [15, 4, 1, "", "model_name"], [15, 4, 1, "", "test_handle"]], "ts.torch_handler.unit_tests.test_object_detector": [[15, 4, 1, "", "context"], [15, 4, 1, "", "handler"], [15, 4, 1, "", "image_bytes"], [15, 4, 1, "", "model_dir"], [15, 4, 1, "", "model_name"], [15, 4, 1, "", "test_handle"]], "ts.torch_handler.unit_tests.test_utils": [[17, 0, 0, "-", "mock_context"]], "ts.torch_handler.unit_tests.test_utils.mock_context": [[17, 1, 1, "", "MockContext"]], "ts.torch_handler.unit_tests.test_utils.mock_context.MockContext": [[17, 2, 1, "", "get_request_header"]], "ts.torch_handler.vision_handler": [[13, 1, 1, "", "VisionHandler"]], "ts.torch_handler.vision_handler.VisionHandler": [[13, 2, 1, "", "get_insights"], [13, 2, 1, "", "initialize"], [13, 2, 1, "", "preprocess"]], "ts.utils": [[18, 0, 0, "-", "timeit_decorator"], [18, 0, 0, "-", "util"]], "ts.utils.timeit_decorator": [[18, 4, 1, "", "timeit"]], "ts.utils.util": [[18, 1, 1, "", "PT2Backend"], [18, 6, 1, "", "PredictionException"], [18, 4, 1, "", "check_valid_pt2_backend"], [18, 4, 1, "", "get_yaml_config"], [18, 4, 1, "", "list_classes_from_module"], [18, 4, 1, "", "load_label_mapping"], [18, 4, 1, "", "map_class_to_label"]], "ts.utils.util.PT2Backend": [[18, 5, 1, "", "AOT_CUDAGRAPHS"], [18, 5, 1, "", "AOT_EAGER"], [18, 5, 1, "", "AOT_NVFUSER"], [18, 5, 1, "", "EAGER"], [18, 5, 1, "", "FX2TRT"], [18, 5, 1, "", "INDUCTOR"], [18, 5, 1, "", "IPEX"], [18, 5, 1, "", "NVFUSER"], [18, 5, 1, "", "OFI"], [18, 5, 1, "", "ONNXRT"], [18, 5, 1, "", "TORCHXLA_TRACE_ONCE"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:property", "4": "py:function", "5": "py:attribute", "6": "py:exception"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "property", "Python property"], "4": ["py", "function", "Python function"], "5": ["py", "attribute", "Python attribute"], "6": ["py", "exception", "Python exception"]}, "titleterms": {"faq": 0, "s": [0, 21, 36], "gener": [0, 34, 36, 41], "doe": 0, "torchserv": [0, 1, 2, 21, 23, 26, 27, 28, 29, 30, 32, 33, 34, 36, 38, 40, 42, 44, 45, 46, 48, 49, 50, 53], "api": [0, 2, 3, 20, 21, 23, 24, 27, 29, 31, 35, 36, 37, 44, 51, 52], "follow": [0, 22], "some": 0, "rest": [0, 27, 44], "standard": 0, "how": [0, 2, 33, 38], "us": [0, 2, 21, 27, 33, 48, 50], "product": [0, 48], "what": [0, 32], "differ": 0, "between": 0, "python": [0, 2, 22, 23, 25, 27, 29, 32], "web": 0, "app": 0, "framework": 0, "like": 0, "flask": 0, "django": 0, "ar": [0, 2], "ani": 0, "sampl": 0, "model": [0, 2, 16, 21, 22, 23, 25, 27, 33, 35, 38, 39, 41, 45, 46, 50, 53], "avail": 0, "support": [0, 21], "other": [0, 23], "base": [0, 14, 27, 36], "program": 0, "languag": 0, "than": 0, "benefit": 0, "have": [0, 2], "over": 0, "aw": 0, "multi": 0, "server": [0, 29, 33, 37], "decod": 0, "intern": [0, 32], "infer": [0, 2, 21, 26, 31, 33, 41, 51], "respons": [0, 33], "client": [0, 27, 29], "side": [0, 29, 33], "perform": [0, 42], "do": [0, 32], "i": [0, 2], "improv": 0, "cpu": 0, "deploy": [0, 2], "config": [0, 2, 21, 23, 33, 34], "can": [0, 2], "run": [0, 22, 40, 45], "port": [0, 2, 23], "default": [0, 1, 21, 25, 26, 35, 36], "8080": [0, 2], "8081": [0, 2], "resolv": [0, 2], "specif": [0, 2, 23, 25, 53], "depend": [0, 2, 22, 25, 50], "deploi": [0, 50], "kubernet": 0, "elb": 0, "asg": 0, "backup": 0, "restor": 0, "state": 0, "build": [0, 22], "imag": 0, "from": [0, 25, 27, 36, 48, 49], "sourc": [0, 48, 49], "branch": [0, 22], "commit": 0, "id": 0, "creat": [0, 25, 28, 36], "dockerfil": 0, "dev": 0, "order": 0, "properti": [0, 21, 23, 34, 53], "path": 0, "model_stor": 0, "load_model": 0, "curl": [0, 31, 51], "make": 0, "request": [0, 2, 25, 43], "add": [0, 2, 36], "custom": [0, 2, 23, 25, 34, 36, 45, 50], "an": [0, 25], "exist": 0, "pass": 0, "multipl": [0, 25, 45], "call": 0, "my": [0, 2], "handler": [0, 1, 21, 25, 26, 27, 36, 53], "return": [0, 25], "output": 0, "enhanc": 0, "alwai": 0, "write": [0, 25], "ones": 0, "Is": 0, "possibl": 0, "hug": [0, 33], "face": [0, 33], "archiv": [0, 2, 21, 22, 25, 27], "mar": [0, 2], "file": [0, 2, 23, 32, 33, 36, 45, 53], "docker": [0, 21], "contain": [0, 21], "serial": 0, "singl": 0, "download": 0, "regist": [0, 2, 35, 52], "s3": 0, "presign": 0, "v4": 0, "url": 0, "host": 0, "set": [0, 35], "batch": [0, 21], "size": [0, 36], "sagemak": 0, "kei": 0, "paramet": [0, 23], "tune": [0, 33], "why": 0, "initi": 0, "so": 0, "slow": 0, "basic": 1, "featur": [1, 2, 26, 45], "exampl": [1, 23, 29, 31, 36, 51], "advanc": [1, 23, 25, 45], "troubleshoot": [2, 48], "guid": [2, 36, 42], "issu": [2, 47, 53], "fail": 2, "bind": 2, "address": [2, 23], "http": [2, 33, 47], "127": 2, "0": [2, 49], "1": [2, 33], "alreadi": 2, "java": [2, 32], "lang": 2, "nosuchmethoderror": 2, "when": 2, "start": [2, 25, 27, 36], "473": 2, "unabl": 2, "send": 2, "big": 2, "snapshot": [2, 46], "relat": [2, 53], "disabl": 2, "stop": [2, 27], "after": 2, "restart": 2, "invalidsnapshotexcept": 2, "except": 2, "where": 2, "store": [2, 27], "chang": 2, "temp": 2, "directori": [2, 32], "conflictstatusexcept": 2, "error": [2, 25], "code": [2, 22, 25, 27], "409": 2, "downloadmodelexcept": 2, "400": 2, "modelnotfoundexcept": 2, "404": 2, "serviceunavailableexcept": 2, "503": 2, "ad": 2, "requir": 2, "txt": 2, "packag": [2, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 22, 23], "list": [2, 35, 52], "get": [2, 27, 36], "instal": [2, 22, 25, 27, 48, 49], "backend": [2, 22, 23, 32, 36], "worker": [2, 23, 35], "monitor": 2, "thread": 2, "interrupt": 2, "process": 2, "di": 2, "develop": [3, 24, 48], "serv": [4, 27, 32, 33, 35, 45, 50], "run_circleci_test": 5, "modul": [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 25], "setup": [6, 21, 49], "test": [7, 50], "submodul": [7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], "regression_test": 7, "content": [7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 24, 25, 36, 45, 48, 49], "torchserve_san": 8, "ts": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 22], "subpackag": [9, 13, 15], "arg_pars": 9, "context": 9, "model_load": 9, "model_serv": 9, "model_service_work": 9, "servic": [9, 20, 24, 25, 45], "version": [9, 35], "metric": [10, 23, 36, 37, 45], "dimens": [10, 36], "metric_collector": 10, "metric_encod": 10, "metrics_stor": 10, "process_memory_metr": 10, "system_metr": 10, "unit": 10, "model_servic": 11, "protocol": 12, "otf_message_handl": 12, "torch_handl": [13, 14, 15, 16, 17], "base_handl": 13, "contract": 13, "densenet_handl": 13, "image_classifi": [13, 26], "image_segment": [13, 26], "object_detector": [13, 26], "text_classifi": [13, 26], "text_handl": 13, "vision_handl": 13, "request_envelop": 14, "bodi": 14, "json": [14, 26], "kserv": [14, 31], "kservev2": 14, "unit_test": [15, 16, 17], "test_base_handl": 15, "test_envelop": 15, "test_image_classifi": 15, "test_image_segment": 15, "test_mnist_kf": 15, "test_object_detector": 15, "base_model": 16, "test_util": 17, "mock_context": 17, "util": [18, 19], "timeit_decor": 18, "ts_script": 19, "api_util": 19, "backend_util": 19, "frontend_util": 19, "install_depend": 19, "install_from_src": 19, "marsgen": 19, "modelarchiver_util": 19, "print_env_info": 19, "regression_util": 19, "sanity_util": 19, "shell_util": 19, "torchserve_grpc_cli": 19, "tsutil": 19, "validate_model_on_gpu": 19, "workflow_archiver_util": 19, "thi": [21, 25, 36, 45, 48, 49], "document": [21, 25, 36, 45, 48, 49], "introduct": [21, 36], "prerequisit": [21, 34, 48], "resnet": 21, "152": 21, "configur": [21, 23], "demo": 21, "torch": [21, 27], "manag": [21, 35, 52], "through": [21, 27], "coverag": 22, "To": 22, "check": [22, 31], "stabil": 22, "saniti": 22, "suit": 22, "frontend": [22, 32, 36], "command": [22, 23, 45, 48], "pytest": 22, "lint": 22, "IT": 22, "markdown": [22, 47], "link": 22, "checker": 22, "environ": 23, "variabl": 23, "line": [23, 45], "jvm": 23, "option": 23, "load": [23, 33, 38], "startup": 23, "listen": 23, "grpc": [23, 27, 29, 33], "enabl": [23, 34], "ssl": 23, "cross": 23, "origin": 23, "resourc": [23, 42], "share": 23, "cor": 23, "prefer": 23, "direct": 23, "buffer": 23, "allow": 23, "restrict": 23, "access": [23, 34], "limit": 23, "gpu": [23, 25, 50], "usag": [23, 43, 48], "nvidia": [23, 40], "control": 23, "visibl": 23, "basehandl": 25, "level": 25, "entri": 25, "point": 25, "class": 25, "scratch": 25, "predict": [25, 27, 31, 51], "explan": [25, 31], "captum": 25, "extend": 25, "handl": 25, "execut": 25, "common": 26, "index_to_nam": 26, "contribut": [26, 27, 43], "For": [27, 48], "debian": 27, "system": [27, 41], "maco": 27, "window": [27, 48, 49], "inspect": 27, "log": [27, 34, 36, 45], "debug": 27, "github": [28, 47], "action": 28, "step": 28, "stream": [29, 33], "descript": [31, 35], "health": 31, "architectur": 32, "terminolog": 32, "pytorch": [32, 33, 42, 50], "thei": 32, "import": 32, "core": 32, "engin": 32, "larg": 33, "work": 33, "pippi": 33, "nativ": 33, "solut": 33, "deepspe": 33, "mii": 33, "acceler": 33, "tip": 33, "reduc": 33, "latenc": 33, "yaml": [33, 36], "sensit": 33, "applic": 33, "job": 33, "ticket": 33, "via": 33, "chunk": 33, "encod": 33, "type": [34, 36], "modifi": 34, "behavior": 34, "provid": 34, "asynchron": 34, "encrypt": 35, "scale": 35, "describ": [35, 52], "unregist": [35, 52], "enum": 36, "format": 36, "specifi": 36, "updat": 36, "pars": 36, "object": 36, "function": 36, "without": 36, "time": 36, "percentag": 36, "counter": 36, "backward": 36, "compat": 36, "warn": 36, "upgrad": 36, "prometheu": 37, "grafana": 37, "zoo": [39, 50], "mp": 40, "benchmark": [40, 42], "g4": 40, "instanc": 40, "p3": 40, "summari": 40, "optim": [41, 42], "checklist": 41, "profil": 42, "more": 42, "envelop": 43, "overview": 45, "technic": 45, "detail": 45, "interfac": 45, "argument": 45, "cf": 47, "com": 47, "ryanfox": 47, "sphinx": 47, "tabl": 47, "36": 47, "binari": [48, 49], "below": 48, "purpos": 48, "subsystem": 49, "linux": 49, "wsl": 49, "ubuntu": 49, "18": 49, "4": 49, "case": 50, "eager": 50, "mode": 50, "script": 50, "readymad": 50, "secur": 50, "third": 50, "parti": 50, "ab": 50, "workflow": [51, 52, 53], "dag": 53, "sequenti": 53, "parallel": 53, "doc": 53, "known": 53}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 56}}) \ No newline at end of file