From 69224fac8319d1e01ccaa4b96add401fe87b1211 Mon Sep 17 00:00:00 2001 From: Gibson Chikafa Date: Fri, 8 Mar 2024 11:49:30 +0100 Subject: [PATCH] [HWORKS-888] Support for gRPC protocol in Python model deployments (#216) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --------- Co-authored-by: Javier de la Rúa Martínez --- python/hsml/client/exceptions.py | 7 + python/hsml/client/hopsworks/base.py | 2 +- python/hsml/client/hopsworks/external.py | 2 +- python/hsml/client/istio/base.py | 19 +- python/hsml/client/istio/external.py | 2 +- python/hsml/client/istio/grpc/__init__.py | 15 + python/hsml/client/istio/grpc/errors.py | 30 + python/hsml/client/istio/grpc/exceptions.py | 123 +++ .../client/istio/grpc/inference_client.py | 75 ++ .../hsml/client/istio/grpc/proto/__init__.py | 15 + .../istio/grpc/proto/grpc_predict_v2.proto | 362 ++++++++ .../istio/grpc/proto/grpc_predict_v2_pb2.py | 452 ++++++++++ .../istio/grpc/proto/grpc_predict_v2_pb2.pyi | 399 +++++++++ .../grpc/proto/grpc_predict_v2_pb2_grpc.py | 419 +++++++++ python/hsml/client/istio/internal.py | 8 +- python/hsml/client/istio/utils/__init__.py | 15 + python/hsml/client/istio/utils/infer_type.py | 811 ++++++++++++++++++ python/hsml/client/istio/utils/numpy_codec.py | 67 ++ python/hsml/constants.py | 3 + python/hsml/core/serving_api.py | 73 +- python/hsml/deployment.py | 18 +- python/hsml/engine/serving_engine.py | 276 ++++-- python/hsml/model.py | 5 +- python/hsml/model_serving.py | 5 +- python/hsml/predictor.py | 15 +- python/hsml/util.py | 2 +- python/setup.py | 2 + 27 files changed, 3142 insertions(+), 80 deletions(-) create mode 100644 python/hsml/client/istio/grpc/__init__.py create mode 100644 python/hsml/client/istio/grpc/errors.py create mode 100644 python/hsml/client/istio/grpc/exceptions.py create mode 100644 python/hsml/client/istio/grpc/inference_client.py create mode 100644 python/hsml/client/istio/grpc/proto/__init__.py create mode 100644 
python/hsml/client/istio/grpc/proto/grpc_predict_v2.proto create mode 100644 python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.py create mode 100644 python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.pyi create mode 100644 python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2_grpc.py create mode 100644 python/hsml/client/istio/utils/__init__.py create mode 100644 python/hsml/client/istio/utils/infer_type.py create mode 100644 python/hsml/client/istio/utils/numpy_codec.py diff --git a/python/hsml/client/exceptions.py b/python/hsml/client/exceptions.py index b2394c188..6a59909db 100644 --- a/python/hsml/client/exceptions.py +++ b/python/hsml/client/exceptions.py @@ -71,6 +71,13 @@ class ModelServingException(Exception): ERROR_CODE_DEPLOYMENT_NOT_RUNNING = 250001 +class InternalClientError(TypeError): + """Raised when internal client cannot be initialized due to missing arguments.""" + + def __init__(self, message): + super().__init__(message) + + class ExternalClientError(TypeError): """Raised when external client cannot be initialized due to missing arguments.""" diff --git a/python/hsml/client/hopsworks/base.py b/python/hsml/client/hopsworks/base.py index a33afd86b..6b3ee719a 100644 --- a/python/hsml/client/hopsworks/base.py +++ b/python/hsml/client/hopsworks/base.py @@ -105,7 +105,7 @@ def _close(self): """Closes a client. 
Can be implemented for clean up purposes, not mandatory.""" self._connected = False - def replace_public_host(self, url): + def _replace_public_host(self, url): """replace hostname to public hostname set in HOPSWORKS_PUBLIC_HOST""" ui_url = url._replace(netloc=os.environ[self.HOPSWORKS_PUBLIC_HOST]) return ui_url diff --git a/python/hsml/client/hopsworks/external.py b/python/hsml/client/hopsworks/external.py index 762d3072a..ec66c51a8 100644 --- a/python/hsml/client/hopsworks/external.py +++ b/python/hsml/client/hopsworks/external.py @@ -77,7 +77,7 @@ def _get_project_info(self, project_name): """ return self._send_request("GET", ["project", "getProjectInfo", project_name]) - def replace_public_host(self, url): + def _replace_public_host(self, url): """no need to replace as we are already in external client""" return url diff --git a/python/hsml/client/istio/base.py b/python/hsml/client/istio/base.py index 2e4ee4208..9aaab9ba0 100644 --- a/python/hsml/client/istio/base.py +++ b/python/hsml/client/istio/base.py @@ -17,12 +17,12 @@ import os from abc import abstractmethod -from hsml.client import base, exceptions +from hsml.client import base +from hsml.client.istio.grpc.inference_client import GRPCInferenceServerClient class Client(base.Client): SERVING_API_KEY = "SERVING_API_KEY" - ISTIO_ENDPOINT = "ISTIO_ENDPOINT" HOPSWORKS_PUBLIC_HOST = "HOPSWORKS_PUBLIC_HOST" BASE_PATH_PARAMS = [] @@ -80,17 +80,18 @@ def _get_host_port_pair(self): host, port = endpoint.split(":") return host, port - def _get_serving_api_key(self): - """Retrieve serving API key from environment variable.""" - if self.SERVING_API_KEY not in os.environ: - raise exceptions.ExternalClientError("Serving API key not found") - return os.environ[self.SERVING_API_KEY] - def _close(self): """Closes a client. 
Can be implemented for clean up purposes, not mandatory.""" self._connected = False - def replace_public_host(self, url): + def _replace_public_host(self, url): """replace hostname to public hostname set in HOPSWORKS_PUBLIC_HOST""" ui_url = url._replace(netloc=os.environ[self.HOPSWORKS_PUBLIC_HOST]) return ui_url + + def _create_grpc_channel(self, service_hostname: str) -> GRPCInferenceServerClient: + return GRPCInferenceServerClient( + url=self._host + ":" + str(self._port), + channel_args=(("grpc.ssl_target_name_override", service_hostname),), + serving_api_key=self._auth._token, + ) diff --git a/python/hsml/client/istio/external.py b/python/hsml/client/istio/external.py index 0dc87fb7b..d6c47b612 100644 --- a/python/hsml/client/istio/external.py +++ b/python/hsml/client/istio/external.py @@ -48,7 +48,7 @@ def _close(self): """Closes a client.""" self._connected = False - def replace_public_host(self, url): + def _replace_public_host(self, url): """no need to replace as we are already in external client""" return url diff --git a/python/hsml/client/istio/grpc/__init__.py b/python/hsml/client/istio/grpc/__init__.py new file mode 100644 index 000000000..ff8055b9b --- /dev/null +++ b/python/hsml/client/istio/grpc/__init__.py @@ -0,0 +1,15 @@ +# +# Copyright 2024 Hopsworks AB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# diff --git a/python/hsml/client/istio/grpc/errors.py b/python/hsml/client/istio/grpc/errors.py new file mode 100644 index 000000000..062630bea --- /dev/null +++ b/python/hsml/client/istio/grpc/errors.py @@ -0,0 +1,30 @@ +# Copyright 2022 The KServe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# This implementation has been borrowed from the kserve/kserve repository +# https://github.com/kserve/kserve/blob/release-0.11/python/kserve/kserve/errors.py + + +class InvalidInput(ValueError): + """ + Exception class indicating invalid input arguments. + HTTP Servers should return HTTP_400 (Bad Request). + """ + + def __init__(self, reason): + self.reason = reason + + def __str__(self): + return self.reason diff --git a/python/hsml/client/istio/grpc/exceptions.py b/python/hsml/client/istio/grpc/exceptions.py new file mode 100644 index 000000000..6477c9488 --- /dev/null +++ b/python/hsml/client/istio/grpc/exceptions.py @@ -0,0 +1,123 @@ +# Copyright 2023 The KServe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# coding: utf-8 + +# This implementation has been borrowed from kserve/kserve repository +# https://github.com/kserve/kserve/blob/release-0.11/python/kserve/kserve/exceptions.py + +import six + + +class OpenApiException(Exception): + """The base exception class for all OpenAPIExceptions""" + + +class ApiTypeError(OpenApiException, TypeError): + def __init__(self, msg, path_to_item=None, valid_classes=None, key_type=None): + """Raises an exception for TypeErrors + + Args: + msg (str): the exception message + + Keyword Args: + path_to_item (list): a list of keys and indices to get to the + current_item + None if unset + valid_classes (tuple): the primitive classes that current item + should be an instance of + None if unset + key_type (bool): False if our value is a value in a dict + True if it is a key in a dict + False if our item is an item in a list + None if unset + """ + self.path_to_item = path_to_item + self.valid_classes = valid_classes + self.key_type = key_type + full_msg = msg + if path_to_item: + full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) + super(ApiTypeError, self).__init__(full_msg) + + +class ApiValueError(OpenApiException, ValueError): + def __init__(self, msg, path_to_item=None): + """ + Args: + msg (str): the exception message + + Keyword Args: + path_to_item (list) the path to the exception in the + received_data dict. 
None if unset + """ + + self.path_to_item = path_to_item + full_msg = msg + if path_to_item: + full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) + super(ApiValueError, self).__init__(full_msg) + + +class ApiKeyError(OpenApiException, KeyError): + def __init__(self, msg, path_to_item=None): + """ + Args: + msg (str): the exception message + + Keyword Args: + path_to_item (None/list) the path to the exception in the + received_data dict + """ + self.path_to_item = path_to_item + full_msg = msg + if path_to_item: + full_msg = "{0} at {1}".format(msg, render_path(path_to_item)) + super(ApiKeyError, self).__init__(full_msg) + + +class ApiException(OpenApiException): + def __init__(self, status=None, reason=None, http_resp=None): + if http_resp: + self.status = http_resp.status + self.reason = http_resp.reason + self.body = http_resp.data + self.headers = http_resp.getheaders() + else: + self.status = status + self.reason = reason + self.body = None + self.headers = None + + def __str__(self): + """Custom error messages for exception""" + error_message = "({0})\n" "Reason: {1}\n".format(self.status, self.reason) + if self.headers: + error_message += "HTTP response headers: {0}\n".format(self.headers) + + if self.body: + error_message += "HTTP response body: {0}\n".format(self.body) + + return error_message + + +def render_path(path_to_item): + """Returns a string representation of a path""" + result = "" + for pth in path_to_item: + if isinstance(pth, six.integer_types): + result += "[{0}]".format(pth) + else: + result += "['{0}']".format(pth) + return result diff --git a/python/hsml/client/istio/grpc/inference_client.py b/python/hsml/client/istio/grpc/inference_client.py new file mode 100644 index 000000000..7bfa51ede --- /dev/null +++ b/python/hsml/client/istio/grpc/inference_client.py @@ -0,0 +1,75 @@ +# +# Copyright 2024 Hopsworks AB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance 
with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import grpc + +from hsml.client.istio.utils.infer_type import InferRequest, InferResponse +from hsml.client.istio.grpc.proto.grpc_predict_v2_pb2_grpc import ( + GRPCInferenceServiceStub, +) + + +class GRPCInferenceServerClient: + def __init__( + self, + url, + serving_api_key, + channel_args=None, + ): + if channel_args is not None: + channel_opt = channel_args + else: + channel_opt = [ + ("grpc.max_send_message_length", -1), + ("grpc.max_receive_message_length", -1), + ] + + # Authentication is done via API Key in the Authorization header + self._channel = grpc.insecure_channel(url, options=channel_opt) + self._client_stub = GRPCInferenceServiceStub(self._channel) + self._serving_api_key = serving_api_key + + def __enter__(self): + return self + + def __exit__(self, type, value, traceback): + self.close() + + def __del__(self): + """It is called during object garbage collection.""" + self.close() + + def close(self): + """Close the client. 
Future calls to server will result in an Error.""" + self._channel.close() + + def infer(self, infer_request: InferRequest, headers=None, client_timeout=None): + headers = {} if headers is None else headers + headers["authorization"] = "ApiKey " + self._serving_api_key + metadata = headers.items() + + # convert the InferRequest to a ModelInferRequest message + request = infer_request.to_grpc() + + try: + # send request + model_infer_response = self._client_stub.ModelInfer( + request=request, metadata=metadata, timeout=client_timeout + ) + except grpc.RpcError as rpc_error: + raise rpc_error + + # convert back the ModelInferResponse message to InferResponse + return InferResponse.from_grpc(model_infer_response) diff --git a/python/hsml/client/istio/grpc/proto/__init__.py b/python/hsml/client/istio/grpc/proto/__init__.py new file mode 100644 index 000000000..ff8055b9b --- /dev/null +++ b/python/hsml/client/istio/grpc/proto/__init__.py @@ -0,0 +1,15 @@ +# +# Copyright 2024 Hopsworks AB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/python/hsml/client/istio/grpc/proto/grpc_predict_v2.proto b/python/hsml/client/istio/grpc/proto/grpc_predict_v2.proto new file mode 100644 index 000000000..c05221d73 --- /dev/null +++ b/python/hsml/client/istio/grpc/proto/grpc_predict_v2.proto @@ -0,0 +1,362 @@ +// Copyright 2022 The KServe Authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; +package inference; + +// Inference Server GRPC endpoints. +service GRPCInferenceService +{ + // The ServerLive API indicates if the inference server is able to receive + // and respond to metadata and inference requests. + rpc ServerLive(ServerLiveRequest) returns (ServerLiveResponse) {} + + // The ServerReady API indicates if the server is ready for inferencing. + rpc ServerReady(ServerReadyRequest) returns (ServerReadyResponse) {} + + // The ModelReady API indicates if a specific model is ready for inferencing. + rpc ModelReady(ModelReadyRequest) returns (ModelReadyResponse) {} + + // The ServerMetadata API provides information about the server. Errors are + // indicated by the google.rpc.Status returned for the request. The OK code + // indicates success and other codes indicate failure. + rpc ServerMetadata(ServerMetadataRequest) returns (ServerMetadataResponse) {} + + // The per-model metadata API provides information about a model. Errors are + // indicated by the google.rpc.Status returned for the request. The OK code + // indicates success and other codes indicate failure. + rpc ModelMetadata(ModelMetadataRequest) returns (ModelMetadataResponse) {} + + // The ModelInfer API performs inference using the specified model. Errors are + // indicated by the google.rpc.Status returned for the request. The OK code + // indicates success and other codes indicate failure. 
+ rpc ModelInfer(ModelInferRequest) returns (ModelInferResponse) {} + + // Load or reload a model from a repository. + rpc RepositoryModelLoad(RepositoryModelLoadRequest) returns (RepositoryModelLoadResponse) {} + + // Unload a model. + rpc RepositoryModelUnload(RepositoryModelUnloadRequest) returns (RepositoryModelUnloadResponse) {} +} + +message ServerLiveRequest {} + +message ServerLiveResponse +{ + // True if the inference server is live, false if not live. + bool live = 1; +} + +message ServerReadyRequest {} + +message ServerReadyResponse +{ + // True if the inference server is ready, false if not ready. + bool ready = 1; +} + +message ModelReadyRequest +{ + // The name of the model to check for readiness. + string name = 1; + + // The version of the model to check for readiness. If not given the + // server will choose a version based on the model and internal policy. + string version = 2; +} + +message ModelReadyResponse +{ + // True if the model is ready, false if not ready. + bool ready = 1; +} + +message ServerMetadataRequest {} + +message ServerMetadataResponse +{ + // The server name. + string name = 1; + + // The server version. + string version = 2; + + // The extensions supported by the server. + repeated string extensions = 3; +} + +message ModelMetadataRequest +{ + // The name of the model. + string name = 1; + + // The version of the model to check for readiness. If not given the + // server will choose a version based on the model and internal policy. + string version = 2; +} + +message ModelMetadataResponse +{ + // Metadata for a tensor. + message TensorMetadata + { + // The tensor name. + string name = 1; + + // The tensor data type. + string datatype = 2; + + // The tensor shape. A variable-size dimension is represented + // by a -1 value. + repeated int64 shape = 3; + } + + // The model name. + string name = 1; + + // The versions of the model available on the server. + repeated string versions = 2; + + // The model's platform. See Platforms. 
+ string platform = 3; + + // The model's inputs. + repeated TensorMetadata inputs = 4; + + // The model's outputs. + repeated TensorMetadata outputs = 5; +} + +message ModelInferRequest +{ + // An input tensor for an inference request. + message InferInputTensor + { + // The tensor name. + string name = 1; + + // The tensor data type. + string datatype = 2; + + // The tensor shape. + repeated int64 shape = 3; + + // Optional inference input tensor parameters. + map<string, InferParameter> parameters = 4; + + // The tensor contents using a data-type format. This field must + // not be specified if "raw" tensor contents are being used for + // the inference request. + InferTensorContents contents = 5; + } + + // An output tensor requested for an inference request. + message InferRequestedOutputTensor + { + // The tensor name. + string name = 1; + + // Optional requested output tensor parameters. + map<string, InferParameter> parameters = 2; + } + + // The name of the model to use for inferencing. + string model_name = 1; + + // The version of the model to use for inference. If not given the + // server will choose a version based on the model and internal policy. + string model_version = 2; + + // Optional identifier for the request. If specified will be + // returned in the response. + string id = 3; + + // Optional inference parameters. + map<string, InferParameter> parameters = 4; + + // The input tensors for the inference. + repeated InferInputTensor inputs = 5; + + // The requested output tensors for the inference. Optional, if not + // specified all outputs produced by the model will be returned. + repeated InferRequestedOutputTensor outputs = 6; + + // The data contained in an input tensor can be represented in "raw" + // bytes form or in the repeated type that matches the tensor's data + // type. To use the raw representation 'raw_input_contents' must be + // initialized with data for each tensor in the same order as + // 'inputs'. 
For each tensor, the size of this content must match + // what is expected by the tensor's shape and data type. The raw + // data must be the flattened, one-dimensional, row-major order of + // the tensor elements without any stride or padding between the + // elements. Note that the FP16 and BF16 data types must be represented as + // raw content as there is no specific data type for a 16-bit float type. + // + // If this field is specified then InferInputTensor::contents must + // not be specified for any input tensor. + repeated bytes raw_input_contents = 7; +} + +message ModelInferResponse +{ + // An output tensor returned for an inference request. + message InferOutputTensor + { + // The tensor name. + string name = 1; + + // The tensor data type. + string datatype = 2; + + // The tensor shape. + repeated int64 shape = 3; + + // Optional output tensor parameters. + map<string, InferParameter> parameters = 4; + + // The tensor contents using a data-type format. This field must + // not be specified if "raw" tensor contents are being used for + // the inference response. + InferTensorContents contents = 5; + } + + // The name of the model used for inference. + string model_name = 1; + + // The version of the model used for inference. + string model_version = 2; + + // The id of the inference request if one was specified. + string id = 3; + + // Optional inference response parameters. + map<string, InferParameter> parameters = 4; + + // The output tensors holding inference results. + repeated InferOutputTensor outputs = 5; + + // The data contained in an output tensor can be represented in + // "raw" bytes form or in the repeated type that matches the + // tensor's data type. To use the raw representation 'raw_output_contents' + // must be initialized with data for each tensor in the same order as + // 'outputs'. For each tensor, the size of this content must match + // what is expected by the tensor's shape and data type. 
The raw + // data must be the flattened, one-dimensional, row-major order of + // the tensor elements without any stride or padding between the + // elements. Note that the FP16 and BF16 data types must be represented as + // raw content as there is no specific data type for a 16-bit float type. + // + // If this field is specified then InferOutputTensor::contents must + // not be specified for any output tensor. + repeated bytes raw_output_contents = 6; +} + +// An inference parameter value. The Parameters message describes a +// “name”/”value” pair, where the “name” is the name of the parameter +// and the “value” is a boolean, integer, or string corresponding to +// the parameter. +message InferParameter +{ + // The parameter value can be a string, an int64, a boolean + // or a message specific to a predefined parameter. + oneof parameter_choice + { + // A boolean parameter value. + bool bool_param = 1; + + // An int64 parameter value. + int64 int64_param = 2; + + // A string parameter value. + string string_param = 3; + } +} + +// The data contained in a tensor represented by the repeated type +// that matches the tensor's data type. Protobuf oneof is not used +// because oneofs cannot contain repeated fields. +message InferTensorContents +{ + // Representation for BOOL data type. The size must match what is + // expected by the tensor's shape. The contents must be the flattened, + // one-dimensional, row-major order of the tensor elements. + repeated bool bool_contents = 1; + + // Representation for INT8, INT16, and INT32 data types. The size + // must match what is expected by the tensor's shape. The contents + // must be the flattened, one-dimensional, row-major order of the + // tensor elements. + repeated int32 int_contents = 2; + + // Representation for INT64 data types. The size must match what + // is expected by the tensor's shape. The contents must be the + // flattened, one-dimensional, row-major order of the tensor elements. 
+ repeated int64 int64_contents = 3; + + // Representation for UINT8, UINT16, and UINT32 data types. The size + // must match what is expected by the tensor's shape. The contents + // must be the flattened, one-dimensional, row-major order of the + // tensor elements. + repeated uint32 uint_contents = 4; + + // Representation for UINT64 data types. The size must match what + // is expected by the tensor's shape. The contents must be the + // flattened, one-dimensional, row-major order of the tensor elements. + repeated uint64 uint64_contents = 5; + + // Representation for FP32 data type. The size must match what is + // expected by the tensor's shape. The contents must be the flattened, + // one-dimensional, row-major order of the tensor elements. + repeated float fp32_contents = 6; + + // Representation for FP64 data type. The size must match what is + // expected by the tensor's shape. The contents must be the flattened, + // one-dimensional, row-major order of the tensor elements. + repeated double fp64_contents = 7; + + // Representation for BYTES data type. The size must match what is + // expected by the tensor's shape. The contents must be the flattened, + // one-dimensional, row-major order of the tensor elements. + repeated bytes bytes_contents = 8; +} + +message RepositoryModelLoadRequest +{ + // The name of the model to load, or reload. + string model_name = 1; +} + +message RepositoryModelLoadResponse +{ + // The name of the model trying to load or reload. + string model_name = 1; + + // boolean parameter to indicate whether model is loaded or not + bool isLoaded = 2; +} + +message RepositoryModelUnloadRequest +{ + // The name of the model to unload. + string model_name = 1; +} + +message RepositoryModelUnloadResponse +{ + // The name of the model trying to unload. 
+ string model_name = 1; + + // boolean parameter to indicate whether model is unloaded or not + bool isUnloaded = 2; +} diff --git a/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.py b/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.py new file mode 100644 index 000000000..a0b035d7e --- /dev/null +++ b/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.py @@ -0,0 +1,452 @@ +# Copyright 2022 The KServe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# source: grpc_predict_v2.proto +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database + +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b'\n\x15grpc_predict_v2.proto\x12\tinference"\x13\n\x11ServerLiveRequest""\n\x12ServerLiveResponse\x12\x0c\n\x04live\x18\x01 \x01(\x08"\x14\n\x12ServerReadyRequest"$\n\x13ServerReadyResponse\x12\r\n\x05ready\x18\x01 \x01(\x08"2\n\x11ModelReadyRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t"#\n\x12ModelReadyResponse\x12\r\n\x05ready\x18\x01 \x01(\x08"\x17\n\x15ServerMetadataRequest"K\n\x16ServerMetadataResponse\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t\x12\x12\n\nextensions\x18\x03 \x03(\t"5\n\x14ModelMetadataRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t"\x8d\x02\n\x15ModelMetadataResponse\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08versions\x18\x02 \x03(\t\x12\x10\n\x08platform\x18\x03 \x01(\t\x12?\n\x06inputs\x18\x04 \x03(\x0b\x32/.inference.ModelMetadataResponse.TensorMetadata\x12@\n\x07outputs\x18\x05 \x03(\x0b\x32/.inference.ModelMetadataResponse.TensorMetadata\x1a?\n\x0eTensorMetadata\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08\x64\x61tatype\x18\x02 \x01(\t\x12\r\n\x05shape\x18\x03 \x03(\x03"\xee\x06\n\x11ModelInferRequest\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\t\x12\n\n\x02id\x18\x03 \x01(\t\x12@\n\nparameters\x18\x04 \x03(\x0b\x32,.inference.ModelInferRequest.ParametersEntry\x12=\n\x06inputs\x18\x05 \x03(\x0b\x32-.inference.ModelInferRequest.InferInputTensor\x12H\n\x07outputs\x18\x06 
\x03(\x0b\x32\x37.inference.ModelInferRequest.InferRequestedOutputTensor\x12\x1a\n\x12raw_input_contents\x18\x07 \x03(\x0c\x1a\x94\x02\n\x10InferInputTensor\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08\x64\x61tatype\x18\x02 \x01(\t\x12\r\n\x05shape\x18\x03 \x03(\x03\x12Q\n\nparameters\x18\x04 \x03(\x0b\x32=.inference.ModelInferRequest.InferInputTensor.ParametersEntry\x12\x30\n\x08\x63ontents\x18\x05 \x01(\x0b\x32\x1e.inference.InferTensorContents\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.InferParameter:\x02\x38\x01\x1a\xd5\x01\n\x1aInferRequestedOutputTensor\x12\x0c\n\x04name\x18\x01 \x01(\t\x12[\n\nparameters\x18\x02 \x03(\x0b\x32G.inference.ModelInferRequest.InferRequestedOutputTensor.ParametersEntry\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.InferParameter:\x02\x38\x01\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.InferParameter:\x02\x38\x01"\xd5\x04\n\x12ModelInferResponse\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x15\n\rmodel_version\x18\x02 \x01(\t\x12\n\n\x02id\x18\x03 \x01(\t\x12\x41\n\nparameters\x18\x04 \x03(\x0b\x32-.inference.ModelInferResponse.ParametersEntry\x12@\n\x07outputs\x18\x05 \x03(\x0b\x32/.inference.ModelInferResponse.InferOutputTensor\x12\x1b\n\x13raw_output_contents\x18\x06 \x03(\x0c\x1a\x97\x02\n\x11InferOutputTensor\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x10\n\x08\x64\x61tatype\x18\x02 \x01(\t\x12\r\n\x05shape\x18\x03 \x03(\x03\x12S\n\nparameters\x18\x04 \x03(\x0b\x32?.inference.ModelInferResponse.InferOutputTensor.ParametersEntry\x12\x30\n\x08\x63ontents\x18\x05 \x01(\x0b\x32\x1e.inference.InferTensorContents\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 \x01(\x0b\x32\x19.inference.InferParameter:\x02\x38\x01\x1aL\n\x0fParametersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12(\n\x05value\x18\x02 
\x01(\x0b\x32\x19.inference.InferParameter:\x02\x38\x01"i\n\x0eInferParameter\x12\x14\n\nbool_param\x18\x01 \x01(\x08H\x00\x12\x15\n\x0bint64_param\x18\x02 \x01(\x03H\x00\x12\x16\n\x0cstring_param\x18\x03 \x01(\tH\x00\x42\x12\n\x10parameter_choice"\xd0\x01\n\x13InferTensorContents\x12\x15\n\rbool_contents\x18\x01 \x03(\x08\x12\x14\n\x0cint_contents\x18\x02 \x03(\x05\x12\x16\n\x0eint64_contents\x18\x03 \x03(\x03\x12\x15\n\ruint_contents\x18\x04 \x03(\r\x12\x17\n\x0fuint64_contents\x18\x05 \x03(\x04\x12\x15\n\rfp32_contents\x18\x06 \x03(\x02\x12\x15\n\rfp64_contents\x18\x07 \x03(\x01\x12\x16\n\x0e\x62ytes_contents\x18\x08 \x03(\x0c"0\n\x1aRepositoryModelLoadRequest\x12\x12\n\nmodel_name\x18\x01 \x01(\t"C\n\x1bRepositoryModelLoadResponse\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x10\n\x08isLoaded\x18\x02 \x01(\x08"2\n\x1cRepositoryModelUnloadRequest\x12\x12\n\nmodel_name\x18\x01 \x01(\t"G\n\x1dRepositoryModelUnloadResponse\x12\x12\n\nmodel_name\x18\x01 \x01(\t\x12\x12\n\nisUnloaded\x18\x02 \x01(\x08\x32\xd2\x05\n\x14GRPCInferenceService\x12K\n\nServerLive\x12\x1c.inference.ServerLiveRequest\x1a\x1d.inference.ServerLiveResponse"\x00\x12N\n\x0bServerReady\x12\x1d.inference.ServerReadyRequest\x1a\x1e.inference.ServerReadyResponse"\x00\x12K\n\nModelReady\x12\x1c.inference.ModelReadyRequest\x1a\x1d.inference.ModelReadyResponse"\x00\x12W\n\x0eServerMetadata\x12 .inference.ServerMetadataRequest\x1a!.inference.ServerMetadataResponse"\x00\x12T\n\rModelMetadata\x12\x1f.inference.ModelMetadataRequest\x1a .inference.ModelMetadataResponse"\x00\x12K\n\nModelInfer\x12\x1c.inference.ModelInferRequest\x1a\x1d.inference.ModelInferResponse"\x00\x12\x66\n\x13RepositoryModelLoad\x12%.inference.RepositoryModelLoadRequest\x1a&.inference.RepositoryModelLoadResponse"\x00\x12l\n\x15RepositoryModelUnload\x12\'.inference.RepositoryModelUnloadRequest\x1a(.inference.RepositoryModelUnloadResponse"\x00\x62\x06proto3' +) + + +_SERVERLIVEREQUEST = DESCRIPTOR.message_types_by_name["ServerLiveRequest"] 
+_SERVERLIVERESPONSE = DESCRIPTOR.message_types_by_name["ServerLiveResponse"] +_SERVERREADYREQUEST = DESCRIPTOR.message_types_by_name["ServerReadyRequest"] +_SERVERREADYRESPONSE = DESCRIPTOR.message_types_by_name["ServerReadyResponse"] +_MODELREADYREQUEST = DESCRIPTOR.message_types_by_name["ModelReadyRequest"] +_MODELREADYRESPONSE = DESCRIPTOR.message_types_by_name["ModelReadyResponse"] +_SERVERMETADATAREQUEST = DESCRIPTOR.message_types_by_name["ServerMetadataRequest"] +_SERVERMETADATARESPONSE = DESCRIPTOR.message_types_by_name["ServerMetadataResponse"] +_MODELMETADATAREQUEST = DESCRIPTOR.message_types_by_name["ModelMetadataRequest"] +_MODELMETADATARESPONSE = DESCRIPTOR.message_types_by_name["ModelMetadataResponse"] +_MODELMETADATARESPONSE_TENSORMETADATA = _MODELMETADATARESPONSE.nested_types_by_name[ + "TensorMetadata" +] +_MODELINFERREQUEST = DESCRIPTOR.message_types_by_name["ModelInferRequest"] +_MODELINFERREQUEST_INFERINPUTTENSOR = _MODELINFERREQUEST.nested_types_by_name[ + "InferInputTensor" +] +_MODELINFERREQUEST_INFERINPUTTENSOR_PARAMETERSENTRY = ( + _MODELINFERREQUEST_INFERINPUTTENSOR.nested_types_by_name["ParametersEntry"] +) +_MODELINFERREQUEST_INFERREQUESTEDOUTPUTTENSOR = _MODELINFERREQUEST.nested_types_by_name[ + "InferRequestedOutputTensor" +] +_MODELINFERREQUEST_INFERREQUESTEDOUTPUTTENSOR_PARAMETERSENTRY = ( + _MODELINFERREQUEST_INFERREQUESTEDOUTPUTTENSOR.nested_types_by_name[ + "ParametersEntry" + ] +) +_MODELINFERREQUEST_PARAMETERSENTRY = _MODELINFERREQUEST.nested_types_by_name[ + "ParametersEntry" +] +_MODELINFERRESPONSE = DESCRIPTOR.message_types_by_name["ModelInferResponse"] +_MODELINFERRESPONSE_INFEROUTPUTTENSOR = _MODELINFERRESPONSE.nested_types_by_name[ + "InferOutputTensor" +] +_MODELINFERRESPONSE_INFEROUTPUTTENSOR_PARAMETERSENTRY = ( + _MODELINFERRESPONSE_INFEROUTPUTTENSOR.nested_types_by_name["ParametersEntry"] +) +_MODELINFERRESPONSE_PARAMETERSENTRY = _MODELINFERRESPONSE.nested_types_by_name[ + "ParametersEntry" +] +_INFERPARAMETER = 
DESCRIPTOR.message_types_by_name["InferParameter"] +_INFERTENSORCONTENTS = DESCRIPTOR.message_types_by_name["InferTensorContents"] +_REPOSITORYMODELLOADREQUEST = DESCRIPTOR.message_types_by_name[ + "RepositoryModelLoadRequest" +] +_REPOSITORYMODELLOADRESPONSE = DESCRIPTOR.message_types_by_name[ + "RepositoryModelLoadResponse" +] +_REPOSITORYMODELUNLOADREQUEST = DESCRIPTOR.message_types_by_name[ + "RepositoryModelUnloadRequest" +] +_REPOSITORYMODELUNLOADRESPONSE = DESCRIPTOR.message_types_by_name[ + "RepositoryModelUnloadResponse" +] +ServerLiveRequest = _reflection.GeneratedProtocolMessageType( + "ServerLiveRequest", + (_message.Message,), + { + "DESCRIPTOR": _SERVERLIVEREQUEST, + "__module__": "grpc_predict_v2_pb2", + # @@protoc_insertion_point(class_scope:inference.ServerLiveRequest) + }, +) +_sym_db.RegisterMessage(ServerLiveRequest) + +ServerLiveResponse = _reflection.GeneratedProtocolMessageType( + "ServerLiveResponse", + (_message.Message,), + { + "DESCRIPTOR": _SERVERLIVERESPONSE, + "__module__": "grpc_predict_v2_pb2", + # @@protoc_insertion_point(class_scope:inference.ServerLiveResponse) + }, +) +_sym_db.RegisterMessage(ServerLiveResponse) + +ServerReadyRequest = _reflection.GeneratedProtocolMessageType( + "ServerReadyRequest", + (_message.Message,), + { + "DESCRIPTOR": _SERVERREADYREQUEST, + "__module__": "grpc_predict_v2_pb2", + # @@protoc_insertion_point(class_scope:inference.ServerReadyRequest) + }, +) +_sym_db.RegisterMessage(ServerReadyRequest) + +ServerReadyResponse = _reflection.GeneratedProtocolMessageType( + "ServerReadyResponse", + (_message.Message,), + { + "DESCRIPTOR": _SERVERREADYRESPONSE, + "__module__": "grpc_predict_v2_pb2", + # @@protoc_insertion_point(class_scope:inference.ServerReadyResponse) + }, +) +_sym_db.RegisterMessage(ServerReadyResponse) + +ModelReadyRequest = _reflection.GeneratedProtocolMessageType( + "ModelReadyRequest", + (_message.Message,), + { + "DESCRIPTOR": _MODELREADYREQUEST, + "__module__": "grpc_predict_v2_pb2", + # 
@@protoc_insertion_point(class_scope:inference.ModelReadyRequest) + }, +) +_sym_db.RegisterMessage(ModelReadyRequest) + +ModelReadyResponse = _reflection.GeneratedProtocolMessageType( + "ModelReadyResponse", + (_message.Message,), + { + "DESCRIPTOR": _MODELREADYRESPONSE, + "__module__": "grpc_predict_v2_pb2", + # @@protoc_insertion_point(class_scope:inference.ModelReadyResponse) + }, +) +_sym_db.RegisterMessage(ModelReadyResponse) + +ServerMetadataRequest = _reflection.GeneratedProtocolMessageType( + "ServerMetadataRequest", + (_message.Message,), + { + "DESCRIPTOR": _SERVERMETADATAREQUEST, + "__module__": "grpc_predict_v2_pb2", + # @@protoc_insertion_point(class_scope:inference.ServerMetadataRequest) + }, +) +_sym_db.RegisterMessage(ServerMetadataRequest) + +ServerMetadataResponse = _reflection.GeneratedProtocolMessageType( + "ServerMetadataResponse", + (_message.Message,), + { + "DESCRIPTOR": _SERVERMETADATARESPONSE, + "__module__": "grpc_predict_v2_pb2", + # @@protoc_insertion_point(class_scope:inference.ServerMetadataResponse) + }, +) +_sym_db.RegisterMessage(ServerMetadataResponse) + +ModelMetadataRequest = _reflection.GeneratedProtocolMessageType( + "ModelMetadataRequest", + (_message.Message,), + { + "DESCRIPTOR": _MODELMETADATAREQUEST, + "__module__": "grpc_predict_v2_pb2", + # @@protoc_insertion_point(class_scope:inference.ModelMetadataRequest) + }, +) +_sym_db.RegisterMessage(ModelMetadataRequest) + +ModelMetadataResponse = _reflection.GeneratedProtocolMessageType( + "ModelMetadataResponse", + (_message.Message,), + { + "TensorMetadata": _reflection.GeneratedProtocolMessageType( + "TensorMetadata", + (_message.Message,), + { + "DESCRIPTOR": _MODELMETADATARESPONSE_TENSORMETADATA, + "__module__": "grpc_predict_v2_pb2", + # @@protoc_insertion_point(class_scope:inference.ModelMetadataResponse.TensorMetadata) + }, + ), + "DESCRIPTOR": _MODELMETADATARESPONSE, + "__module__": "grpc_predict_v2_pb2", + # 
@@protoc_insertion_point(class_scope:inference.ModelMetadataResponse) + }, +) +_sym_db.RegisterMessage(ModelMetadataResponse) +_sym_db.RegisterMessage(ModelMetadataResponse.TensorMetadata) + +ModelInferRequest = _reflection.GeneratedProtocolMessageType( + "ModelInferRequest", + (_message.Message,), + { + "InferInputTensor": _reflection.GeneratedProtocolMessageType( + "InferInputTensor", + (_message.Message,), + { + "ParametersEntry": _reflection.GeneratedProtocolMessageType( + "ParametersEntry", + (_message.Message,), + { + "DESCRIPTOR": _MODELINFERREQUEST_INFERINPUTTENSOR_PARAMETERSENTRY, + "__module__": "grpc_predict_v2_pb2", + # @@protoc_insertion_point(class_scope:inference.ModelInferRequest.InferInputTensor.ParametersEntry) + }, + ), + "DESCRIPTOR": _MODELINFERREQUEST_INFERINPUTTENSOR, + "__module__": "grpc_predict_v2_pb2", + # @@protoc_insertion_point(class_scope:inference.ModelInferRequest.InferInputTensor) + }, + ), + "InferRequestedOutputTensor": _reflection.GeneratedProtocolMessageType( + "InferRequestedOutputTensor", + (_message.Message,), + { + "ParametersEntry": _reflection.GeneratedProtocolMessageType( + "ParametersEntry", + (_message.Message,), + { + "DESCRIPTOR": _MODELINFERREQUEST_INFERREQUESTEDOUTPUTTENSOR_PARAMETERSENTRY, + "__module__": "grpc_predict_v2_pb2", + # @@protoc_insertion_point(class_scope:inference.ModelInferRequest.InferRequestedOutputTensor.ParametersEntry) + }, + ), + "DESCRIPTOR": _MODELINFERREQUEST_INFERREQUESTEDOUTPUTTENSOR, + "__module__": "grpc_predict_v2_pb2", + # @@protoc_insertion_point(class_scope:inference.ModelInferRequest.InferRequestedOutputTensor) + }, + ), + "ParametersEntry": _reflection.GeneratedProtocolMessageType( + "ParametersEntry", + (_message.Message,), + { + "DESCRIPTOR": _MODELINFERREQUEST_PARAMETERSENTRY, + "__module__": "grpc_predict_v2_pb2", + # @@protoc_insertion_point(class_scope:inference.ModelInferRequest.ParametersEntry) + }, + ), + "DESCRIPTOR": _MODELINFERREQUEST, + "__module__": 
"grpc_predict_v2_pb2", + # @@protoc_insertion_point(class_scope:inference.ModelInferRequest) + }, +) +_sym_db.RegisterMessage(ModelInferRequest) +_sym_db.RegisterMessage(ModelInferRequest.InferInputTensor) +_sym_db.RegisterMessage(ModelInferRequest.InferInputTensor.ParametersEntry) +_sym_db.RegisterMessage(ModelInferRequest.InferRequestedOutputTensor) +_sym_db.RegisterMessage(ModelInferRequest.InferRequestedOutputTensor.ParametersEntry) +_sym_db.RegisterMessage(ModelInferRequest.ParametersEntry) + +ModelInferResponse = _reflection.GeneratedProtocolMessageType( + "ModelInferResponse", + (_message.Message,), + { + "InferOutputTensor": _reflection.GeneratedProtocolMessageType( + "InferOutputTensor", + (_message.Message,), + { + "ParametersEntry": _reflection.GeneratedProtocolMessageType( + "ParametersEntry", + (_message.Message,), + { + "DESCRIPTOR": _MODELINFERRESPONSE_INFEROUTPUTTENSOR_PARAMETERSENTRY, + "__module__": "grpc_predict_v2_pb2", + # @@protoc_insertion_point(class_scope:inference.ModelInferResponse.InferOutputTensor.ParametersEntry) + }, + ), + "DESCRIPTOR": _MODELINFERRESPONSE_INFEROUTPUTTENSOR, + "__module__": "grpc_predict_v2_pb2", + # @@protoc_insertion_point(class_scope:inference.ModelInferResponse.InferOutputTensor) + }, + ), + "ParametersEntry": _reflection.GeneratedProtocolMessageType( + "ParametersEntry", + (_message.Message,), + { + "DESCRIPTOR": _MODELINFERRESPONSE_PARAMETERSENTRY, + "__module__": "grpc_predict_v2_pb2", + # @@protoc_insertion_point(class_scope:inference.ModelInferResponse.ParametersEntry) + }, + ), + "DESCRIPTOR": _MODELINFERRESPONSE, + "__module__": "grpc_predict_v2_pb2", + # @@protoc_insertion_point(class_scope:inference.ModelInferResponse) + }, +) +_sym_db.RegisterMessage(ModelInferResponse) +_sym_db.RegisterMessage(ModelInferResponse.InferOutputTensor) +_sym_db.RegisterMessage(ModelInferResponse.InferOutputTensor.ParametersEntry) +_sym_db.RegisterMessage(ModelInferResponse.ParametersEntry) + +InferParameter = 
_reflection.GeneratedProtocolMessageType( + "InferParameter", + (_message.Message,), + { + "DESCRIPTOR": _INFERPARAMETER, + "__module__": "grpc_predict_v2_pb2", + # @@protoc_insertion_point(class_scope:inference.InferParameter) + }, +) +_sym_db.RegisterMessage(InferParameter) + +InferTensorContents = _reflection.GeneratedProtocolMessageType( + "InferTensorContents", + (_message.Message,), + { + "DESCRIPTOR": _INFERTENSORCONTENTS, + "__module__": "grpc_predict_v2_pb2", + # @@protoc_insertion_point(class_scope:inference.InferTensorContents) + }, +) +_sym_db.RegisterMessage(InferTensorContents) + +RepositoryModelLoadRequest = _reflection.GeneratedProtocolMessageType( + "RepositoryModelLoadRequest", + (_message.Message,), + { + "DESCRIPTOR": _REPOSITORYMODELLOADREQUEST, + "__module__": "grpc_predict_v2_pb2", + # @@protoc_insertion_point(class_scope:inference.RepositoryModelLoadRequest) + }, +) +_sym_db.RegisterMessage(RepositoryModelLoadRequest) + +RepositoryModelLoadResponse = _reflection.GeneratedProtocolMessageType( + "RepositoryModelLoadResponse", + (_message.Message,), + { + "DESCRIPTOR": _REPOSITORYMODELLOADRESPONSE, + "__module__": "grpc_predict_v2_pb2", + # @@protoc_insertion_point(class_scope:inference.RepositoryModelLoadResponse) + }, +) +_sym_db.RegisterMessage(RepositoryModelLoadResponse) + +RepositoryModelUnloadRequest = _reflection.GeneratedProtocolMessageType( + "RepositoryModelUnloadRequest", + (_message.Message,), + { + "DESCRIPTOR": _REPOSITORYMODELUNLOADREQUEST, + "__module__": "grpc_predict_v2_pb2", + # @@protoc_insertion_point(class_scope:inference.RepositoryModelUnloadRequest) + }, +) +_sym_db.RegisterMessage(RepositoryModelUnloadRequest) + +RepositoryModelUnloadResponse = _reflection.GeneratedProtocolMessageType( + "RepositoryModelUnloadResponse", + (_message.Message,), + { + "DESCRIPTOR": _REPOSITORYMODELUNLOADRESPONSE, + "__module__": "grpc_predict_v2_pb2", + # @@protoc_insertion_point(class_scope:inference.RepositoryModelUnloadResponse) + }, 
+) +_sym_db.RegisterMessage(RepositoryModelUnloadResponse) + +_GRPCINFERENCESERVICE = DESCRIPTOR.services_by_name["GRPCInferenceService"] +if _descriptor._USE_C_DESCRIPTORS == False: # noqa: E712 + + DESCRIPTOR._options = None + _MODELINFERREQUEST_INFERINPUTTENSOR_PARAMETERSENTRY._options = None + _MODELINFERREQUEST_INFERINPUTTENSOR_PARAMETERSENTRY._serialized_options = b"8\001" + _MODELINFERREQUEST_INFERREQUESTEDOUTPUTTENSOR_PARAMETERSENTRY._options = None + _MODELINFERREQUEST_INFERREQUESTEDOUTPUTTENSOR_PARAMETERSENTRY._serialized_options = ( + b"8\001" + ) + _MODELINFERREQUEST_PARAMETERSENTRY._options = None + _MODELINFERREQUEST_PARAMETERSENTRY._serialized_options = b"8\001" + _MODELINFERRESPONSE_INFEROUTPUTTENSOR_PARAMETERSENTRY._options = None + _MODELINFERRESPONSE_INFEROUTPUTTENSOR_PARAMETERSENTRY._serialized_options = b"8\001" + _MODELINFERRESPONSE_PARAMETERSENTRY._options = None + _MODELINFERRESPONSE_PARAMETERSENTRY._serialized_options = b"8\001" + _SERVERLIVEREQUEST._serialized_start = 36 + _SERVERLIVEREQUEST._serialized_end = 55 + _SERVERLIVERESPONSE._serialized_start = 57 + _SERVERLIVERESPONSE._serialized_end = 91 + _SERVERREADYREQUEST._serialized_start = 93 + _SERVERREADYREQUEST._serialized_end = 113 + _SERVERREADYRESPONSE._serialized_start = 115 + _SERVERREADYRESPONSE._serialized_end = 151 + _MODELREADYREQUEST._serialized_start = 153 + _MODELREADYREQUEST._serialized_end = 203 + _MODELREADYRESPONSE._serialized_start = 205 + _MODELREADYRESPONSE._serialized_end = 240 + _SERVERMETADATAREQUEST._serialized_start = 242 + _SERVERMETADATAREQUEST._serialized_end = 265 + _SERVERMETADATARESPONSE._serialized_start = 267 + _SERVERMETADATARESPONSE._serialized_end = 342 + _MODELMETADATAREQUEST._serialized_start = 344 + _MODELMETADATAREQUEST._serialized_end = 397 + _MODELMETADATARESPONSE._serialized_start = 400 + _MODELMETADATARESPONSE._serialized_end = 669 + _MODELMETADATARESPONSE_TENSORMETADATA._serialized_start = 606 + 
_MODELMETADATARESPONSE_TENSORMETADATA._serialized_end = 669 + _MODELINFERREQUEST._serialized_start = 672 + _MODELINFERREQUEST._serialized_end = 1550 + _MODELINFERREQUEST_INFERINPUTTENSOR._serialized_start = 980 + _MODELINFERREQUEST_INFERINPUTTENSOR._serialized_end = 1256 + _MODELINFERREQUEST_INFERINPUTTENSOR_PARAMETERSENTRY._serialized_start = 1180 + _MODELINFERREQUEST_INFERINPUTTENSOR_PARAMETERSENTRY._serialized_end = 1256 + _MODELINFERREQUEST_INFERREQUESTEDOUTPUTTENSOR._serialized_start = 1259 + _MODELINFERREQUEST_INFERREQUESTEDOUTPUTTENSOR._serialized_end = 1472 + _MODELINFERREQUEST_INFERREQUESTEDOUTPUTTENSOR_PARAMETERSENTRY._serialized_start = ( + 1180 + ) + _MODELINFERREQUEST_INFERREQUESTEDOUTPUTTENSOR_PARAMETERSENTRY._serialized_end = 1256 + _MODELINFERREQUEST_PARAMETERSENTRY._serialized_start = 1180 + _MODELINFERREQUEST_PARAMETERSENTRY._serialized_end = 1256 + _MODELINFERRESPONSE._serialized_start = 1553 + _MODELINFERRESPONSE._serialized_end = 2150 + _MODELINFERRESPONSE_INFEROUTPUTTENSOR._serialized_start = 1793 + _MODELINFERRESPONSE_INFEROUTPUTTENSOR._serialized_end = 2072 + _MODELINFERRESPONSE_INFEROUTPUTTENSOR_PARAMETERSENTRY._serialized_start = 1180 + _MODELINFERRESPONSE_INFEROUTPUTTENSOR_PARAMETERSENTRY._serialized_end = 1256 + _MODELINFERRESPONSE_PARAMETERSENTRY._serialized_start = 1180 + _MODELINFERRESPONSE_PARAMETERSENTRY._serialized_end = 1256 + _INFERPARAMETER._serialized_start = 2152 + _INFERPARAMETER._serialized_end = 2257 + _INFERTENSORCONTENTS._serialized_start = 2260 + _INFERTENSORCONTENTS._serialized_end = 2468 + _REPOSITORYMODELLOADREQUEST._serialized_start = 2470 + _REPOSITORYMODELLOADREQUEST._serialized_end = 2518 + _REPOSITORYMODELLOADRESPONSE._serialized_start = 2520 + _REPOSITORYMODELLOADRESPONSE._serialized_end = 2587 + _REPOSITORYMODELUNLOADREQUEST._serialized_start = 2589 + _REPOSITORYMODELUNLOADREQUEST._serialized_end = 2639 + _REPOSITORYMODELUNLOADRESPONSE._serialized_start = 2641 + _REPOSITORYMODELUNLOADRESPONSE._serialized_end = 
2712 + _GRPCINFERENCESERVICE._serialized_start = 2715 + _GRPCINFERENCESERVICE._serialized_end = 3437 +# @@protoc_insertion_point(module_scope) diff --git a/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.pyi b/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.pyi new file mode 100644 index 000000000..5407c606b --- /dev/null +++ b/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2.pyi @@ -0,0 +1,399 @@ +from google.protobuf.internal import containers as _containers +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from typing import ( + ClassVar as _ClassVar, + Iterable as _Iterable, + Mapping as _Mapping, + Optional as _Optional, + Union as _Union, +) + +DESCRIPTOR: _descriptor.FileDescriptor + +class InferParameter(_message.Message): + __slots__ = ["bool_param", "int64_param", "string_param"] + BOOL_PARAM_FIELD_NUMBER: _ClassVar[int] + INT64_PARAM_FIELD_NUMBER: _ClassVar[int] + STRING_PARAM_FIELD_NUMBER: _ClassVar[int] + bool_param: bool + int64_param: int + string_param: str + def __init__( + self, + bool_param: bool = ..., + int64_param: _Optional[int] = ..., + string_param: _Optional[str] = ..., + ) -> None: ... 
+ +class InferTensorContents(_message.Message): + __slots__ = [ + "bool_contents", + "bytes_contents", + "fp32_contents", + "fp64_contents", + "int64_contents", + "int_contents", + "uint64_contents", + "uint_contents", + ] + BOOL_CONTENTS_FIELD_NUMBER: _ClassVar[int] + BYTES_CONTENTS_FIELD_NUMBER: _ClassVar[int] + FP32_CONTENTS_FIELD_NUMBER: _ClassVar[int] + FP64_CONTENTS_FIELD_NUMBER: _ClassVar[int] + INT64_CONTENTS_FIELD_NUMBER: _ClassVar[int] + INT_CONTENTS_FIELD_NUMBER: _ClassVar[int] + UINT64_CONTENTS_FIELD_NUMBER: _ClassVar[int] + UINT_CONTENTS_FIELD_NUMBER: _ClassVar[int] + bool_contents: _containers.RepeatedScalarFieldContainer[bool] + bytes_contents: _containers.RepeatedScalarFieldContainer[bytes] + fp32_contents: _containers.RepeatedScalarFieldContainer[float] + fp64_contents: _containers.RepeatedScalarFieldContainer[float] + int64_contents: _containers.RepeatedScalarFieldContainer[int] + int_contents: _containers.RepeatedScalarFieldContainer[int] + uint64_contents: _containers.RepeatedScalarFieldContainer[int] + uint_contents: _containers.RepeatedScalarFieldContainer[int] + def __init__( + self, + bool_contents: _Optional[_Iterable[bool]] = ..., + int_contents: _Optional[_Iterable[int]] = ..., + int64_contents: _Optional[_Iterable[int]] = ..., + uint_contents: _Optional[_Iterable[int]] = ..., + uint64_contents: _Optional[_Iterable[int]] = ..., + fp32_contents: _Optional[_Iterable[float]] = ..., + fp64_contents: _Optional[_Iterable[float]] = ..., + bytes_contents: _Optional[_Iterable[bytes]] = ..., + ) -> None: ... 
+ +class ModelInferRequest(_message.Message): + __slots__ = [ + "id", + "inputs", + "model_name", + "model_version", + "outputs", + "parameters", + "raw_input_contents", + ] + + class InferInputTensor(_message.Message): + __slots__ = ["contents", "datatype", "name", "parameters", "shape"] + + class ParametersEntry(_message.Message): + __slots__ = ["key", "value"] + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: InferParameter + def __init__( + self, + key: _Optional[str] = ..., + value: _Optional[_Union[InferParameter, _Mapping]] = ..., + ) -> None: ... + CONTENTS_FIELD_NUMBER: _ClassVar[int] + DATATYPE_FIELD_NUMBER: _ClassVar[int] + NAME_FIELD_NUMBER: _ClassVar[int] + PARAMETERS_FIELD_NUMBER: _ClassVar[int] + SHAPE_FIELD_NUMBER: _ClassVar[int] + contents: InferTensorContents + datatype: str + name: str + parameters: _containers.MessageMap[str, InferParameter] + shape: _containers.RepeatedScalarFieldContainer[int] + def __init__( + self, + name: _Optional[str] = ..., + datatype: _Optional[str] = ..., + shape: _Optional[_Iterable[int]] = ..., + parameters: _Optional[_Mapping[str, InferParameter]] = ..., + contents: _Optional[_Union[InferTensorContents, _Mapping]] = ..., + ) -> None: ... + + class InferRequestedOutputTensor(_message.Message): + __slots__ = ["name", "parameters"] + + class ParametersEntry(_message.Message): + __slots__ = ["key", "value"] + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: InferParameter + def __init__( + self, + key: _Optional[str] = ..., + value: _Optional[_Union[InferParameter, _Mapping]] = ..., + ) -> None: ... + NAME_FIELD_NUMBER: _ClassVar[int] + PARAMETERS_FIELD_NUMBER: _ClassVar[int] + name: str + parameters: _containers.MessageMap[str, InferParameter] + def __init__( + self, + name: _Optional[str] = ..., + parameters: _Optional[_Mapping[str, InferParameter]] = ..., + ) -> None: ... 
+ + class ParametersEntry(_message.Message): + __slots__ = ["key", "value"] + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: InferParameter + def __init__( + self, + key: _Optional[str] = ..., + value: _Optional[_Union[InferParameter, _Mapping]] = ..., + ) -> None: ... + ID_FIELD_NUMBER: _ClassVar[int] + INPUTS_FIELD_NUMBER: _ClassVar[int] + MODEL_NAME_FIELD_NUMBER: _ClassVar[int] + MODEL_VERSION_FIELD_NUMBER: _ClassVar[int] + OUTPUTS_FIELD_NUMBER: _ClassVar[int] + PARAMETERS_FIELD_NUMBER: _ClassVar[int] + RAW_INPUT_CONTENTS_FIELD_NUMBER: _ClassVar[int] + id: str + inputs: _containers.RepeatedCompositeFieldContainer[ + ModelInferRequest.InferInputTensor + ] + model_name: str + model_version: str + outputs: _containers.RepeatedCompositeFieldContainer[ + ModelInferRequest.InferRequestedOutputTensor + ] + parameters: _containers.MessageMap[str, InferParameter] + raw_input_contents: _containers.RepeatedScalarFieldContainer[bytes] + def __init__( + self, + model_name: _Optional[str] = ..., + model_version: _Optional[str] = ..., + id: _Optional[str] = ..., + parameters: _Optional[_Mapping[str, InferParameter]] = ..., + inputs: _Optional[ + _Iterable[_Union[ModelInferRequest.InferInputTensor, _Mapping]] + ] = ..., + outputs: _Optional[ + _Iterable[_Union[ModelInferRequest.InferRequestedOutputTensor, _Mapping]] + ] = ..., + raw_input_contents: _Optional[_Iterable[bytes]] = ..., + ) -> None: ... 
+ +class ModelInferResponse(_message.Message): + __slots__ = [ + "id", + "model_name", + "model_version", + "outputs", + "parameters", + "raw_output_contents", + ] + + class InferOutputTensor(_message.Message): + __slots__ = ["contents", "datatype", "name", "parameters", "shape"] + + class ParametersEntry(_message.Message): + __slots__ = ["key", "value"] + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: InferParameter + def __init__( + self, + key: _Optional[str] = ..., + value: _Optional[_Union[InferParameter, _Mapping]] = ..., + ) -> None: ... + CONTENTS_FIELD_NUMBER: _ClassVar[int] + DATATYPE_FIELD_NUMBER: _ClassVar[int] + NAME_FIELD_NUMBER: _ClassVar[int] + PARAMETERS_FIELD_NUMBER: _ClassVar[int] + SHAPE_FIELD_NUMBER: _ClassVar[int] + contents: InferTensorContents + datatype: str + name: str + parameters: _containers.MessageMap[str, InferParameter] + shape: _containers.RepeatedScalarFieldContainer[int] + def __init__( + self, + name: _Optional[str] = ..., + datatype: _Optional[str] = ..., + shape: _Optional[_Iterable[int]] = ..., + parameters: _Optional[_Mapping[str, InferParameter]] = ..., + contents: _Optional[_Union[InferTensorContents, _Mapping]] = ..., + ) -> None: ... + + class ParametersEntry(_message.Message): + __slots__ = ["key", "value"] + KEY_FIELD_NUMBER: _ClassVar[int] + VALUE_FIELD_NUMBER: _ClassVar[int] + key: str + value: InferParameter + def __init__( + self, + key: _Optional[str] = ..., + value: _Optional[_Union[InferParameter, _Mapping]] = ..., + ) -> None: ... 
+ ID_FIELD_NUMBER: _ClassVar[int] + MODEL_NAME_FIELD_NUMBER: _ClassVar[int] + MODEL_VERSION_FIELD_NUMBER: _ClassVar[int] + OUTPUTS_FIELD_NUMBER: _ClassVar[int] + PARAMETERS_FIELD_NUMBER: _ClassVar[int] + RAW_OUTPUT_CONTENTS_FIELD_NUMBER: _ClassVar[int] + id: str + model_name: str + model_version: str + outputs: _containers.RepeatedCompositeFieldContainer[ + ModelInferResponse.InferOutputTensor + ] + parameters: _containers.MessageMap[str, InferParameter] + raw_output_contents: _containers.RepeatedScalarFieldContainer[bytes] + def __init__( + self, + model_name: _Optional[str] = ..., + model_version: _Optional[str] = ..., + id: _Optional[str] = ..., + parameters: _Optional[_Mapping[str, InferParameter]] = ..., + outputs: _Optional[ + _Iterable[_Union[ModelInferResponse.InferOutputTensor, _Mapping]] + ] = ..., + raw_output_contents: _Optional[_Iterable[bytes]] = ..., + ) -> None: ... + +class ModelMetadataRequest(_message.Message): + __slots__ = ["name", "version"] + NAME_FIELD_NUMBER: _ClassVar[int] + VERSION_FIELD_NUMBER: _ClassVar[int] + name: str + version: str + def __init__( + self, name: _Optional[str] = ..., version: _Optional[str] = ... + ) -> None: ... + +class ModelMetadataResponse(_message.Message): + __slots__ = ["inputs", "name", "outputs", "platform", "versions"] + + class TensorMetadata(_message.Message): + __slots__ = ["datatype", "name", "shape"] + DATATYPE_FIELD_NUMBER: _ClassVar[int] + NAME_FIELD_NUMBER: _ClassVar[int] + SHAPE_FIELD_NUMBER: _ClassVar[int] + datatype: str + name: str + shape: _containers.RepeatedScalarFieldContainer[int] + def __init__( + self, + name: _Optional[str] = ..., + datatype: _Optional[str] = ..., + shape: _Optional[_Iterable[int]] = ..., + ) -> None: ... 
+ INPUTS_FIELD_NUMBER: _ClassVar[int] + NAME_FIELD_NUMBER: _ClassVar[int] + OUTPUTS_FIELD_NUMBER: _ClassVar[int] + PLATFORM_FIELD_NUMBER: _ClassVar[int] + VERSIONS_FIELD_NUMBER: _ClassVar[int] + inputs: _containers.RepeatedCompositeFieldContainer[ + ModelMetadataResponse.TensorMetadata + ] + name: str + outputs: _containers.RepeatedCompositeFieldContainer[ + ModelMetadataResponse.TensorMetadata + ] + platform: str + versions: _containers.RepeatedScalarFieldContainer[str] + def __init__( + self, + name: _Optional[str] = ..., + versions: _Optional[_Iterable[str]] = ..., + platform: _Optional[str] = ..., + inputs: _Optional[ + _Iterable[_Union[ModelMetadataResponse.TensorMetadata, _Mapping]] + ] = ..., + outputs: _Optional[ + _Iterable[_Union[ModelMetadataResponse.TensorMetadata, _Mapping]] + ] = ..., + ) -> None: ... + +class ModelReadyRequest(_message.Message): + __slots__ = ["name", "version"] + NAME_FIELD_NUMBER: _ClassVar[int] + VERSION_FIELD_NUMBER: _ClassVar[int] + name: str + version: str + def __init__( + self, name: _Optional[str] = ..., version: _Optional[str] = ... + ) -> None: ... + +class ModelReadyResponse(_message.Message): + __slots__ = ["ready"] + READY_FIELD_NUMBER: _ClassVar[int] + ready: bool + def __init__(self, ready: bool = ...) -> None: ... + +class RepositoryModelLoadRequest(_message.Message): + __slots__ = ["model_name"] + MODEL_NAME_FIELD_NUMBER: _ClassVar[int] + model_name: str + def __init__(self, model_name: _Optional[str] = ...) -> None: ... + +class RepositoryModelLoadResponse(_message.Message): + __slots__ = ["isLoaded", "model_name"] + ISLOADED_FIELD_NUMBER: _ClassVar[int] + MODEL_NAME_FIELD_NUMBER: _ClassVar[int] + isLoaded: bool + model_name: str + def __init__( + self, model_name: _Optional[str] = ..., isLoaded: bool = ... + ) -> None: ... 
+ +class RepositoryModelUnloadRequest(_message.Message): + __slots__ = ["model_name"] + MODEL_NAME_FIELD_NUMBER: _ClassVar[int] + model_name: str + def __init__(self, model_name: _Optional[str] = ...) -> None: ... + +class RepositoryModelUnloadResponse(_message.Message): + __slots__ = ["isUnloaded", "model_name"] + ISUNLOADED_FIELD_NUMBER: _ClassVar[int] + MODEL_NAME_FIELD_NUMBER: _ClassVar[int] + isUnloaded: bool + model_name: str + def __init__( + self, model_name: _Optional[str] = ..., isUnloaded: bool = ... + ) -> None: ... + +class ServerLiveRequest(_message.Message): + __slots__ = [] + def __init__(self) -> None: ... + +class ServerLiveResponse(_message.Message): + __slots__ = ["live"] + LIVE_FIELD_NUMBER: _ClassVar[int] + live: bool + def __init__(self, live: bool = ...) -> None: ... + +class ServerMetadataRequest(_message.Message): + __slots__ = [] + def __init__(self) -> None: ... + +class ServerMetadataResponse(_message.Message): + __slots__ = ["extensions", "name", "version"] + EXTENSIONS_FIELD_NUMBER: _ClassVar[int] + NAME_FIELD_NUMBER: _ClassVar[int] + VERSION_FIELD_NUMBER: _ClassVar[int] + extensions: _containers.RepeatedScalarFieldContainer[str] + name: str + version: str + def __init__( + self, + name: _Optional[str] = ..., + version: _Optional[str] = ..., + extensions: _Optional[_Iterable[str]] = ..., + ) -> None: ... + +class ServerReadyRequest(_message.Message): + __slots__ = [] + def __init__(self) -> None: ... + +class ServerReadyResponse(_message.Message): + __slots__ = ["ready"] + READY_FIELD_NUMBER: _ClassVar[int] + ready: bool + def __init__(self, ready: bool = ...) -> None: ... diff --git a/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2_grpc.py b/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2_grpc.py new file mode 100644 index 000000000..fc525ea05 --- /dev/null +++ b/python/hsml/client/istio/grpc/proto/grpc_predict_v2_pb2_grpc.py @@ -0,0 +1,419 @@ +# Copyright 2022 The KServe Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! +"""Client and server classes corresponding to protobuf-defined services.""" +import hsml.client.istio.grpc.inference_client as inference_client + +import hsml.client.istio.grpc.proto.grpc_predict_v2_pb2 as grpc__predict__v2__pb2 + + +class GRPCInferenceServiceStub(object): + """Inference Server GRPC endpoints.""" + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. 
+ """ + self.ServerLive = channel.unary_unary( + "/inference.GRPCInferenceService/ServerLive", + request_serializer=grpc__predict__v2__pb2.ServerLiveRequest.SerializeToString, + response_deserializer=grpc__predict__v2__pb2.ServerLiveResponse.FromString, + ) + self.ServerReady = channel.unary_unary( + "/inference.GRPCInferenceService/ServerReady", + request_serializer=grpc__predict__v2__pb2.ServerReadyRequest.SerializeToString, + response_deserializer=grpc__predict__v2__pb2.ServerReadyResponse.FromString, + ) + self.ModelReady = channel.unary_unary( + "/inference.GRPCInferenceService/ModelReady", + request_serializer=grpc__predict__v2__pb2.ModelReadyRequest.SerializeToString, + response_deserializer=grpc__predict__v2__pb2.ModelReadyResponse.FromString, + ) + self.ServerMetadata = channel.unary_unary( + "/inference.GRPCInferenceService/ServerMetadata", + request_serializer=grpc__predict__v2__pb2.ServerMetadataRequest.SerializeToString, + response_deserializer=grpc__predict__v2__pb2.ServerMetadataResponse.FromString, + ) + self.ModelMetadata = channel.unary_unary( + "/inference.GRPCInferenceService/ModelMetadata", + request_serializer=grpc__predict__v2__pb2.ModelMetadataRequest.SerializeToString, + response_deserializer=grpc__predict__v2__pb2.ModelMetadataResponse.FromString, + ) + self.ModelInfer = channel.unary_unary( + "/inference.GRPCInferenceService/ModelInfer", + request_serializer=grpc__predict__v2__pb2.ModelInferRequest.SerializeToString, + response_deserializer=grpc__predict__v2__pb2.ModelInferResponse.FromString, + ) + self.RepositoryModelLoad = channel.unary_unary( + "/inference.GRPCInferenceService/RepositoryModelLoad", + request_serializer=grpc__predict__v2__pb2.RepositoryModelLoadRequest.SerializeToString, + response_deserializer=grpc__predict__v2__pb2.RepositoryModelLoadResponse.FromString, + ) + self.RepositoryModelUnload = channel.unary_unary( + "/inference.GRPCInferenceService/RepositoryModelUnload", + 
request_serializer=grpc__predict__v2__pb2.RepositoryModelUnloadRequest.SerializeToString, + response_deserializer=grpc__predict__v2__pb2.RepositoryModelUnloadResponse.FromString, + ) + + +class GRPCInferenceServiceServicer(object): + """Inference Server GRPC endpoints.""" + + def ServerLive(self, request, context): + """The ServerLive API indicates if the inference server is able to receive + and respond to metadata and inference requests. + """ + context.set_code(inference_client.StatusCode.UNIMPLEMENTED) + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") + + def ServerReady(self, request, context): + """The ServerReady API indicates if the server is ready for inferencing.""" + context.set_code(inference_client.StatusCode.UNIMPLEMENTED) + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") + + def ModelReady(self, request, context): + """The ModelReady API indicates if a specific model is ready for inferencing.""" + context.set_code(inference_client.StatusCode.UNIMPLEMENTED) + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") + + def ServerMetadata(self, request, context): + """The ServerMetadata API provides information about the server. Errors are + indicated by the google.rpc.Status returned for the request. The OK code + indicates success and other codes indicate failure. + """ + context.set_code(inference_client.StatusCode.UNIMPLEMENTED) + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") + + def ModelMetadata(self, request, context): + """The per-model metadata API provides information about a model. Errors are + indicated by the google.rpc.Status returned for the request. The OK code + indicates success and other codes indicate failure. 
+ """ + context.set_code(inference_client.StatusCode.UNIMPLEMENTED) + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") + + def ModelInfer(self, request, context): + """The ModelInfer API performs inference using the specified model. Errors are + indicated by the google.rpc.Status returned for the request. The OK code + indicates success and other codes indicate failure. + """ + context.set_code(inference_client.StatusCode.UNIMPLEMENTED) + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") + + def RepositoryModelLoad(self, request, context): + """Load or reload a model from a repository.""" + context.set_code(inference_client.StatusCode.UNIMPLEMENTED) + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") + + def RepositoryModelUnload(self, request, context): + """Unload a model.""" + context.set_code(inference_client.StatusCode.UNIMPLEMENTED) + context.set_details("Method not implemented!") + raise NotImplementedError("Method not implemented!") + + +def add_GRPCInferenceServiceServicer_to_server(servicer, server): + rpc_method_handlers = { + "ServerLive": inference_client.unary_unary_rpc_method_handler( + servicer.ServerLive, + request_deserializer=grpc__predict__v2__pb2.ServerLiveRequest.FromString, + response_serializer=grpc__predict__v2__pb2.ServerLiveResponse.SerializeToString, + ), + "ServerReady": inference_client.unary_unary_rpc_method_handler( + servicer.ServerReady, + request_deserializer=grpc__predict__v2__pb2.ServerReadyRequest.FromString, + response_serializer=grpc__predict__v2__pb2.ServerReadyResponse.SerializeToString, + ), + "ModelReady": inference_client.unary_unary_rpc_method_handler( + servicer.ModelReady, + request_deserializer=grpc__predict__v2__pb2.ModelReadyRequest.FromString, + response_serializer=grpc__predict__v2__pb2.ModelReadyResponse.SerializeToString, + ), + "ServerMetadata": 
inference_client.unary_unary_rpc_method_handler( + servicer.ServerMetadata, + request_deserializer=grpc__predict__v2__pb2.ServerMetadataRequest.FromString, + response_serializer=grpc__predict__v2__pb2.ServerMetadataResponse.SerializeToString, + ), + "ModelMetadata": inference_client.unary_unary_rpc_method_handler( + servicer.ModelMetadata, + request_deserializer=grpc__predict__v2__pb2.ModelMetadataRequest.FromString, + response_serializer=grpc__predict__v2__pb2.ModelMetadataResponse.SerializeToString, + ), + "ModelInfer": inference_client.unary_unary_rpc_method_handler( + servicer.ModelInfer, + request_deserializer=grpc__predict__v2__pb2.ModelInferRequest.FromString, + response_serializer=grpc__predict__v2__pb2.ModelInferResponse.SerializeToString, + ), + "RepositoryModelLoad": inference_client.unary_unary_rpc_method_handler( + servicer.RepositoryModelLoad, + request_deserializer=grpc__predict__v2__pb2.RepositoryModelLoadRequest.FromString, + response_serializer=grpc__predict__v2__pb2.RepositoryModelLoadResponse.SerializeToString, + ), + "RepositoryModelUnload": inference_client.unary_unary_rpc_method_handler( + servicer.RepositoryModelUnload, + request_deserializer=grpc__predict__v2__pb2.RepositoryModelUnloadRequest.FromString, + response_serializer=grpc__predict__v2__pb2.RepositoryModelUnloadResponse.SerializeToString, + ), + } + generic_handler = inference_client.method_handlers_generic_handler( + "inference.GRPCInferenceService", rpc_method_handlers + ) + server.add_generic_rpc_handlers((generic_handler,)) + + +# This class is part of an EXPERIMENTAL API. 
+class GRPCInferenceService(object): + """Inference Server GRPC endpoints.""" + + @staticmethod + def ServerLive( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return inference_client.experimental.unary_unary( + request, + target, + "/inference.GRPCInferenceService/ServerLive", + grpc__predict__v2__pb2.ServerLiveRequest.SerializeToString, + grpc__predict__v2__pb2.ServerLiveResponse.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) + + @staticmethod + def ServerReady( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return inference_client.experimental.unary_unary( + request, + target, + "/inference.GRPCInferenceService/ServerReady", + grpc__predict__v2__pb2.ServerReadyRequest.SerializeToString, + grpc__predict__v2__pb2.ServerReadyResponse.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) + + @staticmethod + def ModelReady( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return inference_client.experimental.unary_unary( + request, + target, + "/inference.GRPCInferenceService/ModelReady", + grpc__predict__v2__pb2.ModelReadyRequest.SerializeToString, + grpc__predict__v2__pb2.ModelReadyResponse.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) + + @staticmethod + def ServerMetadata( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + 
compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return inference_client.experimental.unary_unary( + request, + target, + "/inference.GRPCInferenceService/ServerMetadata", + grpc__predict__v2__pb2.ServerMetadataRequest.SerializeToString, + grpc__predict__v2__pb2.ServerMetadataResponse.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) + + @staticmethod + def ModelMetadata( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return inference_client.experimental.unary_unary( + request, + target, + "/inference.GRPCInferenceService/ModelMetadata", + grpc__predict__v2__pb2.ModelMetadataRequest.SerializeToString, + grpc__predict__v2__pb2.ModelMetadataResponse.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) + + @staticmethod + def ModelInfer( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return inference_client.experimental.unary_unary( + request, + target, + "/inference.GRPCInferenceService/ModelInfer", + grpc__predict__v2__pb2.ModelInferRequest.SerializeToString, + grpc__predict__v2__pb2.ModelInferResponse.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) + + @staticmethod + def RepositoryModelLoad( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return inference_client.experimental.unary_unary( + request, + target, + 
"/inference.GRPCInferenceService/RepositoryModelLoad", + grpc__predict__v2__pb2.RepositoryModelLoadRequest.SerializeToString, + grpc__predict__v2__pb2.RepositoryModelLoadResponse.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) + + @staticmethod + def RepositoryModelUnload( + request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None, + ): + return inference_client.experimental.unary_unary( + request, + target, + "/inference.GRPCInferenceService/RepositoryModelUnload", + grpc__predict__v2__pb2.RepositoryModelUnloadRequest.SerializeToString, + grpc__predict__v2__pb2.RepositoryModelUnloadResponse.FromString, + options, + channel_credentials, + insecure, + call_credentials, + compression, + wait_for_ready, + timeout, + metadata, + ) diff --git a/python/hsml/client/istio/internal.py b/python/hsml/client/istio/internal.py index 1033f941e..feab49d71 100644 --- a/python/hsml/client/istio/internal.py +++ b/python/hsml/client/istio/internal.py @@ -21,7 +21,7 @@ from pathlib import Path -from hsml.client import auth +from hsml.client import auth, exceptions from hsml.client.istio import base as istio try: @@ -198,3 +198,9 @@ def _get_cert_pw(self): with pwd_path.open() as f: return f.read() + + def _get_serving_api_key(self): + """Retrieve serving API key from environment variable.""" + if self.SERVING_API_KEY not in os.environ: + raise exceptions.InternalClientError("Serving API key not found") + return os.environ[self.SERVING_API_KEY] diff --git a/python/hsml/client/istio/utils/__init__.py b/python/hsml/client/istio/utils/__init__.py new file mode 100644 index 000000000..ff8055b9b --- /dev/null +++ b/python/hsml/client/istio/utils/__init__.py @@ -0,0 +1,15 @@ +# +# Copyright 2024 Hopsworks AB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you 
may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/python/hsml/client/istio/utils/infer_type.py b/python/hsml/client/istio/utils/infer_type.py new file mode 100644 index 000000000..7feabc20e --- /dev/null +++ b/python/hsml/client/istio/utils/infer_type.py @@ -0,0 +1,811 @@ +# Copyright 2023 The KServe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# This implementation has been borrowed from kserve/kserve repository +# https://github.com/kserve/kserve/blob/release-0.11/python/kserve/kserve/protocol/infer_type.py + +from typing import Optional, List, Dict + +import struct +import numpy +import numpy as np +import pandas as pd +from hsml.client.istio.grpc.errors import InvalidInput +from hsml.client.istio.grpc.proto.grpc_predict_v2_pb2 import ( + ModelInferRequest, + InferTensorContents, + ModelInferResponse, +) +from hsml.client.istio.utils.numpy_codec import to_np_dtype, from_np_dtype + +GRPC_CONTENT_DATATYPE_MAPPINGS = { + "BOOL": "bool_contents", + "INT8": "int_contents", + "INT16": "int_contents", + "INT32": "int_contents", + "INT64": "int64_contents", + "UINT8": "uint_contents", + "UINT16": "uint_contents", + "UINT32": "uint_contents", + "UINT64": "uint64_contents", + "FP32": "fp32_contents", + "FP64": "fp64_contents", + "BYTES": "bytes_contents", +} + + +def raise_error(msg): + """ + Raise error with the provided message + """ + raise InferenceServerException(msg=msg) from None + + +def serialize_byte_tensor(input_tensor): + """ + Serializes a bytes tensor into a flat numpy array of length prepended + bytes. The numpy array should use dtype of np.object. For np.bytes, + numpy will remove trailing zeros at the end of byte sequence and because + of this it should be avoided. + + Parameters + ---------- + input_tensor : np.array + The bytes tensor to serialize. + + Returns + ------- + serialized_bytes_tensor : np.array + The 1-D numpy array of type uint8 containing the serialized bytes in row-major form. + + Raises + ------ + InferenceServerException + If unable to serialize the given tensor. + """ + + if input_tensor.size == 0: + return np.empty([0], dtype=np.object_) + + # If the input is a tensor of string/bytes objects, then must flatten those into + # a 1-dimensional array containing the 4-byte byte size followed by the + # actual element bytes. 
All elements are concatenated together in row-major + # order. + + if (input_tensor.dtype != np.object_) and (input_tensor.dtype.type != np.bytes_): + raise_error("cannot serialize bytes tensor: invalid datatype") + + flattened_ls = [] + # 'C' order is row-major. + for obj in np.nditer(input_tensor, flags=["refs_ok"], order="C"): + # If directly passing bytes to BYTES type, + # don't convert it to str as Python will encode the + # bytes which may distort the meaning + if input_tensor.dtype == np.object_: + if type(obj.item()) == bytes: + s = obj.item() + else: + s = str(obj.item()).encode("utf-8") + else: + s = obj.item() + flattened_ls.append(struct.pack(" np.ndarray: + dtype = to_np_dtype(self.datatype) + if dtype is None: + raise InvalidInput("invalid datatype in the input") + if self._raw_data is not None: + np_array = np.frombuffer(self._raw_data, dtype=dtype) + return np_array.reshape(self._shape) + else: + np_array = np.array(self._data, dtype=dtype) + return np_array.reshape(self._shape) + + def set_data_from_numpy(self, input_tensor, binary_data=True): + """Set the tensor data from the specified numpy array for + input associated with this object. + Parameters + ---------- + input_tensor : numpy array + The tensor data in numpy array format + binary_data : bool + Indicates whether to set data for the input in binary format + or explicit tensor within JSON. The default value is True, + which means the data will be delivered as binary data in the + HTTP body after the JSON object. + Raises + ------ + InferenceServerException + If failed to set data for the tensor. 
+ """ + if not isinstance(input_tensor, (np.ndarray,)): + raise_error("input_tensor must be a numpy array") + + dtype = from_np_dtype(input_tensor.dtype) + if self._datatype != dtype: + raise_error( + "got unexpected datatype {} from numpy array, expected {}".format( + dtype, self._datatype + ) + ) + valid_shape = True + if len(self._shape) != len(input_tensor.shape): + valid_shape = False + else: + for i in range(len(self._shape)): + if self._shape[i] != input_tensor.shape[i]: + valid_shape = False + if not valid_shape: + raise_error( + "got unexpected numpy array shape [{}], expected [{}]".format( + str(input_tensor.shape)[1:-1], str(self._shape)[1:-1] + ) + ) + + if not binary_data: + self._parameters.pop("binary_data_size", None) + self._raw_data = None + if self._datatype == "BYTES": + self._data = [] + try: + if input_tensor.size > 0: + for obj in np.nditer( + input_tensor, flags=["refs_ok"], order="C" + ): + # We need to convert the object to string using utf-8, + # if we want to use the binary_data=False. JSON requires + # the input to be a UTF-8 string. + if input_tensor.dtype == np.object_: + if type(obj.item()) == bytes: + self._data.append(str(obj.item(), encoding="utf-8")) + else: + self._data.append(str(obj.item())) + else: + self._data.append(str(obj.item(), encoding="utf-8")) + except UnicodeDecodeError: + raise_error( + f'Failed to encode "{obj.item()}" using UTF-8. Please use binary_data=True, if' + " you want to pass a byte array." 
+ ) + else: + self._data = [val.item() for val in input_tensor.flatten()] + else: + self._data = None + if self._datatype == "BYTES": + serialized_output = serialize_byte_tensor(input_tensor) + if serialized_output.size > 0: + self._raw_data = serialized_output.item() + else: + self._raw_data = b"" + else: + self._raw_data = input_tensor.tobytes() + self._parameters["binary_data_size"] = len(self._raw_data) + + +def get_content(datatype: str, data: InferTensorContents): + if datatype == "BOOL": + return list(data.bool_contents) + elif datatype in ["UINT8", "UINT16", "UINT32"]: + return list(data.uint_contents) + elif datatype == "UINT64": + return list(data.uint64_contents) + elif datatype in ["INT8", "INT16", "INT32"]: + return list(data.int_contents) + elif datatype == "INT64": + return list(data.int64_contents) + elif datatype == "FP32": + return list(data.fp32_contents) + elif datatype == "FP64": + return list(data.fp64_contents) + elif datatype == "BYTES": + return list(data.bytes_contents) + else: + raise InvalidInput("invalid content type") + + +class InferRequest: + """InferenceRequest Model + + $inference_request = + { + "id" : $string #optional, + "parameters" : $parameters #optional, + "inputs" : [ $request_input, ... ], + "outputs" : [ $request_output, ... 
] #optional + } + """ + + id: Optional[str] + model_name: str + parameters: Optional[Dict] + inputs: List[InferInput] + from_grpc: bool + + def __init__( + self, + model_name: str, + infer_inputs: List[InferInput], + request_id=None, + raw_inputs=None, + from_grpc=False, + parameters=None, + ): + if parameters is None: + parameters = {} + self.id = request_id + self.model_name = model_name + self.inputs = infer_inputs + self.parameters = parameters + self.from_grpc = from_grpc + if raw_inputs: + for i, raw_input in enumerate(raw_inputs): + self.inputs[i]._raw_data = raw_input + + @classmethod + def from_grpc(cls, request: ModelInferRequest): + infer_inputs = [ + InferInput( + name=input_tensor.name, + shape=list(input_tensor.shape), + datatype=input_tensor.datatype, + data=get_content(input_tensor.datatype, input_tensor.contents), + parameters=input_tensor.parameters, + ) + for input_tensor in request.inputs + ] + return cls( + request_id=request.id, + model_name=request.model_name, + infer_inputs=infer_inputs, + raw_inputs=request.raw_input_contents, + from_grpc=True, + parameters=request.parameters, + ) + + def to_rest(self) -> Dict: + """Converts the InferRequest object to v2 REST InferenceRequest message""" + infer_inputs = [] + for infer_input in self.inputs: + infer_input_dict = { + "name": infer_input.name, + "shape": infer_input.shape, + "datatype": infer_input.datatype, + } + if isinstance(infer_input.data, numpy.ndarray): + infer_input.set_data_from_numpy(infer_input.data, binary_data=False) + infer_input_dict["data"] = infer_input.data + else: + infer_input_dict["data"] = infer_input.data + infer_inputs.append(infer_input_dict) + return {"id": self.id, "inputs": infer_inputs} + + def to_grpc(self) -> ModelInferRequest: + """Converts the InferRequest object to gRPC ModelInferRequest message""" + infer_inputs = [] + raw_input_contents = [] + for infer_input in self.inputs: + if isinstance(infer_input.data, numpy.ndarray): + 
infer_input.set_data_from_numpy(infer_input.data, binary_data=True) + infer_input_dict = { + "name": infer_input.name, + "shape": infer_input.shape, + "datatype": infer_input.datatype, + } + if infer_input._raw_data is not None: + raw_input_contents.append(infer_input._raw_data) + else: + if not isinstance(infer_input.data, List): + raise InvalidInput("input data is not a List") + infer_input_dict["contents"] = {} + data_key = GRPC_CONTENT_DATATYPE_MAPPINGS.get( + infer_input.datatype, None + ) + if data_key is not None: + infer_input._data = [ + bytes(val, "utf-8") if isinstance(val, str) else val + for val in infer_input.data + ] # str to byte conversion for grpc proto + infer_input_dict["contents"][data_key] = infer_input.data + else: + raise InvalidInput("invalid input datatype") + infer_inputs.append(infer_input_dict) + + return ModelInferRequest( + id=self.id, + model_name=self.model_name, + inputs=infer_inputs, + raw_input_contents=raw_input_contents, + ) + + def as_dataframe(self) -> pd.DataFrame: + """ + Decode the tensor inputs as pandas dataframe + """ + dfs = [] + for input in self.inputs: + input_data = input.data + if input.datatype == "BYTES": + input_data = [ + str(val, "utf-8") if isinstance(val, bytes) else val + for val in input.data + ] + dfs.append(pd.DataFrame(input_data, columns=[input.name])) + return pd.concat(dfs, axis=1) + + +class InferOutput: + def __init__(self, name, shape, datatype, data=None, parameters=None): + """An object of InferOutput class is used to describe + input tensor for an inference request. + Parameters + ---------- + name : str + The name of input whose data will be described by this object + shape : list + The shape of the associated input. + datatype : str + The datatype of the associated input. + data : Union[List, InferTensorContents] + The data of the REST/gRPC input. When data is not set, raw_data is used for gRPC for numpy array bytes. + parameters : dict + The additional server-specific parameters. 
+ """ + if parameters is None: + parameters = {} + self._name = name + self._shape = shape + self._datatype = datatype + self._parameters = parameters + self._data = data + self._raw_data = None + + @property + def name(self): + """Get the name of input associated with this object. + Returns + ------- + str + The name of input + """ + return self._name + + @property + def datatype(self): + """Get the datatype of input associated with this object. + Returns + ------- + str + The datatype of input + """ + return self._datatype + + @property + def data(self): + """Get the data of InferOutput""" + return self._data + + @property + def shape(self): + """Get the shape of input associated with this object. + Returns + ------- + list + The shape of input + """ + return self._shape + + @property + def parameters(self): + """Get the parameters of input associated with this object. + Returns + ------- + dict + The key, value pair of string and InferParameter + """ + return self._parameters + + def set_shape(self, shape): + """Set the shape of input. + Parameters + ---------- + shape : list + The shape of the associated input. + """ + self._shape = shape + + def as_numpy(self) -> numpy.ndarray: + """ + Decode the tensor data as numpy array + """ + dtype = to_np_dtype(self.datatype) + if dtype is None: + raise InvalidInput("invalid datatype in the input") + if self._raw_data is not None: + np_array = np.frombuffer(self._raw_data, dtype=dtype) + return np_array.reshape(self._shape) + else: + np_array = np.array(self._data, dtype=dtype) + return np_array.reshape(self._shape) + + def set_data_from_numpy(self, input_tensor, binary_data=True): + """Set the tensor data from the specified numpy array for + input associated with this object. + Parameters + ---------- + input_tensor : numpy array + The tensor data in numpy array format + binary_data : bool + Indicates whether to set data for the input in binary format + or explicit tensor within JSON. 
The default value is True, + which means the data will be delivered as binary data in the + HTTP body after the JSON object. + Raises + ------ + InferenceServerException + If failed to set data for the tensor. + """ + if not isinstance(input_tensor, (np.ndarray,)): + raise_error("input_tensor must be a numpy array") + + dtype = from_np_dtype(input_tensor.dtype) + if self._datatype != dtype: + raise_error( + "got unexpected datatype {} from numpy array, expected {}".format( + dtype, self._datatype + ) + ) + valid_shape = True + if len(self._shape) != len(input_tensor.shape): + valid_shape = False + else: + for i in range(len(self._shape)): + if self._shape[i] != input_tensor.shape[i]: + valid_shape = False + if not valid_shape: + raise_error( + "got unexpected numpy array shape [{}], expected [{}]".format( + str(input_tensor.shape)[1:-1], str(self._shape)[1:-1] + ) + ) + + if not binary_data: + self._parameters.pop("binary_data_size", None) + self._raw_data = None + if self._datatype == "BYTES": + self._data = [] + try: + if input_tensor.size > 0: + for obj in np.nditer( + input_tensor, flags=["refs_ok"], order="C" + ): + # We need to convert the object to string using utf-8, + # if we want to use the binary_data=False. JSON requires + # the input to be a UTF-8 string. + if input_tensor.dtype == np.object_: + if type(obj.item()) == bytes: + self._data.append(str(obj.item(), encoding="utf-8")) + else: + self._data.append(str(obj.item())) + else: + self._data.append(str(obj.item(), encoding="utf-8")) + except UnicodeDecodeError: + raise_error( + f'Failed to encode "{obj.item()}" using UTF-8. Please use binary_data=True, if' + " you want to pass a byte array." 
+ ) + else: + self._data = [val.item() for val in input_tensor.flatten()] + else: + self._data = None + if self._datatype == "BYTES": + serialized_output = serialize_byte_tensor(input_tensor) + if serialized_output.size > 0: + self._raw_data = serialized_output.item() + else: + self._raw_data = b"" + else: + self._raw_data = input_tensor.tobytes() + self._parameters["binary_data_size"] = len(self._raw_data) + + +class InferResponse: + """InferenceResponse + + $inference_response = + { + "model_name" : $string, + "model_version" : $string #optional, + "id" : $string, + "parameters" : $parameters #optional, + "outputs" : [ $response_output, ... ] + } + """ + + id: str + model_name: str + parameters: Optional[Dict] + outputs: List[InferOutput] + from_grpc: bool + + def __init__( + self, + response_id: str, + model_name: str, + infer_outputs: List[InferOutput], + raw_outputs=None, + from_grpc=False, + parameters=None, + ): + if parameters is None: + parameters = {} + self.id = response_id + self.model_name = model_name + self.outputs = infer_outputs + self.parameters = parameters + self.from_grpc = from_grpc + if raw_outputs: + for i, raw_output in enumerate(raw_outputs): + self.outputs[i]._raw_data = raw_output + + @classmethod + def from_grpc(cls, response: ModelInferResponse) -> "InferResponse": + infer_outputs = [ + InferOutput( + name=output.name, + shape=list(output.shape), + datatype=output.datatype, + data=get_content(output.datatype, output.contents), + parameters=output.parameters, + ) + for output in response.outputs + ] + return cls( + model_name=response.model_name, + response_id=response.id, + parameters=response.parameters, + infer_outputs=infer_outputs, + raw_outputs=response.raw_output_contents, + from_grpc=True, + ) + + @classmethod + def from_rest(cls, model_name: str, response: Dict) -> "InferResponse": + infer_outputs = [ + InferOutput( + name=output["name"], + shape=list(output["shape"]), + datatype=output["datatype"], + data=output["data"], + 
parameters=output.get("parameters", {}), + ) + for output in response["outputs"] + ] + return cls( + model_name=model_name, + response_id=response.get("id", None), + parameters=response.get("parameters", {}), + infer_outputs=infer_outputs, + ) + + def to_rest(self) -> Dict: + """Converts the InferResponse object to v2 REST InferenceRequest message""" + infer_outputs = [] + for i, infer_output in enumerate(self.outputs): + infer_output_dict = { + "name": infer_output.name, + "shape": infer_output.shape, + "datatype": infer_output.datatype, + } + if isinstance(infer_output.data, numpy.ndarray): + infer_output.set_data_from_numpy(infer_output.data, binary_data=False) + infer_output_dict["data"] = infer_output.data + elif isinstance(infer_output._raw_data, bytes): + infer_output_dict["data"] = infer_output.as_numpy().tolist() + else: + infer_output_dict["data"] = infer_output.data + infer_outputs.append(infer_output_dict) + res = {"id": self.id, "model_name": self.model_name, "outputs": infer_outputs} + return res + + def to_grpc(self) -> ModelInferResponse: + """Converts the InferResponse object to gRPC ModelInferRequest message""" + infer_outputs = [] + raw_output_contents = [] + for infer_output in self.outputs: + if isinstance(infer_output.data, numpy.ndarray): + infer_output.set_data_from_numpy(infer_output.data, binary_data=True) + infer_output_dict = { + "name": infer_output.name, + "shape": infer_output.shape, + "datatype": infer_output.datatype, + } + if infer_output._raw_data is not None: + raw_output_contents.append(infer_output._raw_data) + else: + if not isinstance(infer_output.data, List): + raise InvalidInput("output data is not a List") + infer_output_dict["contents"] = {} + data_key = GRPC_CONTENT_DATATYPE_MAPPINGS.get( + infer_output.datatype, None + ) + if data_key is not None: + infer_output._data = [ + bytes(val, "utf-8") if isinstance(val, str) else val + for val in infer_output.data + ] # str to byte conversion for grpc proto + 
infer_output_dict["contents"][data_key] = infer_output.data + else: + raise InvalidInput("to_grpc: invalid output datatype") + infer_outputs.append(infer_output_dict) + + return ModelInferResponse( + id=self.id, + model_name=self.model_name, + outputs=infer_outputs, + raw_output_contents=raw_output_contents, + ) diff --git a/python/hsml/client/istio/utils/numpy_codec.py b/python/hsml/client/istio/utils/numpy_codec.py new file mode 100644 index 000000000..3c6ecb606 --- /dev/null +++ b/python/hsml/client/istio/utils/numpy_codec.py @@ -0,0 +1,67 @@ +# Copyright 2021 The KServe Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# This implementation has been borrowed from kserve/kserve repository +# https://github.com/kserve/kserve/blob/release-0.11/python/kserve/kserve/utils/numpy_codec.py + +import numpy as np + + +def to_np_dtype(dtype): + dtype_map = { + "BOOL": bool, + "INT8": np.int8, + "INT16": np.int16, + "INT32": np.int32, + "INT64": np.int64, + "UINT8": np.uint8, + "UINT16": np.uint16, + "UINT32": np.uint32, + "UINT64": np.uint64, + "FP16": np.float16, + "FP32": np.float32, + "FP64": np.float64, + "BYTES": np.object_, + } + return dtype_map.get(dtype, None) + + +def from_np_dtype(np_dtype): + if np_dtype == bool: + return "BOOL" + elif np_dtype == np.int8: + return "INT8" + elif np_dtype == np.int16: + return "INT16" + elif np_dtype == np.int32: + return "INT32" + elif np_dtype == np.int64: + return "INT64" + elif np_dtype == np.uint8: + return "UINT8" + elif np_dtype == np.uint16: + return "UINT16" + elif np_dtype == np.uint32: + return "UINT32" + elif np_dtype == np.uint64: + return "UINT64" + elif np_dtype == np.float16: + return "FP16" + elif np_dtype == np.float32: + return "FP32" + elif np_dtype == np.float64: + return "FP64" + elif np_dtype == np.object_ or np_dtype.type == np.bytes_: + return "BYTES" + return None diff --git a/python/hsml/constants.py b/python/hsml/constants.py index 682c7884b..42dd7a412 100644 --- a/python/hsml/constants.py +++ b/python/hsml/constants.py @@ -108,6 +108,9 @@ class INFERENCE_ENDPOINTS: PORT_NAME_HTTPS = "HTTPS" PORT_NAME_STATUS_PORT = "STATUS" PORT_NAME_TLS = "TLS" + # protocol + API_PROTOCOL_REST = "REST" + API_PROTOCOL_GRPC = "GRPC" class DEPLOYABLE_COMPONENT: diff --git a/python/hsml/core/serving_api.py b/python/hsml/core/serving_api.py index 9d2bb8cd0..49b1cabb4 100644 --- a/python/hsml/core/serving_api.py +++ b/python/hsml/core/serving_api.py @@ -15,11 +15,17 @@ # import json +from typing import Union, Dict, List from hsml import client, deployment, predictor_state from hsml import inference_endpoint from hsml import 
deployable_component_logs -from hsml.constants import ARTIFACT_VERSION +from hsml.constants import ARTIFACT_VERSION, INFERENCE_ENDPOINTS as IE +from hsml.client.istio.utils.infer_type import ( + InferRequest, + InferInput, + InferOutput, +) class ServingApi: @@ -189,21 +195,37 @@ def reset_changes(self, deployment_instance): def send_inference_request( self, deployment_instance, - data: dict, + data: Union[Dict, List[InferInput]], through_hopsworks: bool = False, - ): + ) -> Union[Dict, List[InferOutput]]: """Send inference requests to a deployment with a certain id :param deployment_instance: metadata object of the deployment to be used for the prediction :type deployment_instance: Deployment - :param data: payload of the inference requests - :type data: dict - :param through_hopsworks: whether to send the inference request through the Hopsworks REST API + :param data: payload of the inference request + :type data: Union[Dict, List[InferInput]] + :param through_hopsworks: whether to send the inference request through the Hopsworks REST API or not :type through_hopsworks: bool :return: inference response - :rtype: dict + :rtype: Union[Dict, List[InferOutput]] """ + if deployment_instance.api_protocol == IE.API_PROTOCOL_REST: + # REST protocol, use hopsworks or istio client + return self._send_inference_request_via_rest_protocol( + deployment_instance, data, through_hopsworks + ) + else: + # gRPC protocol, use the deployment grpc channel + return self._send_inference_request_via_grpc_protocol( + deployment_instance, data + ) + def _send_inference_request_via_rest_protocol( + self, + deployment_instance, + data: Dict, + through_hopsworks: bool = False, + ) -> Dict: headers = {"content-type": "application/json"} if through_hopsworks: # use Hopsworks client @@ -228,10 +250,47 @@ def send_inference_request( path_params = self._get_hopsworks_inference_path( _client._project_id, deployment_instance ) + + # send inference request return _client._send_request( "POST", 
path_params, headers=headers, data=json.dumps(data) ) + def _send_inference_request_via_grpc_protocol( + self, deployment_instance, data: List[InferInput] + ) -> List[InferOutput]: + # get grpc channel + if deployment_instance._grpc_channel is None: + # The gRPC channel is lazily initialized. The first call to deployment.predict() will initialize + # the channel, which will be reused in all following calls on the same deployment object. + # The gRPC channel is freed when calling deployment.stop() + print("Initializing gRPC channel...") + deployment_instance._grpc_channel = self._create_grpc_channel( + deployment_instance.name + ) + # build an infer request + request = InferRequest( + infer_inputs=data, + model_name=deployment_instance.name, + ) + + # send infer request + infer_response = deployment_instance._grpc_channel.infer( + infer_request=request, headers=None + ) + + # extract infer outputs + return infer_response.outputs + + def _create_grpc_channel(self, deployment_name: str): + _client = client.get_istio_instance() + service_hostname = self._get_inference_request_host_header( + _client._project_name, + deployment_name, + client.get_knative_domain(), + ) + return _client._create_grpc_channel(service_hostname) + def is_kserve_installed(self): """Check if kserve is installed diff --git a/python/hsml/deployment.py b/python/hsml/deployment.py index eb68300ea..94d9f87b4 100644 --- a/python/hsml/deployment.py +++ b/python/hsml/deployment.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Union, Optional +from typing import Union, Dict, Optional, List from hsml import client, util from hsml import predictor as predictor_mod @@ -28,6 +28,7 @@ from hsml.client.exceptions import ModelServingException from hsml.constants import DEPLOYABLE_COMPONENT, PREDICTOR_STATE +from hsml.client.istio.utils.infer_type import InferInput class Deployment: @@ -60,6 +61,7 @@ def __init__( self._serving_api = serving_api.ServingApi() self._serving_engine = serving_engine.ServingEngine() + self._grpc_channel = None def save(self, await_update: Optional[int] = 60): """Persist this deployment including the predictor and metadata to Model Serving. @@ -163,7 +165,11 @@ def is_stopped(self, or_created=True) -> bool: ) ) - def predict(self, data: dict = None, inputs: list = None): + def predict( + self, + data: Union[Dict, InferInput] = None, + inputs: Union[List, Dict] = None, + ): """Send inference requests to the deployment. One of data or inputs parameters must be set. If both are set, inputs will be ignored. @@ -432,6 +438,14 @@ def creator(self): """Creator of the predictor.""" return self._predictor.creator + @property + def api_protocol(self): + return self._predictor.api_protocol + + @api_protocol.setter + def api_protocol(self, api_protocol: str): + self._predictor.api_protocol = api_protocol + def __repr__(self): desc = ( f", description: {self._description!r}" diff --git a/python/hsml/engine/serving_engine.py b/python/hsml/engine/serving_engine.py index f0d508bf5..8341bcec8 100644 --- a/python/hsml/engine/serving_engine.py +++ b/python/hsml/engine/serving_engine.py @@ -14,6 +14,8 @@ # limitations under the License. 
# +from typing import Union, Dict, List + import os import time import uuid @@ -22,10 +24,17 @@ from hsml import util -from hsml.constants import DEPLOYMENT, PREDICTOR, PREDICTOR_STATE +from hsml.constants import ( + DEPLOYMENT, + PREDICTOR, + PREDICTOR_STATE, + INFERENCE_ENDPOINTS as IE, +) + from hsml.core import serving_api, dataset_api from hsml.client.exceptions import ModelServingException, RestAPIError +from hsml.client.istio.utils.infer_type import InferInput class ServingEngine: @@ -174,58 +183,8 @@ def update_progress(state, num_instances): update_progress, ) - def predict(self, deployment_instance, data, inputs): - payload = self._build_inference_payload(data, inputs) - - serving_tool = deployment_instance.predictor.serving_tool - through_hopsworks = ( - serving_tool != PREDICTOR.SERVING_TOOL_KSERVE - ) # if not KServe, send request to Hopsworks - try: - return self._serving_api.send_inference_request( - deployment_instance, payload, through_hopsworks - ) - except RestAPIError as re: - if ( - re.response.status_code == RestAPIError.STATUS_CODE_NOT_FOUND - or re.error_code - == ModelServingException.ERROR_CODE_DEPLOYMENT_NOT_RUNNING - ): - raise ModelServingException( - "Deployment not created or running. If it is already created, start it by using `.start()` or check its status with .get_state()" - ) - - re.args = ( - re.args[0] + "\n\n Check the model server logs by using `.get_logs()`", - ) - raise re - - def _build_inference_payload(self, data, inputs): - """Build or check the payload for an inference request. If the 'data' parameter is provided, this method ensures - it contains one of 'instances' or 'inputs' keys needed by the model server. Otherwise, if the 'inputs' parameter - is provided, this method builds the correct request payload using the 'instances' key. - While the 'inputs' key is only supported by default deployments, the 'instances' key is supported in all types of deployments. 
- """ - if data is not None: # check data - if not isinstance(data, dict): - raise ModelServingException( - "Inference data must be a dictionary. Otherwise, use the inputs parameter." - ) - if "instances" not in data and "inputs" not in data: - raise ModelServingException("Inference data is missing 'instances' key") - else: # parse inputs - if not isinstance(inputs, list): - data = {"instances": [inputs]} # wrap inputs in a list - else: - data = {"instances": inputs} # use given inputs list by default - # check depth of the list: at least two levels are required for batch inference - # if the content is neither a list or dict, wrap it in an additional list - for i in inputs: - if not isinstance(i, list) and not isinstance(i, dict): - # if there are no two levels, wrap inputs in a list - data = {"instances": [inputs]} - break - return data + # free grpc channel + deployment_instance._grpc_channel = None def _check_status(self, deployment_instance, desired_status): state = deployment_instance.get_state() @@ -490,7 +449,11 @@ def delete(self, deployment_instance, force=False): if state is None: return - if not force and state.status != PREDICTOR_STATE.STATUS_STOPPED: + if ( + not force + and state.status != PREDICTOR_STATE.STATUS_STOPPED + and state.status != PREDICTOR_STATE.STATUS_CREATED + ): raise ModelServingException( "Deployment not stopped, please stop it first by using `.stop()` or check its status with .get_state()" ) @@ -535,3 +498,208 @@ def get_logs(self, deployment_instance, component, tail): ) return self._serving_api.get_logs(deployment_instance, component, tail) + + # Model inference + + def predict( + self, + deployment_instance, + data: Union[Dict, List[InferInput]], + inputs: Union[Dict, List[Dict]], + ): + # validate user-provided payload + self._validate_inference_payload(deployment_instance.api_protocol, data, inputs) + + # build inference payload based on API protocol + payload = self._build_inference_payload( + deployment_instance.api_protocol, 
data, inputs + ) + + # if not KServe, send request through Hopsworks + serving_tool = deployment_instance.predictor.serving_tool + through_hopsworks = serving_tool != PREDICTOR.SERVING_TOOL_KSERVE + try: + return self._serving_api.send_inference_request( + deployment_instance, payload, through_hopsworks + ) + except RestAPIError as re: + if ( + re.response.status_code == RestAPIError.STATUS_CODE_NOT_FOUND + or re.error_code + == ModelServingException.ERROR_CODE_DEPLOYMENT_NOT_RUNNING + ): + raise ModelServingException( + "Deployment not created or running. If it is already created, start it by using `.start()` or check its status with .get_state()" + ) + + re.args = ( + re.args[0] + "\n\n Check the model server logs by using `.get_logs()`", + ) + raise re + + def _validate_inference_payload( + self, + api_protocol, + data: Union[Dict, List[InferInput]], + inputs: Union[Dict, List[Dict]], + ): + """Validates the user-provided inference payload. Either data or inputs parameter is expected, but both cannot be provided together.""" + # check that data and inputs are not both provided + if data is not None and inputs is not None: + raise ModelServingException( + "Inference data and inputs parameters cannot be provided together." + ) + # check data or inputs + if data is not None: + self._validate_inference_data(api_protocol, data) + else: + self._validate_inference_inputs(api_protocol, inputs) + + def _validate_inference_data( + self, api_protocol, data: Union[Dict, List[InferInput]] + ): + """Validates the inference payload when provided through the `data` parameter. The data parameter contains the raw payload to be sent + in the inference request and should have the corresponding type and format depending on the API protocol. + For the REST protocol, data should be a dictionary. For GRPC protocol, one or more InferInput objects are expected.
+ """ + if api_protocol == IE.API_PROTOCOL_REST: # REST protocol + if isinstance(data, Dict): + if "instances" not in data and "inputs" not in data: + raise ModelServingException( + "Inference data is missing 'instances' key." + ) + + payload = data["instances"] if "instances" in data else data["inputs"] + if not isinstance(payload, List): + raise ModelServingException( + "Instances field should contain a 2-dim list." + ) + elif len(payload) == 0: + raise ModelServingException( + "Inference data cannot contain an empty list." + ) + elif not isinstance(payload[0], List): + raise ModelServingException( + "Instances field should contain a 2-dim list." + ) + elif len(payload[0]) == 0: + raise ModelServingException( + "Inference data cannot contain an empty list." + ) + else: # not Dict + if isinstance(data, InferInput) or ( + isinstance(data, List) and isinstance(data[0], InferInput) + ): + raise ModelServingException( + "Inference data cannot contain `InferInput` for deployments with gRPC protocol disabled. Use a dictionary instead." + ) + raise ModelServingException( + "Inference data must be a dictionary. Otherwise, use the `inputs` parameter." + ) + + else: # gRPC protocol + if isinstance(data, Dict): + raise ModelServingException( + "Inference data cannot be a dictionary for deployments with gRPC protocol enabled. " + "Create a `InferInput` object or use the `inputs` parameter instead." + ) + elif isinstance(data, List): + if len(data) == 0: + raise ModelServingException( + "Inference data cannot contain an empty list." + ) + if not isinstance(data[0], InferInput): + raise ModelServingException( + "Inference data must contain a list of `InferInput` objects. Otherwise, use the `inputs` parameter." + ) + else: + raise ModelServingException( + "Inference data must contain a list of `InferInput` objects for deployments with gRPC protocol enabled." 
+ ) + + def _validate_inference_inputs( + self, api_protocol, inputs: Union[Dict, List[Dict]], recursive_call=False + ): + """Validates the inference payload when provided through the `inputs` parameter. The inputs parameter contains only the payload values, + which will be parsed when building the request payload. It can be either a dictionary or a list. + """ + if isinstance(inputs, List): + if len(inputs) == 0: + raise ModelServingException("Inference inputs cannot be an empty list.") + else: + self._validate_inference_inputs( + api_protocol, inputs[0], recursive_call=True + ) + elif isinstance(inputs, InferInput): + raise ModelServingException( + "Inference inputs cannot be of type `InferInput`. Use the `data` parameter instead." + ) + elif isinstance(inputs, Dict): + required_keys = ("name", "shape", "datatype", "data") + if api_protocol == IE.API_PROTOCOL_GRPC and not all( + k in inputs for k in required_keys + ): + raise ModelServingException( + f"Inference inputs is missing one or more keys. Required keys are [{', '.join(required_keys)}]." + ) + elif not recursive_call or (api_protocol == IE.API_PROTOCOL_GRPC): + # if it is the first call to this method, inputs have an invalid type/format + # if GRPC protocol is used, only Dict type is valid for the input values + raise ModelServingException( + "Inference inputs type is not valid. Supported types are dictionary and list." + ) + + def _build_inference_payload( + self, + api_protocol, + data: Union[Dict, List[InferInput]], + inputs: Union[Dict, List[Dict]], + ): + """Build the inference payload for an inference request. If the 'data' parameter is provided, this method ensures + it has the correct format depending on the API protocol. Otherwise, if the 'inputs' parameter is provided, this method + builds the correct request payload depending on the API protocol. 
+ """ + if data is not None: + # data contains the raw payload (dict or InferInput), nothing needs to be changed + return data + else: # parse inputs + return self._parse_inference_inputs(api_protocol, inputs) + + def _parse_inference_inputs( + self, api_protocol, inputs: Union[Dict, List[Dict]], recursive_call=False + ): + if api_protocol == IE.API_PROTOCOL_REST: # REST protocol + if not isinstance(inputs, List): + data = {"instances": [[inputs]]} # wrap inputs in a 2-dim list + else: + data = {"instances": inputs} # use given inputs list by default + # check depth of the list: at least two levels are required for batch inference + # if the content is neither a list or dict, wrap it in an additional list + for i in inputs: + if not isinstance(i, List) and not isinstance(i, Dict): + # if there are no two levels, wrap inputs in a list + data = {"instances": [inputs]} + break + else: # gRPC protocol + if isinstance(inputs, Dict): # Dict + data = InferInput( + name=inputs["name"], + shape=inputs["shape"], + datatype=inputs["datatype"], + data=inputs["data"], + parameters=( + inputs["parameters"] if "parameters" in inputs else None + ), + ) + if not recursive_call: + # if inputs is of type Dict, return a singleton + data = [data] + + else: # List[Dict] + data = inputs + for index, inputs_item in enumerate(inputs): + data[index] = self._parse_inference_inputs( + api_protocol, inputs_item, recursive_call=True + ) + + return data diff --git a/python/hsml/model.py b/python/hsml/model.py index 32cda793b..a1974b471 100644 --- a/python/hsml/model.py +++ b/python/hsml/model.py @@ -20,7 +20,7 @@ from hsml import client, util -from hsml.constants import ARTIFACT_VERSION +from hsml.constants import ARTIFACT_VERSION, INFERENCE_ENDPOINTS as IE from hsml.engine import model_engine from hsml.predictor import Predictor from hsml.resources import PredictorResources @@ -136,6 +136,7 @@ def deploy( inference_logger: Optional[Union[InferenceLogger, dict]] = None, inference_batcher: 
Optional[Union[InferenceBatcher, dict]] = None, transformer: Optional[Union[Transformer, dict]] = None, + api_protocol: Optional[str] = IE.API_PROTOCOL_REST, ): """Deploy the model. @@ -165,6 +166,7 @@ def deploy( inference_logger: Inference logger configuration. inference_batcher: Inference batcher configuration. transformer: Transformer to be deployed together with the predictor. + api_protocol: API protocol to be enabled in the deployment (i.e., 'REST' or 'GRPC'). Defaults to 'REST'. # Returns `Deployment`: The deployment metadata object of a new or existing deployment. @@ -184,6 +186,7 @@ def deploy( inference_logger=inference_logger, inference_batcher=inference_batcher, transformer=transformer, + api_protocol=api_protocol, ) return predictor.deploy() diff --git a/python/hsml/model_serving.py b/python/hsml/model_serving.py index 5851fa4f6..c5a3b2495 100644 --- a/python/hsml/model_serving.py +++ b/python/hsml/model_serving.py @@ -18,7 +18,7 @@ from hsml import util -from hsml.constants import ARTIFACT_VERSION, PREDICTOR_STATE +from hsml.constants import ARTIFACT_VERSION, PREDICTOR_STATE, INFERENCE_ENDPOINTS as IE from hsml.core import serving_api from hsml.model import Model from hsml.predictor import Predictor @@ -156,6 +156,7 @@ def create_predictor( inference_logger: Optional[Union[InferenceLogger, dict, str]] = None, inference_batcher: Optional[Union[InferenceBatcher, dict]] = None, transformer: Optional[Union[Transformer, dict]] = None, + api_protocol: Optional[str] = IE.API_PROTOCOL_REST, ): """Create a Predictor metadata object. @@ -192,6 +193,7 @@ def create_predictor( inference_logger: Inference logger configuration. inference_batcher: Inference batcher configuration. transformer: Transformer to be deployed together with the predictor. + api_protocol: API protocol to be enabled in the deployment (i.e., 'REST' or 'GRPC'). Defaults to 'REST'. # Returns `Predictor`. The predictor metadata object. 
@@ -210,6 +212,7 @@ def create_predictor( inference_logger=inference_logger, inference_batcher=inference_batcher, transformer=transformer, + api_protocol=api_protocol, ) def create_transformer( diff --git a/python/hsml/predictor.py b/python/hsml/predictor.py index 0feabec61..a06962d72 100644 --- a/python/hsml/predictor.py +++ b/python/hsml/predictor.py @@ -21,7 +21,7 @@ from hsml import deployment from hsml import client -from hsml.constants import ARTIFACT_VERSION, PREDICTOR, MODEL +from hsml.constants import ARTIFACT_VERSION, PREDICTOR, MODEL, INFERENCE_ENDPOINTS from hsml.transformer import Transformer from hsml.predictor_state import PredictorState from hsml.deployable_component import DeployableComponent @@ -52,6 +52,7 @@ def __init__( description: Optional[str] = None, created_at: Optional[str] = None, creator: Optional[str] = None, + api_protocol: Optional[str] = INFERENCE_ENDPOINTS.API_PROTOCOL_REST, **kwargs, ): serving_tool = ( @@ -86,6 +87,7 @@ def __init__( ) self._transformer = util.get_obj_from_json(transformer, Transformer) self._validate_script_file(self._model_framework, self._script_file) + self._api_protocol = api_protocol def deploy(self): """Create a deployment for this predictor and persists it in the Model Serving. 
@@ -252,6 +254,7 @@ def extract_fields_from_json(cls, json_decamelized): kwargs["id"] = json_decamelized.pop("id") kwargs["created_at"] = json_decamelized.pop("created") kwargs["creator"] = json_decamelized.pop("creator") + kwargs["api_protocol"] = json_decamelized.pop("api_protocol") return kwargs def update_from_response_json(self, json_dict): @@ -278,6 +281,7 @@ def to_dict(self): "modelServer": self._model_server, "servingTool": self._serving_tool, "predictor": self._script_file, + "apiProtocol": self._api_protocol, } if self._resources is not None: json = {**json, **self._resources.to_dict()} @@ -431,6 +435,15 @@ def requested_instances(self): num_instances += self._transformer.resources.num_instances return num_instances + @property + def api_protocol(self): + """API protocol enabled in the predictor (e.g., REST or GRPC).""" + return self._api_protocol + + @api_protocol.setter + def api_protocol(self, api_protocol): + self._api_protocol = api_protocol + def __repr__(self): desc = ( f", description: {self._description!r}" diff --git a/python/hsml/util.py b/python/hsml/util.py index aa5309b94..83631a135 100644 --- a/python/hsml/util.py +++ b/python/hsml/util.py @@ -261,7 +261,7 @@ def get_hostname_replaced_url(sub_path: str): :return: href url """ href = urljoin(client.get_instance()._base_url, sub_path) - url_parsed = client.get_instance().replace_public_host(urlparse(href)) + url_parsed = client.get_instance()._replace_public_host(urlparse(href)) return url_parsed.geturl() diff --git a/python/setup.py b/python/setup.py index 3c1e5858b..6adba2f1d 100644 --- a/python/setup.py +++ b/python/setup.py @@ -43,6 +43,8 @@ def read(fname): "pyjks", "mock", "tqdm", + "grpcio>=1.49.1,<2.0.0", # ^1.49.1 + "protobuf>=3.19.0,<4.0.0", # ^3.19.0 ], extras_require={ "dev": ["pytest", "flake8", "black"],