From 2a7eb7adb60574207c15fa6dc07e493e91a1a859 Mon Sep 17 00:00:00 2001
From: Theodoros Katzalis
Date: Mon, 19 Aug 2024 10:36:47 +0200
Subject: [PATCH 1/5] Remove unused protobuf messages InputShape and OutputShape

Since we removed the ModelInfo interface, the protobuf messages
InputShape and OutputShape, and their conversions, are redundant.
---
 proto/inference.proto           |  39 +---
 tests/test_converters.py        | 106 +---------
 tiktorch/converters.py          |  57 +-----
 tiktorch/proto/inference_pb2.py | 341 +++++-----------------------------
 4 files changed, 28 insertions(+), 515 deletions(-)

diff --git a/proto/inference.proto b/proto/inference.proto
index 39845dc5..3187bb39 100644
--- a/proto/inference.proto
+++ b/proto/inference.proto
@@ -2,6 +2,7 @@ syntax = "proto3";
 
 service Inference {
   rpc CreateModelSession(CreateModelSessionRequest) returns (ModelSession) {}
+  rpc CloseModelSession(ModelSession) returns (Empty) {}
 
   rpc CreateDatasetDescription(CreateDatasetDescriptionRequest) returns (DatasetDescription) {}
 
@@ -56,34 +57,6 @@ message NamedFloats {
 }
 
-/* InputShape will always be expected to have `shape` set.
- * For `ShapeType` PARAMETRIZED, also a `stepShape` has to be given.
- * ref: https://github.com/bioimage-io/spec-bioimage-io/blob/gh-pages/model_spec_latest.md */
-message InputShape {
-  enum ShapeType {
-    EXPLICIT = 0;
-    PARAMETRIZED = 1;
-  }
-
-  ShapeType shapeType = 1;
-  // shape is min, when PARAMETRIZED
-  NamedInts shape = 2;
-  NamedInts stepShape = 4;
-}
-
-message OutputShape {
-  enum ShapeType {
-    EXPLICIT = 0;
-    IMPLICIT = 1;
-  }
-  ShapeType shapeType = 1;
-  NamedInts shape = 2;
-  NamedInts halo = 3;
-  string referenceTensor = 4;
-  NamedFloats scale = 5;
-  NamedFloats offset = 6;
-}
-
 message ModelSession {
   string id = 1;
 }
@@ -142,13 +115,3 @@ service FlightControl {
   rpc Shutdown(Empty) returns (Empty) {}
 }
 
-message ModelInfo {
-  repeated string deviceIds = 1;
-}
-
-message CreateModelSessionChunkedRequest {
-  oneof data {
-    ModelInfo info = 1;
-    Blob chunk = 2;
-  }
-}
diff --git a/tests/test_converters.py b/tests/test_converters.py
index be268e42..e112ede9 100644
--- a/tests/test_converters.py
+++ b/tests/test_converters.py
@@ -3,18 +3,7 @@
 import xarray as xr
 from numpy.testing import assert_array_equal
 
-from tiktorch.converters import (
-    NamedExplicitOutputShape,
-    NamedImplicitOutputShape,
-    NamedParametrizedShape,
-    Sample,
-    input_shape_to_pb_input_shape,
-    numpy_to_pb_tensor,
-    output_shape_to_pb_output_shape,
-    pb_tensor_to_numpy,
-    pb_tensor_to_xarray,
-    xarray_to_pb_tensor,
-)
+from tiktorch.converters import Sample, numpy_to_pb_tensor, pb_tensor_to_numpy, pb_tensor_to_xarray, xarray_to_pb_tensor
 from tiktorch.proto import inference_pb2
 
@@ -177,99 +166,6 @@ def test_should_same_data(self, shape):
         assert_array_equal(arr, result_arr)
 
 
-class TestShapeConversions:
-    def to_named_explicit_shape(self, shape, axes, halo):
-        return NamedExplicitOutputShape(
-            halo=[(name, dim) for name, dim in zip(axes, halo)], shape=[(name, dim) for name, dim in zip(axes, shape)]
-        )
-
-    def to_named_implicit_shape(self, axes, halo, offset, scales, reference_tensor):
-        return NamedImplicitOutputShape(
-            halo=[(name, dim) for name, dim in zip(axes, halo)],
-            offset=[(name, dim) for name, dim in zip(axes, offset)],
-            scale=[(name, scale) for name, scale in zip(axes, scales)],
-            reference_tensor=reference_tensor,
-        )
-
-    def to_named_paramtrized_shape(self, min_shape, axes, step):
-        return NamedParametrizedShape(
-            min_shape=[(name, dim) for name, dim in zip(axes, min_shape)],
-            step_shape=[(name, dim) for name, dim 
in zip(axes, step)], - ) - - @pytest.mark.parametrize( - "shape,axes,halo", - [((42,), "x", (0,)), ((42, 128, 5), "abc", (1, 1, 1)), ((5, 4, 3, 2, 1, 42), "btzyxc", (1, 2, 3, 4, 5, 24))], - ) - def test_explicit_output_shape(self, shape, axes, halo): - named_shape = self.to_named_explicit_shape(shape, axes, halo) - pb_shape = output_shape_to_pb_output_shape(named_shape) - - assert pb_shape.shapeType == 0 - assert pb_shape.referenceTensor == "" - assert len(pb_shape.scale.namedFloats) == 0 - assert len(pb_shape.offset.namedFloats) == 0 - - assert [(d.name, d.size) for d in pb_shape.halo.namedInts] == [(name, size) for name, size in zip(axes, halo)] - assert [(d.name, d.size) for d in pb_shape.shape.namedInts] == [(name, size) for name, size in zip(axes, shape)] - - @pytest.mark.parametrize( - "axes,halo,offset,scales,reference_tensor", - [("x", (0,), (10,), (1.0,), "forty-two"), ("abc", (1, 1, 1), (1, 2, 3), (1.0, 2.0, 3.0), "helloworld")], - ) - def test_implicit_output_shape(self, axes, halo, offset, scales, reference_tensor): - named_shape = self.to_named_implicit_shape(axes, halo, offset, scales, reference_tensor) - pb_shape = output_shape_to_pb_output_shape(named_shape) - - assert pb_shape.shapeType == 1 - assert pb_shape.referenceTensor == reference_tensor - assert [(d.name, d.size) for d in pb_shape.scale.namedFloats] == [ - (name, size) for name, size in zip(axes, scales) - ] - assert [(d.name, d.size) for d in pb_shape.offset.namedFloats] == [ - (name, size) for name, size in zip(axes, offset) - ] - - assert [(d.name, d.size) for d in pb_shape.halo.namedInts] == [(name, size) for name, size in zip(axes, halo)] - assert len(pb_shape.shape.namedInts) == 0 - - def test_output_shape_raises(self): - shape = [("a", 1)] - with pytest.raises(TypeError): - _ = output_shape_to_pb_output_shape(shape) - - @pytest.mark.parametrize( - "shape,axes", - [((42,), "x"), ((42, 128, 5), "abc"), ((5, 4, 3, 2, 1, 42), "btzyxc")], - ) - def test_explicit_input_shape(self, shape, axes): - named_shape = [(name, dim) for name, dim in zip(axes, shape)] - pb_shape = input_shape_to_pb_input_shape(named_shape) - - assert pb_shape.shapeType == 0 - assert [(d.name, d.size) for d in pb_shape.shape.namedInts] == [(name, size) for name, size in zip(axes, shape)] - - @pytest.mark.parametrize( - "min_shape,axes,step", - [ - ((42,), "x", (5,)), - ((42, 128, 5), "abc", (1, 2, 3)), - ((5, 4, 3, 2, 1, 42), "btzyxc", (15, 24, 33, 42, 51, 642)), - ], - ) - def test_parametrized_input_shape(self, min_shape, axes, step): - named_shape = self.to_named_paramtrized_shape(min_shape, axes, step) - pb_shape = input_shape_to_pb_input_shape(named_shape) - - assert pb_shape.shapeType == 1 - assert [(d.name, d.size) for d in pb_shape.shape.namedInts] == [ - (name, size) for name, size in zip(axes, min_shape) - ] - assert [(d.name, d.size) for d in pb_shape.stepShape.namedInts] == [ - (name, size) for name, size in zip(axes, step) - ] - - class TestSample: def test_create_sample_from_pb_tensors(self): arr_1 = np.arange(32 * 32, dtype=np.int64).reshape(32, 32) diff --git a/tiktorch/converters.py b/tiktorch/converters.py index b7fee3fe..d54a4b5f 100644 --- a/tiktorch/converters.py +++ b/tiktorch/converters.py @@ -1,7 +1,7 @@ from __future__ import annotations import dataclasses -from typing import Dict, List, Tuple, Union +from typing import Dict, List, Tuple import numpy as np import xarray as xr @@ -15,26 +15,6 @@ NamedVec = List[NamedFloat] -@dataclasses.dataclass -class NamedParametrizedShape: - min_shape: NamedShape - step_shape: 
NamedShape - - -@dataclasses.dataclass -class NamedExplicitOutputShape: - shape: NamedShape - halo: NamedShape - - -@dataclasses.dataclass -class NamedImplicitOutputShape: - reference_tensor: str - offset: NamedShape - scale: NamedVec - halo: NamedShape - - @dataclasses.dataclass(frozen=True) class Sample: tensors: Dict[str, xr.DataArray] @@ -77,41 +57,6 @@ def name_float_tuples_to_pb_NamedFloats(name_float_tuples) -> inference_pb2.Name ) -def input_shape_to_pb_input_shape(input_shape: Union[NamedShape, NamedParametrizedShape]) -> inference_pb2.InputShape: - if isinstance(input_shape, NamedParametrizedShape): - return inference_pb2.InputShape( - shapeType=1, - shape=name_int_tuples_to_pb_NamedInts(input_shape.min_shape), - stepShape=name_int_tuples_to_pb_NamedInts(input_shape.step_shape), - ) - else: - return inference_pb2.InputShape( - shapeType=0, - shape=name_int_tuples_to_pb_NamedInts(input_shape), - ) - - -def output_shape_to_pb_output_shape( - output_shape: Union[NamedExplicitOutputShape, NamedImplicitOutputShape] -) -> inference_pb2.InputShape: - if isinstance(output_shape, NamedImplicitOutputShape): - return inference_pb2.OutputShape( - shapeType=1, - halo=name_int_tuples_to_pb_NamedInts(output_shape.halo), - referenceTensor=output_shape.reference_tensor, - scale=name_float_tuples_to_pb_NamedFloats(output_shape.scale), - offset=name_float_tuples_to_pb_NamedFloats(output_shape.offset), - ) - elif isinstance(output_shape, NamedExplicitOutputShape): - return inference_pb2.OutputShape( - shapeType=0, - shape=name_int_tuples_to_pb_NamedInts(output_shape.shape), - halo=name_int_tuples_to_pb_NamedInts(output_shape.halo), - ) - else: - raise TypeError(f"Conversion not supported for type {type(output_shape)}") - - def pb_tensor_to_xarray(tensor: inference_pb2.Tensor) -> inference_pb2.Tensor: if not tensor.dtype: raise ValueError("Tensor dtype is not specified") diff --git a/tiktorch/proto/inference_pb2.py b/tiktorch/proto/inference_pb2.py index dc5c6c9c..b5e4dbb7 100644 --- a/tiktorch/proto/inference_pb2.py +++ b/tiktorch/proto/inference_pb2.py @@ -20,7 +20,7 @@ syntax='proto3', serialized_options=None, create_key=_descriptor._internal_create_key, - serialized_pb=b'\n\x0finference.proto\"Y\n\x06\x44\x65vice\x12\n\n\x02id\x18\x01 \x01(\t\x12\x1e\n\x06status\x18\x02 \x01(\x0e\x32\x0e.Device.Status\"#\n\x06Status\x12\r\n\tAVAILABLE\x10\x00\x12\n\n\x06IN_USE\x10\x01\"W\n\x1f\x43reateDatasetDescriptionRequest\x12\x16\n\x0emodelSessionId\x18\x01 \x01(\t\x12\x0c\n\x04mean\x18\x03 \x01(\x01\x12\x0e\n\x06stddev\x18\x04 \x01(\x01\" \n\x12\x44\x61tasetDescription\x12\n\n\x02id\x18\x01 \x01(\t\"\'\n\x04\x42lob\x12\x0e\n\x06\x66ormat\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\x0c\"i\n\x19\x43reateModelSessionRequest\x12\x13\n\tmodel_uri\x18\x01 \x01(\tH\x00\x12\x1b\n\nmodel_blob\x18\x02 \x01(\x0b\x32\x05.BlobH\x00\x12\x11\n\tdeviceIds\x18\x05 \x03(\tB\x07\n\x05model\")\n\tNamedInts\x12\x1c\n\tnamedInts\x18\x01 \x03(\x0b\x32\t.NamedInt\"/\n\x0bNamedFloats\x12 \n\x0bnamedFloats\x18\x01 \x03(\x0b\x32\x0b.NamedFloat\"\x9d\x01\n\nInputShape\x12(\n\tshapeType\x18\x01 \x01(\x0e\x32\x15.InputShape.ShapeType\x12\x19\n\x05shape\x18\x02 \x01(\x0b\x32\n.NamedInts\x12\x1d\n\tstepShape\x18\x04 \x01(\x0b\x32\n.NamedInts\"+\n\tShapeType\x12\x0c\n\x08\x45XPLICIT\x10\x00\x12\x10\n\x0cPARAMETRIZED\x10\x01\"\xea\x01\n\x0bOutputShape\x12)\n\tshapeType\x18\x01 \x01(\x0e\x32\x16.OutputShape.ShapeType\x12\x19\n\x05shape\x18\x02 \x01(\x0b\x32\n.NamedInts\x12\x18\n\x04halo\x18\x03 
\x01(\x0b\x32\n.NamedInts\x12\x17\n\x0freferenceTensor\x18\x04 \x01(\t\x12\x1b\n\x05scale\x18\x05 \x01(\x0b\x32\x0c.NamedFloats\x12\x1c\n\x06offset\x18\x06 \x01(\x0b\x32\x0c.NamedFloats\"\'\n\tShapeType\x12\x0c\n\x08\x45XPLICIT\x10\x00\x12\x0c\n\x08IMPLICIT\x10\x01\"\x1a\n\x0cModelSession\x12\n\n\x02id\x18\x01 \x01(\t\"\x9e\x01\n\x08LogEntry\x12\x11\n\ttimestamp\x18\x01 \x01(\r\x12\x1e\n\x05level\x18\x02 \x01(\x0e\x32\x0f.LogEntry.Level\x12\x0f\n\x07\x63ontent\x18\x03 \x01(\t\"N\n\x05Level\x12\n\n\x06NOTSET\x10\x00\x12\t\n\x05\x44\x45\x42UG\x10\x01\x12\x08\n\x04INFO\x10\x02\x12\x0b\n\x07WARNING\x10\x03\x12\t\n\x05\x45RROR\x10\x04\x12\x0c\n\x08\x43RITICAL\x10\x05\"#\n\x07\x44\x65vices\x12\x18\n\x07\x64\x65vices\x18\x01 \x03(\x0b\x32\x07.Device\"&\n\x08NamedInt\x12\x0c\n\x04size\x18\x01 \x01(\r\x12\x0c\n\x04name\x18\x02 \x01(\t\"(\n\nNamedFloat\x12\x0c\n\x04size\x18\x01 \x01(\x02\x12\x0c\n\x04name\x18\x02 \x01(\t\"S\n\x06Tensor\x12\x0e\n\x06\x62uffer\x18\x01 \x01(\x0c\x12\r\n\x05\x64type\x18\x02 \x01(\t\x12\x10\n\x08tensorId\x18\x03 \x01(\t\x12\x18\n\x05shape\x18\x04 \x03(\x0b\x32\t.NamedInt\"U\n\x0ePredictRequest\x12\x16\n\x0emodelSessionId\x18\x01 \x01(\t\x12\x11\n\tdatasetId\x18\x02 \x01(\t\x12\x18\n\x07tensors\x18\x03 \x03(\x0b\x32\x07.Tensor\"+\n\x0fPredictResponse\x12\x18\n\x07tensors\x18\x01 \x03(\x0b\x32\x07.Tensor\"\x07\n\x05\x45mpty\"\x1e\n\tModelInfo\x12\x11\n\tdeviceIds\x18\x01 \x03(\t\"^\n CreateModelSessionChunkedRequest\x12\x1a\n\x04info\x18\x01 \x01(\x0b\x32\n.ModelInfoH\x00\x12\x16\n\x05\x63hunk\x18\x02 \x01(\x0b\x32\x05.BlobH\x00\x42\x06\n\x04\x64\x61ta2\xc6\x02\n\tInference\x12\x41\n\x12\x43reateModelSession\x12\x1a.CreateModelSessionRequest\x1a\r.ModelSession\"\x00\x12,\n\x11\x43loseModelSession\x12\r.ModelSession\x1a\x06.Empty\"\x00\x12S\n\x18\x43reateDatasetDescription\x12 .CreateDatasetDescriptionRequest\x1a\x13.DatasetDescription\"\x00\x12 \n\x07GetLogs\x12\x06.Empty\x1a\t.LogEntry\"\x00\x30\x01\x12!\n\x0bListDevices\x12\x06.Empty\x1a\x08.Devices\"\x00\x12.\n\x07Predict\x12\x0f.PredictRequest\x1a\x10.PredictResponse\"\x00\x32G\n\rFlightControl\x12\x18\n\x04Ping\x12\x06.Empty\x1a\x06.Empty\"\x00\x12\x1c\n\x08Shutdown\x12\x06.Empty\x1a\x06.Empty\"\x00\x62\x06proto3' + serialized_pb=b'\n\x0finference.proto\"Y\n\x06\x44\x65vice\x12\n\n\x02id\x18\x01 \x01(\t\x12\x1e\n\x06status\x18\x02 \x01(\x0e\x32\x0e.Device.Status\"#\n\x06Status\x12\r\n\tAVAILABLE\x10\x00\x12\n\n\x06IN_USE\x10\x01\"W\n\x1f\x43reateDatasetDescriptionRequest\x12\x16\n\x0emodelSessionId\x18\x01 \x01(\t\x12\x0c\n\x04mean\x18\x03 \x01(\x01\x12\x0e\n\x06stddev\x18\x04 \x01(\x01\" \n\x12\x44\x61tasetDescription\x12\n\n\x02id\x18\x01 \x01(\t\"\'\n\x04\x42lob\x12\x0e\n\x06\x66ormat\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\x0c\"i\n\x19\x43reateModelSessionRequest\x12\x13\n\tmodel_uri\x18\x01 \x01(\tH\x00\x12\x1b\n\nmodel_blob\x18\x02 \x01(\x0b\x32\x05.BlobH\x00\x12\x11\n\tdeviceIds\x18\x05 \x03(\tB\x07\n\x05model\")\n\tNamedInts\x12\x1c\n\tnamedInts\x18\x01 \x03(\x0b\x32\t.NamedInt\"/\n\x0bNamedFloats\x12 \n\x0bnamedFloats\x18\x01 \x03(\x0b\x32\x0b.NamedFloat\"\x1a\n\x0cModelSession\x12\n\n\x02id\x18\x01 \x01(\t\"\x9e\x01\n\x08LogEntry\x12\x11\n\ttimestamp\x18\x01 \x01(\r\x12\x1e\n\x05level\x18\x02 \x01(\x0e\x32\x0f.LogEntry.Level\x12\x0f\n\x07\x63ontent\x18\x03 
\x01(\t\"N\n\x05Level\x12\n\n\x06NOTSET\x10\x00\x12\t\n\x05\x44\x45\x42UG\x10\x01\x12\x08\n\x04INFO\x10\x02\x12\x0b\n\x07WARNING\x10\x03\x12\t\n\x05\x45RROR\x10\x04\x12\x0c\n\x08\x43RITICAL\x10\x05\"#\n\x07\x44\x65vices\x12\x18\n\x07\x64\x65vices\x18\x01 \x03(\x0b\x32\x07.Device\"&\n\x08NamedInt\x12\x0c\n\x04size\x18\x01 \x01(\r\x12\x0c\n\x04name\x18\x02 \x01(\t\"(\n\nNamedFloat\x12\x0c\n\x04size\x18\x01 \x01(\x02\x12\x0c\n\x04name\x18\x02 \x01(\t\"S\n\x06Tensor\x12\x0e\n\x06\x62uffer\x18\x01 \x01(\x0c\x12\r\n\x05\x64type\x18\x02 \x01(\t\x12\x10\n\x08tensorId\x18\x03 \x01(\t\x12\x18\n\x05shape\x18\x04 \x03(\x0b\x32\t.NamedInt\"U\n\x0ePredictRequest\x12\x16\n\x0emodelSessionId\x18\x01 \x01(\t\x12\x11\n\tdatasetId\x18\x02 \x01(\t\x12\x18\n\x07tensors\x18\x03 \x03(\x0b\x32\x07.Tensor\"+\n\x0fPredictResponse\x12\x18\n\x07tensors\x18\x01 \x03(\x0b\x32\x07.Tensor\"\x07\n\x05\x45mpty2\xc6\x02\n\tInference\x12\x41\n\x12\x43reateModelSession\x12\x1a.CreateModelSessionRequest\x1a\r.ModelSession\"\x00\x12,\n\x11\x43loseModelSession\x12\r.ModelSession\x1a\x06.Empty\"\x00\x12S\n\x18\x43reateDatasetDescription\x12 .CreateDatasetDescriptionRequest\x1a\x13.DatasetDescription\"\x00\x12 \n\x07GetLogs\x12\x06.Empty\x1a\t.LogEntry\"\x00\x30\x01\x12!\n\x0bListDevices\x12\x06.Empty\x1a\x08.Devices\"\x00\x12.\n\x07Predict\x12\x0f.PredictRequest\x1a\x10.PredictResponse\"\x00\x32G\n\rFlightControl\x12\x18\n\x04Ping\x12\x06.Empty\x1a\x06.Empty\"\x00\x12\x1c\n\x08Shutdown\x12\x06.Empty\x1a\x06.Empty\"\x00\x62\x06proto3' ) @@ -50,56 +50,6 @@ ) _sym_db.RegisterEnumDescriptor(_DEVICE_STATUS) -_INPUTSHAPE_SHAPETYPE = _descriptor.EnumDescriptor( - name='ShapeType', - full_name='InputShape.ShapeType', - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name='EXPLICIT', index=0, number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key), - _descriptor.EnumValueDescriptor( - name='PARAMETRIZED', index=1, number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key), - ], - containing_type=None, - serialized_options=None, - serialized_start=588, - serialized_end=631, -) -_sym_db.RegisterEnumDescriptor(_INPUTSHAPE_SHAPETYPE) - -_OUTPUTSHAPE_SHAPETYPE = _descriptor.EnumDescriptor( - name='ShapeType', - full_name='OutputShape.ShapeType', - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name='EXPLICIT', index=0, number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key), - _descriptor.EnumValueDescriptor( - name='IMPLICIT', index=1, number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key), - ], - containing_type=None, - serialized_options=None, - serialized_start=829, - serialized_end=868, -) -_sym_db.RegisterEnumDescriptor(_OUTPUTSHAPE_SHAPETYPE) - _LOGENTRY_LEVEL = _descriptor.EnumDescriptor( name='Level', full_name='LogEntry.Level', @@ -140,8 +90,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=979, - serialized_end=1057, + serialized_start=582, + serialized_end=660, ) _sym_db.RegisterEnumDescriptor(_LOGENTRY_LEVEL) @@ -418,121 +368,6 @@ ) -_INPUTSHAPE = _descriptor.Descriptor( - name='InputShape', - full_name='InputShape', - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name='shapeType', 
full_name='InputShape.shapeType', index=0, - number=1, type=14, cpp_type=8, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - _descriptor.FieldDescriptor( - name='shape', full_name='InputShape.shape', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - _descriptor.FieldDescriptor( - name='stepShape', full_name='InputShape.stepShape', index=2, - number=4, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - _INPUTSHAPE_SHAPETYPE, - ], - serialized_options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=474, - serialized_end=631, -) - - -_OUTPUTSHAPE = _descriptor.Descriptor( - name='OutputShape', - full_name='OutputShape', - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name='shapeType', full_name='OutputShape.shapeType', index=0, - number=1, type=14, cpp_type=8, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - _descriptor.FieldDescriptor( - name='shape', full_name='OutputShape.shape', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - _descriptor.FieldDescriptor( - name='halo', full_name='OutputShape.halo', index=2, - number=3, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - _descriptor.FieldDescriptor( - name='referenceTensor', full_name='OutputShape.referenceTensor', index=3, - number=4, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=b"".decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - _descriptor.FieldDescriptor( - name='scale', full_name='OutputShape.scale', index=4, - number=5, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - _descriptor.FieldDescriptor( - name='offset', full_name='OutputShape.offset', index=5, - number=6, type=11, cpp_type=10, label=1, - 
has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - _OUTPUTSHAPE_SHAPETYPE, - ], - serialized_options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=634, - serialized_end=868, -) - - _MODELSESSION = _descriptor.Descriptor( name='ModelSession', full_name='ModelSession', @@ -560,8 +395,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=870, - serialized_end=896, + serialized_start=473, + serialized_end=499, ) @@ -607,8 +442,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=899, - serialized_end=1057, + serialized_start=502, + serialized_end=660, ) @@ -639,8 +474,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=1059, - serialized_end=1094, + serialized_start=662, + serialized_end=697, ) @@ -678,8 +513,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=1096, - serialized_end=1134, + serialized_start=699, + serialized_end=737, ) @@ -717,8 +552,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=1136, - serialized_end=1176, + serialized_start=739, + serialized_end=779, ) @@ -770,8 +605,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=1178, - serialized_end=1261, + serialized_start=781, + serialized_end=864, ) @@ -816,8 +651,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=1263, - serialized_end=1348, + serialized_start=866, + serialized_end=951, ) @@ -848,8 +683,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=1350, - serialized_end=1393, + serialized_start=953, + serialized_end=996, ) @@ -873,84 +708,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=1395, - serialized_end=1402, -) - - -_MODELINFO = _descriptor.Descriptor( - name='ModelInfo', - full_name='ModelInfo', - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name='deviceIds', full_name='ModelInfo.deviceIds', index=0, - number=1, type=9, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - serialized_options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1404, - serialized_end=1434, -) - - -_CREATEMODELSESSIONCHUNKEDREQUEST = _descriptor.Descriptor( - name='CreateModelSessionChunkedRequest', - full_name='CreateModelSessionChunkedRequest', - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name='info', full_name='CreateModelSessionChunkedRequest.info', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - _descriptor.FieldDescriptor( - name='chunk', full_name='CreateModelSessionChunkedRequest.chunk', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, 
enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - serialized_options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name='data', full_name='CreateModelSessionChunkedRequest.data', - index=0, containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[]), - ], - serialized_start=1436, - serialized_end=1530, + serialized_start=998, + serialized_end=1005, ) _DEVICE.fields_by_name['status'].enum_type = _DEVICE_STATUS @@ -964,30 +723,12 @@ _CREATEMODELSESSIONREQUEST.fields_by_name['model_blob'].containing_oneof = _CREATEMODELSESSIONREQUEST.oneofs_by_name['model'] _NAMEDINTS.fields_by_name['namedInts'].message_type = _NAMEDINT _NAMEDFLOATS.fields_by_name['namedFloats'].message_type = _NAMEDFLOAT -_INPUTSHAPE.fields_by_name['shapeType'].enum_type = _INPUTSHAPE_SHAPETYPE -_INPUTSHAPE.fields_by_name['shape'].message_type = _NAMEDINTS -_INPUTSHAPE.fields_by_name['stepShape'].message_type = _NAMEDINTS -_INPUTSHAPE_SHAPETYPE.containing_type = _INPUTSHAPE -_OUTPUTSHAPE.fields_by_name['shapeType'].enum_type = _OUTPUTSHAPE_SHAPETYPE -_OUTPUTSHAPE.fields_by_name['shape'].message_type = _NAMEDINTS -_OUTPUTSHAPE.fields_by_name['halo'].message_type = _NAMEDINTS -_OUTPUTSHAPE.fields_by_name['scale'].message_type = _NAMEDFLOATS -_OUTPUTSHAPE.fields_by_name['offset'].message_type = _NAMEDFLOATS -_OUTPUTSHAPE_SHAPETYPE.containing_type = _OUTPUTSHAPE _LOGENTRY.fields_by_name['level'].enum_type = _LOGENTRY_LEVEL _LOGENTRY_LEVEL.containing_type = _LOGENTRY _DEVICES.fields_by_name['devices'].message_type = _DEVICE _TENSOR.fields_by_name['shape'].message_type = _NAMEDINT _PREDICTREQUEST.fields_by_name['tensors'].message_type = _TENSOR _PREDICTRESPONSE.fields_by_name['tensors'].message_type = _TENSOR -_CREATEMODELSESSIONCHUNKEDREQUEST.fields_by_name['info'].message_type = _MODELINFO -_CREATEMODELSESSIONCHUNKEDREQUEST.fields_by_name['chunk'].message_type = _BLOB -_CREATEMODELSESSIONCHUNKEDREQUEST.oneofs_by_name['data'].fields.append( - _CREATEMODELSESSIONCHUNKEDREQUEST.fields_by_name['info']) -_CREATEMODELSESSIONCHUNKEDREQUEST.fields_by_name['info'].containing_oneof = _CREATEMODELSESSIONCHUNKEDREQUEST.oneofs_by_name['data'] -_CREATEMODELSESSIONCHUNKEDREQUEST.oneofs_by_name['data'].fields.append( - _CREATEMODELSESSIONCHUNKEDREQUEST.fields_by_name['chunk']) -_CREATEMODELSESSIONCHUNKEDREQUEST.fields_by_name['chunk'].containing_oneof = _CREATEMODELSESSIONCHUNKEDREQUEST.oneofs_by_name['data'] DESCRIPTOR.message_types_by_name['Device'] = _DEVICE DESCRIPTOR.message_types_by_name['CreateDatasetDescriptionRequest'] = _CREATEDATASETDESCRIPTIONREQUEST DESCRIPTOR.message_types_by_name['DatasetDescription'] = _DATASETDESCRIPTION @@ -995,8 +736,6 @@ DESCRIPTOR.message_types_by_name['CreateModelSessionRequest'] = _CREATEMODELSESSIONREQUEST DESCRIPTOR.message_types_by_name['NamedInts'] = _NAMEDINTS DESCRIPTOR.message_types_by_name['NamedFloats'] = _NAMEDFLOATS -DESCRIPTOR.message_types_by_name['InputShape'] = _INPUTSHAPE -DESCRIPTOR.message_types_by_name['OutputShape'] = _OUTPUTSHAPE DESCRIPTOR.message_types_by_name['ModelSession'] = _MODELSESSION DESCRIPTOR.message_types_by_name['LogEntry'] = _LOGENTRY DESCRIPTOR.message_types_by_name['Devices'] = _DEVICES @@ -1006,8 +745,6 @@ DESCRIPTOR.message_types_by_name['PredictRequest'] = _PREDICTREQUEST 
DESCRIPTOR.message_types_by_name['PredictResponse'] = _PREDICTRESPONSE DESCRIPTOR.message_types_by_name['Empty'] = _EMPTY -DESCRIPTOR.message_types_by_name['ModelInfo'] = _MODELINFO -DESCRIPTOR.message_types_by_name['CreateModelSessionChunkedRequest'] = _CREATEMODELSESSIONCHUNKEDREQUEST _sym_db.RegisterFileDescriptor(DESCRIPTOR) Device = _reflection.GeneratedProtocolMessageType('Device', (_message.Message,), { @@ -1059,20 +796,6 @@ }) _sym_db.RegisterMessage(NamedFloats) -InputShape = _reflection.GeneratedProtocolMessageType('InputShape', (_message.Message,), { - 'DESCRIPTOR' : _INPUTSHAPE, - '__module__' : 'inference_pb2' - # @@protoc_insertion_point(class_scope:InputShape) - }) -_sym_db.RegisterMessage(InputShape) - -OutputShape = _reflection.GeneratedProtocolMessageType('OutputShape', (_message.Message,), { - 'DESCRIPTOR' : _OUTPUTSHAPE, - '__module__' : 'inference_pb2' - # @@protoc_insertion_point(class_scope:OutputShape) - }) -_sym_db.RegisterMessage(OutputShape) - ModelSession = _reflection.GeneratedProtocolMessageType('ModelSession', (_message.Message,), { 'DESCRIPTOR' : _MODELSESSION, '__module__' : 'inference_pb2' @@ -1136,20 +859,6 @@ }) _sym_db.RegisterMessage(Empty) -ModelInfo = _reflection.GeneratedProtocolMessageType('ModelInfo', (_message.Message,), { - 'DESCRIPTOR' : _MODELINFO, - '__module__' : 'inference_pb2' - # @@protoc_insertion_point(class_scope:ModelInfo) - }) -_sym_db.RegisterMessage(ModelInfo) - -CreateModelSessionChunkedRequest = _reflection.GeneratedProtocolMessageType('CreateModelSessionChunkedRequest', (_message.Message,), { - 'DESCRIPTOR' : _CREATEMODELSESSIONCHUNKEDREQUEST, - '__module__' : 'inference_pb2' - # @@protoc_insertion_point(class_scope:CreateModelSessionChunkedRequest) - }) -_sym_db.RegisterMessage(CreateModelSessionChunkedRequest) - _INFERENCE = _descriptor.ServiceDescriptor( @@ -1159,8 +868,8 @@ index=0, serialized_options=None, create_key=_descriptor._internal_create_key, - serialized_start=1533, - serialized_end=1859, + serialized_start=1008, + serialized_end=1334, methods=[ _descriptor.MethodDescriptor( name='CreateModelSession', @@ -1235,8 +944,8 @@ index=1, serialized_options=None, create_key=_descriptor._internal_create_key, - serialized_start=1861, - serialized_end=1932, + serialized_start=1336, + serialized_end=1407, methods=[ _descriptor.MethodDescriptor( name='Ping', From 529243e8693200c8772b0ef68f25b34123554a62 Mon Sep 17 00:00:00 2001 From: Theodoros Katzalis Date: Mon, 19 Aug 2024 10:41:45 +0200 Subject: [PATCH 2/5] Move InputTensorValidator to converters module --- tiktorch/converters.py | 89 ++++++++++++++++++++- tiktorch/server/grpc/inference_servicer.py | 4 +- tiktorch/server/session/process.py | 90 +--------------------- 3 files changed, 91 insertions(+), 92 deletions(-) diff --git a/tiktorch/converters.py b/tiktorch/converters.py index d54a4b5f..06c5195e 100644 --- a/tiktorch/converters.py +++ b/tiktorch/converters.py @@ -1,10 +1,12 @@ from __future__ import annotations import dataclasses -from typing import Dict, List, Tuple +from typing import Dict, Iterator, List, Tuple import numpy as np import xarray as xr +from bioimageio.core.resource_io import nodes +from bioimageio.core.resource_io.nodes import ParametrizedInputShape from tiktorch.proto import inference_pb2 @@ -15,6 +17,91 @@ NamedVec = List[NamedFloat] +class InputTensorValidator: + def __init__(self, input_specs: List[nodes.InputTensor]): + self._input_specs = input_specs + + def check_tensors(self, sample: Sample): + for tensor_id, tensor in sample.tensors.items(): 
+ self.check_shape(tensor_id, tensor.dims, tensor.shape) + + def _get_input_tensors_with_names(self) -> Dict[str, nodes.InputTensor]: + return {tensor.name: tensor for tensor in self._input_specs} + + def check_shape(self, tensor_id: str, axes: Tuple[str, ...], shape: Tuple[int, ...]): + shape = self.get_axes_with_size(axes, shape) + spec = self._get_input_spec(tensor_id) + if isinstance(spec.shape, list): + self._check_shape_explicit(spec, shape) + elif isinstance(spec.shape, ParametrizedInputShape): + self._check_shape_parameterized(spec, shape) + else: + raise ValueError(f"Unexpected shape {spec.shape}") + + def _get_input_spec(self, tensor_id: str) -> nodes.InputTensor: + self._check_spec_exists(tensor_id) + specs = [spec for spec in self._input_specs if spec.name == tensor_id] + assert len(specs) == 1, "ids of tensor specs should be unique" + return specs[0] + + def _check_spec_exists(self, tensor_id: str): + spec_names = [spec.name for spec in self._input_specs] + if tensor_id not in spec_names: + raise ValueError(f"Spec {tensor_id} doesn't exist for specs {spec_names}") + + def _check_shape_explicit(self, spec: nodes.InputTensor, tensor_shape: Dict[str, int]): + assert self.is_shape_explicit(spec) + reference_shape = {name: size for name, size in zip(spec.axes, spec.shape)} + self.check_same_axes(reference_shape, tensor_shape) + if reference_shape != tensor_shape: + raise ValueError(f"Incompatible shapes found {tensor_shape}, expected {reference_shape}") + + def _check_shape_parameterized(self, spec: nodes.InputTensor, tensor_shape: Dict[str, int]): + assert isinstance(spec.shape, ParametrizedInputShape) + if not self.is_shape(tensor_shape.values()): + raise ValueError(f"Invalid shape's sizes {tensor_shape}") + + min_shape = self.get_axes_with_size(spec.axes, tuple(spec.shape.min)) + step = self.get_axes_with_size(spec.axes, tuple(spec.shape.step)) + self.check_same_axes(tensor_shape, min_shape) + + tensor_shapes_arr = np.array(list(tensor_shape.values())) + min_shape_arr = np.array(list(min_shape.values())) + step_arr = np.array(list(step.values())) + diff = tensor_shapes_arr - min_shape_arr + if any(size < 0 for size in diff): + raise ValueError(f"Tensor shape {tensor_shape} smaller than min shape {min_shape}") + + non_zero_idx = np.nonzero(step_arr) + multipliers = diff[non_zero_idx] / step_arr[non_zero_idx] + multiplier = np.unique(multipliers) + if len(multiplier) == 1 and self.is_natural_number(multiplier[0]): + return + raise ValueError(f"Tensor shape {tensor_shape} not valid for spec {spec}") + + @staticmethod + def check_same_axes(source: Dict[str, int], target: Dict[str, int]): + if source.keys() != target.keys(): + raise ValueError(f"Incompatible axes for tensor {target} and reference {source}") + + @staticmethod + def is_natural_number(n) -> bool: + return n % 1 == 0.0 and n >= 0 + + @staticmethod + def is_shape(shape: Iterator[int]) -> bool: + return all(InputTensorValidator.is_natural_number(dim) for dim in shape) + + @staticmethod + def get_axes_with_size(axes: Tuple[str, ...], shape: Tuple[int, ...]) -> Dict[str, int]: + assert len(axes) == len(shape) + return {name: size for name, size in zip(axes, shape)} + + @staticmethod + def is_shape_explicit(spec: nodes.InputTensor) -> bool: + return isinstance(spec.shape, list) + + @dataclasses.dataclass(frozen=True) class Sample: tensors: Dict[str, xr.DataArray] diff --git a/tiktorch/server/grpc/inference_servicer.py b/tiktorch/server/grpc/inference_servicer.py index f09e0bae..3dab96dd 100644 --- 
a/tiktorch/server/grpc/inference_servicer.py +++ b/tiktorch/server/grpc/inference_servicer.py @@ -2,11 +2,11 @@ import grpc -from tiktorch.converters import Sample +from tiktorch.converters import InputTensorValidator, Sample from tiktorch.proto import inference_pb2, inference_pb2_grpc from tiktorch.server.data_store import IDataStore from tiktorch.server.device_pool import DeviceStatus, IDevicePool -from tiktorch.server.session.process import InputTensorValidator, start_model_session_process +from tiktorch.server.session.process import start_model_session_process from tiktorch.server.session_manager import Session, SessionManager diff --git a/tiktorch/server/session/process.py b/tiktorch/server/session/process.py index c9e0186e..af46f0c6 100644 --- a/tiktorch/server/session/process.py +++ b/tiktorch/server/session/process.py @@ -4,13 +4,10 @@ import uuid from concurrent.futures import Future from multiprocessing.connection import Connection -from typing import Dict, Iterator, List, Optional, Tuple +from typing import List, Optional, Tuple -import numpy as np from bioimageio.core import load_resource_description from bioimageio.core.prediction_pipeline import PredictionPipeline, create_prediction_pipeline -from bioimageio.core.resource_io import nodes -from bioimageio.core.resource_io.nodes import ParametrizedInputShape from tiktorch import log from tiktorch.rpc import Shutdown @@ -22,91 +19,6 @@ from .rpc_interface import IRPCModelSession -class InputTensorValidator: - def __init__(self, input_specs: List[nodes.InputTensor]): - self._input_specs = input_specs - - def check_tensors(self, sample: Sample): - for tensor_id, tensor in sample.tensors.items(): - self.check_shape(tensor_id, tensor.dims, tensor.shape) - - def _get_input_tensors_with_names(self) -> Dict[str, nodes.InputTensor]: - return {tensor.name: tensor for tensor in self._input_specs} - - def check_shape(self, tensor_id: str, axes: Tuple[str, ...], shape: Tuple[int, ...]): - shape = self.get_axes_with_size(axes, shape) - spec = self._get_input_spec(tensor_id) - if isinstance(spec.shape, list): - self._check_shape_explicit(spec, shape) - elif isinstance(spec.shape, ParametrizedInputShape): - self._check_shape_parameterized(spec, shape) - else: - raise ValueError(f"Unexpected shape {spec.shape}") - - def _get_input_spec(self, tensor_id: str) -> nodes.InputTensor: - self._check_spec_exists(tensor_id) - specs = [spec for spec in self._input_specs if spec.name == tensor_id] - assert len(specs) == 1, "ids of tensor specs should be unique" - return specs[0] - - def _check_spec_exists(self, tensor_id: str): - spec_names = [spec.name for spec in self._input_specs] - if tensor_id not in spec_names: - raise ValueError(f"Spec {tensor_id} doesn't exist for specs {spec_names}") - - def _check_shape_explicit(self, spec: nodes.InputTensor, tensor_shape: Dict[str, int]): - assert self.is_shape_explicit(spec) - reference_shape = {name: size for name, size in zip(spec.axes, spec.shape)} - self.check_same_axes(reference_shape, tensor_shape) - if reference_shape != tensor_shape: - raise ValueError(f"Incompatible shapes found {tensor_shape}, expected {reference_shape}") - - def _check_shape_parameterized(self, spec: nodes.InputTensor, tensor_shape: Dict[str, int]): - assert isinstance(spec.shape, ParametrizedInputShape) - if not self.is_shape(tensor_shape.values()): - raise ValueError(f"Invalid shape's sizes {tensor_shape}") - - min_shape = self.get_axes_with_size(spec.axes, tuple(spec.shape.min)) - step = self.get_axes_with_size(spec.axes, 
tuple(spec.shape.step))
-        self.check_same_axes(tensor_shape, min_shape)
-
-        tensor_shapes_arr = np.array(list(tensor_shape.values()))
-        min_shape_arr = np.array(list(min_shape.values()))
-        step_arr = np.array(list(step.values()))
-        diff = tensor_shapes_arr - min_shape_arr
-        if any(size < 0 for size in diff):
-            raise ValueError(f"Tensor shape {tensor_shape} smaller than min shape {min_shape}")
-
-        non_zero_idx = np.nonzero(step_arr)
-        multipliers = diff[non_zero_idx] / step_arr[non_zero_idx]
-        multiplier = np.unique(multipliers)
-        if len(multiplier) == 1 and self.is_natural_number(multiplier[0]):
-            return
-        raise ValueError(f"Tensor shape {tensor_shape} not valid for spec {spec}")
-
-    @staticmethod
-    def check_same_axes(source: Dict[str, int], target: Dict[str, int]):
-        if source.keys() != target.keys():
-            raise ValueError(f"Incompatible axes for tensor {target} and reference {source}")
-
-    @staticmethod
-    def is_natural_number(n) -> bool:
-        return n % 1 == 0.0 and n >= 0
-
-    @staticmethod
-    def is_shape(shape: Iterator[int]) -> bool:
-        return all(InputTensorValidator.is_natural_number(dim) for dim in shape)
-
-    @staticmethod
-    def get_axes_with_size(axes: Tuple[str, ...], shape: Tuple[int, ...]) -> Dict[str, int]:
-        assert len(axes) == len(shape)
-        return {name: size for name, size in zip(axes, shape)}
-
-    @staticmethod
-    def is_shape_explicit(spec: nodes.InputTensor) -> bool:
-        return isinstance(spec.shape, list)
-
-
 class ModelSessionProcess(IRPCModelSession[PredictionPipeline]):
     def __init__(self, model: PredictionPipeline) -> None:
         super().__init__(model)

From 6f441ea5210790234a7483e7c578c013bb14c9a7 Mon Sep 17 00:00:00 2001
From: Theodoros Katzalis
Date: Fri, 2 Aug 2024 15:39:27 +0200
Subject: [PATCH 3/5] Add test data to emulate GPU out-of-memory errors

---
 tests/data/dummy_cuda_out_of_memory/dummy.md  |  0
 .../dummy_cuda_out_of_memory.py               | 13 ++++
 .../dummy_cuda_out_of_memory/dummy_in.npy     | Bin 0 -> 65664 bytes
 .../dummy_cuda_out_of_memory/dummy_out.npy    | Bin 0 -> 65664 bytes
 .../dummy_cuda_out_of_memory/environment.yaml |  0
 tests/data/dummy_cuda_out_of_memory/rdf.yaml  | 57 ++++++++++++++++++
 tests/data/dummy_cuda_out_of_memory/weights   | Bin 0 -> 232 bytes
 7 files changed, 70 insertions(+)
 create mode 100644 tests/data/dummy_cuda_out_of_memory/dummy.md
 create mode 100644 tests/data/dummy_cuda_out_of_memory/dummy_cuda_out_of_memory.py
 create mode 100644 tests/data/dummy_cuda_out_of_memory/dummy_in.npy
 create mode 100644 tests/data/dummy_cuda_out_of_memory/dummy_out.npy
 create mode 100644 tests/data/dummy_cuda_out_of_memory/environment.yaml
 create mode 100644 tests/data/dummy_cuda_out_of_memory/rdf.yaml
 create mode 100644 tests/data/dummy_cuda_out_of_memory/weights

diff --git a/tests/data/dummy_cuda_out_of_memory/dummy.md b/tests/data/dummy_cuda_out_of_memory/dummy.md
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/data/dummy_cuda_out_of_memory/dummy_cuda_out_of_memory.py b/tests/data/dummy_cuda_out_of_memory/dummy_cuda_out_of_memory.py
new file mode 100644
index 00000000..7c8ea20f
--- /dev/null
+++ b/tests/data/dummy_cuda_out_of_memory/dummy_cuda_out_of_memory.py
@@ -0,0 +1,13 @@
+import numpy as np
+from torch import Tensor, nn
+
+MAX_SHAPE = (1, 1, 10, 10)
+
+
+class Dummy(nn.Module):
+    def forward(self, input: Tensor):
+        input_size = np.prod(input.shape)
+        max_size = np.prod(MAX_SHAPE)
+        if input_size > max_size:
+            raise RuntimeError("out of memory")
+        return input + 1
diff --git a/tests/data/dummy_cuda_out_of_memory/dummy_in.npy 
b/tests/data/dummy_cuda_out_of_memory/dummy_in.npy new file mode 100644 index 0000000000000000000000000000000000000000..96a78a7b87dbeef0611f2cf9c97f5b0f72cf8396 GIT binary patch literal 65664 zcmeIuF$%&!6a>)NdW!8ic9?hT-PR-do;;I-_!YTjq~sd zf7UR|-Yeq=3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d| z0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdb zFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?0 z00Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u< z3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs# zzyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d|0}L?000Rs#zyJdbFu(u<3^2d| P0}L?000Rs#zyJeZi2B~R literal 0 HcmV?d00001 diff --git a/tests/data/dummy_cuda_out_of_memory/environment.yaml b/tests/data/dummy_cuda_out_of_memory/environment.yaml new file mode 100644 index 00000000..e69de29b diff --git a/tests/data/dummy_cuda_out_of_memory/rdf.yaml b/tests/data/dummy_cuda_out_of_memory/rdf.yaml new file mode 100644 index 00000000..aa16a785 --- /dev/null +++ b/tests/data/dummy_cuda_out_of_memory/rdf.yaml @@ -0,0 +1,57 @@ +format_version: 0.3.3 +language: python +framework: pytorch + +name: UNet2DNucleiBroad +description: A 2d U-Net pretrained on broad nucleus dataset. +cite: + - text: "Ronneberger, Olaf et al. U-net: Convolutional networks for biomedical image segmentation. MICCAI 2015." + doi: https://doi.org/10.1007/978-3-319-24574-4_28 +authors: + - name: "ilastik-team" + affiliation: "EMBL Heidelberg" + +documentation: dummy.md +tags: [pytorch, nucleus-segmentation] +license: MIT +git_repo: https://github.com/ilastik/tiktorch +covers: [] + +source: dummy_cuda_out_of_memory.py::Dummy +sha256: 00ffb1647cf7ec524892206dce6258d9da498fe040c62838f31b501a09bfd573 +timestamp: 2019-12-11T12:22:32Z # ISO 8601 + +test_inputs: [dummy_in.npy] +test_outputs: [dummy_out.npy] + +weights: + pytorch_state_dict: + source: ./weights + sha256: 518cb80bad2eb3ec3dfbe6bab74920951391ce8fb24e15cf59b9b9f052a575a6 + authors: + - name: "ilastik-team" + affiliation: "EMBL Heidelberg" + + +# TODO double check inputs/outputs +inputs: + - name: input + axes: bcyx + data_type: float32 + data_range: [-inf, inf] + shape: + min: [1, 1, 5, 5] + step: [0, 0, 1, 1] + +outputs: + - name: output + axes: bcyx + data_type: float32 + data_range: [0, 1] + shape: + reference_tensor: input # FIXME(m-novikov) ignoring for now + scale: [1, 1, 1, 1] + offset: [0, 0, 0, 0] + halo: [0, 0, 2, 2] # Should be moved to outputs + +type: model diff --git a/tests/data/dummy_cuda_out_of_memory/weights b/tests/data/dummy_cuda_out_of_memory/weights new file mode 100644 index 0000000000000000000000000000000000000000..da14f34253e8e85c58d498b94b7ed3b933de0dc6 GIT binary patch literal 232 zcmXwz!D<3Q42EZCXI)$ndhM-8z4#R3Wf7&PTx8kq1|1n^%uEz4EO;zFLm!~8)<-Dx z(0J%2B;TL>e{T9-x!=#_&&%O!^E|)l)pss+AT@<2rPL_~4qb4~1!JA5ye?% zhYkAPwx09{R08>W!Y0{wOq35~rcek>`w*FmT0<_A^-QDMfO&WXzoW7?=d)yX663IA zNlDc@_875W*p4ewvscOn(lC~r=7`+(Ew_~KAA$jaii&|oP)hgzXjx||r8hC&lA8Yk D#+O8X literal 0 HcmV?d00001 From b1595d7d318e033851f3f111f281c42950d2394a Mon Sep 17 00:00:00 2001 From: Theodoros Katzalis Date: Mon, 19 Aug 2024 10:51:20 +0200 Subject: [PATCH 4/5] Add procedures for checking gpu out of memory for given shapes Two procedures have been added: - Get the maximum tensor shape - Check if a tensor's shape fits to memory --- environment.yml | 1 + proto/inference.proto | 27 +- tests/conftest.py | 11 + tests/test_converters.py | 2 +- .../test_grpc/test_inference_servicer.py | 82 +++++- tiktorch/converters.py | 92 ++++--- 
tiktorch/proto/inference_pb2.py | 249 +++++++++++++++++- tiktorch/proto/inference_pb2_grpc.py | 66 +++++ tiktorch/server/grpc/inference_servicer.py | 100 ++++++- tiktorch/server/session/process.py | 7 +- 10 files changed, 581 insertions(+), 56 deletions(-) diff --git a/environment.yml b/environment.yml index 94b04c80..c985a6c8 100644 --- a/environment.yml +++ b/environment.yml @@ -49,6 +49,7 @@ dependencies: - bump2version - mypy - pre_commit + - grpcio-tools - pip diff --git a/proto/inference.proto b/proto/inference.proto index 3187bb39..e4d66917 100644 --- a/proto/inference.proto +++ b/proto/inference.proto @@ -12,6 +12,10 @@ service Inference { rpc ListDevices(Empty) returns (Devices) {} rpc Predict(PredictRequest) returns (PredictResponse) {} + + rpc IsCudaOutOfMemory(IsCudaOutOfMemoryRequest) returns (IsCudaOutOfMemoryResponse) {} + + rpc MaxCudaMemoryShape(MaxCudaMemoryShapeRequest) returns (MaxCudaMemoryShapeResponse) {} } message Device { @@ -56,7 +60,6 @@ message NamedFloats { repeated NamedFloat namedFloats = 1; } - message ModelSession { string id = 1; } @@ -97,6 +100,28 @@ message Tensor { repeated NamedInt shape = 4; } +message IsCudaOutOfMemoryRequest { + string modelSessionId = 1; + string tensorId = 3; + NamedInts shape = 2; +} + +message IsCudaOutOfMemoryResponse { + bool isCudaOutOfMemory = 1; +} + +message MaxCudaMemoryShapeRequest { + string modelSessionId = 1; + string tensorId = 2; + NamedInts stepShape = 3; + NamedInts minShape = 4; + NamedInts maxShape = 5; +} + +message MaxCudaMemoryShapeResponse { + NamedInts maxShape = 1; +} + message PredictRequest { string modelSessionId = 1; string datasetId = 2; diff --git a/tests/conftest.py b/tests/conftest.py index 1118935a..70d99fbf 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,6 +21,9 @@ TEST_BIOIMAGEIO_DUMMY_EXPLICIT = "dummy" TEST_BIOIMAGEIO_DUMMY_EXPLICIT_RDF = f"{TEST_BIOIMAGEIO_DUMMY_EXPLICIT}/Dummy.model.yaml" TEST_BIOIMAGEIO_DUMMY_PARAM_RDF = "dummy_param/Dummy.model_param.yaml" +TEST_BIOIMAGEIO_DUMMY = "dummy" +TEST_BIOIMAGEIO_DUMMY_CUDA_OUT_OF_MEMORY = "dummy_cuda_out_of_memory" + TEST_BIOIMAGEIO_TENSORFLOW_DUMMY = "dummy_tensorflow" TEST_BIOIMAGEIO_TORCHSCRIPT = "unet2d_torchscript" @@ -138,6 +141,14 @@ def _bioimageio_package(rdf_source): return data +@pytest.fixture +def bioimageio_dummy_cuda_out_of_memory_model_bytes(data_path): + rdf_source = data_path / TEST_BIOIMAGEIO_DUMMY_CUDA_OUT_OF_MEMORY / "rdf.yaml" + data = io.BytesIO() + export_resource_package(rdf_source, output_path=data) + return data + + def archive(directory): result = io.BytesIO() diff --git a/tests/test_converters.py b/tests/test_converters.py index e112ede9..1041697d 100644 --- a/tests/test_converters.py +++ b/tests/test_converters.py @@ -189,7 +189,7 @@ def test_create_sample_from_pb_tensors(self): assert sample.tensors["input1"].equals(xr.DataArray(arr_1, dims=["x", "y"])) assert sample.tensors["input2"].equals(xr.DataArray(arr_2, dims=["x", "y"])) - def test_create_sample_from_raw_data(self): + def test_create_sample_from_xr_tensors(self): arr_1 = np.arange(32 * 32, dtype=np.int64).reshape(32, 32) tensor_1 = xr.DataArray(arr_1, dims=["x", "y"]) arr_2 = np.arange(64 * 64, dtype=np.int64).reshape(64, 64) diff --git a/tests/test_server/test_grpc/test_inference_servicer.py b/tests/test_server/test_grpc/test_inference_servicer.py index 864c4d6a..788f24c9 100644 --- a/tests/test_server/test_grpc/test_inference_servicer.py +++ b/tests/test_server/test_grpc/test_inference_servicer.py @@ -1,3 +1,6 @@ +from typing import Tuple 
+from unittest.mock import patch + import grpc import numpy as np import pytest @@ -5,10 +8,11 @@ from numpy.testing import assert_array_equal from tiktorch import converters +from tiktorch.converters import get_axes_with_size, named_shape_to_pb_NamedInts from tiktorch.proto import inference_pb2, inference_pb2_grpc from tiktorch.server.data_store import DataStore from tiktorch.server.device_pool import TorchDevicePool -from tiktorch.server.grpc import inference_servicer +from tiktorch.server.grpc import InferenceServicer, inference_servicer from tiktorch.server.session_manager import SessionManager @@ -32,6 +36,12 @@ def grpc_stub_cls(grpc_channel): return inference_pb2_grpc.InferenceStub +@pytest.fixture +def inference_servicer_gpu(): + with patch.object(InferenceServicer, "_is_gpu", lambda x: True): + yield + + def valid_model_request(model_bytes, device_ids=None): return inference_pb2.CreateModelSessionRequest( model_blob=inference_pb2.Blob(content=model_bytes.getvalue()), deviceIds=device_ids or ["cpu"] @@ -232,3 +242,73 @@ def test_call_predict_tf(self, grpc_stub, bioimageio_dummy_tensorflow_model_byte assert len(res.tensors) == 1 assert res.tensors[0].tensorId == output_tensor_id assert_array_equal(expected, converters.pb_tensor_to_numpy(res.tensors[0])) + + +class TestCudaMemory: + MAX_SHAPE = (1, 1, 10, 10) + AXES = ("b", "c", "y", "x") + + def to_pb_namedInts(self, shape: Tuple[int, ...]) -> inference_pb2.NamedInts: + return named_shape_to_pb_NamedInts(get_axes_with_size(self.AXES, shape)) + + @pytest.mark.parametrize( + "min_shape, max_shape, step_shape, expected", + [ + ((1, 1, 5, 5), (1, 1, 11, 11), (0, 0, 1, 1), MAX_SHAPE), + ((1, 1, 5, 5), (1, 1, 6, 6), (0, 0, 1, 1), [1, 1, 6, 6]), + ], + ) + def test_max_cuda_memory( + self, + inference_servicer_gpu, + min_shape, + max_shape, + step_shape, + expected, + grpc_stub, + bioimageio_dummy_cuda_out_of_memory_model_bytes, + ): + min_shape = self.to_pb_namedInts(min_shape) + max_shape = self.to_pb_namedInts(max_shape) + step_shape = self.to_pb_namedInts(step_shape) + + model = grpc_stub.CreateModelSession(valid_model_request(bioimageio_dummy_cuda_out_of_memory_model_bytes)) + res = grpc_stub.MaxCudaMemoryShape( + inference_pb2.MaxCudaMemoryShapeRequest( + modelSessionId=model.id, tensorId="input", minShape=min_shape, maxShape=max_shape, stepShape=step_shape + ) + ) + grpc_stub.CloseModelSession(model) + assert res.maxShape == self.to_pb_namedInts(expected) + + def test_max_cuda_memory_not_found( + self, inference_servicer_gpu, grpc_stub, bioimageio_dummy_cuda_out_of_memory_model_bytes + ): + model = grpc_stub.CreateModelSession(valid_model_request(bioimageio_dummy_cuda_out_of_memory_model_bytes)) + min_shape = self.to_pb_namedInts((1, 1, 11, 11)) + max_shape = self.to_pb_namedInts((1, 1, 12, 12)) + step = self.to_pb_namedInts((0, 0, 1, 1)) + with pytest.raises(grpc.RpcError) as error: + grpc_stub.MaxCudaMemoryShape( + inference_pb2.MaxCudaMemoryShapeRequest( + modelSessionId=model.id, tensorId="input", minShape=min_shape, maxShape=max_shape, stepShape=step + ) + ) + assert error.value.code() == grpc.StatusCode.NOT_FOUND + assert error.value.details() == "no valid shape" + grpc_stub.CloseModelSession(model) + + @pytest.mark.parametrize( + "shape, expected", + [((1, 1, 10, 10), False), ((1, 1, 99, 99), True)], + ) + def test_is_out_of_memory( + self, inference_servicer_gpu, shape, expected, grpc_stub, bioimageio_dummy_cuda_out_of_memory_model_bytes + ): + model = 
grpc_stub.CreateModelSession(valid_model_request(bioimageio_dummy_cuda_out_of_memory_model_bytes)) + shape = self.to_pb_namedInts(shape) + res = grpc_stub.IsCudaOutOfMemory( + inference_pb2.IsCudaOutOfMemoryRequest(modelSessionId=model.id, tensorId="input", shape=shape) + ) + grpc_stub.CloseModelSession(model) + assert res.isCudaOutOfMemory is expected diff --git a/tiktorch/converters.py b/tiktorch/converters.py index 06c5195e..b1dcbe16 100644 --- a/tiktorch/converters.py +++ b/tiktorch/converters.py @@ -1,7 +1,7 @@ from __future__ import annotations import dataclasses -from typing import Dict, Iterator, List, Tuple +from typing import Dict, List, Optional, Sequence, Tuple import numpy as np import xarray as xr @@ -11,10 +11,15 @@ from tiktorch.proto import inference_pb2 # pairs of axis-shape for a single tensor -NamedInt = Tuple[str, int] -NamedFloat = Tuple[str, float] -NamedShape = List[NamedInt] -NamedVec = List[NamedFloat] +NamedShape = Dict[str, int] +NamedVec = Dict[str, float] + + +def get_axes_with_size(axes: Tuple[str, ...], shape: Tuple[int, ...]) -> NamedShape: + if len(axes) != len(shape): + raise ValueError(f"{axes} and {shape} incompatible length. It should be equal") + InputTensorValidator.is_shape(shape) + return {name: size for name, size in zip(axes, shape)} class InputTensorValidator: @@ -29,7 +34,7 @@ def _get_input_tensors_with_names(self) -> Dict[str, nodes.InputTensor]: return {tensor.name: tensor for tensor in self._input_specs} def check_shape(self, tensor_id: str, axes: Tuple[str, ...], shape: Tuple[int, ...]): - shape = self.get_axes_with_size(axes, shape) + shape = get_axes_with_size(axes, shape) spec = self._get_input_spec(tensor_id) if isinstance(spec.shape, list): self._check_shape_explicit(spec, shape) @@ -49,38 +54,49 @@ def _check_spec_exists(self, tensor_id: str): if tensor_id not in spec_names: raise ValueError(f"Spec {tensor_id} doesn't exist for specs {spec_names}") - def _check_shape_explicit(self, spec: nodes.InputTensor, tensor_shape: Dict[str, int]): + def _check_shape_explicit(self, spec: nodes.InputTensor, tensor_shape: NamedShape): assert self.is_shape_explicit(spec) reference_shape = {name: size for name, size in zip(spec.axes, spec.shape)} self.check_same_axes(reference_shape, tensor_shape) if reference_shape != tensor_shape: raise ValueError(f"Incompatible shapes found {tensor_shape}, expected {reference_shape}") - def _check_shape_parameterized(self, spec: nodes.InputTensor, tensor_shape: Dict[str, int]): + def _check_shape_parameterized(self, spec: nodes.InputTensor, tensor_shape: NamedShape): assert isinstance(spec.shape, ParametrizedInputShape) if not self.is_shape(tensor_shape.values()): raise ValueError(f"Invalid shape's sizes {tensor_shape}") - min_shape = self.get_axes_with_size(spec.axes, tuple(spec.shape.min)) - step = self.get_axes_with_size(spec.axes, tuple(spec.shape.step)) - self.check_same_axes(tensor_shape, min_shape) + min_shape = get_axes_with_size(spec.axes, tuple(spec.shape.min)) + step_shape = get_axes_with_size(spec.axes, tuple(spec.shape.step)) + multiplier = self.get_num_increments_from_param_shape( + NamedParametrizedShape(min_shape, step_shape), tensor_shape + ) + if multiplier is None: + raise ValueError(f"Tensor shape {tensor_shape} not valid for spec {spec}") - tensor_shapes_arr = np.array(list(tensor_shape.values())) - min_shape_arr = np.array(list(min_shape.values())) - step_arr = np.array(list(step.values())) - diff = tensor_shapes_arr - min_shape_arr + @staticmethod + def get_num_increments_from_param_shape( 
+ param_shape: NamedParametrizedShape, max_shape: NamedShape + ) -> Optional[int]: + InputTensorValidator.check_same_axes(param_shape.min_shape, max_shape) + max_shape_arr = np.array(list(max_shape.values())) + min_shape_arr = np.array(list(param_shape.min_shape.values())) + step_arr = np.array(list(param_shape.step_shape.values())) + diff = max_shape_arr - min_shape_arr if any(size < 0 for size in diff): - raise ValueError(f"Tensor shape {tensor_shape} smaller than min shape {min_shape}") + raise ValueError(f"Max shape {max_shape_arr} smaller than min shape {min_shape_arr}") non_zero_idx = np.nonzero(step_arr) - multipliers = diff[non_zero_idx] / step_arr[non_zero_idx] - multiplier = np.unique(multipliers) - if len(multiplier) == 1 and self.is_natural_number(multiplier[0]): - return - raise ValueError(f"Tensor shape {tensor_shape} not valid for spec {spec}") + num_increments = diff[non_zero_idx] / step_arr[non_zero_idx] + num_increments = np.unique(num_increments) + if len(num_increments) == 1 and InputTensorValidator.is_natural_number(num_increments[0]): + num_increment = num_increments[0] + assert np.array_equal(min_shape_arr + num_increment * step_arr, max_shape_arr) + return int(num_increment) + return None @staticmethod - def check_same_axes(source: Dict[str, int], target: Dict[str, int]): + def check_same_axes(source: NamedShape, target: NamedShape): if source.keys() != target.keys(): raise ValueError(f"Incompatible axes for tensor {target} and reference {source}") @@ -89,19 +105,27 @@ def is_natural_number(n) -> bool: return n % 1 == 0.0 and n >= 0 @staticmethod - def is_shape(shape: Iterator[int]) -> bool: + def is_shape(shape: Sequence[int]) -> bool: return all(InputTensorValidator.is_natural_number(dim) for dim in shape) - @staticmethod - def get_axes_with_size(axes: Tuple[str, ...], shape: Tuple[int, ...]) -> Dict[str, int]: - assert len(axes) == len(shape) - return {name: size for name, size in zip(axes, shape)} - @staticmethod def is_shape_explicit(spec: nodes.InputTensor) -> bool: return isinstance(spec.shape, list) +@dataclasses.dataclass(frozen=True) +class NamedParametrizedShape: + min_shape: NamedShape + step_shape: NamedShape + + def __post_init__(self): + InputTensorValidator.check_same_axes(self.min_shape, self.step_shape) + + @property + def axes(self) -> Tuple[str, ...]: + return tuple(self.min_shape.keys()) + + @dataclasses.dataclass(frozen=True) class Sample: tensors: Dict[str, xr.DataArray] @@ -132,15 +156,19 @@ def xarray_to_pb_tensor(tensor_id: str, array: xr.DataArray) -> inference_pb2.Te return inference_pb2.Tensor(tensorId=tensor_id, dtype=str(array.dtype), shape=shape, buffer=bytes(array.data)) -def name_int_tuples_to_pb_NamedInts(name_int_tuples) -> inference_pb2.NamedInts: +def named_shape_to_pb_NamedInts(name_int_tuples: NamedShape) -> inference_pb2.NamedInts: return inference_pb2.NamedInts( - namedInts=[inference_pb2.NamedInt(size=dim, name=name) for name, dim in name_int_tuples] + namedInts=[inference_pb2.NamedInt(size=dim, name=name) for name, dim in name_int_tuples.items()] ) -def name_float_tuples_to_pb_NamedFloats(name_float_tuples) -> inference_pb2.NamedFloats: +def pb_NamedInts_to_named_shape(pb_named_ints: inference_pb2.NamedInt) -> NamedShape: + return {dim.name: dim.size for dim in pb_named_ints.namedInts} + + +def name_float_tuples_to_pb_NamedFloats(name_float_tuples: NamedVec) -> inference_pb2.NamedFloats: return inference_pb2.NamedFloats( - namedFloats=[inference_pb2.NamedFloat(size=dim, name=name) for name, dim in name_float_tuples] + 
namedFloats=[inference_pb2.NamedFloat(size=dim, name=name) for name, dim in name_float_tuples.items()] ) diff --git a/tiktorch/proto/inference_pb2.py b/tiktorch/proto/inference_pb2.py index b5e4dbb7..92a0fb0b 100644 --- a/tiktorch/proto/inference_pb2.py +++ b/tiktorch/proto/inference_pb2.py @@ -20,7 +20,7 @@ syntax='proto3', serialized_options=None, create_key=_descriptor._internal_create_key, - serialized_pb=b'\n\x0finference.proto\"Y\n\x06\x44\x65vice\x12\n\n\x02id\x18\x01 \x01(\t\x12\x1e\n\x06status\x18\x02 \x01(\x0e\x32\x0e.Device.Status\"#\n\x06Status\x12\r\n\tAVAILABLE\x10\x00\x12\n\n\x06IN_USE\x10\x01\"W\n\x1f\x43reateDatasetDescriptionRequest\x12\x16\n\x0emodelSessionId\x18\x01 \x01(\t\x12\x0c\n\x04mean\x18\x03 \x01(\x01\x12\x0e\n\x06stddev\x18\x04 \x01(\x01\" \n\x12\x44\x61tasetDescription\x12\n\n\x02id\x18\x01 \x01(\t\"\'\n\x04\x42lob\x12\x0e\n\x06\x66ormat\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\x0c\"i\n\x19\x43reateModelSessionRequest\x12\x13\n\tmodel_uri\x18\x01 \x01(\tH\x00\x12\x1b\n\nmodel_blob\x18\x02 \x01(\x0b\x32\x05.BlobH\x00\x12\x11\n\tdeviceIds\x18\x05 \x03(\tB\x07\n\x05model\")\n\tNamedInts\x12\x1c\n\tnamedInts\x18\x01 \x03(\x0b\x32\t.NamedInt\"/\n\x0bNamedFloats\x12 \n\x0bnamedFloats\x18\x01 \x03(\x0b\x32\x0b.NamedFloat\"\x1a\n\x0cModelSession\x12\n\n\x02id\x18\x01 \x01(\t\"\x9e\x01\n\x08LogEntry\x12\x11\n\ttimestamp\x18\x01 \x01(\r\x12\x1e\n\x05level\x18\x02 \x01(\x0e\x32\x0f.LogEntry.Level\x12\x0f\n\x07\x63ontent\x18\x03 \x01(\t\"N\n\x05Level\x12\n\n\x06NOTSET\x10\x00\x12\t\n\x05\x44\x45\x42UG\x10\x01\x12\x08\n\x04INFO\x10\x02\x12\x0b\n\x07WARNING\x10\x03\x12\t\n\x05\x45RROR\x10\x04\x12\x0c\n\x08\x43RITICAL\x10\x05\"#\n\x07\x44\x65vices\x12\x18\n\x07\x64\x65vices\x18\x01 \x03(\x0b\x32\x07.Device\"&\n\x08NamedInt\x12\x0c\n\x04size\x18\x01 \x01(\r\x12\x0c\n\x04name\x18\x02 \x01(\t\"(\n\nNamedFloat\x12\x0c\n\x04size\x18\x01 \x01(\x02\x12\x0c\n\x04name\x18\x02 \x01(\t\"S\n\x06Tensor\x12\x0e\n\x06\x62uffer\x18\x01 \x01(\x0c\x12\r\n\x05\x64type\x18\x02 \x01(\t\x12\x10\n\x08tensorId\x18\x03 \x01(\t\x12\x18\n\x05shape\x18\x04 \x03(\x0b\x32\t.NamedInt\"U\n\x0ePredictRequest\x12\x16\n\x0emodelSessionId\x18\x01 \x01(\t\x12\x11\n\tdatasetId\x18\x02 \x01(\t\x12\x18\n\x07tensors\x18\x03 \x03(\x0b\x32\x07.Tensor\"+\n\x0fPredictResponse\x12\x18\n\x07tensors\x18\x01 \x03(\x0b\x32\x07.Tensor\"\x07\n\x05\x45mpty2\xc6\x02\n\tInference\x12\x41\n\x12\x43reateModelSession\x12\x1a.CreateModelSessionRequest\x1a\r.ModelSession\"\x00\x12,\n\x11\x43loseModelSession\x12\r.ModelSession\x1a\x06.Empty\"\x00\x12S\n\x18\x43reateDatasetDescription\x12 .CreateDatasetDescriptionRequest\x1a\x13.DatasetDescription\"\x00\x12 \n\x07GetLogs\x12\x06.Empty\x1a\t.LogEntry\"\x00\x30\x01\x12!\n\x0bListDevices\x12\x06.Empty\x1a\x08.Devices\"\x00\x12.\n\x07Predict\x12\x0f.PredictRequest\x1a\x10.PredictResponse\"\x00\x32G\n\rFlightControl\x12\x18\n\x04Ping\x12\x06.Empty\x1a\x06.Empty\"\x00\x12\x1c\n\x08Shutdown\x12\x06.Empty\x1a\x06.Empty\"\x00\x62\x06proto3' + serialized_pb=b'\n\x0finference.proto\"Y\n\x06\x44\x65vice\x12\n\n\x02id\x18\x01 \x01(\t\x12\x1e\n\x06status\x18\x02 \x01(\x0e\x32\x0e.Device.Status\"#\n\x06Status\x12\r\n\tAVAILABLE\x10\x00\x12\n\n\x06IN_USE\x10\x01\"W\n\x1f\x43reateDatasetDescriptionRequest\x12\x16\n\x0emodelSessionId\x18\x01 \x01(\t\x12\x0c\n\x04mean\x18\x03 \x01(\x01\x12\x0e\n\x06stddev\x18\x04 \x01(\x01\" \n\x12\x44\x61tasetDescription\x12\n\n\x02id\x18\x01 \x01(\t\"\'\n\x04\x42lob\x12\x0e\n\x06\x66ormat\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 
\x01(\x0c\"i\n\x19\x43reateModelSessionRequest\x12\x13\n\tmodel_uri\x18\x01 \x01(\tH\x00\x12\x1b\n\nmodel_blob\x18\x02 \x01(\x0b\x32\x05.BlobH\x00\x12\x11\n\tdeviceIds\x18\x05 \x03(\tB\x07\n\x05model\")\n\tNamedInts\x12\x1c\n\tnamedInts\x18\x01 \x03(\x0b\x32\t.NamedInt\"/\n\x0bNamedFloats\x12 \n\x0bnamedFloats\x18\x01 \x03(\x0b\x32\x0b.NamedFloat\"\x1a\n\x0cModelSession\x12\n\n\x02id\x18\x01 \x01(\t\"\x9e\x01\n\x08LogEntry\x12\x11\n\ttimestamp\x18\x01 \x01(\r\x12\x1e\n\x05level\x18\x02 \x01(\x0e\x32\x0f.LogEntry.Level\x12\x0f\n\x07\x63ontent\x18\x03 \x01(\t\"N\n\x05Level\x12\n\n\x06NOTSET\x10\x00\x12\t\n\x05\x44\x45\x42UG\x10\x01\x12\x08\n\x04INFO\x10\x02\x12\x0b\n\x07WARNING\x10\x03\x12\t\n\x05\x45RROR\x10\x04\x12\x0c\n\x08\x43RITICAL\x10\x05\"#\n\x07\x44\x65vices\x12\x18\n\x07\x64\x65vices\x18\x01 \x03(\x0b\x32\x07.Device\"&\n\x08NamedInt\x12\x0c\n\x04size\x18\x01 \x01(\r\x12\x0c\n\x04name\x18\x02 \x01(\t\"(\n\nNamedFloat\x12\x0c\n\x04size\x18\x01 \x01(\x02\x12\x0c\n\x04name\x18\x02 \x01(\t\"S\n\x06Tensor\x12\x0e\n\x06\x62uffer\x18\x01 \x01(\x0c\x12\r\n\x05\x64type\x18\x02 \x01(\t\x12\x10\n\x08tensorId\x18\x03 \x01(\t\x12\x18\n\x05shape\x18\x04 \x03(\x0b\x32\t.NamedInt\"_\n\x18IsCudaOutOfMemoryRequest\x12\x16\n\x0emodelSessionId\x18\x01 \x01(\t\x12\x10\n\x08tensorId\x18\x03 \x01(\t\x12\x19\n\x05shape\x18\x02 \x01(\x0b\x32\n.NamedInts\"6\n\x19IsCudaOutOfMemoryResponse\x12\x19\n\x11isCudaOutOfMemory\x18\x01 \x01(\x08\"\xa0\x01\n\x19MaxCudaMemoryShapeRequest\x12\x16\n\x0emodelSessionId\x18\x01 \x01(\t\x12\x10\n\x08tensorId\x18\x02 \x01(\t\x12\x1d\n\tstepShape\x18\x03 \x01(\x0b\x32\n.NamedInts\x12\x1c\n\x08minShape\x18\x04 \x01(\x0b\x32\n.NamedInts\x12\x1c\n\x08maxShape\x18\x05 \x01(\x0b\x32\n.NamedInts\":\n\x1aMaxCudaMemoryShapeResponse\x12\x1c\n\x08maxShape\x18\x01 \x01(\x0b\x32\n.NamedInts\"U\n\x0ePredictRequest\x12\x16\n\x0emodelSessionId\x18\x01 \x01(\t\x12\x11\n\tdatasetId\x18\x02 \x01(\t\x12\x18\n\x07tensors\x18\x03 \x03(\x0b\x32\x07.Tensor\"+\n\x0fPredictResponse\x12\x18\n\x07tensors\x18\x01 \x03(\x0b\x32\x07.Tensor\"\x07\n\x05\x45mpty2\xe5\x03\n\tInference\x12\x41\n\x12\x43reateModelSession\x12\x1a.CreateModelSessionRequest\x1a\r.ModelSession\"\x00\x12,\n\x11\x43loseModelSession\x12\r.ModelSession\x1a\x06.Empty\"\x00\x12S\n\x18\x43reateDatasetDescription\x12 .CreateDatasetDescriptionRequest\x1a\x13.DatasetDescription\"\x00\x12 \n\x07GetLogs\x12\x06.Empty\x1a\t.LogEntry\"\x00\x30\x01\x12!\n\x0bListDevices\x12\x06.Empty\x1a\x08.Devices\"\x00\x12.\n\x07Predict\x12\x0f.PredictRequest\x1a\x10.PredictResponse\"\x00\x12L\n\x11IsCudaOutOfMemory\x12\x19.IsCudaOutOfMemoryRequest\x1a\x1a.IsCudaOutOfMemoryResponse\"\x00\x12O\n\x12MaxCudaMemoryShape\x12\x1a.MaxCudaMemoryShapeRequest\x1a\x1b.MaxCudaMemoryShapeResponse\"\x00\x32G\n\rFlightControl\x12\x18\n\x04Ping\x12\x06.Empty\x1a\x06.Empty\"\x00\x12\x1c\n\x08Shutdown\x12\x06.Empty\x1a\x06.Empty\"\x00\x62\x06proto3' ) @@ -610,6 +610,176 @@ ) +_ISCUDAOUTOFMEMORYREQUEST = _descriptor.Descriptor( + name='IsCudaOutOfMemoryRequest', + full_name='IsCudaOutOfMemoryRequest', + filename=None, + file=DESCRIPTOR, + containing_type=None, + create_key=_descriptor._internal_create_key, + fields=[ + _descriptor.FieldDescriptor( + name='modelSessionId', full_name='IsCudaOutOfMemoryRequest.modelSessionId', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=b"".decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, 
create_key=_descriptor._internal_create_key), + _descriptor.FieldDescriptor( + name='tensorId', full_name='IsCudaOutOfMemoryRequest.tensorId', index=1, + number=3, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=b"".decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + _descriptor.FieldDescriptor( + name='shape', full_name='IsCudaOutOfMemoryRequest.shape', index=2, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=866, + serialized_end=961, +) + + +_ISCUDAOUTOFMEMORYRESPONSE = _descriptor.Descriptor( + name='IsCudaOutOfMemoryResponse', + full_name='IsCudaOutOfMemoryResponse', + filename=None, + file=DESCRIPTOR, + containing_type=None, + create_key=_descriptor._internal_create_key, + fields=[ + _descriptor.FieldDescriptor( + name='isCudaOutOfMemory', full_name='IsCudaOutOfMemoryResponse.isCudaOutOfMemory', index=0, + number=1, type=8, cpp_type=7, label=1, + has_default_value=False, default_value=False, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=963, + serialized_end=1017, +) + + +_MAXCUDAMEMORYSHAPEREQUEST = _descriptor.Descriptor( + name='MaxCudaMemoryShapeRequest', + full_name='MaxCudaMemoryShapeRequest', + filename=None, + file=DESCRIPTOR, + containing_type=None, + create_key=_descriptor._internal_create_key, + fields=[ + _descriptor.FieldDescriptor( + name='modelSessionId', full_name='MaxCudaMemoryShapeRequest.modelSessionId', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=b"".decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + _descriptor.FieldDescriptor( + name='tensorId', full_name='MaxCudaMemoryShapeRequest.tensorId', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=b"".decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + _descriptor.FieldDescriptor( + name='stepShape', full_name='MaxCudaMemoryShapeRequest.stepShape', index=2, + number=3, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + _descriptor.FieldDescriptor( + name='minShape', full_name='MaxCudaMemoryShapeRequest.minShape', index=3, + number=4, type=11, cpp_type=10, label=1, + 
has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + _descriptor.FieldDescriptor( + name='maxShape', full_name='MaxCudaMemoryShapeRequest.maxShape', index=4, + number=5, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1020, + serialized_end=1180, +) + + +_MAXCUDAMEMORYSHAPERESPONSE = _descriptor.Descriptor( + name='MaxCudaMemoryShapeResponse', + full_name='MaxCudaMemoryShapeResponse', + filename=None, + file=DESCRIPTOR, + containing_type=None, + create_key=_descriptor._internal_create_key, + fields=[ + _descriptor.FieldDescriptor( + name='maxShape', full_name='MaxCudaMemoryShapeResponse.maxShape', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1182, + serialized_end=1240, +) + + _PREDICTREQUEST = _descriptor.Descriptor( name='PredictRequest', full_name='PredictRequest', @@ -651,8 +821,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=866, - serialized_end=951, + serialized_start=1242, + serialized_end=1327, ) @@ -683,8 +853,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=953, - serialized_end=996, + serialized_start=1329, + serialized_end=1372, ) @@ -708,8 +878,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=998, - serialized_end=1005, + serialized_start=1374, + serialized_end=1381, ) _DEVICE.fields_by_name['status'].enum_type = _DEVICE_STATUS @@ -727,6 +897,11 @@ _LOGENTRY_LEVEL.containing_type = _LOGENTRY _DEVICES.fields_by_name['devices'].message_type = _DEVICE _TENSOR.fields_by_name['shape'].message_type = _NAMEDINT +_ISCUDAOUTOFMEMORYREQUEST.fields_by_name['shape'].message_type = _NAMEDINTS +_MAXCUDAMEMORYSHAPEREQUEST.fields_by_name['stepShape'].message_type = _NAMEDINTS +_MAXCUDAMEMORYSHAPEREQUEST.fields_by_name['minShape'].message_type = _NAMEDINTS +_MAXCUDAMEMORYSHAPEREQUEST.fields_by_name['maxShape'].message_type = _NAMEDINTS +_MAXCUDAMEMORYSHAPERESPONSE.fields_by_name['maxShape'].message_type = _NAMEDINTS _PREDICTREQUEST.fields_by_name['tensors'].message_type = _TENSOR _PREDICTRESPONSE.fields_by_name['tensors'].message_type = _TENSOR DESCRIPTOR.message_types_by_name['Device'] = _DEVICE @@ -742,6 +917,10 @@ DESCRIPTOR.message_types_by_name['NamedInt'] = _NAMEDINT DESCRIPTOR.message_types_by_name['NamedFloat'] = _NAMEDFLOAT DESCRIPTOR.message_types_by_name['Tensor'] = _TENSOR +DESCRIPTOR.message_types_by_name['IsCudaOutOfMemoryRequest'] = _ISCUDAOUTOFMEMORYREQUEST +DESCRIPTOR.message_types_by_name['IsCudaOutOfMemoryResponse'] = _ISCUDAOUTOFMEMORYRESPONSE +DESCRIPTOR.message_types_by_name['MaxCudaMemoryShapeRequest'] = 
_MAXCUDAMEMORYSHAPEREQUEST +DESCRIPTOR.message_types_by_name['MaxCudaMemoryShapeResponse'] = _MAXCUDAMEMORYSHAPERESPONSE DESCRIPTOR.message_types_by_name['PredictRequest'] = _PREDICTREQUEST DESCRIPTOR.message_types_by_name['PredictResponse'] = _PREDICTRESPONSE DESCRIPTOR.message_types_by_name['Empty'] = _EMPTY @@ -838,6 +1017,34 @@ }) _sym_db.RegisterMessage(Tensor) +IsCudaOutOfMemoryRequest = _reflection.GeneratedProtocolMessageType('IsCudaOutOfMemoryRequest', (_message.Message,), { + 'DESCRIPTOR' : _ISCUDAOUTOFMEMORYREQUEST, + '__module__' : 'inference_pb2' + # @@protoc_insertion_point(class_scope:IsCudaOutOfMemoryRequest) + }) +_sym_db.RegisterMessage(IsCudaOutOfMemoryRequest) + +IsCudaOutOfMemoryResponse = _reflection.GeneratedProtocolMessageType('IsCudaOutOfMemoryResponse', (_message.Message,), { + 'DESCRIPTOR' : _ISCUDAOUTOFMEMORYRESPONSE, + '__module__' : 'inference_pb2' + # @@protoc_insertion_point(class_scope:IsCudaOutOfMemoryResponse) + }) +_sym_db.RegisterMessage(IsCudaOutOfMemoryResponse) + +MaxCudaMemoryShapeRequest = _reflection.GeneratedProtocolMessageType('MaxCudaMemoryShapeRequest', (_message.Message,), { + 'DESCRIPTOR' : _MAXCUDAMEMORYSHAPEREQUEST, + '__module__' : 'inference_pb2' + # @@protoc_insertion_point(class_scope:MaxCudaMemoryShapeRequest) + }) +_sym_db.RegisterMessage(MaxCudaMemoryShapeRequest) + +MaxCudaMemoryShapeResponse = _reflection.GeneratedProtocolMessageType('MaxCudaMemoryShapeResponse', (_message.Message,), { + 'DESCRIPTOR' : _MAXCUDAMEMORYSHAPERESPONSE, + '__module__' : 'inference_pb2' + # @@protoc_insertion_point(class_scope:MaxCudaMemoryShapeResponse) + }) +_sym_db.RegisterMessage(MaxCudaMemoryShapeResponse) + PredictRequest = _reflection.GeneratedProtocolMessageType('PredictRequest', (_message.Message,), { 'DESCRIPTOR' : _PREDICTREQUEST, '__module__' : 'inference_pb2' @@ -868,8 +1075,8 @@ index=0, serialized_options=None, create_key=_descriptor._internal_create_key, - serialized_start=1008, - serialized_end=1334, + serialized_start=1384, + serialized_end=1869, methods=[ _descriptor.MethodDescriptor( name='CreateModelSession', @@ -931,6 +1138,26 @@ serialized_options=None, create_key=_descriptor._internal_create_key, ), + _descriptor.MethodDescriptor( + name='IsCudaOutOfMemory', + full_name='Inference.IsCudaOutOfMemory', + index=6, + containing_service=None, + input_type=_ISCUDAOUTOFMEMORYREQUEST, + output_type=_ISCUDAOUTOFMEMORYRESPONSE, + serialized_options=None, + create_key=_descriptor._internal_create_key, + ), + _descriptor.MethodDescriptor( + name='MaxCudaMemoryShape', + full_name='Inference.MaxCudaMemoryShape', + index=7, + containing_service=None, + input_type=_MAXCUDAMEMORYSHAPEREQUEST, + output_type=_MAXCUDAMEMORYSHAPERESPONSE, + serialized_options=None, + create_key=_descriptor._internal_create_key, + ), ]) _sym_db.RegisterServiceDescriptor(_INFERENCE) @@ -944,8 +1171,8 @@ index=1, serialized_options=None, create_key=_descriptor._internal_create_key, - serialized_start=1336, - serialized_end=1407, + serialized_start=1871, + serialized_end=1942, methods=[ _descriptor.MethodDescriptor( name='Ping', diff --git a/tiktorch/proto/inference_pb2_grpc.py b/tiktorch/proto/inference_pb2_grpc.py index f983d4c5..e8aa9805 100644 --- a/tiktorch/proto/inference_pb2_grpc.py +++ b/tiktorch/proto/inference_pb2_grpc.py @@ -44,6 +44,16 @@ def __init__(self, channel): request_serializer=inference__pb2.PredictRequest.SerializeToString, response_deserializer=inference__pb2.PredictResponse.FromString, ) + self.IsCudaOutOfMemory = channel.unary_unary( + 
'/Inference/IsCudaOutOfMemory', + request_serializer=inference__pb2.IsCudaOutOfMemoryRequest.SerializeToString, + response_deserializer=inference__pb2.IsCudaOutOfMemoryResponse.FromString, + ) + self.MaxCudaMemoryShape = channel.unary_unary( + '/Inference/MaxCudaMemoryShape', + request_serializer=inference__pb2.MaxCudaMemoryShapeRequest.SerializeToString, + response_deserializer=inference__pb2.MaxCudaMemoryShapeResponse.FromString, + ) class InferenceServicer(object): @@ -85,6 +95,18 @@ def Predict(self, request, context): context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') + def IsCudaOutOfMemory(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def MaxCudaMemoryShape(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + def add_InferenceServicer_to_server(servicer, server): rpc_method_handlers = { @@ -118,6 +140,16 @@ def add_InferenceServicer_to_server(servicer, server): request_deserializer=inference__pb2.PredictRequest.FromString, response_serializer=inference__pb2.PredictResponse.SerializeToString, ), + 'IsCudaOutOfMemory': grpc.unary_unary_rpc_method_handler( + servicer.IsCudaOutOfMemory, + request_deserializer=inference__pb2.IsCudaOutOfMemoryRequest.FromString, + response_serializer=inference__pb2.IsCudaOutOfMemoryResponse.SerializeToString, + ), + 'MaxCudaMemoryShape': grpc.unary_unary_rpc_method_handler( + servicer.MaxCudaMemoryShape, + request_deserializer=inference__pb2.MaxCudaMemoryShapeRequest.FromString, + response_serializer=inference__pb2.MaxCudaMemoryShapeResponse.SerializeToString, + ), } generic_handler = grpc.method_handlers_generic_handler( 'Inference', rpc_method_handlers) @@ -230,6 +262,40 @@ def Predict(request, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + @staticmethod + def IsCudaOutOfMemory(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/Inference/IsCudaOutOfMemory', + inference__pb2.IsCudaOutOfMemoryRequest.SerializeToString, + inference__pb2.IsCudaOutOfMemoryResponse.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def MaxCudaMemoryShape(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/Inference/MaxCudaMemoryShape', + inference__pb2.MaxCudaMemoryShapeRequest.SerializeToString, + inference__pb2.MaxCudaMemoryShapeResponse.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + class FlightControlStub(object): """Missing associated documentation comment in .proto file.""" diff --git a/tiktorch/server/grpc/inference_servicer.py b/tiktorch/server/grpc/inference_servicer.py index 3dab96dd..54fa7e0c 100644 --- 
a/tiktorch/server/grpc/inference_servicer.py +++ b/tiktorch/server/grpc/inference_servicer.py @@ -1,9 +1,23 @@ +import gc import time +from typing import Optional import grpc - -from tiktorch.converters import InputTensorValidator, Sample +import numpy as np +import torch.cuda +import xarray + +from tiktorch.converters import ( + InputTensorValidator, + NamedParametrizedShape, + NamedShape, + Sample, + get_axes_with_size, + named_shape_to_pb_NamedInts, + pb_NamedInts_to_named_shape, +) from tiktorch.proto import inference_pb2, inference_pb2_grpc +from tiktorch.rpc.mp import BioModelClient from tiktorch.server.data_store import IDataStore from tiktorch.server.device_pool import DeviceStatus, IDevicePool from tiktorch.server.session.process import start_model_session_process @@ -39,7 +53,6 @@ def CreateModelSession( session = self.__session_manager.create_session(client) session.on_close(lease.terminate) session.on_close(client.api.shutdown) - return inference_pb2.ModelSession(id=session.id) def CreateDatasetDescription( @@ -76,13 +89,88 @@ def ListDevices(self, request: inference_pb2.Empty, context) -> inference_pb2.De def Predict(self, request: inference_pb2.PredictRequest, context) -> inference_pb2.PredictResponse: session = self._getModelSession(context, request.modelSessionId) input_sample = Sample.from_pb_tensors(request.tensors) - tensor_validator = InputTensorValidator(session.bio_model_client.input_specs) - tensor_validator.check_tensors(input_sample) - res = session.bio_model_client.api.forward(input_sample) + res = self._validated_forward(session.bio_model_client, input_sample) output_tensor_ids = [tensor.name for tensor in session.bio_model_client.output_specs] output_sample = Sample.from_xr_tensors(output_tensor_ids, res) return inference_pb2.PredictResponse(tensors=output_sample.to_pb_tensors()) + def MaxCudaMemoryShape( + self, request: inference_pb2.MaxCudaMemoryShapeRequest, context + ) -> inference_pb2.MaxCudaMemoryShapeResponse: + session = self._getModelSession(context, request.modelSessionId) + min_shape = pb_NamedInts_to_named_shape(request.minShape) + step_shape = pb_NamedInts_to_named_shape(request.stepShape) + max_shape = pb_NamedInts_to_named_shape(request.maxShape) + max_valid_shape = self._get_max_shape( + tensor_id=request.tensorId, + client=session.bio_model_client, + param_shape=NamedParametrizedShape(min_shape, step_shape), + max_shape=max_shape, + ) + if max_valid_shape is None: + context.abort(grpc.StatusCode.NOT_FOUND, "no valid shape") + return inference_pb2.MaxCudaMemoryShapeResponse(maxShape=named_shape_to_pb_NamedInts(max_valid_shape)) + + def IsCudaOutOfMemory( + self, request: inference_pb2.IsCudaOutOfMemoryRequest, context + ) -> inference_pb2.IsCudaOutOfMemoryResponse: + session = self._getModelSession(context, request.modelSessionId) + return inference_pb2.IsCudaOutOfMemoryResponse( + isCudaOutOfMemory=self._is_cuda_out_of_memory( + session.bio_model_client, request.tensorId, pb_NamedInts_to_named_shape(request.shape) + ) + ) + + def _get_max_shape( + self, + client: BioModelClient, + tensor_id: str, + param_shape: NamedParametrizedShape, + max_shape: NamedShape, + ) -> Optional[NamedShape]: + num_increment = InputTensorValidator.get_num_increments_from_param_shape(param_shape, max_shape) + if num_increment is None: + raise ValueError( + f"Invalid parameterized shape min: {param_shape}, with max: {max_shape}\n" + f"max != min + n * step, where n belongs to (0, 1, 2, ...)" + ) + + max_shape_arr = np.array(list(max_shape.values())) + step_shape_arr = 
np.array(list(param_shape.step_shape.values())) + for increment in range(num_increment + 1): + candidate_arr = max_shape_arr - increment * step_shape_arr + candidate_shape = get_axes_with_size(param_shape.axes, candidate_arr) + if not self._is_cuda_out_of_memory(client, tensor_id, candidate_shape): + return candidate_shape + return None + + def _is_cuda_out_of_memory(self, client: BioModelClient, tensor_id: str, shape: NamedShape) -> bool: + if not self._is_gpu(): + return False + is_out_of_memory = False + dummy_tensor = xarray.DataArray(np.random.rand(*shape.values()), dims=shape.keys()) + sample = Sample.from_xr_tensors(tensor_ids=[tensor_id], tensors_data=[dummy_tensor]) + try: + self._validated_forward(client, sample) + except RuntimeError as e: + if "out of memory" in str(e): + is_out_of_memory = True + print(f"Shape {shape} causes a CUDA out-of-memory error.") + else: + raise + finally: + gc.collect() # attempt to explicitly deallocate memory + torch.cuda.empty_cache() + return is_out_of_memory + + def _validated_forward(self, client: BioModelClient, sample: Sample): + validator = InputTensorValidator(client.input_specs) + validator.check_tensors(sample) + return client.api.forward(sample) + + def _is_gpu(self) -> bool: + return torch.cuda.is_available() + def _getModelSession(self, context, modelSessionId: str) -> Session: if not modelSessionId: context.abort(grpc.StatusCode.FAILED_PRECONDITION, "model-session-id has not been provided by client") diff --git a/tiktorch/server/session/process.py b/tiktorch/server/session/process.py index af46f0c6..2c3cedf1 100644 --- a/tiktorch/server/session/process.py +++ b/tiktorch/server/session/process.py @@ -10,13 +10,12 @@ from bioimageio.core.prediction_pipeline import PredictionPipeline, create_prediction_pipeline from tiktorch import log +from tiktorch.converters import Sample from tiktorch.rpc import Shutdown from tiktorch.rpc import mp as _mp_rpc from tiktorch.rpc.mp import BioModelClient, MPServer - -from ...converters import Sample -from .backend import base -from .rpc_interface import IRPCModelSession +from tiktorch.server.session import IRPCModelSession +from tiktorch.server.session.backend import base class ModelSessionProcess(IRPCModelSession[PredictionPipeline]): From 46ac782fade61a58f4e88ce7f68103fde76db230 Mon Sep 17 00:00:00 2001 From: Theodoros Katzalis Date: Mon, 19 Aug 2024 14:51:21 +0200 Subject: [PATCH 5/5] Add device id to cuda requests The current interface supports multiple device ids. To validate a CUDA memory request, i.e. to confirm that a GPU is actually present, the client must specify which device id to check against the available ones.
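Note for reviewers: the core of this series is the search performed by _get_max_shape, which assumes maxShape = minShape + n * stepShape and probes candidate shapes from the largest downwards until one fits into CUDA memory. The following minimal, self-contained sketch illustrates that search; it is not part of the patch, and find_max_fitting_shape and is_oom are hypothetical names (is_oom stands in for the validated forward pass):

    from typing import Callable, Dict, Optional

    NamedShape = Dict[str, int]

    def find_max_fitting_shape(
        min_shape: NamedShape,
        step: NamedShape,
        max_shape: NamedShape,
        is_oom: Callable[[NamedShape], bool],
    ) -> Optional[NamedShape]:
        axes = tuple(min_shape)
        # n such that max = min + n * step; every axis with a non-zero step
        # must agree on the same natural number n
        ratios = {(max_shape[a] - min_shape[a]) / step[a] for a in axes if step[a] != 0}
        if len(ratios) != 1:
            raise ValueError("max_shape must equal min_shape + n * step")
        n = ratios.pop()
        if n < 0 or n % 1 != 0:
            raise ValueError("max_shape must equal min_shape + n * step")
        # probe from max_shape downwards one step at a time, min_shape included
        for i in range(int(n) + 1):
            candidate = {a: max_shape[a] - i * step[a] for a in axes}
            if not is_oom(candidate):
                return candidate
        return None  # even min_shape does not fit

    # e.g. with a fake predicate that runs out of memory above 12 pixels:
    assert find_max_fitting_shape({"x": 10}, {"x": 2}, {"x": 16}, lambda s: s["x"] > 12) == {"x": 12}
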
--- proto/inference.proto | 6 +- .../test_grpc/test_inference_servicer.py | 66 ++++++++++++++++--- tiktorch/proto/inference_pb2.py | 54 +++++++++------ tiktorch/rpc/mp.py | 2 + tiktorch/server/grpc/inference_servicer.py | 12 ++-- tiktorch/server/session/process.py | 6 +- 6 files changed, 109 insertions(+), 37 deletions(-) diff --git a/proto/inference.proto b/proto/inference.proto index e4d66917..fa026b89 100644 --- a/proto/inference.proto +++ b/proto/inference.proto @@ -102,8 +102,9 @@ message Tensor { message IsCudaOutOfMemoryRequest { string modelSessionId = 1; - string tensorId = 3; - NamedInts shape = 2; + string tensorId = 2; + NamedInts shape = 3; + string deviceId = 4; } message IsCudaOutOfMemoryResponse { @@ -116,6 +117,7 @@ message MaxCudaMemoryShapeRequest { NamedInts stepShape = 3; NamedInts minShape = 4; NamedInts maxShape = 5; + string deviceId = 6; } message MaxCudaMemoryShapeResponse { diff --git a/tests/test_server/test_grpc/test_inference_servicer.py b/tests/test_server/test_grpc/test_inference_servicer.py index 788f24c9..3349c27f 100644 --- a/tests/test_server/test_grpc/test_inference_servicer.py +++ b/tests/test_server/test_grpc/test_inference_servicer.py @@ -36,9 +36,9 @@ def grpc_stub_cls(grpc_channel): return inference_pb2_grpc.InferenceStub -@pytest.fixture -def inference_servicer_gpu(): - with patch.object(InferenceServicer, "_is_gpu", lambda x: True): +@pytest.fixture() +def gpu_exists(): + with patch.object(InferenceServicer, "_check_gpu_exists", lambda *args: None): yield @@ -260,7 +260,7 @@ def to_pb_namedInts(self, shape: Tuple[int, ...]) -> inference_pb2.NamedInts: ) def test_max_cuda_memory( self, - inference_servicer_gpu, + gpu_exists, min_shape, max_shape, step_shape, @@ -275,15 +275,54 @@ def test_max_cuda_memory( model = grpc_stub.CreateModelSession(valid_model_request(bioimageio_dummy_cuda_out_of_memory_model_bytes)) res = grpc_stub.MaxCudaMemoryShape( inference_pb2.MaxCudaMemoryShapeRequest( - modelSessionId=model.id, tensorId="input", minShape=min_shape, maxShape=max_shape, stepShape=step_shape + modelSessionId=model.id, + tensorId="input", + deviceId="cuda:0", + minShape=min_shape, + maxShape=max_shape, + stepShape=step_shape, ) ) grpc_stub.CloseModelSession(model) assert res.maxShape == self.to_pb_namedInts(expected) - def test_max_cuda_memory_not_found( - self, inference_servicer_gpu, grpc_stub, bioimageio_dummy_cuda_out_of_memory_model_bytes + @pytest.mark.parametrize( + "min_shape, max_shape, step_shape, description", + [ + ((1, 1, 6, 6), (1, 1, 5, 5), (0, 0, 1, 1), "Max shape [1 1 5 5] smaller than min shape [1 1 6 6]"), + ((1, 1, 5, 5), (1, 1, 6, 6), (0, 0, 2, 1), "Invalid parameterized shape"), + ], + ) + def test_max_cuda_memory_invalid_request( + self, + description, + gpu_exists, + min_shape, + max_shape, + step_shape, + grpc_stub, + bioimageio_dummy_cuda_out_of_memory_model_bytes, ): + min_shape = self.to_pb_namedInts(min_shape) + max_shape = self.to_pb_namedInts(max_shape) + step_shape = self.to_pb_namedInts(step_shape) + + model = grpc_stub.CreateModelSession(valid_model_request(bioimageio_dummy_cuda_out_of_memory_model_bytes)) + with pytest.raises(grpc.RpcError) as error: + grpc_stub.MaxCudaMemoryShape( + inference_pb2.MaxCudaMemoryShapeRequest( + modelSessionId=model.id, + tensorId="input", + deviceId="cuda:0", + minShape=min_shape, + maxShape=max_shape, + stepShape=step_shape, + ) + ) + assert error.value.details().startswith(f"Exception calling application: {description}") + grpc_stub.CloseModelSession(model) + + def 
test_max_cuda_memory_not_found(self, gpu_exists, grpc_stub, bioimageio_dummy_cuda_out_of_memory_model_bytes): model = grpc_stub.CreateModelSession(valid_model_request(bioimageio_dummy_cuda_out_of_memory_model_bytes)) min_shape = self.to_pb_namedInts((1, 1, 11, 11)) max_shape = self.to_pb_namedInts((1, 1, 12, 12)) @@ -291,7 +330,12 @@ def test_max_cuda_memory_not_found( with pytest.raises(grpc.RpcError) as error: grpc_stub.MaxCudaMemoryShape( inference_pb2.MaxCudaMemoryShapeRequest( - modelSessionId=model.id, tensorId="input", minShape=min_shape, maxShape=max_shape, stepShape=step + modelSessionId=model.id, + tensorId="input", + deviceId="cuda:0", + minShape=min_shape, + maxShape=max_shape, + stepShape=step, ) ) assert error.value.code() == grpc.StatusCode.NOT_FOUND @@ -303,12 +347,14 @@ def test_max_cuda_memory_not_found( [((1, 1, 10, 10), False), ((1, 1, 99, 99), True)], ) def test_is_out_of_memory( - self, inference_servicer_gpu, shape, expected, grpc_stub, bioimageio_dummy_cuda_out_of_memory_model_bytes + self, gpu_exists, shape, expected, grpc_stub, bioimageio_dummy_cuda_out_of_memory_model_bytes ): model = grpc_stub.CreateModelSession(valid_model_request(bioimageio_dummy_cuda_out_of_memory_model_bytes)) shape = self.to_pb_namedInts(shape) res = grpc_stub.IsCudaOutOfMemory( - inference_pb2.IsCudaOutOfMemoryRequest(modelSessionId=model.id, tensorId="input", shape=shape) + inference_pb2.IsCudaOutOfMemoryRequest( + modelSessionId=model.id, tensorId="input", deviceId="cuda:0", shape=shape + ) ) grpc_stub.CloseModelSession(model) assert res.isCudaOutOfMemory is expected diff --git a/tiktorch/proto/inference_pb2.py b/tiktorch/proto/inference_pb2.py index 92a0fb0b..5cbdd161 100644 --- a/tiktorch/proto/inference_pb2.py +++ b/tiktorch/proto/inference_pb2.py @@ -20,7 +20,7 @@ syntax='proto3', serialized_options=None, create_key=_descriptor._internal_create_key, - serialized_pb=b'\n\x0finference.proto\"Y\n\x06\x44\x65vice\x12\n\n\x02id\x18\x01 \x01(\t\x12\x1e\n\x06status\x18\x02 \x01(\x0e\x32\x0e.Device.Status\"#\n\x06Status\x12\r\n\tAVAILABLE\x10\x00\x12\n\n\x06IN_USE\x10\x01\"W\n\x1f\x43reateDatasetDescriptionRequest\x12\x16\n\x0emodelSessionId\x18\x01 \x01(\t\x12\x0c\n\x04mean\x18\x03 \x01(\x01\x12\x0e\n\x06stddev\x18\x04 \x01(\x01\" \n\x12\x44\x61tasetDescription\x12\n\n\x02id\x18\x01 \x01(\t\"\'\n\x04\x42lob\x12\x0e\n\x06\x66ormat\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\x0c\"i\n\x19\x43reateModelSessionRequest\x12\x13\n\tmodel_uri\x18\x01 \x01(\tH\x00\x12\x1b\n\nmodel_blob\x18\x02 \x01(\x0b\x32\x05.BlobH\x00\x12\x11\n\tdeviceIds\x18\x05 \x03(\tB\x07\n\x05model\")\n\tNamedInts\x12\x1c\n\tnamedInts\x18\x01 \x03(\x0b\x32\t.NamedInt\"/\n\x0bNamedFloats\x12 \n\x0bnamedFloats\x18\x01 \x03(\x0b\x32\x0b.NamedFloat\"\x1a\n\x0cModelSession\x12\n\n\x02id\x18\x01 \x01(\t\"\x9e\x01\n\x08LogEntry\x12\x11\n\ttimestamp\x18\x01 \x01(\r\x12\x1e\n\x05level\x18\x02 \x01(\x0e\x32\x0f.LogEntry.Level\x12\x0f\n\x07\x63ontent\x18\x03 \x01(\t\"N\n\x05Level\x12\n\n\x06NOTSET\x10\x00\x12\t\n\x05\x44\x45\x42UG\x10\x01\x12\x08\n\x04INFO\x10\x02\x12\x0b\n\x07WARNING\x10\x03\x12\t\n\x05\x45RROR\x10\x04\x12\x0c\n\x08\x43RITICAL\x10\x05\"#\n\x07\x44\x65vices\x12\x18\n\x07\x64\x65vices\x18\x01 \x03(\x0b\x32\x07.Device\"&\n\x08NamedInt\x12\x0c\n\x04size\x18\x01 \x01(\r\x12\x0c\n\x04name\x18\x02 \x01(\t\"(\n\nNamedFloat\x12\x0c\n\x04size\x18\x01 \x01(\x02\x12\x0c\n\x04name\x18\x02 \x01(\t\"S\n\x06Tensor\x12\x0e\n\x06\x62uffer\x18\x01 \x01(\x0c\x12\r\n\x05\x64type\x18\x02 \x01(\t\x12\x10\n\x08tensorId\x18\x03 
\x01(\t\x12\x18\n\x05shape\x18\x04 \x03(\x0b\x32\t.NamedInt\"_\n\x18IsCudaOutOfMemoryRequest\x12\x16\n\x0emodelSessionId\x18\x01 \x01(\t\x12\x10\n\x08tensorId\x18\x03 \x01(\t\x12\x19\n\x05shape\x18\x02 \x01(\x0b\x32\n.NamedInts\"6\n\x19IsCudaOutOfMemoryResponse\x12\x19\n\x11isCudaOutOfMemory\x18\x01 \x01(\x08\"\xa0\x01\n\x19MaxCudaMemoryShapeRequest\x12\x16\n\x0emodelSessionId\x18\x01 \x01(\t\x12\x10\n\x08tensorId\x18\x02 \x01(\t\x12\x1d\n\tstepShape\x18\x03 \x01(\x0b\x32\n.NamedInts\x12\x1c\n\x08minShape\x18\x04 \x01(\x0b\x32\n.NamedInts\x12\x1c\n\x08maxShape\x18\x05 \x01(\x0b\x32\n.NamedInts\":\n\x1aMaxCudaMemoryShapeResponse\x12\x1c\n\x08maxShape\x18\x01 \x01(\x0b\x32\n.NamedInts\"U\n\x0ePredictRequest\x12\x16\n\x0emodelSessionId\x18\x01 \x01(\t\x12\x11\n\tdatasetId\x18\x02 \x01(\t\x12\x18\n\x07tensors\x18\x03 \x03(\x0b\x32\x07.Tensor\"+\n\x0fPredictResponse\x12\x18\n\x07tensors\x18\x01 \x03(\x0b\x32\x07.Tensor\"\x07\n\x05\x45mpty2\xe5\x03\n\tInference\x12\x41\n\x12\x43reateModelSession\x12\x1a.CreateModelSessionRequest\x1a\r.ModelSession\"\x00\x12,\n\x11\x43loseModelSession\x12\r.ModelSession\x1a\x06.Empty\"\x00\x12S\n\x18\x43reateDatasetDescription\x12 .CreateDatasetDescriptionRequest\x1a\x13.DatasetDescription\"\x00\x12 \n\x07GetLogs\x12\x06.Empty\x1a\t.LogEntry\"\x00\x30\x01\x12!\n\x0bListDevices\x12\x06.Empty\x1a\x08.Devices\"\x00\x12.\n\x07Predict\x12\x0f.PredictRequest\x1a\x10.PredictResponse\"\x00\x12L\n\x11IsCudaOutOfMemory\x12\x19.IsCudaOutOfMemoryRequest\x1a\x1a.IsCudaOutOfMemoryResponse\"\x00\x12O\n\x12MaxCudaMemoryShape\x12\x1a.MaxCudaMemoryShapeRequest\x1a\x1b.MaxCudaMemoryShapeResponse\"\x00\x32G\n\rFlightControl\x12\x18\n\x04Ping\x12\x06.Empty\x1a\x06.Empty\"\x00\x12\x1c\n\x08Shutdown\x12\x06.Empty\x1a\x06.Empty\"\x00\x62\x06proto3' + serialized_pb=b'\n\x0finference.proto\"Y\n\x06\x44\x65vice\x12\n\n\x02id\x18\x01 \x01(\t\x12\x1e\n\x06status\x18\x02 \x01(\x0e\x32\x0e.Device.Status\"#\n\x06Status\x12\r\n\tAVAILABLE\x10\x00\x12\n\n\x06IN_USE\x10\x01\"W\n\x1f\x43reateDatasetDescriptionRequest\x12\x16\n\x0emodelSessionId\x18\x01 \x01(\t\x12\x0c\n\x04mean\x18\x03 \x01(\x01\x12\x0e\n\x06stddev\x18\x04 \x01(\x01\" \n\x12\x44\x61tasetDescription\x12\n\n\x02id\x18\x01 \x01(\t\"\'\n\x04\x42lob\x12\x0e\n\x06\x66ormat\x18\x01 \x01(\t\x12\x0f\n\x07\x63ontent\x18\x02 \x01(\x0c\"i\n\x19\x43reateModelSessionRequest\x12\x13\n\tmodel_uri\x18\x01 \x01(\tH\x00\x12\x1b\n\nmodel_blob\x18\x02 \x01(\x0b\x32\x05.BlobH\x00\x12\x11\n\tdeviceIds\x18\x05 \x03(\tB\x07\n\x05model\")\n\tNamedInts\x12\x1c\n\tnamedInts\x18\x01 \x03(\x0b\x32\t.NamedInt\"/\n\x0bNamedFloats\x12 \n\x0bnamedFloats\x18\x01 \x03(\x0b\x32\x0b.NamedFloat\"\x1a\n\x0cModelSession\x12\n\n\x02id\x18\x01 \x01(\t\"\x9e\x01\n\x08LogEntry\x12\x11\n\ttimestamp\x18\x01 \x01(\r\x12\x1e\n\x05level\x18\x02 \x01(\x0e\x32\x0f.LogEntry.Level\x12\x0f\n\x07\x63ontent\x18\x03 \x01(\t\"N\n\x05Level\x12\n\n\x06NOTSET\x10\x00\x12\t\n\x05\x44\x45\x42UG\x10\x01\x12\x08\n\x04INFO\x10\x02\x12\x0b\n\x07WARNING\x10\x03\x12\t\n\x05\x45RROR\x10\x04\x12\x0c\n\x08\x43RITICAL\x10\x05\"#\n\x07\x44\x65vices\x12\x18\n\x07\x64\x65vices\x18\x01 \x03(\x0b\x32\x07.Device\"&\n\x08NamedInt\x12\x0c\n\x04size\x18\x01 \x01(\r\x12\x0c\n\x04name\x18\x02 \x01(\t\"(\n\nNamedFloat\x12\x0c\n\x04size\x18\x01 \x01(\x02\x12\x0c\n\x04name\x18\x02 \x01(\t\"S\n\x06Tensor\x12\x0e\n\x06\x62uffer\x18\x01 \x01(\x0c\x12\r\n\x05\x64type\x18\x02 \x01(\t\x12\x10\n\x08tensorId\x18\x03 \x01(\t\x12\x18\n\x05shape\x18\x04 
\x03(\x0b\x32\t.NamedInt\"q\n\x18IsCudaOutOfMemoryRequest\x12\x16\n\x0emodelSessionId\x18\x01 \x01(\t\x12\x10\n\x08tensorId\x18\x02 \x01(\t\x12\x19\n\x05shape\x18\x03 \x01(\x0b\x32\n.NamedInts\x12\x10\n\x08\x64\x65viceId\x18\x04 \x01(\t\"6\n\x19IsCudaOutOfMemoryResponse\x12\x19\n\x11isCudaOutOfMemory\x18\x01 \x01(\x08\"\xb2\x01\n\x19MaxCudaMemoryShapeRequest\x12\x16\n\x0emodelSessionId\x18\x01 \x01(\t\x12\x10\n\x08tensorId\x18\x02 \x01(\t\x12\x1d\n\tstepShape\x18\x03 \x01(\x0b\x32\n.NamedInts\x12\x1c\n\x08minShape\x18\x04 \x01(\x0b\x32\n.NamedInts\x12\x1c\n\x08maxShape\x18\x05 \x01(\x0b\x32\n.NamedInts\x12\x10\n\x08\x64\x65viceId\x18\x06 \x01(\t\":\n\x1aMaxCudaMemoryShapeResponse\x12\x1c\n\x08maxShape\x18\x01 \x01(\x0b\x32\n.NamedInts\"U\n\x0ePredictRequest\x12\x16\n\x0emodelSessionId\x18\x01 \x01(\t\x12\x11\n\tdatasetId\x18\x02 \x01(\t\x12\x18\n\x07tensors\x18\x03 \x03(\x0b\x32\x07.Tensor\"+\n\x0fPredictResponse\x12\x18\n\x07tensors\x18\x01 \x03(\x0b\x32\x07.Tensor\"\x07\n\x05\x45mpty2\xe5\x03\n\tInference\x12\x41\n\x12\x43reateModelSession\x12\x1a.CreateModelSessionRequest\x1a\r.ModelSession\"\x00\x12,\n\x11\x43loseModelSession\x12\r.ModelSession\x1a\x06.Empty\"\x00\x12S\n\x18\x43reateDatasetDescription\x12 .CreateDatasetDescriptionRequest\x1a\x13.DatasetDescription\"\x00\x12 \n\x07GetLogs\x12\x06.Empty\x1a\t.LogEntry\"\x00\x30\x01\x12!\n\x0bListDevices\x12\x06.Empty\x1a\x08.Devices\"\x00\x12.\n\x07Predict\x12\x0f.PredictRequest\x1a\x10.PredictResponse\"\x00\x12L\n\x11IsCudaOutOfMemory\x12\x19.IsCudaOutOfMemoryRequest\x1a\x1a.IsCudaOutOfMemoryResponse\"\x00\x12O\n\x12MaxCudaMemoryShape\x12\x1a.MaxCudaMemoryShapeRequest\x1a\x1b.MaxCudaMemoryShapeResponse\"\x00\x32G\n\rFlightControl\x12\x18\n\x04Ping\x12\x06.Empty\x1a\x06.Empty\"\x00\x12\x1c\n\x08Shutdown\x12\x06.Empty\x1a\x06.Empty\"\x00\x62\x06proto3' ) @@ -627,18 +627,25 @@ serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), _descriptor.FieldDescriptor( name='tensorId', full_name='IsCudaOutOfMemoryRequest.tensorId', index=1, - number=3, type=9, cpp_type=9, label=1, + number=2, type=9, cpp_type=9, label=1, has_default_value=False, default_value=b"".decode('utf-8'), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), _descriptor.FieldDescriptor( name='shape', full_name='IsCudaOutOfMemoryRequest.shape', index=2, - number=2, type=11, cpp_type=10, label=1, + number=3, type=11, cpp_type=10, label=1, has_default_value=False, default_value=None, message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + _descriptor.FieldDescriptor( + name='deviceId', full_name='IsCudaOutOfMemoryRequest.deviceId', index=3, + number=4, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=b"".decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), ], extensions=[ ], @@ -652,7 +659,7 @@ oneofs=[ ], serialized_start=866, - serialized_end=961, + serialized_end=979, ) @@ -683,8 +690,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=963, - serialized_end=1017, + serialized_start=981, + serialized_end=1035, ) @@ -731,6 +738,13 @@ message_type=None, enum_type=None, containing_type=None, 
is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), + _descriptor.FieldDescriptor( + name='deviceId', full_name='MaxCudaMemoryShapeRequest.deviceId', index=5, + number=6, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=b"".decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR, create_key=_descriptor._internal_create_key), ], extensions=[ ], @@ -743,8 +757,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=1020, - serialized_end=1180, + serialized_start=1038, + serialized_end=1216, ) @@ -775,8 +789,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=1182, - serialized_end=1240, + serialized_start=1218, + serialized_end=1276, ) @@ -821,8 +835,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=1242, - serialized_end=1327, + serialized_start=1278, + serialized_end=1363, ) @@ -853,8 +867,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=1329, - serialized_end=1372, + serialized_start=1365, + serialized_end=1408, ) @@ -878,8 +892,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=1374, - serialized_end=1381, + serialized_start=1410, + serialized_end=1417, ) _DEVICE.fields_by_name['status'].enum_type = _DEVICE_STATUS @@ -1075,8 +1089,8 @@ index=0, serialized_options=None, create_key=_descriptor._internal_create_key, - serialized_start=1384, - serialized_end=1869, + serialized_start=1420, + serialized_end=1905, methods=[ _descriptor.MethodDescriptor( name='CreateModelSession', @@ -1171,8 +1185,8 @@ index=1, serialized_options=None, create_key=_descriptor._internal_create_key, - serialized_start=1871, - serialized_end=1942, + serialized_start=1907, + serialized_end=1978, methods=[ _descriptor.MethodDescriptor( name='Ping', diff --git a/tiktorch/rpc/mp.py b/tiktorch/rpc/mp.py index 0cd69bf4..b7e448da 100644 --- a/tiktorch/rpc/mp.py +++ b/tiktorch/rpc/mp.py @@ -112,9 +112,11 @@ class _Api: @dataclasses.dataclass(frozen=True) class BioModelClient: + name: str api: IRPCModelSession input_specs: List[nodes.InputTensor] output_specs: List[nodes.OutputTensor] + devices: List[str] class MPClient: diff --git a/tiktorch/server/grpc/inference_servicer.py b/tiktorch/server/grpc/inference_servicer.py index 54fa7e0c..b17b7d67 100644 --- a/tiktorch/server/grpc/inference_servicer.py +++ b/tiktorch/server/grpc/inference_servicer.py @@ -98,6 +98,7 @@ def MaxCudaMemoryShape( self, request: inference_pb2.MaxCudaMemoryShapeRequest, context ) -> inference_pb2.MaxCudaMemoryShapeResponse: session = self._getModelSession(context, request.modelSessionId) + self._check_gpu_exists(session.bio_model_client, request.deviceId) min_shape = pb_NamedInts_to_named_shape(request.minShape) step_shape = pb_NamedInts_to_named_shape(request.stepShape) max_shape = pb_NamedInts_to_named_shape(request.maxShape) @@ -115,6 +116,7 @@ def IsCudaOutOfMemory( self, request: inference_pb2.IsCudaOutOfMemoryRequest, context ) -> inference_pb2.IsCudaOutOfMemoryResponse: session = self._getModelSession(context, request.modelSessionId) + self._check_gpu_exists(session.bio_model_client, request.deviceId) return inference_pb2.IsCudaOutOfMemoryResponse( isCudaOutOfMemory=self._is_cuda_out_of_memory( session.bio_model_client, request.tensorId, pb_NamedInts_to_named_shape(request.shape) @@ -145,8 +147,6 @@ def _get_max_shape( return None def _is_cuda_out_of_memory(self, client: BioModelClient, tensor_id: str, shape: 
NamedShape) -> bool: - if not self._is_gpu(): - return False is_out_of_memory = False dummy_tensor = xarray.DataArray(np.random.rand(*shape.values()), dims=shape.keys()) sample = Sample.from_xr_tensors(tensor_ids=[tensor_id], tensors_data=[dummy_tensor]) try: @@ -168,8 +168,12 @@ def _validated_forward(self, client: BioModelClient, sample: Sample): validator.check_tensors(sample) return client.api.forward(sample) - def _is_gpu(self) -> bool: - return torch.cuda.is_available() + def _check_gpu_exists(self, client: BioModelClient, device_id: str): + gpu_device_ids = [device.id for device in self.__device_pool.list_devices() if device.id.startswith("cuda")] + if len(gpu_device_ids) == 0: + raise ValueError("No available GPUs found") + if device_id not in client.devices: + raise ValueError(f"{device_id} not found for model {client.name}") def _getModelSession(self, context, modelSessionId: str) -> Session: if not modelSessionId: diff --git a/tiktorch/server/session/process.py b/tiktorch/server/session/process.py index 2c3cedf1..d898f97a 100644 --- a/tiktorch/server/session/process.py +++ b/tiktorch/server/session/process.py @@ -76,7 +76,11 @@ def start_model_session_process( proc.start() api = _mp_rpc.create_client_api(iface_cls=IRPCModelSession, conn=client_conn) return proc, BioModelClient( - input_specs=prediction_pipeline.input_specs, output_specs=prediction_pipeline.output_specs, api=api + name=prediction_pipeline.name, + devices=devices, + input_specs=prediction_pipeline.input_specs, + output_specs=prediction_pipeline.output_specs, + api=api, )
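End of series. For context, here is a hypothetical client-side use of the two new RPCs; it is not part of the patches, and the server address, model zip path, and axis names (b, c, y, x) are placeholders:

    import grpc

    from tiktorch.proto import inference_pb2, inference_pb2_grpc

    def to_named_ints(shape: dict) -> inference_pb2.NamedInts:
        # axis-name/size pairs in the wire format used by the new requests
        return inference_pb2.NamedInts(
            namedInts=[inference_pb2.NamedInt(name=n, size=s) for n, s in shape.items()]
        )

    stub = inference_pb2_grpc.InferenceStub(grpc.insecure_channel("127.0.0.1:5567"))
    model = stub.CreateModelSession(
        inference_pb2.CreateModelSessionRequest(
            model_blob=inference_pb2.Blob(content=open("model.zip", "rb").read()),
            deviceIds=["cuda:0"],
        )
    )
    try:
        # probe one concrete shape
        res = stub.IsCudaOutOfMemory(
            inference_pb2.IsCudaOutOfMemoryRequest(
                modelSessionId=model.id,
                tensorId="input",
                deviceId="cuda:0",
                shape=to_named_ints({"b": 1, "c": 1, "y": 128, "x": 128}),
            )
        )
        print(res.isCudaOutOfMemory)
        # search the largest shape that fits, where maxShape = minShape + n * stepShape
        best = stub.MaxCudaMemoryShape(
            inference_pb2.MaxCudaMemoryShapeRequest(
                modelSessionId=model.id,
                tensorId="input",
                deviceId="cuda:0",
                minShape=to_named_ints({"b": 1, "c": 1, "y": 64, "x": 64}),
                stepShape=to_named_ints({"b": 0, "c": 0, "y": 16, "x": 16}),
                maxShape=to_named_ints({"b": 1, "c": 1, "y": 256, "x": 256}),
            )
        )
        print({d.name: d.size for d in best.maxShape.namedInts})
    finally:
        stub.CloseModelSession(model)

MaxCudaMemoryShape answers with the largest probed shape that fits, or fails with NOT_FOUND when even minShape runs out of memory.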