diff --git a/benchmarking/testserver/models/text-model/model-settings.json b/benchmarking/testserver/models/text-model/model-settings.json deleted file mode 100644 index 12eed8415..000000000 --- a/benchmarking/testserver/models/text-model/model-settings.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "name": "text-model", - - "implementation": "text_model.TextModel", - - "versions": ["text-model/v1.2.3"], - "platform": "mlserver", - "inputs": [ - { - "datatype": "BYTES", - "name": "prompt", - "shape": [1] - } - ], - "outputs": [ - { - "datatype": "BYTES", - "name": "output", - "shape": [1] - } - ] -} diff --git a/benchmarking/testserver/models/text-model/settings.json b/benchmarking/testserver/models/text-model/settings.json deleted file mode 100644 index b5af111b4..000000000 --- a/benchmarking/testserver/models/text-model/settings.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "debug": false, - "parallel_workers": 0, - "gzip_enabled": false, - "metrics_endpoint": null -} diff --git a/benchmarking/testserver/models/text-model/text_model.py b/benchmarking/testserver/models/text-model/text_model.py deleted file mode 100644 index 4475b3c92..000000000 --- a/benchmarking/testserver/models/text-model/text_model.py +++ /dev/null @@ -1,45 +0,0 @@ -import asyncio -from typing import AsyncIterator -from mlserver import MLModel -from mlserver.types import InferenceRequest, InferenceResponse -from mlserver.codecs import StringCodec - - -class TextModel(MLModel): - - async def predict(self, payload: InferenceRequest) -> InferenceResponse: - text = StringCodec.decode_input(payload.inputs[0])[0] - return InferenceResponse( - model_name=self._settings.name, - outputs=[ - StringCodec.encode_output( - name="output", - payload=[text], - use_bytes=True, - ), - ], - ) - - async def predict_stream( - self, payloads: AsyncIterator[InferenceRequest] - ) -> AsyncIterator[InferenceResponse]: - payload = [_ async for _ in payloads][0] - text = StringCodec.decode_input(payload.inputs[0])[0] - words = text.split(" ") - - split_text = [] - for i, word in enumerate(words): - split_text.append(word if i == 0 else " " + word) - - for word in split_text: - await asyncio.sleep(0.5) - yield InferenceResponse( - model_name=self._settings.name, - outputs=[ - StringCodec.encode_output( - name="output", - payload=[word], - use_bytes=True, - ), - ], - )