Commit 7faec4d
vadimkantorov committed Sep 21, 2023
1 parent c2f3ae9 commit 7faec4d
Showing 6 changed files with 147 additions and 1 deletion.
19 changes: 19 additions & 0 deletions .github/workflows/buildtritoninferenceserver.yml
@@ -0,0 +1,19 @@
# assumption: a workflow must declare at least one trigger to run; push is used here
on: push

jobs:
  buildtritoninferenceserver:
    runs-on: ubuntu-22.04
    steps:
      - name: Install Prerequisites
        run: sudo add-apt-repository ppa:mhier/libboost-latest && sudo apt-get update && sudo apt-get install -y git build-essential cmake rapidjson-dev libssl-dev libre2-dev libb64-dev libarchive-dev libboost1.81-dev

      - name: Clone Triton
        run: git clone https://github.com/triton-inference-server/server --branch r23.08 --single-branch --depth 1

      - name: Build Triton
        run: cd server && python3 ./build.py -v --no-container-build --enable-logging --enable-stats --enable-tracing --build-dir="$PWD/build" --backend python --extra-core-cmake-arg=TRITON_ENABLE_GRPC=OFF --extra-core-cmake-arg=TRITON_ENABLE_HTTP=ON --extra-core-cmake-arg=TRITON_ENABLE_ENSEMBLE=ON

      - name: Archive artifacts
        uses: actions/upload-artifact@v2
        with:
          name: tritoninferenceserver
          path: server/build/opt/tritonserver/
33 changes: 32 additions & 1 deletion README.md
@@ -1 +1,32 @@
# tritoninferernceprimer
## Example of building and running NVIDIA Triton Inference Server on a CPU-only, Docker-less system

Here we have two example Python models, modelA and modelB, each accepting a UINT8 byte tensor that carries a serialized string (e.g. a JSON object) and returning a prefixed copy of it. An ensemble model named pipeline chains them, feeding modelA's output into modelB's input.

```shell
## https://github.com/triton-inference-server/server/blob/5dd9398dd76a90a117ce6b3052e15561337fe88b/build.py#L1006-L1009
#sudo add-apt-repository ppa:mhier/libboost-latest
#sudo apt-get update
#sudo apt install cmake rapidjson-dev libssl-dev libre2-dev libb64-dev libarchive-dev libboost1.81-dev
#git clone https://github.com/triton-inference-server/server --branch r23.08 --single-branch --depth 1
#pushd server
#python3 ./build.py -v --no-container-build --enable-logging --enable-stats --enable-tracing --build-dir="$PWD/build" --backend python --extra-core-cmake-arg=TRITON_ENABLE_GRPC=OFF --extra-core-cmake-arg=TRITON_ENABLE_HTTP=ON --extra-core-cmake-arg=TRITON_ENABLE_ENSEMBLE=ON
#export PATH=$PWD/build/opt/tritonserver/bin/:$PATH
#sudo ln -s $PWD/build/opt/tritonserver /opt
#popd

# serve the three models from this repo: modelA, modelB and the pipeline ensemble
tritonserver --model-repository $PWD/models

curl -i http://localhost:8000/v2/health/ready
# HTTP/1.1 200 OK

curl -i -X POST localhost:8000/v2/models/modelA/infer -H 'Inference-Header-Content-Length: 138' -H "Content-Type: application/octet-stream" --data-binary '{"inputs":[{"name":"INPUT0","shape":[5],"datatype":"UINT8","parameters":{"binary_data_size":5}}],"parameters":{"binary_data_output":true}}hello'
# response payload ends with: modelA:hello

curl -i -X POST localhost:8000/v2/models/modelB/infer -H 'Inference-Header-Content-Length: 138' -H "Content-Type: application/octet-stream" --data-binary '{"inputs":[{"name":"INPUT0","shape":[5],"datatype":"UINT8","parameters":{"binary_data_size":5}}],"parameters":{"binary_data_output":true}}hello'
# response payload ends with: modelB:hello
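
# the ensemble defined in models/pipeline/config.pbtxt chains both models;
# assuming it loaded, the response payload should end with: modelB:modelA:hello
curl -i -X POST localhost:8000/v2/models/pipeline/infer -H 'Inference-Header-Content-Length: 138' -H "Content-Type: application/octet-stream" --data-binary '{"inputs":[{"name":"INPUT0","shape":[5],"datatype":"UINT8","parameters":{"binary_data_size":5}}],"parameters":{"binary_data_output":true}}hello'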

```
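
The `Inference-Header-Content-Length` value above (138) is the byte length of the JSON header that precedes the raw tensor bytes in the request body. Instead of computing it by hand, the official `tritonclient` package can drive the same binary protocol; the sketch below assumes it is installed via `pip install 'tritonclient[http]'` (it is not part of this repo):

```python
import numpy as np
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient(url="localhost:8000")

# encode the payload string as the UINT8 byte tensor the models expect
data = np.frombuffer(b"hello", dtype=np.uint8)
inputs = [httpclient.InferInput("INPUT0", [len(data)], "UINT8")]
inputs[0].set_data_from_numpy(data, binary_data=True)
outputs = [httpclient.InferRequestedOutput("OUTPUT0", binary_data=True)]

# "pipeline" routes the request through modelA and then modelB
result = client.infer("pipeline", inputs=inputs, outputs=outputs)
print(bytes(result.as_numpy("OUTPUT0")).decode("utf8"))  # modelB:modelA:hello
```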

## References
- https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/build.md#cpu-only-build
- https://github.com/triton-inference-server/python_backend
- https://github.com/triton-inference-server/python_backend/tree/main/examples/preprocessing
- https://github.com/triton-inference-server/python_backend/tree/main/examples/auto_complete
26 changes: 26 additions & 0 deletions models/modelA/1/model.py
@@ -0,0 +1,26 @@
import json
import numpy as np
import triton_python_backend_utils as pb_utils

class TritonPythonModel:
    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        # declare a single variable-length UINT8 input/output and disable batching
        auto_complete_model_config.add_input({"name": "INPUT0", "data_type": "TYPE_UINT8", "dims": [-1]})
        auto_complete_model_config.add_output({"name": "OUTPUT0", "data_type": "TYPE_UINT8", "dims": [-1]})
        auto_complete_model_config.set_max_batch_size(0)
        return auto_complete_model_config

    def execute(self, requests):
        responses = []
        for request in requests:
            # decode the UINT8 byte tensor into a Python string
            in_numpy = pb_utils.get_input_tensor_by_name(request, "INPUT0").as_numpy()
            in_str = str(bytes(in_numpy), 'utf8')
            # JSON variant of the same transformation, kept for reference:
            #in_obj = json.loads(in_str)
            #out_obj = in_obj.copy()
            #out_obj['foo'] = 'modelA: ' + out_obj['foo']
            #out_str = json.dumps(out_obj)
            out_str = 'modelA:' + in_str
            # re-encode the string as a UINT8 byte tensor
            out_numpy = np.frombuffer(bytes(out_str, 'utf8'), dtype=np.uint8)
            out_pb = pb_utils.Tensor("OUTPUT0", out_numpy)
            responses.append(pb_utils.InferenceResponse(output_tensors=[out_pb]))
        return responses
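
The string-to-tensor conversion in `execute` is plain `numpy`; a standalone sketch of the round trip (no Triton required):

```python
import numpy as np

# string -> UINT8 byte tensor, as the client sends it
as_tensor = np.frombuffer(bytes("hello", "utf8"), dtype=np.uint8)

# UINT8 byte tensor -> string, as execute() decodes it
as_str = str(bytes(as_tensor), "utf8")
assert as_str == "hello"
```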
22 changes: 22 additions & 0 deletions models/modelB/1/model.py
@@ -0,0 +1,22 @@
import json
import numpy as np
import triton_python_backend_utils as pb_utils

class TritonPythonModel:
    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        auto_complete_model_config.add_input({"name": "INPUT0", "data_type": "TYPE_UINT8", "dims": [-1]})
        auto_complete_model_config.add_output({"name": "OUTPUT0", "data_type": "TYPE_UINT8", "dims": [-1]})
        auto_complete_model_config.set_max_batch_size(0)
        return auto_complete_model_config

    def execute(self, requests):
        responses = []
        for request in requests:
            # same as modelA, but prefixes the decoded string with 'modelB:'
            in_numpy = pb_utils.get_input_tensor_by_name(request, "INPUT0").as_numpy()
            in_str = str(bytes(in_numpy), 'utf8')
            out_str = 'modelB:' + in_str
            out_numpy = np.frombuffer(bytes(out_str, 'utf8'), dtype=np.uint8)
            out_pb = pb_utils.Tensor("OUTPUT0", out_numpy)
            responses.append(pb_utils.InferenceResponse(output_tensors=[out_pb]))
        return responses
Empty file added models/pipeline/1/.gitignore (the ensemble has no code of its own, but Triton still expects a model version directory to exist)
48 changes: 48 additions & 0 deletions models/pipeline/config.pbtxt
@@ -0,0 +1,48 @@
name: "pipeline"
max_batch_size: 0
platform: "ensemble"

input [
{
name: "INPUT0"
data_type: TYPE_UINT8
dims: [ -1 ]
}
]

output [
{
name: "OUTPUT0"
data_type: TYPE_UINT8
dims: [ -1 ]
}
]

ensemble_scheduling {
step [
{
model_name: "modelA"
model_version: -1
input_map {
key: "INPUT0"
value: "INPUT0"
}
output_map {
key: "OUTPUT0"
value: "modelA_output"
}
},
{
model_name: "modelB"
model_version: -1
input_map {
key: "INPUT0"
value: "modelA_output"
}
output_map {
key: "OUTPUT0"
value: "OUTPUT0"
}
}
]
}
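
To check that the ensemble wiring (the `input_map`/`output_map` entries above) loaded as intended, its metadata can be queried from a running server; a minimal sketch, again assuming the `tritonclient` package from earlier:

```python
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient(url="localhost:8000")
# should list INPUT0/OUTPUT0 with datatype UINT8 and shape [-1] if the config was applied
print(client.get_model_metadata("pipeline"))
```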
