Commit 7faec4d
vadimkantorov committed Sep 21, 2023
1 parent c2f3ae9 commit 7faec4d
Showing 6 changed files with 147 additions and 1 deletion.
19 changes: 19 additions & 0 deletions .github/workflows/buildtritoninferenceserver.yml
@@ -0,0 +1,19 @@
# assumption: a workflow must declare at least one trigger to run; push is used here
on: push

jobs:
  buildtritoninferenceserver:
    runs-on: ubuntu-22.04
    steps:
      - name: Install Prerequisites
        run: sudo add-apt-repository ppa:mhier/libboost-latest && sudo apt-get update && sudo apt-get install -y git build-essential cmake rapidjson-dev libssl-dev libre2-dev libb64-dev libarchive-dev libboost1.81-dev

      - name: Clone Triton
        run: git clone https://github.com/triton-inference-server/server --branch r23.08 --single-branch --depth 1

      - name: Build Triton
        run: cd server && python3 ./build.py -v --no-container-build --enable-logging --enable-stats --enable-tracing --build-dir="$PWD/build" --backend python --extra-core-cmake-arg=TRITON_ENABLE_GRPC=OFF --extra-core-cmake-arg=TRITON_ENABLE_HTTP=ON --extra-core-cmake-arg=TRITON_ENABLE_ENSEMBLE=ON

      - name: Archive artifacts
        uses: actions/upload-artifact@v2
        with:
          name: tritoninferenceserver
          path: server/build/opt/tritonserver/
33 changes: 32 additions & 1 deletion README.md
@@ -1 +1,32 @@
# tritoninferernceprimer
## Example of building and running NVIDIA Triton Inference Server on a CPU-only, Docker-less system

Here we have two example Python models, modelA and modelB, each accepting a UINT8 byte tensor that carries a serialized string (e.g. a JSON object) and returning a prefixed copy of it. An ensemble model named pipeline chains them, feeding modelA's output into modelB's input.

```shell
## https://github.com/triton-inference-server/server/blob/5dd9398dd76a90a117ce6b3052e15561337fe88b/build.py#L1006-L1009
#sudo add-apt-repository ppa:mhier/libboost-latest
#sudo apt-get update
#sudo apt install cmake rapidjson-dev libssl-dev libre2-dev libb64-dev libarchive-dev libboost1.81-dev
#git clone https://github.com/triton-inference-server/server --branch r23.08 --single-branch --depth 1
#pushd server
#python3 ./build.py -v --no-container-build --enable-logging --enable-stats --enable-tracing --build-dir="$PWD/build" --backend python --extra-core-cmake-arg=TRITON_ENABLE_GRPC=OFF --extra-core-cmake-arg=TRITON_ENABLE_HTTP=ON --extra-core-cmake-arg=TRITON_ENABLE_ENSEMBLE=ON
#export PATH=$PWD/build/opt/tritonserver/bin/:$PATH
#sudo ln -s $PWD/build/opt/tritonserver /opt
#popd

# serve the three models from this repo: modelA, modelB and the pipeline ensemble
tritonserver --model-repository $PWD/models

curl -i http://localhost:8000/v2/health/ready
# HTTP/1.1 200 OK

curl -i -X POST localhost:8000/v2/models/modelA/infer -H 'Inference-Header-Content-Length: 138' -H "Content-Type: application/octet-stream" --data-binary '{"inputs":[{"name":"INPUT0","shape":[5],"datatype":"UINT8","parameters":{"binary_data_size":5}}],"parameters":{"binary_data_output":true}}hello'
# response payload ends with: modelA:hello

curl -i -X POST localhost:8000/v2/models/modelB/infer -H 'Inference-Header-Content-Length: 138' -H "Content-Type: application/octet-stream" --data-binary '{"inputs":[{"name":"INPUT0","shape":[5],"datatype":"UINT8","parameters":{"binary_data_size":5}}],"parameters":{"binary_data_output":true}}hello'
# response payload ends with: modelB:hello
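
# the ensemble defined in models/pipeline/config.pbtxt chains both models;
# assuming it loaded, the response payload should end with: modelB:modelA:hello
curl -i -X POST localhost:8000/v2/models/pipeline/infer -H 'Inference-Header-Content-Length: 138' -H "Content-Type: application/octet-stream" --data-binary '{"inputs":[{"name":"INPUT0","shape":[5],"datatype":"UINT8","parameters":{"binary_data_size":5}}],"parameters":{"binary_data_output":true}}hello'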

```
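
The `Inference-Header-Content-Length` value above (138) is the byte length of the JSON header that precedes the raw tensor bytes in the request body. Instead of computing it by hand, the official `tritonclient` package can drive the same binary protocol; the sketch below assumes it is installed via `pip install 'tritonclient[http]'` (it is not part of this repo):

```python
import numpy as np
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient(url="localhost:8000")

# encode the payload string as the UINT8 byte tensor the models expect
data = np.frombuffer(b"hello", dtype=np.uint8)
inputs = [httpclient.InferInput("INPUT0", [len(data)], "UINT8")]
inputs[0].set_data_from_numpy(data, binary_data=True)
outputs = [httpclient.InferRequestedOutput("OUTPUT0", binary_data=True)]

# "pipeline" routes the request through modelA and then modelB
result = client.infer("pipeline", inputs=inputs, outputs=outputs)
print(bytes(result.as_numpy("OUTPUT0")).decode("utf8"))  # modelB:modelA:hello
```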

## References
- https://github.com/triton-inference-server/server/blob/main/docs/customization_guide/build.md#cpu-only-build
- https://github.com/triton-inference-server/python_backend
- https://github.com/triton-inference-server/python_backend/tree/main/examples/preprocessing
- https://github.com/triton-inference-server/python_backend/tree/main/examples/auto_complete
26 changes: 26 additions & 0 deletions models/modelA/1/model.py
@@ -0,0 +1,26 @@
import json
import numpy as np
import triton_python_backend_utils as pb_utils

class TritonPythonModel:
    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        # declare a single variable-length UINT8 input/output and disable batching
        auto_complete_model_config.add_input({"name": "INPUT0", "data_type": "TYPE_UINT8", "dims": [-1]})
        auto_complete_model_config.add_output({"name": "OUTPUT0", "data_type": "TYPE_UINT8", "dims": [-1]})
        auto_complete_model_config.set_max_batch_size(0)
        return auto_complete_model_config

    def execute(self, requests):
        responses = []
        for request in requests:
            # decode the UINT8 byte tensor into a Python string
            in_numpy = pb_utils.get_input_tensor_by_name(request, "INPUT0").as_numpy()
            in_str = str(bytes(in_numpy), 'utf8')
            # JSON variant of the same transformation, kept for reference:
            #in_obj = json.loads(in_str)
            #out_obj = in_obj.copy()
            #out_obj['foo'] = 'modelA: ' + out_obj['foo']
            #out_str = json.dumps(out_obj)
            out_str = 'modelA:' + in_str
            # re-encode the string as a UINT8 byte tensor
            out_numpy = np.frombuffer(bytes(out_str, 'utf8'), dtype=np.uint8)
            out_pb = pb_utils.Tensor("OUTPUT0", out_numpy)
            responses.append(pb_utils.InferenceResponse(output_tensors=[out_pb]))
        return responses
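
The string-to-tensor conversion in `execute` is plain `numpy`; a standalone sketch of the round trip (no Triton required):

```python
import numpy as np

# string -> UINT8 byte tensor, as the client sends it
as_tensor = np.frombuffer(bytes("hello", "utf8"), dtype=np.uint8)

# UINT8 byte tensor -> string, as execute() decodes it
as_str = str(bytes(as_tensor), "utf8")
assert as_str == "hello"
```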
22 changes: 22 additions & 0 deletions models/modelB/1/model.py
@@ -0,0 +1,22 @@
import json
import numpy as np
import triton_python_backend_utils as pb_utils

class TritonPythonModel:
    @staticmethod
    def auto_complete_config(auto_complete_model_config):
        auto_complete_model_config.add_input({"name": "INPUT0", "data_type": "TYPE_UINT8", "dims": [-1]})
        auto_complete_model_config.add_output({"name": "OUTPUT0", "data_type": "TYPE_UINT8", "dims": [-1]})
        auto_complete_model_config.set_max_batch_size(0)
        return auto_complete_model_config

    def execute(self, requests):
        responses = []
        for request in requests:
            # same as modelA, but prefixes the decoded string with 'modelB:'
            in_numpy = pb_utils.get_input_tensor_by_name(request, "INPUT0").as_numpy()
            in_str = str(bytes(in_numpy), 'utf8')
            out_str = 'modelB:' + in_str
            out_numpy = np.frombuffer(bytes(out_str, 'utf8'), dtype=np.uint8)
            out_pb = pb_utils.Tensor("OUTPUT0", out_numpy)
            responses.append(pb_utils.InferenceResponse(output_tensors=[out_pb]))
        return responses
Empty file added models/pipeline/1/.gitignore (the ensemble has no code of its own, but Triton still expects a model version directory to exist)
48 changes: 48 additions & 0 deletions models/pipeline/config.pbtxt
@@ -0,0 +1,48 @@
name: "pipeline"
max_batch_size: 0
platform: "ensemble"

input [
{
name: "INPUT0"
data_type: TYPE_UINT8
dims: [ -1 ]
}
]

output [
{
name: "OUTPUT0"
data_type: TYPE_UINT8
dims: [ -1 ]
}
]

ensemble_scheduling {
step [
{
model_name: "modelA"
model_version: -1
input_map {
key: "INPUT0"
value: "INPUT0"
}
output_map {
key: "OUTPUT0"
value: "modelA_output"
}
},
{
model_name: "modelB"
model_version: -1
input_map {
key: "INPUT0"
value: "modelA_output"
}
output_map {
key: "OUTPUT0"
value: "OUTPUT0"
}
}
]
}
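
To check that the ensemble wiring (the `input_map`/`output_map` entries above) loaded as intended, its metadata can be queried from a running server; a minimal sketch, again assuming the `tritonclient` package from earlier:

```python
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient(url="localhost:8000")
# should list INPUT0/OUTPUT0 with datatype UINT8 and shape [-1] if the config was applied
print(client.get_model_metadata("pipeline"))
```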
