// Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
syntax = "proto3";
package nvidia.inferenceserver;
//@@.. cpp:namespace:: nvidia::inferenceserver
//@@.. cpp:var:: message InferSharedMemory
//@@
//@@ The meta-data for the shared memory from which to read the input
//@@ data and/or write the output data.
//@@
message InferSharedMemory
{
//@@ .. cpp:var:: string name
//@@
//@@ The name given during registration of a shared memory region that
//@@ holds the input data (or where the output data should be written).
//@@
string name = 1;
//@@ .. cpp:var:: uint64 offset
//@@
//@@ The offset from the start of the shared memory region.
//@@ start = offset, end = offset + byte_size.
//@@
uint64 offset = 2;
//@@ .. cpp:var:: uint64 byte_size
//@@
//@@ Size of the memory block, in bytes.
//@@
uint64 byte_size = 3;
}
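// Illustrative sketch (not part of the API definition): two tensors
// packed into one registered region could reference the same region
// name with non-overlapping [offset, offset + byte_size) ranges. The
// region name "input_data" and the sizes below are invented; the proto
// text format is shown only for illustration:
//
//   shared_memory { name: "input_data" offset: 0 byte_size: 4096 }
//   shared_memory { name: "input_data" offset: 4096 byte_size: 2048 }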
//@@
//@@.. cpp:var:: message InferRequestHeader
//@@
//@@ Meta-data for an inferencing request. The actual input data is
//@@ delivered separately from this header, in the HTTP body for an HTTP
//@@ request, or in the :cpp:var:`InferRequest` message for a gRPC request.
//@@
message InferRequestHeader
{
//@@ .. cpp:enum:: Flag
//@@
//@@ Flags that can be associated with an inference request.
//@@ All flags are packed bitwise into the 'flags' field and
//@@ so the value of each must be a power-of-2.
//@@
enum Flag {
//@@ .. cpp:enumerator:: Flag::FLAG_NONE = 0
//@@
//@@ Value indicating no flags are enabled.
//@@
FLAG_NONE = 0;
//@@ .. cpp:enumerator:: Flag::FLAG_SEQUENCE_START = 1 << 0
//@@
//@@ This request is the start of a related sequence of requests.
//@@
FLAG_SEQUENCE_START = 1;
//@@ .. cpp:enumerator:: Flag::FLAG_SEQUENCE_END = 1 << 1
//@@
//@@ This request is the end of a related sequence of requests.
//@@
FLAG_SEQUENCE_END = 2;
}
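// Illustrative sketch: because each flag value is a power of two, a
// request that both starts and ends a sequence would set 'flags' to
// the bitwise-or of the two enumerators defined above:
//
//   flags: 3  // FLAG_SEQUENCE_START (1) | FLAG_SEQUENCE_END (2)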
//@@ .. cpp:var:: message Input
//@@
//@@ Meta-data for an input tensor provided as part of an inferencing
//@@ request.
//@@
message Input
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the input tensor.
//@@
string name = 1;
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The shape of the input tensor, not including the batch dimension.
//@@ Optional if the model configuration for this input explicitly
//@@ specifies all dimensions of the shape. Required if the model
//@@ configuration for this input has any wildcard dimensions (-1).
//@@
repeated int64 dims = 2;
//@@ .. cpp:var:: uint64 batch_byte_size
//@@
//@@ The size of the full batch of the input tensor, in bytes.
//@@ Optional for tensors with fixed-sized datatypes. Required
//@@ for tensors with a non-fixed-size datatype (like STRING).
//@@
uint64 batch_byte_size = 3;
//@@ .. cpp:var:: InferSharedMemory shared_memory
//@@
//@@ The location in shared memory that contains the tensor data
//@@ for this input. Using shared memory is optional, but if this
//@@ message is used, all fields are required.
//@@
InferSharedMemory shared_memory = 4;
}
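// Illustrative sketch: an Input for a hypothetical image tensor, in
// proto text format. 'dims' is supplied on the assumption that the
// model configuration declares wildcard (-1) dimensions; the tensor
// name and shape are invented:
//
//   input {
//     name: "image"
//     dims: 3 dims: 224 dims: 224
//   }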
//@@ .. cpp:var:: message Output
//@@
//@@ Meta-data for a requested output tensor as part of an inferencing
//@@ request.
//@@
message Output
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the output tensor.
//@@
string name = 1;
//@@ .. cpp:var:: message Class
//@@
//@@ Options for an output returned as a classification.
//@@
message Class
{
//@@ .. cpp:var:: uint32 count
//@@
//@@ Indicates how many classification values should be returned
//@@ for the output. The 'count' highest priority values are
//@@ returned.
//@@
uint32 count = 1;
}
//@@ .. cpp:var:: Class cls
//@@
//@@ Optional. If defined, return this output as a classification
//@@ instead of raw data. The output tensor will be interpreted as
//@@ probabilities and the classifications associated with the
//@@ highest probabilities will be returned.
//@@
Class cls = 3;
//@@ .. cpp:var:: InferSharedMemory shared_memory
//@@
//@@ The location in shared memory to which the result tensor data
//@@ for this output will be written. Using shared memory is optional,
//@@ but if this message is used, all fields are required.
//@@
InferSharedMemory shared_memory = 4;
}
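// Illustrative sketch: requesting the top-5 classifications for a
// hypothetical output tensor named "prob", in proto text format.
// Omitting 'cls' would return the raw tensor data instead:
//
//   output {
//     name: "prob"
//     cls { count: 5 }
//   }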
//@@ .. cpp:var:: uint64 id
//@@
//@@ The ID of the inference request. The response to this request
//@@ will carry the same ID in InferResponseHeader. The request sender
//@@ can use the ID to correlate the response with the corresponding
//@@ request if needed.
//@@
uint64 id = 5;
//@@ .. cpp:var:: uint32 flags
//@@
//@@ The flags associated with this request. This field holds a bitwise-or
//@@ of all flag values.
//@@
uint32 flags = 6;
//@@ .. cpp:var:: uint64 correlation_id
//@@
//@@ The correlation ID of the inference request. The default is 0,
//@@ which indicates that the request has no correlation ID. The
//@@ correlation ID is used to indicate that two or more inference
//@@ requests are related to each other. How this relationship is
//@@ handled by the inference server is determined by the model's
//@@ scheduling policy.
//@@
uint64 correlation_id = 4;
//@@ .. cpp:var:: uint32 batch_size
//@@
//@@ The batch size of the inference request. This must be >= 1. For
//@@ models that don't support batching, batch_size must be 1.
//@@
uint32 batch_size = 1;
//@@ .. cpp:var:: Input input (repeated)
//@@
//@@ The input meta-data for the inputs provided with the inference
//@@ request.
//@@
repeated Input input = 2;
//@@ .. cpp:var:: Output output (repeated)
//@@
//@@ The output meta-data for the outputs requested by the inference
//@@ request.
//@@
repeated Output output = 3;
}
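// Illustrative sketch: a complete InferRequestHeader in proto text
// format for a hypothetical model with one input and one
// classification output. All names and shapes are invented:
//
//   id: 42
//   batch_size: 1
//   input { name: "image" dims: 3 dims: 224 dims: 224 }
//   output { name: "prob" cls { count: 5 } }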
//@@
//@@.. cpp:var:: message InferResponseHeader
//@@
//@@ Meta-data for the response to an inferencing request. The actual output
//@@ data is delivered separately from this header, in the HTTP body for an HTTP
//@@ request, or in the :cpp:var:`InferResponse` message for a gRPC request.
//@@
message InferResponseHeader
{
//@@ .. cpp:var:: message Output
//@@
//@@ Meta-data for an output tensor requested as part of an inferencing
//@@ request.
//@@
message Output
{
//@@ .. cpp:var:: string name
//@@
//@@ The name of the output tensor.
//@@
string name = 1;
//@@ .. cpp:var:: message Raw
//@@
//@@ Meta-data for an output tensor being returned as raw data.
//@@
message Raw
{
//@@ .. cpp:var:: int64 dims (repeated)
//@@
//@@ The shape of the output tensor, not including the batch
//@@ dimension.
//@@
repeated int64 dims = 1;
//@@ .. cpp:var:: uint64 batch_byte_size
//@@
//@@ The full size of the output tensor, in bytes. For a
//@@ batch output, this is the size of the entire batch.
//@@
uint64 batch_byte_size = 2;
}
//@@ .. cpp:var:: message Class
//@@
//@@ Information about each classification for this output.
//@@
message Class
{
//@@ .. cpp:var:: int32 idx
//@@
//@@ The classification index.
//@@
int32 idx = 1;
//@@ .. cpp:var:: float value
//@@
//@@ The classification value as a float (typically a
//@@ probability).
//@@
float value = 2;
//@@ .. cpp:var:: string label
//@@
//@@ The label for the class (optional, only available if provided
//@@ by the model).
//@@
string label = 3;
}
//@@ .. cpp:var:: message Classes
//@@
//@@ Meta-data for an output tensor being returned as classifications.
//@@
message Classes
{
//@@ .. cpp:var:: Class cls (repeated)
//@@
//@@ The top-k classes for this output.
//@@
repeated Class cls = 1;
}
//@@ .. cpp:var:: Raw raw
//@@
//@@ If specified, deliver results for this output as raw tensor data.
//@@ The actual output data is delivered in the HTTP body for an HTTP
//@@ request, or in the :cpp:var:`InferResponse` message for a gRPC
//@@ request. Only one of 'raw' and 'batch_classes' may be specified.
//@@
Raw raw = 2;
//@@ .. cpp:var:: Classes batch_classes (repeated)
//@@
//@@ If specified, deliver results for this output as classifications.
//@@ There is one :cpp:var:`Classes` object for each batch entry in
//@@ the output. Only one of 'raw' and 'batch_classes' may be
//@@ specified.
//@@
repeated Classes batch_classes = 3;
}
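// Illustrative sketch: a classification result for one output of a
// batch-2 request, in proto text format. There is one 'batch_classes'
// entry per batch item; all indices, values, and labels are invented:
//
//   output {
//     name: "prob"
//     batch_classes {
//       cls { idx: 7 value: 0.92 label: "cat" }
//       cls { idx: 3 value: 0.05 label: "dog" }
//     }
//     batch_classes {
//       cls { idx: 1 value: 0.88 label: "car" }
//       cls { idx: 9 value: 0.07 label: "truck" }
//     }
//   }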
//@@ .. cpp:var:: uint64 id
//@@
//@@ The ID of the inference response. The response carries the same
//@@ ID as the request that produced it. The request sender can use
//@@ the ID to correlate the response with the corresponding request
//@@ if needed.
//@@
uint64 id = 5;
//@@ .. cpp:var:: string model_name
//@@
//@@ The name of the model that produced the outputs.
//@@
string model_name = 1;
//@@ .. cpp:var:: int64 model_version
//@@
//@@ The version of the model that produced the outputs.
//@@
int64 model_version = 2;
//@@ .. cpp:var:: uint32 batch_size
//@@
//@@ The batch size of the outputs. This will always be equal to the
//@@ batch size of the inputs. For models that don't support
//@@ batching, the batch_size will be 1.
//@@
uint32 batch_size = 3;
//@@ .. cpp:var:: Output output (repeated)
//@@
//@@ The outputs, in the same order as they were requested in
//@@ :cpp:var:`InferRequestHeader`.
//@@
repeated Output output = 4;
}
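// Illustrative sketch: a complete InferResponseHeader in proto text
// format for a request that asked for one output as raw data. The
// model name, version, and sizes are invented; batch_byte_size assumes
// 1000 FP32 values (1000 * 4 = 4000 bytes):
//
//   id: 42
//   model_name: "resnet50"
//   model_version: 1
//   batch_size: 1
//   output {
//     name: "prob"
//     raw { dims: 1000 batch_byte_size: 4000 }
//   }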