-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathexecutable.fbs
478 lines (383 loc) · 14.7 KB
/
executable.fbs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
// IDL file for DarwiNN Executable.
namespace platforms.darwinn;
// A new file identifier should only be introduced if a different schema, with
// probably a different root node, is needed. This shall be a very rare case.
file_identifier "DWN1";
// Identifies which base address a piece of linker metadata targets. Each
// value corresponds to a Bundle::Alu::MOVI instruction whose operand the
// driver fills in at run time.
enum Description : short {
// Bundle::Alu::MOVI instruction to load output activation base address.
BASE_ADDRESS_OUTPUT_ACTIVATION = 0,
// Bundle::Alu::MOVI instruction to load input activation base address.
BASE_ADDRESS_INPUT_ACTIVATION = 1,
// Bundle::Alu::MOVI instruction to load parameter base address.
BASE_ADDRESS_PARAMETER = 2,
// Bundle::Alu::MOVI instruction to load scratch buffer base address.
BASE_ADDRESS_SCRATCH = 3,
}
// Identifies which 32-bit half of a 64-bit address a patched field holds.
enum Position : short {
// Lower 32-bit of 64-bit address.
LOWER_32BIT = 0,
// Upper 32-bit of 64-bit address.
UPPER_32BIT = 1,
}
// Linker metadata. Describes a special field in the encoded instruction
// stream that will be populated by the driver at run time.
table Meta {
// Indicates which base address this metadata is targeting.
desc:Description;
// For input/output/scratch, provides batch information.
// Parameter will not contain batch.
batch:int;
// Name of the input/output layer for input/output activations. Parameter and
// scratch should not have this field.
name:string;
// Tells which 32-bit half of the 64-bit address this field holds
// (see Position).
position:Position;
}
// Holds offset information of a field in an instruction bit stream chunk.
table FieldOffset {
// Linker metadata describing what the field at this offset represents.
meta:Meta;
// Bit offset of the field within the chunk's bitstream.
offset_bit:int;
}
// Holds information for an instruction bitstream chunk.
table InstructionBitstream {
// Encoded bitstream for real hardware.
bitstream:[ubyte];
// Offset (in bits) of various fields in the instruction bit stream. These
// fields are filled in by the driver before sending the instruction stream
// to the hardware.
field_offsets:[FieldOffset];
}
// Represents an interrupt coming through the descriptor path.
enum InterruptType : short {
// Scalar core supports 4 interrupts.
SCALAR_CORE_INT_0 = 0,
SCALAR_CORE_INT_1 = 1,
SCALAR_CORE_INT_2 = 2,
SCALAR_CORE_INT_3 = 3,
}
// Represents the direction of a DMA transfer.
enum Direction : short {
// From host to device.
INFEED = 0,
// From device to host.
OUTFEED = 1,
}
// Holds DMA hint information for DMA descriptors.
table DmaDescriptorHint {
// Metadata identifying which base address (and layer/batch) the DMA targets.
meta:Meta;
// Since the base address is determined at link time, the byte offset from the
// base address is recorded here.
offset_in_bytes:int;
// Number of bytes to be transferred for this hint.
size_in_bytes:int;
}
// Holds interrupt hint information.
table InterruptHint {
// Which scalar-core interrupt this hint refers to.
type:InterruptType;
}
// Holds Instruction hint information.
table InstructionHint {
// Instruction chunk. Whole instruction chunk is always transferred.
instruction_chunk_index:int;
}
// Holds fence hint. A fence enforces that all DMA hints before the fence are
// processed completely before processing any DMA hints after the fence.
// Intentionally empty: the hint carries no payload.
table FenceHint {
}
// A hint can be any one of the following.
union AnyHint {
DmaDescriptorHint,
InstructionHint,
InterruptHint,
FenceHint,
}
// Hints deterministic DMA.
table DmaHint {
// The concrete hint payload (descriptor, instruction, interrupt, or fence).
any_hint:AnyHint;
// Direction of DMA.
direction:Direction;
}
// A complete collection of DMA hints for either input or output.
table DmaHints {
// Series of hints, in processing order.
hints:[DmaHint];
// True if "hints" cover all the DMAs in the model.
fully_deterministic:bool;
}
// A group of simple int->int maps that help us translate a user-visible
// coordinate value to the hardware-friendly data layout for the final output
// activation.
//
// Note that this is needed only for 3D output. For 1D output, this table will
// not be used and a user is not supposed to use this function.
//
//
// Let's use an example when we have 2x2 tiles and we want to produce 4x5x32
// output tensor (y/x/z order).
//
// In this example, tile0 and tile2 will produce a 2x3x32 tensor and tile1 and
// tile3 will produce a 2x2x32 tensor.
//
// +--------+--------+
// | Tile0 | Tile1 |
// | 2x3x32 | 2x2x32 |
// +--------+--------+
// | Tile2 | Tile3 |
// | 2x3x32 | 2x2x32 |
// +--------+--------+
//
// y_coordinate_to_linear_tile_id_map will be (0, 0, 2, 2), encoding the
// linearized tile ID of the first tile of a row that a target y value will be
// stored.
//
// x_coordinate_to_linear_tile_id_map will be (0, 0, 0, 1, 1), encoding the
// X tile ID of a tile that will hold corresponding x value.
//
// linearized_tile_byte_offset will be (0, 192, 320, 512) encoding the starting
// byte offset of output of each tile when we fully linearize output.
//
// x_coordinate_to_local_byte_offset will be (0, 32, 64, 0, 32) as byte
// offset, encoding byte offset for each local x offset.
//
// y_coordinate_to_local_y_offset will be (0, 1, 0, 1) as y offset for
// y=0 will be 0 in each tile while that for y=1 will be 1.
//
// x_coordinate_to_local_y_row_size will be (3*32, 3*32, 3*32, 2*32, 2*32) as
// each y-row for Tile0/2 is 3*32 bytes and that for Tile1/3 is 2*32 bytes.
table OutputLayout {
// Holds a map from a tensor Y coordinate value to the linearized ID of the
// first tile of rows that produces output values for a given Y coordinate.
y_coordinate_to_linear_tile_id_map:[int];
// Holds a map for a given x coordinate value to tile ID within a row of
// tiles.
x_coordinate_to_linear_tile_id_map:[int];
// Holds an accumulated offset value for each tile.
linearized_tile_byte_offset:[int];
// Holds a map from a tensor x coordinate to local byte offset within each
// tile.
x_coordinate_to_local_byte_offset:[int];
// Holds a map from a tensor y coordinate to local y offset within each tile.
y_coordinate_to_local_y_offset:[int];
// Holds a map from a tensor x coordinate to local y row size within each
// tile.
x_coordinate_to_local_y_row_size:[int];
}
// Inclusive range of numbers.
struct Range {
// First index in the range (inclusive).
start:int;
// Last index in the range (inclusive).
end:int;
}
// Tensor shape.
table TensorShape {
// List of inclusive index ranges (start, end), one per dimension.
dimension:[Range];
}
// Tensor layout describes how tensor elements are stored in a linear memory
// space. See details in go/darwinn-output-layout.
table TensorLayout {
// Tensor shape stored in this layout.
shape:TensorShape;
// Distance (in number of elements) between two adjacent elements in each
// dimension.
stride:[int];
}
// Represents the output tensor shape of each tile. This information will be
// used for re-layout in the host.
table OutputShapeInfo {
// The final model output is transferred to the host in a list of tensor
// slices (sub-tensors). A slice is a collection of elements that can be
// represented as a single tensor shape and tensor layout.
slice_layout:[TensorLayout];
// Base offset (in bytes) of the first element in each corresponding layout.
slice_offset:[int];
}
// Numerics-related constant values needed for interpreting output tensor.
table NumericsConstants {
// Quantization zero point (see the DataType documentation:
// real_value = (fixed_point_value - zero_point) * scale).
zero_point:int;
// Scale factor used to convert fixed-point values back to real values.
dequantization_factor:float;
}
// //depot/google3/api/runtime_version.h:runtime_version,
// //depot/google3/platforms/darwinn/driver/test_data/backward_compatibility/BUILD:test_cases)
// Layer data type information.
// Note: The DataType enum should be synced with
// platforms/darwinn/model/config/array.proto.
enum DataType : short {
// Unsigned fixed point (it would be more appropriate to call this an affine
// value) means there is a scale and zero point associated with this tensor,
// To transform unsigned fixed-point values to real values:
// real_value = (unsigned_fixed-point_value - zero_point) * scale
FIXED_POINT8 = 0,
FIXED_POINT16 = 1,
// SIGNED_FIXED_POINT32 is a signed fixed point but is given an enum value
// of 2 due to historical reason. Please see the below for documentation of
// signed fixed-point types.
SIGNED_FIXED_POINT32 = 2,
// BFLOAT is Google’s own floating point format, with 8 bit exponent and 8 bit
// significand (7 bit stored significand).
BFLOAT = 3,
// HALF is industry standard IEEE 754-2008 binary16, with 5 bit exponent and
// 11 bit significand (10 bit stored significand).
HALF = 4,
// SINGLE is industry standard IEEE 754-2008 binary32, with 8 bit exponent and
// 24 bit significand (23 bit stored significand).
SINGLE = 5,
// Signed fixed point data types. Number is stored in two's complement format.
// There is an associated scale but no zero point. To transform fixed-point
// values to real values:
// real_value = signed_fixedpoint_value * scale
SIGNED_FIXED_POINT8 = 8,
SIGNED_FIXED_POINT16 = 9,
}
// //depot/google3/api/runtime_version.h:runtime_version,
// //depot/google3/platforms/darwinn/driver/test_data/backward_compatibility/BUILD:test_cases)
// Output layer specific information.
table OutputLayer {
// Encapsulates information needed to transform a multi-dimensional output
// tensor to its original YXZ layout. This field must be set for any tensor
// with x_dim and y_dim more than 1.
layout:OutputLayout;
// Deprecated; Layer also carries a data_type field — presumably that is the
// one to use now (TODO confirm).
data_type:DataType; // deprecated
// Output shape information that is streamed from the tiles.
shape_info:OutputShapeInfo;
}
// Input layer specific information. Intentionally empty for now; it exists so
// that AnyLayer can distinguish input layers from output layers.
table InputLayer {
}
// One of output or input layer.
union AnyLayer {
OutputLayer,
InputLayer,
}
// Layer information common to input and output layers.
table Layer {
// Name of the corresponding input/output layer.
name:string;
// Size in bytes, including padding. This number is for batch_size=1. The
// unpadded byte size of a tensor is:
// x_dim * y_dim * z_dim * bytes_per_data_type.
size_bytes:int;
// Dimension info. All these fields should be set for input and output
// tensors. ?_dim=1 means we don't have ? dimension. For example, in a single
// dimensional tensor x_dim=1, y_dim=1, z_dim=N.
y_dim:int;
x_dim:int;
z_dim:int;
// Numerics constants used for dequantization and quantization.
numerics:NumericsConstants;
// For input layer, this is the data type of input, for output layer, this is the data type of output.
data_type:DataType;
// Input or Output Layer specific information.
any_layer:AnyLayer;
// How many times this layer will get executed per inference. Default is 1.
// This information will be used to create large enough buffer to host inputs
// and outputs for layers that will get executed several times per inference.
execution_count_per_inference:int = 1;
// If set, the activations on this layer will be cached on TPU DRAM (if DRAM
// is available and there is enough free space on it).
cache_on_dram:bool = false;
// Tensor shape info.
shape:TensorShape;
}
// Specifies the nature of an executable.
enum ExecutableType : short {
// Everything needed to run a successful inference is included.
STAND_ALONE = 0,
// Only loads parameters into TPU memory. This type of executable should
// always accompany at least 1 EXECUTION_ONLY executable in the same package.
PARAMETER_CACHING = 1,
// This type of executable assumes the parameters are already cached on TPU.
// This type should always be accompanied by a PARAMETER_CACHING executable in
// the same package.
EXECUTION_ONLY = 2,
}
// A single compiled DarwiNN executable: instruction streams, parameters,
// layer descriptions, and DMA/scheduling metadata for one model.
table Executable {
// Executable format version. Set to 0 for now.
version:int = 0;
// Model name.
name:string;
// Model protobuf in binary serialized format.
serialized_model:[ubyte];
// Batch size. That is the number of inputs that can be simultaneously
// processed.
batch_size:int;
// Size in bytes of the scratch buffer expected for this model.
// This number is for batch_size=1.
scratch_size_bytes:int;
// Encoded instruction bitstreams.
instruction_bitstreams:[InstructionBitstream];
// Parameter stream. This field must be guaranteed to be aligned by the code
// that produces the flat buffer. As of now, executable_converter ensures
// this.
parameters:[ubyte];
// Dma Hints.
dma_hints:DmaHints;
// Input layer Information.
input_layers:[Layer];
// Output layer Information.
output_layers:[Layer];
// Chip that the executable was compiled for.
chip:string;
// Deprecated. Use estimated_cycles_64bit below instead.
estimated_cycles:int;
// The maximum amount of narrow memory bytes that is guaranteed to be used per
// tile. All narrow memory used in a tile is guaranteed to be at byte
// addresses below this value.
used_narrow_memory_bytes_per_tile:int;
// Type of this executable. If not specified, runtime assumes STAND_ALONE.
type:ExecutableType;
// Parameter-caching executables with the same token can cache their
// parameters together on the TPU SRAM.
parameter_caching_token:uint64;
// If set, parameters in this model will be loaded in the TPU DRAM for higher
// performance. TPU DRAM is available on some architectures. TPU DRAM is a
// scarce resource, therefore only selected models can have this option
// enabled. If this option is enabled and enough TPU DRAM is not available an
// error is returned at run time.
use_tpu_dram_for_parameters:bool = false;
// Estimated runtime in cycles for this model.
estimated_cycles_64bit:int64;
}
// MultiExecutable encapsulates one or more DarwiNN serialized executables that
// are all part of the same package.
table MultiExecutable {
// One serialized executable per entry.
serialized_executables:[string];
}
// Serialized package allows individual packages to stay page-aligned
// relative to beginning of the byte array.
table SerializedPackage {
// A nested, serialized Package flatbuffer.
serialized_package:[ubyte] (nested_flatbuffer: "Package");
}
// The collection of executables, signature and everything else that is needed
// for DarwiNN runtime to run one or more models that are related.
// This is the root type of this schema.
table Package {
// Minimum runtime version needed to process this package correctly.
min_runtime_version:int;
// A serialized MultiExecutable.
serialized_multi_executable:[ubyte];
// Signature of serialized_multi_executable.
signature:[ubyte];
// The version of this package to identify assumptions on the structure.
keypair_version:int;
// Specifies the version of DarwiNN compiler used to create this package.
compiler_version:string;
// Chip ID in the virtual cluster to execute these graphs.
// 0 if this package is compiled to run on a single chip.
// -1 if this is a multiple-chip package.
virtual_chip_id:int = 0;
// Package data for individual chip to execute.
// Note that the package data is not aligned in package bundle file, but it
// will be loaded into aligned memory block at model registration.
// An intermediate table SerializedPackage is needed, for flatbuffer only
// supports 1-d vector.
// TODO: Consider creating a new root type for new chips.
multi_chip_package:[SerializedPackage];
// A user-specified identifier. This is for limited use of offline compiled
// models.
model_identifier:string;
}
root_type Package;
// //depot/google3/api/runtime_version.h:runtime_version,
// //depot/google3/platforms/darwinn/driver/test_data/backward_compatibility/BUILD:test_cases)