Improve TessellateIPU debug context naming (compute sets & tensors)
This PR improves a couple of debug-naming aspects in TessellateIPU:
* Generating a readable name for `tile_map` operations (discarding metadata);
* Properly naming output tensors.

This massively improves the readability of a PopVision profile of a program
using TessellateIPU.
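As a rough, standalone sketch of the renaming logic added in tile_map_ops.cpp (using plain std::string instead of fmt; the raw prefix below is hypothetical, since actual prefixes depend on the JAX/IPU lowering):

// Minimal sketch of the debug-prefix rewriting introduced in this commit.
// The example prefix "jit_fn/..." is hypothetical.
#include <iostream>
#include <string>

std::string makeTileMapCallDebugPrefix(const std::string& raw_debug_prefix,
                                       const std::string& primitive_name) {
  const auto format = [&](std::size_t idx) {
    // Keep the namespace part, replace the metadata-heavy suffix with a
    // compact "tile_map[<primitive>]" name.
    return raw_debug_prefix.substr(0, idx) + "tile_map[" + primitive_name + "]";
  };
  auto idx = raw_debug_prefix.rfind("tile_map_equation_call_single_out[");
  if (idx != std::string::npos) return format(idx);
  idx = raw_debug_prefix.rfind("tile_map_equation_call_multi_out[");
  if (idx != std::string::npos) return format(idx);
  return raw_debug_prefix;  // No match => keep the raw prefix.
}

int main() {
  const std::string raw =
      "jit_fn/tile_map_equation_call_single_out[pname=add ...]";
  std::cout << makeTileMapCallDebugPrefix(raw, "add") << "\n";
  // Prints: jit_fn/tile_map[add]
}

The actual change additionally threads the resulting poplar::DebugContext into the compute set and the allocated input/output/tmp tensors, so the PopVision profile groups everything under the shortened tile_map name.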
balancap committed Sep 29, 2023
1 parent af0cc1f commit 91b9352
Showing 3 changed files with 70 additions and 25 deletions.
5 changes: 3 additions & 2 deletions tessellate_ipu/lib/tessellate_ipu_ops_jax.cpp
@@ -1,6 +1,8 @@
// Copyright (c) 2022 Graphcore Ltd. All rights reserved.
#include <nanobind/nanobind.h>

#include <iostream>

#include "ipu_custom_primitive.hpp"
#include "tile_array_ops.hpp"
#include "tile_map_ops.hpp"
@@ -238,11 +240,10 @@ class TileMapEquationCall : public jax::ipu::PrimitiveInterface {
poplar::Graph& graph, const std::vector<poplar::Tensor>& inputs,
std::vector<poplar::Tensor>& outputs, const std::string& attributes,
const std::string& debug_prefix) {
const auto debug_context = poplar::DebugContext(debug_prefix);
const auto tile_equation =
ipu::from_json_str<ipu::TileMapEquation>(attributes);
return lowerTileMapCallToPoplar(graph, inputs, outputs, tile_equation,
debug_context);
debug_prefix);
}
};

78 changes: 60 additions & 18 deletions tessellate_ipu/lib/tile_map_ops.cpp
@@ -4,8 +4,43 @@
#include <iostream>
namespace ipu {

namespace {

/**
* @brief Make a (readable) tile_map call debug prefix.
* Improves PopVision user experience when using TessellateIPU!
*/
std::string makeTileMapCallDebugPrefix(const std::string& raw_debug_prefix,
const std::string& primitive_name) {
const auto format_debug_prefix = [&raw_debug_prefix,
&primitive_name](std::size_t idx) {
// const std::string debug_prefix = raw_debug_prefix.substr(0, idx) +
// "tile_map";
const std::string debug_prefix =
fmt::format("{}{}[{}]", raw_debug_prefix.substr(0, idx), "tile_map",
primitive_name);
return debug_prefix;
};
std::string::size_type idx;
// A bit of ugly string pattern matching to remove the metadata, but keep
// namespace.
idx = raw_debug_prefix.rfind("tile_map_equation_call_single_out[");
if (idx != std::string::npos) {
return format_debug_prefix(idx);
}
idx = raw_debug_prefix.rfind("tile_map_equation_call_multi_out[");
if (idx != std::string::npos) {
return format_debug_prefix(idx);
}
// Failing => just keep the same prefix.
return raw_debug_prefix;
}

} // namespace

std::vector<poplar::Tensor> TileMapEquation::allocateInputTensors(
poplar::Graph& graph, const std::vector<poplar::Tensor>& inputs) const {
poplar::Graph& graph, const std::vector<poplar::Tensor>& inputs,
const poplar::DebugContext& debug_context) const {
FMT_ASSERT(inputs.size() <= inputs_info.size(),
"Inconsistent input vector size.");

@@ -18,9 +53,9 @@ std::vector<poplar::Tensor> TileMapEquation::allocateInputTensors(
const std::string raw_values = input_info.constant_data.decode();
const auto raw_values_ref =
poplar::ArrayRef<char>(raw_values.data(), raw_values.size());
auto t = createReplicatedConstantTensor(graph, input_info.aval.dtype,
input_info.aval.shape,
raw_values_ref, this->tiles);
auto t = createReplicatedConstantTensor(
graph, input_info.aval.dtype, input_info.aval.shape, raw_values_ref,
this->tiles, {debug_context, input_info.name});
inputs_all.push_back(t);
} else {
// Keep existing input tensor.
@@ -32,7 +67,8 @@ std::vector<poplar::Tensor> TileMapEquation::allocateInputTensors(
}

std::vector<poplar::Tensor> TileMapEquation::allocateOutputTensors(
poplar::Graph& graph, const std::vector<poplar::Tensor>& inputs) const {
poplar::Graph& graph, const std::vector<poplar::Tensor>& inputs,
const poplar::DebugContext& debug_context) const {
FMT_ASSERT(inputs.size() == inputs_info.size(),
"Inconsistent input vector size.");

@@ -48,9 +84,9 @@ std::vector<poplar::Tensor> TileMapEquation::allocateOutputTensors(
outputs.push_back(inputs.at(idx));
} else if (outinfo.iotype == VertexIOType::Out) {
// Allocate an output tensor with proper shape.
outputs.push_back(createShardedVariable(graph,
toPoplar(outinfo.aval.dtype),
outinfo.aval.shape, this->tiles));
outputs.push_back(createShardedVariable(
graph, toPoplar(outinfo.aval.dtype), outinfo.aval.shape, this->tiles,
{debug_context, outinfo.name}));
} else {
throw std::runtime_error("Unknown IO type for vertex output tensor.");
}
@@ -59,26 +95,26 @@ std::vector<poplar::Tensor> TileMapEquation::allocateOutputTensors(
}

std::optional<poplar::Tensor> TileMapEquation::allocateTmpSpaceTensor(
poplar::Graph& graph) const {
poplar::Graph& graph, const poplar::DebugContext& debug_context) const {
if (!useTmpSpace()) {
return std::nullopt;
}
return createShardedVariable(graph, toPoplar(tmp_space_aval.dtype),
{tmp_space_aval.size()}, this->tiles);
{tmp_space_aval.size()}, this->tiles,
{debug_context, "tmp_space"});
}

void TileMapEquation::add(poplar::Graph& graph, poplar::program::Sequence& prog,
const std::vector<poplar::Tensor>& inputs,
const std::vector<poplar::Tensor>& outputs,
const poplar::DebugContext& debug_prefix) const {
const poplar::DebugContext& debug_context) const {
FMT_ASSERT(inputs.size() == inputs_info.size(),
"Inconsistent inputs vector size.");
FMT_ASSERT(outputs.size() == outputs_info.size(),
"Inconsistent outputs vector size.");
poplar::DebugContext debug_context(debug_prefix, this->pname);

// Tensor used for vertex temp. scratch space.
auto tmp_space_tensor_opt = allocateTmpSpaceTensor(graph);
auto tmp_space_tensor_opt = allocateTmpSpaceTensor(graph, debug_context);

poplar::ComputeSet cs = graph.addComputeSet(debug_context);
for (size_t tidx = 0; tidx < tiles.size(); ++tidx) {
@@ -122,9 +158,10 @@ void TileMapEquation::add(poplar::Graph& graph, poplar::program::Sequence& prog,
std::vector<poplar::Tensor> TileMapEquation::add(
poplar::Graph& graph, poplar::program::Sequence& prog,
const std::vector<poplar::Tensor>& inputs,
const poplar::DebugContext& debug_prefix) const {
const poplar::DebugContext& debug_context) const {
// All input tensors: i.e. add constant tensors.
const auto inputs_all = this->allocateInputTensors(graph, inputs);
const auto inputs_all =
this->allocateInputTensors(graph, inputs, debug_context);

// No vertex => assume identity function.
// Forwarding inputs, with just potential change of shape and dtype.
@@ -148,8 +185,9 @@ std::vector<poplar::Tensor> TileMapEquation::add(
return outputs_all;
}
// Usual path => map a vertex.
const auto outputs = this->allocateOutputTensors(graph, inputs);
this->add(graph, prog, inputs_all, outputs, debug_prefix);
const auto outputs =
this->allocateOutputTensors(graph, inputs, debug_context);
this->add(graph, prog, inputs_all, outputs, debug_context);
return outputs;
}

@@ -173,8 +211,12 @@ std::size_t TileMapEquation::numInOuts() const {
poplar::program::Program lowerTileMapCallToPoplar(
poplar::Graph& graph, const std::vector<poplar::Tensor>& inputs,
std::vector<poplar::Tensor>& outputs, const TileMapEquation& tile_map_eqn,
const poplar::DebugContext& debug_context) {
const std::string& raw_debug_prefix) {
auto prog = poplar::program::Sequence();
// Base debug context used in the `tile_map` operation.
const auto debug_prefix =
makeTileMapCallDebugPrefix(raw_debug_prefix, tile_map_eqn.pname);
const auto debug_context = poplar::DebugContext(debug_prefix);
// IPU tiles synchronization before compute set.
if (tile_map_eqn.sync) {
const auto sync_type = poplar::SyncType::INTERNAL;
12 changes: 7 additions & 5 deletions tessellate_ipu/lib/tile_map_ops.hpp
@@ -268,7 +268,8 @@ struct TileMapEquation {
* @return Collection of input tensors.
*/
std::vector<poplar::Tensor> allocateInputTensors(
poplar::Graph& graph, const std::vector<poplar::Tensor>& inputs) const;
poplar::Graph& graph, const std::vector<poplar::Tensor>& inputs,
const poplar::DebugContext& debug_context) const;

/**
* @brief Allocate output (or use existing input) tensors.
@@ -277,13 +278,14 @@ struct TileMapEquation {
* @return Collection of output tensors.
*/
std::vector<poplar::Tensor> allocateOutputTensors(
poplar::Graph& graph, const std::vector<poplar::Tensor>& inputs) const;
poplar::Graph& graph, const std::vector<poplar::Tensor>& inputs,
const poplar::DebugContext& debug_context) const;

/**
* @brief Allocate the temporary-scratch space tensor (if used).
*/
std::optional<poplar::Tensor> allocateTmpSpaceTensor(
poplar::Graph& graph) const;
poplar::Graph& graph, const poplar::DebugContext& debug_context) const;

/**
* @brief Add vertex/equation to Poplar graph & compute set.
@@ -334,12 +336,12 @@ NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(TileMapEquation, pname, vname, tiles,
* @param inputs List of inputs.
* @param outputs List of outputs, to update.
* @param tile_map_eqn TileMapEquation info.
* @param debug_context Poplar debug context.
* @param debug_prefix Poplar (raw) debug prefix.
* @return Poplar program.
*/
poplar::program::Program lowerTileMapCallToPoplar(
poplar::Graph& graph, const std::vector<poplar::Tensor>& inputs,
std::vector<poplar::Tensor>& outputs, const TileMapEquation& tile_map_eqn,
const poplar::DebugContext& debug_context);
const std::string& debug_prefix);

} // namespace ipu
