Skip to content

Commit

Permalink
[libshortfin] Implement invocation.
Browse files Browse the repository at this point in the history
  • Loading branch information
stellaraccident committed Aug 28, 2024
1 parent ba58e4d commit 35cd344
Show file tree
Hide file tree
Showing 16 changed files with 501 additions and 234 deletions.
81 changes: 65 additions & 16 deletions libshortfin/bindings/python/array_binding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,23 @@ using namespace shortfin::array;
namespace shortfin::python {

namespace {
static const char DOCSTRING_ARRAY_COPY_FROM[] =
R"(Copy contents from a source array to this array.
Equivalent to `dest_array.storage.copy_from(source_array.storage)`.
)";

static const char DOCSTRING_ARRAY_COPY_TO[] =
R"(Copy contents this array to a destination array.
Equivalent to `dest_array.storage.copy_from(source_array.storage)`.
)";

static const char DOCSTRING_ARRAY_FILL[] = R"(Fill an array with a value.
Equivalent to `array.storage.fill(pattern)`.
)";

static const char DOCSTRING_STORAGE_DATA[] = R"(Access raw binary contents.
Accessing `foo = storage.data` is equivalent to `storage.data.map(read=True)`.
Expand All @@ -28,6 +45,23 @@ As with `map`, this will only work on buffers that are host visible, which
includes all host buffers and device buffers created with the necessary access.
)";

static const char DOCSTRING_STORAGE_COPY_FROM[] =
R"(Copy contents from a source storage to this array.
This operation executes asynchronously and the effect will only be visible
once the execution scope has been synced to the point of mutation.
)";

static const char DOCSTRING_STORAGE_FILL[] = R"(Fill a storage with a value.
Takes as argument any value that can be interpreted as a buffer with the Python
buffer protocol of size 1, 2, or 4 bytes. The storage will be filled uniformly
with the pattern.
This operation executes asynchronously and the effect will only be visible
once the execution scope has been synced to the point of mutation.
)";

static const char DOCSTRING_STORAGE_MAP[] =
R"(Create a mapping of the buffer contents in host memory.
Expand Down Expand Up @@ -104,26 +138,30 @@ void BindArray(py::module_ &m) {
.def_static(
"allocate_host",
[](local::ScopedDevice &device, iree_device_size_t allocation_size) {
return storage::AllocateHost(device, allocation_size);
return storage::allocate_host(device, allocation_size);
},
py::arg("device"), py::arg("allocation_size"), py::keep_alive<0, 1>())
.def_static(
"allocate_device",
[](local::ScopedDevice &device, iree_device_size_t allocation_size) {
return storage::AllocateDevice(device, allocation_size);
return storage::allocate_device(device, allocation_size);
},
py::arg("device"), py::arg("allocation_size"), py::keep_alive<0, 1>())
.def("fill",
[](storage &self, py::handle buffer) {
Py_buffer py_view;
int flags = PyBUF_FORMAT | PyBUF_ND; // C-Contiguous ND.
if (PyObject_GetBuffer(buffer.ptr(), &py_view, flags) != 0) {
throw py::python_error();
}
PyBufferReleaser py_view_releaser(py_view);
self.Fill(py_view.buf, py_view.len);
})
.def("copy_from", [](storage &self, storage &src) { self.CopyFrom(src); })
.def(
"fill",
[](storage &self, py::handle buffer) {
Py_buffer py_view;
int flags = PyBUF_FORMAT | PyBUF_ND; // C-Contiguous ND.
if (PyObject_GetBuffer(buffer.ptr(), &py_view, flags) != 0) {
throw py::python_error();
}
PyBufferReleaser py_view_releaser(py_view);
self.fill(py_view.buf, py_view.len);
},
py::arg("pattern"), DOCSTRING_STORAGE_FILL)
.def(
"copy_from", [](storage &self, storage &src) { self.copy_from(src); },
py::arg("source_storage"), DOCSTRING_STORAGE_COPY_FROM)
.def(
"map",
[](storage &self, bool read, bool write, bool discard) {
Expand All @@ -137,7 +175,7 @@ void BindArray(py::module_ &m) {
}
mapping *cpp_mapping = nullptr;
py::object py_mapping = CreateMappingObject(&cpp_mapping);
self.MapExplicit(
self.map_explicit(
*cpp_mapping,
static_cast<iree_hal_memory_access_bits_t>(access));
return py_mapping;
Expand All @@ -154,12 +192,12 @@ void BindArray(py::module_ &m) {
[](storage &self) {
mapping *cpp_mapping = nullptr;
py::object py_mapping = CreateMappingObject(&cpp_mapping);
*cpp_mapping = self.MapRead();
*cpp_mapping = self.map_read();
return py_mapping;
},
[](storage &self, py::handle buffer_obj) {
PyBufferRequest src_info(buffer_obj, PyBUF_SIMPLE);
auto dest_data = self.MapWriteDiscard();
auto dest_data = self.map_write_discard();
if (src_info.view().len > dest_data.size()) {
throw std::invalid_argument(
fmt::format("Cannot write {} bytes into buffer of {} bytes",
Expand Down Expand Up @@ -243,6 +281,17 @@ void BindArray(py::module_ &m) {
py::rv_policy::reference_internal)
.def_prop_ro("storage", &device_array::storage,
py::rv_policy::reference_internal)

.def(
"fill",
[](py::handle self, py::handle buffer) {
self.attr("storage").attr("fill")(buffer);
},
py::arg("pattern"), DOCSTRING_ARRAY_FILL)
.def("copy_from", &device_array::copy_from, py::arg("source_array"),
DOCSTRING_ARRAY_COPY_FROM)
.def("copy_to", &device_array::copy_to, py::arg("dest_array"),
DOCSTRING_ARRAY_COPY_TO)
.def("__repr__", &device_array::to_s);
}

Expand Down
11 changes: 10 additions & 1 deletion libshortfin/bindings/python/lib_ext.cc
Original file line number Diff line number Diff line change
Expand Up @@ -379,8 +379,17 @@ void BindLocal(py::module_ &m) {
.def("__add__", &local::DeviceAffinity::AddDevice)
.def("__repr__", &local::DeviceAffinity::to_s);

py::class_<local::Program>(m, "Program");
py::class_<local::Program>(m, "Program")
.def_prop_ro("exports", &local::Program::exports)
.def("lookup_function", &local::Program::LookupRequiredFunction)
.def("__getitem__", &local::Program::LookupRequiredFunction);
py::class_<local::ProgramFunction>(m, "ProgramFunction")
.def_prop_ro("name", &local::ProgramFunction::name)
.def_prop_ro("calling_convention",
&local::ProgramFunction::calling_convention)
.def("__repr__", &local::ProgramFunction::to_s);
py::class_<local::ProgramModule>(m, "ProgramModule")
.def_prop_ro("exports", &local::ProgramModule::exports)
.def("__repr__", &local::ProgramModule::to_s)
.def_static("load", &local::ProgramModule::Load, py::arg("system"),
py::arg("path"), py::arg("mmap") = true);
Expand Down
13 changes: 9 additions & 4 deletions libshortfin/examples/python/mobilenet_server/inference_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def __init__(self, raw_image_data):
class InferenceProcess(sf.Process):
def __init__(self, program, request_queue, **kwargs):
super().__init__(**kwargs)
self.program = program
self.main_function = program["module.torch-jit-export$async"]
self.request_reader = request_queue.reader()
self.device = self.scope.device(0)
self.device_input = sfnp.device_array(
Expand All @@ -41,9 +41,14 @@ async def run(self):
# support for. Generally, APIs on storage should be mirrored onto
# the array.
self.host_staging.storage.data = request.raw_image_data
print(self.host_staging)
self.device_input.storage.copy_from(self.host_staging.storage)
print(self.device_input)
print("host_staging =", self.host_staging)
self.device_input.copy_from(self.host_staging)
output = await self.scope.invoke(self.main_function, self.device_input)
print("OUTPUT:", output)
# read_back = self.device_input.for_transfer()
# read_back.copy_from(self.device_input)
# await self.device
# print("read back =", read_back)


class Main:
Expand Down
22 changes: 12 additions & 10 deletions libshortfin/src/shortfin/array/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,19 @@ template class InlinedDims<std::size_t>;
// device_array
// -------------------------------------------------------------------------- //

const mapping device_array::data() const { return storage_.MapRead(); }
const mapping device_array::data() const { return storage_.map_read(); }

mapping device_array::data() { return storage_.MapRead(); }
mapping device_array::data() { return storage_.map_read(); }

mapping device_array::data_rw() { return storage_.MapReadWrite(); }
mapping device_array::data_rw() { return storage_.map_read_write(); }

mapping device_array::data_w() { return storage_.MapWriteDiscard(); }
mapping device_array::data_w() { return storage_.map_write_discard(); }

std::optional<mapping> device_array::map_memory_for_xtensor() {
if (storage_.is_mappable_for_read_write()) {
return storage_.MapReadWrite();
return storage_.map_read_write();
} else if (storage_.is_mappable_for_read()) {
return storage_.MapRead();
return storage_.map_read();
}
return {};
}
Expand All @@ -52,10 +52,12 @@ std::string device_array::to_s() const {
}
}

return fmt::format("device_array([{}], dtype='{}', device={}({})) ={}{}",
fmt::join(shape(), ", "), dtype().name(),
storage_.device().to_s(), storage_.formatted_memory_type(),
contents_prefix, contents);
return fmt::format(
"device_array([{}], dtype='{}', device={}(type={}, usage={}, access={})) "
"={}{}",
fmt::join(shape(), ", "), dtype().name(), storage_.device().to_s(),
storage_.formatted_memory_type(), storage_.formatted_buffer_usage(),
storage_.formatted_memory_access(), contents_prefix, contents);
}

} // namespace shortfin::array
21 changes: 19 additions & 2 deletions libshortfin/src/shortfin/array/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class SHORTFIN_API device_array
static device_array for_device(local::ScopedDevice &device,
std::span<const size_t> shape, DType dtype) {
return device_array(
storage::AllocateDevice(device, dtype.compute_dense_nd_size(shape)),
storage::allocate_device(device, dtype.compute_dense_nd_size(shape)),
shape, dtype);
}

Expand All @@ -76,7 +76,7 @@ class SHORTFIN_API device_array
static device_array for_host(local::ScopedDevice &device,
std::span<const size_t> shape, DType dtype) {
return device_array(
storage::AllocateHost(device, dtype.compute_dense_nd_size(shape)),
storage::allocate_host(device, dtype.compute_dense_nd_size(shape)),
shape, dtype);
}

Expand All @@ -85,6 +85,23 @@ class SHORTFIN_API device_array
return for_host(storage().device(), shape(), dtype());
}

// Enqueues a fill of the storage with an arbitrary pattern of the given
// size. The pattern size must be 1, 2, or 4. Equivalent to calling the same
// on the backing storage.
void fill(const void *pattern, iree_host_size_t pattern_length) {
storage_.fill(pattern, pattern_length);
}

// Performs either a d2h, h2d or d2d transfer from a source storage to this
// storage. Equivalent to calling the same on the backing storage.
void copy_from(device_array &source_array) {
storage_.copy_from(source_array.storage_);
}
// Inverse of copy_from.
void copy_to(device_array &dest_array) {
dest_array.storage_.copy_from(storage_);
}

// Untyped access to the backing data. The array must be mappable. Specific
// access modes:
// * data(): Read-only access to the data.
Expand Down
32 changes: 19 additions & 13 deletions libshortfin/src/shortfin/array/storage.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ storage::storage(local::ScopedDevice device, iree::hal_buffer_ptr buffer,
}
storage::~storage() { logging::destruct("array::storage", this); }

storage storage::AllocateDevice(ScopedDevice &device,
iree_device_size_t allocation_size) {
storage storage::allocate_device(ScopedDevice &device,
iree_device_size_t allocation_size) {
if (!device.raw_device()) {
throw std::invalid_argument("Cannot allocate with a null device affinity");
}
Expand All @@ -54,8 +54,8 @@ storage storage::AllocateDevice(ScopedDevice &device,
device.scope().NewTimelineResource());
}

storage storage::AllocateHost(ScopedDevice &device,
iree_device_size_t allocation_size) {
storage storage::allocate_host(ScopedDevice &device,
iree_device_size_t allocation_size) {
if (!device.raw_device()) {
throw std::invalid_argument("Cannot allocate with a null device affinity");
}
Expand All @@ -64,7 +64,8 @@ storage storage::AllocateHost(ScopedDevice &device,
iree_hal_buffer_params_t params = {
.usage = IREE_HAL_BUFFER_USAGE_MAPPING,
.access = IREE_HAL_MEMORY_ACCESS_ALL,
.type = IREE_HAL_MEMORY_TYPE_OPTIMAL_FOR_HOST,
.type = IREE_HAL_MEMORY_TYPE_OPTIMAL_FOR_HOST |
IREE_HAL_MEMORY_TYPE_DEVICE_VISIBLE,
.queue_affinity = device.affinity().queue_affinity(),
};
if (device.affinity().queue_affinity() != 0) {
Expand All @@ -76,15 +77,15 @@ storage storage::AllocateHost(ScopedDevice &device,
device.scope().NewTimelineResource());
}

storage storage::Subspan(iree_device_size_t byte_offset,
storage storage::subspan(iree_device_size_t byte_offset,
iree_device_size_t byte_length) {
storage new_storage(device_, {}, timeline_resource_);
SHORTFIN_THROW_IF_ERROR(iree_hal_buffer_subspan(
buffer_, byte_offset, byte_length, new_storage.buffer_.for_output()));
return new_storage;
}

void storage::Fill(const void *pattern, iree_host_size_t pattern_length) {
void storage::fill(const void *pattern, iree_host_size_t pattern_length) {
device_.scope().scheduler().AppendCommandBuffer(
device_, TransactionType::TRANSFER, [&](Account &account) {
// Must depend on all of this buffer's use dependencies to avoid
Expand All @@ -94,9 +95,8 @@ void storage::Fill(const void *pattern, iree_host_size_t pattern_length) {
// write-after-write hazard.
account.active_deps_extend(timeline_resource_->mutation_barrier());

// TODO: I need to join the submission dependencies on the account
// with the timeline resource idle fence to ensure that
// write-after-access is properly sequenced.
SHORTFIN_SCHED_LOG(" : FillBuffer({})",
static_cast<void *>(buffer_.get()));
SHORTFIN_THROW_IF_ERROR(iree_hal_command_buffer_fill_buffer(
account.active_command_buffer(),
iree_hal_make_buffer_ref(
Expand All @@ -111,7 +111,7 @@ void storage::Fill(const void *pattern, iree_host_size_t pattern_length) {
});
}

void storage::CopyFrom(storage &source_storage) {
void storage::copy_from(storage &source_storage) {
device_.scope().scheduler().AppendCommandBuffer(
device_, TransactionType::TRANSFER, [&](Account &account) {
// Must depend on the source's mutation dependencies to avoid
Expand All @@ -122,17 +122,23 @@ void storage::CopyFrom(storage &source_storage) {
account.active_deps_extend(timeline_resource_->use_barrier());
account.active_deps_extend(timeline_resource_->mutation_barrier());

SHORTFIN_SCHED_LOG(" : CopyBuffer({} -> {})",
static_cast<void *>(source_storage.buffer_.get()),
static_cast<void *>(buffer_.get()));
SHORTFIN_THROW_IF_ERROR(iree_hal_command_buffer_copy_buffer(
account.active_command_buffer(),
/*source_ref=*/
iree_hal_make_buffer_ref(source_storage.buffer_, 0, byte_length()),
/*target_ref=*/
iree_hal_make_buffer_ref(buffer_, 0, byte_length())));

// And move our own mutation barrier to the current pending timeline
// Move our own mutation barrier to the current pending timeline
// value.
timeline_resource_->set_mutation_barrier(
account.timeline_sem(), account.timeline_idle_timepoint());
// And extend the source use barrier.
source_storage.timeline_resource_->use_barrier_insert(
account.timeline_sem(), account.timeline_idle_timepoint());
});
}

Expand All @@ -150,7 +156,7 @@ bool storage::is_mappable_for_read_write() const {
(IREE_HAL_MEMORY_ACCESS_READ | IREE_HAL_MEMORY_ACCESS_WRITE));
}

void storage::MapExplicit(mapping &mapping, iree_hal_memory_access_t access) {
void storage::map_explicit(mapping &mapping, iree_hal_memory_access_t access) {
assert(access != IREE_HAL_MEMORY_ACCESS_NONE);
mapping.reset();
SHORTFIN_THROW_IF_ERROR(iree_hal_buffer_map_range(
Expand Down
Loading

0 comments on commit 35cd344

Please sign in to comment.