Add allocated CPU and GPU memory reporting #81

Open · wants to merge 1 commit into main
@@ -221,10 +221,15 @@ mtrtRuntimeClientGetNumDevices(MTRT_RuntimeClient client, int32_t *numDevices);
MLIR_CAPI_EXPORTED MTRT_Status mtrtRuntimeClientGetDevice(
    MTRT_RuntimeClient client, int32_t index, MTRT_Device *device);

-/// Retrieve the runtiem client that manages the specified memref.
+/// Retrieve the runtime client that manages the specified memref.
MLIR_CAPI_EXPORTED MTRT_RuntimeClient
mtrtMemRefGetClient(MTRT_MemRefValue memref);

+/// Retrieve the runtime client allocated cpu and gpu memory.
Collaborator: This isn't quite accurate. You're reporting the CPU/GPU memory that is being tracked by the RuntimeClient. It can track buffers that are externally allocated.
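One way the new doc comment could be reworded along the lines of this feedback (a sketch, not text from the PR):

/// Report the total CPU and GPU memory currently tracked by this client's
/// AllocTracker; tracked buffers may include externally allocated memory.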

+MTRT_Status mtrtReportAllocatedMemory(MTRT_RuntimeClient client,
+                                      int64_t *totalCpuMemory,
+                                      int64_t *totalGpuMemory);
Collaborator: Let's use uint64_t or size_t here.
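A minimal sketch of the declaration with that suggestion applied (using size_t; this is not what the commit currently does):

MTRT_Status mtrtReportAllocatedMemory(MTRT_RuntimeClient client,
                                      size_t *totalCpuMemory,
                                      size_t *totalGpuMemory);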


//===----------------------------------------------------------------------===//
// Data Transfer
//===----------------------------------------------------------------------===//
mlir-tensorrt/compiler/lib/CAPI/Runtime/Runtime.cpp (10 additions & 0 deletions)
@@ -640,6 +640,16 @@ MTRT_Status mtrtRuntimeClientGetDevice(MTRT_RuntimeClient client, int32_t index,
  return mtrtStatusGetOk();
}

+MTRT_Status mtrtReportAllocatedMemory(MTRT_RuntimeClient client,
+                                      int64_t *totalCpuMemory,
+                                      int64_t *totalGpuMemory) {
+  RuntimeClient *cppClient = unwrap(client);
+  auto const &allocated = cppClient->getAllocTracker().reportAllocatedMemory();
+  *totalCpuMemory = allocated.first;
+  *totalGpuMemory = allocated.second;
+  return mtrtStatusGetOk();
+}

//===----------------------------------------------------------------------===//
// MTRT_ScalarValue
//===----------------------------------------------------------------------===//
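For context, a hedged usage sketch of the C API entry point as added in this commit (error handling elided; `client` is assumed to be a valid, already-created MTRT_RuntimeClient):

int64_t totalCpuMemory = 0;
int64_t totalGpuMemory = 0;
// Both out-parameters receive byte counts tracked by the client.
MTRT_Status status =
    mtrtReportAllocatedMemory(client, &totalCpuMemory, &totalGpuMemory);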
@@ -773,6 +773,9 @@ class AllocTracker {
  /// Return true if the tracker's map contains `ptr`.
  bool contains(uintptr_t ptr) const;

+  /// Report total CPU and GPU memory allocated by runtime client.
+  std::pair<int64_t, int64_t> reportAllocatedMemory() const;
Collaborator: There actually could be more types than just these two, so I'd prefer we separate it into a struct or an array. The array could be indexed by all the potential values of PointerType.
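A rough sketch of that suggestion; the report type, its field name, and the enum-count constant kNumPointerTypes are assumptions for illustration, not part of this PR:

struct AllocatedMemoryReport {
  // Total tracked bytes for each PointerType value (host, pinned_host,
  // device, unified, ...), indexed by the enum's underlying integer value.
  std::array<uint64_t, kNumPointerTypes> bytesByType = {};
};

AllocatedMemoryReport reportAllocatedMemory() const;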


private:
  llvm::DenseMap<uintptr_t, PointerInfo> map;
};
mlir-tensorrt/executor/lib/Runtime/API/API.cpp (18 additions & 0 deletions)
@@ -429,6 +429,24 @@ PointerInfo AllocTracker::lookupOrDefault(uintptr_t ptr) const {
  return map.at(ptr);
}

+std::pair<int64_t, int64_t> AllocTracker::reportAllocatedMemory() const {
+  int64_t totalCpuMemory = 0;
Collaborator: We should use uint64_t here.

+  int64_t totalGpuMemory = 0;
+
+  for (const auto &entry : map) {
+    const PointerInfo &info = entry.second;
+    if (info.isExternallyManaged())
Collaborator (author): @christopherbate Is this sufficient for tracking only internally managed/allocated pointers?

+      continue;
+    if (info.type == PointerType::host || info.type == PointerType::pinned_host) {
+      totalCpuMemory += info.size;
+    } else if (info.type == PointerType::device || info.type == PointerType::unified) {
+      totalGpuMemory += info.size;
+    }
+  }
+
+  return {totalCpuMemory, totalGpuMemory};
+}

StatusOr<PointerInfo> runtime::allocate(AllocTracker &tracker, PointerType type,
                                        uint64_t size,
                                        std::optional<uint32_t> alignment,
mlir-tensorrt/python/bindings/Runtime/RuntimePyBind.cpp (12 additions & 1 deletion)
@@ -775,7 +775,18 @@ PYBIND11_MODULE(_api, m) {
            THROW_IF_MTRT_ERROR(s);
          },
          py::arg("device_memref"), py::arg("existing_host_memref"),
-         py::arg("stream") = py::none());
+         py::arg("stream") = py::none())
+      .def(
+          "report_allocated_memory",
+          [](PyRuntimeClient &self) {
+            int64_t totalGpuMemory;
+            int64_t totalCpuMemory;
+            MTRT_Status s = mtrtReportAllocatedMemory(self, &totalCpuMemory,
+                                                      &totalGpuMemory);
+            THROW_IF_MTRT_ERROR(s);
+            py::object namedtuple =
+                py::module::import("collections").attr("namedtuple");
+            py::object MemoryUsage =
+                namedtuple("MemoryUsage", "cpu_memory gpu_memory");
Collaborator: You'll need to update the stubs so users can see this type information in the IDE.

+            return MemoryUsage(totalCpuMemory, totalGpuMemory);
+          });
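A hedged sketch of what the corresponding stub (.pyi) additions might look like; the exact stub file location and class layout are assumptions:

from typing import NamedTuple

class MemoryUsage(NamedTuple):
    cpu_memory: int
    gpu_memory: int

class RuntimeClient:
    def report_allocated_memory(self) -> MemoryUsage: ...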

  py::class_<PyRuntimeValue>(m, "RuntimeValue", py::module_local())
      .def_property_readonly(MTRT_PYTHON_CAPI_PTR_ATTR,
@@ -3,6 +3,7 @@

import mlir_tensorrt.runtime.api as runtime
import numpy as np
+import cupy as cp

TESTS = []

@@ -190,6 +191,57 @@ def test_host_memref():
# CHECK-NEXT: PointerType.host
# CHECK-NEXT: mlir_tensorrt.compiler.api.MemRefValue._CAPIPtr


+@make_test
+def test_report_allocated_memory():
+    client = runtime.RuntimeClient()
+    devices = client.get_devices()
+
+    np_arr = np.ones((1000), dtype=np.int32)
+    cp_arr = cp.ones((1000), dtype=np.int32)
+
+    # Allocate GPU memory
+    memref = client.create_memref(np_arr, device=devices[0])
+    memory_usage = client.report_allocated_memory()
+    print("CPU Memory: ", memory_usage.cpu_memory)
+    print("GPU Memory: ", memory_usage.gpu_memory)
+
+    # Allocate CPU memory
+    memref = client.create_memref(np_arr)
+    memory_usage = client.report_allocated_memory()
+    print("CPU Memory: ", memory_usage.cpu_memory)
+    print("GPU Memory: ", memory_usage.gpu_memory)
+
+    # No CPU memory is allocated, since this creates a view.
+    memref = client.create_host_memref_view(
+        np_arr.ctypes.data, shape=list(np_arr.shape), dtype=runtime.ScalarTypeCode.i32
+    )
+    memory_usage = client.report_allocated_memory()
+    print("CPU Memory: ", memory_usage.cpu_memory)
+    print("GPU Memory: ", memory_usage.gpu_memory)
+
+    # No GPU memory is allocated, since this creates a view.
+    memref = client.create_device_memref_view(
+        cp_arr.data.ptr,
+        shape=list(cp_arr.shape),
+        dtype=runtime.ScalarTypeCode.i32,
+        device=devices[0],
+    )
+    memory_usage = client.report_allocated_memory()
+    print("CPU Memory: ", memory_usage.cpu_memory)
+    print("GPU Memory: ", memory_usage.gpu_memory)
+
+
+# CHECK-LABEL: Test: test_report_allocated_memory
+# CHECK: CPU Memory: 0
+# CHECK: GPU Memory: 4000
+# CHECK: CPU Memory: 4000
+# CHECK: GPU Memory: 0
+# CHECK: CPU Memory: 0
+# CHECK: GPU Memory: 0
+# CHECK: CPU Memory: 0
+# CHECK: GPU Memory: 0

if __name__ == "__main__":
    for t in TESTS:
        t()