Add allocated CPU and GPU memory reporting #81

Open · wants to merge 1 commit into main
@@ -221,10 +221,15 @@ mtrtRuntimeClientGetNumDevices(MTRT_RuntimeClient client, int32_t *numDevices);
MLIR_CAPI_EXPORTED MTRT_Status mtrtRuntimeClientGetDevice(
    MTRT_RuntimeClient client, int32_t index, MTRT_Device *device);

-/// Retrieve the runtiem client that manages the specified memref.
+/// Retrieve the runtime client that manages the specified memref.
MLIR_CAPI_EXPORTED MTRT_RuntimeClient
mtrtMemRefGetClient(MTRT_MemRefValue memref);

+/// Retrieve the runtime client allocated cpu and gpu memory.
Collaborator: This isn't quite accurate. You're reporting the CPU/GPU memory that is being tracked by the RuntimeClient. It can track buffers that are externally allocated.
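One way the new doc comment could be reworded along the lines of this feedback (a sketch, not text from the PR):

/// Report the total CPU and GPU memory currently tracked by this client's
/// AllocTracker; tracked buffers may include externally allocated memory.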

+MTRT_Status mtrtReportAllocatedMemory(MTRT_RuntimeClient client,
+                                      int64_t *totalCpuMemory,
+                                      int64_t *totalGpuMemory);
Collaborator: Let's use uint64_t or size_t here.
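A minimal sketch of the declaration with that suggestion applied (using size_t; this is not what the commit currently does):

MTRT_Status mtrtReportAllocatedMemory(MTRT_RuntimeClient client,
                                      size_t *totalCpuMemory,
                                      size_t *totalGpuMemory);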


//===----------------------------------------------------------------------===//
// Data Transfer
//===----------------------------------------------------------------------===//
mlir-tensorrt/compiler/lib/CAPI/Runtime/Runtime.cpp (10 additions & 0 deletions)
@@ -640,6 +640,16 @@ MTRT_Status mtrtRuntimeClientGetDevice(MTRT_RuntimeClient client, int32_t index,
  return mtrtStatusGetOk();
}

+MTRT_Status mtrtReportAllocatedMemory(MTRT_RuntimeClient client,
+                                      int64_t *totalCpuMemory,
+                                      int64_t *totalGpuMemory) {
+  RuntimeClient *cppClient = unwrap(client);
+  auto const &allocated = cppClient->getAllocTracker().reportAllocatedMemory();
+  *totalCpuMemory = allocated.first;
+  *totalGpuMemory = allocated.second;
+  return mtrtStatusGetOk();
+}

//===----------------------------------------------------------------------===//
// MTRT_ScalarValue
//===----------------------------------------------------------------------===//
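For context, a hedged usage sketch of the C API entry point as added in this commit (error handling elided; `client` is assumed to be a valid, already-created MTRT_RuntimeClient):

int64_t totalCpuMemory = 0;
int64_t totalGpuMemory = 0;
// Both out-parameters receive byte counts tracked by the client.
MTRT_Status status =
    mtrtReportAllocatedMemory(client, &totalCpuMemory, &totalGpuMemory);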
@@ -773,6 +773,9 @@ class AllocTracker {
  /// Return true if the tracker's map contains `ptr`.
  bool contains(uintptr_t ptr) const;

+  /// Report total CPU and GPU memory allocated by runtime client.
+  std::pair<int64_t, int64_t> reportAllocatedMemory() const;
Collaborator: There actually could be more types than just these two, so I'd prefer we separate it into a struct or an array. The array could be indexed by all the potential values of PointerType.
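A rough sketch of that suggestion; the report type, its field name, and the enum-count constant kNumPointerTypes are assumptions for illustration, not part of this PR:

struct AllocatedMemoryReport {
  // Total tracked bytes for each PointerType value (host, pinned_host,
  // device, unified, ...), indexed by the enum's underlying integer value.
  std::array<uint64_t, kNumPointerTypes> bytesByType = {};
};

AllocatedMemoryReport reportAllocatedMemory() const;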


private:
  llvm::DenseMap<uintptr_t, PointerInfo> map;
};
mlir-tensorrt/executor/lib/Runtime/API/API.cpp (18 additions & 0 deletions)
@@ -429,6 +429,24 @@ PointerInfo AllocTracker::lookupOrDefault(uintptr_t ptr) const {
  return map.at(ptr);
}

+std::pair<int64_t, int64_t> AllocTracker::reportAllocatedMemory() const {
+  int64_t totalCpuMemory = 0;
Collaborator: We should use uint64_t here.

+  int64_t totalGpuMemory = 0;
+
+  for (const auto &entry : map) {
+    const PointerInfo &info = entry.second;
+    if (info.isExternallyManaged())
Collaborator (author): @christopherbate Is this sufficient for tracking only internally managed/allocated pointers?

+      continue;
+    if (info.type == PointerType::host || info.type == PointerType::pinned_host) {
+      totalCpuMemory += info.size;
+    } else if (info.type == PointerType::device || info.type == PointerType::unified) {
+      totalGpuMemory += info.size;
+    }
+  }
+
+  return {totalCpuMemory, totalGpuMemory};
+}

StatusOr<PointerInfo> runtime::allocate(AllocTracker &tracker, PointerType type,
                                        uint64_t size,
                                        std::optional<uint32_t> alignment,
mlir-tensorrt/python/bindings/Runtime/RuntimePyBind.cpp (12 additions & 1 deletion)
@@ -775,7 +775,18 @@ PYBIND11_MODULE(_api, m) {
            THROW_IF_MTRT_ERROR(s);
          },
          py::arg("device_memref"), py::arg("existing_host_memref"),
-         py::arg("stream") = py::none());
+         py::arg("stream") = py::none())
+      .def(
+          "report_allocated_memory",
+          [](PyRuntimeClient &self) {
+            int64_t totalGpuMemory;
+            int64_t totalCpuMemory;
+            MTRT_Status s = mtrtReportAllocatedMemory(self, &totalCpuMemory,
+                                                      &totalGpuMemory);
+            THROW_IF_MTRT_ERROR(s);
+            py::object namedtuple =
+                py::module::import("collections").attr("namedtuple");
+            py::object MemoryUsage =
+                namedtuple("MemoryUsage", "cpu_memory gpu_memory");
Collaborator: You'll need to update the stubs so users can see this type information in the IDE.

+            return MemoryUsage(totalCpuMemory, totalGpuMemory);
+          });
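A hedged sketch of what the corresponding stub (.pyi) additions might look like; the exact stub file location and class layout are assumptions:

from typing import NamedTuple

class MemoryUsage(NamedTuple):
    cpu_memory: int
    gpu_memory: int

class RuntimeClient:
    def report_allocated_memory(self) -> MemoryUsage: ...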

  py::class_<PyRuntimeValue>(m, "RuntimeValue", py::module_local())
      .def_property_readonly(MTRT_PYTHON_CAPI_PTR_ATTR,
@@ -3,6 +3,7 @@

import mlir_tensorrt.runtime.api as runtime
import numpy as np
+import cupy as cp

TESTS = []

@@ -190,6 +191,57 @@ def test_host_memref():
# CHECK-NEXT: PointerType.host
# CHECK-NEXT: mlir_tensorrt.compiler.api.MemRefValue._CAPIPtr


+@make_test
+def test_report_allocated_memory():
+    client = runtime.RuntimeClient()
+    devices = client.get_devices()
+
+    np_arr = np.ones((1000), dtype=np.int32)
+    cp_arr = cp.ones((1000), dtype=np.int32)
+
+    # Allocate GPU memory
+    memref = client.create_memref(np_arr, device=devices[0])
+    memory_usage = client.report_allocated_memory()
+    print("CPU Memory: ", memory_usage.cpu_memory)
+    print("GPU Memory: ", memory_usage.gpu_memory)
+
+    # Allocate CPU memory
+    memref = client.create_memref(np_arr)
+    memory_usage = client.report_allocated_memory()
+    print("CPU Memory: ", memory_usage.cpu_memory)
+    print("GPU Memory: ", memory_usage.gpu_memory)
+
+    # No CPU memory is allocated, since this creates a view.
+    memref = client.create_host_memref_view(
+        np_arr.ctypes.data, shape=list(np_arr.shape), dtype=runtime.ScalarTypeCode.i32
+    )
+    memory_usage = client.report_allocated_memory()
+    print("CPU Memory: ", memory_usage.cpu_memory)
+    print("GPU Memory: ", memory_usage.gpu_memory)
+
+    # No GPU memory is allocated, since this creates a view.
+    memref = client.create_device_memref_view(
+        cp_arr.data.ptr,
+        shape=list(cp_arr.shape),
+        dtype=runtime.ScalarTypeCode.i32,
+        device=devices[0],
+    )
+    memory_usage = client.report_allocated_memory()
+    print("CPU Memory: ", memory_usage.cpu_memory)
+    print("GPU Memory: ", memory_usage.gpu_memory)
+
+
+# CHECK-LABEL: Test: test_report_allocated_memory
+# CHECK: CPU Memory: 0
+# CHECK: GPU Memory: 4000
+# CHECK: CPU Memory: 4000
+# CHECK: GPU Memory: 0
+# CHECK: CPU Memory: 0
+# CHECK: GPU Memory: 0
+# CHECK: CPU Memory: 0
+# CHECK: GPU Memory: 0

if __name__ == "__main__":
    for t in TESTS:
        t()