
Commit

DeviceMemoryUsage: add AMD support, use ProcessLogger for version.sh (#1060)

* DeviceMemoryUsage: only use on Nvidia, use ProcessLogger for version.sh

* don't force loading mpi4py for config checks

* fix Numpy actx spelling

* add DeviceMemoryUsageAMD

* clean up MPI actx handling
matthiasdiener committed Aug 28, 2024
1 parent 0d680df commit 263582b
Showing 3 changed files with 64 additions and 12 deletions.
2 changes: 1 addition & 1 deletion examples/run_examples.sh
@@ -16,7 +16,7 @@ function endgroup {

# }}}

python -c "from grudge.array_context import MPINumpyArrayConext" && numpy_actx_available=numpy || numpy_actx_available=
python -c "from grudge.array_context import MPINumpyArrayContext" && numpy_actx_available=numpy || numpy_actx_available=

echo "Numpy array context available: $numpy_actx_available"

27 changes: 22 additions & 5 deletions mirgecom/array_context.py
@@ -5,6 +5,7 @@
.. autofunction:: actx_class_is_eager
.. autofunction:: actx_class_is_profiling
.. autofunction:: actx_class_is_numpy
+.. autofunction:: actx_class_is_distributed
.. autofunction:: initialize_actx
"""

@@ -110,15 +111,24 @@ def actx_class_is_numpy(actx_class: Type[ArrayContext]) -> bool:
    return False


+def actx_class_is_distributed(actx_class: Type[ArrayContext]) -> bool:
+    """Return True if *actx_class* is distributed."""
+    from grudge.array_context import MPIBasedArrayContext
+    return issubclass(actx_class, MPIBasedArrayContext)
+
+
def actx_class_has_fallback_args(actx_class: Type[ArrayContext]) -> bool:
    """Return True if *actx_class* has fallback arguments."""
    import inspect
    spec = inspect.getfullargspec(actx_class.__init__)
    return "use_axis_tag_inference_fallback" in spec.args


-def _check_cache_dirs_node() -> None:
+def _check_cache_dirs_node(actx: ArrayContext) -> None:
    """Check whether multiple ranks share cache directories on the same node."""
+    if not actx_class_is_distributed(type(actx)):
+        return
+
    from mpi4py import MPI

    size = MPI.COMM_WORLD.Get_size()
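A hedged usage sketch of the new predicate follows; MPIPyOpenCLArrayContext and PyOpenCLArrayContext are assumed to be importable from grudge.array_context here, and any serial/MPI pair of context classes would do:

from grudge.array_context import MPIPyOpenCLArrayContext, PyOpenCLArrayContext
from mirgecom.array_context import actx_class_is_distributed

assert actx_class_is_distributed(MPIPyOpenCLArrayContext)   # MPI-based: True
assert not actx_class_is_distributed(PyOpenCLArrayContext)  # serial: False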
@@ -176,6 +186,9 @@ def _check_gpu_oversubscription(actx: ArrayContext) -> None:
    Only works with CUDA devices currently due to the use of the
    PCI_DOMAIN_ID_NV extension.
    """
+    if not actx_class_is_distributed(type(actx)):
+        return
+
    from mpi4py import MPI
    import pyopencl as cl

@@ -227,11 +240,15 @@ def _check_gpu_oversubscription(actx: ArrayContext) -> None:

def log_disk_cache_config(actx: ArrayContext) -> None:
    """Log the disk cache configuration."""
-    from mpi4py import MPI

    assert isinstance(actx, (PyOpenCLArrayContext, PytatoPyOpenCLArrayContext))

-    rank = MPI.COMM_WORLD.Get_rank()
+    if actx_class_is_distributed(type(actx)):
+        from grudge.array_context import MPIBasedArrayContext
+        assert isinstance(actx, MPIBasedArrayContext)
+        rank = actx.mpi_communicator.Get_rank()
+    else:
+        rank = 0

    res = f"Rank {rank} disk cache config: "

    from pyopencl.characterize import nv_compute_capability, get_pocl_version
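The rank selection above avoids importing mpi4py for serial runs. The same pattern, factored into a hypothetical standalone helper (not part of this commit), looks like:

from grudge.array_context import MPIBasedArrayContext

def _rank_for_logging(actx) -> int:
    # MPI-based contexts carry an mpi_communicator; serial contexts report rank 0.
    if isinstance(actx, MPIBasedArrayContext):
        return actx.mpi_communicator.Get_rank()
    return 0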
@@ -336,7 +353,7 @@ def initialize_actx(
    # or pocl, and therefore we don't need to examine their caching).
    if actx_class_is_pyopencl(actx_class):
        _check_gpu_oversubscription(actx)
-        _check_cache_dirs_node()
+        _check_cache_dirs_node(actx)
        log_disk_cache_config(actx)

    return actx
47 changes: 41 additions & 6 deletions mirgecom/logging_quantities.py
@@ -29,7 +29,8 @@
.. autoclass:: DiscretizationBasedQuantity
.. autoclass:: KernelProfile
.. autoclass:: PythonMemoryUsage
-.. autoclass:: DeviceMemoryUsage
+.. autoclass:: DeviceMemoryUsageCUDA
+.. autoclass:: DeviceMemoryUsageAMD
.. autofunction:: initialize_logmgr
.. autofunction:: logmgr_add_cl_device_info
.. autofunction:: logmgr_add_device_memory_usage
@@ -40,6 +41,8 @@
.. autofunction:: logmgr_set_time
"""

+import logging

from logpyle import (LogQuantity, PostLogQuantity, LogManager,
                     MultiPostLogQuantity, add_run_info,
                     add_general_quantities, add_simulation_quantities)
@@ -55,6 +58,9 @@
import grudge.op as oper
from typing import List


+logger = logging.getLogger(__name__)

MemPoolType = Union[cl.tools.MemoryPool, cl.tools.SVMPool]


@@ -114,9 +120,13 @@ def logmgr_add_device_name(logmgr: LogManager, queue: cl.CommandQueue): # noqa:
def logmgr_add_device_memory_usage(logmgr: LogManager, queue: cl.CommandQueue) \
        -> None:
    """Add the OpenCL device memory usage to the log."""
-    if not queue or not (queue.device.type & cl.device_type.GPU):
+    if not queue:
        return
-    logmgr.add_quantity(DeviceMemoryUsage())
+
+    if queue.device.vendor.lower().startswith("nvidia"):
+        logmgr.add_quantity(DeviceMemoryUsageCUDA())
+    elif queue.device.vendor.lower().startswith("advanced micro devices"):
+        logmgr.add_quantity(DeviceMemoryUsageAMD(queue.device))


def logmgr_add_mempool_usage(logmgr: LogManager, pool: MemPoolType) -> None:
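A minimal sketch of wiring the vendor-dispatched quantity into a logpyle LogManager; the context setup and the log file name are illustrative, not taken from this commit:

import pyopencl as cl
from logpyle import LogManager
from mirgecom.logging_quantities import logmgr_add_device_memory_usage

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

logmgr = LogManager("memory-usage.sqlite", "w")
# Adds DeviceMemoryUsageCUDA on Nvidia devices, DeviceMemoryUsageAMD on AMD
# devices, and nothing for other vendors.
logmgr_add_device_memory_usage(logmgr, queue)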
@@ -192,8 +202,10 @@ def add_package_versions(mgr: LogManager, path_to_version_sh: Optional[str] = No
warn("Could not find emirge's version.sh.")

else:
from pytools import ProcessLogger
try:
output = subprocess.check_output(path_to_version_sh)
with ProcessLogger(logger, "emirge's version.sh"):
output = subprocess.check_output(path_to_version_sh)
except OSError as e:
warn("Could not record emirge's package versions: " + str(e))

@@ -397,8 +409,8 @@ def __call__(self) -> float:
        return self.process.memory_info()[0] / 1024 / 1024


-class DeviceMemoryUsage(PostLogQuantity):
-    """Logging support for GPU memory usage (Nvidia only currently)."""
+class DeviceMemoryUsageCUDA(PostLogQuantity):
+    """Logging support for Nvidia CUDA GPU memory usage."""

    def __init__(self, name: Optional[str] = None) -> None:

@@ -442,6 +454,29 @@ def __call__(self) -> Optional[float]:
        return (total.value - free.value) / 1024 / 1024


+class DeviceMemoryUsageAMD(PostLogQuantity):
+    """Logging support for AMD GPU memory usage."""
+
+    def __init__(self, dev: cl.Device, name: Optional[str] = None) -> None:
+
+        if name is None:
+            name = "memory_usage_gpu"
+
+        super().__init__(name, "MByte", description="Memory usage (GPU)")
+
+        self.dev = dev
+        self.global_mem_size_mbyte = dev.global_mem_size / 1024 / 1024
+
+    def __call__(self) -> Optional[float]:
+        """Return the memory usage in MByte."""
+        # NB: dev.global_mem_size is in Bytes,
+        # dev.global_free_memory_amd is in KByte,
+        # the actual granularity of the returned values appears to be MByte
+        # (like in CUDA)
+
+        return self.global_mem_size_mbyte - self.dev.global_free_memory_amd[0] / 1024
+
+
class MempoolMemoryUsage(MultiPostLogQuantity):
    """Logging support for memory pool usage."""

