DeviceMemoryUsage: add AMD support, use ProcessLogger for version.sh #1060

Merged · 8 commits · Aug 28, 2024
2 changes: 1 addition & 1 deletion examples/run_examples.sh
@@ -16,7 +16,7 @@ function endgroup {

# }}}

-python -c "from grudge.array_context import MPINumpyArrayConext" && numpy_actx_available=numpy || numpy_actx_available=
+python -c "from grudge.array_context import MPINumpyArrayContext" && numpy_actx_available=numpy || numpy_actx_available=

echo "Numpy array context available: $numpy_actx_available"

27 changes: 22 additions & 5 deletions mirgecom/array_context.py
@@ -5,6 +5,7 @@
.. autofunction:: actx_class_is_eager
.. autofunction:: actx_class_is_profiling
.. autofunction:: actx_class_is_numpy
+.. autofunction:: actx_class_is_distributed
.. autofunction:: initialize_actx
"""

@@ -110,15 +111,24 @@ def actx_class_is_numpy(actx_class: Type[ArrayContext]) -> bool:
        return False


+def actx_class_is_distributed(actx_class: Type[ArrayContext]) -> bool:
+    """Return True if *actx_class* is distributed."""
+    from grudge.array_context import MPIBasedArrayContext
+    return issubclass(actx_class, MPIBasedArrayContext)


def actx_class_has_fallback_args(actx_class: Type[ArrayContext]) -> bool:
    """Return True if *actx_class* has fallback arguments."""
    import inspect
    spec = inspect.getfullargspec(actx_class.__init__)
    return "use_axis_tag_inference_fallback" in spec.args


-def _check_cache_dirs_node() -> None:
+def _check_cache_dirs_node(actx: ArrayContext) -> None:
    """Check whether multiple ranks share cache directories on the same node."""
+    if not actx_class_is_distributed(type(actx)):
+        return

    from mpi4py import MPI

    size = MPI.COMM_WORLD.Get_size()
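The guard added to `_check_cache_dirs_node` relies on `actx_class_is_distributed` to skip MPI-only work for serial array contexts. A minimal sketch of that pattern, using only the `MPIBasedArrayContext` name that appears in this diff (the helper and check names below are hypothetical):

```python
from grudge.array_context import MPIBasedArrayContext


def _is_distributed_actx(actx) -> bool:
    # Same test as actx_class_is_distributed(type(actx)) above.
    return isinstance(actx, MPIBasedArrayContext)


def check_something_per_node(actx) -> None:
    """Hypothetical node-level check that becomes a no-op for serial contexts."""
    if not _is_distributed_actx(actx):
        return
    from mpi4py import MPI
    print(f"rank {MPI.COMM_WORLD.Get_rank()} of {MPI.COMM_WORLD.Get_size()}")
```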
@@ -176,6 +186,9 @@ def _check_gpu_oversubscription(actx: ArrayContext) -> None:
    Only works with CUDA devices currently due to the use of the
    PCI_DOMAIN_ID_NV extension.
    """
+    if not actx_class_is_distributed(type(actx)):
+        return

    from mpi4py import MPI
    import pyopencl as cl

@@ -227,11 +240,15 @@ def _check_gpu_oversubscription(actx: ArrayContext) -> None:

def log_disk_cache_config(actx: ArrayContext) -> None:
    """Log the disk cache configuration."""
-    from mpi4py import MPI

    assert isinstance(actx, (PyOpenCLArrayContext, PytatoPyOpenCLArrayContext))

-    rank = MPI.COMM_WORLD.Get_rank()
+    if actx_class_is_distributed(type(actx)):
+        from grudge.array_context import MPIBasedArrayContext
+        assert isinstance(actx, MPIBasedArrayContext)
+        rank = actx.mpi_communicator.Get_rank()
+    else:
+        rank = 0

    res = f"Rank {rank} disk cache config: "

    from pyopencl.characterize import nv_compute_capability, get_pocl_version
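The rank handling above (communicator rank when distributed, 0 otherwise) can be read as a small standalone helper; a hedged sketch, where the `getattr` fallback is an illustration rather than code from this PR (`mpi_communicator` is the attribute used above):

```python
def _rank_for_logging(actx) -> int:
    """Rank to label log output with: communicator rank if distributed, else 0."""
    comm = getattr(actx, "mpi_communicator", None)
    return comm.Get_rank() if comm is not None else 0
```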
@@ -336,7 +353,7 @@ def initialize_actx(
    # or pocl, and therefore we don't need to examine their caching).
    if actx_class_is_pyopencl(actx_class):
        _check_gpu_oversubscription(actx)
-        _check_cache_dirs_node()
+        _check_cache_dirs_node(actx)
        log_disk_cache_config(actx)

    return actx
47 changes: 41 additions & 6 deletions mirgecom/logging_quantities.py
@@ -29,7 +29,8 @@
.. autoclass:: DiscretizationBasedQuantity
.. autoclass:: KernelProfile
.. autoclass:: PythonMemoryUsage
-.. autoclass:: DeviceMemoryUsage
+.. autoclass:: DeviceMemoryUsageCUDA
+.. autoclass:: DeviceMemoryUsageAMD
.. autofunction:: initialize_logmgr
.. autofunction:: logmgr_add_cl_device_info
.. autofunction:: logmgr_add_device_memory_usage
@@ -40,6 +41,8 @@
.. autofunction:: logmgr_set_time
"""

+import logging

from logpyle import (LogQuantity, PostLogQuantity, LogManager,
                     MultiPostLogQuantity, add_run_info,
                     add_general_quantities, add_simulation_quantities)
@@ -55,6 +58,9 @@
import grudge.op as oper
from typing import List


+logger = logging.getLogger(__name__)

MemPoolType = Union[cl.tools.MemoryPool, cl.tools.SVMPool]


@@ -114,9 +120,13 @@ def logmgr_add_device_name(logmgr: LogManager, queue: cl.CommandQueue): # noqa:
def logmgr_add_device_memory_usage(logmgr: LogManager, queue: cl.CommandQueue) \
        -> None:
    """Add the OpenCL device memory usage to the log."""
-    if not queue or not (queue.device.type & cl.device_type.GPU):
+    if not queue:
        return
-    logmgr.add_quantity(DeviceMemoryUsage())

+    if queue.device.vendor.lower().startswith("nvidia"):
+        logmgr.add_quantity(DeviceMemoryUsageCUDA())
+    elif queue.device.vendor.lower().startswith("advanced micro devices"):
+        logmgr.add_quantity(DeviceMemoryUsageAMD(queue.device))


def logmgr_add_mempool_usage(logmgr: LogManager, pool: MemPoolType) -> None:
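For reference, the dispatch added to `logmgr_add_device_memory_usage` keys off the OpenCL device vendor string. A small hedged probe of that logic (the vendor prefixes are the same heuristics used above; `create_some_context` just picks whatever device is available):

```python
import pyopencl as cl

ctx = cl.create_some_context()
dev = cl.CommandQueue(ctx).device
vendor = dev.vendor.lower()

if vendor.startswith("nvidia"):
    print("Nvidia device: DeviceMemoryUsageCUDA would be added")
elif vendor.startswith("advanced micro devices"):
    print("AMD device: DeviceMemoryUsageAMD would be added")
else:
    print(f"no GPU memory quantity for vendor {dev.vendor!r}")
```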
@@ -192,8 +202,10 @@ def add_package_versions(mgr: LogManager, path_to_version_sh: Optional[str] = No
        warn("Could not find emirge's version.sh.")

    else:
+        from pytools import ProcessLogger
        try:
-            output = subprocess.check_output(path_to_version_sh)
+            with ProcessLogger(logger, "emirge's version.sh"):
+                output = subprocess.check_output(path_to_version_sh)
        except OSError as e:
            warn("Could not record emirge's package versions: " + str(e))

@@ -397,8 +409,8 @@ def __call__(self) -> float:
        return self.process.memory_info()[0] / 1024 / 1024


-class DeviceMemoryUsage(PostLogQuantity):
-    """Logging support for GPU memory usage (Nvidia only currently)."""
+class DeviceMemoryUsageCUDA(PostLogQuantity):
+    """Logging support for Nvidia CUDA GPU memory usage."""

    def __init__(self, name: Optional[str] = None) -> None:

@@ -442,6 +454,29 @@ def __call__(self) -> Optional[float]:
        return (total.value - free.value) / 1024 / 1024


+class DeviceMemoryUsageAMD(PostLogQuantity):
+    """Logging support for AMD GPU memory usage."""
+
+    def __init__(self, dev: cl.Device, name: Optional[str] = None) -> None:
+
+        if name is None:
+            name = "memory_usage_gpu"
+
+        super().__init__(name, "MByte", description="Memory usage (GPU)")
+
+        self.dev = dev
+        self.global_mem_size_mbyte = dev.global_mem_size / 1024 / 1024
+
+    def __call__(self) -> Optional[float]:
+        """Return the memory usage in MByte."""
+        # NB: dev.global_mem_size is in Bytes,
+        # dev.global_free_memory_amd is in KByte,
+        # the actual granularity of the returned values appears to be MByte
+        # (like in CUDA)
+
+        return self.global_mem_size_mbyte - self.dev.global_free_memory_amd[0] / 1024


class MempoolMemoryUsage(MultiPostLogQuantity):
    """Logging support for memory pool usage."""
