DeviceMemoryUsage: add AMD support, use ProcessLogger for version.sh #1060

Merged 8 commits on Aug 28, 2024
Changes from 6 commits
2 changes: 1 addition & 1 deletion examples/run_examples.sh
@@ -16,7 +16,7 @@ function endgroup {

# }}}

python -c "from grudge.array_context import MPINumpyArrayConext" && numpy_actx_available=numpy || numpy_actx_available=
python -c "from grudge.array_context import MPINumpyArrayContext" && numpy_actx_available=numpy || numpy_actx_available=

echo "Numpy array context available: $numpy_actx_available"

17 changes: 15 additions & 2 deletions mirgecom/array_context.py
@@ -119,6 +119,10 @@ def actx_class_has_fallback_args(actx_class: Type[ArrayContext]) -> bool:

def _check_cache_dirs_node() -> None:
    """Check whether multiple ranks share cache directories on the same node."""
+    import sys
+    if "mpi4py" not in sys.modules:
+        return
+
    from mpi4py import MPI

    size = MPI.COMM_WORLD.Get_size()
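This guard, repeated in the two functions below, checks sys.modules rather than importing mpi4py outright, so a serial run that never imported mpi4py does not initialize MPI just for these diagnostics. A standalone sketch of the pattern, using a hypothetical helper name that is not part of this PR:

    import sys

    def get_rank_if_mpi_loaded() -> int:
        # Hypothetical helper: behave like rank 0 unless some other code has
        # already imported mpi4py (i.e., an MPI run is in progress).
        if "mpi4py" not in sys.modules:
            return 0
        from mpi4py import MPI
        return MPI.COMM_WORLD.Get_rank()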
@@ -176,6 +180,10 @@ def _check_gpu_oversubscription(actx: ArrayContext) -> None:
    Only works with CUDA devices currently due to the use of the
    PCI_DOMAIN_ID_NV extension.
    """
+    import sys
+    if "mpi4py" not in sys.modules:
+        return
+
    from mpi4py import MPI
    import pyopencl as cl

@@ -227,11 +235,16 @@ def _check_gpu_oversubscription(actx: ArrayContext) -> None:

def log_disk_cache_config(actx: ArrayContext) -> None:
    """Log the disk cache configuration."""
-    from mpi4py import MPI
+    import sys

    assert isinstance(actx, (PyOpenCLArrayContext, PytatoPyOpenCLArrayContext))

-    rank = MPI.COMM_WORLD.Get_rank()
+    if "mpi4py" in sys.modules:
+        from mpi4py import MPI
+        rank = MPI.COMM_WORLD.Get_rank()
+    else:
+        rank = 0

res = f"Rank {rank} disk cache config: "

from pyopencl.characterize import nv_compute_capability, get_pocl_version
47 changes: 41 additions & 6 deletions mirgecom/logging_quantities.py
@@ -29,7 +29,8 @@
.. autoclass:: DiscretizationBasedQuantity
.. autoclass:: KernelProfile
.. autoclass:: PythonMemoryUsage
-.. autoclass:: DeviceMemoryUsage
+.. autoclass:: DeviceMemoryUsageCUDA
+.. autoclass:: DeviceMemoryUsageAMD
.. autofunction:: initialize_logmgr
.. autofunction:: logmgr_add_cl_device_info
.. autofunction:: logmgr_add_device_memory_usage
@@ -40,6 +41,8 @@
.. autofunction:: logmgr_set_time
"""

+import logging

from logpyle import (LogQuantity, PostLogQuantity, LogManager,
MultiPostLogQuantity, add_run_info,
add_general_quantities, add_simulation_quantities)
@@ -55,6 +58,9 @@
import grudge.op as oper
from typing import List


+logger = logging.getLogger(__name__)

MemPoolType = Union[cl.tools.MemoryPool, cl.tools.SVMPool]


@@ -114,9 +120,13 @@ def logmgr_add_device_name(logmgr: LogManager, queue: cl.CommandQueue): # noqa:
def logmgr_add_device_memory_usage(logmgr: LogManager, queue: cl.CommandQueue) \
        -> None:
    """Add the OpenCL device memory usage to the log."""
-    if not queue or not (queue.device.type & cl.device_type.GPU):
+    if not queue:
        return
-    logmgr.add_quantity(DeviceMemoryUsage())

+    if queue.device.vendor.lower().startswith("nvidia"):
+        logmgr.add_quantity(DeviceMemoryUsageCUDA())
+    elif queue.device.vendor.lower().startswith("advanced micro devices"):
+        logmgr.add_quantity(DeviceMemoryUsageAMD(queue.device))


def logmgr_add_mempool_usage(logmgr: LogManager, pool: MemPoolType) -> None:
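With the vendor dispatch added to logmgr_add_device_memory_usage above, a driver that already calls this function picks up the vendor-appropriate quantity without changes. A minimal usage sketch; the command queue `queue` is assumed to exist already and the log file name is made up:

    from logpyle import LogManager
    from mirgecom.logging_quantities import logmgr_add_device_memory_usage

    logmgr = LogManager("mem-usage.sqlite", "wo")  # made-up file name
    # `queue` is an existing pyopencl.CommandQueue created elsewhere by the driver.
    logmgr_add_device_memory_usage(logmgr, queue)
    # Nvidia devices register DeviceMemoryUsageCUDA, AMD devices register
    # DeviceMemoryUsageAMD; other vendors add no quantity.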
@@ -192,8 +202,10 @@ def add_package_versions(mgr: LogManager, path_to_version_sh: Optional[str] = No
warn("Could not find emirge's version.sh.")

else:
from pytools import ProcessLogger
try:
output = subprocess.check_output(path_to_version_sh)
with ProcessLogger(logger, "emirge's version.sh"):
output = subprocess.check_output(path_to_version_sh)
except OSError as e:
warn("Could not record emirge's package versions: " + str(e))

@@ -397,8 +409,8 @@ def __call__(self) -> float:
        return self.process.memory_info()[0] / 1024 / 1024


-class DeviceMemoryUsage(PostLogQuantity):
-    """Logging support for GPU memory usage (Nvidia only currently)."""
+class DeviceMemoryUsageCUDA(PostLogQuantity):
+    """Logging support for Nvidia CUDA GPU memory usage."""

    def __init__(self, name: Optional[str] = None) -> None:

@@ -442,6 +454,29 @@ def __call__(self) -> Optional[float]:
        return (total.value - free.value) / 1024 / 1024


+class DeviceMemoryUsageAMD(PostLogQuantity):
+    """Logging support for AMD GPU memory usage."""
+
+    def __init__(self, dev: cl.Device, name: Optional[str] = None) -> None:
+
+        if name is None:
+            name = "memory_usage_gpu"
+
+        super().__init__(name, "MByte", description="Memory usage (GPU)")
+
+        self.dev = dev
+        self.global_mem_size_mbyte = dev.global_mem_size / 1024 / 1024
+
+    def __call__(self) -> Optional[float]:
+        """Return the memory usage in MByte."""
+        # NB: dev.global_mem_size is in Bytes,
+        # dev.global_free_memory_amd is in KByte,
+        # the actual granularity of the returned values appears to be MByte
+        # (like in CUDA)
+
+        return self.global_mem_size_mbyte - self.dev.global_free_memory_amd[0] / 1024
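To make the mixed units in DeviceMemoryUsageAMD.__call__ concrete, the same arithmetic with made-up numbers for a 64-GiB device:

    # Made-up numbers illustrating the unit conversions above:
    global_mem_size_byte = 64 * 1024**3   # dev.global_mem_size is in bytes
    free_kbyte = 60 * 1024**2             # dev.global_free_memory_amd[0] is in KByte

    global_mem_size_mbyte = global_mem_size_byte / 1024 / 1024  # 65536.0 MByte
    used_mbyte = global_mem_size_mbyte - free_kbyte / 1024      # 65536.0 - 61440.0 = 4096.0 MByte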


class MempoolMemoryUsage(MultiPostLogQuantity):
    """Logging support for memory pool usage."""
