-
Notifications
You must be signed in to change notification settings - Fork 229
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
compiler: Unified Memory Allocator #2023
base: master
Are you sure you want to change the base?
Changes from all commits
0befd4e
db87362
ef1f368
ca806b3
50cd534
539254c
ddb5991
d337ac8
6511b06
ce12f56
3ce03ba
f4231e2
c4444a1
f3f90c1
41838ae
241e444
e724ffb
9379b31
7814a46
6df7a06
76dcdb1
6ad6611
92ba35c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,9 +13,10 @@ | |
from devito.parameters import configuration | ||
from devito.tools import dtype_to_ctype | ||
|
||
|
||
__all__ = ['ALLOC_FLAT', 'ALLOC_NUMA_LOCAL', 'ALLOC_NUMA_ANY', | ||
'ALLOC_KNL_MCDRAM', 'ALLOC_KNL_DRAM', 'ALLOC_GUARD', | ||
'default_allocator'] | ||
'ALLOC_CUPY', 'default_allocator'] | ||
|
||
|
||
class MemoryAllocator(object): | ||
|
@@ -76,10 +77,19 @@ def alloc(self, shape, dtype): | |
if c_pointer is None: | ||
raise RuntimeError("Unable to allocate %d elements in memory", str(size)) | ||
|
||
# cast to 1D array of the specified size | ||
ctype_1d = ctype * size | ||
buf = ctypes.cast(c_pointer, ctypes.POINTER(ctype_1d)).contents | ||
pointer = np.frombuffer(buf, dtype=dtype) | ||
if c_pointer: | ||
# cast to 1D array of the specified size | ||
ctype_1d = ctype * size | ||
buf = ctypes.cast(c_pointer, ctypes.POINTER(ctype_1d)).contents | ||
pointer = np.frombuffer(buf, dtype=dtype) | ||
|
||
# During the execution in MPI, domain splitting can generate a situation where | ||
# the allocated data size is zero, as we have observed with Sparse Functions. | ||
# When this occurs, Cupy returns a pointer with a value of zero. This | ||
# conditional statement was defined for this case. | ||
else: | ||
pointer = np.empty(shape=(0), dtype=dtype) | ||
|
||
# pointer.reshape should not be used here because it may introduce a copy | ||
# From https://docs.scipy.org/doc/numpy/reference/generated/numpy.reshape.html: | ||
# It is not always possible to change the shape of an array without copying the | ||
|
@@ -317,6 +327,51 @@ def put_local(self): | |
return self._node == 'local' | ||
|
||
|
||
class CupyAllocator(MemoryAllocator): | ||
|
||
""" | ||
Memory allocator based on Unified Memory concept. The allocation is made using Cupy. | ||
""" | ||
_mempool = None | ||
|
||
@classmethod | ||
def initialize(cls): | ||
|
||
try: | ||
import cupy as cp | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Move
and just check if None here. MPI should just be imported at the top from devito.mpi |
||
cls.lib = cp | ||
cls._initialize_shared_memory() | ||
try: | ||
from devito.mpi import MPI | ||
cls.MPI = MPI | ||
cls._set_device_for_mpi() | ||
except ImportError: | ||
cls.MPI = None | ||
except ImportError: | ||
cls.lib = None | ||
|
||
@classmethod | ||
def _initialize_shared_memory(cls): | ||
cls._mempool = cls.lib.cuda.MemoryPool(cls.lib.cuda.malloc_managed) | ||
cls.lib.cuda.set_allocator(cls._mempool.malloc) | ||
|
||
@classmethod | ||
def _set_device_for_mpi(cls): | ||
if cls.MPI.Is_initialized(): | ||
n_gpu = cls.lib.cuda.runtime.getDeviceCount() | ||
rank_l = cls.MPI.COMM_WORLD.Split_type(cls.MPI.COMM_TYPE_SHARED).Get_rank() | ||
cls.lib.cuda.runtime.setDevice(rank_l % n_gpu) | ||
|
||
def _alloc_C_libcall(self, size, ctype): | ||
if not self.available(): | ||
raise ImportError("Couldn't initialize cupy or MPI elements of alocation") | ||
mem_obj = self.lib.zeros(size, dtype=ctype) | ||
return mem_obj.data.ptr, (mem_obj,) | ||
|
||
def free(self, _): | ||
self._mempool.free_all_blocks() | ||
|
||
|
||
class ExternalAllocator(MemoryAllocator): | ||
|
||
""" | ||
|
@@ -373,6 +428,7 @@ def alloc(self, shape, dtype): | |
ALLOC_KNL_MCDRAM = NumaAllocator(1) | ||
ALLOC_NUMA_ANY = NumaAllocator('any') | ||
ALLOC_NUMA_LOCAL = NumaAllocator('local') | ||
ALLOC_CUPY = CupyAllocator() | ||
|
||
custom_allocators = {} | ||
"""User-defined allocators.""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,12 +2,14 @@ | |
import numpy as np | ||
|
||
from devito import (Grid, Function, TimeFunction, SparseTimeFunction, Dimension, # noqa | ||
Eq, Operator, ALLOC_GUARD, ALLOC_FLAT, configuration, switchconfig) | ||
Eq, Operator, ALLOC_GUARD, ALLOC_FLAT, ALLOC_CUPY, | ||
configuration, switchconfig) | ||
from devito.data import LEFT, RIGHT, Decomposition, loc_data_idx, convert_index | ||
from devito.tools import as_tuple | ||
from devito.types import Scalar | ||
from devito.data.allocators import ExternalAllocator | ||
|
||
from conftest import skipif | ||
|
||
class TestDataBasic(object): | ||
|
||
|
@@ -1473,6 +1475,55 @@ def test_gather_time_function(self): | |
assert ans == np.array(None) | ||
|
||
|
||
class TestAllocators(object): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. move to |
||
|
||
@skipif('nodevice') | ||
def test_uma_allocation(self): | ||
""" | ||
Test Unified Memory allocation. | ||
""" | ||
import cupy as cp | ||
|
||
nt = 5 | ||
grid = Grid(shape=(4, 4, 4)) | ||
|
||
u = Function(name='u', grid=grid, allocator=ALLOC_CUPY) | ||
u.data[:] = 5 | ||
address = u.data.ctypes.data | ||
pointerAttr = cp.cuda.runtime.pointerGetAttributes(address) | ||
assert pointerAttr.devicePointer == pointerAttr.hostPointer | ||
|
||
v = TimeFunction(name='v', grid=grid, save=nt, allocator=ALLOC_CUPY) | ||
v.data[:] = 5 | ||
address = v.data.ctypes.data | ||
pointerAttr = cp.cuda.runtime.pointerGetAttributes(address) | ||
assert pointerAttr.devicePointer == pointerAttr.hostPointer | ||
|
||
def test_external_allocator(self): | ||
shape = (2, 2) | ||
space_order = 0 | ||
numpy_array = np.ones(shape, dtype=np.float32) | ||
g = Grid(shape) | ||
f = Function(name='f', space_order=space_order, grid=g, | ||
allocator=ExternalAllocator(numpy_array), initializer=lambda x: None) | ||
|
||
# Ensure the two arrays have the same value | ||
assert(np.array_equal(f.data, numpy_array)) | ||
|
||
# Ensure the original numpy array is unchanged | ||
assert(np.array_equal(numpy_array, np.ones(shape, dtype=np.float32))) | ||
|
||
# Change the underlying numpy array | ||
numpy_array[:] = 3. | ||
# Ensure the function.data changes too | ||
assert(np.array_equal(f.data, numpy_array)) | ||
|
||
# Change the function.data | ||
f.data[:] = 4. | ||
# Ensure the underlying numpy array changes too | ||
assert(np.array_equal(f.data, numpy_array)) | ||
|
||
|
||
def test_scalar_arg_substitution(): | ||
""" | ||
Tests the relaxed (compared to other devito sympy subclasses) | ||
|
@@ -1519,31 +1570,6 @@ def test_numpy_c_contiguous(): | |
assert(u._data_allocated.flags.c_contiguous) | ||
|
||
|
||
def test_external_allocator(): | ||
shape = (2, 2) | ||
space_order = 0 | ||
numpy_array = np.ones(shape, dtype=np.float32) | ||
g = Grid(shape) | ||
f = Function(name='f', space_order=space_order, grid=g, | ||
allocator=ExternalAllocator(numpy_array), initializer=lambda x: None) | ||
|
||
# Ensure the two arrays have the same value | ||
assert(np.array_equal(f.data, numpy_array)) | ||
|
||
# Ensure the original numpy array is unchanged | ||
assert(np.array_equal(numpy_array, np.ones(shape, dtype=np.float32))) | ||
|
||
# Change the underlying numpy array | ||
numpy_array[:] = 3. | ||
# Ensure the function.data changes too | ||
assert(np.array_equal(f.data, numpy_array)) | ||
|
||
# Change the function.data | ||
f.data[:] = 4. | ||
# Ensure the underlying numpy array changes too | ||
assert(np.array_equal(f.data, numpy_array)) | ||
|
||
|
||
def test_boolean_masking_array(): | ||
""" | ||
Test truth value of array, raised in Python 3.9 (MFE for issue #1788) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How can we end up here? the
c_pointer is None
case is already aboveThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
During the execution in MPI, domain splitting can generate a situation where the allocated data size is zero, as we have observed with Sparse Functions. When this occurs, Cupy returns a pointer with a value of zero. This conditional statement was defined for this case.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you add a comment noting this, until some better solution is around?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We will push it, George.