devitocodes · guaacoelho · Oct 19, 2022 · Oct 19, 2022 · Oct 19, 2022 · Oct 19, 2022
diff --git a/devito/data/allocators.py b/devito/data/allocators.py
@@ -13,9 +13,15 @@
 from devito.parameters import configuration
 from devito.tools import dtype_to_ctype
 
+# try:
+#     from mpi4py import MPI  # noqa
+# except ImportError:
+#     MPI = None
+
+
 __all__ = ['ALLOC_FLAT', 'ALLOC_NUMA_LOCAL', 'ALLOC_NUMA_ANY',
            'ALLOC_KNL_MCDRAM', 'ALLOC_KNL_DRAM', 'ALLOC_GUARD',
-           'default_allocator']
+           'ALLOC_CUPY', 'default_allocator']
 
 
 class MemoryAllocator(object):
@@ -76,10 +82,13 @@ def alloc(self, shape, dtype):
         if c_pointer is None:
             raise RuntimeError("Unable to allocate %d elements in memory", str(size))
 
-        # cast to 1D array of the specified size
-        ctype_1d = ctype * size
-        buf = ctypes.cast(c_pointer, ctypes.POINTER(ctype_1d)).contents
-        pointer = np.frombuffer(buf, dtype=dtype)
+        if c_pointer:
+            # cast to 1D array of the specified size
+            ctype_1d = ctype * size
+            buf = ctypes.cast(c_pointer, ctypes.POINTER(ctype_1d)).contents
+            pointer = np.frombuffer(buf, dtype=dtype)
+        else:
+            pointer = np.empty(shape=(0), dtype=dtype)
         # pointer.reshape should not be used here because it may introduce a copy
         # From https://docs.scipy.org/doc/numpy/reference/generated/numpy.reshape.html:
         # It is not always possible to change the shape of an array without copying the
@@ -317,6 +326,51 @@ def put_local(self):
         return self._node == 'local'
 
 
+class CupyAllocator(MemoryAllocator):
+
+    """
+    Memory allocator based on Unified Memory concept. The allocation is made using Cupy.
+    """
+    _mempool = None
+
+    @classmethod
+    def initialize(cls):
+
+        try:
+            import cupy as cp
+            cls.lib = cp
+            cls._initialize_shared_memory()
+            try:
+                from mpi4py import MPI
+                cls.MPI = MPI
+                cls._set_device_for_mpi()
+            except:
+                cls.MPI = None
+        except:
+            cls.lib = None
+
+    @classmethod
+    def _initialize_shared_memory(cls):
+        cls._mempool = cls.lib.cuda.MemoryPool(cls.lib.cuda.malloc_managed)
+        cls.lib.cuda.set_allocator(cls._mempool.malloc)
+
+    @classmethod
+    def _set_device_for_mpi(cls):
+        if cls.MPI.Is_initialized():
+            n_gpu = cls.lib.cuda.runtime.getDeviceCount()
+            rank_l = cls.MPI.COMM_WORLD.Split_type(cls.MPI.COMM_TYPE_SHARED).Get_rank()
+            cls.lib.cuda.runtime.setDevice(rank_l % n_gpu)
+
+    def _alloc_C_libcall(self, size, ctype):
+        if not self.available():
+            raise ImportError("Couldn't initialize cupy or MPI elements of alocation")
+        mem_obj = self.lib.zeros(size, dtype=self.lib.float64)
+        return mem_obj.data.ptr, (mem_obj,)
+
+    def free(self, _):
+        self._mempool.free_all_blocks()
+
+
 class ExternalAllocator(MemoryAllocator):
 
     """
@@ -373,6 +427,7 @@ def alloc(self, shape, dtype):
 ALLOC_KNL_MCDRAM = NumaAllocator(1)
 ALLOC_NUMA_ANY = NumaAllocator('any')
 ALLOC_NUMA_LOCAL = NumaAllocator('local')
+ALLOC_CUPY = CupyAllocator()
 
 custom_allocators = {}
 """User-defined allocators."""

diff --git a/tests/test_data.py b/tests/test_data.py
@@ -1,8 +1,10 @@
 import pytest
 import numpy as np
+import cupy as cp
 
 from devito import (Grid, Function, TimeFunction, SparseTimeFunction, Dimension, # noqa
-                    Eq, Operator, ALLOC_GUARD, ALLOC_FLAT, configuration, switchconfig)
+                    Eq, Operator, ALLOC_GUARD, ALLOC_FLAT, ALLOC_CUPY,
+                    configuration, switchconfig)
 from devito.data import LEFT, RIGHT, Decomposition, loc_data_idx, convert_index
 from devito.tools import as_tuple
 from devito.types import Scalar
@@ -1473,6 +1475,52 @@ def test_gather_time_function(self):
             assert ans == np.array(None)
 
 
+class TestAllocators(object):
+
+    def test_uma_allocation(self):
+        """
+        Test Unified Memory allocation.
+        """
+        nt = 5
+        grid = Grid(shape=(4, 4, 4))
+
+        u = Function(name='u', grid=grid, allocator=ALLOC_CUPY)
+        u.data[:] = 5
+        address = u.data.ctypes.data
+        pointerAttr = cp.cuda.runtime.pointerGetAttributes(address)
+        assert pointerAttr.devicePointer == pointerAttr.hostPointer
+
+        v = TimeFunction(name='v', grid=grid, save=nt, allocator=ALLOC_CUPY)
+        v.data[:] = 5
+        address = v.data.ctypes.data
+        pointerAttr = cp.cuda.runtime.pointerGetAttributes(address)
+        assert pointerAttr.devicePointer == pointerAttr.hostPointer
+
+    def test_external_allocator(self):
+        shape = (2, 2)
+        space_order = 0
+        numpy_array = np.ones(shape, dtype=np.float32)
+        g = Grid(shape)
+        f = Function(name='f', space_order=space_order, grid=g,
+                     allocator=ExternalAllocator(numpy_array), initializer=lambda x: None)
+
+        # Ensure the two arrays have the same value
+        assert(np.array_equal(f.data, numpy_array))
+
+        # Ensure the original numpy array is unchanged
+        assert(np.array_equal(numpy_array, np.ones(shape, dtype=np.float32)))
+
+        # Change the underlying numpy array
+        numpy_array[:] = 3.
+        # Ensure the function.data changes too
+        assert(np.array_equal(f.data, numpy_array))
+
+        # Change the function.data
+        f.data[:] = 4.
+        # Ensure the underlying numpy array changes too
+        assert(np.array_equal(f.data, numpy_array))
+
+
 def test_scalar_arg_substitution():
     """
     Tests the relaxed (compared to other devito sympy subclasses)
@@ -1519,31 +1567,6 @@ def test_numpy_c_contiguous():
     assert(u._data_allocated.flags.c_contiguous)
 
 
-def test_external_allocator():
-    shape = (2, 2)
-    space_order = 0
-    numpy_array = np.ones(shape, dtype=np.float32)
-    g = Grid(shape)
-    f = Function(name='f', space_order=space_order, grid=g,
-                 allocator=ExternalAllocator(numpy_array), initializer=lambda x: None)
-
-    # Ensure the two arrays have the same value
-    assert(np.array_equal(f.data, numpy_array))
-
-    # Ensure the original numpy array is unchanged
-    assert(np.array_equal(numpy_array, np.ones(shape, dtype=np.float32)))
-
-    # Change the underlying numpy array
-    numpy_array[:] = 3.
-    # Ensure the function.data changes too
-    assert(np.array_equal(f.data, numpy_array))
-
-    # Change the function.data
-    f.data[:] = 4.
-    # Ensure the underlying numpy array changes too
-    assert(np.array_equal(f.data, numpy_array))
-
-
 def test_boolean_masking_array():
     """
     Test truth value of array, raised in Python 3.9 (MFE for issue #1788)